From a1a4caeebac95875eaf6c8afb5a9784566484b2e Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 5 Jun 2018 08:40:13 +0300 Subject: IB/core: Do not set the gid type when reserving default entries When default GIDs are added, their gid type is set by ib_cache_gid_set_default_gid(). There is no need to set the gid type of a free GID entry during GID table initialization. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 81d66f56e38f..0415548eb5f3 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -792,7 +792,6 @@ static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port, unsigned int i; unsigned long roce_gid_type_mask; unsigned int num_default_gids; - unsigned int current_gid = 0; roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port); num_default_gids = hweight_long(roce_gid_type_mask); @@ -800,10 +799,6 @@ static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port, struct ib_gid_table_entry *entry = &table->data_vec[i]; entry->props |= GID_TABLE_ENTRY_DEFAULT; - current_gid = find_next_bit(&roce_gid_type_mask, - BITS_PER_LONG, - current_gid); - entry->attr.gid_type = current_gid++; } } -- cgit From 1c36cf912ad19c99592c7d089aed5d1c321a678a Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 5 Jun 2018 08:40:14 +0300 Subject: IB/core: Store default GID property per-table instead of per-entry There are at max one or two default GIDs for RoCE. Instead of storing a default GID property for all the GIDs, store default GID indices as individual bit per table. This allows a future simplification to get rid of the GID property field. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 0415548eb5f3..d4751f94a93a 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -68,7 +68,6 @@ enum gid_attr_find_mask { enum gid_table_entry_props { GID_TABLE_ENTRY_INVALID = 1UL << 0, - GID_TABLE_ENTRY_DEFAULT = 1UL << 1, }; struct ib_gid_table_entry { @@ -79,7 +78,7 @@ struct ib_gid_table_entry { }; struct ib_gid_table { - int sz; + int sz; /* In RoCE, adding a GID to the table requires: * (a) Find if this GID is already exists. * (b) Find a free space. @@ -94,10 +93,12 @@ struct ib_gid_table { * rwlock. readers must hold only rwlock. All writers must be in a * sleepable context. */ - struct mutex lock; + struct mutex lock; /* rwlock protects data_vec[ix]->props. */ - rwlock_t rwlock; - struct ib_gid_table_entry *data_vec; + rwlock_t rwlock; + /* bit field, each bit indicates the index of default GID */ + u32 default_gid_indices; + struct ib_gid_table_entry *data_vec; }; static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port) @@ -135,6 +136,19 @@ bool rdma_is_zero_gid(const union ib_gid *gid) } EXPORT_SYMBOL(rdma_is_zero_gid); +/** is_gid_index_default - Check if a given index belongs to + * reserved default GIDs or not. + * @table: GID table pointer + * @index: Index to check in GID table + * Returns true if index is one of the reserved default GID index otherwise + * returns false. 
+ */ +static bool is_gid_index_default(const struct ib_gid_table *table, + unsigned int index) +{ + return index < 32 && (BIT(index) & table->default_gid_indices); +} + int ib_cache_gid_parse_type_str(const char *buf) { unsigned int i; @@ -308,7 +322,7 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, if (pempty && empty < 0) { if (data->props & GID_TABLE_ENTRY_INVALID && (default_gid == - !!(data->props & GID_TABLE_ENTRY_DEFAULT))) { + is_gid_index_default(table, curr_index))) { /* * Found an invalid (free) entry; allocate it. * If default GID is requested, then our @@ -346,8 +360,7 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, continue; if (mask & GID_ATTR_FIND_MASK_DEFAULT && - !!(data->props & GID_TABLE_ENTRY_DEFAULT) != - default_gid) + is_gid_index_default(table, curr_index) != default_gid) continue; found = curr_index; @@ -795,11 +808,9 @@ static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port, roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port); num_default_gids = hweight_long(roce_gid_type_mask); - for (i = 0; i < num_default_gids && i < table->sz; i++) { - struct ib_gid_table_entry *entry = &table->data_vec[i]; - - entry->props |= GID_TABLE_ENTRY_DEFAULT; - } + /* Reserve starting indices for default GIDs */ + for (i = 0; i < num_default_gids && i < table->sz; i++) + table->default_gid_indices |= BIT(i); } -- cgit From b150c3862d21a4a9ce0f26d8067b9dcd41e2050c Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 5 Jun 2018 08:40:15 +0300 Subject: IB/core: Introduce GID entry reference counts In order to be able to expose pointers to the ib_gid_attrs in the GID table we need to make it so the value of the pointer cannot be changed. Thus each GID table entry gets a unique piece of kref'd memory that is written only during initialization and remains constant for its lifetime. This eventually will allow the struct ib_gid_attrs to be returned without copy from many of query the APIs, but it also provides a way to track when all users of a HW table index go away. For roce we no longer allow an in-use HW table index to be re-used for a new an different entry. When a GID table entry needs to be removed it is hidden from the find API, but remains as a valid HW index and all ib_gid_attr points remain valid. The HW index is not relased until all users put the kref. Later patches will broadly replace the use of the sgid_index integer with the kref'd structure. Ultimately this will prevent security problems where the OS changes the properties of a HW GID table entry while an active user object is still using the entry. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 323 +++++++++++++++++++++++++++------------- include/rdma/ib_verbs.h | 1 + 2 files changed, 221 insertions(+), 103 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index d4751f94a93a..09d83c69ec65 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -66,15 +66,24 @@ enum gid_attr_find_mask { GID_ATTR_FIND_MASK_GID_TYPE = 1UL << 3, }; -enum gid_table_entry_props { - GID_TABLE_ENTRY_INVALID = 1UL << 0, +enum gid_table_entry_state { + GID_TABLE_ENTRY_INVALID = 1, + GID_TABLE_ENTRY_VALID = 2, + /* + * Indicates that entry is pending to be removed, there may + * be active users of this GID entry. + * When last user of the GID entry releases reference to it, + * GID entry is detached from the table. 
+ */ + GID_TABLE_ENTRY_PENDING_DEL = 3, }; struct ib_gid_table_entry { - unsigned long props; - union ib_gid gid; - struct ib_gid_attr attr; - void *context; + struct kref kref; + struct work_struct del_work; + struct ib_gid_attr attr; + void *context; + enum gid_table_entry_state state; }; struct ib_gid_table { @@ -90,15 +99,16 @@ struct ib_gid_table { * **/ /* Any writer to data_vec must hold this lock and the write side of - * rwlock. readers must hold only rwlock. All writers must be in a + * rwlock. Readers must hold only rwlock. All writers must be in a * sleepable context. */ struct mutex lock; - /* rwlock protects data_vec[ix]->props. */ + /* rwlock protects data_vec[ix]->state and entry pointer. + */ rwlock_t rwlock; + struct ib_gid_table_entry **data_vec; /* bit field, each bit indicates the index of default GID */ u32 default_gid_indices; - struct ib_gid_table_entry *data_vec; }; static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port) @@ -178,26 +188,113 @@ static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port) return device->cache.ports[port - rdma_start_port(device)].gid; } -static void del_roce_gid(struct ib_device *device, u8 port_num, - struct ib_gid_table *table, int ix) +static bool is_gid_entry_free(const struct ib_gid_table_entry *entry) +{ + return !entry; +} + +static bool is_gid_entry_valid(const struct ib_gid_table_entry *entry) +{ + return entry && entry->state == GID_TABLE_ENTRY_VALID; +} + +static void schedule_free_gid(struct kref *kref) +{ + struct ib_gid_table_entry *entry = + container_of(kref, struct ib_gid_table_entry, kref); + + queue_work(ib_wq, &entry->del_work); +} + +static void free_gid_entry(struct ib_gid_table_entry *entry) { + struct ib_device *device = entry->attr.device; + u8 port_num = entry->attr.port_num; + struct ib_gid_table *table = rdma_gid_table(device, port_num); + pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, - device->name, port_num, ix, - table->data_vec[ix].gid.raw); + device->name, port_num, entry->attr.index, + entry->attr.gid.raw); + + mutex_lock(&table->lock); + if (rdma_cap_roce_gid_table(device, port_num) && + entry->state != GID_TABLE_ENTRY_INVALID) + device->del_gid(&entry->attr, &entry->context); + write_lock_irq(&table->rwlock); - if (rdma_cap_roce_gid_table(device, port_num)) - device->del_gid(&table->data_vec[ix].attr, - &table->data_vec[ix].context); - dev_put(table->data_vec[ix].attr.ndev); + /* + * The only way to avoid overwriting NULL in table is + * by comparing if it is same entry in table or not! + * If new entry in table is added by the time we free here, + * don't overwrite the table entry. + */ + if (entry == table->data_vec[entry->attr.index]) + table->data_vec[entry->attr.index] = NULL; + /* Now this index is ready to be allocated */ + write_unlock_irq(&table->rwlock); + mutex_unlock(&table->lock); + + if (entry->attr.ndev) + dev_put(entry->attr.ndev); + kfree(entry); +} + +/** + * free_gid_work - Release reference to the GID entry + * @work: Work structure to refer to GID entry which needs to be + * deleted. + * + * free_gid_work() frees the entry from the HCA's hardware table + * if provider supports it. It releases reference to netdevice. 
+ */ +static void free_gid_work(struct work_struct *work) +{ + struct ib_gid_table_entry *entry = + container_of(work, struct ib_gid_table_entry, del_work); + free_gid_entry(entry); } -static int add_roce_gid(struct ib_gid_table *table, - const union ib_gid *gid, - const struct ib_gid_attr *attr) +static struct ib_gid_table_entry * +alloc_gid_entry(const struct ib_gid_attr *attr) { struct ib_gid_table_entry *entry; - int ix = attr->index; - int ret = 0; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return NULL; + kref_init(&entry->kref); + memcpy(&entry->attr, attr, sizeof(*attr)); + if (entry->attr.ndev) + dev_hold(entry->attr.ndev); + INIT_WORK(&entry->del_work, free_gid_work); + entry->state = GID_TABLE_ENTRY_INVALID; + return entry; +} + +static void store_gid_entry(struct ib_gid_table *table, + struct ib_gid_table_entry *entry) +{ + entry->state = GID_TABLE_ENTRY_VALID; + + pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, + entry->attr.device->name, entry->attr.port_num, + entry->attr.index, entry->attr.gid.raw); + + lockdep_assert_held(&table->lock); + write_lock_irq(&table->rwlock); + table->data_vec[entry->attr.index] = entry; + write_unlock_irq(&table->rwlock); +} + +static void put_gid_entry(struct ib_gid_table_entry *entry) +{ + kref_put(&entry->kref, schedule_free_gid); +} + +static int add_roce_gid(struct ib_gid_table_entry *entry) +{ + const struct ib_gid_attr *attr = &entry->attr; + int ret; if (!attr->ndev) { pr_err("%s NULL netdev device=%s port=%d index=%d\n", @@ -205,38 +302,22 @@ static int add_roce_gid(struct ib_gid_table *table, attr->index); return -EINVAL; } - - entry = &table->data_vec[ix]; - if ((entry->props & GID_TABLE_ENTRY_INVALID) == 0) { - WARN(1, "GID table corruption device=%s port=%d index=%d\n", - attr->device->name, attr->port_num, - attr->index); - return -EINVAL; - } - if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) { - ret = attr->device->add_gid(gid, attr, &entry->context); + ret = attr->device->add_gid(&attr->gid, attr, &entry->context); if (ret) { pr_err("%s GID add failed device=%s port=%d index=%d\n", __func__, attr->device->name, attr->port_num, attr->index); - goto add_err; + return ret; } } - dev_hold(attr->ndev); - -add_err: - if (!ret) - pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, - attr->device->name, attr->port_num, ix, gid->raw); - return ret; + return 0; } /** * add_modify_gid - Add or modify GID table entry * * @table: GID table in which GID to be added or modified - * @gid: GID content * @attr: Attributes of the GID * * Returns 0 on success or appropriate error code. It accepts zero @@ -244,34 +325,42 @@ add_err: * GID. However such zero GIDs are not added to the cache. */ static int add_modify_gid(struct ib_gid_table *table, - const union ib_gid *gid, const struct ib_gid_attr *attr) { - int ret; + struct ib_gid_table_entry *entry; + int ret = 0; + + /* + * Invalidate any old entry in the table to make it safe to write to + * this index. + */ + if (is_gid_entry_valid(table->data_vec[attr->index])) + put_gid_entry(table->data_vec[attr->index]); + + /* + * Some HCA's report multiple GID entries with only one valid GID, and + * leave other unused entries as the zero GID. Convert zero GIDs to + * empty table entries instead of storing them. 
+ */ + if (rdma_is_zero_gid(&attr->gid)) + return 0; + + entry = alloc_gid_entry(attr); + if (!entry) + return -ENOMEM; if (rdma_protocol_roce(attr->device, attr->port_num)) { - ret = add_roce_gid(table, gid, attr); + ret = add_roce_gid(entry); if (ret) - return ret; - } else { - /* - * Some HCA's report multiple GID entries with only one - * valid GID, but remaining as zero GID. - * So ignore such behavior for IB link layer and don't - * fail the call, but don't add such entry to GID cache. - */ - if (rdma_is_zero_gid(gid)) - return 0; + goto done; } - lockdep_assert_held(&table->lock); - memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid)); - memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr)); - - write_lock_irq(&table->rwlock); - table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID; - write_unlock_irq(&table->rwlock); + store_gid_entry(table, entry); return 0; + +done: + put_gid_entry(entry); + return ret; } /** @@ -286,16 +375,25 @@ static int add_modify_gid(struct ib_gid_table *table, static void del_gid(struct ib_device *ib_dev, u8 port, struct ib_gid_table *table, int ix) { + struct ib_gid_table_entry *entry; + lockdep_assert_held(&table->lock); + + pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, + ib_dev->name, port, ix, + table->data_vec[ix]->attr.gid.raw); + write_lock_irq(&table->rwlock); - table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID; + entry = table->data_vec[ix]; + entry->state = GID_TABLE_ENTRY_PENDING_DEL; + /* + * For non RoCE protocol, GID entry slot is ready to use. + */ + if (!rdma_protocol_roce(ib_dev, port)) + table->data_vec[ix] = NULL; write_unlock_irq(&table->rwlock); - if (rdma_protocol_roce(ib_dev, port)) - del_roce_gid(ib_dev, port, table, ix); - memset(&table->data_vec[ix].gid, 0, sizeof(table->data_vec[ix].gid)); - memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr)); - table->data_vec[ix].context = NULL; + put_gid_entry(entry); } /* rwlock should be read locked, or lock should be held */ @@ -308,8 +406,8 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, int empty = pempty ? -1 : 0; while (i < table->sz && (found < 0 || empty < 0)) { - struct ib_gid_table_entry *data = &table->data_vec[i]; - struct ib_gid_attr *attr = &data->attr; + struct ib_gid_table_entry *data = table->data_vec[i]; + struct ib_gid_attr *attr; int curr_index = i; i++; @@ -320,9 +418,9 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, * so lookup free slot only if requested. */ if (pempty && empty < 0) { - if (data->props & GID_TABLE_ENTRY_INVALID && - (default_gid == - is_gid_index_default(table, curr_index))) { + if (is_gid_entry_free(data) && + default_gid == + is_gid_index_default(table, curr_index)) { /* * Found an invalid (free) entry; allocate it. * If default GID is requested, then our @@ -337,22 +435,23 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, /* * Additionally find_gid() is used to find valid entry during - * lookup operation, where validity needs to be checked. So - * find the empty entry first to continue to search for a free - * slot and ignore its INVALID flag. + * lookup operation; so ignore the entries which are marked as + * pending for removal and the entries which are marked as + * invalid. 
*/ - if (data->props & GID_TABLE_ENTRY_INVALID) + if (!is_gid_entry_valid(data)) continue; if (found >= 0) continue; + attr = &data->attr; if (mask & GID_ATTR_FIND_MASK_GID_TYPE && attr->gid_type != val->gid_type) continue; if (mask & GID_ATTR_FIND_MASK_GID && - memcmp(gid, &data->gid, sizeof(*gid))) + memcmp(gid, &data->attr.gid, sizeof(*gid))) continue; if (mask & GID_ATTR_FIND_MASK_NETDEV && @@ -409,7 +508,8 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port, attr->device = ib_dev; attr->index = empty; attr->port_num = port; - ret = add_modify_gid(table, gid, attr); + attr->gid = *gid; + ret = add_modify_gid(table, attr); if (!ret) dispatch_gid_change_event(ib_dev, port); @@ -505,7 +605,8 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, mutex_lock(&table->lock); for (ix = 0; ix < table->sz; ix++) { - if (table->data_vec[ix].attr.ndev == ndev) { + if (is_gid_entry_valid(table->data_vec[ix]) && + table->data_vec[ix]->attr.ndev == ndev) { del_gid(ib_dev, port, table, ix); deleted = true; } @@ -529,12 +630,13 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index, if (index < 0 || index >= table->sz) return -EINVAL; - if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID) + if (!is_gid_entry_valid(table->data_vec[index])) return -EINVAL; - memcpy(gid, &table->data_vec[index].gid, sizeof(*gid)); + memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid)); if (attr) { - memcpy(attr, &table->data_vec[index].attr, sizeof(*attr)); + memcpy(attr, &table->data_vec[index]->attr, + sizeof(*attr)); if (attr->ndev) dev_hold(attr->ndev); } @@ -681,13 +783,14 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev, for (i = 0; i < table->sz; i++) { struct ib_gid_attr attr; - if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID) + if (!is_gid_entry_valid(table->data_vec[i])) continue; - if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid))) + if (memcmp(gid, &table->data_vec[i]->attr.gid, + sizeof(*gid))) continue; - memcpy(&attr, &table->data_vec[i].attr, sizeof(attr)); + memcpy(&attr, &table->data_vec[i]->attr, sizeof(attr)); if (filter(gid, &attr, context)) { found = true; @@ -705,9 +808,7 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev, static struct ib_gid_table *alloc_gid_table(int sz) { - struct ib_gid_table *table = - kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL); - int i; + struct ib_gid_table *table = kzalloc(sizeof(*table), GFP_KERNEL); if (!table) return NULL; @@ -720,12 +821,6 @@ static struct ib_gid_table *alloc_gid_table(int sz) table->sz = sz; rwlock_init(&table->rwlock); - - /* Mark all entries as invalid so that allocator can allocate - * one of the invalid (free) entry. 
- */ - for (i = 0; i < sz; i++) - table->data_vec[i].props |= GID_TABLE_ENTRY_INVALID; return table; err_free_table: @@ -733,12 +828,30 @@ err_free_table: return NULL; } -static void release_gid_table(struct ib_gid_table *table) +static void release_gid_table(struct ib_device *device, u8 port, + struct ib_gid_table *table) { - if (table) { - kfree(table->data_vec); - kfree(table); + bool leak = false; + int i; + + if (!table) + return; + + for (i = 0; i < table->sz; i++) { + if (is_gid_entry_free(table->data_vec[i])) + continue; + if (kref_read(&table->data_vec[i]->kref) > 1) { + pr_err("GID entry ref leak for %s (index %d) ref=%d\n", + device->name, i, + kref_read(&table->data_vec[i]->kref)); + leak = true; + } } + if (leak) + return; + + kfree(table->data_vec); + kfree(table); } static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port, @@ -752,7 +865,7 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port, mutex_lock(&table->lock); for (i = 0; i < table->sz; ++i) { - if (!rdma_is_zero_gid(&table->data_vec[i].gid)) { + if (is_gid_entry_valid(table->data_vec[i])) { del_gid(ib_dev, port, table, i); deleted = true; } @@ -821,7 +934,7 @@ static void gid_table_release_one(struct ib_device *ib_dev) for (port = 0; port < ib_dev->phys_port_cnt; port++) { table = ib_dev->cache.ports[port].gid; - release_gid_table(table); + release_gid_table(ib_dev, port, table); ib_dev->cache.ports[port].gid = NULL; } } @@ -1100,7 +1213,6 @@ static int config_non_roce_gid_cache(struct ib_device *device, { struct ib_gid_attr gid_attr = {}; struct ib_gid_table *table; - union ib_gid gid; int ret = 0; int i; @@ -1112,14 +1224,14 @@ static int config_non_roce_gid_cache(struct ib_device *device, for (i = 0; i < gid_tbl_len; ++i) { if (!device->query_gid) continue; - ret = device->query_gid(device, port, i, &gid); + ret = device->query_gid(device, port, i, &gid_attr.gid); if (ret) { pr_warn("query_gid failed (%d) for %s (index %d)\n", ret, device->name, i); goto err; } gid_attr.index = i; - add_modify_gid(table, &gid, &gid_attr); + add_modify_gid(table, &gid_attr); } err: mutex_unlock(&table->lock); @@ -1302,4 +1414,9 @@ void ib_cache_cleanup_one(struct ib_device *device) ib_unregister_event_handler(&device->cache.event_handler); flush_workqueue(ib_wq); gid_table_cleanup_one(device); + + /* + * Flush the wq second time for any pending GID delete work. + */ + flush_workqueue(ib_wq); } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4c6241bc2039..0a77afedabd0 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -94,6 +94,7 @@ enum ib_gid_type { struct ib_gid_attr { struct net_device *ndev; struct ib_device *device; + union ib_gid gid; enum ib_gid_type gid_type; u16 index; u8 port_num; -- cgit From f4df9a7c34d8f9e84af73ce187bcdf6fea65c4cb Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 5 Jun 2018 08:40:16 +0300 Subject: RDMA: Use GID from the ib_gid_attr during the add_gid() callback Now that ib_gid_attr contains the GID, make use of that in the add_gid() callback functions for the provider drivers to simplify the add_gid() implementations. 
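
For illustration, the resulting callback shape as a minimal sketch; the demo_* names and the cast helper are hypothetical and not part of this series:

/*
 * Hypothetical driver after this conversion: the GID value travels
 * inside the attribute, so no separate gid argument is needed.
 */
static int demo_add_gid(const struct ib_gid_attr *attr, void **context)
{
	struct demo_dev *dev = to_demo_dev(attr->device);	/* assumed helper */

	return demo_hw_write_gid(dev, attr->port_num, attr->index,
				 &attr->gid, attr->gid_type);
}
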
Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 2 +- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 5 ++--- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 3 +-- drivers/infiniband/hw/hns/hns_roce_device.h | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- drivers/infiniband/hw/hns/hns_roce_main.c | 6 ++---- drivers/infiniband/hw/mlx4/main.c | 12 ++++++------ drivers/infiniband/hw/mlx5/main.c | 5 ++--- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 10 +++------- include/rdma/ib_verbs.h | 3 +-- 11 files changed, 21 insertions(+), 31 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 09d83c69ec65..e569956c4e7a 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -303,7 +303,7 @@ static int add_roce_gid(struct ib_gid_table_entry *entry) return -EINVAL; } if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) { - ret = attr->device->add_gid(&attr->gid, attr, &entry->context); + ret = attr->device->add_gid(attr, &entry->context); if (ret) { pr_err("%s GID add failed device=%s port=%d index=%d\n", __func__, attr->device->name, attr->port_num, diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index a76e206704d4..62eb9e3346d5 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -364,8 +364,7 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context) return rc; } -int bnxt_re_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, void **context) +int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context) { int rc; u32 tbl_idx = 0; @@ -377,7 +376,7 @@ int bnxt_re_add_gid(const union ib_gid *gid, if ((attr->ndev) && is_vlan_dev(attr->ndev)) vlan_id = vlan_dev_vlan_id(attr->ndev); - rc = bnxt_qplib_add_sgid(sgid_tbl, (struct bnxt_qplib_gid *)gid, + rc = bnxt_qplib_add_sgid(sgid_tbl, (struct bnxt_qplib_gid *)&attr->gid, rdev->qplib_res.netdev->dev_addr, vlan_id, true, &tbl_idx); if (rc == -EALREADY) { diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 5c6414cad4af..bd04d40d897a 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -158,8 +158,7 @@ void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str); int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index, u16 *pkey); int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context); -int bnxt_re_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, void **context); +int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context); int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num, int index, union ib_gid *gid); enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev, diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 31221d506d9a..65f7b68d1777 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -736,7 +736,7 @@ struct hns_roce_hw { u16 token, int event); int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned long timeout); int (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index, - union ib_gid *gid, const struct ib_gid_attr *attr); + const union ib_gid *gid, const struct ib_gid_attr *attr); int (*set_mac)(struct hns_roce_dev 
*hr_dev, u8 phy_port, u8 *addr); void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port, enum ib_mtu mtu); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 8013d69c5ac4..235c67dfc6cb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1728,7 +1728,7 @@ static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev, } static int hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port, - int gid_index, union ib_gid *gid, + int gid_index, const union ib_gid *gid, const struct ib_gid_attr *attr) { u32 *p = NULL; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index a6e11be0ea0f..454d391b4b40 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1308,7 +1308,7 @@ static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, } static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, - int gid_index, union ib_gid *gid, + int gid_index, const union ib_gid *gid, const struct ib_gid_attr *attr) { enum hns_roce_sgid_type sgid_type = GID_TYPE_FLAG_ROCE_V1; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 21b901cfa2d6..24a2ea0018d9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -74,8 +74,7 @@ static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) return hr_dev->hw->set_mac(hr_dev, phy_port, addr); } -static int hns_roce_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, void **context) +static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context) { struct hns_roce_dev *hr_dev = to_hr_dev(attr->device); u8 port = attr->port_num - 1; @@ -87,8 +86,7 @@ static int hns_roce_add_gid(const union ib_gid *gid, spin_lock_irqsave(&hr_dev->iboe.lock, flags); - ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, - (union ib_gid *)gid, attr); + ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &attr->gid, attr); spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 4ec519afc45b..859089df9f17 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -246,9 +246,7 @@ static int mlx4_ib_update_gids(struct gid_entry *gids, return mlx4_ib_update_gids_v1(gids, ibdev, port_num); } -static int mlx4_ib_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, - void **context) +static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) { struct mlx4_ib_dev *ibdev = to_mdev(attr->device); struct mlx4_ib_iboe *iboe = &ibdev->iboe; @@ -271,8 +269,9 @@ static int mlx4_ib_add_gid(const union ib_gid *gid, port_gid_table = &iboe->gids[attr->port_num - 1]; spin_lock_bh(&iboe->lock); for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) { - if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) && - (port_gid_table->gids[i].gid_type == attr->gid_type)) { + if (!memcmp(&port_gid_table->gids[i].gid, + &attr->gid, sizeof(attr->gid)) && + port_gid_table->gids[i].gid_type == attr->gid_type) { found = i; break; } @@ -289,7 +288,8 @@ static int mlx4_ib_add_gid(const union ib_gid *gid, ret = -ENOMEM; } else { *context = port_gid_table->gids[free].ctx; - memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid)); + memcpy(&port_gid_table->gids[free].gid, + &attr->gid, sizeof(attr->gid)); 
port_gid_table->gids[free].gid_type = attr->gid_type; port_gid_table->gids[free].ctx->real_index = free; port_gid_table->gids[free].ctx->refcount = 1; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e52dd21519b4..94669df81342 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -510,12 +510,11 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, vlan_id, port_num); } -static int mlx5_ib_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, +static int mlx5_ib_add_gid(const struct ib_gid_attr *attr, __always_unused void **context) { return set_roce_addr(to_mdev(attr->device), attr->port_num, - attr->index, gid, attr); + attr->index, &attr->gid, attr); } static int mlx5_ib_del_gid(const struct ib_gid_attr *attr, diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 0be33a81bbe6..faa1be2d7727 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -62,9 +62,7 @@ static DEFINE_MUTEX(pvrdma_device_list_lock); static LIST_HEAD(pvrdma_device_list); static struct workqueue_struct *event_wq; -static int pvrdma_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, - void **context); +static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context); static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context); static ssize_t show_hca(struct device *device, struct device_attribute *attr, @@ -650,13 +648,11 @@ static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev, return 0; } -static int pvrdma_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, - void **context) +static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context) { struct pvrdma_dev *dev = to_vdev(attr->device); - return pvrdma_add_gid_at_index(dev, gid, + return pvrdma_add_gid_at_index(dev, &attr->gid, ib_gid_type_to_pvrdma(attr->gid_type), attr->index); } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0a77afedabd0..1c153cc046ee 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2342,8 +2342,7 @@ struct ib_device { * concurrently for different ports. This function is only called when * roce_gid_table is used. */ - int (*add_gid)(const union ib_gid *gid, - const struct ib_gid_attr *attr, + int (*add_gid)(const struct ib_gid_attr *attr, void **context); /* When calling del_gid, the HW vendor's driver should delete the * gid of device @device at gid index gid_index of port port_num -- cgit From bf399c2cadfa66d399d01d5a92a7bb0a112f1568 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 5 Jun 2018 08:40:17 +0300 Subject: IB/core: Introduce GID attribute get, put and hold APIs This patch introduces three APIs, rdma_get_gid_attr(), rdma_put_gid_attr(), and rdma_hold_gid_attr() which expose the reference counting for GID table entries to the entire stack. The kref counting is based on the struct ib_gid_attr pointer Later patches will convert more cache query function to return struct ib_gid_attrs. 
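
The calling contract these three APIs establish, as a short sketch drawn from the kernel-doc below (use_gid() stands in for any consumer and is hypothetical):

	const struct ib_gid_attr *attr;

	attr = rdma_get_gid_attr(device, port_num, index);
	if (IS_ERR(attr))
		return PTR_ERR(attr);	/* invalid index or empty entry */

	use_gid(&attr->gid, attr->gid_type);	/* hypothetical consumer */

	/* Pair every get with a put, regardless of link layer. */
	rdma_put_gid_attr(attr);
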
Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 86 +++++++++++++++++++++++++++++++++++++++++ include/rdma/ib_cache.h | 4 ++ 2 files changed, 90 insertions(+) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index e569956c4e7a..d92525fb47c7 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -286,6 +286,11 @@ static void store_gid_entry(struct ib_gid_table *table, write_unlock_irq(&table->rwlock); } +static void get_gid_entry(struct ib_gid_table_entry *entry) +{ + kref_get(&entry->kref); +} + static void put_gid_entry(struct ib_gid_table_entry *entry) { kref_put(&entry->kref, schedule_free_gid); @@ -1208,6 +1213,87 @@ int ib_get_cached_port_state(struct ib_device *device, } EXPORT_SYMBOL(ib_get_cached_port_state); +/** + * rdma_get_gid_attr - Returns GID attributes for a port of a device + * at a requested gid_index, if a valid GID entry exists. + * @device: The device to query. + * @port_num: The port number on the device where the GID value + * is to be queried. + * @index: Index of the GID table entry whose attributes are to + * be queried. + * + * rdma_get_gid_attr() acquires reference count of gid attributes from the + * cached GID table. Caller must invoke rdma_put_gid_attr() to release + * reference to gid attribute regardless of link layer. + * + * Returns pointer to valid gid attribute or ERR_PTR for the appropriate error + * code. + */ +const struct ib_gid_attr * +rdma_get_gid_attr(struct ib_device *device, u8 port_num, int index) +{ + const struct ib_gid_attr *attr = ERR_PTR(-EINVAL); + struct ib_gid_table *table; + unsigned long flags; + + if (!rdma_is_port_valid(device, port_num)) + return ERR_PTR(-EINVAL); + + table = rdma_gid_table(device, port_num); + if (index < 0 || index >= table->sz) + return ERR_PTR(-EINVAL); + + read_lock_irqsave(&table->rwlock, flags); + if (!is_gid_entry_valid(table->data_vec[index])) + goto done; + + get_gid_entry(table->data_vec[index]); + attr = &table->data_vec[index]->attr; +done: + read_unlock_irqrestore(&table->rwlock, flags); + return attr; +} +EXPORT_SYMBOL(rdma_get_gid_attr); + +/** + * rdma_put_gid_attr - Release reference to the GID attribute + * @attr: Pointer to the GID attribute whose reference + * needs to be released. + * + * rdma_put_gid_attr() must be used to release reference whose + * reference is acquired using rdma_get_gid_attr() or any APIs + * which returns a pointer to the ib_gid_attr regardless of link layer + * of IB or RoCE. + * + */ +void rdma_put_gid_attr(const struct ib_gid_attr *attr) +{ + struct ib_gid_table_entry *entry = + container_of(attr, struct ib_gid_table_entry, attr); + + put_gid_entry(entry); +} +EXPORT_SYMBOL(rdma_put_gid_attr); + +/** + * rdma_hold_gid_attr - Get reference to existing GID attribute + * + * @attr: Pointer to the GID attribute whose reference + * needs to be taken. + * + * Increase the reference count to a GID attribute to keep it from being + * freed. Callers are required to already be holding a reference to attribute. 
+ * + */ +void rdma_hold_gid_attr(const struct ib_gid_attr *attr) +{ + struct ib_gid_table_entry *entry = + container_of(attr, struct ib_gid_table_entry, attr); + + get_gid_entry(entry); +} +EXPORT_SYMBOL(rdma_hold_gid_attr); + static int config_non_roce_gid_cache(struct ib_device *device, u8 port, int gid_tbl_len) { diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index a5f249828115..00ccd00d0596 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -150,4 +150,8 @@ int ib_get_cached_port_state(struct ib_device *device, enum ib_port_state *port_active); bool rdma_is_zero_gid(const union ib_gid *gid); +const struct ib_gid_attr *rdma_get_gid_attr(struct ib_device *device, + u8 port_num, int index); +void rdma_put_gid_attr(const struct ib_gid_attr *attr); +void rdma_hold_gid_attr(const struct ib_gid_attr *attr); #endif /* _IB_CACHE_H */ -- cgit From 77e786fcbe2ecdac57ced610260ffb1f7cfeed00 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 5 Jun 2018 08:40:18 +0300 Subject: IB/core: Replace ib_query_gid with rdma_get_gid_attr These call sites have a use of ib_query_gid with a simple lifetime for the struct gid_attr pointer, with an easy conversion. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/sysfs.c | 66 +++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 35 deletions(-) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 31c7efaf8e7a..7fd14ead7b37 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -42,6 +42,7 @@ #include #include +#include struct ib_port; @@ -346,7 +347,7 @@ static struct attribute *port_default_attrs[] = { NULL }; -static size_t print_ndev(struct ib_gid_attr *gid_attr, char *buf) +static size_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf) { if (!gid_attr->ndev) return -EINVAL; @@ -354,33 +355,26 @@ static size_t print_ndev(struct ib_gid_attr *gid_attr, char *buf) return sprintf(buf, "%s\n", gid_attr->ndev->name); } -static size_t print_gid_type(struct ib_gid_attr *gid_attr, char *buf) +static size_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf) { return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type)); } -static ssize_t _show_port_gid_attr(struct ib_port *p, - struct port_attribute *attr, - char *buf, - size_t (*print)(struct ib_gid_attr *gid_attr, - char *buf)) +static ssize_t _show_port_gid_attr( + struct ib_port *p, struct port_attribute *attr, char *buf, + size_t (*print)(const struct ib_gid_attr *gid_attr, char *buf)) { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); - union ib_gid gid; - struct ib_gid_attr gid_attr = {}; + const struct ib_gid_attr *gid_attr; ssize_t ret; - ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, - &gid_attr); - if (ret) - goto err; + gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index); + if (IS_ERR(gid_attr)) + return PTR_ERR(gid_attr); - ret = print(&gid_attr, buf); - -err: - if (gid_attr.ndev) - dev_put(gid_attr.ndev); + ret = print(gid_attr, buf); + rdma_put_gid_attr(gid_attr); return ret; } @@ -389,26 +383,28 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); - union ib_gid *pgid; - union ib_gid gid; + const struct ib_gid_attr *gid_attr; ssize_t ret; - ret = ib_query_gid(p->ibdev, p->port_num, 
tab_attr->index, &gid, NULL); + gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index); + if (IS_ERR(gid_attr)) { + const union ib_gid zgid = {}; + + /* If reading GID fails, it is likely due to GID entry being + * empty (invalid) or reserved GID in the table. User space + * expects to read GID table entries as long as it given index + * is within GID table size. Administrative/debugging tool + * fails to query rest of the GID entries if it hits error + * while querying a GID of the given index. To avoid user + * space throwing such error on fail to read gid, return zero + * GID as before. This maintains backward compatibility. + */ + return sprintf(buf, "%pI6\n", zgid.raw); + } - /* If reading GID fails, it is likely due to GID entry being empty - * (invalid) or reserved GID in the table. - * User space expects to read GID table entries as long as it given - * index is within GID table size. - * Administrative/debugging tool fails to query rest of the GID entries - * if it hits error while querying a GID of the given index. - * To avoid user space throwing such error on fail to read gid, return - * zero GID as before. This maintains backward compatibility. - */ - if (ret) - pgid = &zgid; - else - pgid = &gid; - return sprintf(buf, "%pI6\n", pgid->raw); + ret = sprintf(buf, "%pI6\n", gid_attr->gid.raw); + rdma_put_gid_attr(gid_attr); + return ret; } static ssize_t show_port_gid_attr_ndev(struct ib_port *p, -- cgit From ddb457c6993babbcdd41fca638b870d2a2fc3941 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 5 Jun 2018 08:40:19 +0300 Subject: net/smc: Replace ib_query_gid with rdma_get_gid_attr Push the copy of the gid_attr into the SMC code. This probably doesn't push it far enough, as it looks like the conn->lgr should potentially hold the reference for its lifetime. 
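
If that further step were taken, the shape would be roughly the following; this is a hedged sketch only, and the gid_attr field does not exist in struct smc_link in this series:

static int smc_link_pin_gid(struct smc_link *lnk, int index)
{
	const struct ib_gid_attr *gattr;

	gattr = rdma_get_gid_attr(lnk->smcibdev->ibdev, lnk->ibport, index);
	if (IS_ERR(gattr))
		return PTR_ERR(gattr);
	/* hypothetical field; would be dropped with rdma_put_gid_attr() */
	lnk->gid_attr = gattr;
	return 0;
}
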
Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- net/smc/smc_core.c | 20 ++++++++++---------- net/smc/smc_ib.c | 25 +++++++++++++++---------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index add82b0266f3..d99a75f75e42 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "smc.h" #include "smc_clc.h" @@ -450,8 +451,7 @@ out: static int smc_link_determine_gid(struct smc_link_group *lgr) { struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; - struct ib_gid_attr gattr; - union ib_gid gid; + const struct ib_gid_attr *gattr; int i; if (!lgr->vlan_id) { @@ -461,18 +461,18 @@ static int smc_link_determine_gid(struct smc_link_group *lgr) for (i = 0; i < lnk->smcibdev->pattr[lnk->ibport - 1].gid_tbl_len; i++) { - if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid, - &gattr)) + gattr = rdma_get_gid_attr(lnk->smcibdev->ibdev, lnk->ibport, i); + if (IS_ERR(gattr)) continue; - if (gattr.ndev) { - if (is_vlan_dev(gattr.ndev) && - vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) { - lnk->gid = gid; - dev_put(gattr.ndev); + if (gattr->ndev) { + if (is_vlan_dev(gattr->ndev) && + vlan_dev_vlan_id(gattr->ndev) == lgr->vlan_id) { + lnk->gid = gattr->gid; + rdma_put_gid_attr(gattr); return 0; } - dev_put(gattr.ndev); } + rdma_put_gid_attr(gattr); } return -ENODEV; } diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 0eed7ab9f28b..74f29f814ec1 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "smc_pnet.h" #include "smc_ib.h" @@ -372,17 +373,21 @@ void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev, static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport) { - struct ib_gid_attr gattr; - int rc; - - rc = ib_query_gid(smcibdev->ibdev, ibport, 0, - &smcibdev->gid[ibport - 1], &gattr); - if (rc || !gattr.ndev) - return -ENODEV; + const struct ib_gid_attr *gattr; + int rc = 0; - memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN); - dev_put(gattr.ndev); - return 0; + gattr = rdma_get_gid_attr(smcibdev->ibdev, ibport, 0); + if (IS_ERR(gattr)) + return PTR_ERR(gattr); + if (!gattr->ndev) { + rc = -ENODEV; + goto done; + } + smcibdev->gid[ibport - 1] = gattr->gid; + memcpy(smcibdev->mac[ibport - 1], gattr->ndev->dev_addr, ETH_ALEN); +done: + rdma_put_gid_attr(gattr); + return rc; } /* Create an identifier unique for this instance of SMC-R. -- cgit From c3d71b69a75cbbc03c8f43571b003ddadd40d056 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 5 Jun 2018 08:40:20 +0300 Subject: IB/core: Provide rdma_ versions of the gid cache API These versions are functionally similar but all return gid_attrs and related information via reference instead of via copy. The old API is preserved, implemented as wrappers around the new, until all callers can be converted. 
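
Side by side, the old and new styles of the same lookup (illustrative fragment; gid holds the value being searched for):

	/* Old: integer outputs copied back, no lifetime guarantee. */
	u8 port_num;
	u16 index;
	int ret = ib_find_cached_gid(device, &gid, IB_GID_TYPE_IB, NULL,
				     &port_num, &index);

	/* New: one pinned pointer carries port, index and netdev. */
	const struct ib_gid_attr *attr;

	attr = rdma_find_gid(device, &gid, IB_GID_TYPE_IB, NULL);
	if (IS_ERR(attr))
		return PTR_ERR(attr);
	/* attr->port_num, attr->index and attr->ndev stay valid here */
	rdma_put_gid_attr(attr);
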
Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 264 ++++++++++++++++++++++++---------------- include/rdma/ib_cache.h | 17 +++ 2 files changed, 178 insertions(+), 103 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index d92525fb47c7..8a06e743c2dd 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -649,80 +649,37 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index, return 0; } -static int _ib_cache_gid_table_find(struct ib_device *ib_dev, - const union ib_gid *gid, - const struct ib_gid_attr *val, - unsigned long mask, - u8 *port, u16 *index) -{ - struct ib_gid_table *table; - u8 p; - int local_index; - unsigned long flags; - - for (p = 0; p < ib_dev->phys_port_cnt; p++) { - table = ib_dev->cache.ports[p].gid; - read_lock_irqsave(&table->rwlock, flags); - local_index = find_gid(table, gid, val, false, mask, NULL); - if (local_index >= 0) { - if (index) - *index = local_index; - if (port) - *port = p + rdma_start_port(ib_dev); - read_unlock_irqrestore(&table->rwlock, flags); - return 0; - } - read_unlock_irqrestore(&table->rwlock, flags); - } - - return -ENOENT; -} - -static int ib_cache_gid_find(struct ib_device *ib_dev, - const union ib_gid *gid, - enum ib_gid_type gid_type, - struct net_device *ndev, u8 *port, - u16 *index) -{ - unsigned long mask = GID_ATTR_FIND_MASK_GID | - GID_ATTR_FIND_MASK_GID_TYPE; - struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type}; - - if (ndev) - mask |= GID_ATTR_FIND_MASK_NETDEV; - - return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val, - mask, port, index); -} - /** - * ib_find_cached_gid_by_port - Returns the GID table index where a specified - * GID value occurs. It searches for the specified GID value in the local - * software cache. + * rdma_find_gid_by_port - Returns the GID entry attributes when it finds + * a valid GID entry for given search parameters. It searches for the specified + * GID value in the local software cache. * @device: The device to query. * @gid: The GID value to search for. * @gid_type: The GID type to search for. * @port_num: The port number of the device where the GID value should be * searched. - * @ndev: In RoCE, the net device of the device. Null means ignore. - * @index: The index into the cached GID table where the GID was found. This - * parameter may be NULL. + * @ndev: In RoCE, the net device of the device. NULL means ignore. + * + * Returns sgid attributes if the GID is found with valid reference or + * returns ERR_PTR for the error. + * The caller must invoke rdma_put_gid_attr() to release the reference. 
*/ -int ib_find_cached_gid_by_port(struct ib_device *ib_dev, - const union ib_gid *gid, - enum ib_gid_type gid_type, - u8 port, struct net_device *ndev, - u16 *index) +const struct ib_gid_attr * +rdma_find_gid_by_port(struct ib_device *ib_dev, + const union ib_gid *gid, + enum ib_gid_type gid_type, + u8 port, struct net_device *ndev) { int local_index; struct ib_gid_table *table; unsigned long mask = GID_ATTR_FIND_MASK_GID | GID_ATTR_FIND_MASK_GID_TYPE; struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type}; + const struct ib_gid_attr *attr; unsigned long flags; if (!rdma_is_port_valid(ib_dev, port)) - return -ENOENT; + return ERR_PTR(-ENOENT); table = rdma_gid_table(ib_dev, port); @@ -732,55 +689,49 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev, read_lock_irqsave(&table->rwlock, flags); local_index = find_gid(table, gid, &val, false, mask, NULL); if (local_index >= 0) { - if (index) - *index = local_index; + get_gid_entry(table->data_vec[local_index]); + attr = &table->data_vec[local_index]->attr; read_unlock_irqrestore(&table->rwlock, flags); - return 0; + return attr; } read_unlock_irqrestore(&table->rwlock, flags); - return -ENOENT; + return ERR_PTR(-ENOENT); } -EXPORT_SYMBOL(ib_find_cached_gid_by_port); +EXPORT_SYMBOL(rdma_find_gid_by_port); /** - * ib_cache_gid_find_by_filter - Returns the GID table index where a specified - * GID value occurs + * rdma_find_gid_by_filter - Returns the GID table attribute where a + * specified GID value occurs * @device: The device to query. * @gid: The GID value to search for. - * @port_num: The port number of the device where the GID value could be + * @port: The port number of the device where the GID value could be * searched. * @filter: The filter function is executed on any matching GID in the table. * If the filter function returns true, the corresponding index is returned, * otherwise, we continue searching the GID table. It's guaranteed that * while filter is executed, ndev field is valid and the structure won't * change. filter is executed in an atomic context. filter must not be NULL. - * @index: The index into the cached GID table where the GID was found. This - * parameter may be NULL. * - * ib_cache_gid_find_by_filter() searches for the specified GID value + * rdma_find_gid_by_filter() searches for the specified GID value * of which the filter function returns true in the port's GID table. * This function is only supported on RoCE ports. 
* */ -static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev, - const union ib_gid *gid, - u8 port, - bool (*filter)(const union ib_gid *, - const struct ib_gid_attr *, - void *), - void *context, - u16 *index) +const struct ib_gid_attr *rdma_find_gid_by_filter( + struct ib_device *ib_dev, const union ib_gid *gid, u8 port, + bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *, + void *), + void *context) { + const struct ib_gid_attr *res = ERR_PTR(-ENOENT); struct ib_gid_table *table; - unsigned int i; unsigned long flags; - bool found = false; - + unsigned int i; if (!rdma_is_port_valid(ib_dev, port) || !rdma_protocol_roce(ib_dev, port)) - return -EPROTONOSUPPORT; + return ERR_PTR(-EPROTONOSUPPORT); table = rdma_gid_table(ib_dev, port); @@ -798,18 +749,34 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev, memcpy(&attr, &table->data_vec[i]->attr, sizeof(attr)); if (filter(gid, &attr, context)) { - found = true; - if (index) - *index = i; + get_gid_entry(table->data_vec[i]); + res = &table->data_vec[i]->attr; break; } } read_unlock_irqrestore(&table->rwlock, flags); + return res; +} + +int ib_find_cached_gid_by_port(struct ib_device *ib_dev, + const union ib_gid *gid, + enum ib_gid_type gid_type, + u8 port, struct net_device *ndev, + u16 *index) +{ + const struct ib_gid_attr *res; - if (!found) - return -ENOENT; + res = rdma_find_gid_by_port(ib_dev, gid, gid_type, port, ndev); + if (IS_ERR(res)) + return PTR_ERR(res); + + if (index) + *index = res->index; + rdma_put_gid_attr(res); return 0; + } +EXPORT_SYMBOL(ib_find_cached_gid_by_port); static struct ib_gid_table *alloc_gid_table(int sz) { @@ -1016,27 +983,109 @@ int ib_get_cached_gid(struct ib_device *device, EXPORT_SYMBOL(ib_get_cached_gid); /** - * ib_find_cached_gid - Returns the port number and GID table index where - * a specified GID value occurs. + * rdma_query_gid - Read the GID content from the GID software cache + * @device: Device to query the GID + * @port_num: Port number of the device + * @index: Index of the GID table entry to read + * @gid: Pointer to GID where to store the entry's GID + * + * rdma_query_gid() only reads the GID entry content for requested device, + * port and index. It reads for IB, RoCE and iWarp link layers. It doesn't + * hold any reference to the GID table entry in the HCA or software cache. + * + * Returns 0 on success or appropriate error code. + * + */ +int rdma_query_gid(struct ib_device *device, u8 port_num, + int index, union ib_gid *gid) +{ + struct ib_gid_table *table; + unsigned long flags; + int res = -EINVAL; + + if (!rdma_is_port_valid(device, port_num)) + return -EINVAL; + + table = rdma_gid_table(device, port_num); + read_lock_irqsave(&table->rwlock, flags); + + if (index < 0 || index >= table->sz || + !is_gid_entry_valid(table->data_vec[index])) + goto done; + + memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid)); + res = 0; + +done: + read_unlock_irqrestore(&table->rwlock, flags); + return res; +} +EXPORT_SYMBOL(rdma_query_gid); + +/** + * rdma_find_gid - Returns SGID attributes if the matching GID is found. * @device: The device to query. * @gid: The GID value to search for. * @gid_type: The GID type to search for. * @ndev: In RoCE, the net device of the device. NULL means ignore. - * @port_num: The port number of the device where the GID value was found. - * @index: The index into the cached GID table where the GID was found. This - * parameter may be NULL. 
* - * ib_find_cached_gid() searches for the specified GID value in - * the local software cache. + * rdma_find_gid() searches for the specified GID value in the software cache. + * + * Returns GID attributes if a valid GID is found or returns ERR_PTR for the + * error. The caller must invoke rdma_put_gid_attr() to release the reference. + * */ -int ib_find_cached_gid(struct ib_device *device, - const union ib_gid *gid, - enum ib_gid_type gid_type, - struct net_device *ndev, - u8 *port_num, - u16 *index) +const struct ib_gid_attr *rdma_find_gid(struct ib_device *device, + const union ib_gid *gid, + enum ib_gid_type gid_type, + struct net_device *ndev) { - return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index); + unsigned long mask = GID_ATTR_FIND_MASK_GID | + GID_ATTR_FIND_MASK_GID_TYPE; + struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type}; + u8 p; + + if (ndev) + mask |= GID_ATTR_FIND_MASK_NETDEV; + + for (p = 0; p < device->phys_port_cnt; p++) { + struct ib_gid_table *table; + unsigned long flags; + int index; + + table = device->cache.ports[p].gid; + read_lock_irqsave(&table->rwlock, flags); + index = find_gid(table, gid, &gid_attr_val, false, mask, NULL); + if (index >= 0) { + const struct ib_gid_attr *attr; + + get_gid_entry(table->data_vec[index]); + attr = &table->data_vec[index]->attr; + read_unlock_irqrestore(&table->rwlock, flags); + return attr; + } + read_unlock_irqrestore(&table->rwlock, flags); + } + + return ERR_PTR(-ENOENT); +} +EXPORT_SYMBOL(rdma_find_gid); + +int ib_find_cached_gid(struct ib_device *device, const union ib_gid *gid, + enum ib_gid_type gid_type, struct net_device *ndev, + u8 *port_num, u16 *index) +{ + const struct ib_gid_attr *res; + + res = rdma_find_gid(device, gid, gid_type, ndev); + if (IS_ERR(res)) + return PTR_ERR(res); + if (port_num) + *port_num = res->port_num; + if (index) + *index = res->index; + rdma_put_gid_attr(res); + return 0; } EXPORT_SYMBOL(ib_find_cached_gid); @@ -1048,13 +1097,22 @@ int ib_find_gid_by_filter(struct ib_device *device, void *), void *context, u16 *index) { + const struct ib_gid_attr *res; + /* Only RoCE GID table supports filter function */ if (!rdma_protocol_roce(device, port_num) && filter) return -EPROTONOSUPPORT; - return ib_cache_gid_find_by_filter(device, gid, - port_num, filter, - context, index); + res = rdma_find_gid_by_filter(device, gid, port_num, filter, + context); + if (IS_ERR(res)) + return PTR_ERR(res); + + if (index) + *index = res->index; + + rdma_put_gid_attr(res); + return 0; } int ib_get_cached_pkey(struct ib_device *device, diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index 00ccd00d0596..059f7d894939 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -54,6 +54,8 @@ int ib_get_cached_gid(struct ib_device *device, int index, union ib_gid *gid, struct ib_gid_attr *attr); +int rdma_query_gid(struct ib_device *device, u8 port_num, int index, + union ib_gid *gid); int ib_find_cached_gid(struct ib_device *device, const union ib_gid *gid, @@ -61,6 +63,10 @@ int ib_find_cached_gid(struct ib_device *device, struct net_device *ndev, u8 *port_num, u16 *index); +const struct ib_gid_attr *rdma_find_gid(struct ib_device *device, + const union ib_gid *gid, + enum ib_gid_type gid_type, + struct net_device *ndev); int ib_find_cached_gid_by_port(struct ib_device *device, const union ib_gid *gid, @@ -68,6 +74,11 @@ int ib_find_cached_gid_by_port(struct ib_device *device, u8 port_num, struct net_device *ndev, u16 *index); +const struct ib_gid_attr 
*rdma_find_gid_by_port(struct ib_device *ib_dev, + const union ib_gid *gid, + enum ib_gid_type gid_type, + u8 port, + struct net_device *ndev); int ib_find_gid_by_filter(struct ib_device *device, const union ib_gid *gid, @@ -76,6 +87,12 @@ int ib_find_gid_by_filter(struct ib_device *device, const struct ib_gid_attr *, void *), void *context, u16 *index); +const struct ib_gid_attr *rdma_find_gid_by_filter( + struct ib_device *device, const union ib_gid *gid, u8 port_num, + bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *, + void *), + void *context); + /** * ib_get_cached_pkey - Returns a cached PKey table entry * @device: The device to query. -- cgit From 83f6f8d29dd3079b278791ebf14e87802f91b6bc Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 5 Jun 2018 08:40:21 +0300 Subject: IB/core: Make rdma_find_gid_by_filter support all protocols There is no reason to restrict this function to roce only these days, allow the filter function to be called on any protocol. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 8a06e743c2dd..9846373c5cbc 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -715,7 +715,6 @@ EXPORT_SYMBOL(rdma_find_gid_by_port); * * rdma_find_gid_by_filter() searches for the specified GID value * of which the filter function returns true in the port's GID table. - * This function is only supported on RoCE ports. * */ const struct ib_gid_attr *rdma_find_gid_by_filter( @@ -729,28 +728,24 @@ const struct ib_gid_attr *rdma_find_gid_by_filter( unsigned long flags; unsigned int i; - if (!rdma_is_port_valid(ib_dev, port) || - !rdma_protocol_roce(ib_dev, port)) - return ERR_PTR(-EPROTONOSUPPORT); + if (!rdma_is_port_valid(ib_dev, port)) + return ERR_PTR(-EINVAL); table = rdma_gid_table(ib_dev, port); read_lock_irqsave(&table->rwlock, flags); for (i = 0; i < table->sz; i++) { - struct ib_gid_attr attr; + struct ib_gid_table_entry *entry = table->data_vec[i]; - if (!is_gid_entry_valid(table->data_vec[i])) + if (!is_gid_entry_valid(entry)) continue; - if (memcmp(gid, &table->data_vec[i]->attr.gid, - sizeof(*gid))) + if (memcmp(gid, &entry->attr.gid, sizeof(*gid))) continue; - memcpy(&attr, &table->data_vec[i]->attr, sizeof(attr)); - - if (filter(gid, &attr, context)) { - get_gid_entry(table->data_vec[i]); - res = &table->data_vec[i]->attr; + if (filter(gid, &entry->attr, context)) { + get_gid_entry(entry); + res = &entry->attr; break; } } @@ -1099,10 +1094,6 @@ int ib_find_gid_by_filter(struct ib_device *device, { const struct ib_gid_attr *res; - /* Only RoCE GID table supports filter function */ - if (!rdma_protocol_roce(device, port_num) && filter) - return -EPROTONOSUPPORT; - res = rdma_find_gid_by_filter(device, gid, port_num, filter, context); if (IS_ERR(res)) -- cgit From 1dfce294577120ec60399a64094ea00e4247103d Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 5 Jun 2018 08:40:22 +0300 Subject: IB: Replace ib_query_gid/ib_get_cached_gid with rdma_query_gid If the gid_attr argument is NULL then the functions behave identically to rdma_query_gid. ib_query_gid just calls ib_get_cached_gid, so everything can be consolidated to one function. Now that all callers either use rdma_query_gid() or ib_get_cached_gid(), ib_query_gid() API is removed. 
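
The conversion at each call site is mechanical, as the hunks below show; in condensed form:

	/* Before: cache-backed query, attr output unused at these sites. */
	ret = ib_query_gid(device, port_num, 0, &gid, NULL);

	/* After: same cached read, valid for IB, RoCE and iWarp ports. */
	ret = rdma_query_gid(device, port_num, 0, &gid);
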
Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 9 ++++----- drivers/infiniband/core/cma.c | 6 +++--- drivers/infiniband/core/device.c | 21 +-------------------- drivers/infiniband/core/mad.c | 4 ++-- drivers/infiniband/core/verbs.c | 6 ++---- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 3 ++- drivers/infiniband/ulp/ipoib/ipoib_main.c | 4 ++-- drivers/infiniband/ulp/srp/ib_srp.c | 2 +- drivers/infiniband/ulp/srpt/ib_srpt.c | 3 +-- include/rdma/ib_verbs.h | 4 ---- 10 files changed, 18 insertions(+), 44 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 27a7b0a2e27a..800ff69e09b3 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1646,7 +1646,7 @@ static void cm_opa_to_ib_sgid(struct cm_work *work, (ib_is_opa_gid(&path->sgid))) { union ib_gid sgid; - if (ib_get_cached_gid(dev, port_num, 0, &sgid, NULL)) { + if (rdma_query_gid(dev, port_num, 0, &sgid)) { dev_warn(&dev->dev, "Error updating sgid in CM request\n"); return; @@ -1997,10 +1997,9 @@ static int cm_req_handler(struct cm_work *work) if (ret) { int err; - err = ib_get_cached_gid(work->port->cm_dev->ib_device, - work->port->port_num, 0, - &work->path[0].sgid, - NULL); + err = rdma_query_gid(work->port->cm_dev->ib_device, + work->port->port_num, 0, + &work->path[0].sgid); if (err) ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, NULL, 0, NULL, 0); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index bff10ab141b0..c2f478761ae9 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -732,8 +732,8 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) if (ib_get_cached_port_state(cur_dev->device, p, &port_state)) continue; - for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, - &gid, NULL); + for (i = 0; !rdma_query_gid(cur_dev->device, + p, i, &gid); i++) { if (!memcmp(&gid, dgid, sizeof(gid))) { cma_dev = cur_dev; @@ -2791,7 +2791,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv) p = 1; port_found: - ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL); + ret = rdma_query_gid(cma_dev->device, p, 0, &gid); if (ret) goto out; diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 6fa4c59dc7a7..b8144f194777 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -861,25 +861,6 @@ int ib_query_port(struct ib_device *device, } EXPORT_SYMBOL(ib_query_port); -/** - * ib_query_gid - Get GID table entry - * @device:Device to query - * @port_num:Port number to query - * @index:GID table index to query - * @gid:Returned GID - * @attr: Returned GID attributes related to this GID index (only in RoCE). - * NULL means ignore. - * - * ib_query_gid() fetches the specified GID table entry from the cache. 
- */ -int ib_query_gid(struct ib_device *device, - u8 port_num, int index, union ib_gid *gid, - struct ib_gid_attr *attr) -{ - return ib_get_cached_gid(device, port_num, index, gid, attr); -} -EXPORT_SYMBOL(ib_query_gid); - /** * ib_enum_roce_netdev - enumerate all RoCE ports * @ib_dev : IB device we want to query @@ -1057,7 +1038,7 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid, continue; for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) { - ret = ib_query_gid(device, port, i, &tmp_gid, NULL); + ret = rdma_query_gid(device, port, i, &tmp_gid); if (ret) return ret; if (!memcmp(&tmp_gid, gid, sizeof *gid)) { diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index f742ae7a768b..db0f93a9d507 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1896,8 +1896,8 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_ const struct ib_global_route *grh = rdma_ah_read_grh(&attr); - if (ib_get_cached_gid(device, port_num, - grh->sgid_index, &sgid, NULL)) + if (rdma_query_gid(device, port_num, + grh->sgid_index, &sgid)) return 0; return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw, 16); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 0b56828c1319..5e34e359f7b4 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -516,10 +516,8 @@ static int ib_resolve_unicast_gid_dmac(struct ib_device *device, grh = rdma_ah_retrieve_grh(ah_attr); - ret = ib_query_gid(device, - rdma_ah_get_port_num(ah_attr), - grh->sgid_index, - &sgid, &sgid_attr); + ret = ib_get_cached_gid(device, rdma_ah_get_port_num(ah_attr), + grh->sgid_index, &sgid, &sgid_attr); if (ret || !sgid_attr.ndev) { if (!ret) ret = -ENXIO; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index f47f9ace1f48..fb3728bf7e40 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -40,6 +40,7 @@ #include #include +#include #include "ipoib.h" @@ -1069,7 +1070,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv) bool ret = false; netdev_gid = (union ib_gid *)(priv->dev->dev_addr + 4); - if (ib_query_gid(priv->ca, priv->port, 0, &gid0, NULL)) + if (rdma_query_gid(priv->ca, priv->port, 0, &gid0)) return false; netif_addr_lock_bh(priv->dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 26cde95bc0f3..f4fac48aeade 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2287,9 +2287,9 @@ static struct net_device *ipoib_add_port(const char *format, priv->dev->broadcast[8] = priv->pkey >> 8; priv->dev->broadcast[9] = priv->pkey & 0xff; - result = ib_query_gid(hca, port, 0, &priv->local_gid, NULL); + result = rdma_query_gid(hca, port, 0, &priv->local_gid); if (result) { - pr_warn("%s: ib_query_gid port %d failed (ret = %d)\n", + pr_warn("%s: rdma_query_gid port %d failed (ret = %d)\n", hca->name, port, result); goto device_init_failed; } diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 9786b24b956f..34b1aaffa521 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3843,7 +3843,7 @@ static ssize_t srp_create_target(struct device *dev, INIT_WORK(&target->tl_err_work, srp_tl_err_work); INIT_WORK(&target->remove_work, srp_remove_work); spin_lock_init(&target->lock); - ret = 
ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL); + ret = rdma_query_gid(ibdev, host->port, 0, &target->sgid); if (ret) goto out; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 3081c629a7f7..698f7779e231 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -575,8 +575,7 @@ static int srpt_refresh_port(struct srpt_port *sport) sport->sm_lid = port_attr.sm_lid; sport->lid = port_attr.lid; - ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid, - NULL); + ret = rdma_query_gid(sport->sdev->device, sport->port, 0, &sport->gid); if (ret) goto err_query_port; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 1c153cc046ee..67c458215512 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3046,10 +3046,6 @@ static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num) return rdma_protocol_iwarp(dev, port_num); } -int ib_query_gid(struct ib_device *device, - u8 port_num, int index, union ib_gid *gid, - struct ib_gid_attr *attr); - int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, int state); int ib_get_vf_config(struct ib_device *device, int vf, u8 port, -- cgit From 82f82ceb8e660322c642d1ea6f835756aa061b34 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 5 Jun 2018 08:40:23 +0300 Subject: IB/rxe: Use rdma GID API rxe_netdev_from_av can now be done by the core code directly from the gid_attrs, no need for a helper in the driver. ib_find_cached_gid_by_port can be switched to use the rdma version here as well. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_net.c | 50 ++++++++++++------------------------ drivers/infiniband/sw/rxe/rxe_recv.c | 12 ++++++--- 2 files changed, 25 insertions(+), 37 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 59ec6d918ed4..79b69943a8af 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -182,39 +182,19 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev, #endif -/* - * Derive the net_device from the av. - * For physical devices, this will just return rxe->ndev. - * But for VLAN devices, it will return the vlan dev. - * Caller should dev_put() the returned net_device. 
- */ -static struct net_device *rxe_netdev_from_av(struct rxe_dev *rxe, - int port_num, - struct rxe_av *av) -{ - union ib_gid gid; - struct ib_gid_attr attr; - struct net_device *ndev = rxe->ndev; - - if (ib_get_cached_gid(&rxe->ib_dev, port_num, av->grh.sgid_index, - &gid, &attr) == 0 && - attr.ndev && attr.ndev != ndev) - ndev = attr.ndev; - else - /* Only to ensure that caller may call dev_put() */ - dev_hold(ndev); - - return ndev; -} - static struct dst_entry *rxe_find_route(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_av *av) { + const struct ib_gid_attr *attr; struct dst_entry *dst = NULL; struct net_device *ndev; - ndev = rxe_netdev_from_av(rxe, qp->attr.port_num, av); + attr = rdma_get_gid_attr(&rxe->ib_dev, qp->attr.port_num, + av->grh.sgid_index); + if (IS_ERR(attr)) + return NULL; + ndev = attr->ndev; if (qp_type(qp) == IB_QPT_RC) dst = sk_dst_get(qp->sk->sk); @@ -244,8 +224,7 @@ static struct dst_entry *rxe_find_route(struct rxe_dev *rxe, #endif } } - - dev_put(ndev); + rdma_put_gid_attr(attr); return dst; } @@ -536,9 +515,13 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, unsigned int hdr_len; struct sk_buff *skb; struct net_device *ndev; + const struct ib_gid_attr *attr; const int port_num = 1; - ndev = rxe_netdev_from_av(rxe, port_num, av); + attr = rdma_get_gid_attr(&rxe->ib_dev, port_num, av->grh.sgid_index); + if (IS_ERR(attr)) + return NULL; + ndev = attr->ndev; if (av->network_type == RDMA_NETWORK_IPV4) hdr_len = ETH_HLEN + sizeof(struct udphdr) + @@ -550,10 +533,8 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(ndev), GFP_ATOMIC); - if (unlikely(!skb)) { - dev_put(ndev); - return NULL; - } + if (unlikely(!skb)) + goto out; skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(rxe->ndev)); @@ -568,7 +549,8 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, pkt->hdr = skb_put_zero(skb, paylen); pkt->mask |= RXE_GRH_MASK; - dev_put(ndev); +out: + rdma_put_gid_attr(attr); return skb; } diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index dfba44a40f0b..42797ac6f7b1 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -328,6 +328,7 @@ err1: static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) { + const struct ib_gid_attr *gid_attr; union ib_gid dgid; union ib_gid *pdgid; @@ -339,9 +340,14 @@ static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) pdgid = (union ib_gid *)&ipv6_hdr(skb)->daddr; } - return ib_find_cached_gid_by_port(&rxe->ib_dev, pdgid, - IB_GID_TYPE_ROCE_UDP_ENCAP, - 1, skb->dev, NULL); + gid_attr = rdma_find_gid_by_port(&rxe->ib_dev, pdgid, + IB_GID_TYPE_ROCE_UDP_ENCAP, + 1, skb->dev); + if (IS_ERR(gid_attr)) + return PTR_ERR(gid_attr); + + rdma_put_gid_attr(gid_attr); + return 0; } /* rxe_rcv is called from the interface driver */ -- cgit From dd8028f1e912134f35698392652bc561ef0d9cb4 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 5 Jun 2018 07:26:51 +0300 Subject: RDMA/nldev: Return port capability flag for IB only Port capability flag represents IBTA PortInfo:CapabilityMask, but was mistakenly mixed with non-relevant fields. Return that information for IB only. 
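The shape of the fix, condensed into a hypothetical fill helper (only the CapabilityMask attribute is shown; error handling follows the patch):

	static int example_fill_ib_port_info(struct sk_buff *msg,
					     struct ib_device *device, u8 port,
					     const struct ib_port_attr *attr)
	{
		/* IBTA-defined fields are meaningful only on IB ports */
		if (!rdma_protocol_ib(device, port))
			return 0;
		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
				      (u64)attr->port_cap_flags,
				      RDMA_NLDEV_ATTR_PAD))
			return -EMSGSIZE;
		return 0;
	}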
Link: https://patchwork.kernel.org/patch/10386245/ Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/nldev.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 340c7bea45ab..0385ab438320 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -237,15 +237,15 @@ static int fill_port_info(struct sk_buff *msg, if (ret) return ret; - BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64)); - if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, - (u64)attr.port_cap_flags, RDMA_NLDEV_ATTR_PAD)) - return -EMSGSIZE; - if (rdma_protocol_ib(device, port) && - nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX, - attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD)) - return -EMSGSIZE; if (rdma_protocol_ib(device, port)) { + BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64)); + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, + (u64)attr.port_cap_flags, + RDMA_NLDEV_ATTR_PAD)) + return -EMSGSIZE; + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX, + attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD)) + return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid)) return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid)) -- cgit From de7498147d0053273fd3dcf85ba20babb9ef9725 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 5 Jun 2018 07:55:02 +0300 Subject: RDMA/uverbs: Refactor flow_resources_alloc() function Simplify the flow_resources_alloc() function call by reducing number of goto statements. Reviewed-by: Michael J. Ruhl Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 3e90b6a1d9d2..72803f836b2d 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2761,29 +2761,24 @@ static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs) resources = kzalloc(sizeof(*resources), GFP_KERNEL); if (!resources) - goto err_res; + return NULL; resources->counters = kcalloc(num_specs, sizeof(*resources->counters), GFP_KERNEL); - - if (!resources->counters) - goto err_cnt; - resources->collection = kcalloc(num_specs, sizeof(*resources->collection), GFP_KERNEL); - if (!resources->collection) - goto err_collection; + if (!resources->counters || !resources->collection) + goto err; resources->max = num_specs; return resources; -err_collection: +err: kfree(resources->counters); -err_cnt: kfree(resources); -err_res: + return NULL; } -- cgit From aaf5e003b1c454c5722bc4ea9dfd3506c57d36a9 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Fri, 1 Jun 2018 12:18:36 -0500 Subject: i40iw: Reorganize acquire/release of locks in i40iw_manage_apbvt Commit f43c00c04bbf ("i40iw: Extend port reuse support for listeners") introduces a sparse warning: include/linux/spinlock.h:365:9: sparse: context imbalance in 'i40iw_manage_apbvt' - unexpected unlock Fix this by reorganizing the acquire/release of locks in i40iw_manage_apbvt and add a new function i40iw_cqp_manage_abvpt_cmd to perform the CQP command. Also, use __clear_bit and __test_and_set_bit as we do not need atomic versions. 
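The invariant that justifies the non-atomic bitops, shown as a condensed sketch of the add-port path (a hypothetical helper; structure and field names as in this driver):

	static bool example_reserve_apbvt_port(struct i40iw_cm_core *cm_core,
					       u16 accel_local_port)
	{
		unsigned long flags;
		bool in_use;

		spin_lock_irqsave(&cm_core->apbvt_lock, flags);
		/* __test_and_set_bit is safe here: apbvt_lock already
		 * serializes every access to ports_in_use, so the atomic
		 * variant is unnecessary */
		in_use = __test_and_set_bit(accel_local_port,
					    cm_core->ports_in_use);
		spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
		return !in_use;	/* true when the port was newly reserved */
	}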
Fixes: f43c00c04bbf ("i40iw: Extend port reuse support for listeners") Suggested-by: Jason Gunthorpe Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_hw.c | 83 +++++++++++++++++++++------------- 1 file changed, 51 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 2836c5420d60..55a1fbf0e670 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -435,45 +435,24 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) } /** - * i40iw_manage_apbvt - add or delete tcp port + * i40iw_cqp_manage_abvpt_cmd - send cqp command manage abpvt * @iwdev: iwarp device * @accel_local_port: port for apbvt * @add_port: add or delete port */ -int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool add_port) +static enum i40iw_status_code +i40iw_cqp_manage_abvpt_cmd(struct i40iw_device *iwdev, + u16 accel_local_port, + bool add_port) { struct i40iw_apbvt_info *info; struct i40iw_cqp_request *cqp_request; struct cqp_commands_info *cqp_info; - unsigned long flags; - struct i40iw_cm_core *cm_core = &iwdev->cm_core; - enum i40iw_status_code status = 0; - bool in_use; - - /* apbvt_lock is held across CQP delete APBVT OP (non-waiting) to - * protect against race where add APBVT CQP can race ahead of the delete - * APBVT for same port. - */ - spin_lock_irqsave(&cm_core->apbvt_lock, flags); - - if (!add_port) { - in_use = i40iw_port_in_use(cm_core, accel_local_port); - if (in_use) - goto exit; - clear_bit(accel_local_port, cm_core->ports_in_use); - } else { - in_use = test_and_set_bit(accel_local_port, - cm_core->ports_in_use); - spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); - if (in_use) - return 0; - } + enum i40iw_status_code status; cqp_request = i40iw_get_cqp_request(&iwdev->cqp, add_port); - if (!cqp_request) { - status = -ENOMEM; - goto exit; - } + if (!cqp_request) + return I40IW_ERR_NO_MEMORY; cqp_info = &cqp_request->info; info = &cqp_info->in.u.manage_apbvt_entry.info; @@ -489,13 +468,53 @@ int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool ad status = i40iw_handle_cqp_op(iwdev, cqp_request); if (status) i40iw_pr_err("CQP-OP Manage APBVT entry fail"); -exit: - if (!add_port) - spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); return status; } +/** + * i40iw_manage_apbvt - add or delete tcp port + * @iwdev: iwarp device + * @accel_local_port: port for apbvt + * @add_port: add or delete port + */ +enum i40iw_status_code i40iw_manage_apbvt(struct i40iw_device *iwdev, + u16 accel_local_port, + bool add_port) +{ + struct i40iw_cm_core *cm_core = &iwdev->cm_core; + enum i40iw_status_code status; + unsigned long flags; + bool in_use; + + /* apbvt_lock is held across CQP delete APBVT OP (non-waiting) to + * protect against race where add APBVT CQP can race ahead of the delete + * APBVT for same port. 
+ */ + if (add_port) { + spin_lock_irqsave(&cm_core->apbvt_lock, flags); + in_use = __test_and_set_bit(accel_local_port, + cm_core->ports_in_use); + spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); + if (in_use) + return 0; + return i40iw_cqp_manage_abvpt_cmd(iwdev, accel_local_port, + true); + } else { + spin_lock_irqsave(&cm_core->apbvt_lock, flags); + in_use = i40iw_port_in_use(cm_core, accel_local_port); + if (in_use) { + spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); + return 0; + } + __clear_bit(accel_local_port, cm_core->ports_in_use); + status = i40iw_cqp_manage_abvpt_cmd(iwdev, accel_local_port, + false); + spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); + return status; + } +} + /** * i40iw_manage_arp_cache - manage hw arp cache * @iwdev: iwarp device -- cgit From 7f3ee8e030d851fdb0eace00cc5ceb294238c1b8 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 11 Jun 2018 20:56:50 -0600 Subject: IB/rxe: Do not hide uABI stuff in memcpy struct rxe_global_route and struct ib_global_route are not the same thing and should not be memcpy'd over each other, do a member by member copy instead. This allows the layout of the in-kernel struct ib_global_route to be changed without breaking rxe. Reviewed-by: Zhu Yanjun Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_av.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c index 7f1ae364088a..c0f972c58d55 100644 --- a/drivers/infiniband/sw/rxe/rxe_av.c +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -55,16 +55,29 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr) void rxe_av_from_attr(u8 port_num, struct rxe_av *av, struct rdma_ah_attr *attr) { + const struct ib_global_route *grh = rdma_ah_read_grh(attr); + memset(av, 0, sizeof(*av)); - memcpy(&av->grh, rdma_ah_read_grh(attr), - sizeof(*rdma_ah_read_grh(attr))); + memcpy(av->grh.dgid.raw, grh->dgid.raw, sizeof(grh->dgid.raw)); + av->grh.flow_label = grh->flow_label; + av->grh.sgid_index = grh->sgid_index; + av->grh.hop_limit = grh->hop_limit; + av->grh.traffic_class = grh->traffic_class; av->port_num = port_num; } void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr) { + struct ib_global_route *grh = rdma_ah_retrieve_grh(attr); + attr->type = RDMA_AH_ATTR_TYPE_ROCE; - memcpy(rdma_ah_retrieve_grh(attr), &av->grh, sizeof(av->grh)); + + memcpy(grh->dgid.raw, av->grh.dgid.raw, sizeof(av->grh.dgid.raw)); + grh->flow_label = av->grh.flow_label; + grh->sgid_index = av->grh.sgid_index; + grh->hop_limit = av->grh.hop_limit; + grh->traffic_class = av->grh.traffic_class; + rdma_ah_set_ah_flags(attr, IB_AH_GRH); rdma_ah_set_port_num(attr, av->port_num); } -- cgit From 0c271c433ca2f481ccf5e9a5645a2ec071188c19 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 8 Jun 2018 10:42:17 -0700 Subject: IB/mad: Agent registration is process context only Document this (it's implicitly true due to sleeping operations already in use in both registration and deregistration). Use this fact to use spin_lock_irq instead of spin_lock_irqsave. This improves performance slightly. 
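A generic sketch of the pattern this change relies on (the lock and counter are illustrative, not from the MAD code):

	static DEFINE_SPINLOCK(example_lock);
	static int example_count;

	static void example_process_context_update(void)
	{
		/* Known process context: IRQs are enabled on entry, so the
		 * flags save/restore of spin_lock_irqsave() can be skipped */
		spin_lock_irq(&example_lock);
		example_count++;
		spin_unlock_irq(&example_lock);
	}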
Signed-off-by: Matthew Wilcox Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/mad.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index db0f93a9d507..1bb1733c7079 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -190,6 +190,8 @@ EXPORT_SYMBOL(ib_response_mad); /* * ib_register_mad_agent - Register to send/receive MADs + * + * Context: Process context. */ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, u8 port_num, @@ -210,7 +212,6 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, struct ib_mad_mgmt_vendor_class *vendor_class; struct ib_mad_mgmt_method_table *method; int ret2, qpn; - unsigned long flags; u8 mgmt_class, vclass; /* Validate parameters */ @@ -376,7 +377,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, goto error4; } - spin_lock_irqsave(&port_priv->reg_lock, flags); + spin_lock_irq(&port_priv->reg_lock); mad_agent_priv->agent.hi_tid = atomic_inc_return(&ib_mad_client_id); /* @@ -422,11 +423,11 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, /* Add mad agent into port's agent list */ list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list); - spin_unlock_irqrestore(&port_priv->reg_lock, flags); + spin_unlock_irq(&port_priv->reg_lock); return &mad_agent_priv->agent; error5: - spin_unlock_irqrestore(&port_priv->reg_lock, flags); + spin_unlock_irq(&port_priv->reg_lock); ib_mad_agent_security_cleanup(&mad_agent_priv->agent); error4: kfree(reg_req); @@ -575,7 +576,6 @@ static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) { struct ib_mad_port_private *port_priv; - unsigned long flags; /* Note that we could still be handling received MADs */ @@ -587,10 +587,10 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) port_priv = mad_agent_priv->qp_info->port_priv; cancel_delayed_work(&mad_agent_priv->timed_work); - spin_lock_irqsave(&port_priv->reg_lock, flags); + spin_lock_irq(&port_priv->reg_lock); remove_mad_reg_req(mad_agent_priv); list_del(&mad_agent_priv->agent_list); - spin_unlock_irqrestore(&port_priv->reg_lock, flags); + spin_unlock_irq(&port_priv->reg_lock); flush_workqueue(port_priv->wq); ib_cancel_rmpp_recvs(mad_agent_priv); @@ -625,6 +625,8 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv) /* * ib_unregister_mad_agent - Unregisters a client from using MAD services + * + * Context: Process context. */ void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) { -- cgit From fb51eecaa523c909cdc6240d3e9b8bfc74e7a888 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 13 Jun 2018 10:22:02 +0300 Subject: IB: Ensure that all rdma_ah_attr's are zero initialized Since we are adding some new fields to this structure it is safest if all users reliably initialize the struct to zero. 
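Both zero-initialization idioms used below, in a minimal generic sketch (helper names hypothetical):

	static struct ib_qp_attr *example_alloc_qp_attr(void)
	{
		/* heap object: kzalloc (not kmalloc) so any newly added
		 * fields read back as zero */
		return kzalloc(sizeof(struct ib_qp_attr), GFP_KERNEL);
	}

	static void example_stack_init(void)
	{
		struct rdma_ah_attr attr = {};	/* stack: zeroed at declaration */

		attr.port_num = 1;	/* illustrative use */
	}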
Signed-off-by: Parav Pandit Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_cmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 72803f836b2d..5733d0fb0673 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1968,7 +1968,7 @@ static int modify_qp(struct ib_uverbs_file *file, struct ib_qp *qp; int ret; - attr = kmalloc(sizeof *attr, GFP_KERNEL); + attr = kzalloc(sizeof(*attr), GFP_KERNEL); if (!attr) return -ENOMEM; @@ -2552,7 +2552,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, struct ib_uobject *uobj; struct ib_pd *pd; struct ib_ah *ah; - struct rdma_ah_attr attr; + struct rdma_ah_attr attr = {}; int ret; struct ib_udata udata; -- cgit From 8d9ec9addd6c492a99d3699212653cba92989767 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 13 Jun 2018 10:22:03 +0300 Subject: IB/core: Add a sgid_attr pointer to struct rdma_ah_attr The sgid_attr will ultimately replace the sgid_index in the ah_attr. This will allow for all layers to have a consistent view of what gid table entry was selected as processing runs through all stages of the stack. This commit introduces the pointer and ensures it is set before calling any driver callback that includes a struct ah_attr callback, allowing future patches to adjust both the drivers and the callers to use sgid_attr instead of sgid_index. Signed-off-by: Jason Gunthorpe Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/verbs.c | 252 +++++++++++++++++++++++++++++++++++----- include/rdma/ib_verbs.h | 7 ++ 2 files changed, 232 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 5e34e359f7b4..26ee1de1cc26 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -326,6 +326,85 @@ EXPORT_SYMBOL(ib_dealloc_pd); /* Address handles */ +/* + * Validate that the rdma_ah_attr is valid for the device before passing it + * off to the driver. + */ +static int rdma_check_ah_attr(struct ib_device *device, + struct rdma_ah_attr *ah_attr) +{ + if (!rdma_is_port_valid(device, ah_attr->port_num)) + return -EINVAL; + + if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE && + !(ah_attr->ah_flags & IB_AH_GRH)) + return -EINVAL; + + if (ah_attr->grh.sgid_attr) { + /* + * Make sure the passed sgid_attr is consistent with the + * parameters + */ + if (ah_attr->grh.sgid_attr->index != ah_attr->grh.sgid_index || + ah_attr->grh.sgid_attr->port_num != ah_attr->port_num) + return -EINVAL; + } + return 0; +} + +/* + * If the ah requires a GRH then ensure that sgid_attr pointer is filled in. + * On success the caller is responsible to call rdma_unfill_sgid_attr(). 
+ */ +static int rdma_fill_sgid_attr(struct ib_device *device, + struct rdma_ah_attr *ah_attr, + const struct ib_gid_attr **old_sgid_attr) +{ + const struct ib_gid_attr *sgid_attr; + struct ib_global_route *grh; + int ret; + + *old_sgid_attr = ah_attr->grh.sgid_attr; + + ret = rdma_check_ah_attr(device, ah_attr); + if (ret) + return ret; + + if (!(ah_attr->ah_flags & IB_AH_GRH)) + return 0; + + grh = rdma_ah_retrieve_grh(ah_attr); + if (grh->sgid_attr) + return 0; + + sgid_attr = + rdma_get_gid_attr(device, ah_attr->port_num, grh->sgid_index); + if (IS_ERR(sgid_attr)) + return PTR_ERR(sgid_attr); + + /* Move ownership of the kref into the ah_attr */ + grh->sgid_attr = sgid_attr; + return 0; +} + +static void rdma_unfill_sgid_attr(struct rdma_ah_attr *ah_attr, + const struct ib_gid_attr *old_sgid_attr) +{ + /* + * Fill didn't change anything, the caller retains ownership of + * whatever it passed + */ + if (ah_attr->grh.sgid_attr == old_sgid_attr) + return; + + /* + * Otherwise, we need to undo what rdma_fill_sgid_attr() did so the + * caller doesn't see any change in the rdma_ah_attr. If we get here + * old_sgid_attr is NULL. + */ + rdma_destroy_ah_attr(ah_attr); +} + static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, struct ib_udata *udata) @@ -345,9 +424,30 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, return ah; } +/** + * rdma_create_ah - Creates an address handle for the + * given address vector. + * @pd: The protection domain associated with the address handle. + * @ah_attr: The attributes of the address vector. + * + * It returns a valid address handle pointer on success and an ERR_PTR on + * error. The address handle is used to reference a local or global + * destination in all UD QP post sends. + */ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr) { + const struct ib_gid_attr *old_sgid_attr; + struct ib_ah *ah; + int ret; + + ret = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr); + if (ret) + return ERR_PTR(ret); + + ah = _rdma_create_ah(pd, ah_attr, NULL); + + rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); + return ah; } EXPORT_SYMBOL(rdma_create_ah); @@ -368,15 +468,27 @@ struct ib_ah *rdma_create_user_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, struct ib_udata *udata) { + const struct ib_gid_attr *old_sgid_attr; + struct ib_ah *ah; int err; + err = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr); + if (err) + return ERR_PTR(err); + if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { err = ib_resolve_eth_dmac(pd->device, ah_attr); - if (err) - return ERR_PTR(err); + if (err) { + ah = ERR_PTR(err); + goto out; + } } - return _rdma_create_ah(pd, ah_attr, udata); + ah = _rdma_create_ah(pd, ah_attr, udata); + +out: + rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); + return ah; } EXPORT_SYMBOL(rdma_create_user_ah); @@ -632,6 +744,49 @@ int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num, } EXPORT_SYMBOL(ib_init_ah_attr_from_wc); +/** + * rdma_move_grh_sgid_attr - Sets the sgid attribute of GRH, taking ownership + * of the reference + * + * @attr: Pointer to AH attribute structure + * @dgid: Destination GID + * @flow_label: Flow label + * @hop_limit: Hop limit + * @traffic_class: traffic class + * @sgid_attr: Pointer to SGID attribute + * + * This takes ownership of the sgid_attr reference. The caller must ensure + * rdma_destroy_ah_attr() is called before destroying the rdma_ah_attr after + * calling this function.

+ */ +void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid, + u32 flow_label, u8 hop_limit, u8 traffic_class, + const struct ib_gid_attr *sgid_attr) +{ + rdma_ah_set_grh(attr, dgid, flow_label, sgid_attr->index, hop_limit, + traffic_class); + attr->grh.sgid_attr = sgid_attr; +} +EXPORT_SYMBOL(rdma_move_grh_sgid_attr); + +/** + * rdma_destroy_ah_attr - Release reference to SGID attribute of + * ah attribute. + * @ah_attr: Pointer to ah attribute + * + * Release reference to the SGID attribute of the ah attribute if it is + * non NULL. It is safe to call this multiple times, and safe to call it on + * a zero initialized ah_attr. + */ +void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr) +{ + if (ah_attr->grh.sgid_attr) { + rdma_put_gid_attr(ah_attr->grh.sgid_attr); + ah_attr->grh.sgid_attr = NULL; + } +} +EXPORT_SYMBOL(rdma_destroy_ah_attr); + struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, const struct ib_grh *grh, u8 port_num) { @@ -648,17 +803,29 @@ EXPORT_SYMBOL(ib_create_ah_from_wc); int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) { + const struct ib_gid_attr *old_sgid_attr; + int ret; + if (ah->type != ah_attr->type) return -EINVAL; - return ah->device->modify_ah ? + ret = rdma_fill_sgid_attr(ah->device, ah_attr, &old_sgid_attr); + if (ret) + return ret; + + ret = ah->device->modify_ah ? ah->device->modify_ah(ah, ah_attr) : -EOPNOTSUPP; + + rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); + return ret; } EXPORT_SYMBOL(rdma_modify_ah); int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) { + ah_attr->grh.sgid_attr = NULL; + return ah->device->query_ah ? ah->device->query_ah(ah, ah_attr) : -EOPNOTSUPP; @@ -1294,9 +1461,6 @@ static int ib_resolve_eth_dmac(struct ib_device *device, int ret = 0; struct ib_global_route *grh; - if (!rdma_is_port_valid(device, rdma_ah_get_port_num(ah_attr))) - return -EINVAL; - grh = rdma_ah_retrieve_grh(ah_attr); if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) { @@ -1315,6 +1479,14 @@ static int ib_resolve_eth_dmac(struct ib_device *device, return ret; } +static bool is_qp_type_connected(const struct ib_qp *qp) +{ + return (qp->qp_type == IB_QPT_UC || + qp->qp_type == IB_QPT_RC || + qp->qp_type == IB_QPT_XRC_INI || + qp->qp_type == IB_QPT_XRC_TGT); +} + /** * IB core internal function to perform QP attributes modification. */ @@ -1322,8 +1494,43 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { u8 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; + const struct ib_gid_attr *old_sgid_attr_av; + const struct ib_gid_attr *old_sgid_attr_alt_av; int ret; + /* + * Today the core code can only handle alternate paths and APM for IB + * ban them in roce mode. + */ + if (attr_mask & IB_QP_ALT_PATH && + !rdma_protocol_ib(qp->device, attr->alt_ah_attr.port_num)) + return -EINVAL; + + if (attr_mask & IB_QP_AV) { + ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr, + &old_sgid_attr_av); + if (ret) + return ret; + } + if (attr_mask & IB_QP_ALT_PATH) { + ret = rdma_fill_sgid_attr(qp->device, &attr->alt_ah_attr, + &old_sgid_attr_alt_av); + if (ret) + goto out_av; + } + + /* + * If the user provided the qp_attr then we have to resolve it. 
Kernel + * users have to provide already resolved rdma_ah_attr's + */ + if (udata && (attr_mask & IB_QP_AV) && + attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE && + is_qp_type_connected(qp)) { + ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); + if (ret) + goto out; + } + if (rdma_ib_or_roce(qp->device, port)) { if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) { pr_warn("%s: %s rq_psn overflow, masking to 24 bits\n", @@ -1342,17 +1549,15 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, if (!ret && (attr_mask & IB_QP_PORT)) qp->port = attr->port_num; +out: + if (attr_mask & IB_QP_ALT_PATH) + rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av); +out_av: + if (attr_mask & IB_QP_AV) + rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av); return ret; } -static bool is_qp_type_connected(const struct ib_qp *qp) -{ - return (qp->qp_type == IB_QPT_UC || - qp->qp_type == IB_QPT_RC || - qp->qp_type == IB_QPT_XRC_INI || - qp->qp_type == IB_QPT_XRC_TGT); -} - /** * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. * @ib_qp: The QP to modify. @@ -1367,17 +1572,7 @@ static bool is_qp_type_connected(const struct ib_qp *qp) int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { - struct ib_qp *qp = ib_qp->real_qp; - int ret; - - if (attr_mask & IB_QP_AV && - attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE && - is_qp_type_connected(qp)) { - ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); - if (ret) - return ret; - } - return _ib_modify_qp(qp, attr, attr_mask, udata); + return _ib_modify_qp(ib_qp->real_qp, attr, attr_mask, udata); } EXPORT_SYMBOL(ib_modify_qp_with_udata); @@ -1449,6 +1644,9 @@ int ib_query_qp(struct ib_qp *qp, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) { + qp_attr->ah_attr.grh.sgid_attr = NULL; + qp_attr->alt_ah_attr.grh.sgid_attr = NULL; + return qp->device->query_qp ? qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) : -EOPNOTSUPP; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 67c458215512..a3a4b8335668 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -690,6 +690,7 @@ struct ib_event_handler { } while (0) struct ib_global_route { + const struct ib_gid_attr *sgid_attr; union ib_gid dgid; u32 flow_label; u8 sgid_index; @@ -4026,8 +4027,14 @@ static inline void rdma_ah_set_grh(struct rdma_ah_attr *attr, grh->sgid_index = sgid_index; grh->hop_limit = hop_limit; grh->traffic_class = traffic_class; + grh->sgid_attr = NULL; } +void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr); +void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid, + u32 flow_label, u8 hop_limit, u8 traffic_class, + const struct ib_gid_attr *sgid_attr); + /** * rdma_ah_find_type - Return address handle type. * -- cgit From 947c99ecfcb49ddf7ade5e5fa4c33cf77d6d33f3 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 13 Jun 2018 10:22:04 +0300 Subject: IB/core: Tidy ib_resolve_eth_dmac No reason to call rdma_ah_retrieve_grh, tidy whitespace, and add a function comment block. 
Signed-off-by: Parav Pandit Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/verbs.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 26ee1de1cc26..45c095e478ec 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1455,13 +1455,19 @@ bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, } EXPORT_SYMBOL(ib_modify_qp_is_ok); +/** + * ib_resolve_eth_dmac - Resolve destination mac address + * @device: Device to consider + * @ah_attr: address handle attribute which describes the + * source and destination parameters + * ib_resolve_eth_dmac() resolves the destination mac address and L3 hop limit. + * It returns 0 on success or an appropriate error code. It initializes the + * necessary ah_attr fields when the call is successful. + */ static int ib_resolve_eth_dmac(struct ib_device *device, struct rdma_ah_attr *ah_attr) { - int ret = 0; - struct ib_global_route *grh; - - grh = rdma_ah_retrieve_grh(ah_attr); + int ret = 0; if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) { if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) {
-- cgit From d97099fe53ed9ab8b17d084bed0099feb08a48c1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 13 Jun 2018 10:22:05 +0300 Subject: IB{cm, core}: Introduce and use ah_attr copy, move, replace APIs Introduce AH attribute copy, move and replace APIs to be used by core and provider drivers. In the CM code flow, the ah attribute might be re-initialized twice while processing an incoming request, or initialized once from the path record while sending out CM requests. Therefore use the rdma_move_ah_attr API to handle such scenarios instead of memcpy(). Provider drivers keep a copy of the ah_attr during the lifetime of the ah. Therefore, use rdma_replace_ah_attr(), which conditionally releases the reference to the old ah_attr and holds a reference to the new attribute; that reference is released when the AH is freed. Signed-off-by: Parav Pandit Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cm.c | 4 +-- drivers/infiniband/core/verbs.c | 54 ++++++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/qedr/verbs.c | 3 ++- drivers/infiniband/sw/rdmavt/ah.c | 4 ++- drivers/infiniband/sw/rdmavt/qp.c | 6 +++-- include/rdma/ib_verbs.h | 5 ++++ 6 files changed, 70 insertions(+), 6 deletions(-)
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 800ff69e09b3..0f39a879c91d 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -474,7 +474,7 @@ static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc, if (ret) return ret; - memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr)); + rdma_move_ah_attr(&av->ah_attr, &new_ah_attr); return 0; } @@ -569,7 +569,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, ret = add_cm_id_to_port_list(cm_id_priv, av, port); if (ret) return ret; - memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr)); + rdma_move_ah_attr(&av->ah_attr, &new_ah_attr); return 0; }
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 45c095e478ec..2c7129cccc6e 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -326,6 +326,60 @@ EXPORT_SYMBOL(ib_dealloc_pd); /* Address handles */ +/** + * rdma_copy_ah_attr - Copy rdma ah attribute from source to destination.
+ * @dest: Pointer to destination ah_attr. Contents of the destination + * pointer are assumed to be invalid and the attributes are overwritten. + * @src: Pointer to source ah_attr. + */ +void rdma_copy_ah_attr(struct rdma_ah_attr *dest, + const struct rdma_ah_attr *src) +{ + *dest = *src; + if (dest->grh.sgid_attr) + rdma_hold_gid_attr(dest->grh.sgid_attr); +} +EXPORT_SYMBOL(rdma_copy_ah_attr); + +/** + * rdma_replace_ah_attr - Replace a valid ah_attr with a new one. + * @old: Pointer to existing ah_attr which needs to be replaced. + * old is assumed to be valid or zero'd + * @new: Pointer to the new ah_attr. + * + * rdma_replace_ah_attr() first releases any reference in the old ah_attr if + * the old ah_attr is valid; after that it copies the new attribute and takes + * a reference on the newly copied ah_attr. + */ +void rdma_replace_ah_attr(struct rdma_ah_attr *old, + const struct rdma_ah_attr *new) +{ + rdma_destroy_ah_attr(old); + *old = *new; + if (old->grh.sgid_attr) + rdma_hold_gid_attr(old->grh.sgid_attr); +} +EXPORT_SYMBOL(rdma_replace_ah_attr); + +/** + * rdma_move_ah_attr - Move the ah_attr pointed to by source to destination. + * @dest: Pointer to destination ah_attr to copy to. + * dest is assumed to be valid or zero'd + * @src: Pointer to the new ah_attr. + * + * rdma_move_ah_attr() first releases any reference in the destination ah_attr + * if it is valid. This also transfers ownership of internal references from + * src to dest, making src invalid in the process. No new reference to the src + * ah_attr is taken. + */ +void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src) +{ + rdma_destroy_ah_attr(dest); + *dest = *src; + src->grh.sgid_attr = NULL; +} +EXPORT_SYMBOL(rdma_move_ah_attr); + /* * Validate that the rdma_ah_attr is valid for the device before passing it * off to the driver.
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index f7ac8fc9b531..5b2a79b27036 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2302,7 +2302,7 @@ struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, if (!ah) return ERR_PTR(-ENOMEM); - ah->attr = *attr; + rdma_copy_ah_attr(&ah->attr, attr); return &ah->ibah; } @@ -2311,6 +2311,7 @@ int qedr_destroy_ah(struct ib_ah *ibah) { struct qedr_ah *ah = get_qedr_ah(ibah); + rdma_destroy_ah_attr(&ah->attr); kfree(ah); return 0; } diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index ba3639a0d77c..89ec0f64abfc 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -120,7 +120,8 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd, dev->n_ahs_allocated++; spin_unlock_irqrestore(&dev->n_ahs_lock, flags); - ah->attr = *ah_attr; + rdma_copy_ah_attr(&ah->attr, ah_attr); + atomic_set(&ah->refcount, 0); if (dev->driver_f.notify_new_ah) @@ -148,6 +149,7 @@ int rvt_destroy_ah(struct ib_ah *ibah) dev->n_ahs_allocated--; spin_unlock_irqrestore(&dev->n_ahs_lock, flags); + rdma_destroy_ah_attr(&ah->attr); kfree(ah); return 0; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 41183bd665ca..815f94c17c48 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1336,13 +1336,13 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, qp->qp_access_flags = attr->qp_access_flags; if (attr_mask & IB_QP_AV) { - qp->remote_ah_attr = attr->ah_attr; + rdma_replace_ah_attr(&qp->remote_ah_attr, &attr->ah_attr); qp->s_srate = rdma_ah_get_static_rate(&attr->ah_attr); qp->srate_mbps = ib_rate_to_mbps(qp->s_srate); } if (attr_mask & IB_QP_ALT_PATH) { - qp->alt_ah_attr = attr->alt_ah_attr; + rdma_replace_ah_attr(&qp->alt_ah_attr, &attr->alt_ah_attr); qp->s_alt_pkey_index = attr->alt_pkey_index; } @@ -1459,6 +1459,8 @@ int rvt_destroy_qp(struct ib_qp *ibqp) vfree(qp->s_wq); rdi->driver_f.qp_priv_free(rdi, qp); kfree(qp->s_ack_queue); + rdma_destroy_ah_attr(&qp->remote_ah_attr); + rdma_destroy_ah_attr(&qp->alt_ah_attr); kfree(qp); return 0; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index a3a4b8335668..1e5c1e8ba282 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4034,6 +4034,11 @@ void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr); void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid, u32 flow_label, u8 hop_limit, u8 traffic_class, const struct ib_gid_attr *sgid_attr); +void rdma_copy_ah_attr(struct rdma_ah_attr *dest, + const struct rdma_ah_attr *src); +void rdma_replace_ah_attr(struct rdma_ah_attr *old, + const struct rdma_ah_attr *new); +void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src); /** * rdma_ah_find_type - Return address handle type. -- cgit From 47ec38666210485de860ab24675acb3d2e7d4954 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 13 Jun 2018 10:22:06 +0300 Subject: RDMA: Convert drivers to use sgid_attr instead of sgid_index The core code now ensures that all driver callbacks that receive an rdma_ah_attrs will have a sgid_attr's pointer if there is a GRH present. Drivers can use this pointer instead of calling a query function with sgid_index. This simplifies the drivers and also avoids races where a gid_index lookup may return different data if it is changed. 
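The recurring conversion in the driver hunks below, condensed into a before/after sketch (function names hypothetical; field usage follows the bnxt_re and mlx4 hunks):

	/* before: per-driver lookup by index, with reference management
	 * and a window where the table entry could change underneath */
	static int example_get_smac_old(struct ib_device *ibdev,
					struct rdma_ah_attr *ah_attr, u8 *smac)
	{
		struct ib_gid_attr gid_attr;
		union ib_gid gid;
		int ret;

		ret = ib_get_cached_gid(ibdev, 1, ah_attr->grh.sgid_index,
					&gid, &gid_attr);
		if (ret)
			return ret;
		memcpy(smac, gid_attr.ndev->dev_addr, ETH_ALEN);
		dev_put(gid_attr.ndev);
		return 0;
	}

	/* after: the core resolved and pinned the entry before the
	 * driver callback ran, so just dereference the pointer */
	static int example_get_smac_new(struct rdma_ah_attr *ah_attr, u8 *smac)
	{
		const struct ib_gid_attr *sgid_attr = ah_attr->grh.sgid_attr;

		memcpy(smac, sgid_attr->ndev->dev_addr, ETH_ALEN);
		return 0;
	}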
Signed-off-by: Parav Pandit Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 63 +++++++++++--------------------- drivers/infiniband/hw/hns/hns_roce_ah.c | 19 ++-------- drivers/infiniband/hw/mlx4/ah.c | 16 +++----- drivers/infiniband/hw/mlx4/qp.c | 31 ++++++---------- drivers/infiniband/hw/mlx5/ah.c | 11 +----- drivers/infiniband/hw/mlx5/main.c | 32 ++-------------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 6 +-- drivers/infiniband/hw/mlx5/qp.c | 10 ++--- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 24 +++++------- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 21 +++++------ drivers/infiniband/hw/qedr/verbs.c | 22 ++++------- drivers/infiniband/sw/rxe/rxe_av.c | 11 +++--- drivers/infiniband/sw/rxe/rxe_loc.h | 5 +-- drivers/infiniband/sw/rxe/rxe_qp.c | 23 +----------- drivers/infiniband/sw/rxe/rxe_verbs.c | 31 +++------------- include/rdma/ib_verbs.h | 8 ++-- 16 files changed, 96 insertions(+), 237 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 62eb9e3346d5..134360236c2c 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -672,8 +672,6 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, int rc; u8 nw_type; - struct ib_gid_attr sgid_attr; - if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) { dev_err(rdev_to_dev(rdev), "Failed to alloc AH: GRH not set"); return ERR_PTR(-EINVAL); @@ -704,20 +702,11 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, grh->dgid.raw) && !rdma_link_local_addr((struct in6_addr *) grh->dgid.raw)) { - union ib_gid sgid; + const struct ib_gid_attr *sgid_attr; - rc = ib_get_cached_gid(&rdev->ibdev, 1, - grh->sgid_index, &sgid, - &sgid_attr); - if (rc) { - dev_err(rdev_to_dev(rdev), - "Failed to query gid at index %d", - grh->sgid_index); - goto fail; - } - dev_put(sgid_attr.ndev); + sgid_attr = grh->sgid_attr; /* Get network header type for this GID */ - nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); + nw_type = rdma_gid_attr_network_type(sgid_attr); switch (nw_type) { case RDMA_NETWORK_IPV4: ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V2IPV4; @@ -1598,9 +1587,6 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; enum ib_qp_state curr_qp_state, new_qp_state; int rc, entries; - int status; - union ib_gid sgid; - struct ib_gid_attr sgid_attr; unsigned int flags; u8 nw_type; @@ -1667,6 +1653,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, if (qp_attr_mask & IB_QP_AV) { const struct ib_global_route *grh = rdma_ah_read_grh(&qp_attr->ah_attr); + const struct ib_gid_attr *sgid_attr; qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_DGID | CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL | @@ -1690,29 +1677,23 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, ether_addr_copy(qp->qplib_qp.ah.dmac, qp_attr->ah_attr.roce.dmac); - status = ib_get_cached_gid(&rdev->ibdev, 1, - grh->sgid_index, - &sgid, &sgid_attr); - if (!status) { - memcpy(qp->qplib_qp.smac, sgid_attr.ndev->dev_addr, - ETH_ALEN); - dev_put(sgid_attr.ndev); - nw_type = ib_gid_to_network_type(sgid_attr.gid_type, - &sgid); - switch (nw_type) { - case RDMA_NETWORK_IPV4: - qp->qplib_qp.nw_type = - CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4; - break; - case RDMA_NETWORK_IPV6: - qp->qplib_qp.nw_type = - CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6; - break; - default: - qp->qplib_qp.nw_type = - 
CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1; - break; - } + sgid_attr = qp_attr->ah_attr.grh.sgid_attr; + memcpy(qp->qplib_qp.smac, sgid_attr->ndev->dev_addr, + ETH_ALEN); + nw_type = rdma_gid_attr_network_type(sgid_attr); + switch (nw_type) { + case RDMA_NETWORK_IPV4: + qp->qplib_qp.nw_type = + CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4; + break; + case RDMA_NETWORK_IPV6: + qp->qplib_qp.nw_type = + CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6; + break; + default: + qp->qplib_qp.nw_type = + CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1; + break; } } @@ -1934,7 +1915,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, dev_put(sgid_attr.ndev); } /* Get network header type for this GID */ - nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); + nw_type = rdma_gid_attr_network_type(&sgid_attr); switch (nw_type) { case RDMA_NETWORK_IPV4: nw_type = BNXT_RE_ROCEV2_IPV4_PACKET; diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index d74928621559..14efa3b9adb2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -44,13 +44,11 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device); + const struct ib_gid_attr *gid_attr; struct device *dev = hr_dev->dev; - struct ib_gid_attr gid_attr; struct hns_roce_ah *ah; u16 vlan_tag = 0xffff; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); - union ib_gid sgid; - int ret; ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) @@ -59,18 +57,9 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, /* Get mac address */ memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); - /* Get source gid */ - ret = ib_get_cached_gid(ibpd->device, rdma_ah_get_port_num(ah_attr), - grh->sgid_index, &sgid, &gid_attr); - if (ret) { - dev_err(dev, "get sgid failed! 
ret = %d\n", ret); - kfree(ah); - return ERR_PTR(ret); - } - - if (is_vlan_dev(gid_attr.ndev)) - vlan_tag = vlan_dev_vlan_id(gid_attr.ndev); - dev_put(gid_attr.ndev); + gid_attr = ah_attr->grh.sgid_attr; + if (is_vlan_dev(gid_attr->ndev)) + vlan_tag = vlan_dev_vlan_id(gid_attr->ndev); if (vlan_tag < 0x1000) vlan_tag |= (rdma_ah_get_sl(ah_attr) & diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 9345d5b546d1..1ab3681acdcd 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -82,12 +82,11 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct mlx4_ib_ah *ah) { struct mlx4_ib_dev *ibdev = to_mdev(pd->device); + const struct ib_gid_attr *gid_attr; struct mlx4_dev *dev = ibdev->dev; int is_mcast = 0; struct in6_addr in6; u16 vlan_tag = 0xffff; - union ib_gid sgid; - struct ib_gid_attr gid_attr; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); int ret; @@ -96,15 +95,12 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, is_mcast = 1; memcpy(ah->av.eth.mac, ah_attr->roce.dmac, ETH_ALEN); - ret = ib_get_cached_gid(pd->device, rdma_ah_get_port_num(ah_attr), - grh->sgid_index, &sgid, &gid_attr); - if (ret) - return ERR_PTR(ret); eth_zero_addr(ah->av.eth.s_mac); - if (is_vlan_dev(gid_attr.ndev)) - vlan_tag = vlan_dev_vlan_id(gid_attr.ndev); - memcpy(ah->av.eth.s_mac, gid_attr.ndev->dev_addr, ETH_ALEN); - dev_put(gid_attr.ndev); + gid_attr = ah_attr->grh.sgid_attr; + if (is_vlan_dev(gid_attr->ndev)) + vlan_tag = vlan_dev_vlan_id(gid_attr->ndev); + memcpy(ah->av.eth.s_mac, gid_attr->ndev->dev_addr, ETH_ALEN); + if (vlan_tag < 0x1000) vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13; ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 3b8045fd23ed..1538ce6e9dac 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2176,6 +2176,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, { struct ib_uobject *ibuobject; struct ib_srq *ibsrq; + const struct ib_gid_attr *gid_attr = NULL; struct ib_rwq_ind_table *rwq_ind_tbl; enum ib_qp_type qp_type; struct mlx4_ib_dev *dev; @@ -2356,29 +2357,17 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, if (attr_mask & IB_QP_AV) { u8 port_num = mlx4_is_bonded(dev->dev) ? 1 : attr_mask & IB_QP_PORT ? 
attr->port_num : qp->port; - union ib_gid gid; - struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB}; u16 vlan = 0xffff; u8 smac[ETH_ALEN]; - int status = 0; int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) && rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH; if (is_eth) { - int index = - rdma_ah_read_grh(&attr->ah_attr)->sgid_index; - - status = ib_get_cached_gid(&dev->ib_dev, port_num, - index, &gid, &gid_attr); - if (!status) { - vlan = rdma_vlan_dev_vlan_id(gid_attr.ndev); - memcpy(smac, gid_attr.ndev->dev_addr, ETH_ALEN); - dev_put(gid_attr.ndev); - } + gid_attr = attr->ah_attr.grh.sgid_attr; + vlan = rdma_vlan_dev_vlan_id(gid_attr->ndev); + memcpy(smac, gid_attr->ndev->dev_addr, ETH_ALEN); } - if (status) - goto out; if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path, port_num, vlan, smac)) @@ -2389,7 +2378,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, if (is_eth && (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)) { - u8 qpc_roce_mode = gid_type_to_qpc(gid_attr.gid_type); + u8 qpc_roce_mode = gid_type_to_qpc(gid_attr->gid_type); if (qpc_roce_mode == MLX4_QPC_ROCE_MODE_UNDEFINED) { err = -EINVAL; @@ -3181,10 +3170,12 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. guid_cache[ah->av.ib.gid_index]; } else { - ib_get_cached_gid(ib_dev, - be32_to_cpu(ah->av.ib.port_pd) >> 24, - ah->av.ib.gid_index, - &sqp->ud_header.grh.source_gid, NULL); + err = rdma_query_gid(ib_dev, + be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, + &sqp->ud_header.grh.source_gid); + if (err) + return err; } } memcpy(sqp->ud_header.grh.destination_gid.raw, diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index e6bde32a83f3..ffd03bf1a71e 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -37,7 +37,6 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, struct rdma_ah_attr *ah_attr) { enum ib_gid_type gid_type; - int err; if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); @@ -53,18 +52,12 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4); if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { - err = mlx5_get_roce_gid_type(dev, ah_attr->port_num, - ah_attr->grh.sgid_index, - &gid_type); - if (err) - return ERR_PTR(err); + gid_type = ah_attr->grh.sgid_attr->gid_type; memcpy(ah->av.rmac, ah_attr->roce.dmac, sizeof(ah_attr->roce.dmac)); ah->av.udp_sport = - mlx5_get_roce_udp_sport(dev, - rdma_ah_get_port_num(ah_attr), - rdma_ah_read_grh(ah_attr)->sgid_index); + mlx5_get_roce_udp_sport(dev, ah_attr->grh.sgid_attr); ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0x7) << 1; if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) #define MLX5_ECN_ENABLED BIT(1) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 94669df81342..e6d88f32391b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -524,41 +524,15 @@ static int mlx5_ib_del_gid(const struct ib_gid_attr *attr, attr->index, NULL, NULL); } -__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, - int index) +__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, + const struct ib_gid_attr *attr) { - struct ib_gid_attr attr; - union ib_gid gid; - - if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr)) - return 0; - - 
dev_put(attr.ndev); - - if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) + if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) return 0; return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port)); } -int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num, - int index, enum ib_gid_type *gid_type) -{ - struct ib_gid_attr attr; - union ib_gid gid; - int ret; - - ret = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr); - if (ret) - return ret; - - dev_put(attr.ndev); - - *gid_type = attr.gid_type; - - return 0; -} - static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d89c8fe626f6..615bd6e9db6c 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1183,10 +1183,8 @@ int mlx5_ib_get_vf_stats(struct ib_device *device, int vf, int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, int type); -__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, - int index); -int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num, - int index, enum ib_gid_type *gid_type); +__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, + const struct ib_gid_attr *attr); void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index a4f1f638509f..e3c4ab9be41d 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2555,18 +2555,16 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (ah->type == RDMA_AH_ATTR_TYPE_ROCE) { if (!(ah_flags & IB_AH_GRH)) return -EINVAL; - err = mlx5_get_roce_gid_type(dev, port, grh->sgid_index, - &gid_type); - if (err) - return err; + memcpy(path->rmac, ah->roce.dmac, sizeof(ah->roce.dmac)); if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC || qp->ibqp.qp_type == IB_QPT_XRC_INI || qp->ibqp.qp_type == IB_QPT_XRC_TGT) - path->udp_sport = mlx5_get_roce_udp_sport(dev, port, - grh->sgid_index); + path->udp_sport = + mlx5_get_roce_udp_sport(dev, ah->grh.sgid_attr); path->dci_cfi_prio_sl = (sl & 0x7) << 4; + gid_type = ah->grh.sgid_attr->gid_type; if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) path->ecn_dscp = (grh->traffic_class >> 2) & 0x3f; } else { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 3897b64532e1..a51b80bfadb3 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -71,7 +71,7 @@ static u16 ocrdma_hdr_type_to_proto_num(int devid, u8 hdr_type) } static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, - struct rdma_ah_attr *attr, union ib_gid *sgid, + struct rdma_ah_attr *attr, const union ib_gid *sgid, int pdid, bool *isvlan, u16 vlan_tag) { int status; @@ -164,11 +164,10 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, struct ocrdma_ah *ah; bool isvlan = false; u16 vlan_tag = 0xffff; - struct ib_gid_attr sgid_attr; + const struct ib_gid_attr *sgid_attr; struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); const struct ib_global_route *grh; - union ib_gid sgid; if ((attr->type != RDMA_AH_ATTR_TYPE_ROCE) || !(rdma_ah_get_ah_flags(attr) & IB_AH_GRH)) @@ -186,20 +185,15 @@ struct 
ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, if (status) goto av_err; - status = ib_get_cached_gid(&dev->ibdev, 1, grh->sgid_index, &sgid, - &sgid_attr); - if (status) { - pr_err("%s(): Failed to query sgid, status = %d\n", - __func__, status); - goto av_conf_err; - } - if (is_vlan_dev(sgid_attr.ndev)) - vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev); - dev_put(sgid_attr.ndev); + sgid_attr = attr->grh.sgid_attr; + if (is_vlan_dev(sgid_attr->ndev)) + vlan_tag = vlan_dev_vlan_id(sgid_attr->ndev); + /* Get network header type for this GID */ - ah->hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); + ah->hdr_type = rdma_gid_attr_network_type(sgid_attr); - status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan, vlan_tag); + status = set_av_attr(dev, ah, attr, &sgid_attr->gid, pd->id, + &isvlan, vlan_tag); if (status) goto av_conf_err; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 6c136e5017fe..c6c87cba943b 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -2494,8 +2494,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, { int status; struct rdma_ah_attr *ah_attr = &attrs->ah_attr; - union ib_gid sgid; - struct ib_gid_attr sgid_attr; + const struct ib_gid_attr *sgid_attr; u32 vlan_id = 0xFFFF; u8 mac_addr[6], hdr_type; union { @@ -2525,25 +2524,23 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, memcpy(&cmd->params.dgid[0], &grh->dgid.raw[0], sizeof(cmd->params.dgid)); - status = ib_get_cached_gid(&dev->ibdev, 1, grh->sgid_index, - &sgid, &sgid_attr); - if (!status) { - vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev); - memcpy(mac_addr, sgid_attr.ndev->dev_addr, ETH_ALEN); - dev_put(sgid_attr.ndev); - } + sgid_attr = ah_attr->grh.sgid_attr; + vlan_id = rdma_vlan_dev_vlan_id(sgid_attr->ndev); + memcpy(mac_addr, sgid_attr->ndev->dev_addr, ETH_ALEN); qp->sgid_idx = grh->sgid_index; - memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid)); + memcpy(&cmd->params.sgid[0], &sgid_attr->gid.raw[0], + sizeof(cmd->params.sgid)); status = ocrdma_resolve_dmac(dev, ah_attr, &mac_addr[0]); if (status) return status; + cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) | (mac_addr[2] << 16) | (mac_addr[3] << 24); - hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); + hdr_type = rdma_gid_attr_network_type(sgid_attr); if (hdr_type == RDMA_NETWORK_IPV4) { - rdma_gid2ip(&sgid_addr._sockaddr, &sgid); + rdma_gid2ip(&sgid_addr._sockaddr, &sgid_attr->gid); rdma_gid2ip(&dgid_addr._sockaddr, &grh->dgid); memcpy(&cmd->params.dgid[0], &dgid_addr._sockaddr_in.sin_addr.s_addr, 4); diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 5b2a79b27036..10d8f4134ec0 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1075,27 +1075,19 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, struct qed_rdma_modify_qp_in_params *qp_params) { + const struct ib_gid_attr *gid_attr; enum rdma_network_type nw_type; - struct ib_gid_attr gid_attr; const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); - union ib_gid gid; u32 ipv4_addr; - int rc = 0; int i; - rc = ib_get_cached_gid(ibqp->device, - rdma_ah_get_port_num(&attr->ah_attr), - grh->sgid_index, &gid, &gid_attr); - if (rc) - return rc; - - qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev); + gid_attr = grh->sgid_attr; + qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr->ndev); - 
dev_put(gid_attr.ndev); - nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid); + nw_type = rdma_gid_attr_network_type(gid_attr); switch (nw_type) { case RDMA_NETWORK_IPV6: - memcpy(&qp_params->sgid.bytes[0], &gid.raw[0], + memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0], sizeof(qp_params->sgid)); memcpy(&qp_params->dgid.bytes[0], &grh->dgid, @@ -1105,7 +1097,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); break; case RDMA_NETWORK_IB: - memcpy(&qp_params->sgid.bytes[0], &gid.raw[0], + memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0], sizeof(qp_params->sgid)); memcpy(&qp_params->dgid.bytes[0], &grh->dgid, @@ -1115,7 +1107,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, case RDMA_NETWORK_IPV4: memset(&qp_params->sgid, 0, sizeof(qp_params->sgid)); memset(&qp_params->dgid, 0, sizeof(qp_params->dgid)); - ipv4_addr = qedr_get_ipv4_from_gid(gid.raw); + ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw); qp_params->sgid.ipv4_addr = ipv4_addr; ipv4_addr = qedr_get_ipv4_from_gid(grh->dgid.raw); diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c index c0f972c58d55..26fe8d7dbc55 100644 --- a/drivers/infiniband/sw/rxe/rxe_av.c +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -82,15 +82,14 @@ void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr) rdma_ah_set_port_num(attr, av->port_num); } -void rxe_av_fill_ip_info(struct rxe_av *av, - struct rdma_ah_attr *attr, - struct ib_gid_attr *sgid_attr, - union ib_gid *sgid) +void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr) { - rdma_gid2ip((struct sockaddr *)&av->sgid_addr, sgid); + const struct ib_gid_attr *sgid_attr = attr->grh.sgid_attr; + + rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid_attr->gid); rdma_gid2ip((struct sockaddr *)&av->dgid_addr, &rdma_ah_read_grh(attr)->dgid); - av->network_type = ib_gid_to_network_type(sgid_attr->gid_type, sgid); + av->network_type = rdma_gid_attr_network_type(sgid_attr); } struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index a51ece596c43..87d14f7ef21b 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -43,10 +43,7 @@ void rxe_av_from_attr(u8 port_num, struct rxe_av *av, void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr); -void rxe_av_fill_ip_info(struct rxe_av *av, - struct rdma_ah_attr *attr, - struct ib_gid_attr *sgid_attr, - union ib_gid *sgid); +void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr); struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt); diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index b9f7aa1114b2..9f83fc982f31 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -580,9 +580,6 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, struct ib_udata *udata) { int err; - struct rxe_dev *rxe = to_rdev(qp->ibqp.device); - union ib_gid sgid; - struct ib_gid_attr sgid_attr; if (mask & IB_QP_MAX_QP_RD_ATOMIC) { int max_rd_atomic = __roundup_pow_of_two(attr->max_rd_atomic); @@ -623,30 +620,14 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, qp->attr.qkey = attr->qkey; if (mask & IB_QP_AV) { - ib_get_cached_gid(&rxe->ib_dev, 1, - rdma_ah_read_grh(&attr->ah_attr)->sgid_index, - &sgid, &sgid_attr); rxe_av_from_attr(attr->port_num, &qp->pri_av, 
&attr->ah_attr); - rxe_av_fill_ip_info(&qp->pri_av, &attr->ah_attr, - &sgid_attr, &sgid); - if (sgid_attr.ndev) - dev_put(sgid_attr.ndev); + rxe_av_fill_ip_info(&qp->pri_av, &attr->ah_attr); } if (mask & IB_QP_ALT_PATH) { - u8 sgid_index = - rdma_ah_read_grh(&attr->alt_ah_attr)->sgid_index; - - ib_get_cached_gid(&rxe->ib_dev, 1, sgid_index, - &sgid, &sgid_attr); - rxe_av_from_attr(attr->alt_port_num, &qp->alt_av, &attr->alt_ah_attr); - rxe_av_fill_ip_info(&qp->alt_av, &attr->alt_ah_attr, - &sgid_attr, &sgid); - if (sgid_attr.ndev) - dev_put(sgid_attr.ndev); - + rxe_av_fill_ip_info(&qp->alt_av, &attr->alt_ah_attr); qp->attr.alt_port_num = attr->alt_port_num; qp->attr.alt_pkey_index = attr->alt_pkey_index; qp->attr.alt_timeout = attr->alt_timeout; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 9deafc3aa6af..9cfd440cebe1 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -222,25 +222,11 @@ static int rxe_dealloc_pd(struct ib_pd *ibpd) return 0; } -static int rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr, - struct rxe_av *av) +static void rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr, + struct rxe_av *av) { - int err; - union ib_gid sgid; - struct ib_gid_attr sgid_attr; - - err = ib_get_cached_gid(&rxe->ib_dev, rdma_ah_get_port_num(attr), - rdma_ah_read_grh(attr)->sgid_index, &sgid, - &sgid_attr); - if (err) { - pr_err("Failed to query sgid. err = %d\n", err); - return err; - } - rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr); - rxe_av_fill_ip_info(av, attr, &sgid_attr, &sgid); - dev_put(sgid_attr.ndev); - return 0; + rxe_av_fill_ip_info(av, attr); } static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, @@ -266,13 +252,9 @@ static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, rxe_add_ref(pd); ah->pd = pd; - err = rxe_init_av(rxe, attr, &ah->av); - if (err) - goto err2; - + rxe_init_av(rxe, attr, &ah->av); return &ah->ibah; -err2: rxe_drop_ref(pd); rxe_drop_ref(ah); err1: @@ -289,10 +271,7 @@ static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) if (err) return err; - err = rxe_init_av(rxe, attr, &ah->av); - if (err) - return err; - + rxe_init_av(rxe, attr, &ah->av); return 0; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 1e5c1e8ba282..65f467d65bff 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -149,13 +149,13 @@ static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type net return IB_GID_TYPE_IB; } -static inline enum rdma_network_type ib_gid_to_network_type(enum ib_gid_type gid_type, - union ib_gid *gid) +static inline enum rdma_network_type +rdma_gid_attr_network_type(const struct ib_gid_attr *attr) { - if (gid_type == IB_GID_TYPE_IB) + if (attr->gid_type == IB_GID_TYPE_IB) return RDMA_NETWORK_IB; - if (ipv6_addr_v4mapped((struct in6_addr *)gid)) + if (ipv6_addr_v4mapped((struct in6_addr *)&attr->gid)) return RDMA_NETWORK_IPV4; else return RDMA_NETWORK_IPV6; -- cgit From 7492052a186b11be024800eabedef25dcb882613 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 13 Jun 2018 10:22:07 +0300 Subject: IB/mlx4: Use GID attribute from ah attribute While converting a GID index from the attribute to that of the HCA, the GID attribute is available from the ah_attr. Make use of the GID attribute to simplify the code and also avoid a GID query.
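An illustrative sketch of the caller-side pattern this enables (not part of the patch; example_resolve_hw_gid_index() is a hypothetical helper):

	/*
	 * The sgid_attr held inside rdma_ah_attr can be passed straight
	 * through; it already carries gid, gid_type, port_num and index,
	 * so no (port, index) query into the GID cache is needed.
	 */
	static int example_resolve_hw_gid_index(struct mlx4_ib_dev *ibdev,
						const struct rdma_ah_attr *ah_attr)
	{
		return mlx4_ib_gid_index_to_real_index(ibdev,
						       ah_attr->grh.sgid_attr);
	}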
Signed-off-by: Parav Pandit Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx4/ah.c | 4 +--- drivers/infiniband/hw/mlx4/main.c | 20 ++++++-------------- drivers/infiniband/hw/mlx4/mlx4_ib.h | 2 +- drivers/infiniband/hw/mlx4/qp.c | 3 +-- 4 files changed, 9 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 1ab3681acdcd..5e9b0837ef61 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -105,9 +105,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13; ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (rdma_ah_get_port_num(ah_attr) << 24)); - ret = mlx4_ib_gid_index_to_real_index(ibdev, - rdma_ah_get_port_num(ah_attr), - grh->sgid_index); + ret = mlx4_ib_gid_index_to_real_index(ibdev, gid_attr); if (ret < 0) return ERR_PTR(ret); ah->av.eth.gid_index = ret; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 859089df9f17..908b8e5c5acb 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -380,17 +380,15 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context) } int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, - u8 port_num, int index) + const struct ib_gid_attr *attr) { struct mlx4_ib_iboe *iboe = &ibdev->iboe; struct gid_cache_context *ctx = NULL; - union ib_gid gid; struct mlx4_port_gid_table *port_gid_table; int real_index = -EINVAL; int i; - int ret; unsigned long flags; - struct ib_gid_attr attr; + u8 port_num = attr->port_num; if (port_num > MLX4_MAX_PORTS) return -EINVAL; @@ -399,21 +397,15 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, port_num = 1; if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num)) - return index; - - ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, &attr); - if (ret) - return ret; - - if (attr.ndev) - dev_put(attr.ndev); + return attr->index; spin_lock_irqsave(&iboe->lock, flags); port_gid_table = &iboe->gids[port_num - 1]; for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) - if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid)) && - attr.gid_type == port_gid_table->gids[i].gid_type) { + if (!memcmp(&port_gid_table->gids[i].gid, + &attr->gid, sizeof(attr->gid)) && + attr->gid_type == port_gid_table->gids[i].gid_type) { ctx = port_gid_table->gids[i].ctx; break; } diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 7b1429917aba..88c929e2a79e 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -900,7 +900,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, int mr_access_flags, struct ib_pd *pd, struct ib_udata *udata); int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, - u8 port_num, int index); + const struct ib_gid_attr *attr); void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev, int port); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 1538ce6e9dac..f800e8024859 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1859,8 +1859,7 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, if (rdma_ah_get_ah_flags(ah) & IB_AH_GRH) { const struct ib_global_route *grh = rdma_ah_read_grh(ah); int real_sgid_index = - mlx4_ib_gid_index_to_real_index(dev, port, - grh->sgid_index); + mlx4_ib_gid_index_to_real_index(dev, grh->sgid_attr); if (real_sgid_index < 0) 
return real_sgid_index; -- cgit From 1a1f460ff151710289c2f8d4badd8b603b87d610 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 13 Jun 2018 10:22:08 +0300 Subject: RDMA: Hold the sgid_attr inside the struct ib_ah/qp If the AH has a GRH then hold a reference to the sgid_attr inside the common struct. If the QP is modified with an AV that includes a GRH then also hold a reference to the sgid_attr inside the common struct. This informs the cache that the sgid_index is in-use so long as the AH or QP using it exists. This also means that all drivers can access the sgid_attr directly from the ah_attr instead of querying the cache during their UD post-send paths. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/verbs.c | 46 +++++++++++++++++++++++++++++++++++++++-- include/rdma/ib_verbs.h | 4 ++++ 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 2c7129cccc6e..b0ad739a7bd0 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -459,6 +459,19 @@ static void rdma_unfill_sgid_attr(struct rdma_ah_attr *ah_attr, rdma_destroy_ah_attr(ah_attr); } +static const struct ib_gid_attr * +rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr, + const struct ib_gid_attr *old_attr) +{ + if (old_attr) + rdma_put_gid_attr(old_attr); + if (ah_attr->ah_flags & IB_AH_GRH) { + rdma_hold_gid_attr(ah_attr->grh.sgid_attr); + return ah_attr->grh.sgid_attr; + } + return NULL; +} + static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, struct ib_udata *udata) @@ -472,6 +485,8 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, ah->pd = pd; ah->uobject = NULL; ah->type = ah_attr->type; + ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL); + atomic_inc(&pd->usecnt); } @@ -871,6 +886,7 @@ int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) ah->device->modify_ah(ah, ah_attr) : -EOPNOTSUPP; + ah->sgid_attr = rdma_update_sgid_attr(ah_attr, ah->sgid_attr); rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); return ret; } @@ -888,13 +904,17 @@ EXPORT_SYMBOL(rdma_query_ah); int rdma_destroy_ah(struct ib_ah *ah) { + const struct ib_gid_attr *sgid_attr = ah->sgid_attr; struct ib_pd *pd; int ret; pd = ah->pd; ret = ah->device->destroy_ah(ah); - if (!ret) + if (!ret) { atomic_dec(&pd->usecnt); + if (sgid_attr) + rdma_put_gid_attr(sgid_attr); + } return ret; } @@ -1573,6 +1593,13 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, return ret; } if (attr_mask & IB_QP_ALT_PATH) { + /* + * FIXME: This does not track the migration state, so if the + * user loads a new alternate path after the HW has migrated + * from primary->alternate we will keep the wrong + * references. This is OK for IB because the reference + * counting does not serve any functional purpose. 
+ */ ret = rdma_fill_sgid_attr(qp->device, &attr->alt_ah_attr, &old_sgid_attr_alt_av); if (ret) @@ -1606,8 +1633,17 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, } ret = ib_security_modify_qp(qp, attr, attr_mask, udata); - if (!ret && (attr_mask & IB_QP_PORT)) + if (ret) + goto out; + + if (attr_mask & IB_QP_PORT) qp->port = attr->port_num; + if (attr_mask & IB_QP_AV) + qp->av_sgid_attr = + rdma_update_sgid_attr(&attr->ah_attr, qp->av_sgid_attr); + if (attr_mask & IB_QP_ALT_PATH) + qp->alt_path_sgid_attr = rdma_update_sgid_attr( + &attr->alt_ah_attr, qp->alt_path_sgid_attr); out: if (attr_mask & IB_QP_ALT_PATH) @@ -1765,6 +1801,8 @@ static int __ib_destroy_shared_qp(struct ib_qp *qp) int ib_destroy_qp(struct ib_qp *qp) { + const struct ib_gid_attr *alt_path_sgid_attr = qp->alt_path_sgid_attr; + const struct ib_gid_attr *av_sgid_attr = qp->av_sgid_attr; struct ib_pd *pd; struct ib_cq *scq, *rcq; struct ib_srq *srq; @@ -1795,6 +1833,10 @@ int ib_destroy_qp(struct ib_qp *qp) rdma_restrack_del(&qp->res); ret = qp->device->destroy_qp(qp); if (!ret) { + if (alt_path_sgid_attr) + rdma_put_gid_attr(alt_path_sgid_attr); + if (av_sgid_attr) + rdma_put_gid_attr(av_sgid_attr); if (pd) atomic_dec(&pd->usecnt); if (scq) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 65f467d65bff..0232c0f9f717 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1580,6 +1580,7 @@ struct ib_ah { struct ib_device *device; struct ib_pd *pd; struct ib_uobject *uobject; + const struct ib_gid_attr *sgid_attr; enum rdma_ah_attr_type type; }; @@ -1778,6 +1779,9 @@ struct ib_qp { struct ib_uobject *uobject; void (*event_handler)(struct ib_event *, void *); void *qp_context; + /* sgid_attrs associated with the AV's */ + const struct ib_gid_attr *av_sgid_attr; + const struct ib_gid_attr *alt_path_sgid_attr; u32 qp_num; u32 max_write_sge; u32 max_read_sge; -- cgit From 89af969a665390dc6b156fef55755ca546cd8d92 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 13 Jun 2018 10:22:09 +0300 Subject: RDMA: Convert drivers to use the AH's sgid_attr in post_wr paths For UD the drivers were doing a sgid_index lookup into the cache to get the attrs; however, we can now directly access the same attrs stored in the ib_ah instead and remove the lookup.
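A minimal before/after sketch of the conversion in a UD post-send path (illustrative only; build_header() stands in for the driver-specific header construction):

	/* Before: per-post cache lookup plus an ndev reference to drop */
	union ib_gid sgid;
	struct ib_gid_attr sgid_attr;

	ret = ib_get_cached_gid(ibdev, port, grh->sgid_index, &sgid, &sgid_attr);
	if (ret)
		return ret;
	build_header(&sgid, sgid_attr.gid_type);
	dev_put(sgid_attr.ndev);

	/* After: the AH already holds a referenced, immutable attribute */
	const struct ib_gid_attr *sgid_attr = ah->sgid_attr;

	build_header(&sgid_attr->gid, sgid_attr->gid_type);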
Signed-off-by: Parav Pandit Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 33 ++++++++++--------------------- drivers/infiniband/hw/mlx4/qp.c | 8 ++------ drivers/infiniband/hw/mthca/mthca_av.c | 5 +---- drivers/infiniband/hw/qedr/qedr_roce_cm.c | 25 ++++++----------------- 4 files changed, 19 insertions(+), 52 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 134360236c2c..136eaa78ad4a 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1879,15 +1879,13 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, struct bnxt_qplib_swqe *wqe, int payload_size) { - struct ib_device *ibdev = &qp->rdev->ibdev; struct bnxt_re_ah *ah = container_of(ud_wr(wr)->ah, struct bnxt_re_ah, ib_ah); struct bnxt_qplib_ah *qplib_ah = &ah->qplib_ah; + const struct ib_gid_attr *sgid_attr = ah->ib_ah.sgid_attr; struct bnxt_qplib_sge sge; - union ib_gid sgid; u8 nw_type; u16 ether_type; - struct ib_gid_attr sgid_attr; union ib_gid dgid; bool is_eth = false; bool is_vlan = false; @@ -1900,22 +1898,10 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, memset(&qp->qp1_hdr, 0, sizeof(qp->qp1_hdr)); - rc = ib_get_cached_gid(ibdev, 1, - qplib_ah->host_sgid_index, &sgid, - &sgid_attr); - if (rc) { - dev_err(rdev_to_dev(qp->rdev), - "Failed to query gid at index %d", - qplib_ah->host_sgid_index); - return rc; - } - if (sgid_attr.ndev) { - if (is_vlan_dev(sgid_attr.ndev)) - vlan_id = vlan_dev_vlan_id(sgid_attr.ndev); - dev_put(sgid_attr.ndev); - } + if (is_vlan_dev(sgid_attr->ndev)) + vlan_id = vlan_dev_vlan_id(sgid_attr->ndev); /* Get network header type for this GID */ - nw_type = rdma_gid_attr_network_type(&sgid_attr); + nw_type = rdma_gid_attr_network_type(sgid_attr); switch (nw_type) { case RDMA_NETWORK_IPV4: nw_type = BNXT_RE_ROCEV2_IPV4_PACKET; @@ -1928,9 +1914,9 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, break; } memcpy(&dgid.raw, &qplib_ah->dgid, 16); - is_udp = sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP; + is_udp = sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP; if (is_udp) { - if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) { + if (ipv6_addr_v4mapped((struct in6_addr *)&sgid_attr->gid)) { ip_version = 4; ether_type = ETH_P_IP; } else { @@ -1963,9 +1949,10 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, } if (is_grh || (ip_version == 6)) { - memcpy(qp->qp1_hdr.grh.source_gid.raw, sgid.raw, sizeof(sgid)); + memcpy(qp->qp1_hdr.grh.source_gid.raw, sgid_attr->gid.raw, + sizeof(sgid_attr->gid)); memcpy(qp->qp1_hdr.grh.destination_gid.raw, qplib_ah->dgid.data, - sizeof(sgid)); + sizeof(sgid_attr->gid)); qp->qp1_hdr.grh.hop_limit = qplib_ah->hop_limit; } @@ -1975,7 +1962,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, qp->qp1_hdr.ip4.frag_off = htons(IP_DF); qp->qp1_hdr.ip4.ttl = qplib_ah->hop_limit; - memcpy(&qp->qp1_hdr.ip4.saddr, sgid.raw + 12, 4); + memcpy(&qp->qp1_hdr.ip4.saddr, sgid_attr->gid.raw + 12, 4); memcpy(&qp->qp1_hdr.ip4.daddr, qplib_ah->dgid.data + 12, 4); qp->qp1_hdr.ip4.check = ib_ud_ip4_csum(&qp->qp1_hdr); } diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index f800e8024859..e576ca385d8e 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -3169,12 +3169,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. 
guid_cache[ah->av.ib.gid_index]; } else { - err = rdma_query_gid(ib_dev, - be32_to_cpu(ah->av.ib.port_pd) >> 24, - ah->av.ib.gid_index, - &sqp->ud_header.grh.source_gid); - if (err) - return err; + sqp->ud_header.grh.source_gid = + ah->ibah.sgid_attr->gid; } } memcpy(sqp->ud_header.grh.destination_gid.raw, diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index e7f6223e9c60..0823c0bc7e73 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -281,10 +281,7 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah, header->grh.flow_label = ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff); header->grh.hop_limit = ah->av->hop_limit; - ib_get_cached_gid(&dev->ib_dev, - be32_to_cpu(ah->av->port_pd) >> 24, - ah->av->gid_index % dev->limits.gid_table_len, - &header->grh.source_gid, NULL); + header->grh.source_gid = ah->ibah.sgid_attr->gid; memcpy(header->grh.destination_gid.raw, ah->av->dgid, 16); } diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c index 0f14e687bb91..2e1f352c037d 100644 --- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c @@ -387,11 +387,10 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev, bool has_vlan = false, has_grh_ipv6 = true; struct rdma_ah_attr *ah_attr = &get_qedr_ah(ud_wr(swr)->ah)->attr; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); - union ib_gid sgid; + const struct ib_gid_attr *sgid_attr = grh->sgid_attr; int send_size = 0; u16 vlan_id = 0; u16 ether_type; - struct ib_gid_attr sgid_attr; int rc; int ip_ver = 0; @@ -402,28 +401,16 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev, for (i = 0; i < swr->num_sge; ++i) send_size += swr->sg_list[i].length; - rc = ib_get_cached_gid(qp->ibqp.device, rdma_ah_get_port_num(ah_attr), - grh->sgid_index, &sgid, &sgid_attr); - if (rc) { - DP_ERR(dev, - "gsi post send: failed to get cached GID (port=%d, ix=%d)\n", - rdma_ah_get_port_num(ah_attr), - grh->sgid_index); - return rc; - } - - vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev); + vlan_id = rdma_vlan_dev_vlan_id(sgid_attr->ndev); if (vlan_id < VLAN_CFI_MASK) has_vlan = true; - dev_put(sgid_attr.ndev); - - has_udp = (sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP); + has_udp = (sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP); if (!has_udp) { /* RoCE v1 */ ether_type = ETH_P_IBOE; *roce_mode = ROCE_V1; - } else if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) { + } else if (ipv6_addr_v4mapped((struct in6_addr *)&sgid_attr->gid)) { /* RoCE v2 IPv4 */ ip_ver = 4; ether_type = ETH_P_IP; @@ -471,7 +458,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev, udh->grh.flow_label = grh->flow_label; udh->grh.hop_limit = grh->hop_limit; udh->grh.destination_gid = grh->dgid; - memcpy(&udh->grh.source_gid.raw, &sgid.raw, + memcpy(&udh->grh.source_gid.raw, sgid_attr->gid.raw, sizeof(udh->grh.source_gid.raw)); } else { /* IPv4 header */ @@ -482,7 +469,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev, udh->ip4.frag_off = htons(IP_DF); udh->ip4.ttl = grh->hop_limit; - ipv4_addr = qedr_get_ipv4_from_gid(sgid.raw); + ipv4_addr = qedr_get_ipv4_from_gid(sgid_attr->gid.raw); udh->ip4.saddr = ipv4_addr; ipv4_addr = qedr_get_ipv4_from_gid(grh->dgid.raw); udh->ip4.daddr = ipv4_addr; -- cgit From 3c60e868c31e4ff144776bf53ff0dfe9e9e4ec15 Mon Sep 17 00:00:00 2001 From: "willy@infradead.org" Date: Wed, 13 Jun 2018 11:45:55 -0700 Subject: 
IDR: Expose the XArray lock Allow users of the IDR to use the XArray lock for their own synchronisation purposes. The IDR continues to rely on the caller to handle locking, but this lets the caller use the lock embedded in the IDR data structure instead of allocating their own lock. Signed-off-by: Matthew Wilcox Signed-off-by: Jason Gunthorpe --- include/linux/idr.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/linux/idr.h b/include/linux/idr.h index e856f4e0ab35..3e8215b2c371 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -98,6 +98,17 @@ static inline void idr_set_cursor(struct idr *idr, unsigned int val) * period). */ +#define idr_lock(idr) xa_lock(&(idr)->idr_rt) +#define idr_unlock(idr) xa_unlock(&(idr)->idr_rt) +#define idr_lock_bh(idr) xa_lock_bh(&(idr)->idr_rt) +#define idr_unlock_bh(idr) xa_unlock_bh(&(idr)->idr_rt) +#define idr_lock_irq(idr) xa_lock_irq(&(idr)->idr_rt) +#define idr_unlock_irq(idr) xa_unlock_irq(&(idr)->idr_rt) +#define idr_lock_irqsave(idr, flags) \ + xa_lock_irqsave(&(idr)->idr_rt, flags) +#define idr_unlock_irqrestore(idr, flags) \ + xa_unlock_irqrestore(&(idr)->idr_rt, flags) + void idr_preload(gfp_t gfp_mask); int idr_alloc(struct idr *, void *ptr, int start, int end, gfp_t); -- cgit From 9a41e38a467c06a0c48369970ce5a9f790edd64d Mon Sep 17 00:00:00 2001 From: "willy@infradead.org" Date: Wed, 13 Jun 2018 05:34:03 -0700 Subject: IB/mad: Use IDR for agent IDs Allocate agent IDs from a global IDR instead of an atomic variable. This eliminates the possibility of reusing an ID which is already in use after 4 billion registrations. We limit the assigned ID to be less than 2^24 as the mlx4 driver uses the most significant byte of the agent ID to store the slave number. Users unlucky enough to see a collision between agent numbers and slave numbers see messages like: mlx4_ib: egress mad has non-null tid msb:1 class:4 slave:0 and the MAD layer stops working. We look up the agent under protection of the RCU lock, which means we have to free the agent using kfree_rcu, and only increment the reference counter if it is not 0. Signed-off-by: Matthew Wilcox Reported-by: Hans Westgaard Ry Acked-by: Jack Morgenstein Tested-by: Jack Morgenstein Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/mad.c | 83 +++++++++++++++++++++++--------------- drivers/infiniband/core/mad_priv.h | 7 ++-- 2 files changed, 55 insertions(+), 35 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 1bb1733c7079..34e9b2768324 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -38,6 +38,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include @@ -58,8 +59,13 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests module_param_named(recv_queue_size, mad_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests"); +/* + * The mlx4 driver uses the top byte to distinguish which virtual function + * generated the MAD, so we must avoid using it. 
+ */ +#define AGENT_ID_LIMIT (1 << 24) +static DEFINE_IDR(ib_mad_clients); static struct list_head ib_mad_port_list; -static atomic_t ib_mad_client_id = ATOMIC_INIT(0); /* Port list lock */ static DEFINE_SPINLOCK(ib_mad_port_list_lock); @@ -377,13 +383,24 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, goto error4; } - spin_lock_irq(&port_priv->reg_lock); - mad_agent_priv->agent.hi_tid = atomic_inc_return(&ib_mad_client_id); + idr_preload(GFP_KERNEL); + idr_lock(&ib_mad_clients); + ret2 = idr_alloc_cyclic(&ib_mad_clients, mad_agent_priv, 0, + AGENT_ID_LIMIT, GFP_ATOMIC); + idr_unlock(&ib_mad_clients); + idr_preload_end(); + + if (ret2 < 0) { + ret = ERR_PTR(ret2); + goto error5; + } + mad_agent_priv->agent.hi_tid = ret2; /* * Make sure MAD registration (if supplied) * is non overlapping with any existing ones */ + spin_lock_irq(&port_priv->reg_lock); if (mad_reg_req) { mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class); if (!is_vendor_class(mgmt_class)) { @@ -394,7 +411,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, if (method) { if (method_in_use(&method, mad_reg_req)) - goto error5; + goto error6; } } ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv, @@ -410,24 +427,25 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, if (is_vendor_method_in_use( vendor_class, mad_reg_req)) - goto error5; + goto error6; } } ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv); } if (ret2) { ret = ERR_PTR(ret2); - goto error5; + goto error6; } } - - /* Add mad agent into port's agent list */ - list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list); spin_unlock_irq(&port_priv->reg_lock); return &mad_agent_priv->agent; -error5: +error6: spin_unlock_irq(&port_priv->reg_lock); + idr_lock(&ib_mad_clients); + idr_remove(&ib_mad_clients, mad_agent_priv->agent.hi_tid); + idr_unlock(&ib_mad_clients); +error5: ib_mad_agent_security_cleanup(&mad_agent_priv->agent); error4: kfree(reg_req); @@ -589,8 +607,10 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) spin_lock_irq(&port_priv->reg_lock); remove_mad_reg_req(mad_agent_priv); - list_del(&mad_agent_priv->agent_list); spin_unlock_irq(&port_priv->reg_lock); + idr_lock(&ib_mad_clients); + idr_remove(&ib_mad_clients, mad_agent_priv->agent.hi_tid); + idr_unlock(&ib_mad_clients); flush_workqueue(port_priv->wq); ib_cancel_rmpp_recvs(mad_agent_priv); @@ -601,7 +621,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) ib_mad_agent_security_cleanup(&mad_agent_priv->agent); kfree(mad_agent_priv->reg_req); - kfree(mad_agent_priv); + kfree_rcu(mad_agent_priv, rcu); } static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv) @@ -1722,22 +1742,19 @@ find_mad_agent(struct ib_mad_port_private *port_priv, struct ib_mad_agent_private *mad_agent = NULL; unsigned long flags; - spin_lock_irqsave(&port_priv->reg_lock, flags); if (ib_response_mad(mad_hdr)) { u32 hi_tid; - struct ib_mad_agent_private *entry; /* * Routing is based on high 32 bits of transaction ID * of MAD. 
*/ hi_tid = be64_to_cpu(mad_hdr->tid) >> 32; - list_for_each_entry(entry, &port_priv->agent_list, agent_list) { - if (entry->agent.hi_tid == hi_tid) { - mad_agent = entry; - break; - } - } + rcu_read_lock(); + mad_agent = idr_find(&ib_mad_clients, hi_tid); + if (mad_agent && !atomic_inc_not_zero(&mad_agent->refcount)) + mad_agent = NULL; + rcu_read_unlock(); } else { struct ib_mad_mgmt_class_table *class; struct ib_mad_mgmt_method_table *method; @@ -1746,6 +1763,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv, const struct ib_vendor_mad *vendor_mad; int index; + spin_lock_irqsave(&port_priv->reg_lock, flags); /* * Routing is based on version, class, and method * For "newer" vendor MADs, also based on OUI @@ -1785,20 +1803,19 @@ find_mad_agent(struct ib_mad_port_private *port_priv, ~IB_MGMT_METHOD_RESP]; } } + if (mad_agent) + atomic_inc(&mad_agent->refcount); +out: + spin_unlock_irqrestore(&port_priv->reg_lock, flags); } - if (mad_agent) { - if (mad_agent->agent.recv_handler) - atomic_inc(&mad_agent->refcount); - else { - dev_notice(&port_priv->device->dev, - "No receive handler for client %p on port %d\n", - &mad_agent->agent, port_priv->port_num); - mad_agent = NULL; - } + if (mad_agent && !mad_agent->agent.recv_handler) { + dev_notice(&port_priv->device->dev, + "No receive handler for client %p on port %d\n", + &mad_agent->agent, port_priv->port_num); + deref_mad_agent(mad_agent); + mad_agent = NULL; } -out: - spin_unlock_irqrestore(&port_priv->reg_lock, flags); return mad_agent; } @@ -3161,7 +3178,6 @@ static int ib_mad_port_open(struct ib_device *device, port_priv->device = device; port_priv->port_num = port_num; spin_lock_init(&port_priv->reg_lock); - INIT_LIST_HEAD(&port_priv->agent_list); init_mad_qp(port_priv, &port_priv->qp_info[0]); init_mad_qp(port_priv, &port_priv->qp_info[1]); @@ -3340,6 +3356,9 @@ int ib_mad_init(void) INIT_LIST_HEAD(&ib_mad_port_list); + /* Client ID 0 is used for snoop-only clients */ + idr_alloc(&ib_mad_clients, NULL, 0, 0, GFP_KERNEL); + if (ib_register_client(&mad_client)) { pr_err("Couldn't register ib_mad client\n"); return -EINVAL; diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 28669f6419e1..d84ae1671898 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -89,7 +89,6 @@ struct ib_rmpp_segment { }; struct ib_mad_agent_private { - struct list_head agent_list; struct ib_mad_agent agent; struct ib_mad_reg_req *reg_req; struct ib_mad_qp_info *qp_info; @@ -105,7 +104,10 @@ struct ib_mad_agent_private { struct list_head rmpp_list; atomic_t refcount; - struct completion comp; + union { + struct completion comp; + struct rcu_head rcu; + }; }; struct ib_mad_snoop_private { @@ -203,7 +205,6 @@ struct ib_mad_port_private { spinlock_t reg_lock; struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION]; - struct list_head agent_list; struct workqueue_struct *wq; struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE]; }; -- cgit From 6a965ee57d0ccff9994cf068bfc4f8beb2a2aba0 Mon Sep 17 00:00:00 2001 From: Vijay Immanuel Date: Wed, 13 Jun 2018 18:48:37 -0700 Subject: IB/rxe: increase max MR limit Increase the max MR limit to support more I/O queues for NVMe over Fabrics hosts. 
Signed-off-by: Vijay Immanuel Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_param.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index 1b596fbbe251..4555510d86c4 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -83,7 +83,7 @@ enum rxe_device_param { RXE_MAX_SGE_RD = 32, RXE_MAX_CQ = 16384, RXE_MAX_LOG_CQE = 15, - RXE_MAX_MR = 2 * 1024, + RXE_MAX_MR = 256 * 1024, RXE_MAX_PD = 0x7ffc, RXE_MAX_QP_RD_ATOM = 128, RXE_MAX_EE_RD_ATOM = 0, -- cgit From 92cf36eec2a76d8fe61d439cd2b3ebbf33029477 Mon Sep 17 00:00:00 2001 From: Vijay Immanuel Date: Tue, 12 Jun 2018 18:12:05 -0700 Subject: IB/rxe: support for 802.1q VLAN on the listener Set the vlan flag and vlan_id field in the wc for rdma_listen() to work over VLAN. This is required by ib_init_ah_attr_from_wc() which is called by the CM REQ handler. Signed-off-by: Vijay Immanuel Reviewed-by: Yonatan Cohen Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_resp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 5b57de30dee4..aa5833318372 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -884,6 +884,11 @@ static enum resp_states do_complete(struct rxe_qp *qp, else wc->network_hdr_type = RDMA_NETWORK_IPV6; + if (is_vlan_dev(skb->dev)) { + wc->wc_flags |= IB_WC_WITH_VLAN; + wc->vlan_id = vlan_dev_vlan_id(skb->dev); + } + if (pkt->mask & RXE_IMMDT_MASK) { wc->wc_flags |= IB_WC_WITH_IMM; wc->ex.imm_data = immdt_imm(pkt); -- cgit From b90575ce7b84483d46ebedd5c164e5f274f7ce5a Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Thu, 14 Jun 2018 05:45:42 -0400 Subject: IB/rxe: avoid unnecessary NULL check The variable qp is already checked before the goto to err2, so it is not necessary to check qp again under the err2 label. Signed-off-by: Zhu Yanjun Reviewed-by: Leon Romanovsky Reviewed-by: Yuval Shaia Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_recv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 42797ac6f7b1..cc5cfd156758 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -256,8 +256,7 @@ static int hdr_check(struct rxe_pkt_info *pkt) return 0; err2: - if (qp) - rxe_drop_ref(qp); + rxe_drop_ref(qp); err1: return -EINVAL; } -- cgit From 33023fb85a42b53bf778bc025f9667b582282be4 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 18 Jun 2018 08:05:26 -0700 Subject: IB/core: add max_send_sge and max_recv_sge attributes This patch replaces the ib_device_attr.max_sge with max_send_sge and max_recv_sge. It allows ulps to take advantage of devices that have very different send and recv sge depths. For example, cxgb4 has a max_recv_sge of 4, yet a max_send_sge of 16. Splitting out these attributes allows much more efficient use of the SQ for cxgb4 with ulps that use the RDMA_RW API. Consider a large RDMA WRITE that has 16 scatter/gather entries. With max_sge of 4, the ulp would send 4 WRITE WRs, but with max_sge of 16, it can be done with 1 WRITE WR. 
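The arithmetic, as a sketch (DIV_ROUND_UP is the stock kernel helper from kernel.h; example_write_wrs_needed() is hypothetical):

	/*
	 * Number of RDMA WRITE WRs needed to cover a scatter/gather list
	 * of nr_sge entries given the device's send SGE depth.
	 */
	static unsigned int example_write_wrs_needed(unsigned int nr_sge,
						     unsigned int max_send_sge)
	{
		return DIV_ROUND_UP(nr_sge, max_send_sge);
	}

	/* 16 SGEs: DIV_ROUND_UP(16, 4) == 4 WRs; DIV_ROUND_UP(16, 16) == 1 WR */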
Acked-by: Sagi Grimberg Acked-by: Christoph Hellwig Acked-by: Selvin Xavier Acked-by: Shiraz Saleem Acked-by: Dennis Dalessandro Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 2 +- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 3 ++- drivers/infiniband/hw/cxgb3/iwch_provider.c | 3 ++- drivers/infiniband/hw/cxgb4/provider.c | 3 ++- drivers/infiniband/hw/hfi1/verbs.c | 3 ++- drivers/infiniband/hw/hns/hns_roce_main.c | 3 ++- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 3 ++- drivers/infiniband/hw/mlx4/main.c | 4 ++-- drivers/infiniband/hw/mlx5/main.c | 3 ++- drivers/infiniband/hw/mthca/mthca_provider.c | 5 +++-- drivers/infiniband/hw/nes/nes_verbs.c | 3 ++- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 3 ++- drivers/infiniband/hw/qedr/verbs.c | 3 ++- drivers/infiniband/hw/qib/qib_verbs.c | 3 ++- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 3 ++- drivers/infiniband/sw/rdmavt/qp.c | 5 +++-- drivers/infiniband/sw/rxe/rxe.c | 3 ++- drivers/infiniband/sw/rxe/rxe_qp.c | 8 ++++---- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 4 ++-- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 2 +- drivers/infiniband/ulp/isert/ib_isert.c | 5 +++-- drivers/infiniband/ulp/srpt/ib_srpt.c | 6 ++++-- drivers/nvme/target/rdma.c | 2 +- fs/cifs/smbdirect.c | 13 ++++++++++--- include/rdma/ib_verbs.h | 3 ++- net/rds/ib.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +- net/sunrpc/xprtrdma/verbs.c | 2 +- 28 files changed, 65 insertions(+), 39 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 5733d0fb0673..908ee8ab3297 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -189,7 +189,7 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file, resp->max_qp = attr->max_qp; resp->max_qp_wr = attr->max_qp_wr; resp->device_cap_flags = lower_32_bits(attr->device_cap_flags); - resp->max_sge = attr->max_sge; + resp->max_sge = min(attr->max_send_sge, attr->max_recv_sge); resp->max_sge_rd = attr->max_sge_rd; resp->max_cq = attr->max_cq; resp->max_cqe = attr->max_cqe; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 136eaa78ad4a..6c0c6d3426e0 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -166,7 +166,8 @@ int bnxt_re_query_device(struct ib_device *ibdev, | IB_DEVICE_MEM_WINDOW | IB_DEVICE_MEM_WINDOW_TYPE_2B | IB_DEVICE_MEM_MGT_EXTENSIONS; - ib_attr->max_sge = dev_attr->max_qp_sges; + ib_attr->max_send_sge = dev_attr->max_qp_sges; + ib_attr->max_recv_sge = dev_attr->max_qp_sges; ib_attr->max_sge_rd = dev_attr->max_qp_sges; ib_attr->max_cq = dev_attr->max_cq; ib_attr->max_cqe = dev_attr->max_cq_wqes; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index be097c6723c0..68bc2f9a532f 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1103,7 +1103,8 @@ static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *pro props->max_mr_size = dev->attr.max_mr_size; props->max_qp = dev->attr.max_qps; props->max_qp_wr = dev->attr.max_wrs; - props->max_sge = dev->attr.max_sge_per_wr; + props->max_send_sge = dev->attr.max_sge_per_wr; + props->max_recv_sge = dev->attr.max_sge_per_wr; props->max_sge_rd = 1; props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp; props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp; diff --git 
a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 1feade8bb4b3..61b8bdb9423d 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -343,7 +343,8 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro props->max_mr_size = T4_MAX_MR_SIZE; props->max_qp = dev->rdev.lldi.vr->qp.size / 2; props->max_qp_wr = dev->rdev.hw_queue.t4_max_qp_depth; - props->max_sge = T4_MAX_RECV_SGE; + props->max_send_sge = min(T4_MAX_SEND_SGE, T4_MAX_WRITE_SGE); + props->max_recv_sge = T4_MAX_RECV_SGE; props->max_sge_rd = 1; props->max_res_rd_atom = dev->rdev.lldi.max_ird_adapter; props->max_qp_rd_atom = min(dev->rdev.lldi.max_ordird_qp, diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 08991874c0e2..b7c75b63f887 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1410,7 +1410,8 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd) rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX; rdi->dparms.props.max_qp = hfi1_max_qps; rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs; - rdi->dparms.props.max_sge = hfi1_max_sges; + rdi->dparms.props.max_send_sge = hfi1_max_sges; + rdi->dparms.props.max_recv_sge = hfi1_max_sges; rdi->dparms.props.max_sge_rd = hfi1_max_sges; rdi->dparms.props.max_cq = hfi1_max_cqs; rdi->dparms.props.max_ah = hfi1_max_ahs; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 24a2ea0018d9..850032de8676 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -206,7 +206,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev, props->max_qp_wr = hr_dev->caps.max_wqes; props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_RC_RNR_NAK_GEN; - props->max_sge = max(hr_dev->caps.max_sq_sg, hr_dev->caps.max_rq_sg); + props->max_send_sge = hr_dev->caps.max_sq_sg; + props->max_recv_sge = hr_dev->caps.max_rq_sg; props->max_sge_rd = 1; props->max_cq = hr_dev->caps.num_cqs; props->max_cqe = hr_dev->caps.max_cqes; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 68679ad4c6da..8884ff71a634 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -71,7 +71,8 @@ static int i40iw_query_device(struct ib_device *ibdev, props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE; props->max_qp = iwdev->max_qp - iwdev->used_qps; props->max_qp_wr = I40IW_MAX_QP_WRS; - props->max_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; + props->max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; + props->max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; props->max_cq = iwdev->max_cq - iwdev->used_cqs; props->max_cqe = iwdev->max_cqe; props->max_mr = iwdev->max_mr - iwdev->used_mrs; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 908b8e5c5acb..87de1a467d60 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -517,8 +517,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->page_size_cap = dev->dev->caps.page_size_cap; props->max_qp = dev->dev->quotas.qp; props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE; - props->max_sge = min(dev->dev->caps.max_sq_sg, - dev->dev->caps.max_rq_sg); + props->max_send_sge = dev->dev->caps.max_sq_sg; + props->max_recv_sge = dev->dev->caps.max_rq_sg; props->max_sge_rd = MLX4_MAX_SGE_RD; props->max_cq = 
dev->dev->quotas.cq; props->max_cqe = dev->dev->caps.max_cqes; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e6d88f32391b..e46cda740479 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -888,7 +888,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) - sizeof(struct mlx5_wqe_raddr_seg)) / sizeof(struct mlx5_wqe_data_seg); - props->max_sge = min(max_rq_sg, max_sq_sg); + props->max_send_sge = max_sq_sg; + props->max_recv_sge = max_rq_sg; props->max_sge_rd = MLX5_MAX_SGE_RD; props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 541f237965c7..20febafc1fdd 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -96,8 +96,9 @@ static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *pr props->page_size_cap = mdev->limits.page_size_cap; props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps; props->max_qp_wr = mdev->limits.max_wqes; - props->max_sge = mdev->limits.max_sg; - props->max_sge_rd = props->max_sge; + props->max_send_sge = mdev->limits.max_sg; + props->max_recv_sge = mdev->limits.max_sg; + props->max_sge_rd = mdev->limits.max_sg; props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs; props->max_cqe = mdev->limits.max_cqes; props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws; diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 32f26556c808..82b8f9630ee8 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -436,7 +436,8 @@ static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *prop props->max_mr_size = 0x80000000; props->max_qp = nesibdev->max_qp; props->max_qp_wr = nesdev->nesadapter->max_qp_wr - 2; - props->max_sge = nesdev->nesadapter->max_sge; + props->max_send_sge = nesdev->nesadapter->max_sge; + props->max_recv_sge = nesdev->nesadapter->max_sge; props->max_cq = nesibdev->max_cq; props->max_cqe = nesdev->nesadapter->max_cqe; props->max_mr = nesibdev->max_mr; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 82e20fc32890..1f057fdb3a8c 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -89,7 +89,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; - attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_recv_sge); + attr->max_send_sge = dev->attr.max_send_sge; + attr->max_recv_sge = dev->attr.max_recv_sge; attr->max_sge_rd = dev->attr.max_rdma_sge; attr->max_cq = dev->attr.max_cq; attr->max_cqe = dev->attr.max_cqe; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 10d8f4134ec0..0c41d54f586b 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -112,7 +112,8 @@ int qedr_query_device(struct ib_device *ibdev, IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; - attr->max_sge = qattr->max_sge; + attr->max_send_sge = qattr->max_sge; + attr->max_recv_sge = qattr->max_sge; attr->max_sge_rd = qattr->max_sge; 
attr->max_cq = qattr->max_cq; attr->max_cqe = qattr->max_cqe; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 14b4057a2b8f..41babbc0db58 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1489,7 +1489,8 @@ static void qib_fill_device_attr(struct qib_devdata *dd) rdi->dparms.props.max_mr_size = ~0ULL; rdi->dparms.props.max_qp = ib_qib_max_qps; rdi->dparms.props.max_qp_wr = ib_qib_max_qp_wrs; - rdi->dparms.props.max_sge = ib_qib_max_sges; + rdi->dparms.props.max_send_sge = ib_qib_max_sges; + rdi->dparms.props.max_recv_sge = ib_qib_max_sges; rdi->dparms.props.max_sge_rd = ib_qib_max_sges; rdi->dparms.props.max_cq = ib_qib_max_cqs; rdi->dparms.props.max_cqe = ib_qib_max_cqes; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index a51463cd2f37..816cc285daf6 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -82,7 +82,8 @@ int pvrdma_query_device(struct ib_device *ibdev, props->max_qp = dev->dsr->caps.max_qp; props->max_qp_wr = dev->dsr->caps.max_qp_wr; props->device_cap_flags = dev->dsr->caps.device_cap_flags; - props->max_sge = dev->dsr->caps.max_sge; + props->max_send_sge = dev->dsr->caps.max_sge; + props->max_recv_sge = dev->dsr->caps.max_sge; props->max_sge_rd = PVRDMA_GET_CAP(dev, dev->dsr->caps.max_sge, dev->dsr->caps.max_sge_rd); props->max_srq = dev->dsr->caps.max_srq; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 815f94c17c48..d29e3c943399 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -780,14 +780,15 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, if (!rdi) return ERR_PTR(-EINVAL); - if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge || + if (init_attr->cap.max_send_sge > rdi->dparms.props.max_send_sge || init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || init_attr->create_flags) return ERR_PTR(-EINVAL); /* Check receive queue parameters if no SRQ is specified. 
*/ if (!init_attr->srq) { - if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge || + if (init_attr->cap.max_recv_sge > + rdi->dparms.props.max_recv_sge || init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr) return ERR_PTR(-EINVAL); diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 7121e1b1eb89..10999fa69281 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -91,7 +91,8 @@ static void rxe_init_device_param(struct rxe_dev *rxe) rxe->attr.max_qp = RXE_MAX_QP; rxe->attr.max_qp_wr = RXE_MAX_QP_WR; rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS; - rxe->attr.max_sge = RXE_MAX_SGE; + rxe->attr.max_send_sge = RXE_MAX_SGE; + rxe->attr.max_recv_sge = RXE_MAX_SGE; rxe->attr.max_sge_rd = RXE_MAX_SGE_RD; rxe->attr.max_cq = RXE_MAX_CQ; rxe->attr.max_cqe = (1 << RXE_MAX_LOG_CQE) - 1; diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 9f83fc982f31..c58452daffc7 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -49,9 +49,9 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap, goto err1; } - if (cap->max_send_sge > rxe->attr.max_sge) { + if (cap->max_send_sge > rxe->attr.max_send_sge) { pr_warn("invalid send sge = %d > %d\n", - cap->max_send_sge, rxe->attr.max_sge); + cap->max_send_sge, rxe->attr.max_send_sge); goto err1; } @@ -62,9 +62,9 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap, goto err1; } - if (cap->max_recv_sge > rxe->attr.max_sge) { + if (cap->max_recv_sge > rxe->attr.max_recv_sge) { pr_warn("invalid recv sge = %d > %d\n", - cap->max_recv_sge, rxe->attr.max_sge); + cap->max_recv_sge, rxe->attr.max_recv_sge); goto err1; } } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 6535d9beb24d..23cb1adc636f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1068,8 +1068,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ struct ib_qp *tx_qp; if (dev->features & NETIF_F_SG) - attr.cap.max_send_sge = - min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1); + attr.cap.max_send_sge = min_t(u32, priv->ca->attrs.max_send_sge, + MAX_SKB_FRAGS + 1); tx_qp = ib_create_qp(priv->pd, &attr); tx->max_send_sge = attr.cap.max_send_sge; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 984a88096f39..ba4669f24014 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -147,7 +147,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) .cap = { .max_send_wr = ipoib_sendq_size, .max_recv_wr = ipoib_recvq_size, - .max_send_sge = min_t(u32, priv->ca->attrs.max_sge, + .max_send_sge = min_t(u32, priv->ca->attrs.max_send_sge, MAX_SKB_FRAGS + 1), .max_recv_sge = IPOIB_UD_RX_SG }, diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index cccbcf0eb035..7e056f3c82a0 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -136,7 +136,7 @@ isert_create_qp(struct isert_conn *isert_conn, attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1; attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1; attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX; - attr.cap.max_send_sge = device->ib_device->attrs.max_sge; + attr.cap.max_send_sge = device->ib_device->attrs.max_send_sge; 
attr.cap.max_recv_sge = 1; attr.sq_sig_type = IB_SIGNAL_REQ_WR; attr.qp_type = IB_QPT_RC; @@ -299,7 +299,8 @@ isert_create_device_ib_res(struct isert_device *device) struct ib_device *ib_dev = device->ib_device; int ret; - isert_dbg("devattr->max_sge: %d\n", ib_dev->attrs.max_sge); + isert_dbg("devattr->max_send_sge: %d devattr->max_recv_sge %d\n", + ib_dev->attrs.max_send_sge, ib_dev->attrs.max_recv_sge); isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd); ret = isert_alloc_comps(device); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 698f7779e231..1b0b285a0ae0 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1753,13 +1753,15 @@ retry: */ qp_init->cap.max_send_wr = min(sq_size / 2, attrs->max_qp_wr); qp_init->cap.max_rdma_ctxs = sq_size / 2; - qp_init->cap.max_send_sge = min(attrs->max_sge, SRPT_MAX_SG_PER_WQE); + qp_init->cap.max_send_sge = min(attrs->max_send_sge, + SRPT_MAX_SG_PER_WQE); qp_init->port_num = ch->sport->port; if (sdev->use_srq) { qp_init->srq = sdev->srq; } else { qp_init->cap.max_recv_wr = ch->rq_size; - qp_init->cap.max_recv_sge = qp_init->cap.max_send_sge; + qp_init->cap.max_recv_sge = min(attrs->max_recv_sge, + SRPT_MAX_SG_PER_WQE); } if (ch->using_rdma_cm) { diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 52e0c5d579a7..0d7f3d603f1d 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -874,7 +874,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) qp_attr.cap.max_send_wr = queue->send_queue_size + 1; qp_attr.cap.max_rdma_ctxs = queue->send_queue_size; qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd, - ndev->device->attrs.max_sge); + ndev->device->attrs.max_send_sge); if (ndev->srq) { qp_attr.srq = ndev->srq; diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index e459c97151b3..c5a1cddd8856 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -1661,9 +1661,16 @@ static struct smbd_connection *_smbd_get_connection( info->max_receive_size = smbd_max_receive_size; info->keep_alive_interval = smbd_keep_alive_interval; - if (info->id->device->attrs.max_sge < SMBDIRECT_MAX_SGE) { - log_rdma_event(ERR, "warning: device max_sge = %d too small\n", - info->id->device->attrs.max_sge); + if (info->id->device->attrs.max_send_sge < SMBDIRECT_MAX_SGE) { + log_rdma_event(ERR, + "warning: device max_send_sge = %d too small\n", + info->id->device->attrs.max_send_sge); + log_rdma_event(ERR, "Queue Pair creation may fail\n"); + } + if (info->id->device->attrs.max_recv_sge < SMBDIRECT_MAX_SGE) { + log_rdma_event(ERR, + "warning: device max_recv_sge = %d too small\n", + info->id->device->attrs.max_recv_sge); log_rdma_event(ERR, "Queue Pair creation may fail\n"); } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0232c0f9f717..dc5d262739e5 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -345,7 +345,8 @@ struct ib_device_attr { int max_qp; int max_qp_wr; u64 device_cap_flags; - int max_sge; + int max_send_sge; + int max_recv_sge; int max_sge_rd; int max_cq; int max_cqe; diff --git a/net/rds/ib.c b/net/rds/ib.c index b6ad38e48f62..683b55d4e2b0 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -143,7 +143,7 @@ static void rds_ib_add_one(struct ib_device *device) INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free); rds_ibdev->max_wrs = device->attrs.max_qp_wr; - rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE); + rds_ibdev->max_sge 
= min(device->attrs.max_send_sge, RDS_IB_MAX_SGE); has_fr = (device->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index e9535a66bab0..547b2cdf1427 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -476,7 +476,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) /* Qualify the transport resource defaults with the * capabilities of this particular device */ - newxprt->sc_max_send_sges = dev->attrs.max_sge; + newxprt->sc_max_send_sges = dev->attrs.max_send_sge; /* transport hdr, head iovec, one page list entry, tail iovec */ if (newxprt->sc_max_send_sges < 4) { pr_err("svcrdma: too few Send SGEs available (%d)\n", diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 16161a36dc73..112a15abc4a4 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -508,7 +508,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, unsigned int max_sge; int rc; - max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge, + max_sge = min_t(unsigned int, ia->ri_device->attrs.max_send_sge, RPCRDMA_MAX_SEND_SGES); if (max_sge < RPCRDMA_MIN_SEND_SGES) { pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge); -- cgit From 3cba33d3118880706e2178ec8b4f3a7109f370ea Mon Sep 17 00:00:00 2001 From: Bharat Potnuri Date: Fri, 15 Jun 2018 20:58:23 +0530 Subject: iw_cxgb4: remove duplicate memcpy() in c4iw_create_listen() memcpy() of the mapped addresses is done twice in c4iw_create_listen(); remove the duplicate memcpy(). Fixes: 170003c894d9 ("iw_cxgb4: remove port mapper related code") Reviewed-by: Steve Wise Signed-off-by: Potnuri Bharat Teja Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 0912fa026327..77243f7e17d5 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3444,9 +3444,6 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) } insert_handle(dev, &dev->stid_idr, ep, ep->stid); - memcpy(&ep->com.local_addr, &cm_id->m_local_addr, - sizeof(ep->com.local_addr)); - state_set(&ep->com, LISTEN); if (ep->com.local_addr.ss_family == AF_INET) err = create_server4(dev, ep); -- cgit From 1114b0a8a83dfc82464fd1d8a34313044381cf5e Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Sun, 17 Jun 2018 12:59:50 +0300 Subject: IB/uverbs: Export uverbs idr and fd types Since provider drivers could use the UVERBS_ATTR_FD and UVERBS_ATTR_IDR macros, the uverbs idr and fd type classes need to be exported.
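For illustration, a minimal provider-side sketch of what this export enables (the object name and cleanup callback are hypothetical, not part of this series):

    /* Hypothetical sketch: with uverbs_idr_class exported, a provider
     * module can declare its own IDR-backed uobject type, mirroring the
     * DEVX object introduced later in this series. */
    static int my_obj_cleanup(struct ib_uobject *uobject,
                              enum rdma_remove_reason why)
    {
            kfree(uobject->object);  /* free the driver-private payload */
            return 0;
    }

    static DECLARE_UVERBS_NAMED_OBJECT(MY_DRIVER_OBJECT,
            &UVERBS_TYPE_ALLOC_IDR(0, my_obj_cleanup));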
Signed-off-by: Matan Barak Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index a6e904973ba8..8035a0a7564c 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -611,6 +611,7 @@ const struct uverbs_obj_type_class uverbs_idr_class = { */ .needs_kfree_rcu = true, }; +EXPORT_SYMBOL(uverbs_idr_class); static void _uverbs_close_fd(struct ib_uobject_file *uobj_file) { @@ -719,6 +720,7 @@ const struct uverbs_obj_type_class uverbs_fd_class = { .remove_commit = remove_commit_fd_uobject, .needs_kfree_rcu = false, }; +EXPORT_SYMBOL(uverbs_fd_class); struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs, struct ib_ucontext *ucontext, -- cgit From 9442d8bf1d63e09780dc3b60ac6cdfa0813a98c2 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Sun, 17 Jun 2018 12:59:51 +0300 Subject: IB/uverbs: Refactor uverbs_finalize_objects uverbs_finalize_objects is currently used only to commit or abort objects. Since we want to add automatic allocation/free of PTR_IN attributes, move it to uverbs_ioctl.c and rename it to uverbs_finalize_attrs. Signed-off-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 40 --------------------- drivers/infiniband/core/rdma_core.h | 10 ++---- drivers/infiniband/core/uverbs_ioctl.c | 63 +++++++++++++++++++++++++++------- 3 files changed, 54 insertions(+), 59 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 8035a0a7564c..df3c40533252 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -779,43 +779,3 @@ int uverbs_finalize_object(struct ib_uobject *uobj, return ret; } - -int uverbs_finalize_objects(struct uverbs_attr_bundle *attrs_bundle, - struct uverbs_attr_spec_hash * const *spec_hash, - size_t num, - bool commit) -{ - unsigned int i; - int ret = 0; - - for (i = 0; i < num; i++) { - struct uverbs_attr_bundle_hash *curr_bundle = - &attrs_bundle->hash[i]; - const struct uverbs_attr_spec_hash *curr_spec_bucket = - spec_hash[i]; - unsigned int j; - - for (j = 0; j < curr_bundle->num_attrs; j++) { - struct uverbs_attr *attr; - const struct uverbs_attr_spec *spec; - - if (!uverbs_attr_is_valid_in_hash(curr_bundle, j)) - continue; - - attr = &curr_bundle->attrs[j]; - spec = &curr_spec_bucket->attrs[j]; - - if (spec->type == UVERBS_ATTR_TYPE_IDR || - spec->type == UVERBS_ATTR_TYPE_FD) { - int current_ret; - - current_ret = uverbs_finalize_object(attr->obj_attr.uobject, - spec->obj.access, - commit); - if (!ret) - ret = current_ret; - } - } - } - return ret; -} diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index 1efcf93238dd..a243cc2a59f7 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -94,9 +94,6 @@ struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type struct ib_ucontext *ucontext, enum uverbs_obj_access access, int id); -int uverbs_finalize_object(struct ib_uobject *uobj, - enum uverbs_obj_access access, - bool commit); /* * Note that certain finalize stages could return a status: * (a) alloc_commit could return a failure if the object is committed at the * 
function. For example, this could happen when we couldn't destroy an * object. */ -int uverbs_finalize_objects(struct uverbs_attr_bundle *attrs_bundle, - struct uverbs_attr_spec_hash * const *spec_hash, - size_t num, - bool commit); +int uverbs_finalize_object(struct ib_uobject *uobj, + enum uverbs_obj_access access, + bool commit); #endif /* RDMA_CORE_H */ diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 8d32c4ae368c..6759d59a4421 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -167,6 +167,45 @@ static int uverbs_process_attr(struct ib_device *ibdev, return 0; } +static int uverbs_finalize_attrs(struct uverbs_attr_bundle *attrs_bundle, + struct uverbs_attr_spec_hash *const *spec_hash, + size_t num, bool commit) +{ + unsigned int i; + int ret = 0; + + for (i = 0; i < num; i++) { + struct uverbs_attr_bundle_hash *curr_bundle = + &attrs_bundle->hash[i]; + const struct uverbs_attr_spec_hash *curr_spec_bucket = + spec_hash[i]; + unsigned int j; + + for (j = 0; j < curr_bundle->num_attrs; j++) { + struct uverbs_attr *attr; + const struct uverbs_attr_spec *spec; + + if (!uverbs_attr_is_valid_in_hash(curr_bundle, j)) + continue; + + attr = &curr_bundle->attrs[j]; + spec = &curr_spec_bucket->attrs[j]; + + if (spec->type == UVERBS_ATTR_TYPE_IDR || + spec->type == UVERBS_ATTR_TYPE_FD) { + int current_ret; + + current_ret = uverbs_finalize_object( + attr->obj_attr.uobject, + spec->obj.access, commit); + if (!ret) + ret = current_ret; + } + } + } + return ret; +} + static int uverbs_uattrs_process(struct ib_device *ibdev, struct ib_ucontext *ucontext, const struct ib_uverbs_attr *uattrs, @@ -187,10 +226,10 @@ static int uverbs_uattrs_process(struct ib_device *ibdev, ret = uverbs_ns_idx(&attr_id, method->num_buckets); if (ret < 0) { if (uattr->flags & UVERBS_ATTR_F_MANDATORY) { - uverbs_finalize_objects(attr_bundle, - method->attr_buckets, - num_given_buckets, - false); + uverbs_finalize_attrs(attr_bundle, + method->attr_buckets, + num_given_buckets, + false); return ret; } continue; @@ -208,10 +247,10 @@ static int uverbs_uattrs_process(struct ib_device *ibdev, attr_spec_bucket, &attr_bundle->hash[ret], uattr_ptr++); if (ret) { - uverbs_finalize_objects(attr_bundle, - method->attr_buckets, - num_given_buckets, - false); + uverbs_finalize_attrs(attr_bundle, + method->attr_buckets, + num_given_buckets, + false); return ret; } } @@ -271,10 +310,10 @@ static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr, ret = method_spec->handler(ibdev, ufile, attr_bundle); cleanup: - finalize_ret = uverbs_finalize_objects(attr_bundle, - method_spec->attr_buckets, - attr_bundle->num_buckets, - !ret); + finalize_ret = uverbs_finalize_attrs(attr_bundle, + method_spec->attr_buckets, + attr_bundle->num_buckets, + !ret); return ret ? ret : finalize_ret; } -- cgit From 8762d149e88dea5bc09e0d7faa84b635807167ab Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Sun, 17 Jun 2018 12:59:52 +0300 Subject: IB/uverbs: Add PTR_IN attributes that are allocated/copied automatically Adding UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY flag to PTR_IN attributes. By using this flag, the parse automatically allocates and copies the user-space data. This data is accessible by using uverbs_attr_get_len and uverbs_attr_get_alloced_ptr inline accessor functions from the handler. 
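A hedged handler-side sketch of the new accessors (the attribute id MY_ATTR_CMD_IN and the helper my_process_cmd() are hypothetical):

    /* Sketch only: assumes MY_ATTR_CMD_IN was declared as a PTR_IN
     * attribute with UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY set. */
    static int my_handler(struct ib_device *ib_dev,
                          struct ib_uverbs_file *file,
                          struct uverbs_attr_bundle *attrs)
    {
            void *cmd = uverbs_attr_get_alloced_ptr(attrs, MY_ATTR_CMD_IN);
            int len = uverbs_attr_get_len(attrs, MY_ATTR_CMD_IN);

            if (IS_ERR(cmd))
                    return PTR_ERR(cmd);

            /* 'cmd' already holds 'len' bytes copied from user space; any
             * kvmalloc'ed copy is released by uverbs_finalize_attrs(), so
             * the handler must not free it. */
            return my_process_cmd(cmd, len);
    }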
Signed-off-by: Matan Barak Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_ioctl.c | 25 ++++++++++++++++++++++- include/rdma/uverbs_ioctl.h | 36 +++++++++++++++++++++++++++++++++- 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 6759d59a4421..5ac2950978d2 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -114,9 +114,27 @@ static int uverbs_process_attr(struct ib_device *ibdev, uattr->attr_data.reserved) return -EINVAL; - e->ptr_attr.data = uattr->data; e->ptr_attr.len = uattr->len; e->ptr_attr.flags = uattr->flags; + + if (val_spec->flags & UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY && + !uverbs_attr_ptr_is_inline(e)) { + void *p; + + p = kvmalloc(uattr->len, GFP_KERNEL); + if (!p) + return -ENOMEM; + + e->ptr_attr.ptr = p; + + if (copy_from_user(p, u64_to_user_ptr(uattr->data), + uattr->len)) { + kvfree(p); + return -EFAULT; + } + } else { + e->ptr_attr.data = uattr->data; + } break; case UVERBS_ATTR_TYPE_IDR: @@ -200,6 +218,11 @@ static int uverbs_finalize_attrs(struct uverbs_attr_bundle *attrs_bundle, spec->obj.access, commit); if (!ret) ret = current_ret; + } else if (spec->type == UVERBS_ATTR_TYPE_PTR_IN && + spec->flags & + UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY && + !uverbs_attr_ptr_is_inline(attr)) { + kvfree(attr->ptr_attr.ptr); } } } diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index bd6bba3a6e04..11cc40ef1cb6 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -65,6 +65,10 @@ enum { UVERBS_ATTR_SPEC_F_MANDATORY = 1U << 0, /* Support extending attributes by length, validate all unknown size == zero */ UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO = 1U << 1, + /* + * Valid only for PTR_IN. Allocate and copy the data inside the parser + */ + UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY = 1U << 2, }; /* Specification of a single attribute inside the ioctl message */ @@ -323,7 +327,14 @@ struct uverbs_object_tree_def { */ struct uverbs_ptr_attr { - u64 data; + /* + * If UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY is set then the 'ptr' is + * used. + */ + union { + void *ptr; + u64 data; + }; u16 len; /* Combination of bits from enum UVERBS_ATTR_F_XXXX */ u16 flags; @@ -431,6 +442,17 @@ static inline struct ib_uobject *uverbs_attr_get_uobject(const struct uverbs_att return attr->obj_attr.uobject; } +static inline int +uverbs_attr_get_len(const struct uverbs_attr_bundle *attrs_bundle, u16 idx) +{ + const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); + + if (IS_ERR(attr)) + return PTR_ERR(attr); + + return attr->ptr_attr.len; +} + static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, size_t idx, const void *from, size_t size) { @@ -457,6 +479,18 @@ static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr) return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data); } +static inline void *uverbs_attr_get_alloced_ptr( + const struct uverbs_attr_bundle *attrs_bundle, u16 idx) +{ + const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); + + if (IS_ERR(attr)) + return (void *)attr; + + return uverbs_attr_ptr_is_inline(attr) ? 
(void *)&attr->ptr_attr.data : + attr->ptr_attr.ptr; +} + static inline int _uverbs_copy_from(void *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, -- cgit From 2d9c1bd7e177bd8b460403db9513b0a223e46ab8 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Sun, 17 Jun 2018 12:59:53 +0300 Subject: IB/uverbs: Add a macro to define a type with no kernel known size Sometimes the uverbs uAPI doesn't really care about the structure it gets from user-space. All it wants to do is to allocate enough space and send it to the hardware/provider driver. Add a UVERBS_ATTR_MIN_SIZE macro that can be used for these scenarios. We use USHRT_MAX as the kernel known size to bypass any zero validations. Signed-off-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_std_types.c | 4 ++-- include/rdma/uverbs_ioctl.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index b570acbd94af..0df0ac9c1de3 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -207,10 +207,10 @@ int uverbs_destroy_def_handler(struct ib_device *ib_dev, * spec. */ const struct uverbs_attr_def uverbs_uhw_compat_in = - UVERBS_ATTR_PTR_IN_SZ(UVERBS_ATTR_UHW_IN, UVERBS_ATTR_SIZE(0, USHRT_MAX), + UVERBS_ATTR_PTR_IN_SZ(UVERBS_ATTR_UHW_IN, UVERBS_ATTR_MIN_SIZE(0), UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)); const struct uverbs_attr_def uverbs_uhw_compat_out = - UVERBS_ATTR_PTR_OUT_SZ(UVERBS_ATTR_UHW_OUT, UVERBS_ATTR_SIZE(0, USHRT_MAX), + UVERBS_ATTR_PTR_OUT_SZ(UVERBS_ATTR_UHW_OUT, UVERBS_ATTR_MIN_SIZE(0), UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)); void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata) diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 11cc40ef1cb6..970357d0ccc4 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -215,6 +215,8 @@ struct uverbs_object_tree_def { .min_len = ((uintptr_t)(&((_type *)0)->_last + 1)), .len = sizeof(_type) #define UVERBS_ATTR_SIZE(_min_len, _len) \ .min_len = _min_len, .len = _len +#define UVERBS_ATTR_MIN_SIZE(_min_len) \ + UVERBS_ATTR_SIZE(_min_len, USHRT_MAX) /* * In new compiler, UVERBS_ATTR could be simplified by declaring it as -- cgit From 19b9def25852caf710b978cd27955090650f115b Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Sun, 17 Jun 2018 12:59:54 +0300 Subject: IB/uverbs: Allow an empty namespace in ioctl() framework The ioctl parser framework wrongly assumed that each namespace is populated. This could lead to NULL dereferences. Fix the parser to always check that a given namespace indeed exists. 
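The hazard being fixed can be sketched as follows (a simplified excerpt; the guard is the pattern the diff below applies throughout the parser):

    /* A method may populate only some attribute namespaces, so an entry
     * of attr_buckets[] can legitimately be NULL.  The parser previously
     * dereferenced method_spec->attr_buckets[i]->num_attrs
     * unconditionally; after this fix every walk first skips empty
     * namespaces: */
    if (!method_spec->attr_buckets[i])
            continue;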
Fixes: fac9658cabb9 ("IB/core: Add new ioctl interface") Signed-off-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_ioctl.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 5ac2950978d2..20be6835291e 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -199,6 +199,9 @@ static int uverbs_finalize_attrs(struct uverbs_attr_bundle *attrs_bundle, spec_hash[i]; unsigned int j; + if (!curr_spec_bucket) + continue; + for (j = 0; j < curr_bundle->num_attrs; j++) { struct uverbs_attr *attr; const struct uverbs_attr_spec *spec; @@ -247,7 +250,7 @@ static int uverbs_uattrs_process(struct ib_device *ibdev, struct uverbs_attr_spec_hash *attr_spec_bucket; ret = uverbs_ns_idx(&attr_id, method->num_buckets); - if (ret < 0) { + if (ret < 0 || !method->attr_buckets[ret]) { if (uattr->flags & UVERBS_ATTR_F_MANDATORY) { uverbs_finalize_attrs(attr_bundle, method->attr_buckets, @@ -290,6 +293,9 @@ static int uverbs_validate_kernel_mandatory(const struct uverbs_method_spec *met struct uverbs_attr_spec_hash *attr_spec_bucket = method_spec->attr_buckets[i]; + if (!attr_spec_bucket) + continue; + if (!bitmap_subset(attr_spec_bucket->mandatory_attrs_bitmask, attr_bundle->hash[i].valid_bitmap, attr_spec_bucket->num_attrs)) @@ -403,7 +409,12 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, * filled at a later stage (uverbs_process_attr) */ for (i = 0; i < method_spec->num_buckets; i++) { - unsigned int curr_num_attrs = method_spec->attr_buckets[i]->num_attrs; + unsigned int curr_num_attrs; + + if (!method_spec->attr_buckets[i]) + continue; + + curr_num_attrs = method_spec->attr_buckets[i]->num_attrs; ctx->uverbs_attr_bundle->hash[i].attrs = curr_attr; curr_attr += curr_num_attrs; -- cgit From e502a864c3526aa93b983d4b14e9615b3da430e6 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 17 Jun 2018 12:59:58 +0300 Subject: IB/core: Introduce DECLARE_UVERBS_GLOBAL_METHODS Introduce a new macro to be used for global methods on a singleton object. This macro internally sets the type_attrs to NULL, as such an object can't be created. Downstream patches from this series will use this macro. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/uverbs_named_ioctl.h | 4 ++++ include/rdma/uverbs_std_types.h | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h index c5bb4ebdb0b0..228421f2a427 100644 --- a/include/rdma/uverbs_named_ioctl.h +++ b/include/rdma/uverbs_named_ioctl.h @@ -43,6 +43,7 @@ #define _UVERBS_NAME(x, y) _UVERBS_PASTE(x, y) #define UVERBS_METHOD(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _method_##id) #define UVERBS_HANDLER(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _handler_##id) +#define UVERBS_OBJECT(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _object_##id) #define DECLARE_UVERBS_NAMED_METHOD(id, ...) \ DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, UVERBS_HANDLER(id), ##__VA_ARGS__) @@ -56,6 +57,9 @@ #define DECLARE_UVERBS_NAMED_OBJECT(id, ...) \ DECLARE_UVERBS_OBJECT(UVERBS_OBJECT(id), id, ##__VA_ARGS__) +#define DECLARE_UVERBS_GLOBAL_METHODS(_name, ...) 
\ + DECLARE_UVERBS_NAMED_OBJECT(_name, NULL, ##__VA_ARGS__) + #define _UVERBS_COMP_NAME(x, y, z) _UVERBS_NAME(_UVERBS_NAME(x, y), z) #define UVERBS_NO_OVERRIDE NULL diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 9d56cdb84655..4c151b67fb6d 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -37,8 +37,6 @@ #include #include -#define UVERBS_OBJECT(id) uverbs_object_##id - #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) const struct uverbs_object_tree_def *uverbs_default_get_objects(void); #else -- cgit From 7dc08dcfc8c86cb4457e383734ff6844ddaff876 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 17 Jun 2018 12:59:59 +0300 Subject: IB/core: Expose ib_ucontext from a given ib_uverbs_file Drivers that use the IOCTL API may have the ib_uverbs_file and need a way to get the related ib_ucontext from it; this patch enables that. Downstream patches from this series will use it. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_main.c | 6 ++++++ include/rdma/ib_verbs.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 3ae2339dd27a..f5f4bfb59705 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -138,6 +138,12 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, static void ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); +struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile) +{ + return ufile->ucontext; +} +EXPORT_SYMBOL(ib_uverbs_get_ucontext); + int uverbs_dealloc_mw(struct ib_mw *mw) { struct ib_pd *pd = mw->pd; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index dc5d262739e5..995d517c0a76 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4120,4 +4120,6 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector) */ void rdma_roce_rescan_device(struct ib_device *ibdev); +struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile); + #endif /* IB_VERBS_H */ -- cgit From a8b92ca1b0e5ce620e425e9d2f89ce44f1a82a82 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 17 Jun 2018 12:59:57 +0300 Subject: IB/mlx5: Introduce DEVX Introduce DEVX to enable direct device commands in downstream patches from this series. In this mode of operation the firmware manages the isolation between processes' resources and, as such, a DEVX user id is created and assigned to the given user context upon allocation request. A capability check is done to make sure that this feature is really supported by the firmware prior to creating the DEVX user id. 
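A hedged sketch of the user-space side of this handshake (how the request reaches mlx5_ib_alloc_ucontext() through the provider library is assumed and not shown in this series):

    /* Hypothetical user-space snippet: opt in to DEVX while allocating
     * the ucontext. */
    struct mlx5_ib_alloc_ucontext_req_v2 req = {};

    req.flags |= MLX5_IB_ALLOC_UCTX_DEVX;
    /* On success the kernel calls mlx5_ib_devx_create() and keeps the
     * firmware-assigned uid in context->devx_uid for later commands. */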
Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/Makefile | 1 + drivers/infiniband/hw/mlx5/devx.c | 58 ++++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/main.c | 24 +++++++++++++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 13 ++++++++ include/uapi/rdma/mlx5-abi.h | 3 ++ 5 files changed, 96 insertions(+), 3 deletions(-) create mode 100644 drivers/infiniband/hw/mlx5/devx.c diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index d42b922bede8..577e4c418bae 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -3,3 +3,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o +mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c new file mode 100644 index 000000000000..775448910ad1 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "mlx5_ib.h" + +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context) +{ + u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + u64 general_obj_types; + void *uctx; + void *hdr; + int err; + + uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx); + hdr = MLX5_ADDR_OF(create_uctx_in, in, hdr); + + general_obj_types = MLX5_CAP_GEN_64(dev->mdev, general_obj_types); + if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UCTX) || + !(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UMEM)) + return -EINVAL; + + if (!capable(CAP_NET_RAW)) + return -EPERM; + + MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_UCTX); + + err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + context->devx_uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + return 0; +} + +void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_UCTX); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, context->devx_uid); + + mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); +} diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e46cda740479..058a82a55ffe 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1650,8 +1650,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, if (err) return ERR_PTR(err); - if (req.flags) - return ERR_PTR(-EINVAL); + if (req.flags & ~MLX5_IB_ALLOC_UCTX_DEVX) + return ERR_PTR(-EOPNOTSUPP); if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2) return ERR_PTR(-EOPNOTSUPP); @@ -1735,6 +1735,18 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, goto out_uars; } + if (req.flags & 
MLX5_IB_ALLOC_UCTX_DEVX) { + /* Block DEVX on InfiniBand because of SELinux */ + if (mlx5_ib_port_link_layer(ibdev, 1) != IB_LINK_LAYER_ETHERNET) { + err = -EPERM; + goto out_td; + } + + err = mlx5_ib_devx_create(dev, context); + if (err) + goto out_td; + } + INIT_LIST_HEAD(&context->vma_private_list); mutex_init(&context->vma_private_list_mutex); INIT_LIST_HEAD(&context->db_page_list); @@ -1795,7 +1807,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) - goto out_td; + goto out_mdev; bfregi->ver = ver; bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs; @@ -1805,6 +1817,9 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, return &context->ibucontext; +out_mdev: + if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) + mlx5_ib_devx_destroy(dev, context); out_td: if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) mlx5_ib_dealloc_transport_domain(dev, context->tdn); @@ -1830,6 +1845,9 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); struct mlx5_bfreg_info *bfregi; + if (context->devx_uid) + mlx5_ib_devx_destroy(dev, context); + bfregi = &context->bfregi; if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) mlx5_ib_dealloc_transport_domain(dev, context->tdn); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 615bd6e9db6c..1c857dd3c77f 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -143,6 +143,7 @@ struct mlx5_ib_ucontext { u64 lib_caps; DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES); + u16 devx_uid; }; static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) @@ -1215,6 +1216,18 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev, void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context); +void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context); +#else +static inline int +mlx5_ib_devx_create(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context) { return -EOPNOTSUPP; }; +static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context) {} +#endif static inline void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 8daec1fa49cf..5d591ff28139 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -76,6 +76,9 @@ enum mlx5_lib_caps { MLX5_LIB_CAP_4K_UAR = (__u64)1 << 0, }; +enum mlx5_ib_alloc_uctx_v2_flags { + MLX5_IB_ALLOC_UCTX_DEVX = 1 << 0, +}; struct mlx5_ib_alloc_ucontext_req_v2 { __u32 total_num_bfregs; __u32 num_low_latency_bfregs; -- cgit From 8aa8c95ce4ccc10a72f6755ee889d9fb1ceb60a6 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 17 Jun 2018 13:00:00 +0300 Subject: IB/mlx5: Add support for DEVX general command Add support to run general firmware commands via the DEVX interface. A command that works on some object (e.g. CQ, WQ, etc.) will be added in subsequent patches while maintaining the required object lock. 
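For example, a whitelisted general command such as QUERY_HCA_CAP would be laid out by the DEVX user as a standard mlx5_ifc mailbox (a sketch; the ioctl marshalling of the buffers is omitted):

    /* Sketch: input/output mailboxes for a whitelisted general command.
     * The kernel handler overwrites the uid field with c->devx_uid before
     * forwarding the buffer to firmware via mlx5_cmd_exec(). */
    u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {0};
    u32 out[MLX5_ST_SZ_DW(query_hca_cap_out)] = {0};

    MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
    /* 'in' and 'out' travel as the MLX5_IB_ATTR_DEVX_OTHER_CMD_IN/_OUT
     * attributes of MLX5_IB_METHOD_DEVX_OTHER. */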
Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 87 ++++++++++++++++++++++++++++++++ include/uapi/rdma/mlx5_user_ioctl_cmds.h | 13 +++++ 2 files changed, 100 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 775448910ad1..9fca6541a175 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -13,6 +13,14 @@ #include #include "mlx5_ib.h" +#define UVERBS_MODULE_NAME mlx5_ib +#include + +static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file) +{ + return to_mucontext(ib_uverbs_get_ucontext(file)); +} + int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context) { u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0}; @@ -56,3 +64,82 @@ void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); } + +static bool devx_is_general_cmd(void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + + switch (opcode) { + case MLX5_CMD_OP_QUERY_HCA_CAP: + case MLX5_CMD_OP_QUERY_VPORT_STATE: + case MLX5_CMD_OP_QUERY_ADAPTER: + case MLX5_CMD_OP_QUERY_ISSI: + case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_ROCE_ADDRESS: + case MLX5_CMD_OP_QUERY_VNIC_ENV: + case MLX5_CMD_OP_QUERY_VPORT_COUNTER: + case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG: + case MLX5_CMD_OP_NOP: + case MLX5_CMD_OP_QUERY_CONG_STATUS: + case MLX5_CMD_OP_QUERY_CONG_PARAMS: + case MLX5_CMD_OP_QUERY_CONG_STATISTICS: + return true; + default: + return false; + } +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_ucontext *c = devx_ufile2uctx(file); + struct mlx5_ib_dev *dev = to_mdev(ib_dev); + void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN); + int cmd_out_len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT); + void *cmd_out; + int err; + + if (!c->devx_uid) + return -EPERM; + + /* Only a whitelist of general HCA commands is allowed for this method. 
*/ + if (!devx_is_general_cmd(cmd_in)) + return -EINVAL; + + cmd_out = kvzalloc(cmd_out_len, GFP_KERNEL); + if (!cmd_out) + return -ENOMEM; + + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); + err = mlx5_cmd_exec(dev->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN), + cmd_out, cmd_out_len); + if (err) + goto other_cmd_free; + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out, cmd_out_len); + +other_cmd_free: + kvfree(cmd_out); + return err; +} + +static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OTHER, + &UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OTHER_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO | + UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)), + &UVERBS_ATTR_PTR_OUT_SZ(MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)) +); + +static DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX, + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER)); + +static DECLARE_UVERBS_OBJECT_TREE(devx_objects, + &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX)); diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index f7d685ef2d1f..0b456fa91bb4 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -45,4 +45,17 @@ enum mlx5_ib_alloc_dm_attrs { MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, }; +enum mlx5_ib_devx_methods { + MLX5_IB_METHOD_DEVX_OTHER = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum mlx5_ib_devx_other_attrs { + MLX5_IB_ATTR_DEVX_OTHER_CMD_IN = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, +}; + +enum mlx5_ib_devx_objects { + MLX5_IB_OBJECT_DEVX = (1U << UVERBS_ID_NS_SHIFT), +}; + #endif -- cgit From 7efce3691d33e1f4263a7c64e8ff39b12922509b Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 17 Jun 2018 13:00:01 +0300 Subject: IB/mlx5: Add obj create and destroy functionality Add support to create and destroy firmware objects via the DEVX interface. 
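As a concrete instance of the pattern implemented below, creating a TIR through DEVX leaves behind a cached destroy mailbox of this shape (a sketch built from the layouts used in this patch; 'tirn' and 'uid' come from the create command's output/input):

    /* Sketch: the inbox devx_obj_build_destroy_cmd() synthesizes so that
     * devx_obj_cleanup() can later destroy the object with a single
     * mlx5_cmd_exec() call. */
    u32 din[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0};

    MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIR);
    MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, tirn);
    MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid);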
Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 337 ++++++++++++++++++++++++++++++- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 16 ++ 2 files changed, 350 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 9fca6541a175..87116a3b7916 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -16,6 +16,14 @@ #define UVERBS_MODULE_NAME mlx5_ib #include +#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in) +struct devx_obj { + struct mlx5_core_dev *mdev; + u32 obj_id; + u32 dinlen; /* destroy inbox length */ + u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; +}; + static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file) { return to_mucontext(ib_uverbs_get_ucontext(file)); @@ -65,7 +73,52 @@ void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); } -static bool devx_is_general_cmd(void *in) +static bool devx_is_obj_create_cmd(const void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + + switch (opcode) { + case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: + case MLX5_CMD_OP_CREATE_MKEY: + case MLX5_CMD_OP_CREATE_CQ: + case MLX5_CMD_OP_ALLOC_PD: + case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: + case MLX5_CMD_OP_CREATE_RMP: + case MLX5_CMD_OP_CREATE_SQ: + case MLX5_CMD_OP_CREATE_RQ: + case MLX5_CMD_OP_CREATE_RQT: + case MLX5_CMD_OP_CREATE_TIR: + case MLX5_CMD_OP_CREATE_TIS: + case MLX5_CMD_OP_ALLOC_Q_COUNTER: + case MLX5_CMD_OP_CREATE_FLOW_TABLE: + case MLX5_CMD_OP_CREATE_FLOW_GROUP: + case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: + case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: + case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: + case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + case MLX5_CMD_OP_CREATE_QP: + case MLX5_CMD_OP_CREATE_SRQ: + case MLX5_CMD_OP_CREATE_XRC_SRQ: + case MLX5_CMD_OP_CREATE_DCT: + case MLX5_CMD_OP_CREATE_XRQ: + case MLX5_CMD_OP_ATTACH_TO_MCG: + case MLX5_CMD_OP_ALLOC_XRCD: + return true; + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + { + u16 op_mod = MLX5_GET(set_fte_in, in, op_mod); + if (op_mod == 0) + return true; + return false; + } + default: + return false; + } +} + +static bool devx_is_general_cmd(const void *in) { u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); @@ -95,7 +148,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(struct ib_device *ib_dev, { struct mlx5_ib_ucontext *c = devx_ufile2uctx(file); struct mlx5_ib_dev *dev = to_mdev(ib_dev); - void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN); + void *cmd_in = uverbs_attr_get_alloced_ptr( + attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN); int cmd_out_len = uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT); void *cmd_out; @@ -126,6 +180,256 @@ other_cmd_free: return err; } +static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, + u32 *dinlen, + u32 *obj_id) +{ + u16 obj_type = MLX5_GET(general_obj_in_cmd_hdr, in, obj_type); + u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid); + + *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + *dinlen = MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr); + + MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid); + + switch (MLX5_GET(general_obj_in_cmd_hdr, in, opcode)) { + case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: + 
MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type); + break; + + case MLX5_CMD_OP_CREATE_MKEY: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY); + break; + case MLX5_CMD_OP_CREATE_CQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_CQ); + break; + case MLX5_CMD_OP_ALLOC_PD: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_PD); + break; + case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); + break; + case MLX5_CMD_OP_CREATE_RMP: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RMP); + break; + case MLX5_CMD_OP_CREATE_SQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SQ); + break; + case MLX5_CMD_OP_CREATE_RQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQ); + break; + case MLX5_CMD_OP_CREATE_RQT: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT); + break; + case MLX5_CMD_OP_CREATE_TIR: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIR); + break; + case MLX5_CMD_OP_CREATE_TIS: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS); + break; + case MLX5_CMD_OP_ALLOC_Q_COUNTER: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_Q_COUNTER); + break; + case MLX5_CMD_OP_CREATE_FLOW_TABLE: + *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in); + *obj_id = MLX5_GET(create_flow_table_out, out, table_id); + MLX5_SET(destroy_flow_table_in, din, other_vport, + MLX5_GET(create_flow_table_in, in, other_vport)); + MLX5_SET(destroy_flow_table_in, din, vport_number, + MLX5_GET(create_flow_table_in, in, vport_number)); + MLX5_SET(destroy_flow_table_in, din, table_type, + MLX5_GET(create_flow_table_in, in, table_type)); + MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_FLOW_TABLE); + break; + case MLX5_CMD_OP_CREATE_FLOW_GROUP: + *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in); + *obj_id = MLX5_GET(create_flow_group_out, out, group_id); + MLX5_SET(destroy_flow_group_in, din, other_vport, + MLX5_GET(create_flow_group_in, in, other_vport)); + MLX5_SET(destroy_flow_group_in, din, vport_number, + MLX5_GET(create_flow_group_in, in, vport_number)); + MLX5_SET(destroy_flow_group_in, din, table_type, + MLX5_GET(create_flow_group_in, in, table_type)); + MLX5_SET(destroy_flow_group_in, din, table_id, + MLX5_GET(create_flow_group_in, in, table_id)); + MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_FLOW_GROUP); + break; + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + *dinlen = MLX5_ST_SZ_BYTES(delete_fte_in); + *obj_id = MLX5_GET(set_fte_in, in, flow_index); + MLX5_SET(delete_fte_in, din, other_vport, + MLX5_GET(set_fte_in, in, other_vport)); + MLX5_SET(delete_fte_in, din, vport_number, + MLX5_GET(set_fte_in, in, vport_number)); + MLX5_SET(delete_fte_in, din, table_type, + MLX5_GET(set_fte_in, in, table_type)); + MLX5_SET(delete_fte_in, din, table_id, + MLX5_GET(set_fte_in, in, table_id)); + MLX5_SET(delete_fte_in, din, flow_index, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); + break; + case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_FLOW_COUNTER); + break; + case 
MLX5_CMD_OP_ALLOC_ENCAP_HEADER: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_ENCAP_HEADER); + break; + case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT); + break; + case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: + *dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in); + *obj_id = MLX5_GET(create_scheduling_element_out, out, + scheduling_element_id); + MLX5_SET(destroy_scheduling_element_in, din, + scheduling_hierarchy, + MLX5_GET(create_scheduling_element_in, in, + scheduling_hierarchy)); + MLX5_SET(destroy_scheduling_element_in, din, + scheduling_element_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT); + break; + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + *dinlen = MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in); + *obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port); + MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT); + break; + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + *dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in); + *obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index); + MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); + break; + case MLX5_CMD_OP_CREATE_QP: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_QP); + break; + case MLX5_CMD_OP_CREATE_SRQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SRQ); + break; + case MLX5_CMD_OP_CREATE_XRC_SRQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_XRC_SRQ); + break; + case MLX5_CMD_OP_CREATE_DCT: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_DCT); + break; + case MLX5_CMD_OP_CREATE_XRQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_XRQ); + break; + case MLX5_CMD_OP_ATTACH_TO_MCG: + *dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in); + MLX5_SET(detach_from_mcg_in, din, qpn, + MLX5_GET(attach_to_mcg_in, in, qpn)); + memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, multicast_gid), + MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid), + MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid)); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DETACH_FROM_MCG); + break; + case MLX5_CMD_OP_ALLOC_XRCD: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD); + break; + default: + /* The entry must match to one of the devx_is_obj_create_cmd */ + WARN_ON(true); + break; + } +} + +static int devx_obj_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + struct devx_obj *obj = uobject->object; + int ret; + + ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); + if (ret && why == RDMA_REMOVE_DESTROY) + return ret; + + kfree(obj); + return ret; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_DESTROY)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + return 0; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_ucontext *c = devx_ufile2uctx(file); + struct mlx5_ib_dev *dev = to_mdev(ib_dev); + void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN); + int 
cmd_out_len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT); + void *cmd_out; + struct ib_uobject *uobj; + struct devx_obj *obj; + int err; + + if (!c->devx_uid) + return -EPERM; + + if (!devx_is_obj_create_cmd(cmd_in)) + return -EINVAL; + + obj = kzalloc(sizeof(struct devx_obj), GFP_KERNEL); + if (!obj) + return -ENOMEM; + + cmd_out = kvzalloc(cmd_out_len, GFP_KERNEL); + if (!cmd_out) { + err = -ENOMEM; + goto obj_free; + } + + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); + err = mlx5_cmd_exec(dev->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN), + cmd_out, cmd_out_len); + if (err) + goto cmd_free; + + uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE); + uobj->object = obj; + obj->mdev = dev->mdev; + devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen, &obj->obj_id); + WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32)); + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len); + if (err) + goto cmd_free; + + kvfree(cmd_out); + return 0; + +cmd_free: + kvfree(cmd_out); +obj_free: + kfree(obj); + return err; +} + static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OTHER, &UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OTHER_CMD_IN, UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), @@ -138,8 +442,35 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OTHER, UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)) ); +static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE, + &UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_NEW, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO | + UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)), + &UVERBS_ATTR_PTR_OUT_SZ(MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))); + +static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY, + &UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_DESTROY, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + static DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX, &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER)); +static DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ, + &UVERBS_TYPE_ALLOC_IDR(0, devx_obj_cleanup), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY)); + static DECLARE_UVERBS_OBJECT_TREE(devx_objects, - &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX)); + &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX), + &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ)); diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 0b456fa91bb4..8d285f4555cd 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -54,8 +54,24 @@ enum mlx5_ib_devx_other_attrs { MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, }; +enum mlx5_ib_devx_obj_create_attrs { + MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN, + MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, +}; + +enum mlx5_ib_devx_obj_destroy_attrs { + MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum mlx5_ib_devx_obj_methods { + 
MLX5_IB_METHOD_DEVX_OBJ_CREATE = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_METHOD_DEVX_OBJ_DESTROY, +}; + enum mlx5_ib_devx_objects { MLX5_IB_OBJECT_DEVX = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_OBJECT_DEVX_OBJ, }; #endif -- cgit From e662e14d801b01a976e58bc3f8d9fe49b9fcec3a Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 17 Jun 2018 13:00:02 +0300 Subject: IB/mlx5: Add DEVX support for modify and query commands Add support in DEVX for modify and query commands; the required lock (i.e. READ/WRITE) is taken by the KABI infrastructure accordingly. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 350 +++++++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 5 + include/linux/mlx5/mlx5_ifc.h | 3 + include/uapi/rdma/mlx5_user_ioctl_cmds.h | 14 ++ 4 files changed, 370 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 87116a3b7916..bb5e40a9edd8 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -73,6 +73,161 @@ void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); } +static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + u32 obj_id; + + switch (opcode) { + case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: + case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: + obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id); + break; + case MLX5_CMD_OP_QUERY_MKEY: + obj_id = MLX5_GET(query_mkey_in, in, mkey_index); + break; + case MLX5_CMD_OP_QUERY_CQ: + obj_id = MLX5_GET(query_cq_in, in, cqn); + break; + case MLX5_CMD_OP_MODIFY_CQ: + obj_id = MLX5_GET(modify_cq_in, in, cqn); + break; + case MLX5_CMD_OP_QUERY_SQ: + obj_id = MLX5_GET(query_sq_in, in, sqn); + break; + case MLX5_CMD_OP_MODIFY_SQ: + obj_id = MLX5_GET(modify_sq_in, in, sqn); + break; + case MLX5_CMD_OP_QUERY_RQ: + obj_id = MLX5_GET(query_rq_in, in, rqn); + break; + case MLX5_CMD_OP_MODIFY_RQ: + obj_id = MLX5_GET(modify_rq_in, in, rqn); + break; + case MLX5_CMD_OP_QUERY_RMP: + obj_id = MLX5_GET(query_rmp_in, in, rmpn); + break; + case MLX5_CMD_OP_MODIFY_RMP: + obj_id = MLX5_GET(modify_rmp_in, in, rmpn); + break; + case MLX5_CMD_OP_QUERY_RQT: + obj_id = MLX5_GET(query_rqt_in, in, rqtn); + break; + case MLX5_CMD_OP_MODIFY_RQT: + obj_id = MLX5_GET(modify_rqt_in, in, rqtn); + break; + case MLX5_CMD_OP_QUERY_TIR: + obj_id = MLX5_GET(query_tir_in, in, tirn); + break; + case MLX5_CMD_OP_MODIFY_TIR: + obj_id = MLX5_GET(modify_tir_in, in, tirn); + break; + case MLX5_CMD_OP_QUERY_TIS: + obj_id = MLX5_GET(query_tis_in, in, tisn); + break; + case MLX5_CMD_OP_MODIFY_TIS: + obj_id = MLX5_GET(modify_tis_in, in, tisn); + break; + case MLX5_CMD_OP_QUERY_FLOW_TABLE: + obj_id = MLX5_GET(query_flow_table_in, in, table_id); + break; + case MLX5_CMD_OP_MODIFY_FLOW_TABLE: + obj_id = MLX5_GET(modify_flow_table_in, in, table_id); + break; + case MLX5_CMD_OP_QUERY_FLOW_GROUP: + obj_id = MLX5_GET(query_flow_group_in, in, group_id); + break; + case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: + obj_id = MLX5_GET(query_fte_in, in, flow_index); + break; + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + obj_id = MLX5_GET(set_fte_in, in, flow_index); + break; + case MLX5_CMD_OP_QUERY_Q_COUNTER: + obj_id = MLX5_GET(query_q_counter_in, in, counter_set_id); + break; + case MLX5_CMD_OP_QUERY_FLOW_COUNTER: + obj_id = MLX5_GET(query_flow_counter_in, in, flow_counter_id); + break; + 
case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT: + obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id); + break; + case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: + obj_id = MLX5_GET(query_scheduling_element_in, in, + scheduling_element_id); + break; + case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT: + obj_id = MLX5_GET(modify_scheduling_element_in, in, + scheduling_element_id); + break; + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port); + break; + case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY: + obj_id = MLX5_GET(query_l2_table_entry_in, in, table_index); + break; + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index); + break; + case MLX5_CMD_OP_QUERY_QP: + obj_id = MLX5_GET(query_qp_in, in, qpn); + break; + case MLX5_CMD_OP_RST2INIT_QP: + obj_id = MLX5_GET(rst2init_qp_in, in, qpn); + break; + case MLX5_CMD_OP_INIT2RTR_QP: + obj_id = MLX5_GET(init2rtr_qp_in, in, qpn); + break; + case MLX5_CMD_OP_RTR2RTS_QP: + obj_id = MLX5_GET(rtr2rts_qp_in, in, qpn); + break; + case MLX5_CMD_OP_RTS2RTS_QP: + obj_id = MLX5_GET(rts2rts_qp_in, in, qpn); + break; + case MLX5_CMD_OP_SQERR2RTS_QP: + obj_id = MLX5_GET(sqerr2rts_qp_in, in, qpn); + break; + case MLX5_CMD_OP_2ERR_QP: + obj_id = MLX5_GET(qp_2err_in, in, qpn); + break; + case MLX5_CMD_OP_2RST_QP: + obj_id = MLX5_GET(qp_2rst_in, in, qpn); + break; + case MLX5_CMD_OP_QUERY_DCT: + obj_id = MLX5_GET(query_dct_in, in, dctn); + break; + case MLX5_CMD_OP_QUERY_XRQ: + obj_id = MLX5_GET(query_xrq_in, in, xrqn); + break; + case MLX5_CMD_OP_QUERY_XRC_SRQ: + obj_id = MLX5_GET(query_xrc_srq_in, in, xrc_srqn); + break; + case MLX5_CMD_OP_ARM_XRC_SRQ: + obj_id = MLX5_GET(arm_xrc_srq_in, in, xrc_srqn); + break; + case MLX5_CMD_OP_QUERY_SRQ: + obj_id = MLX5_GET(query_srq_in, in, srqn); + break; + case MLX5_CMD_OP_ARM_RQ: + obj_id = MLX5_GET(arm_rq_in, in, srq_number); + break; + case MLX5_CMD_OP_DRAIN_DCT: + case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: + obj_id = MLX5_GET(drain_dct_in, in, dctn); + break; + case MLX5_CMD_OP_ARM_XRQ: + obj_id = MLX5_GET(arm_xrq_in, in, xrqn); + break; + default: + return false; + } + + if (obj_id == obj->obj_id) + return true; + + return false; +} + static bool devx_is_obj_create_cmd(const void *in) { u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); @@ -118,7 +273,83 @@ static bool devx_is_obj_create_cmd(const void *in) } } -static bool devx_is_general_cmd(const void *in) +static bool devx_is_obj_modify_cmd(const void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + + switch (opcode) { + case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: + case MLX5_CMD_OP_MODIFY_CQ: + case MLX5_CMD_OP_MODIFY_RMP: + case MLX5_CMD_OP_MODIFY_SQ: + case MLX5_CMD_OP_MODIFY_RQ: + case MLX5_CMD_OP_MODIFY_RQT: + case MLX5_CMD_OP_MODIFY_TIR: + case MLX5_CMD_OP_MODIFY_TIS: + case MLX5_CMD_OP_MODIFY_FLOW_TABLE: + case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + case MLX5_CMD_OP_RST2INIT_QP: + case MLX5_CMD_OP_INIT2RTR_QP: + case MLX5_CMD_OP_RTR2RTS_QP: + case MLX5_CMD_OP_RTS2RTS_QP: + case MLX5_CMD_OP_SQERR2RTS_QP: + case MLX5_CMD_OP_2ERR_QP: + case MLX5_CMD_OP_2RST_QP: + case MLX5_CMD_OP_ARM_XRC_SRQ: + case MLX5_CMD_OP_ARM_RQ: + case MLX5_CMD_OP_DRAIN_DCT: + case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: + case MLX5_CMD_OP_ARM_XRQ: + return true; + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + { + u16 op_mod = MLX5_GET(set_fte_in, in, op_mod); + + if (op_mod == 1) + return true; + return false; 
+ } + default: + return false; + } +} + +static bool devx_is_obj_query_cmd(const void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + + switch (opcode) { + case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: + case MLX5_CMD_OP_QUERY_MKEY: + case MLX5_CMD_OP_QUERY_CQ: + case MLX5_CMD_OP_QUERY_RMP: + case MLX5_CMD_OP_QUERY_SQ: + case MLX5_CMD_OP_QUERY_RQ: + case MLX5_CMD_OP_QUERY_RQT: + case MLX5_CMD_OP_QUERY_TIR: + case MLX5_CMD_OP_QUERY_TIS: + case MLX5_CMD_OP_QUERY_Q_COUNTER: + case MLX5_CMD_OP_QUERY_FLOW_TABLE: + case MLX5_CMD_OP_QUERY_FLOW_GROUP: + case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_QUERY_FLOW_COUNTER: + case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT: + case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY: + case MLX5_CMD_OP_QUERY_QP: + case MLX5_CMD_OP_QUERY_SRQ: + case MLX5_CMD_OP_QUERY_XRC_SRQ: + case MLX5_CMD_OP_QUERY_DCT: + case MLX5_CMD_OP_QUERY_XRQ: + return true; + default: + return false; + } +} + +static bool devx_is_general_cmd(void *in) { u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); @@ -430,6 +661,89 @@ obj_free: return err; } +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_ucontext *c = devx_ufile2uctx(file); + struct mlx5_ib_dev *dev = to_mdev(ib_dev); + void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN); + int cmd_out_len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT); + struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE); + void *cmd_out; + int err; + + if (!c->devx_uid) + return -EPERM; + + if (!devx_is_obj_modify_cmd(cmd_in)) + return -EINVAL; + + if (!devx_is_valid_obj_id(uobj->object, cmd_in)) + return -EINVAL; + + cmd_out = kvzalloc(cmd_out_len, GFP_KERNEL); + if (!cmd_out) + return -ENOMEM; + + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); + err = mlx5_cmd_exec(dev->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN), + cmd_out, cmd_out_len); + if (err) + goto other_cmd_free; + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, + cmd_out, cmd_out_len); + +other_cmd_free: + kvfree(cmd_out); + return err; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_ucontext *c = devx_ufile2uctx(file); + struct mlx5_ib_dev *dev = to_mdev(ib_dev); + void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN); + int cmd_out_len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT); + struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE); + void *cmd_out; + int err; + + if (!c->devx_uid) + return -EPERM; + + if (!devx_is_obj_query_cmd(cmd_in)) + return -EINVAL; + + if (!devx_is_valid_obj_id(uobj->object, cmd_in)) + return -EINVAL; + + cmd_out = kvzalloc(cmd_out_len, GFP_KERNEL); + if (!cmd_out) + return -ENOMEM; + + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); + err = mlx5_cmd_exec(dev->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN), + cmd_out, cmd_out_len); + if (err) + goto other_cmd_free; + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, cmd_out, cmd_out_len); + +other_cmd_free: + kvfree(cmd_out); + return err; +} + static 
DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OTHER, &UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OTHER_CMD_IN, UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), @@ -463,13 +777,45 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY, UVERBS_ACCESS_DESTROY, UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); +static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY, + &UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_WRITE, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO | + UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)), + &UVERBS_ATTR_PTR_OUT_SZ(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))); + +static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY, + &UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_READ, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO | + UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)), + &UVERBS_ATTR_PTR_OUT_SZ(MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))); + static DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX, &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER)); static DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ, &UVERBS_TYPE_ALLOC_IDR(0, devx_obj_cleanup), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE), - &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY)); + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY)); static DECLARE_UVERBS_OBJECT_TREE(devx_objects, &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX), diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index d07f24de8fa3..9d03a202abb1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -429,6 +429,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_FPGA_QUERY_QP: case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS: case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: + case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: + case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; @@ -603,6 +605,9 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(FPGA_DESTROY_QP); MLX5_COMMAND_STR_CASE(CREATE_GENERAL_OBJECT); MLX5_COMMAND_STR_CASE(DESTROY_GENERAL_OBJECT); + MLX5_COMMAND_STR_CASE(MODIFY_GENERAL_OBJECT); + MLX5_COMMAND_STR_CASE(QUERY_GENERAL_OBJECT); + MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT); default: return "unknown command opcode"; } } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f810772e80c0..ac24ed87c67e 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -246,12 +246,15 @@ enum { MLX5_CMD_OP_DEALLOC_ENCAP_HEADER = 0x93e, MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT = 0x940, 
MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941, + MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT = 0x942, MLX5_CMD_OP_FPGA_CREATE_QP = 0x960, MLX5_CMD_OP_FPGA_MODIFY_QP = 0x961, MLX5_CMD_OP_FPGA_QUERY_QP = 0x962, MLX5_CMD_OP_FPGA_DESTROY_QP = 0x963, MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS = 0x964, MLX5_CMD_OP_CREATE_GENERAL_OBJECT = 0xa00, + MLX5_CMD_OP_MODIFY_GENERAL_OBJECT = 0xa01, + MLX5_CMD_OP_QUERY_GENERAL_OBJECT = 0xa02, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT = 0xa03, MLX5_CMD_OP_MAX }; diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 8d285f4555cd..97d216b8d053 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -64,9 +64,23 @@ enum mlx5_ib_devx_obj_destroy_attrs { MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), }; +enum mlx5_ib_devx_obj_modify_attrs { + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, +}; + +enum mlx5_ib_devx_obj_query_attrs { + MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, +}; + enum mlx5_ib_devx_obj_methods { MLX5_IB_METHOD_DEVX_OBJ_CREATE = (1U << UVERBS_ID_NS_SHIFT), MLX5_IB_METHOD_DEVX_OBJ_DESTROY, + MLX5_IB_METHOD_DEVX_OBJ_MODIFY, + MLX5_IB_METHOD_DEVX_OBJ_QUERY, }; enum mlx5_ib_devx_objects { -- cgit From 7c043e908a74ae0a935037cdd984d0cb89b2b970 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 17 Jun 2018 13:00:03 +0300 Subject: IB/mlx5: Add support for DEVX query UAR Return a device UAR index for a given user index via the DEVX interface. Security note: The hardware protection mechanism works like this: Each device object that is subject to UAR doorbells (QP/SQ/CQ) gets a UAR ID (called uar_page in the device specification manual) upon its creation. Then upon doorbell, hardware fetches the object context for which the doorbell was rung, and validates that the UAR through which the DB was rung matches the UAR ID of the object. If there is no match, the doorbell is silently ignored by the hardware. Of course, the user cannot ring a doorbell on a UAR that was not mapped to it. Now in devx, as the devx kernel does not manipulate the QP/SQ/CQ command mailboxes (except tagging them with UID), we expose to the user its UAR ID, so it can embed it in these objects in the expected specification format. So the only thing the user can do is hurt itself by creating a QP/SQ/CQ with a UAR ID other than its own, and then in this case other users may ring a doorbell on its objects. The consequence of that will be that another user can schedule a QP/SQ of the buggy user for execution (just insert it into the hardware schedule queue or arm its CQ for event generation); no further harm is expected.
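To make the exposed flow concrete, the following is an illustrative sketch only, not part of this patch: a DEVX application first queries the device UAR ID for one of its UAR indices, then embeds that ID in the uar_page field of an object it creates through the general command interface. devx_query_uar() is a hypothetical wrapper around the new method; the kernel's mlx5_ifc accessors are borrowed for brevity, and PAS/umem setup is omitted.

	/*
	 * Illustrative sketch; devx_query_uar() is a hypothetical wrapper
	 * around MLX5_IB_METHOD_DEVX_QUERY_UAR.
	 */
	u32 uar_id;
	u32 in[MLX5_ST_SZ_DW(create_cq_in)] = {};
	void *cqc;

	/* Translate the process-local UAR index to the device UAR ID */
	if (devx_query_uar(ctx, user_idx, &uar_id))
		return -EINVAL;

	MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ);
	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	/*
	 * Embed the UAR ID in the CQ context; on doorbell the hardware
	 * verifies that the UAR page used matches this field.
	 */
	MLX5_SET(cqc, cqc, uar_page, uar_id);

A QP would embed the same ID in the uar_page field of its qpc in the same way.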
Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 53 +++++++++++++++++++++++++++++++- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 ++ drivers/infiniband/hw/mlx5/qp.c | 9 ++++-- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 6 ++++ 4 files changed, 67 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index bb5e40a9edd8..9b1804eb9924 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -373,6 +373,50 @@ static bool devx_is_general_cmd(void *in) } +/* + * Security note: + * The hardware protection mechanism works like this: Each device object that + * is subject to UAR doorbells (QP/SQ/CQ) gets a UAR ID (called uar_page in + * the device specification manual) upon its creation. Then upon doorbell, + * hardware fetches the object context for which the doorbell was rung, and + * validates that the UAR through which the DB was rung matches the UAR ID + * of the object. + * If there is no match, the doorbell is silently ignored by the hardware. Of course, + * the user cannot ring a doorbell on a UAR that was not mapped to it. + * Now in devx, as the devx kernel does not manipulate the QP/SQ/CQ command + * mailboxes (except tagging them with UID), we expose to the user its UAR + * ID, so it can embed it in these objects in the expected specification + * format. So the only thing the user can do is hurt itself by creating a + * QP/SQ/CQ with a UAR ID other than its own, and then in this case other users + * may ring a doorbell on its objects. + * The consequence of that will be that another user can schedule a QP/SQ + * of the buggy user for execution (just insert it into the hardware schedule + * queue or arm its CQ for event generation); no further harm is expected.
+ */ +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_ucontext *c = devx_ufile2uctx(file); + u32 user_idx; + s32 dev_idx; + + if (uverbs_copy_from(&user_idx, attrs, + MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX)) + return -EFAULT; + + dev_idx = bfregn_to_uar_index(to_mdev(ib_dev), + &c->bfregi, user_idx, true); + if (dev_idx < 0) + return dev_idx; + + if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX, + &dev_idx, sizeof(dev_idx))) + return -EFAULT; + + return 0; +} + static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(struct ib_device *ib_dev, struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) @@ -744,6 +788,12 @@ other_cmd_free: return err; } +static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR, + &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX, UVERBS_ATTR_TYPE(u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX, UVERBS_ATTR_TYPE(u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OTHER, &UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OTHER_CMD_IN, UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), @@ -808,7 +858,8 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY, UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))); static DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX, - &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER)); + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR)); static DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ, &UVERBS_TYPE_ALLOC_IDR(0, devx_obj_cleanup), diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 1c857dd3c77f..e45f364622eb 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1329,4 +1329,7 @@ static inline int get_num_static_uars(struct mlx5_ib_dev *dev, unsigned long mlx5_ib_get_xlt_emergency_page(void); void mlx5_ib_put_xlt_emergency_page(void); +int bfregn_to_uar_index(struct mlx5_ib_dev *dev, + struct mlx5_bfreg_info *bfregi, int bfregn, + bool dyn_bfreg); #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index e3c4ab9be41d..d5f072c50ee5 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -641,9 +641,9 @@ static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq); -static int bfregn_to_uar_index(struct mlx5_ib_dev *dev, - struct mlx5_bfreg_info *bfregi, int bfregn, - bool dyn_bfreg) +int bfregn_to_uar_index(struct mlx5_ib_dev *dev, + struct mlx5_bfreg_info *bfregi, int bfregn, + bool dyn_bfreg) { int bfregs_per_sys_page; int index_of_sys_page; @@ -653,6 +653,9 @@ static int bfregn_to_uar_index(struct mlx5_ib_dev *dev, MLX5_NON_FP_BFREGS_PER_UAR; index_of_sys_page = bfregn / bfregs_per_sys_page; + if (index_of_sys_page >= bfregi->num_sys_pages) + return -EINVAL; + if (dyn_bfreg) { index_of_sys_page += bfregi->num_static_sys_pages; if (bfregn > bfregi->num_dyn_bfregs || diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 97d216b8d053..1252695cd94b 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -47,6 +47,7 @@ enum mlx5_ib_alloc_dm_attrs { enum mlx5_ib_devx_methods { 
MLX5_IB_METHOD_DEVX_OTHER = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_METHOD_DEVX_QUERY_UAR, }; enum mlx5_ib_devx_other_attrs { @@ -60,6 +61,11 @@ enum mlx5_ib_devx_obj_create_attrs { MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, }; +enum mlx5_ib_devx_query_uar_attrs { + MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX, +}; + enum mlx5_ib_devx_obj_destroy_attrs { MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), }; -- cgit From aeae94579caf77406a8a235ea33fdb67abe9a57e Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 17 Jun 2018 13:00:04 +0300 Subject: IB/mlx5: Add DEVX support for memory registration Add support to register memory with the firmware via the DEVX interface. The driver translates a given user address to an ib_umem, then registers the physical addresses with the firmware and gets a unique id for this registration, to be used for this virtual address. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 199 ++++++++++++++++++++++++++++++- include/linux/mlx5/mlx5_ifc.h | 1 + include/uapi/rdma/mlx5_user_ioctl_cmds.h | 18 +++ 3 files changed, 217 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 9b1804eb9924..162321f486eb 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -24,6 +24,22 @@ struct devx_obj { u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; }; +struct devx_umem { + struct mlx5_core_dev *mdev; + struct ib_umem *umem; + u32 page_offset; + int page_shift; + int ncont; + u32 dinlen; + u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)]; +}; + +struct devx_umem_reg_cmd { + void *in; + u32 inlen; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; +}; + static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file) { return to_mucontext(ib_uverbs_get_ucontext(file)); @@ -788,6 +804,181 @@ other_cmd_free: return err; } +static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, + struct uverbs_attr_bundle *attrs, + struct devx_umem *obj) +{ + u64 addr; + size_t size; + int access; + int npages; + int err; + u32 page_mask; + + if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) || + uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN) || + uverbs_copy_from(&access, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS)) + return -EFAULT; + + err = ib_check_mr_access(access); + if (err) + return err; + + obj->umem = ib_umem_get(ucontext, addr, size, access, 0); + if (IS_ERR(obj->umem)) + return PTR_ERR(obj->umem); + + mlx5_ib_cont_pages(obj->umem, obj->umem->address, + MLX5_MKEY_PAGE_SHIFT_MASK, &npages, + &obj->page_shift, &obj->ncont, NULL); + + if (!npages) { + ib_umem_release(obj->umem); + return -EINVAL; + } + + page_mask = (1 << obj->page_shift) - 1; + obj->page_offset = obj->umem->address & page_mask; + + return 0; +} + +static int devx_umem_reg_cmd_alloc(struct devx_umem *obj, + struct devx_umem_reg_cmd *cmd) +{ + cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) + + (MLX5_ST_SZ_BYTES(mtt) * obj->ncont); + cmd->in = kvzalloc(cmd->inlen, GFP_KERNEL); + return cmd->in ?
0 : -ENOMEM; +} + +static void devx_umem_reg_cmd_free(struct devx_umem_reg_cmd *cmd) +{ + kvfree(cmd->in); +} + +static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev, + struct devx_umem *obj, + struct devx_umem_reg_cmd *cmd) +{ + void *umem; + __be64 *mtt; + + umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem); + mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt); + + MLX5_SET(general_obj_in_cmd_hdr, cmd->in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, cmd->in, obj_type, MLX5_OBJ_TYPE_UMEM); + MLX5_SET64(umem, umem, num_of_mtt, obj->ncont); + MLX5_SET(umem, umem, log_page_size, obj->page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(umem, umem, page_offset, obj->page_offset); + mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt, + (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) | + MLX5_IB_MTT_READ); +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_ucontext *c = devx_ufile2uctx(file); + struct mlx5_ib_dev *dev = to_mdev(ib_dev); + struct devx_umem_reg_cmd cmd; + struct devx_umem *obj; + struct ib_uobject *uobj; + u32 obj_id; + int err; + + if (!c->devx_uid) + return -EPERM; + + uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE); + obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL); + if (!obj) + return -ENOMEM; + + err = devx_umem_get(dev, &c->ibucontext, attrs, obj); + if (err) + goto err_obj_free; + + err = devx_umem_reg_cmd_alloc(obj, &cmd); + if (err) + goto err_umem_release; + + devx_umem_reg_cmd_build(dev, obj, &cmd); + + MLX5_SET(general_obj_in_cmd_hdr, cmd.in, uid, c->devx_uid); + err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out, + sizeof(cmd.out)); + if (err) + goto err_umem_reg_cmd_free; + + obj->mdev = dev->mdev; + uobj->object = obj; + devx_obj_build_destroy_cmd(cmd.in, cmd.out, obj->dinbox, &obj->dinlen, &obj_id); + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, &obj_id, sizeof(obj_id)); + if (err) + goto err_umem_destroy; + + devx_umem_reg_cmd_free(&cmd); + + return 0; + +err_umem_destroy: + mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, cmd.out, sizeof(cmd.out)); +err_umem_reg_cmd_free: + devx_umem_reg_cmd_free(&cmd); +err_umem_release: + ib_umem_release(obj->umem); +err_obj_free: + kfree(obj); + return err; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_DEREG)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + return 0; +} + +static int devx_umem_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct devx_umem *obj = uobject->object; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + int err; + + err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); + if (err && why == RDMA_REMOVE_DESTROY) + return err; + + ib_umem_release(obj->umem); + kfree(obj); + return 0; +} + +static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG, + &UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE, + MLX5_IB_OBJECT_DEVX_UMEM, + UVERBS_ACCESS_NEW, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR, UVERBS_ATTR_TYPE(u64), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN, UVERBS_ATTR_TYPE(u64), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, UVERBS_ATTR_TYPE(u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + 
&UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, UVERBS_ATTR_TYPE(u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG, + &UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE, + MLX5_IB_OBJECT_DEVX_UMEM, + UVERBS_ACCESS_DESTROY, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR, &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX, UVERBS_ATTR_TYPE(u32), UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), @@ -868,6 +1059,12 @@ static DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ, &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY)); +static DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM, + &UVERBS_TYPE_ALLOC_IDR(0, devx_umem_cleanup), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG)); + static DECLARE_UVERBS_OBJECT_TREE(devx_objects, &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX), - &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ)); + &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ), + &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM)); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ac24ed87c67e..00b539303f5d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -82,6 +82,7 @@ enum { enum { MLX5_OBJ_TYPE_UCTX = 0x0004, + MLX5_OBJ_TYPE_UMEM = 0x0005, }; enum { diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 1252695cd94b..dbc549a7bf50 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -89,9 +89,27 @@ enum mlx5_ib_devx_obj_methods { MLX5_IB_METHOD_DEVX_OBJ_QUERY, }; +enum mlx5_ib_devx_umem_reg_attrs { + MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR, + MLX5_IB_ATTR_DEVX_UMEM_REG_LEN, + MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, + MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, +}; + +enum mlx5_ib_devx_umem_dereg_attrs { + MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum mlx5_ib_devx_umem_methods { + MLX5_IB_METHOD_DEVX_UMEM_REG = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_METHOD_DEVX_UMEM_DEREG, +}; + enum mlx5_ib_devx_objects { MLX5_IB_OBJECT_DEVX = (1U << UVERBS_ID_NS_SHIFT), MLX5_IB_OBJECT_DEVX_OBJ, + MLX5_IB_OBJECT_DEVX_UMEM, }; #endif -- cgit From f6fe01b7181be1751a5d8f19f230eb0d17b9a7b1 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 17 Jun 2018 13:00:05 +0300 Subject: IB/mlx5: Add DEVX query EQN support Return the matching device EQN for a given user vector number via the DEVX interface. Note: EQs are owned by the kernel and shared by all user processes. Basically, a user CQ can point to any EQ. The kernel doesn't enforce any such limitation today either. 
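As an illustrative sketch, not part of this patch: a DEVX user creating a CQ would map its completion vector to a device EQN through this method and place the result in the c_eqn field of the CQ context. devx_query_eqn() below is a hypothetical wrapper around MLX5_IB_METHOD_DEVX_QUERY_EQN, and the kernel's mlx5_ifc accessors are borrowed for brevity.

	u32 dev_eqn;
	u32 in[MLX5_ST_SZ_DW(create_cq_in)] = {};
	void *cqc;

	/* Map the user completion vector to the device EQ number */
	if (devx_query_eqn(ctx, user_vector, &dev_eqn))
		return -EINVAL;

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	/* Any EQ may be referenced; the kernel does not restrict the choice */
	MLX5_SET(cqc, cqc, c_eqn, dev_eqn);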
Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 34 +++++++++++++++++++++++++++++++- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 6 ++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 162321f486eb..dbf2d61591d3 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -389,6 +389,31 @@ static bool devx_is_general_cmd(void *in) } } +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_dev *dev = to_mdev(ib_dev); + int user_vector; + int dev_eqn; + unsigned int irqn; + int err; + + if (uverbs_copy_from(&user_vector, attrs, + MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC)) + return -EFAULT; + + err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn, &irqn); + if (err < 0) + return err; + + if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, + &dev_eqn, sizeof(dev_eqn))) + return -EFAULT; + + return 0; +} + /* *Security note: * The hardware protection mechanism works like this: Each device object that @@ -979,6 +1004,12 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG, UVERBS_ACCESS_DESTROY, UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); +static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN, + &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC, UVERBS_ATTR_TYPE(u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, UVERBS_ATTR_TYPE(u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR, &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX, UVERBS_ATTR_TYPE(u32), UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), @@ -1050,7 +1081,8 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY, static DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX, &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER), - &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR)); + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN)); static DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ, &UVERBS_TYPE_ALLOC_IDR(0, devx_obj_cleanup), diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index dbc549a7bf50..1a05bb4b0b34 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -48,6 +48,7 @@ enum mlx5_ib_alloc_dm_attrs { enum mlx5_ib_devx_methods { MLX5_IB_METHOD_DEVX_OTHER = (1U << UVERBS_ID_NS_SHIFT), MLX5_IB_METHOD_DEVX_QUERY_UAR, + MLX5_IB_METHOD_DEVX_QUERY_EQN, }; enum mlx5_ib_devx_other_attrs { @@ -82,6 +83,11 @@ enum mlx5_ib_devx_obj_query_attrs { MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, }; +enum mlx5_ib_devx_query_eqn_attrs { + MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, +}; + enum mlx5_ib_devx_obj_methods { MLX5_IB_METHOD_DEVX_OBJ_CREATE = (1U << UVERBS_ID_NS_SHIFT), MLX5_IB_METHOD_DEVX_OBJ_DESTROY, -- cgit From c59450c463695a016e823175bac421cff219935d Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 17 Jun 2018 13:00:06 +0300 Subject: IB/mlx5: Expose DEVX tree Expose DEVX tree to be used by upper layers. 
Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 5 +++++ drivers/infiniband/hw/mlx5/main.c | 7 ++++++- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index dbf2d61591d3..f535e7da2dc5 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1100,3 +1100,8 @@ static DECLARE_UVERBS_OBJECT_TREE(devx_objects, &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX), &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ), &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM)); + +const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void) +{ + return &devx_objects; +} diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 058a82a55ffe..c29c7c838980 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5264,7 +5264,7 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_flow_action, UVERBS_OBJECT_FLOW_ACTION, UVERBS_ATTR_TYPE(u64), UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); -#define NUM_TREES 2 +#define NUM_TREES 3 static int populate_specs_root(struct mlx5_ib_dev *dev) { const struct uverbs_object_tree_def *default_root[NUM_TREES + 1] = { @@ -5279,6 +5279,11 @@ static int populate_specs_root(struct mlx5_ib_dev *dev) !WARN_ON(num_trees >= ARRAY_SIZE(default_root))) default_root[num_trees++] = &mlx5_ib_dm; + if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_UCTX && + !WARN_ON(num_trees >= ARRAY_SIZE(default_root))) + default_root[num_trees++] = mlx5_ib_get_devx_tree(); + dev->ib_dev.specs_root = uverbs_alloc_spec_tree(num_trees, default_root); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index e45f364622eb..a72c73c3ed33 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1221,12 +1221,15 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context); void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context); +const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void); #else static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context) { return -EOPNOTSUPP; }; static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context) {} +static inline const struct uverbs_object_tree_def * +mlx5_ib_get_devx_tree(void) { return NULL; } #endif static inline void init_query_mad(struct ib_smp *mad) { -- cgit From b0ba3c18d6bf7e454e83e5f49bbc1fd0eb42be71 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 4 Jun 2018 11:43:29 -0700 Subject: IB/hfi1: Move normal functions from hfi1_devdata to const array The current implementation precludes having receive context specific packet type receive handlers. Fix this by adding a C99 const array for the existing handlers and removing the current 72 bytes of pointers from devdata. A new pointer in hfi1_ctxtdata will point to the const array. Reviewed-by: Michael J.
Ruhl Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/driver.c | 25 ++++++++++++++++++------- drivers/infiniband/hw/hfi1/hfi.h | 16 ++++------------ drivers/infiniband/hw/hfi1/init.c | 19 +------------------ 3 files changed, 23 insertions(+), 37 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 94dca95db04f..a77aec4431cd 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -757,7 +757,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread) * crashing down. There is no need to eat another * comparison in this performance critical code. */ - packet->rcd->dd->rhf_rcv_function_map[packet->etype](packet); + packet->rcd->rhf_rcv_function_map[packet->etype](packet); packet->numpkt++; /* Set up for the next packet */ @@ -1575,7 +1575,7 @@ void handle_eflags(struct hfi1_packet *packet) * The following functions are called by the interrupt handler. They are type * specific handlers for each packet type. */ -int process_receive_ib(struct hfi1_packet *packet) +static int process_receive_ib(struct hfi1_packet *packet) { if (hfi1_setup_9B_packet(packet)) return RHF_RCV_CONTINUE; @@ -1607,7 +1607,7 @@ static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet) return false; } -int process_receive_bypass(struct hfi1_packet *packet) +static int process_receive_bypass(struct hfi1_packet *packet) { struct hfi1_devdata *dd = packet->rcd->dd; @@ -1649,7 +1649,7 @@ int process_receive_bypass(struct hfi1_packet *packet) return RHF_RCV_CONTINUE; } -int process_receive_error(struct hfi1_packet *packet) +static int process_receive_error(struct hfi1_packet *packet) { /* KHdrHCRCErr -- KDETH packet with a bad HCRC */ if (unlikely( @@ -1668,7 +1668,7 @@ int process_receive_error(struct hfi1_packet *packet) return RHF_RCV_CONTINUE; } -int kdeth_process_expected(struct hfi1_packet *packet) +static int kdeth_process_expected(struct hfi1_packet *packet) { hfi1_setup_9B_packet(packet); if (unlikely(hfi1_dbg_should_fault_rx(packet))) @@ -1682,7 +1682,7 @@ int kdeth_process_expected(struct hfi1_packet *packet) return RHF_RCV_CONTINUE; } -int kdeth_process_eager(struct hfi1_packet *packet) +static int kdeth_process_eager(struct hfi1_packet *packet) { hfi1_setup_9B_packet(packet); if (unlikely(hfi1_dbg_should_fault_rx(packet))) @@ -1695,7 +1695,7 @@ int kdeth_process_eager(struct hfi1_packet *packet) return RHF_RCV_CONTINUE; } -int process_receive_invalid(struct hfi1_packet *packet) +static int process_receive_invalid(struct hfi1_packet *packet) { dd_dev_err(packet->rcd->dd, "Invalid packet type %d. 
Dropping\n", rhf_rcv_type(packet->rhf)); @@ -1760,3 +1760,14 @@ next: update_ps_mdata(&mdata, rcd); } } + +const rhf_rcv_function_ptr normal_rhf_rcv_functions[] = { + [RHF_RCV_TYPE_EXPECTED] = kdeth_process_expected, + [RHF_RCV_TYPE_EAGER] = kdeth_process_eager, + [RHF_RCV_TYPE_IB] = process_receive_ib, + [RHF_RCV_TYPE_ERROR] = process_receive_error, + [RHF_RCV_TYPE_BYPASS] = process_receive_bypass, + [RHF_RCV_TYPE_INVALID5] = process_receive_invalid, + [RHF_RCV_TYPE_INVALID6] = process_receive_invalid, + [RHF_RCV_TYPE_INVALID7] = process_receive_invalid, +}; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 4ab8b5bfbed1..b0a44222863c 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -191,6 +191,7 @@ struct exp_tid_set { u32 count; }; +typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet); struct hfi1_ctxtdata { /* shadow the ctxt's RcvCtrl register */ u64 rcvctrl; @@ -259,6 +260,8 @@ struct hfi1_ctxtdata { char comm[TASK_COMM_LEN]; /* so file ops can get at unit */ struct hfi1_devdata *dd; + /* per context recv functions */ + const rhf_rcv_function_ptr *rhf_rcv_function_map; /* so functions that need physical port can get it easily */ struct hfi1_pportdata *ppd; /* associated msix interrupt */ @@ -897,12 +900,11 @@ struct hfi1_pportdata { u64 vl_xmit_flit_cnt[C_VL_COUNT + 1]; }; -typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet); - typedef void (*opcode_handler)(struct hfi1_packet *packet); typedef void (*hfi1_make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct rvt_swqe *wqe); +extern const rhf_rcv_function_ptr normal_rhf_rcv_functions[]; /* return values for the RHF receive functions */ @@ -1289,8 +1291,6 @@ struct hfi1_devdata { u64 sw_cce_err_status_aggregate; /* Software counter that aggregates all bypass packet rcv errors */ u64 sw_rcv_bypass_packet_errors; - /* receive interrupt function */ - rhf_rcv_function_ptr normal_rhf_rcv_functions[8]; /* Save the enabled LCB error bits */ u64 lcb_err_en; @@ -1329,8 +1329,6 @@ struct hfi1_devdata { /* seqlock for sc2vl */ seqlock_t sc2vl_lock ____cacheline_aligned_in_smp; u64 sc2vl[4]; - /* receive interrupt functions */ - rhf_rcv_function_ptr *rhf_rcv_function_map; u64 __percpu *rcv_limit; u16 rhf_offset; /* offset of RHF within receive header entry */ /* adding a new field here would make it part of this cacheline */ @@ -2021,12 +2019,6 @@ static inline void flush_wc(void) } void handle_eflags(struct hfi1_packet *packet); -int process_receive_ib(struct hfi1_packet *packet); -int process_receive_bypass(struct hfi1_packet *packet); -int process_receive_error(struct hfi1_packet *packet); -int kdeth_process_expected(struct hfi1_packet *packet); -int kdeth_process_eager(struct hfi1_packet *packet); -int process_receive_invalid(struct hfi1_packet *packet); void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd); /* global module parameter variables */ diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index f110842b91f5..0c45924a1e21 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -367,6 +367,7 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, __set_bit(0, rcd->in_use_ctxts); rcd->numa_id = numa; rcd->rcv_array_groups = dd->rcv_entries.ngroups; + rcd->rhf_rcv_function_map = normal_rhf_rcv_functions; mutex_init(&rcd->exp_mutex); @@ -853,24 +854,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) struct hfi1_ctxtdata *rcd; struct 
hfi1_pportdata *ppd; - /* Set up recv low level handlers */ - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_EXPECTED] = - kdeth_process_expected; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_EAGER] = - kdeth_process_eager; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_IB] = process_receive_ib; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_ERROR] = - process_receive_error; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_BYPASS] = - process_receive_bypass; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_INVALID5] = - process_receive_invalid; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_INVALID6] = - process_receive_invalid; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_INVALID7] = - process_receive_invalid; - dd->rhf_rcv_function_map = dd->normal_rhf_rcv_functions; - /* Set up send low level handlers */ dd->process_pio_send = hfi1_verbs_send_pio; dd->process_dma_send = hfi1_verbs_send_dma; -- cgit From 40442b30aad0b5a476aef8f9ea91c11d0f9eae2a Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 4 Jun 2018 11:43:37 -0700 Subject: IB/hfi1: Move rhf_offset from devdata to ctxtdata This field should be in ctxtdata to allow for better locality of access by eliminating a dd dereference. The new field is now side-by-side with rcvhdrqentsize since the rhf_offset is a function of the rcvhdrqentsize. Both fields are now correctly sized as u8. Reviewed-by: Michael J. Ruhl Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 5 ----- drivers/infiniband/hw/hfi1/driver.c | 38 ++++++++++++++++++------------------- drivers/infiniband/hw/hfi1/hfi.h | 7 ++++--- drivers/infiniband/hw/hfi1/init.c | 2 ++ 4 files changed, 24 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 6deb101cdd43..b13636c95756 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -15217,11 +15217,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, aspm_init(dd); dd->rcvhdrsize = DEFAULT_RCVHDRSIZE; - /* - * rcd[0] is guaranteed to be valid by this point. Also, all - * context are using the same value, as per the module parameter. 
- */ - dd->rhf_offset = dd->rcd[0]->rcvhdrqentsize - sizeof(u64) / sizeof(u32); ret = init_pervl_scs(dd); if (ret) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index a77aec4431cd..a41f85558312 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -208,25 +208,25 @@ static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf, (offset * RCV_BUF_BLOCK_SIZE)); } -static inline void *hfi1_get_header(struct hfi1_devdata *dd, +static inline void *hfi1_get_header(struct hfi1_ctxtdata *rcd, __le32 *rhf_addr) { u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr)); - return (void *)(rhf_addr - dd->rhf_offset + offset); + return (void *)(rhf_addr - rcd->rhf_offset + offset); } -static inline struct ib_header *hfi1_get_msgheader(struct hfi1_devdata *dd, +static inline struct ib_header *hfi1_get_msgheader(struct hfi1_ctxtdata *rcd, __le32 *rhf_addr) { - return (struct ib_header *)hfi1_get_header(dd, rhf_addr); + return (struct ib_header *)hfi1_get_header(rcd, rhf_addr); } static inline struct hfi1_16b_header - *hfi1_get_16B_header(struct hfi1_devdata *dd, + *hfi1_get_16B_header(struct hfi1_ctxtdata *rcd, __le32 *rhf_addr) { - return (struct hfi1_16b_header *)hfi1_get_header(dd, rhf_addr); + return (struct hfi1_16b_header *)hfi1_get_header(rcd, rhf_addr); } /* @@ -591,13 +591,12 @@ static void __prescan_rxq(struct hfi1_packet *packet) init_ps_mdata(&mdata, packet); while (1) { - struct hfi1_devdata *dd = rcd->dd; struct hfi1_ibport *ibp = rcd_to_iport(rcd); __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head + - dd->rhf_offset; + packet->rcd->rhf_offset; struct rvt_qp *qp; struct ib_header *hdr; - struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; + struct rvt_dev_info *rdi = &rcd->dd->verbs_dev.rdi; u64 rhf = rhf_to_cpu(rhf_addr); u32 etype = rhf_rcv_type(rhf), qpn, bth1; int is_ecn = 0; @@ -612,7 +611,7 @@ static void __prescan_rxq(struct hfi1_packet *packet) if (etype != RHF_RCV_TYPE_IB) goto next; - packet->hdr = hfi1_get_msgheader(dd, rhf_addr); + packet->hdr = hfi1_get_msgheader(packet->rcd, rhf_addr); hdr = packet->hdr; lnh = ib_get_lnh(hdr); @@ -718,7 +717,7 @@ static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread) ret = check_max_packet(packet, thread); packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff + - packet->rcd->dd->rhf_offset; + packet->rcd->rhf_offset; packet->rhf = rhf_to_cpu(packet->rhf_addr); return ret; @@ -768,7 +767,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread) ret = check_max_packet(packet, thread); packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff + - packet->rcd->dd->rhf_offset; + packet->rcd->rhf_offset; packet->rhf = rhf_to_cpu(packet->rhf_addr); return ret; @@ -949,12 +948,12 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, u8 sc = SC15_PACKET; if (etype == RHF_RCV_TYPE_IB) { - struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd, + struct ib_header *hdr = hfi1_get_msgheader(packet->rcd, packet->rhf_addr); sc = hfi1_9B_get_sc5(hdr, packet->rhf); } else if (etype == RHF_RCV_TYPE_BYPASS) { struct hfi1_16b_header *hdr = hfi1_get_16B_header( - packet->rcd->dd, + packet->rcd, packet->rhf_addr); sc = hfi1_16B_get_sc(hdr); } @@ -1034,7 +1033,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) packet.rhqoff += packet.rsize; packet.rhf_addr = (__le32 *)rcd->rcvhdrq + packet.rhqoff + - dd->rhf_offset; + rcd->rhf_offset; packet.rhf = 
rhf_to_cpu(packet.rhf_addr); } else if (skip_pkt) { @@ -1384,7 +1383,7 @@ bail: static inline void hfi1_setup_ib_header(struct hfi1_packet *packet) { packet->hdr = (struct hfi1_ib_message_header *) - hfi1_get_msgheader(packet->rcd->dd, + hfi1_get_msgheader(packet->rcd, packet->rhf_addr); packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; } @@ -1485,7 +1484,7 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet) u8 l4; packet->hdr = (struct hfi1_16b_header *) - hfi1_get_16B_header(packet->rcd->dd, + hfi1_get_16B_header(packet->rcd, packet->rhf_addr); l4 = hfi1_16B_get_l4(packet->hdr); if (l4 == OPA_16B_L4_IB_LOCAL) { @@ -1719,9 +1718,8 @@ void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd) init_ps_mdata(&mdata, &packet); while (1) { - struct hfi1_devdata *dd = rcd->dd; __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head + - dd->rhf_offset; + rcd->rhf_offset; struct ib_header *hdr; u64 rhf = rhf_to_cpu(rhf_addr); u32 etype = rhf_rcv_type(rhf), qpn; @@ -1738,7 +1736,7 @@ void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd) if (etype > RHF_RCV_TYPE_IB) goto next; - packet.hdr = hfi1_get_msgheader(dd, rhf_addr); + packet.hdr = hfi1_get_msgheader(rcd, rhf_addr); hdr = packet.hdr; lnh = be16_to_cpu(hdr->lrh[0]) & 3; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index b0a44222863c..b30985a915e1 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -206,7 +206,9 @@ struct hfi1_ctxtdata { /* number of rcvhdrq entries */ u16 rcvhdrq_cnt; /* size of each of the rcvhdrq entries */ - u16 rcvhdrqentsize; + u8 rcvhdrqentsize; + /* offset of RHF within receive header entry */ + u8 rhf_offset; /* mmap of hdrq, must fit in 44 bits */ dma_addr_t rcvhdrq_dma; dma_addr_t rcvhdrqtailaddr_dma; @@ -1330,7 +1332,6 @@ struct hfi1_devdata { seqlock_t sc2vl_lock ____cacheline_aligned_in_smp; u64 sc2vl[4]; u64 __percpu *rcv_limit; - u16 rhf_offset; /* offset of RHF within receive header entry */ /* adding a new field here would make it part of this cacheline */ /* OUI comes from the HW. Used everywhere as 3 separate bytes. */ @@ -1469,7 +1470,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, /* calculate the current RHF address */ static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd) { - return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->dd->rhf_offset; + return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->rhf_offset; } int hfi1_reset_device(int); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 0c45924a1e21..f3e0b89660ae 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -405,6 +405,8 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, rcd->rcvhdrq_cnt = rcvhdrcnt; rcd->rcvhdrqentsize = hfi1_hdrq_entsize; + rcd->rhf_offset = + rcd->rcvhdrqentsize - sizeof(u64) / sizeof(u32); /* * Simple Eager buffer allocation: we have already pre-allocated * the number of RcvArray entry groups. Each ctxtdata structure -- cgit From 32e3d97079ad0d9adb36af2d2d09a806100690e9 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 4 Jun 2018 11:43:46 -0700 Subject: IB/hfi1: Remove rcvhdrsize The field is based on a constant that can never change. Use the define to assign the register instead. Reviewed-by: Michael J. 
Ruhl Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 2 -- drivers/infiniband/hw/hfi1/hfi.h | 2 -- drivers/infiniband/hw/hfi1/init.c | 2 +- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index b13636c95756..ccbdce2e0178 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -15216,8 +15216,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, */ aspm_init(dd); - dd->rcvhdrsize = DEFAULT_RCVHDRSIZE; - ret = init_pervl_scs(dd); if (ret) goto bail_cleanup; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index b30985a915e1..ba6dd2c3aa6a 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1136,8 +1136,6 @@ struct hfi1_devdata { /* these are the "32 bit" regs */ - /* value we put in kr_rcvhdrsize */ - u32 rcvhdrsize; /* number of receive contexts the chip supports */ u32 chip_rcv_contexts; /* number of receive array entries */ diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index f3e0b89660ae..f2a0b031bef2 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1887,7 +1887,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) & RCV_HDR_ENT_SIZE_ENT_SIZE_MASK) << RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT; write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_ENT_SIZE, reg); - reg = (dd->rcvhdrsize & RCV_HDR_SIZE_HDR_SIZE_MASK) + reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK) << RCV_HDR_SIZE_HDR_SIZE_SHIFT; write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_SIZE, reg); -- cgit From 2e2ba09e48b72497a9b69fc49ab693f7f0c2e5cf Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 4 Jun 2018 11:44:02 -0700 Subject: IB/rdmavt, IB/hfi1: Create device dependent s_flags Move some s_flags defines out of rdmavt and into hfi1 because they are hfi1 specific and therefore should remain in the driver instead of bubbling up to rdmavt. Document device specific ranges in rdmavt and remap those in hfi1. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/pio.c | 6 +++--- drivers/infiniband/hw/hfi1/qp.c | 6 +++--- drivers/infiniband/hw/hfi1/qp.h | 24 ++++++++++++++++++++++-- drivers/infiniband/hw/hfi1/rc.c | 6 +++--- drivers/infiniband/hw/hfi1/ruc.c | 14 +++++++------- drivers/infiniband/hw/hfi1/verbs.c | 4 ++-- include/rdma/rdmavt_qp.h | 30 +++++++++++++++++------------- 7 files changed, 57 insertions(+), 33 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 9cac15d10c4f..363ab0f35369 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015-2017 Intel Corporation. + * Copyright(c) 2015-2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
@@ -1618,11 +1618,11 @@ static void sc_piobufavail(struct send_context *sc) /* Wake up the most starved one first */ if (n) hfi1_qp_wakeup(qps[max_idx], - RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN); + RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); for (i = 0; i < n; i++) if (i != max_idx) hfi1_qp_wakeup(qps[i], - RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN); + RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); } /* translate a send credit update to a bit code of reasons */ diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 1697d96151bd..9b1e84a6b1cc 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -273,7 +273,7 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_PATH_MIG_STATE && attr->path_mig_state == IB_MIG_MIGRATED && qp->s_mig_state == IB_MIG_ARMED) { - qp->s_flags |= RVT_S_AHG_CLEAR; + qp->s_flags |= HFI1_S_AHG_CLEAR; priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr); priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc); @@ -717,7 +717,7 @@ void hfi1_migrate_qp(struct rvt_qp *qp) qp->remote_ah_attr = qp->alt_ah_attr; qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr); qp->s_pkey_index = qp->s_alt_pkey_index; - qp->s_flags |= RVT_S_AHG_CLEAR; + qp->s_flags |= HFI1_S_AHG_CLEAR; priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr); priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); qp_set_16b(qp); diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h index b2d4cba8d15b..078cff7560b6 100644 --- a/drivers/infiniband/hw/hfi1/qp.h +++ b/drivers/infiniband/hw/hfi1/qp.h @@ -1,7 +1,7 @@ #ifndef _QP_H #define _QP_H /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
@@ -69,6 +69,26 @@ static inline int hfi1_send_ok(struct rvt_qp *qp) !(qp->s_flags & RVT_S_ANY_WAIT_SEND)); } +/* + * Driver specific s_flags starting at bit 31 down to HFI1_S_MIN_BIT_MASK + * + * HFI1_S_AHG_VALID - ahg header valid on chip + * HFI1_S_AHG_CLEAR - have send engine clear ahg state + * HFI1_S_WAIT_PIO_DRAIN - qp waiting for PIOs to drain + * HFI1_S_MIN_BIT_MASK - the lowest bit that can be used by hfi1 + */ +#define HFI1_S_AHG_VALID 0x80000000 +#define HFI1_S_AHG_CLEAR 0x40000000 +#define HFI1_S_WAIT_PIO_DRAIN 0x20000000 +#define HFI1_S_MIN_BIT_MASK 0x01000000 + +/* + * overload wait defines + */ + +#define HFI1_S_ANY_WAIT_IO (RVT_S_ANY_WAIT_IO | HFI1_S_WAIT_PIO_DRAIN) +#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND) + /* * free_ahg - clear ahg from QP */ @@ -77,7 +97,7 @@ static inline void clear_ahg(struct rvt_qp *qp) struct hfi1_qp_priv *priv = qp->priv; priv->s_ahg->ahgcount = 0; - qp->s_flags &= ~(RVT_S_AHG_VALID | RVT_S_AHG_CLEAR); + qp->s_flags &= ~(HFI1_S_AHG_VALID | HFI1_S_AHG_CLEAR); if (priv->s_sde && qp->s_ahgidx >= 0) sdma_ahg_free(priv->s_sde, qp->s_ahgidx); qp->s_ahgidx = -1; diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 1a1a47ac53c6..1d31bd2fa91f 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -241,7 +241,7 @@ bail: smp_wmb(); qp->s_flags &= ~(RVT_S_RESP_PENDING | RVT_S_ACK_PENDING - | RVT_S_AHG_VALID); + | HFI1_S_AHG_VALID); return 0; } @@ -1024,7 +1024,7 @@ done: if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) && (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)) qp->s_flags |= RVT_S_WAIT_PSN; - qp->s_flags &= ~RVT_S_AHG_VALID; + qp->s_flags &= ~HFI1_S_AHG_VALID; } /* diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index ef4c566e206f..5f56f3c1b4c4 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -194,7 +194,7 @@ static void ruc_loopback(struct rvt_qp *sqp) spin_lock_irqsave(&sqp->s_lock, flags); /* Return if we are already busy processing a work request. 
*/ - if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) || + if ((sqp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT)) || !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND)) goto unlock; @@ -533,9 +533,9 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn) { struct hfi1_qp_priv *priv = qp->priv; - if (unlikely(qp->s_flags & RVT_S_AHG_CLEAR)) + if (unlikely(qp->s_flags & HFI1_S_AHG_CLEAR)) clear_ahg(qp); - if (!(qp->s_flags & RVT_S_AHG_VALID)) { + if (!(qp->s_flags & HFI1_S_AHG_VALID)) { /* first middle that needs copy */ if (qp->s_ahgidx < 0) qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde); @@ -544,7 +544,7 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn) priv->s_ahg->tx_flags |= SDMA_TXREQ_F_AHG_COPY; /* save to protect a change in another thread */ priv->s_ahg->ahgidx = qp->s_ahgidx; - qp->s_flags |= RVT_S_AHG_VALID; + qp->s_flags |= HFI1_S_AHG_VALID; } } else { /* subsequent middle after valid */ @@ -650,7 +650,7 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, if (middle) build_ahg(qp, bth2); else - qp->s_flags &= ~RVT_S_AHG_VALID; + qp->s_flags &= ~HFI1_S_AHG_VALID; bth0 |= pkey; bth0 |= extra_bytes << 20; @@ -727,7 +727,7 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, if (middle) build_ahg(qp, bth2); else - qp->s_flags &= ~RVT_S_AHG_VALID; + qp->s_flags &= ~HFI1_S_AHG_VALID; bth0 |= pkey; bth0 |= extra_bytes << 20; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index b7c75b63f887..5cef1224fa9c 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1007,7 +1007,7 @@ static int pio_wait(struct rvt_qp *qp, int was_empty; dev->n_piowait += !!(flag & RVT_S_WAIT_PIO); - dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN); + dev->n_piodrain += !!(flag & HFI1_S_WAIT_PIO_DRAIN); qp->s_flags |= flag; was_empty = list_empty(&sc->piowait); iowait_queue(ps->pkts_sent, &priv->s_iowait, @@ -1376,7 +1376,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) return pio_wait(qp, ps->s_txreq->psc, ps, - RVT_S_WAIT_PIO_DRAIN); + HFI1_S_WAIT_PIO_DRAIN); return sr(qp, ps, 0); } diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 1145a4c154b2..927f6d5b6d0f 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -2,7 +2,7 @@ #define DEF_RDMAVT_INCQP_H /* - * Copyright(c) 2016, 2017 Intel Corporation. + * Copyright(c) 2016 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
@@ -91,6 +91,7 @@ * RVT_S_WAIT_ACK - waiting for an ACK packet before sending more requests * RVT_S_SEND_ONE - send one packet, request ACK, then wait for ACK * RVT_S_ECN - a BECN was queued to the send engine + * RVT_S_MAX_BIT_MASK - The max bit that can be used by rdmavt */ #define RVT_S_SIGNAL_REQ_WR 0x0001 #define RVT_S_BUSY 0x0002 @@ -103,23 +104,26 @@ #define RVT_S_WAIT_SSN_CREDIT 0x0100 #define RVT_S_WAIT_DMA 0x0200 #define RVT_S_WAIT_PIO 0x0400 -#define RVT_S_WAIT_PIO_DRAIN 0x0800 -#define RVT_S_WAIT_TX 0x1000 -#define RVT_S_WAIT_DMA_DESC 0x2000 -#define RVT_S_WAIT_KMEM 0x4000 -#define RVT_S_WAIT_PSN 0x8000 -#define RVT_S_WAIT_ACK 0x10000 -#define RVT_S_SEND_ONE 0x20000 -#define RVT_S_UNLIMITED_CREDIT 0x40000 -#define RVT_S_AHG_VALID 0x80000 -#define RVT_S_AHG_CLEAR 0x100000 -#define RVT_S_ECN 0x200000 +#define RVT_S_WAIT_TX 0x0800 +#define RVT_S_WAIT_DMA_DESC 0x1000 +#define RVT_S_WAIT_KMEM 0x2000 +#define RVT_S_WAIT_PSN 0x4000 +#define RVT_S_WAIT_ACK 0x8000 +#define RVT_S_SEND_ONE 0x10000 +#define RVT_S_UNLIMITED_CREDIT 0x20000 +#define RVT_S_ECN 0x40000 +#define RVT_S_MAX_BIT_MASK 0x800000 + +/* + * Drivers should use s_flags starting with bit 31 down to the bit next to + * RVT_S_MAX_BIT_MASK + */ /* * Wait flags that would prevent any packet type from being sent. */ #define RVT_S_ANY_WAIT_IO \ - (RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN | RVT_S_WAIT_TX | \ + (RVT_S_WAIT_PIO | RVT_S_WAIT_TX | \ RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM) /* -- cgit From cfdeb8934b6f85f235071e9d3700756a0a12758d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 19 Jun 2018 10:39:06 +0300 Subject: RDMA/mlx5: Refactor transport domain checks Put all relevant checks for transport domain in the mlx5_ib_alloc/dealloc_transport_domain functions. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index c29c7c838980..dcb780b4f199 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1584,6 +1584,9 @@ static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn) { int err; + if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) + return 0; + err = mlx5_core_alloc_transport_domain(dev->mdev, tdn); if (err) return err; @@ -1605,6 +1608,9 @@ static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn) static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn) { + if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) + return; + mlx5_core_dealloc_transport_domain(dev->mdev, tdn); if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) || @@ -1729,11 +1735,9 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range; #endif - if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) { - err = mlx5_ib_alloc_transport_domain(dev, &context->tdn); - if (err) - goto out_uars; - } + err = mlx5_ib_alloc_transport_domain(dev, &context->tdn); + if (err) + goto out_uars; if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { /* Block DEVX on Infiniband as of SELinux */ @@ -1821,8 +1825,7 @@ out_mdev: if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) mlx5_ib_devx_destroy(dev, context); out_td: - if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) - mlx5_ib_dealloc_transport_domain(dev, context->tdn); + mlx5_ib_dealloc_transport_domain(dev, context->tdn); 
out_uars: deallocate_uars(dev, context); @@ -1849,8 +1852,7 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) mlx5_ib_devx_destroy(dev, context); bfregi = &context->bfregi; - if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) - mlx5_ib_dealloc_transport_domain(dev, context->tdn); + mlx5_ib_dealloc_transport_domain(dev, context->tdn); deallocate_uars(dev, context); kfree(bfregi->sys_pages); -- cgit From 9f876f3de6616f02960d7d88ad52c805946f4b63 Mon Sep 17 00:00:00 2001 From: Talat Batheesh Date: Thu, 21 Jun 2018 15:37:56 +0300 Subject: IB/mlx5: Support RoCE ICRC encapsulated error counter This patch adds support to query the counter that counts the RoCE packets with corrupted ICRC (Invariant Cyclic Redundancy Code). This counter will be under /sys/class/infiniband//ports//hw_counters/ rx_icrc_encapsulated - The number of RoCE packets with ICRC error. Signed-off-by: Talat Batheesh Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/cmd.c | 12 +++++++ drivers/infiniband/hw/mlx5/cmd.h | 1 + drivers/infiniband/hw/mlx5/main.c | 62 ++++++++++++++++++++++++++++++++++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + 4 files changed, 73 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index 188512bf46e6..026717eaa92d 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -170,3 +170,15 @@ int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length) return err; } + +int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out) +{ + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + + MLX5_SET(ppcnt_reg, in, local_port, 1); + + MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); + return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPCNT, + 0, 0); +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index e7206c8a8011..b946c47c3069 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -40,6 +40,7 @@ int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey); int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, void *out, int out_size); +int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out); int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, void *in, int in_size); int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e52dd21519b4..ce37df26bf2a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4683,6 +4683,15 @@ static const struct mlx5_ib_counter extended_err_cnts[] = { INIT_Q_COUNTER(req_cqe_flush_error), }; +#define INIT_EXT_PPCNT_COUNTER(_name) \ + { .name = #_name, .offset = \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_extended_cntrs_grp_data_layout._name##_high)} + +static const struct mlx5_ib_counter ext_ppcnt_cnts[] = { + INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated), +}; + static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) { int i; @@ -4718,7 +4727,10 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, cnts->num_cong_counters = ARRAY_SIZE(cong_cnts); num_counters += ARRAY_SIZE(cong_cnts); } - + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { + cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts); + num_counters += 
ARRAY_SIZE(ext_ppcnt_cnts); + } cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL); if (!cnts->names) return -ENOMEM; @@ -4775,6 +4787,13 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, offsets[j] = cong_cnts[i].offset; } } + + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { + for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) { + names[j] = ext_ppcnt_cnts[i].name; + offsets[j] = ext_ppcnt_cnts[i].offset; + } + } } static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) @@ -4820,7 +4839,8 @@ static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, return rdma_alloc_hw_stats_struct(port->cnts.names, port->cnts.num_q_counters + - port->cnts.num_cong_counters, + port->cnts.num_cong_counters + + port->cnts.num_ext_ppcnt_counters, RDMA_HW_STATS_DEFAULT_LIFESPAN); } @@ -4853,6 +4873,34 @@ free: return ret; } +static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev, + struct mlx5_ib_port *port, + struct rdma_hw_stats *stats) +{ + int offset = port->cnts.num_q_counters + port->cnts.num_cong_counters; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + int ret, i; + void *out; + + out = kvzalloc(sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + ret = mlx5_cmd_query_ext_ppcnt_counters(dev->mdev, out); + if (ret) + goto free; + + for (i = 0; i < port->cnts.num_ext_ppcnt_counters; i++) { + stats->value[i + offset] = + be64_to_cpup((__be64 *)(out + + port->cnts.offsets[i + offset])); + } + +free: + kvfree(out); + return ret; +} + static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, u8 port_num, int index) @@ -4866,13 +4914,21 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, if (!stats) return -EINVAL; - num_counters = port->cnts.num_q_counters + port->cnts.num_cong_counters; + num_counters = port->cnts.num_q_counters + + port->cnts.num_cong_counters + + port->cnts.num_ext_ppcnt_counters; /* q_counters are per IB device, query the master mdev */ ret = mlx5_ib_query_q_counters(dev->mdev, port, stats); if (ret) return ret; + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { + ret = mlx5_ib_query_ext_ppcnt_counters(dev, port, stats); + if (ret) + return ret; + } + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d89c8fe626f6..298d6a341bb2 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -665,6 +665,7 @@ struct mlx5_ib_counters { size_t *offsets; u32 num_q_counters; u32 num_cong_counters; + u32 num_ext_ppcnt_counters; u16 set_id; bool set_id_valid; }; -- cgit From 321d7863acf7b1cf921ac18cd5ad5483b3cbb7ec Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 20 Jun 2018 15:47:11 -0600 Subject: IB/uverbs: Delete type and id from uverbs_obj_attr In this context the uobject is not allowed to be NULL, so type is the same as uobject->type, and at least for IDR, id is the same as uobject->id. FD objects should never handle the FD number outside the uAPI boundary code. 
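For illustration only, a minimal sketch (not part of the patch) of how a handler can still reach the id through the resolved uobject; HYPOTHETICAL_ATTR_IDX is a made-up attribute index, uverbs_attr_get() is the existing bundle accessor, and error handling is elided:

	const struct uverbs_attr *attr =
		uverbs_attr_get(attrs_bundle, HYPOTHETICAL_ATTR_IDX);

	if (!IS_ERR(attr)) {
		struct ib_uobject *uobj = attr->obj_attr.uobject;

		/* The id previously cached in uverbs_obj_attr remains
		 * available from the uobject itself for IDR objects.
		 */
		pr_debug("resolved uobject, id %d\n", uobj->id);
	}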
Suggested-by: Guy Levi Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_ioctl.c | 6 ++---- include/rdma/uverbs_ioctl.h | 4 ---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 20be6835291e..03065bad8dae 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -152,14 +152,12 @@ static int uverbs_process_attr(struct ib_device *ibdev, object = uverbs_get_object(ibdev, spec->obj.obj_type); if (!object) return -EINVAL; - o_attr->type = object->type_attrs; - o_attr->id = (int)uattr->data; o_attr->uobject = uverbs_get_uobject_from_context( - o_attr->type, + object->type_attrs, ucontext, spec->obj.access, - o_attr->id); + (int)uattr->data); if (IS_ERR(o_attr->uobject)) return PTR_ERR(o_attr->uobject); diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 970357d0ccc4..90a4947ff548 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -344,11 +344,7 @@ struct uverbs_ptr_attr { }; struct uverbs_obj_attr { - /* pointer to the kernel descriptor -> type, access, etc */ - const struct uverbs_obj_type *type; struct ib_uobject *uobject; - /* fd or id in idr of this object */ - int id; }; struct uverbs_attr { -- cgit From 88145678924891bdb959010de20b1bb4f27542d5 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 21 Jun 2018 15:31:24 +0300 Subject: RDMA/cma: Consider net namespace while leaving multicast group When sending multicast leave request, consider the net ns in which this cm_id is created. Code was duplicated in cma_leave_mc_groups() and rdma_leave_multicast(), which is now done using a helper function cma_leave_roce_mc_group(). Fixes: bee3c3c91865 ("IB/cma: Join and leave multicast groups with IGMP") Reviewed-by: Daniel Jurgens Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 57 ++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 33 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index c2f478761ae9..fca2854749e5 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1629,6 +1629,28 @@ static void cma_release_port(struct rdma_id_private *id_priv) mutex_unlock(&lock); } +static void cma_leave_roce_mc_group(struct rdma_id_private *id_priv, + struct cma_multicast *mc) +{ + if (mc->igmp_joined) { + struct rdma_dev_addr *dev_addr = + &id_priv->id.route.addr.dev_addr; + struct net_device *ndev = NULL; + + if (dev_addr->bound_dev_if) + ndev = dev_get_by_index(dev_addr->net, + dev_addr->bound_dev_if); + if (ndev) { + cma_igmp_send(ndev, + &mc->multicast.ib->rec.mgid, + false); + dev_put(ndev); + } + mc->igmp_joined = false; + } + kref_put(&mc->mcref, release_mc); +} + static void cma_leave_mc_groups(struct rdma_id_private *id_priv) { struct cma_multicast *mc; @@ -1642,22 +1664,7 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv) ib_sa_free_multicast(mc->multicast.ib); kfree(mc); } else { - if (mc->igmp_joined) { - struct rdma_dev_addr *dev_addr = - &id_priv->id.route.addr.dev_addr; - struct net_device *ndev = NULL; - - if (dev_addr->bound_dev_if) - ndev = dev_get_by_index(&init_net, - dev_addr->bound_dev_if); - if (ndev) { - cma_igmp_send(ndev, - &mc->multicast.ib->rec.mgid, - false); - dev_put(ndev); - } - } - kref_put(&mc->mcref, release_mc); + cma_leave_roce_mc_group(id_priv, mc); } } } @@ -4268,23 +4275,7 @@ 
void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) ib_sa_free_multicast(mc->multicast.ib); kfree(mc); } else if (rdma_protocol_roce(id->device, id->port_num)) { - if (mc->igmp_joined) { - struct rdma_dev_addr *dev_addr = - &id->route.addr.dev_addr; - struct net_device *ndev = NULL; - - if (dev_addr->bound_dev_if) - ndev = dev_get_by_index(dev_addr->net, - dev_addr->bound_dev_if); - if (ndev) { - cma_igmp_send(ndev, - &mc->multicast.ib->rec.mgid, - false); - dev_put(ndev); - } - mc->igmp_joined = false; - } - kref_put(&mc->mcref, release_mc); + cma_leave_roce_mc_group(id_priv, mc); } return; } -- cgit From 59d40813328f405976774662ddb530c6e9e9df52 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 21 Jun 2018 15:31:25 +0300 Subject: IB/core: Free GID table entry during GID deletion If we already hold the table->lock when doing the kref_put it means we are in a context where it is safe to do the deletion synchronously, with no need for the work queue. This helps to eliminate issues when GID change is requested as part of MAC address change or bonding event change where expectation is to replace the GID almost immediately. Fixes: b150c3862d21 ("IB/core: Introduce GID entry reference counts") Reviewed-by: Daniel Jurgens Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 9846373c5cbc..dada33c53188 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -206,7 +206,7 @@ static void schedule_free_gid(struct kref *kref) queue_work(ib_wq, &entry->del_work); } -static void free_gid_entry(struct ib_gid_table_entry *entry) +static void free_gid_entry_locked(struct ib_gid_table_entry *entry) { struct ib_device *device = entry->attr.device; u8 port_num = entry->attr.port_num; @@ -216,10 +216,10 @@ static void free_gid_entry(struct ib_gid_table_entry *entry) device->name, port_num, entry->attr.index, entry->attr.gid.raw); - mutex_lock(&table->lock); if (rdma_cap_roce_gid_table(device, port_num) && entry->state != GID_TABLE_ENTRY_INVALID) device->del_gid(&entry->attr, &entry->context); + write_lock_irq(&table->rwlock); /* @@ -232,13 +232,20 @@ static void free_gid_entry(struct ib_gid_table_entry *entry) table->data_vec[entry->attr.index] = NULL; /* Now this index is ready to be allocated */ write_unlock_irq(&table->rwlock); - mutex_unlock(&table->lock); if (entry->attr.ndev) dev_put(entry->attr.ndev); kfree(entry); } +static void free_gid_entry(struct kref *kref) +{ + struct ib_gid_table_entry *entry = + container_of(kref, struct ib_gid_table_entry, kref); + + free_gid_entry_locked(entry); +} + /** * free_gid_work - Release reference to the GID entry * @work: Work structure to refer to GID entry which needs to be @@ -251,7 +258,13 @@ static void free_gid_work(struct work_struct *work) { struct ib_gid_table_entry *entry = container_of(work, struct ib_gid_table_entry, del_work); - free_gid_entry(entry); + struct ib_device *device = entry->attr.device; + u8 port_num = entry->attr.port_num; + struct ib_gid_table *table = rdma_gid_table(device, port_num); + + mutex_lock(&table->lock); + free_gid_entry_locked(entry); + mutex_unlock(&table->lock); } static struct ib_gid_table_entry * @@ -296,6 +309,11 @@ static void put_gid_entry(struct ib_gid_table_entry *entry) kref_put(&entry->kref, schedule_free_gid); } +static void 
put_gid_entry_locked(struct ib_gid_table_entry *entry) +{ + kref_put(&entry->kref, free_gid_entry); +} + static int add_roce_gid(struct ib_gid_table_entry *entry) { const struct ib_gid_attr *attr = &entry->attr; @@ -398,7 +416,7 @@ static void del_gid(struct ib_device *ib_dev, u8 port, table->data_vec[ix] = NULL; write_unlock_irq(&table->rwlock); - put_gid_entry(entry); + put_gid_entry_locked(entry); } /* rwlock should be read locked, or lock should be held */ -- cgit From b25784312840bcf33558c12e5b2563029ad9dec6 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 20 Jun 2018 09:42:31 -0700 Subject: IB/hfi1: Remove rcvhdrq_size The usage of this ctxt data field is not hot path and the value can be computed on demand to cut down the ctxtdata bloat. Reviewed-by: Michael J. Ruhl Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 2 +- drivers/infiniband/hw/hfi1/file_ops.c | 6 +++--- drivers/infiniband/hw/hfi1/hfi.h | 15 +++++++++++++-- drivers/infiniband/hw/hfi1/init.c | 11 ++--------- 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index ccbdce2e0178..c254f47a5922 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -11857,7 +11857,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, * sequence numbers could land exactly on the same spot. * E.g. a rcd restart before the receive header wrapped. */ - memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size); + memset(rcd->rcvhdrq, 0, rcvhdrq_size(rcd)); /* starting timeout */ rcd->rcvavail_timeout = dd->rcv_intr_timeout_csr; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 0fc4aa9455c3..3b09eedd29bc 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -411,7 +411,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) mapio = 1; break; case RCV_HDRQ: - memlen = uctxt->rcvhdrq_size; + memlen = rcvhdrq_size(uctxt); memvirt = uctxt->rcvhdrq; break; case RCV_EGRBUF: { @@ -521,7 +521,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) break; case SUBCTXT_RCV_HDRQ: memaddr = (u64)uctxt->subctxt_rcvhdr_base; - memlen = uctxt->rcvhdrq_size * uctxt->subctxt_cnt; + memlen = rcvhdrq_size(uctxt) * uctxt->subctxt_cnt; flags |= VM_IO | VM_DONTEXPAND; vmf = 1; break; @@ -1040,7 +1040,7 @@ static int setup_subctxt(struct hfi1_ctxtdata *uctxt) return -ENOMEM; /* We can take the size of the RcvHdr Queue from the master */ - uctxt->subctxt_rcvhdr_base = vmalloc_user(uctxt->rcvhdrq_size * + uctxt->subctxt_rcvhdr_base = vmalloc_user(rcvhdrq_size(uctxt) * num_subctxts); if (!uctxt->subctxt_rcvhdr_base) { ret = -ENOMEM; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index ba6dd2c3aa6a..4a8e5930d539 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -201,8 +201,6 @@ struct hfi1_ctxtdata { volatile __le64 *rcvhdrtail_kvaddr; /* when waiting for rcv or pioavail */ wait_queue_head_t wait; - /* rcvhdrq size (for freeing) */ - size_t rcvhdrq_size; /* number of rcvhdrq entries */ u16 rcvhdrq_cnt; /* size of each of the rcvhdrq entries */ @@ -324,6 +322,19 @@ struct hfi1_ctxtdata { u8 vnic_q_idx; }; +/** + * rcvhdrq_size - return total size in bytes for header queue + * @rcd: the receive context + * + * rcvhdrqentsize is in DWs, so we have to convert to bytes + 
* + */ +static inline u32 rcvhdrq_size(struct hfi1_ctxtdata *rcd) +{ + return PAGE_ALIGN(rcd->rcvhdrq_cnt * + rcd->rcvhdrqentsize * sizeof(u32)); +} + /* * Represents a single packet at a high level. Put commonly computed things in * here so we do not have to keep doing them over and over. The rule of thumb is diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index f2a0b031bef2..47a1f5c34dcb 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1129,7 +1129,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) return; if (rcd->rcvhdrq) { - dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size, + dma_free_coherent(&dd->pcidev->dev, rcvhdrq_size(rcd), rcd->rcvhdrq, rcd->rcvhdrq_dma); rcd->rcvhdrq = NULL; if (rcd->rcvhdrtail_kvaddr) { @@ -1840,12 +1840,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) if (!rcd->rcvhdrq) { gfp_t gfp_flags; - /* - * rcvhdrqentsize is in DWs, so we have to convert to bytes - * (* sizeof(u32)). - */ - amt = PAGE_ALIGN(rcd->rcvhdrq_cnt * rcd->rcvhdrqentsize * - sizeof(u32)); + amt = rcvhdrq_size(rcd); if (rcd->ctxt < dd->first_dyn_alloc_ctxt || rcd->is_vnic) gfp_flags = GFP_KERNEL; @@ -1870,8 +1865,6 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) if (!rcd->rcvhdrtail_kvaddr) goto bail_free; } - - rcd->rcvhdrq_size = amt; } /* * These values are per-context: -- cgit From b67bbc5923bf99b4898fd3d4ed2337e8b716b448 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 20 Jun 2018 09:42:40 -0700 Subject: IB/hfi1: Remove rcvctrl from ctxtdata It is only ever written. Reviewed-by: Michael J. Ruhl Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 3 +-- drivers/infiniband/hw/hfi1/hfi.h | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index c254f47a5922..55675ab694a1 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -11952,9 +11952,8 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK; if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS) rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK; - rcd->rcvctrl = rcvctrl; hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl); - write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcd->rcvctrl); + write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcvctrl); /* work around sticky RcvCtxtStatus.BlockedRHQFull */ if (did_enable && diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 4a8e5930d539..837f49c7cdb0 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -193,8 +193,6 @@ struct exp_tid_set { typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet); struct hfi1_ctxtdata { - /* shadow the ctxt's RcvCtrl register */ - u64 rcvctrl; /* rcvhdrq base, needs mmap before useful */ void *rcvhdrq; /* kernel virtual address where hdrqtail is updated */ -- cgit From 4b0b76bd37fe3de94f368b9fa146792d74207266 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 20 Jun 2018 09:42:49 -0700 Subject: IB/hfi1: Rightsize ctxt_eager_bufs fields Fields in this structure are sized excessively based on hardware limitations and input values. Fix by reducing fields as appropriate and repositioning to close holes in the structure. Reviewed-by: Michael J. 
Ruhl Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/hfi.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 837f49c7cdb0..552acd744ebe 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -169,12 +169,6 @@ extern const struct pci_error_handlers hfi1_pci_err_handler; struct hfi1_opcode_stats_perctx; struct ctxt_eager_bufs { - ssize_t size; /* total size of eager buffers */ - u32 count; /* size of buffers array */ - u32 numbufs; /* number of buffers allocated */ - u32 alloced; /* number of rcvarray entries used */ - u32 rcvtid_size; /* size of each eager rcv tid */ - u32 threshold; /* head update threshold */ struct eager_buffer { void *addr; dma_addr_t dma; @@ -184,6 +178,12 @@ struct ctxt_eager_bufs { void *addr; dma_addr_t dma; } *rcvtids; + u32 size; /* total size of eager buffers */ + u32 rcvtid_size; /* size of each eager rcv tid */ + u16 count; /* size of buffers array */ + u16 numbufs; /* number of buffers allocated */ + u16 alloced; /* number of rcvarray entries used */ + u16 threshold; /* head update threshold */ }; struct exp_tid_set { -- cgit From 15d063d5db188b3d6ef5c5d73d59b47895454861 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 20 Jun 2018 09:42:57 -0700 Subject: IB/hfi1: Remove unused/writeonly devdata fields Reviewed-by: Michael J. Ruhl Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/hfi.h | 6 ------ drivers/infiniband/hw/hfi1/init.c | 3 --- 2 files changed, 9 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 552acd744ebe..3b3d435a2f90 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1115,8 +1115,6 @@ struct hfi1_devdata { /* base receive interrupt timeout, in CSR units */ u32 rcv_intr_timeout_csr; - u32 freezelen; /* max length of freezemsg */ - u64 __iomem *egrtidbase; spinlock_t sendctrl_lock; /* protect changes to SendCtrl */ spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */ spinlock_t uctxt_lock; /* protect rcd changes */ @@ -1156,10 +1154,6 @@ struct hfi1_devdata { /* number of bytes in the SDMA memory buffer */ u32 chip_sdma_mem_size; - /* size of each rcvegrbuffer */ - u32 rcvegrbufsize; - /* log2 of above */ - u16 rcvegrbufsize_shift; /* both sides of the PCIe link are gen3 capable */ u8 link_gen3_capable; u8 dc_shutdown; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 47a1f5c34dcb..26fca0c5199c 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -933,9 +933,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) dd->status = vmalloc_user(PAGE_SIZE); if (!dd->status) dd_dev_err(dd, "Failed to allocate dev status page\n"); - else - dd->freezelen = PAGE_SIZE - (sizeof(*dd->status) - - sizeof(dd->status->freezemsg)); for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; if (dd->status) -- cgit From 06e81e3e920806593fa7459ee52bf9b978a8d260 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 20 Jun 2018 09:43:06 -0700 Subject: IB/hfi1: Remove caches of chip CSRs Remove the sizeable cache of the chip sizing CSRs and replace with CSR reads as needed. Reviewed-by: Michael J. 
Ruhl Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 74 +++++++++++++++++----------------- drivers/infiniband/hw/hfi1/chip.h | 30 ++++++++++++++ drivers/infiniband/hw/hfi1/hfi.h | 15 ------- drivers/infiniband/hw/hfi1/init.c | 2 +- drivers/infiniband/hw/hfi1/pcie.c | 11 ++--- drivers/infiniband/hw/hfi1/pio.c | 8 ++-- drivers/infiniband/hw/hfi1/sdma.c | 10 ++--- drivers/infiniband/hw/hfi1/vnic_main.c | 4 +- 8 files changed, 85 insertions(+), 69 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 55675ab694a1..708dc1572249 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -10130,7 +10130,7 @@ static void set_lidlmc(struct hfi1_pportdata *ppd) (((lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) << SEND_CTXT_CHECK_SLID_VALUE_SHIFT); - for (i = 0; i < dd->chip_send_contexts; i++) { + for (i = 0; i < chip_send_contexts(dd); i++) { hfi1_cdbg(LINKVERB, "SendContext[%d].SLID_CHECK = 0x%x", i, (u32)sreg); write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, sreg); @@ -12041,7 +12041,7 @@ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp) } else if (entry->flags & CNTR_SDMA) { hfi1_cdbg(CNTR, "\t Per SDMA Engine\n"); - for (j = 0; j < dd->chip_sdma_engines; + for (j = 0; j < chip_sdma_engines(dd); j++) { val = entry->rw_cntr(entry, dd, j, @@ -12417,6 +12417,7 @@ static int init_cntrs(struct hfi1_devdata *dd) struct hfi1_pportdata *ppd; const char *bit_type_32 = ",32"; const int bit_type_32_sz = strlen(bit_type_32); + u32 sdma_engines = chip_sdma_engines(dd); /* set up the stats timer; the add_timer is done at the end */ timer_setup(&dd->synth_stats_timer, update_synth_timer, 0); @@ -12449,7 +12450,7 @@ static int init_cntrs(struct hfi1_devdata *dd) } } else if (dev_cntrs[i].flags & CNTR_SDMA) { dev_cntrs[i].offset = dd->ndevcntrs; - for (j = 0; j < dd->chip_sdma_engines; j++) { + for (j = 0; j < sdma_engines; j++) { snprintf(name, C_MAX_NAME, "%s%d", dev_cntrs[i].name, j); sz += strlen(name); @@ -12506,7 +12507,7 @@ static int init_cntrs(struct hfi1_devdata *dd) *p++ = '\n'; } } else if (dev_cntrs[i].flags & CNTR_SDMA) { - for (j = 0; j < dd->chip_sdma_engines; j++) { + for (j = 0; j < sdma_engines; j++) { snprintf(name, C_MAX_NAME, "%s%d", dev_cntrs[i].name, j); memcpy(p, name, strlen(name)); @@ -13019,9 +13020,9 @@ static void clear_all_interrupts(struct hfi1_devdata *dd) write_csr(dd, SEND_PIO_ERR_CLEAR, ~(u64)0); write_csr(dd, SEND_DMA_ERR_CLEAR, ~(u64)0); write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~(u64)0); - for (i = 0; i < dd->chip_send_contexts; i++) + for (i = 0; i < chip_send_contexts(dd); i++) write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~(u64)0); - for (i = 0; i < dd->chip_sdma_engines; i++) + for (i = 0; i < chip_sdma_engines(dd); i++) write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~(u64)0); write_csr(dd, DCC_ERR_FLG_CLR, ~(u64)0); @@ -13428,6 +13429,8 @@ static int set_up_context_variables(struct hfi1_devdata *dd) int qos_rmt_count; int user_rmt_reduced; u32 n_usr_ctxts; + u32 send_contexts = chip_send_contexts(dd); + u32 rcv_contexts = chip_rcv_contexts(dd); /* * Kernel receive contexts: @@ -13449,16 +13452,16 @@ static int set_up_context_variables(struct hfi1_devdata *dd) * Every kernel receive context needs an ACK send context. 
* one send context is allocated for each VL{0-7} and VL15 */ - if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) { + if (num_kernel_contexts > (send_contexts - num_vls - 1)) { dd_dev_err(dd, "Reducing # kernel rcv contexts to: %d, from %lu\n", - (int)(dd->chip_send_contexts - num_vls - 1), + send_contexts - num_vls - 1, num_kernel_contexts); - num_kernel_contexts = dd->chip_send_contexts - num_vls - 1; + num_kernel_contexts = send_contexts - num_vls - 1; } /* Accommodate VNIC contexts if possible */ - if ((num_kernel_contexts + num_vnic_contexts) > dd->chip_rcv_contexts) { + if ((num_kernel_contexts + num_vnic_contexts) > rcv_contexts) { dd_dev_err(dd, "No receive contexts available for VNIC\n"); num_vnic_contexts = 0; } @@ -13476,13 +13479,13 @@ static int set_up_context_variables(struct hfi1_devdata *dd) /* * Adjust the counts given a global max. */ - if (total_contexts + n_usr_ctxts > dd->chip_rcv_contexts) { + if (total_contexts + n_usr_ctxts > rcv_contexts) { dd_dev_err(dd, "Reducing # user receive contexts to: %d, from %u\n", - (int)(dd->chip_rcv_contexts - total_contexts), + rcv_contexts - total_contexts, n_usr_ctxts); /* recalculate */ - n_usr_ctxts = dd->chip_rcv_contexts - total_contexts; + n_usr_ctxts = rcv_contexts - total_contexts; } /* each user context requires an entry in the RMT */ @@ -13508,7 +13511,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) dd->freectxts = n_usr_ctxts; dd_dev_info(dd, "rcv contexts: chip %d, used %d (kernel %d, vnic %u, user %u)\n", - (int)dd->chip_rcv_contexts, + rcv_contexts, (int)dd->num_rcv_contexts, (int)dd->n_krcv_queues, dd->num_vnic_contexts, @@ -13526,7 +13529,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) * contexts. */ dd->rcv_entries.group_size = RCV_INCREMENT; - ngroups = dd->chip_rcv_array_count / dd->rcv_entries.group_size; + ngroups = chip_rcv_array_count(dd) / dd->rcv_entries.group_size; dd->rcv_entries.ngroups = ngroups / dd->num_rcv_contexts; dd->rcv_entries.nctxt_extra = ngroups - (dd->num_rcv_contexts * dd->rcv_entries.ngroups); @@ -13551,7 +13554,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) dd_dev_info( dd, "send contexts: chip %d, used %d (kernel %d, ack %d, user %d, vl15 %d)\n", - dd->chip_send_contexts, + send_contexts, dd->num_send_contexts, dd->sc_sizes[SC_KERNEL].count, dd->sc_sizes[SC_ACK].count, @@ -13609,7 +13612,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd) write_csr(dd, CCE_INT_MAP + (8 * i), 0); /* SendCtxtCreditReturnAddr */ - for (i = 0; i < dd->chip_send_contexts; i++) + for (i = 0; i < chip_send_contexts(dd); i++) write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0); /* PIO Send buffers */ @@ -13622,7 +13625,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd) /* RcvHdrAddr */ /* RcvHdrTailAddr */ /* RcvTidFlowTable */ - for (i = 0; i < dd->chip_rcv_contexts; i++) { + for (i = 0; i < chip_rcv_contexts(dd); i++) { write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0); write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0); for (j = 0; j < RXE_NUM_TID_FLOWS; j++) @@ -13630,7 +13633,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd) } /* RcvArray */ - for (i = 0; i < dd->chip_rcv_array_count; i++) + for (i = 0; i < chip_rcv_array_count(dd); i++) hfi1_put_tid(dd, i, PT_INVALID_FLUSH, 0, 0); /* RcvQPMapTable */ @@ -13788,7 +13791,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd) write_csr(dd, SEND_LOW_PRIORITY_LIST + (8 * i), 0); for (i = 0; i < 
VL_ARB_HIGH_PRIO_TABLE_SIZE; i++) write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8 * i), 0); - for (i = 0; i < dd->chip_send_contexts / NUM_CONTEXTS_PER_SET; i++) + for (i = 0; i < chip_send_contexts(dd) / NUM_CONTEXTS_PER_SET; i++) write_csr(dd, SEND_CONTEXT_SET_CTRL + (8 * i), 0); for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++) write_csr(dd, SEND_COUNTER_ARRAY32 + (8 * i), 0); @@ -13816,7 +13819,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd) /* * TXE Per-Context CSRs */ - for (i = 0; i < dd->chip_send_contexts; i++) { + for (i = 0; i < chip_send_contexts(dd); i++) { write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0); write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_CTRL, 0); write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0); @@ -13834,7 +13837,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd) /* * TXE Per-SDMA CSRs */ - for (i = 0; i < dd->chip_sdma_engines; i++) { + for (i = 0; i < chip_sdma_engines(dd); i++) { write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0); /* SEND_DMA_STATUS read-only */ write_kctxt_csr(dd, i, SEND_DMA_BASE_ADDR, 0); @@ -13967,7 +13970,7 @@ static void reset_rxe_csrs(struct hfi1_devdata *dd) /* * RXE Kernel and User Per-Context CSRs */ - for (i = 0; i < dd->chip_rcv_contexts; i++) { + for (i = 0; i < chip_rcv_contexts(dd); i++) { /* kernel */ write_kctxt_csr(dd, i, RCV_CTXT_CTRL, 0); /* RCV_CTXT_STATUS read-only */ @@ -14083,13 +14086,13 @@ static int init_chip(struct hfi1_devdata *dd) /* disable send contexts and SDMA engines */ write_csr(dd, SEND_CTRL, 0); - for (i = 0; i < dd->chip_send_contexts; i++) + for (i = 0; i < chip_send_contexts(dd); i++) write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0); - for (i = 0; i < dd->chip_sdma_engines; i++) + for (i = 0; i < chip_sdma_engines(dd); i++) write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0); /* disable port (turn off RXE inbound traffic) and contexts */ write_csr(dd, RCV_CTRL, 0); - for (i = 0; i < dd->chip_rcv_contexts; i++) + for (i = 0; i < chip_rcv_contexts(dd); i++) write_csr(dd, RCV_CTXT_CTRL, 0); /* mask all interrupt sources */ for (i = 0; i < CCE_NUM_INT_CSRS; i++) @@ -14708,9 +14711,9 @@ static void init_txe(struct hfi1_devdata *dd) write_csr(dd, SEND_EGRESS_ERR_MASK, ~0ull); /* enable all per-context and per-SDMA engine errors */ - for (i = 0; i < dd->chip_send_contexts; i++) + for (i = 0; i < chip_send_contexts(dd); i++) write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, ~0ull); - for (i = 0; i < dd->chip_sdma_engines; i++) + for (i = 0; i < chip_sdma_engines(dd); i++) write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, ~0ull); /* set the local CU to AU mapping */ @@ -14978,11 +14981,13 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, "Functional simulator" }; struct pci_dev *parent = pdev->bus->self; + u32 sdma_engines; dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS * sizeof(struct hfi1_pportdata)); if (IS_ERR(dd)) goto bail; + sdma_engines = chip_sdma_engines(dd); ppd = dd->pport; for (i = 0; i < dd->num_pports; i++, ppd++) { int vl; @@ -15080,11 +15085,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, /* give a reasonable active value, will be set on link up */ dd->pport->link_speed_active = OPA_LINK_SPEED_25G; - dd->chip_rcv_contexts = read_csr(dd, RCV_CONTEXTS); - dd->chip_send_contexts = read_csr(dd, SEND_CONTEXTS); - dd->chip_sdma_engines = read_csr(dd, SEND_DMA_ENGINES); - dd->chip_pio_mem_size = read_csr(dd, SEND_PIO_MEM_SIZE); - dd->chip_sdma_mem_size = read_csr(dd, SEND_DMA_MEM_SIZE); /* fix up link widths for emulation _p */ ppd = dd->pport; if (dd->icode == ICODE_FPGA_EMULATION && is_emulator_p(dd)) { @@ -15095,11 
+15095,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, OPA_LINK_WIDTH_1X; } /* insure num_vls isn't larger than number of sdma engines */ - if (HFI1_CAP_IS_KSET(SDMA) && num_vls > dd->chip_sdma_engines) { + if (HFI1_CAP_IS_KSET(SDMA) && num_vls > sdma_engines) { dd_dev_err(dd, "num_vls %u too large, using %u VLs\n", - num_vls, dd->chip_sdma_engines); - num_vls = dd->chip_sdma_engines; - ppd->vls_supported = dd->chip_sdma_engines; + num_vls, sdma_engines); + num_vls = sdma_engines; + ppd->vls_supported = sdma_engines; ppd->vls_operational = ppd->vls_supported; } diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index fdf389e46e19..36b04d6300e5 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -656,6 +656,36 @@ static inline void write_uctxt_csr(struct hfi1_devdata *dd, int ctxt, write_csr(dd, offset0 + (0x1000 * ctxt), value); } +static inline u32 chip_rcv_contexts(struct hfi1_devdata *dd) +{ + return read_csr(dd, RCV_CONTEXTS); +} + +static inline u32 chip_send_contexts(struct hfi1_devdata *dd) +{ + return read_csr(dd, SEND_CONTEXTS); +} + +static inline u32 chip_sdma_engines(struct hfi1_devdata *dd) +{ + return read_csr(dd, SEND_DMA_ENGINES); +} + +static inline u32 chip_pio_mem_size(struct hfi1_devdata *dd) +{ + return read_csr(dd, SEND_PIO_MEM_SIZE); +} + +static inline u32 chip_sdma_mem_size(struct hfi1_devdata *dd) +{ + return read_csr(dd, SEND_DMA_MEM_SIZE); +} + +static inline u32 chip_rcv_array_count(struct hfi1_devdata *dd) +{ + return read_csr(dd, RCV_ARRAY_CNT); +} + u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl, u32 dw_len); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 3b3d435a2f90..148c12836074 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1059,8 +1059,6 @@ struct hfi1_devdata { dma_addr_t sdma_pad_phys; /* for deallocation */ size_t sdma_heads_size; - /* number from the chip */ - u32 chip_sdma_engines; /* num used */ u32 num_sdma; /* array of engines sized by num_sdma */ @@ -1141,19 +1139,6 @@ struct hfi1_devdata { /* Base GUID for device (network order) */ u64 base_guid; - /* these are the "32 bit" regs */ - - /* number of receive contexts the chip supports */ - u32 chip_rcv_contexts; - /* number of receive array entries */ - u32 chip_rcv_array_count; - /* number of PIO send contexts the chip supports */ - u32 chip_send_contexts; - /* number of bytes in the PIO memory buffer */ - u32 chip_pio_mem_size; - /* number of bytes in the SDMA memory buffer */ - u32 chip_sdma_mem_size; - /* both sides of the PCIe link are gen3 capable */ u8 link_gen3_capable; u8 dc_shutdown; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 26fca0c5199c..8c97de9c63f0 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -921,7 +921,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) } /* Allocate enough memory for user event notification. 
*/ - len = PAGE_ALIGN(dd->chip_rcv_contexts * HFI1_MAX_SHARED_CTXTS * + len = PAGE_ALIGN(chip_rcv_contexts(dd) * HFI1_MAX_SHARED_CTXTS * sizeof(*dd->events)); dd->events = vmalloc_user(len); if (!dd->events) diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 4d4371bf2c7c..144fc0a89da4 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -157,6 +157,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) unsigned long len; resource_size_t addr; int ret = 0; + u32 rcv_array_count; addr = pci_resource_start(pdev, 0); len = pci_resource_len(pdev, 0); @@ -186,9 +187,9 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) goto nomem; } - dd->chip_rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT); - dd_dev_info(dd, "RcvArray count: %u\n", dd->chip_rcv_array_count); - dd->base2_start = RCV_ARRAY + dd->chip_rcv_array_count * 8; + rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT); + dd_dev_info(dd, "RcvArray count: %u\n", rcv_array_count); + dd->base2_start = RCV_ARRAY + rcv_array_count * 8; dd->kregbase2 = ioremap_nocache( addr + dd->base2_start, @@ -214,13 +215,13 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) * to write an entire cacheline worth of entries in one shot. */ dd->rcvarray_wc = ioremap_wc(addr + RCV_ARRAY, - dd->chip_rcv_array_count * 8); + rcv_array_count * 8); if (!dd->rcvarray_wc) { dd_dev_err(dd, "WC mapping of receive array failed\n"); goto nomem; } dd_dev_info(dd, "WC RcvArray: %p for %x\n", - dd->rcvarray_wc, dd->chip_rcv_array_count * 8); + dd->rcvarray_wc, rcv_array_count * 8); dd->flags |= HFI1_PRESENT; /* chip.c CSR routines now work */ return 0; diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 363ab0f35369..c2c1cba5b23b 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -226,7 +226,7 @@ static const char *sc_type_name(int index) int init_sc_pools_and_sizes(struct hfi1_devdata *dd) { struct mem_pool_info mem_pool_info[NUM_SC_POOLS] = { { 0 } }; - int total_blocks = (dd->chip_pio_mem_size / PIO_BLOCK_SIZE) - 1; + int total_blocks = (chip_pio_mem_size(dd) / PIO_BLOCK_SIZE) - 1; int total_contexts = 0; int fixed_blocks; int pool_blocks; @@ -343,8 +343,8 @@ int init_sc_pools_and_sizes(struct hfi1_devdata *dd) sc_type_name(i), count); return -EINVAL; } - if (total_contexts + count > dd->chip_send_contexts) - count = dd->chip_send_contexts - total_contexts; + if (total_contexts + count > chip_send_contexts(dd)) + count = chip_send_contexts(dd) - total_contexts; total_contexts += count; @@ -507,7 +507,7 @@ static int sc_hw_alloc(struct hfi1_devdata *dd, int type, u32 *sw_index, if (sci->type == type && sci->allocated == 0) { sci->allocated = 1; /* use a 1:1 mapping, but make them non-equal */ - context = dd->chip_send_contexts - index - 1; + context = chip_send_contexts(dd) - index - 1; dd->hw_to_sw[context] = index; *sw_index = index; *hw_context = context; diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 7fb350b87b49..88e326d6cc49 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1351,7 +1351,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) struct hfi1_pportdata *ppd = dd->pport + port; u32 per_sdma_credits; uint idle_cnt = sdma_idle_cnt; - size_t num_engines = dd->chip_sdma_engines; + size_t num_engines = chip_sdma_engines(dd); int ret = -ENOMEM; if (!HFI1_CAP_IS_KSET(SDMA)) { @@ 
-1360,18 +1360,18 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) } if (mod_num_sdma && /* can't exceed chip support */ - mod_num_sdma <= dd->chip_sdma_engines && + mod_num_sdma <= chip_sdma_engines(dd) && /* count must be >= vls */ mod_num_sdma >= num_vls) num_engines = mod_num_sdma; dd_dev_info(dd, "SDMA mod_num_sdma: %u\n", mod_num_sdma); - dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", dd->chip_sdma_engines); + dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", chip_sdma_engines(dd)); dd_dev_info(dd, "SDMA chip_sdma_mem_size: %u\n", - dd->chip_sdma_mem_size); + chip_sdma_mem_size(dd)); per_sdma_credits = - dd->chip_sdma_mem_size / (num_engines * SDMA_BLOCK_SIZE); + chip_sdma_mem_size(dd) / (num_engines * SDMA_BLOCK_SIZE); /* set up freeze waitqueue */ init_waitqueue_head(&dd->sdma_unfreeze_wq); diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index 5d65582fe4d9..79874d8242cc 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -818,14 +818,14 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo); netdev = alloc_netdev_mqs(size, name, name_assign_type, setup, - dd->chip_sdma_engines, dd->num_vnic_contexts); + chip_sdma_engines(dd), dd->num_vnic_contexts); if (!netdev) return ERR_PTR(-ENOMEM); rn = netdev_priv(netdev); vinfo = opa_vnic_dev_priv(netdev); vinfo->dd = dd; - vinfo->num_tx_q = dd->chip_sdma_engines; + vinfo->num_tx_q = chip_sdma_engines(dd); vinfo->num_rx_q = dd->num_vnic_contexts; vinfo->netdev = netdev; rn->free_rdma_netdev = hfi1_vnic_free_rn; -- cgit From 071e4fec8e4dfaca4799f835d379f7368cbdf102 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 20 Jun 2018 09:43:14 -0700 Subject: IB/hfi1: Reorg ctxtdata and rightsize fields Many fields in ctxtdata are incorrectly sized and the organization of the fields within the structure is a jumble. Fix by: - Correcting oversize fields. - Putting fields common to all contexts at the top with hot fields at the top. - Moving PSM fields to the bottom of the structure. Reviewed-by: Michael J. Ruhl Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/hfi.h | 169 +++++++++++++++++++------------------- drivers/infiniband/hw/hfi1/init.c | 4 +- 2 files changed, 86 insertions(+), 87 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 148c12836074..d7263b82eb03 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -197,36 +197,78 @@ struct hfi1_ctxtdata { void *rcvhdrq; /* kernel virtual address where hdrqtail is updated */ volatile __le64 *rcvhdrtail_kvaddr; - /* when waiting for rcv or pioavail */ - wait_queue_head_t wait; + /* so functions that need physical port can get it easily */ + struct hfi1_pportdata *ppd; + /* so file ops can get at unit */ + struct hfi1_devdata *dd; + /* this receive context's assigned PIO ACK send context */ + struct send_context *sc; + /* per context recv functions */ + const rhf_rcv_function_ptr *rhf_rcv_function_map; + /* + * The interrupt handler for a particular receive context can vary + * throughout it's lifetime. This is not a lock protected data member so + * it must be updated atomically and the prev and new value must always + * be valid. Worst case is we process an extra interrupt and up to 64 + * packets with the wrong interrupt handler. 
+ */ + int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded); + /* verbs rx_stats per rcd */ + struct hfi1_opcode_stats_perctx *opstats; + /* clear interrupt mask */ + u64 imask; + /* ctxt rcvhdrq head offset */ + u32 head; /* number of rcvhdrq entries */ u16 rcvhdrq_cnt; + u8 ireg; /* clear interrupt register */ + /* receive packet sequence counter */ + u8 seq_cnt; /* size of each of the rcvhdrq entries */ u8 rcvhdrqentsize; /* offset of RHF within receive header entry */ u8 rhf_offset; + /* dynamic receive available interrupt timeout */ + u8 rcvavail_timeout; + /* Indicates that this is vnic context */ + bool is_vnic; + /* vnic queue index this context is mapped to */ + u8 vnic_q_idx; + /* Is ASPM interrupt supported for this context */ + bool aspm_intr_supported; + /* ASPM state (enabled/disabled) for this context */ + bool aspm_enabled; + /* Is ASPM processing enabled for this context (in intr context) */ + bool aspm_intr_enable; + struct ctxt_eager_bufs egrbufs; + /* QPs waiting for context processing */ + struct list_head qp_wait_list; + /* tid allocation lists */ + struct exp_tid_set tid_group_list; + struct exp_tid_set tid_used_list; + struct exp_tid_set tid_full_list; + + /* Timer for re-enabling ASPM if interrupt activity quiets down */ + struct timer_list aspm_timer; + /* per-context configuration flags */ + unsigned long flags; + /* array of tid_groups */ + struct tid_group *groups; /* mmap of hdrq, must fit in 44 bits */ dma_addr_t rcvhdrq_dma; dma_addr_t rcvhdrqtailaddr_dma; - struct ctxt_eager_bufs egrbufs; - /* this receive context's assigned PIO ACK send context */ - struct send_context *sc; - - /* dynamic receive available interrupt timeout */ - u32 rcvavail_timeout; + /* Last interrupt timestamp */ + ktime_t aspm_ts_last_intr; + /* Last timestamp at which we scheduled a timer for this context */ + ktime_t aspm_ts_timer_sched; + /* Lock to serialize between intr, timer intr and user threads */ + spinlock_t aspm_lock; /* Reference count the base context usage */ struct kref kref; - - /* Device context index */ - u16 ctxt; - /* - * non-zero if ctxt can be shared, and defines the maximum number of - * sub-contexts for this device context. - */ - u16 subctxt_cnt; - /* non-zero if ctxt is being shared. */ - u16 subctxt_id; - u8 uuid[16]; + /* numa node of this context */ + int numa_id; + /* associated msix interrupt. */ + s16 msix_intr; /* job key */ u16 jkey; /* number of RcvArray groups for this context. */ @@ -237,87 +279,44 @@ struct hfi1_ctxtdata { u16 expected_count; /* index of first expected TID entry. 
*/ u16 expected_base; - /* array of tid_groups */ - struct tid_group *groups; - - struct exp_tid_set tid_group_list; - struct exp_tid_set tid_used_list; - struct exp_tid_set tid_full_list; + /* Device context index */ + u8 ctxt; - /* lock protecting all Expected TID data of user contexts */ + /* PSM Specific fields */ + /* lock protecting all Expected TID data */ struct mutex exp_mutex; - /* per-context configuration flags */ - unsigned long flags; - /* per-context event flags for fileops/intr communication */ - unsigned long event_flags; - /* total number of polled urgent packets */ - u32 urgent; - /* saved total number of polled urgent packets for poll edge trigger */ - u32 urgent_poll; + /* when waiting for rcv or pioavail */ + wait_queue_head_t wait; + /* uuid from PSM */ + u8 uuid[16]; /* same size as task_struct .comm[], command that opened context */ char comm[TASK_COMM_LEN]; - /* so file ops can get at unit */ - struct hfi1_devdata *dd; - /* per context recv functions */ - const rhf_rcv_function_ptr *rhf_rcv_function_map; - /* so functions that need physical port can get it easily */ - struct hfi1_pportdata *ppd; - /* associated msix interrupt */ - u32 msix_intr; + /* Bitmask of in use context(s) */ + DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS); + /* per-context event flags for fileops/intr communication */ + unsigned long event_flags; /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */ void *subctxt_uregbase; /* An array of pages for the eager receive buffers * N */ void *subctxt_rcvegrbuf; /* An array of pages for the eager header queue entries * N */ void *subctxt_rcvhdr_base; - /* Bitmask of in use context(s) */ - DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS); - /* The version of the library which opened this ctxt */ - u32 userversion; + /* total number of polled urgent packets */ + u32 urgent; + /* saved total number of polled urgent packets for poll edge trigger */ + u32 urgent_poll; /* Type of packets or conditions we want to poll for */ u16 poll_type; - /* receive packet sequence counter */ - u8 seq_cnt; - /* ctxt rcvhdrq head offset */ - u32 head; - /* QPs waiting for context processing */ - struct list_head qp_wait_list; - /* interrupt handling */ - u64 imask; /* clear interrupt mask */ - int ireg; /* clear interrupt register */ - int numa_id; /* numa node of this context */ - /* verbs rx_stats per rcd */ - struct hfi1_opcode_stats_perctx *opstats; - - /* Is ASPM interrupt supported for this context */ - bool aspm_intr_supported; - /* ASPM state (enabled/disabled) for this context */ - bool aspm_enabled; - /* Timer for re-enabling ASPM if interrupt activity quietens down */ - struct timer_list aspm_timer; - /* Lock to serialize between intr, timer intr and user threads */ - spinlock_t aspm_lock; - /* Is ASPM processing enabled for this context (in intr context) */ - bool aspm_intr_enable; - /* Last interrupt timestamp */ - ktime_t aspm_ts_last_intr; - /* Last timestamp at which we scheduled a timer for this context */ - ktime_t aspm_ts_timer_sched; - + /* non-zero if ctxt is being shared. */ + u16 subctxt_id; + /* The version of the library which opened this ctxt */ + u32 userversion; /* - * The interrupt handler for a particular receive context can vary - * throughout it's lifetime. This is not a lock protected data member so - * it must be updated atomically and the prev and new value must always - * be valid. Worst case is we process an extra interrupt and up to 64 - * packets with the wrong interrupt handler. 
+ * non-zero if ctxt can be shared, and defines the maximum number of + * sub-contexts for this device context. */ - int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded); - - /* Indicates that this is vnic context */ - bool is_vnic; + u8 subctxt_cnt; - /* vnic queue index this context is mapped to */ - u8 vnic_q_idx; }; /** diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 8c97de9c63f0..d51ad777d72d 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1913,9 +1913,9 @@ bail: int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) { struct hfi1_devdata *dd = rcd->dd; - u32 max_entries, egrtop, alloced_bytes = 0, idx = 0; + u32 max_entries, egrtop, alloced_bytes = 0; gfp_t gfp_flags; - u16 order; + u16 order, idx = 0; int ret = 0; u16 round_mtu = roundup_pow_of_two(hfi1_max_mtu); -- cgit From 70324739ac5e0332dc053eaeaba773f5ab755879 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Wed, 20 Jun 2018 09:43:23 -0700 Subject: IB/hfi1: Remove INTx support and simplify MSIx usage The INTx IRQ support does not work for all HFI1 IRQ handlers (specifically the receive data IRQs). Remove all supporting code for the INTx IRQ. If the MSIx vector request is unsuccessful, do not allow the driver to continue. Reviewed-by: Mike Marciniszyn Reviewed-by: Kamenee Arumugam Reviewed-by: Sadanand Warrier Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 89 ++++++++-------------------------- drivers/infiniband/hw/hfi1/hfi.h | 3 -- drivers/infiniband/hw/hfi1/pcie.c | 8 +-- drivers/infiniband/hw/hfi1/vnic_main.c | 8 ++- 4 files changed, 25 insertions(+), 83 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 708dc1572249..5d03c780c780 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8260,9 +8260,14 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source) dd_dev_err(dd, "invalid interrupt source %u\n", source); } -/* - * General interrupt handler. This is able to correctly handle - * all interrupts in case INTx is used. +/** + * gerneral_interrupt() - General interrupt handler + * @irq: MSIx IRQ vector + * @data: hfi1 devdata + * + * This is able to correctly handle all non-threaded interrupts. Receive + * context DATA IRQs are threaded and are not supported by this handler. + * */ static irqreturn_t general_interrupt(int irq, void *data) { @@ -13030,48 +13035,30 @@ static void clear_all_interrupts(struct hfi1_devdata *dd) write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0); } -/* Move to pcie.c? */ -static void disable_intx(struct pci_dev *pdev) -{ - pci_intx(pdev, 0); -} - /** * hfi1_clean_up_interrupts() - Free all IRQ resources * @dd: valid device data data structure * - * Free the MSI or INTx IRQs and assoicated PCI resources, - * if they have been allocated. + * Free the MSIx and assoicated PCI resources, if they have been allocated. 
*/ void hfi1_clean_up_interrupts(struct hfi1_devdata *dd) { int i; + struct hfi1_msix_entry *me = dd->msix_entries; /* remove irqs - must happen before disabling/turning off */ - if (dd->num_msix_entries) { - /* MSI-X */ - struct hfi1_msix_entry *me = dd->msix_entries; - - for (i = 0; i < dd->num_msix_entries; i++, me++) { - if (!me->arg) /* => no irq, no affinity */ - continue; - hfi1_put_irq_affinity(dd, me); - pci_free_irq(dd->pcidev, i, me->arg); - } - - /* clean structures */ - kfree(dd->msix_entries); - dd->msix_entries = NULL; - dd->num_msix_entries = 0; - } else { - /* INTx */ - if (dd->requested_intx_irq) { - pci_free_irq(dd->pcidev, 0, dd); - dd->requested_intx_irq = 0; - } - disable_intx(dd->pcidev); + for (i = 0; i < dd->num_msix_entries; i++, me++) { + if (!me->arg) /* => no irq, no affinity */ + continue; + hfi1_put_irq_affinity(dd, me); + pci_free_irq(dd->pcidev, i, me->arg); } + /* clean structures */ + kfree(dd->msix_entries); + dd->msix_entries = NULL; + dd->num_msix_entries = 0; + pci_free_irq_vectors(dd->pcidev); } @@ -13121,20 +13108,6 @@ static void remap_sdma_interrupts(struct hfi1_devdata *dd, msix_intr); } -static int request_intx_irq(struct hfi1_devdata *dd) -{ - int ret; - - ret = pci_request_irq(dd->pcidev, 0, general_interrupt, NULL, dd, - DRIVER_NAME "_%d", dd->unit); - if (ret) - dd_dev_err(dd, "unable to request INTx interrupt, err %d\n", - ret); - else - dd->requested_intx_irq = 1; - return ret; -} - static int request_msix_irqs(struct hfi1_devdata *dd) { int first_general, last_general; @@ -13253,11 +13226,6 @@ void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd) { int i; - if (!dd->num_msix_entries) { - synchronize_irq(pci_irq_vector(dd->pcidev, 0)); - return; - } - for (i = 0; i < dd->vnic.num_ctxt; i++) { struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i]; struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr]; @@ -13346,7 +13314,6 @@ static int set_up_interrupts(struct hfi1_devdata *dd) { u32 total; int ret, request; - int single_interrupt = 0; /* we expect to have all the interrupts */ /* * Interrupt count: @@ -13363,17 +13330,6 @@ static int set_up_interrupts(struct hfi1_devdata *dd) if (request < 0) { ret = request; goto fail; - } else if (request == 0) { - /* using INTx */ - /* dd->num_msix_entries already zero */ - single_interrupt = 1; - dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n"); - } else if (request < total) { - /* using MSI-X, with reduced interrupts */ - dd_dev_err(dd, "reduced interrupt found, wanted %u, got %u\n", - total, request); - ret = -EINVAL; - goto fail; } else { dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries), GFP_KERNEL); @@ -13394,10 +13350,7 @@ static int set_up_interrupts(struct hfi1_devdata *dd) /* reset general handler mask, chip MSI-X mappings */ reset_interrupts(dd); - if (single_interrupt) - ret = request_intx_irq(dd); - else - ret = request_msix_irqs(dd); + ret = request_msix_irqs(dd); if (ret) goto fail; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index d7263b82eb03..d9470317983f 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1210,9 +1210,6 @@ struct hfi1_devdata { u32 num_msix_entries; u32 first_dyn_msix_idx; - /* INTx information */ - u32 requested_intx_irq; /* did we request one? 
*/ - /* general interrupt: mask of handled interrupts */ u64 gi_mask[CCE_NUM_INT_CSRS]; diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 144fc0a89da4..de3ee606034c 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -347,15 +347,13 @@ int pcie_speeds(struct hfi1_devdata *dd) /* * Returns: * - actual number of interrupts allocated or - * - 0 if fell back to INTx. * - error */ int request_msix(struct hfi1_devdata *dd, u32 msireq) { int nvec; - nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq, - PCI_IRQ_MSIX | PCI_IRQ_LEGACY); + nvec = pci_alloc_irq_vectors(dd->pcidev, msireq, msireq, PCI_IRQ_MSIX); if (nvec < 0) { dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec); return nvec; @@ -363,10 +361,6 @@ int request_msix(struct hfi1_devdata *dd, u32 msireq) tune_pcie_caps(dd); - /* check for legacy IRQ */ - if (nvec == 1 && !dd->pcidev->msix_enabled) - return 0; - return nvec; } diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index 79874d8242cc..ba160f99cf8e 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2017 Intel Corporation. + * Copyright(c) 2017 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -120,8 +120,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd, uctxt->seq_cnt = 1; uctxt->is_vnic = true; - if (dd->num_msix_entries) - hfi1_set_vnic_msix_info(uctxt); + hfi1_set_vnic_msix_info(uctxt); hfi1_stats.sps_ctxts++; dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt); @@ -136,8 +135,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt); flush_wc(); - if (dd->num_msix_entries) - hfi1_reset_vnic_msix_info(uctxt); + hfi1_reset_vnic_msix_info(uctxt); /* * Disable receive context and interrupt available, reset all -- cgit From b7403217656dcf6c51f09d0bca7a12db0de8934a Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 19 Jun 2018 10:59:14 +0300 Subject: IB: Make ib_init_ah_attr_from_wc set sgid_attr The work completion is inspected to determine which GID table entry was used to receive the packet; a matching sgid_attr is produced and stored in the ah_attr. All callers of this function are now required to release the ah_attr on success.
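For illustration, a minimal caller-side sketch of the new contract (the wrapper name is hypothetical; the call sequence mirrors the reworked ib_create_ah_from_wc() in this patch):

/* Hypothetical caller sketch: build attrs from a wc, use them, then drop
 * the sgid_attr reference that a successful init now holds.
 */
static struct ib_ah *reply_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
				      const struct ib_grh *grh, u8 port_num)
{
	struct rdma_ah_attr ah_attr;
	struct ib_ah *ah;
	int ret;

	ret = ib_init_ah_attr_from_wc(pd->device, port_num, wc, grh, &ah_attr);
	if (ret)
		return ERR_PTR(ret);

	ah = rdma_create_ah(pd, &ah_attr);

	/* Required on success: releases the sgid_attr reference */
	rdma_destroy_ah_attr(&ah_attr);
	return ah;
}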
Signed-off-by: Parav Pandit Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cm.c | 3 + drivers/infiniband/core/user_mad.c | 1 + drivers/infiniband/core/verbs.c | 110 +++++++++++++++++++------------------ include/rdma/ib_verbs.h | 7 +++ 4 files changed, 67 insertions(+), 54 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 0f39a879c91d..11b85933fb39 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1091,6 +1091,9 @@ retest: wait_for_completion(&cm_id_priv->comp); while ((work = cm_dequeue_work(cm_id_priv)) != NULL) cm_free_work(work); + + rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr); + rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr); kfree(cm_id_priv->private_data); kfree(cm_id_priv); } diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index bb98c9e4a7fd..c34a6852d691 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -268,6 +268,7 @@ static void recv_handler(struct ib_mad_agent *agent, packet->mad.hdr.traffic_class = grh->traffic_class; memcpy(packet->mad.hdr.gid, &grh->dgid, 16); packet->mad.hdr.flow_label = cpu_to_be32(grh->flow_label); + rdma_destroy_ah_attr(&ah_attr); } if (queue_packet(file, agent, packet)) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index b0ad739a7bd0..9a72b88fea80 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -636,16 +636,16 @@ static bool find_gid_index(const union ib_gid *gid, return true; } -static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num, - u16 vlan_id, const union ib_gid *sgid, - enum ib_gid_type gid_type, - u16 *gid_index) +static const struct ib_gid_attr * +get_sgid_attr_from_eth(struct ib_device *device, u8 port_num, + u16 vlan_id, const union ib_gid *sgid, + enum ib_gid_type gid_type) { struct find_gid_index_context context = {.vlan_id = vlan_id, .gid_type = gid_type}; - return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index, - &context, gid_index); + return rdma_find_gid_by_filter(device, sgid, port_num, find_gid_index, + &context); } int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr, @@ -689,37 +689,24 @@ EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr); static int ib_resolve_unicast_gid_dmac(struct ib_device *device, struct rdma_ah_attr *ah_attr) { - struct ib_gid_attr sgid_attr; - struct ib_global_route *grh; + struct ib_global_route *grh = rdma_ah_retrieve_grh(ah_attr); + const struct ib_gid_attr *sgid_attr = grh->sgid_attr; int hop_limit = 0xff; - union ib_gid sgid; - int ret; - - grh = rdma_ah_retrieve_grh(ah_attr); - - ret = ib_get_cached_gid(device, rdma_ah_get_port_num(ah_attr), - grh->sgid_index, &sgid, &sgid_attr); - if (ret || !sgid_attr.ndev) { - if (!ret) - ret = -ENXIO; - return ret; - } + int ret = 0; /* If destination is link local and source GID is RoCEv1, * IP stack is not used. 
*/ if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw) && - sgid_attr.gid_type == IB_GID_TYPE_ROCE) { + sgid_attr->gid_type == IB_GID_TYPE_ROCE) { rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw, ah_attr->roce.dmac); - goto done; + return ret; } - ret = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid, + ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid, ah_attr->roce.dmac, - sgid_attr.ndev, &hop_limit); -done: - dev_put(sgid_attr.ndev); + sgid_attr->ndev, &hop_limit); grh->hop_limit = hop_limit; return ret; @@ -734,16 +721,18 @@ done: * as sgid and, sgid is used as dgid because sgid contains destinations * GID whom to respond to. * + * On success the caller is responsible to call rdma_destroy_ah_attr on the + * attr. */ int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num, const struct ib_wc *wc, const struct ib_grh *grh, struct rdma_ah_attr *ah_attr) { u32 flow_class; - u16 gid_index; int ret; enum rdma_network_type net_type = RDMA_NETWORK_IB; enum ib_gid_type gid_type = IB_GID_TYPE_IB; + const struct ib_gid_attr *sgid_attr; int hoplimit = 0xff; union ib_gid dgid; union ib_gid sgid; @@ -774,40 +763,49 @@ int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num, if (!(wc->wc_flags & IB_WC_GRH)) return -EPROTOTYPE; - ret = get_sgid_index_from_eth(device, port_num, - vlan_id, &dgid, - gid_type, &gid_index); - if (ret) - return ret; + sgid_attr = get_sgid_attr_from_eth(device, port_num, + vlan_id, &dgid, + gid_type); + if (IS_ERR(sgid_attr)) + return PTR_ERR(sgid_attr); flow_class = be32_to_cpu(grh->version_tclass_flow); - rdma_ah_set_grh(ah_attr, &sgid, - flow_class & 0xFFFFF, - (u8)gid_index, hoplimit, - (flow_class >> 20) & 0xFF); - return ib_resolve_unicast_gid_dmac(device, ah_attr); + rdma_move_grh_sgid_attr(ah_attr, + &sgid, + flow_class & 0xFFFFF, + hoplimit, + (flow_class >> 20) & 0xFF, + sgid_attr); + + ret = ib_resolve_unicast_gid_dmac(device, ah_attr); + if (ret) + rdma_destroy_ah_attr(ah_attr); + + return ret; } else { rdma_ah_set_dlid(ah_attr, wc->slid); rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits); - if (wc->wc_flags & IB_WC_GRH) { - if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) { - ret = ib_find_cached_gid_by_port(device, &dgid, - IB_GID_TYPE_IB, - port_num, NULL, - &gid_index); - if (ret) - return ret; - } else { - gid_index = 0; - } + if ((wc->wc_flags & IB_WC_GRH) == 0) + return 0; - flow_class = be32_to_cpu(grh->version_tclass_flow); - rdma_ah_set_grh(ah_attr, &sgid, + if (dgid.global.interface_id != + cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) { + sgid_attr = rdma_find_gid_by_port( + device, &dgid, IB_GID_TYPE_IB, port_num, NULL); + } else + sgid_attr = rdma_get_gid_attr(device, port_num, 0); + + if (IS_ERR(sgid_attr)) + return PTR_ERR(sgid_attr); + flow_class = be32_to_cpu(grh->version_tclass_flow); + rdma_move_grh_sgid_attr(ah_attr, + &sgid, flow_class & 0xFFFFF, - (u8)gid_index, hoplimit, - (flow_class >> 20) & 0xFF); - } + hoplimit, + (flow_class >> 20) & 0xFF, + sgid_attr); + return 0; } } @@ -860,13 +858,17 @@ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, const struct ib_grh *grh, u8 port_num) { struct rdma_ah_attr ah_attr; + struct ib_ah *ah; int ret; ret = ib_init_ah_attr_from_wc(pd->device, port_num, wc, grh, &ah_attr); if (ret) return ERR_PTR(ret); - return rdma_create_ah(pd, &ah_attr); + ah = rdma_create_ah(pd, &ah_attr); + + rdma_destroy_ah_attr(&ah_attr); + return ah; } EXPORT_SYMBOL(ib_create_ah_from_wc); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h 
index 995d517c0a76..c01e9c6ed666 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3150,6 +3150,13 @@ int ib_get_rdma_header_version(const union rdma_network_hdr *hdr); * ignored unless the work completion indicates that the GRH is valid. * @ah_attr: Returned attributes that can be used when creating an address * handle for replying to the message. + * When ib_init_ah_attr_from_wc() returns success, + * (a) for the IB link layer it optionally contains a reference to the SGID + * attribute when a GRH is present. + * (b) for the RoCE link layer it contains a reference to the SGID attribute. + * User must invoke rdma_destroy_ah_attr() to release the reference to SGID + * attributes which are initialized using ib_init_ah_attr_from_wc(). + * */ int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num, const struct ib_wc *wc, const struct ib_grh *grh, -- cgit From f685c19529f0e4d9738f52cbf65d1189c6551fd5 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 19 Jun 2018 10:59:15 +0300 Subject: IB: Make ib_init_ah_from_mcmember set sgid_attr This is really just a CM support function; normally a multicast address does not have a specific SGID, but the RDMA CM usage model does restrict things to the netdevice the CM id is bound to, at least for the RoCE case. Store the selected table entry in the sgid_attr for everything else to use. Signed-off-by: Parav Pandit Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cma.c | 2 ++ drivers/infiniband/core/multicast.c | 40 +++++++++++++++++++++++-------------- 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index fca2854749e5..40b2609e076b 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -3988,6 +3988,8 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) event.event = RDMA_CM_EVENT_MULTICAST_ERROR; ret = id_priv->id.event_handler(&id_priv->id, &event); + + rdma_destroy_ah_attr(&event.param.ud.ah_attr); if (ret) { cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 6c48f4193dda..d50ff70bb24b 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -716,14 +716,28 @@ int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num, } EXPORT_SYMBOL(ib_sa_get_mcmember_rec); +/** + * ib_init_ah_from_mcmember - Initialize AH attribute from multicast + * member record and gid of the device. + * @device: RDMA device + * @port_num: Port of the rdma device to consider + * @ndev: Optional netdevice, applicable only for RoCE + * @gid_type: GID type to consider + * @ah_attr: AH attribute to fill up on successful completion + * + * ib_init_ah_from_mcmember() initializes AH attribute based on multicast + * member record and other device properties. On success the caller is + * responsible for calling rdma_destroy_ah_attr() on the ah_attr. Returns 0 on + * success or appropriate error code. + * + */ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, struct ib_sa_mcmember_rec *rec, struct net_device *ndev, enum ib_gid_type gid_type, struct rdma_ah_attr *ah_attr) { - int ret; - u16 gid_index; + const struct ib_gid_attr *sgid_attr; /* GID table is not based on the netdevice for IB link layer, * so ignore ndev during search.
@@ -733,26 +747,22 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, else if (!rdma_protocol_roce(device, port_num)) return -EINVAL; - ret = ib_find_cached_gid_by_port(device, &rec->port_gid, - gid_type, port_num, - ndev, - &gid_index); - if (ret) - return ret; + sgid_attr = rdma_find_gid_by_port(device, &rec->port_gid, + gid_type, port_num, ndev); + if (IS_ERR(sgid_attr)) + return PTR_ERR(sgid_attr); - memset(ah_attr, 0, sizeof *ah_attr); + memset(ah_attr, 0, sizeof(*ah_attr)); ah_attr->type = rdma_ah_find_type(device, port_num); rdma_ah_set_dlid(ah_attr, be16_to_cpu(rec->mlid)); rdma_ah_set_sl(ah_attr, rec->sl); rdma_ah_set_port_num(ah_attr, port_num); rdma_ah_set_static_rate(ah_attr, rec->rate); - - rdma_ah_set_grh(ah_attr, &rec->mgid, - be32_to_cpu(rec->flow_label), - (u8)gid_index, - rec->hop_limit, - rec->traffic_class); + rdma_move_grh_sgid_attr(ah_attr, &rec->mgid, + be32_to_cpu(rec->flow_label), + rec->hop_limit, rec->traffic_class, + sgid_attr); return 0; } EXPORT_SYMBOL(ib_init_ah_from_mcmember); -- cgit From aa74f4878d61c83244ad8613082989b60a566ca4 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 19 Jun 2018 10:59:16 +0300 Subject: IB: Make init_ah_attr_grh_fields set sgid_attr Use the sgid and other information from the path record to figure out the sgid_attrs. Store the selected table entry in the sgid_attr for everything else to use. Signed-off-by: Parav Pandit Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cm.c | 10 +++++++--- drivers/infiniband/core/cma.c | 2 ++ drivers/infiniband/core/sa_query.c | 19 +++++++++---------- drivers/infiniband/ulp/ipoib/ipoib_main.c | 4 +++- 4 files changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 11b85933fb39..fe1171226c13 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -554,10 +554,12 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, /* * av->ah_attr might be initialized based on wc or during - * request processing time. So initialize a new ah_attr on stack. + * request processing time which might have reference to sgid_attr. + * So initialize a new ah_attr on stack. * If initialization fails, old ah_attr is used for sending any * responses. If initialization is successful, than new ah_attr - * is used by overwriting the old one. + * is used by overwriting the old one. So that right ah_attr + * can be used to return an error response. */ ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path, &new_ah_attr); @@ -567,8 +569,10 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, av->timeout = path->packet_life_time + 1; ret = add_cm_id_to_port_list(cm_id_priv, av, port); - if (ret) + if (ret) { + rdma_destroy_ah_attr(&new_ah_attr); return ret; + } rdma_move_ah_attr(&av->ah_attr, &new_ah_attr); return 0; } diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 40b2609e076b..367aa75ac338 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -3508,6 +3508,8 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, } ret = id_priv->id.event_handler(&id_priv->id, &event); + + rdma_destroy_ah_attr(&event.param.ud.ah_attr); if (ret) { /* Destroy the CM ID by returning a non-zero value. 
*/ id_priv->cm_id.ib = NULL; diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index a61ec7e33613..b6da4a6095f1 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1301,21 +1301,20 @@ static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num, { enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec); struct net_device *ndev; - u16 gid_index; - int ret; + const struct ib_gid_attr *gid_attr; ndev = ib_get_ndev_from_path(rec); - ret = ib_find_cached_gid_by_port(device, &rec->sgid, type, - port_num, ndev, &gid_index); + gid_attr = + rdma_find_gid_by_port(device, &rec->sgid, type, port_num, ndev); if (ndev) dev_put(ndev); - if (ret) - return ret; + if (IS_ERR(gid_attr)) + return PTR_ERR(gid_attr); - rdma_ah_set_grh(ah_attr, &rec->dgid, - be32_to_cpu(rec->flow_label), - gid_index, rec->hop_limit, - rec->traffic_class); + rdma_move_grh_sgid_attr(ah_attr, &rec->dgid, + be32_to_cpu(rec->flow_label), + rec->hop_limit, rec->traffic_class, + gid_attr); return 0; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index f4fac48aeade..45663f3117e5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -770,8 +770,10 @@ static void path_rec_completion(int status, struct rdma_ah_attr av; if (!ib_init_ah_attr_from_path(priv->ca, priv->port, - pathrec, &av)) + pathrec, &av)) { ah = ipoib_create_ah(dev, priv->pd, &av); + rdma_destroy_ah_attr(&av); + } } spin_lock_irqsave(&priv->lock, flags); -- cgit From 4ed13a5f2d606d2e6bcc5b8adbf08ed52e76cbb5 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 19 Jun 2018 10:59:17 +0300 Subject: IB/cm: Keep track of the sgid_attr that created the cm id Hold a reference to the sgid_attr which is used in a cm_id until the cm_id is destroyed.
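A condensed sketch of the lifetime rule this establishes (the helper names are illustrative; the bind and put calls are the ones the patch adds to cma.c below):

/* Acquire: bind the table entry; the lookup already returned it with a
 * reference held, so no extra hold is needed here.
 */
static void example_bind(struct rdma_id_private *id_priv,
			 const struct ib_gid_attr *sgid_attr)
{
	/* sgid_attr came from rdma_find_gid_by_port() with a reference held */
	id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr;
}

/* Release: the reference is only dropped when the cm_id goes away. */
static void example_release(struct rdma_id_private *id_priv)
{
	if (id_priv->id.route.addr.dev_addr.sgid_attr)
		rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
}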
Signed-off-by: Parav Pandit Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cma.c | 67 ++++++++++++++++++++++++++----------------- include/rdma/ib_addr.h | 2 ++ 2 files changed, 43 insertions(+), 26 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 367aa75ac338..de7d2501a740 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -603,46 +603,54 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a return ret; } -static inline int cma_validate_port(struct ib_device *device, u8 port, - enum ib_gid_type gid_type, - union ib_gid *gid, - struct rdma_id_private *id_priv) +static const struct ib_gid_attr * +cma_validate_port(struct ib_device *device, u8 port, + enum ib_gid_type gid_type, + union ib_gid *gid, + struct rdma_id_private *id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; int bound_if_index = dev_addr->bound_dev_if; + const struct ib_gid_attr *sgid_attr; int dev_type = dev_addr->dev_type; struct net_device *ndev = NULL; - int ret = -ENODEV; if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) - return ret; + return ERR_PTR(-ENODEV); if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) - return ret; + return ERR_PTR(-ENODEV); if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) { ndev = dev_get_by_index(dev_addr->net, bound_if_index); if (!ndev) - return ret; + return ERR_PTR(-ENODEV); } else { gid_type = IB_GID_TYPE_IB; } - ret = ib_find_cached_gid_by_port(device, gid, gid_type, port, - ndev, NULL); - + sgid_attr = rdma_find_gid_by_port(device, gid, gid_type, port, ndev); if (ndev) dev_put(ndev); + return sgid_attr; +} - return ret; +static void cma_bind_sgid_attr(struct rdma_id_private *id_priv, + const struct ib_gid_attr *sgid_attr) +{ + WARN_ON(id_priv->id.route.addr.dev_addr.sgid_attr); + id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr; } static int cma_acquire_dev(struct rdma_id_private *id_priv, struct rdma_id_private *listen_id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + const struct ib_gid_attr *sgid_attr; struct cma_device *cma_dev; union ib_gid gid, iboe_gid, *gidp; + enum ib_gid_type gid_type; + enum ib_gid_type default_type; int ret = -ENODEV; u8 port; @@ -662,14 +670,15 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv, port = listen_id_priv->id.port_num; gidp = rdma_protocol_roce(cma_dev->device, port) ? &iboe_gid : &gid; - - ret = cma_validate_port(cma_dev->device, port, - rdma_protocol_ib(cma_dev->device, port) ? - IB_GID_TYPE_IB : - listen_id_priv->gid_type, gidp, - id_priv); - if (!ret) { + gid_type = rdma_protocol_ib(cma_dev->device, port) ? + IB_GID_TYPE_IB : + listen_id_priv->gid_type; + sgid_attr = cma_validate_port(cma_dev->device, port, + gid_type, gidp, id_priv); + if (!IS_ERR(sgid_attr)) { id_priv->id.port_num = port; + cma_bind_sgid_attr(id_priv, sgid_attr); + ret = 0; goto out; } } @@ -683,14 +692,16 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv, gidp = rdma_protocol_roce(cma_dev->device, port) ? &iboe_gid : &gid; - - ret = cma_validate_port(cma_dev->device, port, - rdma_protocol_ib(cma_dev->device, port) ? - IB_GID_TYPE_IB : - cma_dev->default_gid_type[port - 1], - gidp, id_priv); - if (!ret) { + default_type = cma_dev->default_gid_type[port - 1]; + gid_type = + rdma_protocol_ib(cma_dev->device, port) ? 
+ IB_GID_TYPE_IB : default_type; + sgid_attr = cma_validate_port(cma_dev->device, port, + gid_type, gidp, id_priv); + if (!IS_ERR(sgid_attr)) { id_priv->id.port_num = port; + cma_bind_sgid_attr(id_priv, sgid_attr); + ret = 0; goto out; } } @@ -1706,6 +1717,10 @@ void rdma_destroy_id(struct rdma_cm_id *id) cma_deref_id(id_priv->id.context); kfree(id_priv->id.route.path_rec); + + if (id_priv->id.route.addr.dev_addr.sgid_attr) + rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr); + put_net(id_priv->id.route.addr.dev_addr.net); kfree(id_priv); } diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index c2c8b1fdeead..715394f6d18a 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -58,6 +58,7 @@ * @bound_dev_if: An optional device interface index. * @transport: The transport type used. * @net: Network namespace containing the bound_dev_if net_dev. + * @sgid_attr: GID attribute to use for identified SGID */ struct rdma_dev_addr { unsigned char src_dev_addr[MAX_ADDR_LEN]; @@ -67,6 +68,7 @@ struct rdma_dev_addr { int bound_dev_if; enum rdma_transport_type transport; struct net *net; + const struct ib_gid_attr *sgid_attr; enum rdma_network_type network; int hoplimit; }; -- cgit From 815d456ef21a132b60ce67908d289235e9bb896c Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 19 Jun 2018 10:59:18 +0300 Subject: IB/cm: Pass the sgid_attr through various events Make the sgid_attr available along with path information to the event consumer; this allows the consumer to keep using the same GID table entry that the event relates to. Signed-off-by: Parav Pandit Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cm.c | 6 ++++-- drivers/infiniband/core/cma.c | 3 +++ include/rdma/ib_cm.h | 3 +++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index fe1171226c13..00c90d4f27bb 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3671,7 +3671,8 @@ error: spin_unlock_irqrestore(&cm_id_priv->lock, flags); } EXPORT_SYMBOL(ib_send_cm_sidr_rep); -static void cm_format_sidr_rep_event(struct cm_work *work) +static void cm_format_sidr_rep_event(struct cm_work *work, + const struct cm_id_private *cm_id_priv) { struct cm_sidr_rep_msg *sidr_rep_msg; struct ib_cm_sidr_rep_event_param *param; @@ -3684,6 +3685,7 @@ static void cm_format_sidr_rep_event(struct cm_work *work) param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg)); param->info = &sidr_rep_msg->info; param->info_len = sidr_rep_msg->info_length; + param->sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr; work->cm_event.private_data = &sidr_rep_msg->private_data; } @@ -3707,7 +3709,7 @@ static int cm_sidr_rep_handler(struct cm_work *work) ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); spin_unlock_irq(&cm_id_priv->lock); - cm_format_sidr_rep_event(work); + cm_format_sidr_rep_event(work, cm_id_priv); cm_process_work(cm_id_priv, work); return 0; out: diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index de7d2501a740..f0eeb43b388f 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -3581,6 +3581,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, id_priv->cm_id.ib = id; req.path = id_priv->id.route.path_rec; + req.sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr; req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
req.max_cm_retries = CMA_MAX_CM_RETRIES; @@ -3642,6 +3643,8 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, if (route->num_paths == 2) req.alternate_path = &route->path_rec[1]; + req.ppath_sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr; + /* Alternate path SGID attribute currently unsupported */ req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); req.qp_num = id_priv->qp_num; req.qp_type = id_priv->id.qp_type; diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 7979cb04f529..c98d603c0b63 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -246,6 +246,7 @@ struct ib_cm_sidr_rep_event_param { u32 qkey; u32 qpn; void *info; + const struct ib_gid_attr *sgid_attr; u8 info_len; }; @@ -365,6 +366,7 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, struct ib_cm_req_param { struct sa_path_rec *primary_path; struct sa_path_rec *alternate_path; + const struct ib_gid_attr *ppath_sgid_attr; __be64 service_id; u32 qp_num; enum ib_qp_type qp_type; @@ -566,6 +568,7 @@ int ib_send_cm_apr(struct ib_cm_id *cm_id, struct ib_cm_sidr_req_param { struct sa_path_rec *path; + const struct ib_gid_attr *sgid_attr; __be64 service_id; int timeout_ms; const void *private_data; -- cgit From 398391071f2576bbc6351bcb92c78fc432190ac3 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 19 Jun 2018 10:59:19 +0300 Subject: IB/cm: Replace members of sa_path_rec with 'struct sgid_attr *' While processing a path record entry in CM messages the associated GID attribute is now also supplied. Currently for RoCE a netdevice's net namespace pointer and ifindex are stored in the path record entry. Both of these netdev fields can change at any time while CM messages are being processed. Additionally, storing the net namespace without holding a reference to it would lead to a use-after-free crash, so it is removed. Netdevice information for RoCE is instead provided via the referenced GID attribute in ib_cm requests. A design that stores the net pointer leads to a situation where the kernel can crash once that pointer becomes invalid; however, today it is always initialized to init_net, which cannot become invalid. In order to support processing packets in the arbitrary namespace of the received packet, it is necessary to avoid such conditions. This patch removes the dependency on the net pointer and ifindex; instead it relies on the SGID attribute, which contains a pointer to the netdev.
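As a hedged sketch of the replacement pattern (the helper below is hypothetical): rather than re-resolving a netdevice from a stored net namespace and ifindex, the bound interface is taken from the referenced GID attribute, which keeps the netdev valid for as long as the reference is held.

/* Hypothetical helper: derive the bound interface from the GID attribute
 * instead of from the removed sa_path_rec::roce.net / roce.ifindex fields.
 */
static int bound_dev_from_attr(const struct ib_gid_attr *attr,
			       struct rdma_dev_addr *dev_addr)
{
	if (!attr || !attr->ndev)
		return -EINVAL;

	/* Valid while the caller holds its reference on attr */
	dev_addr->bound_dev_if = attr->ndev->ifindex;
	return 0;
}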
Signed-off-by: Parav Pandit Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cm.c | 78 +++++++++++++++++++------------ drivers/infiniband/core/cma.c | 5 +- drivers/infiniband/core/sa_query.c | 71 +++++++++++++++++----------- drivers/infiniband/core/uverbs_marshall.c | 2 - drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 +- include/rdma/ib_sa.h | 49 ++----------------- 6 files changed, 97 insertions(+), 110 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 00c90d4f27bb..c2b7edf5857f 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -508,31 +508,50 @@ static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv, return ret; } -static struct cm_port *get_cm_port_from_path(struct sa_path_rec *path) +static struct cm_port * +get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr) { struct cm_device *cm_dev; struct cm_port *port = NULL; unsigned long flags; - u8 p; - struct net_device *ndev = ib_get_ndev_from_path(path); - - read_lock_irqsave(&cm.device_lock, flags); - list_for_each_entry(cm_dev, &cm.device_list, list) { - if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid, - sa_conv_pathrec_to_gid_type(path), - ndev, &p, NULL)) { - port = cm_dev->port[p - 1]; - break; + + if (attr) { + read_lock_irqsave(&cm.device_lock, flags); + list_for_each_entry(cm_dev, &cm.device_list, list) { + if (cm_dev->ib_device == attr->device) { + port = cm_dev->port[attr->port_num - 1]; + break; + } + } + read_unlock_irqrestore(&cm.device_lock, flags); + } else { + /* SGID attribute can be NULL in following + * conditions. + * (a) Alternative path + * (b) IB link layer without GRH + * (c) LAP send messages + */ + read_lock_irqsave(&cm.device_lock, flags); + list_for_each_entry(cm_dev, &cm.device_list, list) { + attr = rdma_find_gid(cm_dev->ib_device, + &path->sgid, + sa_conv_pathrec_to_gid_type(path), + NULL); + if (!IS_ERR(attr)) { + port = cm_dev->port[attr->port_num - 1]; + break; + } } + read_unlock_irqrestore(&cm.device_lock, flags); + if (port) + rdma_put_gid_attr(attr); } - read_unlock_irqrestore(&cm.device_lock, flags); - - if (ndev) - dev_put(ndev); return port; } -static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, +static int cm_init_av_by_path(struct sa_path_rec *path, + const struct ib_gid_attr *sgid_attr, + struct cm_av *av, struct cm_id_private *cm_id_priv) { struct rdma_ah_attr new_ah_attr; @@ -540,7 +559,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, struct cm_port *port; int ret; - port = get_cm_port_from_path(path); + port = get_cm_port_from_path(path, sgid_attr); if (!port) return -EINVAL; cm_dev = port->cm_dev; @@ -562,7 +581,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, * can be used to return an error response. 
*/ ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path, - &new_ah_attr); + &new_ah_attr, sgid_attr); if (ret) return ret; @@ -1420,12 +1439,13 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, goto out; } - ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av, + ret = cm_init_av_by_path(param->primary_path, + param->ppath_sgid_attr, &cm_id_priv->av, cm_id_priv); if (ret) goto error1; if (param->alternate_path) { - ret = cm_init_av_by_path(param->alternate_path, + ret = cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av, cm_id_priv); if (ret) goto error1; @@ -1980,10 +2000,6 @@ static int cm_req_handler(struct cm_work *work) if (gid_attr.ndev) { work->path[0].rec_type = sa_conv_gid_to_pathrec_type(gid_attr.gid_type); - sa_path_set_ifindex(&work->path[0], - gid_attr.ndev->ifindex); - sa_path_set_ndev(&work->path[0], - dev_net(gid_attr.ndev)); dev_put(gid_attr.ndev); } else { cm_path_set_rec_type(work->port->cm_dev->ib_device, @@ -1999,7 +2015,7 @@ static int cm_req_handler(struct cm_work *work) sa_path_set_dmac(&work->path[0], cm_id_priv->av.ah_attr.roce.dmac); work->path[0].hop_limit = grh->hop_limit; - ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av, + ret = cm_init_av_by_path(&work->path[0], &gid_attr, &cm_id_priv->av, cm_id_priv); if (ret) { int err; @@ -2018,8 +2034,8 @@ static int cm_req_handler(struct cm_work *work) goto rejected; } if (cm_req_has_alt_path(req_msg)) { - ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av, - cm_id_priv); + ret = cm_init_av_by_path(&work->path[1], NULL, + &cm_id_priv->alt_av, cm_id_priv); if (ret) { ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID, &work->path[0].sgid, @@ -3142,7 +3158,7 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id, goto out; } - ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av, + ret = cm_init_av_by_path(alternate_path, NULL, &cm_id_priv->alt_av, cm_id_priv); if (ret) goto out; @@ -3285,7 +3301,7 @@ static int cm_lap_handler(struct cm_work *work) if (ret) goto unlock; - cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av, + cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av, cm_id_priv); cm_id_priv->id.lap_state = IB_CM_LAP_RCVD; cm_id_priv->tid = lap_msg->hdr.tid; @@ -3487,7 +3503,9 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); - ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv); + ret = cm_init_av_by_path(param->path, param->sgid_attr, + &cm_id_priv->av, + cm_id_priv); if (ret) goto out; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index f0eeb43b388f..a735ab4cddda 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2583,8 +2583,6 @@ cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv) route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type); route->path_rec->roce.route_resolved = true; - sa_path_set_ndev(route->path_rec, addr->dev_addr.net); - sa_path_set_ifindex(route->path_rec, ndev->ifindex); sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr); return ndev; } @@ -3510,7 +3508,8 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, ib_init_ah_attr_from_path(id_priv->id.device, id_priv->id.port_num, id_priv->id.route.path_rec, - &event.param.ud.ah_attr); + &event.param.ud.ah_attr, + rep->sgid_attr); event.param.ud.qp_num = rep->qpn; event.param.ud.qkey = rep->qkey; event.event = RDMA_CM_EVENT_ESTABLISHED; diff --git 
a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index b6da4a6095f1..7005afb8a712 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1229,18 +1229,12 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num) static int roce_resolve_route_from_path(struct ib_device *device, u8 port_num, - struct sa_path_rec *rec) + struct sa_path_rec *rec, + const struct ib_gid_attr *attr) { struct net_device *resolved_dev; - struct net_device *ndev; struct net_device *idev; - struct rdma_dev_addr dev_addr = { - .bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ? - sa_path_get_ifindex(rec) : 0), - .net = sa_path_get_ndev(rec) ? - sa_path_get_ndev(rec) : - &init_net - }; + struct rdma_dev_addr dev_addr = {}; union { struct sockaddr _sockaddr; struct sockaddr_in _sockaddr_in; @@ -1250,6 +1244,14 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num, if (rec->roce.route_resolved) return 0; + if (!attr || !attr->ndev) + return -EINVAL; + + dev_addr.bound_dev_if = attr->ndev->ifindex; + /* TODO: Use net from the ib_gid_attr once it is added to it, + * until then, limit it to init_net. + */ + dev_addr.net = &init_net; if (!device->get_netdev) return -EOPNOTSUPP; @@ -1278,16 +1280,13 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num, ret = -ENODEV; goto done; } - ndev = ib_get_ndev_from_path(rec); rcu_read_lock(); - if ((ndev && ndev != resolved_dev) || + if (attr->ndev != resolved_dev || (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev, resolved_dev))) ret = -EHOSTUNREACH; rcu_read_unlock(); dev_put(resolved_dev); - if (ndev) - dev_put(ndev); done: dev_put(idev); if (!ret) @@ -1297,19 +1296,18 @@ done: static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num, struct sa_path_rec *rec, - struct rdma_ah_attr *ah_attr) + struct rdma_ah_attr *ah_attr, + const struct ib_gid_attr *gid_attr) { enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec); - struct net_device *ndev; - const struct ib_gid_attr *gid_attr; - ndev = ib_get_ndev_from_path(rec); - gid_attr = - rdma_find_gid_by_port(device, &rec->sgid, type, port_num, ndev); - if (ndev) - dev_put(ndev); - if (IS_ERR(gid_attr)) - return PTR_ERR(gid_attr); + if (!gid_attr) { + gid_attr = rdma_find_gid_by_port(device, &rec->sgid, type, + port_num, NULL); + if (IS_ERR(gid_attr)) + return PTR_ERR(gid_attr); + } else + rdma_hold_gid_attr(gid_attr); rdma_move_grh_sgid_attr(ah_attr, &rec->dgid, be32_to_cpu(rec->flow_label), @@ -1318,9 +1316,26 @@ static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num, return 0; } +/** + * ib_init_ah_attr_from_path - Initialize address handle attributes based on + * an SA path record. + * @device: Device with which the ah attributes are associated. + * @port_num: Port on the specified device. + * @rec: path record entry to use for ah attributes initialization. + * @ah_attr: address handle attributes to initialize from the path record. + * @sgid_attr: SGID attribute to consider during initialization. + * + * When ib_init_ah_attr_from_path() returns success, + * (a) for the IB link layer it optionally contains a reference to the SGID + * attribute when a GRH is present. + * (b) for the RoCE link layer it contains a reference to the SGID attribute. + * User must invoke rdma_destroy_ah_attr() to release the reference to SGID + * attributes which are initialized using ib_init_ah_attr_from_path().
+ */ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num, struct sa_path_rec *rec, - struct rdma_ah_attr *ah_attr) + struct rdma_ah_attr *ah_attr, + const struct ib_gid_attr *gid_attr) { int ret = 0; @@ -1331,7 +1346,8 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num, rdma_ah_set_static_rate(ah_attr, rec->rate); if (sa_path_is_roce(rec)) { - ret = roce_resolve_route_from_path(device, port_num, rec); + ret = roce_resolve_route_from_path(device, port_num, rec, + gid_attr); if (ret) return ret; @@ -1348,7 +1364,8 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num, } if (rec->hop_limit > 0 || sa_path_is_roce(rec)) - ret = init_ah_attr_grh_fields(device, port_num, rec, ah_attr); + ret = init_ah_attr_grh_fields(device, port_num, + rec, ah_attr, gid_attr); return ret; } EXPORT_SYMBOL(ib_init_ah_attr_from_path); @@ -1556,8 +1573,6 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, ARRAY_SIZE(path_rec_table), mad->data, &rec); rec.rec_type = SA_PATH_REC_TYPE_IB; - sa_path_set_ndev(&rec, NULL); - sa_path_set_ifindex(&rec, 0); sa_path_set_dmac_zero(&rec); if (query->conv_pr) { diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c index bb372b4713a4..b8d715c68ca4 100644 --- a/drivers/infiniband/core/uverbs_marshall.c +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -211,7 +211,5 @@ void ib_copy_path_rec_from_user(struct sa_path_rec *dst, /* TODO: No need to set this */ sa_path_set_dmac_zero(dst); - sa_path_set_ndev(dst, NULL); - sa_path_set_ifindex(dst, 0); } EXPORT_SYMBOL(ib_copy_path_rec_from_user); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 45663f3117e5..983e52b871f3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -770,7 +770,7 @@ static void path_rec_completion(int status, struct rdma_ah_attr av; if (!ib_init_ah_attr_from_path(priv->ca, priv->port, - pathrec, &av)) { + pathrec, &av, NULL)) { ah = ipoib_create_ah(dev, priv->pd, &av); rdma_destroy_ah_attr(&av); } diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index bacb144f7780..b6ddf2a1b9d8 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -172,12 +172,7 @@ struct sa_path_rec_ib { */ struct sa_path_rec_roce { bool route_resolved; - u8 dmac[ETH_ALEN]; - /* ignored in IB */ - int ifindex; - /* ignored in IB */ - struct net *net; - + u8 dmac[ETH_ALEN]; }; struct sa_path_rec_opa { @@ -556,13 +551,10 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, enum ib_gid_type gid_type, struct rdma_ah_attr *ah_attr); -/** - * ib_init_ah_attr_from_path - Initialize address handle attributes based on - * an SA path record. 
- */ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num, struct sa_path_rec *rec, - struct rdma_ah_attr *ah_attr); + struct rdma_ah_attr *ah_attr, + const struct ib_gid_attr *sgid_attr); /** * ib_sa_pack_path - Conert a path record from struct ib_sa_path_rec @@ -667,45 +659,10 @@ static inline void sa_path_set_dmac_zero(struct sa_path_rec *rec) eth_zero_addr(rec->roce.dmac); } -static inline void sa_path_set_ifindex(struct sa_path_rec *rec, int ifindex) -{ - if (sa_path_is_roce(rec)) - rec->roce.ifindex = ifindex; -} - -static inline void sa_path_set_ndev(struct sa_path_rec *rec, struct net *net) -{ - if (sa_path_is_roce(rec)) - rec->roce.net = net; -} - static inline u8 *sa_path_get_dmac(struct sa_path_rec *rec) { if (sa_path_is_roce(rec)) return rec->roce.dmac; return NULL; } - -static inline int sa_path_get_ifindex(struct sa_path_rec *rec) -{ - if (sa_path_is_roce(rec)) - return rec->roce.ifindex; - return 0; -} - -static inline struct net *sa_path_get_ndev(struct sa_path_rec *rec) -{ - if (sa_path_is_roce(rec)) - return rec->roce.net; - return NULL; -} - -static inline struct net_device *ib_get_ndev_from_path(struct sa_path_rec *rec) -{ - return sa_path_get_ndev(rec) ? - dev_get_by_index(sa_path_get_ndev(rec), - sa_path_get_ifindex(rec)) - : NULL; -} - #endif /* IB_SA_H */ -- cgit From a8872d53e9b7fcf650f0e4be40887b3ad5210e33 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 19 Jun 2018 10:59:20 +0300 Subject: IB/cm: Use sgid_attr from the AV Prior patches now ensure that the AV has a sgid_attr, if one would have been required. Instead of querying for one, take it directly from the AH. Signed-off-by: Parav Pandit Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cm.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index c2b7edf5857f..de699f67a755 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1941,9 +1941,8 @@ static int cm_req_handler(struct cm_work *work) struct ib_cm_id *cm_id; struct cm_id_private *cm_id_priv, *listen_cm_id_priv; struct cm_req_msg *req_msg; - union ib_gid gid; - struct ib_gid_attr gid_attr; const struct ib_global_route *grh; + const struct ib_gid_attr *gid_attr; int ret; req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; @@ -1988,20 +1987,13 @@ static int cm_req_handler(struct cm_work *work) if (cm_req_has_alt_path(req_msg)) memset(&work->path[1], 0, sizeof(work->path[1])); grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr); - ret = ib_get_cached_gid(work->port->cm_dev->ib_device, - work->port->port_num, - grh->sgid_index, - &gid, &gid_attr); - if (ret) { - ib_send_cm_rej(cm_id, IB_CM_REJ_UNSUPPORTED, NULL, 0, NULL, 0); - goto rejected; - } + gid_attr = grh->sgid_attr; - if (gid_attr.ndev) { + if (gid_attr && gid_attr->ndev) { work->path[0].rec_type = - sa_conv_gid_to_pathrec_type(gid_attr.gid_type); - dev_put(gid_attr.ndev); + sa_conv_gid_to_pathrec_type(gid_attr->gid_type); } else { + /* If no GID attribute or ndev is null, it is not RoCE. 
*/ cm_path_set_rec_type(work->port->cm_dev->ib_device, work->port->port_num, &work->path[0], @@ -2015,7 +2007,7 @@ static int cm_req_handler(struct cm_work *work) sa_path_set_dmac(&work->path[0], cm_id_priv->av.ah_attr.roce.dmac); work->path[0].hop_limit = grh->hop_limit; - ret = cm_init_av_by_path(&work->path[0], &gid_attr, &cm_id_priv->av, + ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av, cm_id_priv); if (ret) { int err; -- cgit From ea8c2d8f6014b74921dd5a9654a623a725d79608 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 19 Jun 2018 10:59:21 +0300 Subject: RDMA/core: Remove unused ib cache functions Now that all users have been converted to use the version of these APIs that returns a gid_attr pointer we can delete the old entry points. Signed-off-by: Parav Pandit Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cache.c | 68 ----------------------------------------- include/rdma/ib_cache.h | 39 ----------------------- 2 files changed, 107 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index dada33c53188..357a5cb328c7 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -643,30 +643,6 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, return 0; } -static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index, - union ib_gid *gid, struct ib_gid_attr *attr) -{ - struct ib_gid_table *table; - - table = rdma_gid_table(ib_dev, port); - - if (index < 0 || index >= table->sz) - return -EINVAL; - - if (!is_gid_entry_valid(table->data_vec[index])) - return -EINVAL; - - memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid)); - if (attr) { - memcpy(attr, &table->data_vec[index]->attr, - sizeof(*attr)); - if (attr->ndev) - dev_hold(attr->ndev); - } - - return 0; -} - /** * rdma_find_gid_by_port - Returns the GID entry attributes when it finds * a valid GID entry for given search parameters. 
It searches for the specified @@ -973,28 +949,6 @@ static int gid_table_setup_one(struct ib_device *ib_dev) return err; } -int ib_get_cached_gid(struct ib_device *device, - u8 port_num, - int index, - union ib_gid *gid, - struct ib_gid_attr *gid_attr) -{ - int res; - unsigned long flags; - struct ib_gid_table *table; - - if (!rdma_is_port_valid(device, port_num)) - return -EINVAL; - - table = rdma_gid_table(device, port_num); - read_lock_irqsave(&table->rwlock, flags); - res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr); - read_unlock_irqrestore(&table->rwlock, flags); - - return res; -} -EXPORT_SYMBOL(ib_get_cached_gid); - /** * rdma_query_gid - Read the GID content from the GID software cache * @device: Device to query the GID @@ -1102,28 +1056,6 @@ int ib_find_cached_gid(struct ib_device *device, const union ib_gid *gid, } EXPORT_SYMBOL(ib_find_cached_gid); -int ib_find_gid_by_filter(struct ib_device *device, - const union ib_gid *gid, - u8 port_num, - bool (*filter)(const union ib_gid *gid, - const struct ib_gid_attr *, - void *), - void *context, u16 *index) -{ - const struct ib_gid_attr *res; - - res = rdma_find_gid_by_filter(device, gid, port_num, filter, - context); - if (IS_ERR(res)) - return PTR_ERR(res); - - if (index) - *index = res->index; - - rdma_put_gid_attr(res); - return 0; -} - int ib_get_cached_pkey(struct ib_device *device, u8 port_num, int index, diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index 059f7d894939..1108d4220276 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -37,56 +37,17 @@ #include -/** - * ib_get_cached_gid - Returns a cached GID table entry - * @device: The device to query. - * @port_num: The port number of the device to query. - * @index: The index into the cached GID table to query. - * @gid: The GID value found at the specified index. - * @attr: The GID attribute found at the specified index (only in RoCE). - * NULL means ignore (output parameter). - * - * ib_get_cached_gid() fetches the specified GID table entry stored in - * the local software cache. 
- */ -int ib_get_cached_gid(struct ib_device *device, - u8 port_num, - int index, - union ib_gid *gid, - struct ib_gid_attr *attr); int rdma_query_gid(struct ib_device *device, u8 port_num, int index, union ib_gid *gid); - -int ib_find_cached_gid(struct ib_device *device, - const union ib_gid *gid, - enum ib_gid_type gid_type, - struct net_device *ndev, - u8 *port_num, - u16 *index); const struct ib_gid_attr *rdma_find_gid(struct ib_device *device, const union ib_gid *gid, enum ib_gid_type gid_type, struct net_device *ndev); - -int ib_find_cached_gid_by_port(struct ib_device *device, - const union ib_gid *gid, - enum ib_gid_type gid_type, - u8 port_num, - struct net_device *ndev, - u16 *index); const struct ib_gid_attr *rdma_find_gid_by_port(struct ib_device *ib_dev, const union ib_gid *gid, enum ib_gid_type gid_type, u8 port, struct net_device *ndev); - -int ib_find_gid_by_filter(struct ib_device *device, - const union ib_gid *gid, - u8 port_num, - bool (*filter)(const union ib_gid *gid, - const struct ib_gid_attr *, - void *), - void *context, u16 *index); const struct ib_gid_attr *rdma_find_gid_by_filter( struct ib_device *device, const union ib_gid *gid, u8 port_num, bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *, -- cgit From d0e84c0ad39826c38a9d6881fd8f9af476a5d9a7 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 19 Jun 2018 10:43:55 +0300 Subject: IB/mlx5: Add support for drain SQ & RQ This patch follows the logic from ib_core but considers the internal device state upon executing the involved commands. Specifically, upon an internal error state, modifying the QP to the error state can be assumed to succeed, as each in-progress WR is going to be flushed in error in any case, which is what that modify command expects. In addition, as the drain should never fail, the driver makes sure that post_send/recv will succeed even if the device is already in an internal error state. As such, once the driver supplies the simulated/SW CQEs, the CQE for the drain WR will be handled as well. In the case of an internal error state, the CQE for the drain WR may be completed either as part of the main task that handled the error state or by the task that issued the drain WR. As the above depends on scheduling, the code takes the relevant locks and actions to make sure that the completion handler for that WR is always called after the post_send/recv were issued, but never in parallel to the other task that handles the error flow.
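For context, the ib_core entry point these callbacks hook into dispatches roughly as follows (a simplified sketch of ib_drain_sq() in core/verbs.c, not the verbatim source; with this patch mlx5 QPs take the driver branch):

/* Simplified view of the core dispatch this patch plugs into. */
void sketch_ib_drain_sq(struct ib_qp *qp)
{
	if (qp->device->drain_sq)
		qp->device->drain_sq(qp);	/* now mlx5_ib_drain_sq() */
	else
		__ib_drain_sq(qp);		/* generic post-and-wait fallback */
}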
Signed-off-by: Yishai Hadas Reviewed-by: Max Gurtovoy Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 + drivers/infiniband/hw/mlx5/qp.c | 153 +++++++++++++++++++++++++++++++++-- 3 files changed, 150 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b4e8173f3239..5d030bc3cdd8 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5601,6 +5601,8 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) dev->ib_dev.modify_qp = mlx5_ib_modify_qp; dev->ib_dev.query_qp = mlx5_ib_query_qp; dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp; + dev->ib_dev.drain_sq = mlx5_ib_drain_sq; + dev->ib_dev.drain_rq = mlx5_ib_drain_rq; dev->ib_dev.post_send = mlx5_ib_post_send; dev->ib_dev.post_recv = mlx5_ib_post_recv; dev->ib_dev.create_cq = mlx5_ib_create_cq; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index df547f060b60..67e86c8304a2 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1016,6 +1016,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int mlx5_ib_destroy_qp(struct ib_qp *qp); +void mlx5_ib_drain_sq(struct ib_qp *qp); +void mlx5_ib_drain_rq(struct ib_qp *qp); int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index d5f072c50ee5..6034a670859f 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4361,9 +4361,8 @@ static void finish_wqe(struct mlx5_ib_qp *qp, qp->sq.w_list[idx].next = qp->sq.cur_post; } - -int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int _mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr, bool drain) { struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ struct mlx5_ib_dev *dev = to_mdev(ibqp->device); @@ -4394,7 +4393,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, spin_lock_irqsave(&qp->sq.lock, flags); - if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) { err = -EIO; *bad_wr = wr; nreq = 0; @@ -4691,13 +4690,19 @@ out: return err; } +int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + return _mlx5_ib_post_send(ibqp, wr, bad_wr, false); +} + static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size) { sig->signature = calc_sig(sig, size); } -int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +static int _mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr, bool drain) { struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_wqe_data_seg *scat; @@ -4715,7 +4720,7 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, spin_lock_irqsave(&qp->rq.lock, flags); - if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) { err = -EIO; *bad_wr = wr; nreq = 0; @@ -4777,6 +4782,12 @@ out: return err; } +int mlx5_ib_post_recv(struct ib_qp *ibqp, struct 
ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + return _mlx5_ib_post_recv(ibqp, wr, bad_wr, false); +} + static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state) { switch (mlx5_state) { @@ -5698,3 +5709,131 @@ out: kvfree(in); return err; } + +struct mlx5_ib_drain_cqe { + struct ib_cqe cqe; + struct completion done; +}; + +static void mlx5_ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct mlx5_ib_drain_cqe *cqe = container_of(wc->wr_cqe, + struct mlx5_ib_drain_cqe, + cqe); + + complete(&cqe->done); +} + +/* This function returns only once the drained WR was completed */ +static void handle_drain_completion(struct ib_cq *cq, + struct mlx5_ib_drain_cqe *sdrain, + struct mlx5_ib_dev *dev) +{ + struct mlx5_core_dev *mdev = dev->mdev; + + if (cq->poll_ctx == IB_POLL_DIRECT) { + while (wait_for_completion_timeout(&sdrain->done, HZ / 10) <= 0) + ib_process_cq_direct(cq, -1); + return; + } + + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + struct mlx5_ib_cq *mcq = to_mcq(cq); + bool triggered = false; + unsigned long flags; + + spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); + /* Make sure that the CQ handler won't run if wasn't run yet */ + if (!mcq->mcq.reset_notify_added) + mcq->mcq.reset_notify_added = 1; + else + triggered = true; + spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); + + if (triggered) { + /* Wait for any scheduled/running task to be ended */ + switch (cq->poll_ctx) { + case IB_POLL_SOFTIRQ: + irq_poll_disable(&cq->iop); + irq_poll_enable(&cq->iop); + break; + case IB_POLL_WORKQUEUE: + cancel_work_sync(&cq->work); + break; + default: + WARN_ON_ONCE(1); + } + } + + /* Run the CQ handler - this makes sure that the drain WR will + * be processed if wasn't processed yet. 
+ */ + mcq->mcq.comp(&mcq->mcq); + } + + wait_for_completion(&sdrain->done); +} + +void mlx5_ib_drain_sq(struct ib_qp *qp) +{ + struct ib_cq *cq = qp->send_cq; + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; + struct mlx5_ib_drain_cqe sdrain; + struct ib_send_wr *bad_swr; + struct ib_rdma_wr swr = { + .wr = { + .next = NULL, + { .wr_cqe = &sdrain.cqe, }, + .opcode = IB_WR_RDMA_WRITE, + }, + }; + int ret; + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_core_dev *mdev = dev->mdev; + + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) { + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); + return; + } + + sdrain.cqe.done = mlx5_ib_drain_qp_done; + init_completion(&sdrain.done); + + ret = _mlx5_ib_post_send(qp, &swr.wr, &bad_swr, true); + if (ret) { + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); + return; + } + + handle_drain_completion(cq, &sdrain, dev); +} + +void mlx5_ib_drain_rq(struct ib_qp *qp) +{ + struct ib_cq *cq = qp->recv_cq; + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; + struct mlx5_ib_drain_cqe rdrain; + struct ib_recv_wr rwr = {}, *bad_rwr; + int ret; + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_core_dev *mdev = dev->mdev; + + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) { + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); + return; + } + + rwr.wr_cqe = &rdrain.cqe; + rdrain.cqe.done = mlx5_ib_drain_qp_done; + init_completion(&rdrain.done); + + ret = _mlx5_ib_post_recv(qp, &rwr, &bad_rwr, true); + if (ret) { + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); + return; + } + + handle_drain_completion(cq, &rdrain, dev); +} -- cgit From 1975acd9f3fdc08d5e77fa95e740592e37c97fc0 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 19 Jun 2018 10:43:56 +0300 Subject: IB/mlx4: Add support for drain SQ & RQ This patch follows the logic from ib_core but considers the internal device state when executing the involved commands. Specifically, upon an internal error state, modifying the QP to the error state can be assumed to succeed, because every in-progress WR is going to be flushed in error in any case, which is exactly what that modify command expects. In addition, since the drain should never fail, the driver makes sure that post_send/recv will succeed even if the device is already in an internal error state. That way, once the driver supplies the simulated/SW CQEs, the CQE for the drain WR will be handled as well. In the internal error state, the CQE for the drain WR may be completed either by the main task that handled the error state or by the task that issued the drain WR. Because this depends on scheduling, the code takes the relevant locks and actions to make sure that the completion handler for that WR is always called after the post_send/recv were issued, but never in parallel with the other task that handles the error flow.
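For orientation, the core entry point that dispatches to these driver hooks is ib_drain_qp(); a minimal sketch of a hypothetical ULP teardown path that consumes them is shown below (example_ulp_destroy_qp is an illustrative name, not kernel code):

/*
 * Illustrative sketch only. ib_drain_qp() lives in the IB core and
 * calls the driver's drain_sq/drain_rq callbacks when they are
 * provided, falling back to the generic __ib_drain_sq/__ib_drain_rq.
 */
static void example_ulp_destroy_qp(struct ib_qp *qp)
{
	/* Flushes the SQ and RQ and waits until all posted WRs complete */
	ib_drain_qp(qp);

	/* No completion handler can still reference the QP's contexts here */
	ib_destroy_qp(qp);
}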
Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/main.c | 2 + drivers/infiniband/hw/mlx4/mlx4_ib.h | 2 + drivers/infiniband/hw/mlx4/qp.c | 154 +++++++++++++++++++++++++++++++++-- 3 files changed, 152 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 87de1a467d60..5bc522ca9431 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2701,6 +2701,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp; ibdev->ib_dev.query_qp = mlx4_ib_query_qp; ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp; + ibdev->ib_dev.drain_sq = mlx4_ib_drain_sq; + ibdev->ib_dev.drain_rq = mlx4_ib_drain_rq; ibdev->ib_dev.post_send = mlx4_ib_post_send; ibdev->ib_dev.post_recv = mlx4_ib_post_recv; ibdev->ib_dev.create_cq = mlx4_ib_create_cq; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 88c929e2a79e..e5a365bdbe45 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -778,6 +778,8 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); int mlx4_ib_destroy_qp(struct ib_qp *qp); +void mlx4_ib_drain_sq(struct ib_qp *qp); +void mlx4_ib_drain_rq(struct ib_qp *qp); int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index e576ca385d8e..984e9f07339e 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -3568,8 +3568,8 @@ static void add_zero_len_inline(void *wqe) inl->byte_count = cpu_to_be32(1 << 31); } -int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int _mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr, bool drain) { struct mlx4_ib_qp *qp = to_mqp(ibqp); void *wqe; @@ -3609,7 +3609,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } spin_lock_irqsave(&qp->sq.lock, flags); - if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR && + !drain) { err = -EIO; *bad_wr = wr; nreq = 0; @@ -3899,8 +3900,14 @@ out: return err; } -int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + return _mlx4_ib_post_send(ibqp, wr, bad_wr, false); +} + +static int _mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr, bool drain) { struct mlx4_ib_qp *qp = to_mqp(ibqp); struct mlx4_wqe_data_seg *scat; @@ -3915,7 +3922,8 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, max_gs = qp->rq.max_gs; spin_lock_irqsave(&qp->rq.lock, flags); - if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR && + !drain) { err = -EIO; *bad_wr = wr; nreq = 0; @@ -3986,6 +3994,12 @@ out: return err; } +int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + return _mlx4_ib_post_recv(ibqp, wr, bad_wr, false); +} + static inline enum ib_qp_state to_ib_qp_state(enum 
mlx4_qp_state mlx4_state) { switch (mlx4_state) { @@ -4451,3 +4465,131 @@ int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) kfree(ib_rwq_ind_tbl); return 0; } + +struct mlx4_ib_drain_cqe { + struct ib_cqe cqe; + struct completion done; +}; + +static void mlx4_ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct mlx4_ib_drain_cqe *cqe = container_of(wc->wr_cqe, + struct mlx4_ib_drain_cqe, + cqe); + + complete(&cqe->done); +} + +/* This function returns only once the drained WR was completed */ +static void handle_drain_completion(struct ib_cq *cq, + struct mlx4_ib_drain_cqe *sdrain, + struct mlx4_ib_dev *dev) +{ + struct mlx4_dev *mdev = dev->dev; + + if (cq->poll_ctx == IB_POLL_DIRECT) { + while (wait_for_completion_timeout(&sdrain->done, HZ / 10) <= 0) + ib_process_cq_direct(cq, -1); + return; + } + + if (mdev->persist->state == MLX4_DEVICE_STATE_INTERNAL_ERROR) { + struct mlx4_ib_cq *mcq = to_mcq(cq); + bool triggered = false; + unsigned long flags; + + spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); + /* Make sure that the CQ handler won't run if wasn't run yet */ + if (!mcq->mcq.reset_notify_added) + mcq->mcq.reset_notify_added = 1; + else + triggered = true; + spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); + + if (triggered) { + /* Wait for any scheduled/running task to be ended */ + switch (cq->poll_ctx) { + case IB_POLL_SOFTIRQ: + irq_poll_disable(&cq->iop); + irq_poll_enable(&cq->iop); + break; + case IB_POLL_WORKQUEUE: + cancel_work_sync(&cq->work); + break; + default: + WARN_ON_ONCE(1); + } + } + + /* Run the CQ handler - this makes sure that the drain WR will + * be processed if wasn't processed yet. + */ + mcq->mcq.comp(&mcq->mcq); + } + + wait_for_completion(&sdrain->done); +} + +void mlx4_ib_drain_sq(struct ib_qp *qp) +{ + struct ib_cq *cq = qp->send_cq; + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; + struct mlx4_ib_drain_cqe sdrain; + struct ib_send_wr *bad_swr; + struct ib_rdma_wr swr = { + .wr = { + .next = NULL, + { .wr_cqe = &sdrain.cqe, }, + .opcode = IB_WR_RDMA_WRITE, + }, + }; + int ret; + struct mlx4_ib_dev *dev = to_mdev(qp->device); + struct mlx4_dev *mdev = dev->dev; + + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret && mdev->persist->state != MLX4_DEVICE_STATE_INTERNAL_ERROR) { + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); + return; + } + + sdrain.cqe.done = mlx4_ib_drain_qp_done; + init_completion(&sdrain.done); + + ret = _mlx4_ib_post_send(qp, &swr.wr, &bad_swr, true); + if (ret) { + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); + return; + } + + handle_drain_completion(cq, &sdrain, dev); +} + +void mlx4_ib_drain_rq(struct ib_qp *qp) +{ + struct ib_cq *cq = qp->recv_cq; + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; + struct mlx4_ib_drain_cqe rdrain; + struct ib_recv_wr rwr = {}, *bad_rwr; + int ret; + struct mlx4_ib_dev *dev = to_mdev(qp->device); + struct mlx4_dev *mdev = dev->dev; + + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret && mdev->persist->state != MLX4_DEVICE_STATE_INTERNAL_ERROR) { + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); + return; + } + + rwr.wr_cqe = &rdrain.cqe; + rdrain.cqe.done = mlx4_ib_drain_qp_done; + init_completion(&rdrain.done); + + ret = _mlx4_ib_post_recv(qp, &rwr, &bad_rwr, true); + if (ret) { + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); + return; + } + + handle_drain_completion(cq, &rdrain, dev); +} -- cgit From 43cbd64b1fdc1da89abdad88a022d9e87a98e9c6 Mon Sep 17 00:00:00 2001 From: 
Jason Gunthorpe Date: Wed, 13 Jun 2018 11:19:42 -0600 Subject: IB/usnic: Update with bug fixes from core code usnic has a modified version of the core code's ib_umem_get() and related code, and the copy misses many of the bug fixes done over the years: Commit bc3e53f682d9 ("mm: distinguish between mlocked and pinned pages") Commit 87773dd56d54 ("IB: ib_umem_release() should decrement mm->pinned_vm from ib_umem_get") Commit 8494057ab5e4 ("IB/uverbs: Prevent integer overflow in ib_umem_get address arithmetic") Commit 8abaae62f3fd ("IB/core: disallow registering 0-sized memory region") Commit 66578b0b2f69 ("IB/core: don't disallow registering region starting at 0x0") Commit 53376fedb9da ("RDMA/core: not to set page dirty bit if it's already set.") Commit 8e907ed48827 ("IB/umem: Use the correct mm during ib_umem_release") Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 2 +- drivers/infiniband/hw/usnic/usnic_uiom.c | 40 +++++++++++++++++++--------- drivers/infiniband/hw/usnic/usnic_uiom.h | 5 +++- 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index a688a5669168..9524524fade4 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -666,7 +666,7 @@ int usnic_ib_dereg_mr(struct ib_mr *ibmr) usnic_dbg("va 0x%lx length 0x%zx\n", mr->umem->va, mr->umem->length); - usnic_uiom_reg_release(mr->umem, ibmr->pd->uobject->context->closing); + usnic_uiom_reg_release(mr->umem, ibmr->uobject->context); kfree(mr); return 0; } diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 4381c0a9a873..9dd39daa602b 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "usnic_log.h" #include "usnic_uiom.h" @@ -88,7 +89,7 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty) for_each_sg(chunk->page_list, sg, chunk->nents, i) { page = sg_page(sg); pa = sg_phys(sg); - if (dirty) + if (!PageDirty(page) && dirty) set_page_dirty_lock(page); put_page(page); usnic_dbg("pa: %pa\n", &pa); @@ -114,6 +115,16 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, dma_addr_t pa; unsigned int gup_flags; + /* + * If the combination of the addr and size requested for this memory + * region causes an integer overflow, return error. 
+ */ + if (((addr + size) < addr) || PAGE_ALIGN(addr + size) < (addr + size)) + return -EINVAL; + + if (!size) + return -EINVAL; + if (!can_do_mlock()) return -EPERM; @@ -127,7 +138,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, down_write(¤t->mm->mmap_sem); - locked = npages + current->mm->locked_vm; + locked = npages + current->mm->pinned_vm; lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { @@ -143,7 +154,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, ret = 0; while (npages) { - ret = get_user_pages(cur_base, + ret = get_user_pages_longterm(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof(struct page *)), gup_flags, page_list, NULL); @@ -186,7 +197,7 @@ out: if (ret < 0) usnic_uiom_put_pages(chunk_list, 0); else - current->mm->locked_vm = locked; + current->mm->pinned_vm = locked; up_write(¤t->mm->mmap_sem); free_page((unsigned long) page_list); @@ -420,18 +431,22 @@ out_free_uiomr: return ERR_PTR(err); } -void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing) +void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, + struct ib_ucontext *ucontext) { + struct task_struct *task; struct mm_struct *mm; unsigned long diff; __usnic_uiom_reg_release(uiomr->pd, uiomr, 1); - mm = get_task_mm(current); - if (!mm) { - kfree(uiomr); - return; - } + task = get_pid_task(ucontext->tgid, PIDTYPE_PID); + if (!task) + goto out; + mm = get_task_mm(task); + put_task_struct(task); + if (!mm) + goto out; diff = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT; @@ -443,7 +458,7 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing) * up here and not be able to take the mmap_sem. In that case * we defer the vm_locked accounting to the system workqueue. */ - if (closing) { + if (ucontext->closing) { if (!down_write_trylock(&mm->mmap_sem)) { INIT_WORK(&uiomr->work, usnic_uiom_reg_account); uiomr->mm = mm; @@ -455,9 +470,10 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing) } else down_write(&mm->mmap_sem); - current->mm->locked_vm -= diff; + mm->pinned_vm -= diff; up_write(&mm->mmap_sem); mmput(mm); +out: kfree(uiomr); } diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h index 431efe4143f4..8c096acff123 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.h +++ b/drivers/infiniband/hw/usnic/usnic_uiom.h @@ -39,6 +39,8 @@ #include "usnic_uiom_interval_tree.h" +struct ib_ucontext; + #define USNIC_UIOM_READ (1) #define USNIC_UIOM_WRITE (2) @@ -89,7 +91,8 @@ void usnic_uiom_free_dev_list(struct device **devs); struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd, unsigned long addr, size_t size, int access, int dmasync); -void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing); +void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, + struct ib_ucontext *ucontext); int usnic_uiom_init(char *drv_name); void usnic_uiom_fini(void); #endif /* USNIC_UIOM_H_ */ -- cgit From a69d8b3a41165ba0a70f2e9612a08ea0af909233 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 22 Jun 2018 08:11:17 -0700 Subject: MAINTAINERS: Update SRP entries Reflect the acquisition of SanDisk by Western Digital in my e-mail address. Remove the reference to David Dillow's git tree since SRP patches are queued by Doug and Jason. 
Remove the reference to the OpenFabrics website since the srp_daemon source code has been moved from that website into the rdma-core project. Add an entry for the SRP target driver. Signed-off-by: Bart Van Assche Cc: Bart Van Assche Cc: David Dillow Signed-off-by: Jason Gunthorpe --- MAINTAINERS | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 9d5eeff51b5f..37a1d71f417e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12621,15 +12621,21 @@ S: Maintained F: drivers/scsi/sr* SCSI RDMA PROTOCOL (SRP) INITIATOR -M: Bart Van Assche +M: Bart Van Assche L: linux-rdma@vger.kernel.org S: Supported -W: http://www.openfabrics.org Q: http://patchwork.kernel.org/project/linux-rdma/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/dad/srp-initiator.git F: drivers/infiniband/ulp/srp/ F: include/scsi/srp.h +SCSI RDMA PROTOCOL (SRP) TARGET +M: Bart Van Assche +L: linux-rdma@vger.kernel.org +L: target-devel@vger.kernel.org +S: Supported +Q: http://patchwork.kernel.org/project/linux-rdma/list/ +F: drivers/infiniband/ulp/srpt/ + SCSI SG DRIVER M: Doug Gilbert L: linux-scsi@vger.kernel.org -- cgit From e99028ad76e788574a7eafaee2e5916c5b9bbf87 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 25 Jun 2018 15:21:15 -0600 Subject: RDMA/uverbs: Check existence of create_flow callback In the accepted series "Refactor ib_uverbs_write path", we presented the roadmap for getting rid of the uverbs_cmd_mask and uverbs_ex_cmd_mask fields in favor of a simple check of the function pointer. So let's add a NULL check for the create_flow function callback, even though uverbs_ex_cmd_mask still exists. Link: https://www.spinics.net/lists/linux-rdma/msg60753.html Suggested-by: Michael J Ruhl Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 908ee8ab3297..985dc86d5610 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3554,6 +3554,11 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, goto err_uobj; } + if (!qp->device->create_flow) { + err = -EOPNOTSUPP; + goto err_put; + } + flow_attr = kzalloc(struct_size(flow_attr, flows, cmd.flow_attr.num_of_specs), GFP_KERNEL); if (!flow_attr) { -- cgit From ca576fbbdc80d26ca46dd881944413e7dc05c21d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 24 Jun 2018 11:23:44 +0300 Subject: RDMA/verbs: Drop kernel variant of create_flow There are no kernel users of this interface so let's drop it. 
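As a rough sketch (not part of the patch), the only call site that remains after this removal is the uverbs path, which invokes the driver hook directly and does the QP reference accounting itself; the exact code lives in ib_uverbs_ex_create_flow():

/*
 * Sketch of the remaining in-kernel flow-creation path, shown for
 * orientation only; error handling is elided.
 */
flow_id = qp->device->create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER, uhw);
if (!IS_ERR(flow_id)) {
	atomic_inc(&qp->usecnt);	/* the flow holds a reference on its QP */
	flow_id->qp = qp;
}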
Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 17 ----------------- include/rdma/ib_verbs.h | 2 -- 2 files changed, 19 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 9a72b88fea80..5ada09f708f5 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2275,23 +2275,6 @@ int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table) } EXPORT_SYMBOL(ib_destroy_rwq_ind_table); -struct ib_flow *ib_create_flow(struct ib_qp *qp, - struct ib_flow_attr *flow_attr, - int domain) -{ - struct ib_flow *flow_id; - if (!qp->device->create_flow) - return ERR_PTR(-EOPNOTSUPP); - - flow_id = qp->device->create_flow(qp, flow_attr, domain, NULL); - if (!IS_ERR(flow_id)) { - atomic_inc(&qp->usecnt); - flow_id->qp = qp; - } - return flow_id; -} -EXPORT_SYMBOL(ib_create_flow); - int ib_destroy_flow(struct ib_flow *flow_id) { int err; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c01e9c6ed666..1c72ca81e5fa 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3807,8 +3807,6 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller); */ int ib_dealloc_xrcd(struct ib_xrcd *xrcd); -struct ib_flow *ib_create_flow(struct ib_qp *qp, - struct ib_flow_attr *flow_attr, int domain); int ib_destroy_flow(struct ib_flow *flow_id); static inline int ib_check_mr_access(int flags) -- cgit From 1ccddc42da03876f60fe2d0a1b124c27ed5ff201 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 24 Jun 2018 11:23:45 +0300 Subject: RDMA/verbs: Drop kernel variant of destroy_flow Following the removal of ib_create_flow(), adjust the code to get rid of ib_destroy_flow() too. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 3 ++- drivers/infiniband/core/uverbs_std_types.c | 9 ++++++--- drivers/infiniband/core/verbs.c | 12 ------------ include/rdma/ib_verbs.h | 2 -- 4 files changed, 8 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 985dc86d5610..74c5bc934822 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3631,7 +3631,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, kfree(kern_flow_attr); return 0; err_copy: - ib_destroy_flow(flow_id); + if (!qp->device->destroy_flow(flow_id)) + atomic_dec(&qp->usecnt); err_free: ib_uverbs_flow_resources_free(uflow_res); err_free_flow_attr: diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 0df0ac9c1de3..c50d73845a2a 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -48,14 +48,17 @@ static int uverbs_free_ah(struct ib_uobject *uobject, static int uverbs_free_flow(struct ib_uobject *uobject, enum rdma_remove_reason why) { - int ret; struct ib_flow *flow = (struct ib_flow *)uobject->object; struct ib_uflow_object *uflow = container_of(uobject, struct ib_uflow_object, uobject); + struct ib_qp *qp = flow->qp; + int ret; - ret = ib_destroy_flow(flow); - if (!ret) + ret = qp->device->destroy_flow(flow); + if (!ret) { + atomic_dec(&qp->usecnt); ib_uverbs_flow_resources_free(uflow->resources); + } return ret; } diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 5ada09f708f5..128d94988dd8 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ 
-2275,18 +2275,6 @@ int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table) } EXPORT_SYMBOL(ib_destroy_rwq_ind_table); -int ib_destroy_flow(struct ib_flow *flow_id) -{ - int err; - struct ib_qp *qp = flow_id->qp; - - err = qp->device->destroy_flow(flow_id); - if (!err) - atomic_dec(&qp->usecnt); - return err; -} -EXPORT_SYMBOL(ib_destroy_flow); - int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status) { diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 1c72ca81e5fa..8e726fff30fe 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3807,8 +3807,6 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller); */ int ib_dealloc_xrcd(struct ib_xrcd *xrcd); -int ib_destroy_flow(struct ib_flow *flow_id); - static inline int ib_check_mr_access(int flags) { /* -- cgit From a5cc9831af05e658543593abaee45a29d061bac4 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 24 Jun 2018 11:23:47 +0300 Subject: RDMA/uverbs: Don't overwrite NULL pointer with ZERO_SIZE_PTR The number of specs is provided by the user and in a valid case can be equal to zero. Such an argument causes kcalloc() to be called with a zero-length request, and in return ZERO_SIZE_PTR is assigned. This pointer is different from NULL and makes various if (...) checks succeed. Fixes: b6ba4a9aa59f ("IB/uverbs: Add support for flow counters") Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 74c5bc934822..7d70d17a853a 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2763,6 +2763,9 @@ static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs) if (!resources) return NULL; + if (!num_specs) + goto out; + resources->counters = kcalloc(num_specs, sizeof(*resources->counters), GFP_KERNEL); resources->collection = @@ -2771,8 +2774,8 @@ static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs) if (!resources->counters || !resources->collection) goto err; +out: resources->max = num_specs; - return resources; err: -- cgit From 3a2e791c9456aab38a727b2b2558c08210f59f03 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 24 Jun 2018 11:23:48 +0300 Subject: RDMA/umem: Don't check for a negative return value of dma_map_sg_attrs() dma_map_sg_attrs() returns 0 on error and can't return a negative number (ensured by BUG_ON), so don't check. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 54ab6335c48d..498f59bb4989 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -206,7 +206,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, DMA_BIDIRECTIONAL, dma_attrs); - if (umem->nmap <= 0) { + if (!umem->nmap) { ret = -ENOMEM; goto out; } -- cgit From d9c44040ed05991bc43dddb7dbb043891b3bdb1f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 24 Jun 2018 11:23:52 +0300 Subject: RDMA/uverbs: Remove redundant check kern_spec->reserved is checked prior to calling kern_spec_to_ib_spec_filter(), which makes this second check redundant. 
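A brief aside on the ZERO_SIZE_PTR fix above: a minimal kernel-style sketch of the pitfall (illustrative only, not taken from any patch in this series):

/*
 * kcalloc()/kmalloc() with a zero-length request return ZERO_SIZE_PTR,
 * a small non-NULL cookie, so a plain NULL test does not detect the
 * empty allocation.
 */
int *p = kcalloc(0, sizeof(*p), GFP_KERNEL);

if (!p)		/* never taken: p == ZERO_SIZE_PTR, which is != NULL */
	return -ENOMEM;
/* Any dereference of p here would be a bug despite the check above */
kfree(p);	/* kfree() does accept ZERO_SIZE_PTR safely */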
Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 7d70d17a853a..5fc14fde274c 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3040,9 +3040,6 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec, void *kern_spec_mask; void *kern_spec_val; - if (kern_spec->reserved) - return -EINVAL; - kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr); kern_spec_val = (void *)kern_spec + -- cgit From 5e62d5ff1b9a6573d0d6b411893b1c17ccc21ce7 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 24 Jun 2018 16:57:50 +0300 Subject: IB/mlx4: Create slave AH's directly Since slave GID's do not exist in the core gid table we can no longer use the core code to help do this without creating inconsistencies. Directly create the AH using mlx4 internal APIs. Signed-off-by: Jason Gunthorpe Reviewed-by: Jack Morgenstein Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx4/ah.c | 58 +++++++++++++++++++++++++++++++----- drivers/infiniband/hw/mlx4/mad.c | 22 +++----------- drivers/infiniband/hw/mlx4/mlx4_ib.h | 4 +++ 3 files changed, 59 insertions(+), 25 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 5e9b0837ef61..e9e3a6f390db 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -96,19 +96,29 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, memcpy(ah->av.eth.mac, ah_attr->roce.dmac, ETH_ALEN); eth_zero_addr(ah->av.eth.s_mac); + + /* + * If sgid_attr is NULL we are being called by mlx4_ib_create_ah_slave + * and we are directly creating an AV for a slave's gid_index. + */ gid_attr = ah_attr->grh.sgid_attr; - if (is_vlan_dev(gid_attr->ndev)) - vlan_tag = vlan_dev_vlan_id(gid_attr->ndev); - memcpy(ah->av.eth.s_mac, gid_attr->ndev->dev_addr, ETH_ALEN); + if (gid_attr) { + if (is_vlan_dev(gid_attr->ndev)) + vlan_tag = vlan_dev_vlan_id(gid_attr->ndev); + memcpy(ah->av.eth.s_mac, gid_attr->ndev->dev_addr, ETH_ALEN); + ret = mlx4_ib_gid_index_to_real_index(ibdev, gid_attr); + if (ret < 0) + return ERR_PTR(ret); + ah->av.eth.gid_index = ret; + } else { + /* mlx4_ib_create_ah_slave fills in the s_mac and the vlan */ + ah->av.eth.gid_index = ah_attr->grh.sgid_index; + } if (vlan_tag < 0x1000) vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13; ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (rdma_ah_get_port_num(ah_attr) << 24)); - ret = mlx4_ib_gid_index_to_real_index(ibdev, gid_attr); - if (ret < 0) - return ERR_PTR(ret); - ah->av.eth.gid_index = ret; ah->av.eth.vlan = cpu_to_be16(vlan_tag); ah->av.eth.hop_limit = grh->hop_limit; if (rdma_ah_get_static_rate(ah_attr)) { @@ -167,6 +177,40 @@ struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, return create_ib_ah(pd, ah_attr, ah); /* never fails */ } +/* AH's created via this call must be free'd by mlx4_ib_destroy_ah. 
*/ +struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd, + struct rdma_ah_attr *ah_attr, + int slave_sgid_index, u8 *s_mac, + u16 vlan_tag) +{ + struct rdma_ah_attr slave_attr = *ah_attr; + struct mlx4_ib_ah *mah; + struct ib_ah *ah; + + slave_attr.grh.sgid_attr = NULL; + slave_attr.grh.sgid_index = slave_sgid_index; + ah = mlx4_ib_create_ah(pd, &slave_attr, NULL); + if (IS_ERR(ah)) + return ah; + + ah->device = pd->device; + ah->pd = pd; + ah->type = ah_attr->type; + mah = to_mah(ah); + + /* get rid of force-loopback bit */ + mah->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF); + + if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) + memcpy(mah->av.eth.s_mac, s_mac, 6); + + if (vlan_tag < 0x1000) + vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13; + mah->av.eth.vlan = cpu_to_be16(vlan_tag); + + return ah; +} + int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) { struct mlx4_ib_ah *ah = to_mah(ibah); diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 90a3e2642c2e..8d730a69793d 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1367,13 +1367,10 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, struct mlx4_mad_snd_buf *sqp_mad; struct ib_ah *ah; struct ib_qp *send_qp = NULL; - struct ib_global_route *grh; unsigned wire_tx_ix = 0; int ret = 0; u16 wire_pkey_ix; int src_qpnum; - u8 sgid_index; - sqp_ctx = dev->sriov.sqps[port-1]; @@ -1394,16 +1391,11 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, send_qp = sqp->qp; /* create ah */ - grh = rdma_ah_retrieve_grh(attr); - sgid_index = grh->sgid_index; - grh->sgid_index = 0; - ah = rdma_create_ah(sqp_ctx->pd, attr); + ah = mlx4_ib_create_ah_slave(sqp_ctx->pd, attr, + rdma_ah_retrieve_grh(attr)->sgid_index, + s_mac, vlan_id); if (IS_ERR(ah)) return -ENOMEM; - grh->sgid_index = sgid_index; - to_mah(ah)->av.ib.gid_index = sgid_index; - /* get rid of force-loopback bit */ - to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF); spin_lock(&sqp->tx_lock); if (sqp->tx_ix_head - sqp->tx_ix_tail >= (MLX4_NUM_TUNNEL_BUFS - 1)) @@ -1445,12 +1437,6 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, wr.wr.num_sge = 1; wr.wr.opcode = IB_WR_SEND; wr.wr.send_flags = IB_SEND_SIGNALED; - if (s_mac) - memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6); - if (vlan_id < 0x1000) - vlan_id |= (rdma_ah_get_sl(attr) & 7) << 13; - to_mah(ah)->av.eth.vlan = cpu_to_be16(vlan_id); - ret = ib_post_send(send_qp, &wr.wr, &bad_wr); if (!ret) @@ -1461,7 +1447,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, spin_unlock(&sqp->tx_lock); sqp->tx_ring[wire_tx_ix].ah = NULL; out: - rdma_destroy_ah(ah); + mlx4_ib_destroy_ah(ah); return ret; } diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index e5a365bdbe45..1a0fad30633b 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -760,6 +760,10 @@ void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, struct ib_udata *udata); +struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd, + struct rdma_ah_attr *ah_attr, + int slave_sgid_index, u8 *s_mac, + u16 vlan_tag); int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); int mlx4_ib_destroy_ah(struct ib_ah *ah); -- cgit From 7a5c938b9ed0985ea09b821b4b7f12b5e3d88d5d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 25 
Jun 2018 16:03:41 -0600 Subject: IB/core: Check for rdma_protocol_ib only after validating port_num port_num is untrusted data from the user, so it should be checked after calling fill_sgid_attr, which validates it. Fixes: 8d9ec9addd6c ("IB/core: Add a sgid_attr pointer to struct rdma_ah_attr") Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 128d94988dd8..1bb6b6ff3341 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1580,14 +1580,6 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, const struct ib_gid_attr *old_sgid_attr_alt_av; int ret; - /* - * Today the core code can only handle alternate paths and APM for IB - * ban them in roce mode. - */ - if (attr_mask & IB_QP_ALT_PATH && - !rdma_protocol_ib(qp->device, attr->alt_ah_attr.port_num)) - return -EINVAL; - if (attr_mask & IB_QP_AV) { ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr, &old_sgid_attr_av); @@ -1606,6 +1598,17 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, &old_sgid_attr_alt_av); if (ret) goto out_av; + + /* + * Today the core code can only handle alternate paths and APM + * for IB. Ban them in roce mode. + */ + if (!(rdma_protocol_ib(qp->device, + attr->alt_ah_attr.port_num) && + rdma_protocol_ib(qp->device, port))) { + ret = -EINVAL; + goto out; + } } /* -- cgit From 4e1077f720f5fb54a295c8c2b5f73c09aac75fd9 Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Wed, 27 Jun 2018 19:26:11 +0300 Subject: RDMA/vmw_pvrdma: Delete unused function This function is not in use - delete it. Signed-off-by: Yuval Shaia Acked-by: Adit Ranadive Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index 44cb1cfba417..42b8685c997e 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -378,11 +378,6 @@ static inline enum ib_port_speed pvrdma_port_speed_to_ib( return (enum ib_port_speed)speed; } -static inline int pvrdma_qp_attr_mask_to_ib(int attr_mask) -{ - return attr_mask; -} - static inline int ib_qp_attr_mask_to_pvrdma(int attr_mask) { return attr_mask & PVRDMA_MASK(PVRDMA_QP_ATTR_MASK_MAX); } -- cgit From 151779996597af671e14a6838589d99fa09ffc6d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 27 Jun 2018 10:44:24 +0300 Subject: RDMA/mlx5: Don't leak UARs in case of free fails The failure in releasing one UAR doesn't mean that we can't continue to release the rest of the system pages, so don't return too early. As part of the cleanup, there is no need to print a warning if mlx5_cmd_free_uar() fails, because such a warning will be printed as part of mlx5_cmd_exec(). 
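The rule being applied here generalizes: teardown loops should log and continue rather than abort. A sketch of the pattern follows (example_release_uars is a hypothetical helper, not kernel code):

/*
 * Sketch of the continue-on-error teardown pattern adopted above.
 */
static void example_release_uars(struct mlx5_ib_dev *dev,
				 u32 *sys_pages, int num)
{
	int i;

	for (i = 0; i < num; i++) {
		if (sys_pages[i] == MLX5_IB_INVALID_UAR_INDEX)
			continue;
		/*
		 * Ignore the return value: returning early would leak
		 * every UAR not yet freed, and the failure is already
		 * logged inside mlx5_cmd_exec().
		 */
		mlx5_cmd_free_uar(dev->mdev, sys_pages[i]);
	}
}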
Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 5d030bc3cdd8..322f036a1264 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1559,25 +1559,17 @@ error: return err; } -static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context) +static void deallocate_uars(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context) { struct mlx5_bfreg_info *bfregi; - int err; int i; bfregi = &context->bfregi; - for (i = 0; i < bfregi->num_sys_pages; i++) { + for (i = 0; i < bfregi->num_sys_pages; i++) if (i < bfregi->num_static_sys_pages || - bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX) { - err = mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]); - if (err) { - mlx5_ib_warn(dev, "failed to free uar %d, err=%d\n", i, err); - return err; - } - } - } - - return 0; + bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX) + mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]); } static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn) -- cgit From 92ebb6a0a13a698f88fb3ba58c01844f190d2af5 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 27 Jun 2018 10:44:26 +0300 Subject: IB/cm: Remove now useless rcu_lock in dst_fetch_ha This lock used to protect a call to dst_get_neighbour_noref(); however, the commit below changed it to dst_neigh_lookup(), which no longer requires RCU. Access to nud_state, neigh_event_send or rdma_copy_addr does not require RCU, so delete the lock. Fixes: 02b619555ad6 ("infiniband: Convert dst_fetch_ha() over to dst_neigh_lookup().") Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/addr.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 4f32c4062fb6..1b817fdb97a4 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -315,19 +315,17 @@ static int dst_fetch_ha(const struct dst_entry *dst, int ret = 0; n = dst_neigh_lookup(dst, daddr); + if (!n) + return -ENODATA; - rcu_read_lock(); - if (!n || !(n->nud_state & NUD_VALID)) { - if (n) - neigh_event_send(n, NULL); + if (!(n->nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); ret = -ENODATA; } else { rdma_copy_addr(dev_addr, dst->dev, n->ha); } - rcu_read_unlock(); - if (n) - neigh_release(n); + neigh_release(n); return ret; } -- cgit From 24c937b39dfb102860e086f47ab9a58a0f6b0deb Mon Sep 17 00:00:00 2001 From: Vijay Immanuel Date: Mon, 18 Jun 2018 18:48:56 -0700 Subject: IB/rxe: don't clear the tx queue on every transfer Do not call sk_dst_set() on every packet transfer because that calls sk_tx_queue_clear(), which clears the tx queue. A QP must stay on the same tx queue to maintain packet order. 
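For context, the cached entry set by this patch is consumed on later transmits in rxe_find_route(); a simplified sketch (not a verbatim excerpt) of that fast path:

/*
 * RC QPs reuse the route cached on the QP's socket; only the slow
 * path below re-resolves and re-caches it, so sk_dst_set(), and the
 * sk_tx_queue_clear() it implies, no longer happens per packet.
 */
struct dst_entry *dst = NULL;

if (qp_type(qp) == IB_QPT_RC)
	dst = sk_dst_get(qp->sk->sk);	/* takes its own dst reference */

if (!dst || !dst_check(dst, qp->dst_cookie)) {
	/* Slow path: resolve the route again and cache it once */
}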
Signed-off-by: Vijay Immanuel Acked-by: Moni Shoua Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_net.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 79b69943a8af..8094cbaa54a9 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -223,6 +223,11 @@ static struct dst_entry *rxe_find_route(struct rxe_dev *rxe, rt6_get_cookie((struct rt6_info *)dst); #endif } + + if (dst && (qp_type(qp) == IB_QPT_RC)) { + dst_hold(dst); + sk_dst_set(qp->sk->sk, dst); + } } rdma_put_gid_attr(attr); return dst; @@ -397,11 +402,7 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP, av->grh.traffic_class, av->grh.hop_limit, df, xnet); - if (qp_type(qp) == IB_QPT_RC) - sk_dst_set(qp->sk->sk, dst); - else - dst_release(dst); - + dst_release(dst); return 0; } @@ -429,11 +430,7 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, av->grh.traffic_class, av->grh.hop_limit); - if (qp_type(qp) == IB_QPT_RC) - sk_dst_set(qp->sk->sk, dst); - else - dst_release(dst); - + dst_release(dst); return 0; } -- cgit From 68348441ef7d15e6a902c956e139723c99e065ed Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 27 Jun 2018 11:03:45 +0300 Subject: IB/iser: set can_queue earlier to allow setting higher queue depth We need to set can_queue earlier than when enabling the SCSI host. In a blk-mq enabled environment, the tagset allocation is taken from can_queue, which cannot be modified later. Also, pass an updated .can_queue to iscsi_session_setup to have enough iscsi tasks allocated in the session kfifo. Reported-by: Karandeep Chahal Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/iser/iscsi_iser.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 9a6434c31db2..61cc47da2fec 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -633,8 +633,8 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, */ if (ep) { iser_conn = ep->dd_data; - max_cmds = iser_conn->max_cmds; shost->sg_tablesize = iser_conn->scsi_sg_tablesize; + shost->can_queue = min_t(u16, cmds_max, iser_conn->max_cmds); mutex_lock(&iser_conn->state_mutex); if (iser_conn->state != ISER_CONN_UP) { @@ -660,6 +660,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, } mutex_unlock(&iser_conn->state_mutex); } else { + shost->can_queue = min_t(u16, cmds_max, ISER_DEF_XMIT_CMDS_MAX); max_cmds = ISER_DEF_XMIT_CMDS_MAX; if (iscsi_host_add(shost, NULL)) goto free_host; @@ -676,21 +677,14 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, iser_warn("max_sectors was reduced from %u to %u\n", iser_max_sectors, shost->max_sectors); - if (cmds_max > max_cmds) { - iser_info("cmds_max changed from %u to %u\n", - cmds_max, max_cmds); - cmds_max = max_cmds; - } - cls_session = iscsi_session_setup(&iscsi_iser_transport, shost, - cmds_max, 0, + shost->can_queue, 0, sizeof(struct iscsi_iser_task), initial_cmdsn, 0); if (!cls_session) goto remove_host; session = cls_session->dd_data; - shost->can_queue = session->scsi_cmds_max; return cls_session; remove_host: -- cgit From e620ebfc228dcbef7519e3d16f43c6c6f1a1d0cb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 26 Jun 
2018 15:24:48 -0700 Subject: IB/srpt: Support HCAs with more than two ports Since there are adapters that have four ports, increase the size of the srpt_device.port[] array. This patch avoids hitting the following warning with quad-port Chelsio adapters: WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port)); Reported-by: Steve Wise Signed-off-by: Bart Van Assche Cc: Steve Wise Cc: Christoph Hellwig Cc: Reviewed-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 5 ++--- drivers/infiniband/ulp/srpt/ib_srpt.h | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 1b0b285a0ae0..36d9fab7c998 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2970,7 +2970,8 @@ static void srpt_add_one(struct ib_device *device) pr_debug("device = %p\n", device); - sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); + sdev = kzalloc(struct_size(sdev, port, device->phys_port_cnt), + GFP_KERNEL); if (!sdev) goto err; @@ -3024,8 +3025,6 @@ static void srpt_add_one(struct ib_device *device) srpt_event_handler); ib_register_event_handler(&sdev->event_handler); - WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port)); - for (i = 1; i <= sdev->device->phys_port_cnt; i++) { sport = &sdev->port[i - 1]; INIT_LIST_HEAD(&sport->nexus_list); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 2361483476a0..444dfd7281b5 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -396,9 +396,9 @@ struct srpt_port { * @sdev_mutex: Serializes use_srq changes. * @use_srq: Whether or not to use SRQ. * @ioctx_ring: Per-HCA SRQ. - * @port: Information about the ports owned by this HCA. * @event_handler: Per-HCA asynchronous IB event handler. * @list: Node in srpt_dev_list. + * @port: Information about the ports owned by this HCA. */ struct srpt_device { struct ib_device *device; @@ -410,9 +410,9 @@ struct srpt_device { struct mutex sdev_mutex; bool use_srq; struct srpt_recv_ioctx **ioctx_ring; - struct srpt_port port[2]; struct ib_event_handler event_handler; struct list_head list; + struct srpt_port port[]; }; #endif /* IB_SRPT_H */ -- cgit From 1c77483e4c50339b0306572167ccbff6b55d051b Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 20 Jun 2018 17:11:39 +0300 Subject: IB: Improve uverbs_cleanup_ucontext algorithm Improve the uverbs_cleanup_ucontext algorithm to work properly when the topology graph of the objects cannot be determined at compile time. This is the case with objects created via the devx interface in mlx5. Typically, uverbs objects must be created in a strict topologically sorted order, so that LIFO ordering will generally cause them to be freed properly. There are only a few cases (e.g. memory windows) where objects can point to things outside the strict LIFO order. Instead of using an explicit ordering scheme where the HW destroy is not allowed to fail, go over the list multiple times and allow the destroy function to fail. If progress halts, a final, desperate cleanup is done before leaking the memory; reaching that point indicates a driver bug. 
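In outline, the retry-based sweep described above reduces to the loop below (simplified sketch; the full version appears in the diff that follows):

/*
 * Keep sweeping the object list while at least one destroy succeeds;
 * a sweep that removes nothing means the remaining objects are still
 * referenced, so stop retrying and force a final pass.
 */
ucontext->cleanup_retryable = true;
while (!list_empty(&ucontext->uobjects))
	if (__uverbs_cleanup_ucontext(ucontext, reason))
		break;	/* no object could be removed in this sweep */

/* Final pass: destroy callbacks may no longer refuse */
ucontext->cleanup_retryable = false;
if (!list_empty(&ucontext->uobjects))
	__uverbs_cleanup_ucontext(ucontext, reason);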
Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 113 ++++++++++++--------- drivers/infiniband/core/uverbs.h | 2 +- drivers/infiniband/core/uverbs_cmd.c | 11 +- drivers/infiniband/core/uverbs_std_types.c | 62 ++++++----- .../infiniband/core/uverbs_std_types_counters.c | 9 +- drivers/infiniband/core/uverbs_std_types_cq.c | 18 ++-- drivers/infiniband/core/uverbs_std_types_dm.c | 9 +- .../infiniband/core/uverbs_std_types_flow_action.c | 9 +- drivers/infiniband/core/uverbs_std_types_mr.c | 3 +- drivers/infiniband/hw/mlx5/devx.c | 8 +- include/rdma/ib_verbs.h | 46 ++++++++- include/rdma/uverbs_types.h | 11 +- 12 files changed, 190 insertions(+), 111 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index df3c40533252..2ddf1c716ba8 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -360,9 +360,10 @@ static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj, /* * We can only fail gracefully if the user requested to destroy the - * object. In the rest of the cases, just remove whatever you can. + * object or when a retry may be called upon an error. + * In the rest of the cases, just remove whatever you can. */ - if (why == RDMA_REMOVE_DESTROY && ret) + if (ib_is_destroy_retryable(ret, why, uobj)) return ret; ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device, @@ -393,7 +394,7 @@ static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj, container_of(uobj, struct ib_uobject_file, uobj); int ret = fd_type->context_closed(uobj_file, why); - if (why == RDMA_REMOVE_DESTROY && ret) + if (ib_is_destroy_retryable(ret, why, uobj)) return ret; if (why == RDMA_REMOVE_DURING_CLEANUP) { @@ -422,7 +423,7 @@ static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj, struct ib_ucontext *ucontext = uobj->context; ret = uobj->type->type_class->remove_commit(uobj, why); - if (ret && why == RDMA_REMOVE_DESTROY) { + if (ib_is_destroy_retryable(ret, why, uobj)) { /* We couldn't remove the object, so just unlock the uobject */ atomic_set(&uobj->usecnt, 0); uobj->type->type_class->lookup_put(uobj, true); @@ -645,61 +646,77 @@ void uverbs_close_fd(struct file *f) kref_put(uverbs_file_ref, ib_uverbs_release_file); } -void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed) +static int __uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, + enum rdma_remove_reason reason) { - enum rdma_remove_reason reason = device_removed ? - RDMA_REMOVE_DRIVER_REMOVE : RDMA_REMOVE_CLOSE; - unsigned int cur_order = 0; + struct ib_uobject *obj, *next_obj; + int ret = -EINVAL; + int err = 0; + /* + * This shouldn't run while executing other commands on this + * context. Thus, the only thing we should take care of is + * releasing a FD while traversing this list. The FD could be + * closed and released from the _release fop of this FD. + * In order to mitigate this, we add a lock. + * We take and release the lock per traversal in order to let + * other threads (which might still use the FDs) chance to run. + */ + mutex_lock(&ucontext->uobjects_lock); ucontext->cleanup_reason = reason; + list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects, list) { + /* + * if we hit this WARN_ON, that means we are + * racing with a lookup_get. 
+ */ + WARN_ON(uverbs_try_lock_object(obj, true)); + err = obj->type->type_class->remove_commit(obj, reason); + + if (ib_is_destroy_retryable(err, reason, obj)) { + pr_debug("ib_uverbs: failed to remove uobject id %d err %d\n", + obj->id, err); + atomic_set(&obj->usecnt, 0); + continue; + } + + if (err) + pr_err("ib_uverbs: unable to remove uobject id %d err %d\n", + obj->id, err); + + list_del(&obj->list); + /* put the ref we took when we created the object */ + uverbs_uobject_put(obj); + ret = 0; + } + mutex_unlock(&ucontext->uobjects_lock); + return ret; +} + +void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed) +{ + enum rdma_remove_reason reason = device_removed ? + RDMA_REMOVE_DRIVER_REMOVE : + RDMA_REMOVE_CLOSE; /* * Waits for all remove_commit and alloc_commit to finish. Logically, We * want to hold this forever as the context is going to be destroyed, * but we'll release it since it causes a "held lock freed" BUG message. */ down_write(&ucontext->cleanup_rwsem); + ucontext->cleanup_retryable = true; + while (!list_empty(&ucontext->uobjects)) + if (__uverbs_cleanup_ucontext(ucontext, reason)) { + /* + * No entry was cleaned-up successfully during this + * iteration + */ + break; + } - while (!list_empty(&ucontext->uobjects)) { - struct ib_uobject *obj, *next_obj; - unsigned int next_order = UINT_MAX; + ucontext->cleanup_retryable = false; + if (!list_empty(&ucontext->uobjects)) + __uverbs_cleanup_ucontext(ucontext, reason); - /* - * This shouldn't run while executing other commands on this - * context. Thus, the only thing we should take care of is - * releasing a FD while traversing this list. The FD could be - * closed and released from the _release fop of this FD. - * In order to mitigate this, we add a lock. - * We take and release the lock per order traversal in order - * to let other threads (which might still use the FDs) chance - * to run. - */ - mutex_lock(&ucontext->uobjects_lock); - list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects, - list) { - if (obj->type->destroy_order == cur_order) { - int ret; - - /* - * if we hit this WARN_ON, that means we are - * racing with a lookup_get. 
- */ - WARN_ON(uverbs_try_lock_object(obj, true)); - ret = obj->type->type_class->remove_commit(obj, - reason); - list_del(&obj->list); - if (ret) - pr_warn("ib_uverbs: failed to remove uobject id %d order %u\n", - obj->id, cur_order); - /* put the ref we took when we created the object */ - uverbs_uobject_put(obj); - } else { - next_order = min(next_order, - obj->type->destroy_order); - } - } - mutex_unlock(&ucontext->uobjects_lock); - cur_order = next_order; - } up_write(&ucontext->cleanup_rwsem); } diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index c0d40fc3a53a..f9f0bcf76812 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -230,7 +230,7 @@ void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event); -int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd, +int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, enum rdma_remove_reason why); int uverbs_dealloc_mw(struct ib_mw *mw); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 5fc14fde274c..5d0fd36b009d 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -116,6 +116,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); rcu_read_unlock(); ucontext->closing = 0; + ucontext->cleanup_retryable = false; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING ucontext->umem_tree = RB_ROOT_CACHED; @@ -611,12 +612,13 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, return ret ?: in_len; } -int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, +int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, enum rdma_remove_reason why) { struct inode *inode; int ret; + struct ib_uverbs_device *dev = uobject->context->ufile->device; inode = xrcd->inode; if (inode && !atomic_dec_and_test(&xrcd->usecnt)) @@ -624,9 +626,12 @@ int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, ret = ib_dealloc_xrcd(xrcd); - if (why == RDMA_REMOVE_DESTROY && ret) + if (ib_is_destroy_retryable(ret, why, uobject)) { atomic_inc(&xrcd->usecnt); - else if (inode) + return ret; + } + + if (inode) xrcd_table_delete(dev, inode); return ret; diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index c50d73845a2a..c7f93b205c70 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -77,6 +77,13 @@ static int uverbs_free_qp(struct ib_uobject *uobject, container_of(uobject, struct ib_uqp_object, uevent.uobject); int ret; + /* + * If this is a user triggered destroy then do not allow destruction + * until the user cleans up all the mcast bindings. Unlike in other + * places we forcibly clean up the mcast attachments for !DESTROY + * because the mcast attaches are not ubojects and will not be + * destroyed by anything else during cleanup processing. 
+ */ if (why == RDMA_REMOVE_DESTROY) { if (!list_empty(&uqp->mcast_list)) return -EBUSY; @@ -85,7 +92,7 @@ static int uverbs_free_qp(struct ib_uobject *uobject, } ret = ib_destroy_qp(qp); - if (ret && why == RDMA_REMOVE_DESTROY) + if (ib_is_destroy_retryable(ret, why, uobject)) return ret; if (uqp->uxrcd) @@ -103,8 +110,10 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject, int ret; ret = ib_destroy_rwq_ind_table(rwq_ind_tbl); - if (!ret || why != RDMA_REMOVE_DESTROY) - kfree(ind_tbl); + if (ib_is_destroy_retryable(ret, why, uobject)) + return ret; + + kfree(ind_tbl); return ret; } @@ -117,8 +126,10 @@ static int uverbs_free_wq(struct ib_uobject *uobject, int ret; ret = ib_destroy_wq(wq); - if (!ret || why != RDMA_REMOVE_DESTROY) - ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent); + if (ib_is_destroy_retryable(ret, why, uobject)) + return ret; + + ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent); return ret; } @@ -132,8 +143,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject, int ret; ret = ib_destroy_srq(srq); - - if (ret && why == RDMA_REMOVE_DESTROY) + if (ib_is_destroy_retryable(ret, why, uobject)) return ret; if (srq_type == IB_SRQT_XRC) { @@ -155,12 +165,12 @@ static int uverbs_free_xrcd(struct ib_uobject *uobject, container_of(uobject, struct ib_uxrcd_object, uobject); int ret; + ret = ib_destroy_usecnt(&uxrcd->refcnt, why, uobject); + if (ret) + return ret; + mutex_lock(&uobject->context->ufile->device->xrcd_tree_mutex); - if (why == RDMA_REMOVE_DESTROY && atomic_read(&uxrcd->refcnt)) - ret = -EBUSY; - else - ret = ib_uverbs_dealloc_xrcd(uobject->context->ufile->device, - xrcd, why); + ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why); mutex_unlock(&uobject->context->ufile->device->xrcd_tree_mutex); return ret; @@ -170,9 +180,11 @@ static int uverbs_free_pd(struct ib_uobject *uobject, enum rdma_remove_reason why) { struct ib_pd *pd = uobject->object; + int ret; - if (why == RDMA_REMOVE_DESTROY && atomic_read(&pd->usecnt)) - return -EBUSY; + ret = ib_destroy_usecnt(&pd->usecnt, why, uobject); + if (ret) + return ret; ib_dealloc_pd((struct ib_pd *)uobject->object); return 0; @@ -249,44 +261,42 @@ void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata) } DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COMP_CHANNEL, - &UVERBS_TYPE_ALLOC_FD(0, - sizeof(struct ib_uverbs_completion_event_file), + &UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), uverbs_hot_unplug_completion_event_file, &uverbs_event_fops, "[infinibandevent]", O_RDONLY)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_QP, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0, + &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW, - &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_mw)); + &UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_SRQ, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0, + &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), uverbs_free_srq)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH, - &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_ah)); + &UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW, &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object), - 0, uverbs_free_flow)); + uverbs_free_flow)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_WQ, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0, + &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq)); 
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL, - &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_rwq_ind_tbl)); + &UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_XRCD, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0, + &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), uverbs_free_xrcd)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD, - /* 2 is used in order to free the PD after MRs */ - &UVERBS_TYPE_ALLOC_IDR(2, uverbs_free_pd)); + &UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DEVICE, NULL); diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index 03b182a684a6..6d0b1ce9fc1f 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -38,10 +38,11 @@ static int uverbs_free_counters(struct ib_uobject *uobject, enum rdma_remove_reason why) { struct ib_counters *counters = uobject->object; + int ret; - if (why == RDMA_REMOVE_DESTROY && - atomic_read(&counters->usecnt)) - return -EBUSY; + ret = ib_destroy_usecnt(&counters->usecnt, why, uobject); + if (ret) + return ret; return counters->device->destroy_counters(counters); } @@ -150,7 +151,7 @@ static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_READ, UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS, - &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_counters), + &UVERBS_TYPE_ALLOC_IDR(uverbs_free_counters), &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE), &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY), &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ)); diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 3d293d01afea..f67b0895b48c 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -44,12 +44,16 @@ static int uverbs_free_cq(struct ib_uobject *uobject, int ret; ret = ib_destroy_cq(cq); - if (!ret || why != RDMA_REMOVE_DESTROY) - ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ? - container_of(ev_queue, - struct ib_uverbs_completion_event_file, - ev_queue) : NULL, - ucq); + if (ib_is_destroy_retryable(ret, why, uobject)) + return ret; + + ib_uverbs_release_ucq( + uobject->context->ufile, + ev_queue ? 
container_of(ev_queue, + struct ib_uverbs_completion_event_file, + ev_queue) : + NULL, + ucq); return ret; } @@ -201,7 +205,7 @@ static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_DESTROY, UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_CQ, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0, + &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), uverbs_free_cq), #if IS_ENABLED(CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI) &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE), diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c index 8b681575b615..d294660a2e06 100644 --- a/drivers/infiniband/core/uverbs_std_types_dm.c +++ b/drivers/infiniband/core/uverbs_std_types_dm.c @@ -37,9 +37,11 @@ static int uverbs_free_dm(struct ib_uobject *uobject, enum rdma_remove_reason why) { struct ib_dm *dm = uobject->object; + int ret; - if (why == RDMA_REMOVE_DESTROY && atomic_read(&dm->usecnt)) - return -EBUSY; + ret = ib_destroy_usecnt(&dm->usecnt, why, uobject); + if (ret) + return ret; return dm->device->dealloc_dm(dm); } @@ -102,7 +104,6 @@ static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_DM_FREE, UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM, - /* 1 is used in order to free the DM after MRs */ - &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_dm), + &UVERBS_TYPE_ALLOC_IDR(uverbs_free_dm), &UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC), &UVERBS_METHOD(UVERBS_METHOD_DM_FREE)); diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index a7be51cf2e42..c1875657bc99 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -37,10 +37,11 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject, enum rdma_remove_reason why) { struct ib_flow_action *action = uobject->object; + int ret; - if (why == RDMA_REMOVE_DESTROY && - atomic_read(&action->usecnt)) - return -EBUSY; + ret = ib_destroy_usecnt(&action->usecnt, why, uobject); + if (ret) + return ret; return action->device->destroy_flow_action(action); } @@ -428,7 +429,7 @@ static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_FLOW_ACTION_DESTRO UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW_ACTION, - &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_flow_action), + &UVERBS_TYPE_ALLOC_IDR(uverbs_free_flow_action), &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE), &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY), &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)); diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index 68f7cadf088f..d7f7ba3802af 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -142,6 +142,5 @@ static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_MR_REG, UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MR, - /* 1 is used in order to free the MR after all the MWs */ - &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_mr), + &UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr), &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG)); diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index f535e7da2dc5..30f6b612547f 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -675,7 +675,7 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, int ret; 
ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); - if (ret && why == RDMA_REMOVE_DESTROY) + if (ib_is_destroy_retryable(ret, why, uobject)) return ret; kfree(obj); @@ -976,7 +976,7 @@ static int devx_umem_cleanup(struct ib_uobject *uobject, int err; err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); - if (err && why == RDMA_REMOVE_DESTROY) + if (ib_is_destroy_retryable(err, why, uobject)) return err; ib_umem_release(obj->umem); @@ -1085,14 +1085,14 @@ static DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX, &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN)); static DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ, - &UVERBS_TYPE_ALLOC_IDR(0, devx_obj_cleanup), + &UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY)); static DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM, - &UVERBS_TYPE_ALLOC_IDR(0, devx_umem_cleanup), + &UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG)); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 8e726fff30fe..e1130c6c1377 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1476,7 +1476,10 @@ struct ib_fmr_attr { struct ib_umem; enum rdma_remove_reason { - /* Userspace requested uobject deletion. Call could fail */ + /* + * Userspace requested uobject deletion or initial try + * to remove uobject via cleanup. Call could fail + */ RDMA_REMOVE_DESTROY, /* Context deletion. This call should delete the actual object itself */ RDMA_REMOVE_CLOSE, @@ -1503,6 +1506,7 @@ struct ib_ucontext { /* protects cleanup process from other actions */ struct rw_semaphore cleanup_rwsem; enum rdma_remove_reason cleanup_reason; + bool cleanup_retryable; struct pid *tgid; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING @@ -2684,6 +2688,46 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata, return ib_is_buffer_cleared(udata->inbuf + offset, len); } +/** + * ib_is_destroy_retryable - Check whether the uobject destruction + * is retryable. + * @ret: The initial destruction return code + * @why: remove reason + * @uobj: The uobject that is destroyed + * + * This is a helper function that the IB layer and low-level drivers + * can use to decide whether the destruction of the given uobject is + * retryable. + * It checks the original return code; if it wasn't success, the destruction + * is retryable according to the ucontext state (i.e. cleanup_retryable) and + * the remove reason (i.e. why). + * Must be called with the object locked for destroy. + */ +static inline bool ib_is_destroy_retryable(int ret, enum rdma_remove_reason why, + struct ib_uobject *uobj) +{ + return ret && (why == RDMA_REMOVE_DESTROY || + uobj->context->cleanup_retryable); +} + +/** + * ib_destroy_usecnt - Called during destruction to check the usecnt + * @usecnt: The usecnt atomic + * @why: remove reason + * @uobj: The uobject that is destroyed + * + * Non-zero usecnts will block destruction unless destruction was triggered by + * a ucontext cleanup. 
+ */ +static inline int ib_destroy_usecnt(atomic_t *usecnt, + enum rdma_remove_reason why, + struct ib_uobject *uobj) +{ + if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj)) + return -EBUSY; + return 0; +} + /** * ib_modify_qp_is_ok - Check that the supplied attribute mask * contains all required attributes and no attributes not allowed for diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index cc04ec65588d..175495d1b0b8 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -93,7 +93,6 @@ struct uverbs_obj_type_class { struct uverbs_obj_type { const struct uverbs_obj_type_class * const type_class; size_t obj_size; - unsigned int destroy_order; }; /* @@ -152,10 +151,9 @@ extern const struct uverbs_obj_type_class uverbs_fd_class; #define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) - \ sizeof(char)) -#define UVERBS_TYPE_ALLOC_FD(_order, _obj_size, _context_closed, _fops, _name, _flags)\ +#define UVERBS_TYPE_ALLOC_FD(_obj_size, _context_closed, _fops, _name, _flags)\ ((&((const struct uverbs_obj_fd_type) \ {.type = { \ - .destroy_order = _order, \ .type_class = &uverbs_fd_class, \ .obj_size = (_obj_size) + \ UVERBS_BUILD_BUG_ON((_obj_size) < sizeof(struct ib_uobject_file)), \ @@ -164,18 +162,17 @@ extern const struct uverbs_obj_type_class uverbs_fd_class; .fops = _fops, \ .name = _name, \ .flags = _flags}))->type) -#define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _order, _destroy_object) \ +#define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _destroy_object) \ ((&((const struct uverbs_obj_idr_type) \ {.type = { \ - .destroy_order = _order, \ .type_class = &uverbs_idr_class, \ .obj_size = (_size) + \ UVERBS_BUILD_BUG_ON((_size) < \ sizeof(struct ib_uobject)) \ }, \ .destroy_object = _destroy_object,}))->type) -#define UVERBS_TYPE_ALLOC_IDR(_order, _destroy_object) \ - UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject), _order, \ +#define UVERBS_TYPE_ALLOC_IDR(_destroy_object) \ + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject), \ _destroy_object) #endif -- cgit From 4eefd62c17a9a5e7576207e84f3d2b4f73aba750 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 2 Jul 2018 10:06:51 -0700 Subject: include/rdma/opa_addr.h: Fix an endianness issue IB_MULTICAST_LID_BASE is defined as follows: #define IB_MULTICAST_LID_BASE cpu_to_be16(0xC000) Hence use be16_to_cpu() to convert it to CPU endianness. Compile-tested only. 
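For illustration, a minimal user-space sketch of why the 16-bit helper matters; htons()/ntohs()/ntohl() stand in for the kernel's cpu_to_be16()/be16_to_cpu()/be32_to_cpu(), and the snippet is a standalone illustration, not part of the patch:

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	/* IB_MULTICAST_LID_BASE is stored big-endian: cpu_to_be16(0xC000). */
	uint16_t lid_base_be = htons(0xC000);

	/* Correct: the 16-bit conversion recovers the intended bound, 0xC000. */
	printf("be16_to_cpu view: 0x%04x\n", ntohs(lid_base_be));

	/*
	 * Buggy: feeding the be16 value to a 32-bit conversion. On a
	 * little-endian CPU lid_base_be holds 0x00C0, which ntohl() turns
	 * into 0xC0000000; no 16-bit dlid ever compares >= to that, so the
	 * multicast-range check silently never fires.
	 */
	printf("be32_to_cpu view: 0x%08x\n", ntohl((uint32_t)lid_base_be));
	return 0;
}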
Fixes: af808ece5ce9 ("IB/SA: Check dlid before SA agent queries for ClassPortInfo") Signed-off-by: Bart Van Assche Cc: Venkata Sandeep Dhanalakota Cc: Mike Marciniszyn Cc: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- include/rdma/opa_addr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h index 2bbb7a67e643..66d4393d339c 100644 --- a/include/rdma/opa_addr.h +++ b/include/rdma/opa_addr.h @@ -120,7 +120,7 @@ static inline bool rdma_is_valid_unicast_lid(struct rdma_ah_attr *attr) if (attr->type == RDMA_AH_ATTR_TYPE_IB) { if (!rdma_ah_get_dlid(attr) || rdma_ah_get_dlid(attr) >= - be32_to_cpu(IB_MULTICAST_LID_BASE)) + be16_to_cpu(IB_MULTICAST_LID_BASE)) return false; } else if (attr->type == RDMA_AH_ATTR_TYPE_OPA) { if (!rdma_ah_get_dlid(attr) || -- cgit From af7b641ed4d06794550e9811259b5fa970857cd0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 2 Jul 2018 08:58:41 -0700 Subject: IB/srp: Remove driver version and release data information Remove the driver version and release date information because such information is not relevant for an upstream driver. See also commit e1267b01240a ("RDMA: Remove useless MODULE_VERSION"). Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srp/ib_srp.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 34b1aaffa521..117dc1082e58 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -57,13 +57,10 @@ #define DRV_NAME "ib_srp" #define PFX DRV_NAME ": " -#define DRV_VERSION "2.0" -#define DRV_RELDATE "July 26, 2015" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_INFO(release_date, DRV_RELDATE); #if !defined(CONFIG_DYNAMIC_DEBUG) #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) -- cgit From 7496a511a0be976406d0e7ab8cdec7df6e5aac04 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 2 Jul 2018 08:59:28 -0700 Subject: IB/mlx5: Remove set-but-not-used variables Avoid that the compiler complains about set-but-not-used variables when building with W=1. This patch does not change any functionality. 
Signed-off-by: Bart Van Assche Cc: Leon Romanovsky Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 --- drivers/infiniband/hw/mlx5/main.c | 2 -- 2 files changed, 5 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 454d391b4b40..d8c68db4497f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4052,15 +4052,12 @@ static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev, u32 bt_chk_sz; u32 mhop_num; int eqe_alloc; - int ba_num; int i = 0; int j = 0; mhop_num = hr_dev->caps.eqe_hop_num; buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT); bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT); - ba_num = (PAGE_ALIGN(eq->entries * eq->eqe_size) + buf_chk_sz - 1) / - buf_chk_sz; /* hop_num = 0 */ if (mhop_num == HNS_ROCE_HOP_NUM_0) { diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 322f036a1264..e4460b99581a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5784,9 +5784,7 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; int port_type_cap; - u8 port_num; - port_num = mlx5_core_native_port_num(dev->mdev) - 1; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); -- cgit From 995250959d22fc341b5424e3343b0ce5df672461 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 2 Jul 2018 14:08:18 -0700 Subject: ib_srpt: Fix a use-after-free in srpt_close_ch() Avoid that KASAN reports the following: BUG: KASAN: use-after-free in srpt_close_ch+0x4f/0x1b0 [ib_srpt] Read of size 4 at addr ffff880151180cb8 by task check/4681 CPU: 15 PID: 4681 Comm: check Not tainted 4.18.0-rc2-dbg+ #4 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 Call Trace: dump_stack+0xa4/0xf5 print_address_description+0x6f/0x270 kasan_report+0x241/0x360 __asan_load4+0x78/0x80 srpt_close_ch+0x4f/0x1b0 [ib_srpt] srpt_set_enabled+0xf7/0x1e0 [ib_srpt] srpt_tpg_enable_store+0xb8/0x120 [ib_srpt] configfs_write_file+0x14e/0x1d0 [configfs] __vfs_write+0xd2/0x3b0 vfs_write+0x101/0x270 ksys_write+0xab/0x120 __x64_sys_write+0x43/0x50 do_syscall_64+0x77/0x230 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: aaf45bd83eba ("IB/srpt: Detect session shutdown reliably") Signed-off-by: Bart Van Assche Cc: Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 36d9fab7c998..754da8d30952 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1834,8 +1834,7 @@ static bool srpt_close_ch(struct srpt_rdma_ch *ch) int ret; if (!srpt_set_ch_state(ch, CH_DRAINING)) { - pr_debug("%s-%d: already closed\n", ch->sess_name, - ch->qp->qp_num); + pr_debug("%s: already closed\n", ch->sess_name); return false; } -- cgit From 14d15c2b278011056482eb015dff89f9cbf2b841 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 2 Jul 2018 14:08:45 -0700 Subject: ib_srpt: Fix a use-after-free in __srpt_close_all_ch() BUG: KASAN: use-after-free in srpt_set_enabled+0x1a9/0x1e0 [ib_srpt] Read of size 4 at addr ffff8801269d23f8 by task check/29726 CPU: 4 PID: 29726 Comm: check Not tainted 4.18.0-rc2-dbg+ #4 Hardware 
name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 Call Trace: dump_stack+0xa4/0xf5 print_address_description+0x6f/0x270 kasan_report+0x241/0x360 __asan_load4+0x78/0x80 srpt_set_enabled+0x1a9/0x1e0 [ib_srpt] srpt_tpg_enable_store+0xb8/0x120 [ib_srpt] configfs_write_file+0x14e/0x1d0 [configfs] __vfs_write+0xd2/0x3b0 vfs_write+0x101/0x270 ksys_write+0xab/0x120 __x64_sys_write+0x43/0x50 do_syscall_64+0x77/0x230 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f235cfe6154 Fixes: aaf45bd83eba ("IB/srpt: Detect session shutdown reliably") Signed-off-by: Bart Van Assche Cc: Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 754da8d30952..e42eec20c631 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1940,8 +1940,8 @@ static void __srpt_close_all_ch(struct srpt_port *sport) list_for_each_entry(nexus, &sport->nexus_list, entry) { list_for_each_entry(ch, &nexus->ch_list, list) { if (srpt_disconnect_ch(ch) >= 0) - pr_info("Closing channel %s-%d because target %s_%d has been disabled\n", - ch->sess_name, ch->qp->qp_num, + pr_info("Closing channel %s because target %s_%d has been disabled\n", + ch->sess_name, sport->sdev->device->name, sport->port); srpt_close_ch(ch); } -- cgit From d108c60d3d55e117695dc7ea6e1ed3df31113ce3 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 2 Jul 2018 08:08:27 -0700 Subject: IB/hfi1: Set in_use_ctxts bits for user ctxts only The in_use_ctxts bitmask is for user receive contexts only. Setting it for any other type of receive context is incorrect. Move initial set of in_use_ctxts bits from the general context init to the user context specific init. Having this bit set can allow contexts to be incorrectly identified by some IRQ handlers. handle_user_interrupt() will now filter user contexts correctly. Clean up redundant is_rcv_urgent_int() user context check. A follow-on patch will clean up an incorrect code path in is_rcv_avail_int(). Fixes: 8737ce95c463 ("IB/hfi1: Fix an assign/ordering issue with shared context IDs") Reviewed-by: Mike Marciniszyn Reviewed-by: Kamenee Arumugam Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 14 ++++++++------ drivers/infiniband/hw/hfi1/file_ops.c | 4 ++++ drivers/infiniband/hw/hfi1/init.c | 1 - 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 5d03c780c780..66ea0b52d42e 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8173,8 +8173,14 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) err_detail, source); } -/* +/** + * is_rcv_urgent_int() - User receive context urgent IRQ handler + * @dd: valid dd + * @source: logical IRQ source (offset from IS_RCVURGENT_START) + * * RX block receive urgent interrupt. Source is < 160. + * + * NOTE: kernel receive contexts specifically do NOT enable this IRQ. 
*/ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source) { @@ -8184,11 +8190,7 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source) if (likely(source < dd->num_rcv_contexts)) { rcd = hfi1_rcd_get_by_index(dd, source); if (rcd) { - /* only pay attention to user urgent interrupts */ - if (source >= dd->first_dyn_alloc_ctxt && - !rcd->is_vnic) - handle_user_interrupt(rcd); - + handle_user_interrupt(rcd); hfi1_rcd_put(rcd); return; /* OK */ } diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 3b09eedd29bc..1fc75647e47b 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -985,7 +985,11 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, * sub contexts. * This has to be done here so the rest of the sub-contexts find the * proper base context. + * NOTE: _set_bit() can be used here because the context creation is + * protected by the mutex (rather than the spin_lock), and will be the + * very first instance of this context. */ + __set_bit(0, uctxt->in_use_ctxts); if (uinfo->subctxt_cnt) init_subctxts(uctxt, uinfo); uctxt->userversion = uinfo->userversion; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index d51ad777d72d..758d273c32cf 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -364,7 +364,6 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, hfi1_exp_tid_group_init(rcd); rcd->ppd = ppd; rcd->dd = dd; - __set_bit(0, rcd->in_use_ctxts); rcd->numa_id = numa; rcd->rcv_array_groups = dd->rcv_entries.ngroups; rcd->rhf_rcv_function_map = normal_rhf_rcv_functions; -- cgit From e3091644bf2e6d21841de62d8b3e6a526c5172d5 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 2 Jul 2018 08:08:37 -0700 Subject: IB/hfi1: Remove incorrect call to do_interrupt callback The general interrupt handler is_rcv_avail_int() has two paths, do_interrupt() (callback) and handle_user_interrupt(). The do_interrupt() callback is for the threaded receive handling. is_rcv_avail_int() cannot handle threaded IRQs. If the do_interrupt() path is taken, and the IRQ returns IRQ_WAKE_THREAD, the IRQ behavior will be indeterminate. Remove incorrect call to do_interrupt() from is_rcv_avail_int(), leaving the un-threaded (handle_user_interrupt()) path. Fixes: f4f30031c33c ("staging/rdma/hfi1: Thread the receive interrupt.") Reviewed-by: Mike Marciniszyn Reviewed-by: Kamenee Arumugam Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 66ea0b52d42e..2c19bf772451 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8143,8 +8143,15 @@ static void is_sdma_eng_int(struct hfi1_devdata *dd, unsigned int source) } } -/* +/** + * is_rcv_avail_int() - User receive context available IRQ handler + * @dd: valid dd + * @source: logical IRQ source (offset from IS_RCVAVAIL_START) + * * RX block receive available interrupt. Source is < 160. + * + * This is the general interrupt handler for user (PSM) receive contexts, + * and can only be used for non-threaded IRQs. 
*/ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) { @@ -8154,12 +8161,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) if (likely(source < dd->num_rcv_contexts)) { rcd = hfi1_rcd_get_by_index(dd, source); if (rcd) { - /* Check for non-user contexts, including vnic */ - if (source < dd->first_dyn_alloc_ctxt || rcd->is_vnic) - rcd->do_interrupt(rcd, 0); - else - handle_user_interrupt(rcd); - + handle_user_interrupt(rcd); hfi1_rcd_put(rcd); return; /* OK */ } -- cgit From a93b632c4531ff50c43d658447a45cbc11f488fd Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 1 Jul 2018 15:50:17 +0300 Subject: IB/mlx5: Fix GRE flow specification Currently the driver sets the mask of the gre_protocol to 0xffff without considering the user request. Fix it by copying the mask from the verbs spec. Fixes: da2f22ae7707 ("IB/mlx5: Add support for GRE flow specification") Signed-off-by: Maor Gottlieb Reviewed-by: Ariel Levkovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e4460b99581a..0f3649ae8746 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2685,7 +2685,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, IPPROTO_GRE); MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol, - 0xffff); + ntohs(ib_spec->gre.mask.protocol)); MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol, ntohs(ib_spec->gre.val.protocol)); -- cgit From 11e40f5c577624d440154388d0a71bb2db0cd9f7 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 29 Jun 2018 07:52:06 -0400 Subject: vmw_pvrdma: Release netdev when vmxnet3 module is removed On repeated module load/unload cycles, it's possible for the pvrdma driver to encounter this crash: ... [ 297.032448] RIP: 0010:[] [] netdev_walk_all_upper_dev_rcu+0x50/0xb0 [ 297.034078] RSP: 0018:ffff95087780bd08 EFLAGS: 00010286 [ 297.034986] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff95087a0c0000 [ 297.036196] RDX: ffff95087a0c0000 RSI: ffffffff839e44e0 RDI: ffff950835d0c000 [ 297.037421] RBP: ffff95087780bd40 R08: ffff95087a0e0ea0 R09: abddacd03f8e0ea0 [ 297.038636] R10: abddacd03f8e0ea0 R11: ffffef5901e9dbc0 R12: ffff95087a0c0000 [ 297.039854] R13: ffffffff839e44e0 R14: ffff95087a0c0000 R15: ffff950835d0c828 [ 297.041071] FS: 0000000000000000(0000) GS:ffff95087fc00000(0000) knlGS:0000000000000000 [ 297.042443] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 297.043429] CR2: ffffffffffffffe8 CR3: 000000007a652000 CR4: 00000000003607f0 [ 297.044674] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 297.045893] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 297.047109] Call Trace: [ 297.047545] [] netdev_has_upper_dev_all_rcu+0x18/0x20 [ 297.048691] [] is_eth_port_of_netdev+0x2f/0xa0 [ib_core] [ 297.049886] [] ? is_eth_active_slave_of_bonding_rcu+0x70/0x70 [ib_core] ... This occurs because vmw_pvrdma on probe stores a pointer to the netdev that exists on function 0 of the same bus/device/slot (which represents the vmxnet3 ethernet driver). However, it never removes this pointer if the vmxnet3 module is removed, leading to crashes resulting from use-after-free dereferencing incidents like the one above. The fix is pretty straightforward. 
vmw_pvrdma should listen for NETDEV_REGISTER and NETDEV_UNREGISTER events in its event listener code block, and update the stored netdev pointer accordingly. This solution has been tested by myself and the reporter with successful results. This fix also allows the pvrdma driver to find its underlying ethernet device in the event that vmxnet3 is loaded after pvrdma, which it was not able to do before. Signed-off-by: Neil Horman Reported-by: ruquin@redhat.com Tested-by: Adit Ranadive Acked-by: Adit Ranadive Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 39 ++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index faa1be2d7727..912933549dfb 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -695,8 +695,12 @@ static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context) } static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, + struct net_device *ndev, unsigned long event) { + struct pci_dev *pdev_net; + unsigned int slot; + switch (event) { case NETDEV_REBOOT: case NETDEV_DOWN: @@ -714,6 +718,24 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, else pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); break; + case NETDEV_UNREGISTER: + dev_put(dev->netdev); + dev->netdev = NULL; + break; + case NETDEV_REGISTER: + /* vmxnet3 will have same bus, slot. But func will be 0 */ + slot = PCI_SLOT(dev->pdev->devfn); + pdev_net = pci_get_slot(dev->pdev->bus, + PCI_DEVFN(slot, 0)); + if ((dev->netdev == NULL) && + (pci_get_drvdata(pdev_net) == ndev)) { + /* this is our netdev */ + dev->netdev = ndev; + dev_hold(ndev); + } + pci_dev_put(pdev_net); + break; + default: dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n", event, dev->ib_dev.name); @@ -730,8 +752,11 @@ static void pvrdma_netdevice_event_work(struct work_struct *work) mutex_lock(&pvrdma_device_list_lock); list_for_each_entry(dev, &pvrdma_device_list, device_link) { - if (dev->netdev == netdev_work->event_netdev) { - pvrdma_netdevice_event_handle(dev, netdev_work->event); + if ((netdev_work->event == NETDEV_REGISTER) || + (dev->netdev == netdev_work->event_netdev)) { + pvrdma_netdevice_event_handle(dev, + netdev_work->event_netdev, + netdev_work->event); break; } } @@ -964,6 +989,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, ret = -ENODEV; goto err_free_cq_ring; } + dev_hold(dev->netdev); dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name); @@ -1036,6 +1062,10 @@ err_free_intrs: pvrdma_free_irq(dev); pci_free_irq_vectors(pdev); err_free_cq_ring: + if (dev->netdev) { + dev_put(dev->netdev); + dev->netdev = NULL; + } pvrdma_page_dir_cleanup(dev, &dev->cq_pdir); err_free_async_ring: pvrdma_page_dir_cleanup(dev, &dev->async_pdir); @@ -1075,6 +1105,11 @@ static void pvrdma_pci_remove(struct pci_dev *pdev) flush_workqueue(event_wq); + if (dev->netdev) { + dev_put(dev->netdev); + dev->netdev = NULL; + } + /* Unregister ib device */ ib_unregister_device(&dev->ib_dev); -- cgit From f1228867adaf8890826f2b59e4caddb1c5cc2df7 Mon Sep 17 00:00:00 2001 From: Tarick Bedeir Date: Mon, 2 Jul 2018 14:02:34 -0700 Subject: IB/mlx4: Test port number before querying type. rdma_ah_find_type() can reach into ib_device->port_immutable with a potentially out-of-bounds port number, so check that the port number is valid first. 
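The ordering matters because the table lookup itself is the out-of-bounds access; a reduced sketch of the pattern (array and names invented for illustration, not the driver's real structures):

#include <stdio.h>

#define NUM_PORTS 2

/* 1-based port table; index 0 is unused. */
static const int port_type[NUM_PORTS + 1] = { 0, 10, 20 };

static int find_type(unsigned int port)
{
	/* Validate before indexing, mirroring the reordered driver code. */
	if (port == 0 || port > NUM_PORTS)
		return -1;
	return port_type[port];
}

int main(void)
{
	/* An out-of-range port is rejected instead of read past the array. */
	printf("port 3 -> %d\n", find_type(3));
	return 0;
}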
Fixes: 44c58487d51a ("IB/core: Define 'ib' and 'roce' rdma_ah_attr types") Signed-off-by: Tarick Bedeir Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 984e9f07339e..408e720fd923 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -4047,9 +4047,9 @@ static void to_rdma_ah_attr(struct mlx4_ib_dev *ibdev, u8 port_num = path->sched_queue & 0x40 ? 2 : 1; memset(ah_attr, 0, sizeof(*ah_attr)); - ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port_num); if (port_num == 0 || port_num > dev->caps.num_ports) return; + ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port_num); if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) rdma_ah_set_sl(ah_attr, ((path->sched_queue >> 3) & 0x7) | -- cgit From 5d9a2b0e28759e319a623da33940dbb3ce952b7d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 1 Jul 2018 19:36:24 +0300 Subject: RDMA/i40iw: Hold read semaphore while looking up VMA VMA lookup is supposed to be performed while mmap_sem is held. Fixes: f26c7c83395b ("i40iw: Add 2MB page support") Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 8884ff71a634..7d85414742ff 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -1410,6 +1410,7 @@ static void i40iw_set_hugetlb_values(u64 addr, struct i40iw_mr *iwmr) struct vm_area_struct *vma; struct hstate *h; + down_read(&current->mm->mmap_sem); vma = find_vma(current->mm, addr); if (vma && is_vm_hugetlb_page(vma)) { h = hstate_vma(vma); @@ -1418,6 +1419,7 @@ static void i40iw_set_hugetlb_values(u64 addr, struct i40iw_mr *iwmr) iwmr->page_msk = huge_page_mask(h); } } + up_read(&current->mm->mmap_sem); } /** -- cgit From e543a245cbe08a47a742500ea72aadf85f537ed8 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 3 Jul 2018 18:02:37 +0300 Subject: MAINTAINERS: Moving out... The 2.6.18... was a hell of a ride, and by now neither Roi nor I am dealing with iser any more. Max will replace us as maintainer, good luck to you dear! Signed-off-by: Or Gerlitz Acked-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 37a1d71f417e..c79f306a936a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7562,9 +7562,8 @@ S: Maintained F: drivers/firmware/iscsi_ibft* ISCSI EXTENSIONS FOR RDMA (ISER) INITIATOR -M: Or Gerlitz M: Sagi Grimberg -M: Roi Dayan +M: Max Gurtovoy L: linux-rdma@vger.kernel.org S: Supported W: http://www.openfabrics.org -- cgit From c2d7c8ff89b22ddefb1ac2986c0d48444a667689 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Jul 2018 12:32:12 +0300 Subject: IB/core: type promotion bug in rdma_rw_init_one_mr() "nents" is an unsigned int, so if ib_map_mr_sg() returns a negative error code, it is type-promoted to a large unsigned int, which is treated as success. 
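The promotion is easy to reproduce in isolation; a standalone sketch, with map_mr() as a made-up stand-in for ib_map_mr_sg():

#include <stdio.h>

static int map_mr(void)
{
	return -12;	/* pretend the mapping failed with -ENOMEM */
}

int main(void)
{
	unsigned int nents = 4;
	int ret = map_mr();

	/*
	 * In 'ret < nents' the signed ret is converted to unsigned int, so
	 * -12 becomes 4294967284 and the test is false: the error would be
	 * treated as success.
	 */
	if (!(ret < nents))
		printf("old check missed the error (ret=%d)\n", ret);

	/* Testing 'ret < 0' first keeps the signed comparison intact. */
	if (ret < 0 || (unsigned int)ret < nents)
		printf("new check caught it\n");
	return 0;
}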
Fixes: a060b5629ab0 ("IB/core: generic RDMA READ/WRITE API") Signed-off-by: Dan Carpenter Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index c8963e91f92a..3ee0adfb45e9 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -87,7 +87,7 @@ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num, } ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE); - if (ret < nents) { + if (ret < 0 || ret < nents) { ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr); return -EINVAL; } -- cgit From 474e5a86067e5f12c97d1db8b170c7f45b53097a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Jul 2018 12:57:11 +0300 Subject: RDMA/bnxt_re: Fix a couple of off-by-one bugs The sgid_tbl->tbl[] array is allocated in bnxt_qplib_alloc_sgid_tbl(). It has sgid_tbl->max elements. So the > should be >= to prevent accessing one element beyond the end of the array. Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Signed-off-by: Dan Carpenter Acked-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 2f3f32eaa1d5..4097f3fa25c5 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -197,7 +197,7 @@ int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res, struct bnxt_qplib_sgid_tbl *sgid_tbl, int index, struct bnxt_qplib_gid *gid) { - if (index > sgid_tbl->max) { + if (index >= sgid_tbl->max) { dev_err(&res->pdev->dev, "QPLIB: Index %d exceeded SGID table max (%d)", index, sgid_tbl->max); @@ -402,7 +402,7 @@ int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res, *pkey = 0xFFFF; return 0; } - if (index > pkey_tbl->max) { + if (index >= pkey_tbl->max) { dev_err(&res->pdev->dev, "QPLIB: Index %d exceeded PKEY table max (%d)", index, pkey_tbl->max); -- cgit From c1dfc0114c901b4f46c85ceff0491debf2b2a2ec Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Jul 2018 12:58:02 +0300 Subject: RDMA/bnxt_re: Fix a bunch of off-by-one bugs in qplib_fp.c The srq->swq[] is allocated in bnxt_qplib_create_srq(). It has srq->hwq.max_elements elements, so these tests should be >= instead of > or we might go beyond the end of the array. 
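The same boundary rule explains the qplib_fp.c hunks in the next patch; a tiny sketch of why '>' lets the one-past-the-end index through (names invented for illustration):

#include <stdio.h>

#define MAX 8

static int tbl[MAX];	/* valid indices are 0 .. MAX - 1 */

static int get_entry(unsigned int index, int *out)
{
	/*
	 * 'index > MAX' would still accept index == MAX, i.e. tbl[MAX],
	 * one element past the end; 'index >= MAX' is the correct guard.
	 */
	if (index >= MAX)
		return -22;	/* -EINVAL */
	*out = tbl[index];
	return 0;
}

int main(void)
{
	int v;
	printf("index == MAX -> %d (rejected)\n", get_entry(MAX, &v));
	return 0;
}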
Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Signed-off-by: Dan Carpenter Acked-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 50d8f1fc98d5..e426b990c1dd 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -2354,7 +2354,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, srq = qp->srq; if (!srq) return -EINVAL; - if (wr_id_idx > srq->hwq.max_elements) { + if (wr_id_idx >= srq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process RC "); dev_err(&cq->hwq.pdev->dev, @@ -2369,7 +2369,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, *pcqe = cqe; } else { rq = &qp->rq; - if (wr_id_idx > rq->hwq.max_elements) { + if (wr_id_idx >= rq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process RC "); dev_err(&cq->hwq.pdev->dev, @@ -2437,7 +2437,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, if (!srq) return -EINVAL; - if (wr_id_idx > srq->hwq.max_elements) { + if (wr_id_idx >= srq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process UD "); dev_err(&cq->hwq.pdev->dev, @@ -2452,7 +2452,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, *pcqe = cqe; } else { rq = &qp->rq; - if (wr_id_idx > rq->hwq.max_elements) { + if (wr_id_idx >= rq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process UD "); dev_err(&cq->hwq.pdev->dev, @@ -2546,7 +2546,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, "QPLIB: FP: SRQ used but not defined??"); return -EINVAL; } - if (wr_id_idx > srq->hwq.max_elements) { + if (wr_id_idx >= srq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process Raw/QP1 "); dev_err(&cq->hwq.pdev->dev, @@ -2561,7 +2561,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, *pcqe = cqe; } else { rq = &qp->rq; - if (wr_id_idx > rq->hwq.max_elements) { + if (wr_id_idx >= rq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process Raw/QP1 RQ wr_id "); dev_err(&cq->hwq.pdev->dev, -- cgit From 25bb36e75d7d62dc14ae2306dca38d672e0c3fa0 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Tue, 19 Jun 2018 08:47:24 +0300 Subject: IB/mlx5: Expose dump and fill memory key The MLX5 IB HCA offers the memory key, dump_fill_mkey, to boost performance when used in send or receive operations. It is used to force local HCA operations to skip the PCI bus access, while keeping track of the processed length in the ibv_sge handling. Meaning, instead of a PCI write access, the HCA leaves the target memory untouched and skips filling that packet section. The behavior is similar upon send: the HCA skips data in memory relevant to this key and saves PCI bus access. This functionality saves PCI read/write operations. 
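On the user-space side, the key would simply take the place of a real MR lkey in an ibv_sge covering the bytes to skip. A hedged sketch of that intent (how a provider library hands out dump_fill_mkey is an assumption here, and the QP setup around it is omitted):

#include <stdint.h>
#include <infiniband/verbs.h>

/*
 * Build an SGE that asks the HCA to skip 'len' bytes: the memory at
 * 'addr' is left untouched on receive (or not fetched on send), while
 * the scatter/gather length accounting still advances by 'len'.
 */
static struct ibv_sge make_skip_sge(uint64_t addr, uint32_t len,
				    uint32_t dump_fill_mkey)
{
	struct ibv_sge sge = {
		.addr	= addr,
		.length	= len,
		.lkey	= dump_fill_mkey,	/* special key: no PCI data access */
	};
	return sge;
}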
Signed-off-by: Yonatan Cohen Reviewed-by: Yishai Hadas Reviewed-by: Guy Levi Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 16 ++++++++++++++++ include/uapi/rdma/mlx5-abi.h | 3 ++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e52dd21519b4..bd402691c2c0 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1660,6 +1660,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, int err; size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2, max_cqe_version); + u32 dump_fill_mkey; bool lib_uar_4k; if (!dev->ib_active) @@ -1761,6 +1762,12 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, goto out_uars; } + if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { + err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey); + if (err) + goto out_td; + } + INIT_LIST_HEAD(&context->vma_private_list); mutex_init(&context->vma_private_list_mutex); INIT_LIST_HEAD(&context->db_page_list); @@ -1819,6 +1826,15 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, resp.response_length += sizeof(resp.num_dyn_bfregs); } + if (field_avail(typeof(resp), dump_fill_mkey, udata->outlen)) { + if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { + resp.dump_fill_mkey = dump_fill_mkey; + resp.comp_mask |= + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY; + } + resp.response_length += sizeof(resp.dump_fill_mkey); + } + err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) goto out_td; diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 8daec1fa49cf..1f7b7b6bddf0 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -90,6 +90,7 @@ struct mlx5_ib_alloc_ucontext_req_v2 { enum mlx5_ib_alloc_ucontext_resp_mask { MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0, + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY = 1UL << 1, }; enum mlx5_user_cmds_supp_uhw { @@ -138,7 +139,7 @@ struct mlx5_ib_alloc_ucontext_resp { __u32 log_uar_size; __u32 num_uars_per_page; __u32 num_dyn_bfregs; - __u32 reserved3; + __u32 dump_fill_mkey; }; struct mlx5_ib_alloc_pd_resp { -- cgit From 87fc2a620a398e970872064841b0db7cc6d0149f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 4 Jul 2018 08:50:23 +0300 Subject: RDMA/uverbs: Store the specs_root in the struct ib_uverbs_device The specs are required to operate the uverbs file, so they belong inside the ib_uverbs_device, not inside the ib_device. The spec passed in the ib_device is just a communication from the driver and should not be used during runtime. This also changes the lifetime of the spec memory to match the ib_uverbs_device, however at this time the spec_root can still contain driver pointers after disassociation, so it cannot be used if ib_dev is NULL. This is preparation for another series. Signed-off-by: Jason Gunthorpe Reviewed-by: Michael J. 
Ruhl Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/rdma_core.c | 4 ++-- drivers/infiniband/core/rdma_core.h | 2 +- drivers/infiniband/core/uverbs_ioctl.c | 26 ++++++++++++-------------- drivers/infiniband/core/uverbs_main.c | 16 +++++++++------- drivers/infiniband/hw/mlx5/main.c | 6 +++--- include/rdma/ib_verbs.h | 2 +- 6 files changed, 28 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 2ddf1c716ba8..c67bcdda5760 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -52,10 +52,10 @@ int uverbs_ns_idx(u16 *id, unsigned int ns_count) return ret; } -const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev, +const struct uverbs_object_spec *uverbs_get_object(struct ib_uverbs_file *ufile, uint16_t object) { - const struct uverbs_root_spec *object_hash = ibdev->specs_root; + const struct uverbs_root_spec *object_hash = ufile->device->specs_root; const struct uverbs_object_spec_hash *objects; int ret = uverbs_ns_idx(&object, object_hash->num_buckets); diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index a243cc2a59f7..8cede4546b25 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -44,7 +44,7 @@ #include int uverbs_ns_idx(u16 *id, unsigned int ns_count); -const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev, +const struct uverbs_object_spec *uverbs_get_object(struct ib_uverbs_file *ufile, uint16_t object); const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object, uint16_t method); diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 03065bad8dae..785975a4e3dd 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -46,8 +46,7 @@ static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr, 0, uattr->len - len); } -static int uverbs_process_attr(struct ib_device *ibdev, - struct ib_ucontext *ucontext, +static int uverbs_process_attr(struct ib_uverbs_file *ufile, const struct ib_uverbs_attr *uattr, u16 attr_id, const struct uverbs_attr_spec_hash *attr_spec_bucket, @@ -145,17 +144,18 @@ static int uverbs_process_attr(struct ib_device *ibdev, if (uattr->attr_data.reserved) return -EINVAL; - if (uattr->len != 0 || !ucontext || uattr->data > INT_MAX) + if (uattr->len != 0 || !ufile->ucontext || + uattr->data > INT_MAX) return -EINVAL; o_attr = &e->obj_attr; - object = uverbs_get_object(ibdev, spec->obj.obj_type); + object = uverbs_get_object(ufile, spec->obj.obj_type); if (!object) return -EINVAL; o_attr->uobject = uverbs_get_uobject_from_context( object->type_attrs, - ucontext, + ufile->ucontext, spec->obj.access, (int)uattr->data); @@ -230,8 +230,7 @@ static int uverbs_finalize_attrs(struct uverbs_attr_bundle *attrs_bundle, return ret; } -static int uverbs_uattrs_process(struct ib_device *ibdev, - struct ib_ucontext *ucontext, +static int uverbs_uattrs_process(struct ib_uverbs_file *ufile, const struct ib_uverbs_attr *uattrs, size_t num_uattrs, const struct uverbs_method_spec *method, @@ -267,9 +266,9 @@ static int uverbs_uattrs_process(struct ib_device *ibdev, num_given_buckets = ret + 1; attr_spec_bucket = method->attr_buckets[ret]; - ret = uverbs_process_attr(ibdev, ucontext, uattr, attr_id, - attr_spec_bucket, &attr_bundle->hash[ret], - uattr_ptr++); + ret = uverbs_process_attr(ufile, uattr, attr_id, + 
attr_spec_bucket, + &attr_bundle->hash[ret], uattr_ptr++); if (ret) { uverbs_finalize_attrs(attr_bundle, method->attr_buckets, @@ -324,9 +323,8 @@ static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr, int finalize_ret; int num_given_buckets; - num_given_buckets = uverbs_uattrs_process(ibdev, ufile->ucontext, uattrs, - num_uattrs, method_spec, - attr_bundle, uattr_ptr); + num_given_buckets = uverbs_uattrs_process( + ufile, uattrs, num_uattrs, method_spec, attr_bundle, uattr_ptr); if (num_given_buckets <= 0) return -EINVAL; @@ -367,7 +365,7 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, if (hdr->driver_id != ib_dev->driver_id) return -EINVAL; - object_spec = uverbs_get_object(ib_dev, hdr->object_id); + object_spec = uverbs_get_object(file, hdr->object_id); if (!object_spec) return -EPROTONOSUPPORT; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index f5f4bfb59705..c05ce5ae5415 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -161,6 +161,7 @@ static void ib_uverbs_release_dev(struct kobject *kobj) container_of(kobj, struct ib_uverbs_device, kobj); cleanup_srcu_struct(&dev->disassociate_srcu); + uverbs_free_spec_tree(dev->specs_root); kfree(dev); } @@ -1067,7 +1068,7 @@ static void ib_uverbs_add_one(struct ib_device *device) if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version)) goto err_class; - if (!device->specs_root) { + if (!device->driver_specs_root) { const struct uverbs_object_tree_def *default_root[] = { uverbs_default_get_objects()}; @@ -1075,8 +1076,13 @@ static void ib_uverbs_add_one(struct ib_device *device) default_root); if (IS_ERR(uverbs_dev->specs_root)) goto err_class; - - device->specs_root = uverbs_dev->specs_root; + } else { + uverbs_dev->specs_root = device->driver_specs_root; + /* + * Take responsibility to free the specs allocated by the + * driver. 
+ */ + device->driver_specs_root = NULL; } ib_set_client_data(device, &uverbs_client, uverbs_dev); @@ -1241,10 +1247,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) ib_uverbs_comp_dev(uverbs_dev); if (wait_clients) wait_for_completion(&uverbs_dev->comp); - if (uverbs_dev->specs_root) { - uverbs_free_spec_tree(uverbs_dev->specs_root); - device->specs_root = NULL; - } kobject_put(&uverbs_dev->kobj); } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d3867286606c..0fb80777aade 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5350,15 +5350,15 @@ static int populate_specs_root(struct mlx5_ib_dev *dev) !WARN_ON(num_trees >= ARRAY_SIZE(default_root))) default_root[num_trees++] = mlx5_ib_get_devx_tree(); - dev->ib_dev.specs_root = + dev->ib_dev.driver_specs_root = uverbs_alloc_spec_tree(num_trees, default_root); - return PTR_ERR_OR_ZERO(dev->ib_dev.specs_root); + return PTR_ERR_OR_ZERO(dev->ib_dev.driver_specs_root); } static void depopulate_specs_root(struct mlx5_ib_dev *dev) { - uverbs_free_spec_tree(dev->ib_dev.specs_root); + uverbs_free_spec_tree(dev->ib_dev.driver_specs_root); } static int mlx5_ib_read_counters(struct ib_counters *counters, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e1130c6c1377..8784d5bfc252 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2602,7 +2602,7 @@ struct ib_device { const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev, int comp_vector); - struct uverbs_root_spec *specs_root; + struct uverbs_root_spec *driver_specs_root; enum rdma_driver_id driver_id; }; -- cgit From ad544cfe54cbd1b0b68c620a371ebcde6a3264eb Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 4 Jul 2018 08:50:24 +0300 Subject: RDMA/uverbs: Split UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE Two methods are sharing the same attribute constant, but the attribute definitions are not the same. This should not have been done; instead, split them into two attributes with the same number. 
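The idiom used in the hunk below keeps the wire format stable: one numeric ID, two distinct attribute declarations, each free to carry its own access semantics. A reduced sketch (enum names invented for illustration):

enum create_attrs {
	ATTR_CREATE_HANDLE,		/* wire ID 0, declared UVERBS_ACCESS_NEW */
};

enum modify_attrs {
	/*
	 * Same wire ID, but a separate attribute definition so the modify
	 * method can declare UVERBS_ACCESS_WRITE for it instead.
	 */
	ATTR_MODIFY_HANDLE = ATTR_CREATE_HANDLE,
};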
Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_std_types_flow_action.c | 12 ++++++++---- include/uapi/rdma/ib_user_ioctl_cmds.h | 7 ++++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index c1875657bc99..afbb19000503 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -321,7 +321,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device return ret; /* No need to check as this attribute is marked as MANDATORY */ - uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE); + uobj = uverbs_attr_get_uobject( + attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE); action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs); if (IS_ERR(action)) return PTR_ERR(action); @@ -351,7 +352,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device if (ret) return ret; - uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE); + uobj = uverbs_attr_get_uobject( + attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE); action = uobj->object; if (action->type != IB_FLOW_ACTION_ESP) @@ -390,7 +392,8 @@ static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = { }; static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, - &UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION, + &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, UVERBS_ACCESS_NEW, UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, @@ -407,7 +410,8 @@ static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type))); static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY, - &UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION, + &UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, UVERBS_ACCESS_WRITE, UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h index 888ac5975a6c..2c881aaf05c2 100644 --- a/include/uapi/rdma/ib_user_ioctl_cmds.h +++ b/include/uapi/rdma/ib_user_ioctl_cmds.h @@ -79,7 +79,7 @@ enum uverbs_attrs_destroy_cq_cmd_attr_ids { }; enum uverbs_attrs_create_flow_action_esp { - UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, + UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE, UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, @@ -87,6 +87,11 @@ enum uverbs_attrs_create_flow_action_esp { UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, }; +enum uverbs_attrs_modify_flow_action_esp { + UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE = + UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE, +}; + enum uverbs_attrs_destroy_flow_action_esp { UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE, }; -- cgit From d108dac08085b6fe3947df9625c76fc9f66c1bbb Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 4 Jul 2018 08:50:25 +0300 Subject: RDMA/uverbs: Simplify UVERBS_ATTR family of macros Instead of using a complex cascade of macros, just directly provide the initializer list each of the declarations is trying to create. 
Now that the macros are simplified this also reworks the uverbs_attr_spec to be friendly to older compilers by eliminating any unnamed structures/unions inside, and removing the duplication of some fields. The structure size remains at 16 bytes which was the original motivation for some of this oddness. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_ioctl.c | 20 +-- drivers/infiniband/core/uverbs_ioctl_merge.c | 4 +- drivers/infiniband/core/uverbs_std_types.c | 8 +- .../infiniband/core/uverbs_std_types_flow_action.c | 24 ++- drivers/infiniband/hw/mlx5/devx.c | 72 ++++----- include/rdma/uverbs_ioctl.h | 172 +++++++++------------ 6 files changed, 135 insertions(+), 165 deletions(-) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 785975a4e3dd..62f7382e8513 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -77,13 +77,13 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile, switch (spec->type) { case UVERBS_ATTR_TYPE_ENUM_IN: - if (uattr->attr_data.enum_data.elem_id >= spec->enum_def.num_elems) + if (uattr->attr_data.enum_data.elem_id >= spec->u.enum_def.num_elems) return -EOPNOTSUPP; if (uattr->attr_data.enum_data.reserved) return -EINVAL; - val_spec = &spec->enum_def.ids[uattr->attr_data.enum_data.elem_id]; + val_spec = &spec->u2.enum_def.ids[uattr->attr_data.enum_data.elem_id]; /* Currently we only support PTR_IN based enums */ if (val_spec->type != UVERBS_ATTR_TYPE_PTR_IN) @@ -97,16 +97,16 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile, * longer struct will fail here if used with an old kernel and * non-zero content, making ABI compat/discovery simpler. */ - if (uattr->len > val_spec->ptr.len && + if (uattr->len > val_spec->u.ptr.len && val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO && - !uverbs_is_attr_cleared(uattr, val_spec->ptr.len)) + !uverbs_is_attr_cleared(uattr, val_spec->u.ptr.len)) return -EOPNOTSUPP; /* fall through */ case UVERBS_ATTR_TYPE_PTR_OUT: - if (uattr->len < val_spec->ptr.min_len || + if (uattr->len < val_spec->u.ptr.min_len || (!(val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO) && - uattr->len > val_spec->ptr.len)) + uattr->len > val_spec->u.ptr.len)) return -EINVAL; if (spec->type != UVERBS_ATTR_TYPE_ENUM_IN && @@ -149,20 +149,20 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile, return -EINVAL; o_attr = &e->obj_attr; - object = uverbs_get_object(ufile, spec->obj.obj_type); + object = uverbs_get_object(ufile, spec->u.obj.obj_type); if (!object) return -EINVAL; o_attr->uobject = uverbs_get_uobject_from_context( object->type_attrs, ufile->ucontext, - spec->obj.access, + spec->u.obj.access, (int)uattr->data); if (IS_ERR(o_attr->uobject)) return PTR_ERR(o_attr->uobject); - if (spec->obj.access == UVERBS_ACCESS_NEW) { + if (spec->u.obj.access == UVERBS_ACCESS_NEW) { u64 id = o_attr->uobject->id; /* Copy the allocated id to the user-space */ @@ -216,7 +216,7 @@ static int uverbs_finalize_attrs(struct uverbs_attr_bundle *attrs_bundle, current_ret = uverbs_finalize_object( attr->obj_attr.uobject, - spec->obj.access, commit); + spec->u.obj.access, commit); if (!ret) ret = current_ret; } else if (spec->type == UVERBS_ATTR_TYPE_PTR_IN && diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c index 6ceb672c4d46..cdada526623e 100644 --- a/drivers/infiniband/core/uverbs_ioctl_merge.c +++ b/drivers/infiniband/core/uverbs_ioctl_merge.c @@ 
-367,8 +367,8 @@ static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_me memcpy(attr, &attr_defs[0]->attr, sizeof(*attr)); attr_obj_with_special_access = IS_ATTR_OBJECT(attr) && - (attr->obj.access == UVERBS_ACCESS_NEW || - attr->obj.access == UVERBS_ACCESS_DESTROY); + (attr->u.obj.access == UVERBS_ACCESS_NEW || + attr->u.obj.access == UVERBS_ACCESS_DESTROY); num_of_singularities += !!attr_obj_with_special_access; if (WARN(num_of_singularities > 1, "ib_uverbs: Method contains more than one object attr (%d) with new/destroy access\n", diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index c7f93b205c70..ed63eed7250c 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -222,11 +222,11 @@ int uverbs_destroy_def_handler(struct ib_device *ib_dev, * spec. */ const struct uverbs_attr_def uverbs_uhw_compat_in = - UVERBS_ATTR_PTR_IN_SZ(UVERBS_ATTR_UHW_IN, UVERBS_ATTR_MIN_SIZE(0), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)); + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN, UVERBS_ATTR_MIN_SIZE(0), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)); const struct uverbs_attr_def uverbs_uhw_compat_out = - UVERBS_ATTR_PTR_OUT_SZ(UVERBS_ATTR_UHW_OUT, UVERBS_ATTR_MIN_SIZE(0), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)); + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT, UVERBS_ATTR_MIN_SIZE(0), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)); void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata) { diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index afbb19000503..1ba55d4ef590 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -366,28 +366,22 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = { [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = { - { .ptr = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_keymat_aes_gcm), - .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO, - } }, + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_keymat_aes_gcm), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO), }, }; static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = { [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = { - { .ptr = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - /* No need to specify any data */ - .len = 0, - } } + .type = UVERBS_ATTR_TYPE_PTR_IN, + /* No need to specify any data */ + UVERBS_ATTR_SIZE(0, 0), }, [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = { - { .ptr = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, size), - .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO, - } } + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, size), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO), }, }; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 30f6b612547f..2f75edc010ab 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1017,15 +1017,15 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR, UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OTHER, - &UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OTHER_CMD_IN, - 
UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO | - UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)), - &UVERBS_ATTR_PTR_OUT_SZ(MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, - UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)) + &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OTHER_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO | + UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)), + &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)) ); static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE, @@ -1033,15 +1033,15 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE, MLX5_IB_OBJECT_DEVX_OBJ, UVERBS_ACCESS_NEW, UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN, - UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO | - UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)), - &UVERBS_ATTR_PTR_OUT_SZ(MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, - UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))); + &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO | + UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)), + &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))); static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY, &UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE, @@ -1054,30 +1054,30 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY, MLX5_IB_OBJECT_DEVX_OBJ, UVERBS_ACCESS_WRITE, UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, - UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO | - UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)), - &UVERBS_ATTR_PTR_OUT_SZ(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, - UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))); + &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO | + UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)), + &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))); static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY, &UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE, MLX5_IB_OBJECT_DEVX_OBJ, UVERBS_ACCESS_READ, UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN, - UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), - 
UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO | - UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)), - &UVERBS_ATTR_PTR_OUT_SZ(MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, - UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))); + &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO | + UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)), + &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))); static DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX, &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER), diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 90a4947ff548..a7246e9cb148 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -73,46 +73,42 @@ enum { /* Specification of a single attribute inside the ioctl message */ struct uverbs_attr_spec { + u8 type; + u8 flags; + union { - /* Header shared by all following union members - to reduce space. */ - struct { - enum uverbs_attr_type type; - /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */ - u8 flags; - }; struct { - enum uverbs_attr_type type; - /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */ - u8 flags; /* Current known size to kernel */ - u16 len; + u16 len; /* User isn't allowed to provide something < min_len */ - u16 min_len; + u16 min_len; } ptr; + struct { - enum uverbs_attr_type type; - /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */ - u8 flags; /* * higher bits mean the namespace and lower bits mean * the type id within the namespace. */ - u16 obj_type; - u8 access; + u16 obj_type; + u8 access; } obj; + + struct { + u8 num_elems; + } enum_def; + } u; + + /* This weird split of the enum lets us remove some padding */ + union { struct { - enum uverbs_attr_type type; - /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */ - u8 flags; - u8 num_elems; /* * The enum attribute can select one of the attributes * contained in the ids array. Currently only PTR_IN * attributes are supported in the ids array. */ - const struct uverbs_attr_spec *ids; + const struct uverbs_attr_spec *ids; } enum_def; - }; + } u2; }; struct uverbs_attr_spec_hash { @@ -196,92 +192,72 @@ struct uverbs_object_tree_def { const struct uverbs_object_def * const (*objects)[]; }; -#define UA_FLAGS(_flags) .flags = _flags -#define __UVERBS_ATTR0(_id, _type, _fld, _attr, ...) \ - ((const struct uverbs_attr_def) \ - {.id = _id, .attr = {{._fld = {.type = _type, _attr, .flags = 0, } }, } }) -#define __UVERBS_ATTR1(_id, _type, _fld, _attr, _extra1, ...) \ - ((const struct uverbs_attr_def) \ - {.id = _id, .attr = {{._fld = {.type = _type, _attr, _extra1 } },} }) -#define __UVERBS_ATTR2(_id, _type, _fld, _attr, _extra1, _extra2) \ - ((const struct uverbs_attr_def) \ - {.id = _id, .attr = {{._fld = {.type = _type, _attr, _extra1, _extra2 } },} }) -#define __UVERBS_ATTR(_id, _type, _fld, _attr, _extra1, _extra2, _n, ...) 
\ - __UVERBS_ATTR##_n(_id, _type, _fld, _attr, _extra1, _extra2) +/* + * ======================================= + * Attribute Specifications + * ======================================= + */ +/* Use in the _type parameter for attribute specifications */ #define UVERBS_ATTR_TYPE(_type) \ - .min_len = sizeof(_type), .len = sizeof(_type) + .u.ptr.min_len = sizeof(_type), .u.ptr.len = sizeof(_type) #define UVERBS_ATTR_STRUCT(_type, _last) \ - .min_len = ((uintptr_t)(&((_type *)0)->_last + 1)), .len = sizeof(_type) + .u.ptr.min_len = ((uintptr_t)(&((_type *)0)->_last + 1)), .u.ptr.len = sizeof(_type) #define UVERBS_ATTR_SIZE(_min_len, _len) \ - .min_len = _min_len, .len = _len + .u.ptr.min_len = _min_len, .u.ptr.len = _len #define UVERBS_ATTR_MIN_SIZE(_min_len) \ UVERBS_ATTR_SIZE(_min_len, USHRT_MAX) -/* - * In new compiler, UVERBS_ATTR could be simplified by declaring it as - * [_id] = {.type = _type, .len = _len, ##__VA_ARGS__} - * But since we support older compilers too, we need the more complex code. - */ -#define UVERBS_ATTR(_id, _type, _fld, _attr, ...) \ - __UVERBS_ATTR(_id, _type, _fld, _attr, ##__VA_ARGS__, 2, 1, 0) -#define UVERBS_ATTR_PTR_IN_SZ(_id, _len, ...) \ - UVERBS_ATTR(_id, UVERBS_ATTR_TYPE_PTR_IN, ptr, _len, ##__VA_ARGS__) -/* If sizeof(_type) <= sizeof(u64), this will be inlined rather than a pointer */ -#define UVERBS_ATTR_PTR_IN(_id, _type, ...) \ - UVERBS_ATTR_PTR_IN_SZ(_id, _type, ##__VA_ARGS__) -#define UVERBS_ATTR_PTR_OUT_SZ(_id, _len, ...) \ - UVERBS_ATTR(_id, UVERBS_ATTR_TYPE_PTR_OUT, ptr, _len, ##__VA_ARGS__) -#define UVERBS_ATTR_PTR_OUT(_id, _type, ...) \ - UVERBS_ATTR_PTR_OUT_SZ(_id, _type, ##__VA_ARGS__) -#define UVERBS_ATTR_ENUM_IN(_id, _enum_arr, ...) \ - UVERBS_ATTR(_id, UVERBS_ATTR_TYPE_ENUM_IN, enum_def, \ - .ids = (_enum_arr), \ - .num_elems = ARRAY_SIZE(_enum_arr), ##__VA_ARGS__) +/* Must be used in the '...' of any UVERBS_ATTR */ +#define UA_FLAGS(_flags) .flags = _flags + +#define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...) \ + ((const struct uverbs_attr_def){ \ + .id = _attr_id, \ + .attr = { .type = UVERBS_ATTR_TYPE_IDR, \ + .u.obj.obj_type = _idr_type, \ + .u.obj.access = _access, \ + __VA_ARGS__ } }) + +#define UVERBS_ATTR_FD(_attr_id, _fd_type, _access, ...) \ + ((const struct uverbs_attr_def){ \ + .id = (_attr_id) + \ + BUILD_BUG_ON_ZERO((_access) != UVERBS_ACCESS_NEW && \ + (_access) != UVERBS_ACCESS_READ), \ + .attr = { .type = UVERBS_ATTR_TYPE_FD, \ + .u.obj.obj_type = _fd_type, \ + .u.obj.access = _access, \ + __VA_ARGS__ } }) + +#define UVERBS_ATTR_PTR_IN(_attr_id, _type, ...) \ + ((const struct uverbs_attr_def){ \ + .id = _attr_id, \ + .attr = { .type = UVERBS_ATTR_TYPE_PTR_IN, \ + _type, \ + __VA_ARGS__ } }) + +#define UVERBS_ATTR_PTR_OUT(_attr_id, _type, ...) \ + ((const struct uverbs_attr_def){ \ + .id = _attr_id, \ + .attr = { .type = UVERBS_ATTR_TYPE_PTR_OUT, \ + _type, \ + __VA_ARGS__ } }) + +/* _enum_arry should be a 'static const union uverbs_attr_spec[]' */ +#define UVERBS_ATTR_ENUM_IN(_attr_id, _enum_arr, ...) \ + ((const struct uverbs_attr_def){ \ + .id = _attr_id, \ + .attr = { .type = UVERBS_ATTR_TYPE_ENUM_IN, \ + .u2.enum_def.ids = _enum_arr, \ + .u.enum_def.num_elems = ARRAY_SIZE(_enum_arr), \ + __VA_ARGS__ }, \ + }) /* - * In new compiler, UVERBS_ATTR_IDR (and FD) could be simplified by declaring - * it as - * {.id = _id, \ - * .attr {.type = __obj_class, \ - * .obj = {.obj_type = _idr_type, \ - * .access = _access \ - * }, ##__VA_ARGS__ } } - * But since we support older compilers too, we need the more complex code. 
+ * ======================================= + * Declaration helpers + * ======================================= */ -#define ___UVERBS_ATTR_OBJ0(_id, _obj_class, _obj_type, _access, ...)\ - ((const struct uverbs_attr_def) \ - {.id = _id, \ - .attr = { {.obj = {.type = _obj_class, .obj_type = _obj_type, \ - .access = _access, .flags = 0 } }, } }) -#define ___UVERBS_ATTR_OBJ1(_id, _obj_class, _obj_type, _access, _flags)\ - ((const struct uverbs_attr_def) \ - {.id = _id, \ - .attr = { {.obj = {.type = _obj_class, .obj_type = _obj_type, \ - .access = _access, _flags} }, } }) -#define ___UVERBS_ATTR_OBJ(_id, _obj_class, _obj_type, _access, _flags, \ - _n, ...) \ - ___UVERBS_ATTR_OBJ##_n(_id, _obj_class, _obj_type, _access, _flags) -#define __UVERBS_ATTR_OBJ(_id, _obj_class, _obj_type, _access, ...) \ - ___UVERBS_ATTR_OBJ(_id, _obj_class, _obj_type, _access, \ - ##__VA_ARGS__, 1, 0) -#define UVERBS_ATTR_IDR(_id, _idr_type, _access, ...) \ - __UVERBS_ATTR_OBJ(_id, UVERBS_ATTR_TYPE_IDR, _idr_type, _access,\ - ##__VA_ARGS__) -#define UVERBS_ATTR_FD(_id, _fd_type, _access, ...) \ - __UVERBS_ATTR_OBJ(_id, UVERBS_ATTR_TYPE_FD, _fd_type, \ - (_access) + BUILD_BUG_ON_ZERO( \ - (_access) != UVERBS_ACCESS_NEW && \ - (_access) != UVERBS_ACCESS_READ), \ - ##__VA_ARGS__) -#define DECLARE_UVERBS_ATTR_SPEC(_name, ...) \ - const struct uverbs_attr_def _name = __VA_ARGS__ - -#define DECLARE_UVERBS_ENUM(_name, ...) \ - const struct uverbs_enum_spec _name = { \ - .len = ARRAY_SIZE(((struct uverbs_attr_spec[]){__VA_ARGS__})),\ - .ids = {__VA_ARGS__}, \ - } #define _UVERBS_METHOD_ATTRS_SZ(...) \ (sizeof((const struct uverbs_attr_def * const []){__VA_ARGS__}) /\ sizeof(const struct uverbs_attr_def *)) -- cgit From 595c7736d48037d67e7926f5d3ebf484b95b1d13 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 4 Jul 2018 08:50:26 +0300 Subject: RDMA/uverbs: Simplify method definition macros Instead of the large set of indirecting macros, define the few needed macros to directly instantiate the struct uverbs_method_def and associated attributes list. This is a small amount of code duplication, but the readability is far better.
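As a rough illustration of the expansion pattern this moves to, here is a minimal standalone C sketch. Everything in it is a pared-down stand-in for this illustration only: the *_sketch structs, DECLARE_METHOD, METHOD_EXAMPLE, and the attribute values are not the kernel's uverbs_attr_def/uverbs_method_def definitions, which carry more fields and live in include/rdma/uverbs_ioctl.h.

#include <stdio.h>

/* Stand-in structs for illustration only. */
struct uverbs_attr_def_sketch {
        unsigned int id;
};

struct uverbs_method_def_sketch {
        unsigned int id;
        unsigned int num_attrs;
        /* Pointer to an array of const attribute pointers. */
        const struct uverbs_attr_def_sketch *const (*attrs)[];
};

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

/*
 * One macro call emits both the static attribute-pointer array and the
 * method struct that points at it, mirroring the shape of the reworked
 * DECLARE_UVERBS_NAMED_METHOD above.
 */
#define DECLARE_METHOD(_method_id, ...)                                       \
        static const struct uverbs_attr_def_sketch *const                     \
                _method_attrs_##_method_id[] = { __VA_ARGS__ };               \
        static const struct uverbs_method_def_sketch _method_##_method_id = { \
                .id = _method_id,                                             \
                .num_attrs = ARRAY_SIZE(_method_attrs_##_method_id),          \
                .attrs = &_method_attrs_##_method_id,                         \
        }

static const struct uverbs_attr_def_sketch attr_handle = { .id = 0 };
static const struct uverbs_attr_def_sketch attr_resp = { .id = 1 };

enum { METHOD_EXAMPLE = 7 };
DECLARE_METHOD(METHOD_EXAMPLE, &attr_handle, &attr_resp);

int main(void)
{
        const struct uverbs_method_def_sketch *m = &_method_METHOD_EXAMPLE;

        printf("method %u: %u attrs, first attr id %u\n",
               m->id, m->num_attrs, (*m->attrs)[0]->id);
        return 0;
}

The one static pointer array per method is the duplication the message refers to; in exchange, ARRAY_SIZE() on a named array replaces the old sizeof-based counting macros.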
Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- .../infiniband/core/uverbs_std_types_counters.c | 7 +-- drivers/infiniband/core/uverbs_std_types_cq.c | 4 +- drivers/infiniband/core/uverbs_std_types_dm.c | 5 +- .../infiniband/core/uverbs_std_types_flow_action.c | 7 +-- drivers/infiniband/core/uverbs_std_types_mr.c | 2 +- drivers/infiniband/hw/mlx5/devx.c | 18 +++--- include/rdma/uverbs_ioctl.h | 18 ------ include/rdma/uverbs_named_ioctl.h | 73 ++++++++++++---------- 8 files changed, 60 insertions(+), 74 deletions(-) diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index 6d0b1ce9fc1f..61fd65bc9496 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -124,21 +124,20 @@ err_read: return ret; } -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_CREATE, +DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_CREATE, &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE, UVERBS_OBJECT_COUNTERS, UVERBS_ACCESS_NEW, UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); -static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_COUNTERS_DESTROY, - uverbs_destroy_def_handler, +DECLARE_UVERBS_NAMED_METHOD_DESTROY(UVERBS_METHOD_COUNTERS_DESTROY, &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE, UVERBS_OBJECT_COUNTERS, UVERBS_ACCESS_DESTROY, UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); #define MAX_COUNTERS_BUFF_SIZE USHRT_MAX -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_READ, +DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_READ, &UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE, UVERBS_OBJECT_COUNTERS, UVERBS_ACCESS_READ, diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index f67b0895b48c..ca0f6a7435d0 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -147,7 +147,7 @@ err_event_file: return ret; }; -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_CREATE, +DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_CREATE, &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ, UVERBS_ACCESS_NEW, UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), @@ -196,7 +196,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev, sizeof(resp)); } -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_DESTROY, +DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_DESTROY, &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ, UVERBS_ACCESS_DESTROY, UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c index d294660a2e06..75ac20bed5e4 100644 --- a/drivers/infiniband/core/uverbs_std_types_dm.c +++ b/drivers/infiniband/core/uverbs_std_types_dm.c @@ -85,7 +85,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev, return 0; } -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_ALLOC, +DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_ALLOC, &UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE, UVERBS_OBJECT_DM, UVERBS_ACCESS_NEW, UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), @@ -96,8 +96,7 @@ static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_ALLOC, UVERBS_ATTR_TYPE(u32), UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); -static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_DM_FREE, - uverbs_destroy_def_handler, +DECLARE_UVERBS_NAMED_METHOD_DESTROY(UVERBS_METHOD_DM_FREE, &UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE, 
UVERBS_OBJECT_DM, UVERBS_ACCESS_DESTROY, diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index 1ba55d4ef590..1a572bdeff6a 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -385,7 +385,7 @@ static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = { }, }; -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, +DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION, UVERBS_ACCESS_NEW, @@ -403,7 +403,7 @@ static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type))); -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY, +DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY, &UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION, UVERBS_ACCESS_WRITE, @@ -419,8 +419,7 @@ static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY, &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type))); -static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_FLOW_ACTION_DESTROY, - uverbs_destroy_def_handler, +DECLARE_UVERBS_NAMED_METHOD_DESTROY(UVERBS_METHOD_FLOW_ACTION_DESTROY, &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE, UVERBS_OBJECT_FLOW_ACTION, UVERBS_ACCESS_DESTROY, diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index d7f7ba3802af..0366814b81c2 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -115,7 +115,7 @@ err_dereg: return ret; } -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_MR_REG, +DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_MR_REG, &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, UVERBS_OBJECT_MR, UVERBS_ACCESS_NEW, UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 2f75edc010ab..ecd0900681fa 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -984,7 +984,7 @@ static int devx_umem_cleanup(struct ib_uobject *uobject, return 0; } -static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG, +DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG, &UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE, MLX5_IB_OBJECT_DEVX_UMEM, UVERBS_ACCESS_NEW, @@ -998,25 +998,25 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG, &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, UVERBS_ATTR_TYPE(u32), UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); -static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG, +DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG, &UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE, MLX5_IB_OBJECT_DEVX_UMEM, UVERBS_ACCESS_DESTROY, UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); -static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN, +DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN, &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC, UVERBS_ATTR_TYPE(u32), UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, UVERBS_ATTR_TYPE(u32), UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); 
-static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR, +DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR, &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX, UVERBS_ATTR_TYPE(u32), UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX, UVERBS_ATTR_TYPE(u32), UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); -static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OTHER, +DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OTHER, &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OTHER_CMD_IN, UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | @@ -1028,7 +1028,7 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OTHER, UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)) ); -static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE, +DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE, &UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE, MLX5_IB_OBJECT_DEVX_OBJ, UVERBS_ACCESS_NEW, @@ -1043,13 +1043,13 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE, UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))); -static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY, +DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY, &UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE, MLX5_IB_OBJECT_DEVX_OBJ, UVERBS_ACCESS_DESTROY, UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); -static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY, +DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY, &UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE, MLX5_IB_OBJECT_DEVX_OBJ, UVERBS_ACCESS_WRITE, @@ -1064,7 +1064,7 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY, UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))); -static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY, +DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY, &UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE, MLX5_IB_OBJECT_DEVX_OBJ, UVERBS_ACCESS_READ, diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index a7246e9cb148..12fa0eef0ab0 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -258,24 +258,6 @@ struct uverbs_object_tree_def { * Declaration helpers * ======================================= */ -#define _UVERBS_METHOD_ATTRS_SZ(...) \ - (sizeof((const struct uverbs_attr_def * const []){__VA_ARGS__}) /\ - sizeof(const struct uverbs_attr_def *)) -#define _UVERBS_METHOD(_id, _handler, _flags, ...) \ - ((const struct uverbs_method_def) { \ - .id = _id, \ - .flags = _flags, \ - .handler = _handler, \ - .num_attrs = _UVERBS_METHOD_ATTRS_SZ(__VA_ARGS__), \ - .attrs = &(const struct uverbs_attr_def * const []){__VA_ARGS__} }) -#define DECLARE_UVERBS_METHOD(_name, _id, _handler, ...) \ - const struct uverbs_method_def _name = \ - _UVERBS_METHOD(_id, _handler, 0, ##__VA_ARGS__) -#define DECLARE_UVERBS_CTX_METHOD(_name, _id, _handler, _flags, ...) \ - const struct uverbs_method_def _name = \ - _UVERBS_METHOD(_id, _handler, \ - UVERBS_ACTION_FLAG_CREATE_ROOT, \ - ##__VA_ARGS__) #define _UVERBS_OBJECT_METHODS_SZ(...) 
\ (sizeof((const struct uverbs_method_def * const []){__VA_ARGS__}) / \ sizeof(const struct uverbs_method_def *)) diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h index 228421f2a427..06eac48ec4f2 100644 --- a/include/rdma/uverbs_named_ioctl.h +++ b/include/rdma/uverbs_named_ioctl.h @@ -45,14 +45,32 @@ #define UVERBS_HANDLER(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _handler_##id) #define UVERBS_OBJECT(id) _UVERBS_NAME(UVERBS_MOUDLE_NAME, _object_##id) -#define DECLARE_UVERBS_NAMED_METHOD(id, ...) \ - DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, UVERBS_HANDLER(id), ##__VA_ARGS__) +#define UVERBS_METHOD_ATTRS(id) \ + _UVERBS_NAME(UVERBS_MODULE_NAME, _method_attrs_##id) -#define DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(id, handler, ...) \ - DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, handler, ##__VA_ARGS__) +#define DECLARE_UVERBS_NAMED_METHOD(_method_id, ...) \ + static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \ + _method_id)[] = { __VA_ARGS__ }; \ + static const struct uverbs_method_def UVERBS_METHOD(_method_id) = { \ + .id = _method_id, \ + .handler = UVERBS_HANDLER(_method_id), \ + .num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)), \ + .attrs = &UVERBS_METHOD_ATTRS(_method_id), \ + } -#define DECLARE_UVERBS_NAMED_METHOD_NO_OVERRIDE(id, handler, ...) \ - DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, NULL, ##__VA_ARGS__) +/* Create a standard destroy method using the default handler. The handle_attr + * argument must be the attribute specifying the handle to destroy, the + * default handler does not support any other attributes. + */ +#define DECLARE_UVERBS_NAMED_METHOD_DESTROY(_method_id, _handle_attr) \ + static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \ + _method_id)[] = { _handle_attr }; \ + static const struct uverbs_method_def UVERBS_METHOD(_method_id) = { \ + .id = _method_id, \ + .handler = uverbs_destroy_def_handler, \ + .num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)), \ + .attrs = &UVERBS_METHOD_ATTRS(_method_id), \ + } #define DECLARE_UVERBS_NAMED_OBJECT(id, ...) \ DECLARE_UVERBS_OBJECT(UVERBS_OBJECT(id), id, ##__VA_ARGS__) @@ -62,33 +80,22 @@ #define _UVERBS_COMP_NAME(x, y, z) _UVERBS_NAME(_UVERBS_NAME(x, y), z) -#define UVERBS_NO_OVERRIDE NULL - -/* This declares a parsing tree with one object and one method. This is usually - * used for merging driver attributes to the common attributes. The driver has - * a chance to override the handler and type attrs of the original object. - * The __VA_ARGS__ just contains a list of attributes. - */ -#define ADD_UVERBS_ATTRIBUTES(_name, _object, _method, _type_attrs, _handler, ...) \ -static DECLARE_UVERBS_METHOD(_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, \ - _method_, _name), \ - _method, _handler, ##__VA_ARGS__); \ - \ -static DECLARE_UVERBS_OBJECT(_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, \ - _object_, _name), \ - _object, _type_attrs, \ - &_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, \ - _method_, _name)); \ - \ -static DECLARE_UVERBS_OBJECT_TREE(_name, \ - &_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, \ - _object_, _name)) - -/* A very common use case is that the driver doesn't override the handler and - * type_attrs. Therefore, we provide a simplified macro for this common case. +/* Used by drivers to declare a complete parsing tree for a single method that + * differs only in having additional driver specific attributes. */ -#define ADD_UVERBS_ATTRIBUTES_SIMPLE(_name, _object, _method, ...) 
\ - ADD_UVERBS_ATTRIBUTES(_name, _object, _method, UVERBS_NO_OVERRIDE, \ - UVERBS_NO_OVERRIDE, ##__VA_ARGS__) +#define ADD_UVERBS_ATTRIBUTES_SIMPLE(_name, _object_id, _method_id, ...) \ + static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \ + _method_id)[] = { __VA_ARGS__ }; \ + static const struct uverbs_method_def UVERBS_METHOD(_method_id) = { \ + .id = _method_id, \ + .num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)), \ + .attrs = &UVERBS_METHOD_ATTRS(_method_id), \ + }; \ + static DECLARE_UVERBS_OBJECT( \ + _UVERBS_COMP_NAME(UVERBS_MODULE_NAME, _object_id, _name), \ + _object_id, NULL, &UVERBS_METHOD(_method_id)); \ + static DECLARE_UVERBS_OBJECT_TREE( \ + _name, \ + &_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, _object_id, _name)) #endif -- cgit From 6c61d2a55c4e5980e231fac9bb54e6ff1a5e811b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 4 Jul 2018 08:50:27 +0300 Subject: RDMA/uverbs: Simplify UVERBS_OBJECT and _TREE family of macros Instead of the large set of indirecting macros, define the few needed macros to directly instantiate the struct uverbs_object_tree_def and associated objects list. This is a small amount of code duplication, but the readability is far better. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs.h | 2 -- drivers/infiniband/core/uverbs_std_types.c | 46 ++++++++++----------------- drivers/infiniband/core/uverbs_std_types_cq.c | 2 +- drivers/infiniband/hw/mlx5/devx.c | 8 ++--- include/rdma/uverbs_ioctl.h | 44 ++++++++++++------------- include/rdma/uverbs_named_ioctl.h | 38 +++++++++++++++------- 6 files changed, 70 insertions(+), 70 deletions(-) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index f9f0bcf76812..a663e2cdc3d0 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -238,8 +238,6 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp, struct ib_uqp_object *uobj); void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata); -extern const struct uverbs_attr_def uverbs_uhw_compat_in; -extern const struct uverbs_attr_def uverbs_uhw_compat_out; long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); int uverbs_destroy_def_handler(struct ib_device *ib_dev, struct ib_uverbs_file *file, diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index ed63eed7250c..8d037f722c4d 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -216,18 +216,6 @@ int uverbs_destroy_def_handler(struct ib_device *ib_dev, return 0; } -/* - * This spec is used in order to pass information to the hardware driver in a - * legacy way. Every verb that could get driver specific data should get this - * spec.
- */ -const struct uverbs_attr_def uverbs_uhw_compat_in = - UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN, UVERBS_ATTR_MIN_SIZE(0), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)); -const struct uverbs_attr_def uverbs_uhw_compat_out = - UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT, UVERBS_ATTR_MIN_SIZE(0), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)); - void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata) { /* @@ -300,23 +288,23 @@ DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD, DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DEVICE, NULL); -static DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects, - &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE), - &UVERBS_OBJECT(UVERBS_OBJECT_PD), - &UVERBS_OBJECT(UVERBS_OBJECT_MR), - &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL), - &UVERBS_OBJECT(UVERBS_OBJECT_CQ), - &UVERBS_OBJECT(UVERBS_OBJECT_QP), - &UVERBS_OBJECT(UVERBS_OBJECT_AH), - &UVERBS_OBJECT(UVERBS_OBJECT_MW), - &UVERBS_OBJECT(UVERBS_OBJECT_SRQ), - &UVERBS_OBJECT(UVERBS_OBJECT_FLOW), - &UVERBS_OBJECT(UVERBS_OBJECT_WQ), - &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL), - &UVERBS_OBJECT(UVERBS_OBJECT_XRCD), - &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION), - &UVERBS_OBJECT(UVERBS_OBJECT_DM), - &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS)); +DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects, + &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE), + &UVERBS_OBJECT(UVERBS_OBJECT_PD), + &UVERBS_OBJECT(UVERBS_OBJECT_MR), + &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL), + &UVERBS_OBJECT(UVERBS_OBJECT_CQ), + &UVERBS_OBJECT(UVERBS_OBJECT_QP), + &UVERBS_OBJECT(UVERBS_OBJECT_AH), + &UVERBS_OBJECT(UVERBS_OBJECT_MW), + &UVERBS_OBJECT(UVERBS_OBJECT_SRQ), + &UVERBS_OBJECT(UVERBS_OBJECT_FLOW), + &UVERBS_OBJECT(UVERBS_OBJECT_WQ), + &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL), + &UVERBS_OBJECT(UVERBS_OBJECT_XRCD), + &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION), + &UVERBS_OBJECT(UVERBS_OBJECT_DM), + &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS)); const struct uverbs_object_tree_def *uverbs_default_get_objects(void) { diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index ca0f6a7435d0..f1f73ac0d2bd 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -165,7 +165,7 @@ DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_CREATE, &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_TYPE(u32)), &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE, UVERBS_ATTR_TYPE(u32), UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &uverbs_uhw_compat_in, &uverbs_uhw_compat_out); + UVERBS_ATTR_UHW()); static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev, struct ib_uverbs_file *file, diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index ecd0900681fa..4156e03b1bbc 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1079,24 +1079,24 @@ DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY, UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))); -static DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX, +DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX, &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN)); -static DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ, +DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ, &UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY), 
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY)); -static DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM, +DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM, &UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG)); -static DECLARE_UVERBS_OBJECT_TREE(devx_objects, +DECLARE_UVERBS_OBJECT_TREE(devx_objects, &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX), &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ), &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM)); diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 12fa0eef0ab0..392936ad25ba 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -253,33 +253,33 @@ struct uverbs_object_tree_def { __VA_ARGS__ }, \ }) +/* + * This spec is used in order to pass information to the hardware driver in a + * legacy way. Every verb that could get driver specific data should get this + * spec. + */ +#define UVERBS_ATTR_UHW() \ + &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN, \ + UVERBS_ATTR_SIZE(0, USHRT_MAX), \ + UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)), \ + &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT, \ + UVERBS_ATTR_SIZE(0, USHRT_MAX), \ + UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)) + /* * ======================================= * Declaration helpers * ======================================= */ -#define _UVERBS_OBJECT_METHODS_SZ(...) \ - (sizeof((const struct uverbs_method_def * const []){__VA_ARGS__}) / \ - sizeof(const struct uverbs_method_def *)) -#define _UVERBS_OBJECT(_id, _type_attrs, ...) \ - ((const struct uverbs_object_def) { \ - .id = _id, \ - .type_attrs = _type_attrs, \ - .num_methods = _UVERBS_OBJECT_METHODS_SZ(__VA_ARGS__), \ - .methods = &(const struct uverbs_method_def * const []){__VA_ARGS__} }) -#define DECLARE_UVERBS_OBJECT(_name, _id, _type_attrs, ...) \ - const struct uverbs_object_def _name = \ - _UVERBS_OBJECT(_id, _type_attrs, ##__VA_ARGS__) -#define _UVERBS_TREE_OBJECTS_SZ(...) \ - (sizeof((const struct uverbs_object_def * const []){__VA_ARGS__}) / \ - sizeof(const struct uverbs_object_def *)) -#define _UVERBS_OBJECT_TREE(...) \ - ((const struct uverbs_object_tree_def) { \ - .num_objects = _UVERBS_TREE_OBJECTS_SZ(__VA_ARGS__), \ - .objects = &(const struct uverbs_object_def * const []){__VA_ARGS__} }) -#define DECLARE_UVERBS_OBJECT_TREE(_name, ...) \ - const struct uverbs_object_tree_def _name = \ - _UVERBS_OBJECT_TREE(__VA_ARGS__) + +#define DECLARE_UVERBS_OBJECT_TREE(_name, ...) \ + static const struct uverbs_object_def *const _name##_ptr[] = { \ + __VA_ARGS__, \ + }; \ + static const struct uverbs_object_tree_def _name = { \ + .num_objects = ARRAY_SIZE(_name##_ptr), \ + .objects = &_name##_ptr, \ + } /* ================================================= * Parsing infrastructure diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h index 06eac48ec4f2..3ee045d7da4c 100644 --- a/include/rdma/uverbs_named_ioctl.h +++ b/include/rdma/uverbs_named_ioctl.h @@ -45,8 +45,9 @@ #define UVERBS_HANDLER(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _handler_##id) #define UVERBS_OBJECT(id) _UVERBS_NAME(UVERBS_MOUDLE_NAME, _object_##id) -#define UVERBS_METHOD_ATTRS(id) \ - _UVERBS_NAME(UVERBS_MODULE_NAME, _method_attrs_##id) +/* These are static so they do not need to be qualified */ +#define UVERBS_METHOD_ATTRS(method_id) _method_attrs_##method_id +#define UVERBS_OBJECT_METHODS(object_id) _object_methods_##object_id #define DECLARE_UVERBS_NAMED_METHOD(_method_id, ...) 
\ static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \ @@ -72,14 +73,19 @@ .attrs = &UVERBS_METHOD_ATTRS(_method_id), \ } -#define DECLARE_UVERBS_NAMED_OBJECT(id, ...) \ - DECLARE_UVERBS_OBJECT(UVERBS_OBJECT(id), id, ##__VA_ARGS__) +#define DECLARE_UVERBS_NAMED_OBJECT(_object_id, _type_attrs, ...) \ + static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \ + _object_id)[] = { __VA_ARGS__ }; \ + const struct uverbs_object_def UVERBS_OBJECT(_object_id) = { \ + .id = _object_id, \ + .type_attrs = _type_attrs, \ + .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \ + .methods = &UVERBS_OBJECT_METHODS(_object_id) \ + } #define DECLARE_UVERBS_GLOBAL_METHODS(_name, ...) \ DECLARE_UVERBS_NAMED_OBJECT(_name, NULL, ##__VA_ARGS__) -#define _UVERBS_COMP_NAME(x, y, z) _UVERBS_NAME(_UVERBS_NAME(x, y), z) - /* Used by drivers to declare a complete parsing tree for a single method that * differs only in having additional driver specific attributes. */ @@ -91,11 +97,19 @@ .num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)), \ .attrs = &UVERBS_METHOD_ATTRS(_method_id), \ }; \ - static DECLARE_UVERBS_OBJECT( \ - _UVERBS_COMP_NAME(UVERBS_MODULE_NAME, _object_id, _name), \ - _object_id, NULL, &UVERBS_METHOD(_method_id)); \ - static DECLARE_UVERBS_OBJECT_TREE( \ - _name, \ - &_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, _object_id, _name)) + static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \ + _object_id)[] = { &UVERBS_METHOD(_method_id) }; \ + static const struct uverbs_object_def _name##_struct = { \ + .id = _object_id, \ + .num_methods = 1, \ + .methods = &UVERBS_OBJECT_METHODS(_object_id) \ + }; \ + static const struct uverbs_object_def *const _name##_ptrs[] = { \ + &_name##_struct, \ + }; \ + static const struct uverbs_object_tree_def _name = { \ + .num_objects = 1, \ + .objects = &_name##_ptrs, \ + } #endif -- cgit From 9a119cd597769e0dd432110361ed1deec729ac06 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 4 Jul 2018 08:50:28 +0300 Subject: RDMA/uverbs: Get rid of the & in method specifications Hide it inside the macros. The & is confusing and interferes with using this as a generic DSL in later patches. Since this also touches almost every line, also run the specs through clang-format (with 'BinPackParameters: false') to make the maintenance easier. 
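To make the '&' move concrete, here is a minimal standalone C sketch under the assumption that the macro internals apply '&' to a compound literal (the attr_def_sketch type, the ATTR/ATTR_BYVAL names, and the id values are illustrative stand-ins, not the kernel macros; the call sites in the diffs below show the resulting '&'-free attribute lists):

#include <stdio.h>

struct attr_def_sketch {
        unsigned int id;
};

/*
 * Before: the macro yielded a struct value, so every call site building
 * a pointer list had to prefix it with '&'.
 */
#define ATTR_BYVAL(_id) ((const struct attr_def_sketch){ .id = (_id) })

/*
 * After: the '&' is applied to the compound literal inside the macro,
 * so the call sites drop it entirely.
 */
#define ATTR(_id) (&(const struct attr_def_sketch){ .id = (_id) })

/*
 * At file scope a compound literal has static storage duration, which is
 * what allows static attribute tables to be built this way.
 */
static const struct attr_def_sketch *const attrs[] = { ATTR(1), ATTR(2) };

int main(void)
{
        for (unsigned int i = 0; i < sizeof(attrs) / sizeof(attrs[0]); i++)
                printf("attr id %u\n", attrs[i]->id);
        return 0;
}

Judging from the call sites in this patch, the change applies to the UVERBS_ATTR_* entries (which lose the leading '&') while method and object references such as &UVERBS_METHOD(...) keep it.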
Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_std_types.c | 57 ++--- .../infiniband/core/uverbs_std_types_counters.c | 50 ++--- drivers/infiniband/core/uverbs_std_types_cq.c | 68 +++--- drivers/infiniband/core/uverbs_std_types_dm.c | 37 ++-- .../infiniband/core/uverbs_std_types_flow_action.c | 103 +++++---- drivers/infiniband/core/uverbs_std_types_mr.c | 59 ++--- drivers/infiniband/hw/mlx5/devx.c | 240 +++++++++++---------- drivers/infiniband/hw/mlx5/main.c | 32 +-- include/rdma/uverbs_ioctl.h | 20 +- include/rdma/uverbs_named_ioctl.h | 17 +- 10 files changed, 377 insertions(+), 306 deletions(-) diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 8d037f722c4d..2f1a0b6598fe 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -248,45 +248,50 @@ void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata) } } -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COMP_CHANNEL, - &UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), - uverbs_hot_unplug_completion_event_file, - &uverbs_event_fops, - "[infinibandevent]", O_RDONLY)); - -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_QP, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), - uverbs_free_qp)); +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_COMP_CHANNEL, + UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), + uverbs_hot_unplug_completion_event_file, + &uverbs_event_fops, + "[infinibandevent]", + O_RDONLY)); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_QP, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW, - &UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw)); + UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw)); -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_SRQ, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), - uverbs_free_srq)); +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_SRQ, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), + uverbs_free_srq)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH, - &UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah)); + UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah)); -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object), - uverbs_free_flow)); +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_FLOW, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object), + uverbs_free_flow)); -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_WQ, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), - uverbs_free_wq)); +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_WQ, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL, - &UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl)); + UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl)); -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_XRCD, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), - uverbs_free_xrcd)); +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_XRCD, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), + uverbs_free_xrcd)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD, - &UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd)); + UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd)); -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DEVICE, NULL); +DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE); DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects, &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE), diff --git 
a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index 61fd65bc9496..21d61e384623 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -124,34 +124,36 @@ err_read: return ret; } -DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_CREATE, - &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE, - UVERBS_OBJECT_COUNTERS, - UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -DECLARE_UVERBS_NAMED_METHOD_DESTROY(UVERBS_METHOD_COUNTERS_DESTROY, - &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE, - UVERBS_OBJECT_COUNTERS, - UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_COUNTERS_CREATE, + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE, + UVERBS_OBJECT_COUNTERS, + UVERBS_ACCESS_NEW, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_COUNTERS_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE, + UVERBS_OBJECT_COUNTERS, + UVERBS_ACCESS_DESTROY, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); #define MAX_COUNTERS_BUFF_SIZE USHRT_MAX -DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_READ, - &UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE, - UVERBS_OBJECT_COUNTERS, - UVERBS_ACCESS_READ, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF, - UVERBS_ATTR_SIZE(0, MAX_COUNTERS_BUFF_SIZE), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS, - UVERBS_ATTR_TYPE(__u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_COUNTERS_READ, + UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE, + UVERBS_OBJECT_COUNTERS, + UVERBS_ACCESS_READ, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF, + UVERBS_ATTR_SIZE(0, MAX_COUNTERS_BUFF_SIZE), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS, + UVERBS_ATTR_TYPE(__u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS, - &UVERBS_TYPE_ALLOC_IDR(uverbs_free_counters), + UVERBS_TYPE_ALLOC_IDR(uverbs_free_counters), &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE), &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY), &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ)); - diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index f1f73ac0d2bd..0aa16868149f 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -147,24 +147,28 @@ err_event_file: return ret; }; -DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_CREATE, - &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ, - UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE, +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_CQ_CREATE, + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE, + UVERBS_OBJECT_CQ, + UVERBS_ACCESS_NEW, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE, + UVERBS_ATTR_TYPE(u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE, + UVERBS_ATTR_TYPE(u64), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL, + UVERBS_OBJECT_COMP_CHANNEL, + UVERBS_ACCESS_READ), + 
UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, + UVERBS_ATTR_TYPE(u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_TYPE(u32)), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE, UVERBS_ATTR_TYPE(u32), UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE, - UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL, - UVERBS_OBJECT_COMP_CHANNEL, - UVERBS_ACCESS_READ), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_TYPE(u32)), - &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), UVERBS_ATTR_UHW()); static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev, @@ -196,20 +200,22 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev, sizeof(resp)); } -DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_DESTROY, - &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ, - UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP, - UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_CQ, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), - uverbs_free_cq), +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_CQ_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE, + UVERBS_OBJECT_CQ, + UVERBS_ACCESS_DESTROY, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP, + UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_CQ, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), uverbs_free_cq), + #if IS_ENABLED(CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI) - &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE), - &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY) + &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE), + &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY) #endif - ); - +); diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c index 75ac20bed5e4..16e3e7c86a4b 100644 --- a/drivers/infiniband/core/uverbs_std_types_dm.c +++ b/drivers/infiniband/core/uverbs_std_types_dm.c @@ -85,24 +85,27 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev, return 0; } -DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_ALLOC, - &UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE, UVERBS_OBJECT_DM, - UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH, - UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT, - UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -DECLARE_UVERBS_NAMED_METHOD_DESTROY(UVERBS_METHOD_DM_FREE, - &UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE, - UVERBS_OBJECT_DM, - UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_DM_ALLOC, + UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE, + UVERBS_OBJECT_DM, + UVERBS_ACCESS_NEW, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH, + UVERBS_ATTR_TYPE(u64), + 
UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT, + UVERBS_ATTR_TYPE(u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_DM_FREE, + UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE, + UVERBS_OBJECT_DM, + UVERBS_ACCESS_DESTROY, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM, - &UVERBS_TYPE_ALLOC_IDR(uverbs_free_dm), + UVERBS_TYPE_ALLOC_IDR(uverbs_free_dm), &UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC), &UVERBS_METHOD(UVERBS_METHOD_DM_FREE)); diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index 1a572bdeff6a..ec3e669071f7 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -385,49 +385,60 @@ static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = { }, }; -DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, - &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)), - &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, - uverbs_flow_action_esp_keymat, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, - uverbs_flow_action_esp_replay), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type))); - -DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY, - &UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_WRITE, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)), - &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, - uverbs_flow_action_esp_keymat), - &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, - uverbs_flow_action_esp_replay), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type))); - -DECLARE_UVERBS_NAMED_METHOD_DESTROY(UVERBS_METHOD_FLOW_ACTION_DESTROY, - &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW_ACTION, - &UVERBS_TYPE_ALLOC_IDR(uverbs_free_flow_action), - &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE), - &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY), - &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)); - +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_NEW, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, + UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, + hard_limit_pkts), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + 
UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, + UVERBS_ATTR_TYPE(__u32)), + UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, + uverbs_flow_action_esp_keymat, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, + uverbs_flow_action_esp_replay), + UVERBS_ATTR_PTR_IN( + UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, + UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, + type))); + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY, + UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_WRITE, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, + UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, + hard_limit_pkts), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, + UVERBS_ATTR_TYPE(__u32)), + UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, + uverbs_flow_action_esp_keymat), + UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, + uverbs_flow_action_esp_replay), + UVERBS_ATTR_PTR_IN( + UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, + UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, + type))); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_FLOW_ACTION_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_DESTROY, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_TYPE_ALLOC_IDR(uverbs_free_flow_action), + &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE), + &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY), + &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)); diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index 0366814b81c2..779d6d4950eb 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -115,32 +115,37 @@ err_dereg: return ret; } -DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_MR_REG, - &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, UVERBS_OBJECT_MR, - UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET, - UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH, - UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE, UVERBS_OBJECT_PD, - UVERBS_ACCESS_READ, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS, +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_DM_MR_REG, + UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, + UVERBS_OBJECT_MR, + UVERBS_ACCESS_NEW, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET, + UVERBS_ATTR_TYPE(u64), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH, + UVERBS_ATTR_TYPE(u64), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE, + UVERBS_OBJECT_PD, + UVERBS_ACCESS_READ, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS, + UVERBS_ATTR_TYPE(u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE, + UVERBS_OBJECT_DM, + UVERBS_ACCESS_READ, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + 
UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY, UVERBS_ATTR_TYPE(u32), UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE, UVERBS_OBJECT_DM, - UVERBS_ACCESS_READ, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY, - UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY, - UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MR, - &UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr), - &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG)); + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY, + UVERBS_ATTR_TYPE(u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_MR, + UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr), + &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG)); diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 4156e03b1bbc..3ac3da4c3e23 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -984,122 +984,146 @@ static int devx_umem_cleanup(struct ib_uobject *uobject, return 0; } -DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG, - &UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE, - MLX5_IB_OBJECT_DEVX_UMEM, - UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR, UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN, UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG, - &UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE, - MLX5_IB_OBJECT_DEVX_UMEM, - UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN, - &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR, - &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OTHER, - &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OTHER_CMD_IN, - UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO | - UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)), - &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, - UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)) -); - -DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE, - &UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE, - MLX5_IB_OBJECT_DEVX_OBJ, - UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN, - 
UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO | - UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)), - &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, - UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))); - -DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY, - &UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE, - MLX5_IB_OBJECT_DEVX_OBJ, - UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY, - &UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE, - MLX5_IB_OBJECT_DEVX_OBJ, - UVERBS_ACCESS_WRITE, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, - UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO | - UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)), - &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, - UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))); - -DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY, - &UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE, - MLX5_IB_OBJECT_DEVX_OBJ, - UVERBS_ACCESS_READ, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN, - UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO | - UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)), - &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, - UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))); +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_UMEM_REG, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE, + MLX5_IB_OBJECT_DEVX_UMEM, + UVERBS_ACCESS_NEW, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR, + UVERBS_ATTR_TYPE(u64), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN, + UVERBS_ATTR_TYPE(u64), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, + UVERBS_ATTR_TYPE(u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, + UVERBS_ATTR_TYPE(u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_UMEM_DEREG, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE, + MLX5_IB_OBJECT_DEVX_UMEM, + UVERBS_ACCESS_DESTROY, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_QUERY_EQN, + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC, + UVERBS_ATTR_TYPE(u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, + UVERBS_ATTR_TYPE(u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_QUERY_UAR, + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX, + UVERBS_ATTR_TYPE(u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX, + UVERBS_ATTR_TYPE(u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +DECLARE_UVERBS_NAMED_METHOD( + 
MLX5_IB_METHOD_DEVX_OTHER, + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_DEVX_OTHER_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO | + UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)), + UVERBS_ATTR_PTR_OUT( + MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_OBJ_CREATE, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_NEW, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO | + UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)), + UVERBS_ATTR_PTR_OUT( + MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_OBJ_DESTROY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_DESTROY, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_OBJ_MODIFY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_WRITE, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO | + UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)), + UVERBS_ATTR_PTR_OUT( + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_OBJ_QUERY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_READ, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO | + UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)), + UVERBS_ATTR_PTR_OUT( + MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))); DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX, - &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER), - &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR), - &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN)); + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN)); DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ, - &UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup), - &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE), - &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY), - &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY), - &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY)); + UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY), + 
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY)); DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM, - &UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup), - &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG), - &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG)); + UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG)); DECLARE_UVERBS_OBJECT_TREE(devx_objects, - &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX), - &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ), - &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM)); + &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX), + &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ), + &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM)); const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void) { diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 0fb80777aade..a0f4361981ab 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5315,20 +5315,24 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev) mlx5_nic_vport_disable_roce(dev->mdev); } -ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_dm, UVERBS_OBJECT_DM, - UVERBS_METHOD_DM_ALLOC, - &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, - UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, - UVERBS_ATTR_TYPE(u16), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_flow_action, UVERBS_OBJECT_FLOW_ACTION, - UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, - &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, - UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); +ADD_UVERBS_ATTRIBUTES_SIMPLE( + mlx5_ib_dm, + UVERBS_OBJECT_DM, + UVERBS_METHOD_DM_ALLOC, + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, + UVERBS_ATTR_TYPE(u64), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, + UVERBS_ATTR_TYPE(u16), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +ADD_UVERBS_ATTRIBUTES_SIMPLE( + mlx5_ib_flow_action, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, + UVERBS_ATTR_TYPE(u64), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); #define NUM_TREES 3 static int populate_specs_root(struct mlx5_ib_dev *dev) diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 392936ad25ba..d21c29a0f8ec 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -212,7 +212,7 @@ struct uverbs_object_tree_def { #define UA_FLAGS(_flags) .flags = _flags #define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...) \ - ((const struct uverbs_attr_def){ \ + (&(const struct uverbs_attr_def){ \ .id = _attr_id, \ .attr = { .type = UVERBS_ATTR_TYPE_IDR, \ .u.obj.obj_type = _idr_type, \ @@ -220,7 +220,7 @@ struct uverbs_object_tree_def { __VA_ARGS__ } }) #define UVERBS_ATTR_FD(_attr_id, _fd_type, _access, ...) \ - ((const struct uverbs_attr_def){ \ + (&(const struct uverbs_attr_def){ \ .id = (_attr_id) + \ BUILD_BUG_ON_ZERO((_access) != UVERBS_ACCESS_NEW && \ (_access) != UVERBS_ACCESS_READ), \ @@ -230,14 +230,14 @@ struct uverbs_object_tree_def { __VA_ARGS__ } }) #define UVERBS_ATTR_PTR_IN(_attr_id, _type, ...) \ - ((const struct uverbs_attr_def){ \ + (&(const struct uverbs_attr_def){ \ .id = _attr_id, \ .attr = { .type = UVERBS_ATTR_TYPE_PTR_IN, \ _type, \ __VA_ARGS__ } }) #define UVERBS_ATTR_PTR_OUT(_attr_id, _type, ...) 
\ - ((const struct uverbs_attr_def){ \ + (&(const struct uverbs_attr_def){ \ .id = _attr_id, \ .attr = { .type = UVERBS_ATTR_TYPE_PTR_OUT, \ _type, \ @@ -245,7 +245,7 @@ struct uverbs_object_tree_def { /* _enum_arry should be a 'static const union uverbs_attr_spec[]' */ #define UVERBS_ATTR_ENUM_IN(_attr_id, _enum_arr, ...) \ - ((const struct uverbs_attr_def){ \ + (&(const struct uverbs_attr_def){ \ .id = _attr_id, \ .attr = { .type = UVERBS_ATTR_TYPE_ENUM_IN, \ .u2.enum_def.ids = _enum_arr, \ @@ -259,12 +259,12 @@ struct uverbs_object_tree_def { * spec. */ #define UVERBS_ATTR_UHW() \ - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN, \ + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN, \ + UVERBS_ATTR_SIZE(0, USHRT_MAX), \ + UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)), \ + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT, \ UVERBS_ATTR_SIZE(0, USHRT_MAX), \ - UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)), \ - &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT, \ - UVERBS_ATTR_SIZE(0, USHRT_MAX), \ - UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)) + UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)) /* * ======================================= diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h index 3ee045d7da4c..2eb1767042af 100644 --- a/include/rdma/uverbs_named_ioctl.h +++ b/include/rdma/uverbs_named_ioctl.h @@ -78,13 +78,24 @@ _object_id)[] = { __VA_ARGS__ }; \ const struct uverbs_object_def UVERBS_OBJECT(_object_id) = { \ .id = _object_id, \ - .type_attrs = _type_attrs, \ + .type_attrs = &_type_attrs, \ .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \ .methods = &UVERBS_OBJECT_METHODS(_object_id) \ } -#define DECLARE_UVERBS_GLOBAL_METHODS(_name, ...) \ - DECLARE_UVERBS_NAMED_OBJECT(_name, NULL, ##__VA_ARGS__) +/* + * Declare global methods. These still have a unique object_id because we + * identify all uapi methods with a (object,method) tuple. However, they have + * no type pointer. + */ +#define DECLARE_UVERBS_GLOBAL_METHODS(_object_id, ...) \ + static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \ + _object_id)[] = { __VA_ARGS__ }; \ + const struct uverbs_object_def UVERBS_OBJECT(_object_id) = { \ + .id = _object_id, \ + .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \ + .methods = &UVERBS_OBJECT_METHODS(_object_id) \ + } /* Used by drivers to declare a complete parsing tree for a single method that * differs only in having additional driver specific attributes. -- cgit From 83bb4442330f035bd68ec5d2f5b87bfef1c1a4ab Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 4 Jul 2018 08:50:29 +0300 Subject: RDMA/uverbs: Remove UA_FLAGS This bit of boilerplate isn't really necessary, we can use bitfields instead of a flags enum and the macros can then individually initialize them through the __VA_ARGS__ like everything else. 
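The shape this commit moves to is easier to see outside the macro forest. The following minimal C sketch is not the kernel's real definitions — struct attr_spec and DECLARE_ATTR are invented stand-ins, and only the two UA_* names mirror macros introduced by this patch — but it shows the same trick: each former flag bit becomes a named one-bit bitfield, each UA_* macro expands to a designated initializer for that bit, and the declaration macro forwards any mix of them through __VA_ARGS__ into a compound literal.

#include <stdio.h>

/* Invented stand-in for the kernel's attribute spec: every former
 * flag becomes a named one-bit bitfield. */
struct attr_spec {
	unsigned char type;
	unsigned char mandatory:1;
	unsigned char alloc_and_copy:1;
};

/* Each flag macro is now just a designated initializer... */
#define UA_MANDATORY		.mandatory = 1
#define UA_ALLOC_AND_COPY	.alloc_and_copy = 1

/* ...so a declaration macro can splice any number of them into a
 * compound literal through __VA_ARGS__; bits left unnamed are zero. */
#define DECLARE_ATTR(_type, ...) \
	((struct attr_spec){ .type = (_type), __VA_ARGS__ })

int main(void)
{
	struct attr_spec in = DECLARE_ATTR(1, UA_MANDATORY, UA_ALLOC_AND_COPY);
	struct attr_spec out = DECLARE_ATTR(2, UA_MANDATORY);

	printf("in:  mandatory=%u alloc_and_copy=%u\n",
	       (unsigned)in.mandatory, (unsigned)in.alloc_and_copy);
	printf("out: mandatory=%u alloc_and_copy=%u\n",
	       (unsigned)out.mandatory, (unsigned)out.alloc_and_copy);
	return 0;
}

The payoff over a flags enum is that optional flags cost nothing at the call site: omitting the argument simply leaves the bit zero, with no UA_FLAGS(0) boilerplate.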
Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_ioctl.c | 10 ++-- drivers/infiniband/core/uverbs_ioctl_merge.c | 6 +- .../infiniband/core/uverbs_std_types_counters.c | 10 ++-- drivers/infiniband/core/uverbs_std_types_cq.c | 17 +++--- drivers/infiniband/core/uverbs_std_types_dm.c | 8 +-- .../infiniband/core/uverbs_std_types_flow_action.c | 40 ++++++++----- drivers/infiniband/core/uverbs_std_types_mr.c | 16 ++--- drivers/infiniband/hw/mlx5/devx.c | 68 +++++++++++----------- drivers/infiniband/hw/mlx5/main.c | 6 +- include/rdma/uverbs_ioctl.h | 35 ++++++----- 10 files changed, 115 insertions(+), 101 deletions(-) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 62f7382e8513..cb6109036129 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -98,14 +98,14 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile, * non-zero content, making ABI compat/discovery simpler. */ if (uattr->len > val_spec->u.ptr.len && - val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO && + val_spec->min_sz_or_zero && !uverbs_is_attr_cleared(uattr, val_spec->u.ptr.len)) return -EOPNOTSUPP; /* fall through */ case UVERBS_ATTR_TYPE_PTR_OUT: if (uattr->len < val_spec->u.ptr.min_len || - (!(val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO) && + (!val_spec->min_sz_or_zero && uattr->len > val_spec->u.ptr.len)) return -EINVAL; @@ -116,8 +116,7 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile, e->ptr_attr.len = uattr->len; e->ptr_attr.flags = uattr->flags; - if (val_spec->flags & UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY && - !uverbs_attr_ptr_is_inline(e)) { + if (val_spec->alloc_and_copy && !uverbs_attr_ptr_is_inline(e)) { void *p; p = kvmalloc(uattr->len, GFP_KERNEL); @@ -220,8 +219,7 @@ static int uverbs_finalize_attrs(struct uverbs_attr_bundle *attrs_bundle, if (!ret) ret = current_ret; } else if (spec->type == UVERBS_ATTR_TYPE_PTR_IN && - spec->flags & - UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY && + spec->alloc_and_copy && !uverbs_attr_ptr_is_inline(attr)) { kvfree(attr->ptr_attr.ptr); } diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c index cdada526623e..ece5c9463dbe 100644 --- a/drivers/infiniband/core/uverbs_ioctl_merge.c +++ b/drivers/infiniband/core/uverbs_ioctl_merge.c @@ -374,18 +374,18 @@ static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_me "ib_uverbs: Method contains more than one object attr (%d) with new/destroy access\n", min_id) || WARN(attr_obj_with_special_access && - !(attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY), + !attr->mandatory, "ib_uverbs: Tried to merge attr (%d) but it's an object with new/destroy access but isn't mandatory\n", min_id) || WARN(IS_ATTR_OBJECT(attr) && - attr->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO, + attr->min_sz_or_zero, "ib_uverbs: Tried to merge attr (%d) but it's an object with min_sz flag\n", min_id)) { res = -EINVAL; goto free; } - if (attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY) + if (attr->mandatory) set_bit(min_id, hash->mandatory_attrs_bitmask); min_id++; diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index 21d61e384623..202e3782e740 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -129,14 +129,14 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE, 
UVERBS_OBJECT_COUNTERS, UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + UA_MANDATORY)); DECLARE_UVERBS_NAMED_METHOD_DESTROY( UVERBS_METHOD_COUNTERS_DESTROY, UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE, UVERBS_OBJECT_COUNTERS, UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + UA_MANDATORY)); #define MAX_COUNTERS_BUFF_SIZE USHRT_MAX DECLARE_UVERBS_NAMED_METHOD( @@ -144,13 +144,13 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE, UVERBS_OBJECT_COUNTERS, UVERBS_ACCESS_READ, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF, UVERBS_ATTR_SIZE(0, MAX_COUNTERS_BUFF_SIZE), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS, UVERBS_ATTR_TYPE(__u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + UA_MANDATORY)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS, UVERBS_TYPE_ALLOC_IDR(uverbs_free_counters), diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 0aa16868149f..1a14c245b511 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -152,23 +152,24 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ, UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE, UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL, UVERBS_OBJECT_COMP_CHANNEL, - UVERBS_ACCESS_READ), + UVERBS_ACCESS_READ, + UA_OPTIONAL), UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_TYPE(u32)), UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_UHW()); static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev, @@ -205,10 +206,10 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ, UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP, UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + UA_MANDATORY)); DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_CQ, diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c index 16e3e7c86a4b..8a2548173a90 100644 --- a/drivers/infiniband/core/uverbs_std_types_dm.c +++ b/drivers/infiniband/core/uverbs_std_types_dm.c @@ -90,20 +90,20 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE, UVERBS_OBJECT_DM, UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH, UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + UA_MANDATORY)); DECLARE_UVERBS_NAMED_METHOD_DESTROY( UVERBS_METHOD_DM_FREE, UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE, UVERBS_OBJECT_DM, 
UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + UA_MANDATORY)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM, UVERBS_TYPE_ALLOC_IDR(uverbs_free_dm), diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index ec3e669071f7..143dbfdfda6f 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -368,7 +368,7 @@ static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = { [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = { .type = UVERBS_ATTR_TYPE_PTR_IN, UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_keymat_aes_gcm), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO), + UA_MIN_SZ_OR_ZERO }, }; @@ -381,7 +381,7 @@ static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = { [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = { .type = UVERBS_ATTR_TYPE_PTR_IN, UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, size), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO), + UA_MIN_SZ_OR_ZERO }, }; @@ -390,51 +390,59 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION, UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)), + UA_MANDATORY, + UA_MIN_SZ_OR_ZERO), UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, - UVERBS_ATTR_TYPE(__u32)), + UVERBS_ATTR_TYPE(__u32), + UA_OPTIONAL), UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, uverbs_flow_action_esp_keymat, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, - uverbs_flow_action_esp_replay), + uverbs_flow_action_esp_replay, + UA_OPTIONAL), UVERBS_ATTR_PTR_IN( UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, - type))); + type), + UA_OPTIONAL)); DECLARE_UVERBS_NAMED_METHOD( UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY, UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION, UVERBS_ACCESS_WRITE, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)), + UA_OPTIONAL, + UA_MIN_SZ_OR_ZERO), UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, - UVERBS_ATTR_TYPE(__u32)), + UVERBS_ATTR_TYPE(__u32), + UA_OPTIONAL), UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, - uverbs_flow_action_esp_keymat), + uverbs_flow_action_esp_keymat, + UA_OPTIONAL), UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, - uverbs_flow_action_esp_replay), + uverbs_flow_action_esp_replay, + UA_OPTIONAL), UVERBS_ATTR_PTR_IN( UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, - type))); + type), + UA_OPTIONAL)); DECLARE_UVERBS_NAMED_METHOD_DESTROY( UVERBS_METHOD_FLOW_ACTION_DESTROY, UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE, UVERBS_OBJECT_FLOW_ACTION, UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + UA_MANDATORY)); DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_FLOW_ACTION, diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index 779d6d4950eb..c1b9124d611e 100644 --- 
a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -120,30 +120,30 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, UVERBS_OBJECT_MR, UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET, UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH, UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE, UVERBS_OBJECT_PD, UVERBS_ACCESS_READ, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE, UVERBS_OBJECT_DM, UVERBS_ACCESS_READ, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + UA_MANDATORY)); DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_MR, diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 3ac3da4c3e23..be44e7e837eb 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -989,119 +989,119 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE, MLX5_IB_OBJECT_DEVX_UMEM, UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR, UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN, UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + UA_MANDATORY)); DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_UMEM_DEREG, UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE, MLX5_IB_OBJECT_DEVX_UMEM, UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + UA_MANDATORY)); DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_QUERY_EQN, UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + UA_MANDATORY)); DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_QUERY_UAR, UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + UA_MANDATORY)); DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_OTHER, UVERBS_ATTR_PTR_IN( MLX5_IB_ATTR_DEVX_OTHER_CMD_IN, UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO | - UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)), + UA_MANDATORY, + UA_MIN_SZ_OR_ZERO, + UA_ALLOC_AND_COPY), UVERBS_ATTR_PTR_OUT( 
MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))); + UA_MANDATORY, + UA_MIN_SZ_OR_ZERO)); DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_OBJ_CREATE, UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE, MLX5_IB_OBJECT_DEVX_OBJ, UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_IN( MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN, UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO | - UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)), + UA_MANDATORY, + UA_MIN_SZ_OR_ZERO, + UA_ALLOC_AND_COPY), UVERBS_ATTR_PTR_OUT( MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))); + UA_MANDATORY, + UA_MIN_SZ_OR_ZERO)); DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_OBJ_DESTROY, UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE, MLX5_IB_OBJECT_DEVX_OBJ, UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + UA_MANDATORY)); DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_OBJ_MODIFY, UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE, MLX5_IB_OBJECT_DEVX_OBJ, UVERBS_ACCESS_WRITE, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_IN( MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO | - UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)), + UA_MANDATORY, + UA_MIN_SZ_OR_ZERO, + UA_ALLOC_AND_COPY), UVERBS_ATTR_PTR_OUT( MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))); + UA_MANDATORY, + UA_MIN_SZ_OR_ZERO)); DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_OBJ_QUERY, UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE, MLX5_IB_OBJECT_DEVX_OBJ, UVERBS_ACCESS_READ, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_IN( MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN, UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO | - UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)), + UA_MANDATORY, + UA_MIN_SZ_OR_ZERO, + UA_ALLOC_AND_COPY), UVERBS_ATTR_PTR_OUT( MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))); + UA_MANDATORY, + UA_MIN_SZ_OR_ZERO)); DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX, &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER), diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a0f4361981ab..54d45e13de34 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5321,10 +5321,10 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE( UVERBS_METHOD_DM_ALLOC, UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UA_MANDATORY), UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, UVERBS_ATTR_TYPE(u16), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + UA_MANDATORY)); ADD_UVERBS_ATTRIBUTES_SIMPLE( mlx5_ib_flow_action, @@ -5332,7 +5332,7 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE( UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, 
UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + UA_MANDATORY)); #define NUM_TREES 3 static int populate_specs_root(struct mlx5_ib_dev *dev) diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index d21c29a0f8ec..6073fd9d9c49 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -61,20 +61,22 @@ enum uverbs_obj_access { UVERBS_ACCESS_DESTROY }; -enum { - UVERBS_ATTR_SPEC_F_MANDATORY = 1U << 0, - /* Support extending attributes by length, validate all unknown size == zero */ - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO = 1U << 1, - /* - * Valid only for PTR_IN. Allocate and copy the data inside the parser - */ - UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY = 1U << 2, -}; - /* Specification of a single attribute inside the ioctl message */ +/* good size 16 */ struct uverbs_attr_spec { u8 type; - u8 flags; + + /* + * Support extending attributes by length, validate all + * unknown size == zero + */ + u8 min_sz_or_zero:1; + /* + * Valid only for PTR_IN. Allocate and copy the data inside + * the parser + */ + u8 alloc_and_copy:1; + u8 mandatory:1; union { struct { @@ -209,7 +211,10 @@ struct uverbs_object_tree_def { UVERBS_ATTR_SIZE(_min_len, USHRT_MAX) /* Must be used in the '...' of any UVERBS_ATTR */ -#define UA_FLAGS(_flags) .flags = _flags +#define UA_ALLOC_AND_COPY .alloc_and_copy = 1 +#define UA_MANDATORY .mandatory = 1 +#define UA_MIN_SZ_OR_ZERO .min_sz_or_zero = 1 +#define UA_OPTIONAL .mandatory = 0 #define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...) \ (&(const struct uverbs_attr_def){ \ @@ -261,10 +266,12 @@ struct uverbs_object_tree_def { #define UVERBS_ATTR_UHW() \ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN, \ UVERBS_ATTR_SIZE(0, USHRT_MAX), \ - UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)), \ + UA_OPTIONAL, \ + UA_MIN_SZ_OR_ZERO), \ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT, \ UVERBS_ATTR_SIZE(0, USHRT_MAX), \ - UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)) + UA_OPTIONAL, \ + UA_MIN_SZ_OR_ZERO), \ /* * ======================================= -- cgit From 540cd69209682a351ab76b83b85ea856b8192720 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 4 Jul 2018 08:50:30 +0300 Subject: RDMA/uverbs: Use UVERBS_ATTR_MIN_SIZE correctly and uniformly This newer macro allows specifying a lower bound on the accepted size, and has an 'unlimited' upper bound. Due to this it never checks for trailing zeroing, so it doesn't make any sense to combine it with MIN_SZ_OR_ZERO; drop MIN_SZ_OR_ZERO when they are used together. There were a couple of places that open coded this pattern; switch them to the clearer UVERBS_ATTR_MIN_SIZE. 
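To make the size semantics concrete, here is a small sketch of the bounds the two macros encode. The names throughout — ptr_bounds, ATTR_SIZE, ATTR_MIN_SIZE, check_ptr_len — are invented stand-ins, not kernel code: ATTR_SIZE(min, len) accepts user buffers of min through len bytes, while ATTR_MIN_SIZE(min) is the same thing with the upper bound pinned at USHRT_MAX, which is why stacking a separate "may be larger" flag on top of it is meaningless.

#include <limits.h>
#include <stdbool.h>
#include <stddef.h>

/* Invented mirror of the ptr-attribute bounds carried in the spec. */
struct ptr_bounds {
	unsigned short min_len;	/* smallest accepted user buffer */
	unsigned short len;	/* largest accepted user buffer */
};

#define ATTR_SIZE(_min, _max)	{ .min_len = (_min), .len = (_max) }
/* MIN_SIZE is SIZE with an effectively unlimited upper bound, so
 * pairing it with an "allow larger" flag adds nothing. */
#define ATTR_MIN_SIZE(_min)	ATTR_SIZE(_min, USHRT_MAX)

/* Sketch of the accept/reject length check for one attribute. */
static bool check_ptr_len(const struct ptr_bounds *b, size_t user_len)
{
	return user_len >= b->min_len && user_len <= b->len;
}

int main(void)
{
	const struct ptr_bounds exact = ATTR_SIZE(16, 16);
	const struct ptr_bounds open_ended = ATTR_MIN_SIZE(16);

	/* A 200-byte command passes the open-ended spec but fails the
	 * exact-size one; returns 0 when both checks behave as expected. */
	return !(check_ptr_len(&open_ended, 200) &&
		 !check_ptr_len(&exact, 200));
}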
Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_std_types_counters.c | 3 +-- drivers/infiniband/hw/mlx5/devx.c | 16 ++++------------ include/rdma/uverbs_ioctl.h | 17 +++++++++-------- 3 files changed, 14 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index 202e3782e740..dfe59ad721f6 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -138,7 +138,6 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY( UVERBS_ACCESS_DESTROY, UA_MANDATORY)); -#define MAX_COUNTERS_BUFF_SIZE USHRT_MAX DECLARE_UVERBS_NAMED_METHOD( UVERBS_METHOD_COUNTERS_READ, UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE, @@ -146,7 +145,7 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ACCESS_READ, UA_MANDATORY), UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF, - UVERBS_ATTR_SIZE(0, MAX_COUNTERS_BUFF_SIZE), + UVERBS_ATTR_MIN_SIZE(0), UA_MANDATORY), UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS, UVERBS_ATTR_TYPE(__u32), diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index be44e7e837eb..192844bf6016 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1034,13 +1034,11 @@ DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_ATTR_DEVX_OTHER_CMD_IN, UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), UA_MANDATORY, - UA_MIN_SZ_OR_ZERO, UA_ALLOC_AND_COPY), UVERBS_ATTR_PTR_OUT( MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), - UA_MANDATORY, - UA_MIN_SZ_OR_ZERO)); + UA_MANDATORY)); DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_OBJ_CREATE, @@ -1052,13 +1050,11 @@ DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN, UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), UA_MANDATORY, - UA_MIN_SZ_OR_ZERO, UA_ALLOC_AND_COPY), UVERBS_ATTR_PTR_OUT( MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), - UA_MANDATORY, - UA_MIN_SZ_OR_ZERO)); + UA_MANDATORY)); DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_OBJ_DESTROY, @@ -1077,13 +1073,11 @@ DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), UA_MANDATORY, - UA_MIN_SZ_OR_ZERO, UA_ALLOC_AND_COPY), UVERBS_ATTR_PTR_OUT( MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), - UA_MANDATORY, - UA_MIN_SZ_OR_ZERO)); + UA_MANDATORY)); DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_OBJ_QUERY, @@ -1095,13 +1089,11 @@ DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN, UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), UA_MANDATORY, - UA_MIN_SZ_OR_ZERO, UA_ALLOC_AND_COPY), UVERBS_ATTR_PTR_OUT( MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), - UA_MANDATORY, - UA_MIN_SZ_OR_ZERO)); + UA_MANDATORY)); DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX, &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER), diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 6073fd9d9c49..0b46ef8f0b4c 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -207,8 +207,11 @@ struct uverbs_object_tree_def { .u.ptr.min_len = ((uintptr_t)(&((_type *)0)->_last + 1)), .u.ptr.len = sizeof(_type) #define UVERBS_ATTR_SIZE(_min_len, _len) \ .u.ptr.min_len = _min_len, .u.ptr.len = _len -#define 
UVERBS_ATTR_MIN_SIZE(_min_len) \ - UVERBS_ATTR_SIZE(_min_len, USHRT_MAX) +/* + * Specifies at least min_len bytes must be passed in, but the amount can be + * larger, up to the protocol maximum size. No check for zeroing is done. + */ +#define UVERBS_ATTR_MIN_SIZE(_min_len) UVERBS_ATTR_SIZE(_min_len, USHRT_MAX) /* Must be used in the '...' of any UVERBS_ATTR */ #define UA_ALLOC_AND_COPY .alloc_and_copy = 1 @@ -265,13 +268,11 @@ struct uverbs_object_tree_def { #define UVERBS_ATTR_UHW() \ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN, \ - UVERBS_ATTR_SIZE(0, USHRT_MAX), \ - UA_OPTIONAL, \ - UA_MIN_SZ_OR_ZERO), \ + UVERBS_ATTR_MIN_SIZE(0), \ + UA_OPTIONAL), \ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT, \ - UVERBS_ATTR_SIZE(0, USHRT_MAX), \ - UA_MIN_SZ_OR_ZERO), \ + UVERBS_ATTR_MIN_SIZE(0), \ + UA_OPTIONAL) /* * ======================================= -- cgit From 422e3d37ed7ea8b421208a44913c420055334976 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 4 Jul 2018 08:50:31 +0300 Subject: RDMA/uverbs: Combine MIN_SZ_OR_ZERO with UVERBS_ATTR_STRUCT After all the rework is done it is now possible to include single flags in the type macros. Any user of UVERBS_ATTR_STRUCT needs to zero-check data past the end of the known struct to be correct, so make this mandatory, and get rid of MIN_SZ_OR_ZERO as a user flag. This changes UVERBS_ATTR_TYPE to refer to a struct of exact size with no possibility of extension; convert the few users of UVERBS_ATTR_TYPE and MIN_SZ_OR_ZERO to use UVERBS_ATTR_STRUCT. The one user of UVERBS_ATTR_STRUCT without MIN_SZ_OR_ZERO is just confused. There is some padding at the end of that struct, but userspace always provides it with the padding. The construction doesn't test if the padding is zero, so it is pointless. Just use UVERBS_ATTR_TYPE. Finally, rename min_sz_or_zero to zero_trailing to better reflect what it does and hopefully avoid such mis-uses in the future. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_ioctl.c | 4 +-- drivers/infiniband/core/uverbs_ioctl_merge.c | 2 +- .../infiniband/core/uverbs_std_types_flow_action.c | 21 +++++++-------- include/rdma/uverbs_ioctl.h | 31 +++++++++++++++------- 4 files changed, 34 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index cb6109036129..5b59c6f0feed 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -98,14 +98,14 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile, * non-zero content, making ABI compat/discovery simpler. 
*/ if (uattr->len > val_spec->u.ptr.len && - val_spec->min_sz_or_zero && + val_spec->zero_trailing && !uverbs_is_attr_cleared(uattr, val_spec->u.ptr.len)) return -EOPNOTSUPP; /* fall through */ case UVERBS_ATTR_TYPE_PTR_OUT: if (uattr->len < val_spec->u.ptr.min_len || - (!val_spec->min_sz_or_zero && + (!val_spec->zero_trailing && uattr->len > val_spec->u.ptr.len)) return -EINVAL; diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c index ece5c9463dbe..f81aa888ce5c 100644 --- a/drivers/infiniband/core/uverbs_ioctl_merge.c +++ b/drivers/infiniband/core/uverbs_ioctl_merge.c @@ -378,7 +378,7 @@ static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_me "ib_uverbs: Tried to merge attr (%d) but it's an object with new/destroy access but isn't mandatory\n", min_id) || WARN(IS_ATTR_OBJECT(attr) && - attr->min_sz_or_zero, + attr->zero_trailing, "ib_uverbs: Tried to merge attr (%d) but it's an object with min_sz flag\n", min_id)) { res = -EINVAL; diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index 143dbfdfda6f..c753a34cd984 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -367,8 +367,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = { [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = { .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_keymat_aes_gcm), - UA_MIN_SZ_OR_ZERO + UVERBS_ATTR_STRUCT( + struct ib_uverbs_flow_action_esp_keymat_aes_gcm, + aes_key), }, }; @@ -380,8 +381,8 @@ static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = { }, [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = { .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, size), - UA_MIN_SZ_OR_ZERO + UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, + size), }, }; @@ -394,8 +395,7 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts), - UA_MANDATORY, - UA_MIN_SZ_OR_ZERO), + UA_MANDATORY), UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32), UA_OPTIONAL), @@ -407,8 +407,7 @@ DECLARE_UVERBS_NAMED_METHOD( UA_OPTIONAL), UVERBS_ATTR_PTR_IN( UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, - type), + UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap), UA_OPTIONAL)); DECLARE_UVERBS_NAMED_METHOD( @@ -420,8 +419,7 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts), - UA_OPTIONAL, - UA_MIN_SZ_OR_ZERO), + UA_OPTIONAL), UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32), UA_OPTIONAL), @@ -433,8 +431,7 @@ DECLARE_UVERBS_NAMED_METHOD( UA_OPTIONAL), UVERBS_ATTR_PTR_IN( UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, - type), + UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap), UA_OPTIONAL)); DECLARE_UVERBS_NAMED_METHOD_DESTROY( diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 0b46ef8f0b4c..017ccf75890c 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -67,10 +67,11 @@ struct uverbs_attr_spec { u8 
type; /* - * Support extending attributes by length, validate all - * unknown size == zero + * Support extending attributes by length. Allow the user to provide + * more bytes than ptr.len, but check that everything after is zero'd + * by the user. */ - u8 min_sz_or_zero:1; + u8 zero_trailing:1; /* * Valid only for PTR_IN. Allocate and copy the data inside * the parser @@ -200,13 +201,26 @@ struct uverbs_object_tree_def { * ======================================= */ -/* Use in the _type parameter for attribute specifications */ -#define UVERBS_ATTR_TYPE(_type) \ - .u.ptr.min_len = sizeof(_type), .u.ptr.len = sizeof(_type) -#define UVERBS_ATTR_STRUCT(_type, _last) \ - .u.ptr.min_len = ((uintptr_t)(&((_type *)0)->_last + 1)), .u.ptr.len = sizeof(_type) #define UVERBS_ATTR_SIZE(_min_len, _len) \ .u.ptr.min_len = _min_len, .u.ptr.len = _len + +/* + * Specifies a uapi structure that cannot be extended. The user must always + * supply the whole structure and nothing more. The structure must be declared + * in a header under include/uapi/rdma. + */ +#define UVERBS_ATTR_TYPE(_type) \ + .u.ptr.min_len = sizeof(_type), .u.ptr.len = sizeof(_type) +/* + * Specifies a uapi structure where the user must provide at least up to + * member 'last'. Anything after last and up until the end of the structure + * can be non-zero, anything longer than the end of the structure must be + * zero. The structure must be declared in a header under include/uapi/rdma. + */ +#define UVERBS_ATTR_STRUCT(_type, _last) \ + .zero_trailing = 1, \ + UVERBS_ATTR_SIZE(((uintptr_t)(&((_type *)0)->_last + 1)), \ + sizeof(_type)) /* * Specifies at least min_len bytes must be passed in, but the amount can be * larger, up to the protocol maximum size. No check for zeroing is done. @@ -216,7 +230,6 @@ struct uverbs_object_tree_def { /* Must be used in the '...' of any UVERBS_ATTR */ #define UA_ALLOC_AND_COPY .alloc_and_copy = 1 #define UA_MANDATORY .mandatory = 1 -#define UA_MIN_SZ_OR_ZERO .min_sz_or_zero = 1 #define UA_OPTIONAL .mandatory = 0 #define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...) \ -- cgit From c33e73af2183fb9fcd993e37abcdecc058b22d91 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 4 Jul 2018 11:32:06 +0300 Subject: IB/uverbs: Add a uobj_perform_destroy helper This consolidates a bunch of repeated code patterns into a helper. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/rdma_core.c | 22 +++++++++++ drivers/infiniband/core/uverbs_cmd.c | 76 +++++++----------------------------- include/rdma/uverbs_std_types.h | 5 +++ 3 files changed, 41 insertions(+), 62 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index c67bcdda5760..38d3929f6e65 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -128,6 +128,28 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive) return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY; } +/* + * Does both rdma_lookup_get_uobject() and rdma_remove_commit_uobject(), then + * returns success_res on success (negative errno on failure). For use by + * callers that do not need the uobj. 
+ */ +int __uobj_perform_destroy(const struct uverbs_obj_type *type, int id, + struct ib_uverbs_file *ufile, int success_res) +{ + struct ib_uobject *uobj; + int ret; + + uobj = rdma_lookup_get_uobject(type, ufile->ucontext, id, true); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); + + ret = rdma_remove_commit_uobject(uobj); + if (ret) + return ret; + + return success_res; +} + static struct ib_uobject *alloc_uobj(struct ib_ucontext *context, const struct uverbs_obj_type *type) { diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 5d0fd36b009d..b751c196e2c6 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -367,20 +367,12 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, int in_len, int out_len) { struct ib_uverbs_dealloc_pd cmd; - struct ib_uobject *uobj; - int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_PD, cmd.pd_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - ret = uobj_remove_commit(uobj); - - return ret ?: in_len; + return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, file, + in_len); } struct xrcd_table_entry { @@ -597,19 +589,12 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_close_xrcd cmd; - struct ib_uobject *uobj; - int ret = 0; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - ret = uobj_remove_commit(uobj); - return ret ?: in_len; + return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, file, + in_len); } int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, @@ -829,20 +814,12 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_dereg_mr cmd; - struct ib_uobject *uobj; - int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - ret = uobj_remove_commit(uobj); - - return ret ?: in_len; + return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, file, + in_len); } ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, @@ -921,19 +898,12 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_dealloc_mw cmd; - struct ib_uobject *uobj; - int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_MW, cmd.mw_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - ret = uobj_remove_commit(uobj); - return ret ?: in_len; + return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, file, + in_len); } ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, @@ -2641,19 +2611,12 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_destroy_ah cmd; - struct ib_uobject *uobj; - int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_AH, cmd.ah_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - ret = uobj_remove_commit(uobj); - return ret ?: in_len; + return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, file, + in_len); } ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, @@ -3445,7 +3408,6 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct 
ib_uverbs_file *file, struct ib_udata *uhw) { struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {}; - struct ib_uobject *uobj; int ret; size_t required_cmd_sz; @@ -3466,12 +3428,8 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EOPNOTSUPP; - uobj = uobj_get_write(UVERBS_OBJECT_RWQ_IND_TBL, cmd.ind_tbl_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - return uobj_remove_commit(uobj); + return uobj_perform_destroy(UVERBS_OBJECT_RWQ_IND_TBL, + cmd.ind_tbl_handle, file, 0); } int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, @@ -3658,7 +3616,6 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, struct ib_udata *uhw) { struct ib_uverbs_destroy_flow cmd; - struct ib_uobject *uobj; int ret; if (ucore->inlen < sizeof(cmd)) @@ -3671,13 +3628,8 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EINVAL; - uobj = uobj_get_write(UVERBS_OBJECT_FLOW, cmd.flow_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - ret = uobj_remove_commit(uobj); - return ret; + return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, file, + 0); } static int __uverbs_create_xsrq(struct ib_uverbs_file *file, diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 4c151b67fb6d..27c24453fc12 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -71,6 +71,11 @@ static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type, #define uobj_get_write(_type, _id, _ucontext) \ __uobj_get(uobj_get_type(_type), true, _ucontext, _id) +int __uobj_perform_destroy(const struct uverbs_obj_type *type, int id, + struct ib_uverbs_file *ufile, int success_res); +#define uobj_perform_destroy(_type, _id, _ufile, _success_res) \ + __uobj_perform_destroy(uobj_get_type(_type), _id, _ufile, _success_res) + static inline void uobj_put_read(struct ib_uobject *uobj) { rdma_lookup_put_uobject(uobj, false); -- cgit From 6a5e9c88419828a487204e35291ae4459697a9bd Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 4 Jul 2018 11:32:07 +0300 Subject: IB/uverbs: Move non driver related elements from ib_ucontext to ib_ufile The IDR is part of the ib_ufile so all the machinery to lock it, handle closing and disassociation rightly belongs to the ufile not the ucontext. This changes the lifetime of that data to match the lifetime of the file descriptor which is always strictly longer than the lifetime of the ucontext. We need the entire locking machinery to continue to exist after ucontext destruction to allow us to return the destroy data after a device has been disassociated. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/rdma_core.c | 72 +++++++++++++++++------------------ drivers/infiniband/core/rdma_core.h | 1 - drivers/infiniband/core/uverbs.h | 8 ++++ drivers/infiniband/core/uverbs_cmd.c | 1 - drivers/infiniband/core/uverbs_main.c | 4 ++ include/rdma/ib_verbs.h | 9 ++--- 6 files changed, 49 insertions(+), 46 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 38d3929f6e65..11c6f271be00 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -161,6 +161,7 @@ static struct ib_uobject *alloc_uobj(struct ib_ucontext *context, * user_handle should be filled by the handler, * The object is added to the list in the commit stage. 
*/ + uobj->ufile = context->ufile; uobj->context = context; uobj->type = type; /* @@ -286,7 +287,7 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, ret = uverbs_try_lock_object(uobj, exclusive); if (ret) { - WARN(ucontext->cleanup_reason, + WARN(uobj->ufile->cleanup_reason, "ib_uverbs: Trying to lookup_get while cleanup context\n"); goto free; } @@ -441,8 +442,8 @@ static void assert_uverbs_usecnt(struct ib_uobject *uobj, bool exclusive) static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj, enum rdma_remove_reason why) { + struct ib_uverbs_file *ufile = uobj->ufile; int ret; - struct ib_ucontext *ucontext = uobj->context; ret = uobj->type->type_class->remove_commit(uobj, why); if (ib_is_destroy_retryable(ret, why, uobj)) { @@ -450,9 +451,9 @@ static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj, atomic_set(&uobj->usecnt, 0); uobj->type->type_class->lookup_put(uobj, true); } else { - mutex_lock(&ucontext->uobjects_lock); + mutex_lock(&ufile->uobjects_lock); list_del(&uobj->list); - mutex_unlock(&ucontext->uobjects_lock); + mutex_unlock(&ufile->uobjects_lock); /* put the ref we took when we created the object */ uverbs_uobject_put(uobj); } @@ -464,19 +465,19 @@ static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj, int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj) { int ret; - struct ib_ucontext *ucontext = uobj->context; + struct ib_uverbs_file *ufile = uobj->ufile; /* put the ref count we took at lookup_get */ uverbs_uobject_put(uobj); /* Cleanup is running. Calling this should have been impossible */ - if (!down_read_trylock(&ucontext->cleanup_rwsem)) { + if (!down_read_trylock(&ufile->cleanup_rwsem)) { WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n"); return 0; } assert_uverbs_usecnt(uobj, true); ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY); - up_read(&ucontext->cleanup_rwsem); + up_read(&ufile->cleanup_rwsem); return ret; } @@ -496,10 +497,10 @@ static const struct uverbs_obj_type null_obj_type = { int rdma_explicit_destroy(struct ib_uobject *uobject) { int ret; - struct ib_ucontext *ucontext = uobject->context; + struct ib_uverbs_file *ufile = uobject->ufile; /* Cleanup is running. Calling this should have been impossible */ - if (!down_read_trylock(&ucontext->cleanup_rwsem)) { + if (!down_read_trylock(&ufile->cleanup_rwsem)) { WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n"); return 0; } @@ -512,7 +513,7 @@ int rdma_explicit_destroy(struct ib_uobject *uobject) uobject->type = &null_obj_type; out: - up_read(&ucontext->cleanup_rwsem); + up_read(&ufile->cleanup_rwsem); return ret; } @@ -542,8 +543,10 @@ static void alloc_commit_fd_uobject(struct ib_uobject *uobj) int rdma_alloc_commit_uobject(struct ib_uobject *uobj) { + struct ib_uverbs_file *ufile = uobj->ufile; + /* Cleanup is running. 
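(The trylock below can fail only while uverbs_cleanup_ucontext() holds cleanup_rwsem for write.)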
Calling this should have been impossible */ - if (!down_read_trylock(&uobj->context->cleanup_rwsem)) { + if (!down_read_trylock(&ufile->cleanup_rwsem)) { int ret; WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n"); @@ -559,12 +562,12 @@ int rdma_alloc_commit_uobject(struct ib_uobject *uobj) assert_uverbs_usecnt(uobj, true); atomic_set(&uobj->usecnt, 0); - mutex_lock(&uobj->context->uobjects_lock); - list_add(&uobj->list, &uobj->context->uobjects); - mutex_unlock(&uobj->context->uobjects_lock); + mutex_lock(&ufile->uobjects_lock); + list_add(&uobj->list, &ufile->uobjects); + mutex_unlock(&ufile->uobjects_lock); uobj->type->type_class->alloc_commit(uobj); - up_read(&uobj->context->cleanup_rwsem); + up_read(&ufile->cleanup_rwsem); return 0; } @@ -638,20 +641,18 @@ EXPORT_SYMBOL(uverbs_idr_class); static void _uverbs_close_fd(struct ib_uobject_file *uobj_file) { - struct ib_ucontext *ucontext; struct ib_uverbs_file *ufile = uobj_file->ufile; int ret; - mutex_lock(&uobj_file->ufile->cleanup_mutex); + mutex_lock(&ufile->cleanup_mutex); /* uobject was either already cleaned up or is cleaned up right now anyway */ if (!uobj_file->uobj.context || - !down_read_trylock(&uobj_file->uobj.context->cleanup_rwsem)) + !down_read_trylock(&ufile->cleanup_rwsem)) goto unlock; - ucontext = uobj_file->uobj.context; ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE); - up_read(&ucontext->cleanup_rwsem); + up_read(&ufile->cleanup_rwsem); if (ret) pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n"); unlock: @@ -671,6 +672,7 @@ void uverbs_close_fd(struct file *f) static int __uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, enum rdma_remove_reason reason) { + struct ib_uverbs_file *ufile = ucontext->ufile; struct ib_uobject *obj, *next_obj; int ret = -EINVAL; int err = 0; @@ -684,9 +686,9 @@ static int __uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, * We take and release the lock per traversal in order to let * other threads (which might still use the FDs) chance to run. */ - mutex_lock(&ucontext->uobjects_lock); - ucontext->cleanup_reason = reason; - list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects, list) { + mutex_lock(&ufile->uobjects_lock); + ufile->cleanup_reason = reason; + list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) { /* * if we hit this WARN_ON, that means we are * racing with a lookup_get. @@ -710,7 +712,7 @@ static int __uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, uverbs_uobject_put(obj); ret = 0; } - mutex_unlock(&ucontext->uobjects_lock); + mutex_unlock(&ufile->uobjects_lock); return ret; } @@ -719,14 +721,16 @@ void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed) enum rdma_remove_reason reason = device_removed ? RDMA_REMOVE_DRIVER_REMOVE : RDMA_REMOVE_CLOSE; + struct ib_uverbs_file *ufile = ucontext->ufile; + /* * Waits for all remove_commit and alloc_commit to finish. Logically, We * want to hold this forever as the context is going to be destroyed, * but we'll release it since it causes a "held lock freed" BUG message. 
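Taking it for write here excludes the alloc/remove commit paths, which only ever take it for read.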
*/ - down_write(&ucontext->cleanup_rwsem); - ucontext->cleanup_retryable = true; - while (!list_empty(&ucontext->uobjects)) + down_write(&ufile->cleanup_rwsem); + ufile->ucontext->cleanup_retryable = true; + while (!list_empty(&ufile->uobjects)) if (__uverbs_cleanup_ucontext(ucontext, reason)) { /* * No entry was cleaned-up successfully during this @@ -735,19 +739,11 @@ void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed) break; } - ucontext->cleanup_retryable = false; - if (!list_empty(&ucontext->uobjects)) + ufile->ucontext->cleanup_retryable = false; + if (!list_empty(&ufile->uobjects)) __uverbs_cleanup_ucontext(ucontext, reason); - up_write(&ucontext->cleanup_rwsem); -} - -void uverbs_initialize_ucontext(struct ib_ucontext *ucontext) -{ - ucontext->cleanup_reason = 0; - mutex_init(&ucontext->uobjects_lock); - INIT_LIST_HEAD(&ucontext->uobjects); - init_rwsem(&ucontext->cleanup_rwsem); + up_write(&ufile->cleanup_rwsem); } const struct uverbs_obj_type_class uverbs_fd_class = { diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index 8cede4546b25..f7f157e78f8c 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -56,7 +56,6 @@ const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_sp * cleanup_ucontext removes all uobjects from the context and puts them. */ void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed); -void uverbs_initialize_ucontext(struct ib_ucontext *ucontext); /* * uverbs_uobject_get is called in order to increase the reference count on diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index a663e2cdc3d0..8b0a8ec98ac8 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -145,6 +145,14 @@ struct ib_uverbs_file { struct list_head list; int is_closed; + /* locking the uobjects_list */ + struct mutex uobjects_lock; + struct list_head uobjects; + + /* protects cleanup process from other actions */ + struct rw_semaphore cleanup_rwsem; + enum rdma_remove_reason cleanup_reason; + struct idr idr; /* spinlock protects write access to idr */ spinlock_t idr_lock; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index b751c196e2c6..aa84246c0bfe 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -110,7 +110,6 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, ucontext->cg_obj = cg_obj; /* ufile is required when some objects are released */ ucontext->ufile = file; - uverbs_initialize_ucontext(ucontext); rcu_read_lock(); ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index c05ce5ae5415..82168b53e2ae 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -888,6 +888,10 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) mutex_init(&file->mutex); mutex_init(&file->cleanup_mutex); + mutex_init(&file->uobjects_lock); + INIT_LIST_HEAD(&file->uobjects); + init_rwsem(&file->cleanup_rwsem); + filp->private_data = file; kobject_get(&dev->kobj); list_add_tail(&file->list, &dev->uverbs_file_list); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 8784d5bfc252..9c04cb5e4041 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1500,12 +1500,6 @@ struct ib_ucontext { struct ib_uverbs_file 
*ufile; int closing; - /* locking the uobjects_list */ - struct mutex uobjects_lock; - struct list_head uobjects; - /* protects cleanup process from other actions */ - struct rw_semaphore cleanup_rwsem; - enum rdma_remove_reason cleanup_reason; bool cleanup_retryable; struct pid *tgid; @@ -1531,6 +1525,9 @@ struct ib_ucontext { struct ib_uobject { u64 user_handle; /* handle given to us by userspace */ + /* ufile & ucontext owning this object */ + struct ib_uverbs_file *ufile; + /* FIXME, save memory: ufile->context == context */ struct ib_ucontext *context; /* associated user context */ void *object; /* containing object */ struct list_head list; /* link to context's list */ -- cgit From 6ef1c82821b2ae9bfa26fe65e6f0a66dfd79b7d7 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 4 Jul 2018 11:32:08 +0300 Subject: IB/uverbs: Replace ib_ucontext with ib_uverbs_file in core function calls The correct handle to refer to the idr/etc is ib_uverbs_file, revise all the core APIs to use this instead. The user APIs are left as wrappers that automatically convert a ucontext to a ufile for now. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/rdma_core.c | 66 +++++++++++++++++----------------- drivers/infiniband/core/rdma_core.h | 21 +++++------ drivers/infiniband/core/uverbs_ioctl.c | 4 +-- drivers/infiniband/core/uverbs_main.c | 15 ++++---- include/rdma/uverbs_std_types.h | 16 ++++----- include/rdma/uverbs_types.h | 8 ++--- 6 files changed, 62 insertions(+), 68 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 11c6f271be00..dcaf3813ee78 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -139,7 +139,7 @@ int __uobj_perform_destroy(const struct uverbs_obj_type *type, int id, struct ib_uobject *uobj; int ret; - uobj = rdma_lookup_get_uobject(type, ufile->ucontext, id, true); + uobj = rdma_lookup_get_uobject(type, ufile, id, true); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -150,7 +150,7 @@ int __uobj_perform_destroy(const struct uverbs_obj_type *type, int id, return success_res; } -static struct ib_uobject *alloc_uobj(struct ib_ucontext *context, +static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile, const struct uverbs_obj_type *type) { struct ib_uobject *uobj = kzalloc(type->obj_size, GFP_KERNEL); @@ -161,8 +161,8 @@ static struct ib_uobject *alloc_uobj(struct ib_ucontext *context, * user_handle should be filled by the handler, * The object is added to the list in the commit stage. */ - uobj->ufile = context->ufile; - uobj->context = context; + uobj->ufile = ufile; + uobj->context = ufile->ucontext; uobj->type = type; /* * Allocated objects start out as write locked to deny any other @@ -210,15 +210,15 @@ static void uverbs_idr_remove_uobj(struct ib_uobject *uobj) } /* Returns the ib_uobject or an error. The caller should check for IS_ERR. 
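The lookup itself runs under rcu_read_lock(), so the entry cannot be released while it is being examined.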
*/ -static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext, - int id, bool exclusive) +static struct ib_uobject * +lookup_get_idr_uobject(const struct uverbs_obj_type *type, + struct ib_uverbs_file *ufile, int id, bool exclusive) { struct ib_uobject *uobj; rcu_read_lock(); /* object won't be released as we're protected in rcu */ - uobj = idr_find(&ucontext->ufile->idr, id); + uobj = idr_find(&ufile->idr, id); if (!uobj) { uobj = ERR_PTR(-ENOENT); goto free; @@ -239,7 +239,7 @@ free: } static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext, + struct ib_uverbs_file *ufile, int id, bool exclusive) { struct file *f; @@ -270,13 +270,13 @@ static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *ty } struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext, - int id, bool exclusive) + struct ib_uverbs_file *ufile, int id, + bool exclusive) { struct ib_uobject *uobj; int ret; - uobj = type->type_class->lookup_get(type, ucontext, id, exclusive); + uobj = type->type_class->lookup_get(type, ufile, id, exclusive); if (IS_ERR(uobj)) return uobj; @@ -300,12 +300,12 @@ free: } static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext) + struct ib_uverbs_file *ufile) { int ret; struct ib_uobject *uobj; - uobj = alloc_uobj(ucontext, type); + uobj = alloc_uobj(ufile, type); if (IS_ERR(uobj)) return uobj; @@ -313,7 +313,7 @@ static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type * if (ret) goto uobj_put; - ret = ib_rdmacg_try_charge(&uobj->cg_obj, ucontext->device, + ret = ib_rdmacg_try_charge(&uobj->cg_obj, ufile->ucontext->device, RDMACG_RESOURCE_HCA_OBJECT); if (ret) goto idr_remove; @@ -328,7 +328,7 @@ uobj_put: } static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext) + struct ib_uverbs_file *ufile) { const struct uverbs_obj_fd_type *fd_type = container_of(type, struct uverbs_obj_fd_type, type); @@ -341,7 +341,7 @@ static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *t if (new_fd < 0) return ERR_PTR(new_fd); - uobj = alloc_uobj(ucontext, type); + uobj = alloc_uobj(ufile, type); if (IS_ERR(uobj)) { put_unused_fd(new_fd); return uobj; @@ -360,7 +360,7 @@ static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *t uobj_file->uobj.id = new_fd; uobj_file->uobj.object = filp; - uobj_file->ufile = ucontext->ufile; + uobj_file->ufile = ufile; INIT_LIST_HEAD(&uobj->list); kref_get(&uobj_file->ufile->ref); @@ -368,9 +368,9 @@ static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *t } struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext) + struct ib_uverbs_file *ufile) { - return type->type_class->alloc_begin(type, ucontext); + return type->type_class->alloc_begin(type, ufile); } static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj, @@ -669,10 +669,9 @@ void uverbs_close_fd(struct file *f) kref_put(uverbs_file_ref, ib_uverbs_release_file); } -static int __uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, - enum rdma_remove_reason reason) +static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, + enum rdma_remove_reason reason) { - struct ib_uverbs_file *ufile = ucontext->ufile; struct ib_uobject *obj, *next_obj; int ret = 
-EINVAL; int err = 0; @@ -716,12 +715,11 @@ static int __uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, return ret; } -void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed) +void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed) { enum rdma_remove_reason reason = device_removed ? RDMA_REMOVE_DRIVER_REMOVE : RDMA_REMOVE_CLOSE; - struct ib_uverbs_file *ufile = ucontext->ufile; /* * Waits for all remove_commit and alloc_commit to finish. Logically, We @@ -731,7 +729,7 @@ void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed) down_write(&ufile->cleanup_rwsem); ufile->ucontext->cleanup_retryable = true; while (!list_empty(&ufile->uobjects)) - if (__uverbs_cleanup_ucontext(ucontext, reason)) { + if (__uverbs_cleanup_ufile(ufile, reason)) { /* * No entry was cleaned-up successfully during this * iteration @@ -741,7 +739,7 @@ void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed) ufile->ucontext->cleanup_retryable = false; if (!list_empty(&ufile->uobjects)) - __uverbs_cleanup_ucontext(ucontext, reason); + __uverbs_cleanup_ufile(ufile, reason); up_write(&ufile->cleanup_rwsem); } @@ -757,19 +755,19 @@ const struct uverbs_obj_type_class uverbs_fd_class = { }; EXPORT_SYMBOL(uverbs_fd_class); -struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs, - struct ib_ucontext *ucontext, - enum uverbs_obj_access access, - int id) +struct ib_uobject * +uverbs_get_uobject_from_file(const struct uverbs_obj_type *type_attrs, + struct ib_uverbs_file *ufile, + enum uverbs_obj_access access, int id) { switch (access) { case UVERBS_ACCESS_READ: - return rdma_lookup_get_uobject(type_attrs, ucontext, id, false); + return rdma_lookup_get_uobject(type_attrs, ufile, id, false); case UVERBS_ACCESS_DESTROY: case UVERBS_ACCESS_WRITE: - return rdma_lookup_get_uobject(type_attrs, ucontext, id, true); + return rdma_lookup_get_uobject(type_attrs, ufile, id, true); case UVERBS_ACCESS_NEW: - return rdma_alloc_begin_uobject(type_attrs, ucontext); + return rdma_alloc_begin_uobject(type_attrs, ufile); default: WARN_ON(true); return ERR_PTR(-EOPNOTSUPP); diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index f7f157e78f8c..1bba60e960c1 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -48,14 +48,8 @@ const struct uverbs_object_spec *uverbs_get_object(struct ib_uverbs_file *ufile, uint16_t object); const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object, uint16_t method); -/* - * These functions initialize the context and cleanups its uobjects. - * The context has a list of objects which is protected by a mutex - * on the context. initialize_ucontext should be called when we create - * a context. - * cleanup_ucontext removes all uobjects from the context and puts them. - */ -void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed); + +void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed); /* * uverbs_uobject_get is called in order to increase the reference count on @@ -81,7 +75,7 @@ void uverbs_uobject_put(struct ib_uobject *uobject); void uverbs_close_fd(struct file *f); /* - * Get an ib_uobject that corresponds to the given id from ucontext, assuming + * Get an ib_uobject that corresponds to the given id from ufile, assuming * the object is from the given type. Lock it to the required access when * applicable. 
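(Read access takes the shared lock; write and destroy take the exclusive one.)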
* This function could create (access == NEW), destroy (access == DESTROY) @@ -89,10 +83,11 @@ void uverbs_close_fd(struct file *f); * The action will be finalized only when uverbs_finalize_object or * uverbs_finalize_objects are called. */ -struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs, - struct ib_ucontext *ucontext, - enum uverbs_obj_access access, - int id); +struct ib_uobject * +uverbs_get_uobject_from_file(const struct uverbs_obj_type *type_attrs, + struct ib_uverbs_file *ufile, + enum uverbs_obj_access access, int id); + /* * Note that certain finalize stages could return a status: * (a) alloc_commit could return a failure if the object is committed at the diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 5b59c6f0feed..d3bf82cfaa2b 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -152,9 +152,9 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile, if (!object) return -EINVAL; - o_attr->uobject = uverbs_get_uobject_from_context( + o_attr->uobject = uverbs_get_uobject_from_file( object->type_attrs, - ufile->ucontext, + ufile, spec->u.obj.access, (int)uattr->data); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 82168b53e2ae..037c8975d9f0 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -227,12 +227,13 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp, } } -static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, - struct ib_ucontext *context, - bool device_removed) +static int ib_uverbs_cleanup_ufile(struct ib_uverbs_file *file, + bool device_removed) { + struct ib_ucontext *context = file->ucontext; + context->closing = 1; - uverbs_cleanup_ucontext(context, device_removed); + uverbs_cleanup_ufile(file, device_removed); put_pid(context->tgid); ib_rdmacg_uncharge(&context->cg_obj, context->device, @@ -918,7 +919,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) mutex_lock(&file->cleanup_mutex); if (file->ucontext) { - ib_uverbs_cleanup_ucontext(file, file->ucontext, false); + ib_uverbs_cleanup_ufile(file, false); file->ucontext = NULL; } mutex_unlock(&file->cleanup_mutex); @@ -1176,7 +1177,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, mutex_unlock(&file->cleanup_mutex); /* At this point ib_uverbs_close cannot be running - * ib_uverbs_cleanup_ucontext + * ib_uverbs_cleanup_ufile */ if (ucontext) { /* We must release the mutex before going ahead and @@ -1188,7 +1189,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, ib_uverbs_event_handler(&file->event_handler, &event); ib_uverbs_disassociate_ucontext(ucontext); mutex_lock(&file->cleanup_mutex); - ib_uverbs_cleanup_ucontext(file, ucontext, true); + ib_uverbs_cleanup_ufile(file, true); mutex_unlock(&file->cleanup_mutex); } diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 27c24453fc12..13b92020edd0 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -48,28 +48,28 @@ static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(vo static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type, bool write, - struct ib_ucontext *ucontext, + struct ib_uverbs_file *ufile, int id) { - return rdma_lookup_get_uobject(type, ucontext, id, write); + return rdma_lookup_get_uobject(type, ufile, id, write); } 
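/* The uobj_get_* macros below all expand to __uobj_get(); the bool picks shared (read) vs exclusive (write) locking. */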
#define uobj_get_type(_object) UVERBS_OBJECT(_object).type_attrs -#define uobj_get_read(_type, _id, _ucontext) \ - __uobj_get(uobj_get_type(_type), false, _ucontext, _id) +#define uobj_get_read(_type, _id, _ucontext) \ + __uobj_get(uobj_get_type(_type), false, (_ucontext)->ufile, _id) #define uobj_get_obj_read(_object, _type, _id, _ucontext) \ ({ \ struct ib_uobject *__uobj = \ __uobj_get(uobj_get_type(_type), \ - false, _ucontext, _id); \ + false, (_ucontext)->ufile, _id); \ \ (struct ib_##_object *)(IS_ERR(__uobj) ? NULL : __uobj->object);\ }) -#define uobj_get_write(_type, _id, _ucontext) \ - __uobj_get(uobj_get_type(_type), true, _ucontext, _id) +#define uobj_get_write(_type, _id, _ucontext) \ + __uobj_get(uobj_get_type(_type), true, (_ucontext)->ufile, _id) int __uobj_perform_destroy(const struct uverbs_obj_type *type, int id, struct ib_uverbs_file *ufile, int success_res); @@ -107,7 +107,7 @@ static inline void uobj_alloc_abort(struct ib_uobject *uobj) static inline struct ib_uobject *__uobj_alloc(const struct uverbs_obj_type *type, struct ib_ucontext *ucontext) { - return rdma_alloc_begin_uobject(type, ucontext); + return rdma_alloc_begin_uobject(type, ucontext->ufile); } #define uobj_alloc(_type, ucontext) \ diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index 175495d1b0b8..5290d8d34e9a 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -72,12 +72,12 @@ struct uverbs_obj_type_class { * reset flow). */ struct ib_uobject *(*alloc_begin)(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext); + struct ib_uverbs_file *ufile); void (*alloc_commit)(struct ib_uobject *uobj); void (*alloc_abort)(struct ib_uobject *uobj); struct ib_uobject *(*lookup_get)(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext, int id, + struct ib_uverbs_file *ufile, int id, bool exclusive); void (*lookup_put)(struct ib_uobject *uobj, bool exclusive); /* @@ -120,11 +120,11 @@ struct uverbs_obj_idr_type { }; struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext, + struct ib_uverbs_file *ufile, int id, bool exclusive); void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive); struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext); + struct ib_uverbs_file *ufile); void rdma_alloc_abort_uobject(struct ib_uobject *uobj); int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj); int rdma_alloc_commit_uobject(struct ib_uobject *uobj); -- cgit From 2cc1e3b80942a7de7dce81f8a86c27a4ba3a695e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 4 Jul 2018 11:32:09 +0300 Subject: IB/uverbs: Replace file->ucontext with file in uverbs_cmd.c The ucontext isn't needed any more, just pass the uverbs_file directly. 
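The conversion is mechanical; as a sketch (using the PD lookup as the representative case, taken from the hunks below), a typical call site changes like this:

/* before: every handler chased file->ucontext at the call site */
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);

/* after: the ufile is the handle; helpers dig out the ucontext only
 * where the driver interface still needs it
 */
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);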
Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_cmd.c | 153 ++++++++++++++++------------------- include/rdma/uverbs_std_types.h | 35 ++++---- 2 files changed, 90 insertions(+), 98 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index aa84246c0bfe..ed61bd5b9c2b 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -48,10 +48,10 @@ #include "core_priv.h" static struct ib_uverbs_completion_event_file * -ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context) +ib_uverbs_lookup_comp_file(int fd, struct ib_uverbs_file *ufile) { struct ib_uobject *uobj = uobj_get_read(UVERBS_OBJECT_COMP_CHANNEL, - fd, context); + fd, ufile); struct ib_uobject_file *uobj_file; if (IS_ERR(uobj)) @@ -322,7 +322,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - uobj = uobj_alloc(UVERBS_OBJECT_PD, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_PD, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -509,8 +509,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, } } - obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, - file->ucontext); + obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, file); if (IS_ERR(obj)) { ret = PTR_ERR(obj); goto err_tree_mutex_unlock; @@ -652,11 +651,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if (ret) return ret; - uobj = uobj_alloc(UVERBS_OBJECT_MR, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_MR, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file); if (!pd) { ret = -EINVAL; goto err_free; @@ -748,8 +747,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))) return -EINVAL; - uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, - file->ucontext); + uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -767,7 +765,8 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, } if (cmd.flags & IB_MR_REREG_PD) { - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, + file); if (!pd) { ret = -EINVAL; goto put_uobjs; @@ -840,11 +839,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; - uobj = uobj_alloc(UVERBS_OBJECT_MW, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_MW, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file); if (!pd) { ret = -EINVAL; goto err_free; @@ -921,7 +920,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -966,14 +965,12 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, if (cmd->comp_vector >= file->device->num_comp_vectors) return ERR_PTR(-EINVAL); - obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, - file->ucontext); + obj = (struct ib_ucq_object 
*)uobj_alloc(UVERBS_OBJECT_CQ, file); if (IS_ERR(obj)) return obj; if (cmd->comp_channel >= 0) { - ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, - file->ucontext); + ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, file); if (IS_ERR(ev_file)) { ret = PTR_ERR(ev_file); goto err; @@ -1155,7 +1152,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); if (!cq) return -EINVAL; @@ -1220,7 +1217,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); if (!cq) return -EINVAL; @@ -1267,7 +1264,7 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); if (!cq) return -EINVAL; @@ -1294,8 +1291,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_CQ, cmd.cq_handle, - file->ucontext); + uobj = uobj_get_write(UVERBS_OBJECT_CQ, cmd.cq_handle, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -1353,8 +1349,7 @@ static int create_qp(struct ib_uverbs_file *file, if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) return -EPERM; - obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, - file->ucontext); + obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file); if (IS_ERR(obj)) return PTR_ERR(obj); obj->uxrcd = NULL; @@ -1364,9 +1359,9 @@ static int create_qp(struct ib_uverbs_file *file, if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) + sizeof(cmd->rwq_ind_tbl_handle) && (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) { - ind_tbl = uobj_get_obj_read(rwq_ind_table, UVERBS_OBJECT_RWQ_IND_TBL, - cmd->rwq_ind_tbl_handle, - file->ucontext); + ind_tbl = uobj_get_obj_read(rwq_ind_table, + UVERBS_OBJECT_RWQ_IND_TBL, + cmd->rwq_ind_tbl_handle, file); if (!ind_tbl) { ret = -EINVAL; goto err_put; @@ -1392,7 +1387,7 @@ static int create_qp(struct ib_uverbs_file *file, if (cmd->qp_type == IB_QPT_XRC_TGT) { xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle, - file->ucontext); + file); if (IS_ERR(xrcd_uobj)) { ret = -EINVAL; @@ -1411,8 +1406,8 @@ static int create_qp(struct ib_uverbs_file *file, cmd->max_recv_sge = 0; } else { if (cmd->is_srq) { - srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd->srq_handle, - file->ucontext); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, + cmd->srq_handle, file); if (!srq || srq->srq_type == IB_SRQT_XRC) { ret = -EINVAL; goto err_put; @@ -1421,8 +1416,9 @@ static int create_qp(struct ib_uverbs_file *file, if (!ind_tbl) { if (cmd->recv_cq_handle != cmd->send_cq_handle) { - rcq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->recv_cq_handle, - file->ucontext); + rcq = uobj_get_obj_read( + cq, UVERBS_OBJECT_CQ, + cmd->recv_cq_handle, file); if (!rcq) { ret = -EINVAL; goto err_put; @@ -1432,11 +1428,12 @@ static int create_qp(struct ib_uverbs_file *file, } if (has_sq) - scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->send_cq_handle, - file->ucontext); + scq = 
uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, + cmd->send_cq_handle, file); if (!ind_tbl) rcq = rcq ?: scq; - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, + file); if (!pd || (!scq && has_sq)) { ret = -EINVAL; goto err_put; @@ -1733,13 +1730,11 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, - file->ucontext); + obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file); if (IS_ERR(obj)) return PTR_ERR(obj); - xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, - file->ucontext); + xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, file); if (IS_ERR(xrcd_uobj)) { ret = -EINVAL; goto err_put; @@ -1841,7 +1836,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, goto out; } - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); if (!qp) { ret = -EINVAL; goto out; @@ -1946,7 +1941,7 @@ static int modify_qp(struct ib_uverbs_file *file, if (!attr) return -ENOMEM; - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file); if (!qp) { ret = -EINVAL; goto out; @@ -2101,8 +2096,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, memset(&resp, 0, sizeof resp); - uobj = uobj_get_write(UVERBS_OBJECT_QP, cmd.qp_handle, - file->ucontext); + uobj = uobj_get_write(UVERBS_OBJECT_QP, cmd.qp_handle, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -2167,7 +2161,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, if (!user_wr) return -ENOMEM; - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); if (!qp) goto out; @@ -2203,8 +2197,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, goto out_put; } - ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH, user_wr->wr.ud.ah, - file->ucontext); + ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH, + user_wr->wr.ud.ah, file); if (!ud->ah) { kfree(ud); ret = -EINVAL; @@ -2438,7 +2432,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, if (IS_ERR(wr)) return PTR_ERR(wr); - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); if (!qp) goto out; @@ -2487,7 +2481,7 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, if (IS_ERR(wr)) return PTR_ERR(wr); - srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file); if (!srq) goto out; @@ -2544,11 +2538,11 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - uobj = uobj_alloc(UVERBS_OBJECT_AH, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_AH, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file); if (!pd) { ret = -EINVAL; goto err; @@ -2632,7 +2626,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - qp = uobj_get_obj_read(qp, 
UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); if (!qp) return -EINVAL; @@ -2683,7 +2677,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); if (!qp) return -EINVAL; @@ -2791,7 +2785,7 @@ static void flow_resources_add(struct ib_uflow_resources *uflow_res, uflow_res->num++; } -static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext, +static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile, struct ib_uverbs_flow_spec *kern_spec, union ib_flow_spec *ib_spec, struct ib_uflow_resources *uflow_res) @@ -2820,7 +2814,7 @@ static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext, ib_spec->action.act = uobj_get_obj_read(flow_action, UVERBS_OBJECT_FLOW_ACTION, kern_spec->action.handle, - ucontext); + ufile); if (!ib_spec->action.act) return -EINVAL; ib_spec->action.size = @@ -2838,7 +2832,7 @@ static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext, uobj_get_obj_read(counters, UVERBS_OBJECT_COUNTERS, kern_spec->flow_count.handle, - ucontext); + ufile); if (!ib_spec->flow_count.counters) return -EINVAL; ib_spec->flow_count.size = @@ -3019,7 +3013,7 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec, kern_filter_sz, ib_spec); } -static int kern_spec_to_ib_spec(struct ib_ucontext *ucontext, +static int kern_spec_to_ib_spec(struct ib_uverbs_file *ufile, struct ib_uverbs_flow_spec *kern_spec, union ib_flow_spec *ib_spec, struct ib_uflow_resources *uflow_res) @@ -3028,7 +3022,7 @@ static int kern_spec_to_ib_spec(struct ib_ucontext *ucontext, return -EINVAL; if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG) - return kern_spec_to_ib_spec_action(ucontext, kern_spec, ib_spec, + return kern_spec_to_ib_spec_action(ufile, kern_spec, ib_spec, uflow_res); else return kern_spec_to_ib_spec_filter(kern_spec, ib_spec); @@ -3071,18 +3065,17 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EOPNOTSUPP; - obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, - file->ucontext); + obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, file); if (IS_ERR(obj)) return PTR_ERR(obj); - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file); if (!pd) { err = -EINVAL; goto err_uobj; } - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); if (!cq) { err = -EINVAL; goto err_put_pd; @@ -3186,8 +3179,7 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, return -EOPNOTSUPP; resp.response_length = required_resp_len; - uobj = uobj_get_write(UVERBS_OBJECT_WQ, cmd.wq_handle, - file->ucontext); + uobj = uobj_get_write(UVERBS_OBJECT_WQ, cmd.wq_handle, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -3237,7 +3229,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS)) return -EINVAL; - wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file->ucontext); + wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file); if (!wq) return -EINVAL; @@ -3331,8 +3323,8 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, for (num_read_wqs = 0; num_read_wqs < 
num_wq_handles; num_read_wqs++) { - wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, wqs_handles[num_read_wqs], - file->ucontext); + wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, + wqs_handles[num_read_wqs], file); if (!wq) { err = -EINVAL; goto put_wqs; @@ -3341,7 +3333,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, wqs[num_read_wqs] = wq; } - uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file); if (IS_ERR(uobj)) { err = PTR_ERR(uobj); goto put_wqs; @@ -3504,13 +3496,13 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, kern_flow_attr = &cmd.flow_attr; } - uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file); if (IS_ERR(uobj)) { err = PTR_ERR(uobj); goto err_free_attr; } - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); if (!qp) { err = -EINVAL; goto err_uobj; @@ -3546,8 +3538,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) && cmd.flow_attr.size >= ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) { - err = kern_spec_to_ib_spec(file->ucontext, kern_spec, ib_spec, - uflow_res); + err = kern_spec_to_ib_spec(file, kern_spec, ib_spec, uflow_res); if (err) goto err_free; @@ -3644,8 +3635,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, struct ib_srq_init_attr attr; int ret; - obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, - file->ucontext); + obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, file); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -3654,7 +3644,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, if (cmd->srq_type == IB_SRQT_XRC) { xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle, - file->ucontext); + file); if (IS_ERR(xrcd_uobj)) { ret = -EINVAL; goto err; @@ -3671,15 +3661,15 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, } if (ib_srq_has_cq(cmd->srq_type)) { - attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->cq_handle, - file->ucontext); + attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, + cmd->cq_handle, file); if (!attr.ext.cq) { ret = -EINVAL; goto err_put_xrcd; } } - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file); if (!pd) { ret = -EINVAL; goto err_put_cq; @@ -3851,7 +3841,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, out_len); - srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file); if (!srq) return -EINVAL; @@ -3882,7 +3872,7 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file); if (!srq) return -EINVAL; @@ -3919,8 +3909,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_SRQ, cmd.srq_handle, - file->ucontext); + uobj = uobj_get_write(UVERBS_OBJECT_SRQ, cmd.srq_handle, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -4098,7 +4087,7 @@ int 
ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file, if (cmd.attr_mask > IB_CQ_MODERATE) return -EOPNOTSUPP; - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); if (!cq) return -EINVAL; diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 13b92020edd0..3e3f108f0912 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -56,20 +56,24 @@ static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type, #define uobj_get_type(_object) UVERBS_OBJECT(_object).type_attrs -#define uobj_get_read(_type, _id, _ucontext) \ - __uobj_get(uobj_get_type(_type), false, (_ucontext)->ufile, _id) +#define uobj_get_read(_type, _id, _ufile) \ + __uobj_get(uobj_get_type(_type), false, _ufile, _id) -#define uobj_get_obj_read(_object, _type, _id, _ucontext) \ -({ \ - struct ib_uobject *__uobj = \ - __uobj_get(uobj_get_type(_type), \ - false, (_ucontext)->ufile, _id); \ - \ - (struct ib_##_object *)(IS_ERR(__uobj) ? NULL : __uobj->object);\ -}) +static inline void *_uobj_get_obj_read(const struct uverbs_obj_type *type, + int id, struct ib_uverbs_file *ufile) +{ + struct ib_uobject *uobj = __uobj_get(type, false, ufile, id); + + if (IS_ERR(uobj)) + return NULL; + return uobj->object; +} +#define uobj_get_obj_read(_object, _type, _id, _ufile) \ + ((struct ib_##_object *)_uobj_get_obj_read(uobj_get_type(_type), _id, \ + _ufile)) -#define uobj_get_write(_type, _id, _ucontext) \ - __uobj_get(uobj_get_type(_type), true, (_ucontext)->ufile, _id) +#define uobj_get_write(_type, _id, _ufile) \ + __uobj_get(uobj_get_type(_type), true, _ufile, _id) int __uobj_perform_destroy(const struct uverbs_obj_type *type, int id, struct ib_uverbs_file *ufile, int success_res); @@ -105,13 +109,12 @@ static inline void uobj_alloc_abort(struct ib_uobject *uobj) } static inline struct ib_uobject *__uobj_alloc(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext) + struct ib_uverbs_file *ufile) { - return rdma_alloc_begin_uobject(type, ucontext->ufile); + return rdma_alloc_begin_uobject(type, ufile); } -#define uobj_alloc(_type, ucontext) \ - __uobj_alloc(uobj_get_type(_type), ucontext) +#define uobj_alloc(_type, _ufile) __uobj_alloc(uobj_get_type(_type), _ufile) #endif -- cgit From 6f258884ddac5195e76dc916ff5a3965db7836aa Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 4 Jul 2018 11:32:10 +0300 Subject: IB/uverbs: Tidy up remaining references to ucontext Unnecessary clutter, to indirect through ucontext when the ufile would do. Generally most of the code should only be working with ufile, except for a few places that touch the driver interface. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/rdma_core.c | 19 +++++++++---------- drivers/infiniband/core/uverbs_main.c | 2 -- drivers/infiniband/core/uverbs_std_types_cq.c | 7 +++---- drivers/infiniband/core/uverbs_std_types_dm.c | 3 +-- 4 files changed, 13 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index dcaf3813ee78..98e55cb118ab 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -180,19 +180,19 @@ static int idr_add_uobj(struct ib_uobject *uobj) int ret; idr_preload(GFP_KERNEL); - spin_lock(&uobj->context->ufile->idr_lock); + spin_lock(&uobj->ufile->idr_lock); /* * We start with allocating an idr pointing to NULL. 
This represents an * object which isn't initialized yet. We'll replace it later on with * the real object once we commit. */ - ret = idr_alloc(&uobj->context->ufile->idr, NULL, 0, + ret = idr_alloc(&uobj->ufile->idr, NULL, 0, min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT); if (ret >= 0) uobj->id = ret; - spin_unlock(&uobj->context->ufile->idr_lock); + spin_unlock(&uobj->ufile->idr_lock); idr_preload_end(); return ret < 0 ? ret : 0; @@ -204,9 +204,9 @@ static int idr_add_uobj(struct ib_uobject *uobj) */ static void uverbs_idr_remove_uobj(struct ib_uobject *uobj) { - spin_lock(&uobj->context->ufile->idr_lock); - idr_remove(&uobj->context->ufile->idr, uobj->id); - spin_unlock(&uobj->context->ufile->idr_lock); + spin_lock(&uobj->ufile->idr_lock); + idr_remove(&uobj->ufile->idr, uobj->id); + spin_unlock(&uobj->ufile->idr_lock); } /* Returns the ib_uobject or an error. The caller should check for IS_ERR. */ @@ -519,14 +519,13 @@ out: static void alloc_commit_idr_uobject(struct ib_uobject *uobj) { - spin_lock(&uobj->context->ufile->idr_lock); + spin_lock(&uobj->ufile->idr_lock); /* * We already allocated this IDR with a NULL object, so * this shouldn't fail. */ - WARN_ON(idr_replace(&uobj->context->ufile->idr, - uobj, uobj->id)); - spin_unlock(&uobj->context->ufile->idr_lock); + WARN_ON(idr_replace(&uobj->ufile->idr, uobj, uobj->id)); + spin_unlock(&uobj->ufile->idr_lock); } static void alloc_commit_fd_uobject(struct ib_uobject *uobj) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 037c8975d9f0..5d64b9c481b9 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -883,8 +883,6 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) file->device = dev; spin_lock_init(&file->idr_lock); idr_init(&file->idr); - file->ucontext = NULL; - file->async_file = NULL; kref_init(&file->ref); mutex_init(&file->mutex); mutex_init(&file->cleanup_mutex); diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 1a14c245b511..32c75cdd162f 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -61,7 +61,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { - struct ib_ucontext *ucontext = file->ucontext; struct ib_ucq_object *obj; struct ib_udata uhw; int ret; @@ -98,7 +97,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, uverbs_uobject_get(ev_file_uobj); } - if (attr.comp_vector >= ucontext->ufile->device->num_comp_vectors) { + if (attr.comp_vector >= file->device->num_comp_vectors) { ret = -EINVAL; goto err_event_file; } @@ -106,7 +105,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, obj = container_of(uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE), typeof(*obj), uobject); - obj->uverbs_file = ucontext->ufile; + obj->uverbs_file = file; obj->comp_events_reported = 0; obj->async_events_reported = 0; INIT_LIST_HEAD(&obj->comp_list); @@ -115,7 +114,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, /* Temporary, only until drivers get the new uverbs_attr_bundle */ create_udata(attrs, &uhw); - cq = ib_dev->create_cq(ib_dev, &attr, ucontext, &uhw); + cq = ib_dev->create_cq(ib_dev, &attr, file->ucontext, &uhw); if (IS_ERR(cq)) { ret = PTR_ERR(cq); goto err_event_file; diff --git 
a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c index 8a2548173a90..9e148e322523 100644 --- a/drivers/infiniband/core/uverbs_std_types_dm.c +++ b/drivers/infiniband/core/uverbs_std_types_dm.c @@ -50,7 +50,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev, struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { - struct ib_ucontext *ucontext = file->ucontext; struct ib_dm_alloc_attr attr = {}; struct ib_uobject *uobj; struct ib_dm *dm; @@ -71,7 +70,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev, uobj = uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE)->obj_attr.uobject; - dm = ib_dev->alloc_dm(ib_dev, ucontext, &attr, attrs); + dm = ib_dev->alloc_dm(ib_dev, file->ucontext, &attr, attrs); if (IS_ERR(dm)) return PTR_ERR(dm); -- cgit From d0259e82e7d214340aed33732e9a5ce448564921 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 4 Jul 2018 11:32:11 +0300 Subject: IB/uverbs: Remove ib_uobject_file The only purpose for this structure was to hold the ib_uverbs_file pointer, but now that is part of the standard ib_uobject the structure no longer makes any sense, so get rid of it. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/rdma_core.c | 47 +++++++++++---------------- drivers/infiniband/core/uverbs.h | 2 +- drivers/infiniband/core/uverbs_cmd.c | 8 ++--- drivers/infiniband/core/uverbs_main.c | 8 +++-- drivers/infiniband/core/uverbs_std_types.c | 6 ++-- drivers/infiniband/core/uverbs_std_types_cq.c | 2 +- include/rdma/ib_verbs.h | 6 ---- include/rdma/uverbs_types.h | 5 +-- 8 files changed, 35 insertions(+), 49 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 98e55cb118ab..847c6a2f1346 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -334,7 +334,6 @@ static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *t container_of(type, struct uverbs_obj_fd_type, type); int new_fd; struct ib_uobject *uobj; - struct ib_uobject_file *uobj_file; struct file *filp; new_fd = get_unused_fd_flags(O_CLOEXEC); @@ -347,10 +346,9 @@ static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *t return uobj; } - uobj_file = container_of(uobj, struct ib_uobject_file, uobj); filp = anon_inode_getfile(fd_type->name, fd_type->fops, - uobj_file, + uobj, fd_type->flags); if (IS_ERR(filp)) { put_unused_fd(new_fd); @@ -358,11 +356,11 @@ static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *t return (void *)filp; } - uobj_file->uobj.id = new_fd; - uobj_file->uobj.object = filp; - uobj_file->ufile = ufile; + uobj->id = new_fd; + uobj->object = filp; + uobj->ufile = ufile; INIT_LIST_HEAD(&uobj->list); - kref_get(&uobj_file->ufile->ref); + kref_get(&ufile->ref); return uobj; } @@ -398,10 +396,8 @@ static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj, static void alloc_abort_fd_uobject(struct ib_uobject *uobj) { - struct ib_uobject_file *uobj_file = - container_of(uobj, struct ib_uobject_file, uobj); struct file *filp = uobj->object; - int id = uobj_file->uobj.id; + int id = uobj->id; /* Unsuccessful NEW */ fput(filp); @@ -413,9 +409,7 @@ static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj, { const struct uverbs_obj_fd_type *fd_type = container_of(uobj->type, struct uverbs_obj_fd_type, type); - struct ib_uobject_file *uobj_file = - container_of(uobj, struct 
ib_uobject_file, uobj); - int ret = fd_type->context_closed(uobj_file, why); + int ret = fd_type->context_closed(uobj, why); if (ib_is_destroy_retryable(ret, why, uobj)) return ret; @@ -425,7 +419,7 @@ static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj, return ret; } - uobj_file->uobj.context = NULL; + uobj->context = NULL; return ret; } @@ -530,14 +524,11 @@ static void alloc_commit_idr_uobject(struct ib_uobject *uobj) static void alloc_commit_fd_uobject(struct ib_uobject *uobj) { - struct ib_uobject_file *uobj_file = - container_of(uobj, struct ib_uobject_file, uobj); - - fd_install(uobj_file->uobj.id, uobj->object); + fd_install(uobj->id, uobj->object); /* This shouldn't be used anymore. Use the file object instead */ - uobj_file->uobj.id = 0; + uobj->id = 0; /* Get another reference as we export this to the fops */ - uverbs_uobject_get(&uobj_file->uobj); + uverbs_uobject_get(uobj); } int rdma_alloc_commit_uobject(struct ib_uobject *uobj) @@ -638,19 +629,19 @@ const struct uverbs_obj_type_class uverbs_idr_class = { }; EXPORT_SYMBOL(uverbs_idr_class); -static void _uverbs_close_fd(struct ib_uobject_file *uobj_file) +static void _uverbs_close_fd(struct ib_uobject *uobj) { - struct ib_uverbs_file *ufile = uobj_file->ufile; + struct ib_uverbs_file *ufile = uobj->ufile; int ret; mutex_lock(&ufile->cleanup_mutex); /* uobject was either already cleaned up or is cleaned up right now anyway */ - if (!uobj_file->uobj.context || + if (!uobj->context || !down_read_trylock(&ufile->cleanup_rwsem)) goto unlock; - ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE); + ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_CLOSE); up_read(&ufile->cleanup_rwsem); if (ret) pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n"); @@ -660,11 +651,11 @@ unlock: void uverbs_close_fd(struct file *f) { - struct ib_uobject_file *uobj_file = f->private_data; - struct kref *uverbs_file_ref = &uobj_file->ufile->ref; + struct ib_uobject *uobj = f->private_data; + struct kref *uverbs_file_ref = &uobj->ufile->ref; - _uverbs_close_fd(uobj_file); - uverbs_uobject_put(&uobj_file->uobj); + _uverbs_close_fd(uobj); + uverbs_uobject_put(uobj); kref_put(uverbs_file_ref, ib_uverbs_release_file); } diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 8b0a8ec98ac8..cbb727f0959f 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -130,7 +130,7 @@ struct ib_uverbs_async_event_file { }; struct ib_uverbs_completion_event_file { - struct ib_uobject_file uobj_file; + struct ib_uobject uobj; struct ib_uverbs_event_queue ev_queue; }; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index ed61bd5b9c2b..bb2df271d3ff 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -52,7 +52,6 @@ ib_uverbs_lookup_comp_file(int fd, struct ib_uverbs_file *ufile) { struct ib_uobject *uobj = uobj_get_read(UVERBS_OBJECT_COMP_CHANNEL, fd, ufile); - struct ib_uobject_file *uobj_file; if (IS_ERR(uobj)) return (void *)uobj; @@ -60,9 +59,8 @@ ib_uverbs_lookup_comp_file(int fd, struct ib_uverbs_file *ufile) uverbs_uobject_get(uobj); uobj_put_read(uobj); - uobj_file = container_of(uobj, struct ib_uobject_file, uobj); - return container_of(uobj_file, struct ib_uverbs_completion_event_file, - uobj_file); + return container_of(uobj, struct ib_uverbs_completion_event_file, + uobj); } ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, @@ -927,7 +925,7 @@ 
ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, resp.fd = uobj->id; ev_file = container_of(uobj, struct ib_uverbs_completion_event_file, - uobj_file.uobj); + uobj); ib_uverbs_init_event_queue(&ev_file->ev_queue); if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 5d64b9c481b9..e9a50ee0ba76 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -191,7 +191,7 @@ void ib_uverbs_release_ucq(struct ib_uverbs_file *file, } spin_unlock_irq(&ev_file->ev_queue.lock); - uverbs_uobject_put(&ev_file->uobj_file.uobj); + uverbs_uobject_put(&ev_file->uobj); } spin_lock_irq(&file->async_file->ev_queue.lock); @@ -346,7 +346,7 @@ static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf, filp->private_data; return ib_uverbs_event_read(&comp_ev_file->ev_queue, - comp_ev_file->uobj_file.ufile, filp, + comp_ev_file->uobj.ufile, filp, buf, count, pos, sizeof(struct ib_uverbs_comp_event_desc)); } @@ -428,7 +428,9 @@ static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp) static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp) { - struct ib_uverbs_completion_event_file *file = filp->private_data; + struct ib_uobject *uobj = filp->private_data; + struct ib_uverbs_completion_event_file *file = container_of( + uobj, struct ib_uverbs_completion_event_file, uobj); struct ib_uverbs_event *entry, *tmp; spin_lock_irq(&file->ev_queue.lock); diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 2f1a0b6598fe..912519fda3ba 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -190,12 +190,12 @@ static int uverbs_free_pd(struct ib_uobject *uobject, return 0; } -static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_file, +static int uverbs_hot_unplug_completion_event_file(struct ib_uobject *uobj, enum rdma_remove_reason why) { struct ib_uverbs_completion_event_file *comp_event_file = - container_of(uobj_file, struct ib_uverbs_completion_event_file, - uobj_file); + container_of(uobj, struct ib_uverbs_completion_event_file, + uobj); struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue; spin_lock_irq(&event_queue->lock); diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 32c75cdd162f..9d39a9e1e411 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -93,7 +93,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, if (!IS_ERR(ev_file_uobj)) { ev_file = container_of(ev_file_uobj, struct ib_uverbs_completion_event_file, - uobj_file.uobj); + uobj); uverbs_uobject_get(ev_file_uobj); } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9c04cb5e4041..031d121190fd 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1540,12 +1540,6 @@ struct ib_uobject { const struct uverbs_obj_type *type; }; -struct ib_uobject_file { - struct ib_uobject uobj; - /* ufile contains the lock between context release and file close */ - struct ib_uverbs_file *ufile; -}; - struct ib_udata { const void __user *inbuf; void __user *outbuf; diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index 5290d8d34e9a..e2fc9db466d3 100644 --- a/include/rdma/uverbs_types.h +++ 
b/include/rdma/uverbs_types.h @@ -139,7 +139,7 @@ struct uverbs_obj_fd_type { * the driver is removed or the process terminated. */ struct uverbs_obj_type type; - int (*context_closed)(struct ib_uobject_file *uobj_file, + int (*context_closed)(struct ib_uobject *uobj, enum rdma_remove_reason why); const struct file_operations *fops; const char *name; @@ -156,7 +156,8 @@ extern const struct uverbs_obj_type_class uverbs_fd_class; {.type = { \ .type_class = &uverbs_fd_class, \ .obj_size = (_obj_size) + \ - UVERBS_BUILD_BUG_ON((_obj_size) < sizeof(struct ib_uobject_file)), \ + UVERBS_BUILD_BUG_ON((_obj_size) < \ + sizeof(struct ib_uobject)), \ }, \ .context_closed = _context_closed, \ .fops = _fops, \ -- cgit From 76bc79ccceea09a19f681dff7fd633fb8fb95c79 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 6 Jul 2018 11:40:39 -0600 Subject: IB/uverbs: Replace ib_ucq_object uverbs_file with the one in ib_uobject Now that ib_uobject has an ib_uverbs_file we don't need this extra one in ib_ucq_object. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs.h | 1 - drivers/infiniband/core/uverbs_cmd.c | 1 - drivers/infiniband/core/uverbs_main.c | 2 +- drivers/infiniband/core/uverbs_std_types_cq.c | 1 - 4 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index cbb727f0959f..3ddd39e435e1 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -204,7 +204,6 @@ struct ib_uwq_object { struct ib_ucq_object { struct ib_uobject uobject; - struct ib_uverbs_file *uverbs_file; struct list_head comp_list; struct list_head async_list; u32 comp_events_reported; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index bb2df271d3ff..d048cabc4246 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -976,7 +976,6 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, } obj->uobject.user_handle = cmd->user_handle; - obj->uverbs_file = file; obj->comp_events_reported = 0; obj->async_events_reported = 0; INIT_LIST_HEAD(&obj->comp_list); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index e9a50ee0ba76..8425718bebbd 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -538,7 +538,7 @@ void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) struct ib_ucq_object *uobj = container_of(event->element.cq->uobject, struct ib_ucq_object, uobject); - ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle, + ib_uverbs_async_handler(uobj->uobject.ufile, uobj->uobject.user_handle, event->event, &uobj->async_list, &uobj->async_events_reported); } diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 9d39a9e1e411..ce9c9c66bae4 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -105,7 +105,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, obj = container_of(uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE), typeof(*obj), uobject); - obj->uverbs_file = file; obj->comp_events_reported = 0; obj->async_events_reported = 0; INIT_LIST_HEAD(&obj->comp_list); -- cgit From 87a37ce9e400e40daee537ff95343e3c94743c6d Mon Sep 17 00:00:00 2001 From: Håkon Bugge Date: Wed, 4 Jul 2018 12:48:01 +0200 Subject: IB/cm: Remove unused and erroneous msg 
sequence encoding In cm_form_tid(), a two-bit message sequence number is OR'ed into bits 31-30 of the lower TID value. After commit f06d26537559 ("IB/cm: Randomize starting comm ID"), the local_id is XOR'ed with a 32-bit random value. Hence, bits 31-30 in the lower TID now have an arbitrary value and it makes no sense to OR in the message sequence number. Adding to that, the IDR routines used in cm_alloc_id() have always been able to return a value with bit 30 set. In addition, said bits are never checked. Hence, remove the encoding and the corresponding enum. Signed-off-by: Håkon Bugge Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 14 ++++++-------- drivers/infiniband/core/cm_msgs.h | 7 ------- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index de699f67a755..4724cb09b69d 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1256,14 +1256,12 @@ new_id: } EXPORT_SYMBOL(ib_cm_insert_listen); -static __be64 cm_form_tid(struct cm_id_private *cm_id_priv, - enum cm_msg_sequence msg_seq) +static __be64 cm_form_tid(struct cm_id_private *cm_id_priv) { u64 hi_tid, low_tid; hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32; - low_tid = (u64) ((__force u32)cm_id_priv->id.local_id | - (msg_seq << 30)); + low_tid = (u64)cm_id_priv->id.local_id; return cpu_to_be64(hi_tid | low_tid); } @@ -1291,7 +1289,7 @@ static void cm_format_req(struct cm_req_msg *req_msg, pri_path->opa.slid); cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID, - cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ)); + cm_form_tid(cm_id_priv)); req_msg->local_comm_id = cm_id_priv->id.local_id; req_msg->service_id = param->service_id; @@ -2465,7 +2463,7 @@ static void cm_format_dreq(struct cm_dreq_msg *dreq_msg, u8 private_data_len) { cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID, - cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ)); + cm_form_tid(cm_id_priv)); dreq_msg->local_comm_id = cm_id_priv->id.local_id; dreq_msg->remote_comm_id = cm_id_priv->id.remote_id; cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn); @@ -3096,7 +3094,7 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg, alt_ext = opa_is_extended_lid(alternate_path->opa.dlid, alternate_path->opa.slid); cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID, - cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP)); + cm_form_tid(cm_id_priv)); lap_msg->local_comm_id = cm_id_priv->id.local_id; lap_msg->remote_comm_id = cm_id_priv->id.remote_id; cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn); @@ -3472,7 +3470,7 @@ static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg, struct ib_cm_sidr_req_param *param) { cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID, - cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR)); + cm_form_tid(cm_id_priv)); sidr_req_msg->request_id = cm_id_priv->id.local_id; sidr_req_msg->pkey = param->path->pkey; sidr_req_msg->service_id = param->service_id; diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 8b76f0ef965e..476d4309576d 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -44,13 +44,6 @@ #define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */ -enum cm_msg_sequence { - CM_MSG_SEQUENCE_REQ, - CM_MSG_SEQUENCE_LAP, - CM_MSG_SEQUENCE_DREQ, - CM_MSG_SEQUENCE_SIDR -}; - struct cm_req_msg { struct ib_mad_hdr hdr; -- cgit 
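To make the arithmetic of the simplified cm_form_tid() above concrete, here is a small standalone sketch; the cm_id_private plumbing and the final cpu_to_be64() conversion are omitted, and the sample values are made up:

	#include <stdint.h>
	#include <stdio.h>

	/* After the patch the full randomized 32-bit local_id forms the low
	 * word of the TID; no sequence bits are OR'ed into bits 31-30. */
	static uint64_t form_tid(uint32_t hi_tid, uint32_t local_id)
	{
		return ((uint64_t)hi_tid << 32) | (uint64_t)local_id;
	}

	int main(void)
	{
		/* hi_tid identifies the MAD agent; local_id is randomized. */
		printf("tid = 0x%016llx\n",
		       (unsigned long long)form_tid(0x12345678, 0xc0de1d42));
		return 0;
	}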
From 299c36b1efe7a34ff255788592d835e913163a4d Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 5 Jul 2018 00:52:47 +0300 Subject: RDMA/ipoib: Use min_t() macro instead of min() Use min_t() macro to avoid the casting when using min() macro, also fix the type of "length" and "wc->byte_len" to be "unsigned int" and "u32" which is the right type for each one of them. Signed-off-by: Kamal Heib Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 23cb1adc636f..65b305d91ce3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -547,7 +547,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space, 0, PAGE_SIZE); --skb_shinfo(skb)->nr_frags; } else { - size = min(length, (unsigned) PAGE_SIZE); + size = min_t(unsigned int, length, PAGE_SIZE); skb_frag_size_set(frag, size); skb->data_len += size; @@ -641,8 +641,9 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } } - frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len, - (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE; + frags = PAGE_ALIGN(wc->byte_len - + min_t(u32, wc->byte_len, IPOIB_CM_HEAD_SIZE)) / + PAGE_SIZE; newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags, mapping, GFP_ATOMIC); -- cgit From 0578cdad190b19b23965cb05aebaf2aeb4b6b7e5 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 5 Jul 2018 00:52:48 +0300 Subject: RDMA/ipoib: Prefer unsigned int to bare use of unsigned This commit replaces all the unsigned definitions in favour of 'unsigned int' which is preferred. Signed-off-by: Kamal Heib Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib.h | 14 +++++++------- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 10 +++++----- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_main.c | 4 +++- 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index a50b062ed13e..3dd130afb571 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -252,11 +252,11 @@ struct ipoib_cm_tx { struct ipoib_neigh *neigh; struct ipoib_path *path; struct ipoib_tx_buf *tx_ring; - unsigned tx_head; - unsigned tx_tail; + unsigned int tx_head; + unsigned int tx_tail; unsigned long flags; u32 mtu; - unsigned max_send_sge; + unsigned int max_send_sge; }; struct ipoib_cm_rx_buf { @@ -373,8 +373,8 @@ struct ipoib_dev_priv { struct ipoib_rx_buf *rx_ring; struct ipoib_tx_buf *tx_ring; - unsigned tx_head; - unsigned tx_tail; + unsigned int tx_head; + unsigned int tx_tail; struct ib_sge tx_sge[MAX_SKB_FRAGS + 1]; struct ib_ud_wr tx_wr; struct ib_wc send_wc[MAX_SEND_CQE]; @@ -404,7 +404,7 @@ struct ipoib_dev_priv { #endif u64 hca_caps; struct ipoib_ethtool_st ethtool; - unsigned max_send_sge; + unsigned int max_send_sge; bool sm_fullmember_sendonly_support; const struct net_device_ops *rn_ops; }; @@ -414,7 +414,7 @@ struct ipoib_ah { struct ib_ah *ah; struct list_head list; struct kref ref; - unsigned last_send; + unsigned int last_send; int valid; }; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 65b305d91ce3..9b374ce354b4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -275,7 
+275,7 @@ static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev, static int ipoib_cm_modify_rx_qp(struct net_device *dev, struct ib_cm_id *cm_id, struct ib_qp *qp, - unsigned psn) + unsigned int psn) { struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ib_qp_attr qp_attr; @@ -422,7 +422,7 @@ err_free_1: static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, struct ib_qp *qp, struct ib_cm_req_event_param *req, - unsigned psn) + unsigned int psn) { struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_cm_data data = {}; @@ -446,7 +446,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even struct net_device *dev = cm_id->context; struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_cm_rx *p; - unsigned psn; + unsigned int psn; int ret; ipoib_dbg(priv, "REQ arrived\n"); @@ -713,7 +713,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_tx_buf *tx_req; int rc; - unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb); + unsigned int usable_sge = tx->max_send_sge - !!skb_headlen(skb); if (unlikely(skb->len > tx->mtu)) { ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", @@ -1429,7 +1429,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work) struct net_device *dev = priv->dev; struct sk_buff *skb; unsigned long flags; - unsigned mtu = priv->mcast_mtu; + unsigned int mtu = priv->mcast_mtu; netif_tx_lock_bh(dev); spin_lock_irqsave(&priv->lock, flags); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index fb3728bf7e40..f782f4b02569 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -569,7 +569,7 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_tx_buf *tx_req; int hlen, rc; void *phead; - unsigned usable_sge = priv->max_send_sge - !!skb_headlen(skb); + unsigned int usable_sge = priv->max_send_sge - !!skb_headlen(skb); if (skb_is_gso(skb)) { hlen = skb_transport_offset(skb) + tcp_hdrlen(skb); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 983e52b871f3..15344f91d96b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1201,7 +1201,9 @@ static void ipoib_timeout(struct net_device *dev) static int ipoib_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, - const void *daddr, const void *saddr, unsigned len) + const void *daddr, + const void *saddr, + unsigned int len) { struct ipoib_header *header; -- cgit From b1b639708f7431c85df4f70ae0d82c336705d7d4 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 5 Jul 2018 00:52:51 +0300 Subject: RDMA/ipoib: Fix use of sizeof() Make sure to use sizeof(...) instead of sizeof ... which is more preferred. 
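As a minimal illustration of the preferred form (struct foo and the call site are hypothetical; both variants compile to identical code):

	struct foo *p = kmalloc(sizeof *p, GFP_KERNEL);   /* old style */
	struct foo *q = kmalloc(sizeof(*q), GFP_KERNEL);  /* preferred */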
Signed-off-by: Kamal Heib Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 16 ++++++++-------- drivers/infiniband/ulp/ipoib/ipoib_fs.c | 6 +++--- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 4 ++-- drivers/infiniband/ulp/ipoib/ipoib_main.c | 10 +++++----- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 6 +++--- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 2 +- 7 files changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 9b374ce354b4..582f199887b0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -363,7 +363,7 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i if (!rx->rx_ring) return -ENOMEM; - t = kmalloc(sizeof *t, GFP_KERNEL); + t = kmalloc(sizeof(*t), GFP_KERNEL); if (!t) { ret = -ENOMEM; goto err_free_1; @@ -432,7 +432,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE); rep.private_data = &data; - rep.private_data_len = sizeof data; + rep.private_data_len = sizeof(data); rep.flow_control = 0; rep.rnr_retry_count = req->rnr_retry_count; rep.srq = ipoib_cm_has_srq(dev); @@ -450,7 +450,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even int ret; ipoib_dbg(priv, "REQ arrived\n"); - p = kzalloc(sizeof *p, GFP_KERNEL); + p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) return -ENOMEM; p->dev = dev; @@ -658,7 +658,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } ipoib_cm_dma_unmap_rx(priv, frags, rx_ring[wr_id].mapping); - memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping); + memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof(*mapping)); ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", wc->byte_len, wc->slid); @@ -1095,7 +1095,7 @@ static int ipoib_cm_send_req(struct net_device *dev, req.qp_num = qp->qp_num; req.qp_type = qp->qp_type; req.private_data = &data; - req.private_data_len = sizeof data; + req.private_data_len = sizeof(data); req.flow_control = 0; req.starting_psn = 0; /* FIXME */ @@ -1153,7 +1153,7 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, ret = -ENOMEM; goto err_tx; } - memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring); + memset(p->tx_ring, 0, ipoib_sendq_size * sizeof(*p->tx_ring)); p->qp = ipoib_cm_create_tx_qp(p->dev, p); memalloc_noio_restore(noio_flag); @@ -1306,7 +1306,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_cm_tx *tx; - tx = kzalloc(sizeof *tx, GFP_ATOMIC); + tx = kzalloc(sizeof(*tx), GFP_ATOMIC); if (!tx) return NULL; @@ -1371,7 +1371,7 @@ static void ipoib_cm_tx_start(struct work_struct *work) neigh->daddr + QPN_AND_OPTIONS_OFFSET); goto free_neigh; } - memcpy(&pathrec, &p->path->pathrec, sizeof pathrec); + memcpy(&pathrec, &p->path->pathrec, sizeof(pathrec)); spin_unlock_irqrestore(&priv->lock, flags); netif_tx_unlock_bh(dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index ea302b054601..178488028734 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -262,15 +262,15 @@ static const struct file_operations ipoib_path_fops = { void ipoib_create_debug_files(struct net_device *dev) { struct 
ipoib_dev_priv *priv = ipoib_priv(dev); - char name[IFNAMSIZ + sizeof "_path"]; + char name[IFNAMSIZ + sizeof("_path")]; - snprintf(name, sizeof name, "%s_mcg", dev->name); + snprintf(name, sizeof(name), "%s_mcg", dev->name); priv->mcg_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO, ipoib_root, dev, &ipoib_mcg_fops); if (!priv->mcg_dentry) ipoib_warn(priv, "failed to create mcg debug file\n"); - snprintf(name, sizeof name, "%s_path", dev->name); + snprintf(name, sizeof(name), "%s_path", dev->name); priv->path_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO, ipoib_root, dev, &ipoib_path_fops); if (!priv->path_dentry) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index f782f4b02569..5f5d42bad2ea 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -58,7 +58,7 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev, struct ipoib_ah *ah; struct ib_ah *vah; - ah = kmalloc(sizeof *ah, GFP_KERNEL); + ah = kmalloc(sizeof(*ah), GFP_KERNEL); if (!ah) return ERR_PTR(-ENOMEM); @@ -203,7 +203,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } memcpy(mapping, priv->rx_ring[wr_id].mapping, - IPOIB_UD_RX_SG * sizeof *mapping); + IPOIB_UD_RX_SG * sizeof(*mapping)); /* * If we can't allocate a new RX buffer, dump diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 15344f91d96b..9bcd487e51c2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -634,7 +634,7 @@ struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev) { struct ipoib_path_iter *iter; - iter = kmalloc(sizeof *iter, GFP_KERNEL); + iter = kmalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return NULL; @@ -885,7 +885,7 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid) if (!priv->broadcast) return NULL; - path = kzalloc(sizeof *path, GFP_ATOMIC); + path = kzalloc(sizeof(*path), GFP_ATOMIC); if (!path) return NULL; @@ -1207,7 +1207,7 @@ static int ipoib_hard_header(struct sk_buff *skb, { struct ipoib_header *header; - header = skb_push(skb, sizeof *header); + header = skb_push(skb, sizeof(*header)); header->proto = htons(type); header->reserved = 0; @@ -1375,7 +1375,7 @@ static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr, { struct ipoib_neigh *neigh; - neigh = kzalloc(sizeof *neigh, GFP_ATOMIC); + neigh = kzalloc(sizeof(*neigh), GFP_ATOMIC); if (!neigh) return NULL; @@ -2366,7 +2366,7 @@ static void ipoib_add_one(struct ib_device *device) int p; int count = 0; - dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); + dev_list = kmalloc(sizeof(*dev_list), GFP_KERNEL); if (!dev_list) return; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 6709328d90f8..f696ea49c97a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -140,7 +140,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev, { struct ipoib_mcast *mcast; - mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC); + mcast = kzalloc(sizeof(*mcast), can_sleep ? 
GFP_KERNEL : GFP_ATOMIC); if (!mcast) return NULL; @@ -917,7 +917,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) if (!ipoib_mcast_addr_is_valid(ha->addr, dev->broadcast)) continue; - memcpy(mgid.raw, ha->addr + 4, sizeof mgid); + memcpy(mgid.raw, ha->addr + 4, sizeof(mgid)); mcast = __ipoib_mcast_find(dev, &mgid); if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { @@ -997,7 +997,7 @@ struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev) { struct ipoib_mcast_iter *iter; - iter = kmalloc(sizeof *iter, GFP_KERNEL); + iter = kmalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return NULL; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index ba4669f24014..8dbf305508a0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -52,7 +52,7 @@ int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca, if (set_qkey) { ret = -ENOMEM; - qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); + qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL); if (!qp_attr) goto out; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 55a9b71ed05a..b067ad5e4c7e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -130,7 +130,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags)) return -EPERM; - snprintf(intf_name, sizeof intf_name, "%s.%04x", + snprintf(intf_name, sizeof(intf_name), "%s.%04x", ppriv->dev->name, pkey); if (!mutex_trylock(&ppriv->sysfs_mutex)) -- cgit From 28e39894ed4fecd19956ae672a312f65419bd703 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 5 Jul 2018 09:38:38 -0700 Subject: RDMA/core: Remove ib_find_cached_gid() and ib_find_cached_gid_by_port() Remove these two functions since all their callers have been removed. See also commit ea8c2d8f6014 ("RDMA/core: Remove unused ib cache functions"). 
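For reference, a caller that needs the old behaviour can open-code the removed wrappers; this sketch mirrors the body of the deleted ib_find_cached_gid_by_port()/ib_find_cached_gid() rather than any specific in-tree caller:

	#include <rdma/ib_cache.h>

	static int example_find_gid(struct ib_device *device,
				    const union ib_gid *gid,
				    enum ib_gid_type gid_type,
				    struct net_device *ndev, u16 *index)
	{
		const struct ib_gid_attr *attr;

		attr = rdma_find_gid(device, gid, gid_type, ndev);
		if (IS_ERR(attr))
			return PTR_ERR(attr);

		if (index)
			*index = attr->index;
		rdma_put_gid_attr(attr);	/* drop the table reference */
		return 0;
	}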
Signed-off-by: Bart Van Assche Reviewed-by: Parav Pandit Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 38 -------------------------------------- 1 file changed, 38 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 357a5cb328c7..c5fa67c72f23 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -747,26 +747,6 @@ const struct ib_gid_attr *rdma_find_gid_by_filter( return res; } -int ib_find_cached_gid_by_port(struct ib_device *ib_dev, - const union ib_gid *gid, - enum ib_gid_type gid_type, - u8 port, struct net_device *ndev, - u16 *index) -{ - const struct ib_gid_attr *res; - - res = rdma_find_gid_by_port(ib_dev, gid, gid_type, port, ndev); - if (IS_ERR(res)) - return PTR_ERR(res); - - if (index) - *index = res->index; - rdma_put_gid_attr(res); - return 0; - -} -EXPORT_SYMBOL(ib_find_cached_gid_by_port); - static struct ib_gid_table *alloc_gid_table(int sz) { struct ib_gid_table *table = kzalloc(sizeof(*table), GFP_KERNEL); @@ -1038,24 +1018,6 @@ const struct ib_gid_attr *rdma_find_gid(struct ib_device *device, } EXPORT_SYMBOL(rdma_find_gid); -int ib_find_cached_gid(struct ib_device *device, const union ib_gid *gid, - enum ib_gid_type gid_type, struct net_device *ndev, - u8 *port_num, u16 *index) -{ - const struct ib_gid_attr *res; - - res = rdma_find_gid(device, gid, gid_type, ndev); - if (IS_ERR(res)) - return PTR_ERR(res); - if (port_num) - *port_num = res->port_num; - if (index) - *index = res->index; - rdma_put_gid_attr(res); - return 0; -} -EXPORT_SYMBOL(ib_find_cached_gid); - int ib_get_cached_pkey(struct ib_device *device, u8 port_num, int index, -- cgit From f8c2d2280cf67cd85cff6b42c989bbf1c7d0af61 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 5 Jul 2018 10:51:03 -0700 Subject: RDMA/core: Remove set-but-not-used variables Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index c5fa67c72f23..0bee1f4b914e 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -824,12 +824,9 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, { union ib_gid gid = { }; struct ib_gid_attr gid_attr; - struct ib_gid_table *table; unsigned int gid_type; unsigned long mask; - table = rdma_gid_table(ib_dev, port); - mask = GID_ATTR_FIND_MASK_GID_TYPE | GID_ATTR_FIND_MASK_DEFAULT | GID_ATTR_FIND_MASK_NETDEV; @@ -1293,13 +1290,10 @@ static void ib_cache_update(struct ib_device *device, struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache; int i; int ret; - struct ib_gid_table *table; if (!rdma_is_port_valid(device, port)) return; - table = rdma_gid_table(device, port); - tprops = kmalloc(sizeof *tprops, GFP_KERNEL); if (!tprops) return; -- cgit From 4c5743bc4fe3233cecc1c184a773c79c8ee45bbe Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 5 Jul 2018 10:51:35 -0700 Subject: IB/nes: Fix a compiler warning Avoid that the following compiler warning is reported when building with W=1: drivers/infiniband/hw/nes/nes_hw.c:646:51: warning: suggest braces around empty body in an 'if' statement [-Wempty-body] Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/nes/nes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 00c27291dc26..18340942d75f 100644 --- 
a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -159,7 +159,7 @@ do { \ #define NES_EVENT_TIMEOUT 1200000 #else -#define nes_debug(level, fmt, args...) +#define nes_debug(level, fmt, args...) do {} while (0) #define assert(expr) do {} while (0) #define NES_EVENT_TIMEOUT 100000 -- cgit From aa9d5ffbb72746dd1de9d92be7e8b437c41cf0a3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 5 Jul 2018 10:52:25 -0700 Subject: RDMA/ocrdma: Remove a set-but-not-used variable Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index a51b80bfadb3..8cc9459a9f9b 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -167,13 +167,11 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, const struct ib_gid_attr *sgid_attr; struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); - const struct ib_global_route *grh; if ((attr->type != RDMA_AH_ATTR_TYPE_ROCE) || !(rdma_ah_get_ah_flags(attr) & IB_AH_GRH)) return ERR_PTR(-EINVAL); - grh = rdma_ah_read_grh(attr); if (atomic_cmpxchg(&dev->update_sl, 1, 0)) ocrdma_init_service_level(dev); -- cgit From efdbda81d910d8fafcd25e556b8a5722b836e162 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 5 Jul 2018 16:02:00 -0700 Subject: IB/iser: Remove set-but-not-used variables This patch does not change any functionality. Signed-off-by: Bart Van Assche Cc: Max Gurtovoy Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/iser/iscsi_iser.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 61cc47da2fec..3fecd87c9f2b 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -610,12 +610,10 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, uint32_t initial_cmdsn) { struct iscsi_cls_session *cls_session; - struct iscsi_session *session; struct Scsi_Host *shost; struct iser_conn *iser_conn = NULL; struct ib_conn *ib_conn; u32 max_fr_sectors; - u16 max_cmds; shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0); if (!shost) @@ -661,7 +659,6 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, mutex_unlock(&iser_conn->state_mutex); } else { shost->can_queue = min_t(u16, cmds_max, ISER_DEF_XMIT_CMDS_MAX); - max_cmds = ISER_DEF_XMIT_CMDS_MAX; if (iscsi_host_add(shost, NULL)) goto free_host; } @@ -683,7 +680,6 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, initial_cmdsn, 0); if (!cls_session) goto remove_host; - session = cls_session->dd_data; return cls_session; -- cgit From 2f229bcf25ca076e8d8f46b9b3dccad5751040d2 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 6 Jul 2018 09:45:32 -0700 Subject: RDMA/rxe: Simplify the error handling code in rxe_create_ah() This patch not only simplifies the error handling code in rxe_create_ah() but also removes the dead code that was left behind by commit 47ec38666210 ("RDMA: Convert drivers to use sgid_attr instead of sgid_index"). 
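The shape of the removed problem, distilled into a hypothetical user-space helper (none of these names come from the rxe driver):

	#include <stdlib.h>

	struct thing { int id; };

	static struct thing *create_thing(void)
	{
		struct thing *t = malloc(sizeof(*t));

		if (!t)
			return NULL;
		return t;

		free(t);	/* unreachable after the return above: dead
				 * code of the kind this patch deletes */
	}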
Signed-off-by: Bart Van Assche Reviewed-by: Parav Pandit Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_verbs.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 9cfd440cebe1..1188e163204d 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -241,24 +241,17 @@ static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, err = rxe_av_chk_attr(rxe, attr); if (err) - goto err1; + return ERR_PTR(err); ah = rxe_alloc(&rxe->ah_pool); - if (!ah) { - err = -ENOMEM; - goto err1; - } + if (!ah) + return ERR_PTR(-ENOMEM); rxe_add_ref(pd); ah->pd = pd; rxe_init_av(rxe, attr, &ah->av); return &ah->ibah; - - rxe_drop_ref(pd); - rxe_drop_ref(ah); -err1: - return ERR_PTR(err); } static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) -- cgit From cbd8e988eb94ee54d890c103b4947bf4a8839fae Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 6 Jul 2018 10:27:06 -0700 Subject: RDMA/cxgb3: Make iwch_poll_cq_one() easier to analyze Introduce the function __iwch_poll_cq_one() to make iwch_poll_cq_one() easier to analyze for static source code analyzers. This patch avoids that sparse reports the following: drivers/infiniband/hw/cxgb3/iwch_cq.c:187:9: warning: context imbalance in 'iwch_poll_cq_one' - different lock contexts for basic block Compile-tested only. Signed-off-by: Bart Van Assche Cc: Steve Wise Acked-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb3/iwch_cq.c | 64 ++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c index 0a8542c20804..a098c0140580 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cq.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c @@ -32,38 +32,16 @@ #include "iwch_provider.h" #include "iwch.h" -/* - * Get one cq entry from cxio and map it to openib. - * - * Returns: - * 0 EMPTY; - * 1 cqe returned - * -EAGAIN caller must try again - * any other -errno fatal error - */ -static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, - struct ib_wc *wc) +static int __iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, + struct iwch_qp *qhp, struct ib_wc *wc) { - struct iwch_qp *qhp = NULL; - struct t3_cqe cqe, *rd_cqe; - struct t3_wq *wq; + struct t3_wq *wq = qhp ? &qhp->wq : NULL; + struct t3_cqe cqe; u32 credit = 0; u8 cqe_flushed; u64 cookie; int ret = 1; - rd_cqe = cxio_next_cqe(&chp->cq); - - if (!rd_cqe) - return 0; - - qhp = get_qhp(rhp, CQE_QPID(*rd_cqe)); - if (!qhp) - wq = NULL; - else { - spin_lock(&qhp->lock); - wq = &(qhp->wq); - } ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit); if (t3a_device(chp->rhp) && credit) { @@ -79,7 +57,7 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, ret = 1; wc->wr_id = cookie; - wc->qp = &qhp->ibqp; + wc->qp = qhp ? &qhp->ibqp : NULL; wc->vendor_err = CQE_STATUS(cqe); wc->wc_flags = 0; @@ -182,8 +160,38 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, } } out: - if (wq) + return ret; +} + +/* + * Get one cq entry from cxio and map it to openib. 
+ * + * Returns: + * 0 EMPTY; + * 1 cqe returned + * -EAGAIN caller must try again + * any other -errno fatal error + */ +static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, + struct ib_wc *wc) +{ + struct iwch_qp *qhp; + struct t3_cqe *rd_cqe; + int ret; + + rd_cqe = cxio_next_cqe(&chp->cq); + + if (!rd_cqe) + return 0; + + qhp = get_qhp(rhp, CQE_QPID(*rd_cqe)); + if (qhp) { + spin_lock(&qhp->lock); + ret = __iwch_poll_cq_one(rhp, chp, qhp, wc); spin_unlock(&qhp->lock); + } else { + ret = __iwch_poll_cq_one(rhp, chp, NULL, wc); + } return ret; } -- cgit From 4ab39e2f98f2f49e97c8dd8e239697bd0bab8103 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 6 Jul 2018 10:28:27 -0700 Subject: RDMA/cxgb4: Make c4iw_poll_cq_one() easier to analyze Introduce the function __c4iw_poll_cq_one() such that c4iw_poll_cq_one() becomes easier to analyze for static source code analyzers. This patch avoids that sparse reports the following: drivers/infiniband/hw/cxgb4/cq.c:401:36: warning: context imbalance in 'c4iw_flush_hw_cq' - unexpected unlock drivers/infiniband/hw/cxgb4/cq.c:824:9: warning: context imbalance in 'c4iw_poll_cq_one' - different lock contexts for basic block Compile-tested only. Signed-off-by: Bart Van Assche Cc: Steve Wise Acked-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cq.c | 62 +++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 2be2e1ac1b5f..a5280d8d002f 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -668,43 +668,22 @@ skip_cqe: return ret; } -/* - * Get one cq entry from c4iw and map it to openib. - * - * Returns: - * 0 cqe returned - * -ENODATA EMPTY; - * -EAGAIN caller must try again - * any other -errno fatal error - */ -static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) +static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp, + struct ib_wc *wc) { - struct c4iw_qp *qhp = NULL; - struct t4_cqe uninitialized_var(cqe), *rd_cqe; - struct t4_wq *wq; + struct t4_cqe cqe; + struct t4_wq *wq = qhp ? &qhp->wq : NULL; u32 credit = 0; u8 cqe_flushed; u64 cookie = 0; int ret; - ret = t4_next_cqe(&chp->cq, &rd_cqe); - - if (ret) - return ret; - - qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe)); - if (!qhp) - wq = NULL; - else { - spin_lock(&qhp->lock); - wq = &(qhp->wq); - } ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit); if (ret) goto out; wc->wr_id = cookie; - wc->qp = &qhp->ibqp; + wc->qp = qhp ? &qhp->ibqp : NULL; wc->vendor_err = CQE_STATUS(&cqe); wc->wc_flags = 0; @@ -819,8 +798,37 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) } } out: - if (wq) + return ret; +} + +/* + * Get one cq entry from c4iw and map it to openib. 
+ * + * Returns: + * 0 cqe returned + * -ENODATA EMPTY; + * -EAGAIN caller must try again + * any other -errno fatal error + */ +static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) +{ + struct c4iw_qp *qhp = NULL; + struct t4_cqe *rd_cqe; + int ret; + + ret = t4_next_cqe(&chp->cq, &rd_cqe); + + if (ret) + return ret; + + qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe)); + if (qhp) { + spin_lock(&qhp->lock); + ret = __c4iw_poll_cq_one(chp, qhp, wc); spin_unlock(&qhp->lock); + } else { + ret = __c4iw_poll_cq_one(chp, NULL, wc); + } return ret; } -- cgit From 222c7b1fd4dcc596e4a21dc93f49d9c976f7e314 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 6 Jul 2018 14:27:42 -0700 Subject: RDMA/rw: Fix rdma_rw_ctx_signature_init() kernel-doc header Fixes: 0e353e34e1e7 ("IB/core: add RW API support for signature MRs") Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 3ee0adfb45e9..474d65297afc 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -325,7 +325,7 @@ out_unmap_sg: EXPORT_SYMBOL(rdma_rw_ctx_init); /** - * rdma_rw_ctx_signature init - initialize a RW context with signature offload + * rdma_rw_ctx_signature_init - initialize a RW context with signature offload * @ctx: context to initialize * @qp: queue pair to operate on * @port_num: port num to which the connection is bound -- cgit From 60e6627f12a78203a093ca05b7bca15627747d81 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 6 Jul 2018 22:48:03 +0200 Subject: IB/mlx5: fix uaccess beyond "count" in debugfs read/write handlers In general, accessing userspace memory beyond the length of the supplied buffer in VFS read/write handlers can lead to both kernel memory corruption (via kernel_read()/kernel_write(), which can e.g. be triggered via sys_splice()) and privilege escalation inside userspace. In this case, the affected files are in debugfs (and should therefore only be accessible to root), and the read handlers check that *pos is zero (meaning that at least sys_splice() can't trigger kernel memory corruption). Because of the root requirement, this is not a security fix, but rather a cleanup. For the read handlers, fix it by using simple_read_from_buffer() instead of custom logic. Add min() calls to the write handlers. 
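A minimal sketch of the resulting pattern, assuming a hypothetical debugfs attribute that exposes a single integer (the file registration and locking are elided):

	#include <linux/fs.h>
	#include <linux/kernel.h>
	#include <linux/uaccess.h>

	static ssize_t demo_read(struct file *filp, char __user *buf,
				 size_t count, loff_t *pos)
	{
		char lbuf[20];
		int len;

		len = snprintf(lbuf, sizeof(lbuf), "%d\n", 42 /* the value */);
		/* Handles *pos and short reads; never copies past count. */
		return simple_read_from_buffer(buf, count, pos, lbuf, len);
	}

	static ssize_t demo_write(struct file *filp, const char __user *buf,
				  size_t count, loff_t *pos)
	{
		char lbuf[20] = {0};
		u32 var;

		/* Clamp so the copy can never run past the local buffer. */
		count = min(count, sizeof(lbuf) - 1);
		if (copy_from_user(lbuf, buf, count))
			return -EFAULT;
		if (sscanf(lbuf, "%u", &var) != 1)
			return -EINVAL;
		return count;
	}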
Fixes: 4a2da0b8c078 ("IB/mlx5: Add debug control parameters for congestion control") Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Jann Horn Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/cong.c | 9 +-------- drivers/infiniband/hw/mlx5/mr.c | 32 ++++++++------------------------ 2 files changed, 9 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c index 985fa2637390..7e4e358a4fd8 100644 --- a/drivers/infiniband/hw/mlx5/cong.c +++ b/drivers/infiniband/hw/mlx5/cong.c @@ -359,9 +359,6 @@ static ssize_t get_param(struct file *filp, char __user *buf, size_t count, int ret; char lbuf[11]; - if (*pos) - return 0; - ret = mlx5_ib_get_cc_params(param->dev, param->port_num, offset, &var); if (ret) return ret; @@ -370,11 +367,7 @@ static ssize_t get_param(struct file *filp, char __user *buf, size_t count, if (ret < 0) return ret; - if (copy_to_user(buf, lbuf, ret)) - return -EFAULT; - - *pos += ret; - return ret; + return simple_read_from_buffer(buf, count, pos, lbuf, ret); } static const struct file_operations dbg_cc_fops = { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 90a9c461cedc..308456d28afb 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -271,16 +271,16 @@ static ssize_t size_write(struct file *filp, const char __user *buf, { struct mlx5_cache_ent *ent = filp->private_data; struct mlx5_ib_dev *dev = ent->dev; - char lbuf[20]; + char lbuf[20] = {0}; u32 var; int err; int c; - if (copy_from_user(lbuf, buf, sizeof(lbuf))) + count = min(count, sizeof(lbuf) - 1); + if (copy_from_user(lbuf, buf, count)) return -EFAULT; c = order2idx(dev, ent->order); - lbuf[sizeof(lbuf) - 1] = 0; if (sscanf(lbuf, "%u", &var) != 1) return -EINVAL; @@ -310,19 +310,11 @@ static ssize_t size_read(struct file *filp, char __user *buf, size_t count, char lbuf[20]; int err; - if (*pos) - return 0; - err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size); if (err < 0) return err; - if (copy_to_user(buf, lbuf, err)) - return -EFAULT; - - *pos += err; - - return err; + return simple_read_from_buffer(buf, count, pos, lbuf, err); } static const struct file_operations size_fops = { @@ -337,16 +329,16 @@ static ssize_t limit_write(struct file *filp, const char __user *buf, { struct mlx5_cache_ent *ent = filp->private_data; struct mlx5_ib_dev *dev = ent->dev; - char lbuf[20]; + char lbuf[20] = {0}; u32 var; int err; int c; - if (copy_from_user(lbuf, buf, sizeof(lbuf))) + count = min(count, sizeof(lbuf) - 1); + if (copy_from_user(lbuf, buf, count)) return -EFAULT; c = order2idx(dev, ent->order); - lbuf[sizeof(lbuf) - 1] = 0; if (sscanf(lbuf, "%u", &var) != 1) return -EINVAL; @@ -372,19 +364,11 @@ static ssize_t limit_read(struct file *filp, char __user *buf, size_t count, char lbuf[20]; int err; - if (*pos) - return 0; - err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit); if (err < 0) return err; - if (copy_to_user(buf, lbuf, err)) - return -EFAULT; - - *pos += err; - - return err; + return simple_read_from_buffer(buf, count, pos, lbuf, err); } static const struct file_operations limit_fops = { -- cgit From 97202bbe22f8f0c225ba63a50acaf56d6796c990 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 6 Jul 2018 11:42:03 -0600 Subject: IB/uverbs: Do not use uverbs_cmd_mask in the ioctl path Instead we are now checking the function pointers directly. 
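The two checks side by side, lifted from the hunk below (the surrounding handler context is elided):

	/* Before: capability advertised through a command bitmask. */
	if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ))
		return -EOPNOTSUPP;

	/* After: support is implied by the driver filling in the methods. */
	if (!ib_dev->create_cq || !ib_dev->destroy_cq)
		return -EOPNOTSUPP;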
Get rid of both cases in ioctl and drop the nonsense idea that destroy can fail. Signed-off-by: Jason Gunthorpe Reviewed-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_std_types_cq.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index ce9c9c66bae4..5a6154345fa0 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -70,7 +70,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, struct ib_uverbs_completion_event_file *ev_file = NULL; struct ib_uobject *ev_file_uobj; - if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ)) + if (!ib_dev->create_cq || !ib_dev->destroy_cq) return -EOPNOTSUPP; ret = uverbs_copy_from(&attr.comp_vector, attrs, @@ -185,9 +185,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev, obj = container_of(uobj, struct ib_ucq_object, uobject); - if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ)) - return -EOPNOTSUPP; - ret = rdma_explicit_destroy(uobj); if (ret) return ret; -- cgit From e3f1ed1f5ade66620ea727bdbd5a5e0091108a92 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 8 Jul 2018 12:55:43 +0300 Subject: RDMA/mlx5: Remove unused port number parameter Clean up a little bit code to drop unused port_num parameter. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 54d45e13de34..f86b5ad2dd43 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4606,7 +4606,7 @@ static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) } } -static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num) +static int mlx5_enable_eth(struct mlx5_ib_dev *dev) { int err; @@ -5712,9 +5712,9 @@ int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev) return 0; } -static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev, - u8 port_num) +static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev) { + u8 port_num; int i; for (i = 0; i < dev->num_ports; i++) { @@ -5737,6 +5737,8 @@ static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev, (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); + port_num = mlx5_core_native_port_num(dev->mdev) - 1; + return mlx5_add_netdev_notifier(dev, port_num); } @@ -5753,14 +5755,12 @@ int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev) enum rdma_link_layer ll; int port_type_cap; int err = 0; - u8 port_num; - port_num = mlx5_core_native_port_num(dev->mdev) - 1; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) - err = mlx5_ib_stage_common_roce_init(dev, port_num); + err = mlx5_ib_stage_common_roce_init(dev); return err; } @@ -5775,19 +5775,17 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; int port_type_cap; - u8 port_num; int err; - port_num = mlx5_core_native_port_num(dev->mdev) - 1; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { - err = mlx5_ib_stage_common_roce_init(dev, 
port_num); + err = mlx5_ib_stage_common_roce_init(dev); if (err) return err; - err = mlx5_enable_eth(dev, port_num); + err = mlx5_enable_eth(dev); if (err) goto cleanup; } @@ -5976,8 +5974,6 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev, ib_dealloc_device((struct ib_device *)dev); } -static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num); - void *__mlx5_ib_add(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile) { @@ -6107,7 +6103,7 @@ static const struct mlx5_ib_profile nic_rep_profile = { mlx5_ib_stage_rep_reg_cleanup), }; -static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num) +static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev) { struct mlx5_ib_multiport_info *mpi; struct mlx5_ib_dev *dev; @@ -6141,8 +6137,6 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num) if (!bound) { list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list); dev_dbg(&mdev->pdev->dev, "no suitable IB device found to bind to, added to unaffiliated list.\n"); - } else { - mlx5_ib_dbg(dev, "bound port %u\n", port_num + 1); } mutex_unlock(&mlx5_ib_multiport_mutex); @@ -6160,11 +6154,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); - if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) { - u8 port_num = mlx5_core_native_port_num(mdev) - 1; - - return mlx5_ib_add_slave_port(mdev, port_num); - } + if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) + return mlx5_ib_add_slave_port(mdev); dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); if (!dev) -- cgit From 921c0f5ba58e4064deb18b4985a202508fc5527f Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 8 Jul 2018 13:40:30 +0300 Subject: IB/mlx5: Honor cnt_set_id_valid flag instead of set_id It is incorrect to depend on the set_id value to know whether counters were allocated or not; the set_id_valid field is set to true when counters are allocated. Therefore, use set_id_valid when deciding whether to free counters. Cc: # 4.15 Fixes: aac4492ef23a ("IB/mlx5: Update counter implementation for dual port RoCE") Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index f86b5ad2dd43..0f482d2a760f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4699,7 +4699,7 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) int i; for (i = 0; i < dev->num_ports; i++) { - if (dev->port[i].cnts.set_id) + if (dev->port[i].cnts.set_id_valid) mlx5_core_dealloc_q_counter(dev->mdev, dev->port[i].cnts.set_id); kfree(dev->port[i].cnts.names); -- cgit From 07e7056aff6c4377667fe1606ae7707b375b1caa Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 8 Jul 2018 13:41:17 +0300 Subject: IB/core: Simplify check for RoCE route resolve roce_resolve_route_from_path() resolves the route based on the netdevice of the GID attribute, so there is no point in checking again whether the resolved route matches the interface on which it arrived. 
Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/sa_query.c | 38 +++++--------------------------------- 1 file changed, 5 insertions(+), 33 deletions(-) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 7005afb8a712..f269e74a4480 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1227,13 +1227,9 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num) return src_path_mask; } -static int -roce_resolve_route_from_path(struct ib_device *device, u8 port_num, - struct sa_path_rec *rec, - const struct ib_gid_attr *attr) +static int roce_resolve_route_from_path(struct sa_path_rec *rec, + const struct ib_gid_attr *attr) { - struct net_device *resolved_dev; - struct net_device *idev; struct rdma_dev_addr dev_addr = {}; union { struct sockaddr _sockaddr; @@ -1253,9 +1249,6 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num, */ dev_addr.net = &init_net; - if (!device->get_netdev) - return -EOPNOTSUPP; - rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid); rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid); @@ -1270,28 +1263,8 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num, rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2) return -EINVAL; - idev = device->get_netdev(device, port_num); - if (!idev) - return -ENODEV; - - resolved_dev = dev_get_by_index(dev_addr.net, - dev_addr.bound_dev_if); - if (!resolved_dev) { - ret = -ENODEV; - goto done; - } - rcu_read_lock(); - if (attr->ndev != resolved_dev || - (resolved_dev != idev && - !rdma_is_upper_dev_rcu(idev, resolved_dev))) - ret = -EHOSTUNREACH; - rcu_read_unlock(); - dev_put(resolved_dev); -done: - dev_put(idev); - if (!ret) - rec->roce.route_resolved = true; - return ret; + rec->roce.route_resolved = true; + return 0; } static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num, @@ -1346,8 +1319,7 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num, rdma_ah_set_static_rate(ah_attr, rec->rate); if (sa_path_is_roce(rec)) { - ret = roce_resolve_route_from_path(device, port_num, rec, - gid_attr); + ret = roce_resolve_route_from_path(rec, gid_attr); if (ret) return ret; -- cgit From 3fda24324582ae790961137ecb88b3f0f2feb1f0 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 9 Jul 2018 22:21:03 +0300 Subject: RDMA/ipoib: Fix return code from ipoib_cm_dev_init The proper return code is -EOPNOTSUPP and not -ENOSYS when the function isn't supported; also make sure that ipoib_transport_dev_init() returns the actual error code when ipoib_cm_dev_init() fails for any reason other than being unsupported. 
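Reduced to its core, the pattern in the ipoib_transport_dev_init() hunk below looks like this (the surrounding queue sizing is elided):

	ret = ipoib_cm_dev_init(dev);
	if (!ret) {
		/* connected mode available: size the queues for it */
	} else if (ret != -EOPNOTSUPP) {
		return ret;	/* real failure: propagate it, not -ENODEV */
	}
	/* -EOPNOTSUPP: connected mode just isn't supported; carry on. */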
Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib.h | 2 +- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 3dd130afb571..e255a7e5a4c3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -729,7 +729,7 @@ void ipoib_cm_dev_stop(struct net_device *dev) static inline int ipoib_cm_dev_init(struct net_device *dev) { - return -ENOSYS; + return -EOPNOTSUPP; } static inline diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 8dbf305508a0..9f36ca786df8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -168,8 +168,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) else size += ipoib_recvq_size * ipoib_max_conn_qp; } else - if (ret != -ENOSYS) - return -ENODEV; + if (ret != -EOPNOTSUPP) + return ret; req_vec = (priv->port - 1) * 2; -- cgit From 2f944c0fbf58b1f390e5e61affd98ba0061214c6 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 4 Jul 2018 15:57:48 +0300 Subject: RDMA: Fix storage of PortInfo CapabilityMask in the kernel The internal flag IP_BASED_GIDS was added to a field that was being used to hold the port Info CapabilityMask without considering the effects this will have. Since most drivers just use the value from the HW MAD it means IP_BASED_GIDS will also become set on any HW that sets the IBA flag IsOtherLocalChangesNoticeSupported - which is not intended. Fix this by keeping port_cap_flags only for the IBA CapabilityMask value and store unrelated flags externally. Move the bit definitions for this to ib_mad.h to make it clear what is happening. To keep the uAPI unchanged define a new set of flags in the uapi header that are only used by ib_uverbs_query_port_resp.port_cap_flags which match the current flags supported in rdma-core, and the values exposed by the current kernel. Fixes: b4a26a27287a ("IB: Report using RoCE IP based gids in port caps") Signed-off-by: Jason Gunthorpe Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_cmd.c | 23 ++++++++++++++- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 4 +-- drivers/infiniband/hw/mlx4/main.c | 3 +- drivers/infiniband/hw/mlx5/main.c | 4 +-- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 9 +++--- drivers/infiniband/hw/qedr/verbs.c | 2 +- drivers/infiniband/hw/qib/qib_verbs.h | 3 -- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 3 +- include/rdma/ib_mad.h | 33 +++++++++++++++++++++ include/rdma/ib_verbs.h | 31 ++------------------ include/uapi/rdma/ib_user_ioctl_verbs.h | 38 +++++++++++++++++++++++++ include/uapi/rdma/ib_user_verbs.h | 2 +- 12 files changed, 110 insertions(+), 45 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index d048cabc4246..1bc9ceb16b70 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -241,6 +241,27 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, return in_len; } +/* + * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the + * PortInfo CapabilityMask, but was extended with unique bits. 
+ */ +static u32 make_port_cap_flags(const struct ib_port_attr *attr) +{ + u32 res; + + /* All IBA CapabilityMask bits are passed through here, except bit 26, + * which is overridden with IP_BASED_GIDS. This is due to a historical + * mistake in the implementation of IP_BASED_GIDS. Otherwise all other + * bits match the IBA definition across all kernel versions. + */ + res = attr->port_cap_flags & ~(u32)IB_UVERBS_PCF_IP_BASED_GIDS; + + if (attr->ip_gids) + res |= IB_UVERBS_PCF_IP_BASED_GIDS; + + return res; +} + ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, @@ -267,7 +288,7 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, resp.max_mtu = attr.max_mtu; resp.active_mtu = attr.active_mtu; resp.gid_tbl_len = attr.gid_tbl_len; - resp.port_cap_flags = attr.port_cap_flags; + resp.port_cap_flags = make_port_cap_flags(&attr); resp.max_msg_sz = attr.max_msg_sz; resp.bad_pkey_cntr = attr.bad_pkey_cntr; resp.qkey_viol_cntr = attr.qkey_viol_cntr; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 6c0c6d3426e0..492c750f7ed6 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -244,8 +244,8 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, port_attr->gid_tbl_len = dev_attr->max_sgid; port_attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | IB_PORT_DEVICE_MGMT_SUP | - IB_PORT_VENDOR_CLASS_SUP | - IB_PORT_IP_BASED_GIDS; + IB_PORT_VENDOR_CLASS_SUP; + port_attr->ip_gids = true; port_attr->max_msg_sz = (u32)BNXT_RE_MAX_MR_SIZE_LOW; port_attr->bad_pkey_cntr = 0; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 5bc522ca9431..ca0f1ee26091 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -762,7 +762,8 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, IB_WIDTH_4X : IB_WIDTH_1X; props->active_speed = (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ? 
IB_SPEED_FDR : IB_SPEED_QDR; - props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS; + props->port_cap_flags = IB_PORT_CM_SUP; + props->ip_gids = true; props->gid_tbl_len = mdev->dev->caps.gid_table_len[port]; props->max_msg_sz = mdev->dev->caps.max_msg_sz; props->pkey_tbl_len = 1; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 0f482d2a760f..d1f1beefe599 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -419,8 +419,8 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, translate_eth_proto_oper(eth_prot_oper, &props->active_speed, &props->active_width); - props->port_cap_flags |= IB_PORT_CM_SUP; - props->port_cap_flags |= IB_PORT_IP_BASED_GIDS; + props->port_cap_flags |= IB_PORT_CM_SUP; + props->ip_gids = true; props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev, roce_address_table_size); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 1f057fdb3a8c..9d0431e01dce 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -197,11 +197,10 @@ int ocrdma_query_port(struct ib_device *ibdev, props->sm_lid = 0; props->sm_sl = 0; props->state = port_state; - props->port_cap_flags = - IB_PORT_CM_SUP | - IB_PORT_REINIT_SUP | - IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP | - IB_PORT_IP_BASED_GIDS; + props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | + IB_PORT_DEVICE_MGMT_SUP | + IB_PORT_VENDOR_CLASS_SUP; + props->ip_gids = true; props->gid_tbl_len = OCRDMA_MAX_SGID; props->pkey_tbl_len = 1; props->bad_pkey_cntr = 0; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 0c41d54f586b..b82c5d5fb0e3 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -225,7 +225,7 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) attr->lmc = 0; attr->sm_lid = 0; attr->sm_sl = 0; - attr->port_cap_flags = IB_PORT_IP_BASED_GIDS; + attr->ip_gids = true; if (rdma_protocol_iwarp(&dev->ibdev, 1)) { attr->gid_tbl_len = 1; attr->pkey_tbl_len = 1; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index f9a46768a19a..e72562a8959a 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -78,9 +78,6 @@ struct qib_verbs_txreq; #define QIB_VENDOR_IPG cpu_to_be16(0xFFA0) -/* XXX Should be defined in ib_verbs.h enum ib_port_cap_flags */ -#define IB_PORT_OTHER_LOCAL_CHANGES_SUP (1 << 26) - #define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL) /* Values for set/get portinfo VLCap OperationalVLs */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index 816cc285daf6..b65d10b0a875 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -155,7 +155,8 @@ int pvrdma_query_port(struct ib_device *ibdev, u8 port, props->gid_tbl_len = resp->attrs.gid_tbl_len; props->port_cap_flags = pvrdma_port_cap_flags_to_ib(resp->attrs.port_cap_flags); - props->port_cap_flags |= IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS; + props->port_cap_flags |= IB_PORT_CM_SUP; + props->ip_gids = true; props->max_msg_sz = resp->attrs.max_msg_sz; props->bad_pkey_cntr = resp->attrs.bad_pkey_cntr; props->qkey_viol_cntr = resp->attrs.qkey_viol_cntr; diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 
2f4f1768ded4..f6ba366051c7 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -262,6 +262,39 @@ struct ib_class_port_info { __be32 trap_qkey; }; +/* PortInfo CapabilityMask */ +enum ib_port_capability_mask_bits { + IB_PORT_SM = 1 << 1, + IB_PORT_NOTICE_SUP = 1 << 2, + IB_PORT_TRAP_SUP = 1 << 3, + IB_PORT_OPT_IPD_SUP = 1 << 4, + IB_PORT_AUTO_MIGR_SUP = 1 << 5, + IB_PORT_SL_MAP_SUP = 1 << 6, + IB_PORT_MKEY_NVRAM = 1 << 7, + IB_PORT_PKEY_NVRAM = 1 << 8, + IB_PORT_LED_INFO_SUP = 1 << 9, + IB_PORT_SM_DISABLED = 1 << 10, + IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11, + IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12, + IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14, + IB_PORT_CM_SUP = 1 << 16, + IB_PORT_SNMP_TUNNEL_SUP = 1 << 17, + IB_PORT_REINIT_SUP = 1 << 18, + IB_PORT_DEVICE_MGMT_SUP = 1 << 19, + IB_PORT_VENDOR_CLASS_SUP = 1 << 20, + IB_PORT_DR_NOTICE_SUP = 1 << 21, + IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22, + IB_PORT_BOOT_MGMT_SUP = 1 << 23, + IB_PORT_LINK_LATENCY_SUP = 1 << 24, + IB_PORT_CLIENT_REG_SUP = 1 << 25, + IB_PORT_OTHER_LOCAL_CHANGES_SUP = 1 << 26, + IB_PORT_LINK_SPEED_WIDTH_TABLE_SUP = 1 << 27, + IB_PORT_VENDOR_SPECIFIC_MADS_TABLE_SUP = 1 << 28, + IB_PORT_MCAST_PKEY_TRAP_SUPPRESSION_SUP = 1 << 29, + IB_PORT_MCAST_FDB_TOP_SUP = 1 << 30, + IB_PORT_HIERARCHY_INFO_SUP = 1ULL << 31, +}; + #define OPA_CLASS_PORT_INFO_PR_SUPPORT BIT(26) struct opa_class_port_info { diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 031d121190fd..98e025759791 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -432,33 +432,6 @@ enum ib_port_state { IB_PORT_ACTIVE_DEFER = 5 }; -enum ib_port_cap_flags { - IB_PORT_SM = 1 << 1, - IB_PORT_NOTICE_SUP = 1 << 2, - IB_PORT_TRAP_SUP = 1 << 3, - IB_PORT_OPT_IPD_SUP = 1 << 4, - IB_PORT_AUTO_MIGR_SUP = 1 << 5, - IB_PORT_SL_MAP_SUP = 1 << 6, - IB_PORT_MKEY_NVRAM = 1 << 7, - IB_PORT_PKEY_NVRAM = 1 << 8, - IB_PORT_LED_INFO_SUP = 1 << 9, - IB_PORT_SM_DISABLED = 1 << 10, - IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11, - IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12, - IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14, - IB_PORT_CM_SUP = 1 << 16, - IB_PORT_SNMP_TUNNEL_SUP = 1 << 17, - IB_PORT_REINIT_SUP = 1 << 18, - IB_PORT_DEVICE_MGMT_SUP = 1 << 19, - IB_PORT_VENDOR_CLASS_SUP = 1 << 20, - IB_PORT_DR_NOTICE_SUP = 1 << 21, - IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22, - IB_PORT_BOOT_MGMT_SUP = 1 << 23, - IB_PORT_LINK_LATENCY_SUP = 1 << 24, - IB_PORT_CLIENT_REG_SUP = 1 << 25, - IB_PORT_IP_BASED_GIDS = 1 << 26, -}; - enum ib_port_width { IB_WIDTH_1X = 1, IB_WIDTH_4X = 2, @@ -597,6 +570,9 @@ struct ib_port_attr { enum ib_mtu max_mtu; enum ib_mtu active_mtu; int gid_tbl_len; + unsigned int grh_required:1; + unsigned int ip_gids:1; + /* This is the value from PortInfo CapabilityMask, defined by IBA */ u32 port_cap_flags; u32 max_msg_sz; u32 bad_pkey_cntr; @@ -612,7 +588,6 @@ struct ib_port_attr { u8 active_width; u8 active_speed; u8 phys_state; - bool grh_required; }; enum ib_device_modify_flags { diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index 625545d862d7..1220f1df3ded 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -40,6 +40,44 @@ #define RDMA_UAPI_PTR(_type, _name) __aligned_u64 _name #endif +enum ib_uverbs_query_port_cap_flags { + IB_UVERBS_PCF_SM = 1 << 1, + IB_UVERBS_PCF_NOTICE_SUP = 1 << 2, + IB_UVERBS_PCF_TRAP_SUP = 1 << 3, + IB_UVERBS_PCF_OPT_IPD_SUP = 1 << 4, + IB_UVERBS_PCF_AUTO_MIGR_SUP = 1 << 5, + IB_UVERBS_PCF_SL_MAP_SUP = 1 << 6, + IB_UVERBS_PCF_MKEY_NVRAM 
= 1 << 7, + IB_UVERBS_PCF_PKEY_NVRAM = 1 << 8, + IB_UVERBS_PCF_LED_INFO_SUP = 1 << 9, + IB_UVERBS_PCF_SM_DISABLED = 1 << 10, + IB_UVERBS_PCF_SYS_IMAGE_GUID_SUP = 1 << 11, + IB_UVERBS_PCF_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12, + IB_UVERBS_PCF_EXTENDED_SPEEDS_SUP = 1 << 14, + IB_UVERBS_PCF_CM_SUP = 1 << 16, + IB_UVERBS_PCF_SNMP_TUNNEL_SUP = 1 << 17, + IB_UVERBS_PCF_REINIT_SUP = 1 << 18, + IB_UVERBS_PCF_DEVICE_MGMT_SUP = 1 << 19, + IB_UVERBS_PCF_VENDOR_CLASS_SUP = 1 << 20, + IB_UVERBS_PCF_DR_NOTICE_SUP = 1 << 21, + IB_UVERBS_PCF_CAP_MASK_NOTICE_SUP = 1 << 22, + IB_UVERBS_PCF_BOOT_MGMT_SUP = 1 << 23, + IB_UVERBS_PCF_LINK_LATENCY_SUP = 1 << 24, + IB_UVERBS_PCF_CLIENT_REG_SUP = 1 << 25, + /* + * IsOtherLocalChangesNoticeSupported is aliased by IP_BASED_GIDS and + * is inaccessible + */ + IB_UVERBS_PCF_LINK_SPEED_WIDTH_TABLE_SUP = 1 << 27, + IB_UVERBS_PCF_VENDOR_SPECIFIC_MADS_TABLE_SUP = 1 << 28, + IB_UVERBS_PCF_MCAST_PKEY_TRAP_SUPPRESSION_SUP = 1 << 29, + IB_UVERBS_PCF_MCAST_FDB_TOP_SUP = 1 << 30, + IB_UVERBS_PCF_HIERARCHY_INFO_SUP = 1ULL << 31, + + /* NOTE this is an internal flag, not an IBA flag */ + IB_UVERBS_PCF_IP_BASED_GIDS = 1 << 26, +}; + enum ib_uverbs_flow_action_esp_keymat { IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM, }; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 4f9991de8e3a..0a9070abb3f8 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -279,7 +279,7 @@ struct ib_uverbs_query_port { }; struct ib_uverbs_query_port_resp { - __u32 port_cap_flags; + __u32 port_cap_flags; /* see ib_uverbs_query_port_cap_flags */ __u32 max_msg_sz; __u32 bad_pkey_cntr; __u32 qkey_viol_cntr; -- cgit From 958200ad8e838bef3b36f9b0674923172b8d9da5 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 4 Jul 2018 15:57:49 +0300 Subject: RDMA/hfi1: Move grh_required into update_sm_ah grh_required is intended to be a global setting where all AV's will require a GRH, not just the sm_lid. Move the special logic to the creation of the SM AH. Signed-off-by: Jason Gunthorpe Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/sa_query.c | 28 ++++++++++++++++++---------- drivers/infiniband/hw/hfi1/verbs.c | 9 --------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index f269e74a4480..fdfdbb2bea7a 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -2300,16 +2300,24 @@ static void update_sm_ah(struct work_struct *work) rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid); rdma_ah_set_sl(&ah_attr, port_attr.sm_sl); rdma_ah_set_port_num(&ah_attr, port->port_num); - if (port_attr.grh_required) { - if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA) { - rdma_ah_set_make_grd(&ah_attr, true); - } else { - rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH); - rdma_ah_set_subnet_prefix(&ah_attr, - cpu_to_be64(port_attr.subnet_prefix)); - rdma_ah_set_interface_id(&ah_attr, - cpu_to_be64(IB_SA_WELL_KNOWN_GUID)); - } + + /* + * The OPA sm_lid of 0xFFFF needs special handling so that it can be + * differentiated from a permissive LID of 0xFFFF. 
We set the + * grh_required flag here so the SA can program the DGID in the + * address handle appropriately + */ + if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA && + (port_attr.grh_required || + port_attr.sm_lid == be16_to_cpu(IB_LID_PERMISSIVE))) + rdma_ah_set_make_grd(&ah_attr, true); + + if (ah_attr.type == RDMA_AH_ATTR_TYPE_IB && port_attr.grh_required) { + rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH); + rdma_ah_set_subnet_prefix(&ah_attr, + cpu_to_be64(port_attr.subnet_prefix)); + rdma_ah_set_interface_id(&ah_attr, + cpu_to_be64(IB_SA_WELL_KNOWN_GUID)); } new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 5cef1224fa9c..2b07a5667ec8 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1498,15 +1498,6 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num, props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu : mtu_to_enum(ppd->ibmtu, IB_MTU_4096); - /* - * sm_lid of 0xFFFF needs special handling so that it can - * be differentiated from a permissve LID of 0xFFFF. - * We set the grh_required flag here so the SA can program - * the DGID in the address handle appropriately - */ - if (props->sm_lid == be16_to_cpu(IB_LID_PERMISSIVE)) - props->grh_required = true; - return 0; } -- cgit From b02289b3d60f79ba0831051a7743d8fdb4110355 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Wed, 4 Jul 2018 15:57:50 +0300 Subject: RDMA: Validate grh_required when handling AVs Extend the existing grh_required flag to check when AV's are handled that a GRH is present. Since we don't want to do query_port during the AV checks for performance reasons move the flag into the immutable_data. Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/sa_query.c | 8 ++++++-- drivers/infiniband/core/verbs.c | 3 ++- drivers/infiniband/hw/mlx5/main.c | 23 ++++++++++++++++------- include/rdma/ib_verbs.h | 13 ++++++++++++- 4 files changed, 36 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index fdfdbb2bea7a..7b794a14d6e8 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -2276,6 +2276,7 @@ static void update_sm_ah(struct work_struct *work) struct ib_sa_sm_ah *new_ah; struct ib_port_attr port_attr; struct rdma_ah_attr ah_attr; + bool grh_required; if (ib_query_port(port->agent->device, port->port_num, &port_attr)) { pr_warn("Couldn't query port\n"); @@ -2301,6 +2302,9 @@ static void update_sm_ah(struct work_struct *work) rdma_ah_set_sl(&ah_attr, port_attr.sm_sl); rdma_ah_set_port_num(&ah_attr, port->port_num); + grh_required = rdma_is_grh_required(port->agent->device, + port->port_num); + /* * The OPA sm_lid of 0xFFFF needs special handling so that it can be * differentiated from a permissive LID of 0xFFFF. 
We set the @@ -2308,11 +2312,11 @@ static void update_sm_ah(struct work_struct *work) * address handle appropriately */ if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA && - (port_attr.grh_required || + (grh_required || port_attr.sm_lid == be16_to_cpu(IB_LID_PERMISSIVE))) rdma_ah_set_make_grd(&ah_attr, true); - if (ah_attr.type == RDMA_AH_ATTR_TYPE_IB && port_attr.grh_required) { + if (ah_attr.type == RDMA_AH_ATTR_TYPE_IB && grh_required) { rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH); rdma_ah_set_subnet_prefix(&ah_attr, cpu_to_be64(port_attr.subnet_prefix)); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 1bb6b6ff3341..b6ceb6fd6a67 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -390,7 +390,8 @@ static int rdma_check_ah_attr(struct ib_device *device, if (!rdma_is_port_valid(device, ah_attr->port_num)) return -EINVAL; - if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE && + if ((rdma_is_grh_required(device, ah_attr->port_num) || + ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) && !(ah_attr->ah_flags & IB_AH_GRH)) return -EINVAL; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d1f1beefe599..b7f94bc3811a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1220,7 +1220,6 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, props->qkey_viol_cntr = rep->qkey_violation_counter; props->subnet_timeout = rep->subnet_timeout; props->init_type_reply = rep->init_type_reply; - props->grh_required = rep->grh_required; err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port); if (err) @@ -4462,7 +4461,8 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr) cancel_work_sync(&devr->ports[port].pkey_change_work); } -static u32 get_core_cap_flags(struct ib_device *ibdev) +static u32 get_core_cap_flags(struct ib_device *ibdev, + struct mlx5_hca_vport_context *rep) { struct mlx5_ib_dev *dev = to_mdev(ibdev); enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1); @@ -4471,11 +4471,14 @@ static u32 get_core_cap_flags(struct ib_device *ibdev) bool raw_support = !mlx5_core_mp_enabled(dev->mdev); u32 ret = 0; + if (rep->grh_required) + ret |= RDMA_CORE_CAP_IB_GRH_REQUIRED; + if (ll == IB_LINK_LAYER_INFINIBAND) - return RDMA_CORE_PORT_IBA_IB; + return ret | RDMA_CORE_PORT_IBA_IB; if (raw_support) - ret = RDMA_CORE_PORT_RAW_PACKET; + ret |= RDMA_CORE_PORT_RAW_PACKET; if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP)) return ret; @@ -4498,17 +4501,23 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_attr attr; struct mlx5_ib_dev *dev = to_mdev(ibdev); enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num); + struct mlx5_hca_vport_context rep = {0}; int err; - immutable->core_cap_flags = get_core_cap_flags(ibdev); - err = ib_query_port(ibdev, port_num, &attr); if (err) return err; + if (ll == IB_LINK_LAYER_INFINIBAND) { + err = mlx5_query_hca_vport_context(dev->mdev, 0, port_num, 0, + &rep); + if (err) + return err; + } + immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; - immutable->core_cap_flags = get_core_cap_flags(ibdev); + immutable->core_cap_flags = get_core_cap_flags(ibdev, &rep); if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce)) immutable->max_mad_size = IB_MGMT_MAD_SIZE; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 98e025759791..b523298d486b 100644 --- a/include/rdma/ib_verbs.h +++ 
b/include/rdma/ib_verbs.h @@ -529,6 +529,7 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct( #define RDMA_CORE_CAP_AF_IB 0x00001000 #define RDMA_CORE_CAP_ETH_AH 0x00002000 #define RDMA_CORE_CAP_OPA_AH 0x00004000 +#define RDMA_CORE_CAP_IB_GRH_REQUIRED 0x00008000 /* Protocol 0xFFF00000 */ #define RDMA_CORE_CAP_PROT_IB 0x00100000 @@ -538,6 +539,10 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct( #define RDMA_CORE_CAP_PROT_RAW_PACKET 0x01000000 #define RDMA_CORE_CAP_PROT_USNIC 0x02000000 +#define RDMA_CORE_PORT_IB_GRH_REQUIRED (RDMA_CORE_CAP_IB_GRH_REQUIRED \ + | RDMA_CORE_CAP_PROT_ROCE \ + | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP) + #define RDMA_CORE_PORT_IBA_IB (RDMA_CORE_CAP_PROT_IB \ | RDMA_CORE_CAP_IB_MAD \ | RDMA_CORE_CAP_IB_SMI \ @@ -570,7 +575,6 @@ struct ib_port_attr { enum ib_mtu max_mtu; enum ib_mtu active_mtu; int gid_tbl_len; - unsigned int grh_required:1; unsigned int ip_gids:1; /* This is the value from PortInfo CapabilityMask, defined by IBA */ u32 port_cap_flags; @@ -2771,6 +2775,13 @@ static inline int rdma_is_port_valid(const struct ib_device *device, port <= rdma_end_port(device)); } +static inline bool rdma_is_grh_required(const struct ib_device *device, + u8 port_num) +{ + return device->port_immutable[port_num].core_cap_flags & + RDMA_CORE_PORT_IB_GRH_REQUIRED; +} + static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num) { return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IB; -- cgit From 8942acea3723ff9424dc89350d2ab6e969fdd093 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Wed, 4 Jul 2018 15:57:51 +0300 Subject: IB/uverbs: Pass IB_UVERBS_QPF_GRH_REQUIRED to user space Userspace also needs to know if the port requires GRHs to properly form the AVs it creates. 
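As a rough illustration of the consumer side (a hypothetical userspace snippet, not code from this series): a library that issues IB_USER_VERBS_CMD_QUERY_PORT can test the new bit in the response and force a GRH on every address handle it builds for that port.

	struct ib_uverbs_query_port_resp resp;
	/* ... write the QUERY_PORT command to /dev/infiniband/uverbsN
	 * and read back resp ... */
	bool must_use_grh = resp.flags & IB_UVERBS_QPF_GRH_REQUIRED;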
Signed-off-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_cmd.c | 3 +++ include/uapi/rdma/ib_user_ioctl_verbs.h | 4 ++++ include/uapi/rdma/ib_user_verbs.h | 3 ++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 1bc9ceb16b70..bd6eefaecbd6 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -294,6 +294,9 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, resp.qkey_viol_cntr = attr.qkey_viol_cntr; resp.pkey_tbl_len = attr.pkey_tbl_len; + if (rdma_is_grh_required(ib_dev, cmd.port_num)) + resp.flags |= IB_UVERBS_QPF_GRH_REQUIRED; + if (rdma_cap_opa_ah(ib_dev, cmd.port_num)) { resp.lid = OPA_TO_IB_UCAST_LID(attr.lid); resp.sm_lid = OPA_TO_IB_UCAST_LID(attr.sm_lid); diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index 1220f1df3ded..a81d853bf25d 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -78,6 +78,10 @@ enum ib_uverbs_query_port_cap_flags { IB_UVERBS_PCF_IP_BASED_GIDS = 1 << 26, }; +enum ib_uverbs_query_port_flags { + IB_UVERBS_QPF_GRH_REQUIRED = 1 << 0, +}; + enum ib_uverbs_flow_action_esp_keymat { IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM, }; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 0a9070abb3f8..25a16760de2a 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -299,7 +299,8 @@ struct ib_uverbs_query_port_resp { __u8 active_speed; __u8 phys_state; __u8 link_layer; - __u8 reserved[2]; + __u8 flags; /* see ib_uverbs_query_port_flags */ + __u8 reserved; }; struct ib_uverbs_alloc_pd { -- cgit From 781a4016be54ac36b22eac2e84c7fe4cafd3492a Mon Sep 17 00:00:00 2001 From: Jan Dakinevich Date: Mon, 9 Jul 2018 16:51:08 +0300 Subject: ib_srpt: use kvmalloc to allocate ring pointers An array of pointers to SRPT contexts in ib_device is over 30KiB even in the default case, in which the number of contexts is 4095. The patch is intended to weed out large contiguous allocations for non-DMA memory.
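The underlying API contract, sketched minimally (generic kernel idiom, not srpt-specific code; "foo" is a placeholder type): kvmalloc_array() tries kmalloc first and transparently falls back to vmalloc for large requests, so the memory is virtually contiguous but not DMA-able, and it must be released with kvfree(), never kfree().

	struct foo **ring;

	ring = kvmalloc_array(ring_size, sizeof(ring[0]), GFP_KERNEL);
	if (!ring)
		return NULL;
	/* ... use ring[0..ring_size-1] ... */
	kvfree(ring);	/* correct for both kmalloc and vmalloc backing */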
Signed-off-by: Jan Dakinevich Reviewed-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index e42eec20c631..3cb99ca841bb 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -719,7 +719,7 @@ static struct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev, WARN_ON(ioctx_size != sizeof(struct srpt_recv_ioctx) && ioctx_size != sizeof(struct srpt_send_ioctx)); - ring = kmalloc_array(ring_size, sizeof(ring[0]), GFP_KERNEL); + ring = kvmalloc_array(ring_size, sizeof(ring[0]), GFP_KERNEL); if (!ring) goto out; for (i = 0; i < ring_size; ++i) { @@ -733,7 +733,7 @@ static struct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev, err: while (--i >= 0) srpt_free_ioctx(sdev, ring[i], dma_size, dir); - kfree(ring); + kvfree(ring); ring = NULL; out: return ring; @@ -758,7 +758,7 @@ static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring, for (i = 0; i < ring_size; ++i) srpt_free_ioctx(sdev, ioctx_ring[i], dma_size, dir); - kfree(ioctx_ring); + kvfree(ioctx_ring); } /** -- cgit From 528922afd41cdd1da6a4b33e2c82e38c1746561c Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 8 Jul 2018 13:24:39 +0300 Subject: IB: Enable uverbs_destroy_def_handler to be used by drivers Enable uverbs_destroy_def_handler to be used by drivers and replace current code to use it. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs.h | 3 --- drivers/infiniband/core/uverbs_std_types.c | 1 + drivers/infiniband/hw/mlx5/devx.c | 18 ++---------------- include/rdma/ib_verbs.h | 3 +++ 4 files changed, 6 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 3ddd39e435e1..d0a1a54275e5 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -246,9 +246,6 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp, void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata); long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); -int uverbs_destroy_def_handler(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs); struct ib_uverbs_flow_spec { union { diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 912519fda3ba..718c8430d364 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -215,6 +215,7 @@ int uverbs_destroy_def_handler(struct ib_device *ib_dev, { return 0; } +EXPORT_SYMBOL(uverbs_destroy_def_handler); void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata) { diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 192844bf6016..60ac1fbe940e 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -682,13 +682,6 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, return ret; } -static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_DESTROY)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) -{ - return 0; -} - static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(struct ib_device *ib_dev, struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) @@ -961,13 +954,6 @@ err_obj_free: return err; } 
-static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_DEREG)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) -{ - return 0; -} - static int devx_umem_cleanup(struct ib_uobject *uobject, enum rdma_remove_reason why) { @@ -1003,7 +989,7 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_TYPE(u32), UA_MANDATORY)); -DECLARE_UVERBS_NAMED_METHOD( +DECLARE_UVERBS_NAMED_METHOD_DESTROY( MLX5_IB_METHOD_DEVX_UMEM_DEREG, UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE, MLX5_IB_OBJECT_DEVX_UMEM, @@ -1056,7 +1042,7 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), UA_MANDATORY)); -DECLARE_UVERBS_NAMED_METHOD( +DECLARE_UVERBS_NAMED_METHOD_DESTROY( MLX5_IB_METHOD_DEVX_OBJ_DESTROY, UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE, MLX5_IB_OBJECT_DEVX_OBJ, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index b523298d486b..2696f1d730a1 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4146,4 +4146,7 @@ void rdma_roce_rescan_device(struct ib_device *ibdev); struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile); +int uverbs_destroy_def_handler(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs); #endif /* IB_VERBS_H */ -- cgit From 23ff6ba8feec5c4bdf993af3fba3937d57883dc8 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 10 Jul 2018 13:03:16 -0600 Subject: RDMA/cxgb4: Restore the dropped uninitialized_var In some configurations even gcc 7 cannot unravel this complexity and still throws a warning. Fixes: 4ab39e2f98f2 ("RDMA/cxgb4: Make c4iw_poll_cq_one() easier to analyze") Reported-by: Stephen Rothwell Reviewed-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index a5280d8d002f..a3a829951ac4 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -671,7 +671,7 @@ skip_cqe: static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp, struct ib_wc *wc) { - struct t4_cqe cqe; + struct t4_cqe uninitialized_var(cqe); struct t4_wq *wq = qhp ? 
&qhp->wq : NULL; u32 credit = 0; u8 cqe_flushed; -- cgit From 59b851dbf7dc94214e4fab5dd29ea28b4075a04f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 10 Jul 2018 13:32:24 -0700 Subject: RDMA/nes: Avoid complaints about unused variables Avoid that the compiler reports the following when building with W=1: drivers/infiniband/hw/nes/nes_utils.c: In function 'nes_arp_table': drivers/infiniband/hw/nes/nes_utils.c:689:9: warning: variable 'tmp_addr' set but not used [-Wunused-but-set-variable] __be32 tmp_addr; ^~~~~~~~ drivers/infiniband/hw/nes/nes_hw.c: In function 'flush_wqes': drivers/infiniband/hw/nes/nes_hw.c:3840:6: warning: variable 'ret' set but not used [-Wunused-but-set-variable] int ret; ^~~ drivers/infiniband/hw/nes/nes_verbs.c: In function 'nes_setup_virt_qp': drivers/infiniband/hw/nes/nes_verbs.c:811:6: warning: variable 'pbl_entries' set but not used [-Wunused-but-set-variable] u32 pbl_entries; ^~~~~~~~~~~ drivers/infiniband/hw/nes/nes_verbs.c: In function 'nes_dereg_mr': drivers/infiniband/hw/nes/nes_verbs.c:2487:6: warning: variable 'minor_code' set but not used [-Wunused-but-set-variable] u16 minor_code; ^~~~~~~~~~ drivers/infiniband/hw/nes/nes_cm.c: In function 'mini_cm_recv_pkt': drivers/infiniband/hw/nes/nes_cm.c:2570:20: warning: variable 'tmp_saddr' set but not used [-Wunused-but-set-variable] __be32 tmp_daddr, tmp_saddr; ^~~~~~~~~ drivers/infiniband/hw/nes/nes_cm.c:2570:9: warning: variable 'tmp_daddr' set but not used [-Wunused-but-set-variable] __be32 tmp_daddr, tmp_saddr; ^~~~~~~~~ drivers/infiniband/hw/nes/nes_cm.c: In function 'cm_event_connected': drivers/infiniband/hw/nes/nes_cm.c:3578:22: warning: variable 'raddr' set but not used [-Wunused-but-set-variable] struct sockaddr_in *raddr; ^~~~~ drivers/infiniband/hw/nes/nes_cm.c: In function 'cm_event_reset': drivers/infiniband/hw/nes/nes_cm.c:3753:6: warning: variable 'ret' set but not used [-Wunused-but-set-variable] int ret; ^~~ Signed-off-by: Bart Van Assche Cc: Faisal Latif Cc: Tatyana Nikolova Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/nes/nes.h | 2 +- drivers/infiniband/hw/nes/nes_hw.c | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 18340942d75f..bedaa02749fb 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -159,7 +159,7 @@ do { \ #define NES_EVENT_TIMEOUT 1200000 #else -#define nes_debug(level, fmt, args...) do {} while (0) +#define nes_debug(level, fmt, args...) 
no_printk(fmt, ##args) #define assert(expr) do {} while (0) #define NES_EVENT_TIMEOUT 100000 diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 18a7de1c3923..bd0675d8f298 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -70,8 +70,7 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number); static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode); static void nes_terminate_start_timer(struct nes_qp *nesqp); -#ifdef CONFIG_INFINIBAND_NES_DEBUG -static unsigned char *nes_iwarp_state_str[] = { +static const char *const nes_iwarp_state_str[] = { "Non-Existent", "Idle", "RTS", @@ -82,7 +81,7 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number); "RSVD2", }; -static unsigned char *nes_tcp_state_str[] = { +static const char *const nes_tcp_state_str[] = { "Non-Existent", "Closed", "Listen", @@ -100,7 +99,6 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number); "RSVD3", "RSVD4", }; -#endif static inline void print_ip(struct nes_cm_node *cm_node) { -- cgit From 07f3355df7e6d043d36d4c172a18e74510fe7e7b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 9 Jul 2018 10:34:43 +0200 Subject: infiniband: i40iw, nes: don't use wall time for TCP sequence numbers The nes infiniband driver uses current_kernel_time() to get a nanosecond granularity timestamp to initialize its tcp sequence counters. This is one of only a few remaining users of that deprecated function, so we should try to get rid of it. Aside from using a deprecated API, there are several problems I see here: - Using a CLOCK_REALTIME based time source makes it predictable in case the time base is synchronized. - Using a coarse timestamp, which is only updated once per jiffy in order to avoid accessing the hardware clock source, makes it even more predictable. - The upper 2 bits are always zero because the nanoseconds are at most 999999999. For the Linux TCP implementation, we use secure_tcp_seq(), which appears to be appropriate here as well, and solves all the above problems. i40iw uses a variant of the same code, so I do the same thing there for ipv4. Unlike nes, i40iw also supports ipv6, which needs to call secure_tcpv6_seq instead.
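For reference, secure_tcp_seq() in net/core/secure_seq.c takes the connection 4-tuple in network byte order and returns an initial sequence number derived from a keyed hash of that tuple, so a caller holding host-order values converts first. A minimal sketch, with placeholder variable names:

	__be32 saddr = htonl(local_ip), daddr = htonl(remote_ip);
	__be16 sport = htons(local_port), dport = htons(remote_port);
	u32 isn = secure_tcp_seq(saddr, daddr, sport, dport);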
Acked-by: Shiraz Saleem Signed-off-by: Arnd Bergmann Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/Kconfig | 1 + drivers/infiniband/hw/i40iw/i40iw_cm.c | 26 +++++++++++++++++++++----- drivers/infiniband/hw/nes/nes_cm.c | 8 +++++--- net/core/secure_seq.c | 1 + 4 files changed, 28 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/Kconfig b/drivers/infiniband/hw/i40iw/Kconfig index 2962979c06e9..d867ef1ac72a 100644 --- a/drivers/infiniband/hw/i40iw/Kconfig +++ b/drivers/infiniband/hw/i40iw/Kconfig @@ -1,6 +1,7 @@ config INFINIBAND_I40IW tristate "Intel(R) Ethernet X722 iWARP Driver" depends on INET && I40E + depends on IPV6 || !IPV6 depends on PCI select GENERIC_ALLOCATOR ---help--- diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 7b2655128b9f..423818a7d333 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include @@ -2164,7 +2165,6 @@ static struct i40iw_cm_node *i40iw_make_cm_node( struct i40iw_cm_listener *listener) { struct i40iw_cm_node *cm_node; - struct timespec ts; int oldarpindex; int arpindex; struct net_device *netdev = iwdev->netdev; @@ -2214,10 +2214,26 @@ static struct i40iw_cm_node *i40iw_make_cm_node( cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE; cm_node->tcp_cntxt.rcv_wnd = I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE; - ts = current_kernel_time(); - cm_node->tcp_cntxt.loc_seq_num = ts.tv_nsec; - cm_node->tcp_cntxt.mss = (cm_node->ipv4) ? (iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV4) : - (iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV6); + if (cm_node->ipv4) { + cm_node->tcp_cntxt.loc_seq_num = secure_tcp_seq(htonl(cm_node->loc_addr[0]), + htonl(cm_node->rem_addr[0]), + htons(cm_node->loc_port), + htons(cm_node->rem_port)); + cm_node->tcp_cntxt.mss = iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV4; + } else if (IS_ENABLED(CONFIG_IPV6)) { + __be32 loc[4] = { + htonl(cm_node->loc_addr[0]), htonl(cm_node->loc_addr[1]), + htonl(cm_node->loc_addr[2]), htonl(cm_node->loc_addr[3]) + }; + __be32 rem[4] = { + htonl(cm_node->rem_addr[0]), htonl(cm_node->rem_addr[1]), + htonl(cm_node->rem_addr[2]), htonl(cm_node->rem_addr[3]) + }; + cm_node->tcp_cntxt.loc_seq_num = secure_tcpv6_seq(loc, rem, + htons(cm_node->loc_port), + htons(cm_node->rem_port)); + cm_node->tcp_cntxt.mss = iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV6; + } cm_node->iwdev = iwdev; cm_node->dev = &iwdev->sc_dev; diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 6cdfbf8c5674..2b67ace5b614 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include @@ -1445,7 +1446,6 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, struct nes_cm_listener *listener) { struct nes_cm_node *cm_node; - struct timespec ts; int oldarpindex = 0; int arpindex = 0; struct nes_device *nesdev; @@ -1496,8 +1496,10 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; cm_node->tcp_cntxt.rcv_wnd = NES_CM_DEFAULT_RCV_WND_SCALED >> NES_CM_DEFAULT_RCV_WND_SCALE; - ts = current_kernel_time(); - cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec); + cm_node->tcp_cntxt.loc_seq_num = secure_tcp_seq(htonl(cm_node->loc_addr), + htonl(cm_node->rem_addr), + htons(cm_node->loc_port), + htons(cm_node->rem_port)); 
cm_node->tcp_cntxt.mss = nesvnic->max_frame_size - sizeof(struct iphdr) - sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN; cm_node->tcp_cntxt.rcv_nxt = 0; diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 7232274de334..af6ad467ed61 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -140,6 +140,7 @@ u32 secure_tcp_seq(__be32 saddr, __be32 daddr, &net_secret); return seq_scale(hash); } +EXPORT_SYMBOL_GPL(secure_tcp_seq); u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) { -- cgit From beae9eb555b918ecaf2214f2fd0a1af2dcec3ad7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 6 Jul 2018 13:04:30 -0700 Subject: RDMA/ocrdma: Make ocrdma_destroy_qp() easier to analyze This patch does not change any functionality but avoids that sparse reports the following: drivers/infiniband/hw/ocrdma/ocrdma_verbs.c:1818:31: warning: context imbalance in 'ocrdma_destroy_qp' - different lock contexts for basic block Compile-tested only. Signed-off-by: Bart Van Assche Cc: Selvin Xavier Cc: Devesh Sharma Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 9d0431e01dce..86b22f6b7271 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1774,13 +1774,13 @@ int ocrdma_destroy_qp(struct ib_qp *ibqp) * protect against proessing in-flight CQEs for this QP. */ spin_lock_irqsave(&qp->sq_cq->cq_lock, flags); - if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) + if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) { spin_lock(&qp->rq_cq->cq_lock); - - ocrdma_del_qpn_map(dev, qp); - - if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) + ocrdma_del_qpn_map(dev, qp); spin_unlock(&qp->rq_cq->cq_lock); + } else { + ocrdma_del_qpn_map(dev, qp); + } spin_unlock_irqrestore(&qp->sq_cq->cq_lock, flags); if (!pd->uctx) { -- cgit From 0576cbde14482931b29c50e10f274805e6721bbe Mon Sep 17 00:00:00 2001 From: oulijun Date: Mon, 9 Jul 2018 17:48:06 +0800 Subject: RDMA/hns: Fix endian conversions and annotations This patch removes the warnings reported by sparse. 
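The conversion pattern repeated throughout this patch, sketched with placeholder SOME_REG/FIELD_M/FIELD_S names: roce_read()/roce_write() traffic in CPU byte order, while the roce_set_field()/roce_set_bit() helpers operate on __le32 values, so the value is annotated before the bit manipulation and converted back before the write.

	__le32 tmp;
	u32 val;

	val = roce_read(hr_dev, SOME_REG);	/* CPU-order read */
	tmp = cpu_to_le32(val);			/* annotate for the bit helpers */
	roce_set_field(tmp, FIELD_M, FIELD_S, bits);
	val = le32_to_cpu(tmp);
	roce_write(hr_dev, SOME_REG, val);	/* CPU-order write */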
Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 10 +- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 404 ++++++++++++++++------------ drivers/infiniband/hw/hns/hns_roce_hw_v1.h | 2 +- 3 files changed, 245 insertions(+), 171 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 65f7b68d1777..df5e3c12254e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -579,22 +579,22 @@ struct hns_roce_ceqe { }; struct hns_roce_aeqe { - u32 asyn; + __le32 asyn; union { struct { - u32 qp; + __le32 qp; u32 rsv0; u32 rsv1; } qp_event; struct { - u32 cq; + __le32 cq; u32 rsv0; u32 rsv1; } cq_event; struct { - u32 ceqe; + __le32 ceqe; u32 rsv0; u32 rsv1; } ce_event; @@ -864,7 +864,7 @@ static inline struct hns_roce_sqp *hr_to_hr_sqp(struct hns_roce_qp *hr_qp) return container_of(hr_qp, struct hns_roce_sqp, hr_qp); } -static inline void hns_roce_write64_k(__be32 val[2], void __iomem *dest) +static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest) { __raw_writeq(*(u64 *) val, dest); } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 235c67dfc6cb..783d28dd3ca4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -175,10 +175,10 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, UD_SEND_WQE_U32_36_FLOW_LABEL_M, UD_SEND_WQE_U32_36_FLOW_LABEL_S, 0); roce_set_field(ud_sq_wqe->u32_36, - UD_SEND_WQE_U32_36_PRIORITY_M, - UD_SEND_WQE_U32_36_PRIORITY_S, - ah->av.sl_tclass_flowlabel >> - HNS_ROCE_SL_SHIFT); + UD_SEND_WQE_U32_36_PRIORITY_M, + UD_SEND_WQE_U32_36_PRIORITY_S, + le32_to_cpu(ah->av.sl_tclass_flowlabel) >> + HNS_ROCE_SL_SHIFT); roce_set_field(ud_sq_wqe->u32_36, UD_SEND_WQE_U32_36_SGID_INDEX_M, UD_SEND_WQE_U32_36_SGID_INDEX_S, @@ -333,7 +333,7 @@ out: doorbell[0] = le32_to_cpu(sq_db.u32_4); doorbell[1] = le32_to_cpu(sq_db.u32_8); - hns_roce_write64_k(doorbell, qp->sq.db_reg_l); + hns_roce_write64_k((__le32 *)doorbell, qp->sq.db_reg_l); qp->sq_next_wqe = ind; } @@ -349,7 +349,7 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, int nreq = 0; int ind = 0; int i = 0; - u32 reg_val = 0; + u32 reg_val; unsigned long flags = 0; struct hns_roce_rq_wqe_ctrl *ctrl = NULL; struct hns_roce_wqe_data_seg *scat = NULL; @@ -402,14 +402,18 @@ out: wmb(); if (ibqp->qp_type == IB_QPT_GSI) { + __le32 tmp; + /* SW update GSI rq header */ reg_val = roce_read(to_hr_dev(ibqp->device), ROCEE_QP1C_CFG3_0_REG + QP1C_CFGN_OFFSET * hr_qp->phy_port); - roce_set_field(reg_val, + tmp = cpu_to_le32(reg_val); + roce_set_field(tmp, ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M, ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S, hr_qp->rq.head); + reg_val = le32_to_cpu(tmp); roce_write(to_hr_dev(ibqp->device), ROCEE_QP1C_CFG3_0_REG + QP1C_CFGN_OFFSET * hr_qp->phy_port, reg_val); @@ -430,7 +434,8 @@ out: doorbell[0] = le32_to_cpu(rq_db.u32_4); doorbell[1] = le32_to_cpu(rq_db.u32_8); - hns_roce_write64_k(doorbell, hr_qp->rq.db_reg_l); + hns_roce_write64_k((__le32 *)doorbell, + hr_qp->rq.db_reg_l); } } spin_unlock_irqrestore(&hr_qp->rq.lock, flags); @@ -441,51 +446,63 @@ out: static void hns_roce_set_db_event_mode(struct hns_roce_dev *hr_dev, int sdb_mode, int odb_mode) { + __le32 tmp; u32 val; val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); - roce_set_bit(val, ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S, sdb_mode); - 
roce_set_bit(val, ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S, odb_mode); + tmp = cpu_to_le32(val); + roce_set_bit(tmp, ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S, sdb_mode); + roce_set_bit(tmp, ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S, odb_mode); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); } static void hns_roce_set_db_ext_mode(struct hns_roce_dev *hr_dev, u32 sdb_mode, u32 odb_mode) { + __le32 tmp; u32 val; /* Configure SDB/ODB extend mode */ val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); - roce_set_bit(val, ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S, sdb_mode); - roce_set_bit(val, ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S, odb_mode); + tmp = cpu_to_le32(val); + roce_set_bit(tmp, ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S, sdb_mode); + roce_set_bit(tmp, ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S, odb_mode); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); } static void hns_roce_set_sdb(struct hns_roce_dev *hr_dev, u32 sdb_alept, u32 sdb_alful) { + __le32 tmp; u32 val; /* Configure SDB */ val = roce_read(hr_dev, ROCEE_DB_SQ_WL_REG); - roce_set_field(val, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S, sdb_alful); - roce_set_field(val, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M, + roce_set_field(tmp, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S, sdb_alept); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_DB_SQ_WL_REG, val); } static void hns_roce_set_odb(struct hns_roce_dev *hr_dev, u32 odb_alept, u32 odb_alful) { + __le32 tmp; u32 val; /* Configure ODB */ val = roce_read(hr_dev, ROCEE_DB_OTHERS_WL_REG); - roce_set_field(val, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S, odb_alful); - roce_set_field(val, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M, + roce_set_field(tmp, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S, odb_alept); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_DB_OTHERS_WL_REG, val); } @@ -496,6 +513,7 @@ static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept, struct hns_roce_v1_priv *priv; struct hns_roce_db_table *db; dma_addr_t sdb_dma_addr; + __le32 tmp; u32 val; priv = (struct hns_roce_v1_priv *)hr_dev->priv; @@ -511,7 +529,8 @@ static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept, /* Configure extend SDB depth */ val = roce_read(hr_dev, ROCEE_EXT_DB_SQ_H_REG); - roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S, db->ext_db->esdb_dep); /* @@ -519,8 +538,9 @@ static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept, * using 4K page, and shift more 32 because of * caculating the high 32 bit value evaluated to hardware. 
*/ - roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M, + roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S, sdb_dma_addr >> 44); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_EXT_DB_SQ_H_REG, val); dev_dbg(dev, "ext SDB depth: 0x%x\n", db->ext_db->esdb_dep); @@ -535,6 +555,7 @@ static void hns_roce_set_odb_ext(struct hns_roce_dev *hr_dev, u32 ext_odb_alept, struct hns_roce_v1_priv *priv; struct hns_roce_db_table *db; dma_addr_t odb_dma_addr; + __le32 tmp; u32 val; priv = (struct hns_roce_v1_priv *)hr_dev->priv; @@ -550,12 +571,14 @@ static void hns_roce_set_odb_ext(struct hns_roce_dev *hr_dev, u32 ext_odb_alept, /* Configure extend ODB depth */ val = roce_read(hr_dev, ROCEE_EXT_DB_OTH_H_REG); - roce_set_field(val, ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M, ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S, db->ext_db->eodb_dep); - roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M, + roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M, ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S, db->ext_db->eodb_dep); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_EXT_DB_OTH_H_REG, val); dev_dbg(dev, "ext ODB depth: 0x%x\n", db->ext_db->eodb_dep); @@ -1161,9 +1184,10 @@ static void hns_roce_db_free(struct hns_roce_dev *hr_dev) static int hns_roce_raq_init(struct hns_roce_dev *hr_dev) { int ret; + u32 val; + __le32 tmp; int raq_shift = 0; dma_addr_t addr; - u32 val; struct hns_roce_v1_priv *priv; struct hns_roce_raq_table *raq; struct device *dev = &hr_dev->pdev->dev; @@ -1189,46 +1213,54 @@ static int hns_roce_raq_init(struct hns_roce_dev *hr_dev) /* Configure raq_shift */ raq_shift = ilog2(HNS_ROCE_V1_RAQ_SIZE / HNS_ROCE_V1_RAQ_ENTRY); val = roce_read(hr_dev, ROCEE_EXT_RAQ_H_REG); - roce_set_field(val, ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M, ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S, raq_shift); /* * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of * using 4K page, and shift more 32 because of * caculating the high 32 bit value evaluated to hardware. 
*/ - roce_set_field(val, ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M, + roce_set_field(tmp, ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M, ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S, raq->e_raq_buf->map >> 44); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_EXT_RAQ_H_REG, val); dev_dbg(dev, "Configure raq_shift 0x%x.\n", val); /* Configure raq threshold */ val = roce_read(hr_dev, ROCEE_RAQ_WL_REG); - roce_set_field(val, ROCEE_RAQ_WL_ROCEE_RAQ_WL_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_RAQ_WL_ROCEE_RAQ_WL_M, ROCEE_RAQ_WL_ROCEE_RAQ_WL_S, HNS_ROCE_V1_EXT_RAQ_WF); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_RAQ_WL_REG, val); dev_dbg(dev, "Configure raq_wl 0x%x.\n", val); /* Enable extend raq */ val = roce_read(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG); - roce_set_field(val, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_M, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S, POL_TIME_INTERVAL_VAL); - roce_set_bit(val, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE, 1); - roce_set_field(val, + roce_set_bit(tmp, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE, 1); + roce_set_field(tmp, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_M, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S, 2); - roce_set_bit(val, + roce_set_bit(tmp, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_EN_S, 1); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG, val); dev_dbg(dev, "Configure WrmsPolTimeInterval 0x%x.\n", val); /* Enable raq drop */ val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); - roce_set_bit(val, ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S, 1); + tmp = cpu_to_le32(val); + roce_set_bit(tmp, ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S, 1); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); dev_dbg(dev, "Configure GlbCfg = 0x%x.\n", val); @@ -1255,20 +1287,25 @@ static void hns_roce_raq_free(struct hns_roce_dev *hr_dev) static void hns_roce_port_enable(struct hns_roce_dev *hr_dev, int enable_flag) { + __le32 tmp; u32 val; if (enable_flag) { val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); /* Open all ports */ - roce_set_field(val, ROCEE_GLB_CFG_ROCEE_PORT_ST_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_GLB_CFG_ROCEE_PORT_ST_M, ROCEE_GLB_CFG_ROCEE_PORT_ST_S, ALL_PORT_VAL_OPEN); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); } else { val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); /* Close all ports */ - roce_set_field(val, ROCEE_GLB_CFG_ROCEE_PORT_ST_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_GLB_CFG_ROCEE_PORT_ST_M, ROCEE_GLB_CFG_ROCEE_PORT_ST_S, 0x0); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); } } @@ -1498,13 +1535,11 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev) int i = 0; struct hns_roce_caps *caps = &hr_dev->caps; - hr_dev->vendor_id = le32_to_cpu(roce_read(hr_dev, ROCEE_VENDOR_ID_REG)); - hr_dev->vendor_part_id = le32_to_cpu(roce_read(hr_dev, - ROCEE_VENDOR_PART_ID_REG)); - hr_dev->sys_image_guid = le32_to_cpu(roce_read(hr_dev, - ROCEE_SYS_IMAGE_GUID_L_REG)) | - ((u64)le32_to_cpu(roce_read(hr_dev, - ROCEE_SYS_IMAGE_GUID_H_REG)) << 32); + hr_dev->vendor_id = roce_read(hr_dev, ROCEE_VENDOR_ID_REG); + hr_dev->vendor_part_id = roce_read(hr_dev, ROCEE_VENDOR_PART_ID_REG); + hr_dev->sys_image_guid = roce_read(hr_dev, ROCEE_SYS_IMAGE_GUID_L_REG) | + ((u64)roce_read(hr_dev, + ROCEE_SYS_IMAGE_GUID_H_REG) << 32); hr_dev->hw_rev = HNS_ROCE_HW_VER1; caps->num_qps = HNS_ROCE_V1_MAX_QP_NUM; @@ -1557,8 +1592,7 @@ static int hns_roce_v1_profile(struct 
hns_roce_dev *hr_dev) caps->ceqe_depth = HNS_ROCE_V1_COMP_EQE_NUM; caps->aeqe_depth = HNS_ROCE_V1_ASYNC_EQE_NUM; - caps->local_ca_ack_delay = le32_to_cpu(roce_read(hr_dev, - ROCEE_ACK_DELAY_REG)); + caps->local_ca_ack_delay = roce_read(hr_dev, ROCEE_ACK_DELAY_REG); caps->max_mtu = IB_MTU_2048; return 0; @@ -1568,21 +1602,25 @@ static int hns_roce_v1_init(struct hns_roce_dev *hr_dev) { int ret; u32 val; + __le32 tmp; struct device *dev = &hr_dev->pdev->dev; /* DMAE user config */ val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG1_REG); - roce_set_field(val, ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M, ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S, 0xf); - roce_set_field(val, ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M, + roce_set_field(tmp, ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M, ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S, 1 << PAGES_SHIFT_16); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_DMAE_USER_CFG1_REG, val); val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG2_REG); - roce_set_field(val, ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M, ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S, 0xf); - roce_set_field(val, ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M, + roce_set_field(tmp, ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M, ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S, 1 << PAGES_SHIFT_16); @@ -1668,6 +1706,7 @@ static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u32 __iomem *hcr = (u32 __iomem *)(hr_dev->reg_base + ROCEE_MB1_REG); unsigned long end; u32 val = 0; + __le32 tmp; end = msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS) + jiffies; while (hns_roce_v1_cmd_pending(hr_dev)) { @@ -1679,15 +1718,17 @@ static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, cond_resched(); } - roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_M, ROCEE_MB6_ROCEE_MB_CMD_S, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_CMD_M, ROCEE_MB6_ROCEE_MB_CMD_S, op); - roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_MDF_M, + roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_CMD_MDF_M, ROCEE_MB6_ROCEE_MB_CMD_MDF_S, op_modifier); - roce_set_bit(val, ROCEE_MB6_ROCEE_MB_EVENT_S, event); - roce_set_bit(val, ROCEE_MB6_ROCEE_MB_HW_RUN_S, 1); - roce_set_field(val, ROCEE_MB6_ROCEE_MB_TOKEN_M, + roce_set_bit(tmp, ROCEE_MB6_ROCEE_MB_EVENT_S, event); + roce_set_bit(tmp, ROCEE_MB6_ROCEE_MB_HW_RUN_S, 1); + roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_TOKEN_M, ROCEE_MB6_ROCEE_MB_TOKEN_S, token); + val = le32_to_cpu(tmp); writeq(in_param, hcr + 0); writeq(out_param, hcr + 2); writel(in_modifier, hcr + 4); @@ -1717,7 +1758,7 @@ static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev, return -ETIMEDOUT; } - status = le32_to_cpu((__force __be32) + status = le32_to_cpu((__force __le32) __raw_readl(hcr + HCR_STATUS_OFFSET)); if ((status & STATUS_MASK) != 0x1) { dev_err(hr_dev->dev, "mailbox status 0x%x!\n", status); @@ -1760,6 +1801,7 @@ static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, { u32 reg_smac_l; u16 reg_smac_h; + __le32 tmp; u16 *p_h; u32 *p; u32 val; @@ -1784,10 +1826,12 @@ static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, val = roce_read(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET); + tmp = cpu_to_le32(val); p_h = (u16 *)(&addr[4]); reg_smac_h = *p_h; - roce_set_field(val, ROCEE_SMAC_H_ROCEE_SMAC_H_M, + roce_set_field(tmp, ROCEE_SMAC_H_ROCEE_SMAC_H_M, 
ROCEE_SMAC_H_ROCEE_SMAC_H_S, reg_smac_h); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET, val); @@ -1797,12 +1841,15 @@ static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, static void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port, enum ib_mtu mtu) { + __le32 tmp; u32 val; val = roce_read(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET); - roce_set_field(val, ROCEE_SMAC_H_ROCEE_PORT_MTU_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_SMAC_H_ROCEE_PORT_MTU_M, ROCEE_SMAC_H_ROCEE_PORT_MTU_S, mtu); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET, val); } @@ -1848,9 +1895,9 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_MW_BIND_COUNTER_M, MPT_BYTE_12_MW_BIND_COUNTER_S, 0); - mpt_entry->virt_addr_l = (u32)mr->iova; - mpt_entry->virt_addr_h = (u32)(mr->iova >> 32); - mpt_entry->length = (u32)mr->size; + mpt_entry->virt_addr_l = cpu_to_le32((u32)mr->iova); + mpt_entry->virt_addr_h = cpu_to_le32((u32)(mr->iova >> 32)); + mpt_entry->length = cpu_to_le32((u32)mr->size); roce_set_field(mpt_entry->mpt_byte_28, MPT_BYTE_28_PD_M, MPT_BYTE_28_PD_S, mr->pd); @@ -1885,64 +1932,59 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, roce_set_field(mpt_entry->mpt_byte_36, MPT_BYTE_36_PA0_H_M, MPT_BYTE_36_PA0_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_32))); + (u32)(pages[i] >> PAGES_SHIFT_32)); break; case 1: roce_set_field(mpt_entry->mpt_byte_36, MPT_BYTE_36_PA1_L_M, - MPT_BYTE_36_PA1_L_S, - cpu_to_le32((u32)(pages[i]))); + MPT_BYTE_36_PA1_L_S, (u32)(pages[i])); roce_set_field(mpt_entry->mpt_byte_40, MPT_BYTE_40_PA1_H_M, MPT_BYTE_40_PA1_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_24))); + (u32)(pages[i] >> PAGES_SHIFT_24)); break; case 2: roce_set_field(mpt_entry->mpt_byte_40, MPT_BYTE_40_PA2_L_M, - MPT_BYTE_40_PA2_L_S, - cpu_to_le32((u32)(pages[i]))); + MPT_BYTE_40_PA2_L_S, (u32)(pages[i])); roce_set_field(mpt_entry->mpt_byte_44, MPT_BYTE_44_PA2_H_M, MPT_BYTE_44_PA2_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_16))); + (u32)(pages[i] >> PAGES_SHIFT_16)); break; case 3: roce_set_field(mpt_entry->mpt_byte_44, MPT_BYTE_44_PA3_L_M, - MPT_BYTE_44_PA3_L_S, - cpu_to_le32((u32)(pages[i]))); + MPT_BYTE_44_PA3_L_S, (u32)(pages[i])); roce_set_field(mpt_entry->mpt_byte_48, MPT_BYTE_48_PA3_H_M, MPT_BYTE_48_PA3_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_8))); + (u32)(pages[i] >> PAGES_SHIFT_8)); break; case 4: mpt_entry->pa4_l = cpu_to_le32((u32)(pages[i])); roce_set_field(mpt_entry->mpt_byte_56, MPT_BYTE_56_PA4_H_M, MPT_BYTE_56_PA4_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_32))); + (u32)(pages[i] >> PAGES_SHIFT_32)); break; case 5: roce_set_field(mpt_entry->mpt_byte_56, MPT_BYTE_56_PA5_L_M, - MPT_BYTE_56_PA5_L_S, - cpu_to_le32((u32)(pages[i]))); + MPT_BYTE_56_PA5_L_S, (u32)(pages[i])); roce_set_field(mpt_entry->mpt_byte_60, MPT_BYTE_60_PA5_H_M, MPT_BYTE_60_PA5_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_24))); + (u32)(pages[i] >> PAGES_SHIFT_24)); break; case 6: roce_set_field(mpt_entry->mpt_byte_60, MPT_BYTE_60_PA6_L_M, - MPT_BYTE_60_PA6_L_S, - cpu_to_le32((u32)(pages[i]))); + MPT_BYTE_60_PA6_L_S, (u32)(pages[i])); roce_set_field(mpt_entry->mpt_byte_64, MPT_BYTE_64_PA6_H_M, MPT_BYTE_64_PA6_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_16))); + (u32)(pages[i] >> PAGES_SHIFT_16)); break; default: break; @@ -1951,7 +1993,7 @@ static int 
hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, free_page((unsigned long) pages); - mpt_entry->pbl_addr_l = (u32)(mr->pbl_dma_addr); + mpt_entry->pbl_addr_l = cpu_to_le32((u32)(mr->pbl_dma_addr)); roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M, MPT_BYTE_12_PBL_ADDR_H_S, @@ -1982,9 +2024,9 @@ static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *hr_cq) static void hns_roce_v1_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index) { - u32 doorbell[2]; + __le32 doorbell[2]; - doorbell[0] = cons_index & ((hr_cq->cq_depth << 1) - 1); + doorbell[0] = cpu_to_le32(cons_index & ((hr_cq->cq_depth << 1) - 1)); doorbell[1] = 0; roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1); roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M, @@ -2081,10 +2123,8 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S, CQ_STATE_VALID); roce_set_field(cq_context->cqc_byte_4, CQ_CONTEXT_CQC_BYTE_4_CQN_M, CQ_CONTEXT_CQC_BYTE_4_CQN_S, hr_cq->cqn); - cq_context->cqc_byte_4 = cpu_to_le32(cq_context->cqc_byte_4); - cq_context->cq_bt_l = (u32)dma_handle; - cq_context->cq_bt_l = cpu_to_le32(cq_context->cq_bt_l); + cq_context->cq_bt_l = cpu_to_le32((u32)dma_handle); roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_M, @@ -2096,15 +2136,12 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, ilog2((unsigned int)nent)); roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CEQN_M, CQ_CONTEXT_CQC_BYTE_12_CEQN_S, vector); - cq_context->cqc_byte_12 = cpu_to_le32(cq_context->cqc_byte_12); - cq_context->cur_cqe_ba0_l = (u32)(mtts[0]); - cq_context->cur_cqe_ba0_l = cpu_to_le32(cq_context->cur_cqe_ba0_l); + cq_context->cur_cqe_ba0_l = cpu_to_le32((u32)(mtts[0])); roce_set_field(cq_context->cqc_byte_20, CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_M, - CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S, - cpu_to_le32((mtts[0]) >> 32)); + CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S, (mtts[0]) >> 32); /* Dedicated hardware, directly set 0 */ roce_set_field(cq_context->cqc_byte_20, CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_M, @@ -2118,9 +2155,8 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M, CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S, tptr_dma_addr >> 44); - cq_context->cqc_byte_20 = cpu_to_le32(cq_context->cqc_byte_20); - cq_context->cqe_tptr_addr_l = (u32)(tptr_dma_addr >> 12); + cq_context->cqe_tptr_addr_l = cpu_to_le32((u32)(tptr_dma_addr >> 12)); roce_set_field(cq_context->cqc_byte_32, CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M, @@ -2138,7 +2174,6 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->cqc_byte_32, CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M, CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0); - cq_context->cqc_byte_32 = cpu_to_le32(cq_context->cqc_byte_32); } static int hns_roce_v1_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) @@ -2151,7 +2186,7 @@ static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, { struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); u32 notification_flag; - u32 doorbell[2]; + __le32 doorbell[2]; notification_flag = (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? 
CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL; @@ -2159,7 +2194,8 @@ static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, * flags = 0; Notification Flag = 1, next * flags = 1; Notification Flag = 0, solocited */ - doorbell[0] = hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1); + doorbell[0] = + cpu_to_le32(hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1)); roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1); roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M, ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3); @@ -2416,7 +2452,7 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, struct device *dev = &hr_dev->pdev->dev; struct hns_roce_v1_priv *priv; unsigned long end = 0, flags = 0; - uint32_t bt_cmd_val[2] = {0}; + __le32 bt_cmd_val[2] = {0}; void __iomem *bt_cmd; u64 bt_ba = 0; @@ -2468,7 +2504,7 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, msleep(HW_SYNC_SLEEP_TIME_INTERVAL); } - bt_cmd_val[0] = (uint32_t)bt_ba; + bt_cmd_val[0] = (__le32)bt_ba; roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, bt_ba >> 32); hns_roce_write64_k(bt_cmd_val, hr_dev->reg_base + ROCEE_BT_CMD_L_REG); @@ -2569,10 +2605,11 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, struct hns_roce_sqp_context *context; struct device *dev = &hr_dev->pdev->dev; dma_addr_t dma_handle = 0; + u32 __iomem *addr; int rq_pa_start; + __le32 tmp; u32 reg_val; u64 *mtts; - u32 __iomem *addr; context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) @@ -2598,7 +2635,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, roce_set_field(context->qp1c_bytes_4, QP1C_BYTES_4_PD_M, QP1C_BYTES_4_PD_S, to_hr_pd(ibqp->pd)->pdn); - context->sq_rq_bt_l = (u32)(dma_handle); + context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle)); roce_set_field(context->qp1c_bytes_12, QP1C_BYTES_12_SQ_RQ_BT_H_M, QP1C_BYTES_12_SQ_RQ_BT_H_S, @@ -2610,7 +2647,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP1C_BYTES_16_PORT_NUM_S, hr_qp->phy_port); roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SIGNALING_TYPE_S, - hr_qp->sq_signal_bits); + le32_to_cpu(hr_qp->sq_signal_bits)); roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_BA_FLG_S, 1); roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SQ_BA_FLG_S, @@ -2624,7 +2661,8 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP1C_BYTES_20_PKEY_IDX_S, attr->pkey_index); rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE; - context->cur_rq_wqe_ba_l = (u32)(mtts[rq_pa_start]); + context->cur_rq_wqe_ba_l = + cpu_to_le32((u32)(mtts[rq_pa_start])); roce_set_field(context->qp1c_bytes_28, QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M, @@ -2643,7 +2681,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP1C_BYTES_32_TX_CQ_NUM_S, to_hr_cq(ibqp->send_cq)->cqn); - context->cur_sq_wqe_ba_l = (u32)mtts[0]; + context->cur_sq_wqe_ba_l = cpu_to_le32((u32)mtts[0]); roce_set_field(context->qp1c_bytes_40, QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M, @@ -2658,23 +2696,25 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, ROCEE_QP1C_CFG0_0_REG + hr_qp->phy_port * sizeof(*context)); - writel(context->qp1c_bytes_4, addr); - writel(context->sq_rq_bt_l, addr + 1); - writel(context->qp1c_bytes_12, addr + 2); - writel(context->qp1c_bytes_16, addr + 3); - writel(context->qp1c_bytes_20, addr + 4); - writel(context->cur_rq_wqe_ba_l, addr + 5); - 
writel(context->qp1c_bytes_28, addr + 6); - writel(context->qp1c_bytes_32, addr + 7); - writel(context->cur_sq_wqe_ba_l, addr + 8); - writel(context->qp1c_bytes_40, addr + 9); + writel(le32_to_cpu(context->qp1c_bytes_4), addr); + writel(le32_to_cpu(context->sq_rq_bt_l), addr + 1); + writel(le32_to_cpu(context->qp1c_bytes_12), addr + 2); + writel(le32_to_cpu(context->qp1c_bytes_16), addr + 3); + writel(le32_to_cpu(context->qp1c_bytes_20), addr + 4); + writel(le32_to_cpu(context->cur_rq_wqe_ba_l), addr + 5); + writel(le32_to_cpu(context->qp1c_bytes_28), addr + 6); + writel(le32_to_cpu(context->qp1c_bytes_32), addr + 7); + writel(le32_to_cpu(context->cur_sq_wqe_ba_l), addr + 8); + writel(le32_to_cpu(context->qp1c_bytes_40), addr + 9); } /* Modify QP1C status */ reg_val = roce_read(hr_dev, ROCEE_QP1C_CFG0_0_REG + hr_qp->phy_port * sizeof(*context)); - roce_set_field(reg_val, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M, + tmp = cpu_to_le32(reg_val); + roce_set_field(tmp, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S, new_state); + reg_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_QP1C_CFG0_0_REG + hr_qp->phy_port * sizeof(*context), reg_val); @@ -2712,7 +2752,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); dma_addr_t dma_handle_2 = 0; dma_addr_t dma_handle = 0; - uint32_t doorbell[2] = {0}; + __le32 doorbell[2] = {0}; int rq_pa_start = 0; u64 *mtts_2 = NULL; int ret = -EINVAL; @@ -2887,7 +2927,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, dmac = (u8 *)attr->ah_attr.roce.dmac; - context->sq_rq_bt_l = (u32)(dma_handle); + context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle)); roce_set_field(context->qpc_bytes_24, QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M, QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S, @@ -2899,7 +2939,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M, QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S, attr->min_rnr_timer); - context->irrl_ba_l = (u32)(dma_handle_2); + context->irrl_ba_l = cpu_to_le32((u32)(dma_handle_2)); roce_set_field(context->qpc_bytes_32, QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M, QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S, @@ -2913,7 +2953,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, 1); roce_set_bit(context->qpc_bytes_32, QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S, - hr_qp->sq_signal_bits); + le32_to_cpu(hr_qp->sq_signal_bits)); port = (attr_mask & IB_QP_PORT) ? 
(attr->port_num - 1) : hr_qp->port; @@ -2991,7 +3031,8 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S, 0); rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE; - context->cur_rq_wqe_ba_l = (u32)(mtts[rq_pa_start]); + context->cur_rq_wqe_ba_l = + cpu_to_le32((u32)(mtts[rq_pa_start])); roce_set_field(context->qpc_bytes_76, QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M, @@ -3071,7 +3112,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, goto out; } - context->rx_cur_sq_wqe_ba_l = (u32)(mtts[0]); + context->rx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0])); roce_set_field(context->qpc_bytes_120, QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M, @@ -3219,7 +3260,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M, QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S, 0); - context->tx_cur_sq_wqe_ba_l = (u32)(mtts[0]); + context->tx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0])); roce_set_field(context->qpc_bytes_188, QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M, @@ -3386,16 +3427,16 @@ static int hns_roce_v1_q_sqp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, addr = ROCEE_QP1C_CFG0_0_REG + hr_qp->port * sizeof(struct hns_roce_sqp_context); - context.qp1c_bytes_4 = roce_read(hr_dev, addr); - context.sq_rq_bt_l = roce_read(hr_dev, addr + 1); - context.qp1c_bytes_12 = roce_read(hr_dev, addr + 2); - context.qp1c_bytes_16 = roce_read(hr_dev, addr + 3); - context.qp1c_bytes_20 = roce_read(hr_dev, addr + 4); - context.cur_rq_wqe_ba_l = roce_read(hr_dev, addr + 5); - context.qp1c_bytes_28 = roce_read(hr_dev, addr + 6); - context.qp1c_bytes_32 = roce_read(hr_dev, addr + 7); - context.cur_sq_wqe_ba_l = roce_read(hr_dev, addr + 8); - context.qp1c_bytes_40 = roce_read(hr_dev, addr + 9); + context.qp1c_bytes_4 = cpu_to_le32(roce_read(hr_dev, addr)); + context.sq_rq_bt_l = cpu_to_le32(roce_read(hr_dev, addr + 1)); + context.qp1c_bytes_12 = cpu_to_le32(roce_read(hr_dev, addr + 2)); + context.qp1c_bytes_16 = cpu_to_le32(roce_read(hr_dev, addr + 3)); + context.qp1c_bytes_20 = cpu_to_le32(roce_read(hr_dev, addr + 4)); + context.cur_rq_wqe_ba_l = cpu_to_le32(roce_read(hr_dev, addr + 5)); + context.qp1c_bytes_28 = cpu_to_le32(roce_read(hr_dev, addr + 6)); + context.qp1c_bytes_32 = cpu_to_le32(roce_read(hr_dev, addr + 7)); + context.cur_sq_wqe_ba_l = cpu_to_le32(roce_read(hr_dev, addr + 8)); + context.qp1c_bytes_40 = cpu_to_le32(roce_read(hr_dev, addr + 9)); hr_qp->state = roce_get_field(context.qp1c_bytes_4, QP1C_BYTES_4_QP_STATE_M, @@ -3557,7 +3598,7 @@ static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_attr->retry_cnt = roce_get_field(context->qpc_bytes_148, QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M, QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S); - qp_attr->rnr_retry = context->rnr_retry; + qp_attr->rnr_retry = (u8)context->rnr_retry; done: qp_attr->cur_qp_state = qp_attr->qp_state; @@ -3595,42 +3636,47 @@ static void hns_roce_check_sdb_status(struct hns_roce_dev *hr_dev, u32 *old_send, u32 *old_retry, u32 *tsp_st, u32 *success_flags) { + __le32 *old_send_tmp, *old_retry_tmp; u32 sdb_retry_cnt; u32 sdb_send_ptr; u32 cur_cnt, old_cnt; + __le32 tmp, tmp1; u32 send_ptr; sdb_send_ptr = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); sdb_retry_cnt = roce_read(hr_dev, ROCEE_SDB_RETRY_CNT_REG); - cur_cnt = roce_get_field(sdb_send_ptr, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, + tmp = cpu_to_le32(sdb_send_ptr); + tmp1 = cpu_to_le32(sdb_retry_cnt); + cur_cnt = 
roce_get_field(tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + - roce_get_field(sdb_retry_cnt, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, + roce_get_field(tmp1, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); + + old_send_tmp = (__le32 *)old_send; + old_retry_tmp = (__le32 *)old_retry; if (!roce_get_bit(*tsp_st, ROCEE_CNT_CLR_CE_CNT_CLR_CE_S)) { - old_cnt = roce_get_field(*old_send, + old_cnt = roce_get_field(*old_send_tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + - roce_get_field(*old_retry, + roce_get_field(*old_retry_tmp, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) *success_flags = 1; } else { - old_cnt = roce_get_field(*old_send, + old_cnt = roce_get_field(*old_send_tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S); if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) { *success_flags = 1; } else { - send_ptr = roce_get_field(*old_send, + send_ptr = roce_get_field(*old_send_tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + - roce_get_field(sdb_retry_cnt, + roce_get_field(tmp1, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); - roce_set_field(*old_send, + roce_set_field(*old_send_tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S, send_ptr); @@ -3646,11 +3692,14 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, { struct device *dev = &hr_dev->pdev->dev; u32 sdb_send_ptr, old_send; + __le32 sdb_issue_ptr_tmp; + __le32 sdb_send_ptr_tmp; u32 success_flags = 0; unsigned long end; u32 old_retry; u32 inv_cnt; u32 tsp_st; + __le32 tmp; if (*wait_stage > HNS_ROCE_V1_DB_STAGE2 || *wait_stage < HNS_ROCE_V1_DB_STAGE1) { @@ -3679,10 +3728,12 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, ROCEE_SDB_SEND_PTR_REG); } - if (roce_get_field(sdb_issue_ptr, + sdb_send_ptr_tmp = cpu_to_le32(sdb_send_ptr); + sdb_issue_ptr_tmp = cpu_to_le32(sdb_issue_ptr); + if (roce_get_field(sdb_issue_ptr_tmp, ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M, ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S) == - roce_get_field(sdb_send_ptr, + roce_get_field(sdb_send_ptr_tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)) { old_send = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); @@ -3690,7 +3741,8 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, do { tsp_st = roce_read(hr_dev, ROCEE_TSP_BP_ST_REG); - if (roce_get_bit(tsp_st, + tmp = cpu_to_le32(tsp_st); + if (roce_get_bit(tmp, ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S) == 1) { *wait_stage = HNS_ROCE_V1_DB_WAIT_OK; return 0; @@ -3699,8 +3751,9 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, if (!time_before(jiffies, end)) { dev_dbg(dev, "QP(0x%lx) db process stage1 timeout when send ptr equals issue ptr.\n" "issue 0x%x send 0x%x.\n", - hr_qp->qpn, sdb_issue_ptr, - sdb_send_ptr); + hr_qp->qpn, + le32_to_cpu(sdb_issue_ptr_tmp), + le32_to_cpu(sdb_send_ptr_tmp)); return 0; } @@ -4102,9 +4155,9 @@ static void hns_roce_v1_cq_err_handle(struct hns_roce_dev *hr_dev, struct device *dev = &hr_dev->pdev->dev; u32 cqn; - cqn = le32_to_cpu(roce_get_field(aeqe->event.cq_event.cq, + cqn = roce_get_field(aeqe->event.cq_event.cq, HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M, - HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S)); + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: @@ -4340,6 +4393,7 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int 
irq, void *dev_id) u32 aeshift_val; u32 ceshift_val; u32 cemask_val; + __le32 tmp; int i; /* @@ -4348,30 +4402,34 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id) * interrupt, mask irq, clear irq, cancel mask operation */ aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG); + tmp = cpu_to_le32(aeshift_val); /* AEQE overflow */ - if (roce_get_bit(aeshift_val, + if (roce_get_bit(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQ_ALM_OVF_INT_ST_S) == 1) { dev_warn(dev, "AEQ overflow!\n"); /* Set mask */ caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG); - roce_set_bit(caepaemask_val, - ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, + tmp = cpu_to_le32(caepaemask_val); + roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, HNS_ROCE_INT_MASK_ENABLE); + caepaemask_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val); /* Clear int state(INT_WC : write 1 clear) */ caepaest_val = roce_read(hr_dev, ROCEE_CAEP_AE_ST_REG); - roce_set_bit(caepaest_val, - ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S, 1); + tmp = cpu_to_le32(caepaest_val); + roce_set_bit(tmp, ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S, 1); + caepaest_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_AE_ST_REG, caepaest_val); /* Clear mask */ caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG); - roce_set_bit(caepaemask_val, - ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, + tmp = cpu_to_le32(caepaemask_val); + roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, HNS_ROCE_INT_MASK_DISABLE); + caepaemask_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val); } @@ -4379,8 +4437,9 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id) for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) { ceshift_val = roce_read(hr_dev, ROCEE_CAEP_CEQC_SHIFT_0_REG + i * CEQ_REG_OFFSET); + tmp = cpu_to_le32(ceshift_val); - if (roce_get_bit(ceshift_val, + if (roce_get_bit(tmp, ROCEE_CAEP_CEQC_SHIFT_CAEP_CEQ_ALM_OVF_INT_ST_S) == 1) { dev_warn(dev, "CEQ[%d] almost overflow!\n", i); int_work++; @@ -4389,9 +4448,11 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id) cemask_val = roce_read(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + i * CEQ_REG_OFFSET); - roce_set_bit(cemask_val, + tmp = cpu_to_le32(cemask_val); + roce_set_bit(tmp, ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S, HNS_ROCE_INT_MASK_ENABLE); + cemask_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + i * CEQ_REG_OFFSET, cemask_val); @@ -4399,9 +4460,11 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id) cealmovf_val = roce_read(hr_dev, ROCEE_CAEP_CEQ_ALM_OVF_0_REG + i * CEQ_REG_OFFSET); - roce_set_bit(cealmovf_val, + tmp = cpu_to_le32(cealmovf_val); + roce_set_bit(tmp, ROCEE_CAEP_CEQ_ALM_OVF_CAEP_CEQ_ALM_OVF_S, 1); + cealmovf_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_CEQ_ALM_OVF_0_REG + i * CEQ_REG_OFFSET, cealmovf_val); @@ -4409,9 +4472,11 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id) cemask_val = roce_read(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + i * CEQ_REG_OFFSET); - roce_set_bit(cemask_val, + tmp = cpu_to_le32(cemask_val); + roce_set_bit(tmp, ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S, HNS_ROCE_INT_MASK_DISABLE); + cemask_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + i * CEQ_REG_OFFSET, cemask_val); } @@ -4435,13 +4500,16 @@ static void hns_roce_v1_int_mask_enable(struct hns_roce_dev *hr_dev) { u32 aemask_val; int masken = 0; + __le32 tmp; 
int i; /* AEQ INT */ aemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG); - roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, + tmp = cpu_to_le32(aemask_val); + roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, masken); - roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S, masken); + roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S, masken); + aemask_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, aemask_val); /* CEQ INT */ @@ -4473,20 +4541,24 @@ static void hns_roce_v1_enable_eq(struct hns_roce_dev *hr_dev, int eq_num, int enable_flag) { void __iomem *eqc = hr_dev->eq_table.eqc_base[eq_num]; + __le32 tmp; u32 val; val = readl(eqc); + tmp = cpu_to_le32(val); if (enable_flag) - roce_set_field(val, + roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S, HNS_ROCE_EQ_STAT_VALID); else - roce_set_field(val, + roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S, HNS_ROCE_EQ_STAT_INVALID); + + val = le32_to_cpu(tmp); writel(val, eqc); } @@ -4499,6 +4571,9 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev, u32 eqconsindx_val = 0; u32 eqcuridx_val = 0; u32 eqshift_val = 0; + __le32 tmp2 = 0; + __le32 tmp1 = 0; + __le32 tmp = 0; int num_bas; int ret; int i; @@ -4530,14 +4605,13 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev, memset(eq->buf_list[i].buf, 0, HNS_ROCE_BA_SIZE); } eq->cons_index = 0; - roce_set_field(eqshift_val, - ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, + roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S, HNS_ROCE_EQ_STAT_INVALID); - roce_set_field(eqshift_val, - ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M, + roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S, eq->log_entries); + eqshift_val = le32_to_cpu(tmp); writel(eqshift_val, eqc); /* Configure eq extended address 12~44bit */ @@ -4549,18 +4623,18 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev, * using 4K page, and shift more 32 because of * caculating the high 32 bit value evaluated to hardware. 
*/ - roce_set_field(eqcuridx_val, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M, + roce_set_field(tmp1, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S, eq->buf_list[0].map >> 44); - roce_set_field(eqcuridx_val, - ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M, + roce_set_field(tmp1, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S, 0); + eqcuridx_val = le32_to_cpu(tmp1); writel(eqcuridx_val, eqc + 8); /* Configure eq consumer index */ - roce_set_field(eqconsindx_val, - ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M, + roce_set_field(tmp2, ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M, ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S, 0); + eqconsindx_val = le32_to_cpu(tmp2); writel(eqconsindx_val, eqc + 0xc); return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index e9a2717ea7cd..66440147d9eb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -260,7 +260,7 @@ struct hns_roce_cqe { __le32 cqe_byte_4; union { __le32 r_key; - __be32 immediate_data; + __le32 immediate_data; }; __le32 byte_cnt; __le32 cqe_byte_16; -- cgit From 6b63597d3540003c7a0ece4a0d2f5a3b06e3b729 Mon Sep 17 00:00:00 2001 From: oulijun Date: Mon, 9 Jul 2018 17:48:07 +0800 Subject: RDMA/hns: Add TSQ link table support In hip08, the TSQ (Transport Service Queue) should be extended to host memory to store the doorbells. This patch adds support for creating the TSQ and then configuring it in the hardware. Signed-off-by: Yixian Liu Signed-off-by: Lijun Ou Signed-off-by: Wei Hu (Xavier) Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 217 +++++++++++++++++++++++++++- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 85 ++++++++++- 3 files changed, 297 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index df5e3c12254e..f7a63cc08372 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -720,6 +720,8 @@ struct hns_roce_caps { u32 eqe_ba_pg_sz; u32 eqe_buf_pg_sz; u32 eqe_hop_num; + u32 sl_num; + u32 tsq_buf_pg_sz; u32 chunk_sz; /* chunk size in non multihop mode*/ u64 flags; }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index d8c68db4497f..c59a73e18add 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -925,7 +925,8 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev) static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) { struct hns_roce_cmq_desc desc[2]; - struct hns_roce_pf_res *res; + struct hns_roce_pf_res_a *req_a; + struct hns_roce_pf_res_b *req_b; int ret; int i; @@ -943,21 +944,26 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) if (ret) return ret; - res = (struct hns_roce_pf_res *)desc[0].data; + req_a = (struct hns_roce_pf_res_a *)desc[0].data; + req_b = (struct hns_roce_pf_res_b *)desc[1].data; - hr_dev->caps.qpc_bt_num = roce_get_field(res->qpc_bt_idx_num, + hr_dev->caps.qpc_bt_num = roce_get_field(req_a->qpc_bt_idx_num, PF_RES_DATA_1_PF_QPC_BT_NUM_M, PF_RES_DATA_1_PF_QPC_BT_NUM_S); - hr_dev->caps.srqc_bt_num = roce_get_field(res->srqc_bt_idx_num, + hr_dev->caps.srqc_bt_num = roce_get_field(req_a->srqc_bt_idx_num, PF_RES_DATA_2_PF_SRQC_BT_NUM_M, PF_RES_DATA_2_PF_SRQC_BT_NUM_S);
- hr_dev->caps.cqc_bt_num = roce_get_field(res->cqc_bt_idx_num, + hr_dev->caps.cqc_bt_num = roce_get_field(req_a->cqc_bt_idx_num, PF_RES_DATA_3_PF_CQC_BT_NUM_M, PF_RES_DATA_3_PF_CQC_BT_NUM_S); - hr_dev->caps.mpt_bt_num = roce_get_field(res->mpt_bt_idx_num, + hr_dev->caps.mpt_bt_num = roce_get_field(req_a->mpt_bt_idx_num, PF_RES_DATA_4_PF_MPT_BT_NUM_M, PF_RES_DATA_4_PF_MPT_BT_NUM_S); + hr_dev->caps.sl_num = roce_get_field(req_b->qid_idx_sl_num, + PF_RES_DATA_3_PF_SL_NUM_M, + PF_RES_DATA_3_PF_SL_NUM_S); + return 0; } @@ -1203,6 +1209,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->eqe_ba_pg_sz = 0; caps->eqe_buf_pg_sz = 0; caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM; + caps->tsq_buf_pg_sz = 0; caps->chunk_sz = HNS_ROCE_V2_TABLE_CHUNK_SIZE; caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR | @@ -1224,6 +1231,202 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) return ret; } +static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev, + enum hns_roce_link_table_type type) +{ + struct hns_roce_cmq_desc desc[2]; + struct hns_roce_cfg_llm_a *req_a = + (struct hns_roce_cfg_llm_a *)desc[0].data; + struct hns_roce_cfg_llm_b *req_b = + (struct hns_roce_cfg_llm_b *)desc[1].data; + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_link_table *link_tbl; + struct hns_roce_link_table_entry *entry; + enum hns_roce_opcode_type opcode; + u32 page_num; + int i; + + switch (type) { + case TSQ_LINK_TABLE: + link_tbl = &priv->tsq; + opcode = HNS_ROCE_OPC_CFG_EXT_LLM; + break; + default: + return -EINVAL; + } + + page_num = link_tbl->npages; + entry = link_tbl->table.buf; + memset(req_a, 0, sizeof(*req_a)); + memset(req_b, 0, sizeof(*req_b)); + + for (i = 0; i < 2; i++) { + hns_roce_cmq_setup_basic_desc(&desc[i], opcode, false); + + if (i == 0) + desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + else + desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + + if (i == 0) { + req_a->base_addr_l = link_tbl->table.map & 0xffffffff; + req_a->base_addr_h = (link_tbl->table.map >> 32) & + 0xffffffff; + roce_set_field(req_a->depth_pgsz_init_en, + CFG_LLM_QUE_DEPTH_M, + CFG_LLM_QUE_DEPTH_S, + link_tbl->npages); + roce_set_field(req_a->depth_pgsz_init_en, + CFG_LLM_QUE_PGSZ_M, + CFG_LLM_QUE_PGSZ_S, + link_tbl->pg_sz); + req_a->head_ba_l = entry[0].blk_ba0; + req_a->head_ba_h_nxtptr = entry[0].blk_ba1_nxt_ptr; + roce_set_field(req_a->head_ptr, + CFG_LLM_HEAD_PTR_M, + CFG_LLM_HEAD_PTR_S, 0); + } else { + req_b->tail_ba_l = entry[page_num - 1].blk_ba0; + roce_set_field(req_b->tail_ba_h, + CFG_LLM_TAIL_BA_H_M, + CFG_LLM_TAIL_BA_H_S, + entry[page_num - 1].blk_ba1_nxt_ptr & + HNS_ROCE_LINK_TABLE_BA1_M); + roce_set_field(req_b->tail_ptr, + CFG_LLM_TAIL_PTR_M, + CFG_LLM_TAIL_PTR_S, + (entry[page_num - 2].blk_ba1_nxt_ptr & + HNS_ROCE_LINK_TABLE_NXT_PTR_M) >> + HNS_ROCE_LINK_TABLE_NXT_PTR_S); + } + } + roce_set_field(req_a->depth_pgsz_init_en, + CFG_LLM_INIT_EN_M, CFG_LLM_INIT_EN_S, 1); + + return hns_roce_cmq_send(hr_dev, desc, 2); +} + +static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev, + enum hns_roce_link_table_type type) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_link_table *link_tbl; + struct hns_roce_link_table_entry *entry; + struct device *dev = hr_dev->dev; + u32 buf_chk_sz; + dma_addr_t t; + int pg_num_a; + int pg_num_b; + int pg_num; + int size; + int i; + + switch (type) { + case TSQ_LINK_TABLE: + link_tbl = &priv->tsq; + buf_chk_sz = 1 << (hr_dev->caps.tsq_buf_pg_sz + PAGE_SHIFT); + pg_num_a = hr_dev->caps.num_qps * 8 / 
buf_chk_sz; + pg_num_b = hr_dev->caps.sl_num * 4 + 2; + break; + default: + return -EINVAL; + } + + pg_num = max(pg_num_a, pg_num_b); + size = pg_num * sizeof(struct hns_roce_link_table_entry); + + link_tbl->table.buf = dma_alloc_coherent(dev, size, + &link_tbl->table.map, + GFP_KERNEL); + if (!link_tbl->table.buf) + goto out; + + link_tbl->pg_list = kcalloc(pg_num, sizeof(*link_tbl->pg_list), + GFP_KERNEL); + if (!link_tbl->pg_list) + goto err_kcalloc_failed; + + entry = link_tbl->table.buf; + for (i = 0; i < pg_num; ++i) { + link_tbl->pg_list[i].buf = dma_alloc_coherent(dev, buf_chk_sz, + &t, GFP_KERNEL); + if (!link_tbl->pg_list[i].buf) + goto err_alloc_buf_failed; + + link_tbl->pg_list[i].map = t; + memset(link_tbl->pg_list[i].buf, 0, buf_chk_sz); + + entry[i].blk_ba0 = (t >> 12) & 0xffffffff; + roce_set_field(entry[i].blk_ba1_nxt_ptr, + HNS_ROCE_LINK_TABLE_BA1_M, + HNS_ROCE_LINK_TABLE_BA1_S, + t >> 44); + + if (i < (pg_num - 1)) + roce_set_field(entry[i].blk_ba1_nxt_ptr, + HNS_ROCE_LINK_TABLE_NXT_PTR_M, + HNS_ROCE_LINK_TABLE_NXT_PTR_S, + i + 1); + } + link_tbl->npages = pg_num; + link_tbl->pg_sz = buf_chk_sz; + + return hns_roce_config_link_table(hr_dev, type); + +err_alloc_buf_failed: + for (i -= 1; i >= 0; i--) + dma_free_coherent(dev, buf_chk_sz, + link_tbl->pg_list[i].buf, + link_tbl->pg_list[i].map); + kfree(link_tbl->pg_list); + +err_kcalloc_failed: + dma_free_coherent(dev, size, link_tbl->table.buf, + link_tbl->table.map); + +out: + return -ENOMEM; +} + +static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev, + struct hns_roce_link_table *link_tbl) +{ + struct device *dev = hr_dev->dev; + int size; + int i; + + size = link_tbl->npages * sizeof(struct hns_roce_link_table_entry); + + for (i = 0; i < link_tbl->npages; ++i) + if (link_tbl->pg_list[i].buf) + dma_free_coherent(dev, link_tbl->pg_sz, + link_tbl->pg_list[i].buf, + link_tbl->pg_list[i].map); + kfree(link_tbl->pg_list); + + dma_free_coherent(dev, size, link_tbl->table.buf, + link_tbl->table.map); +} + +static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) +{ + int ret; + + /* TSQ includes SQ doorbell and ack doorbell */ + ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE); + if (ret) + dev_err(hr_dev->dev, "TSQ init failed, ret = %d.\n", ret); + + return ret; +} + +static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + + hns_roce_free_link_table(hr_dev, &priv->tsq); +} + static int hns_roce_v2_cmd_pending(struct hns_roce_dev *hr_dev) { u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG); @@ -4722,6 +4925,8 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .cmq_init = hns_roce_v2_cmq_init, .cmq_exit = hns_roce_v2_cmq_exit, .hw_profile = hns_roce_v2_profile, + .hw_init = hns_roce_v2_init, + .hw_exit = hns_roce_v2_exit, .post_mbox = hns_roce_v2_post_mbox, .chk_mbox = hns_roce_v2_chk_mbox, .set_gid = hns_roce_v2_set_gid, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index d47675f365c7..18626d39f8c8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -203,6 +203,7 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004, HNS_ROCE_OPC_QUERY_PF_RES = 0x8400, HNS_ROCE_OPC_ALLOC_VF_RES = 0x8401, + HNS_ROCE_OPC_CFG_EXT_LLM = 0x8403, HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506, }; @@ -1061,6 +1062,40 @@ struct hns_roce_query_version { __le32 rsv[5]; }; +struct hns_roce_cfg_llm_a { + __le32 base_addr_l; + __le32 base_addr_h; + __le32 
depth_pgsz_init_en; + __le32 head_ba_l; + __le32 head_ba_h_nxtptr; + __le32 head_ptr; +}; + +#define CFG_LLM_QUE_DEPTH_S 0 +#define CFG_LLM_QUE_DEPTH_M GENMASK(12, 0) + +#define CFG_LLM_QUE_PGSZ_S 16 +#define CFG_LLM_QUE_PGSZ_M GENMASK(19, 16) + +#define CFG_LLM_INIT_EN_S 20 +#define CFG_LLM_INIT_EN_M GENMASK(20, 20) + +#define CFG_LLM_HEAD_PTR_S 0 +#define CFG_LLM_HEAD_PTR_M GENMASK(11, 0) + +struct hns_roce_cfg_llm_b { + __le32 tail_ba_l; + __le32 tail_ba_h; + __le32 tail_ptr; + __le32 rsv[3]; +}; + +#define CFG_LLM_TAIL_BA_H_S 0 +#define CFG_LLM_TAIL_BA_H_M GENMASK(19, 0) + +#define CFG_LLM_TAIL_PTR_S 0 +#define CFG_LLM_TAIL_PTR_M GENMASK(11, 0) + struct hns_roce_cfg_global_param { __le32 time_cfg_udp_port; __le32 rsv[5]; @@ -1072,7 +1107,7 @@ struct hns_roce_cfg_global_param { #define CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_S 16 #define CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_M GENMASK(31, 16) -struct hns_roce_pf_res { +struct hns_roce_pf_res_a { __le32 rsv; __le32 qpc_bt_idx_num; __le32 srqc_bt_idx_num; @@ -1111,6 +1146,32 @@ struct hns_roce_pf_res { #define PF_RES_DATA_5_PF_EQC_BT_NUM_S 16 #define PF_RES_DATA_5_PF_EQC_BT_NUM_M GENMASK(25, 16) +struct hns_roce_pf_res_b { + __le32 rsv0; + __le32 smac_idx_num; + __le32 sgid_idx_num; + __le32 qid_idx_sl_num; + __le32 rsv[2]; +}; + +#define PF_RES_DATA_1_PF_SMAC_IDX_S 0 +#define PF_RES_DATA_1_PF_SMAC_IDX_M GENMASK(7, 0) + +#define PF_RES_DATA_1_PF_SMAC_NUM_S 8 +#define PF_RES_DATA_1_PF_SMAC_NUM_M GENMASK(16, 8) + +#define PF_RES_DATA_2_PF_SGID_IDX_S 0 +#define PF_RES_DATA_2_PF_SGID_IDX_M GENMASK(7, 0) + +#define PF_RES_DATA_2_PF_SGID_NUM_S 8 +#define PF_RES_DATA_2_PF_SGID_NUM_M GENMASK(16, 8) + +#define PF_RES_DATA_3_PF_QID_IDX_S 0 +#define PF_RES_DATA_3_PF_QID_IDX_M GENMASK(9, 0) + +#define PF_RES_DATA_3_PF_SL_NUM_S 16 +#define PF_RES_DATA_3_PF_SL_NUM_M GENMASK(26, 16) + struct hns_roce_vf_res_a { __le32 vf_id; __le32 vf_qpc_bt_idx_num; @@ -1276,8 +1337,30 @@ struct hns_roce_v2_cmq { u16 last_status; }; +enum hns_roce_link_table_type { + TSQ_LINK_TABLE, +}; + +struct hns_roce_link_table { + struct hns_roce_buf_list table; + struct hns_roce_buf_list *pg_list; + u32 npages; + u32 pg_sz; +}; + +struct hns_roce_link_table_entry { + u32 blk_ba0; + u32 blk_ba1_nxt_ptr; +}; +#define HNS_ROCE_LINK_TABLE_BA1_S 0 +#define HNS_ROCE_LINK_TABLE_BA1_M GENMASK(19, 0) + +#define HNS_ROCE_LINK_TABLE_NXT_PTR_S 20 +#define HNS_ROCE_LINK_TABLE_NXT_PTR_M GENMASK(31, 20) + struct hns_roce_v2_priv { struct hns_roce_v2_cmq cmq; + struct hns_roce_link_table tsq; }; struct hns_roce_eq_context { -- cgit From ded58ff987dbc3066825789a552a0ce33075c4f7 Mon Sep 17 00:00:00 2001 From: oulijun Date: Mon, 9 Jul 2018 17:48:08 +0800 Subject: RDMA/hns: Add TPQ link table support In hip08, the TPQ (Timer Poll Queue) should be extended to host memory. This patch adds support for the TPQ.
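The TPQ reuses the link-table layout introduced for the TSQ in the previous patch: a chain of host-memory pages, each described by a two-word table entry. As a minimal sketch of that layout (illustration only; the helper name is hypothetical and not part of this patch), one entry encodes a page's DMA address split across the two words, plus the index of the next page in the chain:

	/* Hypothetical helper mirroring the entry setup in
	 * hns_roce_init_link_table(); next_idx is assumed to fit in the
	 * 12-bit next-pointer field (i.e. fewer than 4096 pages).
	 */
	static void link_tbl_fill_entry(struct hns_roce_link_table_entry *entry,
					dma_addr_t page_dma, u32 next_idx)
	{
		/* bits 43:12 of the page DMA address */
		entry->blk_ba0 = (u32)(page_dma >> 12);
		/* bits 63:44 of the address in [19:0], next index in [31:20] */
		entry->blk_ba1_nxt_ptr = ((u32)(page_dma >> 44) &
					  HNS_ROCE_LINK_TABLE_BA1_M) |
					 (next_idx << HNS_ROCE_LINK_TABLE_NXT_PTR_S);
	}

The last entry is simply not linked onward, and the head and tail entries are then reported to hardware through the HNS_ROCE_OPC_CFG_TMOUT_LLM command descriptors, mirroring the TSQ configuration path.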
Signed-off-by: Yixian Liu Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 28 +++++++++++++++++++++++++++- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 3 +++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index f7a63cc08372..a595e72f243e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -722,6 +722,7 @@ struct hns_roce_caps { u32 eqe_hop_num; u32 sl_num; u32 tsq_buf_pg_sz; + u32 tpq_buf_pg_sz; u32 chunk_sz; /* chunk size in non multihop mode*/ u64 flags; }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index c59a73e18add..e47d46a8a7d7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1251,6 +1251,10 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev, link_tbl = &priv->tsq; opcode = HNS_ROCE_OPC_CFG_EXT_LLM; break; + case TPQ_LINK_TABLE: + link_tbl = &priv->tpq; + opcode = HNS_ROCE_OPC_CFG_TMOUT_LLM; + break; default: return -EINVAL; } @@ -1315,6 +1319,7 @@ static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev, struct device *dev = hr_dev->dev; u32 buf_chk_sz; dma_addr_t t; + int func_num = 1; int pg_num_a; int pg_num_b; int pg_num; @@ -1328,6 +1333,12 @@ static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev, pg_num_a = hr_dev->caps.num_qps * 8 / buf_chk_sz; pg_num_b = hr_dev->caps.sl_num * 4 + 2; break; + case TPQ_LINK_TABLE: + link_tbl = &priv->tpq; + buf_chk_sz = 1 << (hr_dev->caps.tpq_buf_pg_sz + PAGE_SHIFT); + pg_num_a = hr_dev->caps.num_cqs * 4 / buf_chk_sz; + pg_num_b = 2 * 4 * func_num + 2; + break; default: return -EINVAL; } @@ -1410,12 +1421,26 @@ static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev, static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) { + struct hns_roce_v2_priv *priv = hr_dev->priv; int ret; /* TSQ includes SQ doorbell and ack doorbell */ ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE); - if (ret) + if (ret) { dev_err(hr_dev->dev, "TSQ init failed, ret = %d.\n", ret); + return ret; + } + + ret = hns_roce_init_link_table(hr_dev, TPQ_LINK_TABLE); + if (ret) { + dev_err(hr_dev->dev, "TPQ init failed, ret = %d.\n", ret); + goto err_tpq_init_failed; + } + + return 0; + +err_tpq_init_failed: + hns_roce_free_link_table(hr_dev, &priv->tsq); return ret; } @@ -1424,6 +1449,7 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev) { struct hns_roce_v2_priv *priv = hr_dev->priv; + hns_roce_free_link_table(hr_dev, &priv->tpq); hns_roce_free_link_table(hr_dev, &priv->tsq); } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 18626d39f8c8..6ad83ea135e4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -204,6 +204,7 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_QUERY_PF_RES = 0x8400, HNS_ROCE_OPC_ALLOC_VF_RES = 0x8401, HNS_ROCE_OPC_CFG_EXT_LLM = 0x8403, + HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404, HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506, }; @@ -1339,6 +1340,7 @@ struct hns_roce_v2_cmq { enum hns_roce_link_table_type { TSQ_LINK_TABLE, + TPQ_LINK_TABLE, }; struct hns_roce_link_table { @@ -1361,6 +1363,7 @@ struct hns_roce_link_table_entry { struct hns_roce_v2_priv { struct hns_roce_v2_cmq cmq; struct hns_roce_link_table tsq; + struct 
hns_roce_link_table tpq; }; struct hns_roce_eq_context { -- cgit From 4db134a3dde76330c65dd82e2918d655f2b6c90a Mon Sep 17 00:00:00 2001 From: oulijun Date: Mon, 9 Jul 2018 17:48:09 +0800 Subject: RDMA/hns: Update the implementation of set_gid This patch updates the implementation of set_gid by using command queue instead of directly writing registers. Signed-off-by: Lijun Ou Signed-off-by: Yixian Liu Signed-off-by: Wei Hu (Xavier) Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_common.h | 6 --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 62 ++++++++++++++++++----------- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 18 +++++++-- 3 files changed, 53 insertions(+), 33 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index 319cb74aebaf..413501025dc5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -385,12 +385,6 @@ #define ROCEE_VF_SMAC_CFG0_REG 0x12000 #define ROCEE_VF_SMAC_CFG1_REG 0x12004 -#define ROCEE_VF_SGID_CFG0_REG 0x10000 -#define ROCEE_VF_SGID_CFG1_REG 0x10004 -#define ROCEE_VF_SGID_CFG2_REG 0x10008 -#define ROCEE_VF_SGID_CFG3_REG 0x1000c -#define ROCEE_VF_SGID_CFG4_REG 0x10010 - #define ROCEE_VF_ABN_INT_CFG_REG 0x13000 #define ROCEE_VF_ABN_INT_ST_REG 0x13004 #define ROCEE_VF_ABN_INT_EN_REG 0x13008 diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index e47d46a8a7d7..0ace51777b25 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1536,13 +1536,45 @@ static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, return 0; } +static int hns_roce_config_sgid_table(struct hns_roce_dev *hr_dev, + int gid_index, const union ib_gid *gid, + enum hns_roce_sgid_type sgid_type) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_cfg_sgid_tb *sgid_tb = + (struct hns_roce_cfg_sgid_tb *)desc.data; + u32 *p; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false); + + roce_set_field(sgid_tb->table_idx_rsv, + CFG_SGID_TB_TABLE_IDX_M, + CFG_SGID_TB_TABLE_IDX_S, gid_index); + roce_set_field(sgid_tb->vf_sgid_type_rsv, + CFG_SGID_TB_VF_SGID_TYPE_M, + CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type); + + p = (u32 *)&gid->raw[0]; + sgid_tb->vf_sgid_l = cpu_to_le32(*p); + + p = (u32 *)&gid->raw[4]; + sgid_tb->vf_sgid_ml = cpu_to_le32(*p); + + p = (u32 *)&gid->raw[8]; + sgid_tb->vf_sgid_mh = cpu_to_le32(*p); + + p = (u32 *)&gid->raw[0xc]; + sgid_tb->vf_sgid_h = cpu_to_le32(*p); + + return hns_roce_cmq_send(hr_dev, &desc, 1); +} + static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, int gid_index, const union ib_gid *gid, const struct ib_gid_attr *attr) { enum hns_roce_sgid_type sgid_type = GID_TYPE_FLAG_ROCE_V1; - u32 *p; - u32 val; + int ret; if (!gid || !attr) return -EINVAL; @@ -1557,29 +1589,11 @@ static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6; } - p = (u32 *)&gid->raw[0]; - roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG0_REG + - 0x20 * gid_index); - - p = (u32 *)&gid->raw[4]; - roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG1_REG + - 0x20 * gid_index); - - p = (u32 *)&gid->raw[8]; - roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG2_REG + - 0x20 * gid_index); - - p = (u32 *)&gid->raw[0xc]; - roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG3_REG + - 0x20 * gid_index); - - val = roce_read(hr_dev, ROCEE_VF_SGID_CFG4_REG + 0x20 * 
gid_index); - roce_set_field(val, ROCEE_VF_SGID_CFG4_SGID_TYPE_M, - ROCEE_VF_SGID_CFG4_SGID_TYPE_S, sgid_type); - - roce_write(hr_dev, ROCEE_VF_SGID_CFG4_REG + 0x20 * gid_index, val); + ret = hns_roce_config_sgid_table(hr_dev, gid_index, gid, sgid_type); + if (ret) + dev_err(hr_dev->dev, "Configure sgid table failed(%d)!\n", ret); - return 0; + return ret; } static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 6ad83ea135e4..169f747b2d7c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -205,6 +205,7 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_ALLOC_VF_RES = 0x8401, HNS_ROCE_OPC_CFG_EXT_LLM = 0x8403, HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404, + HNS_ROCE_OPC_CFG_SGID_TB = 0x8500, HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506, }; @@ -1245,9 +1246,6 @@ struct hns_roce_vf_res_b { #define ROCEE_VF_SMAC_CFG1_VF_SMAC_H_S 0 #define ROCEE_VF_SMAC_CFG1_VF_SMAC_H_M GENMASK(15, 0) -#define ROCEE_VF_SGID_CFG4_SGID_TYPE_S 0 -#define ROCEE_VF_SGID_CFG4_SGID_TYPE_M GENMASK(1, 0) - struct hns_roce_cfg_bt_attr { __le32 vf_qpc_cfg; __le32 vf_srqc_cfg; @@ -1292,6 +1290,20 @@ struct hns_roce_cfg_bt_attr { #define CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_S 8 #define CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_M GENMASK(9, 8) +struct hns_roce_cfg_sgid_tb { + __le32 table_idx_rsv; + __le32 vf_sgid_l; + __le32 vf_sgid_ml; + __le32 vf_sgid_mh; + __le32 vf_sgid_h; + __le32 vf_sgid_type_rsv; +}; +#define CFG_SGID_TB_TABLE_IDX_S 0 +#define CFG_SGID_TB_TABLE_IDX_M GENMASK(7, 0) + +#define CFG_SGID_TB_VF_SGID_TYPE_S 0 +#define CFG_SGID_TB_VF_SGID_TYPE_M GENMASK(1, 0) + struct hns_roce_cmq_desc { __le16 opcode; __le16 flag; -- cgit From e8e8b65224625f7d4dc7953484afe1b571db6c73 Mon Sep 17 00:00:00 2001 From: oulijun Date: Mon, 9 Jul 2018 17:48:10 +0800 Subject: RDMA/hns: Update the implementation of set_mac This patch updates the implementation of set_mac by using command queue instead of directly writing registers. 
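For reference, the CFG_SMAC_TB descriptor packs the 6-byte MAC across two payload words; a condensed sketch of the layout and the new flow, paraphrased from the function added below (the memset of the payload and error handling are omitted):

	/* HNS_ROCE_OPC_CFG_SMAC_TB payload (six __le32 words):
	 *   word 0: tb_idx_rsv    - bits 7:0  = SMAC table index (phy_port)
	 *   word 1: vf_smac_l     - MAC bytes 0..3
	 *   word 2: vf_smac_h_rsv - bits 15:0 = MAC bytes 4..5
	 *   words 3..5: reserved
	 */
	hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SMAC_TB, false);
	roce_set_field(smac_tb->tb_idx_rsv, CFG_SMAC_TB_IDX_M,
		       CFG_SMAC_TB_IDX_S, phy_port);
	smac_tb->vf_smac_l = *(u32 *)(&addr[0]);
	roce_set_field(smac_tb->vf_smac_h_rsv, CFG_SMAC_TB_VF_SMAC_H_M,
		       CFG_SMAC_TB_VF_SMAC_H_S, *(u16 *)(&addr[4]));
	return hns_roce_cmq_send(hr_dev, &desc, 1);	/* one descriptor */

The same pattern (build one hns_roce_cmq_desc, fill the table-specific payload, send it with hns_roce_cmq_send()) was used for the SGID table update in the previous patch.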
Signed-off-by: Lijun Ou Signed-off-by: Yixian Liu Signed-off-by: Wei Hu (Xavier) Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_common.h | 3 --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 24 +++++++++++++++--------- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 17 +++++++++++++---- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index 413501025dc5..93d4b4ec002d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -382,9 +382,6 @@ #define ROCEE_VF_EQ_DB_CFG0_REG 0x238 #define ROCEE_VF_EQ_DB_CFG1_REG 0x23C -#define ROCEE_VF_SMAC_CFG0_REG 0x12000 -#define ROCEE_VF_SMAC_CFG1_REG 0x12004 - #define ROCEE_VF_ABN_INT_CFG_REG 0x13000 #define ROCEE_VF_ABN_INT_ST_REG 0x13004 #define ROCEE_VF_ABN_INT_EN_REG 0x13008 diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 0ace51777b25..1983a8e714f6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1599,21 +1599,27 @@ static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr) { + struct hns_roce_cmq_desc desc; + struct hns_roce_cfg_smac_tb *smac_tb = + (struct hns_roce_cfg_smac_tb *)desc.data; u16 reg_smac_h; u32 reg_smac_l; - u32 val; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SMAC_TB, false); reg_smac_l = *(u32 *)(&addr[0]); - roce_raw_write(reg_smac_l, hr_dev->reg_base + ROCEE_VF_SMAC_CFG0_REG + - 0x08 * phy_port); - val = roce_read(hr_dev, ROCEE_VF_SMAC_CFG1_REG + 0x08 * phy_port); + reg_smac_h = *(u16 *)(&addr[4]); - reg_smac_h = *(u16 *)(&addr[4]); - roce_set_field(val, ROCEE_VF_SMAC_CFG1_VF_SMAC_H_M, - ROCEE_VF_SMAC_CFG1_VF_SMAC_H_S, reg_smac_h); - roce_write(hr_dev, ROCEE_VF_SMAC_CFG1_REG + 0x08 * phy_port, val); + memset(smac_tb, 0, sizeof(*smac_tb)); + roce_set_field(smac_tb->tb_idx_rsv, + CFG_SMAC_TB_IDX_M, + CFG_SMAC_TB_IDX_S, phy_port); + roce_set_field(smac_tb->vf_smac_h_rsv, + CFG_SMAC_TB_VF_SMAC_H_M, + CFG_SMAC_TB_VF_SMAC_H_S, reg_smac_h); + smac_tb->vf_smac_l = reg_smac_l; - return 0; + return hns_roce_cmq_send(hr_dev, &desc, 1); } static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 169f747b2d7c..df95b3515c94 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -206,6 +206,7 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_CFG_EXT_LLM = 0x8403, HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404, HNS_ROCE_OPC_CFG_SGID_TB = 0x8500, + HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501, HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506, }; @@ -1242,10 +1243,6 @@ struct hns_roce_vf_res_b { #define VF_RES_B_DATA_3_VF_SL_NUM_S 16 #define VF_RES_B_DATA_3_VF_SL_NUM_M GENMASK(19, 16) -/* Reg field definition */ -#define ROCEE_VF_SMAC_CFG1_VF_SMAC_H_S 0 -#define ROCEE_VF_SMAC_CFG1_VF_SMAC_H_M GENMASK(15, 0) - struct hns_roce_cfg_bt_attr { __le32 vf_qpc_cfg; __le32 vf_srqc_cfg; @@ -1304,6 +1301,18 @@ struct hns_roce_cfg_sgid_tb { #define CFG_SGID_TB_VF_SGID_TYPE_S 0 #define CFG_SGID_TB_VF_SGID_TYPE_M GENMASK(1, 0) +struct hns_roce_cfg_smac_tb { + __le32 tb_idx_rsv; + __le32 vf_smac_l; + __le32 vf_smac_h_rsv; + __le32 rsv[3]; +}; +#define CFG_SMAC_TB_IDX_S 0 +#define CFG_SMAC_TB_IDX_M GENMASK(7, 0) + +#define 
CFG_SMAC_TB_VF_SMAC_H_S 0 +#define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0) + struct hns_roce_cmq_desc { __le16 opcode; __le16 flag; -- cgit From 522628ed1a933a213051228b81bc1a215ea2ab76 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 10 Jul 2018 11:32:16 -0700 Subject: IB/hfi1: Suppress a compiler warning Avoid that the following compiler warning is reported when building with gcc 8: drivers/infiniband/hw/hfi1/verbs.c:1896:2: warning: 'strncpy' output may be truncated copying 64 bytes from a string of length 64 [-Wstringop-truncation] Signed-off-by: Bart Van Assche Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 2b07a5667ec8..13374c727b14 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1884,7 +1884,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->process_mad = hfi1_process_mad; ibdev->get_dev_fw_str = hfi1_get_dev_fw_str; - strncpy(ibdev->node_desc, init_utsname()->nodename, + strlcpy(ibdev->node_desc, init_utsname()->nodename, sizeof(ibdev->node_desc)); /* -- cgit From 15039efadd3c16307008becb24b0f9e0cebb2e04 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Jul 2018 08:29:00 -0700 Subject: hns: Remove a set-but-not-used variable Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 1983a8e714f6..951d839f1392 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -172,7 +172,6 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct hns_roce_v2_ud_send_wqe *ud_sq_wqe; struct hns_roce_v2_rc_send_wqe *rc_sq_wqe; struct hns_roce_qp *qp = to_hr_qp(ibqp); - struct hns_roce_v2_wqe_data_seg *dseg; struct device *dev = hr_dev->dev; struct hns_roce_v2_db sq_db; unsigned int sge_ind = 0; @@ -485,7 +484,6 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } wqe += sizeof(struct hns_roce_v2_rc_send_wqe); - dseg = wqe; ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, wqe, &sge_ind, bad_wr); -- cgit From 65ca8d9670b70aa8076054c0c23be032c6ac5c77 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Thu, 5 Jul 2018 18:26:01 +0530 Subject: rdma/cxgb4: Add support for 64Byte cqes This patch adds support for iw_cxgb4 to extend CQEs from the existing 32-byte size to 64 bytes. It also adds backward-compatibility support (for 32-byte CQEs) to work with older libraries.
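The key compatibility rule, shown here in an equivalent condensed form (cqe_sz is an illustrative local variable, not a name used in the patch): an old library never passes a struct c4iw_create_cq in udata, so a short udata->inlen marks the context as 32-byte-CQE, which halves the per-CQE size used both for the queue footprint and for locating the status page in the last CQE slot:

	if (udata->inlen < sizeof(struct c4iw_create_cq))
		ucontext->is_32b_cqe = 1;	/* old userspace: 32B CQEs */

	cqe_sz = ucontext->is_32b_cqe ? sizeof(*cq->queue) / 2 :
					sizeof(*cq->queue);
	/* the status page occupies the last CQE slot of the queue */
	cq->qp_errp = &((struct t4_status_page *)
			((u8 *)cq->queue + (cq->size - 1) * cqe_sz))->qp_err;

On the response side the kernel sets C4IW_64B_CQE in uresp.flags, but truncates the response copied to an old library just before the flags field, so only a new library ever sees the capability.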
Signed-off-by: Raju Rangoju Reviewed-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cq.c | 43 +++++++++++++++++++++++++++++----- drivers/infiniband/hw/cxgb4/ev.c | 5 ++-- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 1 + drivers/infiniband/hw/cxgb4/t4.h | 18 +++++++++++--- include/uapi/rdma/cxgb4-abi.h | 12 +++++++++- 5 files changed, 67 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index a3a829951ac4..a055f9f08e76 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -77,6 +77,10 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, int user = (uctx != &rdev->uctx); int ret; struct sk_buff *skb; + struct c4iw_ucontext *ucontext = NULL; + + if (user) + ucontext = container_of(uctx, struct c4iw_ucontext, uctx); cq->cqid = c4iw_get_cqid(rdev, uctx); if (!cq->cqid) { @@ -100,6 +104,16 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, dma_unmap_addr_set(cq, mapping, cq->dma_addr); memset(cq->queue, 0, cq->memsize); + if (user && ucontext->is_32b_cqe) { + cq->qp_errp = &((struct t4_status_page *) + ((u8 *)cq->queue + (cq->size - 1) * + (sizeof(*cq->queue) / 2)))->qp_err; + } else { + cq->qp_errp = &((struct t4_status_page *) + ((u8 *)cq->queue + (cq->size - 1) * + sizeof(*cq->queue)))->qp_err; + } + /* build fw_ri_res_wr */ wr_len = sizeof *res_wr + sizeof *res; @@ -132,7 +146,9 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, FW_RI_RES_WR_IQPCIECH_V(2) | FW_RI_RES_WR_IQINTCNTTHRESH_V(0) | FW_RI_RES_WR_IQO_F | - FW_RI_RES_WR_IQESIZE_V(1)); + ((user && ucontext->is_32b_cqe) ? + FW_RI_RES_WR_IQESIZE_V(1) : + FW_RI_RES_WR_IQESIZE_V(2))); res->u.cq.iqsize = cpu_to_be16(cq->size); res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr); @@ -884,6 +900,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int vector = attr->comp_vector; struct c4iw_dev *rhp; struct c4iw_cq *chp; + struct c4iw_create_cq ucmd; struct c4iw_create_cq_resp uresp; struct c4iw_ucontext *ucontext = NULL; int ret, wr_len; @@ -899,9 +916,16 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, if (vector >= rhp->rdev.lldi.nciq) return ERR_PTR(-EINVAL); + if (ib_context) { + ucontext = to_c4iw_ucontext(ib_context); + if (udata->inlen < sizeof(ucmd)) + ucontext->is_32b_cqe = 1; + } + chp = kzalloc(sizeof(*chp), GFP_KERNEL); if (!chp) return ERR_PTR(-ENOMEM); + chp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); if (!chp->wr_waitp) { ret = -ENOMEM; @@ -916,9 +940,6 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, goto err_free_wr_wait; } - if (ib_context) - ucontext = to_c4iw_ucontext(ib_context); - /* account for the status page. */ entries++; @@ -942,13 +963,15 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, if (hwentries < 64) hwentries = 64; - memsize = hwentries * sizeof *chp->cq.queue; + memsize = hwentries * ((ucontext && ucontext->is_32b_cqe) ? + (sizeof(*chp->cq.queue) / 2) : sizeof(*chp->cq.queue)); /* * memsize must be a multiple of the page size if its a user cq. 
*/ if (ucontext) memsize = roundup(memsize, PAGE_SIZE); + chp->cq.size = hwentries; chp->cq.memsize = memsize; chp->cq.vector = vector; @@ -979,6 +1002,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, if (!mm2) goto err_free_mm; + memset(&uresp, 0, sizeof(uresp)); uresp.qid_mask = rhp->rdev.cqmask; uresp.cqid = chp->cq.cqid; uresp.size = chp->cq.size; @@ -988,9 +1012,16 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, ucontext->key += PAGE_SIZE; uresp.gts_key = ucontext->key; ucontext->key += PAGE_SIZE; + /* communicate to the userspace that + * kernel driver supports 64B CQE + */ + uresp.flags |= C4IW_64B_CQE; + spin_unlock(&ucontext->mmap_lock); ret = ib_copy_to_udata(udata, &uresp, - sizeof(uresp) - sizeof(uresp.reserved)); + ucontext->is_32b_cqe ? + sizeof(uresp) - sizeof(uresp.flags) : + sizeof(uresp)); if (ret) goto err_free_mm2; diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index 3e9d8b277ab9..8741d23168f3 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -70,9 +70,10 @@ static void dump_err_cqe(struct c4iw_dev *dev, struct t4_cqe *err_cqe) CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), ntohl(err_cqe->len), CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe)); - pr_debug("%016llx %016llx %016llx %016llx\n", + pr_debug("%016llx %016llx %016llx %016llx - %016llx %016llx %016llx %016llx\n", be64_to_cpu(p[0]), be64_to_cpu(p[1]), be64_to_cpu(p[2]), - be64_to_cpu(p[3])); + be64_to_cpu(p[3]), be64_to_cpu(p[4]), be64_to_cpu(p[5]), + be64_to_cpu(p[6]), be64_to_cpu(p[7])); /* * Ingress WRITE and READ_RESP errors provide diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 870649ff049c..8866bf992316 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -566,6 +566,7 @@ struct c4iw_ucontext { spinlock_t mmap_lock; struct list_head mmaps; struct kref kref; + bool is_32b_cqe; }; static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c) diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 8369c7c8de83..838a7dee48bd 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -179,9 +179,20 @@ struct t4_cqe { __be32 wrid_hi; __be32 wrid_low; } gen; + struct { + __be32 stag; + __be32 msn; + __be32 reserved; + __be32 abs_rqe_idx; + } srcqe; + struct { + __be64 imm_data; + } imm_data_rcqe; + u64 drain_cookie; + __be64 flits[3]; } u; - __be64 reserved; + __be64 reserved[3]; __be64 bits_type_ts; }; @@ -565,6 +576,7 @@ struct t4_cq { u16 cidx_inc; u8 gen; u8 error; + u8 *qp_errp; unsigned long flags; }; @@ -698,12 +710,12 @@ static inline int t4_next_cqe(struct t4_cq *cq, struct t4_cqe **cqe) static inline int t4_cq_in_error(struct t4_cq *cq) { - return ((struct t4_status_page *)&cq->queue[cq->size])->qp_err; + return *cq->qp_errp; } static inline void t4_set_cq_in_error(struct t4_cq *cq) { - ((struct t4_status_page *)&cq->queue[cq->size])->qp_err = 1; + *cq->qp_errp = 1; } #endif diff --git a/include/uapi/rdma/cxgb4-abi.h b/include/uapi/rdma/cxgb4-abi.h index a159ba8dcf8f..65c9eacd3ffb 100644 --- a/include/uapi/rdma/cxgb4-abi.h +++ b/include/uapi/rdma/cxgb4-abi.h @@ -44,6 +44,16 @@ * In particular do not use pointer types -- pass pointers in __aligned_u64 * instead. 
*/ + +enum { + C4IW_64B_CQE = (1 << 0) +}; + +struct c4iw_create_cq { + __u32 flags; + __u32 reserved; +}; + struct c4iw_create_cq_resp { __aligned_u64 key; __aligned_u64 gts_key; @@ -51,7 +61,7 @@ struct c4iw_create_cq_resp { __u32 cqid; __u32 size; __u32 qid_mask; - __u32 reserved; /* explicit padding (optional for i386) */ + __u32 flags; }; enum { -- cgit From ffaf58def01ebdbf2669204e105c5a4f356ba276 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 8 Jul 2018 13:50:20 +0300 Subject: RDMA/mlx5: Melt consecutive calls to alloc_bfreg() in one call There is no need for three consecutive calls to alloc_bfreg(). It can be implemented with one function. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 6 ----- drivers/infiniband/hw/mlx5/qp.c | 47 +++++++++--------------------------- 2 files changed, 12 insertions(+), 41 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 67e86c8304a2..93087409f4b8 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -78,12 +78,6 @@ enum { MLX5_REQ_SCAT_DATA64_CQE = 0x22, }; -enum mlx5_ib_latency_class { - MLX5_IB_LATENCY_CLASS_LOW, - MLX5_IB_LATENCY_CLASS_MEDIUM, - MLX5_IB_LATENCY_CLASS_HIGH, -}; - enum mlx5_ib_mad_ifc_flags { MLX5_MAD_IFC_IGNORE_MKEY = 1, MLX5_MAD_IFC_IGNORE_BKEY = 2, diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 6034a670859f..51e68ca20215 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -563,32 +563,21 @@ static int alloc_med_class_bfreg(struct mlx5_ib_dev *dev, } static int alloc_bfreg(struct mlx5_ib_dev *dev, - struct mlx5_bfreg_info *bfregi, - enum mlx5_ib_latency_class lat) + struct mlx5_bfreg_info *bfregi) { - int bfregn = -EINVAL; + int bfregn = -ENOMEM; mutex_lock(&bfregi->lock); - switch (lat) { - case MLX5_IB_LATENCY_CLASS_LOW: + if (bfregi->ver >= 2) { + bfregn = alloc_high_class_bfreg(dev, bfregi); + if (bfregn < 0) + bfregn = alloc_med_class_bfreg(dev, bfregi); + } + + if (bfregn < 0) { BUILD_BUG_ON(NUM_NON_BLUE_FLAME_BFREGS != 1); bfregn = 0; bfregi->count[bfregn]++; - break; - - case MLX5_IB_LATENCY_CLASS_MEDIUM: - if (bfregi->ver < 2) - bfregn = -ENOMEM; - else - bfregn = alloc_med_class_bfreg(dev, bfregi); - break; - - case MLX5_IB_LATENCY_CLASS_HIGH: - if (bfregi->ver < 2) - bfregn = -ENOMEM; - else - bfregn = alloc_high_class_bfreg(dev, bfregi); - break; } mutex_unlock(&bfregi->lock); @@ -822,21 +811,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, bfregn = MLX5_CROSS_CHANNEL_BFREG; } else { - bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_HIGH); - if (bfregn < 0) { - mlx5_ib_dbg(dev, "failed to allocate low latency BFREG\n"); - mlx5_ib_dbg(dev, "reverting to medium latency\n"); - bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_MEDIUM); - if (bfregn < 0) { - mlx5_ib_dbg(dev, "failed to allocate medium latency BFREG\n"); - mlx5_ib_dbg(dev, "reverting to high latency\n"); - bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_LOW); - if (bfregn < 0) { - mlx5_ib_warn(dev, "bfreg allocation failed\n"); - return bfregn; - } - } - } + bfregn = alloc_bfreg(dev, &context->bfregi); + if (bfregn < 0) + return bfregn; } mlx5_ib_dbg(dev, "bfregn 0x%x, uar_index 0x%x\n", bfregn, uar_index); -- cgit From 05f58ceba123bdb420cf44c6ea04b6db467edd1c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 8 Jul 2018 13:50:21 +0300 
Subject: RDMA/mlx5: Check that supplied blue flame index doesn't overflow The user-supplied index is checked against the total number of system pages, but this number already includes num_static_sys_pages, so adding that value to the supplied index causes the below error while trying to access sys_pages[]. BUG: KASAN: slab-out-of-bounds in bfregn_to_uar_index+0x34f/0x400 Read of size 4 at addr ffff880065561904 by task syz-executor446/314 CPU: 0 PID: 314 Comm: syz-executor446 Not tainted 4.18.0-rc1+ #256 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.0-0-g63451fca13-prebuilt.qemu-project.org 04/01/2014 Call Trace: dump_stack+0xef/0x17e print_address_description+0x83/0x3b0 kasan_report+0x18d/0x4d0 bfregn_to_uar_index+0x34f/0x400 create_user_qp+0x272/0x227d create_qp_common+0x32eb/0x43e0 mlx5_ib_create_qp+0x379/0x1ca0 create_qp.isra.5+0xc94/0x22d0 ib_uverbs_create_qp+0x21b/0x2a0 ib_uverbs_write+0xc2c/0x1010 vfs_write+0x1b0/0x550 ksys_write+0xc6/0x1a0 do_syscall_64+0xa7/0x590 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x433679 Code: fd ff 48 81 c4 80 00 00 00 e9 f1 fe ff ff 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 3b 91 fd ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fff2b3d8e48 EFLAGS: 00000217 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00000000004002f8 RCX: 0000000000433679 RDX: 0000000000000040 RSI: 0000000020000240 RDI: 0000000000000003 RBP: 00000000006d4018 R08: 00000000004002f8 R09: 00000000004002f8 R10: 00000000004002f8 R11: 0000000000000217 R12: 0000000000000000 R13: 000000000040cb00 R14: 000000000040cb90 R15: 0000000000000006 Allocated by task 314: kasan_kmalloc+0xa0/0xd0 __kmalloc+0x1a9/0x510 mlx5_ib_alloc_ucontext+0x966/0x2620 ib_uverbs_get_context+0x23f/0xa60 ib_uverbs_write+0xc2c/0x1010 __vfs_write+0x10d/0x720 vfs_write+0x1b0/0x550 ksys_write+0xc6/0x1a0 do_syscall_64+0xa7/0x590 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 1: __kasan_slab_free+0x12e/0x180 kfree+0x159/0x630 kvfree+0x37/0x50 single_release+0x8e/0xf0 __fput+0x2d8/0x900 task_work_run+0x102/0x1f0 exit_to_usermode_loop+0x159/0x1c0 do_syscall_64+0x408/0x590 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff880065561100 which belongs to the cache kmalloc-4096 of size 4096 The buggy address is located 2052 bytes inside of 4096-byte region [ffff880065561100, ffff880065562100) The buggy address belongs to the page: page:ffffea0001955800 count:1 mapcount:0 mapping:ffff88006c402480 index:0x0 compound_mapcount: 0 flags: 0x4000000000008100(slab|head) raw: 4000000000008100 ffffea0001a7c000 0000000200000002 ffff88006c402480 raw: 0000000000000000 0000000080070007 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff880065561800: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff880065561880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >ffff880065561900: 04 fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ^ ffff880065561980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff880065561a00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc Cc: # 4.15 Fixes: 1ee47ab3e8d8 ("IB/mlx5: Enable QP creation with a given blue flame index") Reported-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- drivers/infiniband/hw/mlx5/qp.c | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git
a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 93087409f4b8..04a5d82c9cf3 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1330,6 +1330,6 @@ unsigned long mlx5_ib_get_xlt_emergency_page(void); void mlx5_ib_put_xlt_emergency_page(void); int bfregn_to_uar_index(struct mlx5_ib_dev *dev, - struct mlx5_bfreg_info *bfregi, int bfregn, + struct mlx5_bfreg_info *bfregi, u32 bfregn, bool dyn_bfreg); #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 51e68ca20215..d4414015b64f 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -631,22 +631,23 @@ static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq); int bfregn_to_uar_index(struct mlx5_ib_dev *dev, - struct mlx5_bfreg_info *bfregi, int bfregn, + struct mlx5_bfreg_info *bfregi, u32 bfregn, bool dyn_bfreg) { - int bfregs_per_sys_page; - int index_of_sys_page; - int offset; + unsigned int bfregs_per_sys_page; + u32 index_of_sys_page; + u32 offset; bfregs_per_sys_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * MLX5_NON_FP_BFREGS_PER_UAR; index_of_sys_page = bfregn / bfregs_per_sys_page; - if (index_of_sys_page >= bfregi->num_sys_pages) - return -EINVAL; - if (dyn_bfreg) { index_of_sys_page += bfregi->num_static_sys_pages; + + if (index_of_sys_page >= bfregi->num_sys_pages) + return -EINVAL; + if (bfregn > bfregi->num_dyn_bfregs || bfregi->sys_pages[index_of_sys_page] == MLX5_IB_INVALID_UAR_INDEX) { mlx5_ib_dbg(dev, "Invalid dynamic uar index\n"); -- cgit From 847462de3a0aabc5343a1e338537f69a03bb61af Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 10 Jul 2018 10:31:58 -0700 Subject: IB/srpt: Fix srpt_cm_req_recv() error path (1/2) Once a target session has been allocated, if an error occurs, the session must be freed. Since it is not safe to call blocking code from the context of a connection manager callback, trigger target session release in this case by calling srpt_close_ch(). Fixes: db7683d7deb2 ("IB/srpt: Fix login-related race conditions") Signed-off-by: Bart Van Assche Cc: Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 3cb99ca841bb..6b86f5c71847 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2087,7 +2087,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, struct rdma_conn_param rdma_cm; struct ib_cm_rep_param ib_cm; } *rep_param = NULL; - struct srpt_rdma_ch *ch; + struct srpt_rdma_ch *ch = NULL; char i_port_id[36]; u32 it_iu_len; int i, ret; @@ -2234,13 +2234,15 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, TARGET_PROT_NORMAL, i_port_id + 2, ch, NULL); if (IS_ERR_OR_NULL(ch->sess)) { + WARN_ON_ONCE(ch->sess == NULL); ret = PTR_ERR(ch->sess); + ch->sess = NULL; pr_info("Rejected login for initiator %s: ret = %d.\n", ch->sess_name, ret); rej->reason = cpu_to_be32(ret == -ENOMEM ?
SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES : SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED); - goto reject; + goto destroy_ib; } mutex_lock(&sport->mutex); @@ -2279,7 +2281,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); pr_err("rejected SRP_LOGIN_REQ because enabling RTR failed (error code = %d)\n", ret); - goto destroy_ib; + goto reject; } pr_debug("Establish connection sess=%p name=%s ch=%p\n", ch->sess, @@ -2379,6 +2381,15 @@ reject: ib_send_cm_rej(ib_cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, rej, sizeof(*rej)); + if (ch && ch->sess) { + srpt_close_ch(ch); + /* + * Tell the caller not to free cm_id since + * srpt_release_channel_work() will do that. + */ + ret = 0; + } + out: kfree(rep_param); kfree(rsp); -- cgit From 6869e0004fe16184acd6488f0c637e0081a84a8a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 10 Jul 2018 10:31:59 -0700 Subject: IB/srpt: Fix srpt_cm_req_recv() error path (2/2) If a login request was received through the RDMA/CM and if an error occurs during login, clear rdma_cm_id->context instead of ib_cm_id->context. Fixes: 63cf1a902c9d ("IB/srpt: Add RDMA/CM support") Signed-off-by: Bart Van Assche Cc: Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 6b86f5c71847..8bd7373cb828 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2360,8 +2360,11 @@ free_ring: srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, ch->sport->sdev, ch->rq_size, ch->max_rsp_size, DMA_TO_DEVICE); + free_ch: - if (ib_cm_id) + if (rdma_cm_id) + rdma_cm_id->context = NULL; + else ib_cm_id->context = NULL; kfree(ch); ch = NULL; -- cgit From 40ddacf2dda952e0f33b40d850bf5f7403bdbe0f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 10 Jul 2018 13:31:48 +0300 Subject: RDMA/umem: Don't hold mmap_sem for too long DMA mapping is a time-consuming operation and doesn't need to be performed while the mmap_sem semaphore is held. The semaphore only needs to be held for accounting and get_user_pages related activities.
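In outline, the locking pattern after this change is the following (a simplified sketch of the diff below, with the error unwinding omitted; only the phase structure matters):

	/* Phase 1: accounting only, mmap_sem held for writing very briefly. */
	down_write(&current->mm->mmap_sem);
	current->mm->pinned_vm += npages;
	if ((current->mm->pinned_vm > lock_limit) && !capable(CAP_IPC_LOCK)) {
		up_write(&current->mm->mmap_sem);
		return -ENOMEM;
	}
	up_write(&current->mm->mmap_sem);

	/* Phase 2: page pinning, which only needs the read side. */
	down_read(&current->mm->mmap_sem);
	ret = get_user_pages_longterm(cur_base, npages, gup_flags,
				      page_list, vma_list);
	up_read(&current->mm->mmap_sem);

	/* Phase 3: DMA mapping, which now runs with no mmap_sem held at all. */
	umem->nmap = ib_dma_map_sg_attrs(context->device, umem->sg_head.sgl,
					 umem->npages, DMA_BIDIRECTIONAL,
					 dma_attrs);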
Signed-off-by: Huy Nguyen Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 498f59bb4989..abe9924baf7c 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -84,7 +84,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, struct ib_umem *umem; struct page **page_list; struct vm_area_struct **vma_list; - unsigned long locked; unsigned long lock_limit; unsigned long cur_base; unsigned long npages; @@ -149,15 +148,16 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, npages = ib_umem_num_pages(umem); - down_write(¤t->mm->mmap_sem); - - locked = npages + current->mm->pinned_vm; lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { + down_write(¤t->mm->mmap_sem); + current->mm->pinned_vm += npages; + if ((current->mm->pinned_vm > lock_limit) && !capable(CAP_IPC_LOCK)) { + up_write(¤t->mm->mmap_sem); ret = -ENOMEM; goto out; } + up_write(¤t->mm->mmap_sem); cur_base = addr & PAGE_MASK; @@ -176,14 +176,16 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, need_release = 1; sg_list_start = umem->sg_head.sgl; + down_read(¤t->mm->mmap_sem); while (npages) { ret = get_user_pages_longterm(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof (struct page *)), gup_flags, page_list, vma_list); - - if (ret < 0) + if (ret < 0) { + up_read(¤t->mm->mmap_sem); goto out; + } umem->npages += ret; cur_base += ret * PAGE_SIZE; @@ -199,6 +201,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, /* preparing for next loop */ sg_list_start = sg; } + up_read(¤t->mm->mmap_sem); umem->nmap = ib_dma_map_sg_attrs(context->device, umem->sg_head.sgl, @@ -215,13 +218,14 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, out: if (ret < 0) { + down_write(¤t->mm->mmap_sem); + current->mm->pinned_vm -= ib_umem_num_pages(umem); + up_write(¤t->mm->mmap_sem); if (need_release) __ib_umem_release(context->device, umem, 0); kfree(umem); - } else - current->mm->pinned_vm = locked; + } - up_write(¤t->mm->mmap_sem); if (vma_list) free_page((unsigned long) vma_list); free_page((unsigned long) page_list); -- cgit From 1215cb7c88ec888d599a249142a74dd93b8985ad Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 10 Jul 2018 13:31:49 +0300 Subject: RDMA/umem: Refactor exit paths in ib_umem_get Simplify exit paths in ib_umem_get to use the standard goto unwind pattern. Signed-off-by: Leon Romanovsky Reviewed-by: Michael J. 
Ruhl Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem.c | 44 +++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index abe9924baf7c..a41792dbae1f 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -91,7 +91,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, int i; unsigned long dma_attrs = 0; struct scatterlist *sg, *sg_list_start; - int need_release = 0; unsigned int gup_flags = FOLL_WRITE; if (dmasync) @@ -120,10 +119,8 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if (access & IB_ACCESS_ON_DEMAND) { ret = ib_umem_odp_get(context, umem, access); - if (ret) { - kfree(umem); - return ERR_PTR(ret); - } + if (ret) + goto umem_kfree; return umem; } @@ -134,8 +131,8 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) { - kfree(umem); - return ERR_PTR(-ENOMEM); + ret = -ENOMEM; + goto umem_kfree; } /* @@ -155,7 +152,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if ((current->mm->pinned_vm > lock_limit) && !capable(CAP_IPC_LOCK)) { up_write(¤t->mm->mmap_sem); ret = -ENOMEM; - goto out; + goto vma; } up_write(¤t->mm->mmap_sem); @@ -163,17 +160,16 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if (npages == 0 || npages > UINT_MAX) { ret = -EINVAL; - goto out; + goto vma; } ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL); if (ret) - goto out; + goto vma; if (!umem->writable) gup_flags |= FOLL_FORCE; - need_release = 1; sg_list_start = umem->sg_head.sgl; down_read(¤t->mm->mmap_sem); @@ -184,7 +180,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, gup_flags, page_list, vma_list); if (ret < 0) { up_read(¤t->mm->mmap_sem); - goto out; + goto umem_release; } umem->npages += ret; @@ -211,26 +207,26 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if (!umem->nmap) { ret = -ENOMEM; - goto out; + goto umem_release; } ret = 0; + goto out; +umem_release: + __ib_umem_release(context->device, umem, 0); +vma: + down_write(¤t->mm->mmap_sem); + current->mm->pinned_vm -= ib_umem_num_pages(umem); + up_write(¤t->mm->mmap_sem); out: - if (ret < 0) { - down_write(¤t->mm->mmap_sem); - current->mm->pinned_vm -= ib_umem_num_pages(umem); - up_write(¤t->mm->mmap_sem); - if (need_release) - __ib_umem_release(context->device, umem, 0); - kfree(umem); - } - if (vma_list) free_page((unsigned long) vma_list); free_page((unsigned long) page_list); - - return ret < 0 ? ERR_PTR(ret) : umem; +umem_kfree: + if (ret) + kfree(umem); + return ret ? ERR_PTR(ret) : umem; } EXPORT_SYMBOL(ib_umem_get); -- cgit From c012691508f5fcc09b0a777fd1747f266093fe1b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 11 Jul 2018 11:20:29 +0300 Subject: IB/cm: Remove cma_multicast->igmp_joined This variable isn't read and written to with proper locking, so it is racy. Instead of using an unlocked bool use presence in the mc->list The caller could race rdma_join_multicast with rdma_leave_multicast which would leak a mc join and cause a use after free of mc. Instead, do not add the mc to the list until it has completed initialization, all mcs on the list require leaving. 
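The resulting ordering can be sketched as follows (illustrative pseudo-helper: join() stands in for cma_iboe_join_multicast()/cma_join_ib_multicast(); this is not the literal function body):

	ret = join(mc);				/* fully initialize before publishing */
	if (ret) {
		kfree(mc);			/* mc was never visible to other threads */
		return ret;
	}
	spin_lock(&id_priv->lock);
	list_add(&mc->list, &id_priv->mc_list);	/* publish; a leave is now required */
	spin_unlock(&id_priv->lock);
	return 0;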
Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cma.c | 53 +++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index a735ab4cddda..f2bf997b62cd 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -366,7 +366,6 @@ struct cma_multicast { void *context; struct sockaddr_storage addr; struct kref mcref; - bool igmp_joined; u8 join_state; }; @@ -1643,21 +1642,14 @@ static void cma_release_port(struct rdma_id_private *id_priv) static void cma_leave_roce_mc_group(struct rdma_id_private *id_priv, struct cma_multicast *mc) { - if (mc->igmp_joined) { - struct rdma_dev_addr *dev_addr = - &id_priv->id.route.addr.dev_addr; - struct net_device *ndev = NULL; - - if (dev_addr->bound_dev_if) - ndev = dev_get_by_index(dev_addr->net, - dev_addr->bound_dev_if); - if (ndev) { - cma_igmp_send(ndev, - &mc->multicast.ib->rec.mgid, - false); - dev_put(ndev); - } - mc->igmp_joined = false; + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + struct net_device *ndev = NULL; + + if (dev_addr->bound_dev_if) + ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); + if (ndev) { + cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false); + dev_put(ndev); } kref_put(&mc->mcref, release_mc); } @@ -4196,8 +4188,6 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, if (!send_only) { err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, true); - if (!err) - mc->igmp_joined = true; } } } else { @@ -4249,26 +4239,29 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, memcpy(&mc->addr, addr, rdma_addr_size(addr)); mc->context = context; mc->id_priv = id_priv; - mc->igmp_joined = false; mc->join_state = join_state; - spin_lock(&id_priv->lock); - list_add(&mc->list, &id_priv->mc_list); - spin_unlock(&id_priv->lock); if (rdma_protocol_roce(id->device, id->port_num)) { kref_init(&mc->mcref); ret = cma_iboe_join_multicast(id_priv, mc); + if (ret) + goto out_err; + } else if (rdma_cap_ib_mcast(id->device, id->port_num)) { ret = cma_join_ib_multicast(id_priv, mc); + if (ret) + goto out_err; + } else { ret = -ENOSYS; - - if (ret) { - spin_lock_irq(&id_priv->lock); - list_del(&mc->list); - spin_unlock_irq(&id_priv->lock); - kfree(mc); + goto out_err; } + + spin_lock(&id_priv->lock); + list_add(&mc->list, &id_priv->mc_list); + spin_unlock(&id_priv->lock); + + return 0; +out_err: + kfree(mc); return ret; } EXPORT_SYMBOL(rdma_join_multicast); -- cgit From 259e19145e1fa9bb9e502f625f7bce1a0c078fa0 Mon Sep 17 00:00:00 2001 From: Jan Dakinevich Date: Mon, 9 Jul 2018 16:51:03 +0300 Subject: IPoIB: use kvzalloc to allocate an array of bucket pointers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By default this table takes 32 KiB, which is a 3rd-order allocation. This memory is not used for DMA, so it can safely be allocated with vmalloc.
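The rule the conversion below follows: memory obtained from the kv* allocators may be vmalloc-backed, so it must be released with kvfree() rather than kfree(). A minimal sketch of the pairing:

	buckets = kvcalloc(size, sizeof(*buckets), GFP_KERNEL); /* may fall back to vmalloc */
	if (!buckets)
		return -ENOMEM;
	/* ... use the bucket array ... */
	kvfree(buckets); /* correct for both kmalloc- and vmalloc-backed memory */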
Signed-off-by: Jan Dakinevich Reviewed-by: Håkon Bugge Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 9bcd487e51c2..012c9e3970ac 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1530,7 +1530,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) return -ENOMEM; set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); size = roundup_pow_of_two(arp_tbl.gc_thresh3); - buckets = kcalloc(size, sizeof(*buckets), GFP_KERNEL); + buckets = kvcalloc(size, sizeof(*buckets), GFP_KERNEL); if (!buckets) { kfree(htbl); return -ENOMEM; } @@ -1558,7 +1558,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head) struct ipoib_neigh __rcu **buckets = htbl->buckets; struct ipoib_neigh_table *ntbl = htbl->ntbl; - kfree(buckets); + kvfree(buckets); kfree(htbl); complete(&ntbl->deleted); } -- cgit From aa09ea6e6b1279a2031cbdaa7605e2ec297e63ae Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 19 Jul 2018 00:05:32 +0300 Subject: RDMA/mlx5: Remove set but not used variables Remove "uctx" and "pa" variables that were set but not used. Fixes: a8b92ca1b0e5 ("IB/mlx5: Introduce DEVX") Fixes: 8f0622873358 ("RDMA/mlx5: Remove debug prints of VMA pointers") Signed-off-by: Kamal Heib Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 2 -- drivers/infiniband/hw/mlx5/main.c | 4 +--- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 60ac1fbe940e..7f9d73b03421 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -50,11 +50,9 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *contex u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0}; u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; u64 general_obj_types; - void *uctx; void *hdr; int err; - uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx); hdr = MLX5_ADDR_OF(create_uctx_in, in, hdr); general_obj_types = MLX5_CAP_GEN_64(dev->mdev, general_obj_types); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b7f94bc3811a..d4d894e9f942 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2041,7 +2041,7 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, struct mlx5_bfreg_info *bfregi = &context->bfregi; int err; unsigned long idx; - phys_addr_t pfn, pa; + phys_addr_t pfn; pgprot_t prot; u32 bfreg_dyn_idx = 0; u32 uar_index; @@ -2132,8 +2132,6 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, goto err; } - pa = pfn << PAGE_SHIFT; - err = mlx5_ib_set_vma_data(vma, context); if (err) goto err; -- cgit From acd4307a21b5a9a7761aa55a5f5ca46cbd43f108 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 18 Jul 2018 08:58:02 -0700 Subject: RDMA/bnxt_re: Modify a fall-through annotation This patch prevents gcc from reporting the following warning when building with W=1: drivers/infiniband/hw/bnxt_re/ib_verbs.c:2404:4: warning: this statement may fall through [-Wimplicit-fallthrough=] Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 492c750f7ed6..dd800d153aa2 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -2409,7 +2409,7 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, struct ib_send_wr *wr, default: break; } - /* Fall thru to build the wqe */ + /* fall through */ case IB_WR_SEND_WITH_INV: rc = bnxt_re_build_send_wqe(qp, wr, &wqe); break; -- cgit From 2bd2e98a783adfb7ddc5400657499216490d1b12 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 18 Jul 2018 09:14:36 -0700 Subject: MAINTAINERS: Remove Dave Goodell from the usnic RDMA driver maintainer list The e-mail address dgoodell@exch.cisco.com no longer exists. Additionally, according to https://www.linkedin.com/in/goodell/ Dave is an Amazon employee since December 2017. Hence remove his Cisco e-mail address from the usnic maintainer list. Signed-off-by: Bart Van Assche Acked-by: Christian Benvenuti Signed-off-by: Jason Gunthorpe --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index c79f306a936a..1e53fe99eb63 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3506,7 +3506,6 @@ F: drivers/net/ethernet/cisco/enic/ CISCO VIC LOW LATENCY NIC DRIVER M: Christian Benvenuti -M: Dave Goodell S: Supported F: drivers/infiniband/hw/usnic/ -- cgit From 4fca037783512cedfb23a116c66727ce40c8558a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 11 Jul 2018 16:20:44 -0600 Subject: IB/uverbs: Move ib_access_flags and ib_read_counters_flags to uapi These constants are used in the ioctl interface so they are part of the uapi, place them in the correct header for clarity. Signed-off-by: Jason Gunthorpe Reviewed-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 23 ++++++++++------------- include/uapi/rdma/ib_user_ioctl_verbs.h | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 2696f1d730a1..08348e53082c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1425,14 +1425,16 @@ struct ib_recv_wr { }; enum ib_access_flags { - IB_ACCESS_LOCAL_WRITE = 1, - IB_ACCESS_REMOTE_WRITE = (1<<1), - IB_ACCESS_REMOTE_READ = (1<<2), - IB_ACCESS_REMOTE_ATOMIC = (1<<3), - IB_ACCESS_MW_BIND = (1<<4), - IB_ZERO_BASED = (1<<5), - IB_ACCESS_ON_DEMAND = (1<<6), - IB_ACCESS_HUGETLB = (1<<7), + IB_ACCESS_LOCAL_WRITE = IB_UVERBS_ACCESS_LOCAL_WRITE, + IB_ACCESS_REMOTE_WRITE = IB_UVERBS_ACCESS_REMOTE_WRITE, + IB_ACCESS_REMOTE_READ = IB_UVERBS_ACCESS_REMOTE_READ, + IB_ACCESS_REMOTE_ATOMIC = IB_UVERBS_ACCESS_REMOTE_ATOMIC, + IB_ACCESS_MW_BIND = IB_UVERBS_ACCESS_MW_BIND, + IB_ZERO_BASED = IB_UVERBS_ACCESS_ZERO_BASED, + IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND, + IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB, + + IB_ACCESS_SUPPORTED = ((IB_ACCESS_HUGETLB << 1) - 1) }; /* @@ -2223,11 +2225,6 @@ struct ib_counters { atomic_t usecnt; }; -enum ib_read_counters_flags { - /* prefer read values from driver cache */ - IB_READ_COUNTERS_ATTR_PREFER_CACHED = 1 << 0, -}; - struct ib_counters_read_attr { u64 *counters_buff; u32 ncounters; diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index a81d853bf25d..6cdf192070a2 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -40,6 +40,17 @@ #define RDMA_UAPI_PTR(_type, _name) __aligned_u64 _name #endif +enum ib_uverbs_access_flags { + IB_UVERBS_ACCESS_LOCAL_WRITE = 1 << 0, + IB_UVERBS_ACCESS_REMOTE_WRITE = 1 << 1, + IB_UVERBS_ACCESS_REMOTE_READ = 1 << 2, + 
IB_UVERBS_ACCESS_REMOTE_ATOMIC = 1 << 3, + IB_UVERBS_ACCESS_MW_BIND = 1 << 4, + IB_UVERBS_ACCESS_ZERO_BASED = 1 << 5, + IB_UVERBS_ACCESS_ON_DEMAND = 1 << 6, + IB_UVERBS_ACCESS_HUGETLB = 1 << 7, +}; + enum ib_uverbs_query_port_cap_flags { IB_UVERBS_PCF_SM = 1 << 1, IB_UVERBS_PCF_NOTICE_SUP = 1 << 2, @@ -141,4 +152,9 @@ struct ib_uverbs_flow_action_esp { __aligned_u64 hard_limit_pkts; }; +enum ib_uverbs_read_counters_flags { + /* prefer read values from driver cache */ + IB_UVERBS_READ_COUNTERS_PREFER_CACHED = 1 << 0, +}; + #endif -- cgit From 2aada6c0c96e55e3b3f5024d164b9575382de6b8 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 19 Jun 2018 15:09:48 +0300 Subject: net/mlx5: Add forward compatible support for the FTE match data Use the PRM size, including the reserved fields, when working with the FTE match data. This supports forward compatibility for cases where currently reserved data is exposed by future firmware and used by an application through the DEVX API, without changing the kernel. Also drop some driver checks around the match criteria, leaving that work to the firmware, to enable forward compatibility for future bits there. Signed-off-by: Yishai Hadas Acked-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 77 +---------------------- 1 file changed, 1 insertion(+), 76 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 29b86232f13a..a8d7d00f92f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -309,89 +309,17 @@ static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns, return NULL; } -static bool check_last_reserved(const u32 *match_criteria) -{ - char *match_criteria_reserved = - MLX5_ADDR_OF(fte_match_param, match_criteria, MLX5_FTE_MATCH_PARAM_RESERVED); - - return !match_criteria_reserved[0] && - !memcmp(match_criteria_reserved, match_criteria_reserved + 1, - MLX5_FLD_SZ_BYTES(fte_match_param, - MLX5_FTE_MATCH_PARAM_RESERVED) - 1); -} - -static bool check_valid_mask(u8 match_criteria_enable, const u32 *match_criteria) -{ - if (match_criteria_enable & ~( - (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) | - (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) | - (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) | - (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_2))) - return false; - - if (!(match_criteria_enable & - 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS)) { - char *fg_type_mask = MLX5_ADDR_OF(fte_match_param, - match_criteria, outer_headers); - - if (fg_type_mask[0] || - memcmp(fg_type_mask, fg_type_mask + 1, - MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4) - 1)) - return false; - } - - if (!(match_criteria_enable & - 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS)) { - char *fg_type_mask = MLX5_ADDR_OF(fte_match_param, - match_criteria, misc_parameters); - - if (fg_type_mask[0] || - memcmp(fg_type_mask, fg_type_mask + 1, - MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1)) - return false; - } - - if (!(match_criteria_enable & - 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS)) { - char *fg_type_mask = MLX5_ADDR_OF(fte_match_param, - match_criteria, inner_headers); - - if (fg_type_mask[0] || - memcmp(fg_type_mask, fg_type_mask + 1, - MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4) - 1)) - return false; - } - - if (!(match_criteria_enable & - 1
<< MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_2)) { - char *fg_type_mask = MLX5_ADDR_OF(fte_match_param, - match_criteria, misc_parameters_2); - - if (fg_type_mask[0] || - memcmp(fg_type_mask, fg_type_mask + 1, - MLX5_ST_SZ_BYTES(fte_match_set_misc2) - 1)) - return false; - } - - return check_last_reserved(match_criteria); -} - static bool check_valid_spec(const struct mlx5_flow_spec *spec) { int i; - if (!check_valid_mask(spec->match_criteria_enable, spec->match_criteria)) { - pr_warn("mlx5_core: Match criteria given mismatches match_criteria_enable\n"); - return false; - } - for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++) if (spec->match_value[i] & ~spec->match_criteria[i]) { pr_warn("mlx5_core: match_value differs from match_criteria\n"); return false; } - return check_last_reserved(spec->match_value); + return true; } static struct mlx5_flow_root_namespace *find_root(struct fs_node *node) @@ -1158,9 +1086,6 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, struct mlx5_flow_group *fg; int err; - if (!check_valid_mask(match_criteria_enable, match_criteria)) - return ERR_PTR(-EINVAL); - if (ft->autogroup.active) return ERR_PTR(-EPERM); -- cgit From 664000b6bb4352295dc774108a1fc87c4a1ad0e3 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 19 Jun 2018 15:23:36 +0300 Subject: net/mlx5: Add support for flow table destination number Add support to set a destination from a flow table number. This functionality will be used in downstream patches from this series by the DEVX stuff. Signed-off-by: Yishai Hadas Acked-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- .../mellanox/mlx5/core/diag/fs_tracepoint.c | 3 +++ drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 24 ++++++++++++++-------- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 4 +++- include/linux/mlx5/fs.h | 1 + include/linux/mlx5/mlx5_ifc.h | 1 + 5 files changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c index b3820a34e773..0f11fff32a9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -240,6 +240,9 @@ const char *parse_fs_dst(struct trace_seq *p, case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: trace_seq_printf(p, "ft=%p\n", dst->ft); break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: + trace_seq_printf(p, "ft_num=%u\n", dst->ft_num); + break; case MLX5_FLOW_DESTINATION_TYPE_TIR: trace_seq_printf(p, "tir=%u\n", dst->tir_num); break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 6a62b84e57f4..8e01f818021b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -368,18 +368,20 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, int list_size = 0; list_for_each_entry(dst, &fte->node.children, node.list) { - unsigned int id; + unsigned int id, type = dst->dest_attr.type; - if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) continue; - MLX5_SET(dest_format_struct, in_dests, destination_type, - dst->dest_attr.type); - if (dst->dest_attr.type == - MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) { + switch (type) { + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: + id = dst->dest_attr.ft_num; + type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + break; + case 
MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: id = dst->dest_attr.ft->id; - } else if (dst->dest_attr.type == - MLX5_FLOW_DESTINATION_TYPE_VPORT) { + break; + case MLX5_FLOW_DESTINATION_TYPE_VPORT: id = dst->dest_attr.vport.num; MLX5_SET(dest_format_struct, in_dests, destination_eswitch_owner_vhca_id_valid, @@ -387,9 +389,13 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(dest_format_struct, in_dests, destination_eswitch_owner_vhca_id, dst->dest_attr.vport.vhca_id); - } else { + break; + default: id = dst->dest_attr.tir_num; } + + MLX5_SET(dest_format_struct, in_dests, destination_type, + type); MLX5_SET(dest_format_struct, in_dests, destination_id, id); in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); list_size++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index a8d7d00f92f6..0d8378243903 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1356,7 +1356,9 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && d1->ft == d2->ft) || (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR && - d1->tir_num == d2->tir_num)) + d1->tir_num == d2->tir_num) || + (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM && + d1->ft_num == d2->ft_num)) return true; } diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index c40f2fc68655..af0592400499 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -89,6 +89,7 @@ struct mlx5_flow_destination { enum mlx5_flow_destination_type type; union { u32 tir_num; + u32 ft_num; struct mlx5_flow_table *ft; struct mlx5_fc *counter; struct { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ae12120ef021..c14b81559505 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1180,6 +1180,7 @@ enum mlx5_flow_destination_type { MLX5_FLOW_DESTINATION_TYPE_PORT = 0x99, MLX5_FLOW_DESTINATION_TYPE_COUNTER = 0x100, + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM = 0x101, }; struct mlx5_ifc_dest_format_struct_bits { -- cgit From fd44e3853c0155fa82314f341f476d4793415cd2 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 23 Jul 2018 15:25:07 +0300 Subject: IB/mlx5: Introduce flow steering matcher uapi object Introduce flow steering matcher object and its create and destroy methods. This matcher object holds some mlx5 specific driver properties that matches the underlay device specification when an mlx5 flow steering group is created. It will be used in downstream patches to be part of mlx5 specific create flow method. 
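Conceptually, the matcher mask defines which bits of the match parameters participate in matching, and a later flow rule supplies the values to compare under that mask. A self-contained sketch of the semantics (the helper name is illustrative; the real matching happens in hardware):

	static bool masked_match(const u32 *mask, const u32 *value,
				 const u32 *fields, unsigned int ndw)
	{
		unsigned int i;

		/* A bit takes part in matching only where the mask is set. */
		for (i = 0; i < ndw; i++)
			if ((fields[i] & mask[i]) != (value[i] & mask[i]))
				return false;
		return true;
	}

This is also why check_valid_spec() above warns when match_value has bits set outside match_criteria: such bits can never influence a match.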
Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- .../infiniband/core/uverbs_std_types_flow_action.c | 3 +- drivers/infiniband/hw/mlx5/Makefile | 1 + drivers/infiniband/hw/mlx5/flow.c | 134 +++++++++++++++++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 11 ++ include/rdma/uverbs_ioctl.h | 2 + include/uapi/rdma/mlx5_user_ioctl_cmds.h | 33 ++++- 6 files changed, 181 insertions(+), 3 deletions(-) create mode 100644 drivers/infiniband/hw/mlx5/flow.c diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index c753a34cd984..adb9209c4710 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -376,8 +376,7 @@ static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = { static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = { [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = { .type = UVERBS_ATTR_TYPE_PTR_IN, - /* No need to specify any data */ - UVERBS_ATTR_SIZE(0, 0), + UVERBS_ATTR_NO_DATA(), }, [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = { .type = UVERBS_ATTR_TYPE_PTR_IN, diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 577e4c418bae..b8e4b15e2674 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -4,3 +4,4 @@ mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_vi mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o +mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += flow.o diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c new file mode 100644 index 000000000000..ab4bc3778edd --- /dev/null +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "mlx5_ib.h" + +#define UVERBS_MODULE_NAME mlx5_ib +#include + +static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { + [MLX5_IB_FLOW_TYPE_NORMAL] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + .u.ptr = { + .len = sizeof(u16), /* data is priority */ + .min_len = sizeof(u16), + } + }, + [MLX5_IB_FLOW_TYPE_SNIFFER] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_NO_DATA(), + }, + [MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_NO_DATA(), + }, + [MLX5_IB_FLOW_TYPE_MC_DEFAULT] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_NO_DATA(), + }, +}; + +static int flow_matcher_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct mlx5_ib_flow_matcher *obj = uobject->object; + int ret; + + ret = ib_destroy_usecnt(&obj->usecnt, why, uobject); + if (ret) + return ret; + + kfree(obj); + return 0; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( + struct ib_device *ib_dev, struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); + struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); + struct mlx5_ib_flow_matcher *obj; + int err; + + obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL); + if (!obj) + return -ENOMEM; + + obj->mask_len = uverbs_attr_get_len( + attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); + err = uverbs_copy_from(&obj->matcher_mask, + attrs, + MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); + if (err) + goto end; + + obj->flow_type = uverbs_attr_get_enum_id( + attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE); + + if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) { + err = uverbs_copy_from(&obj->priority, + attrs, + MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE); + if (err) + goto end; + } + + err = uverbs_copy_from(&obj->match_criteria_enable, + attrs, + MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA); + if (err) + goto end; + + uobj->object = obj; + obj->mdev = dev->mdev; + atomic_set(&obj->usecnt, 0); + return 0; + +end: + kfree(obj); + return err; +} + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_FLOW_MATCHER_CREATE, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE, + MLX5_IB_OBJECT_FLOW_MATCHER, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK, + UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)), + UA_MANDATORY), + UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE, + mlx5_ib_flow_type, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, + UVERBS_ATTR_TYPE(u8), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_FLOW_MATCHER_DESTROY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE, + MLX5_IB_OBJECT_FLOW_MATCHER, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER, + UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup), + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE), + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY)); + +DECLARE_UVERBS_OBJECT_TREE(flow_objects, + &UVERBS_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER)); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 04a5d82c9cf3..c556b00bf4f7 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -46,6 +46,7 @@ #include #include #include +#include #define mlx5_ib_dbg(dev, format, arg...) 
\ pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ @@ -173,6 +174,16 @@ struct mlx5_ib_flow_handler { struct ib_counters *ibcounters; }; +struct mlx5_ib_flow_matcher { + struct mlx5_ib_match_params matcher_mask; + int mask_len; + enum mlx5_ib_flow_type flow_type; + u16 priority; + struct mlx5_core_dev *mdev; + atomic_t usecnt; + u8 match_criteria_enable; +}; + struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS]; diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 017ccf75890c..7f230d1ec2b8 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -204,6 +204,8 @@ struct uverbs_object_tree_def { #define UVERBS_ATTR_SIZE(_min_len, _len) \ .u.ptr.min_len = _min_len, .u.ptr.len = _len +#define UVERBS_ATTR_NO_DATA() UVERBS_ATTR_SIZE(0, 0) + /* * Specifies a uapi structure that cannot be extended. The user must always * supply the whole structure and nothing more. The structure must be declared diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 1a05bb4b0b34..233d5d140179 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -33,6 +33,7 @@ #ifndef MLX5_USER_IOCTL_CMDS_H #define MLX5_USER_IOCTL_CMDS_H +#include #include enum mlx5_ib_create_flow_action_attrs { @@ -112,10 +113,40 @@ enum mlx5_ib_devx_umem_methods { MLX5_IB_METHOD_DEVX_UMEM_DEREG, }; -enum mlx5_ib_devx_objects { +enum mlx5_ib_objects { MLX5_IB_OBJECT_DEVX = (1U << UVERBS_ID_NS_SHIFT), MLX5_IB_OBJECT_DEVX_OBJ, MLX5_IB_OBJECT_DEVX_UMEM, + MLX5_IB_OBJECT_FLOW_MATCHER, +}; + +enum mlx5_ib_flow_matcher_create_attrs { + MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK, + MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE, + MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, +}; + +enum mlx5_ib_flow_matcher_destroy_attrs { + MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum mlx5_ib_flow_matcher_methods { + MLX5_IB_METHOD_FLOW_MATCHER_CREATE = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_METHOD_FLOW_MATCHER_DESTROY, +}; + +#define MLX5_IB_DW_MATCH_PARAM 0x80 + +struct mlx5_ib_match_params { + __u32 match_params[MLX5_IB_DW_MATCH_PARAM]; +}; + +enum mlx5_ib_flow_type { + MLX5_IB_FLOW_TYPE_NORMAL, + MLX5_IB_FLOW_TYPE_SNIFFER, + MLX5_IB_FLOW_TYPE_ALL_DEFAULT, + MLX5_IB_FLOW_TYPE_MC_DEFAULT, }; #endif -- cgit From 6cd080a674a7adce97c0189c4579cf40782c2770 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 23 Jul 2018 15:25:08 +0300 Subject: IB: Support ib_flow creation in drivers This patch considers the case that ib_flow is created by some device driver with its specific parameters using the KABI infrastructure. In that case both QP and ib_uflow_resources might not be applicable. Downstream patches from this series use the above functionality. 
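For reference, the wiring for such a driver-created flow is done by the ib_set_flow() helper introduced by the next patch in this series (see its ib_verbs.h hunk further below); in a driver's create method the usage is roughly:

	/* qp may be NULL when the destination is not a QP (e.g. a DEVX object): */
	ib_set_flow(uobj, &flow_handler->ibflow, qp, ib_dev);

The generic cleanup path then calls flow->device->destroy_flow() and drops the QP reference count only if a QP was actually attached.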
Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 4 ++++ drivers/infiniband/core/uverbs_std_types.c | 5 +++-- include/rdma/ib_verbs.h | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index bd6eefaecbd6..987ee38ab4b3 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2771,6 +2771,9 @@ void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res) { unsigned int i; + if (!uflow_res) + return; + for (i = 0; i < uflow_res->collection_num; i++) atomic_dec(&uflow_res->collection[i]->usecnt); @@ -3585,6 +3588,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, } atomic_inc(&qp->usecnt); flow_id->qp = qp; + flow_id->device = qp->device; flow_id->uobject = uobj; uobj->object = flow_id; uflow = container_of(uobj, typeof(*uflow), uobject); diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 718c8430d364..c1e0492cc78a 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -54,9 +54,10 @@ static int uverbs_free_flow(struct ib_uobject *uobject, struct ib_qp *qp = flow->qp; int ret; - ret = qp->device->destroy_flow(flow); + ret = flow->device->destroy_flow(flow); if (!ret) { - atomic_dec(&qp->usecnt); + if (qp) + atomic_dec(&qp->usecnt); ib_uverbs_flow_resources_free(uflow->resources); } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 08348e53082c..24d6ec38feea 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2081,6 +2081,7 @@ struct ib_flow_attr { struct ib_flow { struct ib_qp *qp; + struct ib_device *device; struct ib_uobject *uobject; }; -- cgit From 32269441240064c7475241ae28fee787fcdf55b9 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 23 Jul 2018 15:25:09 +0300 Subject: IB/mlx5: Introduce driver create and destroy flow methods Introduce driver create and destroy flow methods on the uverbs flow object. This allows the driver to get its specific device attributes to match the underlay specification while still using the generic ib_flow object for cleanup and code sharing. The IB object's attributes are set via the ib_set_flow() helper function. The specific implementation for the given specification is added in downstream patches. 
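One detail worth noting in the handler below: exactly one destination attribute (a DEVX object or a QP) may be supplied, which the code checks up front:

	dest_devx = uverbs_attr_is_valid(attrs,
					 MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
	dest_qp = uverbs_attr_is_valid(attrs,
				       MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);

	/* Reject both-present and both-absent: exactly one destination. */
	if ((dest_devx && dest_qp) || (!dest_devx && !dest_qp))
		return -EINVAL;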
Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 22 +++++++ drivers/infiniband/hw/mlx5/flow.c | 110 +++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/main.c | 9 +++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 15 +++++ include/rdma/ib_verbs.h | 14 ++++ include/rdma/uverbs_named_ioctl.h | 29 ++++---- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 17 +++++ 7 files changed, 204 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 7f9d73b03421..270452c9e673 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -87,6 +87,28 @@ void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); } +bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type) +{ + struct devx_obj *devx_obj = obj; + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); + + switch (opcode) { + case MLX5_CMD_OP_DESTROY_TIR: + *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; + *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, + obj_id); + return true; + + case MLX5_CMD_OP_DESTROY_FLOW_TABLE: + *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox, + table_id); + return true; + default: + return false; + } +} + static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in) { u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index ab4bc3778edd..b4422e4def17 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -38,6 +38,82 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { }, }; +static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( + struct ib_device *ib_dev, struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_flow_handler *flow_handler; + struct mlx5_ib_flow_matcher *fs_matcher; + void *devx_obj; + int dest_id, dest_type; + void *cmd_in; + int inlen; + bool dest_devx, dest_qp; + struct ib_qp *qp = NULL; + struct ib_uobject *uobj = + uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); + struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); + + if (!capable(CAP_NET_RAW)) + return -EPERM; + + dest_devx = + uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); + dest_qp = uverbs_attr_is_valid(attrs, + MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); + + if ((dest_devx && dest_qp) || (!dest_devx && !dest_qp)) + return -EINVAL; + + if (dest_devx) { + devx_obj = uverbs_attr_get_obj( + attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); + if (IS_ERR(devx_obj)) + return PTR_ERR(devx_obj); + + /* Verify that the given DEVX object is a flow + * steering destination. 
+ */ + if (!mlx5_ib_devx_is_flow_dest(devx_obj, &dest_id, &dest_type)) + return -EINVAL; + } else { + struct mlx5_ib_qp *mqp; + + qp = uverbs_attr_get_obj(attrs, + MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); + if (IS_ERR(qp)) + return PTR_ERR(qp); + + if (qp->qp_type != IB_QPT_RAW_PACKET) + return -EINVAL; + + mqp = to_mqp(qp); + if (mqp->flags & MLX5_IB_QP_RSS) + dest_id = mqp->rss_qp.tirn; + else + dest_id = mqp->raw_packet_qp.rq.tirn; + dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; + } + + if (dev->rep) + return -ENOTSUPP; + + cmd_in = uverbs_attr_get_alloced_ptr( + attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); + inlen = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); + fs_matcher = uverbs_attr_get_obj(attrs, + MLX5_IB_ATTR_CREATE_FLOW_MATCHER); + flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, cmd_in, inlen, + dest_id, dest_type); + if (IS_ERR(flow_handler)) + return PTR_ERR(flow_handler); + + ib_set_flow(uobj, &flow_handler->ibflow, qp, ib_dev); + + return 0; +} + static int flow_matcher_cleanup(struct ib_uobject *uobject, enum rdma_remove_reason why) { @@ -101,6 +177,40 @@ end: return err; } +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_CREATE_FLOW, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, + UVERBS_OBJECT_FLOW, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE, + UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER, + MLX5_IB_OBJECT_FLOW_MATCHER, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP, + UVERBS_OBJECT_QP, + UVERBS_ACCESS_READ), + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_READ)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_DESTROY_FLOW, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, + UVERBS_OBJECT_FLOW, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +ADD_UVERBS_METHODS(mlx5_ib_fs, + UVERBS_OBJECT_FLOW, + &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW), + &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW)); + DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_FLOW_MATCHER_CREATE, UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d4d894e9f942..6c8b4745fb0b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3641,6 +3641,15 @@ unlock: return ERR_PTR(err); } +struct mlx5_ib_flow_handler * +mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_matcher *fs_matcher, + void *cmd_in, int inlen, int dest_id, + int dest_type) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags) { u32 flags = 0; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index c556b00bf4f7..324f4ea5fce6 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1230,6 +1230,10 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context); const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void); +struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( + struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, + void *cmd_in, int inlen, int dest_id, int dest_type); +bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); #else static inline int 
mlx5_ib_devx_create(struct mlx5_ib_dev *dev, @@ -1238,6 +1242,17 @@ static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context) {} static inline const struct uverbs_object_tree_def * mlx5_ib_get_devx_tree(void) { return NULL; } +static inline struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( + struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, + void *cmd_in, int inlen, int dest_id, int dest_type) +{ + return ERR_PTR(-EOPNOTSUPP); +} +static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, + int *dest_type) +{ + return false; +} #endif static inline void init_query_mad(struct ib_smp *mad) { diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 24d6ec38feea..b626aa2310c5 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4134,6 +4134,20 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector) } +static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow, + struct ib_qp *qp, struct ib_device *device) +{ + uobj->object = ibflow; + ibflow->uobject = uobj; + + if (qp) { + atomic_inc(&qp->usecnt); + ibflow->qp = qp; + } + + ibflow->device = device; +} + /** * rdma_roce_rescan_device - Rescan all of the network devices in the system * and add their gids, as needed, to the relevant RoCE devices. diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h index 2eb1767042af..b3b21733cc55 100644 --- a/include/rdma/uverbs_named_ioctl.h +++ b/include/rdma/uverbs_named_ioctl.h @@ -97,22 +97,14 @@ .methods = &UVERBS_OBJECT_METHODS(_object_id) \ } -/* Used by drivers to declare a complete parsing tree for a single method that - * differs only in having additional driver specific attributes. +/* Used by drivers to declare a complete parsing tree for new methods */ -#define ADD_UVERBS_ATTRIBUTES_SIMPLE(_name, _object_id, _method_id, ...) \ - static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \ - _method_id)[] = { __VA_ARGS__ }; \ - static const struct uverbs_method_def UVERBS_METHOD(_method_id) = { \ - .id = _method_id, \ - .num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)), \ - .attrs = &UVERBS_METHOD_ATTRS(_method_id), \ - }; \ +#define ADD_UVERBS_METHODS(_name, _object_id, ...) \ static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \ - _object_id)[] = { &UVERBS_METHOD(_method_id) }; \ + _object_id)[] = { __VA_ARGS__ }; \ static const struct uverbs_object_def _name##_struct = { \ .id = _object_id, \ - .num_methods = 1, \ + .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \ .methods = &UVERBS_OBJECT_METHODS(_object_id) \ }; \ static const struct uverbs_object_def *const _name##_ptrs[] = { \ @@ -123,4 +115,17 @@ .objects = &_name##_ptrs, \ } +/* Used by drivers to declare a complete parsing tree for a single method that + * differs only in having additional driver specific attributes. + */ +#define ADD_UVERBS_ATTRIBUTES_SIMPLE(_name, _object_id, _method_id, ...) 
\ + static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \ + _method_id)[] = { __VA_ARGS__ }; \ + static const struct uverbs_method_def UVERBS_METHOD(_method_id) = { \ + .id = _method_id, \ + .num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)), \ + .attrs = &UVERBS_METHOD_ATTRS(_method_id), \ + }; \ + ADD_UVERBS_METHODS(_name, _object_id, &UVERBS_METHOD(_method_id)) + #endif diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 233d5d140179..9c51801b9e64 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -149,4 +149,21 @@ enum mlx5_ib_flow_type { MLX5_IB_FLOW_TYPE_MC_DEFAULT, }; +enum mlx5_ib_create_flow_attrs { + MLX5_IB_ATTR_CREATE_FLOW_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE, + MLX5_IB_ATTR_CREATE_FLOW_DEST_QP, + MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX, + MLX5_IB_ATTR_CREATE_FLOW_MATCHER, +}; + +enum mlx5_ib_destroy_flow_attrs { + MLX5_IB_ATTR_DESTROY_FLOW_HANDLE = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum mlx5_ib_flow_methods { + MLX5_IB_METHOD_CREATE_FLOW = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_METHOD_DESTROY_FLOW, +}; + #endif -- cgit From d4be3f4466b8a770ea2c3b57b942efd057fe1c19 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 23 Jul 2018 15:25:10 +0300 Subject: IB/mlx5: Support adding flow steering rule by raw description Add support to set a public flow steering rule when its destination is a TIR by using raw specification data. The logic follows the verbs API, but instead of using ib_spec(s) the raw, device-specific description is used. This allows supporting specialty matchers without having to define new matches in the verbs struct-based language. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 216 ++++++++++++++++++++++++++++++++--- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 + 2 files changed, 201 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 6c8b4745fb0b..68d2801b79c6 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2978,11 +2978,11 @@ static void counters_clear_description(struct ib_counters *counters) static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) { - struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device); struct mlx5_ib_flow_handler *handler = container_of(flow_id, struct mlx5_ib_flow_handler, ibflow); struct mlx5_ib_flow_handler *iter, *tmp; + struct mlx5_ib_dev *dev = handler->dev; mutex_lock(&dev->flow_db->lock); @@ -3000,6 +3000,8 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) counters_clear_description(handler->ibcounters); mutex_unlock(&dev->flow_db->lock); + if (handler->flow_matcher) + atomic_dec(&handler->flow_matcher->usecnt); kfree(handler); return 0; @@ -3020,6 +3022,26 @@ enum flow_table_type { #define MLX5_FS_MAX_TYPES 6 #define MLX5_FS_MAX_ENTRIES BIT(16) + +static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns, + struct mlx5_ib_flow_prio *prio, + int priority, + int num_entries, int num_groups) +{ + struct mlx5_flow_table *ft; + + ft = mlx5_create_auto_grouped_flow_table(ns, priority, + num_entries, + num_groups, + 0, 0); + if (IS_ERR(ft)) + return ERR_CAST(ft); + + prio->flow_table = ft; + prio->refcount = 0; + return prio; +} + static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, struct ib_flow_attr *flow_attr, enum flow_table_type ft_type) @@ -3032,7
+3054,6 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, int num_entries; int num_groups; int priority; - int err = 0; max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size)); @@ -3082,21 +3103,10 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, return ERR_PTR(-ENOMEM); ft = prio->flow_table; - if (!ft) { - ft = mlx5_create_auto_grouped_flow_table(ns, priority, - num_entries, - num_groups, - 0, 0); - - if (!IS_ERR(ft)) { - prio->refcount = 0; - prio->flow_table = ft; - } else { - err = PTR_ERR(ft); - } - } + if (!ft) + return _get_prio(ns, prio, priority, num_entries, num_groups); - return err ? ERR_PTR(err) : prio; + return prio; } static void set_underlay_qp(struct mlx5_ib_dev *dev, @@ -3355,6 +3365,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, ft_prio->refcount++; handler->prio = ft_prio; + handler->dev = dev; ft_prio->flow_table = ft; free: @@ -3641,13 +3652,184 @@ unlock: return ERR_PTR(err); } +static struct mlx5_ib_flow_prio *_get_flow_table(struct mlx5_ib_dev *dev, + int priority, bool mcast) +{ + int max_table_size; + struct mlx5_flow_namespace *ns = NULL; + struct mlx5_ib_flow_prio *prio; + + max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + log_max_ft_size)); + if (max_table_size < MLX5_FS_MAX_ENTRIES) + return ERR_PTR(-ENOMEM); + + if (mcast) + priority = MLX5_IB_FLOW_MCAST_PRIO; + else + priority = ib_prio_to_core_prio(priority, false); + + ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS); + if (!ns) + return ERR_PTR(-ENOTSUPP); + + prio = &dev->flow_db->prios[priority]; + + if (prio->flow_table) + return prio; + + return _get_prio(ns, prio, priority, MLX5_FS_MAX_ENTRIES, + MLX5_FS_MAX_TYPES); +} + +static struct mlx5_ib_flow_handler * +_create_raw_flow_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + struct mlx5_flow_destination *dst, + struct mlx5_ib_flow_matcher *fs_matcher, + void *cmd_in, int inlen) +{ + struct mlx5_ib_flow_handler *handler; + struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; + struct mlx5_flow_spec *spec; + struct mlx5_flow_table *ft = ft_prio->flow_table; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + handler = kzalloc(sizeof(*handler), GFP_KERNEL); + if (!handler || !spec) { + err = -ENOMEM; + goto free; + } + + INIT_LIST_HEAD(&handler->list); + + memcpy(spec->match_value, cmd_in, inlen); + memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params, + fs_matcher->mask_len); + spec->match_criteria_enable = fs_matcher->match_criteria_enable; + + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + handler->rule = mlx5_add_flow_rules(ft, spec, + &flow_act, dst, 1); + + if (IS_ERR(handler->rule)) { + err = PTR_ERR(handler->rule); + goto free; + } + + ft_prio->refcount++; + handler->prio = ft_prio; + handler->dev = dev; + ft_prio->flow_table = ft; + +free: + if (err) + kfree(handler); + kvfree(spec); + return err ? 
ERR_PTR(err) : handler; +} + +static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher, + void *match_v) +{ + void *match_c; + void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4; + void *dmac, *dmac_mask; + void *ipv4, *ipv4_mask; + + if (!(fs_matcher->match_criteria_enable & + (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT))) + return false; + + match_c = fs_matcher->matcher_mask.match_params; + match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers); + match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + + dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4, + dmac_47_16); + dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4, + dmac_47_16); + + if (is_multicast_ether_addr(dmac) && + is_multicast_ether_addr(dmac_mask)) + return true; + + ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + + ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + + if (ipv4_is_multicast(*(__be32 *)(ipv4)) && + ipv4_is_multicast(*(__be32 *)(ipv4_mask))) + return true; + + return false; +} + struct mlx5_ib_flow_handler * mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, void *cmd_in, int inlen, int dest_id, int dest_type) { - return ERR_PTR(-EOPNOTSUPP); + struct mlx5_flow_destination *dst; + struct mlx5_ib_flow_prio *ft_prio; + int priority = fs_matcher->priority; + struct mlx5_ib_flow_handler *handler; + bool mcast; + int err; + + if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL) + return ERR_PTR(-EOPNOTSUPP); + + if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO) + return ERR_PTR(-ENOMEM); + + if (dest_type != MLX5_FLOW_DESTINATION_TYPE_TIR) + return ERR_PTR(-ENOTSUPP); + + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (!dst) + return ERR_PTR(-ENOMEM); + + mcast = raw_fs_is_multicast(fs_matcher, cmd_in); + mutex_lock(&dev->flow_db->lock); + + ft_prio = _get_flow_table(dev, priority, mcast); + if (IS_ERR(ft_prio)) { + err = PTR_ERR(ft_prio); + goto unlock; + } + + dst->type = dest_type; + dst->tir_num = dest_id; + handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, cmd_in, + inlen); + + if (IS_ERR(handler)) { + err = PTR_ERR(handler); + goto destroy_ft; + } + + mutex_unlock(&dev->flow_db->lock); + atomic_inc(&fs_matcher->usecnt); + handler->flow_matcher = fs_matcher; + + kfree(dst); + + return handler; + +destroy_ft: + put_flow_table(dev, ft_prio, false); +unlock: + mutex_unlock(&dev->flow_db->lock); + kfree(dst); + + return ERR_PTR(err); } static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 324f4ea5fce6..33948b547894 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -172,6 +172,8 @@ struct mlx5_ib_flow_handler { struct mlx5_ib_flow_prio *prio; struct mlx5_flow_handle *rule; struct ib_counters *ibcounters; + struct mlx5_ib_dev *dev; + struct mlx5_ib_flow_matcher *flow_matcher; }; struct mlx5_ib_flow_matcher { -- cgit From 6346f0bfa07b3a4ed55b8630cf20b6f43d277b51 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 23 Jul 2018 15:25:11 +0300 Subject: IB/mlx5: Add support for a flow table destination for driver flow steering Add support to set a destination that is a flow table; this can come from the DEVX destination.
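For illustration only (a sketch, not part of the commit itself): with both destination types supported, the destination programming in mlx5_ib_raw_fs_rule_add() reduces to the dispatch introduced by the hunk below, where dest_id and dest_type were resolved earlier from the DEVX object or the QP:

	if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
		/* steer matching packets into a TIR */
		dst->type = dest_type;
		dst->tir_num = dest_id;
	} else {
		/* DEVX-provided destination: steer to a flow table by number */
		dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
		dst->ft_num = dest_id;
	}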
Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 68d2801b79c6..11ed9416db48 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3789,9 +3789,6 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO) return ERR_PTR(-ENOMEM); - if (dest_type != MLX5_FLOW_DESTINATION_TYPE_TIR) - return ERR_PTR(-ENOTSUPP); - dst = kzalloc(sizeof(*dst), GFP_KERNEL); if (!dst) return ERR_PTR(-ENOMEM); @@ -3805,8 +3802,14 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, goto unlock; } - dst->type = dest_type; - dst->tir_num = dest_id; + if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) { + dst->type = dest_type; + dst->tir_num = dest_id; + } else { + dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; + dst->ft_num = dest_id; + } + handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, cmd_in, inlen); -- cgit From cb80fb189270e7b2c32fa470d40e951852614eb2 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 23 Jul 2018 15:25:12 +0300 Subject: IB/mlx5: Enable driver uapi commands for flow steering Expose the mlx5 flow steering parsing trees, making the functionality available to user space. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/flow.c | 10 ++++++++++ drivers/infiniband/hw/mlx5/main.c | 4 +++- drivers/infiniband/hw/mlx5/mlx5_ib.h | 12 ++++++------ 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index b4422e4def17..ee398a9b5f26 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -242,3 +242,13 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER, DECLARE_UVERBS_OBJECT_TREE(flow_objects, &UVERBS_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER)); + +int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root) +{ + int i = 0; + + root[i++] = &flow_objects; + root[i++] = &mlx5_ib_fs; + + return i; +} diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 11ed9416db48..a26ab69f3741 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5535,7 +5535,7 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE( UVERBS_ATTR_TYPE(u64), UA_MANDATORY)); -#define NUM_TREES 3 +#define NUM_TREES 5 static int populate_specs_root(struct mlx5_ib_dev *dev) { const struct uverbs_object_tree_def *default_root[NUM_TREES + 1] = { @@ -5555,6 +5555,8 @@ static int populate_specs_root(struct mlx5_ib_dev *dev) !WARN_ON(num_trees >= ARRAY_SIZE(default_root))) default_root[num_trees++] = mlx5_ib_get_devx_tree(); + num_trees += mlx5_ib_get_flow_trees(default_root + num_trees); + dev->ib_dev.driver_specs_root = uverbs_alloc_spec_tree(num_trees, default_root); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 33948b547894..462505c8fa25 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1236,6 +1236,7 @@ struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, void *cmd_in, int inlen, int dest_id, int dest_type); bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); +int mlx5_ib_get_flow_trees(const struct
uverbs_object_tree_def **root); #else static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, @@ -1244,17 +1245,16 @@ static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context) {} static inline const struct uverbs_object_tree_def * mlx5_ib_get_devx_tree(void) { return NULL; } -static inline struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( - struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, - void *cmd_in, int inlen, int dest_id, int dest_type) -{ - return ERR_PTR(-EOPNOTSUPP); -} static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type) { return false; } +static inline int +mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root) +{ + return 0; +} #endif static inline void init_query_mad(struct ib_smp *mad) { -- cgit From 99a7e2bf704d64c966dfacede1ba2d9b47cb676e Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 11 Jul 2018 13:15:42 +0000 Subject: IB/ipoib: Fix error return code in ipoib_dev_init() Return a negative error code from the ipoib_neigh_hash_init() error handling path instead of 0, as is done elsewhere in this function. Fixes: 515ed4f3aab4 ("IB/IPoIB: Separate control and data related initializations") Signed-off-by: Wei Yongjun Reviewed-by: Yuval Shaia Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 012c9e3970ac..82f0e3869b04 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1791,7 +1791,8 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) goto out_free_pd; } - if (ipoib_neigh_hash_init(priv) < 0) { + ret = ipoib_neigh_hash_init(priv); + if (ret) { pr_warn("%s failed to init neigh hash\n", dev->name); goto out_dev_uninit; } -- cgit From 536ca245c512aedfd84cde072d7b3ca14b6e1792 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Fri, 13 Jul 2018 03:10:20 -0400 Subject: IB/rxe: Drop QP0 silently According to "Annex A16: RDMA over Converged Ethernet (RoCE)": A16.4.3 MANAGEMENT INTERFACES As defined in the base specification, a special Queue Pair, QP0 is defined solely for communication between subnet manager(s) and subnet management agents. Since such an IB-defined subnet management architecture is outside the scope of this annex, it follows that there is also no requirement that a port which conforms to this annex be associated with a QP0. Thus, for end nodes designed to conform to this annex, the concept of QP0 is undefined and unused for any port connected to an Ethernet network. CA16-8: A packet arriving at a RoCE port containing a BTH with the destination QP field set to QP0 shall be silently dropped. Signed-off-by: Zhu Yanjun Acked-by: Moni Shoua Reviewed-by: Yuval Shaia Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_recv.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index cc5cfd156758..d30dbac24583 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -225,9 +225,14 @@ static int hdr_check(struct rxe_pkt_info *pkt) goto err1; } + if (unlikely(qpn == 0)) { + pr_warn_once("QP 0 not supported\n"); + goto err1; + } + if (qpn != IB_MULTICAST_QPN) { - index = (qpn == 0) ? port->qp_smi_index : - ((qpn == 1) ?
port->qp_gsi_index : qpn); + index = (qpn == 1) ? port->qp_gsi_index : qpn; + qp = rxe_pool_get_index(&rxe->qp_pool, index); if (unlikely(!qp)) { pr_warn_ratelimited("no qp matches qpn 0x%x\n", qpn); -- cgit From bb039a870c0593a4deaa72c2693d02a87723305c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 18 Jul 2018 09:25:16 -0700 Subject: IB/core: Allow ULPs to specify NULL as the third ib_post_(send|recv|srq_recv)() argument This patch does not change the behavior of the modified functions. Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index b626aa2310c5..99bcf64a4762 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3278,7 +3278,9 @@ static inline int ib_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr) { - return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr); + struct ib_recv_wr *dummy; + + return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr ? : &dummy); } /** @@ -3379,7 +3381,9 @@ static inline int ib_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, struct ib_send_wr **bad_send_wr) { - return qp->device->post_send(qp, send_wr, bad_send_wr); + struct ib_send_wr *dummy; + + return qp->device->post_send(qp, send_wr, bad_send_wr ? : &dummy); } /** @@ -3394,7 +3398,9 @@ static inline int ib_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr) { - return qp->device->post_recv(qp, recv_wr, bad_recv_wr); + struct ib_recv_wr *dummy; + + return qp->device->post_recv(qp, recv_wr, bad_recv_wr ? : &dummy); } struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, -- cgit From 1fec77bf8fcd95cf7b2feb1e29763d6cd4448912 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 18 Jul 2018 09:25:17 -0700 Subject: RDMA/core: Simplify ib_post_(send|recv|srq_recv)() calls Instead of declaring and passing a dummy 'bad_wr' pointer, pass NULL as third argument to ib_post_(send|recv|srq_recv)(). 
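As a sketch of the conversion the following patches apply (a hypothetical caller; qp and wr stand in for whatever the real call sites use):

	/* before: a dummy output pointer that the caller never reads */
	struct ib_send_wr *bad_wr;

	ret = ib_post_send(qp, &wr, &bad_wr);

	/* after: pass NULL; the inline wrapper substitutes its own dummy */
	ret = ib_post_send(qp, &wr, NULL);

Callers that do care which WR failed can keep passing a real pointer; only call sites that ignored bad_wr are converted.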
Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/mad.c | 14 +++++--------- drivers/infiniband/core/rw.c | 4 ++-- drivers/infiniband/core/verbs.c | 7 +++---- 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 34e9b2768324..ef459f2f2eeb 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1181,7 +1181,6 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_mad_qp_info *qp_info; struct list_head *list; - struct ib_send_wr *bad_send_wr; struct ib_mad_agent *mad_agent; struct ib_sge *sge; unsigned long flags; @@ -1219,7 +1218,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) spin_lock_irqsave(&qp_info->send_queue.lock, flags); if (qp_info->send_queue.count < qp_info->send_queue.max_active) { ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr, - &bad_send_wr); + NULL); list = &qp_info->send_queue.list; } else { ret = 0; @@ -2476,7 +2475,6 @@ static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc) struct ib_mad_send_wr_private *mad_send_wr, *queued_send_wr; struct ib_mad_qp_info *qp_info; struct ib_mad_queue *send_queue; - struct ib_send_wr *bad_send_wr; struct ib_mad_send_wc mad_send_wc; unsigned long flags; int ret; @@ -2526,7 +2524,7 @@ retry: if (queued_send_wr) { ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr, - &bad_send_wr); + NULL); if (ret) { dev_err(&port_priv->device->dev, "ib_post_send failed: %d\n", ret); @@ -2571,11 +2569,9 @@ static bool ib_mad_send_error(struct ib_mad_port_private *port_priv, if (wc->status == IB_WC_WR_FLUSH_ERR) { if (mad_send_wr->retry) { /* Repost send */ - struct ib_send_wr *bad_send_wr; - mad_send_wr->retry = 0; ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr, - &bad_send_wr); + NULL); if (!ret) return false; } @@ -2891,7 +2887,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, int post, ret; struct ib_mad_private *mad_priv; struct ib_sge sg_list; - struct ib_recv_wr recv_wr, *bad_recv_wr; + struct ib_recv_wr recv_wr; struct ib_mad_queue *recv_queue = &qp_info->recv_queue; /* Initialize common scatter list fields */ @@ -2935,7 +2931,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, post = (++recv_queue->count < recv_queue->max_active); list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list); spin_unlock_irqrestore(&recv_queue->lock, flags); - ret = ib_post_recv(qp_info->qp, &recv_wr, &bad_recv_wr); + ret = ib_post_recv(qp_info->qp, &recv_wr, NULL); if (ret) { spin_lock_irqsave(&recv_queue->lock, flags); list_del(&mad_priv->header.mad_list.list); diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 474d65297afc..683e6d11a564 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -564,10 +564,10 @@ EXPORT_SYMBOL(rdma_rw_ctx_wrs); int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr) { - struct ib_send_wr *first_wr, *bad_wr; + struct ib_send_wr *first_wr; first_wr = rdma_rw_ctx_wrs(ctx, qp, port_num, cqe, chain_wr); - return ib_post_send(qp, first_wr, &bad_wr); + return ib_post_send(qp, first_wr, NULL); } EXPORT_SYMBOL(rdma_rw_ctx_post); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index b6ceb6fd6a67..cde359d48d34 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2473,7 +2473,6 @@ static void 
__ib_drain_sq(struct ib_qp *qp) struct ib_cq *cq = qp->send_cq; struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; struct ib_drain_cqe sdrain; - struct ib_send_wr *bad_swr; struct ib_rdma_wr swr = { .wr = { .next = NULL, @@ -2492,7 +2491,7 @@ static void __ib_drain_sq(struct ib_qp *qp) sdrain.cqe.done = ib_drain_qp_done; init_completion(&sdrain.done); - ret = ib_post_send(qp, &swr.wr, &bad_swr); + ret = ib_post_send(qp, &swr.wr, NULL); if (ret) { WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); return; @@ -2513,7 +2512,7 @@ static void __ib_drain_rq(struct ib_qp *qp) struct ib_cq *cq = qp->recv_cq; struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; struct ib_drain_cqe rdrain; - struct ib_recv_wr rwr = {}, *bad_rwr; + struct ib_recv_wr rwr = {}; int ret; ret = ib_modify_qp(qp, &attr, IB_QP_STATE); @@ -2526,7 +2525,7 @@ static void __ib_drain_rq(struct ib_qp *qp) rdrain.cqe.done = ib_drain_qp_done; init_completion(&rdrain.done); - ret = ib_post_recv(qp, &rwr, &bad_rwr); + ret = ib_post_recv(qp, &rwr, NULL); if (ret) { WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); return; -- cgit From 4b4671a0f26917a85085020ed47a0c6e41edf128 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 18 Jul 2018 09:25:18 -0700 Subject: IB/IPoIB: Simplify ib_post_(send|recv|srq_recv)() calls Instead of declaring and passing a dummy 'bad_wr' pointer, pass NULL as third argument to ib_post_(send|recv|srq_recv)(). Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 13 ++++--------- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 6 ++---- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 582f199887b0..8b44f33c7ae0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -94,7 +94,6 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags, static int ipoib_cm_post_receive_srq(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - struct ib_recv_wr *bad_wr; int i, ret; priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; @@ -102,7 +101,7 @@ static int ipoib_cm_post_receive_srq(struct net_device *dev, int id) for (i = 0; i < priv->cm.num_frags; ++i) priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i]; - ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr); + ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, NULL); if (unlikely(ret)) { ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret); ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1, @@ -120,7 +119,6 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev, struct ib_sge *sge, int id) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - struct ib_recv_wr *bad_wr; int i, ret; wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; @@ -128,7 +126,7 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev, for (i = 0; i < IPOIB_CM_RX_SG; ++i) sge[i].addr = rx->rx_ring[id].mapping[i]; - ret = ib_post_recv(rx->qp, wr, &bad_wr); + ret = ib_post_recv(rx->qp, wr, NULL); if (unlikely(ret)) { ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret); ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, @@ -212,7 +210,6 @@ static void ipoib_cm_free_rx_ring(struct net_device *dev, static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv) { - struct ib_send_wr *bad_wr; struct ipoib_cm_rx *p; /* We only reserved 1 extra slot in CQ for drain WRs, so @@ -227,7 +224,7 @@ 
static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv) */ p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list); ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID; - if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr)) + if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, NULL)) ipoib_warn(priv, "failed to post drain wr\n"); list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list); @@ -699,13 +696,11 @@ static inline int post_send(struct ipoib_dev_priv *priv, unsigned int wr_id, struct ipoib_tx_buf *tx_req) { - struct ib_send_wr *bad_wr; - ipoib_build_sge(priv, tx_req); priv->tx_wr.wr.wr_id = wr_id | IPOIB_OP_CM; - return ib_post_send(tx->qp, &priv->tx_wr.wr, &bad_wr); + return ib_post_send(tx->qp, &priv->tx_wr.wr, NULL); } void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 5f5d42bad2ea..9006a13af1de 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -101,7 +101,6 @@ static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv, static int ipoib_ib_post_receive(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - struct ib_recv_wr *bad_wr; int ret; priv->rx_wr.wr_id = id | IPOIB_OP_RECV; @@ -109,7 +108,7 @@ static int ipoib_ib_post_receive(struct net_device *dev, int id) priv->rx_sge[1].addr = priv->rx_ring[id].mapping[1]; - ret = ib_post_recv(priv->qp, &priv->rx_wr, &bad_wr); + ret = ib_post_recv(priv->qp, &priv->rx_wr, NULL); if (unlikely(ret)) { ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[id].mapping); @@ -542,7 +541,6 @@ static inline int post_send(struct ipoib_dev_priv *priv, struct ipoib_tx_buf *tx_req, void *head, int hlen) { - struct ib_send_wr *bad_wr; struct sk_buff *skb = tx_req->skb; ipoib_build_sge(priv, tx_req); @@ -559,7 +557,7 @@ static inline int post_send(struct ipoib_dev_priv *priv, } else priv->tx_wr.wr.opcode = IB_WR_SEND; - return ib_post_send(priv->qp, &priv->tx_wr.wr, &bad_wr); + return ib_post_send(priv->qp, &priv->tx_wr.wr, NULL); } int ipoib_send(struct net_device *dev, struct sk_buff *skb, -- cgit From 604dbdc4a72d77f26cb637a2f196836c04098ffd Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 18 Jul 2018 09:25:19 -0700 Subject: IB/iser: Simplify ib_post_(send|recv|srq_recv)() calls Instead of declaring and passing a dummy 'bad_wr' pointer, pass NULL as third argument to ib_post_(send|recv|srq_recv)(). 
Signed-off-by: Bart Van Assche Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/iser/iser_verbs.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 616d978cbf2b..b686a4aaffe8 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -1022,7 +1022,7 @@ int iser_post_recvl(struct iser_conn *iser_conn) { struct ib_conn *ib_conn = &iser_conn->ib_conn; struct iser_login_desc *desc = &iser_conn->login_desc; - struct ib_recv_wr wr, *wr_failed; + struct ib_recv_wr wr; int ib_ret; desc->sge.addr = desc->rsp_dma; @@ -1036,7 +1036,7 @@ int iser_post_recvl(struct iser_conn *iser_conn) wr.next = NULL; ib_conn->post_recv_buf_count++; - ib_ret = ib_post_recv(ib_conn->qp, &wr, &wr_failed); + ib_ret = ib_post_recv(ib_conn->qp, &wr, NULL); if (ib_ret) { iser_err("ib_post_recv failed ret=%d\n", ib_ret); ib_conn->post_recv_buf_count--; @@ -1050,7 +1050,7 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count) struct ib_conn *ib_conn = &iser_conn->ib_conn; unsigned int my_rx_head = iser_conn->rx_desc_head; struct iser_rx_desc *rx_desc; - struct ib_recv_wr *wr, *wr_failed; + struct ib_recv_wr *wr; int i, ib_ret; for (wr = ib_conn->rx_wr, i = 0; i < count; i++, wr++) { @@ -1067,7 +1067,7 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count) wr->next = NULL; /* mark end of work requests list */ ib_conn->post_recv_buf_count += count; - ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &wr_failed); + ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, NULL); if (ib_ret) { iser_err("ib_post_recv failed ret=%d\n", ib_ret); ib_conn->post_recv_buf_count -= count; @@ -1086,7 +1086,7 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count) int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, bool signal) { - struct ib_send_wr *bad_wr, *wr = iser_tx_next_wr(tx_desc); + struct ib_send_wr *wr = iser_tx_next_wr(tx_desc); int ib_ret; ib_dma_sync_single_for_device(ib_conn->device->ib_device, @@ -1100,10 +1100,10 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, wr->opcode = IB_WR_SEND; wr->send_flags = signal ? IB_SEND_SIGNALED : 0; - ib_ret = ib_post_send(ib_conn->qp, &tx_desc->wrs[0].send, &bad_wr); + ib_ret = ib_post_send(ib_conn->qp, &tx_desc->wrs[0].send, NULL); if (ib_ret) iser_err("ib_post_send failed, ret:%d opcode:%d\n", - ib_ret, bad_wr->opcode); + ib_ret, wr->opcode); return ib_ret; } -- cgit From e01a76743a9e44ae5bb37d9019bea4a888708588 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 18 Jul 2018 09:25:20 -0700 Subject: IB/isert: Simplify ib_post_(send|recv|srq_recv)() calls Instead of declaring and passing a dummy 'bad_wr' pointer, pass NULL as third argument to ib_post_(send|recv|srq_recv)(). 
Signed-off-by: Bart Van Assche Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/isert/ib_isert.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 7e056f3c82a0..f39670c5c25c 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -810,7 +810,7 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) static int isert_post_recvm(struct isert_conn *isert_conn, u32 count) { - struct ib_recv_wr *rx_wr, *rx_wr_failed; + struct ib_recv_wr *rx_wr; int i, ret; struct iser_rx_desc *rx_desc; @@ -826,8 +826,7 @@ isert_post_recvm(struct isert_conn *isert_conn, u32 count) rx_wr--; rx_wr->next = NULL; /* mark end of work requests list */ - ret = ib_post_recv(isert_conn->qp, isert_conn->rx_wr, - &rx_wr_failed); + ret = ib_post_recv(isert_conn->qp, isert_conn->rx_wr, NULL); if (ret) isert_err("ib_post_recv() failed with ret: %d\n", ret); @@ -837,7 +836,7 @@ isert_post_recvm(struct isert_conn *isert_conn, u32 count) static int isert_post_recv(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc) { - struct ib_recv_wr *rx_wr_failed, rx_wr; + struct ib_recv_wr rx_wr; int ret; if (!rx_desc->in_use) { @@ -854,7 +853,7 @@ isert_post_recv(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc) rx_wr.num_sge = 1; rx_wr.next = NULL; - ret = ib_post_recv(isert_conn->qp, &rx_wr, &rx_wr_failed); + ret = ib_post_recv(isert_conn->qp, &rx_wr, NULL); if (ret) isert_err("ib_post_recv() failed with ret: %d\n", ret); @@ -865,7 +864,7 @@ static int isert_login_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_desc) { struct ib_device *ib_dev = isert_conn->cm_id->device; - struct ib_send_wr send_wr, *send_wr_failed; + struct ib_send_wr send_wr; int ret; ib_dma_sync_single_for_device(ib_dev, tx_desc->dma_addr, @@ -880,7 +879,7 @@ isert_login_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_des send_wr.opcode = IB_WR_SEND; send_wr.send_flags = IB_SEND_SIGNALED; - ret = ib_post_send(isert_conn->qp, &send_wr, &send_wr_failed); + ret = ib_post_send(isert_conn->qp, &send_wr, NULL); if (ret) isert_err("ib_post_send() failed, ret: %d\n", ret); @@ -968,7 +967,7 @@ isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, static int isert_login_post_recv(struct isert_conn *isert_conn) { - struct ib_recv_wr rx_wr, *rx_wr_fail; + struct ib_recv_wr rx_wr; struct ib_sge sge; int ret; @@ -987,7 +986,7 @@ isert_login_post_recv(struct isert_conn *isert_conn) rx_wr.sg_list = &sge; rx_wr.num_sge = 1; - ret = ib_post_recv(isert_conn->qp, &rx_wr, &rx_wr_fail); + ret = ib_post_recv(isert_conn->qp, &rx_wr, NULL); if (ret) isert_err("ib_post_recv() failed: %d\n", ret); @@ -1830,7 +1829,6 @@ isert_send_done(struct ib_cq *cq, struct ib_wc *wc) static int isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd) { - struct ib_send_wr *wr_failed; int ret; ret = isert_post_recv(isert_conn, isert_cmd->rx_desc); @@ -1839,8 +1837,7 @@ isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd) return ret; } - ret = ib_post_send(isert_conn->qp, &isert_cmd->tx_desc.send_wr, - &wr_failed); + ret = ib_post_send(isert_conn->qp, &isert_cmd->tx_desc.send_wr, NULL); if (ret) { isert_err("ib_post_send failed with %d\n", ret); return ret; -- cgit From 71347b0c645c47c2d5970060768b9ecc97029342 Mon Sep 17 00:00:00 
2001 From: Bart Van Assche Date: Wed, 18 Jul 2018 09:25:21 -0700 Subject: IB/srp: Simplify ib_post_(send|recv|srq_recv)() calls Instead of declaring and passing a dummy 'bad_wr' pointer, pass NULL as third argument to ib_post_(send|recv|srq_recv)(). Signed-off-by: Bart Van Assche Reviewed-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srp/ib_srp.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 117dc1082e58..f5ec21d1f4dc 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1208,7 +1208,6 @@ static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc) static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch, u32 rkey) { - struct ib_send_wr *bad_wr; struct ib_send_wr wr = { .opcode = IB_WR_LOCAL_INV, .next = NULL, @@ -1219,7 +1218,7 @@ static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch, wr.wr_cqe = &req->reg_cqe; req->reg_cqe.done = srp_inv_rkey_err_done; - return ib_post_send(ch->qp, &wr, &bad_wr); + return ib_post_send(ch->qp, &wr, NULL); } static void srp_unmap_data(struct scsi_cmnd *scmnd, @@ -1500,7 +1499,6 @@ static int srp_map_finish_fr(struct srp_map_state *state, { struct srp_target_port *target = ch->target; struct srp_device *dev = target->srp_host->srp_dev; - struct ib_send_wr *bad_wr; struct ib_reg_wr wr; struct srp_fr_desc *desc; u32 rkey; @@ -1564,7 +1562,7 @@ static int srp_map_finish_fr(struct srp_map_state *state, srp_map_desc(state, desc->mr->iova, desc->mr->length, desc->mr->rkey); - err = ib_post_send(ch->qp, &wr.wr, &bad_wr); + err = ib_post_send(ch->qp, &wr.wr, NULL); if (unlikely(err)) { WARN_ON_ONCE(err == -ENOMEM); return err; @@ -2015,7 +2013,7 @@ static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len) { struct srp_target_port *target = ch->target; struct ib_sge list; - struct ib_send_wr wr, *bad_wr; + struct ib_send_wr wr; list.addr = iu->dma; list.length = len; @@ -2030,13 +2028,13 @@ static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len) wr.opcode = IB_WR_SEND; wr.send_flags = IB_SEND_SIGNALED; - return ib_post_send(ch->qp, &wr, &bad_wr); + return ib_post_send(ch->qp, &wr, NULL); } static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu) { struct srp_target_port *target = ch->target; - struct ib_recv_wr wr, *bad_wr; + struct ib_recv_wr wr; struct ib_sge list; list.addr = iu->dma; @@ -2050,7 +2048,7 @@ static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu) wr.sg_list = &list; wr.num_sge = 1; - return ib_post_recv(ch->qp, &wr, &bad_wr); + return ib_post_recv(ch->qp, &wr, NULL); } static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp) -- cgit From 9b32a59687f5c4752eb64562edd62c20fa222187 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 18 Jul 2018 09:25:22 -0700 Subject: IB/srpt: Simplify ib_post_(send|recv|srq_recv)() calls Instead of declaring and passing a dummy 'bad_wr' pointer, pass NULL as third argument to ib_post_(send|recv|srq_recv)(). 
Signed-off-by: Bart Van Assche Reviewed-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 8bd7373cb828..7d9972add65f 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -816,7 +816,7 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch, struct srpt_recv_ioctx *ioctx) { struct ib_sge list; - struct ib_recv_wr wr, *bad_wr; + struct ib_recv_wr wr; BUG_ON(!sdev); list.addr = ioctx->ioctx.dma; @@ -830,9 +830,9 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch, wr.num_sge = 1; if (sdev->use_srq) - return ib_post_srq_recv(sdev->srq, &wr, &bad_wr); + return ib_post_srq_recv(sdev->srq, &wr, NULL); else - return ib_post_recv(ch->qp, &wr, &bad_wr); + return ib_post_recv(ch->qp, &wr, NULL); } /** @@ -846,7 +846,6 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch, */ static int srpt_zerolength_write(struct srpt_rdma_ch *ch) { - struct ib_send_wr *bad_wr; struct ib_rdma_wr wr = { .wr = { .next = NULL, @@ -859,7 +858,7 @@ static int srpt_zerolength_write(struct srpt_rdma_ch *ch) pr_debug("%s-%d: queued zerolength write\n", ch->sess_name, ch->qp->qp_num); - return ib_post_send(ch->qp, &wr.wr, &bad_wr); + return ib_post_send(ch->qp, &wr.wr, NULL); } static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc) @@ -2624,7 +2623,7 @@ static int srpt_write_pending(struct se_cmd *se_cmd) struct srpt_send_ioctx *ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd); struct srpt_rdma_ch *ch = ioctx->ch; - struct ib_send_wr *first_wr = NULL, *bad_wr; + struct ib_send_wr *first_wr = NULL; struct ib_cqe *cqe = &ioctx->rdma_cqe; enum srpt_command_state new_state; int ret, i; @@ -2648,7 +2647,7 @@ static int srpt_write_pending(struct se_cmd *se_cmd) cqe = NULL; } - ret = ib_post_send(ch->qp, first_wr, &bad_wr); + ret = ib_post_send(ch->qp, first_wr, NULL); if (ret) { pr_err("%s: ib_post_send() returned %d for %d (avail: %d)\n", __func__, ret, ioctx->n_rdma, @@ -2686,7 +2685,7 @@ static void srpt_queue_response(struct se_cmd *cmd) container_of(cmd, struct srpt_send_ioctx, cmd); struct srpt_rdma_ch *ch = ioctx->ch; struct srpt_device *sdev = ch->sport->sdev; - struct ib_send_wr send_wr, *first_wr = &send_wr, *bad_wr; + struct ib_send_wr send_wr, *first_wr = &send_wr; struct ib_sge sge; enum srpt_command_state state; int resp_len, ret, i; @@ -2759,7 +2758,7 @@ static void srpt_queue_response(struct se_cmd *cmd) send_wr.opcode = IB_WR_SEND; send_wr.send_flags = IB_SEND_SIGNALED; - ret = ib_post_send(ch->qp, first_wr, &bad_wr); + ret = ib_post_send(ch->qp, first_wr, NULL); if (ret < 0) { pr_err("%s: sending cmd response failed for tag %llu (%d)\n", __func__, ioctx->cmd.tag, ret); -- cgit From 45e3cc1a88bff18bbfe7d8bf4812ff56d9b21e5e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 18 Jul 2018 09:25:23 -0700 Subject: nvme-rdma: Simplify ib_post_(send|recv|srq_recv)() calls Instead of declaring and passing a dummy 'bad_wr' pointer, pass NULL as third argument to ib_post_(send|recv|srq_recv)(). 
Signed-off-by: Bart Van Assche Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe --- drivers/nvme/host/rdma.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index c9424da0d23e..a7fe5479732d 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1038,7 +1038,6 @@ static void nvme_rdma_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc) static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue, struct nvme_rdma_request *req) { - struct ib_send_wr *bad_wr; struct ib_send_wr wr = { .opcode = IB_WR_LOCAL_INV, .next = NULL, @@ -1050,7 +1049,7 @@ static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue, req->reg_cqe.done = nvme_rdma_inv_rkey_done; wr.wr_cqe = &req->reg_cqe; - return ib_post_send(queue->qp, &wr, &bad_wr); + return ib_post_send(queue->qp, &wr, NULL); } static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, @@ -1244,7 +1243,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge, struct ib_send_wr *first) { - struct ib_send_wr wr, *bad_wr; + struct ib_send_wr wr; int ret; sge->addr = qe->dma; @@ -1263,7 +1262,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, else first = &wr; - ret = ib_post_send(queue->qp, first, &bad_wr); + ret = ib_post_send(queue->qp, first, NULL); if (unlikely(ret)) { dev_err(queue->ctrl->ctrl.device, "%s failed with error code %d\n", __func__, ret); @@ -1274,7 +1273,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, static int nvme_rdma_post_recv(struct nvme_rdma_queue *queue, struct nvme_rdma_qe *qe) { - struct ib_recv_wr wr, *bad_wr; + struct ib_recv_wr wr; struct ib_sge list; int ret; @@ -1289,7 +1288,7 @@ static int nvme_rdma_post_recv(struct nvme_rdma_queue *queue, wr.sg_list = &list; wr.num_sge = 1; - ret = ib_post_recv(queue->qp, &wr, &bad_wr); + ret = ib_post_recv(queue->qp, &wr, NULL); if (unlikely(ret)) { dev_err(queue->ctrl->ctrl.device, "%s failed with error code %d\n", __func__, ret); -- cgit From 23f96d1f15a70e2e8ba5449d1c77b634426c4b80 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 18 Jul 2018 09:25:24 -0700 Subject: nvmet-rdma: Simplify ib_post_(send|recv|srq_recv)() calls Instead of declaring and passing a dummy 'bad_wr' pointer, pass NULL as third argument to ib_post_(send|recv|srq_recv)().
Signed-off-by: Bart Van Assche Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe --- drivers/nvme/target/rdma.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 0d7f3d603f1d..1a642e214a4c 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -382,15 +382,13 @@ static void nvmet_rdma_free_rsps(struct nvmet_rdma_queue *queue) static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev, struct nvmet_rdma_cmd *cmd) { - struct ib_recv_wr *bad_wr; - ib_dma_sync_single_for_device(ndev->device, cmd->sge[0].addr, cmd->sge[0].length, DMA_FROM_DEVICE); if (ndev->srq) - return ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr); - return ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, &bad_wr); + return ib_post_srq_recv(ndev->srq, &cmd->wr, NULL); + return ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, NULL); } static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue) @@ -472,7 +470,7 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req) struct nvmet_rdma_rsp *rsp = container_of(req, struct nvmet_rdma_rsp, req); struct rdma_cm_id *cm_id = rsp->queue->cm_id; - struct ib_send_wr *first_wr, *bad_wr; + struct ib_send_wr *first_wr; if (rsp->flags & NVMET_RDMA_REQ_INVALIDATE_RKEY) { rsp->send_wr.opcode = IB_WR_SEND_WITH_INV; @@ -493,7 +491,7 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req) rsp->send_sge.addr, rsp->send_sge.length, DMA_TO_DEVICE); - if (ib_post_send(cm_id->qp, first_wr, &bad_wr)) { + if (ib_post_send(cm_id->qp, first_wr, NULL)) { pr_err("sending cmd response failed\n"); nvmet_rdma_release_rsp(rsp); } -- cgit From 73930595066c0996f39ab750cf9fc0963149a1e0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 18 Jul 2018 09:25:25 -0700 Subject: fs/cifs: Simplify ib_post_(send|recv|srq_recv)() calls Instead of declaring and passing a dummy 'bad_wr' pointer, pass NULL as third argument to ib_post_(send|recv|srq_recv)(). 
Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- fs/cifs/smbdirect.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index c5a1cddd8856..c4a6c6976aab 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -801,7 +801,7 @@ out1: */ static int smbd_post_send_negotiate_req(struct smbd_connection *info) { - struct ib_send_wr send_wr, *send_wr_fail; + struct ib_send_wr send_wr; int rc = -ENOMEM; struct smbd_request *request; struct smbd_negotiate_req *packet; @@ -853,7 +853,7 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info) request->has_payload = false; atomic_inc(&info->send_pending); - rc = ib_post_send(info->id->qp, &send_wr, &send_wr_fail); + rc = ib_post_send(info->id->qp, &send_wr, NULL); if (!rc) return 0; @@ -1023,7 +1023,7 @@ static void smbd_destroy_header(struct smbd_connection *info, static int smbd_post_send(struct smbd_connection *info, struct smbd_request *request, bool has_payload) { - struct ib_send_wr send_wr, *send_wr_fail; + struct ib_send_wr send_wr; int rc, i; for (i = 0; i < request->num_sge; i++) { @@ -1054,7 +1054,7 @@ static int smbd_post_send(struct smbd_connection *info, atomic_inc(&info->send_pending); } - rc = ib_post_send(info->id->qp, &send_wr, &send_wr_fail); + rc = ib_post_send(info->id->qp, &send_wr, NULL); if (rc) { log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); if (has_payload) { @@ -1183,7 +1183,7 @@ static int smbd_post_send_data( static int smbd_post_recv( struct smbd_connection *info, struct smbd_response *response) { - struct ib_recv_wr recv_wr, *recv_wr_fail = NULL; + struct ib_recv_wr recv_wr; int rc = -EIO; response->sge.addr = ib_dma_map_single( @@ -1202,7 +1202,7 @@ static int smbd_post_recv( recv_wr.sg_list = &response->sge; recv_wr.num_sge = 1; - rc = ib_post_recv(info->id->qp, &recv_wr, &recv_wr_fail); + rc = ib_post_recv(info->id->qp, &recv_wr, NULL); if (rc) { ib_dma_unmap_single(info->id->device, response->sge.addr, response->sge.length, DMA_FROM_DEVICE); @@ -2488,7 +2488,6 @@ struct smbd_mr *smbd_register_mr( int rc, i; enum dma_data_direction dir; struct ib_reg_wr *reg_wr; - struct ib_send_wr *bad_wr; if (num_pages > info->max_frmr_depth) { log_rdma_mr(ERR, "num_pages=%d max_frmr_depth=%d\n", @@ -2562,7 +2561,7 @@ skip_multiple_pages: * on IB_WR_REG_MR. 
Hardware enforces a barrier and order of execution * on the next ib_post_send when we actually send I/O to remote peer */ - rc = ib_post_send(info->id->qp, &reg_wr->wr, &bad_wr); + rc = ib_post_send(info->id->qp, &reg_wr->wr, NULL); if (!rc) return smbdirect_mr; @@ -2607,7 +2606,7 @@ static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc) */ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr) { - struct ib_send_wr *wr, *bad_wr; + struct ib_send_wr *wr; struct smbd_connection *info = smbdirect_mr->conn; int rc = 0; @@ -2622,7 +2621,7 @@ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr) wr->send_flags = IB_SEND_SIGNALED; init_completion(&smbdirect_mr->invalidate_done); - rc = ib_post_send(info->id->qp, wr, &bad_wr); + rc = ib_post_send(info->id->qp, wr, NULL); if (rc) { log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc); smbd_disconnect_rdma_connection(info); -- cgit From 72bc4d375d89d0ecb0df491c1eada201a995c471 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 18 Jul 2018 09:25:26 -0700 Subject: net/9p: Simplify ib_post_(send|recv|srq_recv)() calls Instead of declaring and passing a dummy 'bad_wr' pointer, pass NULL as third argument to ib_post_(send|recv|srq_recv)(). Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- net/9p/trans_rdma.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 3d414acb7015..b06286f253cb 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -396,7 +396,7 @@ static int post_recv(struct p9_client *client, struct p9_rdma_context *c) { struct p9_trans_rdma *rdma = client->trans; - struct ib_recv_wr wr, *bad_wr; + struct ib_recv_wr wr; struct ib_sge sge; c->busa = ib_dma_map_single(rdma->cm_id->device, @@ -415,7 +415,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c) wr.wr_cqe = &c->cqe; wr.sg_list = &sge; wr.num_sge = 1; - return ib_post_recv(rdma->qp, &wr, &bad_wr); + return ib_post_recv(rdma->qp, &wr, NULL); error: p9_debug(P9_DEBUG_ERROR, "EIO\n"); @@ -425,7 +425,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c) static int rdma_request(struct p9_client *client, struct p9_req_t *req) { struct p9_trans_rdma *rdma = client->trans; - struct ib_send_wr wr, *bad_wr; + struct ib_send_wr wr; struct ib_sge sge; int err = 0; unsigned long flags; @@ -520,7 +520,7 @@ dont_need_post_recv: * status in case of a very fast reply. */ req->status = REQ_STATUS_SENT; - err = ib_post_send(rdma->qp, &wr, &bad_wr); + err = ib_post_send(rdma->qp, &wr, NULL); if (err) goto send_error; -- cgit From 197c782e3a18cc2de8766dc30a56f37b9b7a9eec Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 18 Jul 2018 09:25:27 -0700 Subject: net/rds: Remove two WARN_ON() statements Remove two WARN_ON() statements that verify something that is guaranteed by the RDMA API, namely that the failed_wr pointer is not touched if an ib_post_send() call succeeds and that it points at the failed wr if an ib_post_send() call fails.
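A sketch of why these checks are dead code, using the rds_ib_post_reg_frmr() call as it appears in the diff below:

	failed_wr = &reg_wr.wr;
	ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, &failed_wr);
	/*
	 * Only one WR is posted here: on success the API leaves failed_wr
	 * untouched, and on failure it must point at the WR that failed,
	 * which can only be &reg_wr.wr. Either way the removed
	 * WARN_ON(failed_wr != &reg_wr.wr) could never fire.
	 */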
Signed-off-by: Bart Van Assche Acked-by: Santosh Shilimkar Signed-off-by: Jason Gunthorpe --- net/rds/ib_frmr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c index 48332a6ed738..0209cd81546c 100644 --- a/net/rds/ib_frmr.c +++ b/net/rds/ib_frmr.c @@ -137,7 +137,6 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr) failed_wr = &reg_wr.wr; ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, &failed_wr); - WARN_ON(failed_wr != &reg_wr.wr); if (unlikely(ret)) { /* Failure here can be because of -ENOMEM as well */ frmr->fr_state = FRMR_IS_STALE; @@ -257,7 +256,6 @@ static int rds_ib_post_inv(struct rds_ib_mr *ibmr) failed_wr = s_wr; ret = ib_post_send(i_cm_id->qp, s_wr, &failed_wr); - WARN_ON(failed_wr != s_wr); if (unlikely(ret)) { frmr->fr_state = FRMR_IS_STALE; frmr->fr_inv = false; -- cgit From f112d53b435692331a50a3780e3ad365b66b1a69 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 18 Jul 2018 09:25:28 -0700 Subject: net/rds: Simplify ib_post_(send|recv|srq_recv)() calls Instead of declaring and passing a dummy 'bad_wr' pointer, pass NULL as third argument to ib_post_(send|recv|srq_recv)(). Signed-off-by: Bart Van Assche Acked-by: Santosh Shilimkar Signed-off-by: Jason Gunthorpe --- net/rds/ib_frmr.c | 9 +++------ net/rds/ib_recv.c | 6 ++---- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c index 0209cd81546c..09ab97475fc9 100644 --- a/net/rds/ib_frmr.c +++ b/net/rds/ib_frmr.c @@ -102,7 +102,6 @@ static void rds_ib_free_frmr(struct rds_ib_mr *ibmr, bool drop) static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr) { struct rds_ib_frmr *frmr = &ibmr->u.frmr; - struct ib_send_wr *failed_wr; struct ib_reg_wr reg_wr; int ret, off = 0; @@ -135,8 +134,7 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr) IB_ACCESS_REMOTE_WRITE; reg_wr.wr.send_flags = IB_SEND_SIGNALED; - failed_wr = &reg_wr.wr; - ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, &failed_wr); + ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, NULL); if (unlikely(ret)) { /* Failure here can be because of -ENOMEM as well */ frmr->fr_state = FRMR_IS_STALE; @@ -229,7 +227,7 @@ out_unmap: static int rds_ib_post_inv(struct rds_ib_mr *ibmr) { - struct ib_send_wr *s_wr, *failed_wr; + struct ib_send_wr *s_wr; struct rds_ib_frmr *frmr = &ibmr->u.frmr; struct rdma_cm_id *i_cm_id = ibmr->ic->i_cm_id; int ret = -EINVAL; @@ -254,8 +252,7 @@ static int rds_ib_post_inv(struct rds_ib_mr *ibmr) s_wr->ex.invalidate_rkey = frmr->mr->rkey; s_wr->send_flags = IB_SEND_SIGNALED; - failed_wr = s_wr; - ret = ib_post_send(i_cm_id->qp, s_wr, &failed_wr); + ret = ib_post_send(i_cm_id->qp, s_wr, NULL); if (unlikely(ret)) { frmr->fr_state = FRMR_IS_STALE; frmr->fr_inv = false; diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index b4e421aa9727..4c5a937304b2 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -383,7 +383,6 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) { struct rds_ib_connection *ic = conn->c_transport_data; struct rds_ib_recv_work *recv; - struct ib_recv_wr *failed_wr; unsigned int posted = 0; int ret = 0; bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM); @@ -417,7 +416,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) &recv->r_frag->f_sg)); /* XXX when can this fail?
*/ - ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr); + ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, NULL); if (ret) { rds_ib_conn_error(conn, "recv post on " "%pI4 returned %d, disconnecting and " @@ -650,7 +649,6 @@ static u64 rds_ib_get_ack(struct rds_ib_connection *ic) static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits) { struct rds_header *hdr = ic->i_ack; - struct ib_send_wr *failed_wr; u64 seq; int ret; @@ -663,7 +661,7 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi rds_message_make_checksum(hdr); ic->i_ack_queued = jiffies; - ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, &failed_wr); + ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, NULL); if (unlikely(ret)) { /* Failed to send. Release the WR, and * force another ACK. -- cgit From 12c36dcfd6f48ece2fce4dc8e6ce68224d005d37 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 18 Jul 2018 09:25:29 -0700 Subject: net/smc: Remove a WARN_ON() statement Remove a WARN_ON() statement that verifies something that is guaranteed by the RDMA API, namely that the failed_wr pointer is not touched if an ib_post_send() call succeeds and that it points at the failed wr if an ib_post_send() call fails. Signed-off-by: Bart Van Assche Acked-by: Ursula Braun Signed-off-by: Jason Gunthorpe --- net/smc/smc_wr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index dbd2605d1962..8b9bdd9bc615 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -274,7 +274,6 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) link->wr_reg.key = mr->rkey; failed_wr = &link->wr_reg.wr; rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, &failed_wr); - WARN_ON(failed_wr != &link->wr_reg.wr); if (rc) return rc; -- cgit From 2e3bbe46b4a18d16314dab0c0efdba0c079a5ed0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 18 Jul 2018 09:25:30 -0700 Subject: net/smc: Simplify ib_post_(send|recv|srq_recv)() calls Instead of declaring and passing a dummy 'bad_wr' pointer, pass NULL as third argument to ib_post_(send|recv|srq_recv)(). 
Signed-off-by: Bart Van Assche Acked-by: Ursula Braun Signed-off-by: Jason Gunthorpe --- net/smc/smc_tx.c | 3 +-- net/smc/smc_wr.c | 8 ++------ net/smc/smc_wr.h | 3 +-- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index cee666400752..a171c168f98e 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -255,7 +255,6 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset, int num_sges, struct ib_sge sges[]) { struct smc_link_group *lgr = conn->lgr; - struct ib_send_wr *failed_wr = NULL; struct ib_rdma_wr rdma_wr; struct smc_link *link; int rc; @@ -273,7 +272,7 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset, /* offset within RMBE */ peer_rmbe_offset; rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; - rc = ib_post_send(link->roce_qp, &rdma_wr.wr, &failed_wr); + rc = ib_post_send(link->roce_qp, &rdma_wr.wr, NULL); if (rc) { conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; smc_lgr_terminate(lgr); diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 8b9bdd9bc615..de1a438cf977 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -240,15 +240,13 @@ int smc_wr_tx_put_slot(struct smc_link *link, */ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv) { - struct ib_send_wr *failed_wr = NULL; struct smc_wr_tx_pend *pend; int rc; ib_req_notify_cq(link->smcibdev->roce_cq_send, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); pend = container_of(priv, struct smc_wr_tx_pend, priv); - rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], - &failed_wr); + rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], NULL); if (rc) { struct smc_link_group *lgr = container_of(link, struct smc_link_group, @@ -263,7 +261,6 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv) /* Register a memory region and wait for result. 
*/ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) { - struct ib_send_wr *failed_wr = NULL; int rc; ib_req_notify_cq(link->smcibdev->roce_cq_send, @@ -272,8 +269,7 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) link->wr_reg.wr.wr_id = (u64)(uintptr_t)mr; link->wr_reg.mr = mr; link->wr_reg.key = mr->rkey; - failed_wr = &link->wr_reg.wr; - rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, &failed_wr); + rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, NULL); if (rc) return rc; diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 210bec3c3ebe..1d85bb14fd6f 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -63,7 +63,6 @@ static inline void smc_wr_tx_set_wr_id(atomic_long_t *wr_tx_id, long val) /* post a new receive work request to fill a completed old work request entry */ static inline int smc_wr_rx_post(struct smc_link *link) { - struct ib_recv_wr *bad_recv_wr = NULL; int rc; u64 wr_id, temp_wr_id; u32 index; @@ -72,7 +71,7 @@ static inline int smc_wr_rx_post(struct smc_link *link) temp_wr_id = wr_id; index = do_div(temp_wr_id, link->wr_rx_cnt); link->wr_rx_ibs[index].wr_id = wr_id; - rc = ib_post_recv(link->roce_qp, &link->wr_rx_ibs[index], &bad_recv_wr); + rc = ib_post_recv(link->roce_qp, &link->wr_rx_ibs[index], NULL); return rc; } -- cgit From ed288d74a9e5d9ff869350906ad35eb231c55388 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 18 Jul 2018 09:25:31 -0700 Subject: net/xprtrdma: Simplify ib_post_(send|recv|srq_recv)() calls Instead of declaring and passing a dummy 'bad_wr' pointer, pass NULL as third argument to ib_post_(send|recv|srq_recv)(). Signed-off-by: Bart Van Assche Reviewed-by: Chuck Lever Acked-by: Anna Schumaker Signed-off-by: Jason Gunthorpe --- net/sunrpc/xprtrdma/fmr_ops.c | 4 +--- net/sunrpc/xprtrdma/frwr_ops.c | 4 ++-- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 3 +-- net/sunrpc/xprtrdma/svc_rdma_rw.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 3 +-- 5 files changed, 6 insertions(+), 10 deletions(-) diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index 17fb1e025654..0f7c465d9a5a 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -279,9 +279,7 @@ out_maperr: static int fmr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req) { - struct ib_send_wr *bad_wr; - - return ib_post_send(ia->ri_id->qp, &req->rl_sendctx->sc_wr, &bad_wr); + return ib_post_send(ia->ri_id->qp, &req->rl_sendctx->sc_wr, NULL); } /* Invalidate all memory regions that were registered for "req". diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index c040de196e13..a167eebf63d5 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -464,7 +464,7 @@ out_mapmr_err: static int frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req) { - struct ib_send_wr *post_wr, *bad_wr; + struct ib_send_wr *post_wr; struct rpcrdma_mr *mr; post_wr = &req->rl_sendctx->sc_wr; @@ -486,7 +486,7 @@ frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req) /* If ib_post_send fails, the next ->send_request for * @req will queue these MWs for recovery. 
*/ - return ib_post_send(ia->ri_id->qp, post_wr, &bad_wr); + return ib_post_send(ia->ri_id->qp, post_wr, NULL); } /* Handle a remotely invalidated mr on the @mrs list diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 841fca143804..2ef75e885411 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -229,11 +229,10 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma, struct svc_rdma_recv_ctxt *ctxt) { - struct ib_recv_wr *bad_recv_wr; int ret; svc_xprt_get(&rdma->sc_xprt); - ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, &bad_recv_wr); + ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, NULL); trace_svcrdma_post_recv(&ctxt->rc_recv_wr, ret); if (ret) goto err_post; diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index ce3ea8419704..80975427f523 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -329,7 +329,7 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc) do { if (atomic_sub_return(cc->cc_sqecount, &rdma->sc_sq_avail) > 0) { - ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr); + ret = ib_post_send(rdma->sc_qp, first_wr, NULL); trace_svcrdma_post_rw(&cc->cc_cqe, cc->cc_sqecount, ret); if (ret) diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 4a3efaea277c..ffef0c508f1a 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -291,7 +291,6 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) */ int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr) { - struct ib_send_wr *bad_wr; int ret; might_sleep(); @@ -311,7 +310,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr) } svc_xprt_get(&rdma->sc_xprt); - ret = ib_post_send(rdma->sc_qp, wr, &bad_wr); + ret = ib_post_send(rdma->sc_qp, wr, NULL); trace_svcrdma_post_send(wr, ret); if (ret) { set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); -- cgit From 3df593bfe6455f28cda879be8299b30b8601ce3b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 10 Jul 2018 20:55:13 -0600 Subject: IB/uverbs: Get rid of null_obj_type If the method fails after calling rdma_explicit_destroy (eg if copy_to_user faults) then it will trigger a kernel oops: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 PGD 800000000548d067 P4D 800000000548d067 PUD 54a0067 PMD 0 SMP PTI CPU: 0 PID: 359 Comm: ibv_rc_pingpong Not tainted 4.18.0-rc1+ #28 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 RIP: 0010: (null) Code: Bad RIP value. RSP: 0018:ffffc900001a3bf0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff88000603bd00 RCX: 0000000000000003 RDX: 0000000000000001 RSI: 0000000000000001 RDI: ffff88000603bd00 RBP: 0000000000000001 R08: ffffc900001a3cf8 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffc900001a3cf0 R13: 0000000000000000 R14: ffffc900001a3cf0 R15: 0000000000000000 FS: 00007fb00dda8700(0000) GS:ffff880007c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 000000000548e004 CR4: 00000000003606b0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? rdma_lookup_put_uobject+0x22/0x50 [ib_uverbs] ? 
uverbs_finalize_object+0x3b/0x60 [ib_uverbs] ? uverbs_finalize_attrs+0x128/0x140 [ib_uverbs] ? ib_uverbs_cmd_verbs+0x698/0x7c0 [ib_uverbs] ? find_held_lock+0x2d/0x90 ? __might_fault+0x39/0x90 ? ib_uverbs_ioctl+0x111/0x1f0 [ib_uverbs] ? do_vfs_ioctl+0xa0/0x6d0 ? trace_hardirqs_on_caller+0xed/0x180 ? _raw_spin_unlock_irq+0x24/0x40 ? syscall_trace_enter+0x138/0x1d0 ? ksys_ioctl+0x35/0x60 ? __x64_sys_ioctl+0x11/0x20 ? do_syscall_64+0x5b/0x1c0 ? entry_SYSCALL_64_after_hwframe+0x49/0xbe This is because the type was replaced with the null_type during explicit destroy, and the null_type cannot complete the destruction. One of the side effects of replacing the type is to make the object handle totally unreachable - so no other command could attempt to use it, even though it remains on the uobject list. We can get the same end result by just fully destroying the object inside rdma_explicit_destroy and leaving the caller the residual kref for the uobj with no attached HW object, and no presence in the uobjects list. Signed-off-by: Jason Gunthorpe Reviewed-by: Leon Romanovsky --- drivers/infiniband/core/rdma_core.c | 41 ++++++++----------------------------- 1 file changed, 9 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 847c6a2f1346..aed7cc2a9e86 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -439,12 +439,17 @@ static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj, struct ib_uverbs_file *ufile = uobj->ufile; int ret; + if (!uobj->object) + return 0; + ret = uobj->type->type_class->remove_commit(uobj, why); if (ib_is_destroy_retryable(ret, why, uobj)) { /* We couldn't remove the object, so just unlock the uobject */ atomic_set(&uobj->usecnt, 0); uobj->type->type_class->lookup_put(uobj, true); } else { + uobj->object = NULL; + mutex_lock(&ufile->uobjects_lock); list_del(&uobj->list); mutex_unlock(&ufile->uobjects_lock); @@ -459,35 +464,13 @@ static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj, int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj) { int ret; - struct ib_uverbs_file *ufile = uobj->ufile; - /* put the ref count we took at lookup_get */ - uverbs_uobject_put(uobj); - /* Cleanup is running. 
Calling this should have been impossible */ - if (!down_read_trylock(&ufile->cleanup_rwsem)) { - WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n"); - return 0; - } - assert_uverbs_usecnt(uobj, true); - ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY); - - up_read(&ufile->cleanup_rwsem); + ret = rdma_explicit_destroy(uobj); + /* Pairs with the lookup_get done by the caller */ + rdma_lookup_put_uobject(uobj, true); return ret; } -static int null_obj_type_class_remove_commit(struct ib_uobject *uobj, - enum rdma_remove_reason why) -{ - return 0; -} - -static const struct uverbs_obj_type null_obj_type = { - .type_class = &((const struct uverbs_obj_type_class){ - .remove_commit = null_obj_type_class_remove_commit, - /* be cautious */ - .needs_kfree_rcu = true}), -}; - int rdma_explicit_destroy(struct ib_uobject *uobject) { int ret; @@ -499,14 +482,8 @@ int rdma_explicit_destroy(struct ib_uobject *uobject) return 0; } assert_uverbs_usecnt(uobject, true); - ret = uobject->type->type_class->remove_commit(uobject, - RDMA_REMOVE_DESTROY); - if (ret) - goto out; - - uobject->type = &null_obj_type; + ret = _rdma_remove_commit_uobject(uobject, RDMA_REMOVE_DESTROY); -out: up_read(&ufile->cleanup_rwsem); return ret; } -- cgit From 1250c3048cf1632f5dbb99a0242410baff67955d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 10 Jul 2018 20:55:14 -0600 Subject: IB/uverbs: Handle IDR and FD types without truncation Our ABI for write() uses a s32 for FDs and a u32 for IDRs, but internally we ended up implicitly casting these ABI values into an 'int'. For ioctl() we use a s64 for FDs and a u64 for IDRs, again casting to an int. The various casts to int are all missing range checks which can cause userspace values that should be considered invalid to be accepted. Fix this by making the generic lookup routine accept a s64, which does not truncate the write API's u32/s32 or the ioctl API's s64. Then push the detailed range checking down to the actual type implementations to be shared by both interfaces. Finally, change the copy of the uobj->id to sign extend into a s64, so, e.g., if we ever wish to return a negative value for an FD it is carried properly. This ensures that userspace values are never weirdly interpreted due to the various truncations, and everything that is really out of range gets an EINVAL. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 22 ++++++++++++------ drivers/infiniband/core/rdma_core.h | 2 +- drivers/infiniband/core/uverbs_cmd.c | 8 ++++--- drivers/infiniband/core/uverbs_ioctl.c | 16 ++++++++------ include/rdma/uverbs_std_types.h | 38 ++++++++++++++++++-------------- include/rdma/uverbs_types.h | 4 ++-- include/uapi/rdma/rdma_user_ioctl_cmds.h | 7 +++++- 7 files changed, 59 insertions(+), 38 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index aed7cc2a9e86..c63583dbc6b9 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -133,7 +133,7 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive) * returns success_res on success (negative errno on failure). For use by * callers that do not need the uobj. 
*/ -int __uobj_perform_destroy(const struct uverbs_obj_type *type, int id, +int __uobj_perform_destroy(const struct uverbs_obj_type *type, u32 id, struct ib_uverbs_file *ufile, int success_res) { struct ib_uobject *uobj; @@ -212,13 +212,17 @@ static void uverbs_idr_remove_uobj(struct ib_uobject *uobj) /* Returns the ib_uobject or an error. The caller should check for IS_ERR. */ static struct ib_uobject * lookup_get_idr_uobject(const struct uverbs_obj_type *type, - struct ib_uverbs_file *ufile, int id, bool exclusive) + struct ib_uverbs_file *ufile, s64 id, bool exclusive) { struct ib_uobject *uobj; + unsigned long idrno = id; + + if (id < 0 || id > ULONG_MAX) + return ERR_PTR(-EINVAL); rcu_read_lock(); /* object won't be released as we're protected in rcu */ - uobj = idr_find(&ufile->idr, id); + uobj = idr_find(&ufile->idr, idrno); if (!uobj) { uobj = ERR_PTR(-ENOENT); goto free; @@ -240,17 +244,21 @@ free: static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type, struct ib_uverbs_file *ufile, - int id, bool exclusive) + s64 id, bool exclusive) { struct file *f; struct ib_uobject *uobject; + int fdno = id; const struct uverbs_obj_fd_type *fd_type = container_of(type, struct uverbs_obj_fd_type, type); + if (fdno != id) + return ERR_PTR(-EINVAL); + if (exclusive) return ERR_PTR(-EOPNOTSUPP); - f = fget(id); + f = fget(fdno); if (!f) return ERR_PTR(-EBADF); @@ -270,7 +278,7 @@ static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *ty } struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, - struct ib_uverbs_file *ufile, int id, + struct ib_uverbs_file *ufile, s64 id, bool exclusive) { struct ib_uobject *uobj; @@ -725,7 +733,7 @@ EXPORT_SYMBOL(uverbs_fd_class); struct ib_uobject * uverbs_get_uobject_from_file(const struct uverbs_obj_type *type_attrs, struct ib_uverbs_file *ufile, - enum uverbs_obj_access access, int id) + enum uverbs_obj_access access, s64 id) { switch (access) { case UVERBS_ACCESS_READ: diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index 1bba60e960c1..db2339330f6f 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -86,7 +86,7 @@ void uverbs_close_fd(struct file *f); struct ib_uobject * uverbs_get_uobject_from_file(const struct uverbs_obj_type *type_attrs, struct ib_uverbs_file *ufile, - enum uverbs_obj_access access, int id); + enum uverbs_obj_access access, s64 id); /* * Note that certain finalize stages could return a status: diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 987ee38ab4b3..409fd46a2a99 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -48,10 +48,10 @@ #include "core_priv.h" static struct ib_uverbs_completion_event_file * -ib_uverbs_lookup_comp_file(int fd, struct ib_uverbs_file *ufile) +_ib_uverbs_lookup_comp_file(s32 fd, struct ib_uverbs_file *ufile) { - struct ib_uobject *uobj = uobj_get_read(UVERBS_OBJECT_COMP_CHANNEL, - fd, ufile); + struct ib_uobject *uobj = ufd_get_read(UVERBS_OBJECT_COMP_CHANNEL, + fd, ufile); if (IS_ERR(uobj)) return (void *)uobj; @@ -62,6 +62,8 @@ ib_uverbs_lookup_comp_file(int fd, struct ib_uverbs_file *ufile) return container_of(uobj, struct ib_uverbs_completion_event_file, uobj); } +#define ib_uverbs_lookup_comp_file(_fd, _ufile) \ + _ib_uverbs_lookup_comp_file((_fd)*typecheck(s32, _fd), _ufile) ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, struct ib_device *ib_dev, diff 
--git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index d3bf82cfaa2b..26ddc5cadcdb 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -136,15 +136,11 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile, break; case UVERBS_ATTR_TYPE_IDR: - if (uattr->data >> 32) - return -EINVAL; - /* fall through */ case UVERBS_ATTR_TYPE_FD: if (uattr->attr_data.reserved) return -EINVAL; - if (uattr->len != 0 || !ufile->ucontext || - uattr->data > INT_MAX) + if (uattr->len != 0 || !ufile->ucontext) return -EINVAL; o_attr = &e->obj_attr; @@ -152,17 +148,23 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile, if (!object) return -EINVAL; + /* + * The type of uattr->data is u64 for UVERBS_ATTR_TYPE_IDR and + * s64 for UVERBS_ATTR_TYPE_FD. We can cast the u64 to s64 + * here without caring about truncation as we know that the + * IDR implementation today rejects negative IDs + */ o_attr->uobject = uverbs_get_uobject_from_file( object->type_attrs, ufile, spec->u.obj.access, - (int)uattr->data); + uattr->data_s64); if (IS_ERR(o_attr->uobject)) return PTR_ERR(o_attr->uobject); if (spec->u.obj.access == UVERBS_ACCESS_NEW) { - u64 id = o_attr->uobject->id; + s64 id = o_attr->uobject->id; /* Copy the allocated id to the user-space */ if (put_user(id, &e->uattr->data)) { diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 3e3f108f0912..4f32eab8b7a4 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -46,39 +46,43 @@ static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(vo } #endif -static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type, - bool write, - struct ib_uverbs_file *ufile, - int id) -{ - return rdma_lookup_get_uobject(type, ufile, id, write); -} +/* Returns _id, or causes a compile error if _id is not a u32. + * + * The uobj APIs should only be used with the write based uAPI to access + * object IDs. The write API must use a u32 for the object handle, which is + * checked by this macro. 
+ */ #define _uobj_check_id(_id) ((_id) * typecheck(u32, _id)) #define uobj_get_type(_object) UVERBS_OBJECT(_object).type_attrs #define uobj_get_read(_type, _id, _ufile) \ - __uobj_get(uobj_get_type(_type), false, _ufile, _id) + rdma_lookup_get_uobject(uobj_get_type(_type), _ufile, \ + _uobj_check_id(_id), false) -static inline void *_uobj_get_obj_read(const struct uverbs_obj_type *type, - int id, struct ib_uverbs_file *ufile) -{ - struct ib_uobject *uobj = __uobj_get(type, false, ufile, id); +#define ufd_get_read(_type, _fdnum, _ufile) \ + rdma_lookup_get_uobject(uobj_get_type(_type), _ufile, \ + (_fdnum)*typecheck(s32, _fdnum), false) +static inline void *_uobj_get_obj_read(struct ib_uobject *uobj) +{ if (IS_ERR(uobj)) return NULL; return uobj->object; } #define uobj_get_obj_read(_object, _type, _id, _ufile) \ - ((struct ib_##_object *)_uobj_get_obj_read(uobj_get_type(_type), _id, \ - _ufile)) + ((struct ib_##_object *)_uobj_get_obj_read( \ + uobj_get_read(_type, _id, _ufile))) #define uobj_get_write(_type, _id, _ufile) \ - __uobj_get(uobj_get_type(_type), true, _ufile, _id) + rdma_lookup_get_uobject(uobj_get_type(_type), _ufile, \ + _uobj_check_id(_id), true) -int __uobj_perform_destroy(const struct uverbs_obj_type *type, int id, +int __uobj_perform_destroy(const struct uverbs_obj_type *type, u32 id, struct ib_uverbs_file *ufile, int success_res); #define uobj_perform_destroy(_type, _id, _ufile, _success_res) \ - __uobj_perform_destroy(uobj_get_type(_type), _id, _ufile, _success_res) + __uobj_perform_destroy(uobj_get_type(_type), _uobj_check_id(_id), \ + _ufile, _success_res) static inline void uobj_put_read(struct ib_uobject *uobj) { diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index e2fc9db466d3..2f50cc6def3c 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -77,7 +77,7 @@ struct uverbs_obj_type_class { void (*alloc_abort)(struct ib_uobject *uobj); struct ib_uobject *(*lookup_get)(const struct uverbs_obj_type *type, - struct ib_uverbs_file *ufile, int id, + struct ib_uverbs_file *ufile, s64 id, bool exclusive); void (*lookup_put)(struct ib_uobject *uobj, bool exclusive); /* @@ -121,7 +121,7 @@ struct uverbs_obj_idr_type { struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, struct ib_uverbs_file *ufile, - int id, bool exclusive); + s64 id, bool exclusive); void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive); struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, struct ib_uverbs_file *ufile); diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h b/include/uapi/rdma/rdma_user_ioctl_cmds.h index 1da5a1e1f3a8..24800c6c1f32 100644 --- a/include/uapi/rdma/rdma_user_ioctl_cmds.h +++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h @@ -62,7 +62,12 @@ struct ib_uverbs_attr { } enum_data; __u16 reserved; } attr_data; - __aligned_u64 data; /* ptr to command, inline data or idr/fd */ + union { + /* Used by PTR_IN/OUT, ENUM_IN and IDR */ + __aligned_u64 data; + /* Used by FD_IN and FD_OUT */ + __s64 data_s64; + }; }; struct ib_uverbs_ioctl_hdr { -- cgit From c561c288463102b12c9089a42c6c2a9f55c4fb53 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 10 Jul 2018 20:55:15 -0600 Subject: IB/uverbs: Clarify the kref'ing ordering for alloc_commit The alloc_commit callback makes the uobj visible to other threads, and it does so using a 'move' semantic of the uobj kref on the stack into the public storage (eg the IDR, uobject list and file_private_data). Once this is done another 
thread could start up and trigger deletion of the kref. Fortunately cleanup_rwsem happens to prevent this from being a bug, but that is a fantastically unclear side effect. Re-organize things so that alloc_commit is the last thing to touch the uobj, get rid of the sneaky implicit dependency on cleanup_rwsem, and add a comment reminding that uobj is no longer kref'd after alloc_commit. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index c63583dbc6b9..afa03d2f6826 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -498,24 +498,41 @@ int rdma_explicit_destroy(struct ib_uobject *uobject) static void alloc_commit_idr_uobject(struct ib_uobject *uobj) { - spin_lock(&uobj->ufile->idr_lock); + struct ib_uverbs_file *ufile = uobj->ufile; + + spin_lock(&ufile->idr_lock); /* * We already allocated this IDR with a NULL object, so * this shouldn't fail. + * + * NOTE: Once we set the IDR we lose ownership of our kref on uobj. + * It will be put by remove_commit_idr_uobject() */ - WARN_ON(idr_replace(&uobj->ufile->idr, uobj, uobj->id)); - spin_unlock(&uobj->ufile->idr_lock); + WARN_ON(idr_replace(&ufile->idr, uobj, uobj->id)); + spin_unlock(&ufile->idr_lock); } static void alloc_commit_fd_uobject(struct ib_uobject *uobj) { - fd_install(uobj->id, uobj->object); + int fd = uobj->id; + /* This shouldn't be used anymore. Use the file object instead */ uobj->id = 0; + /* Get another reference as we export this to the fops */ uverbs_uobject_get(uobj); + + /* + * NOTE: Once we install the file we lose ownership of our kref on + * uobj. It will be put by uverbs_close_fd() + */ + fd_install(fd, uobj->object); } +/* + * In all cases rdma_alloc_commit_uobject() consumes the kref to uobj and the + * caller can no longer assume uobj is valid. + */ int rdma_alloc_commit_uobject(struct ib_uobject *uobj) { struct ib_uverbs_file *ufile = uobj->ufile; @@ -541,6 +558,7 @@ int rdma_alloc_commit_uobject(struct ib_uobject *uobj) list_add(&uobj->list, &ufile->uobjects); mutex_unlock(&ufile->uobjects_lock); + /* alloc_commit consumes the uobj kref */ uobj->type->type_class->alloc_commit(uobj); up_read(&ufile->cleanup_rwsem); -- cgit From 5671f79b42da197466bf0908bce6f7ab4e35488f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 10 Jul 2018 20:55:16 -0600 Subject: IB/uverbs: Revise the placement of get/puts on uobject This wasn't wrong, but the placement of two krefs didn't make any sense. Follow some simple rules. - A kref is held inside uobjects_list - A kref is held inside the IDR - A kref is held inside file->private - A stack-based kref is passed between alloc_begin and alloc_abort/alloc_commit Any place we destroy one of the above pointers, we stick a put, or 'move' the kref into another pointer. The key functions have sensible semantics: - alloc_uobj fully initializes the common members in uobj, including the list - Get rid of the uverbs_idr_remove_uobj helper since IDR remove does require a put, but it depends on the situation. Later patches will re-consolidate this differently. 
- alloc_abort always consumes the passed kref, done in the type - alloc_commit always consumes the passed kref, done in the type - rdma_remove_commit_uobject always pairs with a lookup_get After it is all done the only control flow change is to: - move a get from alloc_commit_fd_uobject to rdma_alloc_commit_uobject - add a put to remove_commit_idr_uobject - Consistently use rdma_lookup_put in rdma_remove_commit_uobject at the right place Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 83 ++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 33 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index afa03d2f6826..80e1e3cb2110 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -163,6 +163,7 @@ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile, */ uobj->ufile = ufile; uobj->context = ufile->ucontext; + INIT_LIST_HEAD(&uobj->list); uobj->type = type; /* * Allocated objects start out as write locked to deny any other @@ -198,17 +199,6 @@ static int idr_add_uobj(struct ib_uobject *uobj) return ret < 0 ? ret : 0; } -/* - * It only removes it from the uobjects list, uverbs_uobject_put() is still - * required. - */ -static void uverbs_idr_remove_uobj(struct ib_uobject *uobj) -{ - spin_lock(&uobj->ufile->idr_lock); - idr_remove(&uobj->ufile->idr, uobj->id); - spin_unlock(&uobj->ufile->idr_lock); -} - /* Returns the ib_uobject or an error. The caller should check for IS_ERR. */ static struct ib_uobject * lookup_get_idr_uobject(const struct uverbs_obj_type *type, @@ -329,7 +319,9 @@ static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type * return uobj; idr_remove: - uverbs_idr_remove_uobj(uobj); + spin_lock(&ufile->idr_lock); + idr_remove(&ufile->idr, uobj->id); + spin_unlock(&ufile->idr_lock); uobj_put: uverbs_uobject_put(uobj); return ERR_PTR(ret); @@ -354,6 +346,13 @@ static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *t return uobj; } + /* + * The kref for uobj is moved into filp->private_data and put in + * uverbs_close_fd(). Once anon_inode_getfile() succeeds + * uverbs_close_fd() must be guaranteed to be called from the provided + * fops release callback. We piggyback our kref of uobj on the stack + * with the lifetime of the struct file. 
+ */ filp = anon_inode_getfile(fd_type->name, fd_type->fops, uobj, @@ -367,7 +366,7 @@ static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *t uobj->id = new_fd; uobj->object = filp; uobj->ufile = ufile; - INIT_LIST_HEAD(&uobj->list); + /* Matching put will be done in uverbs_close_fd() */ kref_get(&ufile->ref); return uobj; @@ -397,7 +396,13 @@ static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj, ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device, RDMACG_RESOURCE_HCA_OBJECT); - uverbs_idr_remove_uobj(uobj); + + spin_lock(&uobj->ufile->idr_lock); + idr_remove(&uobj->ufile->idr, uobj->id); + spin_unlock(&uobj->ufile->idr_lock); + + /* Matches the kref in alloc_commit_idr_uobject */ + uverbs_uobject_put(uobj); return ret; } @@ -451,24 +456,25 @@ static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj, return 0; ret = uobj->type->type_class->remove_commit(uobj, why); - if (ib_is_destroy_retryable(ret, why, uobj)) { - /* We couldn't remove the object, so just unlock the uobject */ - atomic_set(&uobj->usecnt, 0); - uobj->type->type_class->lookup_put(uobj, true); - } else { - uobj->object = NULL; - - mutex_lock(&ufile->uobjects_lock); - list_del(&uobj->list); - mutex_unlock(&ufile->uobjects_lock); - /* put the ref we took when we created the object */ - uverbs_uobject_put(uobj); - } + if (ib_is_destroy_retryable(ret, why, uobj)) + return ret; + + uobj->object = NULL; + + mutex_lock(&ufile->uobjects_lock); + list_del(&uobj->list); + mutex_unlock(&ufile->uobjects_lock); + /* Pairs with the get in rdma_alloc_commit_uobject() */ + uverbs_uobject_put(uobj); return ret; } -/* This is called only for user requested DESTROY reasons */ +/* This is called only for user requested DESTROY reasons + * rdma_lookup_get_uobject(exclusive=true) must have been called to get uobj, + * and after this returns the corresponding put has been done, and the kref + * for uobj has been consumed. + */ int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj) { int ret; @@ -519,9 +525,6 @@ static void alloc_commit_fd_uobject(struct ib_uobject *uobj) /* This shouldn't be used anymore. Use the file object instead */ uobj->id = 0; - /* Get another reference as we export this to the fops */ - uverbs_uobject_get(uobj); - /* * NOTE: Once we install the file we loose ownership of our kref on * uobj. It will be put by uverbs_close_fd() @@ -554,6 +557,8 @@ int rdma_alloc_commit_uobject(struct ib_uobject *uobj) assert_uverbs_usecnt(uobj, true); atomic_set(&uobj->usecnt, 0); + /* kref is held so long as the uobj is on the uobj list. */ + uverbs_uobject_get(uobj); mutex_lock(&ufile->uobjects_lock); list_add(&uobj->list, &ufile->uobjects); mutex_unlock(&ufile->uobjects_lock); @@ -567,12 +572,22 @@ int rdma_alloc_commit_uobject(struct ib_uobject *uobj) static void alloc_abort_idr_uobject(struct ib_uobject *uobj) { - uverbs_idr_remove_uobj(uobj); ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device, RDMACG_RESOURCE_HCA_OBJECT); + + spin_lock(&uobj->ufile->idr_lock); + /* The value of the handle in the IDR is NULL at this point. */ + idr_remove(&uobj->ufile->idr, uobj->id); + spin_unlock(&uobj->ufile->idr_lock); + + /* Pairs with the kref from alloc_begin_idr_uobject */ uverbs_uobject_put(uobj); } +/* + * This consumes the kref for uobj. It is up to the caller to unwind the HW + * object and anything else connected to uobj before calling this. 
+ */ void rdma_alloc_abort_uobject(struct ib_uobject *uobj) { uobj->type->type_class->alloc_abort(uobj); } @@ -605,6 +620,7 @@ void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive) else atomic_set(&uobj->usecnt, 0); + /* Pairs with the kref obtained by type->lookup_get */ uverbs_uobject_put(uobj); } @@ -658,6 +674,7 @@ void uverbs_close_fd(struct file *f) struct kref *uverbs_file_ref = &uobj->ufile->ref; _uverbs_close_fd(uobj); + /* Pairs with filp->private_data in alloc_begin_fd_uobject */ uverbs_uobject_put(uobj); kref_put(uverbs_file_ref, ib_uverbs_release_file); } @@ -700,7 +717,7 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, obj->id, err); list_del(&obj->list); - /* put the ref we took when we created the object */ + /* Pairs with the get in rdma_alloc_commit_uobject() */ uverbs_uobject_put(obj); ret = 0; } -- cgit From e6d5d5ddd0869cf44a554289cd213007ccc0afde Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 10 Jul 2018 20:55:17 -0600 Subject: IB/uverbs: Clarify and revise uverbs_close_fd The locking requirements here have changed slightly now that we can rely on the ib_uverbs_file always existing and containing all the necessary locking infrastructure. That means we can get rid of the cleanup_mutex usage (this was protecting the check on !uobj->context). Otherwise, follow the same pattern that IDR uses for destroy, acquire exclusive write access, then call destroy and then undo the 'lookup'. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 41 +++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 80e1e3cb2110..a55646cbf9b1 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -650,33 +650,48 @@ EXPORT_SYMBOL(uverbs_idr_class); static void _uverbs_close_fd(struct ib_uobject *uobj) { - struct ib_uverbs_file *ufile = uobj->ufile; int ret; - mutex_lock(&ufile->cleanup_mutex); + /* + * uobject was already cleaned up, remove_commit_fd_uobject + * sets this + */ + if (!uobj->context) + return; - /* uobject was either already cleaned up or is cleaned up right now anyway */ - if (!uobj->context || - !down_read_trylock(&ufile->cleanup_rwsem)) - goto unlock; + /* + * lookup_get_fd_uobject holds the kref on the struct file any time a + * FD uobj is locked, which prevents this release method from being + * invoked. Meaning we can always get the write lock here, or we have + * a kernel bug. If so dangle the pointers and bail. 
+ */ + ret = uverbs_try_lock_object(uobj, true); + if (WARN(ret, "uverbs_close_fd() racing with lookup_get_fd_uobject()")) + return; ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_CLOSE); - up_read(&ufile->cleanup_rwsem); if (ret) - pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n"); -unlock: - mutex_unlock(&ufile->cleanup_mutex); + pr_warn("Unable to clean up uobject file in %s\n", __func__); + + atomic_set(&uobj->usecnt, 0); } void uverbs_close_fd(struct file *f) { struct ib_uobject *uobj = f->private_data; - struct kref *uverbs_file_ref = &uobj->ufile->ref; + struct ib_uverbs_file *ufile = uobj->ufile; + + if (down_read_trylock(&ufile->cleanup_rwsem)) { + _uverbs_close_fd(uobj); + up_read(&ufile->cleanup_rwsem); + } + + uobj->object = NULL; + /* Matches the get in alloc_begin_fd_uobject */ + kref_put(&ufile->ref, ib_uverbs_release_file); - _uverbs_close_fd(uobj); /* Pairs with filp->private_data in alloc_begin_fd_uobject */ uverbs_uobject_put(uobj); - kref_put(uverbs_file_ref, ib_uverbs_release_file); } static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, -- cgit From 87064277c4d3b0ddb251a91324f2525048027ee2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 10 Jul 2018 20:55:18 -0600 Subject: IB/uverbs: Revise and clarify the rwsem and uobjects_lock Rename 'cleanup_rwsem' to 'hw_destroy_rwsem' which is held across any call to the type destroy function (aka 'hw' destroy). The main purpose of this lock is to prevent normal add and destroy from running concurrently with uverbs_cleanup_ufile(). Since the uobjects list is always manipulated under the 'hw_destroy_rwsem', we can eliminate the uobjects_lock in the cleanup function. This allows converting that lock to a very simple spinlock with a narrow critical section. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 26 ++++++++++++-------------- drivers/infiniband/core/uverbs.h | 12 ++++++++---- drivers/infiniband/core/uverbs_main.c | 4 ++-- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index a55646cbf9b1..4545c661acaa 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -461,9 +461,9 @@ static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj, uobj->object = NULL; - mutex_lock(&ufile->uobjects_lock); + spin_lock_irq(&ufile->uobjects_lock); list_del(&uobj->list); - mutex_unlock(&ufile->uobjects_lock); + spin_unlock_irq(&ufile->uobjects_lock); /* Pairs with the get in rdma_alloc_commit_uobject() */ uverbs_uobject_put(uobj); @@ -491,14 +491,14 @@ int rdma_explicit_destroy(struct ib_uobject *uobject) struct ib_uverbs_file *ufile = uobject->ufile; /* Cleanup is running. 
Calling this should have been impossible */ - if (!down_read_trylock(&ufile->cleanup_rwsem)) { + if (!down_read_trylock(&ufile->hw_destroy_rwsem)) { WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n"); return 0; } assert_uverbs_usecnt(uobject, true); ret = _rdma_remove_commit_uobject(uobject, RDMA_REMOVE_DESTROY); - up_read(&ufile->cleanup_rwsem); + up_read(&ufile->hw_destroy_rwsem); return ret; } @@ -541,7 +541,7 @@ int rdma_alloc_commit_uobject(struct ib_uobject *uobj) struct ib_uverbs_file *ufile = uobj->ufile; /* Cleanup is running. Calling this should have been impossible */ - if (!down_read_trylock(&ufile->cleanup_rwsem)) { + if (!down_read_trylock(&ufile->hw_destroy_rwsem)) { int ret; WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n"); @@ -559,13 +559,13 @@ int rdma_alloc_commit_uobject(struct ib_uobject *uobj) /* kref is held so long as the uobj is on the uobj list. */ uverbs_uobject_get(uobj); - mutex_lock(&ufile->uobjects_lock); + spin_lock_irq(&ufile->uobjects_lock); list_add(&uobj->list, &ufile->uobjects); - mutex_unlock(&ufile->uobjects_lock); + spin_unlock_irq(&ufile->uobjects_lock); /* alloc_commit consumes the uobj kref */ uobj->type->type_class->alloc_commit(uobj); - up_read(&ufile->cleanup_rwsem); + up_read(&ufile->hw_destroy_rwsem); return 0; } @@ -681,9 +681,9 @@ void uverbs_close_fd(struct file *f) struct ib_uobject *uobj = f->private_data; struct ib_uverbs_file *ufile = uobj->ufile; - if (down_read_trylock(&ufile->cleanup_rwsem)) { + if (down_read_trylock(&ufile->hw_destroy_rwsem)) { _uverbs_close_fd(uobj); - up_read(&ufile->cleanup_rwsem); + up_read(&ufile->hw_destroy_rwsem); } uobj->object = NULL; @@ -710,7 +710,6 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, * We take and release the lock per traversal in order to let * other threads (which might still use the FDs) chance to run. */ - mutex_lock(&ufile->uobjects_lock); ufile->cleanup_reason = reason; list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) { /* @@ -736,7 +735,6 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, uverbs_uobject_put(obj); ret = 0; } - mutex_unlock(&ufile->uobjects_lock); return ret; } @@ -751,7 +749,7 @@ void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed) * want to hold this forever as the context is going to be destroyed, * but we'll release it since it causes a "held lock freed" BUG message. */ - down_write(&ufile->cleanup_rwsem); + down_write(&ufile->hw_destroy_rwsem); ufile->ucontext->cleanup_retryable = true; while (!list_empty(&ufile->uobjects)) if (__uverbs_cleanup_ufile(ufile, reason)) { @@ -766,7 +764,7 @@ void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed) if (!list_empty(&ufile->uobjects)) __uverbs_cleanup_ufile(ufile, reason); - up_write(&ufile->cleanup_rwsem); + up_write(&ufile->hw_destroy_rwsem); } const struct uverbs_obj_type_class uverbs_fd_class = { diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index d0a1a54275e5..58b16e840e56 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -145,12 +145,16 @@ struct ib_uverbs_file { struct list_head list; int is_closed; - /* locking the uobjects_list */ - struct mutex uobjects_lock; + /* + * To access the uobjects list hw_destroy_rwsem must be held for write + * OR hw_destroy_rwsem held for read AND uobjects_lock held. + * hw_destroy_rwsem should be held across any destruction of the HW + * object of an associated uobject. 
+ */ + struct rw_semaphore hw_destroy_rwsem; + spinlock_t uobjects_lock; struct list_head uobjects; - /* protects cleanup process from other actions */ - struct rw_semaphore cleanup_rwsem; enum rdma_remove_reason cleanup_reason; struct idr idr; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 8425718bebbd..77faf32fc997 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -889,9 +889,9 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) mutex_init(&file->mutex); mutex_init(&file->cleanup_mutex); - mutex_init(&file->uobjects_lock); + spin_lock_init(&file->uobjects_lock); INIT_LIST_HEAD(&file->uobjects); - init_rwsem(&file->cleanup_rwsem); + init_rwsem(&file->hw_destroy_rwsem); filp->private_data = file; kobject_get(&dev->kobj); -- cgit From e951747a087a8655f467833bb367ebf53d57527c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 10 Jul 2018 20:55:19 -0600 Subject: IB/uverbs: Rework the locking for cleaning up the ucontext The locking here has always been a bit crazy and spread out; upon some careful analysis we can simplify things. Create a single function uverbs_destroy_ufile_hw() that internally handles all locking. This pulls together pieces of this process that were sprinkled all over the place into one place, and covers them with one lock. This eliminates several duplicate/confusing locks and makes the control flow in ib_uverbs_close() and ib_uverbs_free_hw_resources() extremely simple. Unfortunately we have to keep an extra mutex, ucontext_lock. This lock is logically part of the rwsem and provides the 'down write, fail if write locked, wait if read locked' semantic we require. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 117 ++++++++++++++++++++++++++++++---- drivers/infiniband/core/rdma_core.h | 3 +- drivers/infiniband/core/uverbs.h | 6 +- drivers/infiniband/core/uverbs_cmd.c | 6 +- drivers/infiniband/core/uverbs_main.c | 98 ++++------------------------ include/rdma/ib_verbs.h | 5 ++ 6 files changed, 127 insertions(+), 108 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 4545c661acaa..eeed6374134c 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -32,6 +32,7 @@ #include #include +#include #include #include #include @@ -284,11 +285,8 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, } ret = uverbs_try_lock_object(uobj, exclusive); - if (ret) { - WARN(uobj->ufile->cleanup_reason, - "ib_uverbs: Trying to lookup_get while cleanup context\n"); + if (ret) goto free; - } return uobj; free: @@ -694,6 +692,71 @@ void uverbs_close_fd(struct file *f) uverbs_uobject_put(uobj); } +static void ufile_disassociate_ucontext(struct ib_ucontext *ibcontext) +{ + struct ib_device *ib_dev = ibcontext->device; + struct task_struct *owning_process = NULL; + struct mm_struct *owning_mm = NULL; + + owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); + if (!owning_process) + return; + + owning_mm = get_task_mm(owning_process); + if (!owning_mm) { + pr_info("no mm, disassociate ucontext is pending task termination\n"); + while (1) { + put_task_struct(owning_process); + usleep_range(1000, 2000); + owning_process = get_pid_task(ibcontext->tgid, + PIDTYPE_PID); + if (!owning_process || + owning_process->state == TASK_DEAD) { + pr_info("disassociate ucontext done, task was terminated\n"); + /* in case task was dead need to release 
the + * task struct. + */ + if (owning_process) + put_task_struct(owning_process); + return; + } + } + } + + down_write(&owning_mm->mmap_sem); + ib_dev->disassociate_ucontext(ibcontext); + up_write(&owning_mm->mmap_sem); + mmput(owning_mm); + put_task_struct(owning_process); +} + +/* + * Drop the ucontext off the ufile and completely disconnect it from the + * ib_device + */ +static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile, + enum rdma_remove_reason reason) +{ + struct ib_ucontext *ucontext = ufile->ucontext; + int ret; + + if (reason == RDMA_REMOVE_DRIVER_REMOVE) + ufile_disassociate_ucontext(ucontext); + + put_pid(ucontext->tgid); + ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device, + RDMACG_RESOURCE_HCA_HANDLE); + + /* + * FIXME: Drivers are not permitted to fail dealloc_ucontext, remove + * the error return. + */ + ret = ucontext->device->dealloc_ucontext(ufile->ucontext); + WARN_ON(ret); + + ufile->ucontext = NULL; +} + static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, enum rdma_remove_reason reason) { @@ -710,7 +773,6 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, * We take and release the lock per traversal in order to let * other threads (which might still use the FDs) chance to run. */ - ufile->cleanup_reason = reason; list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) { /* * if we hit this WARN_ON, that means we are @@ -738,18 +800,43 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, return ret; } -void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed) +/* + * Destroy the ucontext and every uobject associated with it. If called with + * reason != RDMA_REMOVE_CLOSE this will not return until the destruction has + * been completed and ufile->ucontext is NULL. + * + * This is internally locked and can be called in parallel from multiple + * contexts. + */ +void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, + enum rdma_remove_reason reason) { - enum rdma_remove_reason reason = device_removed ? - RDMA_REMOVE_DRIVER_REMOVE : - RDMA_REMOVE_CLOSE; + if (reason == RDMA_REMOVE_CLOSE) { + /* + * During destruction we might trigger something that + * synchronously calls release on any file descriptor. For + * this reason all paths that come from file_operations + * release must use try_lock. They can progress knowing that + * there is an ongoing uverbs_destroy_ufile_hw that will clean + * up the driver resources. + */ + if (!mutex_trylock(&ufile->ucontext_lock)) + return; + + } else { + mutex_lock(&ufile->ucontext_lock); + } + + down_write(&ufile->hw_destroy_rwsem); /* - * Waits for all remove_commit and alloc_commit to finish. Logically, We - * want to hold this forever as the context is going to be destroyed, - * but we'll release it since it causes a "held lock freed" BUG message. + * If a ucontext was never created then we can't have any uobjects to + * cleanup, nothing to do. 
*/ - down_write(&ufile->hw_destroy_rwsem); + if (!ufile->ucontext) + goto done; + + ufile->ucontext->closing = true; ufile->ucontext->cleanup_retryable = true; while (!list_empty(&ufile->uobjects)) if (__uverbs_cleanup_ufile(ufile, reason)) { @@ -764,7 +851,11 @@ void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed) if (!list_empty(&ufile->uobjects)) __uverbs_cleanup_ufile(ufile, reason); + ufile_destroy_ucontext(ufile, reason); + +done: up_write(&ufile->hw_destroy_rwsem); + mutex_unlock(&ufile->ucontext_lock); } const struct uverbs_obj_type_class uverbs_fd_class = { diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index db2339330f6f..a736b46d18e3 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -49,7 +49,8 @@ const struct uverbs_object_spec *uverbs_get_object(struct ib_uverbs_file *ufile, const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object, uint16_t method); -void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed); +void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, + enum rdma_remove_reason reason); /* * uverbs_uobject_get is called in order to increase the reference count on diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 58b16e840e56..ca9b0450d3f9 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -136,9 +136,9 @@ struct ib_uverbs_completion_event_file { struct ib_uverbs_file { struct kref ref; - struct mutex mutex; - struct mutex cleanup_mutex; /* protect cleanup */ struct ib_uverbs_device *device; + /* Protects writing to ucontext */ + struct mutex ucontext_lock; struct ib_ucontext *ucontext; struct ib_event_handler event_handler; struct ib_uverbs_async_event_file *async_file; @@ -155,8 +155,6 @@ struct ib_uverbs_file { spinlock_t uobjects_lock; struct list_head uobjects; - enum rdma_remove_reason cleanup_reason; - struct idr idr; /* spinlock protects write access to idr */ spinlock_t idr_lock; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 409fd46a2a99..f2611c760184 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -84,7 +84,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&file->mutex); + mutex_lock(&file->ucontext_lock); if (file->ucontext) { ret = -EINVAL; @@ -150,7 +150,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, fd_install(resp.async_fd, filp); - mutex_unlock(&file->mutex); + mutex_unlock(&file->ucontext_lock); return in_len; @@ -169,7 +169,7 @@ err_alloc: ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE); err: - mutex_unlock(&file->mutex); + mutex_unlock(&file->ucontext_lock); return ret; } diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 77faf32fc997..78d79020ea5c 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -41,8 +41,6 @@ #include #include #include -#include -#include #include #include #include @@ -227,21 +225,6 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp, } } -static int ib_uverbs_cleanup_ufile(struct ib_uverbs_file *file, - bool device_removed) -{ - struct ib_ucontext *context = file->ucontext; - - context->closing = 1; - uverbs_cleanup_ufile(file, device_removed); - put_pid(context->tgid); - - 
ib_rdmacg_uncharge(&context->cg_obj, context->device, - RDMACG_RESOURCE_HCA_HANDLE); - - return context->device->dealloc_ucontext(context); -} - static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev) { complete(&dev->comp); @@ -886,8 +869,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) spin_lock_init(&file->idr_lock); idr_init(&file->idr); kref_init(&file->ref); - mutex_init(&file->mutex); - mutex_init(&file->cleanup_mutex); + mutex_init(&file->ucontext_lock); spin_lock_init(&file->uobjects_lock); INIT_LIST_HEAD(&file->uobjects); @@ -917,12 +899,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; - mutex_lock(&file->cleanup_mutex); - if (file->ucontext) { - ib_uverbs_cleanup_ufile(file, false); - file->ucontext = NULL; - } - mutex_unlock(&file->cleanup_mutex); + uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE); idr_destroy(&file->idr); mutex_lock(&file->device->lists_mutex); @@ -1109,44 +1086,6 @@ err: return; } -static void ib_uverbs_disassociate_ucontext(struct ib_ucontext *ibcontext) -{ - struct ib_device *ib_dev = ibcontext->device; - struct task_struct *owning_process = NULL; - struct mm_struct *owning_mm = NULL; - - owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); - if (!owning_process) - return; - - owning_mm = get_task_mm(owning_process); - if (!owning_mm) { - pr_info("no mm, disassociate ucontext is pending task termination\n"); - while (1) { - put_task_struct(owning_process); - usleep_range(1000, 2000); - owning_process = get_pid_task(ibcontext->tgid, - PIDTYPE_PID); - if (!owning_process || - owning_process->state == TASK_DEAD) { - pr_info("disassociate ucontext done, task was terminated\n"); - /* in case task was dead need to release the - * task struct. - */ - if (owning_process) - put_task_struct(owning_process); - return; - } - } - } - - down_write(&owning_mm->mmap_sem); - ib_dev->disassociate_ucontext(ibcontext); - up_write(&owning_mm->mmap_sem); - mmput(owning_mm); - put_task_struct(owning_process); -} - static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, struct ib_device *ib_dev) { @@ -1162,39 +1101,24 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, mutex_lock(&uverbs_dev->lists_mutex); while (!list_empty(&uverbs_dev->uverbs_file_list)) { - struct ib_ucontext *ucontext; file = list_first_entry(&uverbs_dev->uverbs_file_list, struct ib_uverbs_file, list); file->is_closed = 1; list_del(&file->list); kref_get(&file->ref); - mutex_unlock(&uverbs_dev->lists_mutex); - - - mutex_lock(&file->cleanup_mutex); - ucontext = file->ucontext; - file->ucontext = NULL; - mutex_unlock(&file->cleanup_mutex); - /* At this point ib_uverbs_close cannot be running - * ib_uverbs_cleanup_ufile + /* We must release the mutex before going ahead and calling + * uverbs_cleanup_ufile, as it might end up indirectly calling + * uverbs_close, for example due to freeing the resources (e.g + * mmput). */ - if (ucontext) { - /* We must release the mutex before going ahead and - * calling disassociate_ucontext. disassociate_ucontext - * might end up indirectly calling uverbs_close, - * for example due to freeing the resources - * (e.g mmput). 
- */ - ib_uverbs_event_handler(&file->event_handler, &event); - ib_uverbs_disassociate_ucontext(ucontext); - mutex_lock(&file->cleanup_mutex); - ib_uverbs_cleanup_ufile(file, true); - mutex_unlock(&file->cleanup_mutex); - } + mutex_unlock(&uverbs_dev->lists_mutex); - mutex_lock(&uverbs_dev->lists_mutex); + ib_uverbs_event_handler(&file->event_handler, &event); + uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE); kref_put(&file->ref, ib_uverbs_release_file); + + mutex_lock(&uverbs_dev->lists_mutex); } while (!list_empty(&uverbs_dev->uverbs_events_file_list)) { diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 99bcf64a4762..42cbf8eabe9d 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1479,6 +1479,11 @@ struct ib_rdmacg_object { struct ib_ucontext { struct ib_device *device; struct ib_uverbs_file *ufile; + /* + * 'closing' can be read by the driver only during a destroy callback, + * it is set when we are closing the file descriptor and indicates + * that mm_sem may be locked. + */ int closing; bool cleanup_retryable; -- cgit From 2c96eb7d62de5048aa08e9ee4fbb607f29e2638c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 10 Jul 2018 20:55:20 -0600 Subject: IB/uverbs: Always propagate errors from rdma_alloc_commit_uobject() The ioctl framework already does this correctly, but the write path did not. This is trivially fixed by simply using a standard pattern to return uobj_alloc_commit() as the last statement in every function. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 5 ++-- drivers/infiniband/core/uverbs_cmd.c | 49 +++++++++++------------------------- include/rdma/uverbs_std_types.h | 9 +++++-- include/rdma/uverbs_types.h | 2 +- 4 files changed, 26 insertions(+), 39 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index eeed6374134c..2aab8cd2ca6b 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -532,9 +532,10 @@ static void alloc_commit_fd_uobject(struct ib_uobject *uobj) /* * In all cases rdma_alloc_commit_uobject() consumes the kref to uobj and the - * caller can no longer assume uobj is valid. + * caller can no longer assume uobj is valid. If this function fails it + * destroys the uobject, including the attached HW object. 
*/ -int rdma_alloc_commit_uobject(struct ib_uobject *uobj) +int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj) { struct ib_uverbs_file *ufile = uobj->ufile; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index f2611c760184..73b563edb587 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -372,9 +372,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, goto err_copy; } - uobj_alloc_commit(uobj); - - return in_len; + return uobj_alloc_commit(uobj, in_len); err_copy: ib_dealloc_pd(pd); @@ -579,9 +577,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, mutex_unlock(&file->device->xrcd_tree_mutex); - uobj_alloc_commit(&obj->uobject); - - return in_len; + return uobj_alloc_commit(&obj->uobject, in_len); err_copy: if (inode) { @@ -723,9 +719,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, uobj_put_obj_read(pd); - uobj_alloc_commit(uobj); - - return in_len; + return uobj_alloc_commit(uobj, in_len); err_copy: ib_dereg_mr(mr); @@ -901,9 +895,7 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, } uobj_put_obj_read(pd); - uobj_alloc_commit(uobj); - - return in_len; + return uobj_alloc_commit(uobj, in_len); err_copy: uverbs_dealloc_mw(mw); @@ -959,8 +951,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, return -EFAULT; } - uobj_alloc_commit(uobj); - return in_len; + return uobj_alloc_commit(uobj, in_len); } static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, @@ -1041,7 +1032,9 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, if (ret) goto err_cb; - uobj_alloc_commit(&obj->uobject); + ret = uobj_alloc_commit(&obj->uobject, 0); + if (ret) + return ERR_PTR(ret); return obj; err_cb: @@ -1596,9 +1589,7 @@ static int create_qp(struct ib_uverbs_file *file, if (ind_tbl) uobj_put_obj_read(ind_tbl); - uobj_alloc_commit(&obj->uevent.uobject); - - return 0; + return uobj_alloc_commit(&obj->uevent.uobject, 0); err_cb: ib_destroy_qp(qp); @@ -1801,10 +1792,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, qp->uobject = &obj->uevent.uobject; uobj_put_read(xrcd_uobj); - - uobj_alloc_commit(&obj->uevent.uobject); - - return in_len; + return uobj_alloc_commit(&obj->uevent.uobject, in_len); err_destroy: ib_destroy_qp(qp); @@ -2607,9 +2595,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, } uobj_put_obj_read(pd); - uobj_alloc_commit(uobj); - - return in_len; + return uobj_alloc_commit(uobj, in_len); err_copy: rdma_destroy_ah(ah); @@ -3155,8 +3141,7 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, uobj_put_obj_read(pd); uobj_put_obj_read(cq); - uobj_alloc_commit(&obj->uevent.uobject); - return 0; + return uobj_alloc_commit(&obj->uevent.uobject, 0); err_copy: ib_destroy_wq(wq); @@ -3403,8 +3388,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, for (j = 0; j < num_read_wqs; j++) uobj_put_obj_read(wqs[j]); - uobj_alloc_commit(uobj); - return 0; + return uobj_alloc_commit(uobj, 0); err_copy: ib_destroy_rwq_ind_table(rwq_ind_tbl); @@ -3605,11 +3589,10 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, goto err_copy; uobj_put_obj_read(qp); - uobj_alloc_commit(uobj); kfree(flow_attr); if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); - return 0; + return uobj_alloc_commit(uobj, 0); err_copy: if (!qp->device->destroy_flow(flow_id)) atomic_dec(&qp->usecnt); @@ -3761,9 +3744,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, 
uobj_put_obj_read(attr.ext.cq); uobj_put_obj_read(pd); - uobj_alloc_commit(&obj->uevent.uobject); - - return 0; + return uobj_alloc_commit(&obj->uevent.uobject, 0); err_copy: ib_destroy_srq(srq); diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 4f32eab8b7a4..076f085d2dcf 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -102,9 +102,14 @@ static inline int __must_check uobj_remove_commit(struct ib_uobject *uobj) return rdma_remove_commit_uobject(uobj); } -static inline void uobj_alloc_commit(struct ib_uobject *uobj) +static inline int __must_check uobj_alloc_commit(struct ib_uobject *uobj, + int success_res) { - rdma_alloc_commit_uobject(uobj); + int ret = rdma_alloc_commit_uobject(uobj); + + if (ret) + return ret; + return success_res; } static inline void uobj_alloc_abort(struct ib_uobject *uobj) diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index 2f50cc6def3c..9b82e36128aa 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -127,7 +127,7 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, struct ib_uverbs_file *ufile); void rdma_alloc_abort_uobject(struct ib_uobject *uobj); int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj); -int rdma_alloc_commit_uobject(struct ib_uobject *uobj); +int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj); int rdma_explicit_destroy(struct ib_uobject *uobject); struct uverbs_obj_fd_type { -- cgit From aba94548c9e49939fafc92bb406a7f8e7ed87643 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 10 Jul 2018 20:55:21 -0600 Subject: IB/uverbs: Move the FD uobj type struct file allocation to alloc_commit Allocating the struct file during alloc_begin creates this strange asymmetry with IDR, where the FD has two krefs pointing at it during the pre-commit phase. In particular this makes the abort process for FD very strange and confusing. For instance abort currently calls the type's destroy_object twice, and the fops release once if abort is done. This is very counter intuitive. No fops should be called until alloc_commit succeeds, and destroy_object should only ever be called once. Moving the struct file allocation to the alloc_commit is now simple, as we already support failure of rdma_alloc_commit_uobject, with all the required rollback pieces. This creates an understandable symmetry with IDR and simplifies/fixes the abort handling for FD types. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 83 ++++++++++++++++++++----------------- include/rdma/uverbs_types.h | 2 +- 2 files changed, 47 insertions(+), 38 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 2aab8cd2ca6b..8a6ce66d4726 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -328,11 +328,8 @@ uobj_put: static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type, struct ib_uverbs_file *ufile) { - const struct uverbs_obj_fd_type *fd_type = - container_of(type, struct uverbs_obj_fd_type, type); int new_fd; struct ib_uobject *uobj; - struct file *filp; new_fd = get_unused_fd_flags(O_CLOEXEC); if (new_fd < 0) @@ -344,28 +341,8 @@ static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *t return uobj; } - /* - * The kref for uobj is moved into filp->private data and put in - * uverbs_close_fd(). 
Once anon_inode_getfile() succeeds - * uverbs_close_fd() must be guaranteed to be called from the provided - * fops release callback. We piggyback our kref of uobj on the stack - * with the lifetime of the struct file. - */ - filp = anon_inode_getfile(fd_type->name, - fd_type->fops, - uobj, - fd_type->flags); - if (IS_ERR(filp)) { - put_unused_fd(new_fd); - uverbs_uobject_put(uobj); - return (void *)filp; - } - uobj->id = new_fd; - uobj->object = filp; uobj->ufile = ufile; - /* Matching put will be done in uverbs_close_fd() */ - kref_get(&ufile->ref); return uobj; } @@ -407,12 +384,10 @@ static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj, static void alloc_abort_fd_uobject(struct ib_uobject *uobj) { - struct file *filp = uobj->object; - int id = uobj->id; + put_unused_fd(uobj->id); - /* Unsuccessful NEW */ - fput(filp); - put_unused_fd(id); + /* Pairs with the kref from alloc_begin_idr_uobject */ + uverbs_uobject_put(uobj); } static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj, @@ -500,7 +475,7 @@ int rdma_explicit_destroy(struct ib_uobject *uobject) return ret; } -static void alloc_commit_idr_uobject(struct ib_uobject *uobj) +static int alloc_commit_idr_uobject(struct ib_uobject *uobj) { struct ib_uverbs_file *ufile = uobj->ufile; @@ -514,11 +489,34 @@ static void alloc_commit_idr_uobject(struct ib_uobject *uobj) */ WARN_ON(idr_replace(&ufile->idr, uobj, uobj->id)); spin_unlock(&ufile->idr_lock); + + return 0; } -static void alloc_commit_fd_uobject(struct ib_uobject *uobj) +static int alloc_commit_fd_uobject(struct ib_uobject *uobj) { + const struct uverbs_obj_fd_type *fd_type = + container_of(uobj->type, struct uverbs_obj_fd_type, type); int fd = uobj->id; + struct file *filp; + + /* + * The kref for uobj is moved into filp->private data and put in + * uverbs_close_fd(). Once alloc_commit() succeeds uverbs_close_fd() + * must be guaranteed to be called from the provided fops release + * callback. + */ + filp = anon_inode_getfile(fd_type->name, + fd_type->fops, + uobj, + fd_type->flags); + if (IS_ERR(filp)) + return PTR_ERR(filp); + + uobj->object = filp; + + /* Matching put will be done in uverbs_close_fd() */ + kref_get(&uobj->ufile->ref); /* This shouldn't be used anymore. Use the file object instead */ uobj->id = 0; @@ -527,7 +525,9 @@ static void alloc_commit_fd_uobject(struct ib_uobject *uobj) * NOTE: Once we install the file we loose ownership of our kref on * uobj. It will be put by uverbs_close_fd() */ - fd_install(fd, uobj->object); + fd_install(fd, filp); + + return 0; } /* @@ -538,11 +538,10 @@ static void alloc_commit_fd_uobject(struct ib_uobject *uobj) int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj) { struct ib_uverbs_file *ufile = uobj->ufile; + int ret; /* Cleanup is running. 
Calling this should have been impossible */
 	if (!down_read_trylock(&ufile->hw_destroy_rwsem)) {
-		int ret;
-
 		WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n");
 		ret = uobj->type->type_class->remove_commit(uobj,
 							    RDMA_REMOVE_DURING_CLEANUP);
@@ -552,9 +551,18 @@ int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj)
 		return ret;
 	}

-	/* matches atomic_set(-1) in alloc_uobj */
 	assert_uverbs_usecnt(uobj, true);
-	atomic_set(&uobj->usecnt, 0);
+
+	/* alloc_commit consumes the uobj kref */
+	ret = uobj->type->type_class->alloc_commit(uobj);
+	if (ret) {
+		if (uobj->type->type_class->remove_commit(
+			    uobj, RDMA_REMOVE_DURING_CLEANUP))
+			pr_warn("ib_uverbs: cleanup of idr object %d failed\n",
+				uobj->id);
+		up_read(&ufile->hw_destroy_rwsem);
+		return ret;
+	}

 	/* kref is held so long as the uobj is on the uobj list. */
 	uverbs_uobject_get(uobj);
@@ -562,8 +570,9 @@ int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj)
 	list_add(&uobj->list, &ufile->uobjects);
 	spin_unlock_irq(&ufile->uobjects_lock);

-	/* alloc_commit consumes the uobj kref */
-	uobj->type->type_class->alloc_commit(uobj);
+	/* matches atomic_set(-1) in alloc_uobj */
+	atomic_set(&uobj->usecnt, 0);
+
 	up_read(&ufile->hw_destroy_rwsem);

 	return 0;
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index 9b82e36128aa..cfc50fcdbff6 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -73,7 +73,7 @@ struct uverbs_obj_type_class {
 	 */
 	struct ib_uobject *(*alloc_begin)(const struct uverbs_obj_type *type,
 					  struct ib_uverbs_file *ufile);
-	void (*alloc_commit)(struct ib_uobject *uobj);
+	int (*alloc_commit)(struct ib_uobject *uobj);
 	void (*alloc_abort)(struct ib_uobject *uobj);

 	struct ib_uobject *(*lookup_get)(const struct uverbs_obj_type *type,
-- 
cgit 

From c36ee46dafaea1bb525c4e34d3e35dd9a6768a47 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe
Date: Tue, 10 Jul 2018 20:55:22 -0600
Subject: IB/mlx5: Use the ucontext from the uobj, not the file

This approach matches the standard flow of the typical write method that
relies on the HW object to store the device and the uobject to access the
ucontext.

Avoiding the use of the devx_ufile2uctx in several places will make
revising the semantics of ib_uverbs_get_ucontext() in the next patch
simpler.
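Condensed, the change to each handler follows this pattern (an
illustrative sketch assembled from the hunks below, not additional
code):

	/* before: derive the ucontext and device from the file */
	struct mlx5_ib_ucontext *c = devx_ufile2uctx(file);
	struct mlx5_ib_dev *dev = to_mdev(ib_dev);

	/* after: derive both from the method's uobject attribute */
	struct ib_uobject *uobj = uverbs_attr_get_uobject(
		attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE);
	struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
	struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);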
Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 270452c9e673..98b1575226c1 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -706,13 +706,14 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(struct ib_device *ib_d struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { - struct mlx5_ib_ucontext *c = devx_ufile2uctx(file); - struct mlx5_ib_dev *dev = to_mdev(ib_dev); void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN); int cmd_out_len = uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT); void *cmd_out; - struct ib_uobject *uobj; + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE); + struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); + struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device); struct devx_obj *obj; int err; @@ -739,7 +740,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(struct ib_device *ib_d if (err) goto cmd_free; - uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE); uobj->object = obj; obj->mdev = dev->mdev; devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen, &obj->obj_id); @@ -763,13 +763,13 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(struct ib_device *ib_d struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { - struct mlx5_ib_ucontext *c = devx_ufile2uctx(file); - struct mlx5_ib_dev *dev = to_mdev(ib_dev); void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN); int cmd_out_len = uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT); struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE); + struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); + struct devx_obj *obj = uobj->object; void *cmd_out; int err; @@ -779,7 +779,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(struct ib_device *ib_d if (!devx_is_obj_modify_cmd(cmd_in)) return -EINVAL; - if (!devx_is_valid_obj_id(uobj->object, cmd_in)) + if (!devx_is_valid_obj_id(obj, cmd_in)) return -EINVAL; cmd_out = kvzalloc(cmd_out_len, GFP_KERNEL); @@ -787,7 +787,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(struct ib_device *ib_d return -ENOMEM; MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); - err = mlx5_cmd_exec(dev->mdev, cmd_in, + err = mlx5_cmd_exec(obj->mdev, cmd_in, uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN), cmd_out, cmd_out_len); if (err) @@ -805,13 +805,13 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(struct ib_device *ib_de struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { - struct mlx5_ib_ucontext *c = devx_ufile2uctx(file); - struct mlx5_ib_dev *dev = to_mdev(ib_dev); void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN); int cmd_out_len = uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT); struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE); + struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); + struct devx_obj *obj = uobj->object; void *cmd_out; int err; @@ -821,7 +821,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(struct ib_device *ib_de if (!devx_is_obj_query_cmd(cmd_in)) return -EINVAL; - 
if (!devx_is_valid_obj_id(uobj->object, cmd_in))
+	if (!devx_is_valid_obj_id(obj, cmd_in))
 		return -EINVAL;

 	cmd_out = kvzalloc(cmd_out_len, GFP_KERNEL);
@@ -829,7 +829,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(struct ib_device *ib_de
 		return -ENOMEM;

 	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
-	err = mlx5_cmd_exec(dev->mdev, cmd_in,
+	err = mlx5_cmd_exec(obj->mdev, cmd_in,
 			    uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
 			    cmd_out, cmd_out_len);
 	if (err)
@@ -920,18 +920,18 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(struct ib_device *ib_dev
 					 struct ib_uverbs_file *file,
 					 struct uverbs_attr_bundle *attrs)
 {
-	struct mlx5_ib_ucontext *c = devx_ufile2uctx(file);
-	struct mlx5_ib_dev *dev = to_mdev(ib_dev);
 	struct devx_umem_reg_cmd cmd;
 	struct devx_umem *obj;
-	struct ib_uobject *uobj;
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(
+		attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE);
 	u32 obj_id;
+	struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
+	struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
 	int err;

 	if (!c->devx_uid)
 		return -EPERM;

-	uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE);
 	obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
 	if (!obj)
 		return -ENOMEM;
-- 
cgit 

From 22fa27fbc64d01cbbe1e4da751e64cc22d24a6e4 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe
Date: Tue, 10 Jul 2018 13:43:06 -0600
Subject: IB/uverbs: Fix locking around struct ib_uverbs_file ucontext

We have a parallel unlocked reader and writer with ib_uverbs_get_context()
vs everything else, and nothing guarantees this works properly.

Audit and fix all of the places that access ucontext to use one of the
following locking schemes:

- Call ib_uverbs_get_ucontext() under SRCU and check for failure
- Access the ucontext through a struct ib_uobject context member
  while holding a READ or WRITE lock on the uobject. This value
  cannot be NULL and has no race.
- Hold the ucontext_lock and check for ufile->ucontext !NULL

This also re-implements ib_uverbs_get_ucontext() in a way that is safe
against concurrent ib_uverbs_get_context() and disassociation.

As a side effect, every access to ucontext in the commands is via
ib_uverbs_get_ucontext() with an error check, or via the uobject, so there
is no longer any need for the core code to check ucontext on every
command call. These checks are also removed.
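The reader/writer pairing at the core of the fix, condensed from the
hunks below (both sides appear verbatim in the patch):

	/* writer: ib_uverbs_get_context(), with ucontext_lock held */
	smp_store_release(&file->ucontext, ucontext);

	/* reader: ib_uverbs_get_ucontext(), under disassociate_srcu */
	struct ib_ucontext *ucontext = smp_load_acquire(&ufile->ucontext);

	if (!srcu_dereference(ufile->device->ib_dev,
			      &ufile->device->disassociate_srcu))
		return ERR_PTR(-EIO);
	if (!ucontext)
		return ERR_PTR(-EINVAL);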
Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 14 +++++++--- drivers/infiniband/core/uverbs.h | 5 +++- drivers/infiniband/core/uverbs_cmd.c | 14 ++++++---- drivers/infiniband/core/uverbs_ioctl.c | 5 +--- drivers/infiniband/core/uverbs_main.c | 38 +++++++++++++++++---------- drivers/infiniband/core/uverbs_std_types_cq.c | 2 +- drivers/infiniband/core/uverbs_std_types_dm.c | 2 +- drivers/infiniband/hw/mlx5/devx.c | 20 ++++++++++---- include/rdma/uverbs_ioctl.h | 8 ------ 9 files changed, 65 insertions(+), 43 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 8a6ce66d4726..a63844ba8414 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -154,8 +154,14 @@ int __uobj_perform_destroy(const struct uverbs_obj_type *type, u32 id, static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile, const struct uverbs_obj_type *type) { - struct ib_uobject *uobj = kzalloc(type->obj_size, GFP_KERNEL); + struct ib_uobject *uobj; + struct ib_ucontext *ucontext; + + ucontext = ib_uverbs_get_ucontext(ufile); + if (IS_ERR(ucontext)) + return ERR_CAST(ucontext); + uobj = kzalloc(type->obj_size, GFP_KERNEL); if (!uobj) return ERR_PTR(-ENOMEM); /* @@ -163,7 +169,7 @@ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile, * The object is added to the list in the commit stage. */ uobj->ufile = ufile; - uobj->context = ufile->ucontext; + uobj->context = ucontext; INIT_LIST_HEAD(&uobj->list); uobj->type = type; /* @@ -309,7 +315,7 @@ static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type * if (ret) goto uobj_put; - ret = ib_rdmacg_try_charge(&uobj->cg_obj, ufile->ucontext->device, + ret = ib_rdmacg_try_charge(&uobj->cg_obj, uobj->context->device, RDMACG_RESOURCE_HCA_OBJECT); if (ret) goto idr_remove; @@ -761,7 +767,7 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile, * FIXME: Drivers are not permitted to fail dealloc_ucontext, remove * the error return. 
*/ - ret = ucontext->device->dealloc_ucontext(ufile->ucontext); + ret = ucontext->device->dealloc_ucontext(ucontext); WARN_ON(ret); ufile->ucontext = NULL; diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index ca9b0450d3f9..cf02b433000c 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -137,8 +137,11 @@ struct ib_uverbs_completion_event_file { struct ib_uverbs_file { struct kref ref; struct ib_uverbs_device *device; - /* Protects writing to ucontext */ struct mutex ucontext_lock; + /* + * ucontext must be accessed via ib_uverbs_get_ucontext() or with + * ucontext_lock held + */ struct ib_ucontext *ucontext; struct ib_event_handler event_handler; struct ib_uverbs_async_event_file *async_file; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 73b563edb587..38d7de3f9b2f 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -146,10 +146,14 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, goto err_file; } - file->ucontext = ucontext; - fd_install(resp.async_fd, filp); + /* + * Make sure that ib_uverbs_get_ucontext() sees the pointer update + * only after all writes to setup the ucontext have completed + */ + smp_store_release(&file->ucontext, ucontext); + mutex_unlock(&file->ucontext_lock); return in_len; @@ -350,7 +354,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata); + pd = ib_dev->alloc_pd(ib_dev, uobj->context, &udata); if (IS_ERR(pd)) { ret = PTR_ERR(pd); goto err; @@ -538,7 +542,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, } if (!xrcd) { - xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata); + xrcd = ib_dev->alloc_xrcd(ib_dev, obj->uobject.context, &udata); if (IS_ERR(xrcd)) { ret = PTR_ERR(xrcd); goto err; @@ -1004,7 +1008,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags)) attr.flags = cmd->flags; - cq = ib_dev->create_cq(ib_dev, &attr, file->ucontext, uhw); + cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, uhw); if (IS_ERR(cq)) { ret = PTR_ERR(cq); goto err_file; diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 26ddc5cadcdb..db7a92ea5dbe 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -140,7 +140,7 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile, if (uattr->attr_data.reserved) return -EINVAL; - if (uattr->len != 0 || !ufile->ucontext) + if (uattr->len != 0) return -EINVAL; o_attr = &e->obj_attr; @@ -373,9 +373,6 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, if (!method_spec) return -EPROTONOSUPPORT; - if ((method_spec->flags & UVERBS_ACTION_FLAG_CREATE_ROOT) ^ !file->ucontext) - return -EINVAL; - ctx_size = sizeof(*ctx) + sizeof(struct uverbs_attr_bundle) + sizeof(struct uverbs_attr_bundle_hash) * method_spec->num_buckets + diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 78d79020ea5c..34df04ed142b 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -136,9 +136,27 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, static void ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); +/* + * 
Must be called with the ufile->device->disassociate_srcu held, and the lock + * must be held until use of the ucontext is finished. + */ struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile) { - return ufile->ucontext; + /* + * We do not hold the hw_destroy_rwsem lock for this flow, instead + * srcu is used. It does not matter if someone races this with + * get_context, we get NULL or valid ucontext. + */ + struct ib_ucontext *ucontext = smp_load_acquire(&ufile->ucontext); + + if (!srcu_dereference(ufile->device->ib_dev, + &ufile->device->disassociate_srcu)) + return ERR_PTR(-EIO); + + if (!ucontext) + return ERR_PTR(-EINVAL); + + return ucontext; } EXPORT_SYMBOL(ib_uverbs_get_ucontext); @@ -729,10 +747,6 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, if (ret) return ret; - if (!file->ucontext && - (command != IB_USER_VERBS_CMD_GET_CONTEXT || extended)) - return -EINVAL; - if (extended) { if (count < (sizeof(hdr) + sizeof(ex_hdr))) return -EINVAL; @@ -791,22 +805,18 @@ out: static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) { struct ib_uverbs_file *file = filp->private_data; - struct ib_device *ib_dev; + struct ib_ucontext *ucontext; int ret = 0; int srcu_key; srcu_key = srcu_read_lock(&file->device->disassociate_srcu); - ib_dev = srcu_dereference(file->device->ib_dev, - &file->device->disassociate_srcu); - if (!ib_dev) { - ret = -EIO; + ucontext = ib_uverbs_get_ucontext(file); + if (IS_ERR(ucontext)) { + ret = PTR_ERR(ucontext); goto out; } - if (!file->ucontext) - ret = -ENODEV; - else - ret = ib_dev->mmap(file->ucontext, vma); + ret = ucontext->device->mmap(ucontext, vma); out: srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); return ret; diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 5a6154345fa0..c71305fc0433 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -113,7 +113,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, /* Temporary, only until drivers get the new uverbs_attr_bundle */ create_udata(attrs, &uhw); - cq = ib_dev->create_cq(ib_dev, &attr, file->ucontext, &uhw); + cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, &uhw); if (IS_ERR(cq)) { ret = PTR_ERR(cq); goto err_event_file; diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c index 9e148e322523..c90efa4b99f4 100644 --- a/drivers/infiniband/core/uverbs_std_types_dm.c +++ b/drivers/infiniband/core/uverbs_std_types_dm.c @@ -70,7 +70,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev, uobj = uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE)->obj_attr.uobject; - dm = ib_dev->alloc_dm(ib_dev, file->ucontext, &attr, attrs); + dm = ib_dev->alloc_dm(ib_dev, uobj->context, &attr, attrs); if (IS_ERR(dm)) return PTR_ERR(dm); diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 98b1575226c1..fee800f2fdec 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -458,16 +458,21 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(struct ib_device *ib_de struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { - struct mlx5_ib_ucontext *c = devx_ufile2uctx(file); + struct mlx5_ib_ucontext *c; + struct mlx5_ib_dev *dev; u32 user_idx; s32 dev_idx; + c = devx_ufile2uctx(file); + if (IS_ERR(c)) + return PTR_ERR(c); + dev = 
to_mdev(c->ibucontext.device);
+
 	if (uverbs_copy_from(&user_idx, attrs, MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX))
 		return -EFAULT;

-	dev_idx = bfregn_to_uar_index(to_mdev(ib_dev),
-				      &c->bfregi, user_idx, true);
+	dev_idx = bfregn_to_uar_index(dev, &c->bfregi, user_idx, true);
 	if (dev_idx < 0)
 		return dev_idx;

@@ -482,8 +487,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(struct ib_device *ib_dev,
 					struct ib_uverbs_file *file,
 					struct uverbs_attr_bundle *attrs)
 {
-	struct mlx5_ib_ucontext *c = devx_ufile2uctx(file);
-	struct mlx5_ib_dev *dev = to_mdev(ib_dev);
+	struct mlx5_ib_ucontext *c;
+	struct mlx5_ib_dev *dev;
 	void *cmd_in = uverbs_attr_get_alloced_ptr(
 		attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN);
 	int cmd_out_len = uverbs_attr_get_len(attrs,
@@ -491,6 +496,11 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(struct ib_device *ib_dev,
 	void *cmd_out;
 	int err;

+	c = devx_ufile2uctx(file);
+	if (IS_ERR(c))
+		return PTR_ERR(c);
+	dev = to_mdev(c->ibucontext.device);
+
 	if (!c->devx_uid)
 		return -EPERM;

diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 7f230d1ec2b8..d16d31d4322d 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -123,14 +123,6 @@ struct uverbs_attr_spec_hash {
 struct uverbs_attr_bundle;
 struct ib_uverbs_file;

-enum {
-	/*
-	 * Action marked with this flag creates a context (or root for all
-	 * objects).
-	 */
-	UVERBS_ACTION_FLAG_CREATE_ROOT = 1U << 0,
-};
-
 struct uverbs_method_spec {
 	/* Combination of bits from enum UVERBS_ACTION_FLAG_XXXX */
 	u32 flags;
-- 
cgit 

From 9491a1edbae39d1fb1c88289d327e33916b98a1e Mon Sep 17 00:00:00 2001
From: Bart Van Assche
Date: Wed, 18 Jul 2018 09:01:35 -0700
Subject: RDMA/ocrdma: Suppress a compiler warning

This patch suppresses the following compiler warning, which is reported
when building with gcc 8 and W=1:

In function 'ocrdma_mbx_get_ctrl_attribs',
    inlined from 'ocrdma_init_hw' at drivers/infiniband/hw/ocrdma/ocrdma_hw.c:3224:11:
drivers/infiniband/hw/ocrdma/ocrdma_hw.c:1368:3: warning: 'strncpy' output may be truncated copying 31 bytes from a string of length 31 [-Wstringop-truncation]
   strncpy(dev->model_number,
   ^~~~~~~~~~~~~~~~~~~~~~~~~~
    hba_attribs->controller_model_number, 31);
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Bart Van Assche
Acked-by: Selvin Xavier
Signed-off-by: Jason Gunthorpe
---
 drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index c6c87cba943b..e578281471af 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -1365,8 +1365,9 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev)
 		dev->hba_port_num = (hba_attribs->ptpnum_maxdoms_hbast_cv &
 					OCRDMA_HBA_ATTRB_PTNUM_MASK)
 					>> OCRDMA_HBA_ATTRB_PTNUM_SHIFT;
-		strncpy(dev->model_number,
-			hba_attribs->controller_model_number, 31);
+		strlcpy(dev->model_number,
+			hba_attribs->controller_model_number,
+			sizeof(dev->model_number));
 	}
 	dma_free_coherent(&dev->nic_info.pdev->dev, dma.size, dma.va, dma.pa);
free_mqe:
-- 
cgit 

From 076dd53be52b729acc1e24e50669dbd761d8ee06 Mon Sep 17 00:00:00 2001
From: Varsha Rao
Date: Wed, 25 Jul 2018 20:43:56 +0200
Subject: IB/core: Remove extra parentheses

Remove unnecessary parentheses to fix the clang warning of extraneous
parentheses.
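For reference, a minimal stand-alone reproduction of the warning (an
illustrative user-space sketch, not part of the patch; AF_INET6 is the
usual <sys/socket.h> constant):

	#include <stdbool.h>
	#include <sys/socket.h>

	bool is_v6(int sa_family)
	{
		/* clang: equality comparison with extraneous parentheses
		 * [-Wparentheses-equality]
		 */
		if ((sa_family == AF_INET6))
			return true;
		return false;
	}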
Signed-off-by: Varsha Rao Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index f2bf997b62cd..338df1789884 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4030,7 +4030,7 @@ static void cma_set_mgid(struct rdma_id_private *id_priv, memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); } else if (addr->sa_family == AF_IB) { memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid); - } else if ((addr->sa_family == AF_INET6)) { + } else if (addr->sa_family == AF_INET6) { ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map); if (id_priv->id.ps == RDMA_PS_UDP) mc_map[7] = 0x01; /* Use RDMA CM signature */ -- cgit From 7fc7a7cffab6b94cb5e47148e6852ba633078ea1 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 25 Jul 2018 21:22:13 +0530 Subject: rdma/cxgb4: Add support for srq functions & structs This patch adds kernel mode t4_srq structures and support functions, uapi structures and defines, as well as firmware work request structures. Signed-off-by: Raju Rangoju Reviewed-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 38 ++++++++++ drivers/infiniband/hw/cxgb4/t4.h | 117 +++++++++++++++++++++++++++++- drivers/infiniband/hw/cxgb4/t4fw_ri_api.h | 19 +++++ include/uapi/rdma/cxgb4-abi.h | 17 +++++ 4 files changed, 190 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 8866bf992316..1d567aaf88e3 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -97,6 +97,7 @@ struct c4iw_resource { struct c4iw_id_table tpt_table; struct c4iw_id_table qid_table; struct c4iw_id_table pdid_table; + struct c4iw_id_table srq_table; }; struct c4iw_qid_list { @@ -130,6 +131,8 @@ struct c4iw_stats { struct c4iw_stat stag; struct c4iw_stat pbl; struct c4iw_stat rqt; + struct c4iw_stat srqt; + struct c4iw_stat srq; struct c4iw_stat ocqp; u64 db_full; u64 db_empty; @@ -549,6 +552,7 @@ struct c4iw_qp { struct kref kref; wait_queue_head_t wait; int sq_sig_all; + struct c4iw_srq *srq; struct work_struct free_work; struct c4iw_ucontext *ucontext; struct c4iw_wr_wait *wr_waitp; @@ -559,6 +563,26 @@ static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) return container_of(ibqp, struct c4iw_qp, ibqp); } +struct c4iw_srq { + struct ib_srq ibsrq; + struct list_head db_fc_entry; + struct c4iw_dev *rhp; + struct t4_srq wq; + struct sk_buff *destroy_skb; + u32 srq_limit; + u32 pdid; + int idx; + u32 flags; + spinlock_t lock; /* protects srq */ + struct c4iw_wr_wait *wr_waitp; + bool armed; +}; + +static inline struct c4iw_srq *to_c4iw_srq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct c4iw_srq, ibsrq); +} + struct c4iw_ucontext { struct ib_ucontext ibucontext; struct c4iw_dev_ucontext uctx; @@ -1040,6 +1064,13 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, struct ib_udata *udata); int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata); int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); +int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata); +int c4iw_destroy_srq(struct ib_srq *ib_srq); +struct ib_srq *c4iw_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *attrs, + struct ib_udata *udata); int c4iw_destroy_qp(struct ib_qp *ib_qp); 
struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, @@ -1076,12 +1107,19 @@ extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS]; void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid, enum cxgb4_bar2_qtype qtype, unsigned int *pbar2_qid, u64 *pbar2_pa); +int c4iw_alloc_srq_idx(struct c4iw_rdev *rdev); +void c4iw_free_srq_idx(struct c4iw_rdev *rdev, int idx); extern void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe); extern int c4iw_wr_log; extern int db_fc_threshold; extern int db_coalescing_threshold; extern int use_dsgl; void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey); +void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq); +void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16); +void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx); +int c4iw_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp); typedef int c4iw_restrack_func(struct sk_buff *msg, diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 838a7dee48bd..29a4dd5053f2 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -52,12 +52,16 @@ struct t4_status_page { __be16 pidx; u8 qp_err; /* flit 1 - sw owns */ u8 db_off; - u8 pad; + u8 pad[2]; u16 host_wq_pidx; u16 host_cidx; u16 host_pidx; + u16 pad2; + u32 srqidx; }; +#define T4_RQT_ENTRY_SHIFT 6 +#define T4_RQT_ENTRY_SIZE BIT(T4_RQT_ENTRY_SHIFT) #define T4_EQ_ENTRY_SIZE 64 #define T4_SQ_NUM_SLOTS 5 @@ -248,6 +252,7 @@ struct t4_cqe { /* used for RQ completion processing */ #define CQE_WRID_STAG(x) (be32_to_cpu((x)->u.rcqe.stag)) #define CQE_WRID_MSN(x) (be32_to_cpu((x)->u.rcqe.msn)) +#define CQE_ABS_RQE_IDX(x) (be32_to_cpu((x)->u.srcqe.abs_rqe_idx)) /* used for SQ completion processing */ #define CQE_WRID_SQ_IDX(x) ((x)->u.scqe.cidx) @@ -331,6 +336,7 @@ struct t4_swrqe { u64 wr_id; ktime_t host_time; u64 sge_ts; + int valid; }; struct t4_rq { @@ -360,8 +366,98 @@ struct t4_wq { void __iomem *db; struct c4iw_rdev *rdev; int flushed; + u8 *qp_errp; + u32 *srqidxp; +}; + +struct t4_srq_pending_wr { + u64 wr_id; + union t4_recv_wr wqe; + u8 len16; +}; + +struct t4_srq { + union t4_recv_wr *queue; + dma_addr_t dma_addr; + DECLARE_PCI_UNMAP_ADDR(mapping); + struct t4_swrqe *sw_rq; + void __iomem *bar2_va; + u64 bar2_pa; + size_t memsize; + u32 bar2_qid; + u32 qid; + u32 msn; + u32 rqt_hwaddr; + u32 rqt_abs_idx; + u16 rqt_size; + u16 size; + u16 cidx; + u16 pidx; + u16 wq_pidx; + u16 wq_pidx_inc; + u16 in_use; + struct t4_srq_pending_wr *pending_wrs; + u16 pending_cidx; + u16 pending_pidx; + u16 pending_in_use; + u16 ooo_count; }; +static inline u32 t4_srq_avail(struct t4_srq *srq) +{ + return srq->size - 1 - srq->in_use; +} + +static inline void t4_srq_produce(struct t4_srq *srq, u8 len16) +{ + srq->in_use++; + if (++srq->pidx == srq->size) + srq->pidx = 0; + srq->wq_pidx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE); + if (srq->wq_pidx >= srq->size * T4_RQ_NUM_SLOTS) + srq->wq_pidx %= srq->size * T4_RQ_NUM_SLOTS; + srq->queue[srq->size].status.host_pidx = srq->pidx; +} + +static inline void t4_srq_produce_pending_wr(struct t4_srq *srq) +{ + srq->pending_in_use++; + srq->in_use++; + if (++srq->pending_pidx == srq->size) + srq->pending_pidx = 0; +} + +static inline void t4_srq_consume_pending_wr(struct t4_srq *srq) +{ + srq->pending_in_use--; + srq->in_use--; + if (++srq->pending_cidx == srq->size) + srq->pending_cidx = 0; +} + +static 
inline void t4_srq_produce_ooo(struct t4_srq *srq)
+{
+	srq->in_use--;
+	srq->ooo_count++;
+}
+
+static inline void t4_srq_consume_ooo(struct t4_srq *srq)
+{
+	srq->cidx++;
+	if (srq->cidx == srq->size)
+		srq->cidx = 0;
+	srq->queue[srq->size].status.host_cidx = srq->cidx;
+	srq->ooo_count--;
+}
+
+static inline void t4_srq_consume(struct t4_srq *srq)
+{
+	srq->in_use--;
+	if (++srq->cidx == srq->size)
+		srq->cidx = 0;
+	srq->queue[srq->size].status.host_cidx = srq->cidx;
+}
+
 static inline int t4_rqes_posted(struct t4_wq *wq)
 {
 	return wq->rq.in_use;
@@ -475,6 +571,25 @@ static inline void pio_copy(u64 __iomem *dst, u64 *src)
 	}
 }

+static inline void t4_ring_srq_db(struct t4_srq *srq, u16 inc, u8 len16,
+				  union t4_recv_wr *wqe)
+{
+	/* Flush host queue memory writes. */
+	wmb();
+	if (inc == 1 && srq->bar2_qid == 0 && wqe) {
+		pr_debug("%s : WC srq->pidx = %d; len16=%d\n",
+			 __func__, srq->pidx, len16);
+		pio_copy(srq->bar2_va + SGE_UDB_WCDOORBELL, (u64 *)wqe);
+	} else {
+		pr_debug("%s: DB srq->pidx = %d; len16=%d\n",
+			 __func__, srq->pidx, len16);
+		writel(PIDX_T5_V(inc) | QID_V(srq->bar2_qid),
+		       srq->bar2_va + SGE_UDB_KDOORBELL);
+	}
+	/* Flush user doorbell area writes. */
+	wmb();
+}
+
 static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe)
 {
diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
index 58c531db4f4a..0f4f86b004d6 100644
--- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
+++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
@@ -263,6 +263,7 @@ enum fw_ri_res_type {
 	FW_RI_RES_TYPE_SQ,
 	FW_RI_RES_TYPE_RQ,
 	FW_RI_RES_TYPE_CQ,
+	FW_RI_RES_TYPE_SRQ,
 };

 enum fw_ri_res_op {
@@ -296,6 +297,20 @@ struct fw_ri_res {
 			__be32 r6_lo;
 			__be64 r7;
 		} cq;
+		struct fw_ri_res_srq {
+			__u8   restype;
+			__u8   op;
+			__be16 r3;
+			__be32 eqid;
+			__be32 r4[2];
+			__be32 fetchszm_to_iqid;
+			__be32 dcaen_to_eqsize;
+			__be64 eqaddr;
+			__be32 srqid;
+			__be32 pdid;
+			__be32 hwsrqsize;
+			__be32 hwsrqaddr;
+		} srq;
 	} u;
 };

@@ -707,6 +722,10 @@ enum fw_ri_init_p2ptype {
 	FW_RI_INIT_P2PTYPE_DISABLED	= 0xf,
 };

+enum fw_ri_init_rqeqid_srq {
+	FW_RI_INIT_RQEQID_SRQ = 1 << 31,
+};
+
 struct fw_ri_wr {
 	__be32 op_compl;
 	__be32 flowid_len16;
diff --git a/include/uapi/rdma/cxgb4-abi.h b/include/uapi/rdma/cxgb4-abi.h
index 65c9eacd3ffb..d0b2d829471a 100644
--- a/include/uapi/rdma/cxgb4-abi.h
+++ b/include/uapi/rdma/cxgb4-abi.h
@@ -84,6 +84,23 @@ struct c4iw_create_qp_resp {
 	__u32 flags;
 };

+struct c4iw_create_srq_resp {
+	__aligned_u64 srq_key;
+	__aligned_u64 srq_db_gts_key;
+	__aligned_u64 srq_memsize;
+	__u32 srqid;
+	__u32 srq_size;
+	__u32 rqt_abs_idx;
+	__u32 qid_mask;
+	__u32 flags;
+	__u32 reserved; /* explicit padding */
+};
+
+enum {
+	/* HW supports SRQ_LIMIT_REACHED event */
+	T4_SRQ_LIMIT_SUPPORT = 1 << 0,
+};
+
 struct c4iw_alloc_ucontext_resp {
 	__aligned_u64 status_page_key;
 	__u32 status_page_size;
-- 
cgit 

From 6a0b6174d35a141dfa30a32c848a3903e2d7f495 Mon Sep 17 00:00:00 2001
From: Raju Rangoju
Date: Wed, 25 Jul 2018 21:22:14 +0530
Subject: rdma/cxgb4: Add support for kernel mode SRQ's

This patch implements the SRQ-specific verbs such as
create/destroy/modify and post_srq_recv, adds SRQ-specific structures
and defines to t4.h and uapi, and updates the CQ poll logic to deal
with completions that are associated with SRQs.
This patch also handles kernel mode SRQ_LIMIT events as well as flushed SRQ buffers Signed-off-by: Raju Rangoju Reviewed-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cm.c | 42 +- drivers/infiniband/hw/cxgb4/cq.c | 142 +++++- drivers/infiniband/hw/cxgb4/device.c | 19 +- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 3 +- drivers/infiniband/hw/cxgb4/provider.c | 12 +- drivers/infiniband/hw/cxgb4/qp.c | 818 ++++++++++++++++++++++++++++----- drivers/infiniband/hw/cxgb4/resource.c | 51 +- drivers/infiniband/hw/cxgb4/t4.h | 9 +- 8 files changed, 929 insertions(+), 167 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 77243f7e17d5..54f7fbef7880 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1853,10 +1853,34 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } +static void complete_cached_srq_buffers(struct c4iw_ep *ep, u32 srqidx_status) +{ + enum chip_type adapter_type; + u32 srqidx; + u8 status; + + adapter_type = ep->com.dev->rdev.lldi.adapter_type; + status = ABORT_RSS_STATUS_G(be32_to_cpu(srqidx_status)); + srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(srqidx_status)); + + /* + * If this TCB had a srq buffer cached, then we must complete + * it. For user mode, that means saving the srqidx in the + * user/kernel status page for this qp. For kernel mode, just + * synthesize the CQE now. + */ + if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T5 && srqidx) { + if (ep->com.qp->ibqp.uobject) + t4_set_wq_in_error(&ep->com.qp->wq, srqidx); + else + c4iw_flush_srqidx(ep->com.qp, srqidx); + } +} + static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) { struct c4iw_ep *ep; - struct cpl_abort_rpl_rss *rpl = cplhdr(skb); + struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb); int release = 0; unsigned int tid = GET_TID(rpl); @@ -1865,6 +1889,9 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) pr_warn("Abort rpl to freed endpoint\n"); return 0; } + + complete_cached_srq_buffers(ep, rpl->srqidx_status); + pr_debug("ep %p tid %u\n", ep, ep->hwtid); mutex_lock(&ep->com.mutex); switch (ep->com.state) { @@ -2719,28 +2746,35 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) { - struct cpl_abort_req_rss *req = cplhdr(skb); + struct cpl_abort_req_rss6 *req = cplhdr(skb); struct c4iw_ep *ep; struct sk_buff *rpl_skb; struct c4iw_qp_attributes attrs; int ret; int release = 0; unsigned int tid = GET_TID(req); + u8 status; + u32 len = roundup(sizeof(struct cpl_abort_rpl), 16); ep = get_ep_from_tid(dev, tid); if (!ep) return 0; - if (cxgb_is_neg_adv(req->status)) { + status = ABORT_RSS_STATUS_G(be32_to_cpu(req->srqidx_status)); + + if (cxgb_is_neg_adv(status)) { pr_debug("Negative advice on abort- tid %u status %d (%s)\n", - ep->hwtid, req->status, neg_adv_str(req->status)); + ep->hwtid, status, neg_adv_str(status)); ep->stats.abort_neg_adv++; mutex_lock(&dev->rdev.stats.lock); dev->rdev.stats.neg_adv++; mutex_unlock(&dev->rdev.stats.lock); goto deref_ep; } + + complete_cached_srq_buffers(ep, req->srqidx_status); + pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, ep->com.state); set_bit(PEER_ABORT, &ep->com.history); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index a055f9f08e76..d266c8d0bf94 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -182,7 +182,7 @@ err1: return ret; } -static void 
insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq) +static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq, u32 srqidx) { struct t4_cqe cqe; @@ -195,6 +195,8 @@ static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq) CQE_SWCQE_V(1) | CQE_QPID_V(wq->sq.qid)); cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen)); + if (srqidx) + cqe.u.srcqe.abs_rqe_idx = cpu_to_be32(srqidx); cq->sw_queue[cq->sw_pidx] = cqe; t4_swcq_produce(cq); } @@ -207,7 +209,7 @@ int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count) pr_debug("wq %p cq %p rq.in_use %u skip count %u\n", wq, cq, wq->rq.in_use, count); while (in_use--) { - insert_recv_cqe(wq, cq); + insert_recv_cqe(wq, cq, 0); flushed++; } return flushed; @@ -458,6 +460,72 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count) pr_debug("cq %p count %d\n", cq, *count); } +static void post_pending_srq_wrs(struct t4_srq *srq) +{ + struct t4_srq_pending_wr *pwr; + u16 idx = 0; + + while (srq->pending_in_use) { + pwr = &srq->pending_wrs[srq->pending_cidx]; + srq->sw_rq[srq->pidx].wr_id = pwr->wr_id; + srq->sw_rq[srq->pidx].valid = 1; + + pr_debug("%s posting pending cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n", + __func__, + srq->cidx, srq->pidx, srq->wq_pidx, + srq->in_use, srq->size, + (unsigned long long)pwr->wr_id); + + c4iw_copy_wr_to_srq(srq, &pwr->wqe, pwr->len16); + t4_srq_consume_pending_wr(srq); + t4_srq_produce(srq, pwr->len16); + idx += DIV_ROUND_UP(pwr->len16 * 16, T4_EQ_ENTRY_SIZE); + } + + if (idx) { + t4_ring_srq_db(srq, idx, pwr->len16, &pwr->wqe); + srq->queue[srq->size].status.host_wq_pidx = + srq->wq_pidx; + } +} + +static u64 reap_srq_cqe(struct t4_cqe *hw_cqe, struct t4_srq *srq) +{ + int rel_idx = CQE_ABS_RQE_IDX(hw_cqe) - srq->rqt_abs_idx; + u64 wr_id; + + srq->sw_rq[rel_idx].valid = 0; + wr_id = srq->sw_rq[rel_idx].wr_id; + + if (rel_idx == srq->cidx) { + pr_debug("%s in order cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n", + __func__, rel_idx, srq->cidx, srq->pidx, + srq->wq_pidx, srq->in_use, srq->size, + (unsigned long long)srq->sw_rq[rel_idx].wr_id); + t4_srq_consume(srq); + while (srq->ooo_count && !srq->sw_rq[srq->cidx].valid) { + pr_debug("%s eat ooo cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n", + __func__, srq->cidx, srq->pidx, + srq->wq_pidx, srq->in_use, + srq->size, srq->ooo_count, + (unsigned long long) + srq->sw_rq[srq->cidx].wr_id); + t4_srq_consume_ooo(srq); + } + if (srq->ooo_count == 0 && srq->pending_in_use) + post_pending_srq_wrs(srq); + } else { + pr_debug("%s ooo cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n", + __func__, rel_idx, srq->cidx, + srq->pidx, srq->wq_pidx, + srq->in_use, srq->size, + srq->ooo_count, + (unsigned long long)srq->sw_rq[rel_idx].wr_id); + t4_srq_produce_ooo(srq); + } + return wr_id; +} + /* * poll_cq * @@ -475,7 +543,8 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count) * -EOVERFLOW CQ overflow detected. 
*/ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, - u8 *cqe_flushed, u64 *cookie, u32 *credit) + u8 *cqe_flushed, u64 *cookie, u32 *credit, + struct t4_srq *srq) { int ret = 0; struct t4_cqe *hw_cqe, read_cqe; @@ -540,7 +609,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, */ if (CQE_TYPE(hw_cqe) == 1) { if (CQE_STATUS(hw_cqe)) - t4_set_wq_in_error(wq); + t4_set_wq_in_error(wq, 0); ret = -EAGAIN; goto skip_cqe; } @@ -551,7 +620,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, */ if (CQE_WRID_STAG(hw_cqe) == 1) { if (CQE_STATUS(hw_cqe)) - t4_set_wq_in_error(wq); + t4_set_wq_in_error(wq, 0); ret = -EAGAIN; goto skip_cqe; } @@ -576,7 +645,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) { *cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH); - t4_set_wq_in_error(wq); + t4_set_wq_in_error(wq, 0); } /* @@ -590,15 +659,9 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, * then we complete this with T4_ERR_MSN and mark the wq in * error. */ - - if (t4_rq_empty(wq)) { - t4_set_wq_in_error(wq); - ret = -EAGAIN; - goto skip_cqe; - } if (unlikely(!CQE_STATUS(hw_cqe) && CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) { - t4_set_wq_in_error(wq); + t4_set_wq_in_error(wq, 0); hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN)); } goto proc_cqe; @@ -657,11 +720,16 @@ proc_cqe: c4iw_log_wr_stats(wq, hw_cqe); t4_sq_consume(wq); } else { - pr_debug("completing rq idx %u\n", wq->rq.cidx); - *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id; - if (c4iw_wr_log) - c4iw_log_wr_stats(wq, hw_cqe); - t4_rq_consume(wq); + if (!srq) { + pr_debug("completing rq idx %u\n", wq->rq.cidx); + *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id; + if (c4iw_wr_log) + c4iw_log_wr_stats(wq, hw_cqe); + t4_rq_consume(wq); + } else { + *cookie = reap_srq_cqe(hw_cqe, srq); + } + wq->rq.msn++; goto skip_cqe; } @@ -685,7 +753,7 @@ skip_cqe: } static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp, - struct ib_wc *wc) + struct ib_wc *wc, struct c4iw_srq *srq) { struct t4_cqe uninitialized_var(cqe); struct t4_wq *wq = qhp ? &qhp->wq : NULL; @@ -694,7 +762,8 @@ static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp, u64 cookie = 0; int ret; - ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit); + ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit, + srq ? &srq->wq : NULL); if (ret) goto out; @@ -703,6 +772,13 @@ static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp, wc->vendor_err = CQE_STATUS(&cqe); wc->wc_flags = 0; + /* + * Simulate a SRQ_LIMIT_REACHED HW notification if required. 
+ */ + if (srq && !(srq->flags & T4_SRQ_LIMIT_SUPPORT) && srq->armed && + srq->wq.in_use < srq->srq_limit) + c4iw_dispatch_srq_limit_reached_event(srq); + pr_debug("qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x lo 0x%x cookie 0x%llx\n", CQE_QPID(&cqe), CQE_TYPE(&cqe), CQE_OPCODE(&cqe), @@ -828,6 +904,7 @@ out: */ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) { + struct c4iw_srq *srq = NULL; struct c4iw_qp *qhp = NULL; struct t4_cqe *rd_cqe; int ret; @@ -840,10 +917,15 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe)); if (qhp) { spin_lock(&qhp->lock); - ret = __c4iw_poll_cq_one(chp, qhp, wc); + srq = qhp->srq; + if (srq) + spin_lock(&srq->lock); + ret = __c4iw_poll_cq_one(chp, qhp, wc, srq); spin_unlock(&qhp->lock); + if (srq) + spin_unlock(&srq->lock); } else { - ret = __c4iw_poll_cq_one(chp, NULL, wc); + ret = __c4iw_poll_cq_one(chp, NULL, wc, NULL); } return ret; } @@ -1078,3 +1160,19 @@ int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) spin_unlock_irqrestore(&chp->lock, flag); return ret; } + +void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx) +{ + struct c4iw_cq *rchp = to_c4iw_cq(qhp->ibqp.recv_cq); + unsigned long flag; + + /* locking heirarchy: cq lock first, then qp lock. */ + spin_lock_irqsave(&rchp->lock, flag); + spin_lock(&qhp->lock); + + /* create a SRQ RECV CQE for srqidx */ + insert_recv_cqe(&qhp->wq, &rchp->cq, srqidx); + + spin_unlock(&qhp->lock); + spin_unlock_irqrestore(&rchp->lock, flag); +} diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index a3c3418afd73..5ef082bfa95a 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -275,10 +275,11 @@ static int dump_qp(int id, void *p, void *data) set_ep_sin_addrs(ep, &lsin, &rsin, &m_lsin, &m_rsin); cc = snprintf(qpd->buf + qpd->pos, space, - "rc qp sq id %u rq id %u state %u " + "rc qp sq id %u %s id %u state %u " "onchip %u ep tid %u state %u " "%pI4:%u/%u->%pI4:%u/%u\n", - qp->wq.sq.qid, qp->wq.rq.qid, + qp->wq.sq.qid, qp->srq ? "srq" : "rq", + qp->srq ? 
qp->srq->idx : qp->wq.rq.qid, (int)qp->attr.state, qp->wq.sq.flags & T4_SQ_ONCHIP, ep->hwtid, (int)ep->com.state, @@ -480,6 +481,9 @@ static int stats_show(struct seq_file *seq, void *v) seq_printf(seq, " QID: %10llu %10llu %10llu %10llu\n", dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur, dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail); + seq_printf(seq, " SRQS: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.srqt.total, dev->rdev.stats.srqt.cur, + dev->rdev.stats.srqt.max, dev->rdev.stats.srqt.fail); seq_printf(seq, " TPTMEM: %10llu %10llu %10llu %10llu\n", dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur, dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail); @@ -530,6 +534,8 @@ static ssize_t stats_clear(struct file *file, const char __user *buf, dev->rdev.stats.pbl.fail = 0; dev->rdev.stats.rqt.max = 0; dev->rdev.stats.rqt.fail = 0; + dev->rdev.stats.rqt.max = 0; + dev->rdev.stats.rqt.fail = 0; dev->rdev.stats.ocqp.max = 0; dev->rdev.stats.ocqp.fail = 0; dev->rdev.stats.db_full = 0; @@ -802,7 +808,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->qpmask = rdev->lldi.udb_density - 1; rdev->cqmask = rdev->lldi.ucq_density - 1; - pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u\n", + pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u srq size %u\n", pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start, rdev->lldi.vr->stag.size, c4iw_num_stags(rdev), rdev->lldi.vr->pbl.start, @@ -811,7 +817,8 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->lldi.vr->qp.start, rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.start, - rdev->lldi.vr->cq.size); + rdev->lldi.vr->cq.size, + rdev->lldi.vr->srq.size); pr_debug("udb %pR db_reg %p gts_reg %p qpmask 0x%x cqmask 0x%x\n", &rdev->lldi.pdev->resource[2], rdev->lldi.db_reg, rdev->lldi.gts_reg, @@ -824,10 +831,12 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->stats.stag.total = rdev->lldi.vr->stag.size; rdev->stats.pbl.total = rdev->lldi.vr->pbl.size; rdev->stats.rqt.total = rdev->lldi.vr->rq.size; + rdev->stats.srqt.total = rdev->lldi.vr->srq.size; rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size; rdev->stats.qid.total = rdev->lldi.vr->qp.size; - err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD); + err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), + T4_MAX_NUM_PD, rdev->lldi.vr->srq.size); if (err) { pr_err("error %d initializing resources\n", err); return err; diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 1d567aaf88e3..047106cb0393 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -1013,7 +1013,8 @@ void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qpid, struct c4iw_dev_ucontext *uctx); u32 c4iw_get_resource(struct c4iw_id_table *id_table); void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry); -int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid); +int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, + u32 nr_pdid, u32 nr_srqt); int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev); int c4iw_pblpool_create(struct c4iw_rdev *rdev); int c4iw_rqtpool_create(struct c4iw_rdev *rdev); diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 61b8bdb9423d..c314d8fdfbba 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c 
+++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -342,9 +342,12 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro props->vendor_part_id = (u32)dev->rdev.lldi.pdev->device; props->max_mr_size = T4_MAX_MR_SIZE; props->max_qp = dev->rdev.lldi.vr->qp.size / 2; + props->max_srq = dev->rdev.lldi.vr->srq.size; props->max_qp_wr = dev->rdev.hw_queue.t4_max_qp_depth; + props->max_srq_wr = dev->rdev.hw_queue.t4_max_qp_depth; props->max_send_sge = min(T4_MAX_SEND_SGE, T4_MAX_WRITE_SGE); props->max_recv_sge = T4_MAX_RECV_SGE; + props->max_srq_sge = T4_MAX_RECV_SGE; props->max_sge_rd = 1; props->max_res_rd_atom = dev->rdev.lldi.max_ird_adapter; props->max_qp_rd_atom = min(dev->rdev.lldi.max_ordird_qp, @@ -593,7 +596,10 @@ void c4iw_register_device(struct work_struct *work) (1ull << IB_USER_VERBS_CMD_POLL_CQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV); + (1ull << IB_USER_VERBS_CMD_POST_RECV) | + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); dev->ibdev.node_type = RDMA_NODE_RNIC; BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC)); @@ -615,6 +621,9 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.modify_qp = c4iw_ib_modify_qp; dev->ibdev.query_qp = c4iw_ib_query_qp; dev->ibdev.destroy_qp = c4iw_destroy_qp; + dev->ibdev.create_srq = c4iw_create_srq; + dev->ibdev.modify_srq = c4iw_modify_srq; + dev->ibdev.destroy_srq = c4iw_destroy_srq; dev->ibdev.create_cq = c4iw_create_cq; dev->ibdev.destroy_cq = c4iw_destroy_cq; dev->ibdev.resize_cq = c4iw_resize_cq; @@ -632,6 +641,7 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.req_notify_cq = c4iw_arm_cq; dev->ibdev.post_send = c4iw_post_send; dev->ibdev.post_recv = c4iw_post_receive; + dev->ibdev.post_srq_recv = c4iw_post_srq_recv; dev->ibdev.alloc_hw_stats = c4iw_alloc_stats; dev->ibdev.get_hw_stats = c4iw_get_mib; dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index aef53305f1c3..08dc555942af 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -147,21 +147,24 @@ static int alloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq, int user) } static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, - struct c4iw_dev_ucontext *uctx) + struct c4iw_dev_ucontext *uctx, int has_rq) { /* * uP clears EQ contexts when the connection exits rdma mode, * so no need to post a RESET WR for these EQs. 
*/ - dma_free_coherent(&(rdev->lldi.pdev->dev), - wq->rq.memsize, wq->rq.queue, - dma_unmap_addr(&wq->rq, mapping)); dealloc_sq(rdev, &wq->sq); - c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); - kfree(wq->rq.sw_rq); kfree(wq->sq.sw_sq); - c4iw_put_qpid(rdev, wq->rq.qid, uctx); c4iw_put_qpid(rdev, wq->sq.qid, uctx); + + if (has_rq) { + dma_free_coherent(&rdev->lldi.pdev->dev, + wq->rq.memsize, wq->rq.queue, + dma_unmap_addr(&wq->rq, mapping)); + c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); + kfree(wq->rq.sw_rq); + c4iw_put_qpid(rdev, wq->rq.qid, uctx); + } return 0; } @@ -195,7 +198,8 @@ void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid, static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, struct t4_cq *rcq, struct t4_cq *scq, struct c4iw_dev_ucontext *uctx, - struct c4iw_wr_wait *wr_waitp) + struct c4iw_wr_wait *wr_waitp, + int need_rq) { int user = (uctx != &rdev->uctx); struct fw_ri_res_wr *res_wr; @@ -209,10 +213,12 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, if (!wq->sq.qid) return -ENOMEM; - wq->rq.qid = c4iw_get_qpid(rdev, uctx); - if (!wq->rq.qid) { - ret = -ENOMEM; - goto free_sq_qid; + if (need_rq) { + wq->rq.qid = c4iw_get_qpid(rdev, uctx); + if (!wq->rq.qid) { + ret = -ENOMEM; + goto free_sq_qid; + } } if (!user) { @@ -220,25 +226,31 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, GFP_KERNEL); if (!wq->sq.sw_sq) { ret = -ENOMEM; - goto free_rq_qid; + goto free_rq_qid;//FIXME } - wq->rq.sw_rq = kcalloc(wq->rq.size, sizeof(*wq->rq.sw_rq), - GFP_KERNEL); - if (!wq->rq.sw_rq) { - ret = -ENOMEM; - goto free_sw_sq; + if (need_rq) { + wq->rq.sw_rq = kcalloc(wq->rq.size, + sizeof(*wq->rq.sw_rq), + GFP_KERNEL); + if (!wq->rq.sw_rq) { + ret = -ENOMEM; + goto free_sw_sq; + } } } - /* - * RQT must be a power of 2 and at least 16 deep. - */ - wq->rq.rqt_size = roundup_pow_of_two(max_t(u16, wq->rq.size, 16)); - wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size); - if (!wq->rq.rqt_hwaddr) { - ret = -ENOMEM; - goto free_sw_rq; + if (need_rq) { + /* + * RQT must be a power of 2 and at least 16 deep. 
+ */ + wq->rq.rqt_size = + roundup_pow_of_two(max_t(u16, wq->rq.size, 16)); + wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size); + if (!wq->rq.rqt_hwaddr) { + ret = -ENOMEM; + goto free_sw_rq; + } } ret = alloc_sq(rdev, &wq->sq, user); @@ -247,34 +259,39 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, memset(wq->sq.queue, 0, wq->sq.memsize); dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); - wq->rq.queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), - wq->rq.memsize, &(wq->rq.dma_addr), - GFP_KERNEL); - if (!wq->rq.queue) { - ret = -ENOMEM; - goto free_sq; + if (need_rq) { + wq->rq.queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, + wq->rq.memsize, + &wq->rq.dma_addr, + GFP_KERNEL); + if (!wq->rq.queue) { + ret = -ENOMEM; + goto free_sq; + } + pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n", + wq->sq.queue, + (unsigned long long)virt_to_phys(wq->sq.queue), + wq->rq.queue, + (unsigned long long)virt_to_phys(wq->rq.queue)); + memset(wq->rq.queue, 0, wq->rq.memsize); + dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); } - pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n", - wq->sq.queue, - (unsigned long long)virt_to_phys(wq->sq.queue), - wq->rq.queue, - (unsigned long long)virt_to_phys(wq->rq.queue)); - memset(wq->rq.queue, 0, wq->rq.memsize); - dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); wq->db = rdev->lldi.db_reg; wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS, &wq->sq.bar2_qid, user ? &wq->sq.bar2_pa : NULL); - wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid, T4_BAR2_QTYPE_EGRESS, - &wq->rq.bar2_qid, - user ? &wq->rq.bar2_pa : NULL); + if (need_rq) + wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid, + T4_BAR2_QTYPE_EGRESS, + &wq->rq.bar2_qid, + user ? &wq->rq.bar2_pa : NULL); /* * User mode must have bar2 access. */ - if (user && (!wq->sq.bar2_pa || !wq->rq.bar2_pa)) { + if (user && (!wq->sq.bar2_pa || (need_rq && !wq->rq.bar2_pa))) { pr_warn("%s: sqid %u or rqid %u not in BAR2 range\n", pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid); goto free_dma; @@ -285,7 +302,8 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, /* build fw_ri_res_wr */ wr_len = sizeof *res_wr + 2 * sizeof *res; - + if (need_rq) + wr_len += sizeof(*res); skb = alloc_skb(wr_len, GFP_KERNEL); if (!skb) { ret = -ENOMEM; @@ -296,7 +314,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, res_wr = __skb_put_zero(skb, wr_len); res_wr->op_nres = cpu_to_be32( FW_WR_OP_V(FW_RI_RES_WR) | - FW_RI_RES_WR_NRES_V(2) | + FW_RI_RES_WR_NRES_V(need_rq ? 2 : 1) | FW_WR_COMPL_F); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); res_wr->cookie = (uintptr_t)wr_waitp; @@ -327,30 +345,36 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, FW_RI_RES_WR_EQSIZE_V(eqsize)); res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid); res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr); - res++; - res->u.sqrq.restype = FW_RI_RES_TYPE_RQ; - res->u.sqrq.op = FW_RI_RES_OP_WRITE; - /* - * eqsize is the number of 64B entries plus the status page size. 
- */ - eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + - rdev->hw_queue.t4_eq_status_entries; - res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( - FW_RI_RES_WR_HOSTFCMODE_V(0) | /* no host cidx updates */ - FW_RI_RES_WR_CPRIO_V(0) | /* don't keep in chip cache */ - FW_RI_RES_WR_PCIECHN_V(0) | /* set by uP at ri_init time */ - FW_RI_RES_WR_IQID_V(rcq->cqid)); - res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( - FW_RI_RES_WR_DCAEN_V(0) | - FW_RI_RES_WR_DCACPU_V(0) | - FW_RI_RES_WR_FBMIN_V(2) | - FW_RI_RES_WR_FBMAX_V(3) | - FW_RI_RES_WR_CIDXFTHRESHO_V(0) | - FW_RI_RES_WR_CIDXFTHRESH_V(0) | - FW_RI_RES_WR_EQSIZE_V(eqsize)); - res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid); - res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr); + if (need_rq) { + res++; + res->u.sqrq.restype = FW_RI_RES_TYPE_RQ; + res->u.sqrq.op = FW_RI_RES_OP_WRITE; + + /* + * eqsize is the number of 64B entries plus the status page size + */ + eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + + rdev->hw_queue.t4_eq_status_entries; + res->u.sqrq.fetchszm_to_iqid = + /* no host cidx updates */ + cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) | + /* don't keep in chip cache */ + FW_RI_RES_WR_CPRIO_V(0) | + /* set by uP at ri_init time */ + FW_RI_RES_WR_PCIECHN_V(0) | + FW_RI_RES_WR_IQID_V(rcq->cqid)); + res->u.sqrq.dcaen_to_eqsize = + cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) | + FW_RI_RES_WR_DCACPU_V(0) | + FW_RI_RES_WR_FBMIN_V(2) | + FW_RI_RES_WR_FBMAX_V(3) | + FW_RI_RES_WR_CIDXFTHRESHO_V(0) | + FW_RI_RES_WR_CIDXFTHRESH_V(0) | + FW_RI_RES_WR_EQSIZE_V(eqsize)); + res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid); + res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr); + } c4iw_init_wr_wait(wr_waitp); ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->sq.qid, __func__); @@ -363,19 +387,23 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, return 0; free_dma: - dma_free_coherent(&(rdev->lldi.pdev->dev), - wq->rq.memsize, wq->rq.queue, - dma_unmap_addr(&wq->rq, mapping)); + if (need_rq) + dma_free_coherent(&rdev->lldi.pdev->dev, + wq->rq.memsize, wq->rq.queue, + dma_unmap_addr(&wq->rq, mapping)); free_sq: dealloc_sq(rdev, &wq->sq); free_hwaddr: - c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); + if (need_rq) + c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); free_sw_rq: - kfree(wq->rq.sw_rq); + if (need_rq) + kfree(wq->rq.sw_rq); free_sw_sq: kfree(wq->sq.sw_sq); free_rq_qid: - c4iw_put_qpid(rdev, wq->rq.qid, uctx); + if (need_rq) + c4iw_put_qpid(rdev, wq->rq.qid, uctx); free_sq_qid: c4iw_put_qpid(rdev, wq->sq.qid, uctx); return ret; @@ -605,6 +633,20 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, return 0; } +static int build_srq_recv(union t4_recv_wr *wqe, struct ib_recv_wr *wr, + u8 *len16) +{ + int ret; + + ret = build_isgl((__be64 *)wqe, (__be64 *)(wqe + 1), + &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL); + if (ret) + return ret; + *len16 = DIV_ROUND_UP(sizeof(wqe->recv) + + wr->num_sge * sizeof(struct fw_ri_sge), 16); + return 0; +} + static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr, struct ib_reg_wr *wr, struct c4iw_mr *mhp, u8 *len16) @@ -721,7 +763,7 @@ static void free_qp_work(struct work_struct *work) pr_debug("qhp %p ucontext %p\n", qhp, ucontext); destroy_qp(&rhp->rdev, &qhp->wq, - ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx, !qhp->srq); if (ucontext) c4iw_put_ucontext(ucontext); @@ -1145,6 +1187,89 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, return err; } +static void defer_srq_wr(struct t4_srq *srq, union t4_recv_wr *wqe, + u64 wr_id, u8 len16) +{ + struct t4_srq_pending_wr *pwr = &srq->pending_wrs[srq->pending_pidx]; + + pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u ooo_count %u wr_id 0x%llx pending_cidx %u pending_pidx %u pending_in_use %u\n", + __func__, srq->cidx, srq->pidx, srq->wq_pidx, + srq->in_use, srq->ooo_count, + (unsigned long long)wr_id, srq->pending_cidx, + srq->pending_pidx, srq->pending_in_use); + pwr->wr_id = wr_id; + pwr->len16 = len16; + memcpy(&pwr->wqe, wqe, len16 * 16); + t4_srq_produce_pending_wr(srq); +} + +int c4iw_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + union t4_recv_wr *wqe, lwqe; + struct c4iw_srq *srq; + unsigned long flag; + u8 len16 = 0; + u16 idx = 0; + int err = 0; + u32 num_wrs; + + srq = to_c4iw_srq(ibsrq); + spin_lock_irqsave(&srq->lock, flag); + num_wrs = t4_srq_avail(&srq->wq); + if (num_wrs == 0) { + spin_unlock_irqrestore(&srq->lock, flag); + return -ENOMEM; + } + while (wr) { + if (wr->num_sge > T4_MAX_RECV_SGE) { + err = -EINVAL; + *bad_wr = wr; + break; + } + wqe = &lwqe; + if (num_wrs) + err = build_srq_recv(wqe, wr, &len16); + else + err = -ENOMEM; + if (err) { + *bad_wr = wr; + break; + } + + wqe->recv.opcode = FW_RI_RECV_WR; + wqe->recv.r1 = 0; + wqe->recv.wrid = srq->wq.pidx; + wqe->recv.r2[0] = 0; + wqe->recv.r2[1] = 0; + wqe->recv.r2[2] = 0; + wqe->recv.len16 = len16; + + if (srq->wq.ooo_count || + srq->wq.pending_in_use || + srq->wq.sw_rq[srq->wq.pidx].valid) { + defer_srq_wr(&srq->wq, wqe, wr->wr_id, len16); + } else { + srq->wq.sw_rq[srq->wq.pidx].wr_id = wr->wr_id; + srq->wq.sw_rq[srq->wq.pidx].valid = 1; + c4iw_copy_wr_to_srq(&srq->wq, wqe, len16); + pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u wr_id 0x%llx\n", + __func__, srq->wq.cidx, + srq->wq.pidx, srq->wq.wq_pidx, + srq->wq.in_use, + (unsigned long long)wr->wr_id); + t4_srq_produce(&srq->wq, len16); + idx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE); + } + wr = wr->next; + num_wrs--; + } + if (idx) + t4_ring_srq_db(&srq->wq, idx, len16, wqe); + spin_unlock_irqrestore(&srq->lock, flag); + return err; +} + static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type, u8 *ecode) { @@ -1321,7 +1446,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, struct c4iw_cq *schp) { int count; - int rq_flushed, sq_flushed; + int rq_flushed = 0, sq_flushed; unsigned long flag; pr_debug("qhp %p rchp %p schp %p\n", qhp, rchp, schp); @@ -1340,11 +1465,13 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, return; } qhp->wq.flushed = 1; - t4_set_wq_in_error(&qhp->wq); + t4_set_wq_in_error(&qhp->wq, 0); c4iw_flush_hw_cq(rchp, qhp); - c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); - rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); + if (!qhp->srq) { + c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); + rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); + } if (schp != rchp) c4iw_flush_hw_cq(schp, qhp); @@ -1388,7 +1515,7 @@ static void flush_qp(struct c4iw_qp *qhp) schp = to_c4iw_cq(qhp->ibqp.send_cq); if (qhp->ibqp.uobject) { - t4_set_wq_in_error(&qhp->wq); + t4_set_wq_in_error(&qhp->wq, 0); t4_set_cq_in_error(&rchp->cq); spin_lock_irqsave(&rchp->comp_handler_lock, flag); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); @@ 
-1517,16 +1644,21 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) wqe->u.init.pdid = cpu_to_be32(qhp->attr.pd); wqe->u.init.qpid = cpu_to_be32(qhp->wq.sq.qid); wqe->u.init.sq_eqid = cpu_to_be32(qhp->wq.sq.qid); - wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid); + if (qhp->srq) { + wqe->u.init.rq_eqid = cpu_to_be32(FW_RI_INIT_RQEQID_SRQ | + qhp->srq->idx); + } else { + wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid); + wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size); + wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr - + rhp->rdev.lldi.vr->rq.start); + } wqe->u.init.scqid = cpu_to_be32(qhp->attr.scq); wqe->u.init.rcqid = cpu_to_be32(qhp->attr.rcq); wqe->u.init.ord_max = cpu_to_be32(qhp->attr.max_ord); wqe->u.init.ird_max = cpu_to_be32(qhp->attr.max_ird); wqe->u.init.iss = cpu_to_be32(qhp->ep->snd_seq); wqe->u.init.irs = cpu_to_be32(qhp->ep->rcv_seq); - wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size); - wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr - - rhp->rdev.lldi.vr->rq.start); if (qhp->attr.mpa_attr.initiator) build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init); @@ -1643,7 +1775,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, case C4IW_QP_STATE_RTS: switch (attrs->next_state) { case C4IW_QP_STATE_CLOSING: - t4_set_wq_in_error(&qhp->wq); + t4_set_wq_in_error(&qhp->wq, 0); set_state(qhp, C4IW_QP_STATE_CLOSING); ep = qhp->ep; if (!internal) { @@ -1656,7 +1788,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, goto err; break; case C4IW_QP_STATE_TERMINATE: - t4_set_wq_in_error(&qhp->wq); + t4_set_wq_in_error(&qhp->wq, 0); set_state(qhp, C4IW_QP_STATE_TERMINATE); qhp->attr.layer_etype = attrs->layer_etype; qhp->attr.ecode = attrs->ecode; @@ -1673,7 +1805,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, } break; case C4IW_QP_STATE_ERROR: - t4_set_wq_in_error(&qhp->wq); + t4_set_wq_in_error(&qhp->wq, 0); set_state(qhp, C4IW_QP_STATE_ERROR); if (!internal) { abort = 1; @@ -1819,7 +1951,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, struct c4iw_cq *schp; struct c4iw_cq *rchp; struct c4iw_create_qp_resp uresp; - unsigned int sqsize, rqsize; + unsigned int sqsize, rqsize = 0; struct c4iw_ucontext *ucontext; int ret; struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm; @@ -1840,11 +1972,13 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE) return ERR_PTR(-EINVAL); - if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size) - return ERR_PTR(-E2BIG); - rqsize = attrs->cap.max_recv_wr + 1; - if (rqsize < 8) - rqsize = 8; + if (!attrs->srq) { + if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size) + return ERR_PTR(-E2BIG); + rqsize = attrs->cap.max_recv_wr + 1; + if (rqsize < 8) + rqsize = 8; + } if (attrs->cap.max_send_wr > rhp->rdev.hw_queue.t4_max_sq_size) return ERR_PTR(-E2BIG); @@ -1869,19 +2003,23 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, (sqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * sizeof(*qhp->wq.sq.queue) + 16 * sizeof(__be64); qhp->wq.sq.flush_cidx = -1; - qhp->wq.rq.size = rqsize; - qhp->wq.rq.memsize = - (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * - sizeof(*qhp->wq.rq.queue); + if (!attrs->srq) { + qhp->wq.rq.size = rqsize; + qhp->wq.rq.memsize = + (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * + sizeof(*qhp->wq.rq.queue); + } if (ucontext) { qhp->wq.sq.memsize = 
roundup(qhp->wq.sq.memsize, PAGE_SIZE); - qhp->wq.rq.memsize = roundup(qhp->wq.rq.memsize, PAGE_SIZE); + if (!attrs->srq) + qhp->wq.rq.memsize = + roundup(qhp->wq.rq.memsize, PAGE_SIZE); } ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, - qhp->wr_waitp); + qhp->wr_waitp, !attrs->srq); if (ret) goto err_free_wr_wait; @@ -1894,10 +2032,12 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, qhp->attr.scq = ((struct c4iw_cq *) attrs->send_cq)->cq.cqid; qhp->attr.rcq = ((struct c4iw_cq *) attrs->recv_cq)->cq.cqid; qhp->attr.sq_num_entries = attrs->cap.max_send_wr; - qhp->attr.rq_num_entries = attrs->cap.max_recv_wr; qhp->attr.sq_max_sges = attrs->cap.max_send_sge; qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge; - qhp->attr.rq_max_sges = attrs->cap.max_recv_sge; + if (!attrs->srq) { + qhp->attr.rq_num_entries = attrs->cap.max_recv_wr; + qhp->attr.rq_max_sges = attrs->cap.max_recv_sge; + } qhp->attr.state = C4IW_QP_STATE_IDLE; qhp->attr.next_state = C4IW_QP_STATE_IDLE; qhp->attr.enable_rdma_read = 1; @@ -1922,20 +2062,25 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, ret = -ENOMEM; goto err_remove_handle; } - rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL); - if (!rq_key_mm) { - ret = -ENOMEM; - goto err_free_sq_key; + if (!attrs->srq) { + rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL); + if (!rq_key_mm) { + ret = -ENOMEM; + goto err_free_sq_key; + } } sq_db_key_mm = kmalloc(sizeof(*sq_db_key_mm), GFP_KERNEL); if (!sq_db_key_mm) { ret = -ENOMEM; goto err_free_rq_key; } - rq_db_key_mm = kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL); - if (!rq_db_key_mm) { - ret = -ENOMEM; - goto err_free_sq_db_key; + if (!attrs->srq) { + rq_db_key_mm = + kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL); + if (!rq_db_key_mm) { + ret = -ENOMEM; + goto err_free_sq_db_key; + } } if (t4_sq_onchip(&qhp->wq.sq)) { ma_sync_key_mm = kmalloc(sizeof(*ma_sync_key_mm), @@ -1951,9 +2096,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, uresp.sqid = qhp->wq.sq.qid; uresp.sq_size = qhp->wq.sq.size; uresp.sq_memsize = qhp->wq.sq.memsize; - uresp.rqid = qhp->wq.rq.qid; - uresp.rq_size = qhp->wq.rq.size; - uresp.rq_memsize = qhp->wq.rq.memsize; + if (!attrs->srq) { + uresp.rqid = qhp->wq.rq.qid; + uresp.rq_size = qhp->wq.rq.size; + uresp.rq_memsize = qhp->wq.rq.memsize; + } spin_lock(&ucontext->mmap_lock); if (ma_sync_key_mm) { uresp.ma_sync_key = ucontext->key; @@ -1963,12 +2110,16 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, } uresp.sq_key = ucontext->key; ucontext->key += PAGE_SIZE; - uresp.rq_key = ucontext->key; - ucontext->key += PAGE_SIZE; + if (!attrs->srq) { + uresp.rq_key = ucontext->key; + ucontext->key += PAGE_SIZE; + } uresp.sq_db_gts_key = ucontext->key; ucontext->key += PAGE_SIZE; - uresp.rq_db_gts_key = ucontext->key; - ucontext->key += PAGE_SIZE; + if (!attrs->srq) { + uresp.rq_db_gts_key = ucontext->key; + ucontext->key += PAGE_SIZE; + } spin_unlock(&ucontext->mmap_lock); ret = ib_copy_to_udata(udata, &uresp, sizeof uresp); if (ret) @@ -1977,18 +2128,23 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, sq_key_mm->addr = qhp->wq.sq.phys_addr; sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize); insert_mmap(ucontext, sq_key_mm); - rq_key_mm->key = uresp.rq_key; - rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue); - rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize); - insert_mmap(ucontext, 
rq_key_mm); + if (!attrs->srq) { + rq_key_mm->key = uresp.rq_key; + rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue); + rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize); + insert_mmap(ucontext, rq_key_mm); + } sq_db_key_mm->key = uresp.sq_db_gts_key; sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa; sq_db_key_mm->len = PAGE_SIZE; insert_mmap(ucontext, sq_db_key_mm); - rq_db_key_mm->key = uresp.rq_db_gts_key; - rq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.rq.bar2_pa; - rq_db_key_mm->len = PAGE_SIZE; - insert_mmap(ucontext, rq_db_key_mm); + if (!attrs->srq) { + rq_db_key_mm->key = uresp.rq_db_gts_key; + rq_db_key_mm->addr = + (u64)(unsigned long)qhp->wq.rq.bar2_pa; + rq_db_key_mm->len = PAGE_SIZE; + insert_mmap(ucontext, rq_db_key_mm); + } if (ma_sync_key_mm) { ma_sync_key_mm->key = uresp.ma_sync_key; ma_sync_key_mm->addr = @@ -2001,7 +2157,19 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, c4iw_get_ucontext(ucontext); qhp->ucontext = ucontext; } + if (!attrs->srq) { + qhp->wq.qp_errp = + &qhp->wq.rq.queue[qhp->wq.rq.size].status.qp_err; + } else { + qhp->wq.qp_errp = + &qhp->wq.sq.queue[qhp->wq.sq.size].status.qp_err; + qhp->wq.srqidxp = + &qhp->wq.sq.queue[qhp->wq.sq.size].status.srqidx; + } + qhp->ibqp.qp_num = qhp->wq.sq.qid; + if (attrs->srq) + qhp->srq = to_c4iw_srq(attrs->srq); INIT_LIST_HEAD(&qhp->db_fc_entry); pr_debug("sq id %u size %u memsize %zu num_entries %u rq id %u size %u memsize %zu num_entries %u\n", qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize, @@ -2011,18 +2179,20 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, err_free_ma_sync_key: kfree(ma_sync_key_mm); err_free_rq_db_key: - kfree(rq_db_key_mm); + if (!attrs->srq) + kfree(rq_db_key_mm); err_free_sq_db_key: kfree(sq_db_key_mm); err_free_rq_key: - kfree(rq_key_mm); + if (!attrs->srq) + kfree(rq_key_mm); err_free_sq_key: kfree(sq_key_mm); err_remove_handle: remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); err_destroy_qp: destroy_qp(&rhp->rdev, &qhp->wq, - ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !attrs->srq); err_free_wr_wait: c4iw_put_wr_wait(qhp->wr_waitp); err_free_qhp: @@ -2088,6 +2258,45 @@ struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn) return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn); } +void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq) +{ + struct ib_event event = {0}; + + event.device = &srq->rhp->ibdev; + event.element.srq = &srq->ibsrq; + event.event = IB_EVENT_SRQ_LIMIT_REACHED; + ib_dispatch_event(&event); +} + +int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata) +{ + struct c4iw_srq *srq = to_c4iw_srq(ib_srq); + int ret = 0; + + /* + * XXX 0 mask == a SW interrupt for srq_limit reached... + */ + if (udata && !srq_attr_mask) { + c4iw_dispatch_srq_limit_reached_event(srq); + goto out; + } + + /* no support for this yet */ + if (srq_attr_mask & IB_SRQ_MAX_WR) { + ret = -EINVAL; + goto out; + } + + if (!udata && (srq_attr_mask & IB_SRQ_LIMIT)) { + srq->armed = true; + srq->srq_limit = attr->srq_limit; + } +out: + return ret; +} + int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr) { @@ -2104,3 +2313,358 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, init_attr->sq_sig_type = qhp->sq_sig_all ? 
IB_SIGNAL_ALL_WR : 0; return 0; } + +static void free_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx, + struct c4iw_wr_wait *wr_waitp) +{ + struct c4iw_rdev *rdev = &srq->rhp->rdev; + struct sk_buff *skb = srq->destroy_skb; + struct t4_srq *wq = &srq->wq; + struct fw_ri_res_wr *res_wr; + struct fw_ri_res *res; + int wr_len; + + wr_len = sizeof(*res_wr) + sizeof(*res); + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); + memset(res_wr, 0, wr_len); + res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) | + FW_RI_RES_WR_NRES_V(1) | + FW_WR_COMPL_F); + res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); + res_wr->cookie = (uintptr_t)wr_waitp; + res = res_wr->res; + res->u.srq.restype = FW_RI_RES_TYPE_SRQ; + res->u.srq.op = FW_RI_RES_OP_RESET; + res->u.srq.srqid = cpu_to_be32(srq->idx); + res->u.srq.eqid = cpu_to_be32(wq->qid); + + c4iw_init_wr_wait(wr_waitp); + c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__); + + dma_free_coherent(&rdev->lldi.pdev->dev, + wq->memsize, wq->queue, + pci_unmap_addr(wq, mapping)); + c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size); + kfree(wq->sw_rq); + c4iw_put_qpid(rdev, wq->qid, uctx); +} + +static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx, + struct c4iw_wr_wait *wr_waitp) +{ + struct c4iw_rdev *rdev = &srq->rhp->rdev; + int user = (uctx != &rdev->uctx); + struct t4_srq *wq = &srq->wq; + struct fw_ri_res_wr *res_wr; + struct fw_ri_res *res; + struct sk_buff *skb; + int wr_len; + int eqsize; + int ret = -ENOMEM; + + wq->qid = c4iw_get_qpid(rdev, uctx); + if (!wq->qid) + goto err; + + if (!user) { + wq->sw_rq = kcalloc(wq->size, sizeof(*wq->sw_rq), + GFP_KERNEL); + if (!wq->sw_rq) + goto err_put_qpid; + wq->pending_wrs = kcalloc(srq->wq.size, + sizeof(*srq->wq.pending_wrs), + GFP_KERNEL); + if (!wq->pending_wrs) + goto err_free_sw_rq; + } + + wq->rqt_size = wq->size; + wq->rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rqt_size); + if (!wq->rqt_hwaddr) + goto err_free_pending_wrs; + wq->rqt_abs_idx = (wq->rqt_hwaddr - rdev->lldi.vr->rq.start) >> + T4_RQT_ENTRY_SHIFT; + + wq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, + wq->memsize, &wq->dma_addr, + GFP_KERNEL); + if (!wq->queue) + goto err_free_rqtpool; + + memset(wq->queue, 0, wq->memsize); + pci_unmap_addr_set(wq, mapping, wq->dma_addr); + + wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, T4_BAR2_QTYPE_EGRESS, + &wq->bar2_qid, + user ? &wq->bar2_pa : NULL); + + /* + * User mode must have bar2 access. + */ + + if (user && !wq->bar2_va) { + pr_warn(MOD "%s: srqid %u not in BAR2 range.\n", + pci_name(rdev->lldi.pdev), wq->qid); + ret = -EINVAL; + goto err_free_queue; + } + + /* build fw_ri_res_wr */ + wr_len = sizeof(*res_wr) + sizeof(*res); + + skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL); + if (!skb) + goto err_free_queue; + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); + memset(res_wr, 0, wr_len); + res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) | + FW_RI_RES_WR_NRES_V(1) | + FW_WR_COMPL_F); + res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); + res_wr->cookie = (uintptr_t)wr_waitp; + res = res_wr->res; + res->u.srq.restype = FW_RI_RES_TYPE_SRQ; + res->u.srq.op = FW_RI_RES_OP_WRITE; + + /* + * eqsize is the number of 64B entries plus the status page size. 
+ */ + eqsize = wq->size * T4_RQ_NUM_SLOTS + + rdev->hw_queue.t4_eq_status_entries; + res->u.srq.eqid = cpu_to_be32(wq->qid); + res->u.srq.fetchszm_to_iqid = + /* no host cidx updates */ + cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) | + FW_RI_RES_WR_CPRIO_V(0) | /* don't keep in chip cache */ + FW_RI_RES_WR_PCIECHN_V(0) | /* set by uP at ri_init time */ + FW_RI_RES_WR_FETCHRO_V(0)); /* relaxed_ordering */ + res->u.srq.dcaen_to_eqsize = + cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) | + FW_RI_RES_WR_DCACPU_V(0) | + FW_RI_RES_WR_FBMIN_V(2) | + FW_RI_RES_WR_FBMAX_V(3) | + FW_RI_RES_WR_CIDXFTHRESHO_V(0) | + FW_RI_RES_WR_CIDXFTHRESH_V(0) | + FW_RI_RES_WR_EQSIZE_V(eqsize)); + res->u.srq.eqaddr = cpu_to_be64(wq->dma_addr); + res->u.srq.srqid = cpu_to_be32(srq->idx); + res->u.srq.pdid = cpu_to_be32(srq->pdid); + res->u.srq.hwsrqsize = cpu_to_be32(wq->rqt_size); + res->u.srq.hwsrqaddr = cpu_to_be32(wq->rqt_hwaddr - + rdev->lldi.vr->rq.start); + + c4iw_init_wr_wait(wr_waitp); + + ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->qid, __func__); + if (ret) + goto err_free_queue; + + pr_debug("%s srq %u eqid %u pdid %u queue va %p pa 0x%llx\n" + " bar2_addr %p rqt addr 0x%x size %d\n", + __func__, srq->idx, wq->qid, srq->pdid, wq->queue, + (u64)virt_to_phys(wq->queue), wq->bar2_va, + wq->rqt_hwaddr, wq->rqt_size); + + return 0; +err_free_queue: + dma_free_coherent(&rdev->lldi.pdev->dev, + wq->memsize, wq->queue, + pci_unmap_addr(wq, mapping)); +err_free_rqtpool: + c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size); +err_free_pending_wrs: + if (!user) + kfree(wq->pending_wrs); +err_free_sw_rq: + if (!user) + kfree(wq->sw_rq); +err_put_qpid: + c4iw_put_qpid(rdev, wq->qid, uctx); +err: + return ret; +} + +void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16) +{ + u64 *src, *dst; + + src = (u64 *)wqe; + dst = (u64 *)((u8 *)srq->queue + srq->wq_pidx * T4_EQ_ENTRY_SIZE); + while (len16) { + *dst++ = *src++; + if (dst >= (u64 *)&srq->queue[srq->size]) + dst = (u64 *)srq->queue; + *dst++ = *src++; + if (dst >= (u64 *)&srq->queue[srq->size]) + dst = (u64 *)srq->queue; + len16--; + } +} + +struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs, + struct ib_udata *udata) +{ + struct c4iw_dev *rhp; + struct c4iw_srq *srq; + struct c4iw_pd *php; + struct c4iw_create_srq_resp uresp; + struct c4iw_ucontext *ucontext; + struct c4iw_mm_entry *srq_key_mm, *srq_db_key_mm; + int rqsize; + int ret; + int wr_len; + + pr_debug("%s ib_pd %p\n", __func__, pd); + + php = to_c4iw_pd(pd); + rhp = php->rhp; + + if (!rhp->rdev.lldi.vr->srq.size) + return ERR_PTR(-EINVAL); + if (attrs->attr.max_wr > rhp->rdev.hw_queue.t4_max_rq_size) + return ERR_PTR(-E2BIG); + if (attrs->attr.max_sge > T4_MAX_RECV_SGE) + return ERR_PTR(-E2BIG); + + /* + * SRQ RQT and RQ must be a power of 2 and at least 16 deep. + */ + rqsize = attrs->attr.max_wr + 1; + rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16)); + + ucontext = pd->uobject ? 
to_c4iw_ucontext(pd->uobject->context) : NULL; + + srq = kzalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) + return ERR_PTR(-ENOMEM); + + srq->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); + if (!srq->wr_waitp) { + ret = -ENOMEM; + goto err_free_srq; + } + + srq->idx = c4iw_alloc_srq_idx(&rhp->rdev); + if (srq->idx < 0) { + ret = -ENOMEM; + goto err_free_wr_wait; + } + + wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res); + srq->destroy_skb = alloc_skb(wr_len, GFP_KERNEL); + if (!srq->destroy_skb) { + ret = -ENOMEM; + goto err_free_srq_idx; + } + + srq->rhp = rhp; + srq->pdid = php->pdid; + + srq->wq.size = rqsize; + srq->wq.memsize = + (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * + sizeof(*srq->wq.queue); + if (ucontext) + srq->wq.memsize = roundup(srq->wq.memsize, PAGE_SIZE); + + ret = alloc_srq_queue(srq, ucontext ? &ucontext->uctx : + &rhp->rdev.uctx, srq->wr_waitp); + if (ret) + goto err_free_skb; + attrs->attr.max_wr = rqsize - 1; + + if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6) + srq->flags = T4_SRQ_LIMIT_SUPPORT; + + ret = insert_handle(rhp, &rhp->qpidr, srq, srq->wq.qid); + if (ret) + goto err_free_queue; + + if (udata) { + srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL); + if (!srq_key_mm) { + ret = -ENOMEM; + goto err_remove_handle; + } + srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL); + if (!srq_db_key_mm) { + ret = -ENOMEM; + goto err_free_srq_key_mm; + } + uresp.flags = srq->flags; + uresp.qid_mask = rhp->rdev.qpmask; + uresp.srqid = srq->wq.qid; + uresp.srq_size = srq->wq.size; + uresp.srq_memsize = srq->wq.memsize; + uresp.rqt_abs_idx = srq->wq.rqt_abs_idx; + spin_lock(&ucontext->mmap_lock); + uresp.srq_key = ucontext->key; + ucontext->key += PAGE_SIZE; + uresp.srq_db_gts_key = ucontext->key; + ucontext->key += PAGE_SIZE; + spin_unlock(&ucontext->mmap_lock); + ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (ret) + goto err_free_srq_db_key_mm; + srq_key_mm->key = uresp.srq_key; + srq_key_mm->addr = virt_to_phys(srq->wq.queue); + srq_key_mm->len = PAGE_ALIGN(srq->wq.memsize); + insert_mmap(ucontext, srq_key_mm); + srq_db_key_mm->key = uresp.srq_db_gts_key; + srq_db_key_mm->addr = (u64)(unsigned long)srq->wq.bar2_pa; + srq_db_key_mm->len = PAGE_SIZE; + insert_mmap(ucontext, srq_db_key_mm); + } + + pr_debug("%s srq qid %u idx %u size %u memsize %lu num_entries %u\n", + __func__, srq->wq.qid, srq->idx, srq->wq.size, + (unsigned long)srq->wq.memsize, attrs->attr.max_wr); + + spin_lock_init(&srq->lock); + return &srq->ibsrq; +err_free_srq_db_key_mm: + kfree(srq_db_key_mm); +err_free_srq_key_mm: + kfree(srq_key_mm); +err_remove_handle: + remove_handle(rhp, &rhp->qpidr, srq->wq.qid); +err_free_queue: + free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, + srq->wr_waitp); +err_free_skb: + if (srq->destroy_skb) + kfree_skb(srq->destroy_skb); +err_free_srq_idx: + c4iw_free_srq_idx(&rhp->rdev, srq->idx); +err_free_wr_wait: + c4iw_put_wr_wait(srq->wr_waitp); +err_free_srq: + kfree(srq); + return ERR_PTR(ret); +} + +int c4iw_destroy_srq(struct ib_srq *ibsrq) +{ + struct c4iw_dev *rhp; + struct c4iw_srq *srq; + struct c4iw_ucontext *ucontext; + + srq = to_c4iw_srq(ibsrq); + rhp = srq->rhp; + + pr_debug("%s id %d\n", __func__, srq->wq.qid); + + remove_handle(rhp, &rhp->qpidr, srq->wq.qid); + ucontext = ibsrq->uobject ? + to_c4iw_ucontext(ibsrq->uobject->context) : NULL; + free_srq_queue(srq, ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx, + srq->wr_waitp); + c4iw_free_srq_idx(&rhp->rdev, srq->idx); + c4iw_put_wr_wait(srq->wr_waitp); + kfree(srq); + return 0; +} diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c index 0ef25ae05e6f..57ed26b3cc21 100644 --- a/drivers/infiniband/hw/cxgb4/resource.c +++ b/drivers/infiniband/hw/cxgb4/resource.c @@ -53,7 +53,8 @@ static int c4iw_init_qid_table(struct c4iw_rdev *rdev) } /* nr_* must be power of 2 */ -int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid) +int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, + u32 nr_pdid, u32 nr_srqt) { int err = 0; err = c4iw_id_table_alloc(&rdev->resource.tpt_table, 0, nr_tpt, 1, @@ -67,7 +68,17 @@ int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid) nr_pdid, 1, 0); if (err) goto pdid_err; + if (!nr_srqt) + err = c4iw_id_table_alloc(&rdev->resource.srq_table, 0, + 1, 1, 0); + else + err = c4iw_id_table_alloc(&rdev->resource.srq_table, 0, + nr_srqt, 0, 0); + if (err) + goto srq_err; return 0; + srq_err: + c4iw_id_table_free(&rdev->resource.pdid_table); pdid_err: c4iw_id_table_free(&rdev->resource.qid_table); qid_err: @@ -371,13 +382,21 @@ void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size) int c4iw_rqtpool_create(struct c4iw_rdev *rdev) { unsigned rqt_start, rqt_chunk, rqt_top; + int skip = 0; rdev->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1); if (!rdev->rqt_pool) return -ENOMEM; - rqt_start = rdev->lldi.vr->rq.start; - rqt_chunk = rdev->lldi.vr->rq.size; + /* + * If SRQs are supported, then never use the first RQE from + * the RQT region. This is because HW uses RQT index 0 as NULL. + */ + if (rdev->lldi.vr->srq.size) + skip = T4_RQT_ENTRY_SIZE; + + rqt_start = rdev->lldi.vr->rq.start + skip; + rqt_chunk = rdev->lldi.vr->rq.size - skip; rqt_top = rqt_start + rqt_chunk; while (rqt_start < rqt_top) { @@ -405,6 +424,32 @@ void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev) kref_put(&rdev->rqt_kref, destroy_rqtpool); } +int c4iw_alloc_srq_idx(struct c4iw_rdev *rdev) +{ + int idx; + + idx = c4iw_id_alloc(&rdev->resource.srq_table); + mutex_lock(&rdev->stats.lock); + if (idx == -1) { + rdev->stats.srqt.fail++; + mutex_unlock(&rdev->stats.lock); + return -ENOMEM; + } + rdev->stats.srqt.cur++; + if (rdev->stats.srqt.cur > rdev->stats.srqt.max) + rdev->stats.srqt.max = rdev->stats.srqt.cur; + mutex_unlock(&rdev->stats.lock); + return idx; +} + +void c4iw_free_srq_idx(struct c4iw_rdev *rdev, int idx) +{ + c4iw_id_free(&rdev->resource.srq_table, idx); + mutex_lock(&rdev->stats.lock); + rdev->stats.srqt.cur--; + mutex_unlock(&rdev->stats.lock); +} + /* * On-Chip QP Memory. 
*/ diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 29a4dd5053f2..11d55fc2ded7 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -491,7 +491,6 @@ static inline void t4_rq_produce(struct t4_wq *wq, u8 len16) static inline void t4_rq_consume(struct t4_wq *wq) { wq->rq.in_use--; - wq->rq.msn++; if (++wq->rq.cidx == wq->rq.size) wq->rq.cidx = 0; } @@ -641,12 +640,14 @@ static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, static inline int t4_wq_in_error(struct t4_wq *wq) { - return wq->rq.queue[wq->rq.size].status.qp_err; + return *wq->qp_errp; } -static inline void t4_set_wq_in_error(struct t4_wq *wq) +static inline void t4_set_wq_in_error(struct t4_wq *wq, u32 srqidx) { - wq->rq.queue[wq->rq.size].status.qp_err = 1; + if (srqidx) + *wq->srqidxp = srqidx; + *wq->qp_errp = 1; } static inline void t4_disable_wq_db(struct t4_wq *wq) -- cgit From d238ca09810a02e9224c698094afda0e8003f419 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 24 Jul 2018 14:37:52 -0600 Subject: IB/usnic: usnic should not select INFINIBAND_USER_ACCESS This driver doesn't provide any kernel services; it only provides an interface via uverbs, so it should depend on, not select, uverbs support. Signed-off-by: Jason Gunthorpe Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/usnic/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/usnic/Kconfig b/drivers/infiniband/hw/usnic/Kconfig index 29ab11c34f3f..d1dae2af4ca9 100644 --- a/drivers/infiniband/hw/usnic/Kconfig +++ b/drivers/infiniband/hw/usnic/Kconfig @@ -1,10 +1,10 @@ config INFINIBAND_USNIC tristate "Verbs support for Cisco VIC" depends on NETDEVICES && ETHERNET && INET && PCI && INTEL_IOMMU + depends on INFINIBAND_USER_ACCESS select ENIC select NET_VENDOR_CISCO select PCI_IOV - select INFINIBAND_USER_ACCESS ---help--- This is a low-level driver for Cisco's Virtual Interface Cards (VICs), including the VIC 1240 and 1280 cards. -- cgit From cee104334c98dd04e9dd4d9a4fa4784f7f6aada9 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 16 Jul 2018 11:50:11 +0300 Subject: IB/core: Introduce and use sgid_attr in CM requests For RoCE, when CM requests are received for RC and UD connections, the netdevice of the incoming request is unavailable. Because of that, CM requests are always forwarded to the init_net namespace. Now that we have the GID attribute available, introduce an SGID attribute in incoming CM requests and refer to its netdevice. This is similar to the existing SGID attribute field in outgoing CM requests for RC and UD transports.
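As an illustration of what this enables, a CM event consumer can now derive the receiving net namespace directly from the event. The helper below is a sketch only, not part of this patch; its name and the init_net fallback are illustrative assumptions:

#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <rdma/ib_cm.h>

/* Sketch only, not part of this patch: resolve the net namespace of an
 * incoming RoCE CM REQ from its SGID attribute. Assumes the caller keeps
 * the event (and thus the GID attribute) alive while the result is used.
 */
static struct net *req_net_ns(const struct ib_cm_event *event)
{
	const struct ib_gid_attr *sgid_attr =
		event->param.req_rcvd.ppath_sgid_attr;

	if (!sgid_attr || !sgid_attr->ndev)
		return &init_net;	/* non-RoCE: previous behavior */
	return dev_net(sgid_attr->ndev);
}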
Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 5 ++++- drivers/infiniband/core/cma.c | 28 ++++++++++++++++++++++------ include/rdma/ib_cm.h | 13 +++++++++++++ 3 files changed, 39 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 4724cb09b69d..6e39c27dca8e 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1716,6 +1716,7 @@ static void cm_format_req_event(struct cm_work *work, param->retry_count = cm_req_get_retry_count(req_msg); param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); param->srq = cm_req_get_srq(req_msg); + param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr; work->cm_event.private_data = &req_msg->private_data; } @@ -3532,6 +3533,7 @@ out: EXPORT_SYMBOL(ib_send_cm_sidr_req); static void cm_format_sidr_req_event(struct cm_work *work, + const struct cm_id_private *rx_cm_id, struct ib_cm_id *listen_id) { struct cm_sidr_req_msg *sidr_req_msg; @@ -3545,6 +3547,7 @@ static void cm_format_sidr_req_event(struct cm_work *work, param->service_id = sidr_req_msg->service_id; param->bth_pkey = cm_get_bth_pkey(work); param->port = work->port->port_num; + param->sgid_attr = rx_cm_id->av.ah_attr.grh.sgid_attr; work->cm_event.private_data = &sidr_req_msg->private_data; } @@ -3602,7 +3605,7 @@ static int cm_sidr_req_handler(struct cm_work *work) cm_id_priv->id.service_id = sidr_req_msg->service_id; cm_id_priv->id.service_mask = ~cpu_to_be64(0); - cm_format_sidr_req_event(work, &cur_cm_id_priv->id); + cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id); cm_process_work(cm_id_priv, work); cm_deref_id(cur_cm_id_priv); return 0; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 338df1789884..693e025a1585 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1371,6 +1371,22 @@ static bool validate_net_dev(struct net_device *net_dev, } } +static struct net_device * +roce_get_net_dev_by_cm_event(const struct ib_cm_event *ib_event) +{ + const struct ib_gid_attr *sgid_attr = NULL; + + if (ib_event->event == IB_CM_REQ_RECEIVED) + sgid_attr = ib_event->param.req_rcvd.ppath_sgid_attr; + else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) + sgid_attr = ib_event->param.sidr_req_rcvd.sgid_attr; + + if (!sgid_attr) + return NULL; + dev_hold(sgid_attr->ndev); + return sgid_attr->ndev; +} + static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event, struct cma_req_info *req) { @@ -1386,8 +1402,12 @@ static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event, if (err) return ERR_PTR(err); - net_dev = ib_get_net_dev_by_params(req->device, req->port, req->pkey, - gid, listen_addr); + if (rdma_protocol_roce(req->device, req->port)) + net_dev = roce_get_net_dev_by_cm_event(ib_event); + else + net_dev = ib_get_net_dev_by_params(req->device, req->port, + req->pkey, + gid, listen_addr); if (!net_dev) return ERR_PTR(-ENODEV); @@ -1508,10 +1528,6 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) { /* Assuming the protocol is AF_IB */ *net_dev = NULL; - } else if (rdma_protocol_roce(req.device, req.port)) { - /* TODO find the net dev matching the request parameters - * through the RoCE GID table */ - *net_dev = NULL; } else { return ERR_CAST(*net_dev); } diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index c98d603c0b63..568708a87239 100644 
--- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -120,6 +120,13 @@ struct ib_cm_req_event_param { struct sa_path_rec *primary_path; struct sa_path_rec *alternate_path; + /* + * SGID attribute of the primary path. Currently only + * useful for RoCE. Alternate path GID attributes + * are not yet supported. + */ + const struct ib_gid_attr *ppath_sgid_attr; + __be64 remote_ca_guid; u32 remote_qkey; u32 remote_qpn; @@ -226,6 +233,12 @@ struct ib_cm_apr_event_param { struct ib_cm_sidr_req_event_param { struct ib_cm_id *listen_id; __be64 service_id; + + /* + * SGID attribute of the request. Currently only + * useful for RoCE. + */ + const struct ib_gid_attr *sgid_attr; /* P_Key that was used by the GMP's BTH header */ u16 bth_pkey; u8 port; -- cgit From d274e45ce1ed0bc5750f4d3f066bfc164843216b Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 16 Jul 2018 11:50:12 +0300 Subject: RDMA/cma: Consider netdevice for RoCE ports When a netdevice is not found for a request on a RoCE port, the code currently allows matching a listener as long as the port number matches, ignoring the netdevice. Now that we always prefer to have a netdevice associated with RoCE, don't consider RoCE ports when the netdevice is not found. In other words, a NULL netdevice with RoCE is not acceptable. Therefore, remove this confusing RoCE port ignorance check. Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 693e025a1585..2eb4022f1790 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1470,10 +1470,9 @@ static bool cma_match_net_dev(const struct rdma_cm_id *id, const struct rdma_addr *addr = &id->route.addr; if (!net_dev) - /* This request is an AF_IB request or a RoCE request */ + /* This request is an AF_IB request */ return (!id->port_num || id->port_num == port_num) && - (addr->src_addr.ss_family == AF_IB || - rdma_protocol_roce(id->device, port_num)); + (addr->src_addr.ss_family == AF_IB); return !addr->dev_addr.bound_dev_if || (net_eq(dev_net(net_dev), addr->dev_addr.net) && addr->dev_addr.bound_dev_if == net_dev->ifindex); -- cgit From 643d213a9a034fa04f5575a40dfc8548e33ce04f Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 16 Jul 2018 11:50:13 +0300 Subject: RDMA/cma: Do not ignore net namespace for unbound cm_id Currently, if the cm_id is not bound to any netdevice, the net namespace is ignored for such a cm_id, which is incorrect. The net namespace must match regardless of whether the cm_id is bound to a netdevice. When a cm_id is bound to a netdevice, the net namespace and the netdevice must both match.
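Spelled out without the boolean-comparison shorthand used in the hunk below, the matching rule is equivalent to the following sketch (illustration only, not part of the patch; it assumes a registered netdevice always has a non-zero ifindex):

#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <rdma/rdma_cm.h>

/* Illustration only: the rule implemented by the hunk below. */
static bool listener_matches(const struct rdma_addr *addr,
			     const struct net_device *net_dev)
{
	/* The net namespace must always match. */
	if (!net_eq(dev_net(net_dev), addr->dev_addr.net))
		return false;
	/* A listener bound to a device must match that exact device;
	 * an unbound listener matches any device in the namespace.
	 */
	if (addr->dev_addr.bound_dev_if)
		return addr->dev_addr.bound_dev_if == net_dev->ifindex;
	return true;
}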
Fixes: 4c21b5bcef73 ("IB/cma: Add net_dev and private data checks to RDMA CM") Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 2eb4022f1790..7379094bbbab 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1474,9 +1474,16 @@ static bool cma_match_net_dev(const struct rdma_cm_id *id, return (!id->port_num || id->port_num == port_num) && (addr->src_addr.ss_family == AF_IB); - return !addr->dev_addr.bound_dev_if || - (net_eq(dev_net(net_dev), addr->dev_addr.net) && - addr->dev_addr.bound_dev_if == net_dev->ifindex); + /* + * Net namespaces must match, and if the listener is listening + * on a specific netdevice then the netdevice must match as well. + */ + if (net_eq(dev_net(net_dev), addr->dev_addr.net) && + (!!addr->dev_addr.bound_dev_if == + (addr->dev_addr.bound_dev_if == net_dev->ifindex))) + return true; + else + return false; } static struct rdma_id_private *cma_find_listener( -- cgit From 5d85a822fae2b484f26ddb09815063a88962a0cb Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 26 Jul 2018 11:36:50 -0600 Subject: net/xprtrdma: Restore needed argument to ib_post_send The call in svc_rdma_post_chunk_ctxt() does actually use bad_wr. Fixes: ed288d74a9e5 ("net/xprtrdma: Simplify ib_post_(send|recv|srq_recv)() calls") Reported-by: Stephen Rothwell Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- net/sunrpc/xprtrdma/svc_rdma_rw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 80975427f523..ce3ea8419704 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -329,7 +329,7 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc) do { if (atomic_sub_return(cc->cc_sqecount, &rdma->sc_sq_avail) > 0) { - ret = ib_post_send(rdma->sc_qp, first_wr, NULL); + ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr); trace_svcrdma_post_rw(&cc->cc_cqe, cc->cc_sqecount, ret); if (ret) -- cgit From 7cfcc71eb0e50f191983d714223c169c47437021 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 23 Jul 2018 15:37:01 -0700 Subject: RDMA/usnic: Suppress a compiler warning This patch prevents the following compiler warning from being reported when building with gcc 8 and W=1: drivers/infiniband/hw/usnic/usnic_fwd.c:95:2: warning: 'strncpy' output may be truncated copying 16 bytes from a string of length 20 [-Wstringop-truncation] strncpy(ufdev->name, netdev_name(ufdev->netdev), ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sizeof(ufdev->name) - 1); ~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/usnic/usnic_fwd.c | 4 ++-- drivers/infiniband/hw/usnic/usnic_fwd.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.c b/drivers/infiniband/hw/usnic/usnic_fwd.c index 995a26b65156..7875883621f4 100644 --- a/drivers/infiniband/hw/usnic/usnic_fwd.c +++ b/drivers/infiniband/hw/usnic/usnic_fwd.c @@ -92,8 +92,8 @@ struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev) ufdev->pdev = pdev; ufdev->netdev = pci_get_drvdata(pdev); spin_lock_init(&ufdev->lock); - strncpy(ufdev->name, netdev_name(ufdev->netdev), - sizeof(ufdev->name) - 1); +
BUILD_BUG_ON(sizeof(ufdev->name) != sizeof(ufdev->netdev->name)); + strcpy(ufdev->name, ufdev->netdev->name); return ufdev; } diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.h b/drivers/infiniband/hw/usnic/usnic_fwd.h index 0b2cc4e79707..f0b71d593da5 100644 --- a/drivers/infiniband/hw/usnic/usnic_fwd.h +++ b/drivers/infiniband/hw/usnic/usnic_fwd.h @@ -57,7 +57,7 @@ struct usnic_fwd_dev { char mac[ETH_ALEN]; unsigned int mtu; __be32 inaddr; - char name[IFNAMSIZ+1]; + char name[IFNAMSIZ]; }; struct usnic_fwd_flow { -- cgit From 2577188edcf3e235b4f060a350fad84510eb7680 Mon Sep 17 00:00:00 2001 From: Qing Huang Date: Mon, 23 Jul 2018 14:15:08 -0700 Subject: IB/mlx5: avoid excessive warning msgs when creating VFs on 2nd port When a CX5 device is configured in dual-port RoCE mode, after creating many VFs against port 1, creating the same number of VFs against port 2 will flood kernel/syslog with something like "mlx5_*:mlx5_ib_bind_slave_port:4266:(pid 5269): port 2 already affiliated." So basically, when traversing mlx5_ib_dev_list, mlx5_ib_add_slave_port() repeatedly attempts to bind the new mpi structure to every device on the list until it finds an unbound device. Change the log level from warn to dbg to avoid log flooding, as the warning should be harmless. Signed-off-by: Qing Huang Reviewed-by: Daniel Jurgens Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a26ab69f3741..61c78f4e4ebc 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5380,8 +5380,8 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, spin_lock(&ibdev->port[port_num].mp.mpi_lock); if (ibdev->port[port_num].mp.mpi) { - mlx5_ib_warn(ibdev, "port %d already affiliated.\n", - port_num + 1); + mlx5_ib_dbg(ibdev, "port %d already affiliated.\n", + port_num + 1); spin_unlock(&ibdev->port[port_num].mp.mpi_lock); return false; } -- cgit From 3635ac020842d37d207570891cb901afa653e55d Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Wed, 25 Jul 2018 15:29:31 +0800 Subject: RDMA/hns: Do not overwrite the error code during error unwind in hns_roce_init When cmq init fails in the initial flow of RoCE, hns_roce_init() should return the errno of the cmq_init function, not that of the reset call. Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 850032de8676..3ea2182a3334 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -886,8 +886,7 @@ error_failed_cmd_init: error_failed_cmq_init: if (hr_dev->hw->reset) { - ret = hr_dev->hw->reset(hr_dev, false); - if (ret) + if (hr_dev->hw->reset(hr_dev, false)) dev_err(dev, "Dereset RoCE engine failed!\n"); } -- cgit From aaa31567799dbce55a1b780355dabf83556018d0 Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Wed, 25 Jul 2018 15:29:33 +0800 Subject: RDMA/hns: Add 50GE type of hnae3 device match This patch adds PCI matching for the hns 50GE NIC.
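For reference, such a match table follows the standard PCI ID table pattern; a minimal sketch is shown below (the table name is hypothetical and the device ID is made up, not a real hnae3 ID):

#include <linux/module.h>
#include <linux/pci.h>

/* Sketch only: the generic PCI ID table pattern the hunk below extends.
 * 0x1234 is a placeholder device ID, not real Huawei hardware.
 */
static const struct pci_device_id example_pci_tbl[] = {
	{ PCI_VDEVICE(HUAWEI, 0x1234), 0 },	/* vendor + device match */
	{ 0, }					/* required last entry */
};
MODULE_DEVICE_TABLE(pci, example_pci_tbl);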
Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 951d839f1392..948b06088d32 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4995,6 +4995,8 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), 0}, + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0}, + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0}, /* required last entry */ {0, } -- cgit From dedf63506a8e55235edf902fa7455cd2974e462d Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Wed, 25 Jul 2018 15:29:36 +0800 Subject: RDMA/hns: Return correct error code from hns_roce_v1_rsv_lp_qp() When creating the loop QP fails, set a proper error code before unwinding, and return the actual error code (rather than a hard-coded -EINVAL) when modify_qp() fails. Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 783d28dd3ca4..ae6b642ec073 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -785,6 +785,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) free_mr->mr_free_qp[i] = hns_roce_v1_create_lp_qp(hr_dev, pd); if (!free_mr->mr_free_qp[i]) { dev_err(dev, "Create loop qp failed!\n"); + ret = -ENOMEM; goto create_lp_qp_failed; } hr_qp = free_mr->mr_free_qp[i]; @@ -854,7 +855,7 @@ alloc_pd_failed: if (hns_roce_ib_destroy_cq(cq)) dev_err(dev, "Destroy cq for create_lp_qp failed!\n"); - return -EINVAL; + return ret; } static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev) -- cgit From 26f63b9c33ceda12fb9136a1d0c80e03c9ebb514 Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Wed, 25 Jul 2018 15:29:37 +0800 Subject: RDMA/hns: Add illegal hop_num judgement When hop_num is more than three, the function needs to return -EINVAL. This patch fixes it. Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 63b5b3edabcb..8dc336a85128 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -494,6 +494,9 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev, step_idx = 1; } else if (hop_num == HNS_ROCE_HOP_NUM_0) { step_idx = 0; + } else { + ret = -EINVAL; + goto err_dma_alloc_l1; } /* set HEM base address to hardware */ -- cgit From 73b4e1f4c04e76b19fca296d09b97555bfb35202 Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Wed, 25 Jul 2018 15:29:38 +0800 Subject: RDMA/hns: Use delay instead of usleep In order to avoid calling a sleeping function while a lock is held, use a delay function instead. Besides, add brackets to make the order of evaluation explicit.
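The underlying rule is that msleep() may schedule and therefore must not be called in atomic context, for example while a spinlock is held, whereas mdelay() busy-waits. A generic sketch of the pattern (illustration only, not taken from this driver; the lock and function names are made up):

#include <linux/delay.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);	/* hypothetical lock */

/* Illustration only: while a spinlock is held the CPU must not sleep,
 * so msleep() is not allowed inside the critical section; mdelay()
 * busy-waits and is therefore safe, if expensive.
 */
static void example_wait_under_lock(void)
{
	spin_lock(&example_lock);
	/* msleep(20); would be a sleeping call in atomic context */
	mdelay(20);	/* busy-wait for 20 ms while holding the lock */
	spin_unlock(&example_lock);
}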
Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 8dc336a85128..f6faefed96e8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -170,7 +170,7 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, case 3: mhop->l2_idx = table_idx & (chunk_ba_num - 1); mhop->l1_idx = table_idx / chunk_ba_num & (chunk_ba_num - 1); - mhop->l0_idx = table_idx / chunk_ba_num / chunk_ba_num; + mhop->l0_idx = (table_idx / chunk_ba_num) / chunk_ba_num; break; case 2: mhop->l1_idx = table_idx & (chunk_ba_num - 1); @@ -342,7 +342,7 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, } else { break; } - msleep(HW_SYNC_SLEEP_TIME_INTERVAL); + mdelay(HW_SYNC_SLEEP_TIME_INTERVAL); } bt_cmd_l = (u32)bt_ba; -- cgit From 0c4a0e2987a51415de73180ce9f389a99b3dddd1 Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Wed, 25 Jul 2018 15:29:40 +0800 Subject: RDMA/hns: Update the data type of immediate data Because the hip08 hardware data structures are little endian, the immediate data field of the WQE and CQE needs to be changed to __le32. Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 12 ++++++++---- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 6 +++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 948b06088d32..6fa12f2262f4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -272,7 +272,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_SEND_WITH_IMM: case IB_WR_RDMA_WRITE_WITH_IMM: - ud_sq_wqe->immtdata = wr->ex.imm_data; + ud_sq_wqe->immtdata = + cpu_to_le32(be32_to_cpu(wr->ex.imm_data)); break; default: ud_sq_wqe->immtdata = 0; @@ -370,7 +371,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_SEND_WITH_IMM: case IB_WR_RDMA_WRITE_WITH_IMM: - rc_sq_wqe->immtdata = wr->ex.imm_data; + rc_sq_wqe->immtdata = + cpu_to_le32(be32_to_cpu(wr->ex.imm_data)); break; case IB_WR_SEND_WITH_INV: rc_sq_wqe->inv_key = @@ -2178,7 +2180,8 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, case HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM: wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; wc->wc_flags = IB_WC_WITH_IMM; - wc->ex.imm_data = cqe->immtdata; + wc->ex.imm_data = + cpu_to_be32(le32_to_cpu(cqe->immtdata)); break; case HNS_ROCE_V2_OPCODE_SEND: wc->opcode = IB_WC_RECV; @@ -2187,7 +2190,8 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, case HNS_ROCE_V2_OPCODE_SEND_WITH_IMM: wc->opcode = IB_WC_RECV; wc->wc_flags = IB_WC_WITH_IMM; - wc->ex.imm_data = cqe->immtdata; + wc->ex.imm_data = + cpu_to_be32(le32_to_cpu(cqe->immtdata)); break; case HNS_ROCE_V2_OPCODE_SEND_WITH_INV: wc->opcode = IB_WC_RECV; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index df95b3515c94..f40d8c22d357 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -772,7 +772,7 @@ struct hns_roce_v2_cqe { __le32 byte_4; union { __le32 rkey; - __be32 immtdata; + __le32 immtdata; }; __le32 byte_12; __le32 byte_16; @@ -930,7 +930,7 @@ struct hns_roce_v2_cq_db { struct
hns_roce_v2_ud_send_wqe { __le32 byte_4; __le32 msg_len; - __be32 immtdata; + __le32 immtdata; __le32 byte_16; __le32 byte_20; __le32 byte_24; @@ -1016,7 +1016,7 @@ struct hns_roce_v2_rc_send_wqe { __le32 msg_len; union { __le32 inv_key; - __be32 immtdata; + __le32 immtdata; }; __le32 byte_16; __le32 byte_20; -- cgit From df0651079380c5646bc0d0f3a4aa7621ef1c2b7c Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Wed, 25 Jul 2018 15:29:41 +0800 Subject: RDMA/hns: Enable modify_cq for uverbs. The driver implements the modify_cq callback, but does not set the bit to expose it to userspace. Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 3ea2182a3334..c5cae9a38c04 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -534,6 +534,9 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) (1ULL << IB_USER_VERBS_CMD_QUERY_QP) | (1ULL << IB_USER_VERBS_CMD_DESTROY_QP); + ib_dev->uverbs_ex_cmd_mask |= + (1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ); + /* HCA||device||port */ ib_dev->modify_device = hns_roce_modify_device; ib_dev->query_device = hns_roce_query_device; -- cgit From 7aaa1807e698f73094b78f0ef25b1a37a4409a55 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 27 Jul 2018 09:48:30 -0600 Subject: IB/cache: Restore compatibility for ib_query_gid Code changes in smc have become so complicated this cycle that the RDMA patches to remove ib_query_gid in smc create too complex merge conflicts. Provide a compatibility wrapper so that those conflicts can be resolved in favor of the net/smc hunks. During the second phase of the merge window this wrapper will be deleted and smc updated to use the new API. Reported-by: Stephen Rothwell Reviewed-by: Parav Pandit Signed-off-by: Jason Gunthorpe --- include/rdma/ib_cache.h | 24 ++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index 1108d4220276..a4ce441f36f0 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -132,4 +132,28 @@ const struct ib_gid_attr *rdma_get_gid_attr(struct ib_device *device, u8 port_num, int index); void rdma_put_gid_attr(const struct ib_gid_attr *attr); void rdma_hold_gid_attr(const struct ib_gid_attr *attr); + +/* + * This is to be removed. It only exists to make merging rdma and smc simpler. + */ +static inline __deprecated int ib_query_gid(struct ib_device *device, + u8 port_num, int index, + union ib_gid *gid, + struct ib_gid_attr *attr_out) +{ + const struct ib_gid_attr *attr; + + attr = rdma_get_gid_attr(device, port_num, index); + if (IS_ERR(attr)) + return PTR_ERR(attr); + + if (attr->ndev) + dev_hold(attr->ndev); + *attr_out = *attr; + + rdma_put_gid_attr(attr); + + return 0; +} + #endif /* _IB_CACHE_H */ -- cgit From 3e081b773e4c892c1ba2515184072327db9fb29a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 18 Jul 2018 09:25:13 -0700 Subject: IB/iser: Inline two work request conversion functions Since the next patch will change the return type of these functions into a const pointer, and since the iSER driver modifies the work requests that these functions return a pointer to, inline the two work request conversion function calls. This patch does not change any functionality.
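The open-coding relies on the standard container_of() idiom: struct ib_reg_wr and struct ib_sig_handover_wr embed a struct ib_send_wr as a member named wr, so subtracting that member's offset from a pointer to the embedded member recovers the enclosing structure. A reduced, self-contained sketch of the idiom with stand-in types (not the real RDMA headers):

    #include <stddef.h>

    /* Userspace stand-in for the kernel's container_of() */
    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct send_wr { int opcode; };     /* stands in for ib_send_wr */
    struct reg_wr {
            struct send_wr wr;          /* embedded base work request */
            int key;                    /* registration-specific field */
    };

    static struct reg_wr *to_reg_wr(struct send_wr *base)
    {
            /* base must point at the wr member inside a struct reg_wr */
            return container_of(base, struct reg_wr, wr);
    }

Open-coding the conversion at the two iSER call sites keeps the result non-const there, which is what lets the next patch give the shared reg_wr() and sig_handover_wr() helpers const return types.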
Signed-off-by: Bart Van Assche Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/iser/iser_memory.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index ca844a926e6a..a77d020cc89d 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -405,7 +405,8 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); - wr = sig_handover_wr(iser_tx_next_wr(tx_desc)); + wr = container_of(iser_tx_next_wr(tx_desc), struct ib_sig_handover_wr, + wr); wr->wr.opcode = IB_WR_REG_SIG_MR; wr->wr.wr_cqe = cqe; wr->wr.sg_list = &data_reg->sge; @@ -457,7 +458,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, return n < 0 ? n : -EINVAL; } - wr = reg_wr(iser_tx_next_wr(tx_desc)); + wr = container_of(iser_tx_next_wr(tx_desc), struct ib_reg_wr, wr); wr->wr.opcode = IB_WR_REG_MR; wr->wr.wr_cqe = cqe; wr->wr.send_flags = 0; -- cgit From f696bf6d64b195b83ca1bdb7cd33c999c9dcf514 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 18 Jul 2018 09:25:14 -0700 Subject: RDMA: Constify the argument of the work request conversion functions When posting a send work request, the work request that is posted is not modified by any of the RDMA drivers. Make this explicit by constifying most ib_send_wr pointers in RDMA transport drivers. Signed-off-by: Bart Van Assche Reviewed-by: Sagi Grimberg Reviewed-by: Steve Wise Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 18 ++++++++-------- drivers/infiniband/hw/cxgb3/iwch_qp.c | 20 +++++++++--------- drivers/infiniband/hw/cxgb4/qp.c | 21 +++++++++++-------- drivers/infiniband/hw/hns/hns_roce_device.h | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 4 ++-- drivers/infiniband/hw/mlx4/qp.c | 25 +++++++++++----------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- drivers/infiniband/hw/mlx5/qp.c | 31 ++++++++++++++-------------- drivers/infiniband/hw/mthca/mthca_qp.c | 8 +++---- drivers/infiniband/hw/nes/nes_verbs.c | 3 ++- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 12 +++++------ drivers/infiniband/hw/qedr/qedr_roce_cm.c | 4 ++-- drivers/infiniband/hw/qedr/verbs.c | 7 ++++--- drivers/infiniband/hw/qib/qib_verbs.h | 2 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 3 ++- drivers/infiniband/sw/rdmavt/qp.c | 4 ++-- drivers/infiniband/sw/rxe/rxe_verbs.c | 8 +++---- include/rdma/ib_verbs.h | 11 +++++----- 18 files changed, 97 insertions(+), 88 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index dd800d153aa2..a0082e0bb8e2 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1876,7 +1876,7 @@ out: /* Routine for sending QP1 packets for RoCE V1 an V2 */ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, - struct ib_send_wr *wr, + const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe, int payload_size) { @@ -2093,7 +2093,7 @@ static int is_ud_qp(struct bnxt_re_qp *qp) } static int bnxt_re_build_send_wqe(struct bnxt_re_qp *qp, - struct ib_send_wr *wr, + const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { struct bnxt_re_ah *ah = NULL; @@ -2131,7 +2131,7 @@ static int bnxt_re_build_send_wqe(struct bnxt_re_qp *qp, return 0; } -static int bnxt_re_build_rdma_wqe(struct ib_send_wr *wr, +static int 
bnxt_re_build_rdma_wqe(const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { switch (wr->opcode) { @@ -2163,7 +2163,7 @@ static int bnxt_re_build_rdma_wqe(struct ib_send_wr *wr, return 0; } -static int bnxt_re_build_atomic_wqe(struct ib_send_wr *wr, +static int bnxt_re_build_atomic_wqe(const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { switch (wr->opcode) { @@ -2190,7 +2190,7 @@ static int bnxt_re_build_atomic_wqe(struct ib_send_wr *wr, return 0; } -static int bnxt_re_build_inv_wqe(struct ib_send_wr *wr, +static int bnxt_re_build_inv_wqe(const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { wqe->type = BNXT_QPLIB_SWQE_TYPE_LOCAL_INV; @@ -2209,7 +2209,7 @@ static int bnxt_re_build_inv_wqe(struct ib_send_wr *wr, return 0; } -static int bnxt_re_build_reg_wqe(struct ib_reg_wr *wr, +static int bnxt_re_build_reg_wqe(const struct ib_reg_wr *wr, struct bnxt_qplib_swqe *wqe) { struct bnxt_re_mr *mr = container_of(wr->mr, struct bnxt_re_mr, ib_mr); @@ -2251,7 +2251,7 @@ static int bnxt_re_build_reg_wqe(struct ib_reg_wr *wr, } static int bnxt_re_copy_inline_data(struct bnxt_re_dev *rdev, - struct ib_send_wr *wr, + const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { /* Copy the inline data to the data field */ @@ -2281,7 +2281,7 @@ static int bnxt_re_copy_inline_data(struct bnxt_re_dev *rdev, } static int bnxt_re_copy_wr_payload(struct bnxt_re_dev *rdev, - struct ib_send_wr *wr, + const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { int payload_sz = 0; @@ -2313,7 +2313,7 @@ static void bnxt_ud_qp_hw_stall_workaround(struct bnxt_re_qp *qp) static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp, - struct ib_send_wr *wr) + const struct ib_send_wr *wr) { struct bnxt_qplib_swqe wqe; int rc = 0, payload_sz = 0; diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 3871e1fd8395..29ab6910a004 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -39,8 +39,8 @@ #define NO_SUPPORT -1 -static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, - u8 * flit_cnt) +static int build_rdma_send(union t3_wr *wqe, const struct ib_send_wr *wr, + u8 *flit_cnt) { int i; u32 plen; @@ -84,8 +84,8 @@ static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } -static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, - u8 *flit_cnt) +static int build_rdma_write(union t3_wr *wqe, const struct ib_send_wr *wr, + u8 *flit_cnt) { int i; u32 plen; @@ -125,8 +125,8 @@ static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } -static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, - u8 *flit_cnt) +static int build_rdma_read(union t3_wr *wqe, const struct ib_send_wr *wr, + u8 *flit_cnt) { if (wr->num_sge > 1) return -EINVAL; @@ -146,8 +146,8 @@ static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } -static int build_memreg(union t3_wr *wqe, struct ib_reg_wr *wr, - u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) +static int build_memreg(union t3_wr *wqe, const struct ib_reg_wr *wr, + u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) { struct iwch_mr *mhp = to_iwch_mr(wr->mr); int i; @@ -189,8 +189,8 @@ static int build_memreg(union t3_wr *wqe, struct ib_reg_wr *wr, return 0; } -static int build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr, - u8 *flit_cnt) +static int build_inv_stag(union t3_wr *wqe, const struct ib_send_wr *wr, + u8 *flit_cnt) { wqe->local_inv.stag = 
cpu_to_be32(wr->ex.invalidate_rkey); wqe->local_inv.reserved = 0; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 08dc555942af..dbd697b113ec 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -410,7 +410,7 @@ free_sq_qid: } static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp, - struct ib_send_wr *wr, int max, u32 *plenp) + const struct ib_send_wr *wr, int max, u32 *plenp) { u8 *dstp, *srcp; u32 plen = 0; @@ -480,7 +480,7 @@ static int build_isgl(__be64 *queue_start, __be64 *queue_end, } static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, - struct ib_send_wr *wr, u8 *len16) + const struct ib_send_wr *wr, u8 *len16) { u32 plen; int size; @@ -547,7 +547,7 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, } static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, - struct ib_send_wr *wr, u8 *len16) + const struct ib_send_wr *wr, u8 *len16) { u32 plen; int size; @@ -589,7 +589,8 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, return 0; } -static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) +static int build_rdma_read(union t4_wr *wqe, const struct ib_send_wr *wr, + u8 *len16) { if (wr->num_sge > 1) return -EINVAL; @@ -648,7 +649,7 @@ static int build_srq_recv(union t4_recv_wr *wqe, struct ib_recv_wr *wr, } static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr, - struct ib_reg_wr *wr, struct c4iw_mr *mhp, + const struct ib_reg_wr *wr, struct c4iw_mr *mhp, u8 *len16) { __be64 *p = (__be64 *)fr->pbl; @@ -680,8 +681,8 @@ static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr, } static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, - struct ib_reg_wr *wr, struct c4iw_mr *mhp, u8 *len16, - bool dsgl_supported) + const struct ib_reg_wr *wr, struct c4iw_mr *mhp, + u8 *len16, bool dsgl_supported) { struct fw_ri_immd *imdp; __be64 *p; @@ -743,7 +744,8 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, return 0; } -static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) +static int build_inv_stag(union t4_wr *wqe, const struct ib_send_wr *wr, + u8 *len16) { wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey); wqe->inv.r2 = 0; @@ -862,7 +864,8 @@ static int ib_to_fw_opcode(int ib_opcode) return opcode; } -static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) +static int complete_sq_drain_wr(struct c4iw_qp *qhp, + const struct ib_send_wr *wr) { struct t4_cqe cqe = {}; struct c4iw_cq *schp; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index a595e72f243e..ceb490c732c0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -985,7 +985,7 @@ void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn, int cnt); -__be32 send_ieth(struct ib_send_wr *wr); +__be32 send_ieth(const struct ib_send_wr *wr); int to_hr_qp_type(int qp_type); struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 6fa12f2262f4..a9bc6b279175 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -53,7 +53,7 @@ static void set_data_seg_v2(struct 
hns_roce_v2_wqe_data_seg *dseg, dseg->len = cpu_to_le32(sg->length); } -static void set_extend_sge(struct hns_roce_qp *qp, struct ib_send_wr *wr, +static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, unsigned int *sge_ind) { struct hns_roce_v2_wqe_data_seg *dseg; @@ -100,7 +100,7 @@ static void set_extend_sge(struct hns_roce_qp *qp, struct ib_send_wr *wr, } } -static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr, +static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, void *wqe, unsigned int *sge_ind, struct ib_send_wr **bad_wr) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 408e720fd923..44fc684b5e3a 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2925,7 +2925,7 @@ static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey) } static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, - struct ib_ud_wr *wr, + const struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device); @@ -3073,7 +3073,7 @@ static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num, } #define MLX4_ROCEV2_QP1_SPORT 0xC000 -static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, +static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { struct ib_device *ib_dev = sqp->qp.ibqp.device; @@ -3355,7 +3355,7 @@ static __be32 convert_access(int acc) } static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg, - struct ib_reg_wr *wr) + const struct ib_reg_wr *wr) { struct mlx4_ib_mr *mr = to_mmr(wr->mr); @@ -3385,7 +3385,7 @@ static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, } static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, - struct ib_atomic_wr *wr) + const struct ib_atomic_wr *wr) { if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { aseg->swap_add = cpu_to_be64(wr->swap); @@ -3401,7 +3401,7 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, } static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, - struct ib_atomic_wr *wr) + const struct ib_atomic_wr *wr) { aseg->swap_add = cpu_to_be64(wr->swap); aseg->swap_add_mask = cpu_to_be64(wr->swap_mask); @@ -3410,7 +3410,7 @@ static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, } static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, - struct ib_ud_wr *wr) + const struct ib_ud_wr *wr) { memcpy(dseg->av, &to_mah(wr->ah)->av, sizeof (struct mlx4_av)); dseg->dqpn = cpu_to_be32(wr->remote_qpn); @@ -3421,7 +3421,7 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, struct mlx4_wqe_datagram_seg *dseg, - struct ib_ud_wr *wr, + const struct ib_ud_wr *wr, enum mlx4_ib_qp_type qpt) { union mlx4_ext_av *av = &to_mah(wr->ah)->av; @@ -3443,7 +3443,8 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY); } -static void build_tunnel_header(struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) +static void build_tunnel_header(const struct ib_ud_wr *wr, void *wqe, + unsigned *mlx_seg_len) { struct mlx4_wqe_inline_seg *inl = wqe; struct mlx4_ib_tunnel_header hdr; @@ -3526,9 +3527,9 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) dseg->addr = cpu_to_be64(sg->addr); } -static int build_lso_seg(struct mlx4_wqe_lso_seg 
*wqe, struct ib_ud_wr *wr, - struct mlx4_ib_qp *qp, unsigned *lso_seg_len, - __be32 *lso_hdr_sz, __be32 *blh) +static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, + const struct ib_ud_wr *wr, struct mlx4_ib_qp *qp, + unsigned *lso_seg_len, __be32 *lso_hdr_sz, __be32 *blh) { unsigned halign = ALIGN(sizeof *wqe + wr->hlen, 16); @@ -3546,7 +3547,7 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_ud_wr *wr, return 0; } -static __be32 send_ieth(struct ib_send_wr *wr) +static __be32 send_ieth(const struct ib_send_wr *wr) { switch (wr->opcode) { case IB_WR_SEND_WITH_IMM: diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 462505c8fa25..8d9eaa31fab2 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -469,7 +469,7 @@ struct mlx5_umr_wr { u32 mkey; }; -static inline struct mlx5_umr_wr *umr_wr(struct ib_send_wr *wr) +static inline const struct mlx5_umr_wr *umr_wr(const struct ib_send_wr *wr) { return container_of(wr, struct mlx5_umr_wr, wr); } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index d4414015b64f..3cbd00015182 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3508,7 +3508,7 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg, } static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg, - struct ib_send_wr *wr, void *qend, + const struct ib_send_wr *wr, void *qend, struct mlx5_ib_qp *qp, int *size) { void *seg = eseg; @@ -3561,7 +3561,7 @@ static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg, } static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, - struct ib_send_wr *wr) + const struct ib_send_wr *wr) { memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av)); dseg->av.dqp_dct = cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV); @@ -3709,9 +3709,9 @@ static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask) static int set_reg_umr_segment(struct mlx5_ib_dev *dev, struct mlx5_wqe_umr_ctrl_seg *umr, - struct ib_send_wr *wr, int atomic) + const struct ib_send_wr *wr, int atomic) { - struct mlx5_umr_wr *umrwr = umr_wr(wr); + const struct mlx5_umr_wr *umrwr = umr_wr(wr); memset(umr, 0, sizeof(*umr)); @@ -3782,9 +3782,10 @@ static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg) seg->status = MLX5_MKEY_STATUS_FREE; } -static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr) +static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, + const struct ib_send_wr *wr) { - struct mlx5_umr_wr *umrwr = umr_wr(wr); + const struct mlx5_umr_wr *umrwr = umr_wr(wr); memset(seg, 0, sizeof(*seg)); if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR) @@ -3833,7 +3834,7 @@ static void set_reg_umr_inline_seg(void *seg, struct mlx5_ib_qp *qp, seg += mr_list_size; } -static __be32 send_ieth(struct ib_send_wr *wr) +static __be32 send_ieth(const struct ib_send_wr *wr) { switch (wr->opcode) { case IB_WR_SEND_WITH_IMM: @@ -3865,7 +3866,7 @@ static u8 wq_sig(void *wqe) return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4); } -static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr, +static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, void *wqe, int *sz) { struct mlx5_wqe_inline_seg *seg; @@ -4011,7 +4012,7 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, return 0; } -static int set_sig_data_segment(struct ib_sig_handover_wr *wr, +static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, struct mlx5_ib_qp 
*qp, void **seg, int *size) { struct ib_sig_attrs *sig_attrs = wr->sig_attrs; @@ -4113,7 +4114,7 @@ static int set_sig_data_segment(struct ib_sig_handover_wr *wr, } static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, - struct ib_sig_handover_wr *wr, u32 size, + const struct ib_sig_handover_wr *wr, u32 size, u32 length, u32 pdn) { struct ib_mr *sig_mr = wr->sig_mr; @@ -4144,10 +4145,10 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, } -static int set_sig_umr_wr(struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp, - void **seg, int *size) +static int set_sig_umr_wr(const struct ib_send_wr *send_wr, + struct mlx5_ib_qp *qp, void **seg, int *size) { - struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr); + const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr); struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr); u32 pdn = get_pd(qp)->pdn; u32 xlt_size; @@ -4222,7 +4223,7 @@ static int set_psv_wr(struct ib_sig_domain *domain, } static int set_reg_wr(struct mlx5_ib_qp *qp, - struct ib_reg_wr *wr, + const struct ib_reg_wr *wr, void **seg, int *size) { struct mlx5_ib_mr *mr = to_mmr(wr->mr); @@ -4295,7 +4296,7 @@ static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16) static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, struct mlx5_wqe_ctrl_seg **ctrl, - struct ib_send_wr *wr, unsigned *idx, + const struct ib_send_wr *wr, unsigned *idx, int *size, int nreq) { if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index af1c49d70b89..0e390f410b39 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1488,7 +1488,7 @@ void mthca_free_qp(struct mthca_dev *dev, /* Create UD header for an MLX send and build a data segment for it */ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, - int ind, struct ib_ud_wr *wr, + int ind, const struct ib_ud_wr *wr, struct mthca_mlx_seg *mlx, struct mthca_data_seg *data) { @@ -1581,7 +1581,7 @@ static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg, } static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg, - struct ib_atomic_wr *wr) + const struct ib_atomic_wr *wr) { if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { aseg->swap_add = cpu_to_be64(wr->swap); @@ -1594,7 +1594,7 @@ static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg, } static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg, - struct ib_ud_wr *wr) + const struct ib_ud_wr *wr) { useg->lkey = cpu_to_be32(to_mah(wr->ah)->key); useg->av_addr = cpu_to_be64(to_mah(wr->ah)->avdma); @@ -1604,7 +1604,7 @@ static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg, } static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg, - struct ib_ud_wr *wr) + const struct ib_ud_wr *wr) { memcpy(useg->av, to_mah(wr->ah)->av, MTHCA_AV_SIZE); useg->dqpn = cpu_to_be32(wr->remote_qpn); diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 82b8f9630ee8..8b1f114062e5 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -3040,7 +3040,8 @@ static int nes_process_mad(struct ib_device *ibdev, int mad_flags, } static inline void -fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, struct ib_send_wr *ib_wr, u32 uselkey) +fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, const struct ib_send_wr *ib_wr, + u32 uselkey) { int sge_index; int total_payload_length = 0; diff --git 
a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 86b22f6b7271..5da1d0b88d1a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1953,7 +1953,7 @@ int ocrdma_destroy_srq(struct ib_srq *ibsrq) /* unprivileged verbs and their support functions. */ static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, - struct ib_send_wr *wr) + const struct ib_send_wr *wr) { struct ocrdma_ewqe_ud_hdr *ud_hdr = (struct ocrdma_ewqe_ud_hdr *)(hdr + 1); @@ -2000,7 +2000,7 @@ static inline uint32_t ocrdma_sglist_len(struct ib_sge *sg_list, int num_sge) static int ocrdma_build_inline_sges(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, struct ocrdma_sge *sge, - struct ib_send_wr *wr, u32 wqe_size) + const struct ib_send_wr *wr, u32 wqe_size) { int i; char *dpp_addr; @@ -2038,7 +2038,7 @@ static int ocrdma_build_inline_sges(struct ocrdma_qp *qp, } static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, - struct ib_send_wr *wr) + const struct ib_send_wr *wr) { int status; struct ocrdma_sge *sge; @@ -2057,7 +2057,7 @@ static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, } static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, - struct ib_send_wr *wr) + const struct ib_send_wr *wr) { int status; struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1); @@ -2075,7 +2075,7 @@ static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, } static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, - struct ib_send_wr *wr) + const struct ib_send_wr *wr) { struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1); struct ocrdma_sge *sge = ext_rw + 1; @@ -2105,7 +2105,7 @@ static int get_encoded_page_size(int pg_sz) static int ocrdma_build_reg(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, - struct ib_reg_wr *wr) + const struct ib_reg_wr *wr) { u64 fbo; struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1); diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c index 2e1f352c037d..b5d49740bf8a 100644 --- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c @@ -380,7 +380,7 @@ int qedr_destroy_gsi_qp(struct qedr_dev *dev) #define QEDR_GSI_QPN (1) static inline int qedr_gsi_build_header(struct qedr_dev *dev, struct qedr_qp *qp, - struct ib_send_wr *swr, + const struct ib_send_wr *swr, struct ib_ud_header *udh, int *roce_mode) { @@ -488,7 +488,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev, static inline int qedr_gsi_build_packet(struct qedr_dev *dev, struct qedr_qp *qp, - struct ib_send_wr *swr, + const struct ib_send_wr *swr, struct qed_roce_ll2_packet **p_packet) { u8 ud_header_buffer[QEDR_MAX_UD_HEADER_SIZE]; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index b82c5d5fb0e3..34d8b5580138 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2781,7 +2781,7 @@ static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev, } while (0) static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size, - struct ib_send_wr *wr) + const struct ib_send_wr *wr) { u32 data_size = 0; int i; @@ -2845,7 +2845,7 @@ static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev, static int qedr_prepare_reg(struct qedr_qp *qp, struct rdma_sq_fmr_wqe_1st *fwqe1, - struct ib_reg_wr *wr) + const struct 
ib_reg_wr *wr) { struct qedr_mr *mr = get_qedr_mr(wr->mr); struct rdma_sq_fmr_wqe_2nd *fwqe2; @@ -2907,7 +2907,8 @@ static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode) } } -static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr) +static inline bool qedr_can_post_send(struct qedr_qp *qp, + const struct ib_send_wr *wr) { int wq_is_full, err_wr, pbl_is_full; struct qedr_dev *dev = qp->dev; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index e72562a8959a..666613eef88f 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -311,7 +311,7 @@ void qib_rc_rnr_retry(unsigned long arg); void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr); -int qib_post_ud_send(struct rvt_qp *qp, struct ib_send_wr *wr); +int qib_post_ud_send(struct rvt_qp *qp, const struct ib_send_wr *wr); void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index eb5b1065ec08..1864621ef942 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -599,7 +599,8 @@ static inline void *get_rq_wqe(struct pvrdma_qp *qp, unsigned int n) qp->rq.offset + n * qp->rq.wqe_size); } -static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr, struct ib_reg_wr *wr) +static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr, + const struct ib_reg_wr *wr) { struct pvrdma_user_mr *mr = to_vmr(wr->mr); diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index d29e3c943399..858c992906c1 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1620,7 +1620,7 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, static inline int rvt_qp_valid_operation( struct rvt_qp *qp, const struct rvt_operation_params *post_parms, - struct ib_send_wr *wr) + const struct ib_send_wr *wr) { int len; @@ -1717,7 +1717,7 @@ static inline int rvt_qp_is_avail( * @wr: the work request to send */ static int rvt_post_one_wr(struct rvt_qp *qp, - struct ib_send_wr *wr, + const struct ib_send_wr *wr, int *call_send) { struct rvt_swqe *wqe; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 1188e163204d..0ea394554c8e 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -554,7 +554,7 @@ static int rxe_destroy_qp(struct ib_qp *ibqp) return 0; } -static int validate_send_wr(struct rxe_qp *qp, struct ib_send_wr *ibwr, +static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr, unsigned int mask, unsigned int length) { int num_sge = ibwr->num_sge; @@ -582,7 +582,7 @@ err1: } static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, - struct ib_send_wr *ibwr) + const struct ib_send_wr *ibwr) { wr->wr_id = ibwr->wr_id; wr->num_sge = ibwr->num_sge; @@ -637,7 +637,7 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, } } -static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr, +static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, unsigned int mask, unsigned int length, struct rxe_send_wqe *wqe) { @@ -685,7 +685,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr, return 0; } -static int post_one_send(struct rxe_qp *qp, struct ib_send_wr *ibwr, +static int 
post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, unsigned int mask, u32 length) { int err; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 42cbf8eabe9d..cf38d47fa8f8 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1352,7 +1352,7 @@ struct ib_rdma_wr { u32 rkey; }; -static inline struct ib_rdma_wr *rdma_wr(struct ib_send_wr *wr) +static inline const struct ib_rdma_wr *rdma_wr(const struct ib_send_wr *wr) { return container_of(wr, struct ib_rdma_wr, wr); } @@ -1367,7 +1367,7 @@ struct ib_atomic_wr { u32 rkey; }; -static inline struct ib_atomic_wr *atomic_wr(struct ib_send_wr *wr) +static inline const struct ib_atomic_wr *atomic_wr(const struct ib_send_wr *wr) { return container_of(wr, struct ib_atomic_wr, wr); } @@ -1384,7 +1384,7 @@ struct ib_ud_wr { u8 port_num; /* valid for DR SMPs on switch only */ }; -static inline struct ib_ud_wr *ud_wr(struct ib_send_wr *wr) +static inline const struct ib_ud_wr *ud_wr(const struct ib_send_wr *wr) { return container_of(wr, struct ib_ud_wr, wr); } @@ -1396,7 +1396,7 @@ struct ib_reg_wr { int access; }; -static inline struct ib_reg_wr *reg_wr(struct ib_send_wr *wr) +static inline const struct ib_reg_wr *reg_wr(const struct ib_send_wr *wr) { return container_of(wr, struct ib_reg_wr, wr); } @@ -1409,7 +1409,8 @@ struct ib_sig_handover_wr { struct ib_sge *prot; }; -static inline struct ib_sig_handover_wr *sig_handover_wr(struct ib_send_wr *wr) +static inline const struct ib_sig_handover_wr * +sig_handover_wr(const struct ib_send_wr *wr) { return container_of(wr, struct ib_sig_handover_wr, wr); } -- cgit From 7bb1fafc2f163ad03a2007295bb2f57cfdbfb630 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 18 Jul 2018 09:25:15 -0700 Subject: IB/mlx5, ib_post_send(), IB_WR_REG_SIG_MR: Do not modify the 'wr' argument Since the next patch will constify the wr pointer, do not modify the data that pointer points at. Signed-off-by: Bart Van Assche Reviewed-by: Sagi Grimberg Cc: Saeed Mahameed Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 3cbd00015182..535f6ad038b2 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4294,10 +4294,10 @@ static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16) } } -static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, +static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg, struct mlx5_wqe_ctrl_seg **ctrl, const struct ib_send_wr *wr, unsigned *idx, - int *size, int nreq) + int *size, int nreq, bool send_signaled, bool solicited) { if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) return -ENOMEM; @@ -4308,10 +4308,8 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, *(uint32_t *)(*seg + 8) = 0; (*ctrl)->imm = send_ieth(wr); (*ctrl)->fm_ce_se = qp->sq_signal_bits | - (wr->send_flags & IB_SEND_SIGNALED ? - MLX5_WQE_CTRL_CQ_UPDATE : 0) | - (wr->send_flags & IB_SEND_SOLICITED ? - MLX5_WQE_CTRL_SOLICITED : 0); + (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) | + (solicited ? 
MLX5_WQE_CTRL_SOLICITED : 0); *seg += sizeof(**ctrl); *size = sizeof(**ctrl) / 16; @@ -4319,6 +4317,16 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, return 0; } +static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, + struct mlx5_wqe_ctrl_seg **ctrl, + const struct ib_send_wr *wr, unsigned *idx, + int *size, int nreq) +{ + return __begin_wqe(qp, seg, ctrl, wr, idx, size, nreq, + wr->send_flags & IB_SEND_SIGNALED, + wr->send_flags & IB_SEND_SOLICITED); +} + static void finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, u8 size, unsigned idx, u64 wr_id, @@ -4477,10 +4485,8 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, * SET_PSV WQEs are not signaled and solicited * on error */ - wr->send_flags &= ~IB_SEND_SIGNALED; - wr->send_flags |= IB_SEND_SOLICITED; - err = begin_wqe(qp, &seg, &ctrl, wr, - &idx, &size, nreq); + err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, + &size, nreq, false, true); if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; @@ -4499,8 +4505,8 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, fence, MLX5_OPCODE_SET_PSV); - err = begin_wqe(qp, &seg, &ctrl, wr, - &idx, &size, nreq); + err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, + &size, nreq, false, true); if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; -- cgit From d34ac5cd3a73aacd11009c4fc3ba15d7ea62c411 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 18 Jul 2018 09:25:32 -0700 Subject: RDMA, core and ULPs: Declare ib_post_send() and ib_post_recv() arguments const Since neither ib_post_send() nor ib_post_recv() modifies the data structure their second argument points at, declare that argument const. This change makes it necessary to declare the 'bad_wr' argument const too, and to modify all ULPs that call ib_post_send(), ib_post_recv() or ib_post_srq_recv(). This patch does not change any functionality but makes it possible for the compiler to verify whether ib_post_(send|recv|srq_recv) really do not modify the posted work request. To make this possible, only one cast that casts away constness had to be introduced, namely in rpcrdma_post_recvs(). The only way I can think of to avoid that cast is to introduce an additional loop in that function or to change the data type of bad_wr from struct ib_recv_wr ** into int (an index that refers to an element in the work request list). However, both approaches would require even more extensive changes than this patch.
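The 'bad_wr' constification follows from the data flow: on failure the driver stores a pointer to one of the caller's now-const work requests through bad_wr, and a plain pointer-to-pointer would silently strip the qualifier. A toy sketch of the contract with simplified types (not the actual verbs API):

    struct wr { struct wr *next; int opcode; };

    /*
     * Post a chain of requests; on failure report the offending entry
     * through 'bad' without gaining any right to modify it.
     */
    static int post_list(const struct wr *head, const struct wr **bad)
    {
            const struct wr *w;

            for (w = head; w; w = w->next) {
                    if (w->opcode < 0) {    /* hypothetical validity check */
                            *bad = w;       /* const in, const out */
                            return -22;     /* i.e. -EINVAL */
                    }
            }
            return 0;
    }

With both parameters const-qualified, any driver that writes through the posted list no longer compiles, which is exactly the verification the commit message describes.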
Signed-off-by: Bart Van Assche Reviewed-by: Chuck Lever Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 9 +++++--- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 16 ++++++------- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 12 +++++----- drivers/infiniband/hw/cxgb3/iwch_provider.h | 8 +++---- drivers/infiniband/hw/cxgb3/iwch_qp.c | 12 +++++----- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 12 +++++----- drivers/infiniband/hw/cxgb4/qp.c | 27 ++++++++++++---------- drivers/infiniband/hw/hns/hns_roce_device.h | 8 +++---- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 13 ++++++----- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 12 +++++----- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 9 ++++---- drivers/infiniband/hw/mlx4/mad.c | 7 +++--- drivers/infiniband/hw/mlx4/mlx4_ib.h | 12 +++++----- drivers/infiniband/hw/mlx4/qp.c | 21 ++++++++--------- drivers/infiniband/hw/mlx4/srq.c | 4 ++-- drivers/infiniband/hw/mlx5/gsi.c | 8 +++---- drivers/infiniband/hw/mlx5/mlx5_ib.h | 20 ++++++++--------- drivers/infiniband/hw/mlx5/mr.c | 2 +- drivers/infiniband/hw/mlx5/qp.c | 21 ++++++++--------- drivers/infiniband/hw/mlx5/srq.c | 4 ++-- drivers/infiniband/hw/mthca/mthca_dev.h | 24 ++++++++++---------- drivers/infiniband/hw/mthca/mthca_qp.c | 16 ++++++------- drivers/infiniband/hw/mthca/mthca_srq.c | 8 +++---- drivers/infiniband/hw/nes/nes_verbs.c | 8 +++---- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 16 ++++++------- drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 12 +++++----- drivers/infiniband/hw/qedr/qedr_roce_cm.c | 8 +++---- drivers/infiniband/hw/qedr/qedr_roce_cm.h | 8 +++---- drivers/infiniband/hw/qedr/verbs.c | 26 ++++++++++----------- drivers/infiniband/hw/qedr/verbs.h | 8 +++---- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 8 +++---- drivers/infiniband/hw/usnic/usnic_ib_verbs.h | 8 +++---- drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 8 +++---- drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c | 4 ++-- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 12 +++++----- drivers/infiniband/sw/rdmavt/qp.c | 12 +++++----- drivers/infiniband/sw/rdmavt/qp.h | 12 +++++----- drivers/infiniband/sw/rxe/rxe_verbs.c | 18 +++++++-------- include/rdma/ib_verbs.h | 30 ++++++++++++------------- net/rds/ib_send.c | 6 ++--- net/sunrpc/xprtrdma/frwr_ops.c | 3 ++- net/sunrpc/xprtrdma/svc_rdma_rw.c | 3 ++- net/sunrpc/xprtrdma/verbs.c | 3 ++- 43 files changed, 257 insertions(+), 241 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 38d7de3f9b2f..d4c3bc042343 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2155,7 +2155,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, struct ib_uverbs_post_send cmd; struct ib_uverbs_post_send_resp resp; struct ib_uverbs_send_wr *user_wr; - struct ib_send_wr *wr = NULL, *last, *next, *bad_wr; + struct ib_send_wr *wr = NULL, *last, *next; + const struct ib_send_wr *bad_wr; struct ib_qp *qp; int i, sg_ind; int is_ud; @@ -2434,7 +2435,8 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, { struct ib_uverbs_post_recv cmd; struct ib_uverbs_post_recv_resp resp; - struct ib_recv_wr *wr, *next, *bad_wr; + struct ib_recv_wr *wr, *next; + const struct ib_recv_wr *bad_wr; struct ib_qp *qp; ssize_t ret = -EINVAL; @@ -2483,7 +2485,8 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, { struct ib_uverbs_post_srq_recv cmd; struct ib_uverbs_post_srq_recv_resp resp; - struct ib_recv_wr *wr, *next, *bad_wr; + struct ib_recv_wr *wr, *next; + const struct 
ib_recv_wr *bad_wr; struct ib_srq *srq; ssize_t ret = -EINVAL; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index a0082e0bb8e2..5d955b293c6d 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1519,8 +1519,8 @@ int bnxt_re_query_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr) return 0; } -int bnxt_re_post_srq_recv(struct ib_srq *ib_srq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int bnxt_re_post_srq_recv(struct ib_srq *ib_srq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq); @@ -2048,7 +2048,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, * and the MAD datagram out to the provided SGE. */ static int bnxt_re_build_qp1_shadow_qp_recv(struct bnxt_re_qp *qp, - struct ib_recv_wr *wr, + const struct ib_recv_wr *wr, struct bnxt_qplib_swqe *wqe, int payload_size) { @@ -2361,8 +2361,8 @@ bad: return rc; } -int bnxt_re_post_send(struct ib_qp *ib_qp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); struct bnxt_qplib_swqe wqe; @@ -2461,7 +2461,7 @@ bad: static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp, - struct ib_recv_wr *wr) + const struct ib_recv_wr *wr) { struct bnxt_qplib_swqe wqe; int rc = 0; @@ -2494,8 +2494,8 @@ static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev, return rc; } -int bnxt_re_post_recv(struct ib_qp *ib_qp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); struct bnxt_qplib_swqe wqe; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index bd04d40d897a..aa33e7b82c84 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -181,8 +181,8 @@ int bnxt_re_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, struct ib_udata *udata); int bnxt_re_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); int bnxt_re_destroy_srq(struct ib_srq *srq); -int bnxt_re_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr); +int bnxt_re_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr); struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr, struct ib_udata *udata); @@ -191,10 +191,10 @@ int bnxt_re_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int bnxt_re_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int bnxt_re_destroy_qp(struct ib_qp *qp); -int bnxt_re_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, - struct ib_send_wr **bad_send_wr); -int bnxt_re_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr); +int bnxt_re_post_send(struct ib_qp *qp, const struct ib_send_wr *send_wr, + const struct ib_send_wr **bad_send_wr); +int bnxt_re_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr); struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, const struct 
ib_cq_init_attr *attr, struct ib_ucontext *context, diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h index 2e38ddefea8a..8adbe9658935 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h @@ -326,10 +326,10 @@ enum iwch_qp_query_flags { }; u16 iwch_rqes_posted(struct iwch_qp *qhp); -int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg); int iwch_post_zb_read(struct iwch_ep *ep); diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 29ab6910a004..c649faad63f9 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -246,7 +246,7 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, } static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe, - struct ib_recv_wr *wr) + const struct ib_recv_wr *wr) { int i, err = 0; u32 pbl_addr[T3_MAX_SGE]; @@ -286,7 +286,7 @@ static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe, } static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe, - struct ib_recv_wr *wr) + const struct ib_recv_wr *wr) { int i; u32 pbl_addr; @@ -348,8 +348,8 @@ static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe, return 0; } -int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { int err = 0; u8 uninitialized_var(t3_wr_flit_cnt); @@ -463,8 +463,8 @@ out: return err; } -int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { int err = 0; struct iwch_qp *qhp; diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 047106cb0393..3cf93463021a 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -1033,10 +1033,10 @@ void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev, void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx); int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); -int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int c4iw_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog); int c4iw_destroy_listen(struct iw_cm_id *cm_id); @@ -1119,8 +1119,8 @@ void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey); void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq); void 
c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16); void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx); -int c4iw_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp); typedef int c4iw_restrack_func(struct sk_buff *msg, diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index dbd697b113ec..62e2c0d899f5 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -620,7 +620,7 @@ static int build_rdma_read(union t4_wr *wqe, const struct ib_send_wr *wr, } static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, - struct ib_recv_wr *wr, u8 *len16) + const struct ib_recv_wr *wr, u8 *len16) { int ret; @@ -634,7 +634,7 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, return 0; } -static int build_srq_recv(union t4_recv_wr *wqe, struct ib_recv_wr *wr, +static int build_srq_recv(union t4_recv_wr *wqe, const struct ib_recv_wr *wr, u8 *len16) { int ret; @@ -903,8 +903,9 @@ static int complete_sq_drain_wr(struct c4iw_qp *qhp, return 0; } -static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int complete_sq_drain_wrs(struct c4iw_qp *qhp, + const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { int ret = 0; @@ -919,7 +920,8 @@ static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr, return ret; } -static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) +static void complete_rq_drain_wr(struct c4iw_qp *qhp, + const struct ib_recv_wr *wr) { struct t4_cqe cqe = {}; struct c4iw_cq *rchp; @@ -951,7 +953,8 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) } } -static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr) +static void complete_rq_drain_wrs(struct c4iw_qp *qhp, + const struct ib_recv_wr *wr) { while (wr) { complete_rq_drain_wr(qhp, wr); @@ -959,8 +962,8 @@ static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr) } } -int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { int err = 0; u8 len16 = 0; @@ -1110,8 +1113,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, return err; } -int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int c4iw_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { int err = 0; struct c4iw_qp *qhp; @@ -1206,8 +1209,8 @@ static void defer_srq_wr(struct t4_srq *srq, union t4_recv_wr *wqe, t4_srq_produce_pending_wr(srq); } -int c4iw_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { union t4_recv_wr *wqe, lwqe; struct c4iw_srq *srq; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index ceb490c732c0..1c252753fb12 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -763,10 +763,10 @@ struct hns_roce_hw { int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state 
new_state); int (*destroy_qp)(struct ib_qp *ibqp); - int (*post_send)(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); - int (*post_recv)(struct ib_qp *qp, struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr); + int (*post_send)(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); + int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr); int (*req_notify_cq)(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index ae6b642ec073..8e11c6b62009 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -58,8 +58,9 @@ static void set_raddr_seg(struct hns_roce_wqe_raddr_seg *rseg, u64 remote_addr, rseg->len = 0; } -static int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int hns_roce_v1_post_send(struct ib_qp *ibqp, + const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah); @@ -342,8 +343,9 @@ out: return ret; } -static int hns_roce_v1_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +static int hns_roce_v1_post_recv(struct ib_qp *ibqp, + const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { int ret = 0; int nreq = 0; @@ -993,7 +995,8 @@ static int hns_roce_v1_send_lp_wqe(struct hns_roce_qp *hr_qp) { struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); struct device *dev = &hr_dev->pdev->dev; - struct ib_send_wr send_wr, *bad_wr; + struct ib_send_wr send_wr; + const struct ib_send_wr *bad_wr; int ret; memset(&send_wr, 0, sizeof(send_wr)); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index a9bc6b279175..25e0407d3f31 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -103,7 +103,7 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, void *wqe, unsigned int *sge_ind, - struct ib_send_wr **bad_wr) + const struct ib_send_wr **bad_wr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_v2_wqe_data_seg *dseg = wqe; @@ -164,8 +164,9 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, return 0; } -static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int hns_roce_v2_post_send(struct ib_qp *ibqp, + const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah); @@ -530,8 +531,9 @@ out: return ret; } -static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +static int hns_roce_v2_post_recv(struct ib_qp *ibqp, + const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c 
b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 7d85414742ff..e780454256df 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2201,8 +2201,8 @@ static void i40iw_copy_sg_list(struct i40iw_sge *sg_list, struct ib_sge *sgl, in * @bad_wr: return of bad wr if err */ static int i40iw_post_send(struct ib_qp *ibqp, - struct ib_send_wr *ib_wr, - struct ib_send_wr **bad_wr) + const struct ib_send_wr *ib_wr, + const struct ib_send_wr **bad_wr) { struct i40iw_qp *iwqp; struct i40iw_qp_uk *ukqp; @@ -2377,9 +2377,8 @@ out: * @ib_wr: work request for receive * @bad_wr: bad wr caused an error */ -static int i40iw_post_recv(struct ib_qp *ibqp, - struct ib_recv_wr *ib_wr, - struct ib_recv_wr **bad_wr) +static int i40iw_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *ib_wr, + const struct ib_recv_wr **bad_wr) { struct i40iw_qp *iwqp; struct i40iw_qp_uk *ukqp; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 8d730a69793d..e5466d786bb1 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -506,7 +506,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, { struct ib_sge list; struct ib_ud_wr wr; - struct ib_send_wr *bad_wr; + const struct ib_send_wr *bad_wr; struct mlx4_ib_demux_pv_ctx *tun_ctx; struct mlx4_ib_demux_pv_qp *tun_qp; struct mlx4_rcv_tunnel_mad *tun_mad; @@ -1310,7 +1310,8 @@ static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx, int index) { struct ib_sge sg_list; - struct ib_recv_wr recv_wr, *bad_recv_wr; + struct ib_recv_wr recv_wr; + const struct ib_recv_wr *bad_recv_wr; int size; size = (tun_qp->qp->qp_type == IB_QPT_UD) ? @@ -1361,7 +1362,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, { struct ib_sge list; struct ib_ud_wr wr; - struct ib_send_wr *bad_wr; + const struct ib_send_wr *bad_wr; struct mlx4_ib_demux_pv_ctx *sqp_ctx; struct mlx4_ib_demux_pv_qp *sqp; struct mlx4_mad_snd_buf *sqp_mad; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 1a0fad30633b..e817a2f55546 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -775,8 +775,8 @@ int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); int mlx4_ib_destroy_srq(struct ib_srq *srq); void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index); -int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, @@ -788,10 +788,10 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); -int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, int port, 
const struct ib_wc *in_wc, const struct ib_grh *in_grh, diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 44fc684b5e3a..b431757d4668 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -3569,8 +3569,8 @@ static void add_zero_len_inline(void *wqe) inl->byte_count = cpu_to_be32(1 << 31); } -static int _mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr, bool drain) +static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr, bool drain) { struct mlx4_ib_qp *qp = to_mqp(ibqp); void *wqe; @@ -3901,14 +3901,14 @@ out: return err; } -int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { return _mlx4_ib_post_send(ibqp, wr, bad_wr, false); } -static int _mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr, bool drain) +static int _mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr, bool drain) { struct mlx4_ib_qp *qp = to_mqp(ibqp); struct mlx4_wqe_data_seg *scat; @@ -3995,8 +3995,8 @@ out: return err; } -int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { return _mlx4_ib_post_recv(ibqp, wr, bad_wr, false); } @@ -4536,7 +4536,7 @@ void mlx4_ib_drain_sq(struct ib_qp *qp) struct ib_cq *cq = qp->send_cq; struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; struct mlx4_ib_drain_cqe sdrain; - struct ib_send_wr *bad_swr; + const struct ib_send_wr *bad_swr; struct ib_rdma_wr swr = { .wr = { .next = NULL, @@ -4571,7 +4571,8 @@ void mlx4_ib_drain_rq(struct ib_qp *qp) struct ib_cq *cq = qp->recv_cq; struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; struct mlx4_ib_drain_cqe rdrain; - struct ib_recv_wr rwr = {}, *bad_rwr; + struct ib_recv_wr rwr = {}; + const struct ib_recv_wr *bad_rwr; int ret; struct mlx4_ib_dev *dev = to_mdev(qp->device); struct mlx4_dev *mdev = dev->dev; diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index ebee56cbc0e2..3731b31c3653 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -307,8 +307,8 @@ void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index) spin_unlock(&srq->lock); } -int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct mlx4_ib_srq *srq = to_msrq(ibsrq); struct mlx4_wqe_srq_next_seg *next; diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 79e6309460dc..4950df3f71b6 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -477,8 +477,8 @@ static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr) return gsi->tx_qps[qp_index]; } -int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); struct ib_qp *tx_qp; @@ -522,8 +522,8 @@ err: return ret; } -int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct 
ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 8d9eaa31fab2..b75754efc663 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1013,8 +1013,8 @@ int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr); int mlx5_ib_destroy_srq(struct ib_srq *srq); -int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); @@ -1025,10 +1025,10 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr int mlx5_ib_destroy_qp(struct ib_qp *qp); void mlx5_ib_drain_sq(struct ib_qp *qp); void mlx5_ib_drain_rq(struct ib_qp *qp); -int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n); int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, void *buffer, u32 length, @@ -1209,10 +1209,10 @@ int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); -int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi); int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 308456d28afb..9fb1d9cb9401 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -898,7 +898,7 @@ static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev, struct mlx5_umr_wr *umrwr) { struct umr_common *umrc = &dev->umrc; - struct ib_send_wr *bad; + const struct ib_send_wr *bad; int err; struct mlx5_ib_umr_context umr_context; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 535f6ad038b2..6efd770797d1 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4348,8 +4348,8 @@ static void finish_wqe(struct mlx5_ib_qp *qp, qp->sq.w_list[idx].next = qp->sq.cur_post; } -static int _mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr, bool drain) +static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr 
**bad_wr, bool drain) { struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ struct mlx5_ib_dev *dev = to_mdev(ibqp->device); @@ -4675,8 +4675,8 @@ out: return err; } -int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { return _mlx5_ib_post_send(ibqp, wr, bad_wr, false); } @@ -4686,8 +4686,8 @@ static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size) sig->signature = calc_sig(sig, size); } -static int _mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr, bool drain) +static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr, bool drain) { struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_wqe_data_seg *scat; @@ -4767,8 +4767,8 @@ out: return err; } -int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { return _mlx5_ib_post_recv(ibqp, wr, bad_wr, false); } @@ -5764,7 +5764,7 @@ void mlx5_ib_drain_sq(struct ib_qp *qp) struct ib_cq *cq = qp->send_cq; struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; struct mlx5_ib_drain_cqe sdrain; - struct ib_send_wr *bad_swr; + const struct ib_send_wr *bad_swr; struct ib_rdma_wr swr = { .wr = { .next = NULL, @@ -5799,7 +5799,8 @@ void mlx5_ib_drain_rq(struct ib_qp *qp) struct ib_cq *cq = qp->recv_cq; struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; struct mlx5_ib_drain_cqe rdrain; - struct ib_recv_wr rwr = {}, *bad_rwr; + struct ib_recv_wr rwr = {}; + const struct ib_recv_wr *bad_rwr; int ret; struct mlx5_ib_dev *dev = to_mdev(qp->device); struct mlx5_core_dev *mdev = dev->mdev; diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 0af7b7905550..f9dc2b79a51f 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -440,8 +440,8 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index) spin_unlock(&srq->lock); } -int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct mlx5_ib_srq *srq = to_msrq(ibsrq); struct mlx5_wqe_srq_next_seg *next; diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 5508afbf1c67..220a3e4717a3 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -519,10 +519,10 @@ int mthca_max_srq_sge(struct mthca_dev *dev); void mthca_srq_event(struct mthca_dev *dev, u32 srqn, enum ib_event_type event_type); void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr); -int mthca_tavor_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); -int mthca_arbel_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int mthca_tavor_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); +int mthca_arbel_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); void mthca_qp_event(struct mthca_dev *dev, u32 qpn, enum ib_event_type event_type); @@ -530,14 +530,14 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m struct 
ib_qp_init_attr *qp_init_attr); int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); -int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); -int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int mthca_tavor_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int mthca_tavor_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); +int mthca_arbel_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int mthca_arbel_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, int index, int *dbd, __be32 *new_wqe); int mthca_alloc_qp(struct mthca_dev *dev, diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 0e390f410b39..3d37f2373d63 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1611,8 +1611,8 @@ static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg, useg->qkey = cpu_to_be32(wr->remote_qkey); } -int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int mthca_tavor_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); @@ -1814,8 +1814,8 @@ out: return err; } -int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mthca_tavor_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); @@ -1925,8 +1925,8 @@ out: return err; } -int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int mthca_arbel_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); @@ -2165,8 +2165,8 @@ out: return err; } -int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mthca_arbel_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index f79732bc73b4..9a3fc6fb0d7e 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -472,8 +472,8 @@ void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr) spin_unlock(&srq->lock); } -int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct mthca_dev *dev = to_mdev(ibsrq->device); struct mthca_srq *srq = to_msrq(ibsrq); @@ -572,8 +572,8 @@ int mthca_tavor_post_srq_recv(struct ib_srq 
*ibsrq, struct ib_recv_wr *wr, return err; } -int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct mthca_dev *dev = to_mdev(ibsrq->device); struct mthca_srq *srq = to_msrq(ibsrq); diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 8b1f114062e5..3bd3c61af55b 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -3067,8 +3067,8 @@ fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, const struct ib_send_wr *ib_wr, /** * nes_post_send */ -static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, - struct ib_send_wr **bad_wr) +static int nes_post_send(struct ib_qp *ibqp, const struct ib_send_wr *ib_wr, + const struct ib_send_wr **bad_wr) { u64 u64temp; unsigned long flags = 0; @@ -3329,8 +3329,8 @@ out: /** * nes_post_recv */ -static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, - struct ib_recv_wr **bad_wr) +static int nes_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *ib_wr, + const struct ib_recv_wr **bad_wr) { u64 u64temp; unsigned long flags = 0; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 5da1d0b88d1a..c158ca9fde6d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -2166,8 +2166,8 @@ static void ocrdma_ring_sq_db(struct ocrdma_qp *qp) iowrite32(val, qp->sq_db); } -int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int ocrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { int status = 0; struct ocrdma_qp *qp = get_ocrdma_qp(ibqp); @@ -2278,8 +2278,8 @@ static void ocrdma_ring_rq_db(struct ocrdma_qp *qp) iowrite32(val, qp->rq_db); } -static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr, - u16 tag) +static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, + const struct ib_recv_wr *wr, u16 tag) { u32 wqe_size = 0; struct ocrdma_sge *sge; @@ -2299,8 +2299,8 @@ static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr, ocrdma_cpu_to_le32(rqe, wqe_size); } -int ocrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int ocrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { int status = 0; unsigned long flags; @@ -2369,8 +2369,8 @@ static void ocrdma_ring_srq_db(struct ocrdma_srq *srq) iowrite32(val, srq->db + OCRDMA_DB_GEN2_SRQ_OFFSET); } -int ocrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int ocrdma_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { int status = 0; unsigned long flags; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index 9a9971708646..b69cfdce7970 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -43,10 +43,10 @@ #ifndef __OCRDMA_VERBS_H__ #define __OCRDMA_VERBS_H__ -int ocrdma_post_send(struct ib_qp *, struct ib_send_wr *, - struct ib_send_wr **bad_wr); -int ocrdma_post_recv(struct ib_qp *, struct ib_recv_wr *, - struct ib_recv_wr **bad_wr); +int ocrdma_post_send(struct ib_qp *, const struct ib_send_wr *, + const struct 
ib_send_wr **bad_wr); +int ocrdma_post_recv(struct ib_qp *, const struct ib_recv_wr *, + const struct ib_recv_wr **bad_wr); int ocrdma_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc); int ocrdma_arm_cq(struct ib_cq *, enum ib_cq_notify_flags flags); @@ -100,8 +100,8 @@ int ocrdma_modify_srq(struct ib_srq *, struct ib_srq_attr *, enum ib_srq_attr_mask, struct ib_udata *); int ocrdma_query_srq(struct ib_srq *, struct ib_srq_attr *); int ocrdma_destroy_srq(struct ib_srq *); -int ocrdma_post_srq_recv(struct ib_srq *, struct ib_recv_wr *, - struct ib_recv_wr **bad_recv_wr); +int ocrdma_post_srq_recv(struct ib_srq *, const struct ib_recv_wr *, + const struct ib_recv_wr **bad_recv_wr); int ocrdma_dereg_mr(struct ib_mr *); struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc); diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c index b5d49740bf8a..85578887421b 100644 --- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c @@ -537,8 +537,8 @@ static inline int qedr_gsi_build_packet(struct qedr_dev *dev, return 0; } -int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int qedr_gsi_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct qed_roce_ll2_packet *pkt = NULL; struct qedr_qp *qp = get_qedr_qp(ibqp); @@ -607,8 +607,8 @@ err: return rc; } -int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int qedr_gsi_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct qedr_dev *dev = get_qedr_dev(ibqp->device); struct qedr_qp *qp = get_qedr_qp(ibqp); diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.h b/drivers/infiniband/hw/qedr/qedr_roce_cm.h index a55916323ea9..d46dcd3f6424 100644 --- a/drivers/infiniband/hw/qedr/qedr_roce_cm.h +++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.h @@ -46,10 +46,10 @@ static inline u32 qedr_get_ipv4_from_gid(const u8 *gid) /* RDMA CM */ int qedr_gsi_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); -int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); -int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); +int qedr_gsi_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); +int qedr_gsi_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); struct ib_qp *qedr_create_gsi_qp(struct qedr_dev *dev, struct ib_qp_init_attr *attrs, struct qedr_qp *qp); diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 34d8b5580138..4aaeb24cebfc 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2696,9 +2696,9 @@ static void swap_wqe_data64(u64 *p) static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev, struct qedr_qp *qp, u8 *wqe_size, - struct ib_send_wr *wr, - struct ib_send_wr **bad_wr, u8 *bits, - u8 bit) + const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr, + u8 *bits, u8 bit) { u32 data_size = sge_data_len(wr->sg_list, wr->num_sge); char *seg_prt, *wqe; @@ -2805,8 +2805,8 @@ static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev, struct qedr_qp *qp, struct rdma_sq_rdma_wqe_1st *rwqe, struct rdma_sq_rdma_wqe_2nd *rwqe2, - struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) + const struct ib_send_wr *wr, + const struct 
ib_send_wr **bad_wr) { rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey); DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr); @@ -2828,8 +2828,8 @@ static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev, struct qedr_qp *qp, struct rdma_sq_send_wqe_1st *swqe, struct rdma_sq_send_wqe_2st *swqe2, - struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) + const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { memset(swqe2, 0, sizeof(*swqe2)); if (wr->send_flags & IB_SEND_INLINE) { @@ -2945,8 +2945,8 @@ static inline bool qedr_can_post_send(struct qedr_qp *qp, return true; } -static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int __qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct qedr_dev *dev = get_qedr_dev(ibqp->device); struct qedr_qp *qp = get_qedr_qp(ibqp); @@ -3160,8 +3160,8 @@ static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, return rc; } -int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct qedr_dev *dev = get_qedr_dev(ibqp->device); struct qedr_qp *qp = get_qedr_qp(ibqp); @@ -3226,8 +3226,8 @@ int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, return rc; } -int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct qedr_qp *qp = get_qedr_qp(ibqp); struct qedr_dev *dev = qp->dev; diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 2c57e4c592a6..087baf009864 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -82,10 +82,10 @@ int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, struct ib_mr *qedr_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); int qedr_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc); -int qedr_post_send(struct ib_qp *, struct ib_send_wr *, - struct ib_send_wr **bad_wr); -int qedr_post_recv(struct ib_qp *, struct ib_recv_wr *, - struct ib_recv_wr **bad_wr); +int qedr_post_send(struct ib_qp *, const struct ib_send_wr *, + const struct ib_send_wr **bad_wr); +int qedr_post_recv(struct ib_qp *, const struct ib_recv_wr *, + const struct ib_recv_wr **bad_wr); int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 9524524fade4..9973ac893635 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -771,15 +771,15 @@ int usnic_ib_destroy_ah(struct ib_ah *ah) return -EINVAL; } -int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int usnic_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { usnic_dbg("\n"); return -EINVAL; } -int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int usnic_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { usnic_dbg("\n"); return -EINVAL; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index 
1fda94425116..2a2c9beb715f 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -80,10 +80,10 @@ struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, struct ib_udata *udata); int usnic_ib_destroy_ah(struct ib_ah *ah); -int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int usnic_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int usnic_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int usnic_ib_req_notify_cq(struct ib_cq *cq, diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index 1864621ef942..60083c0363a5 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -624,8 +624,8 @@ static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr, * * @return: 0 on success, otherwise errno returned. */ -int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int pvrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct pvrdma_qp *qp = to_vqp(ibqp); struct pvrdma_dev *dev = to_vdev(ibqp->device); @@ -828,8 +828,8 @@ out: * * @return: 0 on success, otherwise errno returned. */ -int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int pvrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct pvrdma_dev *dev = to_vdev(ibqp->device); unsigned long flags; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index af235967a9c2..a0a82731ea24 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -52,8 +52,8 @@ #include "pvrdma.h" -int pvrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int pvrdma_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { /* No support for kernel clients. 
*/ return -EOPNOTSUPP; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index b7b25728a7e5..6ebf3360ea12 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -435,8 +435,8 @@ int pvrdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int pvrdma_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); int pvrdma_destroy_srq(struct ib_srq *srq); -int pvrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int pvrdma_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, @@ -446,9 +446,9 @@ int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int pvrdma_destroy_qp(struct ib_qp *qp); -int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int pvrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int pvrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); #endif /* __PVRDMA_VERBS_H__ */ diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 858c992906c1..5ce403c6cddb 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1538,8 +1538,8 @@ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, * * Return: 0 on success otherwise errno */ -int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct rvt_rwq *wq = qp->r_rq.wq; @@ -1891,8 +1891,8 @@ bail_inval_free: * * Return: 0 on success else errno */ -int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); @@ -1948,8 +1948,8 @@ bail: * * Return: 0 on success else errno */ -int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); struct rvt_rwq *wq; diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h index 8409f80d5f25..264811fdc530 100644 --- a/drivers/infiniband/sw/rdmavt/qp.h +++ b/drivers/infiniband/sw/rdmavt/qp.h @@ -60,10 +60,10 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int rvt_destroy_qp(struct ib_qp *ibqp); int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr); -int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); -int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - 
struct ib_recv_wr **bad_wr); +int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); +int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); #endif /* DEF_RVTQP_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 0ea394554c8e..f5b1e0ad6142 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -287,7 +287,7 @@ static int rxe_destroy_ah(struct ib_ah *ibah) return 0; } -static int post_one_recv(struct rxe_rq *rq, struct ib_recv_wr *ibwr) +static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) { int err; int i; @@ -438,8 +438,8 @@ static int rxe_destroy_srq(struct ib_srq *ibsrq) return 0; } -static int rxe_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { int err = 0; unsigned long flags; @@ -726,8 +726,8 @@ err1: return err; } -static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { int err = 0; unsigned int mask; @@ -769,8 +769,8 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr, return err; } -static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct rxe_qp *qp = to_rqp(ibqp); @@ -792,8 +792,8 @@ static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, return rxe_post_send_kernel(qp, wr, bad_wr); } -static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { int err = 0; struct rxe_qp *qp = to_rqp(ibqp); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index cf38d47fa8f8..1de8f0d2797c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2373,8 +2373,8 @@ struct ib_device { struct ib_srq_attr *srq_attr); int (*destroy_srq)(struct ib_srq *srq); int (*post_srq_recv)(struct ib_srq *srq, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr); + const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr); struct ib_qp * (*create_qp)(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr, struct ib_udata *udata); @@ -2388,11 +2388,11 @@ struct ib_device { struct ib_qp_init_attr *qp_init_attr); int (*destroy_qp)(struct ib_qp *qp); int (*post_send)(struct ib_qp *qp, - struct ib_send_wr *send_wr, - struct ib_send_wr **bad_send_wr); + const struct ib_send_wr *send_wr, + const struct ib_send_wr **bad_send_wr); int (*post_recv)(struct ib_qp *qp, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr); + const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr); struct ib_cq * (*create_cq)(struct ib_device *device, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, @@ -3281,10 +3281,10 @@ int ib_destroy_srq(struct ib_srq *srq); * the work request that failed to be posted on the QP. 
*/ static inline int ib_post_srq_recv(struct ib_srq *srq, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr) + const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr) { - struct ib_recv_wr *dummy; + const struct ib_recv_wr *dummy; return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr ? : &dummy); } @@ -3384,10 +3384,10 @@ int ib_close_qp(struct ib_qp *qp); * earlier work requests in the list. */ static inline int ib_post_send(struct ib_qp *qp, - struct ib_send_wr *send_wr, - struct ib_send_wr **bad_send_wr) + const struct ib_send_wr *send_wr, + const struct ib_send_wr **bad_send_wr) { - struct ib_send_wr *dummy; + const struct ib_send_wr *dummy; return qp->device->post_send(qp, send_wr, bad_send_wr ? : &dummy); } @@ -3401,10 +3401,10 @@ static inline int ib_post_send(struct ib_qp *qp, * the work request that failed to be posted on the QP. */ static inline int ib_post_recv(struct ib_qp *qp, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr) + const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr) { - struct ib_recv_wr *dummy; + const struct ib_recv_wr *dummy; return qp->device->post_recv(qp, recv_wr, bad_recv_wr ? : &dummy); } diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 8557a1cae041..8ac80c1b051e 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -492,7 +492,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, struct rds_ib_send_work *send = NULL; struct rds_ib_send_work *first; struct rds_ib_send_work *prev; - struct ib_send_wr *failed_wr; + const struct ib_send_wr *failed_wr; struct scatterlist *scat; u32 pos; u32 i; @@ -758,7 +758,7 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) { struct rds_ib_connection *ic = conn->c_transport_data; struct rds_ib_send_work *send = NULL; - struct ib_send_wr *failed_wr; + const struct ib_send_wr *failed_wr; struct rds_ib_device *rds_ibdev; u32 pos; u32 work_alloc; @@ -849,7 +849,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) struct rds_ib_send_work *send = NULL; struct rds_ib_send_work *first; struct rds_ib_send_work *prev; - struct ib_send_wr *failed_wr; + const struct ib_send_wr *failed_wr; struct scatterlist *scat; unsigned long len; u64 remote_addr = op->op_remote_addr; diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index a167eebf63d5..1bb00dd6ccdb 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -517,7 +517,8 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs) static void frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs) { - struct ib_send_wr *first, **prev, *last, *bad_wr; + struct ib_send_wr *first, **prev, *last; + const struct ib_send_wr *bad_wr; struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_frwr *frwr; struct rpcrdma_mr *mr; diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index ce3ea8419704..04cb3363172a 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -307,7 +307,8 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc) { struct svcxprt_rdma *rdma = cc->cc_rdma; struct svc_xprt *xprt = &rdma->sc_xprt; - struct ib_send_wr *first_wr, *bad_wr; + struct ib_send_wr *first_wr; + const struct ib_send_wr *bad_wr; struct list_head *tmp; struct ib_cqe *cqe; int ret; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 112a15abc4a4..5efeba08918b 100644 --- 
a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1559,7 +1559,8 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) if (!count) return; - rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr, &bad_wr); + rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr, + (const struct ib_recv_wr **)&bad_wr); if (rc) { for (wr = bad_wr; wr; wr = wr->next) { struct rpcrdma_rep *rep; -- cgit From bccd06223f21654eb268e153426a77deb117c1e8 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 26 Jul 2018 16:37:14 -0600 Subject: IB/uverbs: Add UVERBS_ATTR_FLAGS_IN to the specs language This clearly indicates that the input is a bitwise combination of values in an enum, and identifies which enum contains the definition of the bits. Special accessors are provided that handle the mandatory validation of the allowed bits and enforce the correct type for bitwise flags. If we had introduced this at the start then the kabi would have uniformly used u64 data to pass flags, however today there is a mixture of u64 and u32 flags. All places are converted to accept both sizes and the accessor fixes it. This allows all existing flags to grow to u64 in future without any hassle. Finally all flags are, by definition, optional. If flags are not passed the accessor does not fail, but provides a value of zero. Signed-off-by: Jason Gunthorpe Reviewed-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_ioctl.c | 51 ++++++++++++++++++++++ .../infiniband/core/uverbs_std_types_counters.c | 10 ++--- drivers/infiniband/core/uverbs_std_types_cq.c | 13 +++--- drivers/infiniband/core/uverbs_std_types_mr.c | 10 ++--- drivers/infiniband/hw/mlx5/devx.c | 16 ++++--- drivers/infiniband/hw/mlx5/main.c | 16 +++---- include/rdma/uverbs_ioctl.h | 33 ++++++++++++++ 7 files changed, 119 insertions(+), 30 deletions(-) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index db7a92ea5dbe..23a1777f26e2 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -486,3 +486,54 @@ out: return err; } + +int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, u64 allowed_bits) +{ + const struct uverbs_attr *attr; + u64 flags; + + attr = uverbs_attr_get(attrs_bundle, idx); + /* Missing attribute means 0 flags */ + if (IS_ERR(attr)) { + *to = 0; + return 0; + } + + /* + * New userspace code should use 8 bytes to pass flags, but we + * transparently support old userspaces that were using 4 bytes as + * well. 
+ */ + if (attr->ptr_attr.len == 8) + flags = attr->ptr_attr.data; + else if (attr->ptr_attr.len == 4) + memcpy(&flags, &attr->ptr_attr.data, 4); + else + return -EINVAL; + + if (flags & ~allowed_bits) + return -EINVAL; + + *to = flags; + return 0; +} +EXPORT_SYMBOL(uverbs_get_flags64); + +int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, u64 allowed_bits) +{ + u64 flags; + int ret; + + ret = uverbs_get_flags64(&flags, attrs_bundle, idx, allowed_bits); + if (ret) + return ret; + + if (flags > U32_MAX) + return -EINVAL; + *to = flags; + + return 0; +} +EXPORT_SYMBOL(uverbs_get_flags32); diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index dfe59ad721f6..34589799f446 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -97,8 +97,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(struct ib_device *ib_dev, if (!atomic_read(&counters->usecnt)) return -EINVAL; - ret = uverbs_copy_from(&read_attr.flags, attrs, - UVERBS_ATTR_READ_COUNTERS_FLAGS); + ret = uverbs_get_flags32(&read_attr.flags, attrs, + UVERBS_ATTR_READ_COUNTERS_FLAGS, + IB_UVERBS_READ_COUNTERS_PREFER_CACHED); if (ret) return ret; @@ -147,9 +148,8 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF, UVERBS_ATTR_MIN_SIZE(0), UA_MANDATORY), - UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS, - UVERBS_ATTR_TYPE(__u32), - UA_MANDATORY)); + UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS, + enum ib_uverbs_read_counters_flags)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS, UVERBS_TYPE_ALLOC_IDR(uverbs_free_counters), diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index c71305fc0433..3179203a2dd7 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -84,10 +84,12 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, if (ret) return ret; - /* Optional param, if it doesn't exist, we get -ENOENT and skip it */ - if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&attr.flags, attrs, - UVERBS_ATTR_CREATE_CQ_FLAGS))) - return -EFAULT; + ret = uverbs_get_flags32(&attr.flags, attrs, + UVERBS_ATTR_CREATE_CQ_FLAGS, + IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION | + IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN); + if (ret) + return ret; ev_file_uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL); if (!IS_ERR(ev_file_uobj)) { @@ -164,7 +166,8 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, UVERBS_ATTR_TYPE(u32), UA_MANDATORY), - UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_TYPE(u32)), + UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, + enum ib_uverbs_ex_create_cq_flags), UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE, UVERBS_ATTR_TYPE(u32), UA_MANDATORY), diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index c1b9124d611e..d63da0c2a8c1 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -62,8 +62,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev, if (ret) return ret; - ret = uverbs_copy_from(&attr.access_flags, attrs, - UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS); + ret = uverbs_get_flags32(&attr.access_flags, attrs, + UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS, + IB_ACCESS_SUPPORTED); if 
(ret) return ret; @@ -131,9 +132,8 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_OBJECT_PD, UVERBS_ACCESS_READ, UA_MANDATORY), - UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS, - UVERBS_ATTR_TYPE(u32), - UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS, + enum ib_access_flags), UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE, UVERBS_OBJECT_DM, UVERBS_ACCESS_READ, diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index fee800f2fdec..c9a7a12a8c13 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -858,16 +858,21 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, { u64 addr; size_t size; - int access; + u32 access; int npages; int err; u32 page_mask; if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) || - uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN) || - uverbs_copy_from(&access, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS)) + uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN)) return -EFAULT; + err = uverbs_get_flags32(&access, attrs, + MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, + IB_ACCESS_SUPPORTED); + if (err) + return err; + err = ib_check_mr_access(access); if (err) return err; @@ -1012,9 +1017,8 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN, UVERBS_ATTR_TYPE(u64), UA_MANDATORY), - UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, - UVERBS_ATTR_TYPE(u32), - UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, + enum ib_access_flags), UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, UVERBS_ATTR_TYPE(u32), UA_MANDATORY)); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 61c78f4e4ebc..06d6309b719a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3859,12 +3859,11 @@ mlx5_ib_create_flow_action_esp(struct ib_device *device, u64 flags; int err = 0; - if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&action_flags, attrs, - MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS))) - return ERR_PTR(-EFAULT); - - if (action_flags >= (MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1)) - return ERR_PTR(-EOPNOTSUPP); + err = uverbs_get_flags64( + &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, + ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1)); + if (err) + return ERR_PTR(err); flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags); @@ -5531,9 +5530,8 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE( mlx5_ib_flow_action, UVERBS_OBJECT_FLOW_ACTION, UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, - UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, - UVERBS_ATTR_TYPE(u64), - UA_MANDATORY)); + UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, + enum mlx5_ib_uapi_flow_action_flags)); #define NUM_TREES 5 static int populate_specs_root(struct mlx5_ib_dev *dev) diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index d16d31d4322d..5e6d0569d97c 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -268,6 +268,19 @@ struct uverbs_object_tree_def { __VA_ARGS__ }, \ }) +/* + * An input value that is a bitwise combination of values of _enum_type. + * This permits the flag value to be passed as either a u32 or u64, it must + * be retrieved via uverbs_get_flag(). + */ +#define UVERBS_ATTR_FLAGS_IN(_attr_id, _enum_type, ...) 
\ + UVERBS_ATTR_PTR_IN( \ + _attr_id, \ + UVERBS_ATTR_SIZE(sizeof(u32) + BUILD_BUG_ON_ZERO( \ + !sizeof(_enum_type *)), \ + sizeof(u64)), \ + __VA_ARGS__) + /* * This spec is used in order to pass information to the hardware driver in a * legacy way. Every verb that could get driver specific data should get this @@ -520,6 +533,26 @@ static inline int _uverbs_copy_from_or_zero(void *to, #define uverbs_copy_from_or_zero(to, attrs_bundle, idx) \ _uverbs_copy_from_or_zero(to, attrs_bundle, idx, sizeof(*to)) +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) +int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, u64 allowed_bits); +int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, u64 allowed_bits); +#else +static inline int +uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, u64 allowed_bits) +{ + return -EINVAL; +} +static inline int +uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, u64 allowed_bits) +{ + return -EINVAL; +} +#endif + /* ================================================= * Definitions -> Specs infrastructure * ================================================= -- cgit From f95ccffc715bf0fc6792fda52d24e0a92ad955e7 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 26 Jul 2018 10:08:37 +0300 Subject: IB/mlx4: Use 4K pages for kernel QP's WQE buffer In the current implementation, the driver tries to allocate contiguous memory, and if it fails, it falls back to 4K fragmented allocation. Once the memory is fragmented, the first allocation might take a lot of time, and even fail, which can cause connection failures. This patch changes the logic to always allocate with 4K granularity, since it's more robust and more likely to succeed. This patch was tested with Lustre and no performance degradation was observed. Note: This commit eliminates the "shrinking WQE" feature. This feature depended on using vmap to create a virtually contiguous send WQ. vmap use was abandoned due to problems with several processors (see the commit cited below). As a result, shrinking WQE was available only with physically contiguous send WQs. Allocating such send WQs caused the problems described above. Therefore, as a side effect of eliminating the use of large physically contiguous send WQs, the shrinking WQE feature became unavailable. Warning example: worker/20:1: page allocation failure: order:8, mode:0x80d0 CPU: 20 PID: 513 Comm: kworker/20:1 Tainted: G OE ------------ Workqueue: ib_cm cm_work_handler [ib_cm] Call Trace: [] dump_stack+0x19/0x1b [] warn_alloc_failed+0x110/0x180 [] __alloc_pages_nodemask+0x9b4/0xba0 [] alloc_pages_current+0x98/0x110 [] __get_free_pages+0xe/0x50 [] swiotlb_alloc_coherent+0x5e/0x150 [] x86_swiotlb_alloc_coherent+0x41/0x50 [] mlx4_buf_direct_alloc.isra.7+0xc4/0x180 [mlx4_core] [] mlx4_buf_alloc+0x1bb/0x260 [mlx4_core] [] create_qp_common+0x536/0x1000 [mlx4_ib] [] ? dma_pool_free+0xa7/0xd0 [] mlx4_ib_create_qp+0x3b1/0xdc0 [mlx4_ib] [] ? mlx4_ib_create_cq+0x2d2/0x430 [mlx4_ib] [] mlx4_ib_create_qp_wrp+0x10/0x20 [mlx4_ib] [] ib_create_qp+0x7a/0x2f0 [ib_core] [] rdma_create_qp+0x34/0xb0 [rdma_cm] [] kiblnd_create_conn+0xbf9/0x1950 [ko2iblnd] [] ? 
cfs_percpt_unlock+0x1a/0xb0 [libcfs] [] kiblnd_passive_connect+0xa99/0x18c0 [ko2iblnd] Fixes: 73898db04301 ("net/mlx4: Avoid wrong virtual mappings") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/mlx4_ib.h | 1 - drivers/infiniband/hw/mlx4/qp.c | 209 ++++++----------------------------- 2 files changed, 34 insertions(+), 176 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index e817a2f55546..e10dccc7958f 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -322,7 +322,6 @@ struct mlx4_ib_qp { u32 doorbell_qpn; __be32 sq_signal_bits; unsigned sq_next_wqe; - int sq_max_wqes_per_wr; int sq_spare_wqes; struct mlx4_ib_wq sq; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index b431757d4668..6dd3cd2c2f80 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -204,89 +204,24 @@ static void *get_send_wqe(struct mlx4_ib_qp *qp, int n) /* * Stamp a SQ WQE so that it is invalid if prefetched by marking the - * first four bytes of every 64 byte chunk with - * 0x7FFFFFF | (invalid_ownership_value << 31). - * - * When the max work request size is less than or equal to the WQE - * basic block size, as an optimization, we can stamp all WQEs with - * 0xffffffff, and skip the very first chunk of each WQE. + * first four bytes of every 64 byte chunk with 0xffffffff, except for + * the very first chunk of the WQE. */ -static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size) +static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n) { __be32 *wqe; int i; int s; - int ind; void *buf; - __be32 stamp; - struct mlx4_wqe_ctrl_seg *ctrl; - - if (qp->sq_max_wqes_per_wr > 1) { - s = roundup(size, 1U << qp->sq.wqe_shift); - for (i = 0; i < s; i += 64) { - ind = (i >> qp->sq.wqe_shift) + n; - stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) : - cpu_to_be32(0xffffffff); - buf = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); - wqe = buf + (i & ((1 << qp->sq.wqe_shift) - 1)); - *wqe = stamp; - } - } else { - ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); - s = (ctrl->qpn_vlan.fence_size & 0x3f) << 4; - for (i = 64; i < s; i += 64) { - wqe = buf + i; - *wqe = cpu_to_be32(0xffffffff); - } - } -} - -static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size) -{ struct mlx4_wqe_ctrl_seg *ctrl; - struct mlx4_wqe_inline_seg *inl; - void *wqe; - int s; - - ctrl = wqe = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); - s = sizeof(struct mlx4_wqe_ctrl_seg); - - if (qp->ibqp.qp_type == IB_QPT_UD) { - struct mlx4_wqe_datagram_seg *dgram = wqe + sizeof *ctrl; - struct mlx4_av *av = (struct mlx4_av *)dgram->av; - memset(dgram, 0, sizeof *dgram); - av->port_pd = cpu_to_be32((qp->port << 24) | to_mpd(qp->ibqp.pd)->pdn); - s += sizeof(struct mlx4_wqe_datagram_seg); - } - /* Pad the remainder of the WQE with an inline data segment. */ - if (size > s) { - inl = wqe + s; - inl->byte_count = cpu_to_be32(1 << 31 | (size - s - sizeof *inl)); + buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); + ctrl = (struct mlx4_wqe_ctrl_seg *)buf; + s = (ctrl->qpn_vlan.fence_size & 0x3f) << 4; + for (i = 64; i < s; i += 64) { + wqe = buf + i; + *wqe = cpu_to_be32(0xffffffff); } - ctrl->srcrb_flags = 0; - ctrl->qpn_vlan.fence_size = size / 16; - /* - * Make sure descriptor is fully written before setting ownership bit - * (because HW can start executing as soon as we do). 
- */ - wmb(); - - ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) | - (n & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0); - - stamp_send_wqe(qp, n + qp->sq_spare_wqes, size); -} - -/* Post NOP WQE to prevent wrap-around in the middle of WR */ -static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind) -{ - unsigned s = qp->sq.wqe_cnt - (ind & (qp->sq.wqe_cnt - 1)); - if (unlikely(s < qp->sq_max_wqes_per_wr)) { - post_nop_wqe(qp, ind, s << qp->sq.wqe_shift); - ind += s; - } - return ind; } static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) @@ -433,8 +368,7 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, } static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, - enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp, - bool shrink_wqe) + enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp) { int s; @@ -461,70 +395,20 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, if (s > dev->dev->caps.max_sq_desc_sz) return -EINVAL; + qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s)); + /* - * Hermon supports shrinking WQEs, such that a single work - * request can include multiple units of 1 << wqe_shift. This - * way, work requests can differ in size, and do not have to - * be a power of 2 in size, saving memory and speeding up send - * WR posting. Unfortunately, if we do this then the - * wqe_index field in CQEs can't be used to look up the WR ID - * anymore, so we do this only if selective signaling is off. - * - * Further, on 32-bit platforms, we can't use vmap() to make - * the QP buffer virtually contiguous. Thus we have to use - * constant-sized WRs to make sure a WR is always fully within - * a single page-sized chunk. - * - * Finally, we use NOP work requests to pad the end of the - * work queue, to avoid wrap-around in the middle of WR. We - * set NEC bit to avoid getting completions with error for - * these NOP WRs, but since NEC is only supported starting - * with firmware 2.2.232, we use constant-sized WRs for older - * firmware. - * - * And, since MLX QPs only support SEND, we use constant-sized - * WRs in this case. - * - * We look for the smallest value of wqe_shift such that the - * resulting number of wqes does not exceed device - * capabilities. - * - * We set WQE size to at least 64 bytes, this way stamping - * invalidates each WQE. + * We need to leave 2 KB + 1 WR of headroom in the SQ to + * allow HW to prefetch. */ - if (shrink_wqe && dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC && - qp->sq_signal_bits && BITS_PER_LONG == 64 && - type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI && - !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI | - MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) - qp->sq.wqe_shift = ilog2(64); - else - qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s)); - - for (;;) { - qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1U << qp->sq.wqe_shift); - - /* - * We need to leave 2 KB + 1 WR of headroom in the SQ to - * allow HW to prefetch. 
- */ - qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + qp->sq_max_wqes_per_wr; - qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr * - qp->sq_max_wqes_per_wr + - qp->sq_spare_wqes); - - if (qp->sq.wqe_cnt <= dev->dev->caps.max_wqes) - break; - - if (qp->sq_max_wqes_per_wr <= 1) - return -EINVAL; - - ++qp->sq.wqe_shift; - } - - qp->sq.max_gs = (min(dev->dev->caps.max_sq_desc_sz, - (qp->sq_max_wqes_per_wr << qp->sq.wqe_shift)) - - send_wqe_overhead(type, qp->flags)) / + qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1; + qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr + + qp->sq_spare_wqes); + + qp->sq.max_gs = + (min(dev->dev->caps.max_sq_desc_sz, + (1 << qp->sq.wqe_shift)) - + send_wqe_overhead(type, qp->flags)) / sizeof (struct mlx4_wqe_data_seg); qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + @@ -538,7 +422,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, } cap->max_send_wr = qp->sq.max_post = - (qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr; + qp->sq.wqe_cnt - qp->sq_spare_wqes; cap->max_send_sge = min(qp->sq.max_gs, min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg)); @@ -977,7 +861,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, { int qpn; int err; - struct ib_qp_cap backup_cap; struct mlx4_ib_sqp *sqp = NULL; struct mlx4_ib_qp *qp; enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type; @@ -1178,9 +1061,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err; } - memcpy(&backup_cap, &init_attr->cap, sizeof(backup_cap)); - err = set_kernel_sq_size(dev, &init_attr->cap, - qp_type, qp, true); + err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp); if (err) goto err; @@ -1192,20 +1073,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, *qp->db.db = 0; } - if (mlx4_buf_alloc(dev->dev, qp->buf_size, qp->buf_size, + if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) { - memcpy(&init_attr->cap, &backup_cap, - sizeof(backup_cap)); - err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, - qp, false); - if (err) - goto err_db; - - if (mlx4_buf_alloc(dev->dev, qp->buf_size, - PAGE_SIZE * 2, &qp->buf)) { - err = -ENOMEM; - goto err_db; - } + err = -ENOMEM; + goto err_db; } err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift, @@ -2582,11 +2453,9 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, for (i = 0; i < qp->sq.wqe_cnt; ++i) { ctrl = get_send_wqe(qp, i); ctrl->owner_opcode = cpu_to_be32(1 << 31); - if (qp->sq_max_wqes_per_wr == 1) - ctrl->qpn_vlan.fence_size = - 1 << (qp->sq.wqe_shift - 4); - - stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift); + ctrl->qpn_vlan.fence_size = + 1 << (qp->sq.wqe_shift - 4); + stamp_send_wqe(qp, i); } } @@ -3580,7 +3449,6 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, int nreq; int err = 0; unsigned ind; - int uninitialized_var(stamp); int uninitialized_var(size); unsigned uninitialized_var(seglen); __be32 dummy; @@ -3853,22 +3721,14 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh; - stamp = ind + qp->sq_spare_wqes; - ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift); - /* * We can improve latency by not stamping the last * send queue WQE until after ringing the doorbell, so * only stamp here if there are still more WQEs to post. 
- * - * Same optimization applies to padding with NOP wqe - * in case of WQE shrinking (used to prevent wrap-around - * in the middle of WR). */ - if (wr->next) { - stamp_send_wqe(qp, stamp, size * 16); - ind = pad_wraparound(qp, ind); - } + if (wr->next) + stamp_send_wqe(qp, ind + qp->sq_spare_wqes); + ind++; } out: @@ -3890,9 +3750,8 @@ out: */ mmiowb(); - stamp_send_wqe(qp, stamp, size * 16); + stamp_send_wqe(qp, ind + qp->sq_spare_wqes - 1); - ind = pad_wraparound(qp, ind); qp->sq_next_wqe = ind; } -- cgit From 8380b74e7d606c4e053d7eea623362fcd8d432c2 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 30 Jul 2018 21:56:43 +0300 Subject: RDMA/providers: Fix return value from create_srq callbacks The proper return code is "-EOPNOTSUPP" when the create_srq() callback is not supported. Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 +- drivers/infiniband/hw/mthca/mthca_provider.c | 2 +- drivers/infiniband/sw/rdmavt/srq.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 5d955b293c6d..bbfb86eb2d24 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1397,7 +1397,7 @@ struct ib_srq *bnxt_re_create_srq(struct ib_pd *ib_pd, } if (srq_init_attr->srq_type != IB_SRQT_BASIC) { - rc = -ENOTSUPP; + rc = -EOPNOTSUPP; goto exit; } diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 20febafc1fdd..0d3473b4596e 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -449,7 +449,7 @@ static struct ib_srq *mthca_create_srq(struct ib_pd *pd, int err; if (init_attr->srq_type != IB_SRQT_BASIC) - return ERR_PTR(-ENOSYS); + return ERR_PTR(-EOPNOTSUPP); srq = kmalloc(sizeof *srq, GFP_KERNEL); if (!srq) diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index 3707952b4364..78e06fc456c5 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -82,7 +82,7 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, struct ib_srq *ret; if (srq_init_attr->srq_type != IB_SRQT_BASIC) - return ERR_PTR(-ENOSYS); + return ERR_PTR(-EOPNOTSUPP); if (srq_init_attr->attr.max_sge == 0 || srq_init_attr->attr.max_sge > dev->dparms.props.max_srq_sge || -- cgit From e586e1e1b73f3dc020b82e6fc42c7789b7d0e95d Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 30 Jul 2018 21:56:44 +0300 Subject: RDMA/ipoib: Fix check for return code from ib_create_srq Make sure to check for "-EOPNOTSUPP" instead of "-ENOSYS", since "-EOPNOTSUPP" is now the return code from ib_create_srq() in case it is not supported.
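For illustration, a minimal caller-side sketch of this convention (assuming <rdma/ib_verbs.h>; the example_* names are hypothetical, not taken from ipoib): only -EOPNOTSUPP is treated as "SRQ not supported" and tolerated silently, while any other error is reported before falling back to the non-SRQ path.

	struct example_priv {
		struct ib_pd *pd;
		struct ib_srq *srq;
	};

	static void example_try_create_srq(struct example_priv *priv,
					   struct ib_srq_init_attr *attr)
	{
		priv->srq = ib_create_srq(priv->pd, attr);
		if (IS_ERR(priv->srq)) {
			if (PTR_ERR(priv->srq) != -EOPNOTSUPP)
				pr_warn("failed to allocate SRQ, error %ld\n",
					PTR_ERR(priv->srq));
			priv->srq = NULL; /* fall back to the non-SRQ path */
		}
	}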
Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 8b44f33c7ae0..8ef50e46157c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1560,7 +1560,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge) priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr); if (IS_ERR(priv->cm.srq)) { - if (PTR_ERR(priv->cm.srq) != -ENOSYS) + if (PTR_ERR(priv->cm.srq) != -EOPNOTSUPP) pr_warn("%s: failed to allocate SRQ, error %ld\n", priv->ca->name, PTR_ERR(priv->cm.srq)); priv->cm.srq = NULL; -- cgit From 7150c3d5544b12c96a053153db9e1cec07b0892d Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Fri, 27 Jul 2018 21:23:04 +0300 Subject: RDMA/core: Remove {create,destroy}_ah from mandatory verbs {create,destroy}_ah aren't mandatory verbs, because not all providers implement them. Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index b8144f194777..db3b6271f09d 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -105,8 +105,6 @@ static int ib_device_check_mandatory(struct ib_device *device) IB_MANDATORY_FUNC(query_pkey), IB_MANDATORY_FUNC(alloc_pd), IB_MANDATORY_FUNC(dealloc_pd), - IB_MANDATORY_FUNC(create_ah), - IB_MANDATORY_FUNC(destroy_ah), IB_MANDATORY_FUNC(create_qp), IB_MANDATORY_FUNC(modify_qp), IB_MANDATORY_FUNC(destroy_qp), -- cgit From 0584c47bbc3b3149bfe544d4e090fb6d96addf6c Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Fri, 27 Jul 2018 21:23:05 +0300 Subject: RDMA/core: Check for verbs callbacks before using them Make sure the providers implement the verbs callbacks before calling them, otherwise return -EOPNOTSUPP. Signed-off-by: Kamal Heib Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 3 ++- drivers/infiniband/core/verbs.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index d4c3bc042343..b2af4eeb7669 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2504,7 +2504,8 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, goto out; resp.bad_wr = 0; - ret = srq->device->post_srq_recv(srq, wr, &bad_wr); + ret = srq->device->post_srq_recv ? + srq->device->post_srq_recv(srq, wr, &bad_wr) : -EOPNOTSUPP; uobj_put_obj_read(srq); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index cde359d48d34..5b76c7ebfa02 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -479,6 +479,9 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, { struct ib_ah *ah; + if (!pd->device->create_ah) + return ERR_PTR(-EOPNOTSUPP); + ah = pd->device->create_ah(pd, ah_attr, udata); if (!IS_ERR(ah)) { -- cgit From 1ffba6264268e3a3f32f963ef3f44006ea9ebd35 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Fri, 27 Jul 2018 21:23:06 +0300 Subject: RDMA/providers: Remove pointless functions The rdma core now takes care of returning the right error code when the rdma device callbacks aren't supported.
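As a sketch of the core-side dispatch this relies on (simplified from the uverbs_cmd.c change above; the wrapper name is hypothetical), a missing provider callback is answered with -EOPNOTSUPP in one place instead of every driver carrying a stub:

	static int example_post_srq_recv(struct ib_srq *srq,
					 const struct ib_recv_wr *wr,
					 const struct ib_recv_wr **bad_wr)
	{
		/* a NULL callback means the provider does not support the verb */
		if (!srq->device->post_srq_recv)
			return -EOPNOTSUPP;
		return srq->device->post_srq_recv(srq, wr, bad_wr);
	}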
Signed-off-by: Kamal Heib Acked-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb3/iwch_provider.c | 41 ----------------- drivers/infiniband/hw/cxgb4/cq.c | 5 --- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 1 - drivers/infiniband/hw/cxgb4/provider.c | 41 ----------------- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 40 ----------------- drivers/infiniband/hw/nes/nes_verbs.c | 60 ------------------------- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 6 --- drivers/infiniband/hw/ocrdma/ocrdma_ah.h | 1 - drivers/infiniband/hw/ocrdma/ocrdma_main.c | 1 - drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c | 26 ----------- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 3 -- drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c | 7 --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 7 --- 13 files changed, 239 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 68bc2f9a532f..1b9ff21aa1d5 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -61,42 +61,6 @@ #include #include "common.h" -static struct ib_ah *iwch_ah_create(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - struct ib_udata *udata) -{ - return ERR_PTR(-ENOSYS); -} - -static int iwch_ah_destroy(struct ib_ah *ah) -{ - return -ENOSYS; -} - -static int iwch_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - return -ENOSYS; -} - -static int iwch_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - return -ENOSYS; -} - -static int iwch_process_mad(struct ib_device *ibdev, - int mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, - size_t in_mad_size, - struct ib_mad_hdr *out_mad, - size_t *out_mad_size, - u16 *out_mad_pkey_index) -{ - return -ENOSYS; -} - static int iwch_dealloc_ucontext(struct ib_ucontext *context) { struct iwch_dev *rhp = to_iwch_dev(context->device); @@ -1399,8 +1363,6 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.mmap = iwch_mmap; dev->ibdev.alloc_pd = iwch_allocate_pd; dev->ibdev.dealloc_pd = iwch_deallocate_pd; - dev->ibdev.create_ah = iwch_ah_create; - dev->ibdev.destroy_ah = iwch_ah_destroy; dev->ibdev.create_qp = iwch_create_qp; dev->ibdev.modify_qp = iwch_ib_modify_qp; dev->ibdev.destroy_qp = iwch_destroy_qp; @@ -1415,9 +1377,6 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.dealloc_mw = iwch_dealloc_mw; dev->ibdev.alloc_mr = iwch_alloc_mr; dev->ibdev.map_mr_sg = iwch_map_mr_sg; - dev->ibdev.attach_mcast = iwch_multicast_attach; - dev->ibdev.detach_mcast = iwch_multicast_detach; - dev->ibdev.process_mad = iwch_process_mad; dev->ibdev.req_notify_cq = iwch_arm_cq; dev->ibdev.post_send = iwch_post_send; dev->ibdev.post_recv = iwch_post_receive; diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index d266c8d0bf94..0c13f2838c84 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -1140,11 +1140,6 @@ err_free_chp: return ERR_PTR(ret); } -int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) -{ - return -ENOSYS; -} - int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct c4iw_cq *chp; diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 3cf93463021a..afa86a3c5cb4 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -1063,7 +1063,6 @@ struct ib_cq 
*c4iw_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *ib_context, struct ib_udata *udata); -int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata); int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask srq_attr_mask, diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index c314d8fdfbba..4eda6872e617 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -58,41 +58,6 @@ static int fastreg_support = 1; module_param(fastreg_support, int, 0644); MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)"); -static struct ib_ah *c4iw_ah_create(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - struct ib_udata *udata) - -{ - return ERR_PTR(-ENOSYS); -} - -static int c4iw_ah_destroy(struct ib_ah *ah) -{ - return -ENOSYS; -} - -static int c4iw_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - return -ENOSYS; -} - -static int c4iw_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - return -ENOSYS; -} - -static int c4iw_process_mad(struct ib_device *ibdev, int mad_flags, - u8 port_num, const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, - size_t in_mad_size, - struct ib_mad_hdr *out_mad, - size_t *out_mad_size, - u16 *out_mad_pkey_index) -{ - return -ENOSYS; -} - void _c4iw_free_ucontext(struct kref *kref) { struct c4iw_ucontext *ucontext; @@ -615,8 +580,6 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.mmap = c4iw_mmap; dev->ibdev.alloc_pd = c4iw_allocate_pd; dev->ibdev.dealloc_pd = c4iw_deallocate_pd; - dev->ibdev.create_ah = c4iw_ah_create; - dev->ibdev.destroy_ah = c4iw_ah_destroy; dev->ibdev.create_qp = c4iw_create_qp; dev->ibdev.modify_qp = c4iw_ib_modify_qp; dev->ibdev.query_qp = c4iw_ib_query_qp; @@ -626,7 +589,6 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.destroy_srq = c4iw_destroy_srq; dev->ibdev.create_cq = c4iw_create_cq; dev->ibdev.destroy_cq = c4iw_destroy_cq; - dev->ibdev.resize_cq = c4iw_resize_cq; dev->ibdev.poll_cq = c4iw_poll_cq; dev->ibdev.get_dma_mr = c4iw_get_dma_mr; dev->ibdev.reg_user_mr = c4iw_reg_user_mr; @@ -635,9 +597,6 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.dealloc_mw = c4iw_dealloc_mw; dev->ibdev.alloc_mr = c4iw_alloc_mr; dev->ibdev.map_mr_sg = c4iw_map_mr_sg; - dev->ibdev.attach_mcast = c4iw_multicast_attach; - dev->ibdev.detach_mcast = c4iw_multicast_detach; - dev->ibdev.process_mad = c4iw_process_mad; dev->ibdev.req_notify_cq = c4iw_arm_cq; dev->ibdev.post_send = c4iw_post_send; dev->ibdev.post_recv = c4iw_post_receive; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index e780454256df..e2e6c74a7452 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2702,21 +2702,6 @@ static int i40iw_query_gid(struct ib_device *ibdev, return 0; } -/** - * i40iw_modify_port Modify port properties - * @ibdev: device pointer from stack - * @port: port number - * @port_modify_mask: mask for port modifications - * @props: port properties - */ -static int i40iw_modify_port(struct ib_device *ibdev, - u8 port, - int port_modify_mask, - struct ib_port_modify *props) -{ - return -ENOSYS; -} - /** * i40iw_query_pkey - Query partition key * @ibdev: device pointer from stack @@ -2733,28 
+2718,6 @@ static int i40iw_query_pkey(struct ib_device *ibdev, return 0; } -/** - * i40iw_create_ah - create address handle - * @ibpd: ptr of pd - * @ah_attr: address handle attributes - */ -static struct ib_ah *i40iw_create_ah(struct ib_pd *ibpd, - struct rdma_ah_attr *attr, - struct ib_udata *udata) - -{ - return ERR_PTR(-ENOSYS); -} - -/** - * i40iw_destroy_ah - Destroy address handle - * @ah: pointer to address handle - */ -static int i40iw_destroy_ah(struct ib_ah *ah) -{ - return -ENOSYS; -} - /** * i40iw_get_vector_affinity - report IRQ affinity mask * @ibdev: IB device @@ -2822,7 +2785,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count; iwibdev->ibdev.dev.parent = &pcidev->dev; iwibdev->ibdev.query_port = i40iw_query_port; - iwibdev->ibdev.modify_port = i40iw_modify_port; iwibdev->ibdev.query_pkey = i40iw_query_pkey; iwibdev->ibdev.query_gid = i40iw_query_gid; iwibdev->ibdev.alloc_ucontext = i40iw_alloc_ucontext; @@ -2842,8 +2804,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.alloc_hw_stats = i40iw_alloc_hw_stats; iwibdev->ibdev.get_hw_stats = i40iw_get_hw_stats; iwibdev->ibdev.query_device = i40iw_query_device; - iwibdev->ibdev.create_ah = i40iw_create_ah; - iwibdev->ibdev.destroy_ah = i40iw_destroy_ah; iwibdev->ibdev.drain_sq = i40iw_drain_sq; iwibdev->ibdev.drain_rq = i40iw_drain_rq; iwibdev->ibdev.alloc_mr = i40iw_alloc_mr; diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 3bd3c61af55b..6940c7215961 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -754,26 +754,6 @@ static int nes_dealloc_pd(struct ib_pd *ibpd) } -/** - * nes_create_ah - */ -static struct ib_ah *nes_create_ah(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - struct ib_udata *udata) -{ - return ERR_PTR(-ENOSYS); -} - - -/** - * nes_destroy_ah - */ -static int nes_destroy_ah(struct ib_ah *ah) -{ - return -ENOSYS; -} - - /** * nes_get_encoded_size */ @@ -3005,40 +2985,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, return err; } - -/** - * nes_muticast_attach - */ -static int nes_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - nes_debug(NES_DBG_INIT, "\n"); - return -ENOSYS; -} - - -/** - * nes_multicast_detach - */ -static int nes_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - nes_debug(NES_DBG_INIT, "\n"); - return -ENOSYS; -} - - -/** - * nes_process_mad - */ -static int nes_process_mad(struct ib_device *ibdev, int mad_flags, - u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) -{ - nes_debug(NES_DBG_INIT, "\n"); - return -ENOSYS; -} - static inline void fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, const struct ib_send_wr *ib_wr, u32 uselkey) @@ -3737,8 +3683,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.mmap = nes_mmap; nesibdev->ibdev.alloc_pd = nes_alloc_pd; nesibdev->ibdev.dealloc_pd = nes_dealloc_pd; - nesibdev->ibdev.create_ah = nes_create_ah; - nesibdev->ibdev.destroy_ah = nes_destroy_ah; nesibdev->ibdev.create_qp = nes_create_qp; nesibdev->ibdev.modify_qp = nes_modify_qp; nesibdev->ibdev.query_qp = nes_query_qp; @@ -3755,10 +3699,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) 
nesibdev->ibdev.alloc_mr = nes_alloc_mr; nesibdev->ibdev.map_mr_sg = nes_map_mr_sg; - nesibdev->ibdev.attach_mcast = nes_multicast_attach; - nesibdev->ibdev.detach_mcast = nes_multicast_detach; - nesibdev->ibdev.process_mad = nes_process_mad; - nesibdev->ibdev.req_notify_cq = nes_req_notify_cq; nesibdev->ibdev.post_send = nes_post_send; nesibdev->ibdev.post_recv = nes_post_recv; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 8cc9459a9f9b..58188fe5aed2 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -254,12 +254,6 @@ int ocrdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) return 0; } -int ocrdma_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) -{ - /* modify_ah is unsupported */ - return -ENOSYS; -} - int ocrdma_process_mad(struct ib_device *ibdev, int process_mad_flags, u8 port_num, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 1a65c47945aa..c0c32c9b80ae 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -55,7 +55,6 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, struct ib_udata *udata); int ocrdma_destroy_ah(struct ib_ah *ah); int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); -int ocrdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int ocrdma_process_mad(struct ib_device *, int process_mad_flags, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 5962c0ed9847..7832ee3e0c84 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -176,7 +176,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.create_ah = ocrdma_create_ah; dev->ibdev.destroy_ah = ocrdma_destroy_ah; dev->ibdev.query_ah = ocrdma_query_ah; - dev->ibdev.modify_ah = ocrdma_modify_ah; dev->ibdev.poll_cq = ocrdma_poll_cq; dev->ibdev.post_send = ocrdma_post_send; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index f95b97646c25..0f004c737620 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -278,19 +278,6 @@ int pvrdma_destroy_cq(struct ib_cq *cq) return ret; } -/** - * pvrdma_modify_cq - modify the CQ moderation parameters - * @ibcq: the CQ to modify - * @cq_count: number of CQEs that will trigger an event - * @cq_period: max period of time in usec before triggering an event - * - * @return: -EOPNOTSUPP as CQ resize is not supported. - */ -int pvrdma_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) -{ - return -EOPNOTSUPP; -} - static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i) { return (struct pvrdma_cqe *)pvrdma_page_dir_get_ptr( @@ -428,16 +415,3 @@ int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) /* Ensure we do not return errors from poll_cq */ return npolled; } - -/** - * pvrdma_resize_cq - resize CQ - * @ibcq: the completion queue - * @entries: CQ entries - * @udata: user data - * - * @return: -EOPNOTSUPP as CQ resize is not supported. 
- */ -int pvrdma_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) -{ - return -EOPNOTSUPP; -} diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 912933549dfb..a5719899f49a 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -214,8 +214,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) dev->ib_dev.post_send = pvrdma_post_send; dev->ib_dev.post_recv = pvrdma_post_recv; dev->ib_dev.create_cq = pvrdma_create_cq; - dev->ib_dev.modify_cq = pvrdma_modify_cq; - dev->ib_dev.resize_cq = pvrdma_resize_cq; dev->ib_dev.destroy_cq = pvrdma_destroy_cq; dev->ib_dev.poll_cq = pvrdma_poll_cq; dev->ib_dev.req_notify_cq = pvrdma_req_notify_cq; @@ -259,7 +257,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) dev->ib_dev.modify_srq = pvrdma_modify_srq; dev->ib_dev.query_srq = pvrdma_query_srq; dev->ib_dev.destroy_srq = pvrdma_destroy_srq; - dev->ib_dev.post_srq_recv = pvrdma_post_srq_recv; dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq, sizeof(struct pvrdma_srq *), diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index a0a82731ea24..dc0ce877c7a3 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -52,13 +52,6 @@ #include "pvrdma.h" -int pvrdma_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr) -{ - /* No support for kernel clients. */ - return -EOPNOTSUPP; -} - /** * pvrdma_query_srq - query shared receive queue * @ibsrq: the shared receive queue to query diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index 6ebf3360ea12..b2e3ab50cb08 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -412,15 +412,10 @@ struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); -int pvrdma_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); -int pvrdma_resize_cq(struct ib_cq *ibcq, int entries, - struct ib_udata *udata); struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata); -int pvrdma_resize_cq(struct ib_cq *ibcq, int entries, - struct ib_udata *udata); int pvrdma_destroy_cq(struct ib_cq *cq); int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); @@ -435,8 +430,6 @@ int pvrdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int pvrdma_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); int pvrdma_destroy_srq(struct ib_srq *srq); -int pvrdma_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr); struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, -- cgit From 610b89677f422e8a07b2343a0750a1b3faca7922 Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Mon, 30 Jul 2018 20:20:25 +0800 Subject: RDMA/hns: Only assign the fields of the av if IB_QP_AV bit is set Only when the IB_QP_AV flag of attr_mask is set is it valid to assign the related fields of the av into the qp context.
Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 111 +++++++++++++++++++++-------- 1 file changed, 80 insertions(+), 31 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 25e0407d3f31..4fece73ee0c9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include "hnae3.h" @@ -3139,21 +3140,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M, V2_QPC_BYTE_56_LP_PKTN_INI_S, 0); - roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M, - V2_QPC_BYTE_24_HOP_LIMIT_S, grh->hop_limit); - roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M, - V2_QPC_BYTE_24_HOP_LIMIT_S, 0); - - roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, - V2_QPC_BYTE_28_FL_S, grh->flow_label); - roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, - V2_QPC_BYTE_28_FL_S, 0); - - roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, - V2_QPC_BYTE_24_TC_S, grh->traffic_class); - roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, - V2_QPC_BYTE_24_TC_S, 0); - if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD) roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, V2_QPC_BYTE_24_MTU_S, IB_MTU_4096); @@ -3164,9 +3150,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, V2_QPC_BYTE_24_MTU_S, 0); - memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw)); - memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw)); - roce_set_field(context->byte_84_rq_ci_pi, V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, hr_qp->rq.head); @@ -3205,12 +3188,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_168_LP_SGEN_INI_M, V2_QPC_BYTE_168_LP_SGEN_INI_S, 0); - roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, - V2_QPC_BYTE_28_SL_S, rdma_ah_get_sl(&attr->ah_attr)); - roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, - V2_QPC_BYTE_28_SL_S, 0); - hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); - return 0; } @@ -3388,13 +3365,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, V2_QPC_BYTE_28_AT_S, 0); } - roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, - V2_QPC_BYTE_28_SL_S, - rdma_ah_get_sl(&attr->ah_attr)); - roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, - V2_QPC_BYTE_28_SL_S, 0); - hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); - roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_SQ_CUR_PSN_M, V2_QPC_BYTE_172_SQ_CUR_PSN_S, attr->sq_psn); roce_set_field(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_SQ_CUR_PSN_M, @@ -3480,6 +3450,85 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, goto out; } + if (attr_mask & IB_QP_AV) { + const struct ib_global_route *grh = + rdma_ah_read_grh(&attr->ah_attr); + const struct ib_gid_attr *gid_attr = NULL; + u8 src_mac[ETH_ALEN]; + int is_roce_protocol; + u16 vlan = 0xffff; + u8 ib_port; + u8 hr_port; + + ib_port = (attr_mask & IB_QP_PORT) ? 
attr->port_num : + hr_qp->port + 1; + hr_port = ib_port - 1; + is_roce_protocol = rdma_cap_eth_ah(&hr_dev->ib_dev, ib_port) && + rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH; + + if (is_roce_protocol) { + gid_attr = attr->ah_attr.grh.sgid_attr; + vlan = rdma_vlan_dev_vlan_id(gid_attr->ndev); + memcpy(src_mac, gid_attr->ndev->dev_addr, ETH_ALEN); + } + + if (grh->sgid_index >= hr_dev->caps.gid_table_len[hr_port]) { + dev_err(hr_dev->dev, + "sgid_index(%u) too large. max is %d\n", + grh->sgid_index, + hr_dev->caps.gid_table_len[hr_port]); + ret = -EINVAL; + goto out; + } + + if (attr->ah_attr.type != RDMA_AH_ATTR_TYPE_ROCE) { + dev_err(hr_dev->dev, "ah attr is not RDMA roce type\n"); + ret = -EINVAL; + goto out; + } + + roce_set_field(context->byte_52_udpspn_dmac, + V2_QPC_BYTE_52_UDPSPN_M, V2_QPC_BYTE_52_UDPSPN_S, + (gid_attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) ? + 0 : 0x12b7); + + roce_set_field(qpc_mask->byte_52_udpspn_dmac, + V2_QPC_BYTE_52_UDPSPN_M, + V2_QPC_BYTE_52_UDPSPN_S, 0); + + roce_set_field(context->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_SGID_IDX_M, + V2_QPC_BYTE_20_SGID_IDX_S, grh->sgid_index); + + roce_set_field(qpc_mask->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_SGID_IDX_M, + V2_QPC_BYTE_20_SGID_IDX_S, 0); + + roce_set_field(context->byte_24_mtu_tc, + V2_QPC_BYTE_24_HOP_LIMIT_M, + V2_QPC_BYTE_24_HOP_LIMIT_S, grh->hop_limit); + roce_set_field(qpc_mask->byte_24_mtu_tc, + V2_QPC_BYTE_24_HOP_LIMIT_M, + V2_QPC_BYTE_24_HOP_LIMIT_S, 0); + + roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, + V2_QPC_BYTE_24_TC_S, grh->traffic_class); + roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, + V2_QPC_BYTE_24_TC_S, 0); + roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, + V2_QPC_BYTE_28_FL_S, grh->flow_label); + roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, + V2_QPC_BYTE_28_FL_S, 0); + memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw)); + memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw)); + roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, + V2_QPC_BYTE_28_SL_S, + rdma_ah_get_sl(&attr->ah_attr)); + roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, + V2_QPC_BYTE_28_SL_S, 0); + hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); + } + if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask); -- cgit From c8e46f8d632c30e2b1cda0afa6697cfea16d57c0 Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Mon, 30 Jul 2018 20:20:27 +0800 Subject: RDMA/hns: Assign the value for vlan field of qp context This patch fills the correct value into the vlan id field of the qp context, as well as updating the vlan field name according to the latest hardware user manual.
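A reduced sketch of where the vlan value now comes from (helper name hypothetical; cf. the IB_QP_AV block above): rdma_vlan_dev_vlan_id() yields the VLAN id of the net_device backing the source GID, or 0xffff when that device is not a VLAN device, so untagged traffic keeps the "no VLAN" encoding.

	static u16 example_qp_vlan(const struct ib_gid_attr *gid_attr,
				   bool is_roce_protocol)
	{
		/* 0xffff (truncated to 0xfff in the 12-bit field) means "no VLAN" */
		if (!is_roce_protocol)
			return 0xffff;
		return rdma_vlan_dev_vlan_id(gid_attr->ndev);
	}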
Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 15 +++++++++++---- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 4 ++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 4fece73ee0c9..b4958422af94 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2527,10 +2527,10 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0); /* No VLAN need to set 0xFFF */ - roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_IDX_M, - V2_QPC_BYTE_24_VLAN_IDX_S, 0xfff); - roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_IDX_M, - V2_QPC_BYTE_24_VLAN_IDX_S, 0); + roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M, + V2_QPC_BYTE_24_VLAN_ID_S, 0xfff); + roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M, + V2_QPC_BYTE_24_VLAN_ID_S, 0); /* * Set some fields in context to zero, Because the default values @@ -3472,6 +3472,13 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, memcpy(src_mac, gid_attr->ndev->dev_addr, ETH_ALEN); } + roce_set_field(context->byte_24_mtu_tc, + V2_QPC_BYTE_24_VLAN_ID_M, + V2_QPC_BYTE_24_VLAN_ID_S, vlan); + roce_set_field(qpc_mask->byte_24_mtu_tc, + V2_QPC_BYTE_24_VLAN_ID_M, + V2_QPC_BYTE_24_VLAN_ID_S, 0); + if (grh->sgid_index >= hr_dev->caps.gid_table_len[hr_port]) { dev_err(hr_dev->dev, "sgid_index(%u) too large. max is %d\n", diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index f40d8c22d357..5c43ba11a0bd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -451,8 +451,8 @@ struct hns_roce_v2_qp_context { #define V2_QPC_BYTE_24_TC_S 8 #define V2_QPC_BYTE_24_TC_M GENMASK(15, 8) -#define V2_QPC_BYTE_24_VLAN_IDX_S 16 -#define V2_QPC_BYTE_24_VLAN_IDX_M GENMASK(27, 16) +#define V2_QPC_BYTE_24_VLAN_ID_S 16 +#define V2_QPC_BYTE_24_VLAN_ID_M GENMASK(27, 16) #define V2_QPC_BYTE_24_MTU_S 28 #define V2_QPC_BYTE_24_MTU_M GENMASK(31, 28) -- cgit From ac7cbf96c20991dbeea0af04ca9ff25252af9e5f Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Mon, 30 Jul 2018 20:20:28 +0800 Subject: RDMA/hns: Modify qp will return errno when qp type is illegal Setting ret was missing in the error path here, resulting in an incorrect error code from modify_qp. Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index b4958422af94..39842ece3c0f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3447,6 +3447,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, ; } else { dev_err(dev, "Illegal state for QP!\n"); + ret = -EINVAL; goto out; } -- cgit From 426c414619ab742f6cb97c648d06975aa82cd62e Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Mon, 30 Jul 2018 20:20:29 +0800 Subject: RDMA/hns: Use macro instead of magic number This patch uses CMD_CSQ_DESC_NUM instead of a magic number in order to improve readability.
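The shape of the change, as a sketch with hypothetical names: defining the depth once lets the csq/crq sizing read as intent rather than a bare 1024, and keeps both users in sync if the depth ever changes.

	#define EXAMPLE_CSQ_DESC_NUM	1024
	#define EXAMPLE_CRQ_DESC_NUM	1024

	static void example_cmq_set_depth(struct hns_roce_v2_priv *priv)
	{
		priv->cmq.csq.desc_num = EXAMPLE_CSQ_DESC_NUM;
		priv->cmq.crq.desc_num = EXAMPLE_CRQ_DESC_NUM;
	}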
Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 4 ++-- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 39842ece3c0f..57d744dc48f3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -705,8 +705,8 @@ static int hns_roce_v2_cmq_init(struct hns_roce_dev *hr_dev) int ret; /* Setup the queue entries for command queue */ - priv->cmq.csq.desc_num = 1024; - priv->cmq.crq.desc_num = 1024; + priv->cmq.csq.desc_num = CMD_CSQ_DESC_NUM; + priv->cmq.crq.desc_num = CMD_CRQ_DESC_NUM; /* Setup the lock for command queue */ spin_lock_init(&priv->cmq.csq.lock); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 5c43ba11a0bd..14aa308befef 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -112,6 +112,9 @@ (step_idx == 1 && hop_num == 1) || \ (step_idx == 2 && hop_num == 2)) +#define CMD_CSQ_DESC_NUM 1024 +#define CMD_CRQ_DESC_NUM 1024 + enum { NO_ARMED = 0x0, REG_NXT_CEQE = 0x2, -- cgit From cdfa4ad5d65ececbff24d070f21549db3b4c9cd9 Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Mon, 30 Jul 2018 20:20:30 +0800 Subject: RDMA/hns: Program the tclass and flow label into the hardware This was missed in a few places, and was just using 0. Also correct the spelling of HNS_ROCE_FLOW_LABEL_MASK Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_ah.c | 2 +- drivers/infiniband/hw/hns/hns_roce_device.h | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 8 ++++++-- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 11 +++++------ 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 14efa3b9adb2..0d96c5bb38cd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -97,7 +97,7 @@ int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) rdma_ah_set_static_rate(ah_attr, ah->av.stat_rate); rdma_ah_set_grh(ah_attr, NULL, (le32_to_cpu(ah->av.sl_tclass_flowlabel) & - HNS_ROCE_FLOW_LABLE_MASK), ah->av.gid_index, + HNS_ROCE_FLOW_LABEL_MASK), ah->av.gid_index, ah->av.hop_limit, (le32_to_cpu(ah->av.sl_tclass_flowlabel) >> HNS_ROCE_TCLASS_SHIFT)); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 1c252753fb12..0eab5a2f45e5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -76,7 +76,7 @@ /* 4G/4K = 1M */ #define HNS_ROCE_SL_SHIFT 28 #define HNS_ROCE_TCLASS_SHIFT 20 -#define HNS_ROCE_FLOW_LABLE_MASK 0xfffff +#define HNS_ROCE_FLOW_LABEL_MASK 0xfffff #define HNS_ROCE_MAX_PORTS 6 #define HNS_ROCE_MAX_GID_NUM 16 diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 8e11c6b62009..aa7f2342d4eb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -174,7 +174,9 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, roce_set_field(ud_sq_wqe->u32_36, UD_SEND_WQE_U32_36_FLOW_LABEL_M, - UD_SEND_WQE_U32_36_FLOW_LABEL_S, 0); + UD_SEND_WQE_U32_36_FLOW_LABEL_S, + ah->av.sl_tclass_flowlabel & + HNS_ROCE_FLOW_LABEL_MASK); roce_set_field(ud_sq_wqe->u32_36, 
UD_SEND_WQE_U32_36_PRIORITY_M, UD_SEND_WQE_U32_36_PRIORITY_S, @@ -192,7 +194,9 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, ah->av.hop_limit); roce_set_field(ud_sq_wqe->u32_40, UD_SEND_WQE_U32_40_TRAFFIC_CLASS_M, - UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S, 0); + UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S, + ah->av.sl_tclass_flowlabel >> + HNS_ROCE_TCLASS_SHIFT); memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 57d744dc48f3..268d55bfca07 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -332,14 +332,13 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M, V2_UD_SEND_WQE_BYTE_36_TCLASS_S, - 0); - roce_set_field(ud_sq_wqe->byte_36, - V2_UD_SEND_WQE_BYTE_36_TCLASS_M, - V2_UD_SEND_WQE_BYTE_36_TCLASS_S, - 0); + ah->av.sl_tclass_flowlabel >> + HNS_ROCE_TCLASS_SHIFT); roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M, - V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, 0); + V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, + ah->av.sl_tclass_flowlabel & + HNS_ROCE_FLOW_LABEL_MASK); roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M, V2_UD_SEND_WQE_BYTE_40_SL_S, -- cgit From e11fef9f8d442e6929dce6c80dacea6466e348b0 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 29 Jul 2018 11:53:08 +0300 Subject: RDMA/cma: Initialize resource type in __rdma_create_id() Currently rdma_cm_id's resource tracking fields, such as the owner task and kern_name, and other non resource tracking fields are initialized in a single function, __rdma_create_id(). Therefore, also initialize rdma_cm_id's resource type in the same init function. Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 7379094bbbab..714fff27d2e3 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -495,7 +495,6 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv, id_priv->id.route.addr.dev_addr.transport = rdma_node_get_transport(cma_dev->device->node_type); list_add_tail(&id_priv->list, &cma_dev->id_list); - id_priv->res.type = RDMA_RESTRACK_CM_ID; rdma_restrack_add(&id_priv->res); } @@ -795,6 +794,7 @@ struct rdma_cm_id *__rdma_create_id(struct net *net, id_priv->res.kern_name = caller; else rdma_restrack_set_task(&id_priv->res, current); + id_priv->res.type = RDMA_RESTRACK_CM_ID; id_priv->state = RDMA_CM_IDLE; id_priv->id.context = context; id_priv->id.event_handler = event_handler; -- cgit From 219d2e9dfda9431b808c28d5efc74b404b95b638 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 29 Jul 2018 11:53:09 +0300 Subject: RDMA/cma: Simplify rdma_resolve_addr() error flow Currently the dst address is set first and later cleared if any of the three error conditions is met. However, none of the APIs or checks are supposed to refer to the destination address of the cm_id. Therefore, set the destination address only after the necessary checks pass, which simplifies the error flow.
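A minimal sketch of the validate-then-commit ordering this adopts (all example_* types and fields are hypothetical stand-ins for the rdma_cm internals): nothing is written into the id until every check has passed, so no error path has to undo a partial write.

	struct example_id {
		sa_family_t family;
		bool bound;	/* stand-in for the CM state check */
		struct sockaddr_storage dst_addr;
	};

	static int example_resolve_addr(struct example_id *id,
					const struct sockaddr *dst_addr,
					size_t addr_size)
	{
		if (id->family != dst_addr->sa_family)
			return -EINVAL;
		if (!id->bound)
			return -EINVAL;

		/* commit the destination only after all checks passed */
		memcpy(&id->dst_addr, dst_addr, addr_size);
		return 0;
	}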
Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 714fff27d2e3..0794b99d2507 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2971,25 +2971,19 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, int ret; id_priv = container_of(id, struct rdma_id_private, id); - memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); if (id_priv->state == RDMA_CM_IDLE) { ret = cma_bind_addr(id, src_addr, dst_addr); - if (ret) { - memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); + if (ret) return ret; - } } - if (cma_family(id_priv) != dst_addr->sa_family) { - memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); + if (cma_family(id_priv) != dst_addr->sa_family) return -EINVAL; - } - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { - memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) return -EINVAL; - } + memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); atomic_inc(&id_priv->refcount); if (cma_any_addr(dst_addr)) { ret = cma_resolve_loopback(id_priv); -- cgit From 2df7dba855e10cca8eddcd38bca825446ea10e1d Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 29 Jul 2018 11:53:10 +0300 Subject: RDMA/core: Constify dst_addr argument The following APIs are not supposed to modify the addr or dst_addr contents. Therefore, make those function arguments const for better code readability. 1. rdma_resolve_ip() 2. rdma_addr_size() 3. rdma_resolve_addr() Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/addr.c | 4 ++-- drivers/infiniband/core/cma.c | 14 +++++++------- include/rdma/ib_addr.h | 4 ++-- include/rdma/rdma_cm.h | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 1b817fdb97a4..46b855a42884 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -188,7 +188,7 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr, return -ENODATA; } -int rdma_addr_size(struct sockaddr *addr) +int rdma_addr_size(const struct sockaddr *addr) { switch (addr->sa_family) { case AF_INET: @@ -585,7 +585,7 @@ static void process_one_req(struct work_struct *_work) spin_unlock_bh(&lock); } -int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr, +int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr, struct rdma_dev_addr *addr, int timeout_ms, void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context), diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 0794b99d2507..f326965a0616 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1046,7 +1046,7 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, } EXPORT_SYMBOL(rdma_init_qp_attr); -static inline int cma_zero_addr(struct sockaddr *addr) +static inline int cma_zero_addr(const struct sockaddr *addr) { switch (addr->sa_family) { case AF_INET: @@ -1060,7 +1060,7 @@ static inline int cma_zero_addr(const struct sockaddr *addr) } } -static inline int cma_loopback_addr(struct
sockaddr *addr) +static inline int cma_loopback_addr(const struct sockaddr *addr) { switch (addr->sa_family) { case AF_INET: @@ -1074,7 +1074,7 @@ static inline int cma_loopback_addr(struct sockaddr *addr) } } -static inline int cma_any_addr(struct sockaddr *addr) +static inline int cma_any_addr(const struct sockaddr *addr) { return cma_zero_addr(addr) || cma_loopback_addr(addr); } @@ -1097,7 +1097,7 @@ static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst) } } -static __be16 cma_port(struct sockaddr *addr) +static __be16 cma_port(const struct sockaddr *addr) { struct sockaddr_ib *sib; @@ -1115,7 +1115,7 @@ static __be16 cma_port(struct sockaddr *addr) } } -static inline int cma_any_port(struct sockaddr *addr) +static inline int cma_any_port(const struct sockaddr *addr) { return !cma_port(addr); } @@ -2944,7 +2944,7 @@ err: } static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, - struct sockaddr *dst_addr) + const struct sockaddr *dst_addr) { if (!src_addr || !src_addr->sa_family) { src_addr = (struct sockaddr *) &id->route.addr.src_addr; @@ -2965,7 +2965,7 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, } int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, - struct sockaddr *dst_addr, int timeout_ms) + const struct sockaddr *dst_addr, int timeout_ms) { struct rdma_id_private *id_priv; int ret; diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 715394f6d18a..77c7908b7d73 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -97,7 +97,7 @@ int rdma_translate_ip(const struct sockaddr *addr, * or been canceled. A status of 0 indicates success. * @context: User-specified context associated with the call. */ -int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr, +int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr, struct rdma_dev_addr *addr, int timeout_ms, void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context), @@ -109,7 +109,7 @@ void rdma_copy_addr(struct rdma_dev_addr *dev_addr, const struct net_device *dev, const unsigned char *dst_dev_addr); -int rdma_addr_size(struct sockaddr *addr); +int rdma_addr_size(const struct sockaddr *addr); int rdma_addr_size_in6(struct sockaddr_in6 *addr); int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr); diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index c5c1435c129a..5d71a7f51a9f 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -192,7 +192,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr); * @timeout_ms: Time to wait for resolution to complete. */ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, - struct sockaddr *dst_addr, int timeout_ms); + const struct sockaddr *dst_addr, int timeout_ms); /** * rdma_resolve_route - Resolve the RDMA address bound to the RDMA identifier -- cgit From e7ff98aefc9e532a2067d5a2112a23902726e9a3 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 29 Jul 2018 11:53:11 +0300 Subject: RDMA/cma: Constify path record, ib_cm_event, listen_id pointers Constify several pointers such as path_rec, ib_cm_event and listen_id pointers in several functions. 
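A reduced sketch of the const propagation at work (modeled on cma_set_rep_event_data() below): once the ib_cm_event is received as const, every helper its parameters are handed to must take const as well, which is why the ULP handler prototypes change in the same patch.

	static void example_set_rep_event_data(struct rdma_cm_event *event,
					       const struct ib_cm_rep_event_param *rep,
					       void *private_data)
	{
		event->param.conn.private_data = private_data;
		event->param.conn.responder_resources = rep->responder_resources;
		event->param.conn.initiator_depth = rep->initiator_depth;
	}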
Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 55 +++++++++++++++++++-------------- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 15 +++++---- drivers/infiniband/ulp/srp/ib_srp.c | 8 +++-- drivers/infiniband/ulp/srpt/ib_srpt.c | 5 +-- include/rdma/ib_cm.h | 2 +- 5 files changed, 49 insertions(+), 36 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index f326965a0616..5571f8d52302 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -641,7 +641,7 @@ static void cma_bind_sgid_attr(struct rdma_id_private *id_priv, } static int cma_acquire_dev(struct rdma_id_private *id_priv, - struct rdma_id_private *listen_id_priv) + const struct rdma_id_private *listen_id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; const struct ib_gid_attr *sgid_attr; @@ -1122,8 +1122,8 @@ static inline int cma_any_port(const struct sockaddr *addr) static void cma_save_ib_info(struct sockaddr *src_addr, struct sockaddr *dst_addr, - struct rdma_cm_id *listen_id, - struct sa_path_rec *path) + const struct rdma_cm_id *listen_id, + const struct sa_path_rec *path) { struct sockaddr_ib *listen_ib, *ib; @@ -1208,7 +1208,7 @@ static u16 cma_port_from_service_id(__be64 service_id) static int cma_save_ip_info(struct sockaddr *src_addr, struct sockaddr *dst_addr, - struct ib_cm_event *ib_event, + const struct ib_cm_event *ib_event, __be64 service_id) { struct cma_hdr *hdr; @@ -1238,8 +1238,8 @@ static int cma_save_ip_info(struct sockaddr *src_addr, static int cma_save_net_info(struct sockaddr *src_addr, struct sockaddr *dst_addr, - struct rdma_cm_id *listen_id, - struct ib_cm_event *ib_event, + const struct rdma_cm_id *listen_id, + const struct ib_cm_event *ib_event, sa_family_t sa_family, __be64 service_id) { if (sa_family == AF_IB) { @@ -1387,7 +1387,7 @@ roce_get_net_dev_by_cm_event(const struct ib_cm_event *ib_event) return sgid_attr->ndev; } -static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event, +static struct net_device *cma_get_net_dev(const struct ib_cm_event *ib_event, struct cma_req_info *req) { struct sockaddr *listen_addr = @@ -1516,9 +1516,10 @@ static struct rdma_id_private *cma_find_listener( return ERR_PTR(-EINVAL); } -static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, - struct ib_cm_event *ib_event, - struct net_device **net_dev) +static struct rdma_id_private * +cma_id_from_event(struct ib_cm_id *cm_id, + const struct ib_cm_event *ib_event, + struct net_device **net_dev) { struct cma_req_info req; struct rdma_bind_list *bind_list; @@ -1766,7 +1767,7 @@ reject: } static void cma_set_rep_event_data(struct rdma_cm_event *event, - struct ib_cm_rep_event_param *rep_data, + const struct ib_cm_rep_event_param *rep_data, void *private_data) { event->param.conn.private_data = private_data; @@ -1779,7 +1780,8 @@ static void cma_set_rep_event_data(struct rdma_cm_event *event, event->param.conn.qp_num = rep_data->remote_qpn; } -static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) +static int cma_ib_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *ib_event) { struct rdma_id_private *id_priv = cm_id->context; struct rdma_cm_event event; @@ -1861,9 +1863,10 @@ out: return ret; } -static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, - struct ib_cm_event *ib_event, - struct net_device *net_dev) +static struct rdma_id_private * 
+cma_new_conn_id(const struct rdma_cm_id *listen_id, + const struct ib_cm_event *ib_event, + struct net_device *net_dev) { struct rdma_id_private *listen_id_priv; struct rdma_id_private *id_priv; @@ -1924,11 +1927,12 @@ err: return NULL; } -static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, - struct ib_cm_event *ib_event, - struct net_device *net_dev) +static struct rdma_id_private * +cma_new_udp_id(const struct rdma_cm_id *listen_id, + const struct ib_cm_event *ib_event, + struct net_device *net_dev) { - struct rdma_id_private *listen_id_priv; + const struct rdma_id_private *listen_id_priv; struct rdma_id_private *id_priv; struct rdma_cm_id *id; const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; @@ -1968,7 +1972,7 @@ err: } static void cma_set_req_event_data(struct rdma_cm_event *event, - struct ib_cm_req_event_param *req_data, + const struct ib_cm_req_event_param *req_data, void *private_data, int offset) { event->param.conn.private_data = private_data + offset; @@ -1982,7 +1986,8 @@ static void cma_set_req_event_data(struct rdma_cm_event *event, event->param.conn.qp_num = req_data->remote_qpn; } -static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event) +static int cma_check_req_qp_type(const struct rdma_cm_id *id, + const struct ib_cm_event *ib_event) { return (((ib_event->event == IB_CM_REQ_RECEIVED) && (ib_event->param.req_rcvd.qp_type == id->qp_type)) || @@ -1991,7 +1996,8 @@ static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_e (!id->qp_type)); } -static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) +static int cma_req_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *ib_event) { struct rdma_id_private *listen_id, *conn_id = NULL; struct rdma_cm_event event; @@ -3479,11 +3485,12 @@ static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) } static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, - struct ib_cm_event *ib_event) + const struct ib_cm_event *ib_event) { struct rdma_id_private *id_priv = cm_id->context; struct rdma_cm_event event; - struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; + const struct ib_cm_sidr_rep_event_param *rep = + &ib_event->param.sidr_rep_rcvd; int ret = 0; mutex_lock(&id_priv->handler_mutex); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 8ef50e46157c..518313a1b0c9 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -78,7 +78,7 @@ static struct ib_send_wr ipoib_cm_rx_drain_wr = { }; static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, - struct ib_cm_event *event); + const struct ib_cm_event *event); static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags, u64 mapping[IPOIB_CM_RX_SG]) @@ -418,7 +418,8 @@ err_free_1: } static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, - struct ib_qp *qp, struct ib_cm_req_event_param *req, + struct ib_qp *qp, + const struct ib_cm_req_event_param *req, unsigned int psn) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -438,7 +439,8 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, return ib_send_cm_rep(cm_id, &rep); } -static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) +static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *event) { struct net_device *dev = cm_id->context; struct ipoib_dev_priv *priv = 
ipoib_priv(dev); @@ -500,7 +502,7 @@ err_qp: } static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id, - struct ib_cm_event *event) + const struct ib_cm_event *event) { struct ipoib_cm_rx *p; struct ipoib_dev_priv *priv; @@ -978,7 +980,8 @@ void ipoib_cm_dev_stop(struct net_device *dev) cancel_delayed_work(&priv->cm.stale_task); } -static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) +static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *event) { struct ipoib_cm_tx *p = cm_id->context; struct ipoib_dev_priv *priv = ipoib_priv(p->dev); @@ -1244,7 +1247,7 @@ timeout: } static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, - struct ib_cm_event *event) + const struct ib_cm_event *event) { struct ipoib_cm_tx *tx = cm_id->context; struct ipoib_dev_priv *priv = ipoib_priv(tx->dev); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index f5ec21d1f4dc..444d16520506 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -142,7 +142,8 @@ static void srp_remove_one(struct ib_device *device, void *client_data); static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc, const char *opname); -static int srp_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); +static int srp_ib_cm_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *event); static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event); @@ -2553,7 +2554,7 @@ error: } static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id, - struct ib_cm_event *event, + const struct ib_cm_event *event, struct srp_rdma_ch *ch) { struct srp_target_port *target = ch->target; @@ -2638,7 +2639,8 @@ static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id, } } -static int srp_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) +static int srp_ib_cm_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *event) { struct srp_rdma_ch *ch = cm_id->context; struct srp_target_port *target = ch->target; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 7d9972add65f..d73e14699aa9 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2401,7 +2401,7 @@ out: } static int srpt_ib_cm_req_recv(struct ib_cm_id *cm_id, - struct ib_cm_req_event_param *param, + const struct ib_cm_req_event_param *param, void *private_data) { char sguid[40]; @@ -2513,7 +2513,8 @@ static void srpt_cm_rtu_recv(struct srpt_rdma_ch *ch) * a non-zero value in any other case will trigger a race with the * ib_destroy_cm_id() call in srpt_release_channel(). */ -static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) +static int srpt_cm_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *event) { struct srpt_rdma_ch *ch = cm_id->context; int ret; diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 568708a87239..c10f4b5ea8ab 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -311,7 +311,7 @@ struct ib_cm_event { * destroy the @cm_id after the callback completes. 
*/ typedef int (*ib_cm_handler)(struct ib_cm_id *cm_id, - struct ib_cm_event *event); + const struct ib_cm_event *event); struct ib_cm_id { ib_cm_handler cm_handler; -- cgit From 05e0b86c413dc09454229f070eb6d2016340f602 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 29 Jul 2018 11:53:12 +0300 Subject: RDMA/cma: Get rid of 1 bit boolean Arrange fields of cma_req_info structure for efficiency on stack and get rid of one bit boolean field. Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 5571f8d52302..5ffd3899dac2 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -411,11 +411,11 @@ struct cma_req_info { struct sockaddr_storage listen_addr_storage; struct sockaddr_storage src_addr_storage; struct ib_device *device; - int port; union ib_gid local_gid; __be64 service_id; + int port; + bool has_gid; u16 pkey; - bool has_gid:1; }; static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp) -- cgit From ca3a8ace2b128abb7f164de9e10b8431eaddc2ac Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 29 Jul 2018 11:53:13 +0300 Subject: RDMA/core: Return bool instead of int Return bool for following internal and inline functions as their underlying APIs return bool too. 1. cma_zero_addr() 2. cma_loopback_addr() 3. cma_any_addr() 4. ib_addr_any() 5. ib_addr_loopback() While we are touching cma_loopback_addr(), remove extra white spaces in it. Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 23 +++++++++++++---------- include/rdma/ib.h | 4 ++-- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 5ffd3899dac2..853f73a0499f 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1046,35 +1046,38 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, } EXPORT_SYMBOL(rdma_init_qp_attr); -static inline int cma_zero_addr(const struct sockaddr *addr) +static inline bool cma_zero_addr(const struct sockaddr *addr) { switch (addr->sa_family) { case AF_INET: return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr); case AF_INET6: - return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr); + return ipv6_addr_any(&((struct sockaddr_in6 *)addr)->sin6_addr); case AF_IB: - return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr); + return ib_addr_any(&((struct sockaddr_ib *)addr)->sib_addr); default: - return 0; + return false; } } -static inline int cma_loopback_addr(const struct sockaddr *addr) +static inline bool cma_loopback_addr(const struct sockaddr *addr) { switch (addr->sa_family) { case AF_INET: - return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr); + return ipv4_is_loopback( + ((struct sockaddr_in *)addr)->sin_addr.s_addr); case AF_INET6: - return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr); + return ipv6_addr_loopback( + &((struct sockaddr_in6 *)addr)->sin6_addr); case AF_IB: - return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr); + return ib_addr_loopback( + &((struct sockaddr_ib *)addr)->sib_addr); default: - return 0; + return false; } } -static inline int cma_any_addr(const struct sockaddr *addr) +static 
inline bool cma_any_addr(const struct sockaddr *addr) { return cma_zero_addr(addr) || cma_loopback_addr(addr); } diff --git a/include/rdma/ib.h b/include/rdma/ib.h index 66dbed0c146d..4f385ec54f80 100644 --- a/include/rdma/ib.h +++ b/include/rdma/ib.h @@ -53,12 +53,12 @@ struct ib_addr { #define sib_interface_id ib_u.uib_addr64[1] }; -static inline int ib_addr_any(const struct ib_addr *a) +static inline bool ib_addr_any(const struct ib_addr *a) { return ((a->sib_addr64[0] | a->sib_addr64[1]) == 0); } -static inline int ib_addr_loopback(const struct ib_addr *a) +static inline bool ib_addr_loopback(const struct ib_addr *a) { return ((a->sib_addr32[0] | a->sib_addr32[1] | a->sib_addr32[2] | (a->sib_addr32[3] ^ htonl(1))) == 0); -- cgit From 7582df826734bad71522b442b8977e3dee63a77a Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 29 Jul 2018 11:53:14 +0300 Subject: RDMA/core: Avoid holding lock while initializing fields on stack In various functions rdma_cm_event is zero initialized on stack using memset() while holding lock which is not necessary. Therefore, don't hold the lock while initializing on stack. Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 853f73a0499f..fcae854b3ca8 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1787,7 +1787,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, const struct ib_cm_event *ib_event) { struct rdma_id_private *id_priv = cm_id->context; - struct rdma_cm_event event; + struct rdma_cm_event event = {}; int ret = 0; mutex_lock(&id_priv->handler_mutex); @@ -1797,7 +1797,6 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, id_priv->state != RDMA_CM_DISCONNECT)) goto out; - memset(&event, 0, sizeof event); switch (ib_event->event) { case IB_CM_REQ_ERROR: case IB_CM_REP_ERROR: @@ -2003,7 +2002,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, const struct ib_cm_event *ib_event) { struct rdma_id_private *listen_id, *conn_id = NULL; - struct rdma_cm_event event; + struct rdma_cm_event event = {}; struct net_device *net_dev; u8 offset; int ret; @@ -2023,7 +2022,6 @@ static int cma_req_handler(struct ib_cm_id *cm_id, goto err1; } - memset(&event, 0, sizeof event); offset = cma_user_data_offset(listen_id); event.event = RDMA_CM_EVENT_CONNECT_REQUEST; if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { @@ -2132,7 +2130,7 @@ EXPORT_SYMBOL(rdma_read_gids); static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) { struct rdma_id_private *id_priv = iw_id->context; - struct rdma_cm_event event; + struct rdma_cm_event event = {}; int ret = 0; struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; @@ -2141,7 +2139,6 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) if (id_priv->state != RDMA_CM_CONNECT) goto out; - memset(&event, 0, sizeof event); switch (iw_event->event) { case IW_CM_EVENT_CLOSE: event.event = RDMA_CM_EVENT_DISCONNECTED; @@ -2201,11 +2198,17 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, { struct rdma_cm_id *new_cm_id; struct rdma_id_private *listen_id, *conn_id; - struct rdma_cm_event event; + struct rdma_cm_event event = {}; int ret = -ECONNABORTED; struct sockaddr *laddr = (struct 
sockaddr *)&iw_event->local_addr; struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; + event.event = RDMA_CM_EVENT_CONNECT_REQUEST; + event.param.conn.private_data = iw_event->private_data; + event.param.conn.private_data_len = iw_event->private_data_len; + event.param.conn.initiator_depth = iw_event->ird; + event.param.conn.responder_resources = iw_event->ord; + listen_id = cm_id->context; mutex_lock(&listen_id->handler_mutex); @@ -2247,13 +2250,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); - memset(&event, 0, sizeof event); - event.event = RDMA_CM_EVENT_CONNECT_REQUEST; - event.param.conn.private_data = iw_event->private_data; - event.param.conn.private_data_len = iw_event->private_data_len; - event.param.conn.initiator_depth = iw_event->ird; - event.param.conn.responder_resources = iw_event->ord; - /* * Protect against the user destroying conn_id from another thread * until we're done accessing it. @@ -2860,9 +2856,8 @@ static void addr_handler(int status, struct sockaddr *src_addr, struct rdma_dev_addr *dev_addr, void *context) { struct rdma_id_private *id_priv = context; - struct rdma_cm_event event; + struct rdma_cm_event event = {}; - memset(&event, 0, sizeof event); mutex_lock(&id_priv->handler_mutex); if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_RESOLVED)) @@ -3491,7 +3486,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, const struct ib_cm_event *ib_event) { struct rdma_id_private *id_priv = cm_id->context; - struct rdma_cm_event event; + struct rdma_cm_event event = {}; const struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; int ret = 0; @@ -3500,7 +3495,6 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, if (id_priv->state != RDMA_CM_CONNECT) goto out; - memset(&event, 0, sizeof event); switch (ib_event->event) { case IB_CM_SIDR_REQ_ERROR: event.event = RDMA_CM_EVENT_UNREACHABLE; @@ -3972,7 +3966,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) { struct rdma_id_private *id_priv; struct cma_multicast *mc = multicast->context; - struct rdma_cm_event event; + struct rdma_cm_event event = {}; int ret = 0; id_priv = mc->id_priv; @@ -3996,7 +3990,6 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) } mutex_unlock(&id_priv->qp_mutex); - memset(&event, 0, sizeof event); event.status = status; event.param.ud.private_data = mc->context; if (!status) { @@ -4441,7 +4434,7 @@ free_cma_dev: static int cma_remove_id_dev(struct rdma_id_private *id_priv) { - struct rdma_cm_event event; + struct rdma_cm_event event = {}; enum rdma_cm_state state; int ret = 0; @@ -4457,7 +4450,6 @@ static int cma_remove_id_dev(struct rdma_id_private *id_priv) if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL)) goto out; - memset(&event, 0, sizeof event); event.event = RDMA_CM_EVENT_DEVICE_REMOVAL; ret = id_priv->id.event_handler(&id_priv->id, &event); out: -- cgit From 79d684f026473c344ce32708687f16b2bd12aff4 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 29 Jul 2018 11:53:15 +0300 Subject: RDMA/core: Simplify gid type check in cma_acquire_dev() cma_add_one() initializes the default GID regardless of device type. listen_id is bound to a device and an IP address, its GID type is initialized by cma_acquire_dev(). Therefore a valid default GID type is always available, it is not needed to check port type during cma_acquire_dev(). 
Initialize the gid type of a cm_id when the cm_id is created instead of doing conditional checks during cma_acquire_dev() and trying to initialize to 0 during _cma_attach_to_dev(). Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index fcae854b3ca8..12ce5fd6de9f 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -490,7 +490,6 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv, { cma_ref_dev(cma_dev); id_priv->cma_dev = cma_dev; - id_priv->gid_type = 0; id_priv->id.device = cma_dev->device; id_priv->id.route.addr.dev_addr.transport = rdma_node_get_transport(cma_dev->device->node_type); @@ -648,7 +647,6 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv, struct cma_device *cma_dev; union ib_gid gid, iboe_gid, *gidp; enum ib_gid_type gid_type; - enum ib_gid_type default_type; int ret = -ENODEV; u8 port; @@ -668,9 +666,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv, port = listen_id_priv->id.port_num; gidp = rdma_protocol_roce(cma_dev->device, port) ? &iboe_gid : &gid; - gid_type = rdma_protocol_ib(cma_dev->device, port) ? - IB_GID_TYPE_IB : - listen_id_priv->gid_type; + gid_type = listen_id_priv->gid_type; sgid_attr = cma_validate_port(cma_dev->device, port, gid_type, gidp, id_priv); if (!IS_ERR(sgid_attr)) { @@ -690,10 +686,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv, gidp = rdma_protocol_roce(cma_dev->device, port) ? &iboe_gid : &gid; - default_type = cma_dev->default_gid_type[port - 1]; - gid_type = - rdma_protocol_ib(cma_dev->device, port) ? - IB_GID_TYPE_IB : default_type; + gid_type = cma_dev->default_gid_type[port - 1]; sgid_attr = cma_validate_port(cma_dev->device, port, gid_type, gidp, id_priv); if (!IS_ERR(sgid_attr)) { @@ -801,6 +794,7 @@ struct rdma_cm_id *__rdma_create_id(struct net *net, id_priv->id.ps = ps; id_priv->id.qp_type = qp_type; id_priv->tos_set = false; + id_priv->gid_type = IB_GID_TYPE_IB; spin_lock_init(&id_priv->lock); mutex_init(&id_priv->qp_mutex); init_completion(&id_priv->comp); -- cgit From 854633165164d19832225ece148370bb9f3b0034 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 29 Jul 2018 11:53:16 +0300 Subject: RDMA/core: Prefix _ib to IB/RoCE specific functions In the rdma cm module, functions which are common between IB and iWarp are named with cma_. iWarp-specific functions are prefixed with cma_iw. IB-specific functions are prefixed with cma_ib. However, some functions in the request processing path didn't follow the cma_ib convention. Prefix them with _ib for better code clarity.
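For quick reference, the renames applied by the diff below are:

	cma_id_from_event()     -> cma_ib_id_from_event()
	cma_new_conn_id()       -> cma_ib_new_conn_id()
	cma_new_udp_id()        -> cma_ib_new_udp_id()
	cma_check_req_qp_type() -> cma_ib_check_req_qp_type()
	cma_req_handler()       -> cma_ib_req_handler()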
Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 12ce5fd6de9f..f72677291b69 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1514,9 +1514,9 @@ static struct rdma_id_private *cma_find_listener( } static struct rdma_id_private * -cma_id_from_event(struct ib_cm_id *cm_id, - const struct ib_cm_event *ib_event, - struct net_device **net_dev) +cma_ib_id_from_event(struct ib_cm_id *cm_id, + const struct ib_cm_event *ib_event, + struct net_device **net_dev) { struct cma_req_info req; struct rdma_bind_list *bind_list; @@ -1860,9 +1860,9 @@ out: } static struct rdma_id_private * -cma_new_conn_id(const struct rdma_cm_id *listen_id, - const struct ib_cm_event *ib_event, - struct net_device *net_dev) +cma_ib_new_conn_id(const struct rdma_cm_id *listen_id, + const struct ib_cm_event *ib_event, + struct net_device *net_dev) { struct rdma_id_private *listen_id_priv; struct rdma_id_private *id_priv; @@ -1924,9 +1924,9 @@ err: } static struct rdma_id_private * -cma_new_udp_id(const struct rdma_cm_id *listen_id, - const struct ib_cm_event *ib_event, - struct net_device *net_dev) +cma_ib_new_udp_id(const struct rdma_cm_id *listen_id, + const struct ib_cm_event *ib_event, + struct net_device *net_dev) { const struct rdma_id_private *listen_id_priv; struct rdma_id_private *id_priv; @@ -1982,8 +1982,8 @@ static void cma_set_req_event_data(struct rdma_cm_event *event, event->param.conn.qp_num = req_data->remote_qpn; } -static int cma_check_req_qp_type(const struct rdma_cm_id *id, - const struct ib_cm_event *ib_event) +static int cma_ib_check_req_qp_type(const struct rdma_cm_id *id, + const struct ib_cm_event *ib_event) { return (((ib_event->event == IB_CM_REQ_RECEIVED) && (ib_event->param.req_rcvd.qp_type == id->qp_type)) || @@ -1992,8 +1992,8 @@ static int cma_check_req_qp_type(const struct rdma_cm_id *id, (!id->qp_type)); } -static int cma_req_handler(struct ib_cm_id *cm_id, - const struct ib_cm_event *ib_event) +static int cma_ib_req_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *ib_event) { struct rdma_id_private *listen_id, *conn_id = NULL; struct rdma_cm_event event = {}; @@ -2001,11 +2001,11 @@ static int cma_req_handler(struct ib_cm_id *cm_id, u8 offset; int ret; - listen_id = cma_id_from_event(cm_id, ib_event, &net_dev); + listen_id = cma_ib_id_from_event(cm_id, ib_event, &net_dev); if (IS_ERR(listen_id)) return PTR_ERR(listen_id); - if (!cma_check_req_qp_type(&listen_id->id, ib_event)) { + if (!cma_ib_check_req_qp_type(&listen_id->id, ib_event)) { ret = -EINVAL; goto net_dev_put; } @@ -2019,12 +2019,12 @@ static int cma_req_handler(struct ib_cm_id *cm_id, offset = cma_user_data_offset(listen_id); event.event = RDMA_CM_EVENT_CONNECT_REQUEST; if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { - conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev); + conn_id = cma_ib_new_udp_id(&listen_id->id, ib_event, net_dev); event.param.ud.private_data = ib_event->private_data + offset; event.param.ud.private_data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; } else { - conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev); + conn_id = cma_ib_new_conn_id(&listen_id->id, ib_event, net_dev); cma_set_req_event_data(&event, &ib_event->param.req_rcvd, ib_event->private_data, offset); } @@ 
-2276,7 +2276,8 @@ static int cma_ib_listen(struct rdma_id_private *id_priv) addr = cma_src_addr(id_priv); svc_id = rdma_get_service_id(&id_priv->id, addr); - id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id); + id = ib_cm_insert_listen(id_priv->id.device, + cma_ib_req_handler, svc_id); if (IS_ERR(id)) return PTR_ERR(id); id_priv->cm_id.ib = id; -- cgit From 7810e09bfba56bc0f2aff705ca7086e6c1f103f6 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 31 Jul 2018 08:08:15 -0700 Subject: rdma/cxgb4: Remove a set-but-not-used variable This patch avoids that the following warning is reported when building with W=1: drivers/infiniband/hw/cxgb4/cm.c:1860:5: warning: variable 'status' set but not used [-Wunused-but-set-variable] u8 status; ^~~~~~ Fixes: 6a0b6174d35a ("rdma/cxgb4: Add support for kernel mode SRQ's") Signed-off-by: Bart Van Assche Acked-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 54f7fbef7880..d7cfa38baad2 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1857,10 +1857,8 @@ static void complete_cached_srq_buffers(struct c4iw_ep *ep, u32 srqidx_status) { enum chip_type adapter_type; u32 srqidx; - u8 status; adapter_type = ep->com.dev->rdev.lldi.adapter_type; - status = ABORT_RSS_STATUS_G(be32_to_cpu(srqidx_status)); srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(srqidx_status)); /* -- cgit From eb2463bab4bce46b0482a0e7e74575771a32fcf0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 31 Jul 2018 08:25:41 -0700 Subject: rdma/cxgb4: Fix SRQ endianness annotations This patch avoids that sparse complains about casts to restricted __be32. 
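To illustrate the effect of the annotation, a minimal sketch of the access pattern sparse can now verify (the struct fields and the ABORT_RSS_SRQIDX_G() macro are taken from the hunks below; the get_srqidx() helper is hypothetical):

	struct cpl_abort_req_rss6 {
		WR_HDR;
		union opcode_tid ot;
		__be32 srqidx_status;	/* big-endian on the wire */
	};

	static u32 get_srqidx(const struct cpl_abort_req_rss6 *req)
	{
		/* __be32 makes sparse warn unless the byte swap is
		 * performed exactly once, via be32_to_cpu() */
		return ABORT_RSS_SRQIDX_G(be32_to_cpu(req->srqidx_status));
	}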
Fixes: a3cdaa69e4ae ("cxgb4: Adds CPL support for Shared Receive Queues") Signed-off-by: Bart Van Assche Acked-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cm.c | 3 ++- drivers/net/ethernet/chelsio/cxgb4/t4_msg.h | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index d7cfa38baad2..9e1463080c22 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1853,7 +1853,8 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } -static void complete_cached_srq_buffers(struct c4iw_ep *ep, u32 srqidx_status) +static void complete_cached_srq_buffers(struct c4iw_ep *ep, + __be32 srqidx_status) { enum chip_type adapter_type; u32 srqidx; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index 09e38f0733bd..b8f75a22fb6c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -755,7 +755,7 @@ struct cpl_abort_req_rss { struct cpl_abort_req_rss6 { WR_HDR; union opcode_tid ot; - __u32 srqidx_status; + __be32 srqidx_status; }; #define ABORT_RSS_STATUS_S 0 @@ -785,7 +785,7 @@ struct cpl_abort_rpl_rss { struct cpl_abort_rpl_rss6 { union opcode_tid ot; - __u32 srqidx_status; + __be32 srqidx_status; }; struct cpl_abort_rpl { -- cgit From dd708e7b4541da1d61cdce2db1d9701444fdb317 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 31 Jul 2018 08:51:30 -0700 Subject: rdma/cxgb4: Simplify a structure initialization This patch avoids that sparse reports the following warning: drivers/infiniband/hw/cxgb4/qp.c:2269:34: warning: Using plain integer as NULL pointer Signed-off-by: Bart Van Assche Acked-by: Steve Wise Acked-by: Raju Rangoju Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 62e2c0d899f5..c26086c76f0b 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2266,7 +2266,7 @@ struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn) void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq) { - struct ib_event event = {0}; + struct ib_event event = {}; event.device = &srq->rhp->ibdev; event.element.srq = &srq->ibsrq; -- cgit From 26e551c5aec572442c4ad7109ff4350f427cd39d Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 31 Jul 2018 09:02:36 +0300 Subject: RDMA: Fix return code check in rdma_set_cq_moderation The proper return code is "-EOPNOTSUPP" when the modify_cq() callback is not supported; all drivers should generate this and all users should check for it when detecting unsupported functionality.
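The caller-side idiom this change establishes, condensed from the ipoib_set_coalesce() hunk below:

	ret = rdma_set_cq_moderation(priv->recv_cq,
				     coal->rx_max_coalesced_frames,
				     coal->rx_coalesce_usecs);
	/* -EOPNOTSUPP only means the device cannot moderate CQs and is
	 * deliberately ignored; any other non-zero code is a real error */
	if (ret && ret != -EOPNOTSUPP) {
		ipoib_warn(priv, "failed modifying CQ (%d)\n", ret);
		return ret;
	}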
Signed-off-by: Kamal Heib Acked-by: Leon Romanovsky (for mlx5) Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/cq.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index ad39d64b8108..088205d7f1a1 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -1184,7 +1184,7 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) int err; if (!MLX5_CAP_GEN(dev->mdev, cq_moderation)) - return -ENOSYS; + return -EOPNOTSUPP; if (cq_period > MLX5_MAX_CQ_PERIOD) return -EINVAL; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 2706bf26cbac..83429925dfc6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -102,7 +102,7 @@ static int ipoib_set_coalesce(struct net_device *dev, ret = rdma_set_cq_moderation(priv->recv_cq, coal->rx_max_coalesced_frames, coal->rx_coalesce_usecs); - if (ret && ret != -ENOSYS) { + if (ret && ret != -EOPNOTSUPP) { ipoib_warn(priv, "failed modifying CQ (%d)\n", ret); return ret; } -- cgit From aa72c9a5f986444f5e245767402ed1f3066fca2c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 26 Jul 2018 15:57:56 -0600 Subject: IB/uverbs: Remove rdma_explicit_destroy() from the ioctl methods The core code will destroy the HW object on behalf of the method, if the method provides an implementation it must simply copy data from the stub uobj into the response. Destroy methods cannot touch the HW object. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 5 +---- drivers/infiniband/core/uverbs_ioctl.c | 28 ++++++++++++++++++++++++--- drivers/infiniband/core/uverbs_std_types_cq.c | 21 ++++++-------------- 3 files changed, 32 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index a63844ba8414..9e84ded6d3be 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -924,10 +924,7 @@ int uverbs_finalize_object(struct ib_uobject *uobj, rdma_lookup_put_uobject(uobj, true); break; case UVERBS_ACCESS_DESTROY: - if (commit) - ret = rdma_remove_commit_uobject(uobj); - else - rdma_lookup_put_uobject(uobj, true); + rdma_lookup_put_uobject(uobj, true); break; case UVERBS_ACCESS_NEW: if (commit) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 23a1777f26e2..404acfcdbeb2 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -51,6 +51,7 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile, u16 attr_id, const struct uverbs_attr_spec_hash *attr_spec_bucket, struct uverbs_attr_bundle_hash *attr_bundle_h, + struct uverbs_obj_attr **destroy_attr, struct ib_uverbs_attr __user *uattr_ptr) { const struct uverbs_attr_spec *spec; @@ -148,6 +149,12 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile, if (!object) return -EINVAL; + /* specs are allowed to have only one destroy attribute */ + WARN_ON(spec->u.obj.access == UVERBS_ACCESS_DESTROY && + *destroy_attr); + if (spec->u.obj.access == UVERBS_ACCESS_DESTROY) + *destroy_attr = o_attr; + /* * The type of uattr->data is u64 for UVERBS_ATTR_TYPE_IDR and * s64 for UVERBS_ATTR_TYPE_FD. 
We can cast the u64 to s64 @@ -235,6 +242,7 @@ static int uverbs_uattrs_process(struct ib_uverbs_file *ufile, size_t num_uattrs, const struct uverbs_method_spec *method, struct uverbs_attr_bundle *attr_bundle, + struct uverbs_obj_attr **destroy_attr, struct ib_uverbs_attr __user *uattr_ptr) { size_t i; @@ -268,7 +276,8 @@ static int uverbs_uattrs_process(struct ib_uverbs_file *ufile, attr_spec_bucket = method->attr_buckets[ret]; ret = uverbs_process_attr(ufile, uattr, attr_id, attr_spec_bucket, - &attr_bundle->hash[ret], uattr_ptr++); + &attr_bundle->hash[ret], destroy_attr, + uattr_ptr++); if (ret) { uverbs_finalize_attrs(attr_bundle, method->attr_buckets, @@ -322,9 +331,11 @@ static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr, int ret; int finalize_ret; int num_given_buckets; + struct uverbs_obj_attr *destroy_attr = NULL; - num_given_buckets = uverbs_uattrs_process( - ufile, uattrs, num_uattrs, method_spec, attr_bundle, uattr_ptr); + num_given_buckets = + uverbs_uattrs_process(ufile, uattrs, num_uattrs, method_spec, + attr_bundle, &destroy_attr, uattr_ptr); if (num_given_buckets <= 0) return -EINVAL; @@ -333,7 +344,18 @@ static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr, if (ret) goto cleanup; + /* + * We destroy the HW object before invoking the handler, handlers do + * not get to manipulate the HW objects. + */ + if (destroy_attr) { + ret = rdma_explicit_destroy(destroy_attr->uobject); + if (ret) + goto cleanup; + } + ret = method_spec->handler(ibdev, ufile, attr_bundle); + cleanup: finalize_ret = uverbs_finalize_attrs(attr_bundle, method_spec->attr_buckets, diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 3179203a2dd7..68c86e6e932e 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -179,21 +179,12 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev, { struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE); - struct ib_uverbs_destroy_cq_resp resp; - struct ib_ucq_object *obj; - int ret; - - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - obj = container_of(uobj, struct ib_ucq_object, uobject); - - ret = rdma_explicit_destroy(uobj); - if (ret) - return ret; - - resp.comp_events_reported = obj->comp_events_reported; - resp.async_events_reported = obj->async_events_reported; + struct ib_ucq_object *obj = + container_of(uobj, struct ib_ucq_object, uobject); + struct ib_uverbs_destroy_cq_resp resp = { + .comp_events_reported = obj->comp_events_reported, + .async_events_reported = obj->async_events_reported + }; return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_CQ_RESP, &resp, sizeof(resp)); -- cgit From 32ed5c00ac5fdea49058fd49bf8707e101dc3dfe Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 25 Jul 2018 21:40:11 -0600 Subject: IB/uverbs: Make the write path destroy methods use the same flow as ioctl The ridiculous dance with uobj_remove_commit() is not needed; the write path can follow the same flow as ioctl: lock and destroy the HW object, then use the data left over in the uobject to form the response to userspace. Two helpers are introduced to make this flow straightforward for the caller.
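The resulting write-path flow, condensed from the ib_uverbs_destroy_cq() hunk below (error handling of the response copy omitted):

	uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, file);
	if (IS_ERR(uobj))
		return PTR_ERR(uobj);

	/* The HW object is already destroyed; only the kref'd uobject
	 * memory remains, and it holds the data for the response */
	obj = container_of(uobj, struct ib_ucq_object, uobject);
	resp.comp_events_reported = obj->comp_events_reported;
	resp.async_events_reported = obj->async_events_reported;

	uobj_put_destroy(uobj);	/* drops the write lock and the lookup kref */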
Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 53 ++++++++++++++----------- drivers/infiniband/core/uverbs_cmd.c | 77 +++++++----------------------------- include/rdma/uverbs_std_types.h | 16 +++++--- include/rdma/uverbs_types.h | 1 - 4 files changed, 55 insertions(+), 92 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 9e84ded6d3be..7db75d784070 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -130,24 +130,44 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive) } /* - * Does both rdma_lookup_get_uobject() and rdma_remove_commit_uobject(), then - * returns success_res on success (negative errno on failure). For use by - * callers that do not need the uobj. + * uobj_get_destroy destroys the HW object and returns a handle to the uobj + * with a NULL object pointer. The caller must pair this with + * uverbs_put_destroy. */ -int __uobj_perform_destroy(const struct uverbs_obj_type *type, u32 id, - struct ib_uverbs_file *ufile, int success_res) +struct ib_uobject *__uobj_get_destroy(const struct uverbs_obj_type *type, + u32 id, struct ib_uverbs_file *ufile) { struct ib_uobject *uobj; int ret; uobj = rdma_lookup_get_uobject(type, ufile, id, true); if (IS_ERR(uobj)) - return PTR_ERR(uobj); + return uobj; - ret = rdma_remove_commit_uobject(uobj); - if (ret) - return ret; + ret = rdma_explicit_destroy(uobj); + if (ret) { + rdma_lookup_put_uobject(uobj, true); + return ERR_PTR(ret); + } + + return uobj; +} +/* + * Does both uobj_get_destroy() and uobj_put_destroy(). Returns success_res + * on success (negative errno on failure). For use by callers that do not need + * the uobj. + */ +int __uobj_perform_destroy(const struct uverbs_obj_type *type, u32 id, + struct ib_uverbs_file *ufile, int success_res) +{ + struct ib_uobject *uobj; + + uobj = __uobj_get_destroy(type, id, ufile); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); + + rdma_lookup_put_uobject(uobj, true); return success_res; } @@ -449,21 +469,6 @@ static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj, return ret; } -/* This is called only for user requested DESTROY reasons - * rdma_lookup_get_uobject(exclusive=true) must have been called to get uobj, - * and after this returns the corresponding put has been done, and the kref - * for uobj has been consumed. - */ -int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj) -{ - int ret; - - ret = rdma_explicit_destroy(uobj); - /* Pairs with the lookup_get done by the caller */ - rdma_lookup_put_uobject(uobj, true); - return ret; -} - int rdma_explicit_destroy(struct ib_uobject *uobject) { int ret; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index b2af4eeb7669..fe96ceda6cd2 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1304,37 +1304,22 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, struct ib_uverbs_destroy_cq cmd; struct ib_uverbs_destroy_cq_resp resp; struct ib_uobject *uobj; - struct ib_cq *cq; struct ib_ucq_object *obj; - int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_CQ, cmd.cq_handle, file); + uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); - /* - * Make sure we don't free the memory in remove_commit as we still - * needs the uobject memory to create the response. 
- */ - uverbs_uobject_get(uobj); - cq = uobj->object; - obj = container_of(cq->uobject, struct ib_ucq_object, uobject); - + obj = container_of(uobj, struct ib_ucq_object, uobject); memset(&resp, 0, sizeof(resp)); - - ret = uobj_remove_commit(uobj); - if (ret) { - uverbs_uobject_put(uobj); - return ret; - } - resp.comp_events_reported = obj->comp_events_reported; resp.async_events_reported = obj->async_events_reported; - uverbs_uobject_put(uobj); + uobj_put_destroy(uobj); + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) return -EFAULT; @@ -2104,32 +2089,19 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, struct ib_uverbs_destroy_qp_resp resp; struct ib_uobject *uobj; struct ib_uqp_object *obj; - int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - memset(&resp, 0, sizeof resp); - - uobj = uobj_get_write(UVERBS_OBJECT_QP, cmd.qp_handle, file); + uobj = uobj_get_destroy(UVERBS_OBJECT_QP, cmd.qp_handle, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); obj = container_of(uobj, struct ib_uqp_object, uevent.uobject); - /* - * Make sure we don't free the memory in remove_commit as we still - * needs the uobject memory to create the response. - */ - uverbs_uobject_get(uobj); - - ret = uobj_remove_commit(uobj); - if (ret) { - uverbs_uobject_put(uobj); - return ret; - } - + memset(&resp, 0, sizeof(resp)); resp.events_reported = obj->uevent.events_reported; - uverbs_uobject_put(uobj); + + uobj_put_destroy(uobj); if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) return -EFAULT; @@ -3198,22 +3170,14 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, return -EOPNOTSUPP; resp.response_length = required_resp_len; - uobj = uobj_get_write(UVERBS_OBJECT_WQ, cmd.wq_handle, file); + uobj = uobj_get_destroy(UVERBS_OBJECT_WQ, cmd.wq_handle, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); obj = container_of(uobj, struct ib_uwq_object, uevent.uobject); - /* - * Make sure we don't free the memory in remove_commit as we still - * needs the uobject memory to create the response. - */ - uverbs_uobject_get(uobj); - - ret = uobj_remove_commit(uobj); resp.events_reported = obj->uevent.events_reported; - uverbs_uobject_put(uobj); - if (ret) - return ret; + + uobj_put_destroy(uobj); return ib_copy_to_udata(ucore, &resp, resp.response_length); } @@ -3920,31 +3884,20 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, struct ib_uverbs_destroy_srq_resp resp; struct ib_uobject *uobj; struct ib_uevent_object *obj; - int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_SRQ, cmd.srq_handle, file); + uobj = uobj_get_destroy(UVERBS_OBJECT_SRQ, cmd.srq_handle, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); obj = container_of(uobj, struct ib_uevent_object, uobject); - /* - * Make sure we don't free the memory in remove_commit as we still - * needs the uobject memory to create the response. 
- */ - uverbs_uobject_get(uobj); - memset(&resp, 0, sizeof(resp)); - - ret = uobj_remove_commit(uobj); - if (ret) { - uverbs_uobject_put(uobj); - return ret; - } resp.events_reported = obj->events_reported; - uverbs_uobject_put(uobj); + + uobj_put_destroy(uobj); + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) return -EFAULT; diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 076f085d2dcf..c2f89e41cbd2 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -84,6 +84,17 @@ int __uobj_perform_destroy(const struct uverbs_obj_type *type, u32 id, __uobj_perform_destroy(uobj_get_type(_type), _uobj_check_id(_id), \ _ufile, _success_res) +struct ib_uobject *__uobj_get_destroy(const struct uverbs_obj_type *type, + u32 id, struct ib_uverbs_file *ufile); + +#define uobj_get_destroy(_type, _id, _ufile) \ + __uobj_get_destroy(uobj_get_type(_type), _uobj_check_id(_id), _ufile) + +static inline void uobj_put_destroy(struct ib_uobject *uobj) +{ + rdma_lookup_put_uobject(uobj, true); +} + static inline void uobj_put_read(struct ib_uobject *uobj) { rdma_lookup_put_uobject(uobj, false); @@ -97,11 +108,6 @@ static inline void uobj_put_write(struct ib_uobject *uobj) rdma_lookup_put_uobject(uobj, true); } -static inline int __must_check uobj_remove_commit(struct ib_uobject *uobj) -{ - return rdma_remove_commit_uobject(uobj); -} - static inline int __must_check uobj_alloc_commit(struct ib_uobject *uobj, int success_res) { diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index cfc50fcdbff6..8bae28dd2e4f 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -126,7 +126,6 @@ void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive); struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, struct ib_uverbs_file *ufile); void rdma_alloc_abort_uobject(struct ib_uobject *uobj); -int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj); int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj); int rdma_explicit_destroy(struct ib_uobject *uobject); -- cgit From 87ad80abc70d2d5a4e383bc7e63867c9bc660838 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 25 Jul 2018 21:40:12 -0600 Subject: IB/uverbs: Consolidate uobject destruction There are several flows that can destroy a uobject and each one is minimized and sprinkled throughout the code base, making it difficult to understand and very hard to modify the destroy path. Consolidate all of these into uverbs_destroy_uobject() and call it in all cases where a uobject has to be destroyed. This makes one change to the lifecycle, during any abort (eg when alloc_commit is not called) we always call out to alloc_abort, even if remove_commit needs to be called to delete a HW object. This also renames RDMA_REMOVE_DURING_CLEANUP to RDMA_REMOVE_ABORT to clarify its actual usage and revises some of the comments to reflect what the life cycle is for the type implementation. 
Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 251 ++++++++++++++++++------------------ include/rdma/ib_verbs.h | 4 +- include/rdma/uverbs_types.h | 70 +++++----- 3 files changed, 157 insertions(+), 168 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 7db75d784070..aa1d16d87746 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -129,6 +129,95 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive) return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY; } +static void assert_uverbs_usecnt(struct ib_uobject *uobj, bool exclusive) +{ +#ifdef CONFIG_LOCKDEP + if (exclusive) + WARN_ON(atomic_read(&uobj->usecnt) != -1); + else + WARN_ON(atomic_read(&uobj->usecnt) <= 0); +#endif +} + +/* + * This must be called with the hw_destroy_rwsem locked (except for + * RDMA_REMOVE_ABORT) for read or write, also The uobject itself must be + * locked for write. + * + * Upon return the HW object is guaranteed to be destroyed. + * + * For RDMA_REMOVE_ABORT, the hw_destroy_rwsem is not required to be held, + * however the type's allocat_commit function cannot have been called and the + * uobject cannot be on the uobjects_lists + * + * For RDMA_REMOVE_DESTROY the caller shold be holding a kref (eg via + * rdma_lookup_get_uobject) and the object is left in a state where the caller + * needs to call rdma_lookup_put_uobject. + * + * For all other destroy modes this function internally unlocks the uobject + * and consumes the kref on the uobj. + */ +static int uverbs_destroy_uobject(struct ib_uobject *uobj, + enum rdma_remove_reason reason) +{ + struct ib_uverbs_file *ufile = uobj->ufile; + unsigned long flags; + int ret; + + assert_uverbs_usecnt(uobj, true); + + if (uobj->object) { + ret = uobj->type->type_class->remove_commit(uobj, reason); + if (ret) { + if (ib_is_destroy_retryable(ret, reason, uobj)) + return ret; + + /* Nothing to be done, dangle the memory and move on */ + WARN(true, + "ib_uverbs: failed to remove uobject id %d, driver err=%d", + uobj->id, ret); + } + + uobj->object = NULL; + } + + if (reason == RDMA_REMOVE_ABORT) { + WARN_ON(!list_empty(&uobj->list)); + WARN_ON(!uobj->context); + uobj->type->type_class->alloc_abort(uobj); + } + + uobj->context = NULL; + + /* + * For DESTROY the usecnt is held write locked, the caller is expected + * to put it unlock and put the object when done with it. + */ + if (reason != RDMA_REMOVE_DESTROY) + atomic_set(&uobj->usecnt, 0); + + if (!list_empty(&uobj->list)) { + spin_lock_irqsave(&ufile->uobjects_lock, flags); + list_del_init(&uobj->list); + spin_unlock_irqrestore(&ufile->uobjects_lock, flags); + + /* + * Pairs with the get in rdma_alloc_commit_uobject(), could + * destroy uobj. + */ + uverbs_uobject_put(uobj); + } + + /* + * When aborting the stack kref remains owned by the core code, and is + * not transferred into the type. Pairs with the get in alloc_uobj + */ + if (reason == RDMA_REMOVE_ABORT) + uverbs_uobject_put(uobj); + + return 0; +} + /* * uobj_get_destroy destroys the HW object and returns a handle to the uobj * with a NULL object pointer. 
The caller must pair this with @@ -171,6 +260,7 @@ int __uobj_perform_destroy(const struct uverbs_obj_type *type, u32 id, return success_res; } +/* alloc_uobj must be undone by uverbs_destroy_uobject() */ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile, const struct uverbs_obj_type *type) { @@ -379,6 +469,16 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, return type->type_class->alloc_begin(type, ufile); } +static void alloc_abort_idr_uobject(struct ib_uobject *uobj) +{ + ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device, + RDMACG_RESOURCE_HCA_OBJECT); + + spin_lock(&uobj->ufile->idr_lock); + idr_remove(&uobj->ufile->idr, uobj->id); + spin_unlock(&uobj->ufile->idr_lock); +} + static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj, enum rdma_remove_reason why) { @@ -395,25 +495,19 @@ static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj, if (ib_is_destroy_retryable(ret, why, uobj)) return ret; - ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device, - RDMACG_RESOURCE_HCA_OBJECT); - - spin_lock(&uobj->ufile->idr_lock); - idr_remove(&uobj->ufile->idr, uobj->id); - spin_unlock(&uobj->ufile->idr_lock); + if (why == RDMA_REMOVE_ABORT) + return 0; + alloc_abort_idr_uobject(uobj); /* Matches the kref in alloc_commit_idr_uobject */ uverbs_uobject_put(uobj); - return ret; + return 0; } static void alloc_abort_fd_uobject(struct ib_uobject *uobj) { put_unused_fd(uobj->id); - - /* Pairs with the kref from alloc_begin_idr_uobject */ - uverbs_uobject_put(uobj); } static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj, @@ -426,47 +520,7 @@ static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj, if (ib_is_destroy_retryable(ret, why, uobj)) return ret; - if (why == RDMA_REMOVE_DURING_CLEANUP) { - alloc_abort_fd_uobject(uobj); - return ret; - } - - uobj->context = NULL; - return ret; -} - -static void assert_uverbs_usecnt(struct ib_uobject *uobj, bool exclusive) -{ -#ifdef CONFIG_LOCKDEP - if (exclusive) - WARN_ON(atomic_read(&uobj->usecnt) != -1); - else - WARN_ON(atomic_read(&uobj->usecnt) <= 0); -#endif -} - -static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj, - enum rdma_remove_reason why) -{ - struct ib_uverbs_file *ufile = uobj->ufile; - int ret; - - if (!uobj->object) - return 0; - - ret = uobj->type->type_class->remove_commit(uobj, why); - if (ib_is_destroy_retryable(ret, why, uobj)) - return ret; - - uobj->object = NULL; - - spin_lock_irq(&ufile->uobjects_lock); - list_del(&uobj->list); - spin_unlock_irq(&ufile->uobjects_lock); - /* Pairs with the get in rdma_alloc_commit_uobject() */ - uverbs_uobject_put(uobj); - - return ret; + return 0; } int rdma_explicit_destroy(struct ib_uobject *uobject) @@ -479,8 +533,8 @@ int rdma_explicit_destroy(struct ib_uobject *uobject) WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n"); return 0; } - assert_uverbs_usecnt(uobject, true); - ret = _rdma_remove_commit_uobject(uobject, RDMA_REMOVE_DESTROY); + + ret = uverbs_destroy_uobject(uobject, RDMA_REMOVE_DESTROY); up_read(&ufile->hw_destroy_rwsem); return ret; @@ -554,24 +608,14 @@ int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj) /* Cleanup is running. 
Calling this should have been impossible */ if (!down_read_trylock(&ufile->hw_destroy_rwsem)) { WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n"); - ret = uobj->type->type_class->remove_commit(uobj, - RDMA_REMOVE_DURING_CLEANUP); - if (ret) - pr_warn("ib_uverbs: cleanup of idr object %d failed\n", - uobj->id); - return ret; + uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT); + return -EINVAL; } - assert_uverbs_usecnt(uobj, true); - /* alloc_commit consumes the uobj kref */ ret = uobj->type->type_class->alloc_commit(uobj); if (ret) { - if (uobj->type->type_class->remove_commit( - uobj, RDMA_REMOVE_DURING_CLEANUP)) - pr_warn("ib_uverbs: cleanup of idr object %d failed\n", - uobj->id); - up_read(&ufile->hw_destroy_rwsem); + uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT); return ret; } @@ -589,27 +633,14 @@ int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj) return 0; } -static void alloc_abort_idr_uobject(struct ib_uobject *uobj) -{ - ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device, - RDMACG_RESOURCE_HCA_OBJECT); - - spin_lock(&uobj->ufile->idr_lock); - /* The value of the handle in the IDR is NULL at this point. */ - idr_remove(&uobj->ufile->idr, uobj->id); - spin_unlock(&uobj->ufile->idr_lock); - - /* Pairs with the kref from alloc_begin_idr_uobject */ - uverbs_uobject_put(uobj); -} - /* * This consumes the kref for uobj. It is up to the caller to unwind the HW * object and anything else connected to uobj before calling this. */ void rdma_alloc_abort_uobject(struct ib_uobject *uobj) { - uobj->type->type_class->alloc_abort(uobj); + uobj->object = NULL; + uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT); } static void lookup_put_idr_uobject(struct ib_uobject *uobj, bool exclusive) @@ -667,45 +698,23 @@ const struct uverbs_obj_type_class uverbs_idr_class = { }; EXPORT_SYMBOL(uverbs_idr_class); -static void _uverbs_close_fd(struct ib_uobject *uobj) -{ - int ret; - - /* - * uobject was already cleaned up, remove_commit_fd_uobject - * sets this - */ - if (!uobj->context) - return; - - /* - * lookup_get_fd_uobject holds the kref on the struct file any time a - * FD uobj is locked, which prevents this release method from being - * invoked. Meaning we can always get the write lock here, or we have - * a kernel bug. If so dangle the pointers and bail. - */ - ret = uverbs_try_lock_object(uobj, true); - if (WARN(ret, "uverbs_close_fd() racing with lookup_get_fd_uobject()")) - return; - - ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_CLOSE); - if (ret) - pr_warn("Unable to clean up uobject file in %s\n", __func__); - - atomic_set(&uobj->usecnt, 0); -} - void uverbs_close_fd(struct file *f) { struct ib_uobject *uobj = f->private_data; struct ib_uverbs_file *ufile = uobj->ufile; if (down_read_trylock(&ufile->hw_destroy_rwsem)) { - _uverbs_close_fd(uobj); + /* + * lookup_get_fd_uobject holds the kref on the struct file any + * time a FD uobj is locked, which prevents this release + * method from being invoked. Meaning we can always get the + * write lock here, or we have a kernel bug. 
+ */ + WARN_ON(uverbs_try_lock_object(uobj, true)); + uverbs_destroy_uobject(uobj, RDMA_REMOVE_CLOSE); up_read(&ufile->hw_destroy_rwsem); } - uobj->object = NULL; /* Matches the get in alloc_begin_fd_uobject */ kref_put(&ufile->ref, ib_uverbs_release_file); @@ -783,7 +792,6 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, { struct ib_uobject *obj, *next_obj; int ret = -EINVAL; - int err = 0; /* * This shouldn't run while executing other commands on this @@ -800,23 +808,8 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, * racing with a lookup_get. */ WARN_ON(uverbs_try_lock_object(obj, true)); - err = obj->type->type_class->remove_commit(obj, reason); - - if (ib_is_destroy_retryable(err, reason, obj)) { - pr_debug("ib_uverbs: failed to remove uobject id %d err %d\n", - obj->id, err); - atomic_set(&obj->usecnt, 0); - continue; - } - - if (err) - pr_err("ib_uverbs: unable to remove uobject id %d err %d\n", - obj->id, err); - - list_del(&obj->list); - /* Pairs with the get in rdma_alloc_commit_uobject() */ - uverbs_uobject_put(obj); - ret = 0; + if (!uverbs_destroy_uobject(obj, reason)) + ret = 0; } return ret; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 1de8f0d2797c..be208421f7d3 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1467,8 +1467,8 @@ enum rdma_remove_reason { RDMA_REMOVE_CLOSE, /* Driver is being hot-unplugged. This call should delete the actual object itself */ RDMA_REMOVE_DRIVER_REMOVE, - /* Context is being cleaned-up, but commit was just completed */ - RDMA_REMOVE_DURING_CLEANUP, + /* uobj is being cleaned-up before being committed */ + RDMA_REMOVE_ABORT, }; struct ib_rdmacg_object { diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index 8bae28dd2e4f..875dd8c16ba3 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -38,53 +38,49 @@ struct uverbs_obj_type; +/* + * The following sequences are valid: + * Success flow: + * alloc_begin + * alloc_commit + * [..] + * Access flow: + * lookup_get(exclusive=false) & uverbs_try_lock_object + * lookup_put(exclusive=false) via rdma_lookup_put_uobject + * Destruction flow: + * lookup_get(exclusive=true) & uverbs_try_lock_object + * remove_commit + * lookup_put(exclusive=true) via rdma_lookup_put_uobject + * + * Allocate Error flow #1 + * alloc_begin + * alloc_abort + * Allocate Error flow #2 + * alloc_begin + * remove_commit + * alloc_abort + * Allocate Error flow #3 + * alloc_begin + * alloc_commit (fails) + * remove_commit + * alloc_abort + * + * In all cases the caller must hold the ufile kref until alloc_commit or + * alloc_abort returns. + */ struct uverbs_obj_type_class { - /* - * Get an ib_uobject that corresponds to the given id from ucontext, - * These functions could create or destroy objects if required. - * The action will be finalized only when commit, abort or put fops are - * called. - * The flow of the different actions is: - * [alloc]: Starts with alloc_begin. The handlers logic is than - * executed. If the handler is successful, alloc_commit - * is called and the object is inserted to the repository. - * Once alloc_commit completes the object is visible to - * other threads and userspace. - e Otherwise, alloc_abort is called and the object is - * destroyed. - * [lookup]: Starts with lookup_get which fetches and locks the - * object. After the handler finished using the object, it - * needs to call lookup_put to unlock it. 
The exclusive - * flag indicates if the object is locked for exclusive - * access. - * [remove]: Starts with lookup_get with exclusive flag set. This - * locks the object for exclusive access. If the handler - * code completed successfully, remove_commit is called - * and the ib_uobject is removed from the context's - * uobjects repository and put. The object itself is - * destroyed as well. Once remove succeeds new krefs to - * the object cannot be acquired by other threads or - * userspace and the hardware driver is removed from the - * object. Other krefs on the object may still exist. - * If the handler code failed, lookup_put should be - * called. This callback is used when the context - * is destroyed as well (process termination, - * reset flow). - */ struct ib_uobject *(*alloc_begin)(const struct uverbs_obj_type *type, struct ib_uverbs_file *ufile); + /* This consumes the kref on uobj */ int (*alloc_commit)(struct ib_uobject *uobj); + /* This does not consume the kref on uobj */ void (*alloc_abort)(struct ib_uobject *uobj); struct ib_uobject *(*lookup_get)(const struct uverbs_obj_type *type, struct ib_uverbs_file *ufile, s64 id, bool exclusive); void (*lookup_put)(struct ib_uobject *uobj, bool exclusive); - /* - * Must be called with the exclusive lock held. If successful uobj is - * invalid on return. On failure uobject is left completely - * unchanged - */ + /* This does not consume the kref on uobj */ int __must_check (*remove_commit)(struct ib_uobject *uobj, enum rdma_remove_reason why); u8 needs_kfree_rcu; -- cgit From 9867f5c6695f0a17cde9a4dc140fe026b4e40d4a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 25 Jul 2018 21:40:13 -0600 Subject: IB/uverbs: Convert 'bool exclusive' into an enum This is more readable, and future patches will need a 3rd lookup type. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 94 ++++++++++++++++++++++--------------- include/rdma/uverbs_std_types.h | 13 ++--- include/rdma/uverbs_types.h | 16 +++++-- 3 files changed, 75 insertions(+), 48 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index aa1d16d87746..435dbe8ef2a2 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -108,7 +108,8 @@ void uverbs_uobject_put(struct ib_uobject *uobject) kref_put(&uobject->ref, uverbs_uobject_free); } -static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive) +static int uverbs_try_lock_object(struct ib_uobject *uobj, + enum rdma_lookup_mode mode) { /* * When a shared access is required, we use a positive counter. Each @@ -121,21 +122,29 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive) * concurrently, setting the counter to zero is enough for releasing * this lock. */ - if (!exclusive) + switch (mode) { + case UVERBS_LOOKUP_READ: return __atomic_add_unless(&uobj->usecnt, 1, -1) == -1 ? -EBUSY : 0; - - /* lock is either WRITE or DESTROY - should be exclusive */ - return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY; + case UVERBS_LOOKUP_WRITE: + /* lock is either WRITE or DESTROY - should be exclusive */ + return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 
0 : -EBUSY; + } + return 0; } -static void assert_uverbs_usecnt(struct ib_uobject *uobj, bool exclusive) +static void assert_uverbs_usecnt(struct ib_uobject *uobj, + enum rdma_lookup_mode mode) { #ifdef CONFIG_LOCKDEP - if (exclusive) - WARN_ON(atomic_read(&uobj->usecnt) != -1); - else + switch (mode) { + case UVERBS_LOOKUP_READ: WARN_ON(atomic_read(&uobj->usecnt) <= 0); + break; + case UVERBS_LOOKUP_WRITE: + WARN_ON(atomic_read(&uobj->usecnt) != -1); + break; + } #endif } @@ -164,7 +173,7 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, unsigned long flags; int ret; - assert_uverbs_usecnt(uobj, true); + assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE); if (uobj->object) { ret = uobj->type->type_class->remove_commit(uobj, reason); @@ -229,13 +238,13 @@ struct ib_uobject *__uobj_get_destroy(const struct uverbs_obj_type *type, struct ib_uobject *uobj; int ret; - uobj = rdma_lookup_get_uobject(type, ufile, id, true); + uobj = rdma_lookup_get_uobject(type, ufile, id, UVERBS_LOOKUP_WRITE); if (IS_ERR(uobj)) return uobj; ret = rdma_explicit_destroy(uobj); if (ret) { - rdma_lookup_put_uobject(uobj, true); + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); return ERR_PTR(ret); } @@ -256,7 +265,7 @@ int __uobj_perform_destroy(const struct uverbs_obj_type *type, u32 id, if (IS_ERR(uobj)) return PTR_ERR(uobj); - rdma_lookup_put_uobject(uobj, true); + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); return success_res; } @@ -319,7 +328,8 @@ static int idr_add_uobj(struct ib_uobject *uobj) /* Returns the ib_uobject or an error. The caller should check for IS_ERR. */ static struct ib_uobject * lookup_get_idr_uobject(const struct uverbs_obj_type *type, - struct ib_uverbs_file *ufile, s64 id, bool exclusive) + struct ib_uverbs_file *ufile, s64 id, + enum rdma_lookup_mode mode) { struct ib_uobject *uobj; unsigned long idrno = id; @@ -349,9 +359,10 @@ free: return uobj; } -static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type, - struct ib_uverbs_file *ufile, - s64 id, bool exclusive) +static struct ib_uobject * +lookup_get_fd_uobject(const struct uverbs_obj_type *type, + struct ib_uverbs_file *ufile, s64 id, + enum rdma_lookup_mode mode) { struct file *f; struct ib_uobject *uobject; @@ -362,7 +373,7 @@ static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *ty if (fdno != id) return ERR_PTR(-EINVAL); - if (exclusive) + if (mode != UVERBS_LOOKUP_READ) return ERR_PTR(-EOPNOTSUPP); f = fget(fdno); @@ -386,12 +397,12 @@ static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *ty struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, struct ib_uverbs_file *ufile, s64 id, - bool exclusive) + enum rdma_lookup_mode mode) { struct ib_uobject *uobj; int ret; - uobj = type->type_class->lookup_get(type, ufile, id, exclusive); + uobj = type->type_class->lookup_get(type, ufile, id, mode); if (IS_ERR(uobj)) return uobj; @@ -400,13 +411,13 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, goto free; } - ret = uverbs_try_lock_object(uobj, exclusive); + ret = uverbs_try_lock_object(uobj, mode); if (ret) goto free; return uobj; free: - uobj->type->type_class->lookup_put(uobj, exclusive); + uobj->type->type_class->lookup_put(uobj, mode); uverbs_uobject_put(uobj); return ERR_PTR(ret); } @@ -643,32 +654,39 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj) uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT); } -static void lookup_put_idr_uobject(struct ib_uobject *uobj, bool 
exclusive) +static void lookup_put_idr_uobject(struct ib_uobject *uobj, + enum rdma_lookup_mode mode) { } -static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool exclusive) +static void lookup_put_fd_uobject(struct ib_uobject *uobj, + enum rdma_lookup_mode mode) { struct file *filp = uobj->object; - WARN_ON(exclusive); + WARN_ON(mode != UVERBS_LOOKUP_READ); /* This indirectly calls uverbs_close_fd and free the object */ fput(filp); } -void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive) +void rdma_lookup_put_uobject(struct ib_uobject *uobj, + enum rdma_lookup_mode mode) { - assert_uverbs_usecnt(uobj, exclusive); - uobj->type->type_class->lookup_put(uobj, exclusive); + assert_uverbs_usecnt(uobj, mode); + uobj->type->type_class->lookup_put(uobj, mode); /* * In order to unlock an object, either decrease its usecnt for * read access or zero it in case of exclusive access. See * uverbs_try_lock_object for locking schema information. */ - if (!exclusive) + switch (mode) { + case UVERBS_LOOKUP_READ: atomic_dec(&uobj->usecnt); - else + break; + case UVERBS_LOOKUP_WRITE: atomic_set(&uobj->usecnt, 0); + break; + } /* Pairs with the kref obtained by type->lookup_get */ uverbs_uobject_put(uobj); @@ -710,7 +728,7 @@ void uverbs_close_fd(struct file *f) * method from being invoked. Meaning we can always get the * write lock here, or we have a kernel bug. */ - WARN_ON(uverbs_try_lock_object(uobj, true)); + WARN_ON(uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE)); uverbs_destroy_uobject(uobj, RDMA_REMOVE_CLOSE); up_read(&ufile->hw_destroy_rwsem); } @@ -807,7 +825,7 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, * if we hit this WARN_ON, that means we are * racing with a lookup_get. */ - WARN_ON(uverbs_try_lock_object(obj, true)); + WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE)); if (!uverbs_destroy_uobject(obj, reason)) ret = 0; } @@ -890,10 +908,12 @@ uverbs_get_uobject_from_file(const struct uverbs_obj_type *type_attrs, { switch (access) { case UVERBS_ACCESS_READ: - return rdma_lookup_get_uobject(type_attrs, ufile, id, false); + return rdma_lookup_get_uobject(type_attrs, ufile, id, + UVERBS_LOOKUP_READ); case UVERBS_ACCESS_DESTROY: case UVERBS_ACCESS_WRITE: - return rdma_lookup_get_uobject(type_attrs, ufile, id, true); + return rdma_lookup_get_uobject(type_attrs, ufile, id, + UVERBS_LOOKUP_WRITE); case UVERBS_ACCESS_NEW: return rdma_alloc_begin_uobject(type_attrs, ufile); default: @@ -916,13 +936,13 @@ int uverbs_finalize_object(struct ib_uobject *uobj, switch (access) { case UVERBS_ACCESS_READ: - rdma_lookup_put_uobject(uobj, false); + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ); break; case UVERBS_ACCESS_WRITE: - rdma_lookup_put_uobject(uobj, true); + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); break; case UVERBS_ACCESS_DESTROY: - rdma_lookup_put_uobject(uobj, true); + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); break; case UVERBS_ACCESS_NEW: if (commit) diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index c2f89e41cbd2..8c54e1439ba1 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -58,11 +58,12 @@ static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(vo #define uobj_get_read(_type, _id, _ufile) \ rdma_lookup_get_uobject(uobj_get_type(_type), _ufile, \ - _uobj_check_id(_id), false) + _uobj_check_id(_id), UVERBS_LOOKUP_READ) #define ufd_get_read(_type, _fdnum, _ufile) \ rdma_lookup_get_uobject(uobj_get_type(_type), _ufile, \ - 
(_fdnum)*typecheck(s32, _fdnum), false) + (_fdnum)*typecheck(s32, _fdnum), \ + UVERBS_LOOKUP_READ) static inline void *_uobj_get_obj_read(struct ib_uobject *uobj) { @@ -76,7 +77,7 @@ static inline void *_uobj_get_obj_read(struct ib_uobject *uobj) #define uobj_get_write(_type, _id, _ufile) \ rdma_lookup_get_uobject(uobj_get_type(_type), _ufile, \ - _uobj_check_id(_id), true) + _uobj_check_id(_id), UVERBS_LOOKUP_WRITE) int __uobj_perform_destroy(const struct uverbs_obj_type *type, u32 id, struct ib_uverbs_file *ufile, int success_res); @@ -92,12 +93,12 @@ struct ib_uobject *__uobj_get_destroy(const struct uverbs_obj_type *type, static inline void uobj_put_destroy(struct ib_uobject *uobj) { - rdma_lookup_put_uobject(uobj, true); + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); } static inline void uobj_put_read(struct ib_uobject *uobj) { - rdma_lookup_put_uobject(uobj, false); + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ); } #define uobj_put_obj_read(_obj) \ @@ -105,7 +106,7 @@ static inline void uobj_put_read(struct ib_uobject *uobj) static inline void uobj_put_write(struct ib_uobject *uobj) { - rdma_lookup_put_uobject(uobj, true); + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); } static inline int __must_check uobj_alloc_commit(struct ib_uobject *uobj, diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index 875dd8c16ba3..0676672dbbb9 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -38,6 +38,11 @@ struct uverbs_obj_type; +enum rdma_lookup_mode { + UVERBS_LOOKUP_READ, + UVERBS_LOOKUP_WRITE, +}; + /* * The following sequences are valid: * Success flow: @@ -78,8 +83,8 @@ struct uverbs_obj_type_class { struct ib_uobject *(*lookup_get)(const struct uverbs_obj_type *type, struct ib_uverbs_file *ufile, s64 id, - bool exclusive); - void (*lookup_put)(struct ib_uobject *uobj, bool exclusive); + enum rdma_lookup_mode mode); + void (*lookup_put)(struct ib_uobject *uobj, enum rdma_lookup_mode mode); /* This does not consume the kref on uobj */ int __must_check (*remove_commit)(struct ib_uobject *uobj, enum rdma_remove_reason why); @@ -116,9 +121,10 @@ struct uverbs_obj_idr_type { }; struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, - struct ib_uverbs_file *ufile, - s64 id, bool exclusive); + struct ib_uverbs_file *ufile, s64 id, + enum rdma_lookup_mode mode); -void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive); +void rdma_lookup_put_uobject(struct ib_uobject *uobj, + enum rdma_lookup_mode mode); struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, struct ib_uverbs_file *ufile); void rdma_alloc_abort_uobject(struct ib_uobject *uobj); -- cgit From 7452a3c745a2e7eb70d09dc5bb870759b1f26c91 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 25 Jul 2018 21:40:14 -0600 Subject: IB/uverbs: Allow RDMA_REMOVE_DESTROY to work concurrently with disassociate After all the recent structural changes this is now straightforward: hoist the hw_destroy_rwsem up out of rdma_explicit_destroy and wrap it around the uobject write lock as well as the destroy. This is necessary as obtaining a write lock concurrently with uverbs_destroy_ufile_hw() will cause malfunction. After this change none of the destroy callbacks require the disassociate_srcu lock to be correct. This requires introducing a new lookup mode, UVERBS_LOOKUP_DESTROY, as the IOCTL interface needs to hold an unlocked kref until all command verification is completed.
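In condensed pseudocode (a sketch only, with names taken from uobj_destroy() and __uobj_get_destroy() in the diff below; error handling and the usecnt fixup are trimmed), the destroy path becomes:

	uobj = rdma_lookup_get_uobject(type, ufile, id, UVERBS_LOOKUP_DESTROY);

	/* uobj_destroy(): upgrade the unlocked DESTROY kref to a write lock
	 * while excluding uverbs_destroy_ufile_hw()
	 */
	down_read(&ufile->hw_destroy_rwsem);
	ret = uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE);
	if (!ret)
		ret = uverbs_destroy_uobject(uobj, RDMA_REMOVE_DESTROY);
	up_read(&ufile->hw_destroy_rwsem);

	/* on success the caller pairs with rdma_lookup_put_uobject(LOOKUP_WRITE) */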
Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 71 +++++++++++++++++++++++----------- drivers/infiniband/core/rdma_core.h | 2 + drivers/infiniband/core/uverbs_ioctl.c | 7 +++- include/rdma/uverbs_types.h | 7 +++- 4 files changed, 63 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 435dbe8ef2a2..81d668abe18e 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -127,8 +127,10 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, return __atomic_add_unless(&uobj->usecnt, 1, -1) == -1 ? -EBUSY : 0; case UVERBS_LOOKUP_WRITE: - /* lock is either WRITE or DESTROY - should be exclusive */ + /* lock is exclusive */ return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY; + case UVERBS_LOOKUP_DESTROY: + return 0; } return 0; } @@ -144,6 +146,8 @@ static void assert_uverbs_usecnt(struct ib_uobject *uobj, case UVERBS_LOOKUP_WRITE: WARN_ON(atomic_read(&uobj->usecnt) != -1); break; + case UVERBS_LOOKUP_DESTROY: + break; } #endif } @@ -227,6 +231,35 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, return 0; } +/* + * This calls uverbs_destroy_uobject() using the RDMA_REMOVE_DESTROY + * sequence. It should only be used from command callbacks. On success the + * caller must pair this with rdma_lookup_put_uobject(LOOKUP_WRITE). This + * version requires the caller to have already obtained an + * LOOKUP_DESTROY uobject kref. + */ +int uobj_destroy(struct ib_uobject *uobj) +{ + struct ib_uverbs_file *ufile = uobj->ufile; + int ret; + + down_read(&ufile->hw_destroy_rwsem); + + ret = uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE); + if (ret) + goto out_unlock; + + ret = uverbs_destroy_uobject(uobj, RDMA_REMOVE_DESTROY); + if (ret) { + atomic_set(&uobj->usecnt, 0); + goto out_unlock; + } + +out_unlock: + up_read(&ufile->hw_destroy_rwsem); + return ret; +} + /* * uobj_get_destroy destroys the HW object and returns a handle to the uobj * with a NULL object pointer. The caller must pair this with @@ -238,13 +271,13 @@ struct ib_uobject *__uobj_get_destroy(const struct uverbs_obj_type *type, struct ib_uobject *uobj; int ret; - uobj = rdma_lookup_get_uobject(type, ufile, id, UVERBS_LOOKUP_WRITE); + uobj = rdma_lookup_get_uobject(type, ufile, id, UVERBS_LOOKUP_DESTROY); if (IS_ERR(uobj)) return uobj; - ret = rdma_explicit_destroy(uobj); + ret = uobj_destroy(uobj); if (ret) { - rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY); return ERR_PTR(ret); } @@ -265,6 +298,11 @@ int __uobj_perform_destroy(const struct uverbs_obj_type *type, u32 id, if (IS_ERR(uobj)) return PTR_ERR(uobj); + /* + * FIXME: After destroy this is not safe. We no longer hold the rwsem + * so disassociation could have completed and unloaded the module that + * backs the uobj->type pointer. + */ rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); return success_res; } @@ -534,23 +572,6 @@ static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj, return 0; } -int rdma_explicit_destroy(struct ib_uobject *uobject) -{ - int ret; - struct ib_uverbs_file *ufile = uobject->ufile; - - /* Cleanup is running. 
Calling this should have been impossible */ - if (!down_read_trylock(&ufile->hw_destroy_rwsem)) { - WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n"); - return 0; - } - - ret = uverbs_destroy_uobject(uobject, RDMA_REMOVE_DESTROY); - - up_read(&ufile->hw_destroy_rwsem); - return ret; -} - static int alloc_commit_idr_uobject(struct ib_uobject *uobj) { struct ib_uverbs_file *ufile = uobj->ufile; @@ -686,6 +707,8 @@ void rdma_lookup_put_uobject(struct ib_uobject *uobj, case UVERBS_LOOKUP_WRITE: atomic_set(&uobj->usecnt, 0); break; + case UVERBS_LOOKUP_DESTROY: + break; } /* Pairs with the kref obtained by type->lookup_get */ @@ -911,6 +934,9 @@ uverbs_get_uobject_from_file(const struct uverbs_obj_type *type_attrs, return rdma_lookup_get_uobject(type_attrs, ufile, id, UVERBS_LOOKUP_READ); case UVERBS_ACCESS_DESTROY: + /* Actual destruction is done inside uverbs_handle_method */ + return rdma_lookup_get_uobject(type_attrs, ufile, id, + UVERBS_LOOKUP_DESTROY); case UVERBS_ACCESS_WRITE: return rdma_lookup_get_uobject(type_attrs, ufile, id, UVERBS_LOOKUP_WRITE); @@ -942,7 +968,8 @@ int uverbs_finalize_object(struct ib_uobject *uobj, rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); break; case UVERBS_ACCESS_DESTROY: - rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); + if (uobj) + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY); break; case UVERBS_ACCESS_NEW: if (commit) diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index a736b46d18e3..e4d8b985c311 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -52,6 +52,8 @@ const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_sp void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, enum rdma_remove_reason reason); +int uobj_destroy(struct ib_uobject *uobj); + /* * uverbs_uobject_get is called in order to increase the reference count on * an uobject. This is useful when a handler wants to keep the uobject's memory diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 404acfcdbeb2..f3776f909ca5 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -349,13 +349,18 @@ static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr, * not get to manipulate the HW objects. */ if (destroy_attr) { - ret = rdma_explicit_destroy(destroy_attr->uobject); + ret = uobj_destroy(destroy_attr->uobject); if (ret) goto cleanup; } ret = method_spec->handler(ibdev, ufile, attr_bundle); + if (destroy_attr) { + uobj_put_destroy(destroy_attr->uobject); + destroy_attr->uobject = NULL; + } + cleanup: finalize_ret = uverbs_finalize_attrs(attr_bundle, method_spec->attr_buckets, diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index 0676672dbbb9..f64f413cecac 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -41,6 +41,12 @@ struct uverbs_obj_type; enum rdma_lookup_mode { UVERBS_LOOKUP_READ, UVERBS_LOOKUP_WRITE, + /* + * Destroy is like LOOKUP_WRITE, except that the uobject is not + * locked. uobj_destroy is used to convert a LOOKUP_DESTROY lock into + * a LOOKUP_WRITE lock. 
+ */ + UVERBS_LOOKUP_DESTROY, }; /* @@ -129,7 +135,6 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, struct ib_uverbs_file *ufile); void rdma_alloc_abort_uobject(struct ib_uobject *uobj); int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj); -int rdma_explicit_destroy(struct ib_uobject *uobject); struct uverbs_obj_fd_type { /* -- cgit From 1e857e65d4bb76738d3fb3b15ce9b73a0ce550f8 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 25 Jul 2018 21:40:15 -0600 Subject: IB/uverbs: Allow uobject allocation to work concurrently with disassociate After all the recent structural changes this is now straightforward, hold the hw_destroy_rwsem across the entire uobject creation. We already take this semaphore on the success path, so holding it a bit longer is not going to change the performance. After this change none of the create callbacks require the disassociate_srcu lock to be correct. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 81d668abe18e..95a8110f186f 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -153,9 +153,8 @@ static void assert_uverbs_usecnt(struct ib_uobject *uobj, } /* - * This must be called with the hw_destroy_rwsem locked (except for - * RDMA_REMOVE_ABORT) for read or write, also The uobject itself must be - * locked for write. + * This must be called with the hw_destroy_rwsem locked for read or write, + * also the uobject itself must be locked for write. * * Upon return the HW object is guaranteed to be destroyed. * @@ -177,6 +176,7 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, unsigned long flags; int ret; + lockdep_assert_held(&ufile->hw_destroy_rwsem); assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE); if (uobj->object) { @@ -515,7 +515,22 @@ static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *t struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, struct ib_uverbs_file *ufile) { - return type->type_class->alloc_begin(type, ufile); + struct ib_uobject *ret; + + /* + * The hw_destroy_rwsem is held across the entire object creation and + * released during rdma_alloc_commit_uobject or + * rdma_alloc_abort_uobject + */ + if (!down_read_trylock(&ufile->hw_destroy_rwsem)) + return ERR_PTR(-EIO); + + ret = type->type_class->alloc_begin(type, ufile); + if (IS_ERR(ret)) { + up_read(&ufile->hw_destroy_rwsem); + return ret; + } + return ret; } static void alloc_abort_idr_uobject(struct ib_uobject *uobj) @@ -637,17 +652,11 @@ int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj) struct ib_uverbs_file *ufile = uobj->ufile; int ret; - /* Cleanup is running. 
Calling this should have been impossible */ - if (!down_read_trylock(&ufile->hw_destroy_rwsem)) { - WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n"); - uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT); - return -EINVAL; - } - /* alloc_commit consumes the uobj kref */ ret = uobj->type->type_class->alloc_commit(uobj); if (ret) { uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT); + up_read(&ufile->hw_destroy_rwsem); return ret; } @@ -660,6 +669,7 @@ int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj) /* matches atomic_set(-1) in alloc_uobj */ atomic_set(&uobj->usecnt, 0); + /* Matches the down_read in rdma_alloc_begin_uobject */ up_read(&ufile->hw_destroy_rwsem); return 0; @@ -671,8 +681,13 @@ int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj) */ void rdma_alloc_abort_uobject(struct ib_uobject *uobj) { + struct ib_uverbs_file *ufile = uobj->ufile; + uobj->object = NULL; uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT); + + /* Matches the down_read in rdma_alloc_begin_uobject */ + up_read(&ufile->hw_destroy_rwsem); } static void lookup_put_idr_uobject(struct ib_uobject *uobj, -- cgit From cc2e14e68004e6dec70842f990085f67c1f6fec7 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 25 Jul 2018 21:40:16 -0600 Subject: IB/uverbs: Lower the test for ongoing disassociation Commands that are reading/writing to objects can test for an ongoing disassociation during their initial call to rdma_lookup_get_uobject. This directly prevents all of these commands from conflicting with an ongoing disassociation. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 95a8110f186f..d4de1fed98f2 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -449,6 +449,17 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, goto free; } + /* + * If we have been disassociated block every command except for + * DESTROY based commands. + */ + if (mode != UVERBS_LOOKUP_DESTROY && + !srcu_dereference(ufile->device->ib_dev, + &ufile->device->disassociate_srcu)) { + ret = -EIO; + goto free; + } + ret = uverbs_try_lock_object(uobj, mode); if (ret) goto free; -- cgit From bbd51e881ff05aa6dccda025e335438f3b3a1dba Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 25 Jul 2018 21:40:17 -0600 Subject: IB/uverbs: Do not pass struct ib_device to the write based methods This is a step to get rid of the global check for disassociation. In this model, the ib_dev is not proven to be valid by the core code and cannot be provided to the method. Instead, every method decides if it is able to run after disassociation and obtains the ib_dev using one of three different approaches: - Call srcu_dereference on the udevice's ib_dev. As before, this means the method cannot be called after disassociation begins. (eg alloc ucontext) - Retrieve the ib_dev from the ucontext, via ib_uverbs_get_ucontext() - Retrieve the ib_dev from the uobject->object after checking under SRCU if disassociation has started (eg uobj_get) Largely, the code is all ready for this, the main work is to provide an ib_dev after calling uobj_alloc(). The few other places simply use ib_uverbs_get_ucontext() to get the ib_dev. This flexibility will let the next patches allow destroy to operate after disassociation.
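As a condensed sketch of the new create-side pattern (taken from the ib_uverbs_alloc_pd() and __uobj_alloc() hunks below; declarations and error paths trimmed):

	struct ib_device *ib_dev;
	struct ib_uobject *uobj;

	uobj = uobj_alloc(UVERBS_OBJECT_PD, file, &ib_dev);
	if (IS_ERR(uobj))
		return PTR_ERR(uobj);

	/* __uobj_alloc() filled ib_dev from uobj->context->device; this is
	 * safe because the allocation path now holds hw_destroy_rwsem for
	 * read until commit/abort.
	 */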
Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs.h | 2 - drivers/infiniband/core/uverbs_cmd.c | 155 +++++++++++++++++----------------- drivers/infiniband/core/uverbs_main.c | 6 +- include/rdma/uverbs_std_types.h | 12 ++- 4 files changed, 89 insertions(+), 86 deletions(-) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index cf02b433000c..5e21cc1f900b 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -299,7 +299,6 @@ extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS); #define IB_UVERBS_DECLARE_CMD(name) \ ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ - struct ib_device *ib_dev, \ const char __user *buf, int in_len, \ int out_len) @@ -341,7 +340,6 @@ IB_UVERBS_DECLARE_CMD(close_xrcd); #define IB_UVERBS_DECLARE_EX_CMD(name) \ int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \ - struct ib_device *ib_dev, \ struct ib_udata *ucore, \ struct ib_udata *uhw) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index fe96ceda6cd2..465b4d921024 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -66,7 +66,6 @@ _ib_uverbs_lookup_comp_file(s32 fd, struct ib_uverbs_file *ufile) _ib_uverbs_lookup_comp_file((_fd)*typecheck(s32, _fd), _ufile) ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -76,6 +75,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, struct ib_ucontext *ucontext; struct file *filp; struct ib_rdmacg_object cg_obj; + struct ib_device *ib_dev; int ret; if (out_len < sizeof resp) @@ -85,6 +85,12 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, return -EFAULT; mutex_lock(&file->ucontext_lock); + ib_dev = srcu_dereference(file->device->ib_dev, + &file->device->disassociate_srcu); + if (!ib_dev) { + ret = -EIO; + goto err; + } if (file->ucontext) { ret = -EINVAL; @@ -177,11 +183,12 @@ err: return ret; } -static void copy_query_dev_fields(struct ib_uverbs_file *file, - struct ib_device *ib_dev, +static void copy_query_dev_fields(struct ib_ucontext *ucontext, struct ib_uverbs_query_device_resp *resp, struct ib_device_attr *attr) { + struct ib_device *ib_dev = ucontext->device; + resp->fw_ver = attr->fw_ver; resp->node_guid = ib_dev->node_guid; resp->sys_image_guid = attr->sys_image_guid; @@ -225,12 +232,16 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file, } ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_query_device cmd; struct ib_uverbs_query_device_resp resp; + struct ib_ucontext *ucontext; + + ucontext = ib_uverbs_get_ucontext(file); + if (IS_ERR(ucontext)) + return PTR_ERR(ucontext); if (out_len < sizeof resp) return -ENOSPC; @@ -239,7 +250,7 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, return -EFAULT; memset(&resp, 0, sizeof resp); - copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs); + copy_query_dev_fields(ucontext, &resp, &ucontext->device->attrs); if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) return -EFAULT; @@ -269,7 +280,6 @@ static u32 make_port_cap_flags(const struct ib_port_attr *attr) } ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -277,6 +287,13 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file 
*file, struct ib_uverbs_query_port_resp resp; struct ib_port_attr attr; int ret; + struct ib_ucontext *ucontext; + struct ib_device *ib_dev; + + ucontext = ib_uverbs_get_ucontext(file); + if (IS_ERR(ucontext)) + return PTR_ERR(ucontext); + ib_dev = ucontext->device; if (out_len < sizeof resp) return -ENOSPC; @@ -328,7 +345,6 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, } ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -338,6 +354,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, struct ib_uobject *uobj; struct ib_pd *pd; int ret; + struct ib_device *ib_dev; if (out_len < sizeof resp) return -ENOSPC; @@ -350,7 +367,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - uobj = uobj_alloc(UVERBS_OBJECT_PD, file); + uobj = uobj_alloc(UVERBS_OBJECT_PD, file, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -387,7 +404,6 @@ err: } ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -486,7 +502,6 @@ static void xrcd_table_delete(struct ib_uverbs_device *dev, } ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -499,6 +514,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, struct inode *inode = NULL; int ret = 0; int new_xrcd = 0; + struct ib_device *ib_dev; if (out_len < sizeof resp) return -ENOSPC; @@ -535,7 +551,8 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, } } - obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, file); + obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, file, + &ib_dev); if (IS_ERR(obj)) { ret = PTR_ERR(obj); goto err_tree_mutex_unlock; @@ -606,7 +623,6 @@ err_tree_mutex_unlock: } ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -645,7 +661,6 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, } ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -656,6 +671,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, struct ib_pd *pd; struct ib_mr *mr; int ret; + struct ib_device *ib_dev; if (out_len < sizeof resp) return -ENOSPC; @@ -675,7 +691,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if (ret) return ret; - uobj = uobj_alloc(UVERBS_OBJECT_MR, file); + uobj = uobj_alloc(UVERBS_OBJECT_MR, file, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -737,7 +753,6 @@ err_free: } ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -829,7 +844,6 @@ put_uobjs: } ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -843,7 +857,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, } ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -854,6 +867,7 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, struct ib_mw *mw; struct ib_udata udata; int ret; + struct ib_device *ib_dev; if (out_len < sizeof(resp)) return -ENOSPC; @@ -861,7 +875,7 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, if 
(copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; - uobj = uobj_alloc(UVERBS_OBJECT_MW, file); + uobj = uobj_alloc(UVERBS_OBJECT_MW, file, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -911,7 +925,6 @@ err_free: } ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -925,7 +938,6 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, } ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -933,6 +945,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, struct ib_uverbs_create_comp_channel_resp resp; struct ib_uobject *uobj; struct ib_uverbs_completion_event_file *ev_file; + struct ib_device *ib_dev; if (out_len < sizeof resp) return -ENOSPC; @@ -940,7 +953,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file); + uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -959,7 +972,6 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, } static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw, struct ib_uverbs_ex_create_cq *cmd, @@ -977,17 +989,21 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, int ret; struct ib_uverbs_ex_create_cq_resp resp; struct ib_cq_init_attr attr = {}; - - if (!ib_dev->create_cq) - return ERR_PTR(-EOPNOTSUPP); + struct ib_device *ib_dev; if (cmd->comp_vector >= file->device->num_comp_vectors) return ERR_PTR(-EINVAL); - obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, file); + obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, file, + &ib_dev); if (IS_ERR(obj)) return obj; + if (!ib_dev->create_cq) { + ret = -EOPNOTSUPP; + goto err; + } + if (cmd->comp_channel >= 0) { ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, file); if (IS_ERR(ev_file)) { @@ -1066,7 +1082,6 @@ static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file, } ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1097,7 +1112,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, cmd_ex.comp_vector = cmd.comp_vector; cmd_ex.comp_channel = cmd.comp_channel; - obj = create_cq(file, ib_dev, &ucore, &uhw, &cmd_ex, + obj = create_cq(file, &ucore, &uhw, &cmd_ex, offsetof(typeof(cmd_ex), comp_channel) + sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb, NULL); @@ -1120,7 +1135,6 @@ static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file, } int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -1146,7 +1160,7 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file, sizeof(resp.response_length))) return -ENOSPC; - obj = create_cq(file, ib_dev, ucore, uhw, &cmd, + obj = create_cq(file, ucore, uhw, &cmd, min(ucore->inlen, sizeof(cmd)), ib_uverbs_ex_create_cq_cb, NULL); @@ -1154,7 +1168,6 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file, } ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1222,7 +1235,6 @@ static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest, } ssize_t 
ib_uverbs_poll_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1253,7 +1265,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, if (!ret) break; - ret = copy_wc_to_user(ib_dev, data_ptr, &wc); + ret = copy_wc_to_user(cq->device, data_ptr, &wc); if (ret) goto out_put; @@ -1274,7 +1286,6 @@ out_put: } ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1297,7 +1308,6 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, } ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1350,11 +1360,13 @@ static int create_qp(struct ib_uverbs_file *file, int ret; struct ib_rwq_ind_table *ind_tbl = NULL; bool has_sq = true; + struct ib_device *ib_dev; if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) return -EPERM; - obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file); + obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file, + &ib_dev); if (IS_ERR(obj)) return PTR_ERR(obj); obj->uxrcd = NULL; @@ -1611,7 +1623,6 @@ static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file, } ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1672,7 +1683,6 @@ static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file, } int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -1709,7 +1719,6 @@ int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file, } ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_open_qp cmd; @@ -1721,6 +1730,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, struct ib_qp *qp; struct ib_qp_open_attr attr; int ret; + struct ib_device *ib_dev; if (out_len < sizeof resp) return -ENOSPC; @@ -1733,7 +1743,8 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file); + obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file, + &ib_dev); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -1815,7 +1826,6 @@ static void copy_ah_attr_to_uverbs(struct ib_uverbs_qp_dest *uverb_attr, } ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2018,7 +2028,6 @@ out: } ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2045,7 +2054,6 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, } int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -2081,7 +2089,6 @@ int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file, } ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2120,7 +2127,6 @@ static void *alloc_wr(size_t wr_size, __u32 num_sge) } ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2401,7 +2407,6 @@ err: } ssize_t ib_uverbs_post_recv(struct ib_uverbs_file 
*file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2451,7 +2456,6 @@ out: } ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2502,7 +2506,6 @@ out: } ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2514,6 +2517,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, struct rdma_ah_attr attr = {}; int ret; struct ib_udata udata; + struct ib_device *ib_dev; if (out_len < sizeof resp) return -ENOSPC; @@ -2521,18 +2525,20 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num)) - return -EINVAL; - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), u64_to_user_ptr(cmd.response) + sizeof(resp), in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - uobj = uobj_alloc(UVERBS_OBJECT_AH, file); + uobj = uobj_alloc(UVERBS_OBJECT_AH, file, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); + if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num)) { + ret = -EINVAL; + goto err; + } + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file); if (!pd) { ret = -EINVAL; @@ -2589,7 +2595,6 @@ err: } ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_destroy_ah cmd; @@ -2602,7 +2607,6 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, } ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2652,7 +2656,6 @@ out_put: } ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -3021,7 +3024,6 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_file *ufile, } int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -3035,6 +3037,7 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, struct ib_wq_init_attr wq_init_attr = {}; size_t required_cmd_sz; size_t required_resp_len; + struct ib_device *ib_dev; required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge); required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn); @@ -3057,7 +3060,8 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EOPNOTSUPP; - obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, file); + obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, file, + &ib_dev); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -3136,7 +3140,6 @@ err_uobj: } int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -3183,7 +3186,6 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, } int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -3233,7 +3235,6 @@ out: } int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -3251,6 +3252,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, u32 expected_in_size; size_t required_cmd_sz_header; size_t required_resp_len; + struct ib_device *ib_dev; 
required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size); required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num); @@ -3316,7 +3318,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, wqs[num_read_wqs] = wq; } - uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file); + uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file, &ib_dev); if (IS_ERR(uobj)) { err = PTR_ERR(uobj); goto put_wqs; @@ -3376,7 +3378,6 @@ err_free: } int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -3406,7 +3407,6 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, } int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -3423,6 +3423,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, void *kern_spec; void *ib_spec; int i; + struct ib_device *ib_dev; if (ucore->inlen < sizeof(cmd)) return -EINVAL; @@ -3478,7 +3479,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, kern_flow_attr = &cmd.flow_attr; } - uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file); + uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file, &ib_dev); if (IS_ERR(uobj)) { err = PTR_ERR(uobj); goto err_free_attr; @@ -3583,7 +3584,6 @@ err_free_attr: } int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -3605,7 +3605,6 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, } static int __uverbs_create_xsrq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_uverbs_create_xsrq *cmd, struct ib_udata *udata) { @@ -3616,8 +3615,10 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, struct ib_uobject *uninitialized_var(xrcd_uobj); struct ib_srq_init_attr attr; int ret; + struct ib_device *ib_dev; - obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, file); + obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, file, + &ib_dev); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -3740,7 +3741,6 @@ err: } ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -3770,7 +3770,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata); + ret = __uverbs_create_xsrq(file, &xcmd, &udata); if (ret) return ret; @@ -3778,7 +3778,6 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, } ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_create_xsrq cmd; @@ -3797,7 +3796,7 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata); + ret = __uverbs_create_xsrq(file, &cmd, &udata); if (ret) return ret; @@ -3805,7 +3804,6 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, } ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -3836,7 +3834,6 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, } ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user 
*buf, int in_len, int out_len) { @@ -3876,7 +3873,6 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, } ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -3905,15 +3901,21 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, } int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { struct ib_uverbs_ex_query_device_resp resp = { {0} }; struct ib_uverbs_ex_query_device cmd; struct ib_device_attr attr = {0}; + struct ib_ucontext *ucontext; + struct ib_device *ib_dev; int err; + ucontext = ib_uverbs_get_ucontext(file); + if (IS_ERR(ucontext)) + return PTR_ERR(ucontext); + ib_dev = ucontext->device; + if (!ib_dev->query_device) return -EOPNOTSUPP; @@ -3939,7 +3941,7 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, if (err) return err; - copy_query_dev_fields(file, ib_dev, &resp.base, &attr); + copy_query_dev_fields(ucontext, &resp.base, &attr); if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps)) goto end; @@ -4026,7 +4028,6 @@ end: } int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 34df04ed142b..a1e427b2c2a1 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -75,7 +75,6 @@ static struct class *uverbs_class; static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) = { [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, @@ -116,7 +115,6 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, }; static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) = { [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow, @@ -774,7 +772,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, buf += sizeof(hdr); if (!extended) { - ret = uverbs_cmd_table[command](file, ib_dev, buf, + ret = uverbs_cmd_table[command](file, buf, hdr.in_words * 4, hdr.out_words * 4); } else { @@ -793,7 +791,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, ex_hdr.provider_in_words * 8, ex_hdr.provider_out_words * 8); - ret = uverbs_ex_cmd_table[command](file, ib_dev, &ucore, &uhw); + ret = uverbs_ex_cmd_table[command](file, &ucore, &uhw); ret = (ret) ? 
: count; } diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 8c54e1439ba1..64ee2545dd3d 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -125,12 +125,18 @@ static inline void uobj_alloc_abort(struct ib_uobject *uobj) } static inline struct ib_uobject *__uobj_alloc(const struct uverbs_obj_type *type, - struct ib_uverbs_file *ufile) + struct ib_uverbs_file *ufile, + struct ib_device **ib_dev) { - return rdma_alloc_begin_uobject(type, ufile); + struct ib_uobject *uobj = rdma_alloc_begin_uobject(type, ufile); + + if (!IS_ERR(uobj)) + *ib_dev = uobj->context->device; + return uobj; } -#define uobj_alloc(_type, _ufile) __uobj_alloc(uobj_get_type(_type), _ufile) +#define uobj_alloc(_type, _ufile, _ib_dev) \ + __uobj_alloc(uobj_get_type(_type), _ufile, _ib_dev) #endif -- cgit From e83f0ecdc40f2c3d63ff0e7f17462a29d12684a2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 25 Jul 2018 21:40:18 -0600 Subject: IB/uverbs: Do not pass struct ib_device to the ioctl methods This does the same as the patch before, except for ioctl. The rules are the same, but for the ioctl methods the core code handles setting up the uobject. - Retrieve the ib_dev from the uobject->context->device. This is safe under ioctl as the core has already done rdma_alloc_begin_uobject and so CREATE calls are entirely protected by the rwsem. - Retrieve the ib_dev from uobject->object - Call ib_uverbs_get_ucontext() Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_ioctl.c | 2 +- drivers/infiniband/core/uverbs_std_types.c | 3 +- .../infiniband/core/uverbs_std_types_counters.c | 21 +++++------ drivers/infiniband/core/uverbs_std_types_cq.c | 18 ++++----- drivers/infiniband/core/uverbs_std_types_dm.c | 13 ++++--- .../infiniband/core/uverbs_std_types_flow_action.c | 35 ++++++++---------- drivers/infiniband/core/uverbs_std_types_mr.c | 22 +++++------ drivers/infiniband/hw/mlx5/devx.c | 43 +++++++++++----------- drivers/infiniband/hw/mlx5/flow.c | 8 ++-- include/rdma/ib_verbs.h | 3 +- include/rdma/uverbs_ioctl.h | 4 +- 11 files changed, 78 insertions(+), 94 deletions(-) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index f3776f909ca5..f0655a84f9d9 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -354,7 +354,7 @@ static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr, goto cleanup; } - ret = method_spec->handler(ibdev, ufile, attr_bundle); + ret = method_spec->handler(ufile, attr_bundle); if (destroy_attr) { uobj_put_destroy(destroy_attr->uobject); diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index c1e0492cc78a..3aa7c7deac74 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -210,8 +210,7 @@ static int uverbs_hot_unplug_completion_event_file(struct ib_uobject *uobj, return 0; }; -int uverbs_destroy_def_handler(struct ib_device *ib_dev, - struct ib_uverbs_file *file, +int uverbs_destroy_def_handler(struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { return 0; diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index 34589799f446..dfacc9e83399 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -47,12 +47,13 @@ static int uverbs_free_counters(struct ib_uobject *uobject, 
return counters->device->destroy_counters(counters); } -static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE); + struct ib_device *ib_dev = uobj->context->device; struct ib_counters *counters; - struct ib_uobject *uobj; int ret; /* @@ -63,7 +64,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(struct ib_device *ib_de if (!ib_dev->create_counters) return -EOPNOTSUPP; - uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE); counters = ib_dev->create_counters(ib_dev, attrs); if (IS_ERR(counters)) { ret = PTR_ERR(counters); @@ -81,9 +81,8 @@ err_create_counters: return ret; } -static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { struct ib_counters_read_attr read_attr = {}; const struct uverbs_attr *uattr; @@ -91,7 +90,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(struct ib_device *ib_dev, uverbs_attr_get_obj(attrs, UVERBS_ATTR_READ_COUNTERS_HANDLE); int ret; - if (!ib_dev->read_counters) + if (!counters->device->read_counters) return -EOPNOTSUPP; if (!atomic_read(&counters->usecnt)) @@ -110,9 +109,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(struct ib_device *ib_dev, if (!read_attr.counters_buff) return -ENOMEM; - ret = ib_dev->read_counters(counters, - &read_attr, - attrs); + ret = counters->device->read_counters(counters, &read_attr, attrs); if (ret) goto err_read; diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 68c86e6e932e..5b5f2052cd52 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -57,11 +57,13 @@ static int uverbs_free_cq(struct ib_uobject *uobject, return ret; } -static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { - struct ib_ucq_object *obj; + struct ib_ucq_object *obj = container_of( + uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE), + typeof(*obj), uobject); + struct ib_device *ib_dev = obj->uobject.context->device; struct ib_udata uhw; int ret; u64 user_handle; @@ -104,9 +106,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, goto err_event_file; } - obj = container_of(uverbs_attr_get_uobject(attrs, - UVERBS_ATTR_CREATE_CQ_HANDLE), - typeof(*obj), uobject); obj->comp_events_reported = 0; obj->async_events_reported = 0; INIT_LIST_HEAD(&obj->comp_list); @@ -173,9 +172,8 @@ DECLARE_UVERBS_NAMED_METHOD( UA_MANDATORY), UVERBS_ATTR_UHW()); -static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE); diff --git 
a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c index c90efa4b99f4..edc3ff7733d4 100644 --- a/drivers/infiniband/core/uverbs_std_types_dm.c +++ b/drivers/infiniband/core/uverbs_std_types_dm.c @@ -46,12 +46,15 @@ static int uverbs_free_dm(struct ib_uobject *uobject, return dm->device->dealloc_dm(dm); } -static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int +UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) { struct ib_dm_alloc_attr attr = {}; - struct ib_uobject *uobj; + struct ib_uobject *uobj = + uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE) + ->obj_attr.uobject; + struct ib_device *ib_dev = uobj->context->device; struct ib_dm *dm; int ret; @@ -68,8 +71,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev, if (ret) return ret; - uobj = uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE)->obj_attr.uobject; - dm = ib_dev->alloc_dm(ib_dev, uobj->context, &attr, attrs); if (IS_ERR(dm)) return PTR_ERR(dm); diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index adb9209c4710..d8cfafe23bd9 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -304,12 +304,13 @@ static int parse_flow_action_esp(struct ib_device *ib_dev, return 0; } -static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE); + struct ib_device *ib_dev = uobj->context->device; int ret; - struct ib_uobject *uobj; struct ib_flow_action *action; struct ib_flow_action_esp_attr esp_attr = {}; @@ -321,8 +322,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device return ret; /* No need to check as this attribute is marked as MANDATORY */ - uobj = uverbs_attr_get_uobject( - attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE); action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs); if (IS_ERR(action)) return PTR_ERR(action); @@ -336,32 +335,28 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device return 0; } -static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE); + struct ib_flow_action *action = uobj->object; int ret; - struct ib_uobject *uobj; - struct ib_flow_action *action; struct ib_flow_action_esp_attr esp_attr = {}; - if (!ib_dev->modify_flow_action_esp) + if (!action->device->modify_flow_action_esp) return -EOPNOTSUPP; - ret = parse_flow_action_esp(ib_dev, file, attrs, &esp_attr, true); + ret = parse_flow_action_esp(action->device, file, attrs, &esp_attr, + true); if (ret) return ret; - uobj = uverbs_attr_get_uobject( - attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE); - action = uobj->object; - if (action->type 
!= IB_FLOW_ACTION_ESP) return -EINVAL; - return ib_dev->modify_flow_action_esp(action, - &esp_attr.hdr, - attrs); + return action->device->modify_flow_action_esp(action, &esp_attr.hdr, + attrs); } static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = { diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index d63da0c2a8c1..cf02e774303e 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -39,14 +39,18 @@ static int uverbs_free_mr(struct ib_uobject *uobject, return ib_dereg_mr((struct ib_mr *)uobject->object); } -static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { struct ib_dm_mr_attr attr = {}; - struct ib_uobject *uobj; - struct ib_dm *dm; - struct ib_pd *pd; + struct ib_uobject *uobj = + uverbs_attr_get_uobject(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE); + struct ib_dm *dm = + uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_DM_HANDLE); + struct ib_pd *pd = + uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_PD_HANDLE); + struct ib_device *ib_dev = pd->device; + struct ib_mr *mr; int ret; @@ -75,12 +79,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev, if (ret) return ret; - pd = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_PD_HANDLE); - - dm = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_DM_HANDLE); - - uobj = uverbs_attr_get(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE)->obj_attr.uobject; - if (attr.offset > dm->length || attr.length > dm->length || attr.length > dm->length - attr.offset) return -EINVAL; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index c9a7a12a8c13..29c688372390 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -409,11 +409,11 @@ static bool devx_is_general_cmd(void *in) } } -static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { - struct mlx5_ib_dev *dev = to_mdev(ib_dev); + struct mlx5_ib_ucontext *c; + struct mlx5_ib_dev *dev; int user_vector; int dev_eqn; unsigned int irqn; @@ -423,6 +423,11 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(struct ib_device *ib_de MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC)) return -EFAULT; + c = devx_ufile2uctx(file); + if (IS_ERR(c)) + return PTR_ERR(c); + dev = to_mdev(c->ibucontext.device); + err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn, &irqn); if (err < 0) return err; @@ -454,9 +459,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(struct ib_device *ib_de * of the buggy user for execution (just insert it to the hardware schedule * queue or arm its CQ for event generation), no further harm is expected. 
*/ -static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { struct mlx5_ib_ucontext *c; struct mlx5_ib_dev *dev; @@ -483,9 +487,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(struct ib_device *ib_de return 0; } -static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { struct mlx5_ib_ucontext *c; struct mlx5_ib_dev *dev; @@ -712,9 +715,8 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, return ret; } -static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN); int cmd_out_len = uverbs_attr_get_len(attrs, @@ -769,9 +771,8 @@ obj_free: return err; } -static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN); int cmd_out_len = uverbs_attr_get_len(attrs, @@ -811,9 +812,8 @@ other_cmd_free: return err; } -static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN); int cmd_out_len = uverbs_attr_get_len(attrs, @@ -931,9 +931,8 @@ static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev, MLX5_IB_MTT_READ); } -static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { struct devx_umem_reg_cmd cmd; struct devx_umem *obj; diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index ee398a9b5f26..1a29f47f836e 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -39,8 +39,7 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { }; static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( - struct ib_device *ib_dev, struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { struct mlx5_ib_flow_handler *flow_handler; struct mlx5_ib_flow_matcher *fs_matcher; @@ -109,7 +108,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( if (IS_ERR(flow_handler)) return PTR_ERR(flow_handler); - ib_set_flow(uobj, &flow_handler->ibflow, qp, ib_dev); + ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev); return 0; } @@ -129,8 +128,7 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject, } static int 
UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( - struct ib_device *ib_dev, struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index be208421f7d3..dea770e5b9ae 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4170,7 +4170,6 @@ void rdma_roce_rescan_device(struct ib_device *ibdev); struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile); -int uverbs_destroy_def_handler(struct ib_device *ib_dev, - struct ib_uverbs_file *file, +int uverbs_destroy_def_handler(struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs); #endif /* IB_VERBS_H */ diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 5e6d0569d97c..8d71b7a7f147 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -128,7 +128,7 @@ struct uverbs_method_spec { u32 flags; size_t num_buckets; size_t num_child_attrs; - int (*handler)(struct ib_device *ib_dev, struct ib_uverbs_file *ufile, + int (*handler)(struct ib_uverbs_file *ufile, struct uverbs_attr_bundle *ctx); struct uverbs_attr_spec_hash *attr_buckets[0]; }; @@ -171,7 +171,7 @@ struct uverbs_method_def { u32 flags; size_t num_attrs; const struct uverbs_attr_def * const (*attrs)[]; - int (*handler)(struct ib_device *ib_dev, struct ib_uverbs_file *ufile, + int (*handler)(struct ib_uverbs_file *ufile, struct uverbs_attr_bundle *ctx); }; -- cgit From a9b66d6453d70ee01f11bdae40d68996cee7474a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 25 Jul 2018 21:40:19 -0600 Subject: IB/uverbs: Do not block disassociate during write() Now that all the callbacks are safe to run concurrently with disassociation, this test can be eliminated. The ufile core infrastructure becomes entirely self-contained and is not sensitive to disassociation.
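To make the new arrangement concrete, here is a minimal sketch of the "snapshot at open" pattern the diff below relies on. The my_dev/my_file names are illustrative stand-ins, not the uverbs types: the immutable command masks are copied into the file at open() time, so the write() path can consult its own copy and never has to dereference the device pointer that disassociation clears.

    /* Hedged sketch; my_dev/my_file are illustrative, not kernel types. */
    struct my_dev {
        u64 cmd_mask;                 /* fixed once the device registers */
    };

    struct my_file {
        struct my_dev __rcu *dev;     /* cleared on hot-unplug */
        u64 cmd_mask;                 /* private copy, valid for the fd's life */
    };

    static void my_open(struct my_file *file, struct my_dev *dev)
    {
        rcu_assign_pointer(file->dev, dev);
        file->cmd_mask = dev->cmd_mask;   /* snapshot while dev is alive */
    }

    static bool my_cmd_allowed(struct my_file *file, u32 cmd)
    {
        /* No device dereference, so this stays safe after disassociation. */
        return file->cmd_mask & BIT_ULL(cmd);
    }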
Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs.h | 3 +++ drivers/infiniband/core/uverbs_main.c | 20 ++++++++------------ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 5e21cc1f900b..0fa32009908c 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -158,6 +158,9 @@ struct ib_uverbs_file { spinlock_t uobjects_lock; struct list_head uobjects; + u64 uverbs_cmd_mask; + u64 uverbs_ex_cmd_mask; + struct idr idr; /* spinlock protects write access to idr */ spinlock_t idr_lock; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index a1e427b2c2a1..a3213245aab2 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -646,13 +646,13 @@ err_put_refs: return filp; } -static bool verify_command_mask(struct ib_device *ib_dev, - u32 command, bool extended) +static bool verify_command_mask(struct ib_uverbs_file *ufile, u32 command, + bool extended) { if (!extended) - return ib_dev->uverbs_cmd_mask & BIT_ULL(command); + return ufile->uverbs_cmd_mask & BIT_ULL(command); - return ib_dev->uverbs_ex_cmd_mask & BIT_ULL(command); + return ufile->uverbs_ex_cmd_mask & BIT_ULL(command); } static bool verify_command_idx(u32 command, bool extended) @@ -722,7 +722,6 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, { struct ib_uverbs_file *file = filp->private_data; struct ib_uverbs_ex_cmd_hdr ex_hdr; - struct ib_device *ib_dev; struct ib_uverbs_cmd_hdr hdr; bool extended; int srcu_key; @@ -757,14 +756,8 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, return ret; srcu_key = srcu_read_lock(&file->device->disassociate_srcu); - ib_dev = srcu_dereference(file->device->ib_dev, - &file->device->disassociate_srcu); - if (!ib_dev) { - ret = -EIO; - goto out; - } - if (!verify_command_mask(ib_dev, command, extended)) { + if (!verify_command_mask(file, command, extended)) { ret = -EOPNOTSUPP; goto out; } @@ -889,6 +882,9 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) mutex_unlock(&dev->lists_mutex); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); + file->uverbs_cmd_mask = ib_dev->uverbs_cmd_mask; + file->uverbs_ex_cmd_mask = ib_dev->uverbs_ex_cmd_mask; + return nonseekable_open(inode, filp); err_module: -- cgit From 0f50d88a6e9ae6d9dd14ed1a7d6b309280a9c23b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 25 Jul 2018 21:40:20 -0600 Subject: IB/uverbs: Allow all DESTROY commands to succeed after disassociate The disassociate function was broken by design because it failed all commands. This prevents userspace from calling destroy on a uobject after it has detected a device fatal error, and thus prevents userspace from reclaiming its resources. The fix is now straightforward: when anything other than the user destroys a uobject, the object remains in the IDR with a NULL context and object pointer, and all lookup locking modes other than DESTROY will fail. When the user ultimately calls the destroy function, the handle is simply dropped from the IDR while any related information is returned.
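As a hedged sketch of the resulting two-stage teardown (destroy_hw and remove_handle are the real method names from the diff below; the surrounding logic is simplified and omits the retry bookkeeping):

    static int example_destroy_uobject(struct ib_uobject *uobj,
                                       enum rdma_remove_reason why)
    {
        int ret;

        if (uobj->object) {
            ret = uobj->type->type_class->destroy_hw(uobj, why);
            if (ret && why != RDMA_REMOVE_DESTROY)
                return ret;           /* retryable during ufile cleanup */
            uobj->object = NULL;      /* hide from all future lookups */
        }

        /*
         * Only an explicit user DESTROY drops the IDR/fd handle; after a
         * disassociate the handle stays behind with a NULL object so
         * userspace can still call destroy and reclaim its id.
         */
        if (why == RDMA_REMOVE_DESTROY)
            uobj->type->type_class->remove_handle(uobj);
        return 0;
    }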
Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 66 +++++++++++++++++++++++++++++------ drivers/infiniband/core/rdma_core.h | 3 ++ drivers/infiniband/core/uverbs_main.c | 7 ++-- include/rdma/uverbs_types.h | 6 ++-- 4 files changed, 66 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index d4de1fed98f2..4235b9ddc2ad 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -180,7 +180,7 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE); if (uobj->object) { - ret = uobj->type->type_class->remove_commit(uobj, reason); + ret = uobj->type->type_class->destroy_hw(uobj, reason); if (ret) { if (ib_is_destroy_retryable(ret, reason, uobj)) return ret; @@ -204,10 +204,13 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, /* * For DESTROY the usecnt is held write locked, the caller is expected - * to put it unlock and put the object when done with it. + * to put it unlock and put the object when done with it. Only DESTROY + * can remove the IDR handle. */ if (reason != RDMA_REMOVE_DESTROY) atomic_set(&uobj->usecnt, 0); + else + uobj->type->type_class->remove_handle(uobj); if (!list_empty(&uobj->list)) { spin_lock_irqsave(&ufile->uobjects_lock, flags); @@ -554,8 +557,8 @@ static void alloc_abort_idr_uobject(struct ib_uobject *uobj) spin_unlock(&uobj->ufile->idr_lock); } -static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj, + enum rdma_remove_reason why) { const struct uverbs_obj_idr_type *idr_type = container_of(uobj->type, struct uverbs_obj_idr_type, @@ -573,20 +576,28 @@ static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj, if (why == RDMA_REMOVE_ABORT) return 0; - alloc_abort_idr_uobject(uobj); - /* Matches the kref in alloc_commit_idr_uobject */ - uverbs_uobject_put(uobj); + ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device, + RDMACG_RESOURCE_HCA_OBJECT); return 0; } +static void remove_handle_idr_uobject(struct ib_uobject *uobj) +{ + spin_lock(&uobj->ufile->idr_lock); + idr_remove(&uobj->ufile->idr, uobj->id); + spin_unlock(&uobj->ufile->idr_lock); + /* Matches the kref in alloc_commit_idr_uobject */ + uverbs_uobject_put(uobj); +} + static void alloc_abort_fd_uobject(struct ib_uobject *uobj) { put_unused_fd(uobj->id); } -static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj, + enum rdma_remove_reason why) { const struct uverbs_obj_fd_type *fd_type = container_of(uobj->type, struct uverbs_obj_fd_type, type); @@ -598,6 +609,10 @@ static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj, return 0; } +static void remove_handle_fd_uobject(struct ib_uobject *uobj) +{ +} + static int alloc_commit_idr_uobject(struct ib_uobject *uobj) { struct ib_uverbs_file *ufile = uobj->ufile; @@ -741,13 +756,41 @@ void rdma_lookup_put_uobject(struct ib_uobject *uobj, uverbs_uobject_put(uobj); } +void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile) +{ + spin_lock_init(&ufile->idr_lock); + idr_init(&ufile->idr); +} + +void release_ufile_idr_uobject(struct ib_uverbs_file *ufile) +{ + struct ib_uobject *entry; + int id; + + /* + * At this point uverbs_cleanup_ufile() is guaranteed to have run, and + * there are no HW objects left, 
however the IDR is still populated + * with anything that has not been cleaned up by userspace. Since the + * kref on ufile is 0, nothing is allowed to call lookup_get. + * + * This is an optimized equivalent to remove_handle_idr_uobject + */ + idr_for_each_entry(&ufile->idr, entry, id) { + WARN_ON(entry->object); + uverbs_uobject_put(entry); + } + + idr_destroy(&ufile->idr); +} + const struct uverbs_obj_type_class uverbs_idr_class = { .alloc_begin = alloc_begin_idr_uobject, .lookup_get = lookup_get_idr_uobject, .alloc_commit = alloc_commit_idr_uobject, .alloc_abort = alloc_abort_idr_uobject, .lookup_put = lookup_put_idr_uobject, - .remove_commit = remove_commit_idr_uobject, + .destroy_hw = destroy_hw_idr_uobject, + .remove_handle = remove_handle_idr_uobject, /* * When we destroy an object, we first just lock it for WRITE and * actually DESTROY it in the finalize stage. So, the problematic @@ -945,7 +988,8 @@ const struct uverbs_obj_type_class uverbs_fd_class = { .alloc_commit = alloc_commit_fd_uobject, .alloc_abort = alloc_abort_fd_uobject, .lookup_put = lookup_put_fd_uobject, - .remove_commit = remove_commit_fd_uobject, + .destroy_hw = destroy_hw_fd_uobject, + .remove_handle = remove_handle_fd_uobject, .needs_kfree_rcu = false, }; EXPORT_SYMBOL(uverbs_fd_class); diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index e4d8b985c311..b2e85ce65b78 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -110,4 +110,7 @@ int uverbs_finalize_object(struct ib_uobject *uobj, enum uverbs_obj_access access, bool commit); +void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile); +void release_ufile_idr_uobject(struct ib_uverbs_file *ufile); + #endif /* RDMA_CORE_H */ diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index a3213245aab2..6f62146e9738 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -253,6 +253,8 @@ void ib_uverbs_release_file(struct kref *ref) struct ib_device *ib_dev; int srcu_key; + release_ufile_idr_uobject(file); + srcu_key = srcu_read_lock(&file->device->disassociate_srcu); ib_dev = srcu_dereference(file->device->ib_dev, &file->device->disassociate_srcu); @@ -867,8 +869,6 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) } file->device = dev; - spin_lock_init(&file->idr_lock); - idr_init(&file->idr); kref_init(&file->ref); mutex_init(&file->ucontext_lock); @@ -885,6 +885,8 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) file->uverbs_cmd_mask = ib_dev->uverbs_cmd_mask; file->uverbs_ex_cmd_mask = ib_dev->uverbs_ex_cmd_mask; + setup_ufile_idr_uobject(file); + return nonseekable_open(inode, filp); err_module: @@ -904,7 +906,6 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) struct ib_uverbs_file *file = filp->private_data; uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE); - idr_destroy(&file->idr); mutex_lock(&file->device->lists_mutex); if (!file->is_closed) { diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index f64f413cecac..1ab9a85eebd9 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -61,6 +61,7 @@ enum rdma_lookup_mode { * Destruction flow: * lookup_get(exclusive=true) & uverbs_try_lock_object * remove_commit + * remove_handle (optional) * lookup_put(exclusive=true) via rdma_lookup_put_uobject * * Allocate Error flow #1 @@ -92,8 +93,9 @@ struct uverbs_obj_type_class { enum rdma_lookup_mode mode); void 
(*lookup_put)(struct ib_uobject *uobj, enum rdma_lookup_mode mode); /* This does not consume the kref on uobj */ - int __must_check (*remove_commit)(struct ib_uobject *uobj, - enum rdma_remove_reason why); + int __must_check (*destroy_hw)(struct ib_uobject *uobj, + enum rdma_remove_reason why); + void (*remove_handle)(struct ib_uobject *uobj); u8 needs_kfree_rcu; }; -- cgit From 75da96067ade4e7854379ec2f7834f3497652b1a Mon Sep 17 00:00:00 2001 From: Denis Drozdov Date: Sun, 29 Jul 2018 11:42:28 +0300 Subject: IB/IPoIB: Set ah valid flag in multicast send flow The change to the ipoib_ah data structure that added the "valid" flag, together with the checks of ah->valid in ipoib_start_xmit, affected the multicast packet flow. Since the multicast flow doesn't invoke path_rec_start, the "ah->valid" flag remains unset, so ipoib_start_xmit ends up in neigh_refresh_path instead of sending the packet using the neigh. "ah->valid" has to be set in the multicast send flow. As a result, IPoIB starts sending packets via the neigh immediately and eliminates the 60 sec delay of the neigh keep-alive interval. The typical example of this issue is two sequential arpings: arping 11.134.208.9 -> got response (mcast_send) arping 11.134.208.9 -> no response (ah->valid = 0) Fixes: fa9391dbad4b ("RDMA/ipoib: Update paths on CLIENT_REREG/SM_CHANGE events") Signed-off-by: Denis Drozdov Reviewed-by: Erez Shitrit Reviewed-by: Feras Daoud Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index f696ea49c97a..b9e9562f5034 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -822,6 +822,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb) if (neigh && list_empty(&neigh->list)) { kref_get(&mcast->ah->ref); neigh->ah = mcast->ah; + neigh->ah->valid = 1; list_add_tail(&neigh->list, &mcast->neigh_list); } } -- cgit From 0425e3e6e0c7f92f2c2a396d902871b7a81da0eb Mon Sep 17 00:00:00 2001 From: Yixian Liu Date: Thu, 2 Aug 2018 10:38:05 +0800 Subject: RDMA/hns: Support flush cqe for hip08 in kernel space According to the IB protocol, there are cases in which work requests must return the flush error completion status through the completion queue. Due to a hardware limitation, the driver needs to assist the flush process. This patch adds support for flush cqe on hip08 in the cases where it is needed, such as poll cqe, post send, post recv and aeqe handling. The patch also handles compatibility between kernel and user space.
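The heart of the assist is simply a transition to the ERR QP state wherever an error is observed (poll cqe, post send/recv, AEQ handling); once a QP is in ERR, its outstanding WQEs complete with IB_WC_WR_FLUSH_ERR instead of silently disappearing. A minimal sketch using the standard verbs call (the driver performs the equivalent internally through its hns_roce_v2_modify_qp, as the diff below shows):

    static int force_flush(struct ib_qp *qp)
    {
        struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };

        /* Moving the QP to ERR makes the HW/driver flush the remaining
         * WQEs to the CQ with IB_WC_WR_FLUSH_ERR status. */
        return ib_modify_qp(qp, &attr, IB_QP_STATE);
    }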
Signed-off-by: Yixian Liu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_db.c | 2 + drivers/infiniband/hw/hns/hns_roce_device.h | 18 ++- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 190 +++++++++++++++++++++++++--- drivers/infiniband/hw/hns/hns_roce_qp.c | 50 +++++++- include/uapi/rdma/hns-abi.h | 1 + 5 files changed, 241 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c index ebee2782a573..e2f93c1ce86a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_db.c +++ b/drivers/infiniband/hw/hns/hns_roce_db.c @@ -41,6 +41,8 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt, found: db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK); + page->umem->sg_head.sgl->offset = virt & ~PAGE_MASK; + db->virt_addr = sg_virt(page->umem->sg_head.sgl); db->u.user_page = page; refcount_inc(&page->refcount); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 0eab5a2f45e5..9a24fd0ee3e7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -110,6 +110,7 @@ enum { HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0, + HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1, }; enum { @@ -190,7 +191,8 @@ enum { HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0), HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1), HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2), - HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3) + HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3), + HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4), }; enum hns_roce_mtt_type { @@ -385,6 +387,7 @@ struct hns_roce_db { struct hns_roce_user_db_page *user_page; } u; dma_addr_t dma; + void *virt_addr; int index; int order; }; @@ -524,7 +527,9 @@ struct hns_roce_qp { struct hns_roce_buf hr_buf; struct hns_roce_wq rq; struct hns_roce_db rdb; + struct hns_roce_db sdb; u8 rdb_en; + u8 sdb_en; u32 doorbell_qpn; __le32 sq_signal_bits; u32 sq_next_wqe; @@ -641,6 +646,8 @@ struct hns_roce_eq { int shift; dma_addr_t cur_eqe_ba; dma_addr_t nxt_eqe_ba; + int event_type; + int sub_type; }; struct hns_roce_eq_table { @@ -727,6 +734,14 @@ struct hns_roce_caps { u64 flags; }; +struct hns_roce_work { + struct hns_roce_dev *hr_dev; + struct work_struct work; + u32 qpn; + int event_type; + int sub_type; +}; + struct hns_roce_hw { int (*reset)(struct hns_roce_dev *hr_dev, bool enable); int (*cmq_init)(struct hns_roce_dev *hr_dev); @@ -819,6 +834,7 @@ struct hns_roce_dev { u32 tptr_size; /*only for hw v1*/ const struct hns_roce_hw *hw; void *priv; + struct workqueue_struct *irq_workq; }; static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 268d55bfca07..0218c0f8c2a7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -165,6 +165,11 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, return 0; } +static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, + int attr_mask, enum ib_qp_state cur_state, + enum ib_qp_state new_state); + static int hns_roce_v2_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) @@ -176,12 +181,14 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct hns_roce_qp *qp = to_hr_qp(ibqp); struct device *dev = hr_dev->dev; struct hns_roce_v2_db sq_db; + struct ib_qp_attr attr; unsigned int sge_ind = 0; unsigned int 
owner_bit; unsigned long flags; unsigned int ind; void *wqe = NULL; bool loopback; + int attr_mask; u32 tmp_len; int ret = 0; u8 *smac; @@ -524,6 +531,19 @@ out: qp->sq_next_wqe = ind; qp->next_sge = sge_ind; + + if (qp->state == IB_QPS_ERR) { + attr_mask = IB_QP_STATE; + attr.qp_state = IB_QPS_ERR; + + ret = hns_roce_v2_modify_qp(&qp->ibqp, &attr, attr_mask, + qp->state, IB_QPS_ERR); + if (ret) { + spin_unlock_irqrestore(&qp->sq.lock, flags); + *bad_wr = wr; + return ret; + } + } } spin_unlock_irqrestore(&qp->sq.lock, flags); @@ -540,8 +560,10 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct hns_roce_v2_wqe_data_seg *dseg; struct hns_roce_rinl_sge *sge_list; struct device *dev = hr_dev->dev; + struct ib_qp_attr attr; unsigned long flags; void *wqe = NULL; + int attr_mask; int ret = 0; int nreq; int ind; @@ -610,6 +632,20 @@ out: wmb(); *hr_qp->rdb.db_record = hr_qp->rq.head & 0xffff; + + if (hr_qp->state == IB_QPS_ERR) { + attr_mask = IB_QP_STATE; + attr.qp_state = IB_QPS_ERR; + + ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, &attr, + attr_mask, hr_qp->state, + IB_QPS_ERR); + if (ret) { + spin_unlock_irqrestore(&hr_qp->rq.lock, flags); + *bad_wr = wr; + return ret; + } + } } spin_unlock_irqrestore(&hr_qp->rq.lock, flags); @@ -1217,7 +1253,8 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR | HNS_ROCE_CAP_FLAG_ROCE_V1_V2 | HNS_ROCE_CAP_FLAG_RQ_INLINE | - HNS_ROCE_CAP_FLAG_RECORD_DB; + HNS_ROCE_CAP_FLAG_RECORD_DB | + HNS_ROCE_CAP_FLAG_SQ_RECORD_DB; caps->pkey_table_len[0] = 1; caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM; caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM; @@ -2009,6 +2046,8 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, struct hns_roce_v2_cqe *cqe; struct hns_roce_qp *hr_qp; struct hns_roce_wq *wq; + struct ib_qp_attr attr; + int attr_mask; int is_send; u16 wqe_ctr; u32 opcode; @@ -2095,8 +2134,17 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, break; } - /* CQE status error, directly return */ - if (wc->status != IB_WC_SUCCESS) + /* flush cqe if wc status is error, excluding flush error */ + if ((wc->status != IB_WC_SUCCESS) && + (wc->status != IB_WC_WR_FLUSH_ERR)) { + attr_mask = IB_QP_STATE; + attr.qp_state = IB_QPS_ERR; + return hns_roce_v2_modify_qp(&(*cur_qp)->ibqp, + &attr, attr_mask, + (*cur_qp)->state, IB_QPS_ERR); + } + + if (wc->status == IB_WC_WR_FLUSH_ERR) return 0; if (is_send) { @@ -3450,6 +3498,24 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, goto out; } + /* When QP state is err, SQ and RQ WQE should be flushed */ + if (new_state == IB_QPS_ERR) { + roce_set_field(context->byte_160_sq_ci_pi, + V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M, + V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, + hr_qp->sq.head); + roce_set_field(qpc_mask->byte_160_sq_ci_pi, + V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M, + V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0); + roce_set_field(context->byte_84_rq_ci_pi, + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, + hr_qp->rq.head); + roce_set_field(qpc_mask->byte_84_rq_ci_pi, + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0); + } + if (attr_mask & IB_QP_AV) { const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); @@ -3806,6 +3872,11 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); if (is_user) { + if (hr_qp->sq.wqe_cnt && (hr_qp->sdb_en == 1)) + hns_roce_db_unmap_user( + to_hr_ucontext(hr_qp->ibqp.uobject->context), + &hr_qp->sdb); + if 
(hr_qp->rq.wqe_cnt && (hr_qp->rdb_en == 1)) hns_roce_db_unmap_user( to_hr_ucontext(hr_qp->ibqp.uobject->context), @@ -3888,6 +3959,74 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) return ret; } +static void hns_roce_set_qps_to_err(struct hns_roce_dev *hr_dev, u32 qpn) +{ + struct hns_roce_qp *hr_qp; + struct ib_qp_attr attr; + int attr_mask; + int ret; + + hr_qp = __hns_roce_qp_lookup(hr_dev, qpn); + if (!hr_qp) { + dev_warn(hr_dev->dev, "no hr_qp can be found!\n"); + return; + } + + if (hr_qp->ibqp.uobject) { + if (hr_qp->sdb_en == 1) { + hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr); + hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr); + } else { + dev_warn(hr_dev->dev, "flush cqe is unsupported in userspace!\n"); + return; + } + } + + attr_mask = IB_QP_STATE; + attr.qp_state = IB_QPS_ERR; + ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, &attr, attr_mask, + hr_qp->state, IB_QPS_ERR); + if (ret) + dev_err(hr_dev->dev, "failed to modify qp %d to err state.\n", + qpn); +} + +static void hns_roce_irq_work_handle(struct work_struct *work) +{ + struct hns_roce_work *irq_work = + container_of(work, struct hns_roce_work, work); + u32 qpn = irq_work->qpn; + + switch (irq_work->event_type) { + case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: + case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: + case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: + hns_roce_set_qps_to_err(irq_work->hr_dev, qpn); + break; + default: + break; + } + + kfree(irq_work); +} + +static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, + struct hns_roce_eq *eq, u32 qpn) +{ + struct hns_roce_work *irq_work; + + irq_work = kzalloc(sizeof(struct hns_roce_work), GFP_ATOMIC); + if (!irq_work) + return; + + INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle); + irq_work->hr_dev = hr_dev; + irq_work->qpn = qpn; + irq_work->event_type = eq->event_type; + irq_work->sub_type = eq->sub_type; + queue_work(hr_dev->irq_workq, &(irq_work->work)); +} + static void set_eq_cons_index_v2(struct hns_roce_eq *eq) { u32 doorbell[2]; @@ -3990,14 +4129,9 @@ static void hns_roce_v2_local_wq_access_err_handle(struct hns_roce_dev *hr_dev, static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev, struct hns_roce_aeqe *aeqe, - int event_type) + int event_type, u32 qpn) { struct device *dev = hr_dev->dev; - u32 qpn; - - qpn = roce_get_field(aeqe->event.qp_event.qp, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_COMM_EST: @@ -4024,14 +4158,9 @@ static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev, static void hns_roce_v2_cq_err_handle(struct hns_roce_dev *hr_dev, struct hns_roce_aeqe *aeqe, - int event_type) + int event_type, u32 cqn) { struct device *dev = hr_dev->dev; - u32 cqn; - - cqn = roce_get_field(aeqe->event.cq_event.cq, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: @@ -4096,6 +4225,9 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, struct hns_roce_aeqe *aeqe; int aeqe_found = 0; int event_type; + int sub_type; + u32 qpn; + u32 cqn; while ((aeqe = next_aeqe_sw_v2(eq))) { @@ -4107,6 +4239,15 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, event_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_EVENT_TYPE_M, HNS_ROCE_V2_AEQE_EVENT_TYPE_S); + sub_type = roce_get_field(aeqe->asyn, + HNS_ROCE_V2_AEQE_SUB_TYPE_M, + HNS_ROCE_V2_AEQE_SUB_TYPE_S); + qpn = roce_get_field(aeqe->event.qp_event.qp, + 
HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); + cqn = roce_get_field(aeqe->event.cq_event.cq, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_PATH_MIG: @@ -4120,7 +4261,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: - hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type); + hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type, + qpn); break; case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: @@ -4129,7 +4271,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, break; case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: - hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type); + hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type, + cqn); break; case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: dev_warn(dev, "DB overflow.\n"); @@ -4152,6 +4295,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, break; }; + eq->event_type = event_type; + eq->sub_type = sub_type; ++eq->cons_index; aeqe_found = 1; @@ -4159,6 +4304,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, dev_warn(dev, "cons_index overflow, set back to 0.\n"); eq->cons_index = 0; } + hns_roce_v2_init_irq_work(hr_dev, eq, qpn); } set_eq_cons_index_v2(eq); @@ -4975,6 +5121,13 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev) } } + hr_dev->irq_workq = + create_singlethread_workqueue("hns_roce_irq_workqueue"); + if (!hr_dev->irq_workq) { + dev_err(dev, "Create irq workqueue failed!\n"); + goto err_request_irq_fail; + } + return 0; err_request_irq_fail: @@ -5025,6 +5178,9 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev) kfree(hr_dev->irq_names[i]); kfree(eq_table->eq); + + flush_workqueue(hr_dev->irq_workq); + destroy_workqueue(hr_dev->irq_workq); } static const struct hns_roce_hw hns_roce_hw_v2 = { diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index baaf906f7c2e..c1dbddcd58c9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -489,6 +489,14 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, return 0; } +static int hns_roce_qp_has_sq(struct ib_qp_init_attr *attr) +{ + if (attr->qp_type == IB_QPT_XRC_TGT) + return 0; + + return 1; +} + static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr) { if (attr->qp_type == IB_QPT_XRC_INI || @@ -613,6 +621,23 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, goto err_mtt; } + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) && + (udata->inlen >= sizeof(ucmd)) && + (udata->outlen >= sizeof(resp)) && + hns_roce_qp_has_sq(init_attr)) { + ret = hns_roce_db_map_user( + to_hr_ucontext(ib_pd->uobject->context), + ucmd.sdb_addr, &hr_qp->sdb); + if (ret) { + dev_err(dev, "sq record doorbell map failed!\n"); + goto err_mtt; + } + + /* indicate kernel supports sq record db */ + resp.cap_flags |= HNS_ROCE_SUPPORT_SQ_RECORD_DB; + hr_qp->sdb_en = 1; + } + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && (udata->outlen >= sizeof(resp)) && hns_roce_qp_has_rq(init_attr)) { @@ -621,7 +646,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, ucmd.db_addr, &hr_qp->rdb); if (ret) { dev_err(dev, "rq record doorbell map failed!\n"); - goto err_mtt; + goto err_sq_dbmap; 
} } } else { @@ -734,7 +759,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, if (ib_pd->uobject && (udata->outlen >= sizeof(resp)) && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) { - /* indicate kernel supports record db */ + /* indicate kernel supports rq record db */ resp.cap_flags |= HNS_ROCE_SUPPORT_RQ_RECORD_DB; ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); if (ret) @@ -770,6 +795,16 @@ err_wrid: kfree(hr_qp->rq.wrid); } +err_sq_dbmap: + if (ib_pd->uobject) + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) && + (udata->inlen >= sizeof(ucmd)) && + (udata->outlen >= sizeof(resp)) && + hns_roce_qp_has_sq(init_attr)) + hns_roce_db_unmap_user( + to_hr_ucontext(ib_pd->uobject->context), + &hr_qp->sdb); + err_mtt: hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); @@ -903,6 +938,17 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + if (ibqp->uobject && + (attr_mask & IB_QP_STATE) && new_state == IB_QPS_ERR) { + if (hr_qp->sdb_en == 1) { + hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr); + hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr); + } else { + dev_warn(dev, "flush cqe is not supported in userspace!\n"); + goto out; + } + } + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, IB_LINK_LAYER_ETHERNET)) { dev_err(dev, "ib_modify_qp_is_ok failed\n"); diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index 78613b609fa8..c1f87735514f 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -53,6 +53,7 @@ struct hns_roce_ib_create_qp { __u8 log_sq_stride; __u8 sq_no_prefetch; __u8 reserved[5]; + __aligned_u64 sdb_addr; }; struct hns_roce_ib_create_qp_resp { -- cgit From 8001b717f09460d9e17457f6bade6699aa14604f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 2 Aug 2018 10:56:13 +0300 Subject: rdma/cxgb4: fix some info leaks In c4iw_create_qp() there are several struct members which potentially aren't initialized, like uresp.rq_key. I've fixed this code before in commit ae1fe07f3f42 ("RDMA/cxgb4: Fix stack info leak in c4iw_create_qp()") so this time I'm just going to take a big hammer approach and memset the whole struct to zero. Hopefully, it will stay fixed this time. In c4iw_create_srq() we don't clear uresp.reserved.
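The failure mode is easy to reproduce in miniature: a stack struct copied to user space leaks old stack bytes through every member (and padding hole) that was never assigned. The struct below is illustrative, not the cxgb4 one:

    struct demo_resp {
        __u32 flags;
        __u32 reserved;               /* never assigned below */
    };

    static int demo_reply(void __user *dst)
    {
        struct demo_resp uresp;

        memset(&uresp, 0, sizeof(uresp)); /* zeroes reserved and padding */
        uresp.flags = 1;
        return copy_to_user(dst, &uresp, sizeof(uresp)) ? -EFAULT : 0;
    }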
Fixes: 6a0b6174d35a ("rdma/cxgb4: Add support for kernel mode SRQ's") Signed-off-by: Dan Carpenter Acked-by: Raju Rangoju Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/qp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index c26086c76f0b..dbd99370a0de 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2088,6 +2088,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, goto err_free_sq_db_key; } } + memset(&uresp, 0, sizeof(uresp)); if (t4_sq_onchip(&qhp->wq.sq)) { ma_sync_key_mm = kmalloc(sizeof(*ma_sync_key_mm), GFP_KERNEL); @@ -2096,8 +2097,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, goto err_free_rq_db_key; } uresp.flags = C4IW_QPF_ONCHIP; - } else - uresp.flags = 0; + } uresp.qid_mask = rhp->rdev.qpmask; uresp.sqid = qhp->wq.sq.qid; uresp.sq_size = qhp->wq.sq.size; @@ -2111,8 +2111,6 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, if (ma_sync_key_mm) { uresp.ma_sync_key = ucontext->key; ucontext->key += PAGE_SIZE; - } else { - uresp.ma_sync_key = 0; } uresp.sq_key = ucontext->key; ucontext->key += PAGE_SIZE; @@ -2601,6 +2599,7 @@ struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs, ret = -ENOMEM; goto err_free_srq_key_mm; } + memset(&uresp, 0, sizeof(uresp)); uresp.flags = srq->flags; uresp.qid_mask = rhp->rdev.qpmask; uresp.srqid = srq->wq.qid; -- cgit From b9855f4ca0fe582aabfdbd08f0d856b22486e157 Mon Sep 17 00:00:00 2001 From: Potnuri Bharat Teja Date: Thu, 2 Aug 2018 11:33:03 +0530 Subject: iw_cxgb4: RDMA write with immediate support Adds iw_cxgb4 functionality to support the RDMA_WRITE_WITH_IMMEDIATE opcode.
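For reference, this is roughly how a kernel ULP would post the new opcode once the device advertises support; the sketch assumes an already established QP and a registered remote buffer, and the immediate value is arbitrary:

    static int post_write_with_imm(struct ib_qp *qp, struct ib_sge *sge,
                                   u64 remote_addr, u32 rkey)
    {
        struct ib_rdma_wr wr = {};
        const struct ib_send_wr *bad_wr;

        wr.wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM;
        wr.wr.ex.imm_data = cpu_to_be32(0x1234);  /* delivered in the
                                                   * receiver's CQE */
        wr.wr.sg_list = sge;
        wr.wr.num_sge = 1;
        wr.wr.send_flags = IB_SEND_SIGNALED;
        wr.remote_addr = remote_addr;
        wr.rkey = rkey;

        return ib_post_send(qp, &wr.wr, &bad_wr);
    }

The peer consumes a receive WQE and sees IB_WC_RECV_RDMA_WITH_IMM with the immediate in wc->ex.imm_data, which is exactly the receive-side handling the cq.c hunk below adds.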
Signed-off-by: Potnuri Bharat Teja Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cq.c | 23 ++++++++++++++++--- drivers/infiniband/hw/cxgb4/qp.c | 37 ++++++++++++++++++++++++------- drivers/infiniband/hw/cxgb4/t4.h | 16 ++++++++++++- drivers/infiniband/hw/cxgb4/t4fw_ri_api.h | 18 ++++++++++++--- include/uapi/rdma/cxgb4-abi.h | 3 ++- 5 files changed, 81 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 0c13f2838c84..6d3042794094 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -791,15 +791,32 @@ static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp, wc->byte_len = CQE_LEN(&cqe); else wc->byte_len = 0; - wc->opcode = IB_WC_RECV; - if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV || - CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) { + + switch (CQE_OPCODE(&cqe)) { + case FW_RI_SEND: + wc->opcode = IB_WC_RECV; + break; + case FW_RI_SEND_WITH_INV: + case FW_RI_SEND_WITH_SE_INV: + wc->opcode = IB_WC_RECV; wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe); wc->wc_flags |= IB_WC_WITH_INVALIDATE; c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey); + break; + case FW_RI_WRITE_IMMEDIATE: + wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; + wc->ex.imm_data = CQE_IMM_DATA(&cqe); + wc->wc_flags |= IB_WC_WITH_IMM; + break; + default: + pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n", + CQE_OPCODE(&cqe), CQE_QPID(&cqe)); + ret = -EINVAL; + goto out; } } else { switch (CQE_OPCODE(&cqe)) { + case FW_RI_WRITE_IMMEDIATE: case FW_RI_RDMA_WRITE: wc->opcode = IB_WC_RDMA_WRITE; break; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index dbd99370a0de..5d30cd14f795 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -555,7 +555,15 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, if (wr->num_sge > T4_MAX_SEND_SGE) return -EINVAL; - wqe->write.r2 = 0; + + /* + * iWARP protocol supports 64 bit immediate data but rdma api + * limits it to 32bit. 
+ */ + if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) + wqe->write.iw_imm_data.ib_imm_data.imm_data32 = wr->ex.imm_data; + else + wqe->write.iw_imm_data.ib_imm_data.imm_data32 = 0; wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); if (wr->num_sge) { @@ -848,6 +856,9 @@ static int ib_to_fw_opcode(int ib_opcode) case IB_WR_RDMA_WRITE: opcode = FW_RI_RDMA_WRITE; break; + case IB_WR_RDMA_WRITE_WITH_IMM: + opcode = FW_RI_WRITE_IMMEDIATE; + break; case IB_WR_RDMA_READ: case IB_WR_RDMA_READ_WITH_INV: opcode = FW_RI_READ_REQ; @@ -970,6 +981,7 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, enum fw_wr_opcodes fw_opcode = 0; enum fw_ri_wr_flags fw_flags; struct c4iw_qp *qhp; + struct c4iw_dev *rhp; union t4_wr *wqe = NULL; u32 num_wrs; struct t4_swsqe *swsqe; @@ -977,6 +989,7 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, u16 idx = 0; qhp = to_c4iw_qp(ibqp); + rhp = qhp->rhp; spin_lock_irqsave(&qhp->lock, flag); /* @@ -1021,6 +1034,13 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, swsqe->opcode = FW_RI_SEND_WITH_INV; err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16); break; + case IB_WR_RDMA_WRITE_WITH_IMM: + if (unlikely(!rhp->rdev.lldi.write_w_imm_support)) { + err = -EINVAL; + break; + } + fw_flags |= FW_RI_RDMA_WRITE_WITH_IMMEDIATE; + /*FALLTHROUGH*/ case IB_WR_RDMA_WRITE: fw_opcode = FW_RI_RDMA_WRITE_WR; swsqe->opcode = FW_RI_RDMA_WRITE; @@ -1031,8 +1051,7 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, fw_opcode = FW_RI_RDMA_READ_WR; swsqe->opcode = FW_RI_READ_REQ; if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) { - c4iw_invalidate_mr(qhp->rhp, - wr->sg_list[0].lkey); + c4iw_invalidate_mr(rhp, wr->sg_list[0].lkey); fw_flags = FW_RI_RDMA_READ_INVALIDATE; } else { fw_flags = 0; @@ -1048,7 +1067,7 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct c4iw_mr *mhp = to_c4iw_mr(reg_wr(wr)->mr); swsqe->opcode = FW_RI_FAST_REGISTER; - if (qhp->rhp->rdev.lldi.fr_nsmr_tpte_wr_support && + if (rhp->rdev.lldi.fr_nsmr_tpte_wr_support && !mhp->attr.state && mhp->mpl_len <= 2) { fw_opcode = FW_RI_FR_NSMR_TPTE_WR; build_tpte_memreg(&wqe->fr_tpte, reg_wr(wr), @@ -1057,7 +1076,7 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, fw_opcode = FW_RI_FR_NSMR_WR; err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), mhp, &len16, - qhp->rhp->rdev.lldi.ulptx_memwrite_dsgl); + rhp->rdev.lldi.ulptx_memwrite_dsgl); if (err) break; } @@ -1070,7 +1089,7 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, fw_opcode = FW_RI_INV_LSTAG_WR; swsqe->opcode = FW_RI_LOCAL_INV; err = build_inv_stag(wqe, wr, &len16); - c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey); + c4iw_invalidate_mr(rhp, wr->ex.invalidate_rkey); break; default: pr_warn("%s post of type=%d TBD!\n", __func__, @@ -1089,7 +1108,7 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, swsqe->wr_id = wr->wr_id; if (c4iw_wr_log) { swsqe->sge_ts = cxgb4_read_sge_timestamp( - qhp->rhp->rdev.lldi.ports[0]); + rhp->rdev.lldi.ports[0]); swsqe->host_time = ktime_get(); } @@ -1103,7 +1122,7 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, t4_sq_produce(&qhp->wq, len16); idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); } - if (!qhp->rhp->rdev.status_page->db_off) { + if (!rhp->rdev.status_page->db_off) { t4_ring_sq_db(&qhp->wq, idx, wqe); spin_unlock_irqrestore(&qhp->lock, flag); } else { @@ -2098,6 +2117,8 @@ struct ib_qp 
*c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, } uresp.flags = C4IW_QPF_ONCHIP; } + if (rhp->rdev.lldi.write_w_imm_support) + uresp.flags |= C4IW_QPF_WRITE_W_IMM; uresp.qid_mask = rhp->rdev.qpmask; uresp.sqid = qhp->wq.sq.qid; uresp.sq_size = qhp->wq.sq.size; diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 11d55fc2ded7..0fb3e55f37c1 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -190,7 +190,19 @@ struct t4_cqe { __be32 abs_rqe_idx; } srcqe; struct { - __be64 imm_data; + __be32 mo; + __be32 msn; + /* + * Use union for immediate data to be consistent with + * stack's 32 bit data and iWARP spec's 64 bit data. + */ + union { + struct { + __be32 imm_data32; + u32 reserved; + } ib_imm_data; + __be64 imm_data64; + } iw_imm_data; } imm_data_rcqe; u64 drain_cookie; @@ -253,6 +265,8 @@ struct t4_cqe { #define CQE_WRID_STAG(x) (be32_to_cpu((x)->u.rcqe.stag)) #define CQE_WRID_MSN(x) (be32_to_cpu((x)->u.rcqe.msn)) #define CQE_ABS_RQE_IDX(x) (be32_to_cpu((x)->u.srcqe.abs_rqe_idx)) +#define CQE_IMM_DATA(x)( \ + (x)->u.imm_data_rcqe.iw_imm_data.ib_imm_data.imm_data32) /* used for SQ completion processing */ #define CQE_WRID_SQ_IDX(x) ((x)->u.scqe.cidx) diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h index 0f4f86b004d6..62606e66ba20 100644 --- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h +++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h @@ -50,7 +50,8 @@ enum fw_ri_wr_opcode { FW_RI_BYPASS = 0xd, FW_RI_RECEIVE = 0xe, - FW_RI_SGE_EC_CR_RETURN = 0xf + FW_RI_SGE_EC_CR_RETURN = 0xf, + FW_RI_WRITE_IMMEDIATE = FW_RI_RDMA_INIT }; enum fw_ri_wr_flags { @@ -59,7 +60,8 @@ enum fw_ri_wr_flags { FW_RI_SOLICITED_EVENT_FLAG = 0x04, FW_RI_READ_FENCE_FLAG = 0x08, FW_RI_LOCAL_FENCE_FLAG = 0x10, - FW_RI_RDMA_READ_INVALIDATE = 0x20 + FW_RI_RDMA_READ_INVALIDATE = 0x20, + FW_RI_RDMA_WRITE_WITH_IMMEDIATE = 0x40 }; enum fw_ri_mpa_attrs { @@ -546,7 +548,17 @@ struct fw_ri_rdma_write_wr { __u16 wrid; __u8 r1[3]; __u8 len16; - __be64 r2; + /* + * Use union for immediate data to be consistent with stack's 32 bit + * data and iWARP spec's 64 bit data. + */ + union { + struct { + __be32 imm_data32; + u32 reserved; + } ib_imm_data; + __be64 imm_data64; + } iw_imm_data; __be32 plen; __be32 stag_sink; __be64 to_sink; diff --git a/include/uapi/rdma/cxgb4-abi.h b/include/uapi/rdma/cxgb4-abi.h index d0b2d829471a..f85ec1a3f727 100644 --- a/include/uapi/rdma/cxgb4-abi.h +++ b/include/uapi/rdma/cxgb4-abi.h @@ -65,7 +65,8 @@ struct c4iw_create_cq_resp { }; enum { - C4IW_QPF_ONCHIP = (1 << 0) + C4IW_QPF_ONCHIP = (1 << 0), + C4IW_QPF_WRITE_W_IMM = (1 << 1) }; struct c4iw_create_qp_resp { -- cgit From 94245f4ad9e10c161affaa4763136d29fbe91cb9 Mon Sep 17 00:00:00 2001 From: Potnuri Bharat Teja Date: Thu, 2 Aug 2018 11:33:04 +0530 Subject: iw_cxgb4: Support FW write completion WR To optimize NVME-oF READ IOPs, use a specialized WQE that combines the RDMA WRITE and SEND_INV WR chain submitted by the NVME-oF target driver. This reduces uP overhead per NVME-oF IO, and results in over 10% improvement in NVME-oF 4K READ IOPs. 
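A hedged sketch of the two-WR chain that the fastpath below recognizes; the helper name and values are illustrative. The chain must be exactly an RDMA WRITE followed by a SEND_WITH_INV, with the send carrying a single SGE of exactly 16 bytes (T4_WRITE_CMPL_MAX_CQE) and the write using at most T4_WRITE_CMPL_MAX_SGL SGEs:

    static int post_read_response(struct ib_qp *qp, struct ib_sge *data_sgl,
                                  int num_sge, u64 remote_addr, u32 rkey,
                                  struct ib_sge *rsp_sge /* 16 bytes */)
    {
        struct ib_rdma_wr write = {};
        struct ib_send_wr send = {};
        const struct ib_send_wr *bad_wr;

        write.wr.opcode = IB_WR_RDMA_WRITE;
        write.wr.sg_list = data_sgl;
        write.wr.num_sge = num_sge;     /* <= T4_WRITE_CMPL_MAX_SGL */
        write.remote_addr = remote_addr;
        write.rkey = rkey;
        write.wr.next = &send;

        send.opcode = IB_WR_SEND_WITH_INV;
        send.ex.invalidate_rkey = rkey;
        send.sg_list = rsp_sge;         /* the response capsule */
        send.num_sge = 1;
        send.send_flags = IB_SEND_SIGNALED;

        /* iw_cxgb4 collapses this chain into one FW_RI_RDMA_WRITE_CMPL_WR. */
        return ib_post_send(qp, &write.wr, &bad_wr);
    }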
Signed-off-by: Potnuri Bharat Teja Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/device.c | 1 + drivers/infiniband/hw/cxgb4/qp.c | 147 +++++++++++++++++++++++++++++- drivers/infiniband/hw/cxgb4/t4.h | 6 +- drivers/infiniband/hw/cxgb4/t4fw_ri_api.h | 31 +++++++ 4 files changed, 183 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 5ef082bfa95a..c13c0ba30f63 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -866,6 +866,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->status_page->qp_size = rdev->lldi.vr->qp.size; rdev->status_page->cq_start = rdev->lldi.vr->cq.start; rdev->status_page->cq_size = rdev->lldi.vr->cq.size; + rdev->status_page->write_cmpl_supported = rdev->lldi.write_cmpl_support; if (c4iw_wr_log) { rdev->wr_log = kcalloc(1 << c4iw_wr_log_size_order, diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 5d30cd14f795..b3203afa3b1d 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -455,7 +455,12 @@ static int build_isgl(__be64 *queue_start, __be64 *queue_end, { int i; u32 plen = 0; - __be64 *flitp = (__be64 *)isglp->sge; + __be64 *flitp; + + if ((__be64 *)isglp == queue_end) + isglp = (struct fw_ri_isgl *)queue_start; + + flitp = (__be64 *)isglp->sge; for (i = 0; i < num_sge; i++) { if ((plen + sg_list[i].length) < plen) @@ -597,6 +602,56 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, return 0; } +static void build_immd_cmpl(struct t4_sq *sq, struct fw_ri_immd_cmpl *immdp, + struct ib_send_wr *wr) +{ + memcpy((u8 *)immdp->data, (u8 *)(uintptr_t)wr->sg_list->addr, 16); + memset(immdp->r1, 0, 6); + immdp->op = FW_RI_DATA_IMMD; + immdp->immdlen = 16; +} + +static void build_rdma_write_cmpl(struct t4_sq *sq, + struct fw_ri_rdma_write_cmpl_wr *wcwr, + const struct ib_send_wr *wr, u8 *len16) +{ + u32 plen; + int size; + + /* + * This code assumes the struct fields preceding the write isgl + * fit in one 64B WR slot. This is because the WQE is built + * directly in the dma queue, and wrapping is only handled + * by the code buildling sgls. IE the "fixed part" of the wr + * structs must all fit in 64B. The WQE build code should probably be + * redesigned to avoid this restriction, but for now just add + * the BUILD_BUG_ON() to catch if this WQE struct gets too big. 
+ */ + BUILD_BUG_ON(offsetof(struct fw_ri_rdma_write_cmpl_wr, u) > 64); + + wcwr->stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); + wcwr->to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); + wcwr->stag_inv = cpu_to_be32(wr->next->ex.invalidate_rkey); + wcwr->r2 = 0; + wcwr->r3 = 0; + + /* SEND_INV SGL */ + if (wr->next->send_flags & IB_SEND_INLINE) + build_immd_cmpl(sq, &wcwr->u_cmpl.immd_src, wr->next); + else + build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size], + &wcwr->u_cmpl.isgl_src, wr->next->sg_list, 1, NULL); + + /* WRITE SGL */ + build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size], + wcwr->u.isgl_src, wr->sg_list, wr->num_sge, &plen); + + size = sizeof(*wcwr) + sizeof(struct fw_ri_isgl) + + wr->num_sge * sizeof(struct fw_ri_sge); + wcwr->plen = cpu_to_be32(plen); + *len16 = DIV_ROUND_UP(size, 16); +} + static int build_rdma_read(union t4_wr *wqe, const struct ib_send_wr *wr, u8 *len16) { @@ -627,6 +682,72 @@ static int build_rdma_read(union t4_wr *wqe, const struct ib_send_wr *wr, return 0; } +static void post_write_cmpl(struct c4iw_qp *qhp, const struct ib_send_wr *wr) +{ + bool send_signaled = (wr->next->send_flags & IB_SEND_SIGNALED) || + qhp->sq_sig_all; + bool write_signaled = (wr->send_flags & IB_SEND_SIGNALED) || + qhp->sq_sig_all; + struct t4_swsqe *swsqe; + union t4_wr *wqe; + u16 write_wrid; + u8 len16; + u16 idx; + + /* + * The sw_sq entries still look like a WRITE and a SEND and consume + * 2 slots. The FW WR, however, will be a single uber-WR. + */ + wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue + + qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE); + build_rdma_write_cmpl(&qhp->wq.sq, &wqe->write_cmpl, wr, &len16); + + /* WRITE swsqe */ + swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; + swsqe->opcode = FW_RI_RDMA_WRITE; + swsqe->idx = qhp->wq.sq.pidx; + swsqe->complete = 0; + swsqe->signaled = write_signaled; + swsqe->flushed = 0; + swsqe->wr_id = wr->wr_id; + if (c4iw_wr_log) { + swsqe->sge_ts = + cxgb4_read_sge_timestamp(qhp->rhp->rdev.lldi.ports[0]); + swsqe->host_time = ktime_get(); + } + + write_wrid = qhp->wq.sq.pidx; + + /* just bump the sw_sq */ + qhp->wq.sq.in_use++; + if (++qhp->wq.sq.pidx == qhp->wq.sq.size) + qhp->wq.sq.pidx = 0; + + /* SEND_WITH_INV swsqe */ + swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; + swsqe->opcode = FW_RI_SEND_WITH_INV; + swsqe->idx = qhp->wq.sq.pidx; + swsqe->complete = 0; + swsqe->signaled = send_signaled; + swsqe->flushed = 0; + swsqe->wr_id = wr->next->wr_id; + if (c4iw_wr_log) { + swsqe->sge_ts = + cxgb4_read_sge_timestamp(qhp->rhp->rdev.lldi.ports[0]); + swsqe->host_time = ktime_get(); + } + + wqe->write_cmpl.flags_send = send_signaled ? FW_RI_COMPLETION_FLAG : 0; + wqe->write_cmpl.wrid_send = qhp->wq.sq.pidx; + + init_wr_hdr(wqe, write_wrid, FW_RI_RDMA_WRITE_CMPL_WR, + write_signaled ? FW_RI_COMPLETION_FLAG : 0, len16); + t4_sq_produce(&qhp->wq, len16); + idx = DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE); + + t4_ring_sq_db(&qhp->wq, idx, wqe); +} + static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, const struct ib_recv_wr *wr, u8 *len16) { @@ -1007,6 +1128,30 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, *bad_wr = wr; return -ENOMEM; } + + /* + * Fastpath for NVMe-oF target WRITE + SEND_WITH_INV wr chain which is + * the response for small NVMEe-oF READ requests. If the chain is + * exactly a WRITE->SEND_WITH_INV and the sgl depths and lengths + * meet the requirements of the fw_ri_write_cmpl_wr work request, + * then build and post the write_cmpl WR. 
If any of the tests + * below are not true, then we continue on with the tradtional WRITE + * and SEND WRs. + */ + if (qhp->rhp->rdev.lldi.write_cmpl_support && + CHELSIO_CHIP_VERSION(qhp->rhp->rdev.lldi.adapter_type) >= + CHELSIO_T5 && + wr && wr->next && !wr->next->next && + wr->opcode == IB_WR_RDMA_WRITE && + wr->sg_list[0].length && wr->num_sge <= T4_WRITE_CMPL_MAX_SGL && + wr->next->opcode == IB_WR_SEND_WITH_INV && + wr->next->sg_list[0].length == T4_WRITE_CMPL_MAX_CQE && + wr->next->num_sge == 1 && num_wrs >= 2) { + post_write_cmpl(qhp, wr); + spin_unlock_irqrestore(&qhp->lock, flag); + return 0; + } + while (wr) { if (num_wrs == 0) { err = -ENOMEM; diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 0fb3e55f37c1..e42021fd6fd6 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -91,6 +91,9 @@ static inline int t4_max_fr_depth(int use_dsgl) #define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS) #define T4_MAX_RECV_SGE 4 +#define T4_WRITE_CMPL_MAX_SGL 4 +#define T4_WRITE_CMPL_MAX_CQE 16 + union t4_wr { struct fw_ri_res_wr res; struct fw_ri_wr ri; @@ -101,6 +104,7 @@ union t4_wr { struct fw_ri_fr_nsmr_wr fr; struct fw_ri_fr_nsmr_tpte_wr fr_tpte; struct fw_ri_inv_lstag_wr inv; + struct fw_ri_rdma_write_cmpl_wr write_cmpl; struct t4_status_page status; __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS]; }; @@ -851,7 +855,7 @@ static inline void t4_set_cq_in_error(struct t4_cq *cq) struct t4_dev_status_page { u8 db_off; - u8 pad1; + u8 write_cmpl_supported; u16 pad2; u32 pad3; u64 qp_start; diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h index 62606e66ba20..cbdb300a4794 100644 --- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h +++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h @@ -595,6 +595,37 @@ struct fw_ri_send_wr { #define FW_RI_SEND_WR_SENDOP_G(x) \ (((x) >> FW_RI_SEND_WR_SENDOP_S) & FW_RI_SEND_WR_SENDOP_M) +struct fw_ri_rdma_write_cmpl_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __u8 r2; + __u8 flags_send; + __u16 wrid_send; + __be32 stag_inv; + __be32 plen; + __be32 stag_sink; + __be64 to_sink; + union fw_ri_cmpl { + struct fw_ri_immd_cmpl { + __u8 op; + __u8 r1[6]; + __u8 immdlen; + __u8 data[16]; + } immd_src; + struct fw_ri_isgl isgl_src; + } u_cmpl; + __be64 r3; +#ifndef C99_NOT_SUPPORTED + union fw_ri_write { + struct fw_ri_immd immd_src[0]; + struct fw_ri_isgl isgl_src[0]; + } u; +#endif +}; + struct fw_ri_rdma_read_wr { __u8 opcode; __u8 flags; -- cgit From 577e07ffbad9960551a6821b74af90a216ac10e2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 29 Jul 2018 11:34:51 +0300 Subject: IB/ipoib: Get rid of IPOIB_FLAG_GOING_DOWN This essentially duplicates the netdev's reg_state, so just use that directly. The reg_state is updated under the rtnl_lock, and all places using GOING_DOWN already acquire the rtnl_lock so checking is safe. Since the only place we use GOING_DOWN is for the parent device this does not fix any bugs, but it is a step to tidy up the unregister flow so that after later patches the flow is uniform and sane.
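A sketch of the replacement check, mirroring the diff below: the netdev's reg_state, read under the rtnl_lock, already tells us whether the parent is being unregistered, so no private flag is needed (the function name is illustrative):

    static int example_vlan_op(struct net_device *pdev)
    {
        if (!rtnl_trylock())
            return restart_syscall();

        if (pdev->reg_state != NETREG_REGISTERED) {
            rtnl_unlock();
            return -EPERM;            /* parent is going away */
        }

        /* ... perform the operation while still holding rtnl ... */
        rtnl_unlock();
        return 0;
    }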
Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 - drivers/infiniband/ulp/ipoib/ipoib_cm.c | 9 ++++++--- drivers/infiniband/ulp/ipoib/ipoib_main.c | 3 --- drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 18 ++++++++++++------ 4 files changed, 18 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index e255a7e5a4c3..9eebb705d994 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -95,7 +95,6 @@ enum { IPOIB_NEIGH_TBL_FLUSH = 12, IPOIB_FLAG_DEV_ADDR_SET = 13, IPOIB_FLAG_DEV_ADDR_CTRL = 14, - IPOIB_FLAG_GOING_DOWN = 15, IPOIB_MAX_BACKOFF_SECONDS = 16, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 518313a1b0c9..16ea08dc59a8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1519,9 +1519,6 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, int ret; struct ipoib_dev_priv *priv = ipoib_priv(dev); - if (test_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags)) - return -EPERM; - if (!mutex_trylock(&priv->sysfs_mutex)) return restart_syscall(); @@ -1530,6 +1527,12 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, return restart_syscall(); } + if (dev->reg_state != NETREG_REGISTERED) { + rtnl_unlock(); + mutex_unlock(&priv->sysfs_mutex); + return -EPERM; + } + ret = ipoib_set_mode(dev, buf); /* The assumption is that the function ipoib_set_mode returned diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 82f0e3869b04..7ca9013bf05c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2406,9 +2406,6 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data) ib_unregister_event_handler(&priv->event_handler); flush_workqueue(ipoib_workqueue); - /* mark interface in the middle of destruction */ - set_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags); - rtnl_lock(); dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); rtnl_unlock(); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index b067ad5e4c7e..1b7bfd500893 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -127,9 +127,6 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) ppriv = ipoib_priv(pdev); - if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags)) - return -EPERM; - snprintf(intf_name, sizeof(intf_name), "%s.%04x", ppriv->dev->name, pkey); @@ -141,6 +138,12 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) return restart_syscall(); } + if (pdev->reg_state != NETREG_REGISTERED) { + rtnl_unlock(); + mutex_unlock(&ppriv->sysfs_mutex); + return -EPERM; + } + if (!down_write_trylock(&ppriv->vlan_rwsem)) { rtnl_unlock(); mutex_unlock(&ppriv->sysfs_mutex); @@ -199,9 +202,6 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) ppriv = ipoib_priv(pdev); - if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags)) - return -EPERM; - if (!mutex_trylock(&ppriv->sysfs_mutex)) return restart_syscall(); @@ -210,6 +210,12 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) return restart_syscall(); } + if (pdev->reg_state != NETREG_REGISTERED) { + rtnl_unlock(); + mutex_unlock(&ppriv->sysfs_mutex); + return -EPERM; + } + if (!down_write_trylock(&ppriv->vlan_rwsem)) { 
rtnl_unlock(); mutex_unlock(&ppriv->sysfs_mutex); -- cgit From cda8daf17914a261986d6d4b7294599736d5a463 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Sun, 29 Jul 2018 11:34:52 +0300 Subject: IB/ipoib: Use cancel_delayed_work_sync for neigh-clean task The neigh_reap_task is self-restarting, but so long as we call cancel_delayed_work_sync() it is guaranteed not to be running and never to start again. Thus we don't need to have the racy IPOIB_STOP_NEIGH_GC bit, or the confusing mismatch of places sometimes calling flush_workqueue after the cancel. This fixes rare situations where the GC work could have been left running. Signed-off-by: Erez Shitrit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 - drivers/infiniband/ulp/ipoib/ipoib_main.c | 33 ++++++++++--------------------- 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 9eebb705d994..c619c0098ba6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -91,7 +91,6 @@ enum { IPOIB_STOP_REAPER = 7, IPOIB_FLAG_ADMIN_CM = 9, IPOIB_FLAG_UMCAST = 10, - IPOIB_STOP_NEIGH_GC = 11, IPOIB_NEIGH_TBL_FLUSH = 12, IPOIB_FLAG_DEV_ADDR_SET = 13, IPOIB_FLAG_DEV_ADDR_CTRL = 14, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 7ca9013bf05c..84bee9151059 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1310,9 +1310,6 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) int i; LIST_HEAD(remove_list); - if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) - return; - spin_lock_irqsave(&priv->lock, flags); htbl = rcu_dereference_protected(ntbl->htbl, @@ -1324,9 +1321,6 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) /* neigh is obsolete if it was idle for two GC periods */ dt = 2 * arp_tbl.gc_interval; neigh_obsolete = jiffies - dt; - /* handle possible race condition */ - if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) - goto out_unlock; for (i = 0; i < htbl->size; i++) { struct ipoib_neigh *neigh; @@ -1364,9 +1358,8 @@ static void ipoib_reap_neigh(struct work_struct *work) __ipoib_reap_neigh(priv); - if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) - queue_delayed_work(priv->wq, &priv->neigh_reap_task, - arp_tbl.gc_interval); + queue_delayed_work(priv->wq, &priv->neigh_reap_task, + arp_tbl.gc_interval); } @@ -1528,7 +1521,6 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) htbl = kzalloc(sizeof(*htbl), GFP_KERNEL); if (!htbl) return -ENOMEM; - set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); size = roundup_pow_of_two(arp_tbl.gc_thresh3); buckets = kvcalloc(size, sizeof(*buckets), GFP_KERNEL); if (!buckets) { @@ -1543,7 +1535,6 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) atomic_set(&ntbl->entries, 0); /* start garbage collection */ - clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); queue_delayed_work(priv->wq, &priv->neigh_reap_task, arp_tbl.gc_interval); @@ -1653,15 +1644,11 @@ out_unlock: static void ipoib_neigh_hash_uninit(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - int stopped; ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n"); init_completion(&priv->ntbl.deleted); - /* Stop GC if called at init fail need to cancel work */ - stopped = test_and_set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); - if (!stopped) - cancel_delayed_work(&priv->neigh_reap_task); +
cancel_delayed_work_sync(&priv->neigh_reap_task); ipoib_flush_neighs(priv); @@ -1801,12 +1788,15 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) if (ipoib_ib_dev_open(dev)) { pr_warn("%s failed to open device\n", dev->name); ret = -ENODEV; - goto out_dev_uninit; + goto out_hash_uninit; } } return 0; +out_hash_uninit: + ipoib_neigh_hash_uninit(dev); + out_dev_uninit: ipoib_ib_dev_cleanup(dev); @@ -1836,8 +1826,7 @@ void ipoib_dev_cleanup(struct net_device *dev) /* Delete any child interfaces first */ list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { /* Stop GC on child */ - set_bit(IPOIB_STOP_NEIGH_GC, &cpriv->flags); - cancel_delayed_work(&cpriv->neigh_reap_task); + cancel_delayed_work_sync(&cpriv->neigh_reap_task); unregister_netdevice_queue(cpriv->dev, &head); } unregister_netdevice_many(&head); @@ -2345,8 +2334,7 @@ register_failed: ib_unregister_event_handler(&priv->event_handler); flush_workqueue(ipoib_workqueue); /* Stop GC if started before flush */ - set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); - cancel_delayed_work(&priv->neigh_reap_task); + cancel_delayed_work_sync(&priv->neigh_reap_task); flush_workqueue(priv->wq); ipoib_dev_cleanup(priv->dev); @@ -2411,8 +2399,7 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data) rtnl_unlock(); /* Stop GC */ - set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); - cancel_delayed_work(&priv->neigh_reap_task); + cancel_delayed_work_sync(&priv->neigh_reap_task); flush_workqueue(priv->wq); /* Wrap rtnl_lock/unlock with mutex to protect sysfs calls */ -- cgit From 7cbee87c17965ede0eba2e7ba41d0a38ebd2249c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 29 Jul 2018 11:34:53 +0300 Subject: IB/ipoib: Move all uninit code into ndo_uninit Currently uninit is sometimes done twice in error flows, and is sprinkled a bit all over the place. Improve the clarity of the design by moving all uninit only into ndo_uninit. Some duplication is removed: - Sometimes IPOIB_STOP_NEIGH_GC was done before unregister, but this duplicates the process in ipoib_neigh_hash_init - Flushing priv->wq was sometimes done before unregister, but that duplicates what has been done in ndo_uninit Uninitializing the IB event queue must remain before unregister_netdev as it requires the RTNL lock to be dropped; this is moved to a helper to make that flow really clear and remove some duplication in error flows. If register_netdev fails (and ndo_init is NULL) then it almost always calls ndo_uninit, which lets us remove all the extra code from the error unwinds. The next patch in the series will close the 'almost always' hole by pairing a proper ndo_init with ndo_uninit.
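The ordering guarantee being leaned on here is the netdev core's pairing of init and uninit callbacks. As a simplified sketch of that contract (an editor's summary of core behavior, not text from the patch):

	register_netdev(dev)
		-> ndo_init(dev)	/* first, if the driver provides one */
		-> ndo_uninit(dev)	/* on unwind, if a later step of
					   registration fails */

	unregister_netdev(dev)
		-> ndo_uninit(dev)	/* always, with the rtnl lock held */

The "almost always" caveat above is the hole the next patch closes: with ndo_init left NULL, a sufficiently early register_netdev failure can return without invoking ndo_uninit at all.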
Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 - drivers/infiniband/ulp/ipoib/ipoib_main.c | 60 +++++++++++++++++-------------- drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 5 +-- 3 files changed, 34 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index c619c0098ba6..04fc5ad1b69f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -509,7 +509,6 @@ int ipoib_ib_dev_stop_default(struct net_device *dev); void ipoib_pkey_dev_check_presence(struct net_device *dev); int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port); -void ipoib_dev_cleanup(struct net_device *dev); void ipoib_mcast_join_task(struct work_struct *work); void ipoib_mcast_carrier_on_task(struct work_struct *work); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 84bee9151059..d4e9951dc539 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -215,11 +215,6 @@ static int ipoib_stop(struct net_device *dev) return 0; } -static void ipoib_uninit(struct net_device *dev) -{ - ipoib_dev_cleanup(dev); -} - static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_features_t features) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -1816,7 +1811,33 @@ out: return ret; } -void ipoib_dev_cleanup(struct net_device *dev) +/* + * This must be called before doing an unregister_netdev on a parent device to + * shut down the IB event handler. + */ +static void ipoib_parent_unregister_pre(struct net_device *ndev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(ndev); + + /* + * ipoib_set_mac checks netif_running before pushing work; clearing + * running ensures it will not add more work. + */ + rtnl_lock(); + dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); + rtnl_unlock(); + + /* ipoib_event() cannot be running once this returns */ + ib_unregister_event_handler(&priv->event_handler); + + /* + * Work on the queue grabs the rtnl lock, so this cannot be done while + * also holding it.
+ */ + flush_workqueue(ipoib_workqueue); +} + +static void ipoib_ndo_uninit(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev), *cpriv, *tcpriv; LIST_HEAD(head); @@ -1888,7 +1909,7 @@ static const struct header_ops ipoib_header_ops = { }; static const struct net_device_ops ipoib_netdev_ops_pf = { - .ndo_uninit = ipoib_uninit, + .ndo_uninit = ipoib_ndo_uninit, .ndo_open = ipoib_open, .ndo_stop = ipoib_stop, .ndo_change_mtu = ipoib_change_mtu, @@ -1907,7 +1928,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = { }; static const struct net_device_ops ipoib_netdev_ops_vf = { - .ndo_uninit = ipoib_uninit, + .ndo_uninit = ipoib_ndo_uninit, .ndo_open = ipoib_open, .ndo_stop = ipoib_stop, .ndo_change_mtu = ipoib_change_mtu, @@ -2310,7 +2331,8 @@ static struct net_device *ipoib_add_port(const char *format, if (result) { pr_warn("%s: couldn't register ipoib port %d; error %d\n", hca->name, port, result); - goto register_failed; + ipoib_parent_unregister_pre(priv->dev); + goto device_init_failed; } result = -ENOMEM; @@ -2328,16 +2350,9 @@ static struct net_device *ipoib_add_port(const char *format, return priv->dev; sysfs_failed: + ipoib_parent_unregister_pre(priv->dev); unregister_netdev(priv->dev); -register_failed: - ib_unregister_event_handler(&priv->event_handler); - flush_workqueue(ipoib_workqueue); - /* Stop GC if started before flush */ - cancel_delayed_work_sync(&priv->neigh_reap_task); - flush_workqueue(priv->wq); - ipoib_dev_cleanup(priv->dev); - device_init_failed: rn = netdev_priv(priv->dev); rn->free_rdma_netdev(priv->dev); kfree(priv); @@ -2391,16 +2406,7 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data) list_for_each_entry_safe(priv, tmp, dev_list, list) { struct rdma_netdev *parent_rn = netdev_priv(priv->dev); - ib_unregister_event_handler(&priv->event_handler); - flush_workqueue(ipoib_workqueue); - - rtnl_lock(); - dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); - rtnl_unlock(); - - /* Stop GC */ - cancel_delayed_work_sync(&priv->neigh_reap_task); - flush_workqueue(priv->wq); + ipoib_parent_unregister_pre(priv->dev); /* Wrap rtnl_lock/unlock with mutex to protect sysfs calls */ mutex_lock(&priv->sysfs_mutex); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 1b7bfd500893..b62ab85c8ead 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -83,7 +83,7 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, result = register_netdevice(priv->dev); if (result) { ipoib_warn(priv, "failed to initialize; error %i", result); - goto register_failed; + goto err; } /* RTNL childs don't need proprietary sysfs entries */ @@ -108,9 +108,6 @@ sysfs_failed: result = -ENOMEM; unregister_netdevice(priv->dev); -register_failed: - ipoib_dev_cleanup(priv->dev); - err: return result; } -- cgit From eaeb398425089cb3c8edc81a406109db94b2705c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 29 Jul 2018 11:34:55 +0300 Subject: IB/ipoib: Move init code to ndo_init Now that we have a proper ndo_uninit, move code that naturally pairs with the ndo_uninit into ndo_init. This allows the netdev core to naturally handle ordering. This fixes the situation where register_netdev can fail before calling ndo_init, in which case it wouldn't call ndo_uninit either. Also move a bunch of duplicated init code that is shared between child and parent for clarity. Now the child and parent register functions look very similar.
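One detail now centralized in ipoib_ndo_init (seen in the diff below) deserves a gloss: the P_Key handling. The top bit of an InfiniBand P_Key is the membership bit (1 = full member), and bytes 8 and 9 of the 20-byte IPoIB broadcast address carry the P_Key of the broadcast group to join. As a worked example (editor's illustration), a limited-membership key of 0x7fff becomes 0xffff:

	priv->pkey |= 0x8000;			/* 0x7fff -> 0xffff */
	ndev->broadcast[8] = priv->pkey >> 8;	/* 0xff */
	ndev->broadcast[9] = priv->pkey & 0xff;	/* 0xff */

Previously this logic was duplicated across the parent, sysfs-child and netlink-child creation paths; after this patch it runs exactly once, in ndo_init.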
Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib.h | 3 - drivers/infiniband/ulp/ipoib/ipoib_main.c | 193 +++++++++++++++------------ drivers/infiniband/ulp/ipoib/ipoib_netlink.c | 6 - drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 31 +---- 4 files changed, 114 insertions(+), 119 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 04fc5ad1b69f..02ad1a60dc80 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -508,8 +508,6 @@ void ipoib_ib_dev_down(struct net_device *dev); int ipoib_ib_dev_stop_default(struct net_device *dev); void ipoib_pkey_dev_check_presence(struct net_device *dev); -int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port); - void ipoib_mcast_join_task(struct work_struct *work); void ipoib_mcast_carrier_on_task(struct work_struct *work); void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb); @@ -597,7 +595,6 @@ void ipoib_pkey_open(struct ipoib_dev_priv *priv); void ipoib_drain_cq(struct net_device *dev); void ipoib_set_ethtool_ops(struct net_device *dev); -void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca); #define IPOIB_FLAGS_RC 0x80 #define IPOIB_FLAGS_UC 0x40 diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index d4e9951dc539..67ab52eec3e9 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1741,13 +1741,11 @@ static int ipoib_ioctl(struct net_device *dev, struct ifreq *ifr, return priv->rn_ops->ndo_do_ioctl(dev, ifr, cmd); } -int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) +static int ipoib_dev_init(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); int ret = -ENOMEM; - priv->ca = ca; - priv->port = port; priv->qp = NULL; /* @@ -1763,7 +1761,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) /* create pd, which used both for control and datapath*/ priv->pd = ib_alloc_pd(priv->ca, 0); if (IS_ERR(priv->pd)) { - pr_warn("%s: failed to allocate PD\n", ca->name); + pr_warn("%s: failed to allocate PD\n", priv->ca->name); goto clean_wq; } @@ -1837,6 +1835,108 @@ static void ipoib_parent_unregister_pre(struct net_device *ndev) flush_workqueue(ipoib_workqueue); } +static void ipoib_set_dev_features(struct ipoib_dev_priv *priv) +{ + priv->hca_caps = priv->ca->attrs.device_cap_flags; + + if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) { + priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM; + + if (priv->hca_caps & IB_DEVICE_UD_TSO) + priv->dev->hw_features |= NETIF_F_TSO; + + priv->dev->features |= priv->dev->hw_features; + } +} + +static int ipoib_parent_init(struct net_device *ndev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(ndev); + struct ib_port_attr attr; + int result; + + result = ib_query_port(priv->ca, priv->port, &attr); + if (result) { + pr_warn("%s: ib_query_port %d failed\n", priv->ca->name, + priv->port); + return result; + } + priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); + + result = ib_query_pkey(priv->ca, priv->port, 0, &priv->pkey); + if (result) { + pr_warn("%s: ib_query_pkey port %d failed (ret = %d)\n", + priv->ca->name, priv->port, result); + return result; + } + + result = rdma_query_gid(priv->ca, priv->port, 0, &priv->local_gid); + if (result) { + pr_warn("%s: rdma_query_gid port %d failed (ret = %d)\n", + priv->ca->name, priv->port, result); + 
return result; + } + memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, + sizeof(union ib_gid)); + + SET_NETDEV_DEV(priv->dev, priv->ca->dev.parent); + priv->dev->dev_id = priv->port - 1; + + return 0; +} + +static void ipoib_child_init(struct net_device *ndev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(ndev); + struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); + + priv->max_ib_mtu = ppriv->max_ib_mtu; + set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags); + memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN); + memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid)); +} + +static int ipoib_ndo_init(struct net_device *ndev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(ndev); + int rc; + + if (priv->parent) { + ipoib_child_init(ndev); + } else { + rc = ipoib_parent_init(ndev); + if (rc) + return rc; + } + + /* MTU will be reset when mcast join happens */ + ndev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); + priv->mcast_mtu = priv->admin_mtu = ndev->mtu; + ndev->max_mtu = IPOIB_CM_MTU; + + ndev->neigh_priv_len = sizeof(struct ipoib_neigh); + + /* + * Set the full membership bit, so that we join the right + * broadcast group, etc. + */ + priv->pkey |= 0x8000; + + ndev->broadcast[8] = priv->pkey >> 8; + ndev->broadcast[9] = priv->pkey & 0xff; + set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); + + ipoib_set_dev_features(priv); + + rc = ipoib_dev_init(ndev); + if (rc) { + pr_warn("%s: failed to initialize device: %s port %d (ret = %d)\n", + priv->ca->name, priv->dev->name, priv->port, rc); + } + + return 0; +} + static void ipoib_ndo_uninit(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev), *cpriv, *tcpriv; @@ -1909,6 +2009,7 @@ static const struct header_ops ipoib_header_ops = { }; static const struct net_device_ops ipoib_netdev_ops_pf = { + .ndo_init = ipoib_ndo_init, .ndo_uninit = ipoib_ndo_uninit, .ndo_open = ipoib_open, .ndo_stop = ipoib_stop, @@ -1928,6 +2029,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = { }; static const struct net_device_ops ipoib_netdev_ops_vf = { + .ndo_init = ipoib_ndo_init, .ndo_uninit = ipoib_ndo_uninit, .ndo_open = ipoib_open, .ndo_stop = ipoib_stop, @@ -2054,6 +2156,9 @@ struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port, if (!priv) return NULL; + priv->ca = hca; + priv->port = port; + dev = ipoib_get_netdev(hca, port, name); if (!dev) goto free_priv; @@ -2201,12 +2306,6 @@ static ssize_t create_child(struct device *dev, if (pkey <= 0 || pkey > 0xffff || pkey == 0x8000) return -EINVAL; - /* - * Set the full membership bit, so that we join the right - * broadcast group, etc. - */ - pkey |= 0x8000; - ret = ipoib_vlan_add(to_net_dev(dev), pkey); return ret ? 
ret : count; @@ -2238,86 +2337,17 @@ int ipoib_add_pkey_attr(struct net_device *dev) return device_create_file(&dev->dev, &dev_attr_pkey); } -void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca) -{ - priv->hca_caps = hca->attrs.device_cap_flags; - - if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) { - priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM; - - if (priv->hca_caps & IB_DEVICE_UD_TSO) - priv->dev->hw_features |= NETIF_F_TSO; - - priv->dev->features |= priv->dev->hw_features; - } -} - static struct net_device *ipoib_add_port(const char *format, struct ib_device *hca, u8 port) { struct ipoib_dev_priv *priv; - struct ib_port_attr attr; struct rdma_netdev *rn; - int result = -ENOMEM; + int result; priv = ipoib_intf_alloc(hca, port, format); if (!priv) { pr_warn("%s, %d: ipoib_intf_alloc failed\n", hca->name, port); - goto alloc_mem_failed; - } - - SET_NETDEV_DEV(priv->dev, hca->dev.parent); - priv->dev->dev_id = port - 1; - - result = ib_query_port(hca, port, &attr); - if (result) { - pr_warn("%s: ib_query_port %d failed\n", hca->name, port); - goto device_init_failed; - } - - priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); - - /* MTU will be reset when mcast join happens */ - priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); - priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; - priv->dev->max_mtu = IPOIB_CM_MTU; - - priv->dev->neigh_priv_len = sizeof(struct ipoib_neigh); - - result = ib_query_pkey(hca, port, 0, &priv->pkey); - if (result) { - pr_warn("%s: ib_query_pkey port %d failed (ret = %d)\n", - hca->name, port, result); - goto device_init_failed; - } - - ipoib_set_dev_features(priv, hca); - - /* - * Set the full membership bit, so that we join the right - * broadcast group, etc. - */ - priv->pkey |= 0x8000; - - priv->dev->broadcast[8] = priv->pkey >> 8; - priv->dev->broadcast[9] = priv->pkey & 0xff; - - result = rdma_query_gid(hca, port, 0, &priv->local_gid); - if (result) { - pr_warn("%s: rdma_query_gid port %d failed (ret = %d)\n", - hca->name, port, result); - goto device_init_failed; - } - - memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, - sizeof(union ib_gid)); - set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); - - result = ipoib_dev_init(priv->dev, hca, port); - if (result) { - pr_warn("%s: failed to initialize port %d (ret = %d)\n", - hca->name, port, result); - goto device_init_failed; + return ERR_PTR(-ENOMEM); } INIT_IB_EVENT_HANDLER(&priv->event_handler, @@ -2358,7 +2388,6 @@ device_init_failed: rn->free_rdma_netdev(priv->dev); kfree(priv); -alloc_mem_failed: return ERR_PTR(result); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c index 3e44087935ae..a86928a80c08 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c @@ -125,12 +125,6 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev, if (child_pkey == 0 || child_pkey == 0x8000) return -EINVAL; - /* - * Set the full membership bit, so that we join the right - * broadcast group, etc. 
- */ - child_pkey |= 0x8000; - err = __ipoib_vlan_add(ppriv, ipoib_priv(dev), child_pkey, IPOIB_RTNL_CHILD); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index b62ab85c8ead..3103729a73fd 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -55,35 +55,14 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, { int result; - priv->max_ib_mtu = ppriv->max_ib_mtu; - /* MTU will be reset when mcast join happens */ - priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); - priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; priv->parent = ppriv->dev; - set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags); - - ipoib_set_dev_features(priv, ppriv->ca); - priv->pkey = pkey; - - memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN); - memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid)); - set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); - priv->dev->broadcast[8] = pkey >> 8; - priv->dev->broadcast[9] = pkey & 0xff; - - result = ipoib_dev_init(priv->dev, ppriv->ca, ppriv->port); - if (result < 0) { - ipoib_warn(ppriv, "failed to initialize subinterface: " - "device %s, port %d", - ppriv->ca->name, ppriv->port); - goto err; - } + priv->child_type = type; result = register_netdevice(priv->dev); if (result) { ipoib_warn(priv, "failed to initialize; error %i", result); - goto err; + return result; } /* RTNL childs don't need proprietary sysfs entries */ @@ -99,17 +78,13 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, goto sysfs_failed; } - priv->child_type = type; list_add_tail(&priv->list, &ppriv->child_intfs); return 0; sysfs_failed: - result = -ENOMEM; unregister_netdevice(priv->dev); - -err: - return result; + return -ENOMEM; } int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) -- cgit From 9f49a5b5c21d58aa84e16cfdc5e99e49faefcb7a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 29 Jul 2018 11:34:56 +0300 Subject: RDMA/netdev: Use priv_destructor for netdev cleanup Now that the unregister_netdev flow for IPoIB no longer relies on external code we can now introduce the use of priv_destructor and needs_free_netdev. The rdma_netdev flow is switched to use the netdev common priv_destructor instead of the special free_rdma_netdev and the IPOIB ULP adjusted: - priv_destructor needs to switch to point to the ULP's destructor which will then call the rdma_ndev's in the right order - We need to be careful around the error unwind of register_netdev as it sometimes calls priv_destructor on failure - ULPs need to use ndo_init/uninit to ensure proper ordering of failures around register_netdev Switching to priv_destructor is a necessary pre-requisite to using the rtnl new_link mechanism. The VNIC user for rdma_netdev should also be revised, but that is left for another patch. 
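For context, what the core does with these two fields (an editor's sketch of the net core's unregister flow as of this series; simplified, not from the patch):

	/* net/core/dev.c, netdev_run_todo(), in outline: */
	if (dev->priv_destructor)
		dev->priv_destructor(dev);
	if (dev->needs_free_netdev)
		free_netdev(dev);

register_netdevice() error paths may also invoke priv_destructor directly, and that asymmetry is exactly why the IPoIB changes below park the pointer as NULL until registration has reached a point where running it is safe.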
Signed-off-by: Jason Gunthorpe Signed-off-by: Denis Drozdov Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 10 -- drivers/infiniband/ulp/ipoib/ipoib.h | 2 + drivers/infiniband/ulp/ipoib/ipoib_main.c | 101 +++++++++++++-------- drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 68 ++++++++------ .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 37 ++++---- include/linux/mlx5/driver.h | 3 - include/rdma/ib_verbs.h | 6 +- 7 files changed, 129 insertions(+), 98 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 06d6309b719a..13744b4631b4 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5157,11 +5157,6 @@ done: return num_counters; } -static void mlx5_ib_free_rdma_netdev(struct net_device *netdev) -{ - return mlx5_rdma_netdev_free(netdev); -} - static struct net_device* mlx5_ib_alloc_rdma_netdev(struct ib_device *hca, u8 port_num, @@ -5171,17 +5166,12 @@ mlx5_ib_alloc_rdma_netdev(struct ib_device *hca, void (*setup)(struct net_device *)) { struct net_device *netdev; - struct rdma_netdev *rn; if (type != RDMA_NETDEV_IPOIB) return ERR_PTR(-EOPNOTSUPP); netdev = mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca, name, setup); - if (likely(!IS_ERR_OR_NULL(netdev))) { - rn = netdev_priv(netdev); - rn->free_rdma_netdev = mlx5_ib_free_rdma_netdev; - } return netdev; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 02ad1a60dc80..d2cb0a8500e3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -323,6 +323,7 @@ struct ipoib_dev_priv { spinlock_t lock; struct net_device *dev; + void (*next_priv_destructor)(struct net_device *dev); struct napi_struct send_napi; struct napi_struct recv_napi; @@ -481,6 +482,7 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah) kref_put(&ah->ref, ipoib_free_ah); } int ipoib_open(struct net_device *dev); +void ipoib_intf_free(struct net_device *dev); int ipoib_add_pkey_attr(struct net_device *dev); int ipoib_add_umcast_attr(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 67ab52eec3e9..73d917d57f93 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2062,6 +2062,13 @@ void ipoib_setup_common(struct net_device *dev) netif_keep_dst(dev); memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); + + /* + * unregister_netdev always frees the netdev, we use this mode + * consistently to unify all the various unregister paths, including + * those connected to rtnl_link_ops which require it. + */ + dev->needs_free_netdev = true; } static void ipoib_build_priv(struct net_device *dev) @@ -2116,9 +2123,7 @@ static struct net_device rn->send = ipoib_send; rn->attach_mcast = ipoib_mcast_attach; rn->detach_mcast = ipoib_mcast_detach; - rn->free_rdma_netdev = free_netdev; rn->hca = hca; - dev->netdev_ops = &ipoib_netdev_default_pf; return dev; @@ -2173,6 +2178,15 @@ struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port, rn = netdev_priv(dev); rn->clnt_priv = priv; + + /* + * Only the child register_netdev flows can handle priv_destructor + * being set, so we force it to NULL here and handle manually until it + * is safe to turn on. 
+ */ + priv->next_priv_destructor = dev->priv_destructor; + dev->priv_destructor = NULL; + ipoib_build_priv(dev); return priv; @@ -2181,6 +2195,27 @@ free_priv: return NULL; } +void ipoib_intf_free(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + struct rdma_netdev *rn = netdev_priv(dev); + + dev->priv_destructor = priv->next_priv_destructor; + if (dev->priv_destructor) + dev->priv_destructor(dev); + + /* + * There are some error flows around register_netdev failing that may + * attempt to call priv_destructor twice, prevent that from happening. + */ + dev->priv_destructor = NULL; + + /* unregister/destroy is very complicated. Make bugs more obvious. */ + rn->clnt_priv = NULL; + + kfree(priv); +} + static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, char *buf) { @@ -2341,7 +2376,7 @@ static struct net_device *ipoib_add_port(const char *format, struct ib_device *hca, u8 port) { struct ipoib_dev_priv *priv; - struct rdma_netdev *rn; + struct net_device *ndev; int result; priv = ipoib_intf_alloc(hca, port, format); @@ -2349,6 +2384,7 @@ static struct net_device *ipoib_add_port(const char *format, pr_warn("%s, %d: ipoib_intf_alloc failed\n", hca->name, port); return ERR_PTR(-ENOMEM); } + ndev = priv->dev; INIT_IB_EVENT_HANDLER(&priv->event_handler, priv->ca, ipoib_event); @@ -2357,38 +2393,43 @@ static struct net_device *ipoib_add_port(const char *format, /* call event handler to ensure pkey in sync */ queue_work(ipoib_workqueue, &priv->flush_heavy); - result = register_netdev(priv->dev); + result = register_netdev(ndev); if (result) { pr_warn("%s: couldn't register ipoib port %d; error %d\n", hca->name, port, result); - ipoib_parent_unregister_pre(priv->dev); - goto device_init_failed; + + ipoib_parent_unregister_pre(ndev); + ipoib_intf_free(ndev); + free_netdev(ndev); + + return ERR_PTR(result); } - result = -ENOMEM; - if (ipoib_cm_add_mode_attr(priv->dev)) + /* + * We cannot set priv_destructor before register_netdev because we + * need priv to be always valid during the error flow to execute + * ipoib_parent_unregister_pre(). Instead handle it manually and only + * enter priv_destructor mode once we are completely registered. 
+ */ + ndev->priv_destructor = ipoib_intf_free; + + if (ipoib_cm_add_mode_attr(ndev)) goto sysfs_failed; - if (ipoib_add_pkey_attr(priv->dev)) + if (ipoib_add_pkey_attr(ndev)) goto sysfs_failed; - if (ipoib_add_umcast_attr(priv->dev)) + if (ipoib_add_umcast_attr(ndev)) goto sysfs_failed; - if (device_create_file(&priv->dev->dev, &dev_attr_create_child)) + if (device_create_file(&ndev->dev, &dev_attr_create_child)) goto sysfs_failed; - if (device_create_file(&priv->dev->dev, &dev_attr_delete_child)) + if (device_create_file(&ndev->dev, &dev_attr_delete_child)) goto sysfs_failed; - return priv->dev; + return ndev; sysfs_failed: - ipoib_parent_unregister_pre(priv->dev); - unregister_netdev(priv->dev); - -device_init_failed: - rn = netdev_priv(priv->dev); - rn->free_rdma_netdev(priv->dev); - kfree(priv); - - return ERR_PTR(result); + ipoib_parent_unregister_pre(ndev); + unregister_netdev(ndev); + return ERR_PTR(-ENOMEM); } static void ipoib_add_one(struct ib_device *device) @@ -2426,33 +2467,19 @@ static void ipoib_add_one(struct ib_device *device) static void ipoib_remove_one(struct ib_device *device, void *client_data) { - struct ipoib_dev_priv *priv, *tmp, *cpriv, *tcpriv; + struct ipoib_dev_priv *priv, *tmp; struct list_head *dev_list = client_data; if (!dev_list) return; list_for_each_entry_safe(priv, tmp, dev_list, list) { - struct rdma_netdev *parent_rn = netdev_priv(priv->dev); - ipoib_parent_unregister_pre(priv->dev); /* Wrap rtnl_lock/unlock with mutex to protect sysfs calls */ mutex_lock(&priv->sysfs_mutex); unregister_netdev(priv->dev); mutex_unlock(&priv->sysfs_mutex); - - parent_rn->free_rdma_netdev(priv->dev); - - list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { - struct rdma_netdev *child_rn; - - child_rn = netdev_priv(cpriv->dev); - child_rn->free_rdma_netdev(cpriv->dev); - kfree(cpriv); - } - - kfree(priv); } kfree(dev_list); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 3103729a73fd..7776334cf8c5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -50,31 +50,53 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr, } static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL); +/* + * NOTE: If this function fails then the priv->dev will remain valid, however + * priv can have been freed and must not be touched by caller in the error + * case. + * + * If (ndev->reg_state == NETREG_UNINITIALIZED) then it is up to the caller to + * free the net_device (just as rtnl_newlink does) otherwise the net_device + * will be freed when the rtnl is unlocked. + */ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, u16 pkey, int type) { + struct net_device *ndev = priv->dev; int result; + ASSERT_RTNL(); + priv->parent = ppriv->dev; priv->pkey = pkey; priv->child_type = type; - result = register_netdevice(priv->dev); + /* We do not need to touch priv if register_netdevice fails */ + ndev->priv_destructor = ipoib_intf_free; + + result = register_netdevice(ndev); if (result) { ipoib_warn(priv, "failed to initialize; error %i", result); + + /* + * register_netdevice sometimes calls priv_destructor, + * sometimes not. Make sure it was done. 
+ */ + if (ndev->priv_destructor) + ndev->priv_destructor(ndev); return result; } /* RTNL childs don't need proprietary sysfs entries */ if (type == IPOIB_LEGACY_CHILD) { - if (ipoib_cm_add_mode_attr(priv->dev)) + if (ipoib_cm_add_mode_attr(ndev)) goto sysfs_failed; - if (ipoib_add_pkey_attr(priv->dev)) + if (ipoib_add_pkey_attr(ndev)) goto sysfs_failed; - if (ipoib_add_umcast_attr(priv->dev)) + if (ipoib_add_umcast_attr(ndev)) goto sysfs_failed; - if (device_create_file(&priv->dev->dev, &dev_attr_parent)) + if (device_create_file(&ndev->dev, &dev_attr_parent)) goto sysfs_failed; } @@ -91,6 +113,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) { struct ipoib_dev_priv *ppriv, *priv; char intf_name[IFNAMSIZ]; + struct net_device *ndev; struct ipoib_dev_priv *tpriv; int result; @@ -122,12 +145,6 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) return restart_syscall(); } - priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name); - if (!priv) { - result = -ENOMEM; - goto out; - } - /* * First ensure this isn't a duplicate. We check the parent device and * then all of the legacy child interfaces to make sure the Pkey @@ -146,21 +163,23 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) } } + priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name); + if (!priv) { + result = -ENOMEM; + goto out; + } + ndev = priv->dev; + result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD); + if (result && ndev->reg_state == NETREG_UNINITIALIZED) + free_netdev(ndev); + out: up_write(&ppriv->vlan_rwsem); rtnl_unlock(); mutex_unlock(&ppriv->sysfs_mutex); - if (result && priv) { - struct rdma_netdev *rn; - - rn = netdev_priv(priv->dev); - rn->free_rdma_netdev(priv->dev); - kfree(priv); - } - return result; } @@ -212,14 +231,5 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) rtnl_unlock(); mutex_unlock(&ppriv->sysfs_mutex); - if (dev) { - struct rdma_netdev *rn; - - rn = netdev_priv(dev); - rn->free_rdma_netdev(priv->dev); - kfree(priv); - return 0; - } - - return -ENODEV; + return (dev) ? 
0 : -ENODEV; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index af3bb2f7a504..b8d150d2fd72 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -580,6 +580,22 @@ static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev) return 0; } +static void mlx5_rdma_netdev_free(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = priv->ppriv; + const struct mlx5e_profile *profile = priv->profile; + + mlx5e_detach_netdev(priv); + profile->cleanup(priv); + destroy_workqueue(priv->wq); + + if (!ipriv->sub_interface) { + mlx5i_pkey_qpn_ht_cleanup(netdev); + mlx5e_destroy_mdev_resources(priv->mdev); + } +} + struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, struct ib_device *ibdev, const char *name, @@ -653,6 +669,9 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, rn->detach_mcast = mlx5i_detach_mcast; rn->set_id = mlx5i_set_pkey_index; + netdev->priv_destructor = mlx5_rdma_netdev_free; + netdev->needs_free_netdev = 1; + return netdev; destroy_ht: @@ -665,21 +684,3 @@ err_free_netdev: return NULL; } EXPORT_SYMBOL(mlx5_rdma_netdev_alloc); - -void mlx5_rdma_netdev_free(struct net_device *netdev) -{ - struct mlx5e_priv *priv = mlx5i_epriv(netdev); - struct mlx5i_priv *ipriv = priv->ppriv; - const struct mlx5e_profile *profile = priv->profile; - - mlx5e_detach_netdev(priv); - profile->cleanup(priv); - destroy_workqueue(priv->wq); - - if (!ipriv->sub_interface) { - mlx5i_pkey_qpn_ht_cleanup(netdev); - mlx5e_destroy_mdev_resources(priv->mdev); - } - free_netdev(netdev); -} -EXPORT_SYMBOL(mlx5_rdma_netdev_free); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 957199c20a0f..96498ff6beb6 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1218,14 +1218,11 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, { return ERR_PTR(-EOPNOTSUPP); } - -static inline void mlx5_rdma_netdev_free(struct net_device *netdev) {} #else struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, struct ib_device *ibdev, const char *name, void (*setup)(struct net_device *)); -void mlx5_rdma_netdev_free(struct net_device *netdev); #endif /* CONFIG_MLX5_CORE_IPOIB */ struct mlx5_profile { diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index dea770e5b9ae..4ffe3e11e8fb 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2203,7 +2203,11 @@ struct rdma_netdev { struct ib_device *hca; u8 port_num; - /* cleanup function must be specified */ + /* + * cleanup function must be specified. + * FIXME: This is only used for OPA_VNIC and that usage should be + * removed too. + */ void (*free_rdma_netdev)(struct net_device *netdev); /* control functions */ -- cgit From ee190ab734ba4d3c7887bd193ce8124385738e44 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 29 Jul 2018 11:34:57 +0300 Subject: IB/ipoib: Get rid of the sysfs_mutex This mutex was introduced to deal with the deadlock formed by calling unregister_netdev from within the sysfs callback of a netdev. Now that we have priv_destructor and needs_free_netdev we can switch to the more targeted solution of running the unregister from a work queue. This avoids the deadlock and gets rid of the mutex. The next patch in the series needs this mutex eliminated to create atomicity of unregistration.
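The shape of the fix, stripped to its essentials (a sketch; the real code is ipoib_vlan_delete_task in the diff below):

	struct vlan_delete_work {
		struct work_struct work;
		struct net_device *dev;
	};

	static void deferred_unregister(struct work_struct *work)
	{
		struct vlan_delete_work *w =
			container_of(work, struct vlan_delete_work, work);

		rtnl_lock();	/* safe: we are no longer in a sysfs callback */
		if (w->dev->reg_state == NETREG_REGISTERED)
			unregister_netdevice(w->dev);
		rtnl_unlock();
		kfree(w);
	}

The sysfs callback only locates the child and queues this work; it never calls unregister_netdev itself, so it never needs to take the rtnl lock unconditionally and the deadlock cannot form.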
Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 - drivers/infiniband/ulp/ipoib/ipoib_cm.c | 7 --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 7 +-- drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 98 ++++++++++++++++++++----------- 4 files changed, 65 insertions(+), 48 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index d2cb0a8500e3..804cb4bee57d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -332,7 +332,6 @@ struct ipoib_dev_priv { struct rw_semaphore vlan_rwsem; struct mutex mcast_mutex; - struct mutex sysfs_mutex; struct rb_root path_tree; struct list_head path_list; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 16ea08dc59a8..ea01b8dd2be6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1517,19 +1517,13 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, { struct net_device *dev = to_net_dev(d); int ret; - struct ipoib_dev_priv *priv = ipoib_priv(dev); - - if (!mutex_trylock(&priv->sysfs_mutex)) - return restart_syscall(); if (!rtnl_trylock()) { - mutex_unlock(&priv->sysfs_mutex); return restart_syscall(); } if (dev->reg_state != NETREG_REGISTERED) { rtnl_unlock(); - mutex_unlock(&priv->sysfs_mutex); return -EPERM; } @@ -1541,7 +1535,6 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, */ if (ret != -EBUSY) rtnl_unlock(); - mutex_unlock(&priv->sysfs_mutex); return (!ret || ret == -EBUSY) ? count : ret; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 73d917d57f93..e9f4f261fe20 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2079,7 +2079,6 @@ static void ipoib_build_priv(struct net_device *dev) spin_lock_init(&priv->lock); init_rwsem(&priv->vlan_rwsem); mutex_init(&priv->mcast_mutex); - mutex_init(&priv->sysfs_mutex); INIT_LIST_HEAD(&priv->path_list); INIT_LIST_HEAD(&priv->child_intfs); @@ -2476,10 +2475,7 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data) list_for_each_entry_safe(priv, tmp, dev_list, list) { ipoib_parent_unregister_pre(priv->dev); - /* Wrap rtnl_lock/unlock with mutex to protect sysfs calls */ - mutex_lock(&priv->sysfs_mutex); unregister_netdev(priv->dev); - mutex_unlock(&priv->sysfs_mutex); } kfree(dev_list); @@ -2527,8 +2523,7 @@ static int __init ipoib_init_module(void) * its private workqueue, and we only queue up flush events * on our global flush workqueue. This avoids the deadlocks. 
*/ - ipoib_workqueue = alloc_ordered_workqueue("ipoib_flush", - WQ_MEM_RECLAIM); + ipoib_workqueue = alloc_ordered_workqueue("ipoib_flush", 0); if (!ipoib_workqueue) { ret = -ENOMEM; goto err_fs; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 7776334cf8c5..891c5b40018a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -125,23 +125,16 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) snprintf(intf_name, sizeof(intf_name), "%s.%04x", ppriv->dev->name, pkey); - if (!mutex_trylock(&ppriv->sysfs_mutex)) + if (!rtnl_trylock()) return restart_syscall(); - if (!rtnl_trylock()) { - mutex_unlock(&ppriv->sysfs_mutex); - return restart_syscall(); - } - if (pdev->reg_state != NETREG_REGISTERED) { rtnl_unlock(); - mutex_unlock(&ppriv->sysfs_mutex); return -EPERM; } if (!down_write_trylock(&ppriv->vlan_rwsem)) { rtnl_unlock(); - mutex_unlock(&ppriv->sysfs_mutex); return restart_syscall(); } @@ -178,58 +171,95 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) out: up_write(&ppriv->vlan_rwsem); rtnl_unlock(); - mutex_unlock(&ppriv->sysfs_mutex); return result; } +struct ipoib_vlan_delete_work { + struct work_struct work; + struct net_device *dev; +}; + +/* + * sysfs callbacks of a netdevice cannot obtain the rtnl lock as + * unregister_netdev ultimately deletes the sysfs files while holding the rtnl + * lock. This deadlocks the system. + * + * A callback can use rtnl_trylock to avoid the deadlock but it cannot call + * unregister_netdev as that internally takes and releases the rtnl_lock. So + * instead we find the netdev to unregister and then do the actual unregister + * from the global work queue where we can obtain the rtnl_lock safely. 
+ */ +static void ipoib_vlan_delete_task(struct work_struct *work) +{ + struct ipoib_vlan_delete_work *pwork = + container_of(work, struct ipoib_vlan_delete_work, work); + struct net_device *dev = pwork->dev; + + rtnl_lock(); + + /* Unregistering tasks can race with another task or parent removal */ + if (dev->reg_state == NETREG_REGISTERED) { + struct ipoib_dev_priv *priv = ipoib_priv(dev); + struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); + + down_write(&ppriv->vlan_rwsem); + list_del(&priv->list); + up_write(&ppriv->vlan_rwsem); + + ipoib_dbg(ppriv, "delete child vlan %s\n", dev->name); + unregister_netdevice(dev); + } + + rtnl_unlock(); + + kfree(pwork); +} + int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) { struct ipoib_dev_priv *ppriv, *priv, *tpriv; - struct net_device *dev = NULL; + int rc; if (!capable(CAP_NET_ADMIN)) return -EPERM; - ppriv = ipoib_priv(pdev); - - if (!mutex_trylock(&ppriv->sysfs_mutex)) + if (!rtnl_trylock()) return restart_syscall(); - if (!rtnl_trylock()) { - mutex_unlock(&ppriv->sysfs_mutex); - return restart_syscall(); - } - if (pdev->reg_state != NETREG_REGISTERED) { rtnl_unlock(); - mutex_unlock(&ppriv->sysfs_mutex); return -EPERM; } - if (!down_write_trylock(&ppriv->vlan_rwsem)) { - rtnl_unlock(); - mutex_unlock(&ppriv->sysfs_mutex); - return restart_syscall(); - } + ppriv = ipoib_priv(pdev); + rc = -ENODEV; list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) { if (priv->pkey == pkey && priv->child_type == IPOIB_LEGACY_CHILD) { - list_del(&priv->list); - dev = priv->dev; + struct ipoib_vlan_delete_work *work; + + work = kmalloc(sizeof(*work), GFP_KERNEL); + if (!work) { + rc = -ENOMEM; + goto out; + } + + down_write(&ppriv->vlan_rwsem); + list_del_init(&priv->list); + up_write(&ppriv->vlan_rwsem); + work->dev = priv->dev; + INIT_WORK(&work->work, ipoib_vlan_delete_task); + queue_work(ipoib_workqueue, &work->work); + + rc = 0; break; } } - up_write(&ppriv->vlan_rwsem); - - if (dev) { - ipoib_dbg(ppriv, "delete child vlan %s\n", dev->name); - unregister_netdevice(dev); - } +out: rtnl_unlock(); - mutex_unlock(&ppriv->sysfs_mutex); - return (dev) ? 0 : -ENODEV; + return rc; } -- cgit From 25405d98a2aa0b9983bb9c36b0b00815d39394f4 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 29 Jul 2018 11:34:58 +0300 Subject: IB/ipoib: Do not remove child devices from within the ndo_uninit Switching to priv_destructor and needs_free_netdev created a subtle ordering problem in ipoib_remove_one. Now that unregister_netdev frees the netdev and priv we must ensure that the children are unregistered before trying to unregister the parent, or child unregister will use after free. The solution is to unregister the children, then parent, in the same batch all while holding the rtnl_lock. This closes all the races where a new child could have been added and ensures proper ordering. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib.h | 7 +++++++ drivers/infiniband/ulp/ipoib/ipoib_main.c | 28 +++++++++++++++++----------- drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 6 ++++++ 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 804cb4bee57d..1abe3c62f106 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -330,6 +330,13 @@ struct ipoib_dev_priv { unsigned long flags; + /* + * This protects access to the child_intfs list. 
+ * To READ from child_intfs the RTNL or vlan_rwsem read side must be + * held. To WRITE RTNL and the vlan_rwsem write side must be held (in + * that order) This lock exists because we have a few contexts where + * we need the child_intfs, but do not want to grab the RTNL. + */ struct rw_semaphore vlan_rwsem; struct mutex mcast_mutex; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index e9f4f261fe20..b2fe23d60103 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1939,18 +1939,15 @@ static int ipoib_ndo_init(struct net_device *ndev) static void ipoib_ndo_uninit(struct net_device *dev) { - struct ipoib_dev_priv *priv = ipoib_priv(dev), *cpriv, *tcpriv; - LIST_HEAD(head); + struct ipoib_dev_priv *priv = ipoib_priv(dev); ASSERT_RTNL(); - /* Delete any child interfaces first */ - list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { - /* Stop GC on child */ - cancel_delayed_work_sync(&cpriv->neigh_reap_task); - unregister_netdevice_queue(cpriv->dev, &head); - } - unregister_netdevice_many(&head); + /* + * ipoib_remove_one guarantees the children are removed before the + * parent, and that is the only place where a parent can be removed. + */ + WARN_ON(!list_empty(&priv->child_intfs)); ipoib_neigh_hash_uninit(dev); @@ -2466,16 +2463,25 @@ static void ipoib_add_one(struct ib_device *device) static void ipoib_remove_one(struct ib_device *device, void *client_data) { - struct ipoib_dev_priv *priv, *tmp; + struct ipoib_dev_priv *priv, *tmp, *cpriv, *tcpriv; struct list_head *dev_list = client_data; if (!dev_list) return; list_for_each_entry_safe(priv, tmp, dev_list, list) { + LIST_HEAD(head); ipoib_parent_unregister_pre(priv->dev); - unregister_netdev(priv->dev); + rtnl_lock(); + + list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, + list) + unregister_netdevice_queue(cpriv->dev, &head); + unregister_netdevice_queue(priv->dev, &head); + unregister_netdevice_many(&head); + + rtnl_unlock(); } kfree(dev_list); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 891c5b40018a..fa4dfcee2644 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -67,6 +67,12 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, ASSERT_RTNL(); + /* + * Racing with unregister of the parent must be prevented by the + * caller. + */ + WARN_ON(ppriv->dev->reg_state != NETREG_REGISTERED); + priv->parent = ppriv->dev; priv->pkey = pkey; priv->child_type = type; -- cgit From 13476d35bba60b59521ff25d902fdb552b8bf2ac Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 29 Jul 2018 11:34:59 +0300 Subject: IB/ipoib: Maintain the child_intfs list from ndo_init/uninit This fixes a bug in the netlink path where the vlan_rwsem was not held around __ipoib_vlan_add causing the child_intfs to be manipulated unsafely. In the process this greatly simplifies the vlan_rwsem write side locking to only cover a single non-sleeping statement. This also further increases the safety of the removal ordering by holding the netdev of the parent while the child is active to ensure most bugs become either an oops on a NULL priv or a deadlock on the netdev refcount. 
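Restated as code, the locking rule from the comment above (an editor's sketch using the names from the patch; do_something() is a placeholder):

	/* reader: hold the RTNL *or* the vlan_rwsem read side */
	down_read(&ppriv->vlan_rwsem);
	list_for_each_entry(cpriv, &ppriv->child_intfs, list)
		do_something(cpriv);
	up_read(&ppriv->vlan_rwsem);

	/* writer: hold the RTNL first, then the vlan_rwsem write side */
	ASSERT_RTNL();
	down_write(&ppriv->vlan_rwsem);
	list_add_tail(&priv->list, &ppriv->child_intfs);
	up_write(&ppriv->vlan_rwsem);

This matches how ipoib_child_init and ipoib_ndo_uninit manipulate the list in the diff below: both run under the RTNL (ndo_init and ndo_uninit are core callbacks) and take the write side only around the list operation itself.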
Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 16 ++++++++++++++++ drivers/infiniband/ulp/ipoib/ipoib_netlink.c | 14 -------------- drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 12 ------------ 3 files changed, 16 insertions(+), 26 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index b2fe23d60103..e3d28f9ad9c0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1890,6 +1890,12 @@ static void ipoib_child_init(struct net_device *ndev) struct ipoib_dev_priv *priv = ipoib_priv(ndev); struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); + dev_hold(priv->parent); + + down_write(&ppriv->vlan_rwsem); + list_add_tail(&priv->list, &ppriv->child_intfs); + up_write(&ppriv->vlan_rwsem); + priv->max_ib_mtu = ppriv->max_ib_mtu; set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags); memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN); @@ -1959,6 +1965,16 @@ static void ipoib_ndo_uninit(struct net_device *dev) destroy_workqueue(priv->wq); priv->wq = NULL; } + + if (priv->parent) { + struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); + + down_write(&ppriv->vlan_rwsem); + list_del(&priv->list); + up_write(&ppriv->vlan_rwsem); + + dev_put(priv->parent); + } } static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_state) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c index a86928a80c08..7e093b7aad8f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c @@ -133,19 +133,6 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev, return err; } -static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head *head) -{ - struct ipoib_dev_priv *priv, *ppriv; - - priv = ipoib_priv(dev); - ppriv = ipoib_priv(priv->parent); - - down_write(&ppriv->vlan_rwsem); - unregister_netdevice_queue(dev, head); - list_del(&priv->list); - up_write(&ppriv->vlan_rwsem); -} - static size_t ipoib_get_size(const struct net_device *dev) { return nla_total_size(2) + /* IFLA_IPOIB_PKEY */ @@ -161,7 +148,6 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = { .setup = ipoib_setup_common, .newlink = ipoib_new_child_link, .changelink = ipoib_changelink, - .dellink = ipoib_unregister_child_dev, .get_size = ipoib_get_size, .fill_info = ipoib_fill_info, }; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index fa4dfcee2644..ca3a7f6c0998 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -106,8 +106,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, goto sysfs_failed; } - list_add_tail(&priv->list, &ppriv->child_intfs); - return 0; sysfs_failed: @@ -139,11 +137,6 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) return -EPERM; } - if (!down_write_trylock(&ppriv->vlan_rwsem)) { - rtnl_unlock(); - return restart_syscall(); - } - /* * First ensure this isn't a duplicate. 
We check the parent device and * then all of the legacy child interfaces to make sure the Pkey @@ -175,7 +168,6 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) free_netdev(ndev); out: - up_write(&ppriv->vlan_rwsem); rtnl_unlock(); return result; @@ -209,10 +201,6 @@ static void ipoib_vlan_delete_task(struct work_struct *work) struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); - down_write(&ppriv->vlan_rwsem); - list_del(&priv->list); - up_write(&ppriv->vlan_rwsem); - ipoib_dbg(ppriv, "delete child vlan %s\n", dev->name); unregister_netdevice(dev); } -- cgit From 760109760455a0a35491cb02a3bc3e15f0c180f6 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 29 Jul 2018 11:35:00 +0300 Subject: IB/ipoib: Consolidate checking of the proposed child interface Move all the checking for pkey and other validity to the __ipoib_vlan_add function. This removes the last difference from the control flow of the __ipoib_vlan_add to make the overall design simpler to understand. Signed-off-by: Jason Gunthorpe Signed-off-by: Erez Shitrit Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_netlink.c | 3 -- drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 77 +++++++++++++++++++--------- 2 files changed, 52 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c index 7e093b7aad8f..d4d553a51fa9 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c @@ -122,9 +122,6 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev, } else child_pkey = nla_get_u16(data[IFLA_IPOIB_PKEY]); - if (child_pkey == 0 || child_pkey == 0x8000) - return -EINVAL; - err = __ipoib_vlan_add(ppriv, ipoib_priv(dev), child_pkey, IPOIB_RTNL_CHILD); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index ca3a7f6c0998..341753fbda54 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -50,6 +50,39 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr, } static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL); +static bool is_child_unique(struct ipoib_dev_priv *ppriv, + struct ipoib_dev_priv *priv) +{ + struct ipoib_dev_priv *tpriv; + + ASSERT_RTNL(); + + /* + * Since the legacy sysfs interface uses pkey for deletion it cannot + * support more than one interface with the same pkey, it creates + * ambiguity. The RTNL interface deletes using the netdev so it does + * not have a problem to support duplicated pkeys. + */ + if (priv->child_type != IPOIB_LEGACY_CHILD) + return true; + + /* + * First ensure this isn't a duplicate. We check the parent device and + * then all of the legacy child interfaces to make sure the Pkey + * doesn't match. 
+ */ + if (ppriv->pkey == priv->pkey) + return false; + + list_for_each_entry(tpriv, &ppriv->child_intfs, list) { + if (tpriv->pkey == priv->pkey && + tpriv->child_type == IPOIB_LEGACY_CHILD) + return false; + } + + return true; +} + /* * NOTE: If this function fails then the priv->dev will remain valid, however * priv can have been freed and must not be touched by caller in the error @@ -73,10 +106,20 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, */ WARN_ON(ppriv->dev->reg_state != NETREG_REGISTERED); + if (pkey == 0 || pkey == 0x8000) { + result = -EINVAL; + goto out_early; + } + priv->parent = ppriv->dev; priv->pkey = pkey; priv->child_type = type; + if (!is_child_unique(ppriv, priv)) { + result = -ENOTUNIQ; + goto out_early; + } + /* We do not need to touch priv if register_netdevice fails */ ndev->priv_destructor = ipoib_intf_free; @@ -88,9 +131,7 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, * register_netdevice sometimes calls priv_destructor, * sometimes not. Make sure it was done. */ - if (ndev->priv_destructor) - ndev->priv_destructor(ndev); - return result; + goto out_early; } /* RTNL childs don't need proprietary sysfs entries */ @@ -111,6 +152,11 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, sysfs_failed: unregister_netdevice(priv->dev); return -ENOMEM; + +out_early: + if (ndev->priv_destructor) + ndev->priv_destructor(ndev); + return result; } int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) @@ -118,17 +164,11 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) struct ipoib_dev_priv *ppriv, *priv; char intf_name[IFNAMSIZ]; struct net_device *ndev; - struct ipoib_dev_priv *tpriv; int result; if (!capable(CAP_NET_ADMIN)) return -EPERM; - ppriv = ipoib_priv(pdev); - - snprintf(intf_name, sizeof(intf_name), "%s.%04x", - ppriv->dev->name, pkey); - if (!rtnl_trylock()) return restart_syscall(); @@ -137,23 +177,10 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) return -EPERM; } - /* - * First ensure this isn't a duplicate. We check the parent device and - * then all of the legacy child interfaces to make sure the Pkey - * doesn't match. - */ - if (ppriv->pkey == pkey) { - result = -ENOTUNIQ; - goto out; - } + ppriv = ipoib_priv(pdev); - list_for_each_entry(tpriv, &ppriv->child_intfs, list) { - if (tpriv->pkey == pkey && - tpriv->child_type == IPOIB_LEGACY_CHILD) { - result = -ENOTUNIQ; - goto out; - } - } + snprintf(intf_name, sizeof(intf_name), "%s.%04x", + ppriv->dev->name, pkey); priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name); if (!priv) { -- cgit From 58796e67d5d529e2010f6985fae7a5cef02f19a8 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 6 Aug 2018 07:45:51 +0300 Subject: IB/ucm: Initialize sgid request GID attribute pointer sgid_attr is uninitialized on the stack, initialize it to NULL. 
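To see why the one-line "= {}" change below is more robust than adding yet another NULL assignment, compare the two initialization styles (a minimal editorial sketch using the names from the patch):

	/* Piecemeal clearing: any member added to the struct later, such as
	 * the new sgid_attr pointer, is left holding stack garbage.
	 */
	struct ib_cm_sidr_req_param param;
	param.private_data = NULL;
	param.path = NULL;

	/* Empty-brace initialization zeroes every member, including ones
	 * added after this code was written.
	 */
	struct ib_cm_sidr_req_param param = {};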
Fixes: 398391071f25 ("IB/cm: Replace members of sa_path_rec with 'struct sgid_attr *'") Signed-off-by: Parav Pandit Reviewed-by: Yossi Itigin Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/ucm.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 9eef96dacbd7..3e21a879d386 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -1000,14 +1000,11 @@ static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { - struct ib_cm_sidr_req_param param; + struct ib_cm_sidr_req_param param = {}; struct ib_ucm_context *ctx; struct ib_ucm_sidr_req cmd; int result; - param.private_data = NULL; - param.path = NULL; - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; -- cgit From 0c66847793d1982d1083dc6f7adad60fa265ce9c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 1 Aug 2018 14:25:39 -0700 Subject: overflow.h: Add arithmetic shift helper Add check_shl_overflow() helper to assist driver authors in ensuring that shift operations don't cause overflows or other odd conditions. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky [kees: tweaked comments and commit log, dropped unneeded assignment] Signed-off-by: Kees Cook --- include/linux/overflow.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 8712ff70995f..40b48e2133cb 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -202,6 +202,37 @@ #endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */ +/** check_shl_overflow() - Calculate a left-shifted value and check overflow + * + * @a: Value to be shifted + * @s: How many bits left to shift + * @d: Pointer to where to store the result + * + * Computes *@d = (@a << @s) + * + * Returns true if '*d' cannot hold the result or when 'a << s' doesn't + * make sense. Example conditions: + * - 'a << s' causes bits to be lost when stored in *d. + * - 's' is garbage (e.g. negative) or so large that the result of + * 'a << s' is guaranteed to be 0. + * - 'a' is negative. + * - 'a << s' sets the sign bit, if any, in '*d'. + * + * '*d' will hold the results of the attempted shift, but is not + * considered "safe for use" if true is returned. + */ +#define check_shl_overflow(a, s, d) ({ \ + typeof(a) _a = a; \ + typeof(s) _s = s; \ + typeof(d) _d = d; \ + u64 _a_full = _a; \ + unsigned int _to_shift = \ + _s >= 0 && _s < 8 * sizeof(*d) ? _s : 0; \ + *_d = (_a_full << _to_shift); \ + (_to_shift != _s || *_d < 0 || _a < 0 || \ + (*_d >> _to_shift) != _a); \ +}) + /** * array_size() - Calculate size of 2-dimensional array. * -- cgit From d36b6ad27c7b95e3f6bfbf6ea33757c8e8accf01 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 1 Aug 2018 14:25:40 -0700 Subject: test_overflow: Add shift overflow tests This adds overflow tests for the new check_shl_overflow() helper to validate overflow, signedness glitches, storage glitches, etc. 
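As a usage sketch of the helper (illustrative only; the variable names are invented here, though the mlx5 fix later in this series applies the same pattern to its rwq fields):

	u32 buf_size;

	/* wqe_count and wqe_shift come from userspace; refuse a shift that
	 * is negative, at least as wide as u32, or that would drop high
	 * bits of wqe_count.
	 */
	if (check_shl_overflow(wqe_count, wqe_shift, &buf_size))
		return -EINVAL;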
Co-developed-by: Rasmus Villemoes Signed-off-by: Kees Cook Signed-off-by: Jason Gunthorpe --- lib/test_overflow.c | 198 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 197 insertions(+), 1 deletion(-) diff --git a/lib/test_overflow.c b/lib/test_overflow.c index 2278fe05a1b0..fc680562d8b6 100644 --- a/lib/test_overflow.c +++ b/lib/test_overflow.c @@ -252,7 +252,8 @@ static int __init test_ ## t ## _overflow(void) { \ int err = 0; \ unsigned i; \ \ - pr_info("%-3s: %zu tests\n", #t, ARRAY_SIZE(t ## _tests)); \ + pr_info("%-3s: %zu arithmetic tests\n", #t, \ + ARRAY_SIZE(t ## _tests)); \ for (i = 0; i < ARRAY_SIZE(t ## _tests); ++i) \ err |= do_test_ ## t(&t ## _tests[i]); \ return err; \ @@ -287,6 +288,200 @@ static int __init test_overflow_calculation(void) return err; } +static int __init test_overflow_shift(void) +{ + int err = 0; + +/* Args are: value, shift, type, expected result, overflow expected */ +#define TEST_ONE_SHIFT(a, s, t, expect, of) ({ \ + int __failed = 0; \ + typeof(a) __a = (a); \ + typeof(s) __s = (s); \ + t __e = (expect); \ + t __d; \ + bool __of = check_shl_overflow(__a, __s, &__d); \ + if (__of != of) { \ + pr_warn("expected (%s)(%s << %s) to%s overflow\n", \ + #t, #a, #s, of ? "" : " not"); \ + __failed = 1; \ + } else if (!__of && __d != __e) { \ + pr_warn("expected (%s)(%s << %s) == %s\n", \ + #t, #a, #s, #expect); \ + if ((t)-1 < 0) \ + pr_warn("got %lld\n", (s64)__d); \ + else \ + pr_warn("got %llu\n", (u64)__d); \ + __failed = 1; \ + } \ + if (!__failed) \ + pr_info("ok: (%s)(%s << %s) == %s\n", #t, #a, #s, \ + of ? "overflow" : #expect); \ + __failed; \ +}) + + /* Sane shifts. */ + err |= TEST_ONE_SHIFT(1, 0, u8, 1 << 0, false); + err |= TEST_ONE_SHIFT(1, 4, u8, 1 << 4, false); + err |= TEST_ONE_SHIFT(1, 7, u8, 1 << 7, false); + err |= TEST_ONE_SHIFT(0xF, 4, u8, 0xF << 4, false); + err |= TEST_ONE_SHIFT(1, 0, u16, 1 << 0, false); + err |= TEST_ONE_SHIFT(1, 10, u16, 1 << 10, false); + err |= TEST_ONE_SHIFT(1, 15, u16, 1 << 15, false); + err |= TEST_ONE_SHIFT(0xFF, 8, u16, 0xFF << 8, false); + err |= TEST_ONE_SHIFT(1, 0, int, 1 << 0, false); + err |= TEST_ONE_SHIFT(1, 16, int, 1 << 16, false); + err |= TEST_ONE_SHIFT(1, 30, int, 1 << 30, false); + err |= TEST_ONE_SHIFT(1, 0, s32, 1 << 0, false); + err |= TEST_ONE_SHIFT(1, 16, s32, 1 << 16, false); + err |= TEST_ONE_SHIFT(1, 30, s32, 1 << 30, false); + err |= TEST_ONE_SHIFT(1, 0, unsigned int, 1U << 0, false); + err |= TEST_ONE_SHIFT(1, 20, unsigned int, 1U << 20, false); + err |= TEST_ONE_SHIFT(1, 31, unsigned int, 1U << 31, false); + err |= TEST_ONE_SHIFT(0xFFFFU, 16, unsigned int, 0xFFFFU << 16, false); + err |= TEST_ONE_SHIFT(1, 0, u32, 1U << 0, false); + err |= TEST_ONE_SHIFT(1, 20, u32, 1U << 20, false); + err |= TEST_ONE_SHIFT(1, 31, u32, 1U << 31, false); + err |= TEST_ONE_SHIFT(0xFFFFU, 16, u32, 0xFFFFU << 16, false); + err |= TEST_ONE_SHIFT(1, 0, u64, 1ULL << 0, false); + err |= TEST_ONE_SHIFT(1, 40, u64, 1ULL << 40, false); + err |= TEST_ONE_SHIFT(1, 63, u64, 1ULL << 63, false); + err |= TEST_ONE_SHIFT(0xFFFFFFFFULL, 32, u64, + 0xFFFFFFFFULL << 32, false); + + /* Sane shift: start and end with 0, without a too-wide shift. */ + err |= TEST_ONE_SHIFT(0, 7, u8, 0, false); + err |= TEST_ONE_SHIFT(0, 15, u16, 0, false); + err |= TEST_ONE_SHIFT(0, 31, unsigned int, 0, false); + err |= TEST_ONE_SHIFT(0, 31, u32, 0, false); + err |= TEST_ONE_SHIFT(0, 63, u64, 0, false); + + /* Sane shift: start and end with 0, without reaching signed bit. 
*/ + err |= TEST_ONE_SHIFT(0, 6, s8, 0, false); + err |= TEST_ONE_SHIFT(0, 14, s16, 0, false); + err |= TEST_ONE_SHIFT(0, 30, int, 0, false); + err |= TEST_ONE_SHIFT(0, 30, s32, 0, false); + err |= TEST_ONE_SHIFT(0, 62, s64, 0, false); + + /* Overflow: shifted the bit off the end. */ + err |= TEST_ONE_SHIFT(1, 8, u8, 0, true); + err |= TEST_ONE_SHIFT(1, 16, u16, 0, true); + err |= TEST_ONE_SHIFT(1, 32, unsigned int, 0, true); + err |= TEST_ONE_SHIFT(1, 32, u32, 0, true); + err |= TEST_ONE_SHIFT(1, 64, u64, 0, true); + + /* Overflow: shifted into the signed bit. */ + err |= TEST_ONE_SHIFT(1, 7, s8, 0, true); + err |= TEST_ONE_SHIFT(1, 15, s16, 0, true); + err |= TEST_ONE_SHIFT(1, 31, int, 0, true); + err |= TEST_ONE_SHIFT(1, 31, s32, 0, true); + err |= TEST_ONE_SHIFT(1, 63, s64, 0, true); + + /* Overflow: high bit falls off unsigned types. */ + /* 10010110 */ + err |= TEST_ONE_SHIFT(150, 1, u8, 0, true); + /* 1000100010010110 */ + err |= TEST_ONE_SHIFT(34966, 1, u16, 0, true); + /* 10000100000010001000100010010110 */ + err |= TEST_ONE_SHIFT(2215151766U, 1, u32, 0, true); + err |= TEST_ONE_SHIFT(2215151766U, 1, unsigned int, 0, true); + /* 1000001000010000010000000100000010000100000010001000100010010110 */ + err |= TEST_ONE_SHIFT(9372061470395238550ULL, 1, u64, 0, true); + + /* Overflow: bit shifted into signed bit on signed types. */ + /* 01001011 */ + err |= TEST_ONE_SHIFT(75, 1, s8, 0, true); + /* 0100010001001011 */ + err |= TEST_ONE_SHIFT(17483, 1, s16, 0, true); + /* 01000010000001000100010001001011 */ + err |= TEST_ONE_SHIFT(1107575883, 1, s32, 0, true); + err |= TEST_ONE_SHIFT(1107575883, 1, int, 0, true); + /* 0100000100001000001000000010000001000010000001000100010001001011 */ + err |= TEST_ONE_SHIFT(4686030735197619275LL, 1, s64, 0, true); + + /* Overflow: bit shifted past signed bit on signed types. */ + /* 01001011 */ + err |= TEST_ONE_SHIFT(75, 2, s8, 0, true); + /* 0100010001001011 */ + err |= TEST_ONE_SHIFT(17483, 2, s16, 0, true); + /* 01000010000001000100010001001011 */ + err |= TEST_ONE_SHIFT(1107575883, 2, s32, 0, true); + err |= TEST_ONE_SHIFT(1107575883, 2, int, 0, true); + /* 0100000100001000001000000010000001000010000001000100010001001011 */ + err |= TEST_ONE_SHIFT(4686030735197619275LL, 2, s64, 0, true); + + /* Overflow: values larger than destination type. */ + err |= TEST_ONE_SHIFT(0x100, 0, u8, 0, true); + err |= TEST_ONE_SHIFT(0xFF, 0, s8, 0, true); + err |= TEST_ONE_SHIFT(0x10000U, 0, u16, 0, true); + err |= TEST_ONE_SHIFT(0xFFFFU, 0, s16, 0, true); + err |= TEST_ONE_SHIFT(0x100000000ULL, 0, u32, 0, true); + err |= TEST_ONE_SHIFT(0x100000000ULL, 0, unsigned int, 0, true); + err |= TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, s32, 0, true); + err |= TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, int, 0, true); + err |= TEST_ONE_SHIFT(0xFFFFFFFFFFFFFFFFULL, 0, s64, 0, true); + + /* Nonsense: negative initial value. */ + err |= TEST_ONE_SHIFT(-1, 0, s8, 0, true); + err |= TEST_ONE_SHIFT(-1, 0, u8, 0, true); + err |= TEST_ONE_SHIFT(-5, 0, s16, 0, true); + err |= TEST_ONE_SHIFT(-5, 0, u16, 0, true); + err |= TEST_ONE_SHIFT(-10, 0, int, 0, true); + err |= TEST_ONE_SHIFT(-10, 0, unsigned int, 0, true); + err |= TEST_ONE_SHIFT(-100, 0, s32, 0, true); + err |= TEST_ONE_SHIFT(-100, 0, u32, 0, true); + err |= TEST_ONE_SHIFT(-10000, 0, s64, 0, true); + err |= TEST_ONE_SHIFT(-10000, 0, u64, 0, true); + + /* Nonsense: negative shift values. 
*/ + err |= TEST_ONE_SHIFT(0, -5, s8, 0, true); + err |= TEST_ONE_SHIFT(0, -5, u8, 0, true); + err |= TEST_ONE_SHIFT(0, -10, s16, 0, true); + err |= TEST_ONE_SHIFT(0, -10, u16, 0, true); + err |= TEST_ONE_SHIFT(0, -15, int, 0, true); + err |= TEST_ONE_SHIFT(0, -15, unsigned int, 0, true); + err |= TEST_ONE_SHIFT(0, -20, s32, 0, true); + err |= TEST_ONE_SHIFT(0, -20, u32, 0, true); + err |= TEST_ONE_SHIFT(0, -30, s64, 0, true); + err |= TEST_ONE_SHIFT(0, -30, u64, 0, true); + + /* Overflow: shifted at or beyond entire type's bit width. */ + err |= TEST_ONE_SHIFT(0, 8, u8, 0, true); + err |= TEST_ONE_SHIFT(0, 9, u8, 0, true); + err |= TEST_ONE_SHIFT(0, 8, s8, 0, true); + err |= TEST_ONE_SHIFT(0, 9, s8, 0, true); + err |= TEST_ONE_SHIFT(0, 16, u16, 0, true); + err |= TEST_ONE_SHIFT(0, 17, u16, 0, true); + err |= TEST_ONE_SHIFT(0, 16, s16, 0, true); + err |= TEST_ONE_SHIFT(0, 17, s16, 0, true); + err |= TEST_ONE_SHIFT(0, 32, u32, 0, true); + err |= TEST_ONE_SHIFT(0, 33, u32, 0, true); + err |= TEST_ONE_SHIFT(0, 32, int, 0, true); + err |= TEST_ONE_SHIFT(0, 33, int, 0, true); + err |= TEST_ONE_SHIFT(0, 32, s32, 0, true); + err |= TEST_ONE_SHIFT(0, 33, s32, 0, true); + err |= TEST_ONE_SHIFT(0, 64, u64, 0, true); + err |= TEST_ONE_SHIFT(0, 65, u64, 0, true); + err |= TEST_ONE_SHIFT(0, 64, s64, 0, true); + err |= TEST_ONE_SHIFT(0, 65, s64, 0, true); + + /* + * Corner case: for unsigned types, we fail when we've shifted + * through the entire width of bits. For signed types, we might + * want to match this behavior, but that would mean noticing if + * we shift through all but the signed bit, and this is not + * currently detected (but we'll notice an overflow into the + * signed bit). So, for now, we will test this condition but + * mark it as not expected to overflow. + */ + err |= TEST_ONE_SHIFT(0, 7, s8, 0, false); + err |= TEST_ONE_SHIFT(0, 15, s16, 0, false); + err |= TEST_ONE_SHIFT(0, 31, int, 0, false); + err |= TEST_ONE_SHIFT(0, 31, s32, 0, false); + err |= TEST_ONE_SHIFT(0, 63, s64, 0, false); + + return err; +} + /* * Deal with the various forms of allocator arguments. See comments above * the DEFINE_TEST_ALLOC() instances for mapping of the "bits". 
@@ -397,6 +592,7 @@ static int __init test_module_init(void) int err = 0; err |= test_overflow_calculation(); + err |= test_overflow_shift(); err |= test_overflow_allocation(); if (err) { -- cgit From 0dfe452241f4904de497aef01ad2f609ccb9be90 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 1 Aug 2018 14:25:41 -0700 Subject: RDMA/mlx5: Fix shift overflow in mlx5_ib_create_wq [ 61.182439] UBSAN: Undefined behaviour in drivers/infiniband/hw/mlx5/qp.c:5366:34 [ 61.183673] shift exponent 4294967288 is too large for 32-bit type 'unsigned int' [ 61.185530] CPU: 0 PID: 639 Comm: qp Not tainted 4.18.0-rc1-00037-g4aa1d69a9c60-dirty #96 [ 61.186981] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-2.fc27 04/01/2014 [ 61.188315] Call Trace: [ 61.188661] dump_stack+0xc7/0x13b [ 61.190427] ubsan_epilogue+0x9/0x49 [ 61.190899] __ubsan_handle_shift_out_of_bounds+0x1ea/0x22f [ 61.197040] mlx5_ib_create_wq+0x1c99/0x1d50 [ 61.206632] ib_uverbs_ex_create_wq+0x499/0x820 [ 61.213892] ib_uverbs_write+0x77e/0xae0 [ 61.248018] vfs_write+0x121/0x3b0 [ 61.249831] ksys_write+0xa1/0x120 [ 61.254024] do_syscall_64+0x7c/0x2a0 [ 61.256178] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 61.259211] RIP: 0033:0x7f54bab70e99 [ 61.262125] Code: 00 f3 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 [ 61.268678] RSP: 002b:00007ffe1541c318 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 61.271076] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f54bab70e99 [ 61.273795] RDX: 0000000000000070 RSI: 0000000020000240 RDI: 0000000000000003 [ 61.276982] RBP: 00007ffe1541c330 R08: 00000000200078e0 R09: 0000000000000002 [ 61.280035] R10: 0000000000000000 R11: 0000000000000246 R12: 00000000004005c0 [ 61.283279] R13: 00007ffe1541c420 R14: 0000000000000000 R15: 0000000000000000 Cc: # 4.7 Fixes: 79b20a6c3014 ("IB/mlx5: Add receive Work Queue verbs") Cc: syzkaller Reported-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Kees Cook Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 6efd770797d1..351c2efceb35 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -5362,7 +5362,9 @@ static int set_user_rq_size(struct mlx5_ib_dev *dev, rwq->wqe_count = ucmd->rq_wqe_count; rwq->wqe_shift = ucmd->rq_wqe_shift; - rwq->buf_size = (rwq->wqe_count << rwq->wqe_shift); + if (check_shl_overflow(rwq->wqe_count, rwq->wqe_shift, &rwq->buf_size)) + return -EINVAL; + rwq->log_rq_stride = rwq->wqe_shift; rwq->log_rq_size = ilog2(rwq->wqe_count); return 0; -- cgit From 2e51e45cf613491a2bd9d757f04e36d8617be5ac Mon Sep 17 00:00:00 2001 From: Potnuri Bharat Teja Date: Fri, 3 Aug 2018 18:26:47 +0530 Subject: iw_cxgb4: pass window scale in flowc work request This will allow FW to not send more data to TP (which would then need to be buffered). Pass the negotiated TCP window scale to FW in the FLOWC WR. Also refactor send_flowc() a bit to clean it up. 
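As a worked check of the sizing logic in this refactor, assuming the usual firmware layout of an 8-byte fw_flowc_wr header followed by 8-byte mnemval entries (the layout is not restated in this diff), the VLAN case computes:

	nparams    = 10;				/* includes RCV_SCALE and SCHEDCLASS */
	flowclen   = offsetof(struct fw_flowc_wr,
			      mnemval[nparams]);	/* 8 + 10 * 8 = 88 bytes */
	flowclen16 = DIV_ROUND_UP(flowclen, 16);	/* 6 units of 16 bytes */
	flowclen   = flowclen16 * 16;			/* 96 bytes, the padded WR length */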
Signed-off-by: Steve Wise Signed-off-by: Potnuri Bharat Teja Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cm.c | 46 +++++++++++++++++----------------- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 6 ++++- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 9e1463080c22..0f83cbec33f3 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -587,24 +587,29 @@ static int send_flowc(struct c4iw_ep *ep) { struct fw_flowc_wr *flowc; struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list); - int i; u16 vlan = ep->l2t->vlan; int nparams; + int flowclen, flowclen16; if (WARN_ON(!skb)) return -ENOMEM; if (vlan == CPL_L2T_VLAN_NONE) - nparams = 8; - else nparams = 9; + else + nparams = 10; + + flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]); + flowclen16 = DIV_ROUND_UP(flowclen, 16); + flowclen = flowclen16 * 16; - flowc = __skb_put(skb, FLOWC_LEN); + flowc = __skb_put(skb, flowclen); + memset(flowc, 0, flowclen); flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | FW_FLOWC_WR_NPARAMS_V(nparams)); - flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(FLOWC_LEN, - 16)) | FW_WR_FLOWID_V(ep->hwtid)); + flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) | + FW_WR_FLOWID_V(ep->hwtid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V @@ -623,21 +628,13 @@ static int send_flowc(struct c4iw_ep *ep) flowc->mnemval[6].val = cpu_to_be32(ep->snd_win); flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS; flowc->mnemval[7].val = cpu_to_be32(ep->emss); - if (nparams == 9) { + flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_RCV_SCALE; + flowc->mnemval[8].val = cpu_to_be32(ep->snd_wscale); + if (nparams == 10) { u16 pri; - pri = (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; - flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; - flowc->mnemval[8].val = cpu_to_be32(pri); - } else { - /* Pad WR to 16 byte boundary */ - flowc->mnemval[8].mnemonic = 0; - flowc->mnemval[8].val = 0; - } - for (i = 0; i < 9; i++) { - flowc->mnemval[i].r4[0] = 0; - flowc->mnemval[i].r4[1] = 0; - flowc->mnemval[i].r4[2] = 0; + flowc->mnemval[9].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; + flowc->mnemval[9].val = cpu_to_be32(pri); } set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); @@ -1176,6 +1173,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) { struct c4iw_ep *ep; struct cpl_act_establish *req = cplhdr(skb); + unsigned short tcp_opt = ntohs(req->tcp_opt); unsigned int tid = GET_TID(req); unsigned int atid = TID_TID_G(ntohl(req->tos_atid)); struct tid_info *t = dev->rdev.lldi.tids; @@ -1196,8 +1194,9 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) ep->snd_seq = be32_to_cpu(req->snd_isn); ep->rcv_seq = be32_to_cpu(req->rcv_isn); + ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt); - set_emss(ep, ntohs(req->tcp_opt)); + set_emss(ep, tcp_opt); /* dealloc the atid */ remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid); @@ -2629,16 +2628,17 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) struct cpl_pass_establish *req = cplhdr(skb); unsigned int tid = GET_TID(req); int ret; + u16 tcp_opt = ntohs(req->tcp_opt); ep = get_ep_from_tid(dev, tid); pr_debug("ep %p tid %u\n", ep, ep->hwtid); ep->snd_seq = be32_to_cpu(req->snd_isn); ep->rcv_seq = be32_to_cpu(req->rcv_isn); + ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt); - pr_debug("ep %p hwtid %u tcp_opt 0x%02x\n", ep, tid, - 
ntohs(req->tcp_opt)); + pr_debug("ep %p hwtid %u tcp_opt 0x%02x\n", ep, tid, tcp_opt); - set_emss(ep, ntohs(req->tcp_opt)); + set_emss(ep, tcp_opt); dst_confirm(ep->dst); mutex_lock(&ep->com.mutex); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index afa86a3c5cb4..f0fceadd0d12 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -910,7 +910,10 @@ enum conn_pre_alloc_buffers { CN_MAX_CON_BUF }; -#define FLOWC_LEN 80 +enum { + FLOWC_LEN = offsetof(struct fw_flowc_wr, mnemval[FW_FLOWC_MNEM_MAX]) +}; + union cpl_wr_size { struct cpl_abort_req abrt_req; struct cpl_abort_rpl abrt_rpl; @@ -977,6 +980,7 @@ struct c4iw_ep { unsigned int retry_count; int snd_win; int rcv_win; + u32 snd_wscale; struct c4iw_ep_stats stats; }; -- cgit From 61b717d041b1976530f68f8b539b2e3a7dd8e39c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 26 Jun 2018 08:39:36 -0700 Subject: RDMA/rxe: Set wqe->status correctly if an unexpected response is received Every function that returns COMPST_ERROR must set wqe->status to another value than IB_WC_SUCCESS before returning COMPST_ERROR. Fix the only code path for which this is not yet the case. Signed-off-by: Bart Van Assche Cc: Reviewed-by: Yuval Shaia Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_comp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 98d470d1f3fc..83311dd07019 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -276,6 +276,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: if (wqe->wr.opcode != IB_WR_RDMA_READ && wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) { + wqe->status = IB_WC_FATAL_ERR; return COMPST_ERROR; } reset_retry_counters(qp); -- cgit From 922983c2a1a2f679cd576eb7162f413c15a4e979 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 9 Aug 2018 19:19:35 +0300 Subject: IB/uverbs: Fix reading of 32 bit flags This is missing a zeroing of the high bits of flags, and is also not correct for big endian machines. Properly zero extend the 32 bit flags into the 64 bit stack variable. Reported-by: Michael J. Ruhl Fixes: bccd06223f21 ("IB/uverbs: Add UVERBS_ATTR_FLAGS_IN to the specs language") Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Reviewed-by: Michael J. Ruhl --- drivers/infiniband/core/uverbs_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index f0655a84f9d9..23ff698ab08e 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -535,7 +535,7 @@ int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, if (attr->ptr_attr.len == 8) flags = attr->ptr_attr.data; else if (attr->ptr_attr.len == 4) - memcpy(&flags, &attr->ptr_attr.data, 4); + flags = *(u32 *)&attr->ptr_attr.data; else return -EINVAL; -- cgit From 7d96c9b17636b6148534617ddf95dead18617776 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 9 Aug 2018 20:14:35 -0600 Subject: IB/uverbs: Have the core code create the uverbs_root_spec There is no reason for drivers to do this, the core code should take care of everything. The drivers will provide their information from rodata to describe their modifications to the core's base uapi specification. 
The core uses this to build up the runtime uapi for each device. Signed-off-by: Jason Gunthorpe Reviewed-by: Michael J. Ruhl Reviewed-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_ioctl_merge.c | 2 -- drivers/infiniband/core/uverbs_main.c | 50 ++++++++++++++++++---------- drivers/infiniband/core/uverbs_std_types.c | 1 - drivers/infiniband/hw/mlx5/main.c | 45 +++++++++---------------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + include/rdma/ib_verbs.h | 2 +- 6 files changed, 51 insertions(+), 50 deletions(-) diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c index f81aa888ce5c..16b575929915 100644 --- a/drivers/infiniband/core/uverbs_ioctl_merge.c +++ b/drivers/infiniband/core/uverbs_ioctl_merge.c @@ -556,7 +556,6 @@ void uverbs_free_spec_tree(struct uverbs_root_spec *root) kfree(root); } -EXPORT_SYMBOL(uverbs_free_spec_tree); struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees, const struct uverbs_object_tree_def **trees) @@ -661,4 +660,3 @@ free_root: uverbs_free_spec_tree(root_spec); return ERR_PTR(res); } -EXPORT_SYMBOL(uverbs_alloc_spec_tree); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 6f62146e9738..20003594b5d6 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -994,6 +994,36 @@ static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL); static CLASS_ATTR_STRING(abi_version, S_IRUGO, __stringify(IB_USER_VERBS_ABI_VERSION)); +static int ib_uverbs_create_uapi(struct ib_device *device, + struct ib_uverbs_device *uverbs_dev) +{ + const struct uverbs_object_tree_def **specs; + struct uverbs_root_spec *specs_root; + unsigned int num_specs = 1; + unsigned int i; + + if (device->driver_specs) + for (i = 0; device->driver_specs[i]; i++) + num_specs++; + + specs = kmalloc_array(num_specs, sizeof(*specs), GFP_KERNEL); + if (!specs) + return -ENOMEM; + + specs[0] = uverbs_default_get_objects(); + if (device->driver_specs) + for (i = 0; device->driver_specs[i]; i++) + specs[i+1] = device->driver_specs[i]; + + specs_root = uverbs_alloc_spec_tree(num_specs, specs); + kfree(specs); + if (IS_ERR(specs_root)) + return PTR_ERR(specs_root); + + uverbs_dev->specs_root = specs_root; + return 0; +} + static void ib_uverbs_add_one(struct ib_device *device) { int devnum; @@ -1036,6 +1066,9 @@ static void ib_uverbs_add_one(struct ib_device *device) rcu_assign_pointer(uverbs_dev->ib_dev, device); uverbs_dev->num_comp_vectors = device->num_comp_vectors; + if (ib_uverbs_create_uapi(device, uverbs_dev)) + goto err; + cdev_init(&uverbs_dev->cdev, NULL); uverbs_dev->cdev.owner = THIS_MODULE; uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; @@ -1055,23 +1088,6 @@ static void ib_uverbs_add_one(struct ib_device *device) if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version)) goto err_class; - if (!device->driver_specs_root) { - const struct uverbs_object_tree_def *default_root[] = { - uverbs_default_get_objects()}; - - uverbs_dev->specs_root = uverbs_alloc_spec_tree(1, - default_root); - if (IS_ERR(uverbs_dev->specs_root)) - goto err_class; - } else { - uverbs_dev->specs_root = device->driver_specs_root; - /* - * Take responsibility to free the specs allocated by the - * driver. 
- */ - device->driver_specs_root = NULL; - } - ib_set_client_data(device, &uverbs_client, uverbs_dev); return; diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 3aa7c7deac74..7f22b820a21b 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -316,4 +316,3 @@ const struct uverbs_object_tree_def *uverbs_default_get_objects(void) { return &uverbs_default_objects; } -EXPORT_SYMBOL_GPL(uverbs_default_get_objects); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 13744b4631b4..f86d831ee27c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5523,37 +5523,29 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE( UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, enum mlx5_ib_uapi_flow_action_flags)); -#define NUM_TREES 5 static int populate_specs_root(struct mlx5_ib_dev *dev) { - const struct uverbs_object_tree_def *default_root[NUM_TREES + 1] = { - uverbs_default_get_objects()}; - size_t num_trees = 1; + const struct uverbs_object_tree_def **trees = dev->driver_trees; + size_t num_trees = 0; - if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE && - !WARN_ON(num_trees >= ARRAY_SIZE(default_root))) - default_root[num_trees++] = &mlx5_ib_flow_action; + if (mlx5_accel_ipsec_device_caps(dev->mdev) & + MLX5_ACCEL_IPSEC_CAP_DEVICE) + trees[num_trees++] = &mlx5_ib_flow_action; - if (MLX5_CAP_DEV_MEM(dev->mdev, memic) && - !WARN_ON(num_trees >= ARRAY_SIZE(default_root))) - default_root[num_trees++] = &mlx5_ib_dm; + if (MLX5_CAP_DEV_MEM(dev->mdev, memic)) + trees[num_trees++] = &mlx5_ib_dm; if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & - MLX5_GENERAL_OBJ_TYPES_CAP_UCTX && - !WARN_ON(num_trees >= ARRAY_SIZE(default_root))) - default_root[num_trees++] = mlx5_ib_get_devx_tree(); + MLX5_GENERAL_OBJ_TYPES_CAP_UCTX) + trees[num_trees++] = mlx5_ib_get_devx_tree(); - num_trees += mlx5_ib_get_flow_trees(default_root + num_trees); + num_trees += mlx5_ib_get_flow_trees(trees + num_trees); - dev->ib_dev.driver_specs_root = - uverbs_alloc_spec_tree(num_trees, default_root); + WARN_ON(num_trees >= ARRAY_SIZE(dev->driver_trees)); + trees[num_trees] = NULL; + dev->ib_dev.driver_specs = trees; - return PTR_ERR_OR_ZERO(dev->ib_dev.driver_specs_root); -} - -static void depopulate_specs_root(struct mlx5_ib_dev *dev) -{ - uverbs_free_spec_tree(dev->ib_dev.driver_specs_root); + return 0; } static int mlx5_ib_read_counters(struct ib_counters *counters, @@ -6092,11 +6084,6 @@ int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) return ib_register_device(&dev->ib_dev, NULL); } -static void mlx5_ib_stage_depopulate_specs(struct mlx5_ib_dev *dev) -{ - depopulate_specs_root(dev); -} - void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) { destroy_umrc_res(dev); @@ -6231,7 +6218,7 @@ static const struct mlx5_ib_profile pf_profile = { mlx5_ib_stage_pre_ib_reg_umr_cleanup), STAGE_CREATE(MLX5_IB_STAGE_SPECS, mlx5_ib_stage_populate_specs, - mlx5_ib_stage_depopulate_specs), + NULL), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), @@ -6279,7 +6266,7 @@ static const struct mlx5_ib_profile nic_rep_profile = { mlx5_ib_stage_pre_ib_reg_umr_cleanup), STAGE_CREATE(MLX5_IB_STAGE_SPECS, mlx5_ib_stage_populate_specs, - mlx5_ib_stage_depopulate_specs), + NULL), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), diff --git 
a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b75754efc663..320d4dfe8c2f 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -860,6 +860,7 @@ to_mcounters(struct ib_counters *ibcntrs) struct mlx5_ib_dev { struct ib_device ib_dev; + const struct uverbs_object_tree_def *driver_trees[6]; struct mlx5_core_dev *mdev; struct mlx5_roce roce[MLX5_MAX_PORTS]; int num_ports; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4ffe3e11e8fb..3b07201b9a80 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2580,7 +2580,7 @@ struct ib_device { const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev, int comp_vector); - struct uverbs_root_spec *driver_specs_root; + const struct uverbs_object_tree_def *const *driver_specs; enum rdma_driver_id driver_id; }; -- cgit From 9ed3e5f447723a41de6bcc29633e9f7e6246d2f7 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 9 Aug 2018 20:14:36 -0600 Subject: IB/uverbs: Build the specs into a radix tree at runtime This radix tree datastructure is intended to replace the 'hash' structure used today for parsing ioctl methods during system calls. This first commit introduces the structure and builds it from the existing .rodata descriptions. The so-called hash arrangement is actually a 5 level open coded radix tree. This new version uses a 3 level radix tree built using the radix tree library. Overall this is much less code and much easier to build as the radix tree API allows for dynamic modification during the building. There is a small memory penalty to pay for this, but since the radix tree is allocated on a per device basis, a few kb of RAM seems immaterial considering the gained simplicity. The radix tree is similar to the existing tree, but also has a 'attr_bkey' concept, which is a small value'd index for each method attribute. This is used to simplify and improve performance of everything in the next patches. Signed-off-by: Jason Gunthorpe Reviewed-by: Leon Romanovsky Reviewed-by: Michael J. 
Ruhl --- drivers/infiniband/core/Makefile | 3 +- drivers/infiniband/core/rdma_core.h | 50 +++++ drivers/infiniband/core/uverbs.h | 1 + drivers/infiniband/core/uverbs_main.c | 14 +- drivers/infiniband/core/uverbs_uapi.c | 343 ++++++++++++++++++++++++++++++++++ include/rdma/uverbs_ioctl.h | 137 ++++++++++++++ 6 files changed, 545 insertions(+), 3 deletions(-) create mode 100644 drivers/infiniband/core/uverbs_uapi.c diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 61667705d746..d934cf617841 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -37,4 +37,5 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ rdma_core.o uverbs_std_types.o uverbs_ioctl.o \ uverbs_ioctl_merge.o uverbs_std_types_cq.o \ uverbs_std_types_flow_action.o uverbs_std_types_dm.o \ - uverbs_std_types_mr.o uverbs_std_types_counters.o + uverbs_std_types_mr.o uverbs_std_types_counters.o \ + uverbs_uapi.o diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index b2e85ce65b78..55a687285b1d 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -43,6 +43,8 @@ #include #include +struct ib_uverbs_device; + int uverbs_ns_idx(u16 *id, unsigned int ns_count); const struct uverbs_object_spec *uverbs_get_object(struct ib_uverbs_file *ufile, uint16_t object); @@ -113,4 +115,52 @@ int uverbs_finalize_object(struct ib_uobject *uobj, void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile); void release_ufile_idr_uobject(struct ib_uverbs_file *ufile); +/* + * This is the runtime description of the uverbs API, used by the syscall + * machinery to validate and dispatch calls. + */ + +/* + * Depending on ID the slot pointer in the radix tree points at one of these + * structs. 
+ */ +struct uverbs_api_object { + const struct uverbs_obj_type *type_attrs; + const struct uverbs_obj_type_class *type_class; +}; + +struct uverbs_api_ioctl_method { + int (__rcu *handler)(struct ib_uverbs_file *ufile, + struct uverbs_attr_bundle *ctx); + DECLARE_BITMAP(attr_mandatory, UVERBS_API_ATTR_BKEY_LEN); + u8 driver_method:1; + u8 key_bitmap_len; + u8 destroy_bkey; +}; + +struct uverbs_api_attr { + struct uverbs_attr_spec spec; +}; + +struct uverbs_api_object; +struct uverbs_api { + /* radix tree contains struct uverbs_api_* pointers */ + struct radix_tree_root radix; + enum rdma_driver_id driver_id; +}; + +static inline const struct uverbs_api_object * +uapi_get_object(struct uverbs_api *uapi, u16 object_id) +{ + return radix_tree_lookup(&uapi->radix, uapi_key_obj(object_id)); +} + +char *uapi_key_format(char *S, unsigned int key); +struct uverbs_api *uverbs_alloc_api( + const struct uverbs_object_tree_def *const *driver_specs, + enum rdma_driver_id driver_id); +void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev); +void uverbs_disassociate_api(struct uverbs_api *uapi); +void uverbs_destroy_api(struct uverbs_api *uapi); + #endif /* RDMA_CORE_H */ diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 0fa32009908c..879be0d1fd99 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -112,6 +112,7 @@ struct ib_uverbs_device { struct list_head uverbs_file_list; struct list_head uverbs_events_file_list; struct uverbs_root_spec *specs_root; + struct uverbs_api *uapi; }; struct ib_uverbs_event_queue { diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 20003594b5d6..0fab083cafef 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -174,6 +174,7 @@ static void ib_uverbs_release_dev(struct kobject *kobj) struct ib_uverbs_device *dev = container_of(kobj, struct ib_uverbs_device, kobj); + uverbs_destroy_api(dev->uapi); cleanup_srcu_struct(&dev->disassociate_srcu); uverbs_free_spec_tree(dev->specs_root); kfree(dev); @@ -1000,6 +1001,7 @@ static int ib_uverbs_create_uapi(struct ib_device *device, const struct uverbs_object_tree_def **specs; struct uverbs_root_spec *specs_root; unsigned int num_specs = 1; + struct uverbs_api *uapi; unsigned int i; if (device->driver_specs) @@ -1020,7 +1022,14 @@ static int ib_uverbs_create_uapi(struct ib_device *device, if (IS_ERR(specs_root)) return PTR_ERR(specs_root); + uapi = uverbs_alloc_api(device->driver_specs, device->driver_id); + if (IS_ERR(uapi)) { + uverbs_free_spec_tree(specs_root); + return PTR_ERR(uapi); + } + uverbs_dev->specs_root = specs_root; + uverbs_dev->uapi = uapi; return 0; } @@ -1115,7 +1124,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, struct ib_event event; /* Pending running commands to terminate */ - synchronize_srcu(&uverbs_dev->disassociate_srcu); + uverbs_disassociate_api_pre(uverbs_dev); event.event = IB_EVENT_DEVICE_FATAL; event.element.port_num = 0; event.device = ib_dev; @@ -1161,6 +1170,8 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN); } mutex_unlock(&uverbs_dev->lists_mutex); + + uverbs_disassociate_api(uverbs_dev->uapi); } static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) @@ -1188,7 +1199,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) * cdev was deleted, 
however active clients can still issue * commands and close their open files. */ - rcu_assign_pointer(uverbs_dev->ib_dev, NULL); ib_uverbs_free_hw_resources(uverbs_dev, device); wait_clients = 0; } diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c new file mode 100644 index 000000000000..21c0de034511 --- /dev/null +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -0,0 +1,343 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. + */ +#include +#include +#include +#include "rdma_core.h" +#include "uverbs.h" + +static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size) +{ + void *elm; + int rc; + + if (key == UVERBS_API_KEY_ERR) + return ERR_PTR(-EOVERFLOW); + + elm = kzalloc(alloc_size, GFP_KERNEL); + rc = radix_tree_insert(&uapi->radix, key, elm); + if (rc) { + kfree(elm); + return ERR_PTR(rc); + } + + return elm; +} + +static int uapi_merge_method(struct uverbs_api *uapi, + struct uverbs_api_object *obj_elm, u32 obj_key, + const struct uverbs_method_def *method, + bool is_driver) +{ + u32 method_key = obj_key | uapi_key_ioctl_method(method->id); + struct uverbs_api_ioctl_method *method_elm; + unsigned int i; + + if (!method->attrs) + return 0; + + method_elm = uapi_add_elm(uapi, method_key, sizeof(*method_elm)); + if (IS_ERR(method_elm)) { + if (method_elm != ERR_PTR(-EEXIST)) + return PTR_ERR(method_elm); + + /* + * This occurs when a driver uses ADD_UVERBS_ATTRIBUTES_SIMPLE + */ + if (WARN_ON(method->handler)) + return -EINVAL; + method_elm = radix_tree_lookup(&uapi->radix, method_key); + if (WARN_ON(!method_elm)) + return -EINVAL; + } else { + WARN_ON(!method->handler); + rcu_assign_pointer(method_elm->handler, method->handler); + if (method->handler != uverbs_destroy_def_handler) + method_elm->driver_method = is_driver; + } + + for (i = 0; i != method->num_attrs; i++) { + const struct uverbs_attr_def *attr = (*method->attrs)[i]; + struct uverbs_api_attr *attr_slot; + + if (!attr) + continue; + + /* + * ENUM_IN contains the 'ids' pointer to the driver's .rodata, + * so if it is specified by a driver then it always makes this + * into a driver method. 
+ */ + if (attr->attr.type == UVERBS_ATTR_TYPE_ENUM_IN) + method_elm->driver_method |= is_driver; + + attr_slot = + uapi_add_elm(uapi, method_key | uapi_key_attr(attr->id), + sizeof(*attr_slot)); + /* Attributes are not allowed to be modified by drivers */ + if (IS_ERR(attr_slot)) + return PTR_ERR(attr_slot); + + attr_slot->spec = attr->attr; + } + + return 0; +} + +static int uapi_merge_tree(struct uverbs_api *uapi, + const struct uverbs_object_tree_def *tree, + bool is_driver) +{ + unsigned int i, j; + int rc; + + if (!tree->objects) + return 0; + + for (i = 0; i != tree->num_objects; i++) { + const struct uverbs_object_def *obj = (*tree->objects)[i]; + struct uverbs_api_object *obj_elm; + u32 obj_key; + + if (!obj) + continue; + + obj_key = uapi_key_obj(obj->id); + obj_elm = uapi_add_elm(uapi, obj_key, sizeof(*obj_elm)); + if (IS_ERR(obj_elm)) { + if (obj_elm != ERR_PTR(-EEXIST)) + return PTR_ERR(obj_elm); + + /* This occurs when a driver uses ADD_UVERBS_METHODS */ + if (WARN_ON(obj->type_attrs)) + return -EINVAL; + obj_elm = radix_tree_lookup(&uapi->radix, obj_key); + if (WARN_ON(!obj_elm)) + return -EINVAL; + } else { + obj_elm->type_attrs = obj->type_attrs; + if (obj->type_attrs) { + obj_elm->type_class = + obj->type_attrs->type_class; + /* + * Today drivers are only permitted to use + * idr_class types. They cannot use FD types + * because we currently have no way to revoke + * the fops pointer after device + * disassociation. + */ + if (WARN_ON(is_driver && + obj->type_attrs->type_class != + &uverbs_idr_class)) + return -EINVAL; + } + } + + if (!obj->methods) + continue; + + for (j = 0; j != obj->num_methods; j++) { + const struct uverbs_method_def *method = + (*obj->methods)[j]; + if (!method) + continue; + + rc = uapi_merge_method(uapi, obj_elm, obj_key, method, + is_driver); + if (rc) + return rc; + } + } + + return 0; +} + +static int +uapi_finalize_ioctl_method(struct uverbs_api *uapi, + struct uverbs_api_ioctl_method *method_elm, + u32 method_key) +{ + struct radix_tree_iter iter; + unsigned int max_bkey = 0; + bool single_uobj = false; + void __rcu **slot; + + method_elm->destroy_bkey = UVERBS_API_ATTR_BKEY_LEN; + radix_tree_for_each_slot (slot, &uapi->radix, &iter, + uapi_key_attrs_start(method_key)) { + struct uverbs_api_attr *elm = + rcu_dereference_protected(*slot, true); + u32 attr_key = iter.index & UVERBS_API_ATTR_KEY_MASK; + u32 attr_bkey = uapi_bkey_attr(attr_key); + u8 type = elm->spec.type; + + if (uapi_key_attr_to_method(iter.index) != + uapi_key_attr_to_method(method_key)) + break; + + if (elm->spec.mandatory) + __set_bit(attr_bkey, method_elm->attr_mandatory); + + if (type == UVERBS_ATTR_TYPE_IDR || + type == UVERBS_ATTR_TYPE_FD) { + u8 access = elm->spec.u.obj.access; + + /* + * Verbs specs may only have one NEW/DESTROY, we don't + * have the infrastructure to abort multiple NEW's or + * cope with multiple DESTROY failure. 
+ */ + if (access == UVERBS_ACCESS_NEW || + access == UVERBS_ACCESS_DESTROY) { + if (WARN_ON(single_uobj)) + return -EINVAL; + + single_uobj = true; + if (WARN_ON(!elm->spec.mandatory)) + return -EINVAL; + } + + if (access == UVERBS_ACCESS_DESTROY) + method_elm->destroy_bkey = attr_bkey; + } + + max_bkey = max(max_bkey, attr_bkey); + } + + method_elm->key_bitmap_len = max_bkey + 1; + WARN_ON(method_elm->key_bitmap_len > UVERBS_API_ATTR_BKEY_LEN); + + return 0; +} + +static int uapi_finalize(struct uverbs_api *uapi) +{ + struct radix_tree_iter iter; + void __rcu **slot; + int rc; + + radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) { + struct uverbs_api_ioctl_method *method_elm = + rcu_dereference_protected(*slot, true); + + if (uapi_key_is_ioctl_method(iter.index)) { + rc = uapi_finalize_ioctl_method(uapi, method_elm, + iter.index); + if (rc) + return rc; + } + } + + return 0; +} + +void uverbs_destroy_api(struct uverbs_api *uapi) +{ + struct radix_tree_iter iter; + void __rcu **slot; + + if (!uapi) + return; + + radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) { + kfree(rcu_dereference_protected(*slot, true)); + radix_tree_iter_delete(&uapi->radix, &iter, slot); + } +} + +struct uverbs_api *uverbs_alloc_api( + const struct uverbs_object_tree_def *const *driver_specs, + enum rdma_driver_id driver_id) +{ + struct uverbs_api *uapi; + int rc; + + uapi = kzalloc(sizeof(*uapi), GFP_KERNEL); + if (!uapi) + return ERR_PTR(-ENOMEM); + + INIT_RADIX_TREE(&uapi->radix, GFP_KERNEL); + uapi->driver_id = driver_id; + + rc = uapi_merge_tree(uapi, uverbs_default_get_objects(), false); + if (rc) + goto err; + + for (; driver_specs && *driver_specs; driver_specs++) { + rc = uapi_merge_tree(uapi, *driver_specs, true); + if (rc) + goto err; + } + + rc = uapi_finalize(uapi); + if (rc) + goto err; + + return uapi; +err: + if (rc != -ENOMEM) + pr_err("Setup of uverbs_api failed, kernel parsing tree description is not valid (%d)??\n", + rc); + + uverbs_destroy_api(uapi); + return ERR_PTR(rc); +} + +/* + * The pre version is done before destroying the HW objects, it only blocks + * off method access. All methods that require the ib_dev or the module data + * must test one of these assignments prior to continuing. + */ +void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev) +{ + struct uverbs_api *uapi = uverbs_dev->uapi; + struct radix_tree_iter iter; + void __rcu **slot; + + rcu_assign_pointer(uverbs_dev->ib_dev, NULL); + + radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) { + if (uapi_key_is_ioctl_method(iter.index)) { + struct uverbs_api_ioctl_method *method_elm = + rcu_dereference_protected(*slot, true); + + if (method_elm->driver_method) + rcu_assign_pointer(method_elm->handler, NULL); + } + } + + synchronize_srcu(&uverbs_dev->disassociate_srcu); +} + +/* + * Called when a driver disassociates from the ib_uverbs_device. The + * assumption is that the driver module will unload after. Replace everything + * related to the driver with NULL as a safety measure. + */ +void uverbs_disassociate_api(struct uverbs_api *uapi) +{ + struct radix_tree_iter iter; + void __rcu **slot; + + radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) { + if (uapi_key_is_object(iter.index)) { + struct uverbs_api_object *object_elm = + rcu_dereference_protected(*slot, true); + + /* + * Some type_attrs are in the driver module. We don't + * bother to keep track of which since there should be + * no use of this after disassociate. 
+ */ + object_elm->type_attrs = NULL; + } else if (uapi_key_is_attr(iter.index)) { + struct uverbs_api_attr *elm = + rcu_dereference_protected(*slot, true); + + if (elm->spec.type == UVERBS_ATTR_TYPE_ENUM_IN) + elm->spec.u2.enum_def.ids = NULL; + } + } +} diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 8d71b7a7f147..339996e80c16 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -154,6 +154,143 @@ struct uverbs_root_spec { struct uverbs_object_spec_hash *object_buckets[0]; }; +/* + * Information about the API is loaded into a radix tree. For IOCTL we start + * with a tuple of: + * object_id, attr_id, method_id + * + * Which is a 48 bit value, with most of the bits guaranteed to be zero. Based + * on the current kernel support this is compressed into a 16 bit key for the + * radix tree. Since this compression is entirely internal to the kernel the + * below limits can be revised if the kernel gains additional data. + * + * With 64 leafs per node this is a 3 level radix tree. + * + * The tree encodes multiple types, and uses a scheme where OBJ_ID,0,0 returns + * the object slot, and OBJ_ID,METH_ID,0 returns the method slot. + */ +enum uapi_radix_data { + UVERBS_API_NS_FLAG = 1U << UVERBS_ID_NS_SHIFT, + + UVERBS_API_ATTR_KEY_BITS = 6, + UVERBS_API_ATTR_KEY_MASK = GENMASK(UVERBS_API_ATTR_KEY_BITS - 1, 0), + UVERBS_API_ATTR_BKEY_LEN = (1 << UVERBS_API_ATTR_KEY_BITS) - 1, + + UVERBS_API_METHOD_KEY_BITS = 5, + UVERBS_API_METHOD_KEY_SHIFT = UVERBS_API_ATTR_KEY_BITS, + UVERBS_API_METHOD_KEY_NUM_CORE = 24, + UVERBS_API_METHOD_KEY_NUM_DRIVER = (1 << UVERBS_API_METHOD_KEY_BITS) - + UVERBS_API_METHOD_KEY_NUM_CORE, + UVERBS_API_METHOD_KEY_MASK = GENMASK( + UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT - 1, + UVERBS_API_METHOD_KEY_SHIFT), + + UVERBS_API_OBJ_KEY_BITS = 5, + UVERBS_API_OBJ_KEY_SHIFT = + UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT, + UVERBS_API_OBJ_KEY_NUM_CORE = 24, + UVERBS_API_OBJ_KEY_NUM_DRIVER = + (1 << UVERBS_API_OBJ_KEY_BITS) - UVERBS_API_OBJ_KEY_NUM_CORE, + UVERBS_API_OBJ_KEY_MASK = GENMASK(31, UVERBS_API_OBJ_KEY_SHIFT), + + /* This id is guaranteed to not exist in the radix tree */ + UVERBS_API_KEY_ERR = 0xFFFFFFFF, +}; + +static inline __attribute_const__ u32 uapi_key_obj(u32 id) +{ + if (id & UVERBS_API_NS_FLAG) { + id &= ~UVERBS_API_NS_FLAG; + if (id >= UVERBS_API_OBJ_KEY_NUM_DRIVER) + return UVERBS_API_KEY_ERR; + id = id + UVERBS_API_OBJ_KEY_NUM_CORE; + } else { + if (id >= UVERBS_API_OBJ_KEY_NUM_CORE) + return UVERBS_API_KEY_ERR; + } + + return id << UVERBS_API_OBJ_KEY_SHIFT; +} + +static inline __attribute_const__ bool uapi_key_is_object(u32 key) +{ + return (key & ~UVERBS_API_OBJ_KEY_MASK) == 0; +} + +static inline __attribute_const__ u32 uapi_key_ioctl_method(u32 id) +{ + if (id & UVERBS_API_NS_FLAG) { + id &= ~UVERBS_API_NS_FLAG; + if (id >= UVERBS_API_METHOD_KEY_NUM_DRIVER) + return UVERBS_API_KEY_ERR; + id = id + UVERBS_API_METHOD_KEY_NUM_CORE; + } else { + id++; + if (id >= UVERBS_API_METHOD_KEY_NUM_CORE) + return UVERBS_API_KEY_ERR; + } + + return id << UVERBS_API_METHOD_KEY_SHIFT; +} + +static inline __attribute_const__ u32 uapi_key_attr_to_method(u32 attr_key) +{ + return attr_key & + (UVERBS_API_OBJ_KEY_MASK | UVERBS_API_METHOD_KEY_MASK); +} + +static inline __attribute_const__ bool uapi_key_is_ioctl_method(u32 key) +{ + return (key & UVERBS_API_METHOD_KEY_MASK) != 0 && + (key & UVERBS_API_ATTR_KEY_MASK) == 0; +} + +static inline __attribute_const__ u32 uapi_key_attrs_start(u32 
ioctl_method_key) +{ + /* 0 is the method slot itself */ + return ioctl_method_key + 1; +} + +static inline __attribute_const__ u32 uapi_key_attr(u32 id) +{ + /* + * The attr is designed to fit in the typical single radix tree node + * of 64 entries. Since almost all methods have driver attributes we + * organize things so that the driver and core attributes interleave to + * reduce the length of the attributes array in typical cases. + */ + if (id & UVERBS_API_NS_FLAG) { + id &= ~UVERBS_API_NS_FLAG; + id++; + if (id >= 1 << (UVERBS_API_ATTR_KEY_BITS - 1)) + return UVERBS_API_KEY_ERR; + id = (id << 1) | 0; + } else { + if (id >= 1 << (UVERBS_API_ATTR_KEY_BITS - 1)) + return UVERBS_API_KEY_ERR; + id = (id << 1) | 1; + } + + return id; +} + +static inline __attribute_const__ bool uapi_key_is_attr(u32 key) +{ + return (key & UVERBS_API_METHOD_KEY_MASK) != 0 && + (key & UVERBS_API_ATTR_KEY_MASK) != 0; +} + +/* + * This returns a value in the range [0 to UVERBS_API_ATTR_BKEY_LEN), + * basically it undoes the reservation of 0 in the ID numbering. attr_key + * must already be masked with UVERBS_API_ATTR_KEY_MASK, or be the output of + * uapi_key_attr(). + */ +static inline __attribute_const__ u32 uapi_bkey_attr(u32 attr_key) +{ + return attr_key - 1; +} + /* * ======================================= * Verbs definitions -- cgit From 6b0d08f4a27134e6fb49aa33ceb53356081bc92e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 9 Aug 2018 20:14:37 -0600 Subject: IB/uverbs: Use uverbs_api to manage the object type inside the uobject Currently the struct uverbs_obj_type stored in the ib_uobject is part of the .rodata segment of the module that defines the object. This is a problem if drivers define new uapi objects as we will be left with a dangling pointer after device disassociation. Switch the uverbs_obj_type for struct uverbs_api_object, which is allocated memory that is part of the uverbs_api and is guaranteed to always exist. Further this moves the 'type_class' into this memory which means access to the IDR/FD function pointers is also guaranteed. Drivers cannot define new types. This makes it safe to continue to use all uobjects, including driver defined ones, after disassociation. 
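A rough sketch of the lookup path this enables (the ufile->device->uapi chain is assumed here from the earlier patches; it is not spelled out in this diff):

	/* The per-device radix tree, not driver .rodata, now supplies the
	 * object description, so the pointer stays valid even after the
	 * device has disassociated.
	 */
	const struct uverbs_api_object *obj =
		uapi_get_object(ufile->device->uapi, object_id);
	struct ib_uobject *uobj =
		rdma_lookup_get_uobject(obj, ufile, id, UVERBS_LOOKUP_READ);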
Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 100 ++++++++++++++++++--------------- drivers/infiniband/core/rdma_core.h | 2 +- drivers/infiniband/core/uverbs_ioctl.c | 6 +- include/rdma/ib_verbs.h | 2 +- include/rdma/uverbs_std_types.h | 30 +++++----- include/rdma/uverbs_types.h | 9 +-- 6 files changed, 79 insertions(+), 70 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 4235b9ddc2ad..2814228ead39 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -97,7 +97,7 @@ static void uverbs_uobject_free(struct kref *ref) struct ib_uobject *uobj = container_of(ref, struct ib_uobject, ref); - if (uobj->type->type_class->needs_kfree_rcu) + if (uobj->uapi_object->type_class->needs_kfree_rcu) kfree_rcu(uobj, rcu); else kfree(uobj); @@ -180,7 +180,7 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE); if (uobj->object) { - ret = uobj->type->type_class->destroy_hw(uobj, reason); + ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason); if (ret) { if (ib_is_destroy_retryable(ret, reason, uobj)) return ret; @@ -197,7 +197,7 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, if (reason == RDMA_REMOVE_ABORT) { WARN_ON(!list_empty(&uobj->list)); WARN_ON(!uobj->context); - uobj->type->type_class->alloc_abort(uobj); + uobj->uapi_object->type_class->alloc_abort(uobj); } uobj->context = NULL; @@ -210,7 +210,7 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, if (reason != RDMA_REMOVE_DESTROY) atomic_set(&uobj->usecnt, 0); else - uobj->type->type_class->remove_handle(uobj); + uobj->uapi_object->type_class->remove_handle(uobj); if (!list_empty(&uobj->list)) { spin_lock_irqsave(&ufile->uobjects_lock, flags); @@ -268,13 +268,13 @@ out_unlock: * with a NULL object pointer. The caller must pair this with * uverbs_put_destroy. */ -struct ib_uobject *__uobj_get_destroy(const struct uverbs_obj_type *type, +struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj, u32 id, struct ib_uverbs_file *ufile) { struct ib_uobject *uobj; int ret; - uobj = rdma_lookup_get_uobject(type, ufile, id, UVERBS_LOOKUP_DESTROY); + uobj = rdma_lookup_get_uobject(obj, ufile, id, UVERBS_LOOKUP_DESTROY); if (IS_ERR(uobj)) return uobj; @@ -292,27 +292,22 @@ struct ib_uobject *__uobj_get_destroy(const struct uverbs_obj_type *type, * on success (negative errno on failure). For use by callers that do not need * the uobj. */ -int __uobj_perform_destroy(const struct uverbs_obj_type *type, u32 id, +int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id, struct ib_uverbs_file *ufile, int success_res) { struct ib_uobject *uobj; - uobj = __uobj_get_destroy(type, id, ufile); + uobj = __uobj_get_destroy(obj, id, ufile); if (IS_ERR(uobj)) return PTR_ERR(uobj); - /* - * FIXME: After destroy this is not safe. We no longer hold the rwsem - * so disassociation could have completed and unloaded the module that - * backs the uobj->type pointer. 
- */ rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); return success_res; } /* alloc_uobj must be undone by uverbs_destroy_uobject() */ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile, - const struct uverbs_obj_type *type) + const struct uverbs_api_object *obj) { struct ib_uobject *uobj; struct ib_ucontext *ucontext; @@ -321,7 +316,7 @@ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile, if (IS_ERR(ucontext)) return ERR_CAST(ucontext); - uobj = kzalloc(type->obj_size, GFP_KERNEL); + uobj = kzalloc(obj->type_attrs->obj_size, GFP_KERNEL); if (!uobj) return ERR_PTR(-ENOMEM); /* @@ -331,7 +326,7 @@ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile, uobj->ufile = ufile; uobj->context = ucontext; INIT_LIST_HEAD(&uobj->list); - uobj->type = type; + uobj->uapi_object = obj; /* * Allocated objects start out as write locked to deny any other * syscalls from accessing them until they are committed. See @@ -368,7 +363,7 @@ static int idr_add_uobj(struct ib_uobject *uobj) /* Returns the ib_uobject or an error. The caller should check for IS_ERR. */ static struct ib_uobject * -lookup_get_idr_uobject(const struct uverbs_obj_type *type, +lookup_get_idr_uobject(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile, s64 id, enum rdma_lookup_mode mode) { @@ -401,15 +396,14 @@ free: } static struct ib_uobject * -lookup_get_fd_uobject(const struct uverbs_obj_type *type, +lookup_get_fd_uobject(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile, s64 id, enum rdma_lookup_mode mode) { + const struct uverbs_obj_fd_type *fd_type; struct file *f; struct ib_uobject *uobject; int fdno = id; - const struct uverbs_obj_fd_type *fd_type = - container_of(type, struct uverbs_obj_fd_type, type); if (fdno != id) return ERR_PTR(-EINVAL); @@ -417,6 +411,11 @@ lookup_get_fd_uobject(const struct uverbs_obj_type *type, if (mode != UVERBS_LOOKUP_READ) return ERR_PTR(-EOPNOTSUPP); + if (!obj->type_attrs) + return ERR_PTR(-EIO); + fd_type = + container_of(obj->type_attrs, struct uverbs_obj_fd_type, type); + f = fget(fdno); if (!f) return ERR_PTR(-EBADF); @@ -436,18 +435,21 @@ lookup_get_fd_uobject(const struct uverbs_obj_type *type, return uobject; } -struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, +struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile, s64 id, enum rdma_lookup_mode mode) { struct ib_uobject *uobj; int ret; - uobj = type->type_class->lookup_get(type, ufile, id, mode); + if (!obj) + return ERR_PTR(-EINVAL); + + uobj = obj->type_class->lookup_get(obj, ufile, id, mode); if (IS_ERR(uobj)) return uobj; - if (uobj->type != type) { + if (uobj->uapi_object != obj) { ret = -EINVAL; goto free; } @@ -469,18 +471,19 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, return uobj; free: - uobj->type->type_class->lookup_put(uobj, mode); + obj->type_class->lookup_put(uobj, mode); uverbs_uobject_put(uobj); return ERR_PTR(ret); } -static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *type, - struct ib_uverbs_file *ufile) +static struct ib_uobject * +alloc_begin_idr_uobject(const struct uverbs_api_object *obj, + struct ib_uverbs_file *ufile) { int ret; struct ib_uobject *uobj; - uobj = alloc_uobj(ufile, type); + uobj = alloc_uobj(ufile, obj); if (IS_ERR(uobj)) return uobj; @@ -504,8 +507,9 @@ uobj_put: return ERR_PTR(ret); } -static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type, - struct 
ib_uverbs_file *ufile) +static struct ib_uobject * +alloc_begin_fd_uobject(const struct uverbs_api_object *obj, + struct ib_uverbs_file *ufile) { int new_fd; struct ib_uobject *uobj; @@ -514,7 +518,7 @@ static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *t if (new_fd < 0) return ERR_PTR(new_fd); - uobj = alloc_uobj(ufile, type); + uobj = alloc_uobj(ufile, obj); if (IS_ERR(uobj)) { put_unused_fd(new_fd); return uobj; @@ -526,11 +530,14 @@ static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *t return uobj; } -struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, +struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile) { struct ib_uobject *ret; + if (!obj) + return ERR_PTR(-EINVAL); + /* * The hw_destroy_rwsem is held across the entire object creation and * released during rdma_alloc_commit_uobject or @@ -539,7 +546,7 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, if (!down_read_trylock(&ufile->hw_destroy_rwsem)) return ERR_PTR(-EIO); - ret = type->type_class->alloc_begin(type, ufile); + ret = obj->type_class->alloc_begin(obj, ufile); if (IS_ERR(ret)) { up_read(&ufile->hw_destroy_rwsem); return ret; @@ -561,8 +568,8 @@ static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj, enum rdma_remove_reason why) { const struct uverbs_obj_idr_type *idr_type = - container_of(uobj->type, struct uverbs_obj_idr_type, - type); + container_of(uobj->uapi_object->type_attrs, + struct uverbs_obj_idr_type, type); int ret = idr_type->destroy_object(uobj, why); /* @@ -599,8 +606,8 @@ static void alloc_abort_fd_uobject(struct ib_uobject *uobj) static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj, enum rdma_remove_reason why) { - const struct uverbs_obj_fd_type *fd_type = - container_of(uobj->type, struct uverbs_obj_fd_type, type); + const struct uverbs_obj_fd_type *fd_type = container_of( + uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type); int ret = fd_type->context_closed(uobj, why); if (ib_is_destroy_retryable(ret, why, uobj)) @@ -633,8 +640,8 @@ static int alloc_commit_idr_uobject(struct ib_uobject *uobj) static int alloc_commit_fd_uobject(struct ib_uobject *uobj) { - const struct uverbs_obj_fd_type *fd_type = - container_of(uobj->type, struct uverbs_obj_fd_type, type); + const struct uverbs_obj_fd_type *fd_type = container_of( + uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type); int fd = uobj->id; struct file *filp; @@ -679,7 +686,7 @@ int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj) int ret; /* alloc_commit consumes the uobj kref */ - ret = uobj->type->type_class->alloc_commit(uobj); + ret = uobj->uapi_object->type_class->alloc_commit(uobj); if (ret) { uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT); up_read(&ufile->hw_destroy_rwsem); @@ -735,7 +742,7 @@ void rdma_lookup_put_uobject(struct ib_uobject *uobj, enum rdma_lookup_mode mode) { assert_uverbs_usecnt(uobj, mode); - uobj->type->type_class->lookup_put(uobj, mode); + uobj->uapi_object->type_class->lookup_put(uobj, mode); /* * In order to unlock an object, either decrease its usecnt for * read access or zero it in case of exclusive access. 
See @@ -995,23 +1002,26 @@ const struct uverbs_obj_type_class uverbs_fd_class = { EXPORT_SYMBOL(uverbs_fd_class); struct ib_uobject * -uverbs_get_uobject_from_file(const struct uverbs_obj_type *type_attrs, +uverbs_get_uobject_from_file(u16 object_id, struct ib_uverbs_file *ufile, enum uverbs_obj_access access, s64 id) { + const struct uverbs_api_object *obj = + uapi_get_object(ufile->device->uapi, object_id); + switch (access) { case UVERBS_ACCESS_READ: - return rdma_lookup_get_uobject(type_attrs, ufile, id, + return rdma_lookup_get_uobject(obj, ufile, id, UVERBS_LOOKUP_READ); case UVERBS_ACCESS_DESTROY: /* Actual destruction is done inside uverbs_handle_method */ - return rdma_lookup_get_uobject(type_attrs, ufile, id, + return rdma_lookup_get_uobject(obj, ufile, id, UVERBS_LOOKUP_DESTROY); case UVERBS_ACCESS_WRITE: - return rdma_lookup_get_uobject(type_attrs, ufile, id, + return rdma_lookup_get_uobject(obj, ufile, id, UVERBS_LOOKUP_WRITE); case UVERBS_ACCESS_NEW: - return rdma_alloc_begin_uobject(type_attrs, ufile); + return rdma_alloc_begin_uobject(obj, ufile); default: WARN_ON(true); return ERR_PTR(-EOPNOTSUPP); diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index 55a687285b1d..d89569d87b1c 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -89,7 +89,7 @@ void uverbs_close_fd(struct file *f); * uverbs_finalize_objects are called. */ struct ib_uobject * -uverbs_get_uobject_from_file(const struct uverbs_obj_type *type_attrs, +uverbs_get_uobject_from_file(u16 object_id, struct ib_uverbs_file *ufile, enum uverbs_obj_access access, s64 id); diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 23ff698ab08e..8a052d0fdf2c 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -57,7 +57,6 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile, const struct uverbs_attr_spec *spec; const struct uverbs_attr_spec *val_spec; struct uverbs_attr *e; - const struct uverbs_object_spec *object; struct uverbs_obj_attr *o_attr; struct uverbs_attr *elements = attr_bundle_h->attrs; @@ -145,9 +144,6 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile, return -EINVAL; o_attr = &e->obj_attr; - object = uverbs_get_object(ufile, spec->u.obj.obj_type); - if (!object) - return -EINVAL; /* specs are allowed to have only one destroy attribute */ WARN_ON(spec->u.obj.access == UVERBS_ACCESS_DESTROY && @@ -162,7 +158,7 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile, * IDR implementation today rejects negative IDs */ o_attr->uobject = uverbs_get_uobject_from_file( - object->type_attrs, + spec->u.obj.obj_type, ufile, spec->u.obj.access, uattr->data_s64); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3b07201b9a80..5d404c20b49f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1524,7 +1524,7 @@ struct ib_uobject { atomic_t usecnt; /* protects exclusive access */ struct rcu_head rcu; /* kfree_rcu() overhead */ - const struct uverbs_obj_type *type; + const struct uverbs_api_object *uapi_object; }; struct ib_udata { diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 64ee2545dd3d..3b00231cc084 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -54,14 +54,15 @@ static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(vo */ #define _uobj_check_id(_id) ((_id) * typecheck(u32, _id)) -#define 
uobj_get_type(_object) UVERBS_OBJECT(_object).type_attrs +#define uobj_get_type(_ufile, _object) \ + uapi_get_object((_ufile)->device->uapi, _object) #define uobj_get_read(_type, _id, _ufile) \ - rdma_lookup_get_uobject(uobj_get_type(_type), _ufile, \ + rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile, \ _uobj_check_id(_id), UVERBS_LOOKUP_READ) #define ufd_get_read(_type, _fdnum, _ufile) \ - rdma_lookup_get_uobject(uobj_get_type(_type), _ufile, \ + rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile, \ (_fdnum)*typecheck(s32, _fdnum), \ UVERBS_LOOKUP_READ) @@ -76,20 +77,21 @@ static inline void *_uobj_get_obj_read(struct ib_uobject *uobj) uobj_get_read(_type, _id, _ufile))) #define uobj_get_write(_type, _id, _ufile) \ - rdma_lookup_get_uobject(uobj_get_type(_type), _ufile, \ + rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile, \ _uobj_check_id(_id), UVERBS_LOOKUP_WRITE) -int __uobj_perform_destroy(const struct uverbs_obj_type *type, u32 id, +int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id, struct ib_uverbs_file *ufile, int success_res); #define uobj_perform_destroy(_type, _id, _ufile, _success_res) \ - __uobj_perform_destroy(uobj_get_type(_type), _uobj_check_id(_id), \ - _ufile, _success_res) + __uobj_perform_destroy(uobj_get_type(_ufile, _type), \ + _uobj_check_id(_id), _ufile, _success_res) -struct ib_uobject *__uobj_get_destroy(const struct uverbs_obj_type *type, +struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj, u32 id, struct ib_uverbs_file *ufile); #define uobj_get_destroy(_type, _id, _ufile) \ - __uobj_get_destroy(uobj_get_type(_type), _uobj_check_id(_id), _ufile) + __uobj_get_destroy(uobj_get_type(_ufile, _type), _uobj_check_id(_id), \ + _ufile) static inline void uobj_put_destroy(struct ib_uobject *uobj) { @@ -124,11 +126,11 @@ static inline void uobj_alloc_abort(struct ib_uobject *uobj) rdma_alloc_abort_uobject(uobj); } -static inline struct ib_uobject *__uobj_alloc(const struct uverbs_obj_type *type, - struct ib_uverbs_file *ufile, - struct ib_device **ib_dev) +static inline struct ib_uobject * +__uobj_alloc(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile, + struct ib_device **ib_dev) { - struct ib_uobject *uobj = rdma_alloc_begin_uobject(type, ufile); + struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, ufile); if (!IS_ERR(uobj)) *ib_dev = uobj->context->device; @@ -136,7 +138,7 @@ static inline struct ib_uobject *__uobj_alloc(const struct uverbs_obj_type *type } #define uobj_alloc(_type, _ufile, _ib_dev) \ - __uobj_alloc(uobj_get_type(_type), _ufile, _ib_dev) + __uobj_alloc(uobj_get_type(_ufile, _type), _ufile, _ib_dev) #endif diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index 1ab9a85eebd9..acb1bfa3cc99 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -37,6 +37,7 @@ #include struct uverbs_obj_type; +struct uverbs_api_object; enum rdma_lookup_mode { UVERBS_LOOKUP_READ, @@ -81,14 +82,14 @@ enum rdma_lookup_mode { * alloc_abort returns. 
*/ struct uverbs_obj_type_class { - struct ib_uobject *(*alloc_begin)(const struct uverbs_obj_type *type, + struct ib_uobject *(*alloc_begin)(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile); /* This consumes the kref on uobj */ int (*alloc_commit)(struct ib_uobject *uobj); /* This does not consume the kref on uobj */ void (*alloc_abort)(struct ib_uobject *uobj); - struct ib_uobject *(*lookup_get)(const struct uverbs_obj_type *type, + struct ib_uobject *(*lookup_get)(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile, s64 id, enum rdma_lookup_mode mode); void (*lookup_put)(struct ib_uobject *uobj, enum rdma_lookup_mode mode); @@ -128,12 +129,12 @@ struct uverbs_obj_idr_type { enum rdma_remove_reason why); }; -struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, +struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile, s64 id, enum rdma_lookup_mode mode); void rdma_lookup_put_uobject(struct ib_uobject *uobj, enum rdma_lookup_mode mode); -struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, +struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile); void rdma_alloc_abort_uobject(struct ib_uobject *uobj); int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj); -- cgit From 4b3dd2bbf0818ccb23e7f2831f2ca4a86789cd1f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 9 Aug 2018 20:14:38 -0600 Subject: IB/uverbs: Provide implementation private memory for the uverbs_attr_bundle This already existed as the anonymous 'ctx' structure, but this was not really a useful form. Hoist this struct into bundle_priv and rework the internal things to use it instead. Move a bunch of the processing internal state into the priv and reduce the excessive use of function arguments. Signed-off-by: Jason Gunthorpe Reviewed-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_ioctl.c | 112 +++++++++++++++++---------------- include/rdma/uverbs_ioctl.h | 1 + 2 files changed, 58 insertions(+), 55 deletions(-) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 8a052d0fdf2c..efb7adcc21fb 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -35,6 +35,18 @@ #include "rdma_core.h" #include "uverbs.h" +struct bundle_priv { + struct ib_uverbs_attr __user *user_attrs; + struct ib_uverbs_attr *uattrs; + struct uverbs_obj_attr *destroy_attr; + + /* + * Must be last. bundle ends in a flex array which overlaps + * internal_buffer. 
+ */ + struct uverbs_attr_bundle bundle; +}; + static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr, u16 len) { @@ -46,12 +58,11 @@ static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr, 0, uattr->len - len); } -static int uverbs_process_attr(struct ib_uverbs_file *ufile, +static int uverbs_process_attr(struct bundle_priv *pbundle, const struct ib_uverbs_attr *uattr, u16 attr_id, const struct uverbs_attr_spec_hash *attr_spec_bucket, struct uverbs_attr_bundle_hash *attr_bundle_h, - struct uverbs_obj_attr **destroy_attr, struct ib_uverbs_attr __user *uattr_ptr) { const struct uverbs_attr_spec *spec; @@ -147,9 +158,9 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile, /* specs are allowed to have only one destroy attribute */ WARN_ON(spec->u.obj.access == UVERBS_ACCESS_DESTROY && - *destroy_attr); + pbundle->destroy_attr); if (spec->u.obj.access == UVERBS_ACCESS_DESTROY) - *destroy_attr = o_attr; + pbundle->destroy_attr = o_attr; /* * The type of uattr->data is u64 for UVERBS_ATTR_TYPE_IDR and @@ -159,7 +170,7 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile, */ o_attr->uobject = uverbs_get_uobject_from_file( spec->u.obj.obj_type, - ufile, + pbundle->bundle.ufile, spec->u.obj.access, uattr->data_s64); @@ -187,10 +198,11 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile, return 0; } -static int uverbs_finalize_attrs(struct uverbs_attr_bundle *attrs_bundle, +static int uverbs_finalize_attrs(struct bundle_priv *pbundle, struct uverbs_attr_spec_hash *const *spec_hash, size_t num, bool commit) { + struct uverbs_attr_bundle *attrs_bundle = &pbundle->bundle; unsigned int i; int ret = 0; @@ -233,27 +245,25 @@ static int uverbs_finalize_attrs(struct uverbs_attr_bundle *attrs_bundle, return ret; } -static int uverbs_uattrs_process(struct ib_uverbs_file *ufile, - const struct ib_uverbs_attr *uattrs, - size_t num_uattrs, +static int uverbs_uattrs_process(size_t num_uattrs, const struct uverbs_method_spec *method, - struct uverbs_attr_bundle *attr_bundle, - struct uverbs_obj_attr **destroy_attr, - struct ib_uverbs_attr __user *uattr_ptr) + struct bundle_priv *pbundle) { + struct uverbs_attr_bundle *attr_bundle = &pbundle->bundle; + struct ib_uverbs_attr __user *uattr_ptr = pbundle->user_attrs; size_t i; int ret = 0; int num_given_buckets = 0; for (i = 0; i < num_uattrs; i++) { - const struct ib_uverbs_attr *uattr = &uattrs[i]; + const struct ib_uverbs_attr *uattr = &pbundle->uattrs[i]; u16 attr_id = uattr->attr_id; struct uverbs_attr_spec_hash *attr_spec_bucket; ret = uverbs_ns_idx(&attr_id, method->num_buckets); if (ret < 0 || !method->attr_buckets[ret]) { if (uattr->flags & UVERBS_ATTR_F_MANDATORY) { - uverbs_finalize_attrs(attr_bundle, + uverbs_finalize_attrs(pbundle, method->attr_buckets, num_given_buckets, false); @@ -270,12 +280,13 @@ static int uverbs_uattrs_process(struct ib_uverbs_file *ufile, num_given_buckets = ret + 1; attr_spec_bucket = method->attr_buckets[ret]; - ret = uverbs_process_attr(ufile, uattr, attr_id, + ret = uverbs_process_attr(pbundle, + uattr, attr_id, attr_spec_bucket, - &attr_bundle->hash[ret], destroy_attr, + &attr_bundle->hash[ret], uattr_ptr++); if (ret) { - uverbs_finalize_attrs(attr_bundle, + uverbs_finalize_attrs(pbundle, method->attr_buckets, num_given_buckets, false); @@ -287,8 +298,9 @@ static int uverbs_uattrs_process(struct ib_uverbs_file *ufile, } static int uverbs_validate_kernel_mandatory(const struct uverbs_method_spec *method_spec, - struct uverbs_attr_bundle *attr_bundle) + struct 
bundle_priv *pbundle) { + struct uverbs_attr_bundle *attr_bundle = &pbundle->bundle; unsigned int i; for (i = 0; i < attr_bundle->num_buckets; i++) { @@ -316,27 +328,22 @@ static int uverbs_validate_kernel_mandatory(const struct uverbs_method_spec *met return 0; } -static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr, - const struct ib_uverbs_attr *uattrs, - size_t num_uattrs, - struct ib_device *ibdev, - struct ib_uverbs_file *ufile, +static int uverbs_handle_method(size_t num_uattrs, const struct uverbs_method_spec *method_spec, - struct uverbs_attr_bundle *attr_bundle) + struct bundle_priv *pbundle) { + struct uverbs_attr_bundle *attr_bundle = &pbundle->bundle; int ret; int finalize_ret; int num_given_buckets; - struct uverbs_obj_attr *destroy_attr = NULL; num_given_buckets = - uverbs_uattrs_process(ufile, uattrs, num_uattrs, method_spec, - attr_bundle, &destroy_attr, uattr_ptr); + uverbs_uattrs_process(num_uattrs, method_spec, pbundle); if (num_given_buckets <= 0) return -EINVAL; attr_bundle->num_buckets = num_given_buckets; - ret = uverbs_validate_kernel_mandatory(method_spec, attr_bundle); + ret = uverbs_validate_kernel_mandatory(method_spec, pbundle); if (ret) goto cleanup; @@ -344,21 +351,21 @@ static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr, * We destroy the HW object before invoking the handler, handlers do * not get to manipulate the HW objects. */ - if (destroy_attr) { - ret = uobj_destroy(destroy_attr->uobject); + if (pbundle->destroy_attr) { + ret = uobj_destroy(pbundle->destroy_attr->uobject); if (ret) goto cleanup; } - ret = method_spec->handler(ufile, attr_bundle); + ret = method_spec->handler(pbundle->bundle.ufile, attr_bundle); - if (destroy_attr) { - uobj_put_destroy(destroy_attr->uobject); - destroy_attr->uobject = NULL; + if (pbundle->destroy_attr) { + uobj_put_destroy(pbundle->destroy_attr->uobject); + pbundle->destroy_attr->uobject = NULL; } cleanup: - finalize_ret = uverbs_finalize_attrs(attr_bundle, + finalize_ret = uverbs_finalize_attrs(pbundle, method_spec->attr_buckets, attr_bundle->num_buckets, !ret); @@ -370,16 +377,13 @@ cleanup: static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, struct ib_uverbs_file *file, struct ib_uverbs_ioctl_hdr *hdr, - void __user *buf) + struct ib_uverbs_attr __user *user_attrs) { const struct uverbs_object_spec *object_spec; const struct uverbs_method_spec *method_spec; long err = 0; unsigned int i; - struct { - struct ib_uverbs_attr *uattrs; - struct uverbs_attr_bundle *uverbs_attr_bundle; - } *ctx = NULL; + struct bundle_priv *ctx; struct uverbs_attr *curr_attr; unsigned long *curr_bitmap; size_t ctx_size; @@ -397,12 +401,11 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, return -EPROTONOSUPPORT; ctx_size = sizeof(*ctx) + - sizeof(struct uverbs_attr_bundle) + sizeof(struct uverbs_attr_bundle_hash) * method_spec->num_buckets + sizeof(*ctx->uattrs) * hdr->num_attrs + - sizeof(*ctx->uverbs_attr_bundle->hash[0].attrs) * + sizeof(*ctx->bundle.hash[0].attrs) * method_spec->num_child_attrs + - sizeof(*ctx->uverbs_attr_bundle->hash[0].valid_bitmap) * + sizeof(*ctx->bundle.hash[0].valid_bitmap) * (method_spec->num_child_attrs / BITS_PER_LONG + method_spec->num_buckets); @@ -413,10 +416,8 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, if (!ctx) return -ENOMEM; - ctx->uverbs_attr_bundle = (void *)ctx + sizeof(*ctx); - ctx->uattrs = (void *)(ctx->uverbs_attr_bundle + 1) + - (sizeof(ctx->uverbs_attr_bundle->hash[0]) * - method_spec->num_buckets); + ctx->uattrs = (void 
*)(ctx + 1) + + (sizeof(ctx->bundle.hash[0]) * method_spec->num_buckets); curr_attr = (void *)(ctx->uattrs + hdr->num_attrs); curr_bitmap = (void *)(curr_attr + method_spec->num_child_attrs); @@ -432,23 +433,25 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, curr_num_attrs = method_spec->attr_buckets[i]->num_attrs; - ctx->uverbs_attr_bundle->hash[i].attrs = curr_attr; + ctx->bundle.hash[i].attrs = curr_attr; curr_attr += curr_num_attrs; - ctx->uverbs_attr_bundle->hash[i].num_attrs = curr_num_attrs; - ctx->uverbs_attr_bundle->hash[i].valid_bitmap = curr_bitmap; + ctx->bundle.hash[i].num_attrs = curr_num_attrs; + ctx->bundle.hash[i].valid_bitmap = curr_bitmap; bitmap_zero(curr_bitmap, curr_num_attrs); curr_bitmap += BITS_TO_LONGS(curr_num_attrs); } - err = copy_from_user(ctx->uattrs, buf, + err = copy_from_user(ctx->uattrs, user_attrs, sizeof(*ctx->uattrs) * hdr->num_attrs); if (err) { err = -EFAULT; goto out; } - err = uverbs_handle_method(buf, ctx->uattrs, hdr->num_attrs, ib_dev, - file, method_spec, ctx->uverbs_attr_bundle); + ctx->destroy_attr = NULL; + ctx->bundle.ufile = file; + ctx->user_attrs = user_attrs; + err = uverbs_handle_method(hdr->num_attrs, method_spec, ctx); /* * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can @@ -499,8 +502,7 @@ long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) goto out; } - err = ib_uverbs_cmd_verbs(ib_dev, file, &hdr, - (__user void *)arg + sizeof(hdr)); + err = ib_uverbs_cmd_verbs(ib_dev, file, &hdr, user_hdr->attrs); } else { err = -ENOIOCTLCMD; } diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 339996e80c16..3b497d9ed395 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -494,6 +494,7 @@ struct uverbs_attr_bundle_hash { }; struct uverbs_attr_bundle { + struct ib_uverbs_file *ufile; size_t num_buckets; struct uverbs_attr_bundle_hash hash[]; }; -- cgit From 6a1f444fefeba392d1232b408aaf5902e33e0982 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 9 Aug 2018 20:14:39 -0600 Subject: IB/uverbs: Remove the ib_uverbs_attr pointer from each attr Memory in the bundle is valuable; do not waste it holding an 8-byte pointer for the rare case of writing to a PTR_OUT. We can compute the pointer by storing a small 1-byte array offset and the base address of the uattr memory in the bundle private memory. This also means we can access the kernel's copy of the ib_uverbs_attr, so drop the copy of flags as well. Since the uattr base should be private bundle information, this also de-inlines the already too big uverbs_copy_to inline and moves create_udata into uverbs_ioctl.c so they can see the private struct definition.
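As a sketch, the user-space attribute pointer is now recomputed on demand (attr_user_ptr() is a hypothetical helper; the fields are the ones introduced in this patch):

static struct ib_uverbs_attr __user *
attr_user_ptr(struct bundle_priv *pbundle, const struct uverbs_attr *attr)
{
	/* Rebuild the __user pointer from the small per-attr index plus
	 * the uattr base address kept in the bundle private memory. */
	return &pbundle->user_attrs[attr->ptr_attr.uattr_idx];
}

This is the same computation uverbs_copy_to() now performs when writing UVERBS_ATTR_F_VALID_OUTPUT back to user-space.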
Signed-off-by: Jason Gunthorpe Reviewed-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_ioctl.c | 67 ++++++++++++++++++++++++++++-- drivers/infiniband/core/uverbs_std_types.c | 32 -------------- include/rdma/uverbs_ioctl.h | 36 ++++------------ 3 files changed, 72 insertions(+), 63 deletions(-) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index efb7adcc21fb..f355e938a0b1 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -84,7 +84,6 @@ static int uverbs_process_attr(struct bundle_priv *pbundle, spec = &attr_spec_bucket->attrs[attr_id]; val_spec = spec; e = &elements[attr_id]; - e->uattr = uattr_ptr; switch (spec->type) { case UVERBS_ATTR_TYPE_ENUM_IN: @@ -124,8 +123,8 @@ static int uverbs_process_attr(struct bundle_priv *pbundle, uattr->attr_data.reserved) return -EINVAL; + e->ptr_attr.uattr_idx = uattr - pbundle->uattrs; e->ptr_attr.len = uattr->len; - e->ptr_attr.flags = uattr->flags; if (val_spec->alloc_and_copy && !uverbs_attr_ptr_is_inline(e)) { void *p; @@ -181,7 +180,7 @@ static int uverbs_process_attr(struct bundle_priv *pbundle, s64 id = o_attr->uobject->id; /* Copy the allocated id to the user-space */ - if (put_user(id, &e->uattr->data)) { + if (put_user(id, &uattr_ptr->data)) { uverbs_finalize_object(o_attr->uobject, UVERBS_ACCESS_NEW, false); @@ -562,3 +561,65 @@ int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle, return 0; } EXPORT_SYMBOL(uverbs_get_flags32); + +/* + * This is for ease of conversion. The purpose is to convert all drivers to + * use uverbs_attr_bundle instead of ib_udata. Assume attr == 0 is input and + * attr == 1 is output. + */ +void create_udata(struct uverbs_attr_bundle *bundle, struct ib_udata *udata) +{ + struct bundle_priv *pbundle = + container_of(bundle, struct bundle_priv, bundle); + const struct uverbs_attr *uhw_in = + uverbs_attr_get(bundle, UVERBS_ATTR_UHW_IN); + const struct uverbs_attr *uhw_out = + uverbs_attr_get(bundle, UVERBS_ATTR_UHW_OUT); + + if (!IS_ERR(uhw_in)) { + udata->inlen = uhw_in->ptr_attr.len; + if (uverbs_attr_ptr_is_inline(uhw_in)) + udata->inbuf = + &pbundle->user_attrs[uhw_in->ptr_attr.uattr_idx] + .data; + else + udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data); + } else { + udata->inbuf = NULL; + udata->inlen = 0; + } + + if (!IS_ERR(uhw_out)) { + udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data); + udata->outlen = uhw_out->ptr_attr.len; + } else { + udata->outbuf = NULL; + udata->outlen = 0; + } +} + +int uverbs_copy_to(const struct uverbs_attr_bundle *bundle, size_t idx, + const void *from, size_t size) +{ + struct bundle_priv *pbundle = + container_of(bundle, struct bundle_priv, bundle); + const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx); + u16 flags; + size_t min_size; + + if (IS_ERR(attr)) + return PTR_ERR(attr); + + min_size = min_t(size_t, attr->ptr_attr.len, size); + if (copy_to_user(u64_to_user_ptr(attr->ptr_attr.data), from, min_size)) + return -EFAULT; + + flags = pbundle->uattrs[attr->ptr_attr.uattr_idx].flags | + UVERBS_ATTR_F_VALID_OUTPUT; + if (put_user(flags, + &pbundle->user_attrs[attr->ptr_attr.uattr_idx].flags)) + return -EFAULT; + + return 0; +} +EXPORT_SYMBOL(uverbs_copy_to); diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 7f22b820a21b..203cc96ac6f5 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -217,38 +217,6 @@ int 
uverbs_destroy_def_handler(struct ib_uverbs_file *file, } EXPORT_SYMBOL(uverbs_destroy_def_handler); -void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata) -{ - /* - * This is for ease of conversion. The purpose is to convert all drivers - * to use uverbs_attr_bundle instead of ib_udata. - * Assume attr == 0 is input and attr == 1 is output. - */ - const struct uverbs_attr *uhw_in = - uverbs_attr_get(ctx, UVERBS_ATTR_UHW_IN); - const struct uverbs_attr *uhw_out = - uverbs_attr_get(ctx, UVERBS_ATTR_UHW_OUT); - - if (!IS_ERR(uhw_in)) { - udata->inlen = uhw_in->ptr_attr.len; - if (uverbs_attr_ptr_is_inline(uhw_in)) - udata->inbuf = &uhw_in->uattr->data; - else - udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data); - } else { - udata->inbuf = NULL; - udata->inlen = 0; - } - - if (!IS_ERR(uhw_out)) { - udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data); - udata->outlen = uhw_out->ptr_attr.len; - } else { - udata->outbuf = NULL; - udata->outlen = 0; - } -} - DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_COMP_CHANNEL, UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 3b497d9ed395..ecf028446cdf 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -461,8 +461,7 @@ struct uverbs_ptr_attr { u64 data; }; u16 len; - /* Combination of bits from enum UVERBS_ATTR_F_XXXX */ - u16 flags; + u16 uattr_idx; u8 enum_id; }; @@ -471,11 +470,6 @@ struct uverbs_obj_attr { }; struct uverbs_attr { - /* - * pointer to the user-space given attribute, in order to write the - * new uobject's id or update flags. - */ - struct ib_uverbs_attr __user *uattr; union { struct uverbs_ptr_attr ptr_attr; struct uverbs_obj_attr obj_attr; @@ -575,27 +569,6 @@ uverbs_attr_get_len(const struct uverbs_attr_bundle *attrs_bundle, u16 idx) return attr->ptr_attr.len; } -static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, - size_t idx, const void *from, size_t size) -{ - const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); - u16 flags; - size_t min_size; - - if (IS_ERR(attr)) - return PTR_ERR(attr); - - min_size = min_t(size_t, attr->ptr_attr.len, size); - if (copy_to_user(u64_to_user_ptr(attr->ptr_attr.data), from, min_size)) - return -EFAULT; - - flags = attr->ptr_attr.flags | UVERBS_ATTR_F_VALID_OUTPUT; - if (put_user(flags, &attr->uattr->flags)) - return -EFAULT; - - return 0; -} - static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr) { return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data); @@ -676,6 +649,8 @@ int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, u64 allowed_bits); int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, u64 allowed_bits); +int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, size_t idx, + const void *from, size_t size); #else static inline int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, @@ -689,6 +664,11 @@ uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle, { return -EINVAL; } +static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, const void *from, size_t size) +{ + return -EINVAL; +} #endif /* ================================================= -- cgit From 461bb2eee4e162617e790c74d9b4ab10056cad7f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 9 Aug 2018 20:14:40 -0600 Subject: IB/uverbs: Add a simple 
allocator to uverbs_attr_bundle This is similar in spirit to devm: it keeps track of any allocations linked to this method call and ensures they are all freed when the method exits. Further, if there is space in the internal/onstack buffer then the allocator will hand out that memory and avoid an expensive call to kmalloc/kfree in the syscall path. Signed-off-by: Jason Gunthorpe Reviewed-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_ioctl.c | 109 +++++++++++++++++++++++++++------ include/rdma/uverbs_ioctl.h | 24 ++++++++ 2 files changed, 113 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index f355e938a0b1..7b330cc5ff76 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -35,7 +35,18 @@ #include "rdma_core.h" #include "uverbs.h" +struct bundle_alloc_head { + struct bundle_alloc_head *next; + u8 data[]; +}; + struct bundle_priv { + /* Must be first */ + struct bundle_alloc_head alloc_head; + struct bundle_alloc_head *allocated_mem; + size_t internal_avail; + size_t internal_used; + struct ib_uverbs_attr __user *user_attrs; struct ib_uverbs_attr *uattrs; struct uverbs_obj_attr *destroy_attr; @@ -45,8 +56,53 @@ struct bundle_priv { * internal_buffer. */ struct uverbs_attr_bundle bundle; + u64 internal_buffer[32]; }; +/** + * uverbs_alloc() - Quickly allocate memory for use with a bundle + * @bundle: The bundle + * @size: Number of bytes to allocate + * @flags: Allocator flags + * + * The bundle allocator is intended for allocations that are connected with + * processing the system call related to the bundle. The allocated memory is + * always freed once the system call completes, and cannot be freed any other + * way. + * + * This tries to use a small pool of pre-allocated memory for performance.
+ */ +__malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size, + gfp_t flags) +{ + struct bundle_priv *pbundle = + container_of(bundle, struct bundle_priv, bundle); + size_t new_used; + void *res; + + if (check_add_overflow(size, pbundle->internal_used, &new_used)) + return ERR_PTR(-EINVAL); + + if (new_used > pbundle->internal_avail) { + struct bundle_alloc_head *buf; + + buf = kvmalloc(struct_size(buf, data, size), flags); + if (!buf) + return ERR_PTR(-ENOMEM); + buf->next = pbundle->allocated_mem; + pbundle->allocated_mem = buf; + return buf->data; + } + + res = (void *)pbundle->internal_buffer + pbundle->internal_used; + pbundle->internal_used = + ALIGN(new_used, sizeof(*pbundle->internal_buffer)); + if (flags & __GFP_ZERO) + memset(res, 0, size); + return res; +} +EXPORT_SYMBOL(_uverbs_alloc); + static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr, u16 len) { @@ -129,17 +185,15 @@ static int uverbs_process_attr(struct bundle_priv *pbundle, if (val_spec->alloc_and_copy && !uverbs_attr_ptr_is_inline(e)) { void *p; - p = kvmalloc(uattr->len, GFP_KERNEL); - if (!p) - return -ENOMEM; + p = uverbs_alloc(&pbundle->bundle, uattr->len); + if (IS_ERR(p)) + return PTR_ERR(p); e->ptr_attr.ptr = p; if (copy_from_user(p, u64_to_user_ptr(uattr->data), - uattr->len)) { - kvfree(p); + uattr->len)) return -EFAULT; - } } else { e->ptr_attr.data = uattr->data; } @@ -234,10 +288,6 @@ static int uverbs_finalize_attrs(struct bundle_priv *pbundle, spec->u.obj.access, commit); if (!ret) ret = current_ret; - } else if (spec->type == UVERBS_ATTR_TYPE_PTR_IN && - spec->alloc_and_copy && - !uverbs_attr_ptr_is_inline(attr)) { - kvfree(attr->ptr_attr.ptr); } } } @@ -372,7 +422,18 @@ cleanup: return ret ? ret : finalize_ret; } -#define UVERBS_OPTIMIZE_USING_STACK_SZ 256 +static void bundle_destroy(struct bundle_priv *pbundle) +{ + struct bundle_alloc_head *memblock; + + for (memblock = pbundle->allocated_mem; memblock;) { + struct bundle_alloc_head *tmp = memblock; + + memblock = memblock->next; + kvfree(tmp); + } +} + static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, struct ib_uverbs_file *file, struct ib_uverbs_ioctl_hdr *hdr, @@ -382,11 +443,11 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, const struct uverbs_method_spec *method_spec; long err = 0; unsigned int i; + struct bundle_priv onstack_pbundle; struct bundle_priv *ctx; struct uverbs_attr *curr_attr; unsigned long *curr_bitmap; size_t ctx_size; - uintptr_t data[UVERBS_OPTIMIZE_USING_STACK_SZ / sizeof(uintptr_t)]; if (hdr->driver_id != ib_dev->driver_id) return -EINVAL; @@ -399,7 +460,7 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, if (!method_spec) return -EPROTONOSUPPORT; - ctx_size = sizeof(*ctx) + + ctx_size = sizeof(*ctx) - sizeof(ctx->internal_buffer) + sizeof(struct uverbs_attr_bundle_hash) * method_spec->num_buckets + sizeof(*ctx->uattrs) * hdr->num_attrs + sizeof(*ctx->bundle.hash[0].attrs) * @@ -408,17 +469,26 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, (method_spec->num_child_attrs / BITS_PER_LONG + method_spec->num_buckets); - if (ctx_size <= UVERBS_OPTIMIZE_USING_STACK_SZ) - ctx = (void *)data; - if (!ctx) + if (ctx_size <= sizeof(onstack_pbundle)) { + ctx = &onstack_pbundle; + ctx->internal_avail = + sizeof(onstack_pbundle) - + offsetof(struct bundle_priv, internal_buffer); + ctx->allocated_mem = NULL; + } else { ctx = kmalloc(ctx_size, GFP_KERNEL); - if (!ctx) - return -ENOMEM; + if (!ctx) + return -ENOMEM; + ctx->internal_avail = 0; + 
ctx->alloc_head.next = NULL; + ctx->allocated_mem = &ctx->alloc_head; + } ctx->uattrs = (void *)(ctx + 1) + (sizeof(ctx->bundle.hash[0]) * method_spec->num_buckets); curr_attr = (void *)(ctx->uattrs + hdr->num_attrs); curr_bitmap = (void *)(curr_attr + method_spec->num_child_attrs); + ctx->internal_used = ALIGN(ctx_size, sizeof(*ctx->internal_buffer)); /* * We just fill the pointers and num_attrs here. The data itself will be @@ -462,8 +532,7 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, err = -EINVAL; } out: - if (ctx != (void *)data) - kfree(ctx); + bundle_destroy(ctx); return err; } diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index ecf028446cdf..1dbf663f7f43 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -651,6 +651,20 @@ int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, u64 allowed_bits); int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, size_t idx, const void *from, size_t size); +__malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size, + gfp_t flags); + +static inline __malloc void *uverbs_alloc(struct uverbs_attr_bundle *bundle, + size_t size) +{ + return _uverbs_alloc(bundle, size, GFP_KERNEL); +} + +static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle, + size_t size) +{ + return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO); +} #else static inline int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, @@ -669,6 +683,16 @@ static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, { return -EINVAL; } +static inline __malloc void *uverbs_alloc(struct uverbs_attr_bundle *bundle, + size_t size) +{ + return ERR_PTR(-EINVAL); +} +static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle, + size_t size) +{ + return ERR_PTR(-EINVAL); +} #endif /* ================================================= -- cgit From b61815e241652096b61f59ca2063edecb23602a2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 9 Aug 2018 20:14:41 -0600 Subject: IB/uverbs: Use uverbs_alloc for allocations Several handlers need temporary allocations for the life of the method; switch them to use the uverbs_alloc allocator.
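The conversion pattern, sketched for a hypothetical handler (the EXAMPLE_* names are invented; uverbs_zalloc() and uverbs_copy_to() are the helpers added by the previous patches):

static int UVERBS_HANDLER(EXAMPLE_METHOD)(struct ib_uverbs_file *file,
					  struct uverbs_attr_bundle *attrs)
{
	int len = uverbs_attr_get_len(attrs, EXAMPLE_ATTR_BUFF);
	u64 *buf;

	if (len < 0)
		return len;

	/* Bundle memory is freed automatically when the method exits,
	 * so no kfree() is needed on any error path. */
	buf = uverbs_zalloc(attrs, len);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	/* ... fill buf from hardware ... */
	return uverbs_copy_to(attrs, EXAMPLE_ATTR_BUFF, buf, len);
}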
Signed-off-by: Jason Gunthorpe Reviewed-by: Leon Romanovsky --- .../infiniband/core/uverbs_std_types_counters.c | 20 +++--- drivers/infiniband/hw/mlx5/devx.c | 83 ++++++++-------------- 2 files changed, 38 insertions(+), 65 deletions(-) diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index dfacc9e83399..a0ffdcf9a51c 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -104,22 +104,18 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)( uattr = uverbs_attr_get(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF); read_attr.ncounters = uattr->ptr_attr.len / sizeof(u64); - read_attr.counters_buff = kcalloc(read_attr.ncounters, - sizeof(u64), GFP_KERNEL); - if (!read_attr.counters_buff) - return -ENOMEM; + read_attr.counters_buff = uverbs_zalloc( + attrs, array_size(read_attr.ncounters, sizeof(u64))); + if (IS_ERR(read_attr.counters_buff)) + return PTR_ERR(read_attr.counters_buff); ret = counters->device->read_counters(counters, &read_attr, attrs); if (ret) - goto err_read; - - ret = uverbs_copy_to(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF, - read_attr.counters_buff, - read_attr.ncounters * sizeof(u64)); + return ret; -err_read: - kfree(read_attr.counters_buff); - return ret; + return uverbs_copy_to(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF, + read_attr.counters_buff, + read_attr.ncounters * sizeof(u64)); } DECLARE_UVERBS_NAMED_METHOD( diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 29c688372390..ac116d63e466 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -511,22 +511,19 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)( if (!devx_is_general_cmd(cmd_in)) return -EINVAL; - cmd_out = kvzalloc(cmd_out_len, GFP_KERNEL); - if (!cmd_out) - return -ENOMEM; + cmd_out = uverbs_zalloc(attrs, cmd_out_len); + if (IS_ERR(cmd_out)) + return PTR_ERR(cmd_out); MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); err = mlx5_cmd_exec(dev->mdev, cmd_in, uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN), cmd_out, cmd_out_len); if (err) - goto other_cmd_free; - - err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out, cmd_out_len); + return err; -other_cmd_free: - kvfree(cmd_out); - return err; + return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out, + cmd_out_len); } static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, @@ -735,22 +732,20 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( if (!devx_is_obj_create_cmd(cmd_in)) return -EINVAL; + cmd_out = uverbs_zalloc(attrs, cmd_out_len); + if (IS_ERR(cmd_out)) + return PTR_ERR(cmd_out); + obj = kzalloc(sizeof(struct devx_obj), GFP_KERNEL); if (!obj) return -ENOMEM; - cmd_out = kvzalloc(cmd_out_len, GFP_KERNEL); - if (!cmd_out) { - err = -ENOMEM; - goto obj_free; - } - MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); err = mlx5_cmd_exec(dev->mdev, cmd_in, uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN), cmd_out, cmd_out_len); if (err) - goto cmd_free; + goto obj_free; uobj->object = obj; obj->mdev = dev->mdev; @@ -759,13 +754,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len); if (err) - goto cmd_free; + goto obj_free; - kvfree(cmd_out); return 0; -cmd_free: - kvfree(cmd_out); obj_free: kfree(obj); return err; @@ -793,23 +785,19 @@ static int 
UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( if (!devx_is_valid_obj_id(obj, cmd_in)) return -EINVAL; - cmd_out = kvzalloc(cmd_out_len, GFP_KERNEL); - if (!cmd_out) - return -ENOMEM; + cmd_out = uverbs_zalloc(attrs, cmd_out_len); + if (IS_ERR(cmd_out)) + return PTR_ERR(cmd_out); MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); err = mlx5_cmd_exec(obj->mdev, cmd_in, uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN), cmd_out, cmd_out_len); if (err) - goto other_cmd_free; - - err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, - cmd_out, cmd_out_len); + return err; -other_cmd_free: - kvfree(cmd_out); - return err; + return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, + cmd_out, cmd_out_len); } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( @@ -834,22 +822,19 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( if (!devx_is_valid_obj_id(obj, cmd_in)) return -EINVAL; - cmd_out = kvzalloc(cmd_out_len, GFP_KERNEL); - if (!cmd_out) - return -ENOMEM; + cmd_out = uverbs_zalloc(attrs, cmd_out_len); + if (IS_ERR(cmd_out)) + return PTR_ERR(cmd_out); MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); err = mlx5_cmd_exec(obj->mdev, cmd_in, uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN), cmd_out, cmd_out_len); if (err) - goto other_cmd_free; - - err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, cmd_out, cmd_out_len); + return err; -other_cmd_free: - kvfree(cmd_out); - return err; + return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, + cmd_out, cmd_out_len); } static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, @@ -896,18 +881,14 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, return 0; } -static int devx_umem_reg_cmd_alloc(struct devx_umem *obj, +static int devx_umem_reg_cmd_alloc(struct uverbs_attr_bundle *attrs, + struct devx_umem *obj, struct devx_umem_reg_cmd *cmd) { cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) + (MLX5_ST_SZ_BYTES(mtt) * obj->ncont); - cmd->in = kvzalloc(cmd->inlen, GFP_KERNEL); - return cmd->in ? 0 : -ENOMEM; -} - -static void devx_umem_reg_cmd_free(struct devx_umem_reg_cmd *cmd) -{ - kvfree(cmd->in); + cmd->in = uverbs_zalloc(attrs, cmd->inlen); + return PTR_ERR_OR_ZERO(cmd->in); } static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev, @@ -954,7 +935,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( if (err) goto err_obj_free; - err = devx_umem_reg_cmd_alloc(obj, &cmd); + err = devx_umem_reg_cmd_alloc(attrs, obj, &cmd); if (err) goto err_umem_release; @@ -964,7 +945,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out, sizeof(cmd.out)); if (err) - goto err_umem_reg_cmd_free; + goto err_umem_release; obj->mdev = dev->mdev; uobj->object = obj; @@ -973,14 +954,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( if (err) goto err_umem_destroy; - devx_umem_reg_cmd_free(&cmd); - return 0; err_umem_destroy: mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, cmd.out, sizeof(cmd.out)); -err_umem_reg_cmd_free: - devx_umem_reg_cmd_free(&cmd); err_umem_release: ib_umem_release(obj->umem); err_obj_free: -- cgit From 3a863577a7496278892360a69d90d8465733100c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 9 Aug 2018 20:14:42 -0600 Subject: IB/uverbs: Use uverbs_api to unmarshal ioctl commands Convert the ioctl method syscall path to use the uverbs_api data structures. 
The new uapi structure includes all the same information, just in a different and more optimal way. - Use attr_bkey instead of 2-level radix trees for everything related to attributes. This includes the attribute storage, presence, and detection of missing mandatory attributes. - Avoid iterating over all attribute storage at finish; instead, use find_first_bit with the attr_bkey to locate only those attrs that need cleanup. - Organize things to always run, and always rely on, cleanup. This avoids a bunch of tricky error unwind cases. - Locate the method using the radix tree, and locate the attributes using a very efficient incremental radix tree lookup. - Use the precomputed destroy_bkey to handle uobject destruction. - Use the precomputed allocation sizes and precomputed 'need_stack' to avoid maths in the fast path. This is optimal if userspace does not pass (many) unsupported attributes. Overall this results in much better codegen for the attribute accessors; everything is now stored in bitmaps or linear arrays indexed by attr_bkey. The compiler can compute attr_bkey values at compile time for all method attributes, meaning things like uverbs_attr_is_valid() now compile into single-instruction bit tests. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.h | 4 + drivers/infiniband/core/uverbs_ioctl.c | 472 ++++++++++++++------------------- drivers/infiniband/core/uverbs_uapi.c | 3 + include/rdma/uverbs_ioctl.h | 36 +-- 4 files changed, 217 insertions(+), 298 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index d89569d87b1c..aca279bfef08 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -133,6 +133,8 @@ struct uverbs_api_ioctl_method { int (__rcu *handler)(struct ib_uverbs_file *ufile, struct uverbs_attr_bundle *ctx); DECLARE_BITMAP(attr_mandatory, UVERBS_API_ATTR_BKEY_LEN); + u16 bundle_size; + u8 use_stack:1; u8 driver_method:1; u8 key_bitmap_len; u8 destroy_bkey; @@ -162,5 +164,7 @@ struct uverbs_api *uverbs_alloc_api( void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev); void uverbs_disassociate_api(struct uverbs_api *uapi); void uverbs_destroy_api(struct uverbs_api *uapi); +void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm, + unsigned int num_attrs); #endif /* RDMA_CORE_H */ diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 7b330cc5ff76..3ca700f6d663 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -47,9 +47,16 @@ struct bundle_priv { size_t internal_avail; size_t internal_used; + struct radix_tree_root *radix; + const struct uverbs_api_ioctl_method *method_elm; + void __rcu **radix_slots; + unsigned long radix_slots_len; + u32 method_key; + struct ib_uverbs_attr __user *user_attrs; struct ib_uverbs_attr *uattrs; - struct uverbs_obj_attr *destroy_attr; + + DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN); /* * Must be last. bundle ends in a flex array which overlaps @@ -59,6 +66,28 @@ struct bundle_priv { u64 internal_buffer[32]; }; +/* + * Each method has an absolute minimum amount of memory it needs to allocate, + * precompute that amount and determine if the onstack memory can be used or + * if allocation is needed.
+ */ +void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm, + unsigned int num_attrs) +{ + struct bundle_priv *pbundle; + size_t bundle_size = + offsetof(struct bundle_priv, internal_buffer) + + sizeof(*pbundle->bundle.attrs) * method_elm->key_bitmap_len + + sizeof(*pbundle->uattrs) * num_attrs; + + method_elm->use_stack = bundle_size <= sizeof(*pbundle); + method_elm->bundle_size = + ALIGN(bundle_size + 256, sizeof(*pbundle->internal_buffer)); + + /* Do not want order-2 allocations for this. */ + WARN_ON_ONCE(method_elm->bundle_size > PAGE_SIZE); +} + /** * uverbs_alloc() - Quickly allocate memory for use with a bundle * @bundle: The bundle @@ -81,7 +110,7 @@ __malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size, void *res; if (check_add_overflow(size, pbundle->internal_used, &new_used)) - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOVERFLOW); if (new_used > pbundle->internal_avail) { struct bundle_alloc_head *buf; @@ -115,31 +144,13 @@ static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr, } static int uverbs_process_attr(struct bundle_priv *pbundle, - const struct ib_uverbs_attr *uattr, - u16 attr_id, - const struct uverbs_attr_spec_hash *attr_spec_bucket, - struct uverbs_attr_bundle_hash *attr_bundle_h, - struct ib_uverbs_attr __user *uattr_ptr) + const struct uverbs_api_attr *attr_uapi, + struct ib_uverbs_attr *uattr, u32 attr_bkey) { - const struct uverbs_attr_spec *spec; - const struct uverbs_attr_spec *val_spec; - struct uverbs_attr *e; + const struct uverbs_attr_spec *spec = &attr_uapi->spec; + struct uverbs_attr *e = &pbundle->bundle.attrs[attr_bkey]; + const struct uverbs_attr_spec *val_spec = spec; struct uverbs_obj_attr *o_attr; - struct uverbs_attr *elements = attr_bundle_h->attrs; - - if (attr_id >= attr_spec_bucket->num_attrs) { - if (uattr->flags & UVERBS_ATTR_F_MANDATORY) - return -EINVAL; - else - return 0; - } - - if (test_bit(attr_id, attr_bundle_h->valid_bitmap)) - return -EINVAL; - - spec = &attr_spec_bucket->attrs[attr_id]; - val_spec = spec; - e = &elements[attr_id]; switch (spec->type) { case UVERBS_ATTR_TYPE_ENUM_IN: @@ -208,12 +219,7 @@ static int uverbs_process_attr(struct bundle_priv *pbundle, return -EINVAL; o_attr = &e->obj_attr; - - /* specs are allowed to have only one destroy attribute */ - WARN_ON(spec->u.obj.access == UVERBS_ACCESS_DESTROY && - pbundle->destroy_attr); - if (spec->u.obj.access == UVERBS_ACCESS_DESTROY) - pbundle->destroy_attr = o_attr; + o_attr->attr_elm = attr_uapi; /* * The type of uattr->data is u64 for UVERBS_ATTR_TYPE_IDR and @@ -226,20 +232,17 @@ static int uverbs_process_attr(struct bundle_priv *pbundle, pbundle->bundle.ufile, spec->u.obj.access, uattr->data_s64); - if (IS_ERR(o_attr->uobject)) return PTR_ERR(o_attr->uobject); + __set_bit(attr_bkey, pbundle->uobj_finalize); if (spec->u.obj.access == UVERBS_ACCESS_NEW) { + unsigned int uattr_idx = uattr - pbundle->uattrs; s64 id = o_attr->uobject->id; /* Copy the allocated id to the user-space */ - if (put_user(id, &uattr_ptr->data)) { - uverbs_finalize_object(o_attr->uobject, - UVERBS_ACCESS_NEW, - false); + if (put_user(id, &pbundle->user_attrs[uattr_idx].data)) return -EFAULT; - } } break; @@ -247,184 +250,152 @@ static int uverbs_process_attr(struct bundle_priv *pbundle, return -EOPNOTSUPP; } - set_bit(attr_id, attr_bundle_h->valid_bitmap); return 0; } -static int uverbs_finalize_attrs(struct bundle_priv *pbundle, - struct uverbs_attr_spec_hash *const *spec_hash, - size_t num, bool commit) +/* + * We search the radix 
tree with the method prefix and now we want to fast + search the suffix bits to get a particular attribute pointer. It is not + totally clear to me if this breaks the radix tree encapsulation or not, but + it uses the iter data to determine if the method iter points at the same + chunk that will store the attribute, if so, it just derefs it directly. By + construction in most kernel configs the method and attrs will all fit in a + single radix chunk, so in most cases this will have no search. In other cases + this falls back to a full search. + */ +static void __rcu **uapi_get_attr_for_method(struct bundle_priv *pbundle, + u32 attr_key) { - struct uverbs_attr_bundle *attrs_bundle = &pbundle->bundle; - unsigned int i; - int ret = 0; - - for (i = 0; i < num; i++) { - struct uverbs_attr_bundle_hash *curr_bundle = - &attrs_bundle->hash[i]; - const struct uverbs_attr_spec_hash *curr_spec_bucket = - spec_hash[i]; - unsigned int j; - - if (!curr_spec_bucket) - continue; - - for (j = 0; j < curr_bundle->num_attrs; j++) { - struct uverbs_attr *attr; - const struct uverbs_attr_spec *spec; - - if (!uverbs_attr_is_valid_in_hash(curr_bundle, j)) - continue; - - attr = &curr_bundle->attrs[j]; - spec = &curr_spec_bucket->attrs[j]; + void __rcu **slot; - if (spec->type == UVERBS_ATTR_TYPE_IDR || - spec->type == UVERBS_ATTR_TYPE_FD) { - int current_ret; + if (likely(attr_key < pbundle->radix_slots_len)) { + void *entry; - current_ret = uverbs_finalize_object( - attr->obj_attr.uobject, - spec->u.obj.access, commit); - if (!ret) - ret = current_ret; - } - } + slot = pbundle->radix_slots + attr_key; + entry = rcu_dereference_raw(*slot); + if (likely(!radix_tree_is_internal_node(entry) && entry)) + return slot; } - return ret; + + return radix_tree_lookup_slot(pbundle->radix, + pbundle->method_key | attr_key); } -static int uverbs_uattrs_process(size_t num_uattrs, - const struct uverbs_method_spec *method, - struct bundle_priv *pbundle) +static int uverbs_set_attr(struct bundle_priv *pbundle, + struct ib_uverbs_attr *uattr) { - struct uverbs_attr_bundle *attr_bundle = &pbundle->bundle; - struct ib_uverbs_attr __user *uattr_ptr = pbundle->user_attrs; - size_t i; - int ret = 0; - int num_given_buckets = 0; - - for (i = 0; i < num_uattrs; i++) { - const struct ib_uverbs_attr *uattr = &pbundle->uattrs[i]; - u16 attr_id = uattr->attr_id; - struct uverbs_attr_spec_hash *attr_spec_bucket; - - ret = uverbs_ns_idx(&attr_id, method->num_buckets); - if (ret < 0 || !method->attr_buckets[ret]) { - if (uattr->flags & UVERBS_ATTR_F_MANDATORY) { - uverbs_finalize_attrs(pbundle, - method->attr_buckets, - num_given_buckets, - false); - return ret; - } - continue; - } + u32 attr_key = uapi_key_attr(uattr->attr_id); + u32 attr_bkey = uapi_bkey_attr(attr_key); + const struct uverbs_api_attr *attr; + void __rcu **slot; + int ret; + slot = uapi_get_attr_for_method(pbundle, attr_key); + if (!slot) { /* - * ret is the found ns, so increase num_given_buckets if - * necessary.
+ * Kernel does not support the attribute but user-space says it + * is mandatory */ - if (ret >= num_given_buckets) - num_given_buckets = ret + 1; - - attr_spec_bucket = method->attr_buckets[ret]; - ret = uverbs_process_attr(pbundle, - uattr, attr_id, - attr_spec_bucket, - &attr_bundle->hash[ret], - uattr_ptr++); - if (ret) { - uverbs_finalize_attrs(pbundle, - method->attr_buckets, - num_given_buckets, - false); - return ret; - } + if (uattr->flags & UVERBS_ATTR_F_MANDATORY) + return -EPROTONOSUPPORT; + return 0; } + attr = srcu_dereference( + *slot, &pbundle->bundle.ufile->device->disassociate_srcu); - return num_given_buckets; -} - -static int uverbs_validate_kernel_mandatory(const struct uverbs_method_spec *method_spec, - struct bundle_priv *pbundle) -{ - struct uverbs_attr_bundle *attr_bundle = &pbundle->bundle; - unsigned int i; - - for (i = 0; i < attr_bundle->num_buckets; i++) { - struct uverbs_attr_spec_hash *attr_spec_bucket = - method_spec->attr_buckets[i]; - - if (!attr_spec_bucket) - continue; - - if (!bitmap_subset(attr_spec_bucket->mandatory_attrs_bitmask, - attr_bundle->hash[i].valid_bitmap, - attr_spec_bucket->num_attrs)) - return -EINVAL; - } + /* Reject duplicate attributes from user-space */ + if (test_bit(attr_bkey, pbundle->bundle.attr_present)) + return -EINVAL; - for (; i < method_spec->num_buckets; i++) { - struct uverbs_attr_spec_hash *attr_spec_bucket = - method_spec->attr_buckets[i]; + ret = uverbs_process_attr(pbundle, attr, uattr, attr_bkey); + if (ret) + return ret; - if (!bitmap_empty(attr_spec_bucket->mandatory_attrs_bitmask, - attr_spec_bucket->num_attrs)) - return -EINVAL; - } + __set_bit(attr_bkey, pbundle->bundle.attr_present); return 0; } -static int uverbs_handle_method(size_t num_uattrs, - const struct uverbs_method_spec *method_spec, - struct bundle_priv *pbundle) +static int ib_uverbs_run_method(struct bundle_priv *pbundle, + unsigned int num_attrs) { - struct uverbs_attr_bundle *attr_bundle = &pbundle->bundle; + int (*handler)(struct ib_uverbs_file *ufile, + struct uverbs_attr_bundle *ctx); + size_t uattrs_size = array_size(sizeof(*pbundle->uattrs), num_attrs); + unsigned int destroy_bkey = pbundle->method_elm->destroy_bkey; + unsigned int i; int ret; - int finalize_ret; - int num_given_buckets; - num_given_buckets = - uverbs_uattrs_process(num_uattrs, method_spec, pbundle); - if (num_given_buckets <= 0) + /* See uverbs_disassociate_api() */ + handler = srcu_dereference( + pbundle->method_elm->handler, + &pbundle->bundle.ufile->device->disassociate_srcu); + if (!handler) + return -EIO; + + pbundle->uattrs = uverbs_alloc(&pbundle->bundle, uattrs_size); + if (IS_ERR(pbundle->uattrs)) + return PTR_ERR(pbundle->uattrs); + if (copy_from_user(pbundle->uattrs, pbundle->user_attrs, uattrs_size)) + return -EFAULT; + + for (i = 0; i != num_attrs; i++) { + ret = uverbs_set_attr(pbundle, &pbundle->uattrs[i]); + if (unlikely(ret)) + return ret; + } + + /* User space did not provide all the mandatory attributes */ + if (unlikely(!bitmap_subset(pbundle->method_elm->attr_mandatory, + pbundle->bundle.attr_present, + pbundle->method_elm->key_bitmap_len))) return -EINVAL; - attr_bundle->num_buckets = num_given_buckets; - ret = uverbs_validate_kernel_mandatory(method_spec, pbundle); - if (ret) - goto cleanup; + if (destroy_bkey != UVERBS_API_ATTR_BKEY_LEN) { + struct uverbs_obj_attr *destroy_attr = + &pbundle->bundle.attrs[destroy_bkey].obj_attr; - /* - * We destroy the HW object before invoking the handler, handlers do - * not get to manipulate the HW objects. 
- */ - if (pbundle->destroy_attr) { - ret = uobj_destroy(pbundle->destroy_attr->uobject); + ret = uobj_destroy(destroy_attr->uobject); if (ret) - goto cleanup; - } - - ret = method_spec->handler(pbundle->bundle.ufile, attr_bundle); + return ret; + __clear_bit(destroy_bkey, pbundle->uobj_finalize); - if (pbundle->destroy_attr) { - uobj_put_destroy(pbundle->destroy_attr->uobject); - pbundle->destroy_attr->uobject = NULL; + ret = handler(pbundle->bundle.ufile, &pbundle->bundle); + uobj_put_destroy(destroy_attr->uobject); + } else { + ret = handler(pbundle->bundle.ufile, &pbundle->bundle); } -cleanup: - finalize_ret = uverbs_finalize_attrs(pbundle, - method_spec->attr_buckets, - attr_bundle->num_buckets, - !ret); + /* + * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can + * not invoke the method because the request is not supported. No + * other cases should return this code. + */ + if (WARN_ON_ONCE(ret == -EPROTONOSUPPORT)) + return -EINVAL; - return ret ? ret : finalize_ret; + return ret; } -static void bundle_destroy(struct bundle_priv *pbundle) +static int bundle_destroy(struct bundle_priv *pbundle, bool commit) { + unsigned int key_bitmap_len = pbundle->method_elm->key_bitmap_len; struct bundle_alloc_head *memblock; + unsigned int i; + int ret = 0; + + i = -1; + while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len, + i + 1)) < key_bitmap_len) { + struct uverbs_attr *attr = &pbundle->bundle.attrs[i]; + int current_ret; + + current_ret = uverbs_finalize_object( + attr->obj_attr.uobject, + attr->obj_attr.attr_elm->spec.u.obj.access, commit); + if (!ret) + ret = current_ret; + } for (memblock = pbundle->allocated_mem; memblock;) { struct bundle_alloc_head *tmp = memblock; @@ -432,108 +403,71 @@ static void bundle_destroy(struct bundle_priv *pbundle) memblock = memblock->next; kvfree(tmp); } + + return ret; } -static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct ib_uverbs_ioctl_hdr *hdr, - struct ib_uverbs_attr __user *user_attrs) +static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile, + struct ib_uverbs_ioctl_hdr *hdr, + struct ib_uverbs_attr __user *user_attrs) { - const struct uverbs_object_spec *object_spec; - const struct uverbs_method_spec *method_spec; - long err = 0; - unsigned int i; - struct bundle_priv onstack_pbundle; - struct bundle_priv *ctx; - struct uverbs_attr *curr_attr; - unsigned long *curr_bitmap; - size_t ctx_size; + const struct uverbs_api_ioctl_method *method_elm; + struct uverbs_api *uapi = ufile->device->uapi; + struct radix_tree_iter attrs_iter; + struct bundle_priv *pbundle; + struct bundle_priv onstack; + void __rcu **slot; + int destroy_ret; + int ret; - if (hdr->driver_id != ib_dev->driver_id) + if (unlikely(hdr->driver_id != uapi->driver_id)) return -EINVAL; - object_spec = uverbs_get_object(file, hdr->object_id); - if (!object_spec) - return -EPROTONOSUPPORT; - - method_spec = uverbs_get_method(object_spec, hdr->method_id); - if (!method_spec) + slot = radix_tree_iter_lookup( + &uapi->radix, &attrs_iter, + uapi_key_obj(hdr->object_id) | + uapi_key_ioctl_method(hdr->method_id)); + if (unlikely(!slot)) return -EPROTONOSUPPORT; + method_elm = srcu_dereference(*slot, &ufile->device->disassociate_srcu); - ctx_size = sizeof(*ctx) - sizeof(ctx->internal_buffer) + - sizeof(struct uverbs_attr_bundle_hash) * method_spec->num_buckets + - sizeof(*ctx->uattrs) * hdr->num_attrs + - sizeof(*ctx->bundle.hash[0].attrs) * - method_spec->num_child_attrs + - sizeof(*ctx->bundle.hash[0].valid_bitmap) * 
- (method_spec->num_child_attrs / BITS_PER_LONG + - method_spec->num_buckets); - - if (ctx_size <= sizeof(onstack_pbundle)) { - ctx = &onstack_pbundle; - ctx->internal_avail = - sizeof(onstack_pbundle) - + if (!method_elm->use_stack) { + pbundle = kmalloc(method_elm->bundle_size, GFP_KERNEL); + if (!pbundle) + return -ENOMEM; + pbundle->internal_avail = + method_elm->bundle_size - offsetof(struct bundle_priv, internal_buffer); - ctx->allocated_mem = NULL; + pbundle->alloc_head.next = NULL; + pbundle->allocated_mem = &pbundle->alloc_head; } else { - ctx = kmalloc(ctx_size, GFP_KERNEL); - if (!ctx) - return -ENOMEM; - ctx->internal_avail = 0; - ctx->alloc_head.next = NULL; - ctx->allocated_mem = &ctx->alloc_head; + pbundle = &onstack; + pbundle->internal_avail = sizeof(pbundle->internal_buffer); + pbundle->allocated_mem = NULL; } - ctx->uattrs = (void *)(ctx + 1) + - (sizeof(ctx->bundle.hash[0]) * method_spec->num_buckets); - curr_attr = (void *)(ctx->uattrs + hdr->num_attrs); - curr_bitmap = (void *)(curr_attr + method_spec->num_child_attrs); - ctx->internal_used = ALIGN(ctx_size, sizeof(*ctx->internal_buffer)); - - /* - * We just fill the pointers and num_attrs here. The data itself will be - * filled at a later stage (uverbs_process_attr) - */ - for (i = 0; i < method_spec->num_buckets; i++) { - unsigned int curr_num_attrs; - - if (!method_spec->attr_buckets[i]) - continue; - - curr_num_attrs = method_spec->attr_buckets[i]->num_attrs; - - ctx->bundle.hash[i].attrs = curr_attr; - curr_attr += curr_num_attrs; - ctx->bundle.hash[i].num_attrs = curr_num_attrs; - ctx->bundle.hash[i].valid_bitmap = curr_bitmap; - bitmap_zero(curr_bitmap, curr_num_attrs); - curr_bitmap += BITS_TO_LONGS(curr_num_attrs); - } + /* Space for the pbundle->bundle.attrs flex array */ + pbundle->method_elm = method_elm; + pbundle->method_key = attrs_iter.index; + pbundle->bundle.ufile = ufile; + pbundle->radix = &uapi->radix; + pbundle->radix_slots = slot; + pbundle->radix_slots_len = radix_tree_chunk_size(&attrs_iter); + pbundle->user_attrs = user_attrs; + + pbundle->internal_used = ALIGN(pbundle->method_elm->key_bitmap_len * + sizeof(*pbundle->bundle.attrs), + sizeof(*pbundle->internal_buffer)); + memset(pbundle->bundle.attr_present, 0, + sizeof(pbundle->bundle.attr_present)); + memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize)); + + ret = ib_uverbs_run_method(pbundle, hdr->num_attrs); + destroy_ret = bundle_destroy(pbundle, ret == 0); + if (unlikely(destroy_ret && !ret)) + return destroy_ret; - err = copy_from_user(ctx->uattrs, user_attrs, - sizeof(*ctx->uattrs) * hdr->num_attrs); - if (err) { - err = -EFAULT; - goto out; - } - - ctx->destroy_attr = NULL; - ctx->bundle.ufile = file; - ctx->user_attrs = user_attrs; - err = uverbs_handle_method(hdr->num_attrs, method_spec, ctx); - - /* - * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can - * not invoke the method because the request is not supported. No - * other cases should return this code. 
- */ - if (unlikely(err == -EPROTONOSUPPORT)) { - WARN_ON_ONCE(err == -EPROTONOSUPPORT); - err = -EINVAL; - } -out: - bundle_destroy(ctx); - return err; + return ret; } #define IB_UVERBS_MAX_CMD_SZ 4096 @@ -570,7 +504,7 @@ long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) goto out; } - err = ib_uverbs_cmd_verbs(ib_dev, file, &hdr, user_hdr->attrs); + err = ib_uverbs_cmd_verbs(file, &hdr, user_hdr->attrs); } else { err = -ENOIOCTLCMD; } diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c index 21c0de034511..73ea6f0db88f 100644 --- a/drivers/infiniband/core/uverbs_uapi.c +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -160,6 +160,7 @@ uapi_finalize_ioctl_method(struct uverbs_api *uapi, u32 method_key) { struct radix_tree_iter iter; + unsigned int num_attrs = 0; unsigned int max_bkey = 0; bool single_uobj = false; void __rcu **slot; @@ -204,11 +205,13 @@ uapi_finalize_ioctl_method(struct uverbs_api *uapi, } max_bkey = max(max_bkey, attr_bkey); + num_attrs++; } method_elm->key_bitmap_len = max_bkey + 1; WARN_ON(method_elm->key_bitmap_len > UVERBS_API_ATTR_BKEY_LEN); + uapi_compute_bundle_size(method_elm, num_attrs); return 0; } diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 1dbf663f7f43..24ef8d9ac631 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -451,6 +451,7 @@ struct uverbs_object_tree_def { * ================================================= */ + struct uverbs_ptr_attr { /* * If UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY is set then the 'ptr' is @@ -467,6 +468,7 @@ struct uverbs_ptr_attr { struct uverbs_obj_attr { struct ib_uobject *uobject; + const struct uverbs_api_attr *attr_elm; }; struct uverbs_attr { @@ -476,39 +478,17 @@ struct uverbs_attr { }; }; -struct uverbs_attr_bundle_hash { - /* if bit i is set, it means attrs[i] contains valid information */ - unsigned long *valid_bitmap; - size_t num_attrs; - /* - * arrays of attributes, each element corresponds to the specification - * of the attribute in the same index. 
- */ - struct uverbs_attr *attrs; -}; - struct uverbs_attr_bundle { struct ib_uverbs_file *ufile; - size_t num_buckets; - struct uverbs_attr_bundle_hash hash[]; + DECLARE_BITMAP(attr_present, UVERBS_API_ATTR_BKEY_LEN); + struct uverbs_attr attrs[]; }; -static inline bool uverbs_attr_is_valid_in_hash(const struct uverbs_attr_bundle_hash *attrs_hash, - unsigned int idx) -{ - return test_bit(idx, attrs_hash->valid_bitmap); -} - static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_bundle, unsigned int idx) { - u16 idx_bucket = idx >> UVERBS_ID_NS_SHIFT; - - if (attrs_bundle->num_buckets <= idx_bucket) - return false; - - return uverbs_attr_is_valid_in_hash(&attrs_bundle->hash[idx_bucket], - idx & ~UVERBS_ID_NS_MASK); + return test_bit(uapi_bkey_attr(uapi_key_attr(idx)), + attrs_bundle->attr_present); } #define IS_UVERBS_COPY_ERR(_ret) ((_ret) && (_ret) != -ENOENT) @@ -516,12 +496,10 @@ static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_b static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr_bundle *attrs_bundle, u16 idx) { - u16 idx_bucket = idx >> UVERBS_ID_NS_SHIFT; - if (!uverbs_attr_is_valid(attrs_bundle, idx)) return ERR_PTR(-ENOENT); - return &attrs_bundle->hash[idx_bucket].attrs[idx & ~UVERBS_ID_NS_MASK]; + return &attrs_bundle->attrs[uapi_bkey_attr(uapi_key_attr(idx))]; } static inline int uverbs_attr_get_enum_id(const struct uverbs_attr_bundle *attrs_bundle, -- cgit From 51d0a2b4cfa9979fd8a59faf483b4e84587ab4ea Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 9 Aug 2018 20:14:43 -0600 Subject: IB/uverbs: Remove struct uverbs_root_spec and all supporting code Everything now uses the uverbs_uapi data structure. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/Makefile | 2 +- drivers/infiniband/core/rdma_core.c | 45 -- drivers/infiniband/core/rdma_core.h | 6 - drivers/infiniband/core/uverbs.h | 1 - drivers/infiniband/core/uverbs_ioctl_merge.c | 662 --------------------------- drivers/infiniband/core/uverbs_main.c | 28 +- include/rdma/uverbs_ioctl.h | 91 ---- 7 files changed, 2 insertions(+), 833 deletions(-) delete mode 100644 drivers/infiniband/core/uverbs_ioctl_merge.c diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index d934cf617841..867cee5e27b2 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -35,7 +35,7 @@ ib_ucm-y := ucm.o ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ rdma_core.o uverbs_std_types.o uverbs_ioctl.o \ - uverbs_ioctl_merge.o uverbs_std_types_cq.o \ + uverbs_std_types_cq.o \ uverbs_std_types_flow_action.o uverbs_std_types_dm.o \ uverbs_std_types_mr.o uverbs_std_types_counters.o \ uverbs_uapi.o diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 2814228ead39..12e7c6c102c1 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -42,51 +42,6 @@ #include "core_priv.h" #include "rdma_core.h" -int uverbs_ns_idx(u16 *id, unsigned int ns_count) -{ - int ret = (*id & UVERBS_ID_NS_MASK) >> UVERBS_ID_NS_SHIFT; - - if (ret >= ns_count) - return -EINVAL; - - *id &= ~UVERBS_ID_NS_MASK; - return ret; -} - -const struct uverbs_object_spec *uverbs_get_object(struct ib_uverbs_file *ufile, - uint16_t object) -{ - const struct uverbs_root_spec *object_hash = ufile->device->specs_root; - const struct uverbs_object_spec_hash *objects; - int ret = uverbs_ns_idx(&object, object_hash->num_buckets); - - if (ret < 0) - return NULL; 
- - objects = object_hash->object_buckets[ret]; - - if (object >= objects->num_objects) - return NULL; - - return objects->objects[object]; -} - -const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object, - uint16_t method) -{ - const struct uverbs_method_spec_hash *methods; - int ret = uverbs_ns_idx(&method, object->num_buckets); - - if (ret < 0) - return NULL; - - methods = object->method_buckets[ret]; - if (method >= methods->num_methods) - return NULL; - - return methods->methods[method]; -} - void uverbs_uobject_get(struct ib_uobject *uobject) { kref_get(&uobject->ref); diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index aca279bfef08..f962f2a593ba 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -45,12 +45,6 @@ struct ib_uverbs_device; -int uverbs_ns_idx(u16 *id, unsigned int ns_count); -const struct uverbs_object_spec *uverbs_get_object(struct ib_uverbs_file *ufile, - uint16_t object); -const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object, - uint16_t method); - void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, enum rdma_remove_reason reason); diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 879be0d1fd99..5df8e548cc14 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -111,7 +111,6 @@ struct ib_uverbs_device { struct mutex lists_mutex; /* protect lists */ struct list_head uverbs_file_list; struct list_head uverbs_events_file_list; - struct uverbs_root_spec *specs_root; struct uverbs_api *uapi; }; diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c deleted file mode 100644 index 16b575929915..000000000000 --- a/drivers/infiniband/core/uverbs_ioctl_merge.c +++ /dev/null @@ -1,662 +0,0 @@ -/* - * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include -#include -#include -#include "uverbs.h" - -#define UVERBS_NUM_NS (UVERBS_ID_NS_MASK >> UVERBS_ID_NS_SHIFT) -#define GET_NS_ID(idx) (((idx) & UVERBS_ID_NS_MASK) >> UVERBS_ID_NS_SHIFT) -#define GET_ID(idx) ((idx) & ~UVERBS_ID_NS_MASK) - -#define _for_each_element(elem, tmpi, tmpj, hashes, num_buckets_offset, \ - buckets_offset) \ - for (tmpj = 0, \ - elem = (*(const void ***)((hashes)[tmpi] + \ - (buckets_offset)))[0]; \ - tmpj < *(size_t *)((hashes)[tmpi] + (num_buckets_offset)); \ - tmpj++) \ - if ((elem = ((*(const void ***)(hashes[tmpi] + \ - (buckets_offset)))[tmpj]))) - -/* - * Iterate all elements of a few @hashes. The number of given hashes is - * indicated by @num_hashes. The offset of the number of buckets in the hash is - * represented by @num_buckets_offset, while the offset of the buckets array in - * the hash structure is represented by @buckets_offset. tmpi and tmpj are two - * short (or int) based indices that are given by the user. tmpi iterates over - * the different hashes. @elem points the current element in the hashes[tmpi] - * bucket we are looping on. To be honest, @hashes representation isn't exactly - * a hash, but more a collection of elements. These elements' ids are treated - * in a hash like manner, where the first upper bits are the bucket number. - * These elements are later mapped into a perfect-hash. - */ -#define for_each_element(elem, tmpi, tmpj, hashes, num_hashes, \ - num_buckets_offset, buckets_offset) \ - for (tmpi = 0; tmpi < (num_hashes); tmpi++) \ - _for_each_element(elem, tmpi, tmpj, hashes, num_buckets_offset,\ - buckets_offset) - -#define get_elements_iterators_entry_above(iters, num_elements, elements, \ - num_objects_fld, objects_fld, bucket,\ - min_id) \ - get_elements_above_id((const void **)iters, num_elements, \ - (const void **)(elements), \ - offsetof(typeof(**elements), \ - num_objects_fld), \ - offsetof(typeof(**elements), objects_fld),\ - offsetof(typeof(***(*elements)->objects_fld), id),\ - bucket, min_id) - -#define get_objects_above_id(iters, num_trees, trees, bucket, min_id) \ - get_elements_iterators_entry_above(iters, num_trees, trees, \ - num_objects, objects, bucket, min_id) - -#define get_methods_above_id(method_iters, num_iters, iters, bucket, min_id)\ - get_elements_iterators_entry_above(method_iters, num_iters, iters, \ - num_methods, methods, bucket, min_id) - -#define get_attrs_above_id(attrs_iters, num_iters, iters, bucket, min_id)\ - get_elements_iterators_entry_above(attrs_iters, num_iters, iters, \ - num_attrs, attrs, bucket, min_id) - -/* - * get_elements_above_id get a few hashes represented by @elements and - * @num_elements. The hashes fields are described by @num_offset, @data_offset - * and @id_offset in the same way as required by for_each_element. The function - * returns an array of @iters, represents an array of elements in the hashes - * buckets, which their ids are the smallest ids in all hashes but are all - * larger than the id given by min_id. Elements are only added to the iters - * array if their id belongs to the bucket @bucket. The number of elements in - * the returned array is returned by the function. @min_id is also updated to - * reflect the new min_id of all elements in iters. 
- */ -static size_t get_elements_above_id(const void **iters, - unsigned int num_elements, - const void **elements, - size_t num_offset, - size_t data_offset, - size_t id_offset, - u16 bucket, - short *min_id) -{ - size_t num_iters = 0; - short min = SHRT_MAX; - const void *elem; - int i, j, last_stored = -1; - unsigned int equal_min = 0; - - for_each_element(elem, i, j, elements, num_elements, num_offset, - data_offset) { - u16 id = *(u16 *)(elem + id_offset); - - if (GET_NS_ID(id) != bucket) - continue; - - if (GET_ID(id) < *min_id || - (min != SHRT_MAX && GET_ID(id) > min)) - continue; - - /* - * We first iterate all hashes represented by @elements. When - * we do, we try to find an element @elem in the bucket @bucket - * which its id is min. Since we can't ensure the user sorted - * the elements in increasing order, we override this hash's - * minimal id element we found, if a new element with a smaller - * id was just found. - */ - iters[last_stored == i ? num_iters - 1 : num_iters++] = elem; - last_stored = i; - if (min == GET_ID(id)) - equal_min++; - else - equal_min = 1; - min = GET_ID(id); - } - - /* - * We only insert to our iters array an element, if its id is smaller - * than all previous ids. Therefore, the final iters array is sorted so - * that smaller ids are in the end of the array. - * Therefore, we need to clean the beginning of the array to make sure - * all ids of final elements are equal to min. - */ - memmove(iters, iters + num_iters - equal_min, sizeof(*iters) * equal_min); - - *min_id = min; - return equal_min; -} - -#define find_max_element_entry_id(num_elements, elements, num_objects_fld, \ - objects_fld, bucket) \ - find_max_element_id(num_elements, (const void **)(elements), \ - offsetof(typeof(**elements), num_objects_fld), \ - offsetof(typeof(**elements), objects_fld), \ - offsetof(typeof(***(*elements)->objects_fld), id),\ - bucket) - -static short find_max_element_ns_id(unsigned int num_elements, - const void **elements, - size_t num_offset, - size_t data_offset, - size_t id_offset) -{ - short max_ns = SHRT_MIN; - const void *elem; - int i, j; - - for_each_element(elem, i, j, elements, num_elements, num_offset, - data_offset) { - u16 id = *(u16 *)(elem + id_offset); - - if (GET_NS_ID(id) > max_ns) - max_ns = GET_NS_ID(id); - } - - return max_ns; -} - -static short find_max_element_id(unsigned int num_elements, - const void **elements, - size_t num_offset, - size_t data_offset, - size_t id_offset, - u16 bucket) -{ - short max_id = SHRT_MIN; - const void *elem; - int i, j; - - for_each_element(elem, i, j, elements, num_elements, num_offset, - data_offset) { - u16 id = *(u16 *)(elem + id_offset); - - if (GET_NS_ID(id) == bucket && - GET_ID(id) > max_id) - max_id = GET_ID(id); - } - return max_id; -} - -#define find_max_element_entry_id(num_elements, elements, num_objects_fld, \ - objects_fld, bucket) \ - find_max_element_id(num_elements, (const void **)(elements), \ - offsetof(typeof(**elements), num_objects_fld), \ - offsetof(typeof(**elements), objects_fld), \ - offsetof(typeof(***(*elements)->objects_fld), id),\ - bucket) - -#define find_max_element_ns_entry_id(num_elements, elements, \ - num_objects_fld, objects_fld) \ - find_max_element_ns_id(num_elements, (const void **)(elements), \ - offsetof(typeof(**elements), num_objects_fld),\ - offsetof(typeof(**elements), objects_fld), \ - offsetof(typeof(***(*elements)->objects_fld), id)) - -/* - * find_max_xxxx_ns_id gets a few elements. 
Each element is described by an id - * which its upper bits represents a namespace. It finds the max namespace. This - * could be used in order to know how many buckets do we need to allocate. If no - * elements exist, SHRT_MIN is returned. Namespace represents here different - * buckets. The common example is "common bucket" and "driver bucket". - * - * find_max_xxxx_id gets a few elements and a bucket. Each element is described - * by an id which its upper bits represent a namespace. It returns the max id - * which is contained in the same namespace defined in @bucket. This could be - * used in order to know how many elements do we need to allocate in the bucket. - * If no elements exist, SHRT_MIN is returned. - */ - -#define find_max_object_id(num_trees, trees, bucket) \ - find_max_element_entry_id(num_trees, trees, num_objects,\ - objects, bucket) -#define find_max_object_ns_id(num_trees, trees) \ - find_max_element_ns_entry_id(num_trees, trees, \ - num_objects, objects) - -#define find_max_method_id(num_iters, iters, bucket) \ - find_max_element_entry_id(num_iters, iters, num_methods,\ - methods, bucket) -#define find_max_method_ns_id(num_iters, iters) \ - find_max_element_ns_entry_id(num_iters, iters, \ - num_methods, methods) - -#define find_max_attr_id(num_iters, iters, bucket) \ - find_max_element_entry_id(num_iters, iters, num_attrs, \ - attrs, bucket) -#define find_max_attr_ns_id(num_iters, iters) \ - find_max_element_ns_entry_id(num_iters, iters, \ - num_attrs, attrs) - -static void free_method(struct uverbs_method_spec *method) -{ - unsigned int i; - - if (!method) - return; - - for (i = 0; i < method->num_buckets; i++) - kfree(method->attr_buckets[i]); - - kfree(method); -} - -#define IS_ATTR_OBJECT(attr) ((attr)->type == UVERBS_ATTR_TYPE_IDR || \ - (attr)->type == UVERBS_ATTR_TYPE_FD) - -/* - * This function gets array of size @num_method_defs which contains pointers to - * method definitions @method_defs. The function allocates an - * uverbs_method_spec structure and initializes its number of buckets and the - * elements in buckets to the correct attributes. While doing that, it - * validates that there aren't conflicts between attributes of different - * method_defs. 
- */ -static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_method_def **method_defs, - size_t num_method_defs) -{ - int bucket_idx; - int max_attr_buckets = 0; - size_t num_attr_buckets = 0; - int res = 0; - struct uverbs_method_spec *method = NULL; - const struct uverbs_attr_def **attr_defs; - unsigned int num_of_singularities = 0; - - max_attr_buckets = find_max_attr_ns_id(num_method_defs, method_defs); - if (max_attr_buckets >= 0) - num_attr_buckets = max_attr_buckets + 1; - - method = kzalloc(struct_size(method, attr_buckets, num_attr_buckets), - GFP_KERNEL); - if (!method) - return ERR_PTR(-ENOMEM); - - method->num_buckets = num_attr_buckets; - attr_defs = kcalloc(num_method_defs, sizeof(*attr_defs), GFP_KERNEL); - if (!attr_defs) { - res = -ENOMEM; - goto free_method; - } - for (bucket_idx = 0; bucket_idx < method->num_buckets; bucket_idx++) { - short min_id = SHRT_MIN; - int attr_max_bucket = 0; - struct uverbs_attr_spec_hash *hash = NULL; - - attr_max_bucket = find_max_attr_id(num_method_defs, method_defs, - bucket_idx); - if (attr_max_bucket < 0) - continue; - - hash = kzalloc(sizeof(*hash) + - ALIGN(sizeof(*hash->attrs) * (attr_max_bucket + 1), - sizeof(long)) + - BITS_TO_LONGS(attr_max_bucket + 1) * sizeof(long), - GFP_KERNEL); - if (!hash) { - res = -ENOMEM; - goto free; - } - hash->num_attrs = attr_max_bucket + 1; - method->num_child_attrs += hash->num_attrs; - hash->mandatory_attrs_bitmask = (void *)(hash + 1) + - ALIGN(sizeof(*hash->attrs) * - (attr_max_bucket + 1), - sizeof(long)); - - method->attr_buckets[bucket_idx] = hash; - - do { - size_t num_attr_defs; - struct uverbs_attr_spec *attr; - bool attr_obj_with_special_access; - - num_attr_defs = - get_attrs_above_id(attr_defs, - num_method_defs, - method_defs, - bucket_idx, - &min_id); - /* Last attr in bucket */ - if (!num_attr_defs) - break; - - if (num_attr_defs > 1) { - /* - * We don't allow two attribute definitions for - * the same attribute. This is usually a - * programmer error. If required, it's better to - * just add a new attribute to capture the new - * semantics. 
- */ - res = -EEXIST; - goto free; - } - - attr = &hash->attrs[min_id]; - memcpy(attr, &attr_defs[0]->attr, sizeof(*attr)); - - attr_obj_with_special_access = IS_ATTR_OBJECT(attr) && - (attr->u.obj.access == UVERBS_ACCESS_NEW || - attr->u.obj.access == UVERBS_ACCESS_DESTROY); - num_of_singularities += !!attr_obj_with_special_access; - if (WARN(num_of_singularities > 1, - "ib_uverbs: Method contains more than one object attr (%d) with new/destroy access\n", - min_id) || - WARN(attr_obj_with_special_access && - !attr->mandatory, - "ib_uverbs: Tried to merge attr (%d) but it's an object with new/destroy access but isn't mandatory\n", - min_id) || - WARN(IS_ATTR_OBJECT(attr) && - attr->zero_trailing, - "ib_uverbs: Tried to merge attr (%d) but it's an object with min_sz flag\n", - min_id)) { - res = -EINVAL; - goto free; - } - - if (attr->mandatory) - set_bit(min_id, hash->mandatory_attrs_bitmask); - min_id++; - - } while (1); - } - kfree(attr_defs); - return method; - -free: - kfree(attr_defs); -free_method: - free_method(method); - return ERR_PTR(res); -} - -static void free_object(struct uverbs_object_spec *object) -{ - unsigned int i, j; - - if (!object) - return; - - for (i = 0; i < object->num_buckets; i++) { - struct uverbs_method_spec_hash *method_buckets = - object->method_buckets[i]; - - if (!method_buckets) - continue; - - for (j = 0; j < method_buckets->num_methods; j++) - free_method(method_buckets->methods[j]); - - kfree(method_buckets); - } - - kfree(object); -} - -/* - * This function gets array of size @num_object_defs which contains pointers to - * object definitions @object_defs. The function allocated an - * uverbs_object_spec structure and initialize its number of buckets and the - * elements in buckets to the correct methods. While doing that, it - * sorts out the correct relationship between conflicts in the same method. 
- */ -static struct uverbs_object_spec *build_object_with_methods(const struct uverbs_object_def **object_defs, - size_t num_object_defs) -{ - u16 bucket_idx; - int max_method_buckets = 0; - u16 num_method_buckets = 0; - int res = 0; - struct uverbs_object_spec *object = NULL; - const struct uverbs_method_def **method_defs; - - max_method_buckets = find_max_method_ns_id(num_object_defs, object_defs); - if (max_method_buckets >= 0) - num_method_buckets = max_method_buckets + 1; - - object = kzalloc(struct_size(object, method_buckets, - num_method_buckets), - GFP_KERNEL); - if (!object) - return ERR_PTR(-ENOMEM); - - object->num_buckets = num_method_buckets; - method_defs = kcalloc(num_object_defs, sizeof(*method_defs), GFP_KERNEL); - if (!method_defs) { - res = -ENOMEM; - goto free_object; - } - - for (bucket_idx = 0; bucket_idx < object->num_buckets; bucket_idx++) { - short min_id = SHRT_MIN; - int methods_max_bucket = 0; - struct uverbs_method_spec_hash *hash = NULL; - - methods_max_bucket = find_max_method_id(num_object_defs, object_defs, - bucket_idx); - if (methods_max_bucket < 0) - continue; - - hash = kzalloc(struct_size(hash, methods, - methods_max_bucket + 1), - GFP_KERNEL); - if (!hash) { - res = -ENOMEM; - goto free; - } - - hash->num_methods = methods_max_bucket + 1; - object->method_buckets[bucket_idx] = hash; - - do { - size_t num_method_defs; - struct uverbs_method_spec *method; - int i; - - num_method_defs = - get_methods_above_id(method_defs, - num_object_defs, - object_defs, - bucket_idx, - &min_id); - /* Last method in bucket */ - if (!num_method_defs) - break; - - method = build_method_with_attrs(method_defs, - num_method_defs); - if (IS_ERR(method)) { - res = PTR_ERR(method); - goto free; - } - - /* - * The last tree which is given as an argument to the - * merge overrides previous method handler. - * Therefore, we iterate backwards and search for the - * first handler which != NULL. This also defines the - * set of flags used for this handler. - */ - for (i = num_method_defs - 1; - i >= 0 && !method_defs[i]->handler; i--) - ; - hash->methods[min_id++] = method; - /* NULL handler isn't allowed */ - if (WARN(i < 0, - "ib_uverbs: tried to merge function id %d, but all handlers are NULL\n", - min_id)) { - res = -EINVAL; - goto free; - } - method->handler = method_defs[i]->handler; - method->flags = method_defs[i]->flags; - - } while (1); - } - kfree(method_defs); - return object; - -free: - kfree(method_defs); -free_object: - free_object(object); - return ERR_PTR(res); -} - -void uverbs_free_spec_tree(struct uverbs_root_spec *root) -{ - unsigned int i, j; - - if (!root) - return; - - for (i = 0; i < root->num_buckets; i++) { - struct uverbs_object_spec_hash *object_hash = - root->object_buckets[i]; - - if (!object_hash) - continue; - - for (j = 0; j < object_hash->num_objects; j++) - free_object(object_hash->objects[j]); - - kfree(object_hash); - } - - kfree(root); -} - -struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees, - const struct uverbs_object_tree_def **trees) -{ - u16 bucket_idx; - short max_object_buckets = 0; - size_t num_objects_buckets = 0; - struct uverbs_root_spec *root_spec = NULL; - const struct uverbs_object_def **object_defs; - int i; - int res = 0; - - max_object_buckets = find_max_object_ns_id(num_trees, trees); - /* - * Devices which don't want to support ib_uverbs, should just allocate - * an empty parsing tree. Every user-space command won't hit any valid - * entry in the parsing tree and thus will fail. 
- */ - if (max_object_buckets >= 0) - num_objects_buckets = max_object_buckets + 1; - - root_spec = kzalloc(struct_size(root_spec, object_buckets, - num_objects_buckets), - GFP_KERNEL); - if (!root_spec) - return ERR_PTR(-ENOMEM); - root_spec->num_buckets = num_objects_buckets; - - object_defs = kcalloc(num_trees, sizeof(*object_defs), - GFP_KERNEL); - if (!object_defs) { - res = -ENOMEM; - goto free_root; - } - - for (bucket_idx = 0; bucket_idx < root_spec->num_buckets; bucket_idx++) { - short min_id = SHRT_MIN; - short objects_max_bucket; - struct uverbs_object_spec_hash *hash = NULL; - - objects_max_bucket = find_max_object_id(num_trees, trees, - bucket_idx); - if (objects_max_bucket < 0) - continue; - - hash = kzalloc(struct_size(hash, objects, - objects_max_bucket + 1), - GFP_KERNEL); - if (!hash) { - res = -ENOMEM; - goto free; - } - hash->num_objects = objects_max_bucket + 1; - root_spec->object_buckets[bucket_idx] = hash; - - do { - size_t num_object_defs; - struct uverbs_object_spec *object; - - num_object_defs = get_objects_above_id(object_defs, - num_trees, - trees, - bucket_idx, - &min_id); - /* Last object in bucket */ - if (!num_object_defs) - break; - - object = build_object_with_methods(object_defs, - num_object_defs); - if (IS_ERR(object)) { - res = PTR_ERR(object); - goto free; - } - - /* - * The last tree which is given as an argument to the - * merge overrides previous object's type_attrs. - * Therefore, we iterate backwards and search for the - * first type_attrs which != NULL. - */ - for (i = num_object_defs - 1; - i >= 0 && !object_defs[i]->type_attrs; i--) - ; - /* - * NULL is a valid type_attrs. It means an object we - * can't instantiate (like DEVICE). - */ - object->type_attrs = i < 0 ? NULL : - object_defs[i]->type_attrs; - - hash->objects[min_id++] = object; - } while (1); - } - - kfree(object_defs); - return root_spec; - -free: - kfree(object_defs); -free_root: - uverbs_free_spec_tree(root_spec); - return ERR_PTR(res); -} diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 0fab083cafef..823beca448e1 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -176,7 +176,6 @@ static void ib_uverbs_release_dev(struct kobject *kobj) uverbs_destroy_api(dev->uapi); cleanup_srcu_struct(&dev->disassociate_srcu); - uverbs_free_spec_tree(dev->specs_root); kfree(dev); } @@ -998,37 +997,12 @@ static CLASS_ATTR_STRING(abi_version, S_IRUGO, static int ib_uverbs_create_uapi(struct ib_device *device, struct ib_uverbs_device *uverbs_dev) { - const struct uverbs_object_tree_def **specs; - struct uverbs_root_spec *specs_root; - unsigned int num_specs = 1; struct uverbs_api *uapi; - unsigned int i; - - if (device->driver_specs) - for (i = 0; device->driver_specs[i]; i++) - num_specs++; - - specs = kmalloc_array(num_specs, sizeof(*specs), GFP_KERNEL); - if (!specs) - return -ENOMEM; - - specs[0] = uverbs_default_get_objects(); - if (device->driver_specs) - for (i = 0; device->driver_specs[i]; i++) - specs[i+1] = device->driver_specs[i]; - - specs_root = uverbs_alloc_spec_tree(num_specs, specs); - kfree(specs); - if (IS_ERR(specs_root)) - return PTR_ERR(specs_root); uapi = uverbs_alloc_api(device->driver_specs, device->driver_id); - if (IS_ERR(uapi)) { - uverbs_free_spec_tree(specs_root); + if (IS_ERR(uapi)) return PTR_ERR(uapi); - } - uverbs_dev->specs_root = specs_root; uverbs_dev->uapi = uapi; return 0; } diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 
24ef8d9ac631..9e997c3c2f04 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -114,46 +114,6 @@ struct uverbs_attr_spec { } u2; }; -struct uverbs_attr_spec_hash { - size_t num_attrs; - unsigned long *mandatory_attrs_bitmask; - struct uverbs_attr_spec attrs[0]; -}; - -struct uverbs_attr_bundle; -struct ib_uverbs_file; - -struct uverbs_method_spec { - /* Combination of bits from enum UVERBS_ACTION_FLAG_XXXX */ - u32 flags; - size_t num_buckets; - size_t num_child_attrs; - int (*handler)(struct ib_uverbs_file *ufile, - struct uverbs_attr_bundle *ctx); - struct uverbs_attr_spec_hash *attr_buckets[0]; -}; - -struct uverbs_method_spec_hash { - size_t num_methods; - struct uverbs_method_spec *methods[0]; -}; - -struct uverbs_object_spec { - const struct uverbs_obj_type *type_attrs; - size_t num_buckets; - struct uverbs_method_spec_hash *method_buckets[0]; -}; - -struct uverbs_object_spec_hash { - size_t num_objects; - struct uverbs_object_spec *objects[0]; -}; - -struct uverbs_root_spec { - size_t num_buckets; - struct uverbs_object_spec_hash *object_buckets[0]; -}; - /* * Information about the API is loaded into a radix tree. For IOCTL we start * with a tuple of: @@ -673,55 +633,4 @@ static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle, } #endif -/* ================================================= - * Definitions -> Specs infrastructure - * ================================================= - */ - -/* - * uverbs_alloc_spec_tree - Merges different common and driver specific feature - * into one parsing tree that every uverbs command will be parsed upon. - * - * @num_trees: Number of trees in the array @trees. - * @trees: Array of pointers to tree root definitions to merge. Each such tree - * possibly contains objects, methods and attributes definitions. - * - * Returns: - * uverbs_root_spec *: The root of the merged parsing tree. - * On error, we return an error code. Error is checked via IS_ERR. - * - * The following merges could take place: - * a. Two trees representing the same method with different handler - * -> We take the handler of the tree that its handler != NULL - * and its index in the trees array is greater. The incentive for that - * is that developers are expected to first merge common trees and then - * merge trees that gives specialized the behaviour. - * b. Two trees representing the same object with different - * type_attrs (struct uverbs_obj_type): - * -> We take the type_attrs of the tree that its type_attr != NULL - * and its index in the trees array is greater. This could be used - * in order to override the free function, allocation size, etc. - * c. Two trees representing the same method attribute (same id but possibly - * different attributes): - * -> ERROR (-ENOENT), we believe that's not the programmer's intent. - * - * An object without any methods is considered invalid and will abort the - * function with -ENOENT error. 
- */ -#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) -struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees, - const struct uverbs_object_tree_def **trees); -void uverbs_free_spec_tree(struct uverbs_root_spec *root); -#else -static inline struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees, - const struct uverbs_object_tree_def **trees) -{ - return NULL; -} - -static inline void uverbs_free_spec_tree(struct uverbs_root_spec *root) -{ -} -#endif - #endif -- cgit From 4ce719f846a4177f2631e4149503e3baf3dce87b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 9 Aug 2018 20:14:44 -0600 Subject: IB/uverbs: Do not check for device disassociation during ioctl Now that the ioctl path and uobjects are converted to use uverbs_api, it is now safe to remove the disassociation protection from the common ioctl code. This completes the work to make destroy functions continue to work even after device disassociation. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_ioctl.c | 41 +++++++++++----------------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 3ca700f6d663..1a6b229e3db3 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -470,47 +470,32 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile, return ret; } -#define IB_UVERBS_MAX_CMD_SZ 4096 - long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct ib_uverbs_file *file = filp->private_data; struct ib_uverbs_ioctl_hdr __user *user_hdr = (struct ib_uverbs_ioctl_hdr __user *)arg; struct ib_uverbs_ioctl_hdr hdr; - struct ib_device *ib_dev; int srcu_key; - long err; + int err; - srcu_key = srcu_read_lock(&file->device->disassociate_srcu); - ib_dev = srcu_dereference(file->device->ib_dev, - &file->device->disassociate_srcu); - if (!ib_dev) { - err = -EIO; - goto out; - } + if (unlikely(cmd != RDMA_VERBS_IOCTL)) + return -ENOIOCTLCMD; - if (cmd == RDMA_VERBS_IOCTL) { - err = copy_from_user(&hdr, user_hdr, sizeof(hdr)); + err = copy_from_user(&hdr, user_hdr, sizeof(hdr)); + if (err) + return -EFAULT; - if (err || hdr.length > IB_UVERBS_MAX_CMD_SZ || - hdr.length != sizeof(hdr) + hdr.num_attrs * sizeof(struct ib_uverbs_attr)) { - err = -EINVAL; - goto out; - } + if (hdr.length > PAGE_SIZE || + hdr.length != struct_size(&hdr, attrs, hdr.num_attrs)) + return -EINVAL; - if (hdr.reserved1 || hdr.reserved2) { - err = -EPROTONOSUPPORT; - goto out; - } + if (hdr.reserved1 || hdr.reserved2) + return -EPROTONOSUPPORT; - err = ib_uverbs_cmd_verbs(file, &hdr, user_hdr->attrs); - } else { - err = -ENOIOCTLCMD; - } -out: + srcu_key = srcu_read_lock(&file->device->disassociate_srcu); + err = ib_uverbs_cmd_verbs(file, &hdr, user_hdr->attrs); srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); - return err; } -- cgit From 486edfb1039dc413c4806d11accdedc8a4aa573b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 13 Aug 2018 20:04:37 -0600 Subject: IB/ucm: Fix compiling ucm.c Even though this interface is marked CONFIG_BROKEN we still expect it to compile, at least until we delete it completely. Also mark INFINIBAND_USER_ACCESS_UCM with COMPILE_TEST so these situations can be detected. 
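The compile breakage addressed here stems from the constification named in the Fixes tag below. As a hedged sketch only — assuming the post-constification ib_cm handler signature this series builds against, with an illustrative function name — a conforming event handler now receives the event as read-only:

static int example_cm_handler(struct ib_cm_id *cm_id,
			      const struct ib_cm_event *event)
{
	/* event is const: the handler may inspect it but must not modify it */
	return 0;
}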
Fixes: e7ff98aefc9e ("RDMA/cma: Constify path record, ib_cm_event, listen_id pointers") Signed-off-by: Jason Gunthorpe --- drivers/infiniband/Kconfig | 2 +- drivers/infiniband/core/ucm.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index b03af54367c0..d160d2d1f3a3 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -37,7 +37,7 @@ config INFINIBAND_USER_ACCESS config INFINIBAND_USER_ACCESS_UCM bool "Userspace CM (UCM, DEPRECATED)" - depends on BROKEN + depends on BROKEN || COMPILE_TEST depends on INFINIBAND_USER_ACCESS help The UCM module has known security flaws, which no one is diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 3e21a879d386..faa9e6116b2f 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -207,7 +207,7 @@ error: } static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, - struct ib_cm_req_event_param *kreq) + const struct ib_cm_req_event_param *kreq) { ureq->remote_ca_guid = kreq->remote_ca_guid; ureq->remote_qkey = kreq->remote_qkey; @@ -231,7 +231,7 @@ static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, } static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep, - struct ib_cm_rep_event_param *krep) + const struct ib_cm_rep_event_param *krep) { urep->remote_ca_guid = krep->remote_ca_guid; urep->remote_qkey = krep->remote_qkey; @@ -247,14 +247,14 @@ static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep, } static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep, - struct ib_cm_sidr_rep_event_param *krep) + const struct ib_cm_sidr_rep_event_param *krep) { urep->status = krep->status; urep->qkey = krep->qkey; urep->qpn = krep->qpn; }; -static int ib_ucm_event_process(struct ib_cm_event *evt, +static int ib_ucm_event_process(const struct ib_cm_event *evt, struct ib_ucm_event *uvt) { void *info = NULL; @@ -351,7 +351,7 @@ err1: } static int ib_ucm_event_handler(struct ib_cm_id *cm_id, - struct ib_cm_event *event) + const struct ib_cm_event *event) { struct ib_ucm_event *uevent; struct ib_ucm_context *ctx; -- cgit From 0da9be22cdcbcbe0e5f04271dcfac71601526a3e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 14 Aug 2018 08:07:04 -0700 Subject: Update the e-mail address of Bart Van Assche Since my @wdc.com e-mail address will become invalid after Friday August 24th, change it into an e-mail address that will remain valid after that date. 
Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- .mailmap | 2 ++ MAINTAINERS | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index 29ddeb1bf015..a2f4c8595835 100644 --- a/.mailmap +++ b/.mailmap @@ -31,6 +31,8 @@ Arnaud Patard Arnd Bergmann Axel Dyks Axel Lin +Bart Van Assche +Bart Van Assche Ben Gardner Ben M Cahill Björn Steinbrink diff --git a/MAINTAINERS b/MAINTAINERS index 1e53fe99eb63..0fb209911711 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12619,7 +12619,7 @@ S: Maintained F: drivers/scsi/sr* SCSI RDMA PROTOCOL (SRP) INITIATOR -M: Bart Van Assche +M: Bart Van Assche L: linux-rdma@vger.kernel.org S: Supported Q: http://patchwork.kernel.org/project/linux-rdma/list/ @@ -12627,7 +12627,7 @@ F: drivers/infiniband/ulp/srp/ F: include/scsi/srp.h SCSI RDMA PROTOCOL (SRP) TARGET -M: Bart Van Assche +M: Bart Van Assche L: linux-rdma@vger.kernel.org L: target-devel@vger.kernel.org S: Supported -- cgit From 0625b4ba1a5d4703c7fb01c497bd6c156908af00 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 14 Aug 2018 15:33:52 -0600 Subject: IB/mlx5: Fix leaking stack memory to userspace mlx5_ib_create_qp_resp was never initialized and only the first 4 bytes were written. Fixes: 41d902cb7c32 ("RDMA/mlx5: Fix definition of mlx5_ib_create_qp_resp") Cc: Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 351c2efceb35..6cba2a02d11b 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1607,7 +1607,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_resources *devr = &dev->devr; int inlen = MLX5_ST_SZ_BYTES(create_qp_in); struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_ib_create_qp_resp resp; + struct mlx5_ib_create_qp_resp resp = {}; struct mlx5_ib_cq *send_cq; struct mlx5_ib_cq *recv_cq; unsigned long flags; -- cgit From 1212767e23bbaba164cc7ea3a64115b3ae335063 Mon Sep 17 00:00:00 2001 From: Yuval Bason Date: Thu, 9 Aug 2018 17:29:36 +0300 Subject: qedr: Add wrapping generic structure for qpidr and adjust idr routines. Today, we are using the idr mechanism for QPs only. This patch prepares the qedr_idr structures and the idr routines for both QPs and SRQs.
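A hedged sketch of how the wrapper introduced by this patch is instantiated — assuming the qedr_dev fields added in this series; the helper name example_init_idrs is illustrative. Each resource type gets its own qedr_idr carrying the idr and its private lock together:

static void example_init_idrs(struct qedr_dev *dev)
{
	/* QP table: the lock and the idr now travel in one structure */
	spin_lock_init(&dev->qpidr.idr_lock);
	idr_init(&dev->qpidr.idr);
}

The SRQ patch that follows reuses the same wrapper for a second, independent table via the dev->srqidr field it adds.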
Signed-off-by: Yuval Bason Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/main.c | 4 ++-- drivers/infiniband/hw/qedr/qedr.h | 8 ++++++-- drivers/infiniband/hw/qedr/qedr_iw_cm.c | 12 +++++------ drivers/infiniband/hw/qedr/verbs.c | 36 ++++++++++++++++----------------- 4 files changed, 31 insertions(+), 29 deletions(-) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index ad22b32bbd9c..912a0ca0f617 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -325,8 +325,8 @@ static int qedr_alloc_resources(struct qedr_dev *dev) spin_lock_init(&dev->sgid_lock); if (IS_IWARP(dev)) { - spin_lock_init(&dev->idr_lock); - idr_init(&dev->qpidr); + spin_lock_init(&dev->qpidr.idr_lock); + idr_init(&dev->qpidr.idr); dev->iwarp_wq = create_singlethread_workqueue("qedr_iwarpq"); } diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 86d4511e0d75..5d0b75eaaa28 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -122,6 +122,11 @@ struct qedr_device_attr { #define QEDR_ENET_STATE_BIT (0) +struct qedr_idr { + spinlock_t idr_lock; /* Protect idr data-structure */ + struct idr idr; +}; + struct qedr_dev { struct ib_device ibdev; struct qed_dev *cdev; @@ -165,8 +170,7 @@ struct qedr_dev { struct qedr_cq *gsi_rqcq; struct qedr_qp *gsi_qp; enum qed_rdma_type rdma_type; - spinlock_t idr_lock; /* Protect qpidr data-structure */ - struct idr qpidr; + struct qedr_idr qpidr; struct workqueue_struct *iwarp_wq; u16 iwarp_max_mtu; diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 26dc374787f7..505fa3648762 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -491,7 +491,7 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) int rc = 0; int i; - qp = idr_find(&dev->qpidr, conn_param->qpn); + qp = idr_find(&dev->qpidr.idr, conn_param->qpn); laddr = (struct sockaddr_in *)&cm_id->m_local_addr; raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; @@ -679,7 +679,7 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn); - qp = idr_find(&dev->qpidr, conn_param->qpn); + qp = idr_find(&dev->qpidr.idr, conn_param->qpn); if (!qp) { DP_ERR(dev, "Invalid QP number %d\n", conn_param->qpn); return -EINVAL; @@ -737,9 +737,9 @@ void qedr_iw_qp_rem_ref(struct ib_qp *ibqp) struct qedr_qp *qp = get_qedr_qp(ibqp); if (atomic_dec_and_test(&qp->refcnt)) { - spin_lock_irq(&qp->dev->idr_lock); - idr_remove(&qp->dev->qpidr, qp->qp_id); - spin_unlock_irq(&qp->dev->idr_lock); + spin_lock_irq(&qp->dev->qpidr.idr_lock); + idr_remove(&qp->dev->qpidr.idr, qp->qp_id); + spin_unlock_irq(&qp->dev->qpidr.idr_lock); kfree(qp); } } @@ -748,5 +748,5 @@ struct ib_qp *qedr_iw_get_qp(struct ib_device *ibdev, int qpn) { struct qedr_dev *dev = get_qedr_dev(ibdev); - return idr_find(&dev->qpidr, qpn); + return idr_find(&dev->qpidr.idr, qpn); } diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 4aaeb24cebfc..d217b08c9e66 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1311,32 +1311,27 @@ static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp) qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len); } -static int qedr_idr_add(struct qedr_dev *dev, void *ptr, 
u32 id) +static int qedr_idr_add(struct qedr_dev *dev, struct qedr_idr *qidr, + void *ptr, u32 id) { int rc; - if (!rdma_protocol_iwarp(&dev->ibdev, 1)) - return 0; - idr_preload(GFP_KERNEL); - spin_lock_irq(&dev->idr_lock); + spin_lock_irq(&qidr->idr_lock); - rc = idr_alloc(&dev->qpidr, ptr, id, id + 1, GFP_ATOMIC); + rc = idr_alloc(&qidr->idr, ptr, id, id + 1, GFP_ATOMIC); - spin_unlock_irq(&dev->idr_lock); + spin_unlock_irq(&qidr->idr_lock); idr_preload_end(); return rc < 0 ? rc : 0; } -static void qedr_idr_remove(struct qedr_dev *dev, u32 id) +static void qedr_idr_remove(struct qedr_dev *dev, struct qedr_idr *qidr, u32 id) { - if (!rdma_protocol_iwarp(&dev->ibdev, 1)) - return; - - spin_lock_irq(&dev->idr_lock); - idr_remove(&dev->qpidr, id); - spin_unlock_irq(&dev->idr_lock); + spin_lock_irq(&qidr->idr_lock); + idr_remove(&qidr->idr, id); + spin_unlock_irq(&qidr->idr_lock); } static inline void @@ -1708,9 +1703,11 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, qp->ibqp.qp_num = qp->qp_id; - rc = qedr_idr_add(dev, qp, qp->qp_id); - if (rc) - goto err; + if (rdma_protocol_iwarp(&dev->ibdev, 1)) { + rc = qedr_idr_add(dev, &dev->qpidr, qp, qp->qp_id); + if (rc) + goto err; + } return &qp->ibqp; @@ -2279,8 +2276,9 @@ int qedr_destroy_qp(struct ib_qp *ibqp) qedr_free_qp_resources(dev, qp); - if (atomic_dec_and_test(&qp->refcnt)) { - qedr_idr_remove(dev, qp->qp_id); + if (atomic_dec_and_test(&qp->refcnt) && + rdma_protocol_iwarp(&dev->ibdev, 1)) { + qedr_idr_remove(dev, &dev->qpidr, qp->qp_id); kfree(qp); } return rc; -- cgit From 3491c9e799fb96d909f22f3b39d8cca81e75c3a9 Mon Sep 17 00:00:00 2001 From: Yuval Bason Date: Thu, 9 Aug 2018 17:29:37 +0300 Subject: qedr: Add support for kernel mode SRQ's Implement the SRQ specific verbs and update the poll_cq verb to deal with SRQ completions. 
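A hedged sketch of how a kernel ULP might exercise these new verbs once they are registered — assuming the 4.18-era ib_verbs API; the function name and the sizes are illustrative, and the limits are validated against the device attributes by qedr_check_srq_params() below:

#include <rdma/ib_verbs.h>

static struct ib_srq *example_create_srq(struct ib_pd *pd)
{
	struct ib_srq_init_attr init_attr = {
		.srq_type = IB_SRQT_BASIC,
		.attr = {
			.max_wr  = 128,	/* must not exceed qattr->max_srq_wr */
			.max_sge = 2,	/* must not exceed qattr->max_sge */
		},
	};

	/* Dispatches to qedr_create_srq() through ibdev.create_srq */
	return ib_create_srq(pd, &init_attr);
}

Receive buffers are then posted with ib_post_srq_recv(), which reaches qedr_post_srq_recv(), and completions are reaped through the regular CQ poll path that this patch extends to handle SRQ work requests.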
Signed-off-by: Michal Kalderon Signed-off-by: Yuval Bason Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/main.c | 5 + drivers/infiniband/hw/qedr/qedr.h | 35 +++ drivers/infiniband/hw/qedr/qedr_hsi_rdma.h | 11 + drivers/infiniband/hw/qedr/verbs.c | 411 ++++++++++++++++++++++++++++- drivers/infiniband/hw/qedr/verbs.h | 9 + 5 files changed, 458 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 912a0ca0f617..2642caf94b55 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -229,6 +229,11 @@ static int qedr_register_device(struct qedr_dev *dev) dev->ibdev.query_qp = qedr_query_qp; dev->ibdev.destroy_qp = qedr_destroy_qp; + dev->ibdev.create_srq = qedr_create_srq; + dev->ibdev.destroy_srq = qedr_destroy_srq; + dev->ibdev.modify_srq = qedr_modify_srq; + dev->ibdev.query_srq = qedr_query_srq; + dev->ibdev.post_srq_recv = qedr_post_srq_recv; dev->ibdev.query_pkey = qedr_query_pkey; dev->ibdev.create_ah = qedr_create_ah; diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 5d0b75eaaa28..a2d708dceb8d 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -58,6 +58,7 @@ #define QEDR_MSG_RQ " RQ" #define QEDR_MSG_SQ " SQ" #define QEDR_MSG_QP " QP" +#define QEDR_MSG_SRQ " SRQ" #define QEDR_MSG_GSI " GSI" #define QEDR_MSG_IWARP " IW" @@ -171,6 +172,7 @@ struct qedr_dev { struct qedr_qp *gsi_qp; enum qed_rdma_type rdma_type; struct qedr_idr qpidr; + struct qedr_idr srqidr; struct workqueue_struct *iwarp_wq; u16 iwarp_max_mtu; @@ -341,6 +343,34 @@ struct qedr_qp_hwq_info { qed_chain_get_capacity(p_info->pbl) \ } while (0) +struct qedr_srq_hwq_info { + u32 max_sges; + u32 max_wr; + struct qed_chain pbl; + u64 p_phys_addr_tbl; + u32 wqe_prod; + u32 sge_prod; + u32 wr_prod_cnt; + u32 wr_cons_cnt; + u32 num_elems; + + u32 *virt_prod_pair_addr; + dma_addr_t phy_prod_pair_addr; +}; + +struct qedr_srq { + struct ib_srq ibsrq; + struct qedr_dev *dev; + + struct qedr_userq usrq; + struct qedr_srq_hwq_info hw_srq; + struct ib_umem *prod_umem; + u16 srq_id; + u32 srq_limit; + /* lock to protect srq recv post */ + spinlock_t lock; +}; + enum qedr_qp_err_bitmap { QEDR_QP_ERR_SQ_FULL = 1, QEDR_QP_ERR_RQ_FULL = 2, @@ -542,4 +572,9 @@ static inline struct qedr_mr *get_qedr_mr(struct ib_mr *ibmr) { return container_of(ibmr, struct qedr_mr, ibmr); } + +static inline struct qedr_srq *get_qedr_srq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct qedr_srq, ibsrq); +} #endif diff --git a/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h index 7e1f7021396a..228dd7d49622 100644 --- a/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h +++ b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h @@ -161,12 +161,23 @@ struct rdma_rq_sge { #define RDMA_RQ_SGE_L_KEY_HI_SHIFT 29 }; +struct rdma_srq_wqe_header { + struct regpair wr_id; + u8 num_sges /* number of SGEs in WQE */; + u8 reserved2[7]; +}; + struct rdma_srq_sge { struct regpair addr; __le32 length; __le32 l_key; }; +union rdma_srq_elm { + struct rdma_srq_wqe_header header; + struct rdma_srq_sge sge; +}; + /* Rdma doorbell data for flags update */ struct rdma_pwm_flags_data { __le16 icid; /* internal CID */ diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index d217b08c9e66..7c75fc36e5ec 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -51,6 +51,10 @@ #include #include 
"qedr_roce_cm.h" +#define QEDR_SRQ_WQE_ELEM_SIZE sizeof(union rdma_srq_elm) +#define RDMA_MAX_SGE_PER_SRQ (4) +#define RDMA_MAX_SRQ_WQE_SIZE (RDMA_MAX_SGE_PER_SRQ + 1) + #define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT) static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src, @@ -84,6 +88,19 @@ int qedr_iw_query_gid(struct ib_device *ibdev, u8 port, return 0; } +int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) +{ + struct qedr_dev *dev = get_qedr_dev(ibsrq->device); + struct qedr_device_attr *qattr = &dev->attr; + struct qedr_srq *srq = get_qedr_srq(ibsrq); + + srq_attr->srq_limit = srq->srq_limit; + srq_attr->max_wr = qattr->max_srq_wr; + srq_attr->max_sge = qattr->max_sge; + + return 0; +} + int qedr_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, struct ib_udata *udata) { @@ -1248,13 +1265,18 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, qp->state = QED_ROCE_QP_STATE_RESET; qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false; qp->sq_cq = get_qedr_cq(attrs->send_cq); - qp->rq_cq = get_qedr_cq(attrs->recv_cq); qp->dev = dev; - qp->rq.max_sges = attrs->cap.max_recv_sge; - DP_DEBUG(dev, QEDR_MSG_QP, - "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n", - qp->rq.max_sges, qp->rq_cq->icid); + if (attrs->srq) { + qp->srq = get_qedr_srq(attrs->srq); + } else { + qp->rq_cq = get_qedr_cq(attrs->recv_cq); + qp->rq.max_sges = attrs->cap.max_recv_sge; + DP_DEBUG(dev, QEDR_MSG_QP, + "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n", + qp->rq.max_sges, qp->rq_cq->icid); + } + DP_DEBUG(dev, QEDR_MSG_QP, "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n", pd->pd_id, qp->qp_type, qp->max_inline_data, @@ -1269,9 +1291,228 @@ static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp) qp->sq.db = dev->db_addr + DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); qp->sq.db_data.data.icid = qp->icid + 1; - qp->rq.db = dev->db_addr + - DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); - qp->rq.db_data.data.icid = qp->icid; + if (!qp->srq) { + qp->rq.db = dev->db_addr + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); + qp->rq.db_data.data.icid = qp->icid; + } +} + +static int qedr_check_srq_params(struct ib_pd *ibpd, struct qedr_dev *dev, + struct ib_srq_init_attr *attrs, + struct ib_udata *udata) +{ + struct qedr_device_attr *qattr = &dev->attr; + + if (attrs->attr.max_wr > qattr->max_srq_wr) { + DP_ERR(dev, + "create srq: unsupported srq_wr=0x%x requested (max_srq_wr=0x%x)\n", + attrs->attr.max_wr, qattr->max_srq_wr); + return -EINVAL; + } + + if (attrs->attr.max_sge > qattr->max_sge) { + DP_ERR(dev, + "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n", + attrs->attr.max_sge, qattr->max_sge); + return -EINVAL; + } + + return 0; +} + +static void qedr_free_srq_kernel_params(struct qedr_srq *srq) +{ + struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq; + struct qedr_dev *dev = srq->dev; + + dev->ops->common->chain_free(dev->cdev, &hw_srq->pbl); + + dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers), + hw_srq->virt_prod_pair_addr, + hw_srq->phy_prod_pair_addr); +} + +static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq, + struct qedr_dev *dev, + struct ib_srq_init_attr *init_attr) +{ + struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq; + dma_addr_t phy_prod_pair_addr; + u32 num_elems; + void *va; + int rc; + + va = dma_alloc_coherent(&dev->pdev->dev, + sizeof(struct rdma_srq_producers), + &phy_prod_pair_addr, 
GFP_KERNEL); + if (!va) { + DP_ERR(dev, + "create srq: failed to allocate dma memory for producer\n"); + return -ENOMEM; + } + + hw_srq->phy_prod_pair_addr = phy_prod_pair_addr; + hw_srq->virt_prod_pair_addr = va; + + num_elems = init_attr->attr.max_wr * RDMA_MAX_SRQ_WQE_SIZE; + rc = dev->ops->common->chain_alloc(dev->cdev, + QED_CHAIN_USE_TO_CONSUME_PRODUCE, + QED_CHAIN_MODE_PBL, + QED_CHAIN_CNT_TYPE_U32, + num_elems, + QEDR_SRQ_WQE_ELEM_SIZE, + &hw_srq->pbl, NULL); + if (rc) + goto err0; + + hw_srq->num_elems = num_elems; + + return 0; + +err0: + dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers), + va, phy_prod_pair_addr); + return rc; +} + +static int qedr_idr_add(struct qedr_dev *dev, struct qedr_idr *qidr, + void *ptr, u32 id); +static void qedr_idr_remove(struct qedr_dev *dev, + struct qedr_idr *qidr, u32 id); + +struct ib_srq *qedr_create_srq(struct ib_pd *ibpd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct qed_rdma_destroy_srq_in_params destroy_in_params; + struct qed_rdma_create_srq_in_params in_params = {}; + struct qedr_dev *dev = get_qedr_dev(ibpd->device); + struct qed_rdma_create_srq_out_params out_params; + struct qedr_pd *pd = get_qedr_pd(ibpd); + u64 pbl_base_addr, phy_prod_pair_addr; + struct qedr_srq_hwq_info *hw_srq; + u32 page_cnt, page_size; + struct qed_chain *pbl; + struct qedr_srq *srq; + int rc = 0; + + DP_DEBUG(dev, QEDR_MSG_QP, + "create SRQ called from %s (pd %p)\n", + (udata) ? "User lib" : "kernel", pd); + + rc = qedr_check_srq_params(ibpd, dev, init_attr, udata); + if (rc) + return ERR_PTR(-EINVAL); + + srq = kzalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) + return ERR_PTR(-ENOMEM); + + srq->dev = dev; + hw_srq = &srq->hw_srq; + spin_lock_init(&srq->lock); + + hw_srq->max_wr = init_attr->attr.max_wr; + hw_srq->max_sges = init_attr->attr.max_sge; + + rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr); + if (rc) + goto err0; + + pbl = &hw_srq->pbl; + page_cnt = qed_chain_get_page_cnt(pbl); + pbl_base_addr = qed_chain_get_pbl_phys(pbl); + phy_prod_pair_addr = hw_srq->phy_prod_pair_addr; + page_size = QED_CHAIN_PAGE_SIZE; + in_params.pd_id = pd->pd_id; + in_params.pbl_base_addr = pbl_base_addr; + in_params.prod_pair_addr = phy_prod_pair_addr; + in_params.num_pages = page_cnt; + in_params.page_size = page_size; + + rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params); + if (rc) + goto err1; + + srq->srq_id = out_params.srq_id; + + rc = qedr_idr_add(dev, &dev->srqidr, srq, srq->srq_id); + if (rc) + goto err2; + + DP_DEBUG(dev, QEDR_MSG_SRQ, + "create srq: created srq with srq_id=0x%0x\n", srq->srq_id); + return &srq->ibsrq; + +err2: + destroy_in_params.srq_id = srq->srq_id; + + dev->ops->rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params); +err1: + qedr_free_srq_kernel_params(srq); +err0: + kfree(srq); + + return ERR_PTR(-EFAULT); +} + +int qedr_destroy_srq(struct ib_srq *ibsrq) +{ + struct qed_rdma_destroy_srq_in_params in_params = {}; + struct qedr_dev *dev = get_qedr_dev(ibsrq->device); + struct qedr_srq *srq = get_qedr_srq(ibsrq); + + qedr_idr_remove(dev, &dev->srqidr, srq->srq_id); + in_params.srq_id = srq->srq_id; + dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params); + + qedr_free_srq_kernel_params(srq); + + DP_DEBUG(dev, QEDR_MSG_SRQ, + "destroy srq: destroyed srq with srq_id=0x%0x\n", + srq->srq_id); + kfree(srq); + + return 0; +} + +int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) +{ + struct 
qed_rdma_modify_srq_in_params in_params = {}; + struct qedr_dev *dev = get_qedr_dev(ibsrq->device); + struct qedr_srq *srq = get_qedr_srq(ibsrq); + int rc; + + if (attr_mask & IB_SRQ_MAX_WR) { + DP_ERR(dev, + "modify srq: invalid attribute mask=0x%x specified for %p\n", + attr_mask, srq); + return -EINVAL; + } + + if (attr_mask & IB_SRQ_LIMIT) { + if (attr->srq_limit >= srq->hw_srq.max_wr) { + DP_ERR(dev, + "modify srq: invalid srq_limit=0x%x (max_srq_limit=0x%x)\n", + attr->srq_limit, srq->hw_srq.max_wr); + return -EINVAL; + } + + in_params.srq_id = srq->srq_id; + in_params.wqe_limit = attr->srq_limit; + rc = dev->ops->rdma_modify_srq(dev->rdma_ctx, &in_params); + if (rc) + return rc; + } + + srq->srq_limit = attr->srq_limit; + + DP_DEBUG(dev, QEDR_MSG_SRQ, + "modify srq: modified srq with srq_id=0x%0x\n", srq->srq_id); + + return 0; } static inline void @@ -1292,9 +1533,17 @@ qedr_init_common_qp_in_params(struct qedr_dev *dev, params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi; params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid; params->stats_queue = 0; - params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid; params->srq_id = 0; params->use_srq = false; + + if (!qp->srq) { + params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid; + + } else { + params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid; + params->srq_id = qp->srq->srq_id; + params->use_srq = true; + } } static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp) @@ -1667,16 +1916,13 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, if (rc) return ERR_PTR(rc); - if (attrs->srq) - return ERR_PTR(-EINVAL); - DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n", udata ? "user library" : "kernel", attrs->event_handler, pd, get_qedr_cq(attrs->send_cq), get_qedr_cq(attrs->send_cq)->icid, get_qedr_cq(attrs->recv_cq), - get_qedr_cq(attrs->recv_cq)->icid); + attrs->recv_cq ? get_qedr_cq(attrs->recv_cq)->icid : 0); qp = kzalloc(sizeof(*qp), GFP_KERNEL); if (!qp) { @@ -3224,6 +3470,102 @@ int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, return rc; } +static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq) +{ + u32 used; + + /* Calculate number of elements used based on producer + * count and consumer count and subtract it from max + * work request supported so that we get elements left. 
+ */ + used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt; + + return hw_srq->max_wr - used; +} + +int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + struct qedr_srq *srq = get_qedr_srq(ibsrq); + struct qedr_srq_hwq_info *hw_srq; + struct qedr_dev *dev = srq->dev; + struct qed_chain *pbl; + unsigned long flags; + int status = 0; + u32 num_sge; + u32 offset; + + spin_lock_irqsave(&srq->lock, flags); + + hw_srq = &srq->hw_srq; + pbl = &srq->hw_srq.pbl; + while (wr) { + struct rdma_srq_wqe_header *hdr; + int i; + + if (!qedr_srq_elem_left(hw_srq) || + wr->num_sge > srq->hw_srq.max_sges) { + DP_ERR(dev, "Can't post WR (%d,%d) || (%d > %d)\n", + hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt, + wr->num_sge, srq->hw_srq.max_sges); + status = -ENOMEM; + *bad_wr = wr; + break; + } + + hdr = qed_chain_produce(pbl); + num_sge = wr->num_sge; + /* Set number of sge and work request id in header */ + SRQ_HDR_SET(hdr, wr->wr_id, num_sge); + + srq->hw_srq.wr_prod_cnt++; + hw_srq->wqe_prod++; + hw_srq->sge_prod++; + + DP_DEBUG(dev, QEDR_MSG_SRQ, + "SRQ WR: SGEs: %d with wr_id[%d] = %llx\n", + wr->num_sge, hw_srq->wqe_prod, wr->wr_id); + + for (i = 0; i < wr->num_sge; i++) { + struct rdma_srq_sge *srq_sge = qed_chain_produce(pbl); + + /* Set SGE length, lkey and address */ + SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr, + wr->sg_list[i].length, wr->sg_list[i].lkey); + + DP_DEBUG(dev, QEDR_MSG_SRQ, + "[%d]: len %d key %x addr %x:%x\n", + i, srq_sge->length, srq_sge->l_key, + srq_sge->addr.hi, srq_sge->addr.lo); + hw_srq->sge_prod++; + } + + /* Flush WQE and SGE information before + * updating producer. + */ + wmb(); + + /* SRQ producer is 8 bytes. Need to update SGE producer index + * in first 4 bytes and need to update WQE producer in + * next 4 bytes. + */ + *srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod; + offset = offsetof(struct rdma_srq_producers, wqe_prod); + *((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) = + hw_srq->wqe_prod; + + /* Flush producer after updating it. 
*/ + wmb(); + wr = wr->next; + } + + DP_DEBUG(dev, QEDR_MSG_SRQ, "POST: Elements in S-RQ: %d\n", + qed_chain_get_elem_left(pbl)); + spin_unlock_irqrestore(&srq->lock, flags); + + return status; +} + int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { @@ -3615,6 +3957,31 @@ static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp, wc->wr_id = wr_id; } +static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp, + struct qedr_cq *cq, struct ib_wc *wc, + struct rdma_cqe_responder *resp) +{ + struct qedr_srq *srq = qp->srq; + u64 wr_id; + + wr_id = HILO_GEN(le32_to_cpu(resp->srq_wr_id.hi), + le32_to_cpu(resp->srq_wr_id.lo), u64); + + if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) { + wc->status = IB_WC_WR_FLUSH_ERR; + wc->vendor_err = 0; + wc->wr_id = wr_id; + wc->byte_len = 0; + wc->src_qp = qp->id; + wc->qp = &qp->ibqp; + wc->wr_id = wr_id; + } else { + __process_resp_one(dev, qp, cq, wc, resp, wr_id); + } + srq->hw_srq.wr_cons_cnt++; + + return 1; +} static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp, struct qedr_cq *cq, struct ib_wc *wc, struct rdma_cqe_responder *resp) @@ -3664,6 +4031,19 @@ static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp, } } +static int qedr_poll_cq_resp_srq(struct qedr_dev *dev, struct qedr_qp *qp, + struct qedr_cq *cq, int num_entries, + struct ib_wc *wc, + struct rdma_cqe_responder *resp) +{ + int cnt; + + cnt = process_resp_one_srq(dev, qp, cq, wc, resp); + consume_cqe(cq); + + return cnt; +} + static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp, struct qedr_cq *cq, int num_entries, struct ib_wc *wc, struct rdma_cqe_responder *resp, @@ -3741,6 +4121,11 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc, &cqe->resp, &update); break; + case RDMA_CQE_TYPE_RESPONDER_SRQ: + cnt = qedr_poll_cq_resp_srq(dev, qp, cq, num_entries, + wc, &cqe->resp); + update = 1; + break; case RDMA_CQE_TYPE_INVALID: default: DP_ERR(dev, "Error: invalid CQE type = %d\n", diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 087baf009864..0b7d0124b16c 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -66,6 +66,15 @@ int qedr_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *); int qedr_destroy_qp(struct ib_qp *ibqp); +struct ib_srq *qedr_create_srq(struct ib_pd *ibpd, + struct ib_srq_init_attr *attr, + struct ib_udata *udata); +int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); +int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); +int qedr_destroy_srq(struct ib_srq *ibsrq); +int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_recv_wr); struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, struct ib_udata *udata); int qedr_destroy_ah(struct ib_ah *ibah); -- cgit From 40b173ddce0fc6653a859889d1a90b5f5817061b Mon Sep 17 00:00:00 2001 From: Yuval Bason Date: Thu, 9 Aug 2018 17:29:38 +0300 Subject: qedr: Add user space support for SRQ This patch adds support for SRQ's created in user space and update qedr_affiliated_event to deal with general SRQ events. 
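To make the user-space path concrete, a rough counterpart through libibverbs might look as follows; the pd variable is assumed to exist already and error handling is omitted, so treat this as a sketch rather than a verbatim provider test:

    /* Hypothetical user-space SRQ creation via libibverbs. */
    struct ibv_srq_init_attr init_attr = {
    	.attr = { .max_wr = 128, .max_sge = 1 },
    };
    struct ibv_srq *srq = ibv_create_srq(pd, &init_attr);

Under the hood the provider library passes the SRQ buffer and producer-pair addresses to the kernel through struct qedr_create_srq_ureq and reads the srq_id back from struct qedr_create_srq_uresp, both added to the uapi header in this patch.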
Signed-off-by: Michal Kalderon Signed-off-by: Yuval Bason Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/main.c | 98 +++++++++++++++++++------- drivers/infiniband/hw/qedr/verbs.c | 137 +++++++++++++++++++++++++++++++------ include/uapi/rdma/qedr-abi.h | 17 +++++ 3 files changed, 208 insertions(+), 44 deletions(-) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 2642caf94b55..a0af6d424aed 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -191,6 +191,11 @@ static int qedr_register_device(struct qedr_dev *dev) QEDR_UVERBS(MODIFY_QP) | QEDR_UVERBS(QUERY_QP) | QEDR_UVERBS(DESTROY_QP) | + QEDR_UVERBS(CREATE_SRQ) | + QEDR_UVERBS(DESTROY_SRQ) | + QEDR_UVERBS(QUERY_SRQ) | + QEDR_UVERBS(MODIFY_SRQ) | + QEDR_UVERBS(POST_SRQ_RECV) | QEDR_UVERBS(REG_MR) | QEDR_UVERBS(DEREG_MR) | QEDR_UVERBS(POLL_CQ) | @@ -658,42 +663,70 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle) #define EVENT_TYPE_NOT_DEFINED 0 #define EVENT_TYPE_CQ 1 #define EVENT_TYPE_QP 2 +#define EVENT_TYPE_SRQ 3 struct qedr_dev *dev = (struct qedr_dev *)context; struct regpair *async_handle = (struct regpair *)fw_handle; u64 roce_handle64 = ((u64) async_handle->hi << 32) + async_handle->lo; u8 event_type = EVENT_TYPE_NOT_DEFINED; struct ib_event event; + struct ib_srq *ibsrq; + struct qedr_srq *srq; + unsigned long flags; struct ib_cq *ibcq; struct ib_qp *ibqp; struct qedr_cq *cq; struct qedr_qp *qp; + u16 srq_id; - switch (e_code) { - case ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR: - event.event = IB_EVENT_CQ_ERR; - event_type = EVENT_TYPE_CQ; - break; - case ROCE_ASYNC_EVENT_SQ_DRAINED: - event.event = IB_EVENT_SQ_DRAINED; - event_type = EVENT_TYPE_QP; - break; - case ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR: - event.event = IB_EVENT_QP_FATAL; - event_type = EVENT_TYPE_QP; - break; - case ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR: - event.event = IB_EVENT_QP_REQ_ERR; - event_type = EVENT_TYPE_QP; - break; - case ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR: - event.event = IB_EVENT_QP_ACCESS_ERR; - event_type = EVENT_TYPE_QP; - break; - default: + if (IS_ROCE(dev)) { + switch (e_code) { + case ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR: + event.event = IB_EVENT_CQ_ERR; + event_type = EVENT_TYPE_CQ; + break; + case ROCE_ASYNC_EVENT_SQ_DRAINED: + event.event = IB_EVENT_SQ_DRAINED; + event_type = EVENT_TYPE_QP; + break; + case ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR: + event.event = IB_EVENT_QP_FATAL; + event_type = EVENT_TYPE_QP; + break; + case ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR: + event.event = IB_EVENT_QP_REQ_ERR; + event_type = EVENT_TYPE_QP; + break; + case ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR: + event.event = IB_EVENT_QP_ACCESS_ERR; + event_type = EVENT_TYPE_QP; + break; + case ROCE_ASYNC_EVENT_SRQ_LIMIT: + event.event = IB_EVENT_SRQ_LIMIT_REACHED; + event_type = EVENT_TYPE_SRQ; + break; + case ROCE_ASYNC_EVENT_SRQ_EMPTY: + event.event = IB_EVENT_SRQ_ERR; + event_type = EVENT_TYPE_SRQ; + break; + default: + DP_ERR(dev, "unsupported event %d on handle=%llx\n", + e_code, roce_handle64); + } + } else { + switch (e_code) { + case QED_IWARP_EVENT_SRQ_LIMIT: + event.event = IB_EVENT_SRQ_LIMIT_REACHED; + event_type = EVENT_TYPE_SRQ; + break; + case QED_IWARP_EVENT_SRQ_EMPTY: + event.event = IB_EVENT_SRQ_ERR; + event_type = EVENT_TYPE_SRQ; + break; + default: DP_ERR(dev, "unsupported event %d on handle=%llx\n", e_code, roce_handle64); + } } - switch (event_type) { case EVENT_TYPE_CQ: cq = (struct qedr_cq *)(uintptr_t)roce_handle64; @@ -727,6 +760,25 
@@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle) } DP_ERR(dev, "QP event %d on handle %p\n", e_code, qp); break; + case EVENT_TYPE_SRQ: + srq_id = (u16)roce_handle64; + spin_lock_irqsave(&dev->srqidr.idr_lock, flags); + srq = idr_find(&dev->srqidr.idr, srq_id); + if (srq) { + ibsrq = &srq->ibsrq; + if (ibsrq->event_handler) { + event.device = ibsrq->device; + event.element.srq = ibsrq; + ibsrq->event_handler(&event, + ibsrq->srq_context); + } + } else { + DP_NOTICE(dev, + "SRQ event with NULL pointer ibsrq. Handle=%llx\n", + roce_handle64); + } + spin_unlock_irqrestore(&dev->srqidr.idr_lock, flags); + DP_NOTICE(dev, "SRQ event %d on handle %p\n", e_code, srq); default: break; } diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 7c75fc36e5ec..3f46fc14ee38 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1199,6 +1199,21 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev, return 0; } +static int qedr_copy_srq_uresp(struct qedr_dev *dev, + struct qedr_srq *srq, struct ib_udata *udata) +{ + struct qedr_create_srq_uresp uresp = {}; + int rc; + + uresp.srq_id = srq->srq_id; + + rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (rc) + DP_ERR(dev, "create srq: problem copying data to user space\n"); + + return rc; +} + static void qedr_copy_rq_uresp(struct qedr_dev *dev, struct qedr_create_qp_uresp *uresp, struct qedr_qp *qp) @@ -1321,6 +1336,13 @@ static int qedr_check_srq_params(struct ib_pd *ibpd, struct qedr_dev *dev, return 0; } +static void qedr_free_srq_user_params(struct qedr_srq *srq) +{ + qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl); + ib_umem_release(srq->usrq.umem); + ib_umem_release(srq->prod_umem); +} + static void qedr_free_srq_kernel_params(struct qedr_srq *srq) { struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq; @@ -1333,6 +1355,37 @@ static void qedr_free_srq_kernel_params(struct qedr_srq *srq) hw_srq->phy_prod_pair_addr); } +static int qedr_init_srq_user_params(struct ib_ucontext *ib_ctx, + struct qedr_srq *srq, + struct qedr_create_srq_ureq *ureq, + int access, int dmasync) +{ + struct scatterlist *sg; + int rc; + + rc = qedr_init_user_queue(ib_ctx, srq->dev, &srq->usrq, ureq->srq_addr, + ureq->srq_len, access, dmasync, 1); + if (rc) + return rc; + + srq->prod_umem = ib_umem_get(ib_ctx, ureq->prod_pair_addr, + sizeof(struct rdma_srq_producers), + access, dmasync); + if (IS_ERR(srq->prod_umem)) { + qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl); + ib_umem_release(srq->usrq.umem); + DP_ERR(srq->dev, + "create srq: failed ib_umem_get for producer, got %ld\n", + PTR_ERR(srq->prod_umem)); + return PTR_ERR(srq->prod_umem); + } + + sg = srq->prod_umem->sg_head.sgl; + srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg); + + return 0; +} + static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq, struct qedr_dev *dev, struct ib_srq_init_attr *init_attr) @@ -1390,10 +1443,12 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd, struct qedr_dev *dev = get_qedr_dev(ibpd->device); struct qed_rdma_create_srq_out_params out_params; struct qedr_pd *pd = get_qedr_pd(ibpd); + struct qedr_create_srq_ureq ureq = {}; u64 pbl_base_addr, phy_prod_pair_addr; + struct ib_ucontext *ib_ctx = NULL; struct qedr_srq_hwq_info *hw_srq; + struct qedr_ucontext *ctx = NULL; u32 page_cnt, page_size; - struct qed_chain *pbl; struct qedr_srq *srq; int rc = 0; @@ -1416,15 +1471,38 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd, 
hw_srq->max_wr = init_attr->attr.max_wr; hw_srq->max_sges = init_attr->attr.max_sge; - rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr); - if (rc) - goto err0; + if (udata && ibpd->uobject && ibpd->uobject->context) { + ib_ctx = ibpd->uobject->context; + ctx = get_qedr_ucontext(ib_ctx); + + if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) { + DP_ERR(dev, + "create srq: problem copying data from user space\n"); + goto err0; + } + + rc = qedr_init_srq_user_params(ib_ctx, srq, &ureq, 0, 0); + if (rc) + goto err0; + + page_cnt = srq->usrq.pbl_info.num_pbes; + pbl_base_addr = srq->usrq.pbl_tbl->pa; + phy_prod_pair_addr = hw_srq->phy_prod_pair_addr; + page_size = BIT(srq->usrq.umem->page_shift); + } else { + struct qed_chain *pbl; + + rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr); + if (rc) + goto err0; + + pbl = &hw_srq->pbl; + page_cnt = qed_chain_get_page_cnt(pbl); + pbl_base_addr = qed_chain_get_pbl_phys(pbl); + phy_prod_pair_addr = hw_srq->phy_prod_pair_addr; + page_size = QED_CHAIN_PAGE_SIZE; + } - pbl = &hw_srq->pbl; - page_cnt = qed_chain_get_page_cnt(pbl); - pbl_base_addr = qed_chain_get_pbl_phys(pbl); - phy_prod_pair_addr = hw_srq->phy_prod_pair_addr; - page_size = QED_CHAIN_PAGE_SIZE; in_params.pd_id = pd->pd_id; in_params.pbl_base_addr = pbl_base_addr; in_params.prod_pair_addr = phy_prod_pair_addr; @@ -1437,6 +1515,12 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd, srq->srq_id = out_params.srq_id; + if (udata) { + rc = qedr_copy_srq_uresp(dev, srq, udata); + if (rc) + goto err2; + } + rc = qedr_idr_add(dev, &dev->srqidr, srq, srq->srq_id); if (rc) goto err2; @@ -1450,7 +1534,10 @@ err2: dev->ops->rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params); err1: - qedr_free_srq_kernel_params(srq); + if (udata) + qedr_free_srq_user_params(srq); + else + qedr_free_srq_kernel_params(srq); err0: kfree(srq); @@ -1467,7 +1554,10 @@ int qedr_destroy_srq(struct ib_srq *ibsrq) in_params.srq_id = srq->srq_id; dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params); - qedr_free_srq_kernel_params(srq); + if (ibsrq->pd->uobject) + qedr_free_srq_user_params(srq); + else + qedr_free_srq_kernel_params(srq); DP_DEBUG(dev, QEDR_MSG_SRQ, "destroy srq: destroyed srq with srq_id=0x%0x\n", @@ -1593,9 +1683,10 @@ qedr_iwarp_populate_user_qp(struct qedr_dev *dev, qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl, &qp->usq.pbl_info, FW_PAGE_SHIFT); - - qp->urq.pbl_tbl->va = out_params->rq_pbl_virt; - qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys; + if (!qp->srq) { + qp->urq.pbl_tbl->va = out_params->rq_pbl_virt; + qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys; + } qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl, &qp->urq.pbl_info, FW_PAGE_SHIFT); @@ -1641,11 +1732,13 @@ static int qedr_create_user_qp(struct qedr_dev *dev, if (rc) return rc; - /* RQ - read access only (0), dma sync not required (0) */ - rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr, - ureq.rq_len, 0, 0, alloc_and_init); - if (rc) - return rc; + if (!qp->srq) { + /* RQ - read access only (0), dma sync not required (0) */ + rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr, + ureq.rq_len, 0, 0, alloc_and_init); + if (rc) + return rc; + } memset(&in_params, 0, sizeof(in_params)); qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params); @@ -1653,8 +1746,10 @@ static int qedr_create_user_qp(struct qedr_dev *dev, in_params.qp_handle_hi = ureq.qp_handle_hi; in_params.sq_num_pages = qp->usq.pbl_info.num_pbes; in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa; - in_params.rq_num_pages = 
qp->urq.pbl_info.num_pbes; - in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa; + if (!qp->srq) { + in_params.rq_num_pages = qp->urq.pbl_info.num_pbes; + in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa; + } qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx, &in_params, &out_params); diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h index 24c658b3c790..7a10b3a325fa 100644 --- a/include/uapi/rdma/qedr-abi.h +++ b/include/uapi/rdma/qedr-abi.h @@ -111,4 +111,21 @@ struct qedr_create_qp_uresp { __u32 reserved; }; +struct qedr_create_srq_ureq { + /* user space virtual address of producer pair */ + __aligned_u64 prod_pair_addr; + + /* user space virtual address of SRQ buffer */ + __aligned_u64 srq_addr; + + /* length of SRQ buffer */ + __aligned_u64 srq_len; +}; + +struct qedr_create_srq_uresp { + __u16 srq_id; + __u16 reserved0; + __u32 reserved1; +}; + #endif /* __QEDR_USER_H__ */ -- cgit From 666e7099a41afbbc1fb3ee785cad808d2925e12a Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 14 Aug 2018 10:36:15 +0300 Subject: IB/core: Add comment for change upper netevent handling Add a comment for CHANGEUPPER netevent handling. To improve code readability, (a) move the cmd definitions to their respective if-else branches, and (b) avoid single-line structure definitions. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/roce_gid_mgmt.c | 55 +++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index a4fbdc5d28fa..1304f4239b3d 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -579,25 +579,48 @@ static const struct netdev_event_work_cmd add_cmd = { static const struct netdev_event_work_cmd add_cmd_upper_ips = { .cb = add_netdev_upper_ips, .filter = is_eth_port_of_netdev}; +static void +ndev_event_unlink(struct netdev_notifier_changeupper_info *changeupper_info, + struct netdev_event_work_cmd *cmds) +{ + static const struct netdev_event_work_cmd + upper_ips_del_cmd = { + .cb = del_netdev_upper_ips, + .filter = upper_device_filter + }; + + cmds[0] = upper_ips_del_cmd; + cmds[0].ndev = changeupper_info->upper_dev; + cmds[1] = add_cmd; +} + +static void +ndev_event_link(struct netdev_notifier_changeupper_info *changeupper_info, + struct netdev_event_work_cmd *cmds) +{ + static const struct netdev_event_work_cmd + bonding_default_del_cmd = { + .cb = del_netdev_default_ips, + .filter = is_eth_port_inactive_slave + }; + /* + * When a lower netdev is linked to its upper bonding + * netdev, delete lower inactive slave netdev's default GIDs.
+ */ + cmds[0] = bonding_default_del_cmd; + cmds[0].ndev = changeupper_info->upper_dev; + cmds[1] = add_cmd_upper_ips; + cmds[1].ndev = changeupper_info->upper_dev; + cmds[1].filter_ndev = changeupper_info->upper_dev; +} + static void netdevice_event_changeupper(struct netdev_notifier_changeupper_info *changeupper_info, struct netdev_event_work_cmd *cmds) { - static const struct netdev_event_work_cmd upper_ips_del_cmd = { - .cb = del_netdev_upper_ips, .filter = upper_device_filter}; - static const struct netdev_event_work_cmd bonding_default_del_cmd = { - .cb = del_netdev_default_ips, .filter = is_eth_port_inactive_slave}; - - if (changeupper_info->linking == false) { - cmds[0] = upper_ips_del_cmd; - cmds[0].ndev = changeupper_info->upper_dev; - cmds[1] = add_cmd; - } else { - cmds[0] = bonding_default_del_cmd; - cmds[0].ndev = changeupper_info->upper_dev; - cmds[1] = add_cmd_upper_ips; - cmds[1].ndev = changeupper_info->upper_dev; - cmds[1].filter_ndev = changeupper_info->upper_dev; - } + if (changeupper_info->linking) + ndev_event_link(changeupper_info, cmds); + else + ndev_event_unlink(changeupper_info, cmds); } static int netdevice_event(struct notifier_block *this, unsigned long event, -- cgit From b9f09866e0e964ab04968d44602fa3d9de5076f3 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 14 Aug 2018 10:36:16 +0300 Subject: IB/core: Avoid confusing del_netdev_default_ips Currently bond_delete_netdev_default_gids() is called by two callers: (a) del_netdev_default_ips_join() (b) del_netdev_default_ips() Both of the above functions change the argument order while calling bond_delete_netdev_default_gids(). This required a silly del_netdev_default_ips() wrapper. Additionally, del_netdev_default_ips() deletes default GIDs, not IP based GIDs, so its _ips suffix is confusing. Therefore, get rid of the confusing del_netdev_default_ips() and simplify bond_delete_netdev_default_gids() to follow the same argument order as its caller. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/roce_gid_mgmt.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index 1304f4239b3d..c7cf3c7f38a6 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -249,8 +249,8 @@ static void enum_netdev_default_gids(struct ib_device *ib_dev, static void bond_delete_netdev_default_gids(struct ib_device *ib_dev, u8 port, - struct net_device *event_ndev, - struct net_device *rdma_ndev) + struct net_device *rdma_ndev, + void *event_ndev) { struct net_device *real_dev = rdma_vlan_dev_real_dev(event_ndev); unsigned long gid_type_mask; @@ -513,18 +513,12 @@ static void del_netdev_default_ips_join(struct ib_device *ib_dev, u8 port, rcu_read_unlock(); if (master_ndev) { - bond_delete_netdev_default_gids(ib_dev, port, master_ndev, - rdma_ndev); + bond_delete_netdev_default_gids(ib_dev, port, rdma_ndev, + master_ndev); dev_put(master_ndev); } } -static void del_netdev_default_ips(struct ib_device *ib_dev, u8 port, - struct net_device *rdma_ndev, void *cookie) -{ - bond_delete_netdev_default_gids(ib_dev, port, cookie, rdma_ndev); -} - /* The following functions operate on all IB devices. netdevice_event and * addr_event execute ib_enum_all_roce_netdevs through a work. * ib_enum_all_roce_netdevs iterates through all IB devices.
@@ -600,7 +594,7 @@ ndev_event_link(struct netdev_notifier_changeupper_info *changeupper_info, { static const struct netdev_event_work_cmd bonding_default_del_cmd = { - .cb = del_netdev_default_ips, + .cb = bond_delete_netdev_default_gids, .filter = is_eth_port_inactive_slave }; /* @@ -630,8 +624,11 @@ static int netdevice_event(struct notifier_block *this, unsigned long event, .cb = del_netdev_ips, .filter = pass_all_filter}; static const struct netdev_event_work_cmd bonding_default_del_cmd_join = { .cb = del_netdev_default_ips_join, .filter = is_eth_port_inactive_slave}; - static const struct netdev_event_work_cmd default_del_cmd = { - .cb = del_netdev_default_ips, .filter = pass_all_filter}; + static const struct netdev_event_work_cmd + default_del_cmd = { + .cb = bond_delete_netdev_default_gids, + .filter = pass_all_filter + }; static const struct netdev_event_work_cmd bonding_event_ips_del_cmd = { .cb = del_netdev_upper_ips, .filter = upper_device_filter}; struct net_device *ndev = netdev_notifier_info_to_dev(ptr); -- cgit From 408f1242d940773e9e97a544803b0dcf28b70d17 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 14 Aug 2018 10:36:17 +0300 Subject: IB/core: Delete lower netdevice default GID entries in bonding scenario When a NETDEV_CHANGEUPPER event occurs, the lower device is not yet established as a slave of the master, and when the upper device is a bond device, default GID entries are not deleted. Due to this, when the bond device is fully configured, default GID entries of the bond device cannot be added, as the default GID entries are occupied by the lower netdevice. This is incorrect. Default GID entries should really be those of the bond netdevice, because in all RoCE GIDs (default or IP based) the MAC address of the bond device will be used. It is confusing to have a default GID of a netdevice which is not really used for any purpose. Therefore, as a first step, implement (a) a filter function which checks whether the upper device associated with a CHANGEUPPER event netdevice is the master device, and (b) a callback function which deletes the default GIDs of the lower (event) netdevice. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/roce_gid_mgmt.c | 71 ++++++++++++++++++++++++++++----- 1 file changed, 62 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index c7cf3c7f38a6..21e008aa863c 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -208,6 +208,34 @@ static int upper_device_filter(struct ib_device *ib_dev, u8 port, return res; } +/** + * is_upper_ndev_bond_master_filter - Check if a given netdevice + * is bond master device of netdevice of the RDMA device of port. + * @ib_dev: IB device to check + * @port: Port to consider for adding default GID + * @rdma_ndev: Pointer to rdma netdevice + * @cookie: Netdevice to consider to form a default GID + * + * is_upper_ndev_bond_master_filter() returns true if a cookie_netdev + * is bond master device and rdma_ndev is its lower netdevice. It might + * not have been established as slave device yet.
+ */ +static int +is_upper_ndev_bond_master_filter(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, + void *cookie) +{ + struct net_device *cookie_ndev = cookie; + bool match = false; + + rcu_read_lock(); + if (netif_is_bond_master(cookie_ndev) && + rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev)) + match = true; + rcu_read_unlock(); + return match; +} + static void update_gid_ip(enum gid_op_type gid_op, struct ib_device *ib_dev, u8 port, struct net_device *ndev, @@ -391,6 +419,27 @@ static void del_netdev_ips(struct ib_device *ib_dev, u8 port, ib_cache_gid_del_all_netdev_gids(ib_dev, port, cookie); } +/** + * del_default_gids - Delete default GIDs of the event/cookie netdevice + * @ib_dev: RDMA device pointer + * @port: Port of the RDMA device whose GID table to consider + * @rdma_ndev: Unused rdma netdevice + * @cookie: Pointer to event netdevice + * + * del_default_gids() deletes the default GIDs of the event/cookie netdevice. + */ +static void del_default_gids(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) +{ + struct net_device *cookie_ndev = cookie; + unsigned long gid_type_mask; + + gid_type_mask = roce_gid_type_mask_support(ib_dev, port); + + ib_cache_gid_set_default_gid(ib_dev, port, cookie_ndev, gid_type_mask, + IB_CACHE_GID_DEFAULT_MODE_DELETE); +} + static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev, u8 port, struct net_device *rdma_ndev, @@ -589,30 +638,34 @@ ndev_event_unlink(struct netdev_notifier_changeupper_info *changeupper_info, } static void -ndev_event_link(struct netdev_notifier_changeupper_info *changeupper_info, +ndev_event_link(struct net_device *event_ndev, + struct netdev_notifier_changeupper_info *changeupper_info, struct netdev_event_work_cmd *cmds) { static const struct netdev_event_work_cmd bonding_default_del_cmd = { - .cb = bond_delete_netdev_default_gids, - .filter = is_eth_port_inactive_slave + .cb = del_default_gids, + .filter = is_upper_ndev_bond_master_filter }; /* * When a lower netdev is linked to its upper bonding - * netdev, delete lower inactive slave netdev's default GIDs. + * netdev, delete lower slave netdev's default GIDs. 
*/ cmds[0] = bonding_default_del_cmd; - cmds[0].ndev = changeupper_info->upper_dev; + cmds[0].ndev = event_ndev; + cmds[0].filter_ndev = changeupper_info->upper_dev; + cmds[1] = add_cmd_upper_ips; cmds[1].ndev = changeupper_info->upper_dev; cmds[1].filter_ndev = changeupper_info->upper_dev; } -static void netdevice_event_changeupper(struct netdev_notifier_changeupper_info *changeupper_info, - struct netdev_event_work_cmd *cmds) +static void netdevice_event_changeupper(struct net_device *event_ndev, + struct netdev_notifier_changeupper_info *changeupper_info, + struct netdev_event_work_cmd *cmds) { if (changeupper_info->linking) - ndev_event_link(changeupper_info, cmds); + ndev_event_link(event_ndev, changeupper_info, cmds); else ndev_event_unlink(changeupper_info, cmds); } @@ -657,7 +710,7 @@ static int netdevice_event(struct notifier_block *this, unsigned long event, break; case NETDEV_CHANGEUPPER: - netdevice_event_changeupper( + netdevice_event_changeupper(ndev, container_of(ptr, struct netdev_notifier_changeupper_info, info), cmds); break; -- cgit From a03d4d2775c7f654bcea8a9f8f49f724fd4fa810 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 14 Aug 2018 10:36:18 +0300 Subject: IB/core: Consider adding default GIDs of bond device Now that we correctly delete the default GIDs of lower devices during CHANGEUPPER event, add default GIDs of the bonding master device. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/roce_gid_mgmt.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index 21e008aa863c..783bcea6765e 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -440,6 +440,17 @@ static void del_default_gids(struct ib_device *ib_dev, u8 port, IB_CACHE_GID_DEFAULT_MODE_DELETE); } +static void add_default_gids(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) +{ + struct net_device *event_ndev = cookie; + unsigned long gid_type_mask; + + gid_type_mask = roce_gid_type_mask_support(ib_dev, port); + ib_cache_gid_set_default_gid(ib_dev, port, event_ndev, gid_type_mask, + IB_CACHE_GID_DEFAULT_MODE_SET); +} + static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev, u8 port, struct net_device *rdma_ndev, @@ -637,6 +648,11 @@ ndev_event_unlink(struct netdev_notifier_changeupper_info *changeupper_info, cmds[1] = add_cmd; } +static const struct netdev_event_work_cmd bonding_default_add_cmd = { + .cb = add_default_gids, + .filter = is_upper_ndev_bond_master_filter +}; + static void ndev_event_link(struct net_device *event_ndev, struct netdev_notifier_changeupper_info *changeupper_info, @@ -655,9 +671,15 @@ ndev_event_link(struct net_device *event_ndev, cmds[0].ndev = event_ndev; cmds[0].filter_ndev = changeupper_info->upper_dev; - cmds[1] = add_cmd_upper_ips; + /* Now add bonding upper device default GIDs */ + cmds[1] = bonding_default_add_cmd; cmds[1].ndev = changeupper_info->upper_dev; cmds[1].filter_ndev = changeupper_info->upper_dev; + + /* Now add bonding upper device IP based GIDs */ + cmds[2] = add_cmd_upper_ips; + cmds[2].ndev = changeupper_info->upper_dev; + cmds[2].filter_ndev = changeupper_info->upper_dev; } static void netdevice_event_changeupper(struct net_device *event_ndev, -- cgit From 464b79b45aede2859eb46ae91786f0266868602b Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 14 Aug 2018 10:36:19 +0300 
Subject: IB/core: Add default GIDs of the bond master netdev Currently the following issues exist: 1. Default GIDs of the lower (slave) netdevice are added even when a bond netdevice is its upper device. Rather, the default GID should be that of the bond master netdevice. 2. Due to this, when a failover event occurs, the FAILOVER event handler attempts to delete the GID of the upper device and tries to add the default GID of the lower device. This is incorrect behavior. To have simple and correct code: (a) Split default GIDs addition out of add_netdev_ips(). This allows easier removal in the future if RoCE default GIDs are removed. (b) Add default GIDs of the bond master device by using the right filter and callback function. (c) Remove unused function enum_netdev_default_gids(). Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/roce_gid_mgmt.c | 88 ++++++++++++++++++++++----------- 1 file changed, 59 insertions(+), 29 deletions(-) diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index 783bcea6765e..69f58e907810 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -184,6 +184,43 @@ static int is_eth_port_inactive_slave(struct ib_device *ib_dev, u8 port, return res; } +/** is_ndev_for_default_gid_filter - Check if a given netdevice + * can be considered for default GIDs or not. + * @ib_dev: IB device to check + * @port: Port to consider for adding default GID + * @rdma_ndev: rdma netdevice pointer + * @cookie_ndev: Netdevice to consider to form a default GID + * + * is_ndev_for_default_gid_filter() returns true (1) if a given netdevice can be + * considered for deriving default RoCE GID, returns false (0) otherwise. + */ +static int +is_ndev_for_default_gid_filter(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) +{ + struct net_device *cookie_ndev = cookie; + int res; + + if (!rdma_ndev) + return 0; + + rcu_read_lock(); + + /* + * When rdma netdevice is used in bonding, bonding master netdevice + * should be considered for default GIDs. Therefore, ignore slave rdma + * netdevices when bonding is considered. + * Additionally, when the event (cookie) netdevice is the bond master device, + * make sure that it is the upper netdevice of the rdma netdevice.
+ */ + res = ((cookie_ndev == rdma_ndev && !netif_is_bond_slave(rdma_ndev)) || + (netif_is_bond_master(cookie_ndev) && + rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev))); + + rcu_read_unlock(); + return res; +} + static int pass_all_filter(struct ib_device *ib_dev, u8 port, struct net_device *rdma_ndev, void *cookie) { @@ -251,30 +288,6 @@ static void update_gid_ip(enum gid_op_type gid_op, update_gid(gid_op, ib_dev, port, &gid, &gid_attr); } -static void enum_netdev_default_gids(struct ib_device *ib_dev, - u8 port, struct net_device *event_ndev, - struct net_device *rdma_ndev) -{ - unsigned long gid_type_mask; - - rcu_read_lock(); - if (!rdma_ndev || - ((rdma_ndev != event_ndev && - !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) || - is_eth_active_slave_of_bonding_rcu(rdma_ndev, - netdev_master_upper_dev_get_rcu(rdma_ndev)) == - BONDING_SLAVE_STATE_INACTIVE)) { - rcu_read_unlock(); - return; - } - rcu_read_unlock(); - - gid_type_mask = roce_gid_type_mask_support(ib_dev, port); - - ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, gid_type_mask, - IB_CACHE_GID_DEFAULT_MODE_SET); -} - static void bond_delete_netdev_default_gids(struct ib_device *ib_dev, u8 port, struct net_device *rdma_ndev, @@ -409,7 +422,6 @@ static void _add_netdev_ips(struct ib_device *ib_dev, u8 port, static void add_netdev_ips(struct ib_device *ib_dev, u8 port, struct net_device *rdma_ndev, void *cookie) { - enum_netdev_default_gids(ib_dev, port, cookie, rdma_ndev); _add_netdev_ips(ib_dev, port, cookie); } @@ -465,9 +477,19 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev, rtnl_lock(); down_read(&net_rwsem); for_each_net(net) - for_each_netdev(net, ndev) + for_each_netdev(net, ndev) { + /* + * Filter and add default GIDs of the primary netdevice + * when not in bonding mode, or add default GIDs + * of bond master device, when in bonding mode. + */ + if (is_ndev_for_default_gid_filter(ib_dev, port, + rdma_ndev, ndev)) + add_default_gids(ib_dev, port, rdma_ndev, ndev); + if (is_eth_port_of_netdev(ib_dev, port, rdma_ndev, ndev)) - add_netdev_ips(ib_dev, port, rdma_ndev, ndev); + _add_netdev_ips(ib_dev, port, ndev); + } up_read(&net_rwsem); rtnl_unlock(); } @@ -692,6 +714,11 @@ static void netdevice_event_changeupper(struct net_device *event_ndev, ndev_event_unlink(changeupper_info, cmds); } +static const struct netdev_event_work_cmd add_default_gid_cmd = { + .cb = add_default_gids, + .filter = is_ndev_for_default_gid_filter, +}; + static int netdevice_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -716,7 +743,8 @@ static int netdevice_event(struct notifier_block *this, unsigned long event, case NETDEV_REGISTER: case NETDEV_UP: cmds[0] = bonding_default_del_cmd_join; - cmds[1] = add_cmd; + cmds[1] = add_default_gid_cmd; + cmds[2] = add_cmd; break; case NETDEV_UNREGISTER: @@ -739,7 +767,9 @@ static int netdevice_event(struct notifier_block *this, unsigned long event, case NETDEV_BONDING_FAILOVER: cmds[0] = bonding_event_ips_del_cmd; - cmds[1] = bonding_default_del_cmd_join; + /* Add default GIDs of the bond device */ + cmds[1] = bonding_default_add_cmd; + /* Add IP based GIDs of the bond device */ cmds[2] = add_cmd_upper_ips; break; -- cgit From d12e2eed2743856b4493aeda4f6ed1bb0fa47e57 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 14 Aug 2018 10:36:20 +0300 Subject: IB/core: Update GID entries for netdevice whose mac address changes Update all GID table entries of the netdevice whose MAC address changed. 
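The resulting NETDEV_CHANGEADDR handling is summarized below; this is the arm of netdevice_event() after the patch, reproduced from the hunk that follows with explanatory comments added:

    case NETDEV_CHANGEADDR:
    	cmds[0] = netdev_del_cmd;	/* delete GIDs derived from the old MAC */
    	cmds[1] = add_default_gid_cmd;	/* re-create default GIDs with the new MAC */
    	cmds[2] = add_cmd;		/* re-add IP based GIDs */
    	break;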
Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/roce_gid_mgmt.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index 69f58e907810..0d27af5ad951 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -291,7 +291,7 @@ static void update_gid_ip(enum gid_op_type gid_op, static void bond_delete_netdev_default_gids(struct ib_device *ib_dev, u8 port, struct net_device *rdma_ndev, - void *event_ndev) + struct net_device *event_ndev) { struct net_device *real_dev = rdma_vlan_dev_real_dev(event_ndev); unsigned long gid_type_mask; @@ -727,9 +727,9 @@ static int netdevice_event(struct notifier_block *this, unsigned long event, static const struct netdev_event_work_cmd bonding_default_del_cmd_join = { .cb = del_netdev_default_ips_join, .filter = is_eth_port_inactive_slave}; static const struct netdev_event_work_cmd - default_del_cmd = { - .cb = bond_delete_netdev_default_gids, - .filter = pass_all_filter + netdev_del_cmd = { + .cb = del_netdev_ips, + .filter = is_eth_port_of_netdev }; static const struct netdev_event_work_cmd bonding_event_ips_del_cmd = { .cb = del_netdev_upper_ips, .filter = upper_device_filter}; @@ -755,8 +755,9 @@ static int netdevice_event(struct notifier_block *this, unsigned long event, break; case NETDEV_CHANGEADDR: - cmds[0] = default_del_cmd; - cmds[1] = add_cmd; + cmds[0] = netdev_del_cmd; + cmds[1] = add_default_gid_cmd; + cmds[2] = add_cmd; break; -- cgit From dd81b2c8a3339c813568d07091fcd0d97dd0cf41 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 14 Aug 2018 10:36:21 +0300 Subject: IB/core: Change filter function return type from int to bool Filter functions return either 0 or 1; therefore, it is better to change their return type from int to bool to reflect this. Additionally, some filter functions have a _filter suffix and some don't. Make all filter functions consistent by giving them a _filter suffix to improve code readability.
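A minimal example of a filter conforming to the new prototype is shown below; demo_match_all_filter is hypothetical (it simply mirrors pass_all_filter()) and is not part of the patch:

    static bool demo_match_all_filter(struct ib_device *ib_dev, u8 port,
    				      struct net_device *rdma_ndev,
    				      void *cookie)
    {
    	/* Accept every (device, port, netdev) combination. */
    	return true;
    }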
Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 4 +- drivers/infiniband/core/roce_gid_mgmt.c | 70 +++++++++++++++++++-------------- 2 files changed, 43 insertions(+), 31 deletions(-) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index fae417a391fb..77c7005c396c 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -91,8 +91,8 @@ void ib_device_unregister_sysfs(struct ib_device *device); typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port, struct net_device *idev, void *cookie); -typedef int (*roce_netdev_filter)(struct ib_device *device, u8 port, - struct net_device *idev, void *cookie); +typedef bool (*roce_netdev_filter)(struct ib_device *device, u8 port, + struct net_device *idev, void *cookie); void ib_enum_roce_netdev(struct ib_device *ib_dev, roce_netdev_filter filter, diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index 0d27af5ad951..ee366199b169 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -143,14 +143,15 @@ static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_de #define REQUIRED_BOND_STATES (BONDING_SLAVE_STATE_ACTIVE | \ BONDING_SLAVE_STATE_NA) -static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port, - struct net_device *rdma_ndev, void *cookie) +static bool +is_eth_port_of_netdev_filter(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) { struct net_device *real_dev; - int res; + bool res; if (!rdma_ndev) - return 0; + return false; rcu_read_lock(); real_dev = rdma_vlan_dev_real_dev(cookie); @@ -166,14 +167,15 @@ static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port, return res; } -static int is_eth_port_inactive_slave(struct ib_device *ib_dev, u8 port, - struct net_device *rdma_ndev, void *cookie) +static bool +is_eth_port_inactive_slave_filter(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) { struct net_device *master_dev; - int res; + bool res; if (!rdma_ndev) - return 0; + return false; rcu_read_lock(); master_dev = netdev_master_upper_dev_get_rcu(rdma_ndev); @@ -191,18 +193,18 @@ static int is_eth_port_inactive_slave(struct ib_device *ib_dev, u8 port, * @rdma_ndev: rdma netdevice pointer * @cookie_ndev: Netdevice to consider to form a default GID * - * is_ndev_for_default_gid_filter() returns true (1) if a given netdevice can be - * considered for deriving default RoCE GID, returns false (0) otherwise. + * is_ndev_for_default_gid_filter() returns true if a given netdevice can be + * considered for deriving default RoCE GID, returns false otherwise. 
*/ -static int +static bool is_ndev_for_default_gid_filter(struct ib_device *ib_dev, u8 port, struct net_device *rdma_ndev, void *cookie) { struct net_device *cookie_ndev = cookie; - int res; + bool res; if (!rdma_ndev) - return 0; + return false; rcu_read_lock(); @@ -221,22 +223,22 @@ is_ndev_for_default_gid_filter(struct ib_device *ib_dev, u8 port, return res; } -static int pass_all_filter(struct ib_device *ib_dev, u8 port, - struct net_device *rdma_ndev, void *cookie) +static bool pass_all_filter(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) { - return 1; + return true; } -static int upper_device_filter(struct ib_device *ib_dev, u8 port, - struct net_device *rdma_ndev, void *cookie) +static bool upper_device_filter(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) { - int res; + bool res; if (!rdma_ndev) - return 0; + return false; if (rdma_ndev == cookie) - return 1; + return true; rcu_read_lock(); res = rdma_is_upper_dev_rcu(rdma_ndev, cookie); @@ -257,7 +259,7 @@ static int upper_device_filter(struct ib_device *ib_dev, u8 port, * is bond master device and rdma_ndev is its lower netdevice. It might * not have been established as slave device yet. */ -static int +static bool is_upper_ndev_bond_master_filter(struct ib_device *ib_dev, u8 port, struct net_device *rdma_ndev, void *cookie) @@ -487,7 +489,8 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev, rdma_ndev, ndev)) add_default_gids(ib_dev, port, rdma_ndev, ndev); - if (is_eth_port_of_netdev(ib_dev, port, rdma_ndev, ndev)) + if (is_eth_port_of_netdev_filter(ib_dev, port, + rdma_ndev, ndev)) _add_netdev_ips(ib_dev, port, ndev); } up_read(&net_rwsem); @@ -651,9 +654,14 @@ static int netdevice_queue_work(struct netdev_event_work_cmd *cmds, } static const struct netdev_event_work_cmd add_cmd = { - .cb = add_netdev_ips, .filter = is_eth_port_of_netdev}; + .cb = add_netdev_ips, + .filter = is_eth_port_of_netdev_filter +}; + static const struct netdev_event_work_cmd add_cmd_upper_ips = { - .cb = add_netdev_upper_ips, .filter = is_eth_port_of_netdev}; + .cb = add_netdev_upper_ips, + .filter = is_eth_port_of_netdev_filter +}; static void ndev_event_unlink(struct netdev_notifier_changeupper_info *changeupper_info, @@ -724,12 +732,15 @@ static int netdevice_event(struct notifier_block *this, unsigned long event, { static const struct netdev_event_work_cmd del_cmd = { .cb = del_netdev_ips, .filter = pass_all_filter}; - static const struct netdev_event_work_cmd bonding_default_del_cmd_join = { - .cb = del_netdev_default_ips_join, .filter = is_eth_port_inactive_slave}; + static const struct netdev_event_work_cmd + bonding_default_del_cmd_join = { + .cb = del_netdev_default_ips_join, + .filter = is_eth_port_inactive_slave_filter + }; static const struct netdev_event_work_cmd netdev_del_cmd = { .cb = del_netdev_ips, - .filter = is_eth_port_of_netdev + .filter = is_eth_port_of_netdev_filter }; static const struct netdev_event_work_cmd bonding_event_ips_del_cmd = { .cb = del_netdev_upper_ips, .filter = upper_device_filter}; @@ -786,7 +797,8 @@ static void update_gid_event_work_handler(struct work_struct *_work) struct update_gid_event_work *work = container_of(_work, struct update_gid_event_work, work); - ib_enum_all_roce_netdevs(is_eth_port_of_netdev, work->gid_attr.ndev, + ib_enum_all_roce_netdevs(is_eth_port_of_netdev_filter, + work->gid_attr.ndev, callback_for_addr_gid_device_scan, work); dev_put(work->gid_attr.ndev); -- cgit From 
a1ceeca679dccc492235f0f629d9e9f7b3d51ca8 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 9 Aug 2018 22:00:47 +0300 Subject: RDMA/hns: Fix usage of bitmap allocation functions return values hns bitmap allocation functions return 0 on success and -1 on failure. Callers of these functions wrongly used their return value as an errno, fix that by making a proper conversion. Fixes: a598c6f4c5a8 ("IB/hns: Simplify function of pd alloc and qp alloc") Signed-off-by: Gal Pressman Acked-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_pd.c | 2 +- drivers/infiniband/hw/hns/hns_roce_qp.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index b9f2c871ff9a..e11c149da04d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -37,7 +37,7 @@ static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn) { - return hns_roce_bitmap_alloc(&hr_dev->pd_bitmap, pdn); + return hns_roce_bitmap_alloc(&hr_dev->pd_bitmap, pdn) ? -ENOMEM : 0; } static void hns_roce_pd_free(struct hns_roce_dev *hr_dev, unsigned long pdn) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index c1dbddcd58c9..efb7e961ca65 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -115,7 +115,10 @@ static int hns_roce_reserve_range_qp(struct hns_roce_dev *hr_dev, int cnt, { struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; - return hns_roce_bitmap_alloc_range(&qp_table->bitmap, cnt, align, base); + return hns_roce_bitmap_alloc_range(&qp_table->bitmap, cnt, align, + base) ? + -ENOMEM : + 0; } enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state) -- cgit From 92f4e77c85918eab5e5803d7e28ab89a7e6bd3a2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 15 Aug 2018 16:52:58 -0600 Subject: Revert "net/smc: Replace ib_query_gid with rdma_get_gid_attr" This reverts commit ddb457c6993babbcdd41fca638b870d2a2fc3941. The include rdma/ib_cache.h is kept, and we have to add a memset to the compat wrapper to avoid compiler warnings in gcc-7 This revert is done to avoid extensive merge conflicts with SMC changes in netdev during the 4.19 merge window. 
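For reference, the call pattern SMC returns to looks like the following, condensed from the smc_ib_fill_gid_and_mac() hunk below with comments added (error handling trimmed):

    union ib_gid gid;
    struct ib_gid_attr gattr;

    /* The deprecated compat wrapper copies the GID and its attributes. */
    if (!ib_query_gid(smcibdev->ibdev, ibport, 0, &gid, &gattr) &&
        gattr.ndev) {
    	/* gattr.ndev->dev_addr supplies the MAC for this port. */
    	dev_put(gattr.ndev);	/* the wrapper returns a held netdev */
    }

The memset() added to the wrapper zero-initializes *attr_out before it is filled in, which is what avoids the gcc-7 warnings mentioned above.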
Signed-off-by: Jason Gunthorpe --- include/rdma/ib_cache.h | 1 + net/smc/smc_core.c | 19 ++++++++++--------- net/smc/smc_ib.c | 24 ++++++++++-------------- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index a4ce441f36f0..3e11e7cc60b7 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -143,6 +143,7 @@ static inline __deprecated int ib_query_gid(struct ib_device *device, { const struct ib_gid_attr *attr; + memset(attr_out, 0, sizeof(*attr_out)); attr = rdma_get_gid_attr(device, port_num, index); if (IS_ERR(attr)) return PTR_ERR(attr); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index d99a75f75e42..15bad268f37d 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -451,7 +451,8 @@ out: static int smc_link_determine_gid(struct smc_link_group *lgr) { struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; - const struct ib_gid_attr *gattr; + struct ib_gid_attr gattr; + union ib_gid gid; int i; if (!lgr->vlan_id) { @@ -461,18 +462,18 @@ static int smc_link_determine_gid(struct smc_link_group *lgr) for (i = 0; i < lnk->smcibdev->pattr[lnk->ibport - 1].gid_tbl_len; i++) { - gattr = rdma_get_gid_attr(lnk->smcibdev->ibdev, lnk->ibport, i); - if (IS_ERR(gattr)) + if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid, + &gattr)) continue; - if (gattr->ndev) { - if (is_vlan_dev(gattr->ndev) && - vlan_dev_vlan_id(gattr->ndev) == lgr->vlan_id) { - lnk->gid = gattr->gid; - rdma_put_gid_attr(gattr); + if (gattr.ndev) { + if (is_vlan_dev(gattr.ndev) && + vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) { + lnk->gid = gid; + dev_put(gattr.ndev); return 0; } + dev_put(gattr.ndev); } - rdma_put_gid_attr(gattr); } return -ENODEV; } diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 74f29f814ec1..117b05f1a494 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -373,21 +373,17 @@ void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev, static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport) { - const struct ib_gid_attr *gattr; - int rc = 0; + struct ib_gid_attr gattr; + int rc; - gattr = rdma_get_gid_attr(smcibdev->ibdev, ibport, 0); - if (IS_ERR(gattr)) - return PTR_ERR(gattr); - if (!gattr->ndev) { - rc = -ENODEV; - goto done; - } - smcibdev->gid[ibport - 1] = gattr->gid; - memcpy(smcibdev->mac[ibport - 1], gattr->ndev->dev_addr, ETH_ALEN); -done: - rdma_put_gid_attr(gattr); - return rc; + rc = ib_query_gid(smcibdev->ibdev, ibport, 0, + &smcibdev->gid[ibport - 1], &gattr); + if (rc || !gattr.ndev) + return -ENODEV; + + memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN); + dev_put(gattr.ndev); + return 0; } /* Create an identifier unique for this instance of SMC-R. -- cgit