diff options
Diffstat (limited to 'drivers/infiniband/core')
29 files changed, 1084 insertions, 607 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index f253295795f0..be0743dac3ff 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -348,16 +348,10 @@ static int dst_fetch_ha(const struct dst_entry *dst, static bool has_gateway(const struct dst_entry *dst, sa_family_t family) { - struct rtable *rt; - struct rt6_info *rt6; - - if (family == AF_INET) { - rt = container_of(dst, struct rtable, dst); - return rt->rt_uses_gateway; - } + if (family == AF_INET) + return dst_rtable(dst)->rt_uses_gateway; - rt6 = container_of(dst, struct rt6_info, dst); - return rt6->rt6i_flags & RTF_GATEWAY; + return dst_rt6_info(dst)->rt6i_flags & RTF_GATEWAY; } static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr, diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index f82b4260de42..3bb46696731e 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -59,7 +59,16 @@ __ib_get_agent_port(const struct ib_device *device, int port_num) struct ib_agent_port_private *entry; list_for_each_entry(entry, &ib_agent_port_list, port_list) { - if (entry->agent[1]->device == device && + /* Need to check both agent[0] and agent[1], as an agent port + * may only have one of them + */ + if (entry->agent[0] && + entry->agent[0]->device == device && + entry->agent[0]->port_num == port_num) + return entry; + + if (entry->agent[1] && + entry->agent[1]->device == device && entry->agent[1]->port_num == port_num) return entry; } @@ -172,14 +181,16 @@ int ib_agent_port_open(struct ib_device *device, int port_num) } } - /* Obtain send only MAD agent for GSI QP */ - port_priv->agent[1] = ib_register_mad_agent(device, port_num, - IB_QPT_GSI, NULL, 0, - &agent_send_handler, - NULL, NULL, 0); - if (IS_ERR(port_priv->agent[1])) { - ret = PTR_ERR(port_priv->agent[1]); - goto error3; + if (rdma_cap_ib_cm(device, port_num)) { + /* Obtain send only MAD agent for GSI QP */ + port_priv->agent[1] = ib_register_mad_agent(device, port_num, + IB_QPT_GSI, NULL, 0, + &agent_send_handler, + NULL, NULL, 0); + if (IS_ERR(port_priv->agent[1])) { + ret = PTR_ERR(port_priv->agent[1]); + goto error3; + } } spin_lock_irqsave(&ib_agent_port_list_lock, flags); @@ -212,7 +223,8 @@ int ib_agent_port_close(struct ib_device *device, int port_num) list_del(&port_priv->port_list); spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); - ib_unregister_mad_agent(port_priv->agent[1]); + if (port_priv->agent[1]) + ib_unregister_mad_agent(port_priv->agent[1]); if (port_priv->agent[0]) ib_unregister_mad_agent(port_priv->agent[0]); diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index c02a96d3572a..f8413f8a9f26 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -794,7 +794,6 @@ err_free_table: static void release_gid_table(struct ib_device *device, struct ib_gid_table *table) { - bool leak = false; int i; if (!table) @@ -803,15 +802,12 @@ static void release_gid_table(struct ib_device *device, for (i = 0; i < table->sz; i++) { if (is_gid_entry_free(table->data_vec[i])) continue; - if (kref_read(&table->data_vec[i]->kref) > 1) { - dev_err(&device->dev, - "GID entry ref leak for index %d ref=%u\n", i, - kref_read(&table->data_vec[i]->kref)); - leak = true; - } + + WARN_ONCE(true, + "GID entry ref leak for dev %s index %d ref=%u\n", + dev_name(&device->dev), i, + kref_read(&table->data_vec[i]->kref)); } - if (leak) - return; mutex_destroy(&table->lock); kfree(table->data_vec); @@ -1131,41 +1127,6 @@ err: } EXPORT_SYMBOL(ib_find_cached_pkey); -int ib_find_exact_cached_pkey(struct ib_device *device, u32 port_num, - u16 pkey, u16 *index) -{ - struct ib_pkey_cache *cache; - unsigned long flags; - int i; - int ret = -ENOENT; - - if (!rdma_is_port_valid(device, port_num)) - return -EINVAL; - - read_lock_irqsave(&device->cache_lock, flags); - - cache = device->port_data[port_num].cache.pkey; - if (!cache) { - ret = -EINVAL; - goto err; - } - - *index = -1; - - for (i = 0; i < cache->table_len; ++i) - if (cache->table[i] == pkey) { - *index = i; - ret = 0; - break; - } - -err: - read_unlock_irqrestore(&device->cache_lock, flags); - - return ret; -} -EXPORT_SYMBOL(ib_find_exact_cached_pkey); - int ib_get_cached_lmc(struct ib_device *device, u32 port_num, u8 *lmc) { unsigned long flags; @@ -1644,8 +1605,10 @@ int ib_cache_setup_one(struct ib_device *device) rdma_for_each_port (device, p) { err = ib_cache_update(device, p, true, true, true); - if (err) + if (err) { + gid_table_cleanup_one(device); return err; + } } return 0; diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index ff58058aeadc..142170473e75 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -34,6 +34,9 @@ MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("InfiniBand CM"); MODULE_LICENSE("Dual BSD/GPL"); +#define CM_DESTROY_ID_WAIT_TIMEOUT 10000 /* msecs */ +#define CM_DIRECT_RETRY_CTX ((void *) 1UL) + static const char * const ibcm_rej_reason_strs[] = { [IB_CM_REJ_NO_QP] = "no QP", [IB_CM_REJ_NO_EEC] = "no EEC", @@ -92,8 +95,7 @@ static void cm_process_work(struct cm_id_private *cm_id_priv, struct cm_work *work); static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv, struct ib_cm_sidr_rep_param *param); -static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv, - const void *private_data, u8 private_data_len); +static void cm_issue_dreq(struct cm_id_private *cm_id_priv); static int cm_send_drep_locked(struct cm_id_private *cm_id_priv, void *private_data, u8 private_data_len); static int cm_send_rej_locked(struct cm_id_private *cm_id_priv, @@ -306,12 +308,7 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv) goto out; } - /* Timeout set by caller if response is expected. */ m->ah = ah; - m->retries = cm_id_priv->max_cm_retries; - - refcount_inc(&cm_id_priv->refcount); - m->context[0] = cm_id_priv; out: spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); @@ -320,16 +317,13 @@ out: static void cm_free_msg(struct ib_mad_send_buf *msg) { - struct cm_id_private *cm_id_priv = msg->context[0]; - if (msg->ah) rdma_destroy_ah(msg->ah, 0); - cm_deref_id(cm_id_priv); ib_free_send_mad(msg); } static struct ib_mad_send_buf * -cm_alloc_priv_msg(struct cm_id_private *cm_id_priv) +cm_alloc_priv_msg(struct cm_id_private *cm_id_priv, enum ib_cm_state state) { struct ib_mad_send_buf *msg; @@ -338,7 +332,15 @@ cm_alloc_priv_msg(struct cm_id_private *cm_id_priv) msg = cm_alloc_msg(cm_id_priv); if (IS_ERR(msg)) return msg; + cm_id_priv->msg = msg; + refcount_inc(&cm_id_priv->refcount); + msg->context[0] = cm_id_priv; + msg->context[1] = (void *) (unsigned long) state; + + msg->retries = cm_id_priv->max_cm_retries; + msg->timeout_ms = cm_id_priv->timeout_ms; + return msg; } @@ -357,13 +359,20 @@ static void cm_free_priv_msg(struct ib_mad_send_buf *msg) ib_free_send_mad(msg); } -static struct ib_mad_send_buf *cm_alloc_response_msg_no_ah(struct cm_port *port, - struct ib_mad_recv_wc *mad_recv_wc) +static struct ib_mad_send_buf * +cm_alloc_response_msg_no_ah(struct cm_port *port, + struct ib_mad_recv_wc *mad_recv_wc, + bool direct_retry) { - return ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index, - 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, - GFP_ATOMIC, - IB_MGMT_BASE_VERSION); + struct ib_mad_send_buf *m; + + m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index, + 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, + GFP_ATOMIC, IB_MGMT_BASE_VERSION); + if (!IS_ERR(m)) + m->context[0] = direct_retry ? CM_DIRECT_RETRY_CTX : NULL; + + return m; } static int cm_create_response_msg_ah(struct cm_port *port, @@ -383,12 +392,13 @@ static int cm_create_response_msg_ah(struct cm_port *port, static int cm_alloc_response_msg(struct cm_port *port, struct ib_mad_recv_wc *mad_recv_wc, + bool direct_retry, struct ib_mad_send_buf **msg) { struct ib_mad_send_buf *m; int ret; - m = cm_alloc_response_msg_no_ah(port, mad_recv_wc); + m = cm_alloc_response_msg_no_ah(port, mad_recv_wc, direct_retry); if (IS_ERR(m)) return PTR_ERR(m); @@ -402,13 +412,6 @@ static int cm_alloc_response_msg(struct cm_port *port, return 0; } -static void cm_free_response_msg(struct ib_mad_send_buf *msg) -{ - if (msg->ah) - rdma_destroy_ah(msg->ah, 0); - ib_free_send_mad(msg); -} - static void *cm_copy_private_data(const void *private_data, u8 private_data_len) { void *data; @@ -1025,13 +1028,26 @@ static void cm_reset_to_idle(struct cm_id_private *cm_id_priv) } } +static noinline void cm_destroy_id_wait_timeout(struct ib_cm_id *cm_id, + enum ib_cm_state old_state) +{ + struct cm_id_private *cm_id_priv; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + pr_err("%s: cm_id=%p timed out. state %d -> %d, refcnt=%d\n", __func__, + cm_id, old_state, cm_id->state, refcount_read(&cm_id_priv->refcount)); +} + static void cm_destroy_id(struct ib_cm_id *cm_id, int err) { struct cm_id_private *cm_id_priv; + enum ib_cm_state old_state; struct cm_work *work; + int ret; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irq(&cm_id_priv->lock); + old_state = cm_id->state; retest: switch (cm_id->state) { case IB_CM_LISTEN: @@ -1095,7 +1111,8 @@ retest: cm_id->state = IB_CM_IDLE; break; } - cm_send_dreq_locked(cm_id_priv, NULL, 0); + cm_issue_dreq(cm_id_priv); + cm_enter_timewait(cm_id_priv); goto retest; case IB_CM_DREQ_SENT: ib_cancel_mad(cm_id_priv->msg); @@ -1135,7 +1152,14 @@ retest: xa_erase(&cm.local_id_table, cm_local_id(cm_id->local_id)); cm_deref_id(cm_id_priv); - wait_for_completion(&cm_id_priv->comp); + do { + ret = wait_for_completion_timeout(&cm_id_priv->comp, + msecs_to_jiffies( + CM_DESTROY_ID_WAIT_TIMEOUT)); + if (!ret) /* timeout happened */ + cm_destroy_id_wait_timeout(cm_id, old_state); + } while (!ret); + while ((work = cm_dequeue_work(cm_id_priv)) != NULL) cm_free_work(work); @@ -1536,7 +1560,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, if (param->alternate_path) cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av); - msg = cm_alloc_priv_msg(cm_id_priv); + msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_REQ_SENT); if (IS_ERR(msg)) { ret = PTR_ERR(msg); goto out_unlock; @@ -1545,8 +1569,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, req_msg = (struct cm_req_msg *)msg->mad; cm_format_req(req_msg, cm_id_priv, param); cm_id_priv->tid = req_msg->hdr.tid; - msg->timeout_ms = cm_id_priv->timeout_ms; - msg->context[1] = (void *)(unsigned long)IB_CM_REQ_SENT; cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg)); cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg)); @@ -1577,7 +1599,7 @@ static int cm_issue_rej(struct cm_port *port, struct cm_rej_msg *rej_msg, *rcv_msg; int ret; - ret = cm_alloc_response_msg(port, mad_recv_wc, &msg); + ret = cm_alloc_response_msg(port, mad_recv_wc, false, &msg); if (ret) return ret; @@ -1603,7 +1625,7 @@ static int cm_issue_rej(struct cm_port *port, IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg)); ret = ib_post_send_mad(msg, NULL); if (ret) - cm_free_response_msg(msg); + cm_free_msg(msg); return ret; } @@ -1930,7 +1952,7 @@ static void cm_dup_req_handler(struct cm_work *work, } spin_unlock_irq(&cm_id_priv->lock); - ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); + ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, true, &msg); if (ret) return; @@ -1959,7 +1981,7 @@ static void cm_dup_req_handler(struct cm_work *work, return; unlock: spin_unlock_irq(&cm_id_priv->lock); -free: cm_free_response_msg(msg); +free: cm_free_msg(msg); } static struct cm_id_private *cm_match_req(struct cm_work *work, @@ -2273,7 +2295,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, goto out; } - msg = cm_alloc_priv_msg(cm_id_priv); + msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_REP_SENT); if (IS_ERR(msg)) { ret = PTR_ERR(msg); goto out; @@ -2281,8 +2303,6 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, rep_msg = (struct cm_rep_msg *) msg->mad; cm_format_rep(rep_msg, cm_id_priv, param); - msg->timeout_ms = cm_id_priv->timeout_ms; - msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT; trace_icm_send_rep(cm_id); ret = ib_post_send_mad(msg, NULL); @@ -2423,7 +2443,7 @@ static void cm_dup_rep_handler(struct cm_work *work) atomic_long_inc( &work->port->counters[CM_RECV_DUPLICATES][CM_REP_COUNTER]); - ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); + ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, true, &msg); if (ret) goto deref; @@ -2448,7 +2468,7 @@ static void cm_dup_rep_handler(struct cm_work *work) goto deref; unlock: spin_unlock_irq(&cm_id_priv->lock); -free: cm_free_response_msg(msg); +free: cm_free_msg(msg); deref: cm_deref_id(cm_id_priv); } @@ -2632,59 +2652,68 @@ static void cm_format_dreq(struct cm_dreq_msg *dreq_msg, private_data_len); } -static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv, - const void *private_data, u8 private_data_len) +static void cm_issue_dreq(struct cm_id_private *cm_id_priv) { struct ib_mad_send_buf *msg; int ret; lockdep_assert_held(&cm_id_priv->lock); + msg = cm_alloc_msg(cm_id_priv); + if (IS_ERR(msg)) + return; + + cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv, NULL, 0); + + trace_icm_send_dreq(&cm_id_priv->id); + ret = ib_post_send_mad(msg, NULL); + if (ret) + cm_free_msg(msg); +} + +int ib_send_cm_dreq(struct ib_cm_id *cm_id, const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv = + container_of(cm_id, struct cm_id_private, id); + struct ib_mad_send_buf *msg; + unsigned long flags; + int ret; + if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE) return -EINVAL; + spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->id.state != IB_CM_ESTABLISHED) { trace_icm_dreq_skipped(&cm_id_priv->id); - return -EINVAL; + ret = -EINVAL; + goto unlock; } if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT || cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) ib_cancel_mad(cm_id_priv->msg); - msg = cm_alloc_priv_msg(cm_id_priv); + msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_DREQ_SENT); if (IS_ERR(msg)) { cm_enter_timewait(cm_id_priv); - return PTR_ERR(msg); + ret = PTR_ERR(msg); + goto unlock; } cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv, private_data, private_data_len); - msg->timeout_ms = cm_id_priv->timeout_ms; - msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT; trace_icm_send_dreq(&cm_id_priv->id); ret = ib_post_send_mad(msg, NULL); if (ret) { cm_enter_timewait(cm_id_priv); cm_free_priv_msg(msg); - return ret; + goto unlock; } cm_id_priv->id.state = IB_CM_DREQ_SENT; - return 0; -} - -int ib_send_cm_dreq(struct ib_cm_id *cm_id, const void *private_data, - u8 private_data_len) -{ - struct cm_id_private *cm_id_priv = - container_of(cm_id, struct cm_id_private, id); - unsigned long flags; - int ret; - - spin_lock_irqsave(&cm_id_priv->lock, flags); - ret = cm_send_dreq_locked(cm_id_priv, private_data, private_data_len); +unlock: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } @@ -2770,7 +2799,7 @@ static int cm_issue_drep(struct cm_port *port, struct cm_drep_msg *drep_msg; int ret; - ret = cm_alloc_response_msg(port, mad_recv_wc, &msg); + ret = cm_alloc_response_msg(port, mad_recv_wc, true, &msg); if (ret) return ret; @@ -2788,7 +2817,7 @@ static int cm_issue_drep(struct cm_port *port, IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)); ret = ib_post_send_mad(msg, NULL); if (ret) - cm_free_response_msg(msg); + cm_free_msg(msg); return ret; } @@ -2835,7 +2864,8 @@ static int cm_dreq_handler(struct cm_work *work) case IB_CM_TIMEWAIT: atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES] [CM_DREQ_COUNTER]); - msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc); + msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc, + true); if (IS_ERR(msg)) goto unlock; @@ -2846,7 +2876,7 @@ static int cm_dreq_handler(struct cm_work *work) if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) || ib_post_send_mad(msg, NULL)) - cm_free_response_msg(msg); + cm_free_msg(msg); goto deref; case IB_CM_DREQ_RCVD: atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES] @@ -3340,7 +3370,8 @@ static int cm_lap_handler(struct cm_work *work) case IB_CM_MRA_LAP_SENT: atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES] [CM_LAP_COUNTER]); - msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc); + msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc, + true); if (IS_ERR(msg)) goto unlock; @@ -3353,7 +3384,7 @@ static int cm_lap_handler(struct cm_work *work) if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) || ib_post_send_mad(msg, NULL)) - cm_free_response_msg(msg); + cm_free_msg(msg); goto deref; case IB_CM_LAP_RCVD: atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES] @@ -3492,7 +3523,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, goto out_unlock; } - msg = cm_alloc_priv_msg(cm_id_priv); + msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_SIDR_REQ_SENT); if (IS_ERR(msg)) { ret = PTR_ERR(msg); goto out_unlock; @@ -3500,8 +3531,6 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, cm_format_sidr_req((struct cm_sidr_req_msg *)msg->mad, cm_id_priv, param); - msg->timeout_ms = cm_id_priv->timeout_ms; - msg->context[1] = (void *)(unsigned long)IB_CM_SIDR_REQ_SENT; trace_icm_send_sidr_req(&cm_id_priv->id); ret = ib_post_send_mad(msg, NULL); @@ -3747,17 +3776,17 @@ out: static void cm_process_send_error(struct cm_id_private *cm_id_priv, struct ib_mad_send_buf *msg, - enum ib_cm_state state, enum ib_wc_status wc_status) { + enum ib_cm_state state = (unsigned long) msg->context[1]; struct ib_cm_event cm_event = {}; int ret; - /* Discard old sends or ones without a response. */ + /* Discard old sends. */ spin_lock_irq(&cm_id_priv->lock); if (msg != cm_id_priv->msg) { spin_unlock_irq(&cm_id_priv->lock); - cm_free_msg(msg); + cm_free_priv_msg(msg); return; } cm_free_priv_msg(msg); @@ -3805,9 +3834,7 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc) { struct ib_mad_send_buf *msg = mad_send_wc->send_buf; - struct cm_id_private *cm_id_priv = msg->context[0]; - enum ib_cm_state state = - (enum ib_cm_state)(unsigned long)msg->context[1]; + struct cm_id_private *cm_id_priv; struct cm_port *port; u16 attr_index; @@ -3815,13 +3842,12 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent, attr_index = be16_to_cpu(((struct ib_mad_hdr *) msg->mad)->attr_id) - CM_ATTR_ID_OFFSET; - /* - * If the send was in response to a received message (context[0] is not - * set to a cm_id), and is not a REJ, then it is a send that was - * manually retried. - */ - if (!cm_id_priv && (attr_index != CM_REJ_COUNTER)) + if (msg->context[0] == CM_DIRECT_RETRY_CTX) { msg->retries = 1; + cm_id_priv = NULL; + } else { + cm_id_priv = msg->context[0]; + } atomic_long_add(1 + msg->retries, &port->counters[CM_XMIT][attr_index]); if (msg->retries) @@ -3829,10 +3855,9 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent, &port->counters[CM_XMIT_RETRIES][attr_index]); if (cm_id_priv) - cm_process_send_error(cm_id_priv, msg, state, - mad_send_wc->status); + cm_process_send_error(cm_id_priv, msg, mad_send_wc->status); else - cm_free_response_msg(msg); + cm_free_msg(msg); } static void cm_work_handler(struct work_struct *_work) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 1e2cd7c8716e..91db10515d74 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -690,6 +690,7 @@ cma_validate_port(struct ib_device *device, u32 port, int bound_if_index = dev_addr->bound_dev_if; int dev_type = dev_addr->dev_type; struct net_device *ndev = NULL; + struct net_device *pdev = NULL; if (!rdma_dev_access_netns(device, id_priv->id.route.addr.dev_addr.net)) goto out; @@ -714,9 +715,26 @@ cma_validate_port(struct ib_device *device, u32 port, rcu_read_lock(); ndev = rcu_dereference(sgid_attr->ndev); + if (ndev->ifindex != bound_if_index) { + pdev = dev_get_by_index_rcu(dev_addr->net, bound_if_index); + if (pdev) { + if (is_vlan_dev(pdev)) { + pdev = vlan_dev_real_dev(pdev); + if (ndev->ifindex == pdev->ifindex) + bound_if_index = pdev->ifindex; + } + if (is_vlan_dev(ndev)) { + pdev = vlan_dev_real_dev(ndev); + if (bound_if_index == pdev->ifindex) + bound_if_index = ndev->ifindex; + } + } + } if (!net_eq(dev_net(ndev), dev_addr->net) || - ndev->ifindex != bound_if_index) + ndev->ifindex != bound_if_index) { + rdma_put_gid_attr(sgid_attr); sgid_attr = ERR_PTR(-ENODEV); + } rcu_read_unlock(); goto out; } diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h index 47f3c6e4be89..dc622f3778be 100644 --- a/drivers/infiniband/core/cma_trace.h +++ b/drivers/infiniband/core/cma_trace.h @@ -84,7 +84,7 @@ TRACE_EVENT(cm_id_attach, sizeof(struct sockaddr_in6)); memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr, sizeof(struct sockaddr_in6)); - __assign_str(devname, device->name); + __assign_str(devname); ), TP_printk("cm.id=%u src=%pISpc dst=%pISpc device=%s", @@ -334,7 +334,7 @@ DECLARE_EVENT_CLASS(cma_client_class, ), TP_fast_assign( - __assign_str(name, device->name); + __assign_str(name); ), TP_printk("device name=%s", diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index dd7715ba9fd1..05102769a918 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -325,9 +325,6 @@ void ib_qp_usecnt_inc(struct ib_qp *qp); void ib_qp_usecnt_dec(struct ib_qp *qp); struct rdma_dev_addr; -int rdma_resolve_ip_route(struct sockaddr *src_addr, - const struct sockaddr *dst_addr, - struct rdma_dev_addr *addr); int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 67bcea7a153c..0ded91f056f3 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -209,23 +209,6 @@ static void __ibdev_printk(const char *level, const struct ib_device *ibdev, printk("%s(NULL ib_device): %pV", level, vaf); } -void ibdev_printk(const char *level, const struct ib_device *ibdev, - const char *format, ...) -{ - struct va_format vaf; - va_list args; - - va_start(args, format); - - vaf.fmt = format; - vaf.va = &args; - - __ibdev_printk(level, ibdev, &vaf); - - va_end(args); -} -EXPORT_SYMBOL(ibdev_printk); - #define define_ibdev_printk_level(func, level) \ void func(const struct ib_device *ibdev, const char *fmt, ...) \ { \ @@ -437,6 +420,7 @@ int ib_device_rename(struct ib_device *ibdev, const char *name) client->rename(ibdev, client_data); } up_read(&ibdev->client_data_rwsem); + rdma_nl_notify_event(ibdev, 0, RDMA_RENAME_EVENT); up_read(&devices_rwsem); return 0; } @@ -503,6 +487,7 @@ static void ib_device_release(struct device *device) rcu_head); } + mutex_destroy(&dev->subdev_lock); mutex_destroy(&dev->unregistration_lock); mutex_destroy(&dev->compat_devs_mutex); @@ -641,6 +626,11 @@ struct ib_device *_ib_alloc_device(size_t size) BIT_ULL(IB_USER_VERBS_CMD_REG_MR) | BIT_ULL(IB_USER_VERBS_CMD_REREG_MR) | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ); + + mutex_init(&device->subdev_lock); + INIT_LIST_HEAD(&device->subdev_list_head); + INIT_LIST_HEAD(&device->subdev_list); + return device; } EXPORT_SYMBOL(_ib_alloc_device); @@ -1345,6 +1335,29 @@ static void prevent_dealloc_device(struct ib_device *ib_dev) { } +static void ib_device_notify_register(struct ib_device *device) +{ + struct net_device *netdev; + u32 port; + int ret; + + ret = rdma_nl_notify_event(device, 0, RDMA_REGISTER_EVENT); + if (ret) + return; + + rdma_for_each_port(device, port) { + netdev = ib_device_get_netdev(device, port); + if (!netdev) + continue; + + ret = rdma_nl_notify_event(device, port, + RDMA_NETDEV_ATTACH_EVENT); + dev_put(netdev); + if (ret) + return; + } +} + /** * ib_register_device - Register an IB device with IB core * @device: Device to register @@ -1443,6 +1456,8 @@ int ib_register_device(struct ib_device *device, const char *name, dev_set_uevent_suppress(&device->dev, false); /* Mark for userspace that device is ready */ kobject_uevent(&device->dev.kobj, KOBJ_ADD); + + ib_device_notify_register(device); ib_device_put(device); return 0; @@ -1461,6 +1476,18 @@ EXPORT_SYMBOL(ib_register_device); /* Callers must hold a get on the device. */ static void __ib_unregister_device(struct ib_device *ib_dev) { + struct ib_device *sub, *tmp; + + mutex_lock(&ib_dev->subdev_lock); + list_for_each_entry_safe_reverse(sub, tmp, + &ib_dev->subdev_list_head, + subdev_list) { + list_del(&sub->subdev_list); + ib_dev->ops.del_sub_dev(sub); + ib_device_put(ib_dev); + } + mutex_unlock(&ib_dev->subdev_lock); + /* * We have a registration lock so that all the calls to unregister are * fully fenced, once any unregister returns the device is truely @@ -1473,6 +1500,7 @@ static void __ib_unregister_device(struct ib_device *ib_dev) goto out; disable_device(ib_dev); + rdma_nl_notify_event(ib_dev, 0, RDMA_UNREGISTER_EVENT); /* Expedite removing unregistered pointers from the hash table */ free_netdevs(ib_dev); @@ -1730,7 +1758,7 @@ static int assign_client_id(struct ib_client *client) { int ret; - down_write(&clients_rwsem); + lockdep_assert_held(&clients_rwsem); /* * The add/remove callbacks must be called in FIFO/LIFO order. To * achieve this we assign client_ids so they are sorted in @@ -1739,14 +1767,11 @@ static int assign_client_id(struct ib_client *client) client->client_id = highest_client_id; ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL); if (ret) - goto out; + return ret; highest_client_id++; xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED); - -out: - up_write(&clients_rwsem); - return ret; + return 0; } static void remove_client_id(struct ib_client *client) @@ -1776,25 +1801,35 @@ int ib_register_client(struct ib_client *client) { struct ib_device *device; unsigned long index; + bool need_unreg = false; int ret; refcount_set(&client->uses, 1); init_completion(&client->uses_zero); + + /* + * The devices_rwsem is held in write mode to ensure that a racing + * ib_register_device() sees a consisent view of clients and devices. + */ + down_write(&devices_rwsem); + down_write(&clients_rwsem); ret = assign_client_id(client); if (ret) - return ret; + goto out; - down_read(&devices_rwsem); + need_unreg = true; xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) { ret = add_client_context(device, client); - if (ret) { - up_read(&devices_rwsem); - ib_unregister_client(client); - return ret; - } + if (ret) + goto out; } - up_read(&devices_rwsem); - return 0; + ret = 0; +out: + up_write(&clients_rwsem); + up_write(&devices_rwsem); + if (need_unreg && ret) + ib_unregister_client(client); + return ret; } EXPORT_SYMBOL(ib_register_client); @@ -2134,11 +2169,15 @@ static void add_ndev_hash(struct ib_port_data *pdata) int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev, u32 port) { + enum rdma_nl_notify_event_type etype; struct net_device *old_ndev; struct ib_port_data *pdata; unsigned long flags; int ret; + if (!rdma_is_port_valid(ib_dev, port)) + return -EINVAL; + /* * Drivers wish to call this before ib_register_driver, so we have to * setup the port data early. @@ -2147,9 +2186,6 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev, if (ret) return ret; - if (!rdma_is_port_valid(ib_dev, port)) - return -EINVAL; - pdata = &ib_dev->port_data[port]; spin_lock_irqsave(&pdata->netdev_lock, flags); old_ndev = rcu_dereference_protected( @@ -2159,16 +2195,19 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev, return 0; } - if (old_ndev) - netdev_tracker_free(ndev, &pdata->netdev_tracker); - if (ndev) - netdev_hold(ndev, &pdata->netdev_tracker, GFP_ATOMIC); rcu_assign_pointer(pdata->netdev, ndev); + netdev_put(old_ndev, &pdata->netdev_tracker); + netdev_hold(ndev, &pdata->netdev_tracker, GFP_ATOMIC); spin_unlock_irqrestore(&pdata->netdev_lock, flags); add_ndev_hash(pdata); - if (old_ndev) - __dev_put(old_ndev); + + /* Make sure that the device is registered before we send events */ + if (xa_load(&devices, ib_dev->index) != ib_dev) + return 0; + + etype = ndev ? RDMA_NETDEV_ATTACH_EVENT : RDMA_NETDEV_DETACH_EVENT; + rdma_nl_notify_event(ib_dev, port, etype); return 0; } @@ -2216,6 +2255,9 @@ struct net_device *ib_device_get_netdev(struct ib_device *ib_dev, if (!rdma_is_port_valid(ib_dev, port)) return NULL; + if (!ib_dev->port_data) + return NULL; + pdata = &ib_dev->port_data[port]; /* @@ -2228,22 +2270,40 @@ struct net_device *ib_device_get_netdev(struct ib_device *ib_dev, spin_lock(&pdata->netdev_lock); res = rcu_dereference_protected( pdata->netdev, lockdep_is_held(&pdata->netdev_lock)); - if (res) - dev_hold(res); + dev_hold(res); spin_unlock(&pdata->netdev_lock); } - /* - * If we are starting to unregister expedite things by preventing - * propagation of an unregistering netdev. - */ - if (res && res->reg_state != NETREG_REGISTERED) { - dev_put(res); - return NULL; + return res; +} +EXPORT_SYMBOL(ib_device_get_netdev); + +/** + * ib_query_netdev_port - Query the port number of a net_device + * associated with an ibdev + * @ibdev: IB device + * @ndev: Network device + * @port: IB port the net_device is connected to + */ +int ib_query_netdev_port(struct ib_device *ibdev, struct net_device *ndev, + u32 *port) +{ + struct net_device *ib_ndev; + u32 port_num; + + rdma_for_each_port(ibdev, port_num) { + ib_ndev = ib_device_get_netdev(ibdev, port_num); + if (ndev == ib_ndev) { + *port = port_num; + dev_put(ib_ndev); + return 0; + } + dev_put(ib_ndev); } - return res; + return -ENOENT; } +EXPORT_SYMBOL(ib_query_netdev_port); /** * ib_device_get_by_netdev - Find an IB device associated with a netdev @@ -2304,9 +2364,7 @@ void ib_enum_roce_netdev(struct ib_device *ib_dev, if (filter(ib_dev, port, idev, filter_cookie)) cb(ib_dev, port, idev, cookie); - - if (idev) - dev_put(idev); + dev_put(idev); } } @@ -2594,6 +2652,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) ops->uverbs_no_driver_id_binding; SET_DEVICE_OP(dev_ops, add_gid); + SET_DEVICE_OP(dev_ops, add_sub_dev); SET_DEVICE_OP(dev_ops, advise_mr); SET_DEVICE_OP(dev_ops, alloc_dm); SET_DEVICE_OP(dev_ops, alloc_hw_device_stats); @@ -2628,6 +2687,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, dealloc_ucontext); SET_DEVICE_OP(dev_ops, dealloc_xrcd); SET_DEVICE_OP(dev_ops, del_gid); + SET_DEVICE_OP(dev_ops, del_sub_dev); SET_DEVICE_OP(dev_ops, dereg_mr); SET_DEVICE_OP(dev_ops, destroy_ah); SET_DEVICE_OP(dev_ops, destroy_counters); @@ -2710,6 +2770,8 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, resize_cq); SET_DEVICE_OP(dev_ops, set_vf_guid); SET_DEVICE_OP(dev_ops, set_vf_link_state); + SET_DEVICE_OP(dev_ops, ufile_hw_cleanup); + SET_DEVICE_OP(dev_ops, report_port_event); SET_OBJ_SIZE(dev_ops, ib_ah); SET_OBJ_SIZE(dev_ops, ib_counters); @@ -2724,6 +2786,55 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) } EXPORT_SYMBOL(ib_set_device_ops); +int ib_add_sub_device(struct ib_device *parent, + enum rdma_nl_dev_type type, + const char *name) +{ + struct ib_device *sub; + int ret = 0; + + if (!parent->ops.add_sub_dev || !parent->ops.del_sub_dev) + return -EOPNOTSUPP; + + if (!ib_device_try_get(parent)) + return -EINVAL; + + sub = parent->ops.add_sub_dev(parent, type, name); + if (IS_ERR(sub)) { + ib_device_put(parent); + return PTR_ERR(sub); + } + + sub->type = type; + sub->parent = parent; + + mutex_lock(&parent->subdev_lock); + list_add_tail(&parent->subdev_list_head, &sub->subdev_list); + mutex_unlock(&parent->subdev_lock); + + return ret; +} +EXPORT_SYMBOL(ib_add_sub_device); + +int ib_del_sub_device_and_put(struct ib_device *sub) +{ + struct ib_device *parent = sub->parent; + + if (!parent) + return -EOPNOTSUPP; + + mutex_lock(&parent->subdev_lock); + list_del(&sub->subdev_list); + mutex_unlock(&parent->subdev_lock); + + ib_device_put(sub); + parent->ops.del_sub_dev(sub); + ib_device_put(parent); + + return 0; +} +EXPORT_SYMBOL(ib_del_sub_device_and_put); + #ifdef CONFIG_INFINIBAND_VIRT_DMA int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents) { @@ -2754,6 +2865,97 @@ static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = { }, }; +void ib_dispatch_port_state_event(struct ib_device *ibdev, struct net_device *ndev) +{ + enum ib_port_state curr_state; + struct ib_event ibevent = {}; + u32 port; + + if (ib_query_netdev_port(ibdev, ndev, &port)) + return; + + curr_state = ib_get_curr_port_state(ndev); + + write_lock_irq(&ibdev->cache_lock); + if (ibdev->port_data[port].cache.last_port_state == curr_state) { + write_unlock_irq(&ibdev->cache_lock); + return; + } + ibdev->port_data[port].cache.last_port_state = curr_state; + write_unlock_irq(&ibdev->cache_lock); + + ibevent.event = (curr_state == IB_PORT_DOWN) ? + IB_EVENT_PORT_ERR : IB_EVENT_PORT_ACTIVE; + ibevent.device = ibdev; + ibevent.element.port_num = port; + ib_dispatch_event(&ibevent); +} +EXPORT_SYMBOL(ib_dispatch_port_state_event); + +static void handle_port_event(struct net_device *ndev, unsigned long event) +{ + struct ib_device *ibdev; + + /* Currently, link events in bonding scenarios are still + * reported by drivers that support bonding. + */ + if (netif_is_lag_master(ndev) || netif_is_lag_port(ndev)) + return; + + ibdev = ib_device_get_by_netdev(ndev, RDMA_DRIVER_UNKNOWN); + if (!ibdev) + return; + + if (ibdev->ops.report_port_event) { + ibdev->ops.report_port_event(ibdev, ndev, event); + goto put_ibdev; + } + + ib_dispatch_port_state_event(ibdev, ndev); + +put_ibdev: + ib_device_put(ibdev); +}; + +static int ib_netdevice_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct ib_device *ibdev; + u32 port; + + switch (event) { + case NETDEV_CHANGENAME: + ibdev = ib_device_get_by_netdev(ndev, RDMA_DRIVER_UNKNOWN); + if (!ibdev) + return NOTIFY_DONE; + + if (ib_query_netdev_port(ibdev, ndev, &port)) { + ib_device_put(ibdev); + break; + } + + rdma_nl_notify_event(ibdev, port, RDMA_NETDEV_RENAME_EVENT); + ib_device_put(ibdev); + break; + + case NETDEV_UP: + case NETDEV_CHANGE: + case NETDEV_DOWN: + handle_port_event(ndev, event); + break; + + default: + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block nb_netdevice = { + .notifier_call = ib_netdevice_event, +}; + static int __init ib_core_init(void) { int ret = -ENOMEM; @@ -2825,6 +3027,8 @@ static int __init ib_core_init(void) goto err_parent; } + register_netdevice_notifier(&nb_netdevice); + return 0; err_parent: @@ -2854,6 +3058,7 @@ err: static void __exit ib_core_cleanup(void) { + unregister_netdevice_notifier(&nb_netdevice); roce_gid_mgmt_cleanup(); rdma_nl_unregister(RDMA_NL_LS); nldev_exit(); diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 0301fcad4b48..7e3a55349e10 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -143,8 +143,8 @@ static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv) if (list_empty(&cm_id_priv->work_free_list)) return NULL; - work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work, - free_list); + work = list_first_entry(&cm_id_priv->work_free_list, struct iwcm_work, + free_list); list_del_init(&work->free_list); return work; } @@ -206,17 +206,17 @@ static void free_cm_id(struct iwcm_id_private *cm_id_priv) /* * Release a reference on cm_id. If the last reference is being - * released, free the cm_id and return 1. + * released, free the cm_id and return 'true'. */ -static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv) +static bool iwcm_deref_id(struct iwcm_id_private *cm_id_priv) { if (refcount_dec_and_test(&cm_id_priv->refcount)) { BUG_ON(!list_empty(&cm_id_priv->work_list)); free_cm_id(cm_id_priv); - return 1; + return true; } - return 0; + return false; } static void add_ref(struct iw_cm_id *cm_id) @@ -368,8 +368,10 @@ EXPORT_SYMBOL(iw_cm_disconnect); * * Clean up all resources associated with the connection and release * the initial reference taken by iw_create_cm_id. + * + * Returns true if and only if the last cm_id_priv reference has been dropped. */ -static void destroy_cm_id(struct iw_cm_id *cm_id) +static bool destroy_cm_id(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; struct ib_qp *qp; @@ -439,7 +441,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM); } - (void)iwcm_deref_id(cm_id_priv); + return iwcm_deref_id(cm_id_priv); } /* @@ -450,7 +452,8 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) */ void iw_destroy_cm_id(struct iw_cm_id *cm_id) { - destroy_cm_id(cm_id); + if (!destroy_cm_id(cm_id)) + flush_workqueue(iwcm_wq); } EXPORT_SYMBOL(iw_destroy_cm_id); @@ -1017,16 +1020,13 @@ static void cm_work_handler(struct work_struct *_work) struct iw_cm_event levent; struct iwcm_id_private *cm_id_priv = work->cm_id; unsigned long flags; - int empty; int ret = 0; spin_lock_irqsave(&cm_id_priv->lock, flags); - empty = list_empty(&cm_id_priv->work_list); - while (!empty) { - work = list_entry(cm_id_priv->work_list.next, - struct iwcm_work, list); + while (!list_empty(&cm_id_priv->work_list)) { + work = list_first_entry(&cm_id_priv->work_list, + struct iwcm_work, list); list_del_init(&work->list); - empty = list_empty(&cm_id_priv->work_list); levent = work->event; put_work(work); spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -1034,13 +1034,11 @@ static void cm_work_handler(struct work_struct *_work) if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) { ret = process_event(cm_id_priv, &levent); if (ret) - destroy_cm_id(&cm_id_priv->id); + WARN_ON_ONCE(destroy_cm_id(&cm_id_priv->id)); } else pr_debug("dropping event %d\n", levent.event); if (iwcm_deref_id(cm_id_priv)) return; - if (empty) - return; spin_lock_irqsave(&cm_id_priv->lock, flags); } spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -1093,11 +1091,8 @@ static int cm_event_handler(struct iw_cm_id *cm_id, } refcount_inc(&cm_id_priv->refcount); - if (list_empty(&cm_id_priv->work_list)) { - list_add_tail(&work->list, &cm_id_priv->work_list); - queue_work(iwcm_wq, &work->work); - } else - list_add_tail(&work->list, &cm_id_priv->work_list); + list_add_tail(&work->list, &cm_id_priv->work_list); + queue_work(iwcm_wq, &work->work); out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; @@ -1187,7 +1182,7 @@ static int __init iw_cm_init(void) if (ret) return ret; - iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", 0); + iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM); if (!iwcm_wq) goto err_alloc; diff --git a/drivers/infiniband/core/lag.c b/drivers/infiniband/core/lag.c index eca6e37c72ba..8fd80adfe833 100644 --- a/drivers/infiniband/core/lag.c +++ b/drivers/infiniband/core/lag.c @@ -93,8 +93,7 @@ static struct net_device *rdma_get_xmit_slave_udp(struct ib_device *device, slave = netdev_get_xmit_slave(master, skb, !!(device->lag_flags & RDMA_LAG_FLAGS_HASH_ALL_SLAVES)); - if (slave) - dev_hold(slave); + dev_hold(slave); rcu_read_unlock(); kfree_skb(skb); return slave; diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 674344eb8e2f..1fd54d5c4dd8 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -2616,14 +2616,16 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) static void timeout_sends(struct work_struct *work) { + struct ib_mad_send_wr_private *mad_send_wr, *n; struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_send_wr_private *mad_send_wr; struct ib_mad_send_wc mad_send_wc; + struct list_head local_list; unsigned long flags, delay; mad_agent_priv = container_of(work, struct ib_mad_agent_private, timed_work.work); mad_send_wc.vendor_err = 0; + INIT_LIST_HEAD(&local_list); spin_lock_irqsave(&mad_agent_priv->lock, flags); while (!list_empty(&mad_agent_priv->wait_list)) { @@ -2641,13 +2643,16 @@ static void timeout_sends(struct work_struct *work) break; } - list_del(&mad_send_wr->agent_list); + list_del_init(&mad_send_wr->agent_list); if (mad_send_wr->status == IB_WC_SUCCESS && !retry_send(mad_send_wr)) continue; - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + list_add_tail(&mad_send_wr->agent_list, &local_list); + } + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + list_for_each_entry_safe(mad_send_wr, n, &local_list, agent_list) { if (mad_send_wr->status == IB_WC_SUCCESS) mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR; else @@ -2655,11 +2660,8 @@ static void timeout_sends(struct work_struct *work) mad_send_wc.send_buf = &mad_send_wr->send_buf; mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); - deref_mad_agent(mad_agent_priv); - spin_lock_irqsave(&mad_agent_priv->lock, flags); } - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); } /* @@ -2937,7 +2939,6 @@ static int ib_mad_port_open(struct ib_device *device, int ret, cq_size; struct ib_mad_port_private *port_priv; unsigned long flags; - char name[sizeof "ib_mad123"]; int has_smi; if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE)) @@ -2983,12 +2984,15 @@ static int ib_mad_port_open(struct ib_device *device, if (ret) goto error6; } - ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI); - if (ret) - goto error7; - snprintf(name, sizeof(name), "ib_mad%u", port_num); - port_priv->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); + if (rdma_cap_ib_cm(device, port_num)) { + ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI); + if (ret) + goto error7; + } + + port_priv->wq = alloc_ordered_workqueue("ib_mad%u", WQ_MEM_RECLAIM, + port_num); if (!port_priv->wq) { ret = -ENOMEM; goto error8; diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index ae2db0c70788..def14c54b648 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -311,6 +311,7 @@ int rdma_nl_net_init(struct rdma_dev_net *rnet) struct net *net = read_pnet(&rnet->net); struct netlink_kernel_cfg cfg = { .input = rdma_nl_rcv, + .flags = NL_CFG_F_NONROOT_RECV, }; struct sock *nls; diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 4900a0848124..cb987ab0177c 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -137,6 +137,8 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING, .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, [RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_RES_SUBTYPE] = { .type = NLA_NUL_STRING, + .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 }, [RDMA_NLDEV_ATTR_RES_SRQ] = { .type = NLA_NESTED }, @@ -164,6 +166,11 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 }, [RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_DRIVER_DETAILS] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_DEV_TYPE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_PARENT_NAME] = { .type = NLA_NUL_STRING }, + [RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_EVENT_TYPE] = { .type = NLA_U8 }, }; static int put_driver_name_print_type(struct sk_buff *msg, const char *name, @@ -298,6 +305,19 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim)) return -EMSGSIZE; + if (device->type && + nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_TYPE, device->type)) + return -EMSGSIZE; + + if (device->parent && + nla_put_string(msg, RDMA_NLDEV_ATTR_PARENT_NAME, + dev_name(&device->parent->dev))) + return -EMSGSIZE; + + if (nla_put_u8(msg, RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE, + device->name_assign_type)) + return -EMSGSIZE; + /* * Link type is determined on first port and mlx4 device * which can potentially have two different link type for the same @@ -399,7 +419,8 @@ err: return -EMSGSIZE; } -static int fill_res_info(struct sk_buff *msg, struct ib_device *device) +static int fill_res_info(struct sk_buff *msg, struct ib_device *device, + bool show_details) { static const char * const names[RDMA_RESTRACK_MAX] = { [RDMA_RESTRACK_PD] = "pd", @@ -424,7 +445,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device) for (i = 0; i < RDMA_RESTRACK_MAX; i++) { if (!names[i]) continue; - curr = rdma_restrack_count(device, i); + curr = rdma_restrack_count(device, i, show_details); ret = fill_res_info_entry(msg, names[i], curr); if (ret) goto err; @@ -1054,8 +1075,8 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, u32 index; int err; - err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, extack); + err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, NL_VALIDATE_LIBERAL, extack); if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) return -EINVAL; @@ -1103,8 +1124,8 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, u32 index; int err; - err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, extack); + err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) return -EINVAL; @@ -1195,8 +1216,8 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, u32 port; int err; - err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, extack); + err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, NL_VALIDATE_LIBERAL, extack); if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) @@ -1255,8 +1276,8 @@ static int nldev_port_get_dumpit(struct sk_buff *skb, int err; unsigned int p; - err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, NULL); + err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, NL_VALIDATE_LIBERAL, NULL); if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) return -EINVAL; @@ -1305,13 +1326,14 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; + bool show_details = false; struct ib_device *device; struct sk_buff *msg; u32 index; int ret; - ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, extack); + ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, NL_VALIDATE_LIBERAL, extack); if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) return -EINVAL; @@ -1320,6 +1342,9 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, if (!device) return -EINVAL; + if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]) + show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; @@ -1334,7 +1359,7 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, goto err_free; } - ret = fill_res_info(msg, device); + ret = fill_res_info(msg, device, show_details); if (ret) goto err_free; @@ -1364,7 +1389,7 @@ static int _nldev_res_get_dumpit(struct ib_device *device, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET), 0, NLM_F_MULTI); - if (!nlh || fill_res_info(skb, device)) { + if (!nlh || fill_res_info(skb, device, false)) { nlmsg_cancel(skb, nlh); goto out; } @@ -1457,8 +1482,8 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct sk_buff *msg; int ret; - ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, extack); + ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, NL_VALIDATE_LIBERAL, extack); if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id]) return -EINVAL; @@ -1534,6 +1559,7 @@ static int res_get_common_dumpit(struct sk_buff *skb, struct rdma_restrack_entry *res; struct rdma_restrack_root *rt; int err, ret = 0, idx = 0; + bool show_details = false; struct nlattr *table_attr; struct nlattr *entry_attr; struct ib_device *device; @@ -1544,8 +1570,8 @@ static int res_get_common_dumpit(struct sk_buff *skb, u32 index, port = 0; bool filled = false; - err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, NULL); + err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, NL_VALIDATE_LIBERAL, NULL); /* * Right now, we are expecting the device index to get res information, * but it is possible to extend this code to return all devices in @@ -1562,6 +1588,9 @@ static int res_get_common_dumpit(struct sk_buff *skb, if (!device) return -EINVAL; + if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]) + show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]); + /* * If no PORT_INDEX is supplied, we will return all QPs from that device */ @@ -1599,6 +1628,9 @@ static int res_get_common_dumpit(struct sk_buff *skb, * objects. */ xa_for_each(&rt->xa, id, res) { + if (xa_get_mark(&rt->xa, res->id, RESTRACK_DD) && !show_details) + goto next; + if (idx < start || !rdma_restrack_get(res)) goto next; @@ -1731,8 +1763,8 @@ static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, char type[IFNAMSIZ]; int err; - err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, extack); + err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] || !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME]) return -EINVAL; @@ -1775,8 +1807,8 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, u32 index; int err; - err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, extack); + err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) return -EINVAL; @@ -1805,8 +1837,8 @@ static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh, u32 index; int err; - err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, - extack); + err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, + NL_VALIDATE_LIBERAL, extack); if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE]) return -EINVAL; @@ -1889,8 +1921,8 @@ static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct sk_buff *msg; int err; - err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, extack); + err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, NL_VALIDATE_LIBERAL, extack); if (err) return err; @@ -1920,6 +1952,12 @@ static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nlmsg_free(msg); return err; } + + err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_MONITOR_MODE, 1); + if (err) { + nlmsg_free(msg); + return err; + } /* * Copy-on-fork is supported. * See commits: @@ -2389,8 +2427,8 @@ static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; int ret; - ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, extack); + ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, NL_VALIDATE_LIBERAL, extack); if (ret) return -EINVAL; @@ -2419,8 +2457,8 @@ static int nldev_stat_get_dumpit(struct sk_buff *skb, struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; int ret; - ret = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, NULL); + ret = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, NL_VALIDATE_LIBERAL, NULL); if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES]) return -EINVAL; @@ -2451,8 +2489,8 @@ static int nldev_stat_get_counter_status_doit(struct sk_buff *skb, u32 devid, port; int ret, i; - ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, extack); + ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, NL_VALIDATE_LIBERAL, extack); if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) return -EINVAL; @@ -2533,6 +2571,56 @@ err: return ret; } +static int nldev_newdev(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; + enum rdma_nl_dev_type type; + struct ib_device *parent; + char name[IFNAMSIZ] = {}; + u32 parentid; + int ret; + + ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); + if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || + !tb[RDMA_NLDEV_ATTR_DEV_NAME] || !tb[RDMA_NLDEV_ATTR_DEV_TYPE]) + return -EINVAL; + + nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], sizeof(name)); + type = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_TYPE]); + parentid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); + parent = ib_device_get_by_index(sock_net(skb->sk), parentid); + if (!parent) + return -EINVAL; + + ret = ib_add_sub_device(parent, type, name); + ib_device_put(parent); + + return ret; +} + +static int nldev_deldev(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; + struct ib_device *device; + u32 devid; + int ret; + + ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); + if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) + return -EINVAL; + + devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); + device = ib_device_get_by_index(sock_net(skb->sk), devid); + if (!device) + return -EINVAL; + + return ib_del_sub_device_and_put(device); +} + static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { [RDMA_NLDEV_CMD_GET] = { .doit = nldev_get_doit, @@ -2631,8 +2719,178 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { [RDMA_NLDEV_CMD_STAT_GET_STATUS] = { .doit = nldev_stat_get_counter_status_doit, }, + [RDMA_NLDEV_CMD_NEWDEV] = { + .doit = nldev_newdev, + .flags = RDMA_NL_ADMIN_PERM, + }, + [RDMA_NLDEV_CMD_DELDEV] = { + .doit = nldev_deldev, + .flags = RDMA_NL_ADMIN_PERM, + }, }; +static int fill_mon_netdev_rename(struct sk_buff *msg, + struct ib_device *device, u32 port, + const struct net *net) +{ + struct net_device *netdev = ib_device_get_netdev(device, port); + int ret = 0; + + if (!netdev || !net_eq(dev_net(netdev), net)) + goto out; + + ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex); + if (ret) + goto out; + ret = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name); +out: + dev_put(netdev); + return ret; +} + +static int fill_mon_netdev_association(struct sk_buff *msg, + struct ib_device *device, u32 port, + const struct net *net) +{ + struct net_device *netdev = ib_device_get_netdev(device, port); + int ret = 0; + + if (netdev && !net_eq(dev_net(netdev), net)) + goto out; + + ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index); + if (ret) + goto out; + + ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, + dev_name(&device->dev)); + if (ret) + goto out; + + ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port); + if (ret) + goto out; + + if (netdev) { + ret = nla_put_u32(msg, + RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex); + if (ret) + goto out; + + ret = nla_put_string(msg, + RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name); + } + +out: + dev_put(netdev); + return ret; +} + +static void rdma_nl_notify_err_msg(struct ib_device *device, u32 port_num, + enum rdma_nl_notify_event_type type) +{ + struct net_device *netdev; + + switch (type) { + case RDMA_REGISTER_EVENT: + dev_warn_ratelimited(&device->dev, + "Failed to send RDMA monitor register device event\n"); + break; + case RDMA_UNREGISTER_EVENT: + dev_warn_ratelimited(&device->dev, + "Failed to send RDMA monitor unregister device event\n"); + break; + case RDMA_NETDEV_ATTACH_EVENT: + netdev = ib_device_get_netdev(device, port_num); + dev_warn_ratelimited(&device->dev, + "Failed to send RDMA monitor netdev attach event: port %d netdev %d\n", + port_num, netdev->ifindex); + dev_put(netdev); + break; + case RDMA_NETDEV_DETACH_EVENT: + dev_warn_ratelimited(&device->dev, + "Failed to send RDMA monitor netdev detach event: port %d\n", + port_num); + break; + case RDMA_RENAME_EVENT: + dev_warn_ratelimited(&device->dev, + "Failed to send RDMA monitor rename device event\n"); + break; + + case RDMA_NETDEV_RENAME_EVENT: + netdev = ib_device_get_netdev(device, port_num); + dev_warn_ratelimited(&device->dev, + "Failed to send RDMA monitor netdev rename event: port %d netdev %d\n", + port_num, netdev->ifindex); + dev_put(netdev); + break; + default: + break; + } +} + +int rdma_nl_notify_event(struct ib_device *device, u32 port_num, + enum rdma_nl_notify_event_type type) +{ + struct sk_buff *skb; + int ret = -EMSGSIZE; + struct net *net; + void *nlh; + + net = read_pnet(&device->coredev.rdma_net); + if (!net) + return -EINVAL; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + nlh = nlmsg_put(skb, 0, 0, + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_MONITOR), + 0, 0); + if (!nlh) + goto err_free; + + switch (type) { + case RDMA_REGISTER_EVENT: + case RDMA_UNREGISTER_EVENT: + case RDMA_RENAME_EVENT: + ret = fill_nldev_handle(skb, device); + if (ret) + goto err_free; + break; + case RDMA_NETDEV_ATTACH_EVENT: + case RDMA_NETDEV_DETACH_EVENT: + ret = fill_mon_netdev_association(skb, device, port_num, net); + if (ret) + goto err_free; + break; + case RDMA_NETDEV_RENAME_EVENT: + ret = fill_mon_netdev_rename(skb, device, port_num, net); + if (ret) + goto err_free; + break; + default: + break; + } + + ret = nla_put_u8(skb, RDMA_NLDEV_ATTR_EVENT_TYPE, type); + if (ret) + goto err_free; + + nlmsg_end(skb, nlh); + ret = rdma_nl_multicast(net, skb, RDMA_NL_GROUP_NOTIFY, GFP_KERNEL); + if (ret && ret != -ESRCH) { + skb = NULL; /* skb is freed in the netlink send-op handling */ + goto err_free; + } + return 0; + +err_free: + rdma_nl_notify_err_msg(device, port_num, type); + nlmsg_free(skb); + return ret; +} + void __init nldev_init(void) { rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table); diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 29b1ab1d5f93..90c177edf9b0 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -58,8 +58,8 @@ void uverbs_uobject_put(struct ib_uobject *uobject) } EXPORT_SYMBOL(uverbs_uobject_put); -static int uverbs_try_lock_object(struct ib_uobject *uobj, - enum rdma_lookup_mode mode) +int uverbs_try_lock_object(struct ib_uobject *uobj, + enum rdma_lookup_mode mode) { /* * When a shared access is required, we use a positive counter. Each @@ -84,6 +84,7 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, } return 0; } +EXPORT_SYMBOL(uverbs_try_lock_object); static void assert_uverbs_usecnt(struct ib_uobject *uobj, enum rdma_lookup_mode mode) @@ -880,9 +881,14 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile, static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, enum rdma_remove_reason reason) { + struct uverbs_attr_bundle attrs = { .ufile = ufile }; + struct ib_ucontext *ucontext = ufile->ucontext; + struct ib_device *ib_dev = ucontext->device; struct ib_uobject *obj, *next_obj; int ret = -EINVAL; - struct uverbs_attr_bundle attrs = { .ufile = ufile }; + + if (ib_dev->ops.ufile_hw_cleanup) + ib_dev->ops.ufile_hw_cleanup(ufile); /* * This shouldn't run while executing other commands on this diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 01a499a8b88d..3313410014cd 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -37,22 +37,6 @@ int rdma_restrack_init(struct ib_device *dev) return 0; } -static const char *type2str(enum rdma_restrack_type type) -{ - static const char * const names[RDMA_RESTRACK_MAX] = { - [RDMA_RESTRACK_PD] = "PD", - [RDMA_RESTRACK_CQ] = "CQ", - [RDMA_RESTRACK_QP] = "QP", - [RDMA_RESTRACK_CM_ID] = "CM_ID", - [RDMA_RESTRACK_MR] = "MR", - [RDMA_RESTRACK_CTX] = "CTX", - [RDMA_RESTRACK_COUNTER] = "COUNTER", - [RDMA_RESTRACK_SRQ] = "SRQ", - }; - - return names[type]; -}; - /** * rdma_restrack_clean() - clean resource tracking * @dev: IB device @@ -60,47 +44,14 @@ static const char *type2str(enum rdma_restrack_type type) void rdma_restrack_clean(struct ib_device *dev) { struct rdma_restrack_root *rt = dev->res; - struct rdma_restrack_entry *e; - char buf[TASK_COMM_LEN]; - bool found = false; - const char *owner; int i; for (i = 0 ; i < RDMA_RESTRACK_MAX; i++) { struct xarray *xa = &dev->res[i].xa; - if (!xa_empty(xa)) { - unsigned long index; - - if (!found) { - pr_err("restrack: %s", CUT_HERE); - dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n"); - } - xa_for_each(xa, index, e) { - if (rdma_is_kernel_res(e)) { - owner = e->kern_name; - } else { - /* - * There is no need to call get_task_struct here, - * because we can be here only if there are more - * get_task_struct() call than put_task_struct(). - */ - get_task_comm(buf, e->task); - owner = buf; - } - - pr_err("restrack: %s %s object allocated by %s is not freed\n", - rdma_is_kernel_res(e) ? "Kernel" : - "User", - type2str(e->type), owner); - } - found = true; - } + WARN_ON(!xa_empty(xa)); xa_destroy(xa); } - if (found) - pr_err("restrack: %s", CUT_HERE); - kfree(rt); } @@ -108,8 +59,10 @@ void rdma_restrack_clean(struct ib_device *dev) * rdma_restrack_count() - the current usage of specific object * @dev: IB device * @type: actual type of object to operate + * @show_details: count driver specific objects */ -int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type) +int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type, + bool show_details) { struct rdma_restrack_root *rt = &dev->res[type]; struct rdma_restrack_entry *e; @@ -117,8 +70,11 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type) u32 cnt = 0; xa_lock(&rt->xa); - xas_for_each(&xas, e, U32_MAX) + xas_for_each(&xas, e, U32_MAX) { + if (xa_get_mark(&rt->xa, e->id, RESTRACK_DD) && !show_details) + continue; cnt++; + } xa_unlock(&rt->xa); return cnt; } @@ -247,6 +203,9 @@ void rdma_restrack_add(struct rdma_restrack_entry *res) ret = xa_insert(&rt->xa, res->id, res, GFP_KERNEL); if (ret) res->id = 0; + + if (qp->qp_type >= IB_QPT_DRIVER) + xa_set_mark(&rt->xa, res->id, RESTRACK_DD); } else if (res->type == RDMA_RESTRACK_COUNTER) { /* Special case to ensure that cntn points to right counter */ struct rdma_counter *counter; diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index e958c43dd28f..a9f2c6b1b29e 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -515,6 +515,27 @@ void rdma_roce_rescan_device(struct ib_device *ib_dev) } EXPORT_SYMBOL(rdma_roce_rescan_device); +/** + * rdma_roce_rescan_port - Rescan all of the network devices in the system + * and add their gids if relevant to the port of the RoCE device. + * + * @ib_dev: IB device + * @port: Port number + */ +void rdma_roce_rescan_port(struct ib_device *ib_dev, u32 port) +{ + struct net_device *ndev = NULL; + + if (rdma_protocol_roce(ib_dev, port)) { + ndev = ib_device_get_netdev(ib_dev, port); + if (!ndev) + return; + enum_all_gids_of_dev_cb(ib_dev, port, ndev, ndev); + dev_put(ndev); + } +} +EXPORT_SYMBOL(rdma_roce_rescan_port); + static void callback_for_addr_gid_device_scan(struct ib_device *device, u32 port, struct net_device *rdma_ndev, @@ -575,16 +596,17 @@ static void handle_netdev_upper(struct ib_device *ib_dev, u32 port, } } -static void _roce_del_all_netdev_gids(struct ib_device *ib_dev, u32 port, - struct net_device *event_ndev) +void roce_del_all_netdev_gids(struct ib_device *ib_dev, + u32 port, struct net_device *ndev) { - ib_cache_gid_del_all_netdev_gids(ib_dev, port, event_ndev); + ib_cache_gid_del_all_netdev_gids(ib_dev, port, ndev); } +EXPORT_SYMBOL(roce_del_all_netdev_gids); static void del_netdev_upper_ips(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, void *cookie) { - handle_netdev_upper(ib_dev, port, cookie, _roce_del_all_netdev_gids); + handle_netdev_upper(ib_dev, port, cookie, roce_del_all_netdev_gids); } static void add_netdev_upper_ips(struct ib_device *ib_dev, u32 port, @@ -601,8 +623,7 @@ static void del_netdev_default_ips_join(struct ib_device *ib_dev, u32 port, rcu_read_lock(); master_ndev = netdev_master_upper_dev_get_rcu(rdma_ndev); - if (master_ndev) - dev_hold(master_ndev); + dev_hold(master_ndev); rcu_read_unlock(); if (master_ndev) { diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 8175dde60b0a..53571e6b3162 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1420,7 +1420,7 @@ enum opa_pr_supported { /* * opa_pr_query_possible - Check if current PR query can be an OPA query. * - * Retuns PR_NOT_SUPPORTED if a path record query is not + * Returns PR_NOT_SUPPORTED if a path record query is not * possible, PR_OPA_SUPPORTED if an OPA path record query * is possible and PR_IB_SUPPORTED if an IB path record * query is possible. diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 5f5ad8faf86e..02f1666f3cba 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1615,7 +1615,6 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file, struct ucma_event *uevent, *tmp; struct ucma_context *ctx; LIST_HEAD(event_list); - struct fd f; struct ucma_file *cur_file; int ret = 0; @@ -1623,21 +1622,17 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file, return -EFAULT; /* Get current fd to protect against it being closed */ - f = fdget(cmd.fd); - if (!f.file) + CLASS(fd, f)(cmd.fd); + if (fd_empty(f)) return -ENOENT; - if (f.file->f_op != &ucma_fops) { - ret = -EINVAL; - goto file_put; - } - cur_file = f.file->private_data; + if (fd_file(f)->f_op != &ucma_fops) + return -EINVAL; + cur_file = fd_file(f)->private_data; /* Validate current fd and prevent destruction of id. */ ctx = ucma_get_ctx(cur_file, cmd.id); - if (IS_ERR(ctx)) { - ret = PTR_ERR(ctx); - goto file_put; - } + if (IS_ERR(ctx)) + return PTR_ERR(ctx); rdma_lock_handler(ctx->cm_id); /* @@ -1678,8 +1673,6 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file, err_unlock: rdma_unlock_handler(ctx->cm_id); ucma_put_ctx(ctx); -file_put: - fdput(f); return ret; } @@ -1817,7 +1810,6 @@ static const struct file_operations ucma_fops = { .release = ucma_close, .write = ucma_write, .poll = ucma_poll, - .llseek = no_llseek, }; static struct miscdevice ucma_misc = { diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c index 64d9c492de64..8d3dfef9ebaa 100644 --- a/drivers/infiniband/core/ud_header.c +++ b/drivers/infiniband/core/ud_header.c @@ -462,86 +462,3 @@ int ib_ud_header_pack(struct ib_ud_header *header, return len; } EXPORT_SYMBOL(ib_ud_header_pack); - -/** - * ib_ud_header_unpack - Unpack UD header struct from wire format - * @header:UD header struct - * @buf:Buffer to pack into - * - * ib_ud_header_pack() unpacks the UD header structure @header from wire - * format in the buffer @buf. - */ -int ib_ud_header_unpack(void *buf, - struct ib_ud_header *header) -{ - ib_unpack(lrh_table, ARRAY_SIZE(lrh_table), - buf, &header->lrh); - buf += IB_LRH_BYTES; - - if (header->lrh.link_version != 0) { - pr_warn("Invalid LRH.link_version %u\n", - header->lrh.link_version); - return -EINVAL; - } - - switch (header->lrh.link_next_header) { - case IB_LNH_IBA_LOCAL: - header->grh_present = 0; - break; - - case IB_LNH_IBA_GLOBAL: - header->grh_present = 1; - ib_unpack(grh_table, ARRAY_SIZE(grh_table), - buf, &header->grh); - buf += IB_GRH_BYTES; - - if (header->grh.ip_version != 6) { - pr_warn("Invalid GRH.ip_version %u\n", - header->grh.ip_version); - return -EINVAL; - } - if (header->grh.next_header != 0x1b) { - pr_warn("Invalid GRH.next_header 0x%02x\n", - header->grh.next_header); - return -EINVAL; - } - break; - - default: - pr_warn("Invalid LRH.link_next_header %u\n", - header->lrh.link_next_header); - return -EINVAL; - } - - ib_unpack(bth_table, ARRAY_SIZE(bth_table), - buf, &header->bth); - buf += IB_BTH_BYTES; - - switch (header->bth.opcode) { - case IB_OPCODE_UD_SEND_ONLY: - header->immediate_present = 0; - break; - case IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE: - header->immediate_present = 1; - break; - default: - pr_warn("Invalid BTH.opcode 0x%02x\n", header->bth.opcode); - return -EINVAL; - } - - if (header->bth.transport_header_version != 0) { - pr_warn("Invalid BTH.transport_header_version %u\n", - header->bth.transport_header_version); - return -EINVAL; - } - - ib_unpack(deth_table, ARRAY_SIZE(deth_table), - buf, &header->deth); - buf += IB_DETH_BYTES; - - if (header->immediate_present) - memcpy(&header->immediate_data, buf, sizeof header->immediate_data); - - return 0; -} -EXPORT_SYMBOL(ib_ud_header_unpack); diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c index 39357dc2d229..0ec2e4120cc9 100644 --- a/drivers/infiniband/core/umem_dmabuf.c +++ b/drivers/infiniband/core/umem_dmabuf.c @@ -10,7 +10,7 @@ #include "uverbs.h" -MODULE_IMPORT_NS(DMA_BUF); +MODULE_IMPORT_NS("DMA_BUF"); int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) { @@ -23,6 +23,9 @@ int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); + if (umem_dmabuf->revoked) + return -EINVAL; + if (umem_dmabuf->sgt) goto wait_fence; @@ -110,10 +113,12 @@ void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf) } EXPORT_SYMBOL(ib_umem_dmabuf_unmap_pages); -struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device, - unsigned long offset, size_t size, - int fd, int access, - const struct dma_buf_attach_ops *ops) +static struct ib_umem_dmabuf * +ib_umem_dmabuf_get_with_dma_device(struct ib_device *device, + struct device *dma_device, + unsigned long offset, size_t size, + int fd, int access, + const struct dma_buf_attach_ops *ops) { struct dma_buf *dmabuf; struct ib_umem_dmabuf *umem_dmabuf; @@ -152,7 +157,7 @@ struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device, umem_dmabuf->attach = dma_buf_dynamic_attach( dmabuf, - device->dma_device, + dma_device, ops, umem_dmabuf); if (IS_ERR(umem_dmabuf->attach)) { @@ -168,6 +173,15 @@ out_release_dmabuf: dma_buf_put(dmabuf); return ret; } + +struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device, + unsigned long offset, size_t size, + int fd, int access, + const struct dma_buf_attach_ops *ops) +{ + return ib_umem_dmabuf_get_with_dma_device(device, device->dma_device, + offset, size, fd, access, ops); +} EXPORT_SYMBOL(ib_umem_dmabuf_get); static void @@ -184,16 +198,18 @@ static struct dma_buf_attach_ops ib_umem_dmabuf_attach_pinned_ops = { .move_notify = ib_umem_dmabuf_unsupported_move_notify, }; -struct ib_umem_dmabuf *ib_umem_dmabuf_get_pinned(struct ib_device *device, - unsigned long offset, - size_t size, int fd, - int access) +struct ib_umem_dmabuf * +ib_umem_dmabuf_get_pinned_with_dma_device(struct ib_device *device, + struct device *dma_device, + unsigned long offset, size_t size, + int fd, int access) { struct ib_umem_dmabuf *umem_dmabuf; int err; - umem_dmabuf = ib_umem_dmabuf_get(device, offset, size, fd, access, - &ib_umem_dmabuf_attach_pinned_ops); + umem_dmabuf = ib_umem_dmabuf_get_with_dma_device(device, dma_device, offset, + size, fd, access, + &ib_umem_dmabuf_attach_pinned_ops); if (IS_ERR(umem_dmabuf)) return umem_dmabuf; @@ -217,17 +233,41 @@ err_release: ib_umem_release(&umem_dmabuf->umem); return ERR_PTR(err); } +EXPORT_SYMBOL(ib_umem_dmabuf_get_pinned_with_dma_device); + +struct ib_umem_dmabuf *ib_umem_dmabuf_get_pinned(struct ib_device *device, + unsigned long offset, + size_t size, int fd, + int access) +{ + return ib_umem_dmabuf_get_pinned_with_dma_device(device, device->dma_device, + offset, size, fd, access); +} EXPORT_SYMBOL(ib_umem_dmabuf_get_pinned); -void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf) +void ib_umem_dmabuf_revoke(struct ib_umem_dmabuf *umem_dmabuf) { struct dma_buf *dmabuf = umem_dmabuf->attach->dmabuf; dma_resv_lock(dmabuf->resv, NULL); + if (umem_dmabuf->revoked) + goto end; ib_umem_dmabuf_unmap_pages(umem_dmabuf); - if (umem_dmabuf->pinned) + if (umem_dmabuf->pinned) { dma_buf_unpin(umem_dmabuf->attach); + umem_dmabuf->pinned = 0; + } + umem_dmabuf->revoked = 1; +end: dma_resv_unlock(dmabuf->resv); +} +EXPORT_SYMBOL(ib_umem_dmabuf_revoke); + +void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf) +{ + struct dma_buf *dmabuf = umem_dmabuf->attach->dmabuf; + + ib_umem_dmabuf_revoke(umem_dmabuf); dma_buf_detach(dmabuf, umem_dmabuf->attach); dma_buf_put(dmabuf); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index f5feca7fa9b9..fd67fc9fe85a 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -63,6 +63,8 @@ MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand userspace MAD packet access"); MODULE_LICENSE("Dual BSD/GPL"); +#define MAX_UMAD_RECV_LIST_SIZE 200000 + enum { IB_UMAD_MAX_PORTS = RDMA_MAX_PORTS, IB_UMAD_MAX_AGENTS = 32, @@ -113,6 +115,7 @@ struct ib_umad_file { struct mutex mutex; struct ib_umad_port *port; struct list_head recv_list; + atomic_t recv_list_size; struct list_head send_list; struct list_head port_list; spinlock_t send_lock; @@ -180,24 +183,28 @@ static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id) return file->agents_dead ? NULL : file->agent[id]; } -static int queue_packet(struct ib_umad_file *file, - struct ib_mad_agent *agent, - struct ib_umad_packet *packet) +static int queue_packet(struct ib_umad_file *file, struct ib_mad_agent *agent, + struct ib_umad_packet *packet, bool is_recv_mad) { int ret = 1; mutex_lock(&file->mutex); + if (is_recv_mad && + atomic_read(&file->recv_list_size) > MAX_UMAD_RECV_LIST_SIZE) + goto unlock; + for (packet->mad.hdr.id = 0; packet->mad.hdr.id < IB_UMAD_MAX_AGENTS; packet->mad.hdr.id++) if (agent == __get_agent(file, packet->mad.hdr.id)) { list_add_tail(&packet->list, &file->recv_list); + atomic_inc(&file->recv_list_size); wake_up_interruptible(&file->recv_wait); ret = 0; break; } - +unlock: mutex_unlock(&file->mutex); return ret; @@ -224,7 +231,7 @@ static void send_handler(struct ib_mad_agent *agent, if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { packet->length = IB_MGMT_MAD_HDR; packet->mad.hdr.status = ETIMEDOUT; - if (!queue_packet(file, agent, packet)) + if (!queue_packet(file, agent, packet, false)) return; } kfree(packet); @@ -284,7 +291,7 @@ static void recv_handler(struct ib_mad_agent *agent, rdma_destroy_ah_attr(&ah_attr); } - if (queue_packet(file, agent, packet)) + if (queue_packet(file, agent, packet, true)) goto err2; return; @@ -409,6 +416,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, packet = list_entry(file->recv_list.next, struct ib_umad_packet, list); list_del(&packet->list); + atomic_dec(&file->recv_list_size); mutex_unlock(&file->mutex); @@ -421,6 +429,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, /* Requeue packet */ mutex_lock(&file->mutex); list_add(&packet->list, &file->recv_list); + atomic_inc(&file->recv_list_size); mutex_unlock(&file->mutex); } else { if (packet->recv_wc) @@ -1073,7 +1082,6 @@ static const struct file_operations umad_fops = { #endif .open = ib_umad_open, .release = ib_umad_close, - .llseek = no_llseek, }; static int ib_umad_sm_open(struct inode *inode, struct file *filp) @@ -1141,7 +1149,6 @@ static const struct file_operations umad_sm_fops = { .owner = THIS_MODULE, .open = ib_umad_sm_open, .release = ib_umad_sm_close, - .llseek = no_llseek, }; static struct ib_umad_port *get_port(struct ib_device *ibdev, @@ -1312,15 +1319,17 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, if (ret) goto err_cdev; - ib_umad_init_port_dev(&port->sm_dev, port, device); - port->sm_dev.devt = base_issm; - dev_set_name(&port->sm_dev, "issm%d", port->dev_num); - cdev_init(&port->sm_cdev, &umad_sm_fops); - port->sm_cdev.owner = THIS_MODULE; + if (rdma_cap_ib_smi(device, port_num)) { + ib_umad_init_port_dev(&port->sm_dev, port, device); + port->sm_dev.devt = base_issm; + dev_set_name(&port->sm_dev, "issm%d", port->dev_num); + cdev_init(&port->sm_cdev, &umad_sm_fops); + port->sm_cdev.owner = THIS_MODULE; - ret = cdev_device_add(&port->sm_cdev, &port->sm_dev); - if (ret) - goto err_dev; + ret = cdev_device_add(&port->sm_cdev, &port->sm_dev); + if (ret) + goto err_dev; + } return 0; @@ -1336,9 +1345,13 @@ err_cdev: static void ib_umad_kill_port(struct ib_umad_port *port) { struct ib_umad_file *file; + bool has_smi = false; int id; - cdev_device_del(&port->sm_cdev, &port->sm_dev); + if (rdma_cap_ib_smi(port->ib_dev, port->port_num)) { + cdev_device_del(&port->sm_cdev, &port->sm_dev); + has_smi = true; + } cdev_device_del(&port->cdev, &port->dev); mutex_lock(&port->file_mutex); @@ -1364,7 +1377,8 @@ static void ib_umad_kill_port(struct ib_umad_port *port) ida_free(&umad_ida, port->dev_num); /* balances device_initialize() */ - put_device(&port->sm_dev); + if (has_smi) + put_device(&port->sm_dev); put_device(&port->dev); } diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 821d93c8f712..797e2fcc8072 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -133,35 +133,6 @@ struct ib_uverbs_completion_event_file { struct ib_uverbs_event_queue ev_queue; }; -struct ib_uverbs_file { - struct kref ref; - struct ib_uverbs_device *device; - struct mutex ucontext_lock; - /* - * ucontext must be accessed via ib_uverbs_get_ucontext() or with - * ucontext_lock held - */ - struct ib_ucontext *ucontext; - struct ib_uverbs_async_event_file *default_async_file; - struct list_head list; - - /* - * To access the uobjects list hw_destroy_rwsem must be held for write - * OR hw_destroy_rwsem held for read AND uobjects_lock held. - * hw_destroy_rwsem should be called across any destruction of the HW - * object of an associated uobject. - */ - struct rw_semaphore hw_destroy_rwsem; - spinlock_t uobjects_lock; - struct list_head uobjects; - - struct mutex umap_lock; - struct list_head umaps; - struct page *disassociate_page; - - struct xarray idr; -}; - struct ib_uverbs_event { union { struct ib_uverbs_async_event_desc async; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 6de05ade2ba9..5ad14c39d48c 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -161,7 +161,7 @@ static const void __user *uverbs_request_next_ptr(struct uverbs_req_iter *iter, { const void __user *res = iter->cur; - if (iter->cur + len > iter->end) + if (len > iter->end - iter->cur) return (void __force __user *)ERR_PTR(-ENOSPC); iter->cur += len; return res; @@ -572,7 +572,7 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs) struct inode *inode = NULL; int new_xrcd = 0; struct ib_device *ib_dev; - struct fd f = {}; + struct fd f = EMPTY_FD; int ret; ret = uverbs_request(attrs, &cmd, sizeof(cmd)); @@ -584,12 +584,12 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs) if (cmd.fd != -1) { /* search for file descriptor */ f = fdget(cmd.fd); - if (!f.file) { + if (fd_empty(f)) { ret = -EBADF; goto err_tree_mutex_unlock; } - inode = file_inode(f.file); + inode = file_inode(fd_file(f)); xrcd = find_xrcd(ibudev, inode); if (!xrcd && !(cmd.oflags & O_CREAT)) { /* no file descriptor. Need CREATE flag */ @@ -632,8 +632,7 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs) atomic_inc(&xrcd->usecnt); } - if (f.file) - fdput(f); + fdput(f); mutex_unlock(&ibudev->xrcd_tree_mutex); uobj_finalize_uobj_create(&obj->uobject, attrs); @@ -648,8 +647,7 @@ err: uobj_alloc_abort(&obj->uobject, attrs); err_tree_mutex_unlock: - if (f.file) - fdput(f); + fdput(f); mutex_unlock(&ibudev->xrcd_tree_mutex); @@ -1051,7 +1049,7 @@ static int create_cq(struct uverbs_attr_bundle *attrs, rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ); rdma_restrack_set_name(&cq->res, NULL); - ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata); + ret = ib_dev->ops.create_cq(cq, &attr, attrs); if (ret) goto err_free; rdma_restrack_add(&cq->res); @@ -2010,11 +2008,13 @@ static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs) ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd)); if (ret) return ret; - wqes = uverbs_request_next_ptr(&iter, cmd.wqe_size * cmd.wr_count); + wqes = uverbs_request_next_ptr(&iter, size_mul(cmd.wqe_size, + cmd.wr_count)); if (IS_ERR(wqes)) return PTR_ERR(wqes); - sgls = uverbs_request_next_ptr( - &iter, cmd.sge_count * sizeof(struct ib_uverbs_sge)); + sgls = uverbs_request_next_ptr(&iter, + size_mul(cmd.sge_count, + sizeof(struct ib_uverbs_sge))); if (IS_ERR(sgls)) return PTR_ERR(sgls); ret = uverbs_request_finish(&iter); @@ -2200,11 +2200,11 @@ ib_uverbs_unmarshall_recv(struct uverbs_req_iter *iter, u32 wr_count, if (wqe_size < sizeof(struct ib_uverbs_recv_wr)) return ERR_PTR(-EINVAL); - wqes = uverbs_request_next_ptr(iter, wqe_size * wr_count); + wqes = uverbs_request_next_ptr(iter, size_mul(wqe_size, wr_count)); if (IS_ERR(wqes)) return ERR_CAST(wqes); - sgls = uverbs_request_next_ptr( - iter, sge_count * sizeof(struct ib_uverbs_sge)); + sgls = uverbs_request_next_ptr(iter, size_mul(sge_count, + sizeof(struct ib_uverbs_sge))); if (IS_ERR(sgls)) return ERR_CAST(sgls); ret = uverbs_request_finish(iter); @@ -2737,7 +2737,7 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type, switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) { case IB_FLOW_SPEC_ETH: - ib_filter_sz = offsetof(struct ib_flow_eth_filter, real_sz); + ib_filter_sz = sizeof(struct ib_flow_eth_filter); actual_filter_sz = spec_filter_size(kern_spec_mask, kern_filter_sz, ib_filter_sz); @@ -2748,7 +2748,7 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type, memcpy(&ib_spec->eth.mask, kern_spec_mask, actual_filter_sz); break; case IB_FLOW_SPEC_IPV4: - ib_filter_sz = offsetof(struct ib_flow_ipv4_filter, real_sz); + ib_filter_sz = sizeof(struct ib_flow_ipv4_filter); actual_filter_sz = spec_filter_size(kern_spec_mask, kern_filter_sz, ib_filter_sz); @@ -2759,7 +2759,7 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type, memcpy(&ib_spec->ipv4.mask, kern_spec_mask, actual_filter_sz); break; case IB_FLOW_SPEC_IPV6: - ib_filter_sz = offsetof(struct ib_flow_ipv6_filter, real_sz); + ib_filter_sz = sizeof(struct ib_flow_ipv6_filter); actual_filter_sz = spec_filter_size(kern_spec_mask, kern_filter_sz, ib_filter_sz); @@ -2775,7 +2775,7 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type, break; case IB_FLOW_SPEC_TCP: case IB_FLOW_SPEC_UDP: - ib_filter_sz = offsetof(struct ib_flow_tcp_udp_filter, real_sz); + ib_filter_sz = sizeof(struct ib_flow_tcp_udp_filter); actual_filter_sz = spec_filter_size(kern_spec_mask, kern_filter_sz, ib_filter_sz); @@ -2786,7 +2786,7 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type, memcpy(&ib_spec->tcp_udp.mask, kern_spec_mask, actual_filter_sz); break; case IB_FLOW_SPEC_VXLAN_TUNNEL: - ib_filter_sz = offsetof(struct ib_flow_tunnel_filter, real_sz); + ib_filter_sz = sizeof(struct ib_flow_tunnel_filter); actual_filter_sz = spec_filter_size(kern_spec_mask, kern_filter_sz, ib_filter_sz); @@ -2801,7 +2801,7 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type, return -EINVAL; break; case IB_FLOW_SPEC_ESP: - ib_filter_sz = offsetof(struct ib_flow_esp_filter, real_sz); + ib_filter_sz = sizeof(struct ib_flow_esp_filter); actual_filter_sz = spec_filter_size(kern_spec_mask, kern_filter_sz, ib_filter_sz); @@ -2812,7 +2812,7 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type, memcpy(&ib_spec->esp.mask, kern_spec_mask, actual_filter_sz); break; case IB_FLOW_SPEC_GRE: - ib_filter_sz = offsetof(struct ib_flow_gre_filter, real_sz); + ib_filter_sz = sizeof(struct ib_flow_gre_filter); actual_filter_sz = spec_filter_size(kern_spec_mask, kern_filter_sz, ib_filter_sz); @@ -2823,7 +2823,7 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type, memcpy(&ib_spec->gre.mask, kern_spec_mask, actual_filter_sz); break; case IB_FLOW_SPEC_MPLS: - ib_filter_sz = offsetof(struct ib_flow_mpls_filter, real_sz); + ib_filter_sz = sizeof(struct ib_flow_mpls_filter); actual_filter_sz = spec_filter_size(kern_spec_mask, kern_filter_sz, ib_filter_sz); diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index d9799706c58e..f80da6a67e24 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -36,13 +36,15 @@ #include "uverbs.h" struct bundle_alloc_head { - struct bundle_alloc_head *next; + struct_group_tagged(bundle_alloc_head_hdr, hdr, + struct bundle_alloc_head *next; + ); u8 data[]; }; struct bundle_priv { /* Must be first */ - struct bundle_alloc_head alloc_head; + struct bundle_alloc_head_hdr alloc_head; struct bundle_alloc_head *allocated_mem; size_t internal_avail; size_t internal_used; @@ -64,7 +66,7 @@ struct bundle_priv { * Must be last. bundle ends in a flex array which overlaps * internal_buffer. */ - struct uverbs_attr_bundle bundle; + struct uverbs_attr_bundle_hdr bundle; u64 internal_buffer[32]; }; @@ -77,9 +79,10 @@ void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm, unsigned int num_attrs) { struct bundle_priv *pbundle; + struct uverbs_attr_bundle *bundle; size_t bundle_size = offsetof(struct bundle_priv, internal_buffer) + - sizeof(*pbundle->bundle.attrs) * method_elm->key_bitmap_len + + sizeof(*bundle->attrs) * method_elm->key_bitmap_len + sizeof(*pbundle->uattrs) * num_attrs; method_elm->use_stack = bundle_size <= sizeof(*pbundle); @@ -107,7 +110,7 @@ __malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size, gfp_t flags) { struct bundle_priv *pbundle = - container_of(bundle, struct bundle_priv, bundle); + container_of(&bundle->hdr, struct bundle_priv, bundle); size_t new_used; void *res; @@ -149,7 +152,7 @@ static int uverbs_set_output(const struct uverbs_attr_bundle *bundle, const struct uverbs_attr *attr) { struct bundle_priv *pbundle = - container_of(bundle, struct bundle_priv, bundle); + container_of(&bundle->hdr, struct bundle_priv, bundle); u16 flags; flags = pbundle->uattrs[attr->ptr_attr.uattr_idx].flags | @@ -166,6 +169,8 @@ static int uverbs_process_idrs_array(struct bundle_priv *pbundle, struct ib_uverbs_attr *uattr, u32 attr_bkey) { + struct uverbs_attr_bundle *bundle = + container_of(&pbundle->bundle, struct uverbs_attr_bundle, hdr); const struct uverbs_attr_spec *spec = &attr_uapi->spec; size_t array_len; u32 *idr_vals; @@ -184,7 +189,7 @@ static int uverbs_process_idrs_array(struct bundle_priv *pbundle, return -EINVAL; attr->uobjects = - uverbs_alloc(&pbundle->bundle, + uverbs_alloc(bundle, array_size(array_len, sizeof(*attr->uobjects))); if (IS_ERR(attr->uobjects)) return PTR_ERR(attr->uobjects); @@ -209,7 +214,7 @@ static int uverbs_process_idrs_array(struct bundle_priv *pbundle, for (i = 0; i != array_len; i++) { attr->uobjects[i] = uverbs_get_uobject_from_file( spec->u2.objs_arr.obj_type, spec->u2.objs_arr.access, - idr_vals[i], &pbundle->bundle); + idr_vals[i], bundle); if (IS_ERR(attr->uobjects[i])) { ret = PTR_ERR(attr->uobjects[i]); break; @@ -240,7 +245,9 @@ static int uverbs_process_attr(struct bundle_priv *pbundle, struct ib_uverbs_attr *uattr, u32 attr_bkey) { const struct uverbs_attr_spec *spec = &attr_uapi->spec; - struct uverbs_attr *e = &pbundle->bundle.attrs[attr_bkey]; + struct uverbs_attr_bundle *bundle = + container_of(&pbundle->bundle, struct uverbs_attr_bundle, hdr); + struct uverbs_attr *e = &bundle->attrs[attr_bkey]; const struct uverbs_attr_spec *val_spec = spec; struct uverbs_obj_attr *o_attr; @@ -288,7 +295,7 @@ static int uverbs_process_attr(struct bundle_priv *pbundle, if (val_spec->alloc_and_copy && !uverbs_attr_ptr_is_inline(e)) { void *p; - p = uverbs_alloc(&pbundle->bundle, uattr->len); + p = uverbs_alloc(bundle, uattr->len); if (IS_ERR(p)) return PTR_ERR(p); @@ -321,7 +328,7 @@ static int uverbs_process_attr(struct bundle_priv *pbundle, */ o_attr->uobject = uverbs_get_uobject_from_file( spec->u.obj.obj_type, spec->u.obj.access, - uattr->data_s64, &pbundle->bundle); + uattr->data_s64, bundle); if (IS_ERR(o_attr->uobject)) return PTR_ERR(o_attr->uobject); __set_bit(attr_bkey, pbundle->uobj_finalize); @@ -422,6 +429,8 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle, unsigned int num_attrs) { int (*handler)(struct uverbs_attr_bundle *attrs); + struct uverbs_attr_bundle *bundle = + container_of(&pbundle->bundle, struct uverbs_attr_bundle, hdr); size_t uattrs_size = array_size(sizeof(*pbundle->uattrs), num_attrs); unsigned int destroy_bkey = pbundle->method_elm->destroy_bkey; unsigned int i; @@ -434,7 +443,7 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle, if (!handler) return -EIO; - pbundle->uattrs = uverbs_alloc(&pbundle->bundle, uattrs_size); + pbundle->uattrs = uverbs_alloc(bundle, uattrs_size); if (IS_ERR(pbundle->uattrs)) return PTR_ERR(pbundle->uattrs); if (copy_from_user(pbundle->uattrs, pbundle->user_attrs, uattrs_size)) @@ -453,25 +462,23 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle, return -EINVAL; if (pbundle->method_elm->has_udata) - uverbs_fill_udata(&pbundle->bundle, - &pbundle->bundle.driver_udata, + uverbs_fill_udata(bundle, &pbundle->bundle.driver_udata, UVERBS_ATTR_UHW_IN, UVERBS_ATTR_UHW_OUT); else pbundle->bundle.driver_udata = (struct ib_udata){}; if (destroy_bkey != UVERBS_API_ATTR_BKEY_LEN) { - struct uverbs_obj_attr *destroy_attr = - &pbundle->bundle.attrs[destroy_bkey].obj_attr; + struct uverbs_obj_attr *destroy_attr = &bundle->attrs[destroy_bkey].obj_attr; - ret = uobj_destroy(destroy_attr->uobject, &pbundle->bundle); + ret = uobj_destroy(destroy_attr->uobject, bundle); if (ret) return ret; __clear_bit(destroy_bkey, pbundle->uobj_finalize); - ret = handler(&pbundle->bundle); + ret = handler(bundle); uobj_put_destroy(destroy_attr->uobject); } else { - ret = handler(&pbundle->bundle); + ret = handler(bundle); } /* @@ -481,10 +488,10 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle, */ if (!ret && pbundle->method_elm->has_udata) { const struct uverbs_attr *attr = - uverbs_attr_get(&pbundle->bundle, UVERBS_ATTR_UHW_OUT); + uverbs_attr_get(bundle, UVERBS_ATTR_UHW_OUT); if (!IS_ERR(attr)) - ret = uverbs_set_output(&pbundle->bundle, attr); + ret = uverbs_set_output(bundle, attr); } /* @@ -501,6 +508,8 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle, static void bundle_destroy(struct bundle_priv *pbundle, bool commit) { unsigned int key_bitmap_len = pbundle->method_elm->key_bitmap_len; + struct uverbs_attr_bundle *bundle = + container_of(&pbundle->bundle, struct uverbs_attr_bundle, hdr); struct bundle_alloc_head *memblock; unsigned int i; @@ -508,20 +517,19 @@ static void bundle_destroy(struct bundle_priv *pbundle, bool commit) i = -1; while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len, i + 1)) < key_bitmap_len) { - struct uverbs_attr *attr = &pbundle->bundle.attrs[i]; + struct uverbs_attr *attr = &bundle->attrs[i]; uverbs_finalize_object( attr->obj_attr.uobject, attr->obj_attr.attr_elm->spec.u.obj.access, test_bit(i, pbundle->uobj_hw_obj_valid), - commit, - &pbundle->bundle); + commit, bundle); } i = -1; while ((i = find_next_bit(pbundle->spec_finalize, key_bitmap_len, i + 1)) < key_bitmap_len) { - struct uverbs_attr *attr = &pbundle->bundle.attrs[i]; + struct uverbs_attr *attr = &bundle->attrs[i]; const struct uverbs_api_attr *attr_uapi; void __rcu **slot; @@ -535,7 +543,7 @@ static void bundle_destroy(struct bundle_priv *pbundle, bool commit) if (attr_uapi->spec.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) { uverbs_free_idrs_array(attr_uapi, &attr->objs_arr_attr, - commit, &pbundle->bundle); + commit, bundle); } } @@ -578,7 +586,8 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile, method_elm->bundle_size - offsetof(struct bundle_priv, internal_buffer); pbundle->alloc_head.next = NULL; - pbundle->allocated_mem = &pbundle->alloc_head; + pbundle->allocated_mem = container_of(&pbundle->alloc_head, + struct bundle_alloc_head, hdr); } else { pbundle = &onstack; pbundle->internal_avail = sizeof(pbundle->internal_buffer); @@ -596,8 +605,9 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile, pbundle->user_attrs = user_attrs; pbundle->internal_used = ALIGN(pbundle->method_elm->key_bitmap_len * - sizeof(*pbundle->bundle.attrs), - sizeof(*pbundle->internal_buffer)); + sizeof(*container_of(&pbundle->bundle, + struct uverbs_attr_bundle, hdr)->attrs), + sizeof(*pbundle->internal_buffer)); memset(pbundle->bundle.attr_present, 0, sizeof(pbundle->bundle.attr_present)); memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize)); @@ -700,11 +710,13 @@ void uverbs_fill_udata(struct uverbs_attr_bundle *bundle, unsigned int attr_out) { struct bundle_priv *pbundle = - container_of(bundle, struct bundle_priv, bundle); + container_of(&bundle->hdr, struct bundle_priv, bundle); + struct uverbs_attr_bundle *bundle_aux = + container_of(&pbundle->bundle, struct uverbs_attr_bundle, hdr); const struct uverbs_attr *in = - uverbs_attr_get(&pbundle->bundle, attr_in); + uverbs_attr_get(bundle_aux, attr_in); const struct uverbs_attr *out = - uverbs_attr_get(&pbundle->bundle, attr_out); + uverbs_attr_get(bundle_aux, attr_out); if (!IS_ERR(in)) { udata->inlen = in->ptr_attr.len; @@ -829,7 +841,7 @@ void uverbs_finalize_uobj_create(const struct uverbs_attr_bundle *bundle, u16 idx) { struct bundle_priv *pbundle = - container_of(bundle, struct bundle_priv, bundle); + container_of(&bundle->hdr, struct bundle_priv, bundle); __set_bit(uapi_bkey_attr(uapi_key_attr(idx)), pbundle->uobj_hw_obj_valid); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 495d5a5d0373..85cfc790a7bb 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -76,6 +76,7 @@ static dev_t dynamic_uverbs_dev; static DEFINE_IDA(uverbs_ida); static int ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); +static struct ib_client uverbs_client; static char *uverbs_devnode(const struct device *dev, umode_t *mode) { @@ -217,6 +218,7 @@ void ib_uverbs_release_file(struct kref *ref) if (file->disassociate_page) __free_pages(file->disassociate_page, 0); + mutex_destroy(&file->disassociation_lock); mutex_destroy(&file->umap_lock); mutex_destroy(&file->ucontext_lock); kfree(file); @@ -353,7 +355,6 @@ const struct file_operations uverbs_event_fops = { .poll = ib_uverbs_comp_event_poll, .release = uverbs_uobject_fd_release, .fasync = ib_uverbs_comp_event_fasync, - .llseek = no_llseek, }; const struct file_operations uverbs_async_event_fops = { @@ -362,7 +363,6 @@ const struct file_operations uverbs_async_event_fops = { .poll = ib_uverbs_async_event_poll, .release = uverbs_async_event_release, .fasync = ib_uverbs_async_event_fasync, - .llseek = no_llseek, }; void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) @@ -700,8 +700,13 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) ret = PTR_ERR(ucontext); goto out; } + + mutex_lock(&file->disassociation_lock); + vma->vm_ops = &rdma_umap_ops; ret = ucontext->device->ops.mmap(ucontext, vma); + + mutex_unlock(&file->disassociation_lock); out: srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); return ret; @@ -723,6 +728,8 @@ static void rdma_umap_open(struct vm_area_struct *vma) /* We are racing with disassociation */ if (!down_read_trylock(&ufile->hw_destroy_rwsem)) goto out_zap; + mutex_lock(&ufile->disassociation_lock); + /* * Disassociation already completed, the VMA should already be zapped. */ @@ -734,10 +741,12 @@ static void rdma_umap_open(struct vm_area_struct *vma) goto out_unlock; rdma_umap_priv_init(priv, vma, opriv->entry); + mutex_unlock(&ufile->disassociation_lock); up_read(&ufile->hw_destroy_rwsem); return; out_unlock: + mutex_unlock(&ufile->disassociation_lock); up_read(&ufile->hw_destroy_rwsem); out_zap: /* @@ -821,7 +830,7 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) { struct rdma_umap_priv *priv, *next_priv; - lockdep_assert_held(&ufile->hw_destroy_rwsem); + mutex_lock(&ufile->disassociation_lock); while (1) { struct mm_struct *mm = NULL; @@ -847,8 +856,10 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) break; } mutex_unlock(&ufile->umap_lock); - if (!mm) + if (!mm) { + mutex_unlock(&ufile->disassociation_lock); return; + } /* * The umap_lock is nested under mmap_lock since it used within @@ -878,7 +889,31 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) mmap_read_unlock(mm); mmput(mm); } + + mutex_unlock(&ufile->disassociation_lock); +} + +/** + * rdma_user_mmap_disassociate() - Revoke mmaps for a device + * @device: device to revoke + * + * This function should be called by drivers that need to disable mmaps for the + * device, for instance because it is going to be reset. + */ +void rdma_user_mmap_disassociate(struct ib_device *device) +{ + struct ib_uverbs_device *uverbs_dev = + ib_get_client_data(device, &uverbs_client); + struct ib_uverbs_file *ufile; + + mutex_lock(&uverbs_dev->lists_mutex); + list_for_each_entry(ufile, &uverbs_dev->uverbs_file_list, list) { + if (ufile->ucontext) + uverbs_user_mmap_disassociate(ufile); + } + mutex_unlock(&uverbs_dev->lists_mutex); } +EXPORT_SYMBOL(rdma_user_mmap_disassociate); /* * ib_uverbs_open() does not need the BKL: @@ -949,6 +984,8 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) mutex_init(&file->umap_lock); INIT_LIST_HEAD(&file->umaps); + mutex_init(&file->disassociation_lock); + filp->private_data = file; list_add_tail(&file->list, &dev->uverbs_file_list); mutex_unlock(&dev->lists_mutex); @@ -991,7 +1028,6 @@ static const struct file_operations uverbs_fops = { .write = ib_uverbs_write, .open = ib_uverbs_open, .release = ib_uverbs_close, - .llseek = no_llseek, .unlocked_ioctl = ib_uverbs_ioctl, .compat_ioctl = compat_ptr_ioctl, }; @@ -1002,7 +1038,6 @@ static const struct file_operations uverbs_mmap_fops = { .mmap = ib_uverbs_mmap, .open = ib_uverbs_open, .release = ib_uverbs_close, - .llseek = no_llseek, .unlocked_ioctl = ib_uverbs_ioctl, .compat_ioctl = compat_ptr_ioctl, }; @@ -1114,7 +1149,8 @@ static int ib_uverbs_add_one(struct ib_device *device) struct ib_uverbs_device *uverbs_dev; int ret; - if (!device->ops.alloc_ucontext) + if (!device->ops.alloc_ucontext || + device->type == RDMA_DEVICE_TYPE_SMI) return -EOPNOTSUPP; uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL); diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c index 11a080646916..e803f609ec87 100644 --- a/drivers/infiniband/core/uverbs_marshall.c +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -171,45 +171,3 @@ void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst, __ib_copy_path_rec_to_user(dst, src); } EXPORT_SYMBOL(ib_copy_path_rec_to_user); - -void ib_copy_path_rec_from_user(struct sa_path_rec *dst, - struct ib_user_path_rec *src) -{ - u32 slid, dlid; - - memset(dst, 0, sizeof(*dst)); - if ((ib_is_opa_gid((union ib_gid *)src->sgid)) || - (ib_is_opa_gid((union ib_gid *)src->dgid))) { - dst->rec_type = SA_PATH_REC_TYPE_OPA; - slid = opa_get_lid_from_gid((union ib_gid *)src->sgid); - dlid = opa_get_lid_from_gid((union ib_gid *)src->dgid); - } else { - dst->rec_type = SA_PATH_REC_TYPE_IB; - slid = ntohs(src->slid); - dlid = ntohs(src->dlid); - } - memcpy(dst->dgid.raw, src->dgid, sizeof dst->dgid); - memcpy(dst->sgid.raw, src->sgid, sizeof dst->sgid); - - sa_path_set_dlid(dst, dlid); - sa_path_set_slid(dst, slid); - sa_path_set_raw_traffic(dst, src->raw_traffic); - dst->flow_label = src->flow_label; - dst->hop_limit = src->hop_limit; - dst->traffic_class = src->traffic_class; - dst->reversible = src->reversible; - dst->numb_path = src->numb_path; - dst->pkey = src->pkey; - dst->sl = src->sl; - dst->mtu_selector = src->mtu_selector; - dst->mtu = src->mtu; - dst->rate_selector = src->rate_selector; - dst->rate = src->rate; - dst->packet_life_time = src->packet_life_time; - dst->preference = src->preference; - dst->packet_life_time_selector = src->packet_life_time_selector; - - /* TODO: No need to set this */ - sa_path_set_dmac_zero(dst); -} -EXPORT_SYMBOL(ib_copy_path_rec_from_user); diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 370ad7c83f88..432054f0a8a4 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -128,7 +128,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ); rdma_restrack_set_name(&cq->res, NULL); - ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata); + ret = ib_dev->ops.create_cq(cq, &attr, attrs); if (ret) goto err_free; diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index 03e1db5d1e8c..7ebc7bd3caae 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -239,7 +239,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_REG_DMABUF_MR)( mr = pd->device->ops.reg_user_mr_dmabuf(pd, offset, length, iova, fd, access_flags, - &attrs->driver_udata); + attrs); if (IS_ERR(mr)) return PTR_ERR(mr); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 94a7f3b0c71c..473ee0831307 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1101,6 +1101,16 @@ EXPORT_SYMBOL(ib_destroy_srq_user); /* Queue pairs */ +static void __ib_qp_event_handler(struct ib_event *event, void *context) +{ + struct ib_qp *qp = event->element.qp; + + if (event->event == IB_EVENT_QP_LAST_WQE_REACHED) + complete(&qp->srq_completion); + if (qp->registered_event_handler) + qp->registered_event_handler(event, qp->qp_context); +} + static void __ib_shared_qp_event_handler(struct ib_event *event, void *context) { struct ib_qp *qp = context; @@ -1221,13 +1231,15 @@ static struct ib_qp *create_qp(struct ib_device *dev, struct ib_pd *pd, qp->qp_type = attr->qp_type; qp->rwq_ind_tbl = attr->rwq_ind_tbl; qp->srq = attr->srq; - qp->event_handler = attr->event_handler; + qp->event_handler = __ib_qp_event_handler; + qp->registered_event_handler = attr->event_handler; qp->port = attr->port_num; qp->qp_context = attr->qp_context; spin_lock_init(&qp->mr_lock); INIT_LIST_HEAD(&qp->rdma_mrs); INIT_LIST_HEAD(&qp->sig_mrs); + init_completion(&qp->srq_completion); qp->send_cq = attr->send_cq; qp->recv_cq = attr->recv_cq; @@ -2884,6 +2896,72 @@ static void __ib_drain_rq(struct ib_qp *qp) wait_for_completion(&rdrain.done); } +/* + * __ib_drain_srq() - Block until Last WQE Reached event arrives, or timeout + * expires. + * @qp: queue pair associated with SRQ to drain + * + * Quoting 10.3.1 Queue Pair and EE Context States: + * + * Note, for QPs that are associated with an SRQ, the Consumer should take the + * QP through the Error State before invoking a Destroy QP or a Modify QP to the + * Reset State. The Consumer may invoke the Destroy QP without first performing + * a Modify QP to the Error State and waiting for the Affiliated Asynchronous + * Last WQE Reached Event. However, if the Consumer does not wait for the + * Affiliated Asynchronous Last WQE Reached Event, then WQE and Data Segment + * leakage may occur. Therefore, it is good programming practice to tear down a + * QP that is associated with an SRQ by using the following process: + * + * - Put the QP in the Error State + * - Wait for the Affiliated Asynchronous Last WQE Reached Event; + * - either: + * drain the CQ by invoking the Poll CQ verb and either wait for CQ + * to be empty or the number of Poll CQ operations has exceeded + * CQ capacity size; + * - or + * post another WR that completes on the same CQ and wait for this + * WR to return as a WC; + * - and then invoke a Destroy QP or Reset QP. + * + * We use the first option. + */ +static void __ib_drain_srq(struct ib_qp *qp) +{ + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; + struct ib_cq *cq; + int n, polled = 0; + int ret; + + if (!qp->srq) { + WARN_ONCE(1, "QP 0x%p is not associated with SRQ\n", qp); + return; + } + + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret) { + WARN_ONCE(ret, "failed to drain shared recv queue: %d\n", ret); + return; + } + + if (ib_srq_has_cq(qp->srq->srq_type)) { + cq = qp->srq->ext.cq; + } else if (qp->recv_cq) { + cq = qp->recv_cq; + } else { + WARN_ONCE(1, "QP 0x%p has no CQ associated with SRQ\n", qp); + return; + } + + if (wait_for_completion_timeout(&qp->srq_completion, 60 * HZ) > 0) { + while (polled != cq->cqe) { + n = ib_process_cq_direct(cq, cq->cqe - polled); + if (!n) + return; + polled += n; + } + } +} + /** * ib_drain_sq() - Block until all SQ CQEs have been consumed by the * application. @@ -2962,6 +3040,8 @@ void ib_drain_qp(struct ib_qp *qp) ib_drain_sq(qp); if (!qp->srq) ib_drain_rq(qp); + else + __ib_drain_srq(qp); } EXPORT_SYMBOL(ib_drain_qp); |