Diffstat (limited to 'drivers/infiniband/core/mad.c')
| -rw-r--r-- | drivers/infiniband/core/mad.c | 1029 |
1 file changed, 537 insertions, 492 deletions
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 7870823bac47..8f26bfb69586 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -3,7 +3,7 @@ * Copyright (c) 2005 Intel Corporation. All rights reserved. * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2009 HNR Consulting. All rights reserved. - * Copyright (c) 2014 Intel Corporation. All rights reserved. + * Copyright (c) 2014,2018 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -38,10 +38,10 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/dma-mapping.h> -#include <linux/idr.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/security.h> +#include <linux/xarray.h> #include <rdma/ib_cache.h> #include "mad_priv.h" @@ -51,6 +51,27 @@ #include "opa_smi.h" #include "agent.h" +#define CREATE_TRACE_POINTS +#include <trace/events/ib_mad.h> + +#ifdef CONFIG_TRACEPOINTS +static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_qp_info *qp_info, + struct trace_event_raw_ib_mad_send_template *entry) +{ + struct ib_ud_wr *wr = &mad_send_wr->send_wr; + struct rdma_ah_attr attr = {}; + + rdma_query_ah(wr->ah, &attr); + + /* These are common */ + entry->sl = attr.sl; + entry->rqpn = wr->remote_qpn; + entry->rqkey = wr->remote_qkey; + entry->dlid = rdma_ah_get_dlid(&attr); +} +#endif + static int mad_sendq_size = IB_MAD_QP_SEND_SIZE; static int mad_recvq_size = IB_MAD_QP_RECV_SIZE; @@ -59,12 +80,8 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests module_param_named(recv_queue_size, mad_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests"); -/* - * The mlx4 driver uses the top byte to distinguish which virtual function - * generated the MAD, so we must avoid using it. 
- */ -#define AGENT_ID_LIMIT (1 << 24) -static DEFINE_IDR(ib_mad_clients); +static DEFINE_XARRAY_ALLOC1(ib_mad_clients); +static u32 ib_mad_client_next; static struct list_head ib_mad_port_list; /* Port list lock */ @@ -96,7 +113,7 @@ static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc); * Assumes ib_mad_port_list_lock is being held */ static inline struct ib_mad_port_private * -__ib_get_mad_port(struct ib_device *device, int port_num) +__ib_get_mad_port(struct ib_device *device, u32 port_num) { struct ib_mad_port_private *entry; @@ -112,7 +129,7 @@ __ib_get_mad_port(struct ib_device *device, int port_num) * for a device/port */ static inline struct ib_mad_port_private * -ib_get_mad_port(struct ib_device *device, int port_num) +ib_get_mad_port(struct ib_device *device, u32 port_num) { struct ib_mad_port_private *entry; unsigned long flags; @@ -133,8 +150,7 @@ static inline u8 convert_mgmt_class(u8 mgmt_class) static int get_spl_qp_index(enum ib_qp_type qp_type) { - switch (qp_type) - { + switch (qp_type) { case IB_QPT_SMI: return 0; case IB_QPT_GSI: @@ -194,13 +210,36 @@ int ib_response_mad(const struct ib_mad_hdr *hdr) } EXPORT_SYMBOL(ib_response_mad); +#define SOL_FC_MAX_DEFAULT_FRAC 4 +#define SOL_FC_MAX_SA_FRAC 32 + +static int get_sol_fc_max_outstanding(struct ib_mad_reg_req *mad_reg_req) +{ + if (!mad_reg_req) + /* Send only agent */ + return mad_recvq_size / SOL_FC_MAX_DEFAULT_FRAC; + + switch (mad_reg_req->mgmt_class) { + case IB_MGMT_CLASS_CM: + return mad_recvq_size / SOL_FC_MAX_DEFAULT_FRAC; + case IB_MGMT_CLASS_SUBN_ADM: + return mad_recvq_size / SOL_FC_MAX_SA_FRAC; + case IB_MGMT_CLASS_SUBN_LID_ROUTED: + case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: + return min(mad_recvq_size, IB_MAD_QP_RECV_SIZE) / + SOL_FC_MAX_DEFAULT_FRAC; + default: + return 0; + } +} + /* * ib_register_mad_agent - Register to send/receive MADs * * Context: Process context. */ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, - u8 port_num, + u32 port_num, enum ib_qp_type qp_type, struct ib_mad_reg_req *mad_reg_req, u8 rmpp_version, @@ -330,7 +369,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, /* Validate device and port */ port_priv = ib_get_mad_port(device, port_num); if (!port_priv) { - dev_dbg_ratelimited(&device->dev, "%s: Invalid port %d\n", + dev_dbg_ratelimited(&device->dev, "%s: Invalid port %u\n", __func__, port_num); ret = ERR_PTR(-ENODEV); goto error1; @@ -375,13 +414,17 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, spin_lock_init(&mad_agent_priv->lock); INIT_LIST_HEAD(&mad_agent_priv->send_list); INIT_LIST_HEAD(&mad_agent_priv->wait_list); - INIT_LIST_HEAD(&mad_agent_priv->done_list); INIT_LIST_HEAD(&mad_agent_priv->rmpp_list); + INIT_LIST_HEAD(&mad_agent_priv->backlog_list); INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends); INIT_LIST_HEAD(&mad_agent_priv->local_list); INIT_WORK(&mad_agent_priv->local_work, local_completions); - atomic_set(&mad_agent_priv->refcount, 1); + refcount_set(&mad_agent_priv->refcount, 1); init_completion(&mad_agent_priv->comp); + mad_agent_priv->sol_fc_send_count = 0; + mad_agent_priv->sol_fc_wait_count = 0; + mad_agent_priv->sol_fc_max = + recv_handler ? 
get_sol_fc_max_outstanding(mad_reg_req) : 0; ret2 = ib_mad_agent_security_setup(&mad_agent_priv->agent, qp_type); if (ret2) { @@ -389,18 +432,17 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, goto error4; } - idr_preload(GFP_KERNEL); - idr_lock(&ib_mad_clients); - ret2 = idr_alloc_cyclic(&ib_mad_clients, mad_agent_priv, 0, - AGENT_ID_LIMIT, GFP_ATOMIC); - idr_unlock(&ib_mad_clients); - idr_preload_end(); - + /* + * The mlx4 driver uses the top byte to distinguish which virtual + * function generated the MAD, so we must avoid using it. + */ + ret2 = xa_alloc_cyclic(&ib_mad_clients, &mad_agent_priv->agent.hi_tid, + mad_agent_priv, XA_LIMIT(0, (1 << 24) - 1), + &ib_mad_client_next, GFP_KERNEL); if (ret2 < 0) { ret = ERR_PTR(ret2); goto error5; } - mad_agent_priv->agent.hi_tid = ret2; /* * Make sure MAD registration (if supplied) @@ -445,12 +487,11 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, } spin_unlock_irq(&port_priv->reg_lock); + trace_ib_mad_create_agent(mad_agent_priv); return &mad_agent_priv->agent; error6: spin_unlock_irq(&port_priv->reg_lock); - idr_lock(&ib_mad_clients); - idr_remove(&ib_mad_clients, mad_agent_priv->agent.hi_tid); - idr_unlock(&ib_mad_clients); + xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid); error5: ib_mad_agent_security_cleanup(&mad_agent_priv->agent); error4: @@ -462,146 +503,18 @@ error1: } EXPORT_SYMBOL(ib_register_mad_agent); -static inline int is_snooping_sends(int mad_snoop_flags) -{ - return (mad_snoop_flags & - (/*IB_MAD_SNOOP_POSTED_SENDS | - IB_MAD_SNOOP_RMPP_SENDS |*/ - IB_MAD_SNOOP_SEND_COMPLETIONS /*| - IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS*/)); -} - -static inline int is_snooping_recvs(int mad_snoop_flags) -{ - return (mad_snoop_flags & - (IB_MAD_SNOOP_RECVS /*| - IB_MAD_SNOOP_RMPP_RECVS*/)); -} - -static int register_snoop_agent(struct ib_mad_qp_info *qp_info, - struct ib_mad_snoop_private *mad_snoop_priv) -{ - struct ib_mad_snoop_private **new_snoop_table; - unsigned long flags; - int i; - - spin_lock_irqsave(&qp_info->snoop_lock, flags); - /* Check for empty slot in array. */ - for (i = 0; i < qp_info->snoop_table_size; i++) - if (!qp_info->snoop_table[i]) - break; - - if (i == qp_info->snoop_table_size) { - /* Grow table. 
*/ - new_snoop_table = krealloc(qp_info->snoop_table, - sizeof mad_snoop_priv * - (qp_info->snoop_table_size + 1), - GFP_ATOMIC); - if (!new_snoop_table) { - i = -ENOMEM; - goto out; - } - - qp_info->snoop_table = new_snoop_table; - qp_info->snoop_table_size++; - } - qp_info->snoop_table[i] = mad_snoop_priv; - atomic_inc(&qp_info->snoop_count); -out: - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); - return i; -} - -struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device, - u8 port_num, - enum ib_qp_type qp_type, - int mad_snoop_flags, - ib_mad_snoop_handler snoop_handler, - ib_mad_recv_handler recv_handler, - void *context) -{ - struct ib_mad_port_private *port_priv; - struct ib_mad_agent *ret; - struct ib_mad_snoop_private *mad_snoop_priv; - int qpn; - int err; - - /* Validate parameters */ - if ((is_snooping_sends(mad_snoop_flags) && !snoop_handler) || - (is_snooping_recvs(mad_snoop_flags) && !recv_handler)) { - ret = ERR_PTR(-EINVAL); - goto error1; - } - qpn = get_spl_qp_index(qp_type); - if (qpn == -1) { - ret = ERR_PTR(-EINVAL); - goto error1; - } - port_priv = ib_get_mad_port(device, port_num); - if (!port_priv) { - ret = ERR_PTR(-ENODEV); - goto error1; - } - /* Allocate structures */ - mad_snoop_priv = kzalloc(sizeof *mad_snoop_priv, GFP_KERNEL); - if (!mad_snoop_priv) { - ret = ERR_PTR(-ENOMEM); - goto error1; - } - - /* Now, fill in the various structures */ - mad_snoop_priv->qp_info = &port_priv->qp_info[qpn]; - mad_snoop_priv->agent.device = device; - mad_snoop_priv->agent.recv_handler = recv_handler; - mad_snoop_priv->agent.snoop_handler = snoop_handler; - mad_snoop_priv->agent.context = context; - mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp; - mad_snoop_priv->agent.port_num = port_num; - mad_snoop_priv->mad_snoop_flags = mad_snoop_flags; - init_completion(&mad_snoop_priv->comp); - - err = ib_mad_agent_security_setup(&mad_snoop_priv->agent, qp_type); - if (err) { - ret = ERR_PTR(err); - goto error2; - } - - mad_snoop_priv->snoop_index = register_snoop_agent( - &port_priv->qp_info[qpn], - mad_snoop_priv); - if (mad_snoop_priv->snoop_index < 0) { - ret = ERR_PTR(mad_snoop_priv->snoop_index); - goto error3; - } - - atomic_set(&mad_snoop_priv->refcount, 1); - return &mad_snoop_priv->agent; -error3: - ib_mad_agent_security_cleanup(&mad_snoop_priv->agent); -error2: - kfree(mad_snoop_priv); -error1: - return ret; -} -EXPORT_SYMBOL(ib_register_mad_snoop); - static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv) { - if (atomic_dec_and_test(&mad_agent_priv->refcount)) + if (refcount_dec_and_test(&mad_agent_priv->refcount)) complete(&mad_agent_priv->comp); } -static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv) -{ - if (atomic_dec_and_test(&mad_snoop_priv->refcount)) - complete(&mad_snoop_priv->comp); -} - static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) { struct ib_mad_port_private *port_priv; /* Note that we could still be handling received MADs */ + trace_ib_mad_unregister_agent(mad_agent_priv); /* * Canceling all sends results in dropping received response @@ -614,15 +527,13 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) spin_lock_irq(&port_priv->reg_lock); remove_mad_reg_req(mad_agent_priv); spin_unlock_irq(&port_priv->reg_lock); - idr_lock(&ib_mad_clients); - idr_remove(&ib_mad_clients, mad_agent_priv->agent.hi_tid); - idr_unlock(&ib_mad_clients); + xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid); flush_workqueue(port_priv->wq); 
- ib_cancel_rmpp_recvs(mad_agent_priv); deref_mad_agent(mad_agent_priv); wait_for_completion(&mad_agent_priv->comp); + ib_cancel_rmpp_recvs(mad_agent_priv); ib_mad_agent_security_cleanup(&mad_agent_priv->agent); @@ -630,25 +541,6 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) kfree_rcu(mad_agent_priv, rcu); } -static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv) -{ - struct ib_mad_qp_info *qp_info; - unsigned long flags; - - qp_info = mad_snoop_priv->qp_info; - spin_lock_irqsave(&qp_info->snoop_lock, flags); - qp_info->snoop_table[mad_snoop_priv->snoop_index] = NULL; - atomic_dec(&qp_info->snoop_count); - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); - - deref_snoop_agent(mad_snoop_priv); - wait_for_completion(&mad_snoop_priv->comp); - - ib_mad_agent_security_cleanup(&mad_snoop_priv->agent); - - kfree(mad_snoop_priv); -} - /* * ib_unregister_mad_agent - Unregisters a client from using MAD services * @@ -657,20 +549,11 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv) void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) { struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_snoop_private *mad_snoop_priv; - - /* If the TID is zero, the agent can only snoop. */ - if (mad_agent->hi_tid) { - mad_agent_priv = container_of(mad_agent, - struct ib_mad_agent_private, - agent); - unregister_mad_agent(mad_agent_priv); - } else { - mad_snoop_priv = container_of(mad_agent, - struct ib_mad_snoop_private, - agent); - unregister_mad_snoop(mad_snoop_priv); - } + + mad_agent_priv = container_of(mad_agent, + struct ib_mad_agent_private, + agent); + unregister_mad_agent(mad_agent_priv); } EXPORT_SYMBOL(ib_unregister_mad_agent); @@ -686,59 +569,8 @@ static void dequeue_mad(struct ib_mad_list_head *mad_list) spin_unlock_irqrestore(&mad_queue->lock, flags); } -static void snoop_send(struct ib_mad_qp_info *qp_info, - struct ib_mad_send_buf *send_buf, - struct ib_mad_send_wc *mad_send_wc, - int mad_snoop_flags) -{ - struct ib_mad_snoop_private *mad_snoop_priv; - unsigned long flags; - int i; - - spin_lock_irqsave(&qp_info->snoop_lock, flags); - for (i = 0; i < qp_info->snoop_table_size; i++) { - mad_snoop_priv = qp_info->snoop_table[i]; - if (!mad_snoop_priv || - !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags)) - continue; - - atomic_inc(&mad_snoop_priv->refcount); - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); - mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent, - send_buf, mad_send_wc); - deref_snoop_agent(mad_snoop_priv); - spin_lock_irqsave(&qp_info->snoop_lock, flags); - } - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); -} - -static void snoop_recv(struct ib_mad_qp_info *qp_info, - struct ib_mad_recv_wc *mad_recv_wc, - int mad_snoop_flags) -{ - struct ib_mad_snoop_private *mad_snoop_priv; - unsigned long flags; - int i; - - spin_lock_irqsave(&qp_info->snoop_lock, flags); - for (i = 0; i < qp_info->snoop_table_size; i++) { - mad_snoop_priv = qp_info->snoop_table[i]; - if (!mad_snoop_priv || - !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags)) - continue; - - atomic_inc(&mad_snoop_priv->refcount); - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); - mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, NULL, - mad_recv_wc); - deref_snoop_agent(mad_snoop_priv); - spin_lock_irqsave(&qp_info->snoop_lock, flags); - } - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); -} - static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid, - u16 pkey_index, 
u8 port_num, struct ib_wc *wc) + u16 pkey_index, u32 port_num, struct ib_wc *wc) { memset(wc, 0, sizeof *wc); wc->wr_cqe = cqe; @@ -797,7 +629,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, struct ib_mad_port_private *port_priv; struct ib_mad_agent_private *recv_mad_agent = NULL; struct ib_device *device = mad_agent_priv->agent.device; - u8 port_num; + u32 port_num; struct ib_wc mad_wc; struct ib_ud_wr *send_wr = &mad_send_wr->send_wr; size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv); @@ -821,6 +653,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, if (opa && smp->class_version == OPA_SM_CLASS_VERSION) { u32 opa_drslid; + trace_ib_mad_handle_out_opa_smi(opa_smp); + if ((opa_get_smp_direction(opa_smp) ? opa_smp->route.dr.dr_dlid : opa_smp->route.dr.dr_slid) == OPA_LID_PERMISSIVE && @@ -846,6 +680,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, opa_smi_check_local_returning_smp(opa_smp, device) == IB_SMI_DISCARD) goto out; } else { + trace_ib_mad_handle_out_ib_smi(smp); + if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) == IB_LID_PERMISSIVE && smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(device), port_num) == @@ -889,11 +725,10 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, /* No GRH for DR SMP */ ret = device->ops.process_mad(device, 0, port_num, &mad_wc, NULL, - (const struct ib_mad_hdr *)smp, mad_size, - (struct ib_mad_hdr *)mad_priv->mad, - &mad_size, &out_mad_pkey_index); - switch (ret) - { + (const struct ib_mad *)smp, + (struct ib_mad *)mad_priv->mad, &mad_size, + &out_mad_pkey_index); + switch (ret) { case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY: if (ib_response_mad((const struct ib_mad_hdr *)mad_priv->mad) && mad_agent_priv->agent.recv_handler) { @@ -903,7 +738,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, * Reference MAD agent until receive * side of local completion handled */ - atomic_inc(&mad_agent_priv->refcount); + refcount_inc(&mad_agent_priv->refcount); } else kfree(mad_priv); break; @@ -943,7 +778,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, local->return_wc_byte_len = mad_size; } /* Reference MAD agent until send side of local completion handled */ - atomic_inc(&mad_agent_priv->refcount); + refcount_inc(&mad_agent_priv->refcount); /* Queue local completion to local list */ spin_lock_irqsave(&mad_agent_priv->lock, flags); list_add_tail(&local->completion_list, &mad_agent_priv->local_list); @@ -992,7 +827,7 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr, /* Allocate data segments. 
*/ for (left = send_buf->data_len + pad; left > 0; left -= seg_size) { - seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask); + seg = kmalloc(sizeof(*seg) + seg_size, gfp_mask); if (!seg) { free_send_rmpp_list(send_wr); return -ENOMEM; @@ -1022,12 +857,11 @@ int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent) } EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent); -struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, - u32 remote_qpn, u16 pkey_index, - int rmpp_active, - int hdr_len, int data_len, - gfp_t gfp_mask, - u8 base_version) +struct ib_mad_send_buf *ib_create_send_mad(struct ib_mad_agent *mad_agent, + u32 remote_qpn, u16 pkey_index, + int rmpp_active, int hdr_len, + int data_len, gfp_t gfp_mask, + u8 base_version) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; @@ -1101,7 +935,7 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, } mad_send_wr->send_buf.mad_agent = mad_agent; - atomic_inc(&mad_agent_priv->refcount); + refcount_inc(&mad_agent_priv->refcount); return &mad_send_wr->send_buf; } EXPORT_SYMBOL(ib_create_send_mad); @@ -1223,6 +1057,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) spin_lock_irqsave(&qp_info->send_queue.lock, flags); if (qp_info->send_queue.count < qp_info->send_queue.max_active) { + trace_ib_mad_ib_send_mad(mad_send_wr, qp_info); ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr, NULL); list = &qp_info->send_queue.list; @@ -1247,6 +1082,180 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) return ret; } +static void handle_queued_state(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_agent_private *mad_agent_priv) +{ + if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP) { + mad_agent_priv->sol_fc_wait_count--; + list_move_tail(&mad_send_wr->agent_list, + &mad_agent_priv->backlog_list); + } else { + expect_mad_state(mad_send_wr, IB_MAD_STATE_INIT); + list_add_tail(&mad_send_wr->agent_list, + &mad_agent_priv->backlog_list); + } +} + +static void handle_send_state(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_agent_private *mad_agent_priv) +{ + if (mad_send_wr->state == IB_MAD_STATE_INIT) { + list_add_tail(&mad_send_wr->agent_list, + &mad_agent_priv->send_list); + } else { + expect_mad_state2(mad_send_wr, IB_MAD_STATE_WAIT_RESP, + IB_MAD_STATE_QUEUED); + list_move_tail(&mad_send_wr->agent_list, + &mad_agent_priv->send_list); + } + + if (mad_send_wr->is_solicited_fc) { + if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP) + mad_agent_priv->sol_fc_wait_count--; + mad_agent_priv->sol_fc_send_count++; + } +} + +static void handle_wait_state(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_agent_private *mad_agent_priv) +{ + struct ib_mad_send_wr_private *temp_mad_send_wr; + struct list_head *list_item; + unsigned long delay; + + expect_mad_state3(mad_send_wr, IB_MAD_STATE_SEND_START, + IB_MAD_STATE_WAIT_RESP, IB_MAD_STATE_CANCELED); + if (mad_send_wr->state == IB_MAD_STATE_SEND_START && + mad_send_wr->is_solicited_fc) { + mad_agent_priv->sol_fc_send_count--; + mad_agent_priv->sol_fc_wait_count++; + } + + list_del_init(&mad_send_wr->agent_list); + delay = mad_send_wr->timeout; + mad_send_wr->timeout += jiffies; + + if (delay) { + list_for_each_prev(list_item, + &mad_agent_priv->wait_list) { + temp_mad_send_wr = list_entry( + list_item, + struct ib_mad_send_wr_private, + agent_list); + if (time_after(mad_send_wr->timeout, + temp_mad_send_wr->timeout)) + break; + } + } else { + list_item = 
&mad_agent_priv->wait_list; + } + + list_add(&mad_send_wr->agent_list, list_item); +} + +static void handle_early_resp_state(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_agent_private *mad_agent_priv) +{ + expect_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START); + mad_agent_priv->sol_fc_send_count -= mad_send_wr->is_solicited_fc; +} + +static void handle_canceled_state(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_agent_private *mad_agent_priv) +{ + not_expect_mad_state(mad_send_wr, IB_MAD_STATE_DONE); + if (mad_send_wr->is_solicited_fc) { + if (mad_send_wr->state == IB_MAD_STATE_SEND_START) + mad_agent_priv->sol_fc_send_count--; + else if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP) + mad_agent_priv->sol_fc_wait_count--; + } +} + +static void handle_done_state(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_agent_private *mad_agent_priv) +{ + if (mad_send_wr->is_solicited_fc) { + if (mad_send_wr->state == IB_MAD_STATE_SEND_START) + mad_agent_priv->sol_fc_send_count--; + else if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP) + mad_agent_priv->sol_fc_wait_count--; + } + + list_del_init(&mad_send_wr->agent_list); +} + +void change_mad_state(struct ib_mad_send_wr_private *mad_send_wr, + enum ib_mad_state new_state) +{ + struct ib_mad_agent_private *mad_agent_priv = + mad_send_wr->mad_agent_priv; + + switch (new_state) { + case IB_MAD_STATE_INIT: + break; + case IB_MAD_STATE_QUEUED: + handle_queued_state(mad_send_wr, mad_agent_priv); + break; + case IB_MAD_STATE_SEND_START: + handle_send_state(mad_send_wr, mad_agent_priv); + break; + case IB_MAD_STATE_WAIT_RESP: + handle_wait_state(mad_send_wr, mad_agent_priv); + if (mad_send_wr->state == IB_MAD_STATE_CANCELED) + return; + break; + case IB_MAD_STATE_EARLY_RESP: + handle_early_resp_state(mad_send_wr, mad_agent_priv); + break; + case IB_MAD_STATE_CANCELED: + handle_canceled_state(mad_send_wr, mad_agent_priv); + break; + case IB_MAD_STATE_DONE: + handle_done_state(mad_send_wr, mad_agent_priv); + break; + } + + mad_send_wr->state = new_state; +} + +static bool is_solicited_fc_mad(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_rmpp_mad *rmpp_mad; + u8 mgmt_class; + + if (!mad_send_wr->timeout) + return 0; + + rmpp_mad = mad_send_wr->send_buf.mad; + if (mad_send_wr->mad_agent_priv->agent.rmpp_version && + (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) + return 0; + + mgmt_class = + ((struct ib_mad_hdr *)mad_send_wr->send_buf.mad)->mgmt_class; + return mgmt_class == IB_MGMT_CLASS_CM || + mgmt_class == IB_MGMT_CLASS_SUBN_ADM || + mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE; +} + +static bool mad_is_for_backlog(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_mad_agent_private *mad_agent_priv = + mad_send_wr->mad_agent_priv; + + if (!mad_send_wr->is_solicited_fc || !mad_agent_priv->sol_fc_max) + return false; + + if (!list_empty(&mad_agent_priv->backlog_list)) + return true; + + return mad_agent_priv->sol_fc_send_count + + mad_agent_priv->sol_fc_wait_count >= + mad_agent_priv->sol_fc_max; +} + /* * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated * with the registered client @@ -1272,9 +1281,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, if (ret) goto error; - if (!send_buf->mad_agent->send_handler || - (send_buf->timeout_ms && - !send_buf->mad_agent->recv_handler)) { + if (!send_buf->mad_agent->send_handler) { ret = -EINVAL; goto error; } @@ -1310,15 +1317,19 @@ int 
ib_post_send_mad(struct ib_mad_send_buf *send_buf, mad_send_wr->max_retries = send_buf->retries; mad_send_wr->retries_left = send_buf->retries; send_buf->retries = 0; - /* Reference for work request to QP + response */ - mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0); - mad_send_wr->status = IB_WC_SUCCESS; + change_mad_state(mad_send_wr, IB_MAD_STATE_INIT); /* Reference MAD agent until send completes */ - atomic_inc(&mad_agent_priv->refcount); + refcount_inc(&mad_agent_priv->refcount); spin_lock_irqsave(&mad_agent_priv->lock, flags); - list_add_tail(&mad_send_wr->agent_list, - &mad_agent_priv->send_list); + mad_send_wr->is_solicited_fc = is_solicited_fc_mad(mad_send_wr); + if (mad_is_for_backlog(mad_send_wr)) { + change_mad_state(mad_send_wr, IB_MAD_STATE_QUEUED); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + return 0; + } + + change_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { @@ -1330,9 +1341,9 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, if (ret < 0) { /* Fail send request */ spin_lock_irqsave(&mad_agent_priv->lock, flags); - list_del(&mad_send_wr->agent_list); + change_mad_state(mad_send_wr, IB_MAD_STATE_DONE); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - atomic_dec(&mad_agent_priv->refcount); + deref_mad_agent(mad_agent_priv); goto error; } } @@ -1372,25 +1383,6 @@ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc) } EXPORT_SYMBOL(ib_free_recv_mad); -struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, - u8 rmpp_version, - ib_mad_send_handler send_handler, - ib_mad_recv_handler recv_handler, - void *context) -{ - return ERR_PTR(-EINVAL); /* XXX: for now */ -} -EXPORT_SYMBOL(ib_redirect_mad_qp); - -int ib_process_mad_wc(struct ib_mad_agent *mad_agent, - struct ib_wc *wc) -{ - dev_err(&mad_agent->device->dev, - "ib_process_mad_wc() not implemented yet\n"); - return 0; -} -EXPORT_SYMBOL(ib_process_mad_wc); - static int method_in_use(struct ib_mad_mgmt_method_table **method, struct ib_mad_reg_req *mad_reg_req) { @@ -1478,11 +1470,9 @@ static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method, int i; /* Remove any methods for this mad agent */ - for (i = 0; i < IB_MGMT_MAX_METHODS; i++) { - if (method->agent[i] == agent) { + for (i = 0; i < IB_MGMT_MAX_METHODS; i++) + if (method->agent[i] == agent) method->agent[i] = NULL; - } - } } static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, @@ -1657,9 +1647,8 @@ static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv) * Was MAD registration request supplied * with original registration ? 
*/ - if (!agent_priv->reg_req) { + if (!agent_priv->reg_req) goto out; - } port_priv = agent_priv->qp_info->port_priv; mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class); @@ -1756,8 +1745,8 @@ find_mad_agent(struct ib_mad_port_private *port_priv, */ hi_tid = be64_to_cpu(mad_hdr->tid) >> 32; rcu_read_lock(); - mad_agent = idr_find(&ib_mad_clients, hi_tid); - if (mad_agent && !atomic_inc_not_zero(&mad_agent->refcount)) + mad_agent = xa_load(&ib_mad_clients, hi_tid); + if (mad_agent && !refcount_inc_not_zero(&mad_agent->refcount)) mad_agent = NULL; rcu_read_unlock(); } else { @@ -1809,14 +1798,14 @@ find_mad_agent(struct ib_mad_port_private *port_priv, } } if (mad_agent) - atomic_inc(&mad_agent->refcount); + refcount_inc(&mad_agent->refcount); out: spin_unlock_irqrestore(&port_priv->reg_lock, flags); } if (mad_agent && !mad_agent->agent.recv_handler) { dev_notice(&port_priv->device->dev, - "No receive handler for client %p on port %d\n", + "No receive handler for client %p on port %u\n", &mad_agent->agent, port_priv->port_num); deref_mad_agent(mad_agent); mad_agent = NULL; @@ -1835,7 +1824,7 @@ static int validate_mad(const struct ib_mad_hdr *mad_hdr, /* Make sure MAD base version is understood */ if (mad_hdr->base_version != IB_MGMT_BASE_VERSION && (!opa || mad_hdr->base_version != OPA_MGMT_BASE_VERSION)) { - pr_err("MAD received with unsupported base version %d %s\n", + pr_err("MAD received with unsupported base version %u %s\n", mad_hdr->base_version, opa ? "(opa)" : ""); goto out; } @@ -1880,15 +1869,16 @@ static inline int rcv_has_same_class(const struct ib_mad_send_wr_private *wr, rwc->recv_buf.mad->mad_hdr.mgmt_class; } -static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv, - const struct ib_mad_send_wr_private *wr, - const struct ib_mad_recv_wc *rwc ) +static inline int +rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv, + const struct ib_mad_send_wr_private *wr, + const struct ib_mad_recv_wc *rwc) { struct rdma_ah_attr attr; u8 send_resp, rcv_resp; union ib_gid sgid; struct ib_device *device = mad_agent_priv->agent.device; - u8 port_num = mad_agent_priv->agent.port_num; + u32 port_num = mad_agent_priv->agent.port_num; u8 lmc; bool has_grh; @@ -1959,7 +1949,19 @@ ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv, */ (is_direct(mad_hdr->mgmt_class) || rcv_has_same_gid(mad_agent_priv, wr, wc))) - return (wr->status == IB_WC_SUCCESS) ? wr : NULL; + return (wr->state != IB_MAD_STATE_CANCELED) ? wr : NULL; + } + + list_for_each_entry(wr, &mad_agent_priv->backlog_list, agent_list) { + if ((wr->tid == mad_hdr->tid) && + rcv_has_same_class(wr, wc) && + /* + * Don't check GID for direct routed MADs. + * These might have permissive LIDs. + */ + (is_direct(mad_hdr->mgmt_class) || + rcv_has_same_gid(mad_agent_priv, wr, wc))) + return (wr->state != IB_MAD_STATE_CANCELED) ? wr : NULL; } /* @@ -1978,17 +1980,55 @@ ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv, (is_direct(mad_hdr->mgmt_class) || rcv_has_same_gid(mad_agent_priv, wr, wc))) /* Verify request has not been canceled */ - return (wr->status == IB_WC_SUCCESS) ? wr : NULL; + return (wr->state != IB_MAD_STATE_CANCELED) ? 
wr : NULL; } return NULL; } +static void +process_backlog_mads(struct ib_mad_agent_private *mad_agent_priv) +{ + struct ib_mad_send_wr_private *mad_send_wr; + struct ib_mad_send_wc mad_send_wc = {}; + unsigned long flags; + int ret; + + spin_lock_irqsave(&mad_agent_priv->lock, flags); + while (!list_empty(&mad_agent_priv->backlog_list) && + (mad_agent_priv->sol_fc_send_count + + mad_agent_priv->sol_fc_wait_count < + mad_agent_priv->sol_fc_max)) { + mad_send_wr = list_entry(mad_agent_priv->backlog_list.next, + struct ib_mad_send_wr_private, + agent_list); + change_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + ret = ib_send_mad(mad_send_wr); + if (ret) { + spin_lock_irqsave(&mad_agent_priv->lock, flags); + deref_mad_agent(mad_agent_priv); + change_mad_state(mad_send_wr, IB_MAD_STATE_DONE); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + mad_send_wc.send_buf = &mad_send_wr->send_buf; + mad_send_wc.status = IB_WC_LOC_QP_OP_ERR; + mad_agent_priv->agent.send_handler( + &mad_agent_priv->agent, &mad_send_wc); + } + + spin_lock_irqsave(&mad_agent_priv->lock, flags); + } + + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); +} + void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr) { mad_send_wr->timeout = 0; - if (mad_send_wr->refcount == 1) - list_move_tail(&mad_send_wr->agent_list, - &mad_send_wr->mad_agent_priv->done_list); + if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP || + mad_send_wr->state == IB_MAD_STATE_QUEUED) + change_mad_state(mad_send_wr, IB_MAD_STATE_DONE); + else + change_mad_state(mad_send_wr, IB_MAD_STATE_EARLY_RESP); } static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, @@ -1997,6 +2037,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, struct ib_mad_send_wr_private *mad_send_wr; struct ib_mad_send_wc mad_send_wc; unsigned long flags; + bool is_mad_done; int ret; INIT_LIST_HEAD(&mad_recv_wc->rmpp_list); @@ -2034,16 +2075,18 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, mad_agent_priv->agent.recv_handler( &mad_agent_priv->agent, NULL, mad_recv_wc); - atomic_dec(&mad_agent_priv->refcount); + deref_mad_agent(mad_agent_priv); } else { /* not user rmpp, revert to normal behavior and - * drop the mad */ + * drop the mad + */ ib_free_recv_mad(mad_recv_wc); deref_mad_agent(mad_agent_priv); return; } } else { ib_mark_mad_done(mad_send_wr); + is_mad_done = (mad_send_wr->state == IB_MAD_STATE_DONE); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); /* Defined behavior is to complete response before request */ @@ -2051,32 +2094,35 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, &mad_agent_priv->agent, &mad_send_wr->send_buf, mad_recv_wc); - atomic_dec(&mad_agent_priv->refcount); + deref_mad_agent(mad_agent_priv); - mad_send_wc.status = IB_WC_SUCCESS; - mad_send_wc.vendor_err = 0; - mad_send_wc.send_buf = &mad_send_wr->send_buf; - ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); + if (is_mad_done) { + mad_send_wc.status = IB_WC_SUCCESS; + mad_send_wc.vendor_err = 0; + mad_send_wc.send_buf = &mad_send_wr->send_buf; + ib_mad_complete_send_wr(mad_send_wr, + &mad_send_wc); + } } } else { mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, NULL, mad_recv_wc); deref_mad_agent(mad_agent_priv); } - - return; } static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv, const struct ib_mad_qp_info *qp_info, const struct ib_wc *wc, - int port_num, + u32 
port_num, struct ib_mad_private *recv, struct ib_mad_private *response) { enum smi_forward_action retsmi; struct ib_smp *smp = (struct ib_smp *)recv->mad; + trace_ib_mad_handle_ib_smi(smp); + if (smi_handle_dr_smp_recv(smp, rdma_cap_ib_switch(port_priv->device), port_num, @@ -2155,13 +2201,15 @@ static enum smi_action handle_opa_smi(struct ib_mad_port_private *port_priv, struct ib_mad_qp_info *qp_info, struct ib_wc *wc, - int port_num, + u32 port_num, struct ib_mad_private *recv, struct ib_mad_private *response) { enum smi_forward_action retsmi; struct opa_smp *smp = (struct opa_smp *)recv->mad; + trace_ib_mad_handle_opa_smi(smp); + if (opa_smi_handle_dr_smp_recv(smp, rdma_cap_ib_switch(port_priv->device), port_num, @@ -2209,7 +2257,7 @@ static enum smi_action handle_smi(struct ib_mad_port_private *port_priv, struct ib_mad_qp_info *qp_info, struct ib_wc *wc, - int port_num, + u32 port_num, struct ib_mad_private *recv, struct ib_mad_private *response, bool opa) @@ -2233,7 +2281,7 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc) struct ib_mad_private_header *mad_priv_hdr; struct ib_mad_private *recv, *response = NULL; struct ib_mad_agent_private *mad_agent; - int port_num; + u32 port_num; int ret = IB_MAD_RESULT_SUCCESS; size_t mad_size; u16 resp_mad_pkey_index = 0; @@ -2279,13 +2327,13 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc) recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad; recv->header.recv_wc.recv_buf.grh = &recv->grh; - if (atomic_read(&qp_info->snoop_count)) - snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS); - /* Validate MAD */ if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa)) goto out; + trace_ib_mad_recv_done_handler(qp_info, wc, + (struct ib_mad_hdr *)recv->mad); + mad_size = recv->mad_size; response = alloc_mad_private(mad_size, GFP_KERNEL); if (!response) @@ -2308,9 +2356,9 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc) if (port_priv->device->ops.process_mad) { ret = port_priv->device->ops.process_mad( port_priv->device, 0, port_priv->port_num, wc, - &recv->grh, (const struct ib_mad_hdr *)recv->mad, - recv->mad_size, (struct ib_mad_hdr *)response->mad, - &mad_size, &resp_mad_pkey_index); + &recv->grh, (const struct ib_mad *)recv->mad, + (struct ib_mad *)response->mad, &mad_size, + &resp_mad_pkey_index); if (opa) wc->pkey_index = resp_mad_pkey_index; @@ -2332,6 +2380,7 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc) mad_agent = find_mad_agent(port_priv, (const struct ib_mad_hdr *)recv->mad); if (mad_agent) { + trace_ib_mad_recv_done_agent(mad_agent); ib_mad_complete_recv(mad_agent, &recv->header.recv_wc); /* * recv is freed up in error cases in ib_mad_complete_recv @@ -2381,29 +2430,11 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv) static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_send_wr_private *temp_mad_send_wr; - struct list_head *list_item; unsigned long delay; mad_agent_priv = mad_send_wr->mad_agent_priv; - list_del(&mad_send_wr->agent_list); - delay = mad_send_wr->timeout; - mad_send_wr->timeout += jiffies; - - if (delay) { - list_for_each_prev(list_item, &mad_agent_priv->wait_list) { - temp_mad_send_wr = list_entry(list_item, - struct ib_mad_send_wr_private, - agent_list); - if (time_after(mad_send_wr->timeout, - temp_mad_send_wr->timeout)) - break; - } - } - else - list_item = &mad_agent_priv->wait_list; - 
list_add(&mad_send_wr->agent_list, list_item); + change_mad_state(mad_send_wr, IB_MAD_STATE_WAIT_RESP); /* Reschedule a work item if we have a shorter timeout */ if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) @@ -2437,32 +2468,28 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, } else ret = IB_RMPP_RESULT_UNHANDLED; - if (mad_send_wc->status != IB_WC_SUCCESS && - mad_send_wr->status == IB_WC_SUCCESS) { - mad_send_wr->status = mad_send_wc->status; - mad_send_wr->refcount -= (mad_send_wr->timeout > 0); - } - - if (--mad_send_wr->refcount > 0) { - if (mad_send_wr->refcount == 1 && mad_send_wr->timeout && - mad_send_wr->status == IB_WC_SUCCESS) { - wait_for_response(mad_send_wr); - } + if (mad_send_wr->state == IB_MAD_STATE_CANCELED) + mad_send_wc->status = IB_WC_WR_FLUSH_ERR; + else if (mad_send_wr->state == IB_MAD_STATE_SEND_START && + mad_send_wr->timeout) { + wait_for_response(mad_send_wr); goto done; } /* Remove send from MAD agent and notify client of completion */ - list_del(&mad_send_wr->agent_list); + if (mad_send_wr->state != IB_MAD_STATE_DONE) + change_mad_state(mad_send_wr, IB_MAD_STATE_DONE); adjust_timeout(mad_agent_priv); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - if (mad_send_wr->status != IB_WC_SUCCESS ) - mad_send_wc->status = mad_send_wr->status; - if (ret == IB_RMPP_RESULT_INTERNAL) + if (ret == IB_RMPP_RESULT_INTERNAL) { ib_rmpp_send_handler(mad_send_wc); - else + } else { + if (mad_send_wr->is_solicited_fc) + process_backlog_mads(mad_agent_priv); mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, mad_send_wc); + } /* Release reference on agent taken when sending */ deref_mad_agent(mad_agent_priv); @@ -2496,6 +2523,9 @@ static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc) send_queue = mad_list->mad_queue; qp_info = send_queue->qp_info; + trace_ib_mad_send_done_agent(mad_send_wr->mad_agent_priv); + trace_ib_mad_send_done_handler(mad_send_wr, wc); + retry: ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device, mad_send_wr->header_mapping, @@ -2521,12 +2551,10 @@ retry: mad_send_wc.send_buf = &mad_send_wr->send_buf; mad_send_wc.status = wc->status; mad_send_wc.vendor_err = wc->vendor_err; - if (atomic_read(&qp_info->snoop_count)) - snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc, - IB_MAD_SNOOP_SEND_COMPLETIONS); ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); if (queued_send_wr) { + trace_ib_mad_send_done_resend(queued_send_wr, qp_info); ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr, NULL); if (ret) { @@ -2574,6 +2602,7 @@ static bool ib_mad_send_error(struct ib_mad_port_private *port_priv, if (mad_send_wr->retry) { /* Repost send */ mad_send_wr->retry = 0; + trace_ib_mad_error_handler(mad_send_wr, qp_info); ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr, NULL); if (!ret) @@ -2602,40 +2631,53 @@ static bool ib_mad_send_error(struct ib_mad_port_private *port_priv, return true; } +static void clear_mad_error_list(struct list_head *list, + enum ib_wc_status wc_status, + struct ib_mad_agent_private *mad_agent_priv) +{ + struct ib_mad_send_wr_private *mad_send_wr, *n; + struct ib_mad_send_wc mad_send_wc; + + mad_send_wc.status = wc_status; + mad_send_wc.vendor_err = 0; + + list_for_each_entry_safe(mad_send_wr, n, list, agent_list) { + mad_send_wc.send_buf = &mad_send_wr->send_buf; + mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, + &mad_send_wc); + deref_mad_agent(mad_agent_priv); + } +} + static void cancel_mads(struct ib_mad_agent_private 
*mad_agent_priv) { unsigned long flags; struct ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr; - struct ib_mad_send_wc mad_send_wc; struct list_head cancel_list; INIT_LIST_HEAD(&cancel_list); spin_lock_irqsave(&mad_agent_priv->lock, flags); list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, - &mad_agent_priv->send_list, agent_list) { - if (mad_send_wr->status == IB_WC_SUCCESS) { - mad_send_wr->status = IB_WC_WR_FLUSH_ERR; - mad_send_wr->refcount -= (mad_send_wr->timeout > 0); - } - } - - /* Empty wait list to prevent receives from finding a request */ - list_splice_init(&mad_agent_priv->wait_list, &cancel_list); - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + &mad_agent_priv->send_list, agent_list) + change_mad_state(mad_send_wr, IB_MAD_STATE_CANCELED); - /* Report all cancelled requests */ - mad_send_wc.status = IB_WC_WR_FLUSH_ERR; - mad_send_wc.vendor_err = 0; + /* Empty wait & backlog list to prevent receives from finding request */ + list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, + &mad_agent_priv->wait_list, agent_list) { + change_mad_state(mad_send_wr, IB_MAD_STATE_DONE); + list_add_tail(&mad_send_wr->agent_list, &cancel_list); + } list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, - &cancel_list, agent_list) { - mad_send_wc.send_buf = &mad_send_wr->send_buf; - list_del(&mad_send_wr->agent_list); - mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, - &mad_send_wc); - atomic_dec(&mad_agent_priv->refcount); + &mad_agent_priv->backlog_list, agent_list) { + change_mad_state(mad_send_wr, IB_MAD_STATE_DONE); + list_add_tail(&mad_send_wr->agent_list, &cancel_list); } + + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + /* Report all cancelled requests */ + clear_mad_error_list(&cancel_list, IB_WC_WR_FLUSH_ERR, mad_agent_priv); } static struct ib_mad_send_wr_private* @@ -2657,31 +2699,40 @@ find_send_wr(struct ib_mad_agent_private *mad_agent_priv, &mad_send_wr->send_buf == send_buf) return mad_send_wr; } + + list_for_each_entry(mad_send_wr, &mad_agent_priv->backlog_list, + agent_list) { + if (&mad_send_wr->send_buf == send_buf) + return mad_send_wr; + } + return NULL; } -int ib_modify_mad(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf, u32 timeout_ms) +int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; unsigned long flags; int active; - mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, - agent); + if (!send_buf) + return -EINVAL; + + mad_agent_priv = container_of(send_buf->mad_agent, + struct ib_mad_agent_private, agent); spin_lock_irqsave(&mad_agent_priv->lock, flags); mad_send_wr = find_send_wr(mad_agent_priv, send_buf); - if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) { + if (!mad_send_wr || mad_send_wr->state == IB_MAD_STATE_CANCELED) { spin_unlock_irqrestore(&mad_agent_priv->lock, flags); return -EINVAL; } - active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1); - if (!timeout_ms) { - mad_send_wr->status = IB_WC_WR_FLUSH_ERR; - mad_send_wr->refcount -= (mad_send_wr->timeout > 0); - } + active = ((mad_send_wr->state == IB_MAD_STATE_SEND_START) || + (mad_send_wr->state == IB_MAD_STATE_EARLY_RESP) || + (mad_send_wr->state == IB_MAD_STATE_QUEUED && timeout_ms)); + if (!timeout_ms) + change_mad_state(mad_send_wr, IB_MAD_STATE_CANCELED); mad_send_wr->send_buf.timeout_ms = timeout_ms; if (active) @@ -2694,13 +2745,6 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, } 
EXPORT_SYMBOL(ib_modify_mad); -void ib_cancel_mad(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf) -{ - ib_modify_mad(mad_agent, send_buf, 0); -} -EXPORT_SYMBOL(ib_cancel_mad); - static void local_completions(struct work_struct *work) { struct ib_mad_agent_private *mad_agent_priv; @@ -2763,16 +2807,12 @@ static void local_completions(struct work_struct *work) local->mad_priv->header.recv_wc.recv_buf.grh = NULL; local->mad_priv->header.recv_wc.recv_buf.mad = (struct ib_mad *)local->mad_priv->mad; - if (atomic_read(&recv_mad_agent->qp_info->snoop_count)) - snoop_recv(recv_mad_agent->qp_info, - &local->mad_priv->header.recv_wc, - IB_MAD_SNOOP_RECVS); recv_mad_agent->agent.recv_handler( &recv_mad_agent->agent, &local->mad_send_wr->send_buf, &local->mad_priv->header.recv_wc); spin_lock_irqsave(&recv_mad_agent->lock, flags); - atomic_dec(&recv_mad_agent->refcount); + deref_mad_agent(recv_mad_agent); spin_unlock_irqrestore(&recv_mad_agent->lock, flags); } @@ -2781,15 +2821,11 @@ local_send_completion: mad_send_wc.status = IB_WC_SUCCESS; mad_send_wc.vendor_err = 0; mad_send_wc.send_buf = &local->mad_send_wr->send_buf; - if (atomic_read(&mad_agent_priv->qp_info->snoop_count)) - snoop_send(mad_agent_priv->qp_info, - &local->mad_send_wr->send_buf, - &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS); mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); spin_lock_irqsave(&mad_agent_priv->lock, flags); - atomic_dec(&mad_agent_priv->refcount); + deref_mad_agent(mad_agent_priv); if (free_mad) kfree(local->mad_priv); kfree(local); @@ -2808,6 +2844,11 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) mad_send_wr->send_buf.retries++; mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); + if (mad_send_wr->is_solicited_fc && + !list_empty(&mad_send_wr->mad_agent_priv->backlog_list)) { + change_mad_state(mad_send_wr, IB_MAD_STATE_QUEUED); + return 0; + } if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) { ret = ib_retry_rmpp(mad_send_wr); @@ -2825,24 +2866,25 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) } else ret = ib_send_mad(mad_send_wr); - if (!ret) { - mad_send_wr->refcount++; - list_add_tail(&mad_send_wr->agent_list, - &mad_send_wr->mad_agent_priv->send_list); - } + if (!ret) + change_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START); + return ret; } static void timeout_sends(struct work_struct *work) { - struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; - struct ib_mad_send_wc mad_send_wc; + struct ib_mad_agent_private *mad_agent_priv; + struct list_head timeout_list; + struct list_head cancel_list; + struct list_head *list_item; unsigned long flags, delay; mad_agent_priv = container_of(work, struct ib_mad_agent_private, timed_work.work); - mad_send_wc.vendor_err = 0; + INIT_LIST_HEAD(&timeout_list); + INIT_LIST_HEAD(&cancel_list); spin_lock_irqsave(&mad_agent_priv->lock, flags); while (!list_empty(&mad_agent_priv->wait_list)) { @@ -2860,25 +2902,22 @@ static void timeout_sends(struct work_struct *work) break; } - list_del(&mad_send_wr->agent_list); - if (mad_send_wr->status == IB_WC_SUCCESS && - !retry_send(mad_send_wr)) - continue; - - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - - if (mad_send_wr->status == IB_WC_SUCCESS) - mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR; + if (mad_send_wr->state == IB_MAD_STATE_CANCELED) + list_item = &cancel_list; + else if (retry_send(mad_send_wr)) + list_item = &timeout_list; else - mad_send_wc.status = 
mad_send_wr->status; - mad_send_wc.send_buf = &mad_send_wr->send_buf; - mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, - &mad_send_wc); + continue; - atomic_dec(&mad_agent_priv->refcount); - spin_lock_irqsave(&mad_agent_priv->lock, flags); + change_mad_state(mad_send_wr, IB_MAD_STATE_DONE); + list_add_tail(&mad_send_wr->agent_list, list_item); } + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + process_backlog_mads(mad_agent_priv); + clear_mad_error_list(&timeout_list, IB_WC_RESP_TIMEOUT_ERR, + mad_agent_priv); + clear_mad_error_list(&cancel_list, IB_WC_WR_FLUSH_ERR, mad_agent_priv); } /* @@ -2888,11 +2927,11 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, struct ib_mad_private *mad) { unsigned long flags; - int post, ret; struct ib_mad_private *mad_priv; struct ib_sge sg_list; struct ib_recv_wr recv_wr; struct ib_mad_queue *recv_queue = &qp_info->recv_queue; + int ret = 0; /* Initialize common scatter list fields */ sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey; @@ -2902,7 +2941,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, recv_wr.sg_list = &sg_list; recv_wr.num_sge = 1; - do { + while (true) { /* Allocate and map receive buffer */ if (mad) { mad_priv = mad; @@ -2910,10 +2949,8 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, } else { mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv), GFP_ATOMIC); - if (!mad_priv) { - ret = -ENOMEM; - break; - } + if (!mad_priv) + return -ENOMEM; } sg_list.length = mad_priv_dma_size(mad_priv); sg_list.addr = ib_dma_map_single(qp_info->port_priv->device, @@ -2923,35 +2960,40 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device, sg_list.addr))) { ret = -ENOMEM; - break; + goto free_mad_priv; } mad_priv->header.mapping = sg_list.addr; mad_priv->header.mad_list.mad_queue = recv_queue; mad_priv->header.mad_list.cqe.done = ib_mad_recv_done; recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe; - - /* Post receive WR */ spin_lock_irqsave(&recv_queue->lock, flags); - post = (++recv_queue->count < recv_queue->max_active); - list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list); + if (recv_queue->count >= recv_queue->max_active) { + /* Fully populated the receive queue */ + spin_unlock_irqrestore(&recv_queue->lock, flags); + break; + } + recv_queue->count++; + list_add_tail(&mad_priv->header.mad_list.list, + &recv_queue->list); spin_unlock_irqrestore(&recv_queue->lock, flags); + ret = ib_post_recv(qp_info->qp, &recv_wr, NULL); if (ret) { spin_lock_irqsave(&recv_queue->lock, flags); list_del(&mad_priv->header.mad_list.list); recv_queue->count--; spin_unlock_irqrestore(&recv_queue->lock, flags); - ib_dma_unmap_single(qp_info->port_priv->device, - mad_priv->header.mapping, - mad_priv_dma_size(mad_priv), - DMA_FROM_DEVICE); - kfree(mad_priv); dev_err(&qp_info->port_priv->device->dev, "ib_post_recv failed: %d\n", ret); break; } - } while (post); + } + ib_dma_unmap_single(qp_info->port_priv->device, + mad_priv->header.mapping, + mad_priv_dma_size(mad_priv), DMA_FROM_DEVICE); +free_mad_priv: + kfree(mad_priv); return ret; } @@ -3080,7 +3122,7 @@ static void qp_event_handler(struct ib_event *event, void *qp_context) /* It's worse than that! He's dead, Jim! 
*/ dev_err(&qp_info->port_priv->device->dev, - "Fatal error (%d) on MAD QP (%d)\n", + "Fatal error (%d) on MAD QP (%u)\n", event->event, qp_info->qp->qp_num); } @@ -3100,10 +3142,6 @@ static void init_mad_qp(struct ib_mad_port_private *port_priv, init_mad_queue(qp_info, &qp_info->send_queue); init_mad_queue(qp_info, &qp_info->recv_queue); INIT_LIST_HEAD(&qp_info->overflow_list); - spin_lock_init(&qp_info->snoop_lock); - qp_info->snoop_table = NULL; - qp_info->snoop_table_size = 0; - atomic_set(&qp_info->snoop_count, 0); } static int create_mad_qp(struct ib_mad_qp_info *qp_info, @@ -3147,7 +3185,6 @@ static void destroy_mad_qp(struct ib_mad_qp_info *qp_info) return; ib_destroy_qp(qp_info->qp); - kfree(qp_info->snoop_table); } /* @@ -3155,12 +3192,11 @@ static void destroy_mad_qp(struct ib_mad_qp_info *qp_info) * Create the QP, PD, MR, and CQ if needed */ static int ib_mad_port_open(struct ib_device *device, - int port_num) + u32 port_num) { int ret, cq_size; struct ib_mad_port_private *port_priv; unsigned long flags; - char name[sizeof "ib_mad123"]; int has_smi; if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE)) @@ -3186,18 +3222,18 @@ static int ib_mad_port_open(struct ib_device *device, if (has_smi) cq_size *= 2; + port_priv->pd = ib_alloc_pd(device, 0); + if (IS_ERR(port_priv->pd)) { + dev_err(&device->dev, "Couldn't create ib_mad PD\n"); + ret = PTR_ERR(port_priv->pd); + goto error3; + } + port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0, IB_POLL_UNBOUND_WORKQUEUE); if (IS_ERR(port_priv->cq)) { dev_err(&device->dev, "Couldn't create ib_mad CQ\n"); ret = PTR_ERR(port_priv->cq); - goto error3; - } - - port_priv->pd = ib_alloc_pd(device, 0); - if (IS_ERR(port_priv->pd)) { - dev_err(&device->dev, "Couldn't create ib_mad PD\n"); - ret = PTR_ERR(port_priv->pd); goto error4; } @@ -3206,12 +3242,15 @@ static int ib_mad_port_open(struct ib_device *device, if (ret) goto error6; } - ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI); - if (ret) - goto error7; - snprintf(name, sizeof name, "ib_mad%d", port_num); - port_priv->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); + if (rdma_cap_ib_cm(device, port_num)) { + ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI); + if (ret) + goto error7; + } + + port_priv->wq = alloc_ordered_workqueue("ib_mad%u", WQ_MEM_RECLAIM, + port_num); if (!port_priv->wq) { ret = -ENOMEM; goto error8; @@ -3240,11 +3279,11 @@ error8: error7: destroy_mad_qp(&port_priv->qp_info[0]); error6: - ib_dealloc_pd(port_priv->pd); -error4: ib_free_cq(port_priv->cq); cleanup_recv_queue(&port_priv->qp_info[1]); cleanup_recv_queue(&port_priv->qp_info[0]); +error4: + ib_dealloc_pd(port_priv->pd); error3: kfree(port_priv); @@ -3256,7 +3295,7 @@ error3: * If there are no classes using the port, free the port * resources (CQ, MR, PD, QP) and remove the port's info structure */ -static int ib_mad_port_close(struct ib_device *device, int port_num) +static int ib_mad_port_close(struct ib_device *device, u32 port_num) { struct ib_mad_port_private *port_priv; unsigned long flags; @@ -3265,7 +3304,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) port_priv = __ib_get_mad_port(device, port_num); if (port_priv == NULL) { spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); - dev_err(&device->dev, "Port %d not found\n", port_num); + dev_err(&device->dev, "Port %u not found\n", port_num); return -ENODEV; } list_del_init(&port_priv->port_list); @@ -3274,8 +3313,8 @@ static int ib_mad_port_close(struct ib_device *device, int 
port_num) destroy_workqueue(port_priv->wq); destroy_mad_qp(&port_priv->qp_info[1]); destroy_mad_qp(&port_priv->qp_info[0]); - ib_dealloc_pd(port_priv->pd); ib_free_cq(port_priv->cq); + ib_dealloc_pd(port_priv->pd); cleanup_recv_queue(&port_priv->qp_info[1]); cleanup_recv_queue(&port_priv->qp_info[0]); /* XXX: Handle deallocation of MAD registration tables */ @@ -3285,9 +3324,11 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) return 0; } -static void ib_mad_init_device(struct ib_device *device) +static int ib_mad_init_device(struct ib_device *device) { int start, i; + unsigned int count = 0; + int ret; start = rdma_start_port(device); @@ -3295,17 +3336,23 @@ static void ib_mad_init_device(struct ib_device *device) if (!rdma_cap_ib_mad(device, i)) continue; - if (ib_mad_port_open(device, i)) { + ret = ib_mad_port_open(device, i); + if (ret) { dev_err(&device->dev, "Couldn't open port %d\n", i); goto error; } - if (ib_agent_port_open(device, i)) { + ret = ib_agent_port_open(device, i); + if (ret) { dev_err(&device->dev, "Couldn't open port %d for agents\n", i); goto error_agent; } + count++; } - return; + if (!count) + return -EOPNOTSUPP; + + return 0; error_agent: if (ib_mad_port_close(device, i)) @@ -3322,21 +3369,22 @@ error: if (ib_mad_port_close(device, i)) dev_err(&device->dev, "Couldn't close port %d\n", i); } + return ret; } static void ib_mad_remove_device(struct ib_device *device, void *client_data) { - int i; + unsigned int i; - for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { + rdma_for_each_port (device, i) { if (!rdma_cap_ib_mad(device, i)) continue; if (ib_agent_port_close(device, i)) dev_err(&device->dev, - "Couldn't close port %d for agents\n", i); + "Couldn't close port %u for agents\n", i); if (ib_mad_port_close(device, i)) - dev_err(&device->dev, "Couldn't close port %d\n", i); + dev_err(&device->dev, "Couldn't close port %u\n", i); } } @@ -3356,9 +3404,6 @@ int ib_mad_init(void) INIT_LIST_HEAD(&ib_mad_port_list); - /* Client ID 0 is used for snoop-only clients */ - idr_alloc(&ib_mad_clients, NULL, 0, 0, GFP_KERNEL); - if (ib_register_client(&mad_client)) { pr_err("Couldn't register ib_mad client\n"); return -EINVAL; |
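A note on the IDR-to-XArray conversion in this diff: agent IDs (hi_tid) stay below 1 << 24 because the mlx4 driver uses the top byte of the TID to tell virtual functions apart, and the cyclic allocator keeps handing out fresh IDs before wrapping, making it unlikely that a late response matches a recycled ID. Below is a minimal sketch of the same allocation pattern with illustrative names (only xa_alloc_cyclic(), xa_load(), xa_erase(), XA_LIMIT() and DEFINE_XARRAY_ALLOC1() are the real API; everything prefixed example_ is made up).

```c
#include <linux/gfp.h>
#include <linux/types.h>
#include <linux/xarray.h>

/* The _ALLOC1 variant marks index 0 busy, so ID 0 is never handed out. */
static DEFINE_XARRAY_ALLOC1(example_clients);
static u32 example_client_next;         /* cursor kept across allocations */

static int example_register(void *client, u32 *out_id)
{
        /*
         * Negative errno on failure, 0 on success, 1 if the cursor
         * wrapped.  The upper bound mirrors the 24-bit agent ID limit
         * imposed by mlx4's use of the TID's top byte.
         */
        return xa_alloc_cyclic(&example_clients, out_id, client,
                               XA_LIMIT(0, (1 << 24) - 1),
                               &example_client_next, GFP_KERNEL);
}

static void *example_lookup(u32 id)
{
        /* Lock-free; the caller pairs this with refcount_inc_not_zero(). */
        return xa_load(&example_clients, id);
}

static void example_unregister(u32 id)
{
        xa_erase(&example_clients, id);
}
```

find_mad_agent() above shows the intended lookup discipline: xa_load() under rcu_read_lock(), then refcount_inc_not_zero() to make sure the agent is not already being torn down.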
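The atomic_t to refcount_t switch keeps the existing teardown idiom: the registration itself holds one reference, every in-flight completion path takes another, and unregister drops its own reference and then sleeps on a completion until the last user calls deref_mad_agent(). Here is a self-contained sketch of that pattern under generic names (not the mad.c symbols).

```c
#include <linux/completion.h>
#include <linux/refcount.h>
#include <linux/slab.h>

struct agent {
        refcount_t refcount;
        struct completion comp;
};

static struct agent *agent_alloc(void)
{
        struct agent *a = kzalloc(sizeof(*a), GFP_KERNEL);

        if (!a)
                return NULL;
        refcount_set(&a->refcount, 1);  /* registration reference */
        init_completion(&a->comp);
        return a;
}

static void agent_put(struct agent *a)
{
        /* The last reference wakes whoever is blocked in agent_unregister(). */
        if (refcount_dec_and_test(&a->refcount))
                complete(&a->comp);
}

static void agent_unregister(struct agent *a)
{
        /* ...unpublish from lookup structures so no new references appear... */
        agent_put(a);                   /* drop the registration reference */
        wait_for_completion(&a->comp);  /* wait out in-flight users */
        kfree(a);
}
```

refcount_t adds the saturation and underflow checks that plain atomic_t lacks, which is the point of the conversion; the lifecycle itself is unchanged.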
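The solicited-MAD flow control introduced here caps, per agent, how many requests that expect a response may be outstanding, expressed as a fraction of the receive queue: 1/4 for CM agents (and of the smaller of the configured queue and its built-in default for the SMP classes), 1/32 for SA, and no limit for other classes. Assuming the usual default receive queue of 512 entries (IB_MAD_QP_RECV_SIZE, tunable via the recv_queue_size module parameter), that works out to 128 and 16 outstanding requests respectively; anything beyond the window is parked on backlog_list and released by process_backlog_mads() as responses or timeouts drain it. The sketch below mirrors the admission check in mad_is_for_backlog(); the structure and names are illustrative, and in mad.c the real counters live in ib_mad_agent_private under mad_agent_priv->lock.

```c
#include <linux/types.h>

/* Illustrative only: change_mad_state() maintains these under the agent lock. */
struct sol_fc_window {
        u32 send_count;         /* solicited sends posted to the QP      */
        u32 wait_count;         /* solicited sends waiting for responses */
        u32 max;                /* sol_fc_max computed at registration   */
        bool backlog_nonempty;  /* backlog_list already has entries      */
};

static bool must_backlog(const struct sol_fc_window *w)
{
        if (!w->max)
                return false;           /* class without flow control */
        if (w->backlog_nonempty)
                return true;            /* keep the backlog in order  */
        return w->send_count + w->wait_count >= w->max;
}
```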
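The per-request refcount and IB_WC status tracking give way to an explicit send state, with change_mad_state() doing the list moves and flow-control accounting in one place. The summary below is reconstructed from the handlers in this diff as a reading aid, not a normative table; the enum order is illustrative and the real definition lives in mad_priv.h.

```c
enum ib_mad_state {             /* names as used by the handlers above */
        IB_MAD_STATE_INIT,
        IB_MAD_STATE_QUEUED,
        IB_MAD_STATE_SEND_START,
        IB_MAD_STATE_WAIT_RESP,
        IB_MAD_STATE_EARLY_RESP,
        IB_MAD_STATE_CANCELED,
        IB_MAD_STATE_DONE,
};

/*
 * INIT           -> SEND_START  ib_post_send_mad() with window space
 * INIT           -> QUEUED      solicited-FC window full, parked on backlog
 * QUEUED         -> SEND_START  process_backlog_mads() found room
 * SEND_START     -> WAIT_RESP   send completed, response still expected
 * SEND_START     -> EARLY_RESP  response arrived before the send completion
 * WAIT_RESP      -> SEND_START  retransmit after timeout
 * WAIT_RESP      -> QUEUED      retransmit deferred behind an existing backlog
 * (any but DONE) -> CANCELED    ib_modify_mad(buf, 0) or cancel_mads()
 * (any)          -> DONE        terminal: removed from the agent lists
 */
```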
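With ib_cancel_mad() removed from this file and the mad_agent argument dropped from ib_modify_mad(), callers act on the send buffer alone: a timeout of zero cancels the request, a non-zero value reschedules it. A hedged caller-side sketch follows, assuming the post-patch signature; the send buffers are whatever ib_create_send_mad()/ib_post_send_mad() returned, and the error-handling policy is the caller's, not prescribed by this patch.

```c
#include <linux/printk.h>
#include <rdma/ib_mad.h>

static void example_adjust_sends(struct ib_mad_send_buf *stale_req,
                                 struct ib_mad_send_buf *slow_req)
{
        int ret;

        ret = ib_modify_mad(stale_req, 0);      /* timeout_ms == 0 cancels */
        if (ret)
                pr_debug("request already completed or canceled: %d\n", ret);

        ret = ib_modify_mad(slow_req, 5000);    /* extend timeout to 5000 ms */
        if (ret)
                pr_debug("could not change the timeout: %d\n", ret);
}
```

A canceled request is still reported through the agent's send_handler, with IB_WC_WR_FLUSH_ERR as the completion status, as cancel_mads() and ib_mad_complete_send_wr() above show.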
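The new trace calls follow the standard kernel tracepoint recipe: exactly one compilation unit defines CREATE_TRACE_POINTS before including the event header, every other user only gets declarations, and call sites use the generated trace_<name>() helpers. Below is a generic, minimal sketch of that recipe; the header path, event name and fields are invented for illustration and are not the actual ib_mad events defined in trace/events/ib_mad.h.

```c
/* include/trace/events/example_mad.h (hypothetical) */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM example_mad

#if !defined(_TRACE_EXAMPLE_MAD_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_EXAMPLE_MAD_H

#include <linux/tracepoint.h>

TRACE_EVENT(example_mad_posted,
        TP_PROTO(u32 dlid, u32 qpn),
        TP_ARGS(dlid, qpn),
        TP_STRUCT__entry(
                __field(u32, dlid)
                __field(u32, qpn)
        ),
        TP_fast_assign(
                __entry->dlid = dlid;
                __entry->qpn = qpn;
        ),
        TP_printk("dlid %u qpn %u", __entry->dlid, __entry->qpn)
);

#endif /* _TRACE_EXAMPLE_MAD_H */

/* Must stay outside the multi-read protection above. */
#include <trace/define_trace.h>

/*
 * In exactly one .c file, before including the header:
 *   #define CREATE_TRACE_POINTS
 *   #include <trace/events/example_mad.h>
 * Call sites then simply do: trace_example_mad_posted(dlid, qpn);
 */
```

Once the defining code is built in or loaded, the events appear under the tracefs events/ directory and can be enabled per event like any other kernel tracepoint.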
