diff options
Diffstat (limited to 'drivers/infiniband/core/mad.c')
| -rw-r--r-- | drivers/infiniband/core/mad.c | 2219 |
1 files changed, 1281 insertions, 938 deletions
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index dc3fd1e8af07..8f26bfb69586 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -3,6 +3,7 @@ * Copyright (c) 2005 Intel Corporation. All rights reserved. * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2009 HNR Consulting. All rights reserved. + * Copyright (c) 2014,2018 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -33,20 +34,43 @@ * SOFTWARE. * */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/dma-mapping.h> #include <linux/slab.h> #include <linux/module.h> +#include <linux/security.h> +#include <linux/xarray.h> #include <rdma/ib_cache.h> #include "mad_priv.h" +#include "core_priv.h" #include "mad_rmpp.h" #include "smi.h" +#include "opa_smi.h" #include "agent.h" -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_DESCRIPTION("kernel IB MAD API"); -MODULE_AUTHOR("Hal Rosenstock"); -MODULE_AUTHOR("Sean Hefty"); +#define CREATE_TRACE_POINTS +#include <trace/events/ib_mad.h> + +#ifdef CONFIG_TRACEPOINTS +static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_qp_info *qp_info, + struct trace_event_raw_ib_mad_send_template *entry) +{ + struct ib_ud_wr *wr = &mad_send_wr->send_wr; + struct rdma_ah_attr attr = {}; + + rdma_query_ah(wr->ah, &attr); + + /* These are common */ + entry->sl = attr.sl; + entry->rqpn = wr->remote_qpn; + entry->rqkey = wr->remote_qkey; + entry->dlid = rdma_ah_get_dlid(&attr); +} +#endif static int mad_sendq_size = IB_MAD_QP_SEND_SIZE; static int mad_recvq_size = IB_MAD_QP_RECV_SIZE; @@ -56,10 +80,9 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests module_param_named(recv_queue_size, mad_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests"); -static struct kmem_cache *ib_mad_cache; - +static DEFINE_XARRAY_ALLOC1(ib_mad_clients); +static u32 ib_mad_client_next; static struct list_head ib_mad_port_list; -static u32 ib_mad_client_id = 0; /* Port list lock */ static DEFINE_SPINLOCK(ib_mad_port_list_lock); @@ -70,7 +93,7 @@ static int method_in_use(struct ib_mad_mgmt_method_table **method, static void remove_mad_reg_req(struct ib_mad_agent_private *priv); static struct ib_mad_agent_private *find_mad_agent( struct ib_mad_port_private *port_priv, - struct ib_mad *mad); + const struct ib_mad_hdr *mad); static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, struct ib_mad_private *mad); static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv); @@ -81,13 +104,16 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, u8 mgmt_class); static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, struct ib_mad_agent_private *agent_priv); +static bool ib_mad_send_error(struct ib_mad_port_private *port_priv, + struct ib_wc *wc); +static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc); /* * Returns a ib_mad_port_private structure or NULL for a device/port * Assumes ib_mad_port_list_lock is being held */ static inline struct ib_mad_port_private * -__ib_get_mad_port(struct ib_device *device, int port_num) +__ib_get_mad_port(struct ib_device *device, u32 port_num) { struct ib_mad_port_private *entry; @@ -103,7 +129,7 @@ __ib_get_mad_port(struct ib_device *device, int port_num) * for a device/port */ static inline struct ib_mad_port_private * -ib_get_mad_port(struct ib_device *device, int port_num) +ib_get_mad_port(struct ib_device *device, u32 port_num) { struct ib_mad_port_private *entry; unsigned long flags; @@ -124,8 +150,7 @@ static inline u8 convert_mgmt_class(u8 mgmt_class) static int get_spl_qp_index(enum ib_qp_type qp_type) { - switch (qp_type) - { + switch (qp_type) { case IB_QPT_SMI: return 0; case IB_QPT_GSI: @@ -176,26 +201,52 @@ static int is_vendor_method_in_use( return 0; } -int ib_response_mad(struct ib_mad *mad) +int ib_response_mad(const struct ib_mad_hdr *hdr) { - return ((mad->mad_hdr.method & IB_MGMT_METHOD_RESP) || - (mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) || - ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_BM) && - (mad->mad_hdr.attr_mod & IB_BM_ATTR_MOD_RESP))); + return ((hdr->method & IB_MGMT_METHOD_RESP) || + (hdr->method == IB_MGMT_METHOD_TRAP_REPRESS) || + ((hdr->mgmt_class == IB_MGMT_CLASS_BM) && + (hdr->attr_mod & IB_BM_ATTR_MOD_RESP))); } EXPORT_SYMBOL(ib_response_mad); +#define SOL_FC_MAX_DEFAULT_FRAC 4 +#define SOL_FC_MAX_SA_FRAC 32 + +static int get_sol_fc_max_outstanding(struct ib_mad_reg_req *mad_reg_req) +{ + if (!mad_reg_req) + /* Send only agent */ + return mad_recvq_size / SOL_FC_MAX_DEFAULT_FRAC; + + switch (mad_reg_req->mgmt_class) { + case IB_MGMT_CLASS_CM: + return mad_recvq_size / SOL_FC_MAX_DEFAULT_FRAC; + case IB_MGMT_CLASS_SUBN_ADM: + return mad_recvq_size / SOL_FC_MAX_SA_FRAC; + case IB_MGMT_CLASS_SUBN_LID_ROUTED: + case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: + return min(mad_recvq_size, IB_MAD_QP_RECV_SIZE) / + SOL_FC_MAX_DEFAULT_FRAC; + default: + return 0; + } +} + /* * ib_register_mad_agent - Register to send/receive MADs + * + * Context: Process context. */ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, - u8 port_num, + u32 port_num, enum ib_qp_type qp_type, struct ib_mad_reg_req *mad_reg_req, u8 rmpp_version, ib_mad_send_handler send_handler, ib_mad_recv_handler recv_handler, - void *context) + void *context, + u32 registration_flags) { struct ib_mad_port_private *port_priv; struct ib_mad_agent *ret = ERR_PTR(-EINVAL); @@ -206,80 +257,130 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, struct ib_mad_mgmt_vendor_class *vendor_class; struct ib_mad_mgmt_method_table *method; int ret2, qpn; - unsigned long flags; u8 mgmt_class, vclass; + if ((qp_type == IB_QPT_SMI && !rdma_cap_ib_smi(device, port_num)) || + (qp_type == IB_QPT_GSI && !rdma_cap_ib_cm(device, port_num))) + return ERR_PTR(-EPROTONOSUPPORT); + /* Validate parameters */ qpn = get_spl_qp_index(qp_type); - if (qpn == -1) + if (qpn == -1) { + dev_dbg_ratelimited(&device->dev, "%s: invalid QP Type %d\n", + __func__, qp_type); goto error1; + } - if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) + if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) { + dev_dbg_ratelimited(&device->dev, + "%s: invalid RMPP Version %u\n", + __func__, rmpp_version); goto error1; + } /* Validate MAD registration request if supplied */ if (mad_reg_req) { - if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) + if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) { + dev_dbg_ratelimited(&device->dev, + "%s: invalid Class Version %u\n", + __func__, + mad_reg_req->mgmt_class_version); goto error1; - if (!recv_handler) + } + if (!recv_handler) { + dev_dbg_ratelimited(&device->dev, + "%s: no recv_handler\n", __func__); goto error1; + } if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) { /* * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only * one in this range currently allowed */ if (mad_reg_req->mgmt_class != - IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + dev_dbg_ratelimited(&device->dev, + "%s: Invalid Mgmt Class 0x%x\n", + __func__, mad_reg_req->mgmt_class); goto error1; + } } else if (mad_reg_req->mgmt_class == 0) { /* * Class 0 is reserved in IBA and is used for * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE */ + dev_dbg_ratelimited(&device->dev, + "%s: Invalid Mgmt Class 0\n", + __func__); goto error1; } else if (is_vendor_class(mad_reg_req->mgmt_class)) { /* * If class is in "new" vendor range, * ensure supplied OUI is not zero */ - if (!is_vendor_oui(mad_reg_req->oui)) + if (!is_vendor_oui(mad_reg_req->oui)) { + dev_dbg_ratelimited(&device->dev, + "%s: No OUI specified for class 0x%x\n", + __func__, + mad_reg_req->mgmt_class); goto error1; + } } /* Make sure class supplied is consistent with RMPP */ if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) { - if (rmpp_version) + if (rmpp_version) { + dev_dbg_ratelimited(&device->dev, + "%s: RMPP version for non-RMPP class 0x%x\n", + __func__, mad_reg_req->mgmt_class); goto error1; + } } + /* Make sure class supplied is consistent with QP type */ if (qp_type == IB_QPT_SMI) { if ((mad_reg_req->mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED) && (mad_reg_req->mgmt_class != - IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { + dev_dbg_ratelimited(&device->dev, + "%s: Invalid SM QP type: class 0x%x\n", + __func__, mad_reg_req->mgmt_class); goto error1; + } } else { if ((mad_reg_req->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) || (mad_reg_req->mgmt_class == - IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { + dev_dbg_ratelimited(&device->dev, + "%s: Invalid GS QP type: class 0x%x\n", + __func__, mad_reg_req->mgmt_class); goto error1; + } } } else { /* No registration request supplied */ if (!send_handler) goto error1; + if (registration_flags & IB_MAD_USER_RMPP) + goto error1; } /* Validate device and port */ port_priv = ib_get_mad_port(device, port_num); if (!port_priv) { + dev_dbg_ratelimited(&device->dev, "%s: Invalid port %u\n", + __func__, port_num); ret = ERR_PTR(-ENODEV); goto error1; } - /* Verify the QP requested is supported. For example, Ethernet devices - * will not have QP0 */ + /* Verify the QP requested is supported. For example, Ethernet devices + * will not have QP0. + */ if (!port_priv->qp_info[qpn].qp) { + dev_dbg_ratelimited(&device->dev, "%s: QP %d not supported\n", + __func__, qpn); ret = ERR_PTR(-EPROTONOSUPPORT); goto error1; } @@ -291,13 +392,6 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, goto error1; } - mad_agent_priv->agent.mr = ib_get_dma_mr(port_priv->qp_info[qpn].qp->pd, - IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(mad_agent_priv->agent.mr)) { - ret = ERR_PTR(-ENOMEM); - goto error2; - } - if (mad_reg_req) { reg_req = kmemdup(mad_reg_req, sizeof *reg_req, GFP_KERNEL); if (!reg_req) { @@ -316,24 +410,45 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, mad_agent_priv->agent.context = context; mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp; mad_agent_priv->agent.port_num = port_num; + mad_agent_priv->agent.flags = registration_flags; spin_lock_init(&mad_agent_priv->lock); INIT_LIST_HEAD(&mad_agent_priv->send_list); INIT_LIST_HEAD(&mad_agent_priv->wait_list); - INIT_LIST_HEAD(&mad_agent_priv->done_list); INIT_LIST_HEAD(&mad_agent_priv->rmpp_list); + INIT_LIST_HEAD(&mad_agent_priv->backlog_list); INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends); INIT_LIST_HEAD(&mad_agent_priv->local_list); INIT_WORK(&mad_agent_priv->local_work, local_completions); - atomic_set(&mad_agent_priv->refcount, 1); + refcount_set(&mad_agent_priv->refcount, 1); init_completion(&mad_agent_priv->comp); + mad_agent_priv->sol_fc_send_count = 0; + mad_agent_priv->sol_fc_wait_count = 0; + mad_agent_priv->sol_fc_max = + recv_handler ? get_sol_fc_max_outstanding(mad_reg_req) : 0; + + ret2 = ib_mad_agent_security_setup(&mad_agent_priv->agent, qp_type); + if (ret2) { + ret = ERR_PTR(ret2); + goto error4; + } - spin_lock_irqsave(&port_priv->reg_lock, flags); - mad_agent_priv->agent.hi_tid = ++ib_mad_client_id; + /* + * The mlx4 driver uses the top byte to distinguish which virtual + * function generated the MAD, so we must avoid using it. + */ + ret2 = xa_alloc_cyclic(&ib_mad_clients, &mad_agent_priv->agent.hi_tid, + mad_agent_priv, XA_LIMIT(0, (1 << 24) - 1), + &ib_mad_client_next, GFP_KERNEL); + if (ret2 < 0) { + ret = ERR_PTR(ret2); + goto error5; + } /* * Make sure MAD registration (if supplied) * is non overlapping with any existing ones */ + spin_lock_irq(&port_priv->reg_lock); if (mad_reg_req) { mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class); if (!is_vendor_class(mgmt_class)) { @@ -344,7 +459,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, if (method) { if (method_in_use(&method, mad_reg_req)) - goto error4; + goto error6; } } ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv, @@ -360,167 +475,46 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, if (is_vendor_method_in_use( vendor_class, mad_reg_req)) - goto error4; + goto error6; } } ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv); } if (ret2) { ret = ERR_PTR(ret2); - goto error4; + goto error6; } } + spin_unlock_irq(&port_priv->reg_lock); - /* Add mad agent into port's agent list */ - list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list); - spin_unlock_irqrestore(&port_priv->reg_lock, flags); - + trace_ib_mad_create_agent(mad_agent_priv); return &mad_agent_priv->agent; - +error6: + spin_unlock_irq(&port_priv->reg_lock); + xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid); +error5: + ib_mad_agent_security_cleanup(&mad_agent_priv->agent); error4: - spin_unlock_irqrestore(&port_priv->reg_lock, flags); kfree(reg_req); error3: - ib_dereg_mr(mad_agent_priv->agent.mr); -error2: kfree(mad_agent_priv); error1: return ret; } EXPORT_SYMBOL(ib_register_mad_agent); -static inline int is_snooping_sends(int mad_snoop_flags) -{ - return (mad_snoop_flags & - (/*IB_MAD_SNOOP_POSTED_SENDS | - IB_MAD_SNOOP_RMPP_SENDS |*/ - IB_MAD_SNOOP_SEND_COMPLETIONS /*| - IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS*/)); -} - -static inline int is_snooping_recvs(int mad_snoop_flags) -{ - return (mad_snoop_flags & - (IB_MAD_SNOOP_RECVS /*| - IB_MAD_SNOOP_RMPP_RECVS*/)); -} - -static int register_snoop_agent(struct ib_mad_qp_info *qp_info, - struct ib_mad_snoop_private *mad_snoop_priv) -{ - struct ib_mad_snoop_private **new_snoop_table; - unsigned long flags; - int i; - - spin_lock_irqsave(&qp_info->snoop_lock, flags); - /* Check for empty slot in array. */ - for (i = 0; i < qp_info->snoop_table_size; i++) - if (!qp_info->snoop_table[i]) - break; - - if (i == qp_info->snoop_table_size) { - /* Grow table. */ - new_snoop_table = krealloc(qp_info->snoop_table, - sizeof mad_snoop_priv * - (qp_info->snoop_table_size + 1), - GFP_ATOMIC); - if (!new_snoop_table) { - i = -ENOMEM; - goto out; - } - - qp_info->snoop_table = new_snoop_table; - qp_info->snoop_table_size++; - } - qp_info->snoop_table[i] = mad_snoop_priv; - atomic_inc(&qp_info->snoop_count); -out: - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); - return i; -} - -struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device, - u8 port_num, - enum ib_qp_type qp_type, - int mad_snoop_flags, - ib_mad_snoop_handler snoop_handler, - ib_mad_recv_handler recv_handler, - void *context) -{ - struct ib_mad_port_private *port_priv; - struct ib_mad_agent *ret; - struct ib_mad_snoop_private *mad_snoop_priv; - int qpn; - - /* Validate parameters */ - if ((is_snooping_sends(mad_snoop_flags) && !snoop_handler) || - (is_snooping_recvs(mad_snoop_flags) && !recv_handler)) { - ret = ERR_PTR(-EINVAL); - goto error1; - } - qpn = get_spl_qp_index(qp_type); - if (qpn == -1) { - ret = ERR_PTR(-EINVAL); - goto error1; - } - port_priv = ib_get_mad_port(device, port_num); - if (!port_priv) { - ret = ERR_PTR(-ENODEV); - goto error1; - } - /* Allocate structures */ - mad_snoop_priv = kzalloc(sizeof *mad_snoop_priv, GFP_KERNEL); - if (!mad_snoop_priv) { - ret = ERR_PTR(-ENOMEM); - goto error1; - } - - /* Now, fill in the various structures */ - mad_snoop_priv->qp_info = &port_priv->qp_info[qpn]; - mad_snoop_priv->agent.device = device; - mad_snoop_priv->agent.recv_handler = recv_handler; - mad_snoop_priv->agent.snoop_handler = snoop_handler; - mad_snoop_priv->agent.context = context; - mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp; - mad_snoop_priv->agent.port_num = port_num; - mad_snoop_priv->mad_snoop_flags = mad_snoop_flags; - init_completion(&mad_snoop_priv->comp); - mad_snoop_priv->snoop_index = register_snoop_agent( - &port_priv->qp_info[qpn], - mad_snoop_priv); - if (mad_snoop_priv->snoop_index < 0) { - ret = ERR_PTR(mad_snoop_priv->snoop_index); - goto error2; - } - - atomic_set(&mad_snoop_priv->refcount, 1); - return &mad_snoop_priv->agent; - -error2: - kfree(mad_snoop_priv); -error1: - return ret; -} -EXPORT_SYMBOL(ib_register_mad_snoop); - static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv) { - if (atomic_dec_and_test(&mad_agent_priv->refcount)) + if (refcount_dec_and_test(&mad_agent_priv->refcount)) complete(&mad_agent_priv->comp); } -static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv) -{ - if (atomic_dec_and_test(&mad_snoop_priv->refcount)) - complete(&mad_snoop_priv->comp); -} - static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) { struct ib_mad_port_private *port_priv; - unsigned long flags; /* Note that we could still be handling received MADs */ + trace_ib_mad_unregister_agent(mad_agent_priv); /* * Canceling all sends results in dropping received response @@ -530,60 +524,36 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) port_priv = mad_agent_priv->qp_info->port_priv; cancel_delayed_work(&mad_agent_priv->timed_work); - spin_lock_irqsave(&port_priv->reg_lock, flags); + spin_lock_irq(&port_priv->reg_lock); remove_mad_reg_req(mad_agent_priv); - list_del(&mad_agent_priv->agent_list); - spin_unlock_irqrestore(&port_priv->reg_lock, flags); + spin_unlock_irq(&port_priv->reg_lock); + xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid); flush_workqueue(port_priv->wq); - ib_cancel_rmpp_recvs(mad_agent_priv); deref_mad_agent(mad_agent_priv); wait_for_completion(&mad_agent_priv->comp); + ib_cancel_rmpp_recvs(mad_agent_priv); - kfree(mad_agent_priv->reg_req); - ib_dereg_mr(mad_agent_priv->agent.mr); - kfree(mad_agent_priv); -} - -static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv) -{ - struct ib_mad_qp_info *qp_info; - unsigned long flags; - - qp_info = mad_snoop_priv->qp_info; - spin_lock_irqsave(&qp_info->snoop_lock, flags); - qp_info->snoop_table[mad_snoop_priv->snoop_index] = NULL; - atomic_dec(&qp_info->snoop_count); - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); - - deref_snoop_agent(mad_snoop_priv); - wait_for_completion(&mad_snoop_priv->comp); + ib_mad_agent_security_cleanup(&mad_agent_priv->agent); - kfree(mad_snoop_priv); + kfree(mad_agent_priv->reg_req); + kfree_rcu(mad_agent_priv, rcu); } /* * ib_unregister_mad_agent - Unregisters a client from using MAD services + * + * Context: Process context. */ -int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) +void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) { struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_snoop_private *mad_snoop_priv; - - /* If the TID is zero, the agent can only snoop. */ - if (mad_agent->hi_tid) { - mad_agent_priv = container_of(mad_agent, - struct ib_mad_agent_private, - agent); - unregister_mad_agent(mad_agent_priv); - } else { - mad_snoop_priv = container_of(mad_agent, - struct ib_mad_snoop_private, - agent); - unregister_mad_snoop(mad_snoop_priv); - } - return 0; + + mad_agent_priv = container_of(mad_agent, + struct ib_mad_agent_private, + agent); + unregister_mad_agent(mad_agent_priv); } EXPORT_SYMBOL(ib_unregister_mad_agent); @@ -592,7 +562,6 @@ static void dequeue_mad(struct ib_mad_list_head *mad_list) struct ib_mad_queue *mad_queue; unsigned long flags; - BUG_ON(!mad_list->mad_queue); mad_queue = mad_list->mad_queue; spin_lock_irqsave(&mad_queue->lock, flags); list_del(&mad_list->list); @@ -600,63 +569,11 @@ static void dequeue_mad(struct ib_mad_list_head *mad_list) spin_unlock_irqrestore(&mad_queue->lock, flags); } -static void snoop_send(struct ib_mad_qp_info *qp_info, - struct ib_mad_send_buf *send_buf, - struct ib_mad_send_wc *mad_send_wc, - int mad_snoop_flags) -{ - struct ib_mad_snoop_private *mad_snoop_priv; - unsigned long flags; - int i; - - spin_lock_irqsave(&qp_info->snoop_lock, flags); - for (i = 0; i < qp_info->snoop_table_size; i++) { - mad_snoop_priv = qp_info->snoop_table[i]; - if (!mad_snoop_priv || - !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags)) - continue; - - atomic_inc(&mad_snoop_priv->refcount); - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); - mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent, - send_buf, mad_send_wc); - deref_snoop_agent(mad_snoop_priv); - spin_lock_irqsave(&qp_info->snoop_lock, flags); - } - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); -} - -static void snoop_recv(struct ib_mad_qp_info *qp_info, - struct ib_mad_recv_wc *mad_recv_wc, - int mad_snoop_flags) -{ - struct ib_mad_snoop_private *mad_snoop_priv; - unsigned long flags; - int i; - - spin_lock_irqsave(&qp_info->snoop_lock, flags); - for (i = 0; i < qp_info->snoop_table_size; i++) { - mad_snoop_priv = qp_info->snoop_table[i]; - if (!mad_snoop_priv || - !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags)) - continue; - - atomic_inc(&mad_snoop_priv->refcount); - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); - mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, - mad_recv_wc); - deref_snoop_agent(mad_snoop_priv); - spin_lock_irqsave(&qp_info->snoop_lock, flags); - } - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); -} - -static void build_smp_wc(struct ib_qp *qp, - u64 wr_id, u16 slid, u16 pkey_index, u8 port_num, - struct ib_wc *wc) +static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid, + u16 pkey_index, u32 port_num, struct ib_wc *wc) { memset(wc, 0, sizeof *wc); - wc->wr_id = wr_id; + wc->wr_cqe = cqe; wc->status = IB_WC_SUCCESS; wc->opcode = IB_WC_RECV; wc->pkey_index = pkey_index; @@ -669,6 +586,32 @@ static void build_smp_wc(struct ib_qp *qp, wc->port_num = port_num; } +static size_t mad_priv_size(const struct ib_mad_private *mp) +{ + return sizeof(struct ib_mad_private) + mp->mad_size; +} + +static struct ib_mad_private *alloc_mad_private(size_t mad_size, gfp_t flags) +{ + size_t size = sizeof(struct ib_mad_private) + mad_size; + struct ib_mad_private *ret = kzalloc(size, flags); + + if (ret) + ret->mad_size = mad_size; + + return ret; +} + +static size_t port_mad_size(const struct ib_mad_port_private *port_priv) +{ + return rdma_max_mad_size(port_priv->device, port_priv->port_num); +} + +static size_t mad_priv_dma_size(const struct ib_mad_private *mp) +{ + return sizeof(struct ib_grh) + mp->mad_size; +} + /* * Return 0 if SMP is to be sent * Return 1 if SMP was consumed locally (whether or not solicited) @@ -679,19 +622,25 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, { int ret = 0; struct ib_smp *smp = mad_send_wr->send_buf.mad; + struct opa_smp *opa_smp = (struct opa_smp *)smp; unsigned long flags; struct ib_mad_local_private *local; struct ib_mad_private *mad_priv; struct ib_mad_port_private *port_priv; struct ib_mad_agent_private *recv_mad_agent = NULL; struct ib_device *device = mad_agent_priv->agent.device; - u8 port_num; + u32 port_num; struct ib_wc mad_wc; - struct ib_send_wr *send_wr = &mad_send_wr->send_wr; - - if (device->node_type == RDMA_NODE_IB_SWITCH && + struct ib_ud_wr *send_wr = &mad_send_wr->send_wr; + size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv); + u16 out_mad_pkey_index = 0; + u16 drslid; + bool opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device, + mad_agent_priv->qp_info->port_priv->port_num); + + if (rdma_cap_ib_switch(device) && smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - port_num = send_wr->wr.ud.port_num; + port_num = send_wr->port_num; else port_num = mad_agent_priv->agent.port_num; @@ -701,49 +650,87 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, * If we are at the start of the LID routed part, don't update the * hop_ptr or hop_cnt. See section 14.2.2, Vol 1 IB spec. */ - if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) == - IB_LID_PERMISSIVE && - smi_handle_dr_smp_send(smp, device->node_type, port_num) == - IB_SMI_DISCARD) { - ret = -EINVAL; - printk(KERN_ERR PFX "Invalid directed route\n"); - goto out; - } + if (opa && smp->class_version == OPA_SM_CLASS_VERSION) { + u32 opa_drslid; - /* Check to post send on QP or process locally */ - if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD && - smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD) - goto out; + trace_ib_mad_handle_out_opa_smi(opa_smp); + + if ((opa_get_smp_direction(opa_smp) + ? opa_smp->route.dr.dr_dlid : opa_smp->route.dr.dr_slid) == + OPA_LID_PERMISSIVE && + opa_smi_handle_dr_smp_send(opa_smp, + rdma_cap_ib_switch(device), + port_num) == IB_SMI_DISCARD) { + ret = -EINVAL; + dev_err(&device->dev, "OPA Invalid directed route\n"); + goto out; + } + opa_drslid = be32_to_cpu(opa_smp->route.dr.dr_slid); + if (opa_drslid != be32_to_cpu(OPA_LID_PERMISSIVE) && + opa_drslid & 0xffff0000) { + ret = -EINVAL; + dev_err(&device->dev, "OPA Invalid dr_slid 0x%x\n", + opa_drslid); + goto out; + } + drslid = (u16)(opa_drslid & 0x0000ffff); + + /* Check to post send on QP or process locally */ + if (opa_smi_check_local_smp(opa_smp, device) == IB_SMI_DISCARD && + opa_smi_check_local_returning_smp(opa_smp, device) == IB_SMI_DISCARD) + goto out; + } else { + trace_ib_mad_handle_out_ib_smi(smp); + + if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) == + IB_LID_PERMISSIVE && + smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(device), port_num) == + IB_SMI_DISCARD) { + ret = -EINVAL; + dev_err(&device->dev, "Invalid directed route\n"); + goto out; + } + drslid = be16_to_cpu(smp->dr_slid); + + /* Check to post send on QP or process locally */ + if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD && + smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD) + goto out; + } local = kmalloc(sizeof *local, GFP_ATOMIC); if (!local) { ret = -ENOMEM; - printk(KERN_ERR PFX "No memory for ib_mad_local_private\n"); goto out; } local->mad_priv = NULL; local->recv_mad_agent = NULL; - mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_ATOMIC); + mad_priv = alloc_mad_private(mad_size, GFP_ATOMIC); if (!mad_priv) { ret = -ENOMEM; - printk(KERN_ERR PFX "No memory for local response MAD\n"); kfree(local); goto out; } build_smp_wc(mad_agent_priv->agent.qp, - send_wr->wr_id, be16_to_cpu(smp->dr_slid), - send_wr->wr.ud.pkey_index, - send_wr->wr.ud.port_num, &mad_wc); + send_wr->wr.wr_cqe, drslid, + send_wr->pkey_index, + send_wr->port_num, &mad_wc); + + if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) { + mad_wc.byte_len = mad_send_wr->send_buf.hdr_len + + mad_send_wr->send_buf.data_len + + sizeof(struct ib_grh); + } /* No GRH for DR SMP */ - ret = device->process_mad(device, 0, port_num, &mad_wc, NULL, - (struct ib_mad *)smp, - (struct ib_mad *)&mad_priv->mad); - switch (ret) - { + ret = device->ops.process_mad(device, 0, port_num, &mad_wc, NULL, + (const struct ib_mad *)smp, + (struct ib_mad *)mad_priv->mad, &mad_size, + &out_mad_pkey_index); + switch (ret) { case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY: - if (ib_response_mad(&mad_priv->mad.mad) && + if (ib_response_mad((const struct ib_mad_hdr *)mad_priv->mad) && mad_agent_priv->agent.recv_handler) { local->mad_priv = mad_priv; local->recv_mad_agent = mad_agent_priv; @@ -751,43 +738,47 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, * Reference MAD agent until receive * side of local completion handled */ - atomic_inc(&mad_agent_priv->refcount); + refcount_inc(&mad_agent_priv->refcount); } else - kmem_cache_free(ib_mad_cache, mad_priv); + kfree(mad_priv); break; case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED: - kmem_cache_free(ib_mad_cache, mad_priv); + kfree(mad_priv); break; case IB_MAD_RESULT_SUCCESS: /* Treat like an incoming receive MAD */ port_priv = ib_get_mad_port(mad_agent_priv->agent.device, mad_agent_priv->agent.port_num); if (port_priv) { - memcpy(&mad_priv->mad.mad, smp, sizeof(struct ib_mad)); + memcpy(mad_priv->mad, smp, mad_priv->mad_size); recv_mad_agent = find_mad_agent(port_priv, - &mad_priv->mad.mad); + (const struct ib_mad_hdr *)mad_priv->mad); } if (!port_priv || !recv_mad_agent) { /* * No receiving agent so drop packet and * generate send completion. */ - kmem_cache_free(ib_mad_cache, mad_priv); + kfree(mad_priv); break; } local->mad_priv = mad_priv; local->recv_mad_agent = recv_mad_agent; break; default: - kmem_cache_free(ib_mad_cache, mad_priv); + kfree(mad_priv); kfree(local); ret = -EINVAL; goto out; } local->mad_send_wr = mad_send_wr; + if (opa) { + local->mad_send_wr->send_wr.pkey_index = out_mad_pkey_index; + local->return_wc_byte_len = mad_size; + } /* Reference MAD agent until send side of local completion handled */ - atomic_inc(&mad_agent_priv->refcount); + refcount_inc(&mad_agent_priv->refcount); /* Queue local completion to local list */ spin_lock_irqsave(&mad_agent_priv->lock, flags); list_add_tail(&local->completion_list, &mad_agent_priv->local_list); @@ -799,11 +790,11 @@ out: return ret; } -static int get_pad_size(int hdr_len, int data_len) +static int get_pad_size(int hdr_len, int data_len, size_t mad_size) { int seg_size, pad; - seg_size = sizeof(struct ib_mad) - hdr_len; + seg_size = mad_size - hdr_len; if (data_len && seg_size) { pad = seg_size - data_len % seg_size; return pad == seg_size ? 0 : pad; @@ -822,24 +813,22 @@ static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr) } static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr, - gfp_t gfp_mask) + size_t mad_size, gfp_t gfp_mask) { struct ib_mad_send_buf *send_buf = &send_wr->send_buf; struct ib_rmpp_mad *rmpp_mad = send_buf->mad; struct ib_rmpp_segment *seg = NULL; int left, seg_size, pad; - send_buf->seg_size = sizeof (struct ib_mad) - send_buf->hdr_len; + send_buf->seg_size = mad_size - send_buf->hdr_len; + send_buf->seg_rmpp_size = mad_size - IB_MGMT_RMPP_HDR; seg_size = send_buf->seg_size; pad = send_wr->pad; /* Allocate data segments. */ for (left = send_buf->data_len + pad; left > 0; left -= seg_size) { - seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask); + seg = kmalloc(sizeof(*seg) + seg_size, gfp_mask); if (!seg) { - printk(KERN_ERR "alloc_send_rmpp_segs: RMPP mem " - "alloc failed for len %zd, gfp %#x\n", - sizeof (*seg) + seg_size, gfp_mask); free_send_rmpp_list(send_wr); return -ENOMEM; } @@ -862,28 +851,46 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr, return 0; } -struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, - u32 remote_qpn, u16 pkey_index, - int rmpp_active, - int hdr_len, int data_len, - gfp_t gfp_mask) +int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent) +{ + return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP); +} +EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent); + +struct ib_mad_send_buf *ib_create_send_mad(struct ib_mad_agent *mad_agent, + u32 remote_qpn, u16 pkey_index, + int rmpp_active, int hdr_len, + int data_len, gfp_t gfp_mask, + u8 base_version) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; int pad, message_size, ret, size; void *buf; + size_t mad_size; + bool opa; mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, agent); - pad = get_pad_size(hdr_len, data_len); + + opa = rdma_cap_opa_mad(mad_agent->device, mad_agent->port_num); + + if (opa && base_version == OPA_MGMT_BASE_VERSION) + mad_size = sizeof(struct opa_mad); + else + mad_size = sizeof(struct ib_mad); + + pad = get_pad_size(hdr_len, data_len, mad_size); message_size = hdr_len + data_len + pad; - if ((!mad_agent->rmpp_version && - (rmpp_active || message_size > sizeof(struct ib_mad))) || - (!rmpp_active && message_size > sizeof(struct ib_mad))) - return ERR_PTR(-EINVAL); + if (ib_mad_kernel_rmpp_agent(mad_agent)) { + if (!rmpp_active && message_size > mad_size) + return ERR_PTR(-EINVAL); + } else + if (rmpp_active || message_size > mad_size) + return ERR_PTR(-EINVAL); - size = rmpp_active ? hdr_len : sizeof(struct ib_mad); + size = rmpp_active ? hdr_len : mad_size; buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask); if (!buf) return ERR_PTR(-ENOMEM); @@ -897,21 +904,30 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, mad_send_wr->mad_agent_priv = mad_agent_priv; mad_send_wr->sg_list[0].length = hdr_len; - mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey; - mad_send_wr->sg_list[1].length = sizeof(struct ib_mad) - hdr_len; - mad_send_wr->sg_list[1].lkey = mad_agent->mr->lkey; - - mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr; - mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list; - mad_send_wr->send_wr.num_sge = 2; - mad_send_wr->send_wr.opcode = IB_WR_SEND; - mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED; - mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn; - mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY; - mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index; + mad_send_wr->sg_list[0].lkey = mad_agent->qp->pd->local_dma_lkey; + + /* OPA MADs don't have to be the full 2048 bytes */ + if (opa && base_version == OPA_MGMT_BASE_VERSION && + data_len < mad_size - hdr_len) + mad_send_wr->sg_list[1].length = data_len; + else + mad_send_wr->sg_list[1].length = mad_size - hdr_len; + + mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey; + + mad_send_wr->mad_list.cqe.done = ib_mad_send_done; + + mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe; + mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list; + mad_send_wr->send_wr.wr.num_sge = 2; + mad_send_wr->send_wr.wr.opcode = IB_WR_SEND; + mad_send_wr->send_wr.wr.send_flags = IB_SEND_SIGNALED; + mad_send_wr->send_wr.remote_qpn = remote_qpn; + mad_send_wr->send_wr.remote_qkey = IB_QP_SET_QKEY; + mad_send_wr->send_wr.pkey_index = pkey_index; if (rmpp_active) { - ret = alloc_send_rmpp_list(mad_send_wr, gfp_mask); + ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask); if (ret) { kfree(buf); return ERR_PTR(ret); @@ -919,7 +935,7 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, } mad_send_wr->send_buf.mad_agent = mad_agent; - atomic_inc(&mad_agent_priv->refcount); + refcount_inc(&mad_agent_priv->refcount); return &mad_send_wr->send_buf; } EXPORT_SYMBOL(ib_create_send_mad); @@ -1005,7 +1021,6 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_mad_qp_info *qp_info; struct list_head *list; - struct ib_send_wr *bad_send_wr; struct ib_mad_agent *mad_agent; struct ib_sge *sge; unsigned long flags; @@ -1013,8 +1028,9 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) /* Set WR ID to find mad_send_wr upon completion */ qp_info = mad_send_wr->mad_agent_priv->qp_info; - mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list; mad_send_wr->mad_list.mad_queue = &qp_info->send_queue; + mad_send_wr->mad_list.cqe.done = ib_mad_send_done; + mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe; mad_agent = mad_send_wr->send_buf.mad_agent; sge = mad_send_wr->sg_list; @@ -1022,18 +1038,28 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) mad_send_wr->send_buf.mad, sge[0].length, DMA_TO_DEVICE); + if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr))) + return -ENOMEM; + mad_send_wr->header_mapping = sge[0].addr; sge[1].addr = ib_dma_map_single(mad_agent->device, ib_get_payload(mad_send_wr), sge[1].length, DMA_TO_DEVICE); + if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) { + ib_dma_unmap_single(mad_agent->device, + mad_send_wr->header_mapping, + sge[0].length, DMA_TO_DEVICE); + return -ENOMEM; + } mad_send_wr->payload_mapping = sge[1].addr; spin_lock_irqsave(&qp_info->send_queue.lock, flags); if (qp_info->send_queue.count < qp_info->send_queue.max_active) { - ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr, - &bad_send_wr); + trace_ib_mad_ib_send_mad(mad_send_wr, qp_info); + ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr, + NULL); list = &qp_info->send_queue.list; } else { ret = 0; @@ -1056,6 +1082,180 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) return ret; } +static void handle_queued_state(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_agent_private *mad_agent_priv) +{ + if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP) { + mad_agent_priv->sol_fc_wait_count--; + list_move_tail(&mad_send_wr->agent_list, + &mad_agent_priv->backlog_list); + } else { + expect_mad_state(mad_send_wr, IB_MAD_STATE_INIT); + list_add_tail(&mad_send_wr->agent_list, + &mad_agent_priv->backlog_list); + } +} + +static void handle_send_state(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_agent_private *mad_agent_priv) +{ + if (mad_send_wr->state == IB_MAD_STATE_INIT) { + list_add_tail(&mad_send_wr->agent_list, + &mad_agent_priv->send_list); + } else { + expect_mad_state2(mad_send_wr, IB_MAD_STATE_WAIT_RESP, + IB_MAD_STATE_QUEUED); + list_move_tail(&mad_send_wr->agent_list, + &mad_agent_priv->send_list); + } + + if (mad_send_wr->is_solicited_fc) { + if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP) + mad_agent_priv->sol_fc_wait_count--; + mad_agent_priv->sol_fc_send_count++; + } +} + +static void handle_wait_state(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_agent_private *mad_agent_priv) +{ + struct ib_mad_send_wr_private *temp_mad_send_wr; + struct list_head *list_item; + unsigned long delay; + + expect_mad_state3(mad_send_wr, IB_MAD_STATE_SEND_START, + IB_MAD_STATE_WAIT_RESP, IB_MAD_STATE_CANCELED); + if (mad_send_wr->state == IB_MAD_STATE_SEND_START && + mad_send_wr->is_solicited_fc) { + mad_agent_priv->sol_fc_send_count--; + mad_agent_priv->sol_fc_wait_count++; + } + + list_del_init(&mad_send_wr->agent_list); + delay = mad_send_wr->timeout; + mad_send_wr->timeout += jiffies; + + if (delay) { + list_for_each_prev(list_item, + &mad_agent_priv->wait_list) { + temp_mad_send_wr = list_entry( + list_item, + struct ib_mad_send_wr_private, + agent_list); + if (time_after(mad_send_wr->timeout, + temp_mad_send_wr->timeout)) + break; + } + } else { + list_item = &mad_agent_priv->wait_list; + } + + list_add(&mad_send_wr->agent_list, list_item); +} + +static void handle_early_resp_state(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_agent_private *mad_agent_priv) +{ + expect_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START); + mad_agent_priv->sol_fc_send_count -= mad_send_wr->is_solicited_fc; +} + +static void handle_canceled_state(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_agent_private *mad_agent_priv) +{ + not_expect_mad_state(mad_send_wr, IB_MAD_STATE_DONE); + if (mad_send_wr->is_solicited_fc) { + if (mad_send_wr->state == IB_MAD_STATE_SEND_START) + mad_agent_priv->sol_fc_send_count--; + else if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP) + mad_agent_priv->sol_fc_wait_count--; + } +} + +static void handle_done_state(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_agent_private *mad_agent_priv) +{ + if (mad_send_wr->is_solicited_fc) { + if (mad_send_wr->state == IB_MAD_STATE_SEND_START) + mad_agent_priv->sol_fc_send_count--; + else if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP) + mad_agent_priv->sol_fc_wait_count--; + } + + list_del_init(&mad_send_wr->agent_list); +} + +void change_mad_state(struct ib_mad_send_wr_private *mad_send_wr, + enum ib_mad_state new_state) +{ + struct ib_mad_agent_private *mad_agent_priv = + mad_send_wr->mad_agent_priv; + + switch (new_state) { + case IB_MAD_STATE_INIT: + break; + case IB_MAD_STATE_QUEUED: + handle_queued_state(mad_send_wr, mad_agent_priv); + break; + case IB_MAD_STATE_SEND_START: + handle_send_state(mad_send_wr, mad_agent_priv); + break; + case IB_MAD_STATE_WAIT_RESP: + handle_wait_state(mad_send_wr, mad_agent_priv); + if (mad_send_wr->state == IB_MAD_STATE_CANCELED) + return; + break; + case IB_MAD_STATE_EARLY_RESP: + handle_early_resp_state(mad_send_wr, mad_agent_priv); + break; + case IB_MAD_STATE_CANCELED: + handle_canceled_state(mad_send_wr, mad_agent_priv); + break; + case IB_MAD_STATE_DONE: + handle_done_state(mad_send_wr, mad_agent_priv); + break; + } + + mad_send_wr->state = new_state; +} + +static bool is_solicited_fc_mad(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_rmpp_mad *rmpp_mad; + u8 mgmt_class; + + if (!mad_send_wr->timeout) + return 0; + + rmpp_mad = mad_send_wr->send_buf.mad; + if (mad_send_wr->mad_agent_priv->agent.rmpp_version && + (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) + return 0; + + mgmt_class = + ((struct ib_mad_hdr *)mad_send_wr->send_buf.mad)->mgmt_class; + return mgmt_class == IB_MGMT_CLASS_CM || + mgmt_class == IB_MGMT_CLASS_SUBN_ADM || + mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE; +} + +static bool mad_is_for_backlog(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_mad_agent_private *mad_agent_priv = + mad_send_wr->mad_agent_priv; + + if (!mad_send_wr->is_solicited_fc || !mad_agent_priv->sol_fc_max) + return false; + + if (!list_empty(&mad_agent_priv->backlog_list)) + return true; + + return mad_agent_priv->sol_fc_send_count + + mad_agent_priv->sol_fc_wait_count >= + mad_agent_priv->sol_fc_max; +} + /* * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated * with the registered client @@ -1071,15 +1271,17 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, /* Walk list of send WRs and post each on send list */ for (; send_buf; send_buf = next_send_buf) { - mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, send_buf); mad_agent_priv = mad_send_wr->mad_agent_priv; - if (!send_buf->mad_agent->send_handler || - (send_buf->timeout_ms && - !send_buf->mad_agent->recv_handler)) { + ret = ib_mad_enforce_security(mad_agent_priv, + mad_send_wr->send_wr.pkey_index); + if (ret) + goto error; + + if (!send_buf->mad_agent->send_handler) { ret = -EINVAL; goto error; } @@ -1097,7 +1299,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, * request associated with the completion */ next_send_buf = send_buf->next; - mad_send_wr->send_wr.wr.ud.ah = send_buf->ah; + mad_send_wr->send_wr.ah = send_buf->ah; if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { @@ -1115,18 +1317,22 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, mad_send_wr->max_retries = send_buf->retries; mad_send_wr->retries_left = send_buf->retries; send_buf->retries = 0; - /* Reference for work request to QP + response */ - mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0); - mad_send_wr->status = IB_WC_SUCCESS; + change_mad_state(mad_send_wr, IB_MAD_STATE_INIT); /* Reference MAD agent until send completes */ - atomic_inc(&mad_agent_priv->refcount); + refcount_inc(&mad_agent_priv->refcount); spin_lock_irqsave(&mad_agent_priv->lock, flags); - list_add_tail(&mad_send_wr->agent_list, - &mad_agent_priv->send_list); + mad_send_wr->is_solicited_fc = is_solicited_fc_mad(mad_send_wr); + if (mad_is_for_backlog(mad_send_wr)) { + change_mad_state(mad_send_wr, IB_MAD_STATE_QUEUED); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + return 0; + } + + change_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - if (mad_agent_priv->agent.rmpp_version) { + if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { ret = ib_send_rmpp_mad(mad_send_wr); if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED) ret = ib_send_mad(mad_send_wr); @@ -1135,9 +1341,9 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, if (ret < 0) { /* Fail send request */ spin_lock_irqsave(&mad_agent_priv->lock, flags); - list_del(&mad_send_wr->agent_list); + change_mad_state(mad_send_wr, IB_MAD_STATE_DONE); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - atomic_dec(&mad_agent_priv->refcount); + deref_mad_agent(mad_agent_priv); goto error; } } @@ -1172,29 +1378,11 @@ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc) recv_wc); priv = container_of(mad_priv_hdr, struct ib_mad_private, header); - kmem_cache_free(ib_mad_cache, priv); + kfree(priv); } } EXPORT_SYMBOL(ib_free_recv_mad); -struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, - u8 rmpp_version, - ib_mad_send_handler send_handler, - ib_mad_recv_handler recv_handler, - void *context) -{ - return ERR_PTR(-EINVAL); /* XXX: for now */ -} -EXPORT_SYMBOL(ib_redirect_mad_qp); - -int ib_process_mad_wc(struct ib_mad_agent *mad_agent, - struct ib_wc *wc) -{ - printk(KERN_ERR PFX "ib_process_mad_wc() not implemented yet\n"); - return 0; -} -EXPORT_SYMBOL(ib_process_mad_wc); - static int method_in_use(struct ib_mad_mgmt_method_table **method, struct ib_mad_reg_req *mad_reg_req) { @@ -1202,7 +1390,7 @@ static int method_in_use(struct ib_mad_mgmt_method_table **method, for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) { if ((*method)->agent[i]) { - printk(KERN_ERR PFX "Method %d already in use\n", i); + pr_err("Method %d already in use\n", i); return -EINVAL; } } @@ -1213,13 +1401,7 @@ static int allocate_method_table(struct ib_mad_mgmt_method_table **method) { /* Allocate management method table */ *method = kzalloc(sizeof **method, GFP_ATOMIC); - if (!*method) { - printk(KERN_ERR PFX "No memory for " - "ib_mad_mgmt_method_table\n"); - return -ENOMEM; - } - - return 0; + return (*method) ? 0 : (-ENOMEM); } /* @@ -1259,7 +1441,7 @@ static int check_vendor_class(struct ib_mad_mgmt_vendor_class *vendor_class) } static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class, - char *oui) + const char *oui) { int i; @@ -1288,11 +1470,9 @@ static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method, int i; /* Remove any methods for this mad agent */ - for (i = 0; i < IB_MGMT_MAX_METHODS; i++) { - if (method->agent[i] == agent) { + for (i = 0; i < IB_MGMT_MAX_METHODS; i++) + if (method->agent[i] == agent) method->agent[i] = NULL; - } - } } static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, @@ -1310,8 +1490,6 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, /* Allocate management class table for "new" class version */ *class = kzalloc(sizeof **class, GFP_ATOMIC); if (!*class) { - printk(KERN_ERR PFX "No memory for " - "ib_mad_mgmt_class_table\n"); ret = -ENOMEM; goto error1; } @@ -1376,22 +1554,16 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, if (!*vendor_table) { /* Allocate mgmt vendor class table for "new" class version */ vendor = kzalloc(sizeof *vendor, GFP_ATOMIC); - if (!vendor) { - printk(KERN_ERR PFX "No memory for " - "ib_mad_mgmt_vendor_class_table\n"); + if (!vendor) goto error1; - } *vendor_table = vendor; } if (!(*vendor_table)->vendor_class[vclass]) { /* Allocate table for this management vendor class */ vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC); - if (!vendor_class) { - printk(KERN_ERR PFX "No memory for " - "ib_mad_mgmt_vendor_class\n"); + if (!vendor_class) goto error2; - } (*vendor_table)->vendor_class[vclass] = vendor_class; } @@ -1401,7 +1573,8 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, mad_reg_req->oui, 3)) { method = &(*vendor_table)->vendor_class[ vclass]->method_table[i]; - BUG_ON(!*method); + if (!*method) + goto error3; goto check_in_use; } } @@ -1411,16 +1584,18 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, vclass]->oui[i])) { method = &(*vendor_table)->vendor_class[ vclass]->method_table[i]; - BUG_ON(*method); /* Allocate method table for this OUI */ - if ((ret = allocate_method_table(method))) - goto error3; + if (!*method) { + ret = allocate_method_table(method); + if (ret) + goto error3; + } memcpy((*vendor_table)->vendor_class[vclass]->oui[i], mad_reg_req->oui, 3); goto check_in_use; } } - printk(KERN_ERR PFX "All OUI slots in use\n"); + dev_err(&agent_priv->agent.device->dev, "All OUI slots in use\n"); goto error3; check_in_use: @@ -1472,9 +1647,8 @@ static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv) * Was MAD registration request supplied * with original registration ? */ - if (!agent_priv->reg_req) { + if (!agent_priv->reg_req) goto out; - } port_priv = agent_priv->qp_info->port_priv; mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class); @@ -1490,9 +1664,9 @@ static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv) /* Now, check to see if there are any methods still in use */ if (!check_method_table(method)) { /* If not, release management method table */ - kfree(method); - class->method_table[mgmt_class] = NULL; - /* Any management classes left ? */ + kfree(method); + class->method_table[mgmt_class] = NULL; + /* Any management classes left ? */ if (!check_class_table(class)) { /* If not, release management class table */ kfree(class); @@ -1557,109 +1731,115 @@ out: static struct ib_mad_agent_private * find_mad_agent(struct ib_mad_port_private *port_priv, - struct ib_mad *mad) + const struct ib_mad_hdr *mad_hdr) { struct ib_mad_agent_private *mad_agent = NULL; unsigned long flags; - spin_lock_irqsave(&port_priv->reg_lock, flags); - if (ib_response_mad(mad)) { + if (ib_response_mad(mad_hdr)) { u32 hi_tid; - struct ib_mad_agent_private *entry; /* * Routing is based on high 32 bits of transaction ID * of MAD. */ - hi_tid = be64_to_cpu(mad->mad_hdr.tid) >> 32; - list_for_each_entry(entry, &port_priv->agent_list, agent_list) { - if (entry->agent.hi_tid == hi_tid) { - mad_agent = entry; - break; - } - } + hi_tid = be64_to_cpu(mad_hdr->tid) >> 32; + rcu_read_lock(); + mad_agent = xa_load(&ib_mad_clients, hi_tid); + if (mad_agent && !refcount_inc_not_zero(&mad_agent->refcount)) + mad_agent = NULL; + rcu_read_unlock(); } else { struct ib_mad_mgmt_class_table *class; struct ib_mad_mgmt_method_table *method; struct ib_mad_mgmt_vendor_class_table *vendor; struct ib_mad_mgmt_vendor_class *vendor_class; - struct ib_vendor_mad *vendor_mad; + const struct ib_vendor_mad *vendor_mad; int index; + spin_lock_irqsave(&port_priv->reg_lock, flags); /* * Routing is based on version, class, and method * For "newer" vendor MADs, also based on OUI */ - if (mad->mad_hdr.class_version >= MAX_MGMT_VERSION) + if (mad_hdr->class_version >= MAX_MGMT_VERSION) goto out; - if (!is_vendor_class(mad->mad_hdr.mgmt_class)) { + if (!is_vendor_class(mad_hdr->mgmt_class)) { class = port_priv->version[ - mad->mad_hdr.class_version].class; + mad_hdr->class_version].class; if (!class) goto out; - if (convert_mgmt_class(mad->mad_hdr.mgmt_class) >= - IB_MGMT_MAX_METHODS) + if (convert_mgmt_class(mad_hdr->mgmt_class) >= + ARRAY_SIZE(class->method_table)) goto out; method = class->method_table[convert_mgmt_class( - mad->mad_hdr.mgmt_class)]; + mad_hdr->mgmt_class)]; if (method) - mad_agent = method->agent[mad->mad_hdr.method & + mad_agent = method->agent[mad_hdr->method & ~IB_MGMT_METHOD_RESP]; } else { vendor = port_priv->version[ - mad->mad_hdr.class_version].vendor; + mad_hdr->class_version].vendor; if (!vendor) goto out; vendor_class = vendor->vendor_class[vendor_class_index( - mad->mad_hdr.mgmt_class)]; + mad_hdr->mgmt_class)]; if (!vendor_class) goto out; /* Find matching OUI */ - vendor_mad = (struct ib_vendor_mad *)mad; + vendor_mad = (const struct ib_vendor_mad *)mad_hdr; index = find_vendor_oui(vendor_class, vendor_mad->oui); if (index == -1) goto out; method = vendor_class->method_table[index]; if (method) { - mad_agent = method->agent[mad->mad_hdr.method & + mad_agent = method->agent[mad_hdr->method & ~IB_MGMT_METHOD_RESP]; } } + if (mad_agent) + refcount_inc(&mad_agent->refcount); +out: + spin_unlock_irqrestore(&port_priv->reg_lock, flags); } - if (mad_agent) { - if (mad_agent->agent.recv_handler) - atomic_inc(&mad_agent->refcount); - else { - printk(KERN_NOTICE PFX "No receive handler for client " - "%p on port %d\n", - &mad_agent->agent, port_priv->port_num); - mad_agent = NULL; - } + if (mad_agent && !mad_agent->agent.recv_handler) { + dev_notice(&port_priv->device->dev, + "No receive handler for client %p on port %u\n", + &mad_agent->agent, port_priv->port_num); + deref_mad_agent(mad_agent); + mad_agent = NULL; } -out: - spin_unlock_irqrestore(&port_priv->reg_lock, flags); return mad_agent; } -static int validate_mad(struct ib_mad *mad, u32 qp_num) +static int validate_mad(const struct ib_mad_hdr *mad_hdr, + const struct ib_mad_qp_info *qp_info, + bool opa) { int valid = 0; + u32 qp_num = qp_info->qp->qp_num; /* Make sure MAD base version is understood */ - if (mad->mad_hdr.base_version != IB_MGMT_BASE_VERSION) { - printk(KERN_ERR PFX "MAD received with unsupported base " - "version %d\n", mad->mad_hdr.base_version); + if (mad_hdr->base_version != IB_MGMT_BASE_VERSION && + (!opa || mad_hdr->base_version != OPA_MGMT_BASE_VERSION)) { + pr_err("MAD received with unsupported base version %u %s\n", + mad_hdr->base_version, opa ? "(opa)" : ""); goto out; } /* Filter SMI packets sent to other than QP0 */ - if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) || - (mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { + if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) || + (mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { if (qp_num == 0) valid = 1; } else { + /* CM attributes other than ClassPortInfo only use Send method */ + if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_CM) && + (mad_hdr->attr_id != IB_MGMT_CLASSPORTINFO_ATTR_ID) && + (mad_hdr->method != IB_MGMT_METHOD_SEND)) + goto out; /* Filter GSI packets sent to QP0 */ if (qp_num != 0) valid = 1; @@ -1669,73 +1849,80 @@ out: return valid; } -static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_hdr *mad_hdr) +static int is_rmpp_data_mad(const struct ib_mad_agent_private *mad_agent_priv, + const struct ib_mad_hdr *mad_hdr) { struct ib_rmpp_mad *rmpp_mad; rmpp_mad = (struct ib_rmpp_mad *)mad_hdr; return !mad_agent_priv->agent.rmpp_version || + !ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) || !(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE) || (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA); } -static inline int rcv_has_same_class(struct ib_mad_send_wr_private *wr, - struct ib_mad_recv_wc *rwc) +static inline int rcv_has_same_class(const struct ib_mad_send_wr_private *wr, + const struct ib_mad_recv_wc *rwc) { - return ((struct ib_mad *)(wr->send_buf.mad))->mad_hdr.mgmt_class == + return ((struct ib_mad_hdr *)(wr->send_buf.mad))->mgmt_class == rwc->recv_buf.mad->mad_hdr.mgmt_class; } -static inline int rcv_has_same_gid(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_send_wr_private *wr, - struct ib_mad_recv_wc *rwc ) +static inline int +rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv, + const struct ib_mad_send_wr_private *wr, + const struct ib_mad_recv_wc *rwc) { - struct ib_ah_attr attr; + struct rdma_ah_attr attr; u8 send_resp, rcv_resp; union ib_gid sgid; struct ib_device *device = mad_agent_priv->agent.device; - u8 port_num = mad_agent_priv->agent.port_num; + u32 port_num = mad_agent_priv->agent.port_num; u8 lmc; + bool has_grh; - send_resp = ib_response_mad((struct ib_mad *)wr->send_buf.mad); - rcv_resp = ib_response_mad(rwc->recv_buf.mad); + send_resp = ib_response_mad((struct ib_mad_hdr *)wr->send_buf.mad); + rcv_resp = ib_response_mad(&rwc->recv_buf.mad->mad_hdr); if (send_resp == rcv_resp) /* both requests, or both responses. GIDs different */ return 0; - if (ib_query_ah(wr->send_buf.ah, &attr)) + if (rdma_query_ah(wr->send_buf.ah, &attr)) /* Assume not equal, to avoid false positives. */ return 0; - if (!!(attr.ah_flags & IB_AH_GRH) != - !!(rwc->wc->wc_flags & IB_WC_GRH)) + has_grh = !!(rdma_ah_get_ah_flags(&attr) & IB_AH_GRH); + if (has_grh != !!(rwc->wc->wc_flags & IB_WC_GRH)) /* one has GID, other does not. Assume different */ return 0; if (!send_resp && rcv_resp) { /* is request/response. */ - if (!(attr.ah_flags & IB_AH_GRH)) { + if (!has_grh) { if (ib_get_cached_lmc(device, port_num, &lmc)) return 0; - return (!lmc || !((attr.src_path_bits ^ + return (!lmc || !((rdma_ah_get_path_bits(&attr) ^ rwc->wc->dlid_path_bits) & ((1 << lmc) - 1))); } else { - if (ib_get_cached_gid(device, port_num, - attr.grh.sgid_index, &sgid)) + const struct ib_global_route *grh = + rdma_ah_read_grh(&attr); + + if (rdma_query_gid(device, port_num, + grh->sgid_index, &sgid)) return 0; return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw, 16); } } - if (!(attr.ah_flags & IB_AH_GRH)) - return attr.dlid == rwc->wc->slid; + if (!has_grh) + return rdma_ah_get_dlid(&attr) == rwc->wc->slid; else - return !memcmp(attr.grh.dgid.raw, rwc->recv_buf.grh->sgid.raw, + return !memcmp(rdma_ah_read_grh(&attr)->dgid.raw, + rwc->recv_buf.grh->sgid.raw, 16); } @@ -1745,24 +1932,36 @@ static inline int is_direct(u8 class) } struct ib_mad_send_wr_private* -ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_recv_wc *wc) +ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv, + const struct ib_mad_recv_wc *wc) { struct ib_mad_send_wr_private *wr; - struct ib_mad *mad; + const struct ib_mad_hdr *mad_hdr; - mad = (struct ib_mad *)wc->recv_buf.mad; + mad_hdr = &wc->recv_buf.mad->mad_hdr; list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) { - if ((wr->tid == mad->mad_hdr.tid) && + if ((wr->tid == mad_hdr->tid) && + rcv_has_same_class(wr, wc) && + /* + * Don't check GID for direct routed MADs. + * These might have permissive LIDs. + */ + (is_direct(mad_hdr->mgmt_class) || + rcv_has_same_gid(mad_agent_priv, wr, wc))) + return (wr->state != IB_MAD_STATE_CANCELED) ? wr : NULL; + } + + list_for_each_entry(wr, &mad_agent_priv->backlog_list, agent_list) { + if ((wr->tid == mad_hdr->tid) && rcv_has_same_class(wr, wc) && /* * Don't check GID for direct routed MADs. * These might have permissive LIDs. */ - (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) || + (is_direct(mad_hdr->mgmt_class) || rcv_has_same_gid(mad_agent_priv, wr, wc))) - return (wr->status == IB_WC_SUCCESS) ? wr : NULL; + return (wr->state != IB_MAD_STATE_CANCELED) ? wr : NULL; } /* @@ -1770,28 +1969,66 @@ ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, * been notified that the send has completed */ list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) { - if (is_data_mad(mad_agent_priv, wr->send_buf.mad) && - wr->tid == mad->mad_hdr.tid && + if (is_rmpp_data_mad(mad_agent_priv, wr->send_buf.mad) && + wr->tid == mad_hdr->tid && wr->timeout && rcv_has_same_class(wr, wc) && /* * Don't check GID for direct routed MADs. * These might have permissive LIDs. */ - (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) || + (is_direct(mad_hdr->mgmt_class) || rcv_has_same_gid(mad_agent_priv, wr, wc))) /* Verify request has not been canceled */ - return (wr->status == IB_WC_SUCCESS) ? wr : NULL; + return (wr->state != IB_MAD_STATE_CANCELED) ? wr : NULL; } return NULL; } +static void +process_backlog_mads(struct ib_mad_agent_private *mad_agent_priv) +{ + struct ib_mad_send_wr_private *mad_send_wr; + struct ib_mad_send_wc mad_send_wc = {}; + unsigned long flags; + int ret; + + spin_lock_irqsave(&mad_agent_priv->lock, flags); + while (!list_empty(&mad_agent_priv->backlog_list) && + (mad_agent_priv->sol_fc_send_count + + mad_agent_priv->sol_fc_wait_count < + mad_agent_priv->sol_fc_max)) { + mad_send_wr = list_entry(mad_agent_priv->backlog_list.next, + struct ib_mad_send_wr_private, + agent_list); + change_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + ret = ib_send_mad(mad_send_wr); + if (ret) { + spin_lock_irqsave(&mad_agent_priv->lock, flags); + deref_mad_agent(mad_agent_priv); + change_mad_state(mad_send_wr, IB_MAD_STATE_DONE); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + mad_send_wc.send_buf = &mad_send_wr->send_buf; + mad_send_wc.status = IB_WC_LOC_QP_OP_ERR; + mad_agent_priv->agent.send_handler( + &mad_agent_priv->agent, &mad_send_wc); + } + + spin_lock_irqsave(&mad_agent_priv->lock, flags); + } + + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); +} + void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr) { mad_send_wr->timeout = 0; - if (mad_send_wr->refcount == 1) - list_move_tail(&mad_send_wr->agent_list, - &mad_send_wr->mad_agent_priv->done_list); + if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP || + mad_send_wr->state == IB_MAD_STATE_QUEUED) + change_mad_state(mad_send_wr, IB_MAD_STATE_DONE); + else + change_mad_state(mad_send_wr, IB_MAD_STATE_EARLY_RESP); } static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, @@ -1800,10 +2037,20 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, struct ib_mad_send_wr_private *mad_send_wr; struct ib_mad_send_wc mad_send_wc; unsigned long flags; + bool is_mad_done; + int ret; INIT_LIST_HEAD(&mad_recv_wc->rmpp_list); + ret = ib_mad_enforce_security(mad_agent_priv, + mad_recv_wc->wc->pkey_index); + if (ret) { + ib_free_recv_mad(mad_recv_wc); + deref_mad_agent(mad_agent_priv); + return; + } + list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list); - if (mad_agent_priv->agent.rmpp_version) { + if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv, mad_recv_wc); if (!mad_recv_wc) { @@ -1813,169 +2060,327 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, } /* Complete corresponding request */ - if (ib_response_mad(mad_recv_wc->recv_buf.mad)) { + if (ib_response_mad(&mad_recv_wc->recv_buf.mad->mad_hdr)) { spin_lock_irqsave(&mad_agent_priv->lock, flags); mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc); if (!mad_send_wr) { spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - ib_free_recv_mad(mad_recv_wc); - deref_mad_agent(mad_agent_priv); - return; - } - ib_mark_mad_done(mad_send_wr); - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + if (!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) + && ib_is_mad_class_rmpp(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class) + && (ib_get_rmpp_flags(&((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr) + & IB_MGMT_RMPP_FLAG_ACTIVE)) { + /* user rmpp is in effect + * and this is an active RMPP MAD + */ + mad_agent_priv->agent.recv_handler( + &mad_agent_priv->agent, NULL, + mad_recv_wc); + deref_mad_agent(mad_agent_priv); + } else { + /* not user rmpp, revert to normal behavior and + * drop the mad + */ + ib_free_recv_mad(mad_recv_wc); + deref_mad_agent(mad_agent_priv); + return; + } + } else { + ib_mark_mad_done(mad_send_wr); + is_mad_done = (mad_send_wr->state == IB_MAD_STATE_DONE); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - /* Defined behavior is to complete response before request */ - mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf; - mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, - mad_recv_wc); - atomic_dec(&mad_agent_priv->refcount); + /* Defined behavior is to complete response before request */ + mad_agent_priv->agent.recv_handler( + &mad_agent_priv->agent, + &mad_send_wr->send_buf, + mad_recv_wc); + deref_mad_agent(mad_agent_priv); - mad_send_wc.status = IB_WC_SUCCESS; - mad_send_wc.vendor_err = 0; - mad_send_wc.send_buf = &mad_send_wr->send_buf; - ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); + if (is_mad_done) { + mad_send_wc.status = IB_WC_SUCCESS; + mad_send_wc.vendor_err = 0; + mad_send_wc.send_buf = &mad_send_wr->send_buf; + ib_mad_complete_send_wr(mad_send_wr, + &mad_send_wc); + } + } } else { - mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, + mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, NULL, mad_recv_wc); deref_mad_agent(mad_agent_priv); } } -static bool generate_unmatched_resp(struct ib_mad_private *recv, - struct ib_mad_private *response) +static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv, + const struct ib_mad_qp_info *qp_info, + const struct ib_wc *wc, + u32 port_num, + struct ib_mad_private *recv, + struct ib_mad_private *response) +{ + enum smi_forward_action retsmi; + struct ib_smp *smp = (struct ib_smp *)recv->mad; + + trace_ib_mad_handle_ib_smi(smp); + + if (smi_handle_dr_smp_recv(smp, + rdma_cap_ib_switch(port_priv->device), + port_num, + port_priv->device->phys_port_cnt) == + IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + retsmi = smi_check_forward_dr_smp(smp); + if (retsmi == IB_SMI_LOCAL) + return IB_SMI_HANDLE; + + if (retsmi == IB_SMI_SEND) { /* don't forward */ + if (smi_handle_dr_smp_send(smp, + rdma_cap_ib_switch(port_priv->device), + port_num) == IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + if (smi_check_local_smp(smp, port_priv->device) == IB_SMI_DISCARD) + return IB_SMI_DISCARD; + } else if (rdma_cap_ib_switch(port_priv->device)) { + /* forward case for switches */ + memcpy(response, recv, mad_priv_size(response)); + response->header.recv_wc.wc = &response->header.wc; + response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad; + response->header.recv_wc.recv_buf.grh = &response->grh; + + agent_send_response((const struct ib_mad_hdr *)response->mad, + &response->grh, wc, + port_priv->device, + smi_get_fwd_port(smp), + qp_info->qp->qp_num, + response->mad_size, + false); + + return IB_SMI_DISCARD; + } + return IB_SMI_HANDLE; +} + +static bool generate_unmatched_resp(const struct ib_mad_private *recv, + struct ib_mad_private *response, + size_t *resp_len, bool opa) { - if (recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_GET || - recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_SET) { - memcpy(response, recv, sizeof *response); + const struct ib_mad_hdr *recv_hdr = (const struct ib_mad_hdr *)recv->mad; + struct ib_mad_hdr *resp_hdr = (struct ib_mad_hdr *)response->mad; + + if (recv_hdr->method == IB_MGMT_METHOD_GET || + recv_hdr->method == IB_MGMT_METHOD_SET) { + memcpy(response, recv, mad_priv_size(response)); response->header.recv_wc.wc = &response->header.wc; - response->header.recv_wc.recv_buf.mad = &response->mad.mad; + response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad; response->header.recv_wc.recv_buf.grh = &response->grh; - response->mad.mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP; - response->mad.mad.mad_hdr.status = - cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB); - if (recv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - response->mad.mad.mad_hdr.status |= IB_SMP_DIRECTION; + resp_hdr->method = IB_MGMT_METHOD_GET_RESP; + resp_hdr->status = cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB); + if (recv_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + resp_hdr->status |= IB_SMP_DIRECTION; + + if (opa && recv_hdr->base_version == OPA_MGMT_BASE_VERSION) { + if (recv_hdr->mgmt_class == + IB_MGMT_CLASS_SUBN_LID_ROUTED || + recv_hdr->mgmt_class == + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + *resp_len = opa_get_smp_header_size( + (struct opa_smp *)recv->mad); + else + *resp_len = sizeof(struct ib_mad_hdr); + } return true; } else { return false; } } -static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, - struct ib_wc *wc) + +static enum smi_action +handle_opa_smi(struct ib_mad_port_private *port_priv, + struct ib_mad_qp_info *qp_info, + struct ib_wc *wc, + u32 port_num, + struct ib_mad_private *recv, + struct ib_mad_private *response) { + enum smi_forward_action retsmi; + struct opa_smp *smp = (struct opa_smp *)recv->mad; + + trace_ib_mad_handle_opa_smi(smp); + + if (opa_smi_handle_dr_smp_recv(smp, + rdma_cap_ib_switch(port_priv->device), + port_num, + port_priv->device->phys_port_cnt) == + IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + retsmi = opa_smi_check_forward_dr_smp(smp); + if (retsmi == IB_SMI_LOCAL) + return IB_SMI_HANDLE; + + if (retsmi == IB_SMI_SEND) { /* don't forward */ + if (opa_smi_handle_dr_smp_send(smp, + rdma_cap_ib_switch(port_priv->device), + port_num) == IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + if (opa_smi_check_local_smp(smp, port_priv->device) == + IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + } else if (rdma_cap_ib_switch(port_priv->device)) { + /* forward case for switches */ + memcpy(response, recv, mad_priv_size(response)); + response->header.recv_wc.wc = &response->header.wc; + response->header.recv_wc.recv_buf.opa_mad = + (struct opa_mad *)response->mad; + response->header.recv_wc.recv_buf.grh = &response->grh; + + agent_send_response((const struct ib_mad_hdr *)response->mad, + &response->grh, wc, + port_priv->device, + opa_smi_get_fwd_port(smp), + qp_info->qp->qp_num, + recv->header.wc.byte_len, + true); + + return IB_SMI_DISCARD; + } + + return IB_SMI_HANDLE; +} + +static enum smi_action +handle_smi(struct ib_mad_port_private *port_priv, + struct ib_mad_qp_info *qp_info, + struct ib_wc *wc, + u32 port_num, + struct ib_mad_private *recv, + struct ib_mad_private *response, + bool opa) +{ + struct ib_mad_hdr *mad_hdr = (struct ib_mad_hdr *)recv->mad; + + if (opa && mad_hdr->base_version == OPA_MGMT_BASE_VERSION && + mad_hdr->class_version == OPA_SM_CLASS_VERSION) + return handle_opa_smi(port_priv, qp_info, wc, port_num, recv, + response); + + return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response); +} + +static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct ib_mad_port_private *port_priv = cq->cq_context; + struct ib_mad_list_head *mad_list = + container_of(wc->wr_cqe, struct ib_mad_list_head, cqe); struct ib_mad_qp_info *qp_info; struct ib_mad_private_header *mad_priv_hdr; struct ib_mad_private *recv, *response = NULL; - struct ib_mad_list_head *mad_list; struct ib_mad_agent_private *mad_agent; - int port_num; + u32 port_num; int ret = IB_MAD_RESULT_SUCCESS; + size_t mad_size; + u16 resp_mad_pkey_index = 0; + bool opa; + + if (list_empty_careful(&port_priv->port_list)) + return; + + if (wc->status != IB_WC_SUCCESS) { + /* + * Receive errors indicate that the QP has entered the error + * state - error handling/shutdown code will cleanup + */ + return; + } - mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id; qp_info = mad_list->mad_queue->qp_info; dequeue_mad(mad_list); + opa = rdma_cap_opa_mad(qp_info->port_priv->device, + qp_info->port_priv->port_num); + mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header, mad_list); recv = container_of(mad_priv_hdr, struct ib_mad_private, header); ib_dma_unmap_single(port_priv->device, recv->header.mapping, - sizeof(struct ib_mad_private) - - sizeof(struct ib_mad_private_header), + mad_priv_dma_size(recv), DMA_FROM_DEVICE); /* Setup MAD receive work completion from "normal" work completion */ recv->header.wc = *wc; recv->header.recv_wc.wc = &recv->header.wc; - recv->header.recv_wc.mad_len = sizeof(struct ib_mad); - recv->header.recv_wc.recv_buf.mad = &recv->mad.mad; - recv->header.recv_wc.recv_buf.grh = &recv->grh; - if (atomic_read(&qp_info->snoop_count)) - snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS); + if (opa && ((struct ib_mad_hdr *)(recv->mad))->base_version == OPA_MGMT_BASE_VERSION) { + recv->header.recv_wc.mad_len = wc->byte_len - sizeof(struct ib_grh); + recv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad); + } else { + recv->header.recv_wc.mad_len = sizeof(struct ib_mad); + recv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad); + } + + recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad; + recv->header.recv_wc.recv_buf.grh = &recv->grh; /* Validate MAD */ - if (!validate_mad(&recv->mad.mad, qp_info->qp->qp_num)) + if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa)) goto out; - response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL); - if (!response) { - printk(KERN_ERR PFX "ib_mad_recv_done_handler no memory " - "for response buffer\n"); + trace_ib_mad_recv_done_handler(qp_info, wc, + (struct ib_mad_hdr *)recv->mad); + + mad_size = recv->mad_size; + response = alloc_mad_private(mad_size, GFP_KERNEL); + if (!response) goto out; - } - if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) + if (rdma_cap_ib_switch(port_priv->device)) port_num = wc->port_num; else port_num = port_priv->port_num; - if (recv->mad.mad.mad_hdr.mgmt_class == + if (((struct ib_mad_hdr *)recv->mad)->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - enum smi_forward_action retsmi; - - if (smi_handle_dr_smp_recv(&recv->mad.smp, - port_priv->device->node_type, - port_num, - port_priv->device->phys_port_cnt) == - IB_SMI_DISCARD) + if (handle_smi(port_priv, qp_info, wc, port_num, recv, + response, opa) + == IB_SMI_DISCARD) goto out; - - retsmi = smi_check_forward_dr_smp(&recv->mad.smp); - if (retsmi == IB_SMI_LOCAL) - goto local; - - if (retsmi == IB_SMI_SEND) { /* don't forward */ - if (smi_handle_dr_smp_send(&recv->mad.smp, - port_priv->device->node_type, - port_num) == IB_SMI_DISCARD) - goto out; - - if (smi_check_local_smp(&recv->mad.smp, port_priv->device) == IB_SMI_DISCARD) - goto out; - } else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) { - /* forward case for switches */ - memcpy(response, recv, sizeof(*response)); - response->header.recv_wc.wc = &response->header.wc; - response->header.recv_wc.recv_buf.mad = &response->mad.mad; - response->header.recv_wc.recv_buf.grh = &response->grh; - - agent_send_response(&response->mad.mad, - &response->grh, wc, - port_priv->device, - smi_get_fwd_port(&recv->mad.smp), - qp_info->qp->qp_num); - - goto out; - } } -local: /* Give driver "right of first refusal" on incoming MAD */ - if (port_priv->device->process_mad) { - ret = port_priv->device->process_mad(port_priv->device, 0, - port_priv->port_num, - wc, &recv->grh, - &recv->mad.mad, - &response->mad.mad); + if (port_priv->device->ops.process_mad) { + ret = port_priv->device->ops.process_mad( + port_priv->device, 0, port_priv->port_num, wc, + &recv->grh, (const struct ib_mad *)recv->mad, + (struct ib_mad *)response->mad, &mad_size, + &resp_mad_pkey_index); + + if (opa) + wc->pkey_index = resp_mad_pkey_index; + if (ret & IB_MAD_RESULT_SUCCESS) { if (ret & IB_MAD_RESULT_CONSUMED) goto out; if (ret & IB_MAD_RESULT_REPLY) { - agent_send_response(&response->mad.mad, + agent_send_response((const struct ib_mad_hdr *)response->mad, &recv->grh, wc, port_priv->device, port_num, - qp_info->qp->qp_num); + qp_info->qp->qp_num, + mad_size, opa); goto out; } } } - mad_agent = find_mad_agent(port_priv, &recv->mad.mad); + mad_agent = find_mad_agent(port_priv, (const struct ib_mad_hdr *)recv->mad); if (mad_agent) { + trace_ib_mad_recv_done_agent(mad_agent); ib_mad_complete_recv(mad_agent, &recv->header.recv_wc); /* * recv is freed up in error cases in ib_mad_complete_recv @@ -1983,17 +2388,17 @@ local: */ recv = NULL; } else if ((ret & IB_MAD_RESULT_SUCCESS) && - generate_unmatched_resp(recv, response)) { - agent_send_response(&response->mad.mad, &recv->grh, wc, - port_priv->device, port_num, qp_info->qp->qp_num); + generate_unmatched_resp(recv, response, &mad_size, opa)) { + agent_send_response((const struct ib_mad_hdr *)response->mad, &recv->grh, wc, + port_priv->device, port_num, + qp_info->qp->qp_num, mad_size, opa); } out: /* Post another receive request for this QP */ if (response) { ib_mad_post_receive_mads(qp_info, response); - if (recv) - kmem_cache_free(ib_mad_cache, recv); + kfree(recv); } else ib_mad_post_receive_mads(qp_info, recv); } @@ -2025,29 +2430,11 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv) static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_send_wr_private *temp_mad_send_wr; - struct list_head *list_item; unsigned long delay; mad_agent_priv = mad_send_wr->mad_agent_priv; - list_del(&mad_send_wr->agent_list); - delay = mad_send_wr->timeout; - mad_send_wr->timeout += jiffies; - - if (delay) { - list_for_each_prev(list_item, &mad_agent_priv->wait_list) { - temp_mad_send_wr = list_entry(list_item, - struct ib_mad_send_wr_private, - agent_list); - if (time_after(mad_send_wr->timeout, - temp_mad_send_wr->timeout)) - break; - } - } - else - list_item = &mad_agent_priv->wait_list; - list_add(&mad_send_wr->agent_list, list_item); + change_mad_state(mad_send_wr, IB_MAD_STATE_WAIT_RESP); /* Reschedule a work item if we have a shorter timeout */ if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) @@ -2056,7 +2443,7 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr) } void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, - int timeout_ms) + unsigned long timeout_ms) { mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); wait_for_response(mad_send_wr); @@ -2074,39 +2461,35 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, mad_agent_priv = mad_send_wr->mad_agent_priv; spin_lock_irqsave(&mad_agent_priv->lock, flags); - if (mad_agent_priv->agent.rmpp_version) { + if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc); if (ret == IB_RMPP_RESULT_CONSUMED) goto done; } else ret = IB_RMPP_RESULT_UNHANDLED; - if (mad_send_wc->status != IB_WC_SUCCESS && - mad_send_wr->status == IB_WC_SUCCESS) { - mad_send_wr->status = mad_send_wc->status; - mad_send_wr->refcount -= (mad_send_wr->timeout > 0); - } - - if (--mad_send_wr->refcount > 0) { - if (mad_send_wr->refcount == 1 && mad_send_wr->timeout && - mad_send_wr->status == IB_WC_SUCCESS) { - wait_for_response(mad_send_wr); - } + if (mad_send_wr->state == IB_MAD_STATE_CANCELED) + mad_send_wc->status = IB_WC_WR_FLUSH_ERR; + else if (mad_send_wr->state == IB_MAD_STATE_SEND_START && + mad_send_wr->timeout) { + wait_for_response(mad_send_wr); goto done; } /* Remove send from MAD agent and notify client of completion */ - list_del(&mad_send_wr->agent_list); + if (mad_send_wr->state != IB_MAD_STATE_DONE) + change_mad_state(mad_send_wr, IB_MAD_STATE_DONE); adjust_timeout(mad_agent_priv); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - if (mad_send_wr->status != IB_WC_SUCCESS ) - mad_send_wc->status = mad_send_wr->status; - if (ret == IB_RMPP_RESULT_INTERNAL) + if (ret == IB_RMPP_RESULT_INTERNAL) { ib_rmpp_send_handler(mad_send_wc); - else + } else { + if (mad_send_wr->is_solicited_fc) + process_backlog_mads(mad_agent_priv); mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, mad_send_wc); + } /* Release reference on agent taken when sending */ deref_mad_agent(mad_agent_priv); @@ -2115,24 +2498,34 @@ done: spin_unlock_irqrestore(&mad_agent_priv->lock, flags); } -static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv, - struct ib_wc *wc) +static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc) { + struct ib_mad_port_private *port_priv = cq->cq_context; + struct ib_mad_list_head *mad_list = + container_of(wc->wr_cqe, struct ib_mad_list_head, cqe); struct ib_mad_send_wr_private *mad_send_wr, *queued_send_wr; - struct ib_mad_list_head *mad_list; struct ib_mad_qp_info *qp_info; struct ib_mad_queue *send_queue; - struct ib_send_wr *bad_send_wr; struct ib_mad_send_wc mad_send_wc; unsigned long flags; int ret; - mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id; + if (list_empty_careful(&port_priv->port_list)) + return; + + if (wc->status != IB_WC_SUCCESS) { + if (!ib_mad_send_error(port_priv, wc)) + return; + } + mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private, mad_list); send_queue = mad_list->mad_queue; qp_info = send_queue->qp_info; + trace_ib_mad_send_done_agent(mad_send_wr->mad_agent_priv); + trace_ib_mad_send_done_handler(mad_send_wr, wc); + retry: ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device, mad_send_wr->header_mapping, @@ -2158,16 +2551,15 @@ retry: mad_send_wc.send_buf = &mad_send_wr->send_buf; mad_send_wc.status = wc->status; mad_send_wc.vendor_err = wc->vendor_err; - if (atomic_read(&qp_info->snoop_count)) - snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc, - IB_MAD_SNOOP_SEND_COMPLETIONS); ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); if (queued_send_wr) { - ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr, - &bad_send_wr); + trace_ib_mad_send_done_resend(queued_send_wr, qp_info); + ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr, + NULL); if (ret) { - printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret); + dev_err(&port_priv->device->dev, + "ib_post_send failed: %d\n", ret); mad_send_wr = queued_send_wr; wc->status = IB_WC_LOC_QP_OP_ERR; goto retry; @@ -2191,24 +2583,15 @@ static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info) spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); } -static void mad_error_handler(struct ib_mad_port_private *port_priv, - struct ib_wc *wc) +static bool ib_mad_send_error(struct ib_mad_port_private *port_priv, + struct ib_wc *wc) { - struct ib_mad_list_head *mad_list; - struct ib_mad_qp_info *qp_info; + struct ib_mad_list_head *mad_list = + container_of(wc->wr_cqe, struct ib_mad_list_head, cqe); + struct ib_mad_qp_info *qp_info = mad_list->mad_queue->qp_info; struct ib_mad_send_wr_private *mad_send_wr; int ret; - /* Determine if failure was a send or receive */ - mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id; - qp_info = mad_list->mad_queue->qp_info; - if (mad_list->mad_queue == &qp_info->recv_queue) - /* - * Receive errors indicate that the QP has entered the error - * state - error handling/shutdown code will cleanup - */ - return; - /* * Send errors will transition the QP to SQE - move * QP to RTS and repost flushed work requests @@ -2218,15 +2601,13 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv, if (wc->status == IB_WC_WR_FLUSH_ERR) { if (mad_send_wr->retry) { /* Repost send */ - struct ib_send_wr *bad_send_wr; - mad_send_wr->retry = 0; - ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr, - &bad_send_wr); - if (ret) - ib_mad_send_done_handler(port_priv, wc); - } else - ib_mad_send_done_handler(port_priv, wc); + trace_ib_mad_error_handler(mad_send_wr, qp_info); + ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr, + NULL); + if (!ret) + return false; + } } else { struct ib_qp_attr *attr; @@ -2239,41 +2620,32 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv, IB_QP_STATE | IB_QP_CUR_STATE); kfree(attr); if (ret) - printk(KERN_ERR PFX "mad_error_handler - " - "ib_modify_qp to RTS : %d\n", ret); + dev_err(&port_priv->device->dev, + "%s - ib_modify_qp to RTS: %d\n", + __func__, ret); else mark_sends_for_retry(qp_info); } - ib_mad_send_done_handler(port_priv, wc); } + + return true; } -/* - * IB MAD completion callback - */ -static void ib_mad_completion_handler(struct work_struct *work) +static void clear_mad_error_list(struct list_head *list, + enum ib_wc_status wc_status, + struct ib_mad_agent_private *mad_agent_priv) { - struct ib_mad_port_private *port_priv; - struct ib_wc wc; + struct ib_mad_send_wr_private *mad_send_wr, *n; + struct ib_mad_send_wc mad_send_wc; - port_priv = container_of(work, struct ib_mad_port_private, work); - ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP); + mad_send_wc.status = wc_status; + mad_send_wc.vendor_err = 0; - while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) { - if (wc.status == IB_WC_SUCCESS) { - switch (wc.opcode) { - case IB_WC_SEND: - ib_mad_send_done_handler(port_priv, &wc); - break; - case IB_WC_RECV: - ib_mad_recv_done_handler(port_priv, &wc); - break; - default: - BUG_ON(1); - break; - } - } else - mad_error_handler(port_priv, &wc); + list_for_each_entry_safe(mad_send_wr, n, list, agent_list) { + mad_send_wc.send_buf = &mad_send_wr->send_buf; + mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, + &mad_send_wc); + deref_mad_agent(mad_agent_priv); } } @@ -2281,36 +2653,31 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) { unsigned long flags; struct ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr; - struct ib_mad_send_wc mad_send_wc; struct list_head cancel_list; INIT_LIST_HEAD(&cancel_list); spin_lock_irqsave(&mad_agent_priv->lock, flags); list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, - &mad_agent_priv->send_list, agent_list) { - if (mad_send_wr->status == IB_WC_SUCCESS) { - mad_send_wr->status = IB_WC_WR_FLUSH_ERR; - mad_send_wr->refcount -= (mad_send_wr->timeout > 0); - } - } + &mad_agent_priv->send_list, agent_list) + change_mad_state(mad_send_wr, IB_MAD_STATE_CANCELED); - /* Empty wait list to prevent receives from finding a request */ - list_splice_init(&mad_agent_priv->wait_list, &cancel_list); - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - - /* Report all cancelled requests */ - mad_send_wc.status = IB_WC_WR_FLUSH_ERR; - mad_send_wc.vendor_err = 0; + /* Empty wait & backlog list to prevent receives from finding request */ + list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, + &mad_agent_priv->wait_list, agent_list) { + change_mad_state(mad_send_wr, IB_MAD_STATE_DONE); + list_add_tail(&mad_send_wr->agent_list, &cancel_list); + } list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, - &cancel_list, agent_list) { - mad_send_wc.send_buf = &mad_send_wr->send_buf; - list_del(&mad_send_wr->agent_list); - mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, - &mad_send_wc); - atomic_dec(&mad_agent_priv->refcount); + &mad_agent_priv->backlog_list, agent_list) { + change_mad_state(mad_send_wr, IB_MAD_STATE_DONE); + list_add_tail(&mad_send_wr->agent_list, &cancel_list); } + + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + /* Report all cancelled requests */ + clear_mad_error_list(&cancel_list, IB_WC_WR_FLUSH_ERR, mad_agent_priv); } static struct ib_mad_send_wr_private* @@ -2327,35 +2694,45 @@ find_send_wr(struct ib_mad_agent_private *mad_agent_priv, list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, agent_list) { - if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) && + if (is_rmpp_data_mad(mad_agent_priv, + mad_send_wr->send_buf.mad) && &mad_send_wr->send_buf == send_buf) return mad_send_wr; } + + list_for_each_entry(mad_send_wr, &mad_agent_priv->backlog_list, + agent_list) { + if (&mad_send_wr->send_buf == send_buf) + return mad_send_wr; + } + return NULL; } -int ib_modify_mad(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf, u32 timeout_ms) +int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; unsigned long flags; int active; - mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, - agent); + if (!send_buf) + return -EINVAL; + + mad_agent_priv = container_of(send_buf->mad_agent, + struct ib_mad_agent_private, agent); spin_lock_irqsave(&mad_agent_priv->lock, flags); mad_send_wr = find_send_wr(mad_agent_priv, send_buf); - if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) { + if (!mad_send_wr || mad_send_wr->state == IB_MAD_STATE_CANCELED) { spin_unlock_irqrestore(&mad_agent_priv->lock, flags); return -EINVAL; } - active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1); - if (!timeout_ms) { - mad_send_wr->status = IB_WC_WR_FLUSH_ERR; - mad_send_wr->refcount -= (mad_send_wr->timeout > 0); - } + active = ((mad_send_wr->state == IB_MAD_STATE_SEND_START) || + (mad_send_wr->state == IB_MAD_STATE_EARLY_RESP) || + (mad_send_wr->state == IB_MAD_STATE_QUEUED && timeout_ms)); + if (!timeout_ms) + change_mad_state(mad_send_wr, IB_MAD_STATE_CANCELED); mad_send_wr->send_buf.timeout_ms = timeout_ms; if (active) @@ -2368,13 +2745,6 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, } EXPORT_SYMBOL(ib_modify_mad); -void ib_cancel_mad(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf) -{ - ib_modify_mad(mad_agent, send_buf, 0); -} -EXPORT_SYMBOL(ib_cancel_mad); - static void local_completions(struct work_struct *work) { struct ib_mad_agent_private *mad_agent_priv; @@ -2384,10 +2754,14 @@ static void local_completions(struct work_struct *work) int free_mad; struct ib_wc wc; struct ib_mad_send_wc mad_send_wc; + bool opa; mad_agent_priv = container_of(work, struct ib_mad_agent_private, local_work); + opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device, + mad_agent_priv->qp_info->port_priv->port_num); + spin_lock_irqsave(&mad_agent_priv->lock, flags); while (!list_empty(&mad_agent_priv->local_list)) { local = list_entry(mad_agent_priv->local_list.next, @@ -2397,9 +2771,11 @@ static void local_completions(struct work_struct *work) spin_unlock_irqrestore(&mad_agent_priv->lock, flags); free_mad = 0; if (local->mad_priv) { + u8 base_version; recv_mad_agent = local->recv_mad_agent; if (!recv_mad_agent) { - printk(KERN_ERR PFX "No receive MAD agent for local completion\n"); + dev_err(&mad_agent_priv->agent.device->dev, + "No receive MAD agent for local completion\n"); free_mad = 1; goto local_send_completion; } @@ -2409,28 +2785,34 @@ static void local_completions(struct work_struct *work) * before request */ build_smp_wc(recv_mad_agent->agent.qp, - (unsigned long) local->mad_send_wr, + local->mad_send_wr->send_wr.wr.wr_cqe, be16_to_cpu(IB_LID_PERMISSIVE), - 0, recv_mad_agent->agent.port_num, &wc); + local->mad_send_wr->send_wr.pkey_index, + recv_mad_agent->agent.port_num, &wc); local->mad_priv->header.recv_wc.wc = &wc; - local->mad_priv->header.recv_wc.mad_len = - sizeof(struct ib_mad); + + base_version = ((struct ib_mad_hdr *)(local->mad_priv->mad))->base_version; + if (opa && base_version == OPA_MGMT_BASE_VERSION) { + local->mad_priv->header.recv_wc.mad_len = local->return_wc_byte_len; + local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad); + } else { + local->mad_priv->header.recv_wc.mad_len = sizeof(struct ib_mad); + local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad); + } + INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.rmpp_list); list_add(&local->mad_priv->header.recv_wc.recv_buf.list, &local->mad_priv->header.recv_wc.rmpp_list); local->mad_priv->header.recv_wc.recv_buf.grh = NULL; local->mad_priv->header.recv_wc.recv_buf.mad = - &local->mad_priv->mad.mad; - if (atomic_read(&recv_mad_agent->qp_info->snoop_count)) - snoop_recv(recv_mad_agent->qp_info, - &local->mad_priv->header.recv_wc, - IB_MAD_SNOOP_RECVS); + (struct ib_mad *)local->mad_priv->mad; recv_mad_agent->agent.recv_handler( &recv_mad_agent->agent, + &local->mad_send_wr->send_buf, &local->mad_priv->header.recv_wc); spin_lock_irqsave(&recv_mad_agent->lock, flags); - atomic_dec(&recv_mad_agent->refcount); + deref_mad_agent(recv_mad_agent); spin_unlock_irqrestore(&recv_mad_agent->lock, flags); } @@ -2439,17 +2821,13 @@ local_send_completion: mad_send_wc.status = IB_WC_SUCCESS; mad_send_wc.vendor_err = 0; mad_send_wc.send_buf = &local->mad_send_wr->send_buf; - if (atomic_read(&mad_agent_priv->qp_info->snoop_count)) - snoop_send(mad_agent_priv->qp_info, - &local->mad_send_wr->send_buf, - &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS); mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); spin_lock_irqsave(&mad_agent_priv->lock, flags); - atomic_dec(&mad_agent_priv->refcount); + deref_mad_agent(mad_agent_priv); if (free_mad) - kmem_cache_free(ib_mad_cache, local->mad_priv); + kfree(local->mad_priv); kfree(local); } spin_unlock_irqrestore(&mad_agent_priv->lock, flags); @@ -2466,8 +2844,13 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) mad_send_wr->send_buf.retries++; mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); + if (mad_send_wr->is_solicited_fc && + !list_empty(&mad_send_wr->mad_agent_priv->backlog_list)) { + change_mad_state(mad_send_wr, IB_MAD_STATE_QUEUED); + return 0; + } - if (mad_send_wr->mad_agent_priv->agent.rmpp_version) { + if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) { ret = ib_retry_rmpp(mad_send_wr); switch (ret) { case IB_RMPP_RESULT_UNHANDLED: @@ -2483,24 +2866,25 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) } else ret = ib_send_mad(mad_send_wr); - if (!ret) { - mad_send_wr->refcount++; - list_add_tail(&mad_send_wr->agent_list, - &mad_send_wr->mad_agent_priv->send_list); - } + if (!ret) + change_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START); + return ret; } static void timeout_sends(struct work_struct *work) { - struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; - struct ib_mad_send_wc mad_send_wc; + struct ib_mad_agent_private *mad_agent_priv; + struct list_head timeout_list; + struct list_head cancel_list; + struct list_head *list_item; unsigned long flags, delay; mad_agent_priv = container_of(work, struct ib_mad_agent_private, timed_work.work); - mad_send_wc.vendor_err = 0; + INIT_LIST_HEAD(&timeout_list); + INIT_LIST_HEAD(&cancel_list); spin_lock_irqsave(&mad_agent_priv->lock, flags); while (!list_empty(&mad_agent_priv->wait_list)) { @@ -2518,36 +2902,22 @@ static void timeout_sends(struct work_struct *work) break; } - list_del(&mad_send_wr->agent_list); - if (mad_send_wr->status == IB_WC_SUCCESS && - !retry_send(mad_send_wr)) - continue; - - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - - if (mad_send_wr->status == IB_WC_SUCCESS) - mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR; + if (mad_send_wr->state == IB_MAD_STATE_CANCELED) + list_item = &cancel_list; + else if (retry_send(mad_send_wr)) + list_item = &timeout_list; else - mad_send_wc.status = mad_send_wr->status; - mad_send_wc.send_buf = &mad_send_wr->send_buf; - mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, - &mad_send_wc); + continue; - atomic_dec(&mad_agent_priv->refcount); - spin_lock_irqsave(&mad_agent_priv->lock, flags); + change_mad_state(mad_send_wr, IB_MAD_STATE_DONE); + list_add_tail(&mad_send_wr->agent_list, list_item); } - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); -} - -static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg) -{ - struct ib_mad_port_private *port_priv = cq->cq_context; - unsigned long flags; - spin_lock_irqsave(&ib_mad_port_list_lock, flags); - if (!list_empty(&port_priv->port_list)) - queue_work(port_priv->wq, &port_priv->work); - spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + process_backlog_mads(mad_agent_priv); + clear_mad_error_list(&timeout_list, IB_WC_RESP_TIMEOUT_ERR, + mad_agent_priv); + clear_mad_error_list(&cancel_list, IB_WC_WR_FLUSH_ERR, mad_agent_priv); } /* @@ -2557,65 +2927,73 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, struct ib_mad_private *mad) { unsigned long flags; - int post, ret; struct ib_mad_private *mad_priv; struct ib_sge sg_list; - struct ib_recv_wr recv_wr, *bad_recv_wr; + struct ib_recv_wr recv_wr; struct ib_mad_queue *recv_queue = &qp_info->recv_queue; + int ret = 0; /* Initialize common scatter list fields */ - sg_list.length = sizeof *mad_priv - sizeof mad_priv->header; - sg_list.lkey = (*qp_info->port_priv->mr).lkey; + sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey; /* Initialize common receive WR fields */ recv_wr.next = NULL; recv_wr.sg_list = &sg_list; recv_wr.num_sge = 1; - do { + while (true) { /* Allocate and map receive buffer */ if (mad) { mad_priv = mad; mad = NULL; } else { - mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL); - if (!mad_priv) { - printk(KERN_ERR PFX "No memory for receive buffer\n"); - ret = -ENOMEM; - break; - } + mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv), + GFP_ATOMIC); + if (!mad_priv) + return -ENOMEM; } + sg_list.length = mad_priv_dma_size(mad_priv); sg_list.addr = ib_dma_map_single(qp_info->port_priv->device, &mad_priv->grh, - sizeof *mad_priv - - sizeof mad_priv->header, + mad_priv_dma_size(mad_priv), DMA_FROM_DEVICE); + if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device, + sg_list.addr))) { + ret = -ENOMEM; + goto free_mad_priv; + } mad_priv->header.mapping = sg_list.addr; - recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list; mad_priv->header.mad_list.mad_queue = recv_queue; - - /* Post receive WR */ + mad_priv->header.mad_list.cqe.done = ib_mad_recv_done; + recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe; spin_lock_irqsave(&recv_queue->lock, flags); - post = (++recv_queue->count < recv_queue->max_active); - list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list); + if (recv_queue->count >= recv_queue->max_active) { + /* Fully populated the receive queue */ + spin_unlock_irqrestore(&recv_queue->lock, flags); + break; + } + recv_queue->count++; + list_add_tail(&mad_priv->header.mad_list.list, + &recv_queue->list); spin_unlock_irqrestore(&recv_queue->lock, flags); - ret = ib_post_recv(qp_info->qp, &recv_wr, &bad_recv_wr); + + ret = ib_post_recv(qp_info->qp, &recv_wr, NULL); if (ret) { spin_lock_irqsave(&recv_queue->lock, flags); list_del(&mad_priv->header.mad_list.list); recv_queue->count--; spin_unlock_irqrestore(&recv_queue->lock, flags); - ib_dma_unmap_single(qp_info->port_priv->device, - mad_priv->header.mapping, - sizeof *mad_priv - - sizeof mad_priv->header, - DMA_FROM_DEVICE); - kmem_cache_free(ib_mad_cache, mad_priv); - printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret); + dev_err(&qp_info->port_priv->device->dev, + "ib_post_recv failed: %d\n", ret); break; } - } while (post); + } + ib_dma_unmap_single(qp_info->port_priv->device, + mad_priv->header.mapping, + mad_priv_dma_size(mad_priv), DMA_FROM_DEVICE); +free_mad_priv: + kfree(mad_priv); return ret; } @@ -2646,10 +3024,9 @@ static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info) ib_dma_unmap_single(qp_info->port_priv->device, recv->header.mapping, - sizeof(struct ib_mad_private) - - sizeof(struct ib_mad_private_header), + mad_priv_dma_size(recv), DMA_FROM_DEVICE); - kmem_cache_free(ib_mad_cache, recv); + kfree(recv); } qp_info->recv_queue.count = 0; @@ -2663,12 +3040,16 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) int ret, i; struct ib_qp_attr *attr; struct ib_qp *qp; + u16 pkey_index; attr = kmalloc(sizeof *attr, GFP_KERNEL); - if (!attr) { - printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n"); + if (!attr) return -ENOMEM; - } + + ret = ib_find_pkey(port_priv->device, port_priv->port_num, + IB_DEFAULT_PKEY_FULL, &pkey_index); + if (ret) + pkey_index = 0; for (i = 0; i < IB_MAD_QPS_CORE; i++) { qp = port_priv->qp_info[i].qp; @@ -2680,21 +3061,23 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) * one is needed for the Reset to Init transition */ attr->qp_state = IB_QPS_INIT; - attr->pkey_index = 0; + attr->pkey_index = pkey_index; attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY; ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY); if (ret) { - printk(KERN_ERR PFX "Couldn't change QP%d state to " - "INIT: %d\n", i, ret); + dev_err(&port_priv->device->dev, + "Couldn't change QP%d state to INIT: %d\n", + i, ret); goto out; } attr->qp_state = IB_QPS_RTR; ret = ib_modify_qp(qp, attr, IB_QP_STATE); if (ret) { - printk(KERN_ERR PFX "Couldn't change QP%d state to " - "RTR: %d\n", i, ret); + dev_err(&port_priv->device->dev, + "Couldn't change QP%d state to RTR: %d\n", + i, ret); goto out; } @@ -2702,16 +3085,18 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) attr->sq_psn = IB_MAD_SEND_Q_PSN; ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN); if (ret) { - printk(KERN_ERR PFX "Couldn't change QP%d state to " - "RTS: %d\n", i, ret); + dev_err(&port_priv->device->dev, + "Couldn't change QP%d state to RTS: %d\n", + i, ret); goto out; } } ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP); if (ret) { - printk(KERN_ERR PFX "Failed to request completion " - "notification: %d\n", ret); + dev_err(&port_priv->device->dev, + "Failed to request completion notification: %d\n", + ret); goto out; } @@ -2721,7 +3106,8 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL); if (ret) { - printk(KERN_ERR PFX "Couldn't post receive WRs\n"); + dev_err(&port_priv->device->dev, + "Couldn't post receive WRs\n"); goto out; } } @@ -2735,7 +3121,8 @@ static void qp_event_handler(struct ib_event *event, void *qp_context) struct ib_mad_qp_info *qp_info = qp_context; /* It's worse than that! He's dead, Jim! */ - printk(KERN_ERR PFX "Fatal error (%d) on MAD QP (%d)\n", + dev_err(&qp_info->port_priv->device->dev, + "Fatal error (%d) on MAD QP (%u)\n", event->event, qp_info->qp->qp_num); } @@ -2755,10 +3142,6 @@ static void init_mad_qp(struct ib_mad_port_private *port_priv, init_mad_queue(qp_info, &qp_info->send_queue); init_mad_queue(qp_info, &qp_info->recv_queue); INIT_LIST_HEAD(&qp_info->overflow_list); - spin_lock_init(&qp_info->snoop_lock); - qp_info->snoop_table = NULL; - qp_info->snoop_table_size = 0; - atomic_set(&qp_info->snoop_count, 0); } static int create_mad_qp(struct ib_mad_qp_info *qp_info, @@ -2781,8 +3164,9 @@ static int create_mad_qp(struct ib_mad_qp_info *qp_info, qp_init_attr.event_handler = qp_event_handler; qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr); if (IS_ERR(qp_info->qp)) { - printk(KERN_ERR PFX "Couldn't create ib_mad QP%d\n", - get_spl_qp_index(qp_type)); + dev_err(&qp_info->port_priv->device->dev, + "Couldn't create ib_mad QP%d\n", + get_spl_qp_index(qp_type)); ret = PTR_ERR(qp_info->qp); goto error; } @@ -2801,7 +3185,6 @@ static void destroy_mad_qp(struct ib_mad_qp_info *qp_info) return; ib_destroy_qp(qp_info->qp); - kfree(qp_info->snoop_table); } /* @@ -2809,54 +3192,49 @@ static void destroy_mad_qp(struct ib_mad_qp_info *qp_info) * Create the QP, PD, MR, and CQ if needed */ static int ib_mad_port_open(struct ib_device *device, - int port_num) + u32 port_num) { int ret, cq_size; struct ib_mad_port_private *port_priv; unsigned long flags; - char name[sizeof "ib_mad123"]; int has_smi; + if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE)) + return -EFAULT; + + if (WARN_ON(rdma_cap_opa_mad(device, port_num) && + rdma_max_mad_size(device, port_num) < OPA_MGMT_MAD_SIZE)) + return -EFAULT; + /* Create new device info */ port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); - if (!port_priv) { - printk(KERN_ERR PFX "No memory for ib_mad_port_private\n"); + if (!port_priv) return -ENOMEM; - } port_priv->device = device; port_priv->port_num = port_num; spin_lock_init(&port_priv->reg_lock); - INIT_LIST_HEAD(&port_priv->agent_list); init_mad_qp(port_priv, &port_priv->qp_info[0]); init_mad_qp(port_priv, &port_priv->qp_info[1]); cq_size = mad_sendq_size + mad_recvq_size; - has_smi = rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND; + has_smi = rdma_cap_ib_smi(device, port_num); if (has_smi) cq_size *= 2; - port_priv->cq = ib_create_cq(port_priv->device, - ib_mad_thread_completion_handler, - NULL, port_priv, cq_size, 0); - if (IS_ERR(port_priv->cq)) { - printk(KERN_ERR PFX "Couldn't create ib_mad CQ\n"); - ret = PTR_ERR(port_priv->cq); - goto error3; - } - - port_priv->pd = ib_alloc_pd(device); + port_priv->pd = ib_alloc_pd(device, 0); if (IS_ERR(port_priv->pd)) { - printk(KERN_ERR PFX "Couldn't create ib_mad PD\n"); + dev_err(&device->dev, "Couldn't create ib_mad PD\n"); ret = PTR_ERR(port_priv->pd); - goto error4; + goto error3; } - port_priv->mr = ib_get_dma_mr(port_priv->pd, IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(port_priv->mr)) { - printk(KERN_ERR PFX "Couldn't get ib_mad DMA MR\n"); - ret = PTR_ERR(port_priv->mr); - goto error5; + port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0, + IB_POLL_UNBOUND_WORKQUEUE); + if (IS_ERR(port_priv->cq)) { + dev_err(&device->dev, "Couldn't create ib_mad CQ\n"); + ret = PTR_ERR(port_priv->cq); + goto error4; } if (has_smi) { @@ -2864,17 +3242,19 @@ static int ib_mad_port_open(struct ib_device *device, if (ret) goto error6; } - ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI); - if (ret) - goto error7; - snprintf(name, sizeof name, "ib_mad%d", port_num); - port_priv->wq = create_singlethread_workqueue(name); + if (rdma_cap_ib_cm(device, port_num)) { + ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI); + if (ret) + goto error7; + } + + port_priv->wq = alloc_ordered_workqueue("ib_mad%u", WQ_MEM_RECLAIM, + port_num); if (!port_priv->wq) { ret = -ENOMEM; goto error8; } - INIT_WORK(&port_priv->work, ib_mad_completion_handler); spin_lock_irqsave(&ib_mad_port_list_lock, flags); list_add_tail(&port_priv->port_list, &ib_mad_port_list); @@ -2882,7 +3262,7 @@ static int ib_mad_port_open(struct ib_device *device, ret = ib_mad_port_start(port_priv); if (ret) { - printk(KERN_ERR PFX "Couldn't start port\n"); + dev_err(&device->dev, "Couldn't start port\n"); goto error9; } @@ -2899,13 +3279,11 @@ error8: error7: destroy_mad_qp(&port_priv->qp_info[0]); error6: - ib_dereg_mr(port_priv->mr); -error5: - ib_dealloc_pd(port_priv->pd); -error4: - ib_destroy_cq(port_priv->cq); + ib_free_cq(port_priv->cq); cleanup_recv_queue(&port_priv->qp_info[1]); cleanup_recv_queue(&port_priv->qp_info[0]); +error4: + ib_dealloc_pd(port_priv->pd); error3: kfree(port_priv); @@ -2917,7 +3295,7 @@ error3: * If there are no classes using the port, free the port * resources (CQ, MR, PD, QP) and remove the port's info structure */ -static int ib_mad_port_close(struct ib_device *device, int port_num) +static int ib_mad_port_close(struct ib_device *device, u32 port_num) { struct ib_mad_port_private *port_priv; unsigned long flags; @@ -2926,7 +3304,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) port_priv = __ib_get_mad_port(device, port_num); if (port_priv == NULL) { spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); - printk(KERN_ERR PFX "Port %d not found\n", port_num); + dev_err(&device->dev, "Port %u not found\n", port_num); return -ENODEV; } list_del_init(&port_priv->port_list); @@ -2935,9 +3313,8 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) destroy_workqueue(port_priv->wq); destroy_mad_qp(&port_priv->qp_info[1]); destroy_mad_qp(&port_priv->qp_info[0]); - ib_dereg_mr(port_priv->mr); + ib_free_cq(port_priv->cq); ib_dealloc_pd(port_priv->pd); - ib_destroy_cq(port_priv->cq); cleanup_recv_queue(&port_priv->qp_info[1]); cleanup_recv_queue(&port_priv->qp_info[0]); /* XXX: Handle deallocation of MAD registration tables */ @@ -2947,78 +3324,67 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) return 0; } -static void ib_mad_init_device(struct ib_device *device) +static int ib_mad_init_device(struct ib_device *device) { - int start, end, i; + int start, i; + unsigned int count = 0; + int ret; - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; + start = rdma_start_port(device); - if (device->node_type == RDMA_NODE_IB_SWITCH) { - start = 0; - end = 0; - } else { - start = 1; - end = device->phys_port_cnt; - } + for (i = start; i <= rdma_end_port(device); i++) { + if (!rdma_cap_ib_mad(device, i)) + continue; - for (i = start; i <= end; i++) { - if (ib_mad_port_open(device, i)) { - printk(KERN_ERR PFX "Couldn't open %s port %d\n", - device->name, i); + ret = ib_mad_port_open(device, i); + if (ret) { + dev_err(&device->dev, "Couldn't open port %d\n", i); goto error; } - if (ib_agent_port_open(device, i)) { - printk(KERN_ERR PFX "Couldn't open %s port %d " - "for agents\n", - device->name, i); + ret = ib_agent_port_open(device, i); + if (ret) { + dev_err(&device->dev, + "Couldn't open port %d for agents\n", i); goto error_agent; } + count++; } - return; + if (!count) + return -EOPNOTSUPP; + + return 0; error_agent: if (ib_mad_port_close(device, i)) - printk(KERN_ERR PFX "Couldn't close %s port %d\n", - device->name, i); + dev_err(&device->dev, "Couldn't close port %d\n", i); error: - i--; + while (--i >= start) { + if (!rdma_cap_ib_mad(device, i)) + continue; - while (i >= start) { if (ib_agent_port_close(device, i)) - printk(KERN_ERR PFX "Couldn't close %s port %d " - "for agents\n", - device->name, i); + dev_err(&device->dev, + "Couldn't close port %d for agents\n", i); if (ib_mad_port_close(device, i)) - printk(KERN_ERR PFX "Couldn't close %s port %d\n", - device->name, i); - i--; + dev_err(&device->dev, "Couldn't close port %d\n", i); } + return ret; } -static void ib_mad_remove_device(struct ib_device *device) +static void ib_mad_remove_device(struct ib_device *device, void *client_data) { - int i, num_ports, cur_port; + unsigned int i; - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; + rdma_for_each_port (device, i) { + if (!rdma_cap_ib_mad(device, i)) + continue; - if (device->node_type == RDMA_NODE_IB_SWITCH) { - num_ports = 1; - cur_port = 0; - } else { - num_ports = device->phys_port_cnt; - cur_port = 1; - } - for (i = 0; i < num_ports; i++, cur_port++) { - if (ib_agent_port_close(device, cur_port)) - printk(KERN_ERR PFX "Couldn't close %s port %d " - "for agents\n", - device->name, cur_port); - if (ib_mad_port_close(device, cur_port)) - printk(KERN_ERR PFX "Couldn't close %s port %d\n", - device->name, cur_port); + if (ib_agent_port_close(device, i)) + dev_err(&device->dev, + "Couldn't close port %u for agents\n", i); + if (ib_mad_port_close(device, i)) + dev_err(&device->dev, "Couldn't close port %u\n", i); } } @@ -3028,48 +3394,25 @@ static struct ib_client mad_client = { .remove = ib_mad_remove_device }; -static int __init ib_mad_init_module(void) +int ib_mad_init(void) { - int ret; - mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE); mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE); mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE); mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE); - ib_mad_cache = kmem_cache_create("ib_mad", - sizeof(struct ib_mad_private), - 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!ib_mad_cache) { - printk(KERN_ERR PFX "Couldn't create ib_mad cache\n"); - ret = -ENOMEM; - goto error1; - } - INIT_LIST_HEAD(&ib_mad_port_list); if (ib_register_client(&mad_client)) { - printk(KERN_ERR PFX "Couldn't register ib_mad client\n"); - ret = -EINVAL; - goto error2; + pr_err("Couldn't register ib_mad client\n"); + return -EINVAL; } return 0; - -error2: - kmem_cache_destroy(ib_mad_cache); -error1: - return ret; } -static void __exit ib_mad_cleanup_module(void) +void ib_mad_cleanup(void) { ib_unregister_client(&mad_client); - kmem_cache_destroy(ib_mad_cache); } - -module_init(ib_mad_init_module); -module_exit(ib_mad_cleanup_module); |
