From 693dfd5a3f19efc44acf3a57217c0480e414f8ee Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Thu, 27 Apr 2017 17:01:34 +0300 Subject: IB/mlx5: Enable IPoIB acceleration Enable mlx5 IPoIB acceleration by declaring mlx5_ib_{alloc,free}_rdma_netdev and assigning the mlx5 IPoIB rdma_netdev callbacks. In addition, this patch brings in sync mlx5's IPoIB parts for net and IB trees. As a precaution, we disabled IPoIB acceleration by default (in the mlx5_core Kconfig file). Signed-off-by: Saeed Mahameed Signed-off-by: Erez Shitrit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 9f3ba320ce70..d45772da0963 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3530,6 +3530,26 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, return num_counters; } +static struct net_device* +mlx5_ib_alloc_rdma_netdev(struct ib_device *hca, + u8 port_num, + enum rdma_netdev_t type, + const char *name, + unsigned char name_assign_type, + void (*setup)(struct net_device *)) +{ + if (type != RDMA_NETDEV_IPOIB) + return ERR_PTR(-EOPNOTSUPP); + + return mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca, + name, setup); +} + +static void mlx5_ib_free_rdma_netdev(struct net_device *netdev) +{ + return mlx5_rdma_netdev_free(netdev); +} + static void *mlx5_ib_add(struct mlx5_core_dev *mdev) { struct mlx5_ib_dev *dev; @@ -3660,6 +3680,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; dev->ib_dev.get_port_immutable = mlx5_port_immutable; dev->ib_dev.get_dev_fw_str = get_dev_fw_str; + dev->ib_dev.alloc_rdma_netdev = mlx5_ib_alloc_rdma_netdev; + dev->ib_dev.free_rdma_netdev = mlx5_ib_free_rdma_netdev; if (mlx5_core_is_pf(mdev)) { dev->ib_dev.get_vf_config = 
mlx5_ib_get_vf_config; dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state; -- cgit From 0d7e2d2166f6b0b7d1959ca858052a15feb574cc Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Thu, 4 May 2017 01:24:51 -0400 Subject: IB/ipoib: add get_link_ksettings in ethtool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to let the bonding driver report the correct speed of the underlying interfaces, when they are IPoIB, the ethtool function get_link_ksettings() in the IPoIB driver is implemented. Cc: Joe Jin Cc: Junxiao Bi Suggested-by: Håkon Bugge Signed-off-by: Zhu Yanjun Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 59 ++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 379c02fb4181..874b24366e4d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -155,7 +155,66 @@ static int ipoib_get_sset_count(struct net_device __always_unused *dev, return -EOPNOTSUPP; } +/* Return lane speed in unit of 1e6 bit/sec */ +static inline int ib_speed_enum_to_int(int speed) +{ + switch (speed) { + case IB_SPEED_SDR: + return SPEED_2500; + case IB_SPEED_DDR: + return SPEED_5000; + case IB_SPEED_QDR: + case IB_SPEED_FDR10: + return SPEED_10000; + case IB_SPEED_FDR: + return SPEED_14000; + case IB_SPEED_EDR: + return SPEED_25000; + } + + return SPEED_UNKNOWN; +} + +static int ipoib_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) +{ + struct ipoib_dev_priv *priv = netdev_priv(netdev); + struct ib_port_attr attr; + int ret, speed, width; + + if (!netif_carrier_ok(netdev)) { + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; + return 0; + } + + ret = ib_query_port(priv->ca, priv->port, &attr); + if (ret < 0) + return -EINVAL; + + speed = 
ib_speed_enum_to_int(attr.active_speed); + width = ib_width_enum_to_int(attr.active_width); + + if (speed < 0 || width < 0) + return -EINVAL; + + /* Except the following are set, the other members of + * the struct ethtool_link_settings are initialized to + * zero in the function __ethtool_get_link_ksettings. + */ + cmd->base.speed = speed * width; + cmd->base.duplex = DUPLEX_FULL; + + cmd->base.phy_address = 0xFF; + + cmd->base.autoneg = AUTONEG_ENABLE; + cmd->base.port = PORT_OTHER; + + return 0; +} + static const struct ethtool_ops ipoib_ethtool_ops = { + .get_link_ksettings = ipoib_get_link_ksettings, .get_drvinfo = ipoib_get_drvinfo, .get_coalesce = ipoib_get_coalesce, .set_coalesce = ipoib_set_coalesce, -- cgit From 02d1008bcf41c7eada7882df673ae3794f7bfea8 Mon Sep 17 00:00:00 2001 From: Jakub Byczkowski Date: Thu, 4 May 2017 05:13:58 -0700 Subject: IB/hfi1: Fix checks for Offline transient state In goto_offline() function pstate is masked by 0xff when compared to PLS_OFFLINE state. Mask should be 0xf0, since upper 4 bits specify the "major" state. 
Reviewed-by: Dennis Dalessandro Reviewed-by: Easwar Hariharan Signed-off-by: Jakub Byczkowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 0f6916d2d549..c96bb3a0c788 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -10233,7 +10233,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) if (pstate == PLS_OFFLINE) { do_transition = 0; /* in right state */ do_wait = 0; /* ...no need to wait */ - } else if ((pstate & 0xff) == PLS_OFFLINE) { + } else if ((pstate & 0xf0) == PLS_OFFLINE) { do_transition = 0; /* in an offline transient state */ do_wait = 1; /* ...wait for it to settle */ } else { -- cgit From 688f21c0be9e3ddd6c5b2241daeb9fe978c0e081 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 4 May 2017 05:14:04 -0700 Subject: IB/hfi1, IB/rdmavt: Move r_adefered to r_lock cache line This field is causing excessive cache line bouncing. There are spare bytes in the r_lock cache line so the best approach is to make an rvt QP field and remove from the hfi1 priv field. 
Signed-off-by: Sebastian Sanchez Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/qp.c | 4 +--- drivers/infiniband/hw/hfi1/rc.c | 13 ++++--------- drivers/infiniband/hw/hfi1/verbs.h | 1 - 3 files changed, 5 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 4573e4c9f35c..650305cc0373 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -731,9 +731,7 @@ void quiesce_qp(struct rvt_qp *qp) void notify_qp_reset(struct rvt_qp *qp) { - struct hfi1_qp_priv *priv = qp->priv; - - priv->r_adefered = 0; + qp->r_adefered = 0; clear_ahg(qp); } diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 75a729cd0c3d..069bdaf061ab 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -727,10 +727,9 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, struct ib_header hdr; struct ib_other_headers *ohdr; unsigned long flags; - struct hfi1_qp_priv *priv = qp->priv; /* clear the defer count */ - priv->r_adefered = 0; + qp->r_adefered = 0; /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ if (qp->s_flags & RVT_S_RESP_PENDING) @@ -1604,9 +1603,7 @@ static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd, static inline void rc_cancel_ack(struct rvt_qp *qp) { - struct hfi1_qp_priv *priv = qp->priv; - - priv->r_adefered = 0; + qp->r_adefered = 0; if (list_empty(&qp->rspwait)) return; list_del_init(&qp->rspwait); @@ -2314,13 +2311,11 @@ send_last: qp->r_nak_state = 0; /* Send an ACK if requested or required. 
*/ if (psn & IB_BTH_REQ_ACK) { - struct hfi1_qp_priv *priv = qp->priv; - if (packet->numpkt == 0) { rc_cancel_ack(qp); goto send_ack; } - if (priv->r_adefered >= HFI1_PSN_CREDIT) { + if (qp->r_adefered >= HFI1_PSN_CREDIT) { rc_cancel_ack(qp); goto send_ack; } @@ -2328,7 +2323,7 @@ send_last: rc_cancel_ack(qp); goto send_ack; } - priv->r_adefered++; + qp->r_adefered++; rc_defered_ack(rcd, qp); } return; diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 52ff275caf54..c0913c6c8002 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -125,7 +125,6 @@ struct hfi1_qp_priv { struct sdma_engine *s_sde; /* current sde */ struct send_context *s_sendcontext; /* current sendcontext */ u8 s_sc; /* SC[0..4] for next packet */ - u8 r_adefered; /* number of acks defered */ struct iowait s_iowait; struct rvt_qp *owner; }; -- cgit From dd1ed1081750a1ce4daeeb53a0ae91af599ecdad Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 4 May 2017 05:14:10 -0700 Subject: IB/hfi1: Fix yield logic in send engine When there are many RC QPs and an RDMA READ request is sent, timeouts occur on the requester side because of fairness among RC QPs on their relative SDMA engine on the responder side. This also hits write and send, but to a lesser extent. Complicating the issue is that the current code checks if workqueue is congested before scheduling other QPs, however, this check is based on the number of active entries in the workqueue, which was found to be too big for workqueue_congested() to be effective. Fix by reducing the number of active entries as revealed by experimentation from the default of num_sdma to HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES. Retry counts were monitored to determine the correct value. Tracing to investigate any future issues is also added. 
Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 3 +- drivers/infiniband/hw/hfi1/ruc.c | 80 ++++++++++++++++++++++------------- drivers/infiniband/hw/hfi1/trace_tx.h | 34 +++++++++++++++ drivers/infiniband/hw/hfi1/verbs.h | 4 ++ 4 files changed, 90 insertions(+), 31 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 4d6b9f82efa3..71b0204ad027 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -70,6 +70,7 @@ #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt +#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5 /* * min buffers we want to have per context, after driver */ @@ -623,7 +624,7 @@ static int create_workqueues(struct hfi1_devdata *dd) alloc_workqueue( "hfi%d_%d", WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE, - dd->num_sdma, + HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES, dd->unit, pidx); if (!ppd->hfi1_wq) goto wq_error; diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 891ba0a81bbd..3a17daba28a9 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -800,6 +800,43 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, /* when sending, force a reschedule every one of these periods */ #define SEND_RESCHED_TIMEOUT (5 * HZ) /* 5s in jiffies */ +/** + * schedule_send_yield - test for a yield required for QP send engine + * @timeout: Final time for timeout slice for jiffies + * @qp: a pointer to QP + * @ps: a pointer to a structure with commonly lookup values for + * the the send engine progress + * + * This routine checks if the time slice for the QP has expired + * for RC QPs, if so an additional work entry is queued. At this + * point, other QPs have an opportunity to be scheduled. 
It + * returns true if a yield is required, otherwise, false + * is returned. + */ +static bool schedule_send_yield(struct rvt_qp *qp, + struct hfi1_pkt_state *ps) +{ + if (unlikely(time_after(jiffies, ps->timeout))) { + if (!ps->in_thread || + workqueue_congested(ps->cpu, ps->ppd->hfi1_wq)) { + spin_lock_irqsave(&qp->s_lock, ps->flags); + qp->s_flags &= ~RVT_S_BUSY; + hfi1_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, ps->flags); + this_cpu_inc(*ps->ppd->dd->send_schedule); + trace_hfi1_rc_expired_time_slice(qp, true); + return true; + } + + cond_resched(); + this_cpu_inc(*ps->ppd->dd->send_schedule); + ps->timeout = jiffies + ps->timeout_int; + } + + trace_hfi1_rc_expired_time_slice(qp, false); + return false; +} + void hfi1_do_send_from_rvt(struct rvt_qp *qp) { hfi1_do_send(qp, false); @@ -827,13 +864,13 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread) struct hfi1_pkt_state ps; struct hfi1_qp_priv *priv = qp->priv; int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps); - unsigned long timeout; - unsigned long timeout_int; - int cpu; ps.dev = to_idev(qp->ibqp.device); ps.ibp = to_iport(qp->ibqp.device, qp->port_num); ps.ppd = ppd_from_ibp(ps.ibp); + ps.in_thread = in_thread; + + trace_hfi1_rc_do_send(qp, in_thread); switch (qp->ibqp.qp_type) { case IB_QPT_RC: @@ -844,7 +881,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread) return; } make_req = hfi1_make_rc_req; - timeout_int = (qp->timeout_jiffies); + ps.timeout_int = qp->timeout_jiffies; break; case IB_QPT_UC: if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) & @@ -854,11 +891,11 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread) return; } make_req = hfi1_make_uc_req; - timeout_int = SEND_RESCHED_TIMEOUT; + ps.timeout_int = SEND_RESCHED_TIMEOUT; break; default: make_req = hfi1_make_ud_req; - timeout_int = SEND_RESCHED_TIMEOUT; + ps.timeout_int = SEND_RESCHED_TIMEOUT; } spin_lock_irqsave(&qp->s_lock, ps.flags); @@ -871,9 +908,11 @@ void hfi1_do_send(struct rvt_qp *qp, 
bool in_thread) qp->s_flags |= RVT_S_BUSY; - timeout = jiffies + (timeout_int) / 8; - cpu = priv->s_sde ? priv->s_sde->cpu : + ps.timeout_int = ps.timeout_int / 8; + ps.timeout = jiffies + ps.timeout_int; + ps.cpu = priv->s_sde ? priv->s_sde->cpu : cpumask_first(cpumask_of_node(ps.ppd->dd->node)); + /* insure a pre-built packet is handled */ ps.s_txreq = get_waiting_verbs_txreq(qp); do { @@ -889,28 +928,9 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread) /* Record that s_ahg is empty. */ qp->s_hdrwords = 0; /* allow other tasks to run */ - if (unlikely(time_after(jiffies, timeout))) { - if (!in_thread || - workqueue_congested( - cpu, - ps.ppd->hfi1_wq)) { - spin_lock_irqsave( - &qp->s_lock, - ps.flags); - qp->s_flags &= ~RVT_S_BUSY; - hfi1_schedule_send(qp); - spin_unlock_irqrestore( - &qp->s_lock, - ps.flags); - this_cpu_inc( - *ps.ppd->dd->send_schedule); - return; - } - cond_resched(); - this_cpu_inc( - *ps.ppd->dd->send_schedule); - timeout = jiffies + (timeout_int) / 8; - } + if (schedule_send_yield(qp, &ps)) + return; + spin_lock_irqsave(&qp->s_lock, ps.flags); } } while (make_req(qp, &ps)); diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h index 2c9ac57657d3..c59809a7f121 100644 --- a/drivers/infiniband/hw/hfi1/trace_tx.h +++ b/drivers/infiniband/hw/hfi1/trace_tx.h @@ -676,6 +676,40 @@ TRACE_EVENT( ) ); +DECLARE_EVENT_CLASS( + hfi1_do_send_template, + TP_PROTO(struct rvt_qp *qp, bool flag), + TP_ARGS(qp, flag), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) + __field(u32, qpn) + __field(bool, flag) + ), + TP_fast_assign( + DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) + __entry->qpn = qp->ibqp.qp_num; + __entry->flag = flag; + ), + TP_printk( + "[%s] qpn %x flag %d", + __get_str(dev), + __entry->qpn, + __entry->flag + ) +); + +DEFINE_EVENT( + hfi1_do_send_template, hfi1_rc_do_send, + TP_PROTO(struct rvt_qp *qp, bool flag), + TP_ARGS(qp, flag) +); + +DEFINE_EVENT( + hfi1_do_send_template, 
hfi1_rc_expired_time_slice, + TP_PROTO(struct rvt_qp *qp, bool flag), + TP_ARGS(qp, flag) +); + #endif /* __HFI1_TRACE_TX_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index c0913c6c8002..cd635d0c1d3b 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -139,6 +139,10 @@ struct hfi1_pkt_state { struct hfi1_pportdata *ppd; struct verbs_txreq *s_txreq; unsigned long flags; + unsigned long timeout; + unsigned long timeout_int; + int cpu; + bool in_thread; }; #define HFI1_PSN_CREDIT 16 -- cgit From ade6f8af52f5187e6af6f774cff43e0bc60902e4 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Thu, 4 May 2017 05:14:16 -0700 Subject: IB/hfi1: Get rid of divide when setting the tx request header Div instructions show up as costly in profiles when the tx request header is set. Using right shift instead of a divide operation reduces the cycles spent in the function that sets the tx request header as shown in the profile. Use right shift operation instead. Profile before change: 43.24% 009 | |--23.41%-- user_sdma_send_pkts | | | |--99.90%-- hfi1_user_sdma_process_request After: Profile after change: 45.75% 009 | |--14.81%-- user_sdma_send_pkts | | | |--99.95%-- hfi1_user_sdma_process_request Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 0749689d7643..8adb6dfeb2eb 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -143,7 +143,9 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. 
Default: 12 /* KDETH OM multipliers and switch over point */ #define KDETH_OM_SMALL 4 +#define KDETH_OM_SMALL_SHIFT 2 #define KDETH_OM_LARGE 64 +#define KDETH_OM_LARGE_SHIFT 6 #define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1)) /* Tx request flag bits */ @@ -228,12 +230,6 @@ struct user_sdma_request { * size of the TID entry. */ u32 tidoffset; - /* - * KDETH.OM - * Remember this because the header template always sets it - * to 0. - */ - u8 omfactor; /* * We copy the iovs for this request (based on * info.iovcnt). These are only the data vectors @@ -1323,6 +1319,7 @@ static int set_txreq_header(struct user_sdma_request *req, { struct hfi1_user_sdma_pkt_q *pq = req->pq; struct hfi1_pkt_header *hdr = &tx->hdr; + u8 omfactor; /* KDETH.OM */ u16 pbclen; int ret; u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen)); @@ -1400,8 +1397,9 @@ static int set_txreq_header(struct user_sdma_request *req, } tidval = req->tids[req->tididx]; } - req->omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >= - KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE : KDETH_OM_SMALL; + omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >= + KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE_SHIFT : + KDETH_OM_SMALL_SHIFT; /* Set KDETH.TIDCtrl based on value for this TID. */ KDETH_SET(hdr->kdeth.ver_tid_offset, TIDCTRL, EXP_TID_GET(tidval, CTRL)); @@ -1416,12 +1414,12 @@ static int set_txreq_header(struct user_sdma_request *req, * transfer. 
*/ SDMA_DBG(req, "TID offset %ubytes %uunits om%u", - req->tidoffset, req->tidoffset / req->omfactor, - req->omfactor != KDETH_OM_SMALL); + req->tidoffset, req->tidoffset >> omfactor, + omfactor != KDETH_OM_SMALL_SHIFT); KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET, - req->tidoffset / req->omfactor); + req->tidoffset >> omfactor); KDETH_SET(hdr->kdeth.ver_tid_offset, OM, - req->omfactor != KDETH_OM_SMALL); + omfactor != KDETH_OM_SMALL_SHIFT); } done: trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt, @@ -1433,6 +1431,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, struct user_sdma_txreq *tx, u32 len) { int diff = 0; + u8 omfactor; /* KDETH.OM */ struct hfi1_user_sdma_pkt_q *pq = req->pq; struct hfi1_pkt_header *hdr = &req->hdr; u16 pbclen = le16_to_cpu(hdr->pbc[0]); @@ -1484,14 +1483,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, } tidval = req->tids[req->tididx]; } - req->omfactor = ((EXP_TID_GET(tidval, LEN) * + omfactor = ((EXP_TID_GET(tidval, LEN) * PAGE_SIZE) >= - KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE : - KDETH_OM_SMALL; + KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT : + KDETH_OM_SMALL_SHIFT; /* KDETH.OM and KDETH.OFFSET (TID) */ AHG_HEADER_SET(req->ahg, diff, 7, 0, 16, - ((!!(req->omfactor - KDETH_OM_SMALL)) << 15 | - ((req->tidoffset / req->omfactor) & 0x7fff))); + ((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 | + ((req->tidoffset >> omfactor) + & 0x7fff))); /* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */ val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) | (EXP_TID_GET(tidval, IDX) & 0x3ff)); -- cgit From 9746fa439c63893195c7f285728617ab874b8bbd Mon Sep 17 00:00:00 2001 From: Tymoteusz Kielan Date: Thu, 4 May 2017 05:14:22 -0700 Subject: IB/hfi1: Adjust default eager_buffer_size to 8MB Performance analysis shows benefits for PSM2 in increasing eager buffer size from 2MB to 8MB. The change has neutral impact on verbs. Make change to the module parameter's default value. 
Allocation ring down was verified to work with the larger buffer size. Reviewed-by: Tadeusz Struk Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Tymoteusz Kielan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 71b0204ad027..c573f06fb7b9 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -102,9 +102,9 @@ static unsigned hfi1_rcvarr_split = 25; module_param_named(rcvarr_split, hfi1_rcvarr_split, uint, S_IRUGO); MODULE_PARM_DESC(rcvarr_split, "Percent of context's RcvArray entries used for Eager buffers"); -static uint eager_buffer_size = (2 << 20); /* 2MB */ +static uint eager_buffer_size = (8 << 20); /* 8MB */ module_param(eager_buffer_size, uint, S_IRUGO); -MODULE_PARM_DESC(eager_buffer_size, "Size of the eager buffers, default: 2MB"); +MODULE_PARM_DESC(eager_buffer_size, "Size of the eager buffers, default: 8MB"); static uint rcvhdrcnt = 2048; /* 2x the max eager buffer count */ module_param_named(rcvhdrcnt, rcvhdrcnt, uint, S_IRUGO); -- cgit From 94679061dcdddbafcf24e3bfb526e54dedcc2f2f Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 4 May 2017 05:14:28 -0700 Subject: IB/hfi1: Return an error on memory allocation failure If the eager buffer allocation fails, it is necessary to return an error code. Cc: stable@vger.kernel.org Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. 
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index c573f06fb7b9..b2db77626fc5 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1778,6 +1778,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) !HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR)) { dd_dev_err(dd, "ctxt%u: Failed to allocate eager buffers\n", rcd->ctxt); + ret = -ENOMEM; goto bail_rcvegrbuf_phys; } -- cgit From 224d71f910102c966cdcd782c97e096d5e26e4da Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 4 May 2017 05:14:34 -0700 Subject: IB/hfi1: Fix a subcontext memory leak The only context that frees user_exp_rcv data structures is the last context closed (from a sub-context set). This leaks the allocations from the other sub-contexts. Separate the common frees from the specific frees and call them at the appropriate time. Using KEDR to check for memory leaks we get: Before test: [leak_check] Possible leaks: 25 After test: [leak_check] Possible leaks: 31 (6 leaked data structures) After patch applied (before and after test have the same value) [leak_check] Possible leaks: 25 Each leak is 192 + 13440 + 6720 = 20352 bytes per sub-context. Cc: stable@vger.kernel.org Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. 
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 5 ++++- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 32 +++++++++++++++++-------------- drivers/infiniband/hw/hfi1/user_exp_rcv.h | 1 + 3 files changed, 23 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 3d9bce4bfcc7..afdf3ef8c069 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -756,6 +756,9 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) /* release the cpu */ hfi1_put_proc_affinity(fdata->rec_cpu_num); + /* clean up rcv side */ + hfi1_user_exp_rcv_free(fdata); + /* * Clear any left over, unhandled events so the next process that * gets this context doesn't get confused. @@ -795,7 +798,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) dd->rcd[uctxt->ctxt] = NULL; - hfi1_user_exp_rcv_free(fdata); + hfi1_user_exp_rcv_grp_free(uctxt); hfi1_clear_ctxt_pkey(dd, uctxt->ctxt); uctxt->rcvwait_to = 0; diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 35c6e7ec8ad6..c7f13df471c4 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -252,36 +252,40 @@ done: return ret; } +void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt) +{ + struct tid_group *grp, *gptr; + + list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list, + list) { + list_del_init(&grp->list); + kfree(grp); + } + hfi1_clear_tids(uctxt); +} + int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) { struct hfi1_ctxtdata *uctxt = fd->uctxt; - struct tid_group *grp, *gptr; - if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags)) - return 0; /* * The notifier would have been removed when the process'es mm * was freed. 
*/ - if (fd->handler) + if (fd->handler) { hfi1_mmu_rb_unregister(fd->handler); - - kfree(fd->invalid_tids); - - if (!uctxt->cnt) { + } else { if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list)) unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd); if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list)) unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd); - list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list, - list) { - list_del_init(&grp->list); - kfree(grp); - } - hfi1_clear_tids(uctxt); } + kfree(fd->invalid_tids); + fd->invalid_tids = NULL; + kfree(fd->entry_to_rb); + fd->entry_to_rb = NULL; return 0; } diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 9bc8d9fba87e..d1d7d3d3bd44 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -70,6 +70,7 @@ (tid) |= EXP_TID_SET(field, (value)); \ } while (0) +void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt); int hfi1_user_exp_rcv_init(struct file *); int hfi1_user_exp_rcv_free(struct hfi1_filedata *); int hfi1_user_exp_rcv_setup(struct file *, struct hfi1_tid_info *); -- cgit From f4cd876529194b2d3f653c645ba203688e9e4ba3 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 4 May 2017 05:14:39 -0700 Subject: IB/hfi1: Name function prototype parameters To improve the readability of function prototypes, give the parameters names. Reviewed-by: Dennis Dalessandro Signed-off-by: Michael J. 
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 8 ++-- drivers/infiniband/hw/hfi1/chip.h | 6 ++- drivers/infiniband/hw/hfi1/driver.c | 4 +- drivers/infiniband/hw/hfi1/file_ops.c | 54 ++++++++++++----------- drivers/infiniband/hw/hfi1/hfi.h | 72 ++++++++++++++++--------------- drivers/infiniband/hw/hfi1/init.c | 2 +- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 30 +++++++------ drivers/infiniband/hw/hfi1/user_exp_rcv.h | 12 +++--- drivers/infiniband/hw/hfi1/user_sdma.c | 62 +++++++++++++------------- drivers/infiniband/hw/hfi1/user_sdma.h | 13 +++--- 10 files changed, 143 insertions(+), 120 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index c96bb3a0c788..5aa95dac7c4f 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1055,7 +1055,7 @@ static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg); static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg); static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg); static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg); -static void set_partition_keys(struct hfi1_pportdata *); +static void set_partition_keys(struct hfi1_pportdata *ppd); static const char *link_state_name(u32 state); static const char *link_state_reason_name(struct hfi1_pportdata *ppd, u32 state); @@ -1068,9 +1068,9 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, int msecs); static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc); static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr); -static void handle_temp_err(struct hfi1_devdata *); -static void dc_shutdown(struct hfi1_devdata *); -static void dc_start(struct hfi1_devdata *); +static void handle_temp_err(struct hfi1_devdata *dd); +static void dc_shutdown(struct hfi1_devdata *dd); +static void 
dc_start(struct hfi1_devdata *dd); static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, unsigned int *np); static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd); diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index b9dbf16d7703..4c3c88d7ec65 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -636,7 +636,8 @@ static inline void write_uctxt_csr(struct hfi1_devdata *dd, int ctxt, write_csr(dd, offset0 + (0x1000 * ctxt), value); } -u64 create_pbc(struct hfi1_pportdata *ppd, u64, int, u32, u32); +u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl, + u32 dw_len); /* firmware.c */ #define SBUS_MASTER_BROADCAST 0xfd @@ -728,7 +729,8 @@ int bringup_serdes(struct hfi1_pportdata *ppd); void set_intr_state(struct hfi1_devdata *dd, u32 enable); void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, int refresh_widths); -void update_usrhead(struct hfi1_ctxtdata *, u32, u32, u32, u32, u32); +void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd, + u32 intr_adjust, u32 npkts); int stop_drain_data_vls(struct hfi1_devdata *dd); int open_fill_data_vls(struct hfi1_devdata *dd); u32 ns_to_cclock(struct hfi1_devdata *dd, u32 ns); diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 527895487175..fb319d2a579e 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -85,8 +85,8 @@ module_param_named(cu, hfi1_cu, uint, S_IRUGO); MODULE_PARM_DESC(cu, "Credit return units"); unsigned long hfi1_cap_mask = HFI1_CAP_MASK_DEFAULT; -static int hfi1_caps_set(const char *, const struct kernel_param *); -static int hfi1_caps_get(char *, const struct kernel_param *); +static int hfi1_caps_set(const char *val, const struct kernel_param *kp); +static int hfi1_caps_get(char *buffer, const struct kernel_param *kp); static const struct kernel_param_ops cap_ops = { .set = 
hfi1_caps_set, .get = hfi1_caps_get diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index afdf3ef8c069..0b77283078e5 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -70,30 +70,36 @@ /* * File operation functions */ -static int hfi1_file_open(struct inode *, struct file *); -static int hfi1_file_close(struct inode *, struct file *); -static ssize_t hfi1_write_iter(struct kiocb *, struct iov_iter *); -static unsigned int hfi1_poll(struct file *, struct poll_table_struct *); -static int hfi1_file_mmap(struct file *, struct vm_area_struct *); - -static u64 kvirt_to_phys(void *); -static int assign_ctxt(struct file *, struct hfi1_user_info *); -static int init_subctxts(struct hfi1_ctxtdata *, const struct hfi1_user_info *); -static int user_init(struct file *); -static int get_ctxt_info(struct file *, void __user *, __u32); -static int get_base_info(struct file *, void __user *, __u32); -static int setup_ctxt(struct file *); -static int setup_subctxt(struct hfi1_ctxtdata *); -static int get_user_context(struct file *, struct hfi1_user_info *, int); -static int find_shared_ctxt(struct file *, const struct hfi1_user_info *); -static int allocate_ctxt(struct file *, struct hfi1_devdata *, - struct hfi1_user_info *); -static unsigned int poll_urgent(struct file *, struct poll_table_struct *); -static unsigned int poll_next(struct file *, struct poll_table_struct *); -static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long); -static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16); -static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int); -static int vma_fault(struct vm_fault *); +static int hfi1_file_open(struct inode *inode, struct file *fp); +static int hfi1_file_close(struct inode *inode, struct file *fp); +static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from); +static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct 
*pt); +static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma); + +static u64 kvirt_to_phys(void *addr); +static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo); +static int init_subctxts(struct hfi1_ctxtdata *uctxt, + const struct hfi1_user_info *uinfo); +static int user_init(struct file *fp); +static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len); +static int get_base_info(struct file *fp, void __user *ubase, __u32 len); +static int setup_ctxt(struct file *fp); +static int setup_subctxt(struct hfi1_ctxtdata *uctxt); +static int get_user_context(struct file *fp, struct hfi1_user_info *uinfo, + int devno); +static int find_shared_ctxt(struct file *fp, + const struct hfi1_user_info *uinfo); +static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, + struct hfi1_user_info *uinfo); +static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt); +static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt); +static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt, + unsigned long events); +static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned subctxt, + u16 pkey); +static int manage_rcvq(struct hfi1_ctxtdata *uctxt, unsigned subctxt, + int start_stop); +static int vma_fault(struct vm_fault *vmf); static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, unsigned long arg); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index f06674317abf..a1e9b8b55c0a 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1263,27 +1263,28 @@ struct hfi1_devdata *hfi1_lookup(int unit); extern u32 hfi1_cpulist_count; extern unsigned long *hfi1_cpulist; -int hfi1_init(struct hfi1_devdata *, int); +int hfi1_init(struct hfi1_devdata *dd, int reinit); int hfi1_count_units(int *npresentp, int *nupp); int hfi1_count_active_units(void); -int hfi1_diag_add(struct hfi1_devdata *); -void hfi1_diag_remove(struct 
hfi1_devdata *); +int hfi1_diag_add(struct hfi1_devdata *dd); +void hfi1_diag_remove(struct hfi1_devdata *dd); void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup); void handle_user_interrupt(struct hfi1_ctxtdata *rcd); -int hfi1_create_rcvhdrq(struct hfi1_devdata *, struct hfi1_ctxtdata *); -int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *); +int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); +int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd); int hfi1_create_ctxts(struct hfi1_devdata *dd); -struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *, u32, int); -void hfi1_init_pportdata(struct pci_dev *, struct hfi1_pportdata *, - struct hfi1_devdata *, u8, u8); -void hfi1_free_ctxtdata(struct hfi1_devdata *, struct hfi1_ctxtdata *); - -int handle_receive_interrupt(struct hfi1_ctxtdata *, int); -int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int); -int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int); +struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, + int numa); +void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, + struct hfi1_devdata *dd, u8 hw_pidx, u8 port); +void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); + +int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread); +int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread); +int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread); void set_all_slowpath(struct hfi1_devdata *dd); void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd); void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd); @@ -1580,7 +1581,7 @@ bad: u32 lrh_max_header_bytes(struct hfi1_devdata *dd); int mtu_to_enum(u32 mtu, int default_if_bad); -u16 enum_to_mtu(int); +u16 enum_to_mtu(int mtu); static inline int valid_ib_mtu(unsigned int mtu) { return mtu == 256 || mtu == 512 || @@ -1594,15 +1595,15 @@ static inline int 
valid_opa_max_mtu(unsigned int mtu) (valid_ib_mtu(mtu) || mtu == 8192 || mtu == 10240); } -int set_mtu(struct hfi1_pportdata *); +int set_mtu(struct hfi1_pportdata *ppd); -int hfi1_set_lid(struct hfi1_pportdata *, u32, u8); -void hfi1_disable_after_error(struct hfi1_devdata *); -int hfi1_set_uevent_bits(struct hfi1_pportdata *, const int); -int hfi1_rcvbuf_validate(u32, u8, u16 *); +int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc); +void hfi1_disable_after_error(struct hfi1_devdata *dd); +int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit); +int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encode); -int fm_get_table(struct hfi1_pportdata *, int, void *); -int fm_set_table(struct hfi1_pportdata *, int, void *); +int fm_get_table(struct hfi1_pportdata *ppd, int which, void *t); +int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t); void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf); void reset_link_credits(struct hfi1_devdata *dd); @@ -1734,9 +1735,9 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd) #define HFI1_CTXT_WAITING_URG 5 /* free up any allocated data at closes */ -struct hfi1_devdata *hfi1_init_dd(struct pci_dev *, - const struct pci_device_id *); -void hfi1_free_devdata(struct hfi1_devdata *); +struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, + const struct pci_device_id *ent); +void hfi1_free_devdata(struct hfi1_devdata *dd); struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra); /* LED beaconing functions */ @@ -1811,24 +1812,25 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd) extern const char ib_hfi1_version[]; -int hfi1_device_create(struct hfi1_devdata *); -void hfi1_device_remove(struct hfi1_devdata *); +int hfi1_device_create(struct hfi1_devdata *dd); +void hfi1_device_remove(struct hfi1_devdata *dd); int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, struct kobject *kobj); -int hfi1_verbs_register_sysfs(struct 
hfi1_devdata *); -void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *); +int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd); +void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd); /* Hook for sysfs read of QSFP */ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len); -int hfi1_pcie_init(struct pci_dev *, const struct pci_device_id *); -void hfi1_pcie_cleanup(struct pci_dev *); -int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *); +int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent); +void hfi1_pcie_cleanup(struct pci_dev *pdev); +int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev); void hfi1_pcie_ddcleanup(struct hfi1_devdata *); -void hfi1_pcie_flr(struct hfi1_devdata *); -int pcie_speeds(struct hfi1_devdata *); -void request_msix(struct hfi1_devdata *, u32 *, struct hfi1_msix_entry *); -void hfi1_enable_intx(struct pci_dev *); +void hfi1_pcie_flr(struct hfi1_devdata *dd); +int pcie_speeds(struct hfi1_devdata *dd); +void request_msix(struct hfi1_devdata *dd, u32 *nent, + struct hfi1_msix_entry *entry); +void hfi1_enable_intx(struct pci_dev *pdev); void restore_pci_variables(struct hfi1_devdata *dd); int do_pcie_gen3_transition(struct hfi1_devdata *dd); int parse_platform_config(struct hfi1_devdata *dd); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index b2db77626fc5..e872644b0f10 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -118,7 +118,7 @@ unsigned int user_credit_return_threshold = 33; /* default is 33% */ module_param(user_credit_return_threshold, uint, S_IRUGO); MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user send contexts, return when unreturned credits passes this many blocks (in percent of allocated blocks, 0 is off)"); -static inline u64 encode_rcv_header_entry_size(u16); +static inline u64 encode_rcv_header_entry_size(u16 size); static struct idr hfi1_unit_table; u32 
hfi1_cpulist_count; diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index c7f13df471c4..6090933d5171 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -82,20 +82,26 @@ struct tid_pageset { (unsigned long)(len) - 1) & PAGE_MASK) - \ ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT)) -static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *, - struct hfi1_filedata *); -static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *); -static int set_rcvarray_entry(struct file *, unsigned long, u32, - struct tid_group *, struct page **, unsigned); -static int tid_rb_insert(void *, struct mmu_rb_node *); +static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, + struct exp_tid_set *set, + struct hfi1_filedata *fd); +static u32 find_phys_blocks(struct page **pages, unsigned npages, + struct tid_pageset *list); +static int set_rcvarray_entry(struct file *fp, unsigned long vaddr, + u32 rcventry, struct tid_group *grp, + struct page **pages, unsigned npages); +static int tid_rb_insert(void *arg, struct mmu_rb_node *node); static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, struct tid_rb_node *tnode); -static void tid_rb_remove(void *, struct mmu_rb_node *); -static int tid_rb_invalidate(void *, struct mmu_rb_node *); -static int program_rcvarray(struct file *, unsigned long, struct tid_group *, - struct tid_pageset *, unsigned, u16, struct page **, - u32 *, unsigned *, unsigned *); -static int unprogram_rcvarray(struct file *, u32, struct tid_group **); +static void tid_rb_remove(void *arg, struct mmu_rb_node *node); +static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode); +static int program_rcvarray(struct file *fp, unsigned long vaddr, + struct tid_group *grp, + struct tid_pageset *sets, + unsigned start, u16 count, struct page **pages, + u32 *tidlist, unsigned *tididx, unsigned *pmapped); +static int 
unprogram_rcvarray(struct file *fp, u32 tidinfo, + struct tid_group **grp); static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node); static struct mmu_rb_ops tid_rb_ops = { diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index d1d7d3d3bd44..8a33a09493b1 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -1,7 +1,7 @@ #ifndef _HFI1_USER_EXP_RCV_H #define _HFI1_USER_EXP_RCV_H /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -71,10 +71,10 @@ } while (0) void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt); -int hfi1_user_exp_rcv_init(struct file *); -int hfi1_user_exp_rcv_free(struct hfi1_filedata *); -int hfi1_user_exp_rcv_setup(struct file *, struct hfi1_tid_info *); -int hfi1_user_exp_rcv_clear(struct file *, struct hfi1_tid_info *); -int hfi1_user_exp_rcv_invalid(struct file *, struct hfi1_tid_info *); +int hfi1_user_exp_rcv_init(struct file *fp); +int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd); +int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo); +int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo); +int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo); #endif /* _HFI1_USER_EXP_RCV_H */ diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 8adb6dfeb2eb..17142c526c62 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
@@ -280,39 +280,43 @@ struct user_sdma_txreq { hfi1_cdbg(SDMA, "[%u:%u:%u] " fmt, (pq)->dd->unit, (pq)->ctxt, \ (pq)->subctxt, ##__VA_ARGS__) -static int user_sdma_send_pkts(struct user_sdma_request *, unsigned); -static int num_user_pages(const struct iovec *); -static void user_sdma_txreq_cb(struct sdma_txreq *, int); -static inline void pq_update(struct hfi1_user_sdma_pkt_q *); -static void user_sdma_free_request(struct user_sdma_request *, bool); -static int pin_vector_pages(struct user_sdma_request *, - struct user_sdma_iovec *); -static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned, - unsigned); -static int check_header_template(struct user_sdma_request *, - struct hfi1_pkt_header *, u32, u32); -static int set_txreq_header(struct user_sdma_request *, - struct user_sdma_txreq *, u32); -static int set_txreq_header_ahg(struct user_sdma_request *, - struct user_sdma_txreq *, u32); -static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *, - struct hfi1_user_sdma_comp_q *, - u16, enum hfi1_sdma_comp_state, int); -static inline u32 set_pkt_bth_psn(__be32, u8, u32); +static int user_sdma_send_pkts(struct user_sdma_request *req, + unsigned maxpkts); +static int num_user_pages(const struct iovec *iov); +static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status); +static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq); +static void user_sdma_free_request(struct user_sdma_request *req, bool unpin); +static int pin_vector_pages(struct user_sdma_request *req, + struct user_sdma_iovec *iovec); +static void unpin_vector_pages(struct mm_struct *mm, struct page **pages, + unsigned start, unsigned npages); +static int check_header_template(struct user_sdma_request *req, + struct hfi1_pkt_header *hdr, u32 lrhlen, + u32 datalen); +static int set_txreq_header(struct user_sdma_request *req, + struct user_sdma_txreq *tx, u32 datalen); +static int set_txreq_header_ahg(struct user_sdma_request *req, + struct user_sdma_txreq *tx, u32 
len); +static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq, + struct hfi1_user_sdma_comp_q *cq, + u16 idx, enum hfi1_sdma_comp_state state, + int ret); +static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags); static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len); static int defer_packet_queue( - struct sdma_engine *, - struct iowait *, - struct sdma_txreq *, - unsigned seq); -static void activate_packet_queue(struct iowait *, int); -static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long); -static int sdma_rb_insert(void *, struct mmu_rb_node *); + struct sdma_engine *sde, + struct iowait *wait, + struct sdma_txreq *txreq, + unsigned int seq); +static void activate_packet_queue(struct iowait *wait, int reason); +static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr, + unsigned long len); +static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode); static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode, void *arg2, bool *stop); -static void sdma_rb_remove(void *, struct mmu_rb_node *); -static int sdma_rb_invalidate(void *, struct mmu_rb_node *); +static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode); +static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode); static struct mmu_rb_ops sdma_rb_ops = { .filter = sdma_rb_filter, diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 39001714f551..95ec7832e5e9 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -1,5 +1,7 @@ +#ifndef _HFI1_USER_SDMA_H +#define _HFI1_USER_SDMA_H /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
@@ -78,7 +80,8 @@ struct hfi1_user_sdma_comp_q { struct hfi1_sdma_comp_entry *comps; }; -int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *, struct file *); -int hfi1_user_sdma_free_queues(struct hfi1_filedata *); -int hfi1_user_sdma_process_request(struct file *, struct iovec *, unsigned long, - unsigned long *); +int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp); +int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd); +int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, + unsigned long dim, unsigned long *count); +#endif /* _HFI1_USER_SDMA_H */ -- cgit From 5042cddfd058a2db419ba7aaadc866e8dccca06c Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 4 May 2017 05:14:45 -0700 Subject: IB/hfi1: Use filedata rather than filepointer Since almost all functions that use the hfi1_filedata get the pointer from the file pointer, simplify by only passing the hfi1_filedata pointer. Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Signed-off-by: Michael J. 
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 68 +++++++++++++++---------------- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 45 +++++++++----------- drivers/infiniband/hw/hfi1/user_exp_rcv.h | 11 +++-- drivers/infiniband/hw/hfi1/user_sdma.c | 14 +++---- drivers/infiniband/hw/hfi1/user_sdma.h | 9 ++-- 5 files changed, 72 insertions(+), 75 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 0b77283078e5..ab798a81d6b4 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -80,16 +80,18 @@ static u64 kvirt_to_phys(void *addr); static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo); static int init_subctxts(struct hfi1_ctxtdata *uctxt, const struct hfi1_user_info *uinfo); -static int user_init(struct file *fp); -static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len); -static int get_base_info(struct file *fp, void __user *ubase, __u32 len); -static int setup_ctxt(struct file *fp); +static int user_init(struct hfi1_filedata *fd); +static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase, + __u32 len); +static int get_base_info(struct hfi1_filedata *fd, void __user *ubase, + __u32 len); +static int setup_ctxt(struct hfi1_filedata *fd); static int setup_subctxt(struct hfi1_ctxtdata *uctxt); -static int get_user_context(struct file *fp, struct hfi1_user_info *uinfo, - int devno); -static int find_shared_ctxt(struct file *fp, +static int get_user_context(struct hfi1_filedata *fd, + struct hfi1_user_info *uinfo, int devno); +static int find_shared_ctxt(struct hfi1_filedata *fd, const struct hfi1_user_info *uinfo); -static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, +static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, struct hfi1_user_info *uinfo); static unsigned int poll_urgent(struct file *fp, 
struct poll_table_struct *pt); static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt); @@ -238,17 +240,17 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, ret = assign_ctxt(fp, &uinfo); if (ret < 0) return ret; - ret = setup_ctxt(fp); + ret = setup_ctxt(fd); if (ret) return ret; - ret = user_init(fp); + ret = user_init(fd); break; case HFI1_IOCTL_CTXT_INFO: - ret = get_ctxt_info(fp, (void __user *)(unsigned long)arg, + ret = get_ctxt_info(fd, (void __user *)(unsigned long)arg, sizeof(struct hfi1_ctxt_info)); break; case HFI1_IOCTL_USER_INFO: - ret = get_base_info(fp, (void __user *)(unsigned long)arg, + ret = get_base_info(fd, (void __user *)(unsigned long)arg, sizeof(struct hfi1_base_info)); break; case HFI1_IOCTL_CREDIT_UPD: @@ -262,7 +264,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, sizeof(tinfo))) return -EFAULT; - ret = hfi1_user_exp_rcv_setup(fp, &tinfo); + ret = hfi1_user_exp_rcv_setup(fd, &tinfo); if (!ret) { /* * Copy the number of tidlist entries we used @@ -284,7 +286,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, sizeof(tinfo))) return -EFAULT; - ret = hfi1_user_exp_rcv_clear(fp, &tinfo); + ret = hfi1_user_exp_rcv_clear(fd, &tinfo); if (ret) break; addr = arg + offsetof(struct hfi1_tid_info, tidcnt); @@ -299,7 +301,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, sizeof(tinfo))) return -EFAULT; - ret = hfi1_user_exp_rcv_invalid(fp, &tinfo); + ret = hfi1_user_exp_rcv_invalid(fd, &tinfo); if (ret) break; addr = arg + offsetof(struct hfi1_tid_info, tidcnt); @@ -436,7 +438,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) unsigned long count = 0; ret = hfi1_user_sdma_process_request( - kiocb->ki_filp, (struct iovec *)(from->iov + done), + fd, (struct iovec *)(from->iov + done), dim, &count); if (ret) { reqs = ret; @@ -863,7 +865,7 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) if (uinfo->subctxt_cnt) { 
struct hfi1_filedata *fd = fp->private_data; - ret = find_shared_ctxt(fp, uinfo); + ret = find_shared_ctxt(fd, uinfo); if (ret < 0) goto done_unlock; if (ret) { @@ -878,7 +880,7 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) */ if (!ret) { i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE; - ret = get_user_context(fp, uinfo, i_minor); + ret = get_user_context(fp->private_data, uinfo, i_minor); } done_unlock: mutex_unlock(&hfi1_mutex); @@ -886,8 +888,8 @@ done: return ret; } -static int get_user_context(struct file *fp, struct hfi1_user_info *uinfo, - int devno) +static int get_user_context(struct hfi1_filedata *fd, + struct hfi1_user_info *uinfo, int devno) { struct hfi1_devdata *dd = NULL; int devmax, npresent, nup; @@ -905,15 +907,14 @@ static int get_user_context(struct file *fp, struct hfi1_user_info *uinfo, else if (!dd->freectxts) return -EBUSY; - return allocate_ctxt(fp, dd, uinfo); + return allocate_ctxt(fd, dd, uinfo); } -static int find_shared_ctxt(struct file *fp, +static int find_shared_ctxt(struct hfi1_filedata *fd, const struct hfi1_user_info *uinfo) { int devmax, ndev, i; int ret = 0; - struct hfi1_filedata *fd = fp->private_data; devmax = hfi1_count_units(NULL, NULL); @@ -960,10 +961,9 @@ done: return ret; } -static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, +static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, struct hfi1_user_info *uinfo) { - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt; unsigned ctxt; int ret, numa; @@ -1113,10 +1113,9 @@ bail: return ret; } -static int user_init(struct file *fp) +static int user_init(struct hfi1_filedata *fd) { unsigned int rcvctrl_ops = 0; - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; /* make sure that the context has already been setup */ @@ -1179,10 +1178,10 @@ static int user_init(struct file *fp) return 0; } -static int get_ctxt_info(struct file *fp, void __user 
*ubase, __u32 len) +static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase, + __u32 len) { struct hfi1_ctxt_info cinfo; - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; int ret = 0; @@ -1220,9 +1219,8 @@ static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len) return ret; } -static int setup_ctxt(struct file *fp) +static int setup_ctxt(struct hfi1_filedata *fd) { - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; int ret = 0; @@ -1257,7 +1255,7 @@ static int setup_ctxt(struct file *fp) goto done; } - ret = hfi1_user_sdma_alloc_queues(uctxt, fp); + ret = hfi1_user_sdma_alloc_queues(uctxt, fd); if (ret) goto done; /* @@ -1269,7 +1267,7 @@ static int setup_ctxt(struct file *fp) * (due to the above wait_event_interruptible() until the master * is setup. */ - ret = hfi1_user_exp_rcv_init(fp); + ret = hfi1_user_exp_rcv_init(fd); if (ret) goto done; @@ -1278,10 +1276,10 @@ done: return ret; } -static int get_base_info(struct file *fp, void __user *ubase, __u32 len) +static int get_base_info(struct hfi1_filedata *fd, void __user *ubase, + __u32 len) { struct hfi1_base_info binfo; - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; ssize_t sz; diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 6090933d5171..22ffda8bf65f 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -87,7 +87,7 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, struct hfi1_filedata *fd); static u32 find_phys_blocks(struct page **pages, unsigned npages, struct tid_pageset *list); -static int set_rcvarray_entry(struct file *fp, unsigned long vaddr, +static int set_rcvarray_entry(struct hfi1_filedata *fd, unsigned long vaddr, u32 rcventry, struct tid_group *grp, struct page 
**pages, unsigned npages); static int tid_rb_insert(void *arg, struct mmu_rb_node *node); @@ -95,12 +95,11 @@ static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, struct tid_rb_node *tnode); static void tid_rb_remove(void *arg, struct mmu_rb_node *node); static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode); -static int program_rcvarray(struct file *fp, unsigned long vaddr, - struct tid_group *grp, - struct tid_pageset *sets, +static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr, + struct tid_group *grp, struct tid_pageset *sets, unsigned start, u16 count, struct page **pages, u32 *tidlist, unsigned *tididx, unsigned *pmapped); -static int unprogram_rcvarray(struct file *fp, u32 tidinfo, +static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, struct tid_group **grp); static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node); @@ -160,9 +159,8 @@ static inline void tid_group_move(struct tid_group *group, * receive caching. This needs to be done after the context has * been configured with the eager/expected RcvEntry counts. */ -int hfi1_user_exp_rcv_init(struct file *fp) +int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd) { - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; unsigned tidbase; @@ -361,10 +359,10 @@ static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index) * can fit into the group. If the group becomes fully * used, move it to tid_full_list. 
*/ -int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo) +int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, + struct hfi1_tid_info *tinfo) { int ret = 0, need_group = 0, pinned; - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; unsigned npages, ngroups, pageidx = 0, pageset_count, npagesets, @@ -461,7 +459,7 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo) struct tid_group *grp = tid_group_pop(&uctxt->tid_group_list); - ret = program_rcvarray(fp, vaddr, grp, pagesets, + ret = program_rcvarray(fd, vaddr, grp, pagesets, pageidx, dd->rcv_entries.group_size, pages, tidlist, &tididx, &mapped); /* @@ -507,7 +505,7 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo) unsigned use = min_t(unsigned, pageset_count - pageidx, grp->size - grp->used); - ret = program_rcvarray(fp, vaddr, grp, pagesets, + ret = program_rcvarray(fd, vaddr, grp, pagesets, pageidx, use, pages, tidlist, &tididx, &mapped); if (ret < 0) { @@ -557,7 +555,7 @@ nomem: * everything done so far so we don't leak resources. */ tinfo->tidlist = (unsigned long)&tidlist; - hfi1_user_exp_rcv_clear(fp, tinfo); + hfi1_user_exp_rcv_clear(fd, tinfo); tinfo->tidlist = 0; ret = -EFAULT; goto bail; @@ -581,10 +579,10 @@ bail: return ret > 0 ? 
0 : ret; } -int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo) +int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, + struct hfi1_tid_info *tinfo) { int ret = 0; - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; u32 *tidinfo; unsigned tididx; @@ -599,7 +597,7 @@ int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo) mutex_lock(&uctxt->exp_lock); for (tididx = 0; tididx < tinfo->tidcnt; tididx++) { - ret = unprogram_rcvarray(fp, tidinfo[tididx], NULL); + ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL); if (ret) { hfi1_cdbg(TID, "Failed to unprogram rcv array %d", ret); @@ -616,9 +614,9 @@ int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo) return ret; } -int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo) +int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd, + struct hfi1_tid_info *tinfo) { - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; unsigned long *ev = uctxt->dd->events + (((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) * @@ -733,7 +731,7 @@ static u32 find_phys_blocks(struct page **pages, unsigned npages, /** * program_rcvarray() - program an RcvArray group with receive buffers - * @fp: file pointer + * @fd: filedata pointer * @vaddr: starting user virtual address * @grp: RcvArray group * @sets: array of struct tid_pageset holding information on physically @@ -758,13 +756,12 @@ static u32 find_phys_blocks(struct page **pages, unsigned npages, * -ENOMEM or -EFAULT on error from set_rcvarray_entry(), or * number of RcvArray entries programmed. 
*/ -static int program_rcvarray(struct file *fp, unsigned long vaddr, +static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr, struct tid_group *grp, struct tid_pageset *sets, unsigned start, u16 count, struct page **pages, u32 *tidlist, unsigned *tididx, unsigned *pmapped) { - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; u16 idx; @@ -805,7 +802,7 @@ static int program_rcvarray(struct file *fp, unsigned long vaddr, npages = sets[setidx].count; pageidx = sets[setidx].idx; - ret = set_rcvarray_entry(fp, vaddr + (pageidx * PAGE_SIZE), + ret = set_rcvarray_entry(fd, vaddr + (pageidx * PAGE_SIZE), rcventry, grp, pages + pageidx, npages); if (ret) @@ -827,12 +824,11 @@ static int program_rcvarray(struct file *fp, unsigned long vaddr, return idx; } -static int set_rcvarray_entry(struct file *fp, unsigned long vaddr, +static int set_rcvarray_entry(struct hfi1_filedata *fd, unsigned long vaddr, u32 rcventry, struct tid_group *grp, struct page **pages, unsigned npages) { int ret; - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct tid_rb_node *node; struct hfi1_devdata *dd = uctxt->dd; @@ -886,10 +882,9 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr, return 0; } -static int unprogram_rcvarray(struct file *fp, u32 tidinfo, +static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, struct tid_group **grp) { - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; struct tid_rb_node *node; diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 8a33a09493b1..9787511b30b4 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -71,10 +71,13 @@ } while (0) void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt); -int 
hfi1_user_exp_rcv_init(struct file *fp); +int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd); int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd); -int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo); -int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo); -int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo); +int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, + struct hfi1_tid_info *tinfo); +int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, + struct hfi1_tid_info *tinfo); +int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd, + struct hfi1_tid_info *tinfo); #endif /* _HFI1_USER_EXP_RCV_H */ diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 17142c526c62..4ac4e9d6c464 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -372,9 +372,9 @@ static void sdma_kmem_cache_ctor(void *obj) memset(tx, 0, sizeof(*tx)); } -int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp) +int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, + struct hfi1_filedata *fd) { - struct hfi1_filedata *fd; int ret = 0; char buf[64]; struct hfi1_devdata *dd; @@ -382,13 +382,11 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp) struct hfi1_user_sdma_pkt_q *pq; unsigned long flags; - if (!uctxt || !fp) { + if (!uctxt || !fd) { ret = -EBADF; goto done; } - fd = fp->private_data; - if (!hfi1_sdma_comp_ring_size) { ret = -EINVAL; goto done; @@ -536,11 +534,11 @@ static u8 dlid_to_selector(u16 dlid) return mapping[hash]; } -int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, - unsigned long dim, unsigned long *count) +int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, + struct iovec *iovec, unsigned long dim, + unsigned long *count) { int ret = 0, i; - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct 
hfi1_user_sdma_pkt_q *pq = fd->pq; struct hfi1_user_sdma_comp_q *cq = fd->cq; diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 95ec7832e5e9..9181d7cbe8f6 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -80,8 +80,11 @@ struct hfi1_user_sdma_comp_q { struct hfi1_sdma_comp_entry *comps; }; -int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp); +int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, + struct hfi1_filedata *fd); int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd); -int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, - unsigned long dim, unsigned long *count); +int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, + struct iovec *iovec, unsigned long dim, + unsigned long *count); + #endif /* _HFI1_USER_SDMA_H */ -- cgit From 780a4c16aaacba3746a20d9138a5b1f467035ffc Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Thu, 4 May 2017 05:14:51 -0700 Subject: IB/hfi1: Remove atomic operations for SDMA_REQ_HAVE_AHG bit The AHG index is only accessed in the request call from user space, so there's no need for atomic semantics. Replace atomic operations for SDMA_REQ_HAVE_AHG bit with a test of the AHG index. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 4ac4e9d6c464..6b72267df9e7 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -155,9 +155,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. 
Default: 12 /* SDMA request flag bits */ #define SDMA_REQ_FOR_THREAD 1 #define SDMA_REQ_SEND_DONE 2 -#define SDMA_REQ_HAVE_AHG 3 -#define SDMA_REQ_HAS_ERROR 4 -#define SDMA_REQ_DONE_ERROR 5 +#define SDMA_REQ_HAS_ERROR 3 +#define SDMA_REQ_DONE_ERROR 4 #define SDMA_PKT_Q_INACTIVE BIT(0) #define SDMA_PKT_Q_ACTIVE BIT(1) @@ -216,7 +215,7 @@ struct user_sdma_request { * each request will need it's own engine pointer. */ struct sdma_engine *sde; - u8 ahg_idx; + s8 ahg_idx; u32 ahg[9]; /* * KDETH.Offset (Eager) field @@ -614,6 +613,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, req->pq = pq; req->cq = cq; req->status = -1; + req->ahg_idx = -1; INIT_LIST_HEAD(&req->txps); memcpy(&req->info, &info, sizeof(info)); @@ -764,14 +764,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, } /* We don't need an AHG entry if the request contains only one packet */ - if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG)) { - int ahg = sdma_ahg_alloc(req->sde); - - if (likely(ahg >= 0)) { - req->ahg_idx = (u8)ahg; - set_bit(SDMA_REQ_HAVE_AHG, &req->flags); - } - } + if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG)) + req->ahg_idx = sdma_ahg_alloc(req->sde); set_comp_state(pq, cq, info.comp_idx, QUEUED, 0); atomic_inc(&pq->n_reqs); @@ -989,7 +983,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) } } - if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) { + if (req->ahg_idx >= 0) { if (!req->seqnum) { u16 pbclen = le16_to_cpu(req->hdr.pbc[0]); u32 lrhlen = get_lrh_len(req->hdr, @@ -1119,7 +1113,7 @@ dosend: * happen due to the sequential manner in which * descriptors are processed. */ - if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) + if (req->ahg_idx >= 0) sdma_ahg_free(req->sde, req->ahg_idx); } return ret; -- cgit From 5fbded483c699823a7cca001a5c27ddc4d73370f Mon Sep 17 00:00:00 2001 From: "Michael J. 
Ruhl" Date: Thu, 4 May 2017 05:14:57 -0700 Subject: IB/hfi1: Search shared contexts on the opened device, not all devices The search for available shared contexts walks each registered hfi1 device. This search is too broad because other devices may not be on the same fabric, and using its contexts could cause unexpected behavior. Removed walking the list of devices, limiting the search to the opened device. With the device walk removed, the hfi1_devdata (dd) is not available. Added it to the hfi1_filedata for reference. With this change, hfi1_count_units() was rendered obsolete and was removed. Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/driver.c | 36 ---------- drivers/infiniband/hw/hfi1/file_ops.c | 122 +++++++++++++--------------------- drivers/infiniband/hw/hfi1/hfi.h | 2 +- 3 files changed, 49 insertions(+), 111 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index fb319d2a579e..566d152e36f2 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -210,42 +210,6 @@ int hfi1_count_active_units(void) return nunits_active; } -/* - * Return count of all units, optionally return in arguments - * the number of usable (present) units, and the number of - * ports that are up. 
- */ -int hfi1_count_units(int *npresentp, int *nupp) -{ - int nunits = 0, npresent = 0, nup = 0; - struct hfi1_devdata *dd; - unsigned long flags; - int pidx; - struct hfi1_pportdata *ppd; - - spin_lock_irqsave(&hfi1_devs_lock, flags); - - list_for_each_entry(dd, &hfi1_dev_list, list) { - nunits++; - if ((dd->flags & HFI1_PRESENT) && dd->kregbase) - npresent++; - for (pidx = 0; pidx < dd->num_pports; ++pidx) { - ppd = dd->pport + pidx; - if (ppd->lid && ppd->linkup) - nup++; - } - } - - spin_unlock_irqrestore(&hfi1_devs_lock, flags); - - if (npresentp) - *npresentp = npresent; - if (nupp) - *nupp = nup; - - return nunits; -} - /* * Get address of eager buffer from it's index (allocated in chunks, not * contiguous). diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index ab798a81d6b4..9574fc4722ff 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -77,7 +77,7 @@ static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt); static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma); static u64 kvirt_to_phys(void *addr); -static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo); +static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo); static int init_subctxts(struct hfi1_ctxtdata *uctxt, const struct hfi1_user_info *uinfo); static int user_init(struct hfi1_filedata *fd); @@ -87,8 +87,7 @@ static int get_base_info(struct hfi1_filedata *fd, void __user *ubase, __u32 len); static int setup_ctxt(struct hfi1_filedata *fd); static int setup_subctxt(struct hfi1_ctxtdata *uctxt); -static int get_user_context(struct hfi1_filedata *fd, - struct hfi1_user_info *uinfo, int devno); + static int find_shared_ctxt(struct hfi1_filedata *fd, const struct hfi1_user_info *uinfo); static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, @@ -181,6 +180,9 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) 
struct hfi1_devdata, user_cdev); + if (!((dd->flags & HFI1_PRESENT) && dd->kregbase)) + return -EINVAL; + if (!atomic_inc_not_zero(&dd->user_refcount)) return -ENXIO; @@ -195,6 +197,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) fd->rec_cpu_num = -1; /* no cpu affinity by default */ fd->mm = current->mm; mmgrab(fd->mm); + fd->dd = dd; fp->private_data = fd; } else { fp->private_data = NULL; @@ -237,7 +240,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, sizeof(uinfo))) return -EFAULT; - ret = assign_ctxt(fp, &uinfo); + ret = assign_ctxt(fd, &uinfo); if (ret < 0) return ret; ret = setup_ctxt(fd); @@ -847,9 +850,9 @@ static u64 kvirt_to_phys(void *addr) return paddr; } -static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) +static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) { - int i_minor, ret = 0; + int ret = 0; unsigned int swmajor, swminor; swmajor = uinfo->userversion >> 16; @@ -863,8 +866,6 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) mutex_lock(&hfi1_mutex); /* First, lets check if we need to setup a shared context? */ if (uinfo->subctxt_cnt) { - struct hfi1_filedata *fd = fp->private_data; - ret = find_shared_ctxt(fd, uinfo); if (ret < 0) goto done_unlock; @@ -878,94 +879,59 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) * We execute the following block if we couldn't find a * shared context or if context sharing is not required. 
*/ - if (!ret) { - i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE; - ret = get_user_context(fp->private_data, uinfo, i_minor); - } + if (!ret) + ret = allocate_ctxt(fd, fd->dd, uinfo); + done_unlock: mutex_unlock(&hfi1_mutex); done: return ret; } -static int get_user_context(struct hfi1_filedata *fd, - struct hfi1_user_info *uinfo, int devno) -{ - struct hfi1_devdata *dd = NULL; - int devmax, npresent, nup; - - devmax = hfi1_count_units(&npresent, &nup); - if (!npresent) - return -ENXIO; - - if (!nup) - return -ENETDOWN; - - dd = hfi1_lookup(devno); - if (!dd) - return -ENODEV; - else if (!dd->freectxts) - return -EBUSY; - - return allocate_ctxt(fd, dd, uinfo); -} - static int find_shared_ctxt(struct hfi1_filedata *fd, const struct hfi1_user_info *uinfo) { - int devmax, ndev, i; - int ret = 0; + int i; + struct hfi1_devdata *dd = fd->dd; - devmax = hfi1_count_units(NULL, NULL); + for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) { + struct hfi1_ctxtdata *uctxt = dd->rcd[i]; - for (ndev = 0; ndev < devmax; ndev++) { - struct hfi1_devdata *dd = hfi1_lookup(ndev); + /* Skip ctxts which are not yet open */ + if (!uctxt || !uctxt->cnt) + continue; - if (!(dd && (dd->flags & HFI1_PRESENT) && dd->kregbase)) + /* Skip dynamically allocted kernel contexts */ + if (uctxt->sc && (uctxt->sc->type == SC_KERNEL)) continue; - for (i = dd->first_dyn_alloc_ctxt; - i < dd->num_rcv_contexts; i++) { - struct hfi1_ctxtdata *uctxt = dd->rcd[i]; - - /* Skip ctxts which are not yet open */ - if (!uctxt || !uctxt->cnt) - continue; - - /* Skip dynamically allocted kernel contexts */ - if (uctxt->sc && (uctxt->sc->type == SC_KERNEL)) - continue; - - /* Skip ctxt if it doesn't match the requested one */ - if (memcmp(uctxt->uuid, uinfo->uuid, - sizeof(uctxt->uuid)) || - uctxt->jkey != generate_jkey(current_uid()) || - uctxt->subctxt_id != uinfo->subctxt_id || - uctxt->subctxt_cnt != uinfo->subctxt_cnt) - continue; - - /* Verify the sharing process matches the master */ 
- if (uctxt->userversion != uinfo->userversion || - uctxt->cnt >= uctxt->subctxt_cnt) { - ret = -EINVAL; - goto done; - } - fd->uctxt = uctxt; - fd->subctxt = uctxt->cnt++; - uctxt->active_slaves |= 1 << fd->subctxt; - ret = 1; - goto done; + + /* Skip ctxt if it doesn't match the requested one */ + if (memcmp(uctxt->uuid, uinfo->uuid, + sizeof(uctxt->uuid)) || + uctxt->jkey != generate_jkey(current_uid()) || + uctxt->subctxt_id != uinfo->subctxt_id || + uctxt->subctxt_cnt != uinfo->subctxt_cnt) + continue; + + /* Verify the sharing process matches the master */ + if (uctxt->userversion != uinfo->userversion || + uctxt->cnt >= uctxt->subctxt_cnt) { + return -EINVAL; } + fd->uctxt = uctxt; + fd->subctxt = uctxt->cnt++; + uctxt->active_slaves |= 1 << fd->subctxt; + return 1; } -done: - return ret; + return 0; } static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, struct hfi1_user_info *uinfo) { struct hfi1_ctxtdata *uctxt; - unsigned ctxt; + unsigned int ctxt; int ret, numa; if (dd->flags & HFI1_FROZEN) { @@ -979,6 +945,14 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, return -EIO; } + /* + * This check is sort of redundant to the next EBUSY error. It would + * also indicate an inconsistancy in the driver if this value was + * zero, but there were still contexts available. 
+ */ + if (!dd->freectxts) + return -EBUSY; + for (ctxt = dd->first_dyn_alloc_ctxt; ctxt < dd->num_rcv_contexts; ctxt++) if (!dd->rcd[ctxt]) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index a1e9b8b55c0a..e0c4c5ed7721 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1238,6 +1238,7 @@ struct mmu_rb_handler; /* Private data for file operations */ struct hfi1_filedata { + struct hfi1_devdata *dd; struct hfi1_ctxtdata *uctxt; unsigned subctxt; struct hfi1_user_sdma_comp_q *cq; @@ -1264,7 +1265,6 @@ extern u32 hfi1_cpulist_count; extern unsigned long *hfi1_cpulist; int hfi1_init(struct hfi1_devdata *dd, int reinit); -int hfi1_count_units(int *npresentp, int *nupp); int hfi1_count_active_units(void); int hfi1_diag_add(struct hfi1_devdata *dd); -- cgit From 637a9a7febf8b48d2ac1916f34d639aa81998a8a Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 4 May 2017 05:15:03 -0700 Subject: IB/hfi1: Correctly clear the pkey In the close path the context is removed from the device array, and then the clear pkey function is called. The pkey function tries to get the context from the device array, but because it was removed the clearing does not occur. Rework pkey clear function to work as expected. Update the function variable to reflect the correct size and name of the hw_context. Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J.
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 34 ++++++++++++++-------------------- drivers/infiniband/hw/hfi1/chip.h | 2 +- drivers/infiniband/hw/hfi1/file_ops.c | 2 +- drivers/infiniband/hw/hfi1/vnic_main.c | 2 +- 4 files changed, 17 insertions(+), 23 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 5aa95dac7c4f..9f72d55feb85 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -14528,30 +14528,24 @@ done: return ret; } -int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt) +int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt) { - struct hfi1_ctxtdata *rcd; - unsigned sctxt; - int ret = 0; + u8 hw_ctxt; u64 reg; - if (ctxt < dd->num_rcv_contexts) { - rcd = dd->rcd[ctxt]; - } else { - ret = -EINVAL; - goto done; - } - if (!rcd || !rcd->sc) { - ret = -EINVAL; - goto done; - } - sctxt = rcd->sc->hw_context; - reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE); + if (!ctxt || !ctxt->sc) + return -EINVAL; + + if (ctxt->ctxt >= dd->num_rcv_contexts) + return -EINVAL; + + hw_ctxt = ctxt->sc->hw_context; + reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE); reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK; - write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg); - write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0); -done: - return ret; + write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg); + write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0); + + return 0; } /* diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 4c3c88d7ec65..760aa2e17402 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1362,7 +1362,7 @@ int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val); int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned 
ctxt, u16 jkey); int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt); int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey); -int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt); +int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt); void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality); void hfi1_init_vnic_rsm(struct hfi1_devdata *dd); void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd); diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 9574fc4722ff..5a624401b817 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -810,7 +810,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) dd->rcd[uctxt->ctxt] = NULL; hfi1_user_exp_rcv_grp_free(uctxt); - hfi1_clear_ctxt_pkey(dd, uctxt->ctxt); + hfi1_clear_ctxt_pkey(dd, uctxt); uctxt->rcvwait_to = 0; uctxt->piowait_to = 0; diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index 392f4d57f3e3..b1572c795c35 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -209,7 +209,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, uctxt->event_flags = 0; hfi1_clear_tids(uctxt); - hfi1_clear_ctxt_pkey(dd, uctxt->ctxt); + hfi1_clear_ctxt_pkey(dd, uctxt); hfi1_stats.sps_ctxts--; hfi1_free_ctxtdata(dd, uctxt); -- cgit From 9b60d2cbe07486658a32d4ed2fff7085c44bae7a Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 4 May 2017 05:15:09 -0700 Subject: IB/hfi1: Clean up context initialization Context initialization mixes base context init with sub context init. This is bad because contexts can be reused, and on reuse, reinit things that should not be re-initialized. Normalize comments and function names to refer to base context and sub context (not main, shared or slaves). Separate the base context initialization from sub context initialization.
hfi1_init_ctxt() cannot return an error so changed to a void and remove error message. Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 3 +- drivers/infiniband/hw/hfi1/chip.h | 2 +- drivers/infiniband/hw/hfi1/file_ops.c | 179 +++++++++++++++--------------- drivers/infiniband/hw/hfi1/hfi.h | 13 +-- drivers/infiniband/hw/hfi1/init.c | 10 +- drivers/infiniband/hw/hfi1/trace_ctxts.h | 17 ++- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 84 +++++++------- drivers/infiniband/hw/hfi1/user_exp_rcv.h | 3 +- drivers/infiniband/hw/hfi1/vnic_main.c | 6 +- 9 files changed, 159 insertions(+), 158 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 9f72d55feb85..972da4185628 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -12662,7 +12662,7 @@ u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd) #define SET_STATIC_RATE_CONTROL_SMASK(r) \ (r |= SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK) -int hfi1_init_ctxt(struct send_context *sc) +void hfi1_init_ctxt(struct send_context *sc) { if (sc) { struct hfi1_devdata *dd = sc->dd; @@ -12679,7 +12679,6 @@ int hfi1_init_ctxt(struct send_context *sc) write_kctxt_csr(dd, sc->hw_context, SEND_CTXT_CHECK_ENABLE, reg); } - return 0; } int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp) diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 760aa2e17402..cbe455d9ab8b 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1349,7 +1349,7 @@ void hfi1_start_cleanup(struct hfi1_devdata *dd); void hfi1_clear_tids(struct hfi1_ctxtdata *rcd); struct ib_header *hfi1_get_msgheader( struct hfi1_devdata *dd, __le32 *rhf_addr); -int hfi1_init_ctxt(struct send_context *sc); +void hfi1_init_ctxt(struct send_context *sc); 
void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, u32 type, unsigned long pa, u16 order); void hfi1_quiet_serdes(struct hfi1_pportdata *ppd); diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 5a624401b817..467f876551ba 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -80,16 +80,17 @@ static u64 kvirt_to_phys(void *addr); static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo); static int init_subctxts(struct hfi1_ctxtdata *uctxt, const struct hfi1_user_info *uinfo); -static int user_init(struct hfi1_filedata *fd); +static int init_user_ctxt(struct hfi1_filedata *fd); +static int user_init(struct hfi1_ctxtdata *uctxt); static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase, __u32 len); static int get_base_info(struct hfi1_filedata *fd, void __user *ubase, __u32 len); -static int setup_ctxt(struct hfi1_filedata *fd); +static int setup_base_ctxt(struct hfi1_filedata *fd); static int setup_subctxt(struct hfi1_ctxtdata *uctxt); -static int find_shared_ctxt(struct hfi1_filedata *fd, - const struct hfi1_user_info *uinfo); +static int find_sub_ctxt(struct hfi1_filedata *fd, + const struct hfi1_user_info *uinfo); static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, struct hfi1_user_info *uinfo); static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt); @@ -241,12 +242,6 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, return -EFAULT; ret = assign_ctxt(fd, &uinfo); - if (ret < 0) - return ret; - ret = setup_ctxt(fd); - if (ret) - return ret; - ret = user_init(fd); break; case HFI1_IOCTL_CTXT_INFO: ret = get_ctxt_info(fd, (void __user *)(unsigned long)arg, @@ -856,40 +851,62 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) unsigned int swmajor, swminor; swmajor = uinfo->userversion >> 16; - if (swmajor != HFI1_USER_SWMAJOR) { - ret = -ENODEV; - 
goto done; - } + if (swmajor != HFI1_USER_SWMAJOR) + return -ENODEV; swminor = uinfo->userversion & 0xffff; mutex_lock(&hfi1_mutex); - /* First, lets check if we need to setup a shared context? */ + /* First, lets check if we need to get a sub context? */ if (uinfo->subctxt_cnt) { - ret = find_shared_ctxt(fd, uinfo); - if (ret < 0) - goto done_unlock; - if (ret) { + /* < 0 error, 0 no context, 1 sub-context found */ + ret = find_sub_ctxt(fd, uinfo); + if (ret > 0) { fd->rec_cpu_num = hfi1_get_proc_affinity(fd->uctxt->numa_id); } } /* - * We execute the following block if we couldn't find a - * shared context or if context sharing is not required. + * Allocate a base context f context sharing is not required or we + * couldn't find a sub context. */ if (!ret) ret = allocate_ctxt(fd, fd->dd, uinfo); -done_unlock: mutex_unlock(&hfi1_mutex); -done: + + /* Depending on the context type, do the appropriate init */ + if (ret > 0) { + /* + * sub-context info can only be set up after the base + * context has been completed. 
+ */ + ret = wait_event_interruptible(fd->uctxt->wait, !test_bit( + HFI1_CTXT_BASE_UNINIT, + &fd->uctxt->event_flags)); + /* The only thing a sub context needs is the user_xxx stuff */ + if (!ret) + init_user_ctxt(fd); + } else if (!ret) { + ret = setup_base_ctxt(fd); + + /* + * Base context is done, notify anybody using a sub-context + * that is waiting for this completion + */ + if (!ret && fd->uctxt->subctxt_cnt) { + clear_bit(HFI1_CTXT_BASE_UNINIT, + &fd->uctxt->event_flags); + wake_up(&fd->uctxt->wait); + } + } + return ret; } -static int find_shared_ctxt(struct hfi1_filedata *fd, - const struct hfi1_user_info *uinfo) +static int find_sub_ctxt(struct hfi1_filedata *fd, + const struct hfi1_user_info *uinfo) { int i; struct hfi1_devdata *dd = fd->dd; @@ -996,12 +1013,12 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, goto ctxdata_free; /* - * Setup shared context resources if the user-level has requested - * shared contexts and this is the 'master' process. + * Setup sub context resources if the user-level has requested + * sub contexts. * This has to be done here so the rest of the sub-contexts find the * proper master. 
*/ - if (uinfo->subctxt_cnt && !fd->subctxt) { + if (uinfo->subctxt_cnt) { ret = init_subctxts(uctxt, uinfo); /* * On error, we don't need to disable and de-allocate the @@ -1048,7 +1065,7 @@ static int init_subctxts(struct hfi1_ctxtdata *uctxt, uctxt->subctxt_id = uinfo->subctxt_id; uctxt->active_slaves = 1; uctxt->redirect_seq_cnt = 1; - set_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags); + set_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags); return 0; } @@ -1059,10 +1076,9 @@ static int setup_subctxt(struct hfi1_ctxtdata *uctxt) unsigned num_subctxts = uctxt->subctxt_cnt; uctxt->subctxt_uregbase = vmalloc_user(PAGE_SIZE); - if (!uctxt->subctxt_uregbase) { - ret = -ENOMEM; - goto bail; - } + if (!uctxt->subctxt_uregbase) + return -ENOMEM; + /* We can take the size of the RcvHdr Queue from the master */ uctxt->subctxt_rcvhdr_base = vmalloc_user(uctxt->rcvhdrq_size * num_subctxts); @@ -1077,24 +1093,22 @@ static int setup_subctxt(struct hfi1_ctxtdata *uctxt) ret = -ENOMEM; goto bail_rhdr; } - goto bail; + + return 0; + bail_rhdr: vfree(uctxt->subctxt_rcvhdr_base); + uctxt->subctxt_rcvhdr_base = NULL; bail_ureg: vfree(uctxt->subctxt_uregbase); uctxt->subctxt_uregbase = NULL; -bail: + return ret; } -static int user_init(struct hfi1_filedata *fd) +static int user_init(struct hfi1_ctxtdata *uctxt) { unsigned int rcvctrl_ops = 0; - struct hfi1_ctxtdata *uctxt = fd->uctxt; - - /* make sure that the context has already been setup */ - if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags)) - return -EFAULT; /* initialize poll variables... 
*/ uctxt->urgent = 0; @@ -1143,12 +1157,6 @@ static int user_init(struct hfi1_filedata *fd) rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS; hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt); - /* Notify any waiting slaves */ - if (uctxt->subctxt_cnt) { - clear_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags); - wake_up(&uctxt->wait); - } - return 0; } @@ -1193,59 +1201,52 @@ static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase, return ret; } -static int setup_ctxt(struct hfi1_filedata *fd) +static int init_user_ctxt(struct hfi1_filedata *fd) +{ + struct hfi1_ctxtdata *uctxt = fd->uctxt; + int ret; + + ret = hfi1_user_sdma_alloc_queues(uctxt, fd); + if (ret) + return ret; + + ret = hfi1_user_exp_rcv_init(fd); + + return ret; +} + +static int setup_base_ctxt(struct hfi1_filedata *fd) { struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; int ret = 0; - /* - * Context should be set up only once, including allocation and - * programming of eager buffers. This is done if context sharing - * is not requested or by the master process. - */ - if (!uctxt->subctxt_cnt || !fd->subctxt) { - ret = hfi1_init_ctxt(uctxt->sc); - if (ret) - goto done; + hfi1_init_ctxt(uctxt->sc); - /* Now allocate the RcvHdr queue and eager buffers. */ - ret = hfi1_create_rcvhdrq(dd, uctxt); - if (ret) - goto done; - ret = hfi1_setup_eagerbufs(uctxt); - if (ret) - goto done; - if (uctxt->subctxt_cnt && !fd->subctxt) { - ret = setup_subctxt(uctxt); - if (ret) - goto done; - } - } else { - ret = wait_event_interruptible(uctxt->wait, !test_bit( - HFI1_CTXT_MASTER_UNINIT, - &uctxt->event_flags)); - if (ret) - goto done; - } + /* Now allocate the RcvHdr queue and eager buffers. */ + ret = hfi1_create_rcvhdrq(dd, uctxt); + if (ret) + goto done; - ret = hfi1_user_sdma_alloc_queues(uctxt, fd); + ret = hfi1_setup_eagerbufs(uctxt); if (ret) goto done; - /* - * Expected receive has to be setup for all processes (including - * shared contexts). 
However, it has to be done after the master - * context has been fully configured as it depends on the - * eager/expected split of the RcvArray entries. - * Setting it up here ensures that the subcontexts will be waiting - * (due to the above wait_event_interruptible() until the master - * is setup. - */ - ret = hfi1_user_exp_rcv_init(fd); + + /* If sub-contexts are enabled, do the appropriate setup */ + if (uctxt->subctxt_cnt) + ret = setup_subctxt(uctxt); + if (ret) + goto done; + + ret = hfi1_user_exp_rcv_grp_init(fd); + if (ret) + goto done; + + ret = init_user_ctxt(fd); if (ret) goto done; - set_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags); + ret = user_init(uctxt); done: return ret; } @@ -1260,7 +1261,7 @@ static int get_base_info(struct hfi1_filedata *fd, void __user *ubase, unsigned offset; int ret = 0; - trace_hfi1_uctxtdata(uctxt->dd, uctxt); + trace_hfi1_uctxtdata(uctxt->dd, uctxt, fd->subctxt); memset(&binfo, 0, sizeof(binfo)); binfo.hw_version = dd->revision; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index e0c4c5ed7721..1b7203a3f1ce 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -224,13 +224,12 @@ struct hfi1_ctxtdata { * (ignoring forks, dup, etc. for now) */ int cnt; + /* Device context index */ + unsigned ctxt; /* - * how much space to leave at start of eager TID entries for - * protocol use, on each TID + * non-zero if ctxt can be shared, and defines the maximum number of + * sub contexts allowed. */ - /* instead of calculating it */ - unsigned ctxt; - /* non-zero if ctxt is being shared. */ u16 subctxt_cnt; /* non-zero if ctxt is being shared. 
*/ u16 subctxt_id; @@ -1725,12 +1724,10 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd) #define HFI1_PBC_LENGTH_MASK ((1 << 11) - 1) /* ctxt_flag bit offsets */ - /* context has been setup */ -#define HFI1_CTXT_SETUP_DONE 1 /* waiting for a packet to arrive */ #define HFI1_CTXT_WAITING_RCV 2 /* master has not finished initializing */ -#define HFI1_CTXT_MASTER_UNINIT 4 +#define HFI1_CTXT_BASE_UNINIT 4 /* waiting for an urgent packet to arrive */ #define HFI1_CTXT_WAITING_URG 5 diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index e872644b0f10..52a6364c30de 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -176,13 +176,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) goto nomem; } - ret = hfi1_init_ctxt(rcd->sc); - if (ret < 0) { - dd_dev_err(dd, - "Failed to setup kernel receive context, failing\n"); - ret = -EFAULT; - goto bail; - } + hfi1_init_ctxt(rcd->sc); } /* @@ -194,7 +188,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) return 0; nomem: ret = -ENOMEM; -bail: + if (dd->rcd) { for (i = 0; i < dd->num_rcv_contexts; ++i) hfi1_free_ctxtdata(dd, dd->rcd[i]); diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h index 26ae789e47cf..4eb4cc798035 100644 --- a/drivers/infiniband/hw/hfi1/trace_ctxts.h +++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h @@ -57,12 +57,14 @@ #define UCTXT_FMT \ "cred:%u, credaddr:0x%llx, piobase:0x%p, rcvhdr_cnt:%u, " \ - "rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx" + "rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx, subctxt_cnt:%u" TRACE_EVENT(hfi1_uctxtdata, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt), - TP_ARGS(dd, uctxt), + TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt, + unsigned int subctxt), + TP_ARGS(dd, uctxt, subctxt), TP_STRUCT__entry(DD_DEV_ENTRY(dd) __field(unsigned int, ctxt) + __field(unsigned int, subctxt) __field(u32, credits) __field(u64, hw_free) 
__field(void __iomem *, piobase) @@ -70,9 +72,11 @@ TRACE_EVENT(hfi1_uctxtdata, __field(u64, rcvhdrq_dma) __field(u32, eager_cnt) __field(u64, rcvegr_dma) + __field(unsigned int, subctxt_cnt) ), TP_fast_assign(DD_DEV_ASSIGN(dd); __entry->ctxt = uctxt->ctxt; + __entry->subctxt = subctxt; __entry->credits = uctxt->sc->credits; __entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free); __entry->piobase = uctxt->sc->base_addr; @@ -80,17 +84,20 @@ TRACE_EVENT(hfi1_uctxtdata, __entry->rcvhdrq_dma = uctxt->rcvhdrq_dma; __entry->eager_cnt = uctxt->egrbufs.alloced; __entry->rcvegr_dma = uctxt->egrbufs.rcvtids[0].dma; + __entry->subctxt_cnt = uctxt->subctxt_cnt; ), - TP_printk("[%s] ctxt %u " UCTXT_FMT, + TP_printk("[%s] ctxt %u:%u " UCTXT_FMT, __get_str(dev), __entry->ctxt, + __entry->subctxt, __entry->credits, __entry->hw_free, __entry->piobase, __entry->rcvhdrq_cnt, __entry->rcvhdrq_dma, __entry->eager_cnt, - __entry->rcvegr_dma + __entry->rcvegr_dma, + __entry->subctxt_cnt ) ); diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 22ffda8bf65f..4c66f8d57cc1 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -53,7 +53,7 @@ struct tid_group { struct list_head list; - unsigned base; + u32 base; u8 size; u8 used; u8 map; @@ -154,6 +154,40 @@ static inline void tid_group_move(struct tid_group *group, tid_group_add_tail(group, s2); } +int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd) +{ + struct hfi1_ctxtdata *uctxt = fd->uctxt; + struct hfi1_devdata *dd = fd->dd; + u32 tidbase; + u32 i; + + exp_tid_group_init(&uctxt->tid_group_list); + exp_tid_group_init(&uctxt->tid_used_list); + exp_tid_group_init(&uctxt->tid_full_list); + + tidbase = uctxt->expected_base; + for (i = 0; i < uctxt->expected_count / + dd->rcv_entries.group_size; i++) { + struct tid_group *grp; + + grp = kzalloc(sizeof(*grp), GFP_KERNEL); + if (!grp) { + /* + * If we fail here, the groups already + * 
allocated will be freed by the close + * call. + */ + return -ENOMEM; + } + grp->size = dd->rcv_entries.group_size; + grp->base = tidbase; + tid_group_add_tail(grp, &uctxt->tid_group_list); + tidbase += dd->rcv_entries.group_size; + } + + return 0; +} + /* * Initialize context and file private data needed for Expected * receive caching. This needs to be done after the context has @@ -163,42 +197,14 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd) { struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; - unsigned tidbase; - int i, ret = 0; + int ret = 0; spin_lock_init(&fd->tid_lock); spin_lock_init(&fd->invalid_lock); - if (!uctxt->subctxt_cnt || !fd->subctxt) { - exp_tid_group_init(&uctxt->tid_group_list); - exp_tid_group_init(&uctxt->tid_used_list); - exp_tid_group_init(&uctxt->tid_full_list); - - tidbase = uctxt->expected_base; - for (i = 0; i < uctxt->expected_count / - dd->rcv_entries.group_size; i++) { - struct tid_group *grp; - - grp = kzalloc(sizeof(*grp), GFP_KERNEL); - if (!grp) { - /* - * If we fail here, the groups already - * allocated will be freed by the close - * call. - */ - ret = -ENOMEM; - goto done; - } - grp->size = dd->rcv_entries.group_size; - grp->base = tidbase; - tid_group_add_tail(grp, &uctxt->tid_group_list); - tidbase += dd->rcv_entries.group_size; - } - } - fd->entry_to_rb = kcalloc(uctxt->expected_count, - sizeof(struct rb_node *), - GFP_KERNEL); + sizeof(struct rb_node *), + GFP_KERNEL); if (!fd->entry_to_rb) return -ENOMEM; @@ -207,10 +213,11 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd) fd->invalid_tids = kcalloc(uctxt->expected_count, sizeof(*fd->invalid_tids), GFP_KERNEL); - if (!fd->invalid_tids) { - ret = -ENOMEM; - goto done; - } + /* + * NOTE: If this is an error, shouldn't we cleanup enry_to_rb? + */ + if (!fd->invalid_tids) + return -ENOMEM; /* * Register MMU notifier callbacks. 
If the registration @@ -252,7 +259,7 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd) fd->tid_limit = uctxt->expected_count; } spin_unlock(&fd->tid_lock); -done: + return ret; } @@ -268,7 +275,7 @@ void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt) hfi1_clear_tids(uctxt); } -int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) +void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) { struct hfi1_ctxtdata *uctxt = fd->uctxt; @@ -290,7 +297,6 @@ int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) kfree(fd->entry_to_rb); fd->entry_to_rb = NULL; - return 0; } /* diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 9787511b30b4..5250c897298d 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -71,8 +71,9 @@ } while (0) void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt); +int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd); int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd); -int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd); +void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd); int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, struct hfi1_tid_info *tinfo); int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index b1572c795c35..b601c2929f8f 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -67,9 +67,7 @@ static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt) unsigned int rcvctrl_ops = 0; int ret; - ret = hfi1_init_ctxt(uctxt->sc); - if (ret) - goto done; + hfi1_init_ctxt(uctxt->sc); uctxt->do_interrupt = &handle_receive_interrupt; @@ -82,8 +80,6 @@ static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt) if (ret) goto done; - set_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags); - if (uctxt->rcvhdrtail_kvaddr) clear_rcvhdrtail(uctxt); 
-- cgit From 8737ce95c463c6d8c4307ab3d6858cbf71cd4fc8 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 4 May 2017 05:15:15 -0700 Subject: IB/hfi1: Fix an assign/ordering issue with shared context IDs The current algorithm for generating sub-context IDs is FILO. If the contexts are not closed in that order, the uniqueness of the ID will be compromised. I.e. logging the creation/deletion of context IDs with an application that assigns and closes in a FIFO order reveals: cache_id: assign: uctxt: 3 sub_ctxt: 0 cache_id: assign: uctxt: 3 sub_ctxt: 1 cache_id: assign: uctxt: 3 sub_ctxt: 2 cache_id: close: uctxt: 3 sub_ctxt: 0 cache_id: assign: uctxt: 3 sub_ctxt: 2 <<< The sub_ctxt ID 2 is reused incorrectly. Update the sub-context ID assign algorithm to use a bitmask of in_use contexts. The new algorithm will allow the contexts to be closed in any order, and will only re-use unused contexts. Size subctxt and subctxt_cnt to match the user API size. Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. 
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/driver.c | 2 +- drivers/infiniband/hw/hfi1/file_ops.c | 51 +++++++++++++++++++++------------- drivers/infiniband/hw/hfi1/hfi.h | 8 +++--- drivers/infiniband/hw/hfi1/init.c | 3 +- drivers/infiniband/hw/hfi1/intr.c | 3 +- drivers/infiniband/hw/hfi1/user_sdma.h | 2 +- 6 files changed, 41 insertions(+), 28 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 566d152e36f2..a50870e455a3 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1289,7 +1289,7 @@ int hfi1_reset_device(int unit) if (dd->rcd) for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) { - if (!dd->rcd[i] || !dd->rcd[i]->cnt) + if (!dd->rcd[i]) continue; spin_unlock_irqrestore(&dd->uctxt_lock, flags); ret = -EBUSY; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 467f876551ba..9c177ef79db5 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -49,6 +49,7 @@ #include #include #include +#include #include @@ -95,11 +96,10 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, struct hfi1_user_info *uinfo); static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt); static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt); -static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt, +static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt, unsigned long events); -static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned subctxt, - u16 pkey); -static int manage_rcvq(struct hfi1_ctxtdata *uctxt, unsigned subctxt, +static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey); +static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt, int start_stop); static int 
vma_fault(struct vm_fault *vmf); static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, @@ -773,8 +773,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) HFI1_MAX_SHARED_CTXTS) + fdata->subctxt; *ev = 0; - if (--uctxt->cnt) { - uctxt->active_slaves &= ~(1 << fdata->subctxt); + __clear_bit(fdata->subctxt, uctxt->in_use_ctxts); + if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) { mutex_unlock(&hfi1_mutex); goto done; } @@ -868,7 +868,7 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) } /* - * Allocate a base context f context sharing is not required or we + * Allocate a base context if context sharing is not required or we * couldn't find a sub context. */ if (!ret) @@ -905,17 +905,24 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) return ret; } +/* + * The hfi1_mutex must be held when this function is called. It is + * necessary to ensure serialized access to the bitmask in_use_ctxts. 
+ */ static int find_sub_ctxt(struct hfi1_filedata *fd, const struct hfi1_user_info *uinfo) { int i; struct hfi1_devdata *dd = fd->dd; + u16 subctxt; for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) { struct hfi1_ctxtdata *uctxt = dd->rcd[i]; /* Skip ctxts which are not yet open */ - if (!uctxt || !uctxt->cnt) + if (!uctxt || + bitmap_empty(uctxt->in_use_ctxts, + HFI1_MAX_SHARED_CTXTS)) continue; /* Skip dynamically allocted kernel contexts */ @@ -931,13 +938,19 @@ static int find_sub_ctxt(struct hfi1_filedata *fd, continue; /* Verify the sharing process matches the master */ - if (uctxt->userversion != uinfo->userversion || - uctxt->cnt >= uctxt->subctxt_cnt) { + if (uctxt->userversion != uinfo->userversion) return -EINVAL; - } + + /* Find an unused context */ + subctxt = find_first_zero_bit(uctxt->in_use_ctxts, + HFI1_MAX_SHARED_CTXTS); + if (subctxt >= uctxt->subctxt_cnt) + return -EINVAL; + fd->uctxt = uctxt; - fd->subctxt = uctxt->cnt++; - uctxt->active_slaves |= 1 << fd->subctxt; + fd->subctxt = subctxt; + __set_bit(fd->subctxt, uctxt->in_use_ctxts); + return 1; } @@ -1055,7 +1068,7 @@ ctxdata_free: static int init_subctxts(struct hfi1_ctxtdata *uctxt, const struct hfi1_user_info *uinfo) { - unsigned num_subctxts; + u16 num_subctxts; num_subctxts = uinfo->subctxt_cnt; if (num_subctxts > HFI1_MAX_SHARED_CTXTS) @@ -1063,7 +1076,6 @@ static int init_subctxts(struct hfi1_ctxtdata *uctxt, uctxt->subctxt_cnt = uinfo->subctxt_cnt; uctxt->subctxt_id = uinfo->subctxt_id; - uctxt->active_slaves = 1; uctxt->redirect_seq_cnt = 1; set_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags); @@ -1073,7 +1085,7 @@ static int init_subctxts(struct hfi1_ctxtdata *uctxt, static int setup_subctxt(struct hfi1_ctxtdata *uctxt) { int ret = 0; - unsigned num_subctxts = uctxt->subctxt_cnt; + u16 num_subctxts = uctxt->subctxt_cnt; uctxt->subctxt_uregbase = vmalloc_user(PAGE_SIZE); if (!uctxt->subctxt_uregbase) @@ -1425,7 +1437,7 @@ done: * overflow conditions. 
start_stop==1 re-enables, to be used to * re-init the software copy of the head register */ -static int manage_rcvq(struct hfi1_ctxtdata *uctxt, unsigned subctxt, +static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt, int start_stop) { struct hfi1_devdata *dd = uctxt->dd; @@ -1460,7 +1472,7 @@ bail: * User process then performs actions appropriate to bit having been * set, if desired, and checks again in future. */ -static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt, +static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt, unsigned long events) { int i; @@ -1481,8 +1493,7 @@ static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt, return 0; } -static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned subctxt, - u16 pkey) +static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey) { int ret = -ENOENT, i, intable = 0; struct hfi1_pportdata *ppd = uctxt->ppd; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 1b7203a3f1ce..f3d75fcd5f07 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -228,7 +228,7 @@ struct hfi1_ctxtdata { unsigned ctxt; /* * non-zero if ctxt can be shared, and defines the maximum number of - * sub contexts allowed. + * sub-contexts for this device context. */ u16 subctxt_cnt; /* non-zero if ctxt is being shared. 
*/ @@ -287,10 +287,10 @@ struct hfi1_ctxtdata { void *subctxt_rcvegrbuf; /* An array of pages for the eager header queue entries * N */ void *subctxt_rcvhdr_base; + /* Bitmask of in use context(s) */ + DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS); /* The version of the library which opened this ctxt */ u32 userversion; - /* Bitmask of active slaves */ - u32 active_slaves; /* Type of packets or conditions we want to poll for */ u16 poll_type; /* receive packet sequence counter */ @@ -1239,9 +1239,9 @@ struct mmu_rb_handler; struct hfi1_filedata { struct hfi1_devdata *dd; struct hfi1_ctxtdata *uctxt; - unsigned subctxt; struct hfi1_user_sdma_comp_q *cq; struct hfi1_user_sdma_pkt_q *pq; + u16 subctxt; /* for cpu affinity; -1 if none */ int rec_cpu_num; u32 tid_n_pinned; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 52a6364c30de..694a8ecf9f26 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include "hfi.h" @@ -222,7 +223,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, INIT_LIST_HEAD(&rcd->qp_wait_list); rcd->ppd = ppd; rcd->dd = dd; - rcd->cnt = 1; + __set_bit(0, rcd->in_use_ctxts); rcd->ctxt = ctxt; dd->rcd[ctxt] = rcd; rcd->numa_id = numa; diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index 232014d46f79..ba265d0ae93b 100644 --- a/drivers/infiniband/hw/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c @@ -47,6 +47,7 @@ #include #include +#include #include "hfi.h" #include "common.h" @@ -189,7 +190,7 @@ void handle_user_interrupt(struct hfi1_ctxtdata *rcd) unsigned long flags; spin_lock_irqsave(&dd->uctxt_lock, flags); - if (!rcd->cnt) + if (bitmap_empty(rcd->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) goto done; if (test_and_clear_bit(HFI1_CTXT_WAITING_RCV, &rcd->event_flags)) { diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h 
b/drivers/infiniband/hw/hfi1/user_sdma.h index 9181d7cbe8f6..e5b10aefe212 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -58,7 +58,7 @@ extern uint extended_psn; struct hfi1_user_sdma_pkt_q { struct list_head list; unsigned ctxt; - unsigned subctxt; + u16 subctxt; u16 n_max_reqs; atomic_t n_reqs; u16 reqidx; -- cgit From 62239fc6e5545b2e59f83dfbc5db231a81f37a45 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 4 May 2017 05:15:21 -0700 Subject: IB/hfi1: Clean up on context initialization failure The error path for context initialization is not consistent. Cleanup all resources on failure. Removed unused variable user_event_mask. Add the _BASE_FAILED bit to the event flags so that a base context can notify waiting sub contexts that they cannot continue. Running out of sub contexts is an EBUSY result, not EINVAL. Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 62 +++++++++++++++++++------------ drivers/infiniband/hw/hfi1/hfi.h | 16 +++----- drivers/infiniband/hw/hfi1/init.c | 10 ++--- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 31 +++++++++------- drivers/infiniband/hw/hfi1/user_sdma.c | 61 +++++++++++++++--------------- 5 files changed, 95 insertions(+), 85 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 9c177ef79db5..3158128d57e8 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -82,7 +82,7 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo); static int init_subctxts(struct hfi1_ctxtdata *uctxt, const struct hfi1_user_info *uinfo); static int init_user_ctxt(struct hfi1_filedata *fd); -static int user_init(struct hfi1_ctxtdata *uctxt); +static void user_init(struct hfi1_ctxtdata *uctxt); static int 
get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase, __u32 len); static int get_base_info(struct hfi1_filedata *fd, void __user *ubase, @@ -847,7 +847,7 @@ static u64 kvirt_to_phys(void *addr) static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) { - int ret = 0; + int ret; unsigned int swmajor, swminor; swmajor = uinfo->userversion >> 16; @@ -857,14 +857,16 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) swminor = uinfo->userversion & 0xffff; mutex_lock(&hfi1_mutex); - /* First, lets check if we need to get a sub context? */ + /* + * Get a sub context if necessary. + * ret < 0 error, 0 no context, 1 sub-context found + */ + ret = 0; if (uinfo->subctxt_cnt) { - /* < 0 error, 0 no context, 1 sub-context found */ ret = find_sub_ctxt(fd, uinfo); - if (ret > 0) { + if (ret > 0) fd->rec_cpu_num = hfi1_get_proc_affinity(fd->uctxt->numa_id); - } } /* @@ -885,17 +887,27 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) ret = wait_event_interruptible(fd->uctxt->wait, !test_bit( HFI1_CTXT_BASE_UNINIT, &fd->uctxt->event_flags)); + if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) { + clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); + return -ENOMEM; + } /* The only thing a sub context needs is the user_xxx stuff */ if (!ret) - init_user_ctxt(fd); + ret = init_user_ctxt(fd); + + if (ret) + clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); } else if (!ret) { ret = setup_base_ctxt(fd); - - /* - * Base context is done, notify anybody using a sub-context - * that is waiting for this completion - */ - if (!ret && fd->uctxt->subctxt_cnt) { + if (fd->uctxt->subctxt_cnt) { + /* If there is an error, set the failed bit. 
*/ + if (ret) + set_bit(HFI1_CTXT_BASE_FAILED, + &fd->uctxt->event_flags); + /* + * Base context is done, notify anybody using a + * sub-context that is waiting for this completion + */ clear_bit(HFI1_CTXT_BASE_UNINIT, &fd->uctxt->event_flags); wake_up(&fd->uctxt->wait); @@ -945,7 +957,7 @@ static int find_sub_ctxt(struct hfi1_filedata *fd, subctxt = find_first_zero_bit(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS); if (subctxt >= uctxt->subctxt_cnt) - return -EINVAL; + return -EBUSY; fd->uctxt = uctxt; fd->subctxt = subctxt; @@ -1118,7 +1130,7 @@ bail_ureg: return ret; } -static int user_init(struct hfi1_ctxtdata *uctxt) +static void user_init(struct hfi1_ctxtdata *uctxt) { unsigned int rcvctrl_ops = 0; @@ -1168,8 +1180,6 @@ static int user_init(struct hfi1_ctxtdata *uctxt) else rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS; hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt); - - return 0; } static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase, @@ -1238,28 +1248,32 @@ static int setup_base_ctxt(struct hfi1_filedata *fd) /* Now allocate the RcvHdr queue and eager buffers. 
*/ ret = hfi1_create_rcvhdrq(dd, uctxt); if (ret) - goto done; + return ret; ret = hfi1_setup_eagerbufs(uctxt); if (ret) - goto done; + goto setup_failed; /* If sub-contexts are enabled, do the appropriate setup */ if (uctxt->subctxt_cnt) ret = setup_subctxt(uctxt); if (ret) - goto done; + goto setup_failed; ret = hfi1_user_exp_rcv_grp_init(fd); if (ret) - goto done; + goto setup_failed; ret = init_user_ctxt(fd); if (ret) - goto done; + goto setup_failed; - ret = user_init(uctxt); -done: + user_init(uctxt); + + return 0; + +setup_failed: + hfi1_free_ctxtdata(dd, uctxt); return ret; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index f3d75fcd5f07..509df984a09f 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -196,12 +196,6 @@ struct hfi1_ctxtdata { void *rcvhdrq; /* kernel virtual address where hdrqtail is updated */ volatile __le64 *rcvhdrtail_kvaddr; - /* - * Shared page for kernel to signal user processes that send buffers - * need disarming. The process should call HFI1_CMD_DISARM_BUFS - * or HFI1_CMD_ACK_EVENT with IPATH_EVENT_DISARM_BUFS set. 
- */ - unsigned long *user_event_mask; /* when waiting for rcv or pioavail */ wait_queue_head_t wait; /* rcvhdrq size (for freeing) */ @@ -1724,12 +1718,14 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd) #define HFI1_PBC_LENGTH_MASK ((1 << 11) - 1) /* ctxt_flag bit offsets */ + /* base context has not finished initializing */ +#define HFI1_CTXT_BASE_UNINIT 1 + /* base context initaliation failed */ +#define HFI1_CTXT_BASE_FAILED 2 /* waiting for a packet to arrive */ -#define HFI1_CTXT_WAITING_RCV 2 - /* master has not finished initializing */ -#define HFI1_CTXT_BASE_UNINIT 4 +#define HFI1_CTXT_WAITING_RCV 3 /* waiting for an urgent packet to arrive */ -#define HFI1_CTXT_WAITING_URG 5 +#define HFI1_CTXT_WAITING_URG 4 /* free up any allocated data at closes */ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 694a8ecf9f26..4a11d4da4c92 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -964,7 +964,6 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) kfree(rcd->egrbufs.buffers); sc_free(rcd->sc); - vfree(rcd->user_event_mask); vfree(rcd->subctxt_uregbase); vfree(rcd->subctxt_rcvegrbuf); vfree(rcd->subctxt_rcvhdr_base); @@ -1683,8 +1682,6 @@ bail_free: dd_dev_err(dd, "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n", rcd->ctxt); - vfree(rcd->user_event_mask); - rcd->user_event_mask = NULL; dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq, rcd->rcvhdrq_dma); rcd->rcvhdrq = NULL; @@ -1851,7 +1848,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) "ctxt%u: current Eager buffer size is invalid %u\n", rcd->ctxt, rcd->egrbufs.rcvtid_size); ret = -EINVAL; - goto bail; + goto bail_rcvegrbuf_phys; } for (idx = 0; idx < rcd->egrbufs.alloced; idx++) { @@ -1859,7 +1856,8 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) rcd->egrbufs.rcvtids[idx].dma, order); 
cond_resched(); } - goto bail; + + return 0; bail_rcvegrbuf_phys: for (idx = 0; idx < rcd->egrbufs.alloced && @@ -1873,6 +1871,6 @@ bail_rcvegrbuf_phys: rcd->egrbufs.buffers[idx].dma = 0; rcd->egrbufs.buffers[idx].len = 0; } -bail: + return ret; } diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 4c66f8d57cc1..a8f0aa4722f6 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -160,6 +160,7 @@ int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd) struct hfi1_devdata *dd = fd->dd; u32 tidbase; u32 i; + struct tid_group *grp, *gptr; exp_tid_group_init(&uctxt->tid_group_list); exp_tid_group_init(&uctxt->tid_used_list); @@ -168,17 +169,10 @@ int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd) tidbase = uctxt->expected_base; for (i = 0; i < uctxt->expected_count / dd->rcv_entries.group_size; i++) { - struct tid_group *grp; - grp = kzalloc(sizeof(*grp), GFP_KERNEL); - if (!grp) { - /* - * If we fail here, the groups already - * allocated will be freed by the close - * call. - */ - return -ENOMEM; - } + if (!grp) + goto grp_failed; + grp->size = dd->rcv_entries.group_size; grp->base = tidbase; tid_group_add_tail(grp, &uctxt->tid_group_list); @@ -186,6 +180,15 @@ int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd) } return 0; + +grp_failed: + list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list, + list) { + list_del_init(&grp->list); + kfree(grp); + } + + return -ENOMEM; } /* @@ -213,11 +216,11 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd) fd->invalid_tids = kcalloc(uctxt->expected_count, sizeof(*fd->invalid_tids), GFP_KERNEL); - /* - * NOTE: If this is an error, shouldn't we cleanup enry_to_rb? - */ - if (!fd->invalid_tids) + if (!fd->invalid_tids) { + kfree(fd->entry_to_rb); + fd->entry_to_rb = NULL; return -ENOMEM; + } /* * Register MMU notifier callbacks. 
If the registration diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 6b72267df9e7..d55339f5d73b 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -374,40 +374,24 @@ static void sdma_kmem_cache_ctor(void *obj) int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct hfi1_filedata *fd) { - int ret = 0; + int ret = -ENOMEM; char buf[64]; struct hfi1_devdata *dd; struct hfi1_user_sdma_comp_q *cq; struct hfi1_user_sdma_pkt_q *pq; unsigned long flags; - if (!uctxt || !fd) { - ret = -EBADF; - goto done; - } + if (!uctxt || !fd) + return -EBADF; - if (!hfi1_sdma_comp_ring_size) { - ret = -EINVAL; - goto done; - } + if (!hfi1_sdma_comp_ring_size) + return -EINVAL; dd = uctxt->dd; pq = kzalloc(sizeof(*pq), GFP_KERNEL); if (!pq) - goto pq_nomem; - - pq->reqs = kcalloc(hfi1_sdma_comp_ring_size, - sizeof(*pq->reqs), - GFP_KERNEL); - if (!pq->reqs) - goto pq_reqs_nomem; - - pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size), - sizeof(*pq->req_in_use), - GFP_KERNEL); - if (!pq->req_in_use) - goto pq_reqs_no_in_use; + return -ENOMEM; INIT_LIST_HEAD(&pq->list); pq->dd = dd; @@ -423,10 +407,23 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, iowait_init(&pq->busy, 0, NULL, defer_packet_queue, activate_packet_queue, NULL); pq->reqidx = 0; + + pq->reqs = kcalloc(hfi1_sdma_comp_ring_size, + sizeof(*pq->reqs), + GFP_KERNEL); + if (!pq->reqs) + goto pq_reqs_nomem; + + pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size), + sizeof(*pq->req_in_use), + GFP_KERNEL); + if (!pq->req_in_use) + goto pq_reqs_no_in_use; + snprintf(buf, 64, "txreq-kmem-cache-%u-%u-%u", dd->unit, uctxt->ctxt, fd->subctxt); pq->txreq_cache = kmem_cache_create(buf, - sizeof(struct user_sdma_txreq), + sizeof(struct user_sdma_txreq), L1_CACHE_BYTES, SLAB_HWCACHE_ALIGN, sdma_kmem_cache_ctor); @@ -435,7 +432,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, 
uctxt->ctxt); goto pq_txreq_nomem; } - fd->pq = pq; + cq = kzalloc(sizeof(*cq), GFP_KERNEL); if (!cq) goto cq_nomem; @@ -446,20 +443,25 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, goto cq_comps_nomem; cq->nentries = hfi1_sdma_comp_ring_size; - fd->cq = cq; ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, dd->pport->hfi1_wq, &pq->handler); if (ret) { dd_dev_err(dd, "Failed to register with MMU %d", ret); - goto done; + goto pq_mmu_fail; } + fd->pq = pq; + fd->cq = cq; + spin_lock_irqsave(&uctxt->sdma_qlock, flags); list_add(&pq->list, &uctxt->sdma_queues); spin_unlock_irqrestore(&uctxt->sdma_qlock, flags); - goto done; + return 0; + +pq_mmu_fail: + vfree(cq->comps); cq_comps_nomem: kfree(cq); cq_nomem: @@ -470,10 +472,7 @@ pq_reqs_no_in_use: kfree(pq->reqs); pq_reqs_nomem: kfree(pq); - fd->pq = NULL; -pq_nomem: - ret = -ENOMEM; -done: + return ret; } -- cgit From af5df5fb59ee01e78fd334c3d6202d9e7bcfddd3 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 4 May 2017 15:36:07 +0300 Subject: IB/rxe: Update caller's CRC for RXE_MEM_TYPE_DMA memory type Callers of rxe_mem_copy() provide pointer to store updated CRC value. That pointer was supposed to be updated, but the commit cee2688e3cd6 ("IB/rxe: Offload CRC calculation when possible") mistakenly removed that assignment for RXE_MEM_TYPE_DMA memory type. The code worked because there are no actual callers with RXE_MEM_TYPE_DMA, who are interested in returned value of crcp. The one caller in read_reply(), who uses the returned crcp didn't set RXE_MEM_TYPE_DMA as mem->type. 
Fixes: cee2688e3cd6 ("IB/rxe: Offload CRC calculation when possible") Reported-by: Andrew Boyer Signed-off-by: Leon Romanovsky Acked-by: Moni Shoua Reviewed-by: Andrew Boyer Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index ced15c4446bd..e37cc89987e1 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -368,7 +368,7 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length, ((void *)(uintptr_t)iova) : addr; if (crcp) - crc = rxe_crc32(to_rdev(mem->pd->ibpd.device), + *crcp = rxe_crc32(to_rdev(mem->pd->ibpd.device), *crcp, src, length); memcpy(dest, src, length); -- cgit From 67cf3623e097706b0ca03bf79bf28d60c39591eb Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 4 May 2017 16:23:07 +0300 Subject: rxe: expose num_possible_cpus() num_comp_vectors They're completely logical, so don't impose an artificial limitation.
Signed-off-by: Sagi Grimberg Reviewed-by: Leon Romanovsky Acked-by: Moni Shoua Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_param.h | 1 - drivers/infiniband/sw/rxe/rxe_verbs.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index 13ed2cc6eaa2..1b596fbbe251 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -114,7 +114,6 @@ enum rxe_device_param { RXE_MAX_UCONTEXT = 512, RXE_NUM_PORT = 1, - RXE_NUM_COMP_VECTORS = 1, RXE_MIN_QP_INDEX = 16, RXE_MAX_QP_INDEX = 0x00020000, diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 299b0f8423f2..83d709e74dfb 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1239,7 +1239,7 @@ int rxe_register_device(struct rxe_dev *rxe) dev->owner = THIS_MODULE; dev->node_type = RDMA_NODE_IB_CA; dev->phys_port_cnt = 1; - dev->num_comp_vectors = RXE_NUM_COMP_VECTORS; + dev->num_comp_vectors = num_possible_cpus(); dev->dev.parent = rxe_dma_device(rxe); dev->local_dma_lkey = 0; addrconf_addr_eui48((unsigned char *)&dev->node_guid, -- cgit