From a7ca4c3ebe6994ba88eb23576b2c3901b08e1dec Mon Sep 17 00:00:00 2001 From: Jing Xiangfeng Date: Wed, 17 Jun 2020 22:08:03 +0800 Subject: IB/srpt: Remove WARN_ON from srpt_cm_req_recv The callers pass the pointer '&req' or 'private_data' to srpt_cm_req_recv(), and 'private_data' is initialized in srp_send_req(). 'sdev' is allocated and stored in srpt_add_one(). It's easy to show that sdev and req are always valid. So we remove unnecessary WARN_ON. Link: https://lore.kernel.org/r/20200617140803.181333-1-jingxiangfeng@huawei.com Signed-off-by: Jing Xiangfeng Reviewed-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index ef7fcd3e8e15..0fa65c683f09 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2156,9 +2156,6 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, WARN_ON_ONCE(irqs_disabled()); - if (WARN_ON(!sdev || !req)) - return -EINVAL; - it_iu_len = be32_to_cpu(req->req_it_iu_len); pr_info("Received SRP_LOGIN_REQ with i_port_id %pI6, t_port_id %pI6 and it_iu_len %d on port %d (guid=%pI6); pkey %#04x\n", -- cgit From 11708142bc3689d98a959e5da927d002c590be7b Mon Sep 17 00:00:00 2001 From: Colton Lewis Date: Sun, 21 Jun 2020 02:07:38 +0000 Subject: RDMA: Correct trivial kernel-doc inconsistencies Silence documentation build warnings by correcting kernel-doc comments. ./drivers/infiniband/core/verbs.c:1004: warning: Function parameter or member 'uobject' not described in 'ib_create_srq_user' ./drivers/infiniband/core/verbs.c:1004: warning: Function parameter or member 'udata' not described in 'ib_create_srq_user' ./drivers/infiniband/core/umem_odp.c:161: warning: Function parameter or member 'ops' not described in 'ib_umem_odp_alloc_child' ./drivers/infiniband/core/umem_odp.c:225: warning: Function parameter or member 'ops' not described in 'ib_umem_odp_get' ./drivers/infiniband/sw/rdmavt/ah.c:104: warning: Excess function parameter 'ah_attr' description in 'rvt_create_ah' ./drivers/infiniband/sw/rdmavt/ah.c:104: warning: Excess function parameter 'create_flags' description in 'rvt_create_ah' ./drivers/infiniband/ulp/iser/iscsi_iser.h:363: warning: Function parameter or member 'all_list' not described in 'iser_fr_desc' ./drivers/infiniband/ulp/iser/iscsi_iser.h:377: warning: Function parameter or member 'all_list' not described in 'iser_fr_pool' ./drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h:148: warning: Function parameter or member 'rsvd0' not described in 'opa_vesw_info' ./drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h:148: warning: Function parameter or member 'rsvd1' not described in 'opa_vesw_info' ./drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h:148: warning: Function parameter or member 'rsvd2' not described in 'opa_vesw_info' ./drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h:148: warning: Function parameter or member 'rsvd3' not described in 'opa_vesw_info' ./drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h:148: warning: Function parameter or member 'rsvd4' not described in 'opa_vesw_info' ./drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h:205: warning: Function parameter or member 'rsvd0' not described in 'opa_per_veswport_info' ./drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h:205: warning: Function parameter or member 'rsvd1' not described in 'opa_per_veswport_info' ./drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h:205: warning: Function 
parameter or member 'rsvd2' not described in 'opa_per_veswport_info' ./drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h:205: warning: Function parameter or member 'rsvd3' not described in 'opa_per_veswport_info' ./drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h:342: warning: Function parameter or member 'reserved' not described in 'opa_veswport_summary_counters' ./drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h:394: warning: Function parameter or member 'rsvd0' not described in 'opa_veswport_error_counters' ./drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h:394: warning: Function parameter or member 'rsvd1' not described in 'opa_veswport_error_counters' ./drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h:394: warning: Function parameter or member 'rsvd2' not described in 'opa_veswport_error_counters' ./drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h:394: warning: Function parameter or member 'rsvd3' not described in 'opa_veswport_error_counters' ./drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h:394: warning: Function parameter or member 'rsvd4' not described in 'opa_veswport_error_counters' ./drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h:394: warning: Function parameter or member 'rsvd5' not described in 'opa_veswport_error_counters' ./drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h:394: warning: Function parameter or member 'rsvd6' not described in 'opa_veswport_error_counters' ./drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h:394: warning: Function parameter or member 'rsvd7' not described in 'opa_veswport_error_counters' ./drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h:394: warning: Function parameter or member 'rsvd8' not described in 'opa_veswport_error_counters' ./drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h:394: warning: Function parameter or member 'rsvd9' not described in 'opa_veswport_error_counters' ./drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h:460: warning: Function parameter or member 'reserved' not described in 'opa_vnic_vema_mad' ./drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h:485: warning: Function parameter or member 'reserved' not described in 'opa_vnic_notice_attr' ./drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h:500: warning: Function parameter or member 'reserved' not described in 'opa_vnic_vema_mad_trap' Link: https://lore.kernel.org/r/5373936.DvuYhMxLoT@laptop.coltonlewis.name Signed-off-by: Colton Lewis Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem_odp.c | 2 ++ drivers/infiniband/core/verbs.c | 4 ++-- drivers/infiniband/sw/rdmavt/ah.c | 3 +-- drivers/infiniband/ulp/iser/iscsi_iser.h | 2 ++ drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h | 23 +++++++++++++++++++++++ 5 files changed, 30 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index ccd28405451c..5e32f61a2fe4 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -152,6 +152,7 @@ EXPORT_SYMBOL(ib_umem_odp_alloc_implicit); * ib_alloc_implicit_odp_umem() * @addr: The starting userspace VA * @size: The length of the userspace VA + * @ops: MMU interval ops, currently only @invalidate */ struct ib_umem_odp * ib_umem_odp_alloc_child(struct ib_umem_odp *root, unsigned long addr, @@ -213,6 +214,7 @@ EXPORT_SYMBOL(ib_umem_odp_alloc_child); * @addr: userspace virtual address to start at * @size: length of region to pin * @access: IB_ACCESS_xxx flags for memory being pinned + * @ops: MMU interval ops, currently only @invalidate * * The driver should use when the access flags 
indicate ODP memory. It avoids * pinning, instead, stores the mm for future page fault handling in diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 53d6505c0c7b..a3761d432a79 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -988,8 +988,8 @@ EXPORT_SYMBOL(rdma_destroy_ah_user); * @srq_init_attr: A list of initial attributes required to create the * SRQ. If SRQ creation succeeds, then the attributes are updated to * the actual capabilities of the created SRQ. - * @uobject - uobject pointer if this is not a kernel SRQ - * @udata - udata pointer if this is not a kernel SRQ + * @uobject: uobject pointer if this is not a kernel SRQ + * @udata: udata pointer if this is not a kernel SRQ * * srq_attr->max_wr and srq_attr->max_sge are read the determine the * requested size of the SRQ, and set to the actual values allocated diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index 40480add7dd3..75a04b1497c4 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -90,8 +90,7 @@ EXPORT_SYMBOL(rvt_check_ah); /** * rvt_create_ah - create an address handle * @ibah: the IB address handle - * @ah_attr: the attributes of the AH - * @create_flags: create address handle flags (see enum rdma_create_ah_flags) + * @init_attr: the attributes of the AH * @udata: pointer to user's input output buffer information. * * This may be called from interrupt context. diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 1d77c7f42e38..0fd8c243ec67 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -353,6 +353,7 @@ struct iser_reg_resources { * @list: entry in connection fastreg pool * @rsc: data buffer registration resources * @sig_protected: is region protected indicator + * @all_list: first and last list members */ struct iser_fr_desc { struct list_head list; @@ -367,6 +368,7 @@ struct iser_fr_desc { * @list: list of fastreg descriptors * @lock: protects fastreg pool * @size: size of the pool + * @all_list: first and last list members */ struct iser_fr_pool { struct list_head list; diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h index d324312a373c..f64519872297 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h @@ -118,12 +118,17 @@ * struct opa_vesw_info - OPA vnic switch information * @fabric_id: 10-bit fabric id * @vesw_id: 12-bit virtual ethernet switch id + * @rsvd0: reserved bytes * @def_port_mask: bitmask of default ports + * @rsvd1: reserved bytes * @pkey: partition key + * @rsvd2: reserved bytes * @u_mcast_dlid: unknown multicast dlid * @u_ucast_dlid: array of unknown unicast dlids + * @rsvd3: reserved bytes * @rc: routing control * @eth_mtu: Ethernet MTU + * @rsvd4: reserved bytes */ struct opa_vesw_info { __be16 fabric_id; @@ -150,12 +155,14 @@ struct opa_vesw_info { * struct opa_per_veswport_info - OPA vnic per port information * @port_num: port number * @eth_link_status: current ethernet link state + * @rsvd0: reserved bytes * @base_mac_addr: base mac address * @config_state: configured port state * @oper_state: operational port state * @max_mac_tbl_ent: max number of mac table entries * @max_smac_ent: max smac entries in mac table * @mac_tbl_digest: mac table digest + * @rsvd1: reserved bytes * @encap_slid: base slid for the port * 
@pcp_to_sc_uc: sc by pcp index for unicast ethernet packets * @pcp_to_vl_uc: vl by pcp index for unicast ethernet packets @@ -165,8 +172,10 @@ struct opa_vesw_info { * @non_vlan_vl_uc: vl for non-vlan unicast ethernet packets * @non_vlan_sc_mc: sc for non-vlan multicast ethernet packets * @non_vlan_vl_mc: vl for non-vlan multicast ethernet packets + * @rsvd2: reserved bytes * @uc_macs_gen_count: generation count for unicast macs list * @mc_macs_gen_count: generation count for multicast macs list + * @rsvd3: reserved bytes */ struct opa_per_veswport_info { __be32 port_num; @@ -294,6 +303,7 @@ struct opa_veswport_mactable { * @rx_512_1023: received packet length is >=512 and < 1023 bytes * @rx_1024_1518: received packet length is >=1024 and < 1518 bytes * @rx_1519_max: received packet length >= 1519 bytes + * @reserved: reserved bytes * * All the above are counters of corresponding conditions. */ @@ -347,16 +357,26 @@ struct opa_veswport_summary_counters { * @veswport_num: virtual ethernet switch port number * @tx_errors: transmit errors * @rx_errors: receive errors + * @rsvd0: reserved bytes * @tx_smac_filt: smac filter errors + * @rsvd1: reserved bytes + * @rsvd2: reserved bytes + * @rsvd3: reserved bytes * @tx_dlid_zero: transmit packets with invalid dlid + * @rsvd4: reserved bytes * @tx_logic: other transmit errors + * @rsvd5: reserved bytes * @tx_drop_state: packet tansmission in non-forward port state * @rx_bad_veswid: received packet with invalid vesw id + * @rsvd6: reserved bytes * @rx_runt: received ethernet packet with length < 64 bytes * @rx_oversize: received ethernet packet with length > MTU size + * @rsvd7: reserved bytes * @rx_eth_down: received packets when interface is down * @rx_drop_state: received packets in non-forwarding port state * @rx_logic: other receive errors + * @rsvd8: reserved bytes + * @rsvd9: reserved bytes * * All the above are counters of corresponding error conditions. */ @@ -447,6 +467,7 @@ struct opa_veswport_iface_macs { * struct opa_vnic_vema_mad - Generic VEMA MAD * @mad_hdr: Generic MAD header * @rmpp_hdr: RMPP header for vendor specific MADs + * @reserved: reserved bytes * @oui: Unique org identifier * @data: MAD data */ @@ -467,6 +488,7 @@ struct opa_vnic_vema_mad { * @trap_num: Trap number * @toggle_count: Notice toggle bit and count value * @issuer_lid: Trap issuer's lid + * @reserved: reserved bytes * @issuer_gid: Issuer GID (only if Report method) * @raw_data: Trap message body */ @@ -487,6 +509,7 @@ struct opa_vnic_notice_attr { * struct opa_vnic_vema_mad_trap - Generic VEMA MAD Trap * @mad_hdr: Generic MAD header * @rmpp_hdr: RMPP header for vendor specific MADs + * @reserved: reserved bytes * @oui: Unique org identifier * @notice: Notice structure */ -- cgit From 90cdff90dfb507b36128a4b2bcf4721d441cd697 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 22 Jun 2020 12:22:56 +0300 Subject: RDMA/ipoib: Return void from ipoib_mcast_stop_thread() The return value from ipoib_mcast_stop_thread() is always 0 - change it to be void. 
Link: https://lore.kernel.org/r/20200622092256.6931-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib.h | 2 +- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 9a3379c49541..15f519ce7e0b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -527,7 +527,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb); void ipoib_mcast_restart_task(struct work_struct *work); void ipoib_mcast_start_thread(struct net_device *dev); -int ipoib_mcast_stop_thread(struct net_device *dev); +void ipoib_mcast_stop_thread(struct net_device *dev); void ipoib_mcast_dev_down(struct net_device *dev); void ipoib_mcast_dev_flush(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 9bfa514473d5..86e4ed64e4e2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -680,15 +680,13 @@ void ipoib_mcast_start_thread(struct net_device *dev) spin_unlock_irqrestore(&priv->lock, flags); } -int ipoib_mcast_stop_thread(struct net_device *dev) +void ipoib_mcast_stop_thread(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); ipoib_dbg_mcast(priv, "stopping multicast thread\n"); cancel_delayed_work_sync(&priv->mcast_task); - - return 0; } static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) -- cgit From d5fdffe23995895a2141bcc0b89117e8cb867ccf Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 22 Jun 2020 12:47:09 +0300 Subject: RDMA/hfi1: Remove hfi1_create_qp declaration The function isn't implemented - delete the declaration. Link: https://lore.kernel.org/r/20200622094709.12981-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/qp.h | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h index b670321365d3..b0d053d12129 100644 --- a/drivers/infiniband/hw/hfi1/qp.h +++ b/drivers/infiniband/hw/hfi1/qp.h @@ -112,20 +112,6 @@ static inline void clear_ahg(struct rvt_qp *qp) qp->s_ahgidx = -1; } -/** - * hfi1_create_qp - create a queue pair for a device - * @ibpd: the protection domain who's device we create the queue pair for - * @init_attr: the attributes of the queue pair - * @udata: user data for libibverbs.so - * - * Returns the queue pair on success, otherwise returns an errno. - * - * Called by the ib_create_qp() core verbs function. - */ -struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata); - /** * hfi1_qp_wakeup - wake up on the indicated event * @qp: the QP -- cgit From f6b4c11fc527b9ca98c219fb9a465a08278d4c2b Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 22 Jun 2020 13:07:31 +0300 Subject: RDMA/rxe: Remove unused rxe_mem_map_pages This function is not in use - delete it. 
Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20200622100731.27359-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_loc.h | 3 --- drivers/infiniband/sw/rxe/rxe_mr.c | 44 ------------------------------------- 2 files changed, 47 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 775c23becaec..238d6a357aac 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -132,9 +132,6 @@ struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key, int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length); -int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem, - u64 *page, int num_pages, u64 iova); - void rxe_mem_cleanup(struct rxe_pool_entry *arg); int advance_dma_data(struct rxe_dma_info *dma, unsigned int length); diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index e83c7b518bfa..a63cb5fac01f 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -587,47 +587,3 @@ struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key, return mem; } - -int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem, - u64 *page, int num_pages, u64 iova) -{ - int i; - int num_buf; - int err; - struct rxe_map **map; - struct rxe_phys_buf *buf; - int page_size; - - if (num_pages > mem->max_buf) { - err = -EINVAL; - goto err1; - } - - num_buf = 0; - page_size = 1 << mem->page_shift; - map = mem->map; - buf = map[0]->buf; - - for (i = 0; i < num_pages; i++) { - buf->addr = *page++; - buf->size = page_size; - buf++; - num_buf++; - - if (num_buf == RXE_BUF_PER_MAP) { - map++; - buf = map[0]->buf; - num_buf = 0; - } - } - - mem->iova = iova; - mem->va = iova; - mem->length = num_pages << mem->page_shift; - mem->state = RXE_MEM_STATE_VALID; - - return 0; - -err1: - return err; -} -- cgit From 24fd6d6f85d24b020aaaf01109d368dd15570caf Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 23 Jun 2020 14:30:35 +0300 Subject: RDMA/core: Don't call fill_res_entry for PD None of the drivers implement it, remove it. 
Link: https://lore.kernel.org/r/20200623113043.1228482-4-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/nldev.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index e16105be2eb2..8548f09746ab 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -653,7 +653,6 @@ static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_pd *pd = container_of(res, struct ib_pd, res); - struct ib_device *dev = pd->device; if (has_cap_net_admin) { if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY, @@ -676,13 +675,7 @@ static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin, pd->uobject->context->res.id)) goto err; - if (fill_res_name_pid(msg, res)) - goto err; - - if (fill_res_entry(dev, msg, res)) - goto err; - - return 0; + return fill_res_name_pid(msg, res); err: return -EMSGSIZE; } -- cgit From f4434529003522d72b314d26d65b18c06ea9307c Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 23 Jun 2020 14:30:36 +0300 Subject: RDMA: Add dedicated MR resource tracker function In order to avoid double multiplexing of the resource when it is a MR, add a dedicated callback function. Link: https://lore.kernel.org/r/20200623113043.1228482-5-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 3 ++- drivers/infiniband/core/nldev.c | 18 ++++-------------- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 1 + drivers/infiniband/hw/cxgb4/provider.c | 1 + drivers/infiniband/hw/cxgb4/restrack.c | 5 +---- drivers/infiniband/hw/mlx5/main.c | 4 ++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 6 ++---- drivers/infiniband/hw/mlx5/restrack.c | 28 ++++------------------------ include/rdma/ib_verbs.h | 4 ++-- 9 files changed, 19 insertions(+), 51 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 905a2beaf885..ffdf9787e7f6 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2618,7 +2618,8 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, drain_sq); SET_DEVICE_OP(dev_ops, enable_driver); SET_DEVICE_OP(dev_ops, fill_res_entry); - SET_DEVICE_OP(dev_ops, fill_stat_entry); + SET_DEVICE_OP(dev_ops, fill_res_mr_entry); + SET_DEVICE_OP(dev_ops, fill_stat_mr_entry); SET_DEVICE_OP(dev_ops, get_dev_fw_str); SET_DEVICE_OP(dev_ops, get_dma_mr); SET_DEVICE_OP(dev_ops, get_hw_stats); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 8548f09746ab..a4f3f838d6fe 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -454,14 +454,6 @@ static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg, return dev->ops.fill_res_entry(msg, res); } -static bool fill_stat_entry(struct ib_device *dev, struct sk_buff *msg, - struct rdma_restrack_entry *res) -{ - if (!dev->ops.fill_stat_entry) - return false; - return dev->ops.fill_stat_entry(msg, res); -} - static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { @@ -641,9 +633,8 @@ static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, if (fill_res_name_pid(msg, res)) goto err; - if (fill_res_entry(dev, msg, res)) - goto err; - 
+ if (dev->ops.fill_res_mr_entry) + return dev->ops.fill_res_mr_entry(msg, mr); return 0; err: return -EMSGSIZE; @@ -786,9 +777,8 @@ static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id)) goto err; - if (fill_stat_entry(dev, msg, res)) - goto err; - + if (dev->ops.fill_stat_mr_entry) + return dev->ops.fill_stat_mr_entry(msg, mr); return 0; err: diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index e8e11bd95e42..5b9884ca2f5e 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -1055,6 +1055,7 @@ struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp); typedef int c4iw_restrack_func(struct sk_buff *msg, struct rdma_restrack_entry *res); +int c4iw_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr); extern c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX]; #endif diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index ba83d942997c..36eeb595d41c 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -486,6 +486,7 @@ static const struct ib_device_ops c4iw_dev_ops = { .destroy_qp = c4iw_destroy_qp, .destroy_srq = c4iw_destroy_srq, .fill_res_entry = fill_res_entry, + .fill_res_mr_entry = c4iw_fill_res_mr_entry, .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = c4iw_get_dma_mr, .get_hw_stats = c4iw_get_mib, diff --git a/drivers/infiniband/hw/cxgb4/restrack.c b/drivers/infiniband/hw/cxgb4/restrack.c index f82d46ed969d..9a5ca9192c1c 100644 --- a/drivers/infiniband/hw/cxgb4/restrack.c +++ b/drivers/infiniband/hw/cxgb4/restrack.c @@ -433,10 +433,8 @@ err: return -EMSGSIZE; } -static int fill_res_mr_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +int c4iw_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr) { - struct ib_mr *ibmr = container_of(res, struct ib_mr, res); struct c4iw_mr *mhp = to_c4iw_mr(ibmr); struct c4iw_dev *dev = mhp->rhp; u32 stag = mhp->attr.stag; @@ -497,5 +495,4 @@ c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX] = { [RDMA_RESTRACK_QP] = fill_res_qp_entry, [RDMA_RESTRACK_CM_ID] = fill_res_ep_entry, [RDMA_RESTRACK_CQ] = fill_res_cq_entry, - [RDMA_RESTRACK_MR] = fill_res_mr_entry, }; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 343a8b8361e7..fa9237a10ed8 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6598,8 +6598,8 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .drain_rq = mlx5_ib_drain_rq, .drain_sq = mlx5_ib_drain_sq, .enable_driver = mlx5_ib_enable_driver, - .fill_res_entry = mlx5_ib_fill_res_entry, - .fill_stat_entry = mlx5_ib_fill_stat_entry, + .fill_res_mr_entry = mlx5_ib_fill_res_mr_entry, + .fill_stat_mr_entry = mlx5_ib_fill_stat_mr_entry, .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = mlx5_ib_get_dma_mr, .get_link_layer = mlx5_ib_port_link_layer, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 5dbe3eb0d9cb..37ead14eb317 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1375,10 +1375,8 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev, u8 *native_port_num); void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); -int mlx5_ib_fill_res_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res); -int mlx5_ib_fill_stat_entry(struct sk_buff *msg, - 
struct rdma_restrack_entry *res); +int mlx5_ib_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr); +int mlx5_ib_fill_stat_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr); extern const struct uapi_definition mlx5_ib_devx_defs[]; extern const struct uapi_definition mlx5_ib_flow_defs[]; diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c index 8f6c04f12531..598a09796d09 100644 --- a/drivers/infiniband/hw/mlx5/restrack.c +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -8,10 +8,9 @@ #include #include "mlx5_ib.h" -static int fill_stat_mr_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +int mlx5_ib_fill_stat_mr_entry(struct sk_buff *msg, + struct ib_mr *ibmr) { - struct ib_mr *ibmr = container_of(res, struct ib_mr, res); struct mlx5_ib_mr *mr = to_mmr(ibmr); struct nlattr *table_attr; @@ -41,10 +40,9 @@ err: return -EMSGSIZE; } -static int fill_res_mr_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +int mlx5_ib_fill_res_mr_entry(struct sk_buff *msg, + struct ib_mr *ibmr) { - struct ib_mr *ibmr = container_of(res, struct ib_mr, res); struct mlx5_ib_mr *mr = to_mmr(ibmr); struct nlattr *table_attr; @@ -70,21 +68,3 @@ err: nla_nest_cancel(msg, table_attr); return -EMSGSIZE; } - -int mlx5_ib_fill_res_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) -{ - if (res->type == RDMA_RESTRACK_MR) - return fill_res_mr_entry(msg, res); - - return 0; -} - -int mlx5_ib_fill_stat_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) -{ - if (res->type == RDMA_RESTRACK_MR) - return fill_stat_mr_entry(msg, res); - - return 0; -} diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index ef2f3986c493..117a0e802aa1 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2583,6 +2583,7 @@ struct ib_device_ops { */ int (*fill_res_entry)(struct sk_buff *msg, struct rdma_restrack_entry *entry); + int (*fill_res_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr); /* Device lifecycle callbacks */ /* @@ -2637,8 +2638,7 @@ struct ib_device_ops { * Allows rdma drivers to add their own restrack attributes * dumped via 'rdma stat' iproute2 command. */ - int (*fill_stat_entry)(struct sk_buff *msg, - struct rdma_restrack_entry *entry); + int (*fill_stat_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr); DECLARE_RDMA_OBJ_SIZE(ib_ah); DECLARE_RDMA_OBJ_SIZE(ib_cq); -- cgit From 9e2a187a93c395f573ed38b888ba4bd731e70622 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 23 Jun 2020 14:30:37 +0300 Subject: RDMA: Add a dedicated CQ resource tracker function In order to avoid double multiplexing of the resource when it is a CQ, add a dedicated callback function. 
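As an illustration of the new per-object hook (sketch only, not part of this patch; the "foo" driver, its to_foo_cq() helper and the cqid field are made up), a provider that wants to expose CQ details now registers a typed callback and fills its own RDMA_NLDEV_ATTR_DRIVER nest, instead of demultiplexing on res->type in a single fill_res_entry:

    #include <net/netlink.h>
    #include <rdma/ib_verbs.h>
    #include <rdma/restrack.h>

    /* Hypothetical "foo" driver, for illustration only. */
    static int foo_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ibcq)
    {
            struct foo_cq *cq = to_foo_cq(ibcq);    /* driver's container_of() wrapper */
            struct nlattr *table_attr;

            /* driver-specific attributes live in their own nest, as cxgb4/hns do */
            table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
            if (!table_attr)
                    return -EMSGSIZE;

            if (rdma_nl_put_driver_u32(msg, "cqid", cq->cqid))
                    goto err;

            nla_nest_end(msg, table_attr);
            return 0;

    err:
            nla_nest_cancel(msg, table_attr);
            return -EMSGSIZE;
    }

    static const struct ib_device_ops foo_dev_ops = {
            /* ... remaining ops elided ... */
            .fill_res_cq_entry = foo_fill_res_cq_entry,
    };
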
Link: https://lore.kernel.org/r/20200623113043.1228482-6-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/nldev.c | 5 ++--- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 1 + drivers/infiniband/hw/cxgb4/provider.c | 1 + drivers/infiniband/hw/cxgb4/restrack.c | 5 +---- drivers/infiniband/hw/hns/hns_roce_device.h | 4 ++-- drivers/infiniband/hw/hns/hns_roce_main.c | 2 +- drivers/infiniband/hw/hns/hns_roce_restrack.c | 14 ++------------ include/rdma/ib_verbs.h | 1 + 9 files changed, 12 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index ffdf9787e7f6..9eeac8cb600e 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2617,6 +2617,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, drain_rq); SET_DEVICE_OP(dev_ops, drain_sq); SET_DEVICE_OP(dev_ops, enable_driver); + SET_DEVICE_OP(dev_ops, fill_res_cq_entry); SET_DEVICE_OP(dev_ops, fill_res_entry); SET_DEVICE_OP(dev_ops, fill_res_mr_entry); SET_DEVICE_OP(dev_ops, fill_stat_mr_entry); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index a4f3f838d6fe..707f724db1dd 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -598,9 +598,8 @@ static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin, if (fill_res_name_pid(msg, res)) goto err; - if (fill_res_entry(dev, msg, res)) - goto err; - + if (dev->ops.fill_res_cq_entry) + return dev->ops.fill_res_cq_entry(msg, cq); return 0; err: return -EMSGSIZE; diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 5b9884ca2f5e..18a2c1a44dcc 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -1056,6 +1056,7 @@ struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp); typedef int c4iw_restrack_func(struct sk_buff *msg, struct rdma_restrack_entry *res); int c4iw_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr); +int c4iw_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ibcq); extern c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX]; #endif diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 36eeb595d41c..d6b20aa314a0 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -485,6 +485,7 @@ static const struct ib_device_ops c4iw_dev_ops = { .destroy_cq = c4iw_destroy_cq, .destroy_qp = c4iw_destroy_qp, .destroy_srq = c4iw_destroy_srq, + .fill_res_cq_entry = c4iw_fill_res_cq_entry, .fill_res_entry = fill_res_entry, .fill_res_mr_entry = c4iw_fill_res_mr_entry, .get_dev_fw_str = get_dev_fw_str, diff --git a/drivers/infiniband/hw/cxgb4/restrack.c b/drivers/infiniband/hw/cxgb4/restrack.c index 9a5ca9192c1c..ead2cd08793d 100644 --- a/drivers/infiniband/hw/cxgb4/restrack.c +++ b/drivers/infiniband/hw/cxgb4/restrack.c @@ -372,10 +372,8 @@ err: return -EMSGSIZE; } -static int fill_res_cq_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +int c4iw_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ibcq) { - struct ib_cq *ibcq = container_of(res, struct ib_cq, res); struct c4iw_cq *chp = to_c4iw_cq(ibcq); struct nlattr *table_attr; struct t4_cqe hwcqes[2]; @@ -494,5 +492,4 @@ err: c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX] = { [RDMA_RESTRACK_QP] = 
fill_res_qp_entry, [RDMA_RESTRACK_CM_ID] = fill_res_ep_entry, - [RDMA_RESTRACK_CQ] = fill_res_cq_entry, }; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index a77fa6730b2d..a61f0c4d4dbb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1266,6 +1266,6 @@ void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); -int hns_roce_fill_res_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res); +int hns_roce_fill_res_cq_entry(struct sk_buff *msg, + struct ib_cq *ib_cq); #endif /* _HNS_ROCE_DEVICE_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 50763cf4fa3d..5907cfd878a6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -428,7 +428,7 @@ static const struct ib_device_ops hns_roce_dev_ops = { .destroy_ah = hns_roce_destroy_ah, .destroy_cq = hns_roce_destroy_cq, .disassociate_ucontext = hns_roce_disassociate_ucontext, - .fill_res_entry = hns_roce_fill_res_entry, + .fill_res_cq_entry = hns_roce_fill_res_cq_entry, .get_dma_mr = hns_roce_get_dma_mr, .get_link_layer = hns_roce_get_link_layer, .get_port_immutable = hns_roce_port_immutable, diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 06871731ac43..259444c0a630 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -76,10 +76,9 @@ err: return -EMSGSIZE; } -static int hns_roce_fill_res_cq_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +int hns_roce_fill_res_cq_entry(struct sk_buff *msg, + struct ib_cq *ib_cq) { - struct ib_cq *ib_cq = container_of(res, struct ib_cq, res); struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); struct hns_roce_v2_cq_context *context; @@ -119,12 +118,3 @@ err: kfree(context); return ret; } - -int hns_roce_fill_res_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) -{ - if (res->type == RDMA_RESTRACK_CQ) - return hns_roce_fill_res_cq_entry(msg, res); - - return 0; -} diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 117a0e802aa1..097b1d497d5f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2584,6 +2584,7 @@ struct ib_device_ops { int (*fill_res_entry)(struct sk_buff *msg, struct rdma_restrack_entry *entry); int (*fill_res_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr); + int (*fill_res_cq_entry)(struct sk_buff *msg, struct ib_cq *ibcq); /* Device lifecycle callbacks */ /* -- cgit From 5cc34116ccec60032dbaa92768f41e95ce2d8ec7 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 23 Jun 2020 14:30:38 +0300 Subject: RDMA: Add dedicated QP resource tracker function In order to avoid double multiplexing of the resource when it is a QP, add a dedicated callback function. 
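For reference (illustrative invocation, not from this series; the device name and QP number are made up), the attributes a driver emits through these per-object hooks are expected to appear in the iproute2 rdma tool output next to the generic fields filled by nldev when details are requested, e.g.:

    $ rdma res show qp dev mlx5_1 lqpn 260 -d -j

If the driver does not implement fill_res_qp_entry, the core simply skips the callback and only the generic QP attributes are reported.
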
Link: https://lore.kernel.org/r/20200623113043.1228482-7-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/nldev.c | 5 ++--- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 1 + drivers/infiniband/hw/cxgb4/restrack.c | 5 +---- include/rdma/ib_verbs.h | 1 + 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 9eeac8cb600e..f94989274df5 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2620,6 +2620,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, fill_res_cq_entry); SET_DEVICE_OP(dev_ops, fill_res_entry); SET_DEVICE_OP(dev_ops, fill_res_mr_entry); + SET_DEVICE_OP(dev_ops, fill_res_qp_entry); SET_DEVICE_OP(dev_ops, fill_stat_mr_entry); SET_DEVICE_OP(dev_ops, get_dev_fw_str); SET_DEVICE_OP(dev_ops, get_dma_mr); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 707f724db1dd..79d0980a75e0 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -507,9 +507,8 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, if (fill_res_name_pid(msg, res)) goto err; - if (fill_res_entry(dev, msg, res)) - goto err; - + if (dev->ops.fill_res_qp_entry) + return dev->ops.fill_res_qp_entry(msg, qp); return 0; err: return -EMSGSIZE; diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 18a2c1a44dcc..c84aa7c937f1 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -1057,6 +1057,7 @@ typedef int c4iw_restrack_func(struct sk_buff *msg, struct rdma_restrack_entry *res); int c4iw_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr); int c4iw_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ibcq); +int c4iw_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ibqp); extern c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX]; #endif diff --git a/drivers/infiniband/hw/cxgb4/restrack.c b/drivers/infiniband/hw/cxgb4/restrack.c index ead2cd08793d..5144d3b67293 100644 --- a/drivers/infiniband/hw/cxgb4/restrack.c +++ b/drivers/infiniband/hw/cxgb4/restrack.c @@ -134,10 +134,8 @@ err: return -EMSGSIZE; } -static int fill_res_qp_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +int c4iw_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ibqp) { - struct ib_qp *ibqp = container_of(res, struct ib_qp, res); struct t4_swsqe *fsp = NULL, *lsp = NULL; struct c4iw_qp *qhp = to_c4iw_qp(ibqp); u16 first_sq_idx = 0, last_sq_idx = 0; @@ -490,6 +488,5 @@ err: } c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX] = { - [RDMA_RESTRACK_QP] = fill_res_qp_entry, [RDMA_RESTRACK_CM_ID] = fill_res_ep_entry, }; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 097b1d497d5f..4e8519ac7363 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2585,6 +2585,7 @@ struct ib_device_ops { struct rdma_restrack_entry *entry); int (*fill_res_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr); int (*fill_res_cq_entry)(struct sk_buff *msg, struct ib_cq *ibcq); + int (*fill_res_qp_entry)(struct sk_buff *msg, struct ib_qp *ibqp); /* Device lifecycle callbacks */ /* -- cgit From 211cd9459fdabe9f556e539966f50825853bf262 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 23 Jun 2020 14:30:39 +0300 Subject: RDMA: Add 
dedicated CM_ID resource tracker function In order to avoid double multiplexing of the resource when it is a cm id, add a dedicated callback function. In addition remove fill_res_entry which is not used anymore. Link: https://lore.kernel.org/r/20200623113043.1228482-8-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 2 +- drivers/infiniband/core/nldev.c | 13 ++----------- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 4 +--- drivers/infiniband/hw/cxgb4/provider.c | 9 +-------- drivers/infiniband/hw/cxgb4/restrack.c | 9 ++------- include/rdma/ib_verbs.h | 4 ++-- 6 files changed, 9 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index f94989274df5..cbe95e729cf1 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2617,8 +2617,8 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, drain_rq); SET_DEVICE_OP(dev_ops, drain_sq); SET_DEVICE_OP(dev_ops, enable_driver); + SET_DEVICE_OP(dev_ops, fill_res_cm_id_entry); SET_DEVICE_OP(dev_ops, fill_res_cq_entry); - SET_DEVICE_OP(dev_ops, fill_res_entry); SET_DEVICE_OP(dev_ops, fill_res_mr_entry); SET_DEVICE_OP(dev_ops, fill_res_qp_entry); SET_DEVICE_OP(dev_ops, fill_stat_mr_entry); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 79d0980a75e0..394e307c342c 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -446,14 +446,6 @@ static int fill_res_name_pid(struct sk_buff *msg, return err ? -EMSGSIZE : 0; } -static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg, - struct rdma_restrack_entry *res) -{ - if (!dev->ops.fill_res_entry) - return false; - return dev->ops.fill_res_entry(msg, res); -} - static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { @@ -559,9 +551,8 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin, if (fill_res_name_pid(msg, res)) goto err; - if (fill_res_entry(dev, msg, res)) - goto err; - + if (dev->ops.fill_res_cm_id_entry) + return dev->ops.fill_res_cm_id_entry(msg, cm_id); return 0; err: return -EMSGSIZE; diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index c84aa7c937f1..27da0705c88a 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -1053,11 +1053,9 @@ int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp); -typedef int c4iw_restrack_func(struct sk_buff *msg, - struct rdma_restrack_entry *res); int c4iw_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr); int c4iw_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ibcq); int c4iw_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ibqp); -extern c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX]; +int c4iw_fill_res_cm_id_entry(struct sk_buff *msg, struct rdma_cm_id *cm_id); #endif diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index d6b20aa314a0..1d3ff59e4060 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -458,13 +458,6 @@ static void get_dev_fw_str(struct ib_device *dev, char *str) FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers)); } -static int 
fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res) -{ - return (res->type < ARRAY_SIZE(c4iw_restrack_funcs) && - c4iw_restrack_funcs[res->type]) ? - c4iw_restrack_funcs[res->type](msg, res) : 0; -} - static const struct ib_device_ops c4iw_dev_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_CXGB4, @@ -486,7 +479,7 @@ static const struct ib_device_ops c4iw_dev_ops = { .destroy_qp = c4iw_destroy_qp, .destroy_srq = c4iw_destroy_srq, .fill_res_cq_entry = c4iw_fill_res_cq_entry, - .fill_res_entry = fill_res_entry, + .fill_res_cm_id_entry = c4iw_fill_res_cm_id_entry, .fill_res_mr_entry = c4iw_fill_res_mr_entry, .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = c4iw_get_dma_mr, diff --git a/drivers/infiniband/hw/cxgb4/restrack.c b/drivers/infiniband/hw/cxgb4/restrack.c index 5144d3b67293..b32e6516d65f 100644 --- a/drivers/infiniband/hw/cxgb4/restrack.c +++ b/drivers/infiniband/hw/cxgb4/restrack.c @@ -193,10 +193,9 @@ union union_ep { struct c4iw_ep ep; }; -static int fill_res_ep_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +int c4iw_fill_res_cm_id_entry(struct sk_buff *msg, + struct rdma_cm_id *cm_id) { - struct rdma_cm_id *cm_id = rdma_res_to_id(res); struct nlattr *table_attr; struct c4iw_ep_common *epcp; struct c4iw_listen_ep *listen_ep = NULL; @@ -486,7 +485,3 @@ err_cancel_table: err: return -EMSGSIZE; } - -c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX] = { - [RDMA_RESTRACK_CM_ID] = fill_res_ep_entry, -}; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4e8519ac7363..9127cffafccd 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -75,6 +75,7 @@ struct ib_umem_odp; struct ib_uqp_object; struct ib_usrq_object; struct ib_uwq_object; +struct rdma_cm_id; extern struct workqueue_struct *ib_wq; extern struct workqueue_struct *ib_comp_wq; @@ -2581,11 +2582,10 @@ struct ib_device_ops { /** * Allows rdma drivers to add their own restrack attributes. */ - int (*fill_res_entry)(struct sk_buff *msg, - struct rdma_restrack_entry *entry); int (*fill_res_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr); int (*fill_res_cq_entry)(struct sk_buff *msg, struct ib_cq *ibcq); int (*fill_res_qp_entry)(struct sk_buff *msg, struct ib_qp *ibqp); + int (*fill_res_cm_id_entry)(struct sk_buff *msg, struct rdma_cm_id *id); /* Device lifecycle callbacks */ /* -- cgit From 65959522f8060659e308977f09f3eb7b7af5e43f Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 23 Jun 2020 14:30:40 +0300 Subject: RDMA: Add support to dump resource tracker in RAW format Add support to get resource dump in raw format. It enable drivers to return the entire device specific QP/CQ/MR context without a need from the driver to set each field separately. The raw query returns only the device specific data, general data is still returned by using the existing queries. 
Example: $ rdma res show mr dev mlx5_1 mrn 2 -r -j [{"ifindex":7,"ifname":"mlx5_1", "data":[0,4,255,254,0,0,0,0,0,0,0,0,16,28,0,216,...]}] Link: https://lore.kernel.org/r/20200623113043.1228482-9-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 3 + drivers/infiniband/core/nldev.c | 180 +++++++++++++++++++++++++-------------- include/rdma/ib_verbs.h | 3 + include/uapi/rdma/rdma_netlink.h | 8 ++ 4 files changed, 132 insertions(+), 62 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index cbe95e729cf1..1335ed1f1e4a 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2619,8 +2619,11 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, enable_driver); SET_DEVICE_OP(dev_ops, fill_res_cm_id_entry); SET_DEVICE_OP(dev_ops, fill_res_cq_entry); + SET_DEVICE_OP(dev_ops, fill_res_cq_entry_raw); SET_DEVICE_OP(dev_ops, fill_res_mr_entry); + SET_DEVICE_OP(dev_ops, fill_res_mr_entry_raw); SET_DEVICE_OP(dev_ops, fill_res_qp_entry); + SET_DEVICE_OP(dev_ops, fill_res_qp_entry_raw); SET_DEVICE_OP(dev_ops, fill_stat_mr_entry); SET_DEVICE_OP(dev_ops, get_dev_fw_str); SET_DEVICE_OP(dev_ops, get_dma_mr); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 394e307c342c..1051b5622b62 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -114,6 +114,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_RAW] = { .type = NLA_BINARY }, [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 }, @@ -446,11 +447,11 @@ static int fill_res_name_pid(struct sk_buff *msg, return err ? 
-EMSGSIZE : 0; } -static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, - struct rdma_restrack_entry *res, uint32_t port) +static int fill_res_qp_entry_query(struct sk_buff *msg, + struct rdma_restrack_entry *res, + struct ib_device *dev, + struct ib_qp *qp) { - struct ib_qp *qp = container_of(res, struct ib_qp, res); - struct ib_device *dev = qp->device; struct ib_qp_init_attr qp_init_attr; struct ib_qp_attr qp_attr; int ret; @@ -459,16 +460,6 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, if (ret) return ret; - if (port && port != qp_attr.port_num) - return -EAGAIN; - - /* In create_qp() port is not set yet */ - if (qp_attr.port_num && - nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num)) - goto err; - - if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num)) - goto err; if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) { if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN, qp_attr.dest_qp_num)) @@ -492,13 +483,6 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state)) goto err; - if (!rdma_is_kernel_res(res) && - nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id)) - goto err; - - if (fill_res_name_pid(msg, res)) - goto err; - if (dev->ops.fill_res_qp_entry) return dev->ops.fill_res_qp_entry(msg, qp); return 0; @@ -506,6 +490,48 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, err: return -EMSGSIZE; } +static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, + struct rdma_restrack_entry *res, uint32_t port) +{ + struct ib_qp *qp = container_of(res, struct ib_qp, res); + struct ib_device *dev = qp->device; + int ret; + + if (port && port != qp->port) + return -EAGAIN; + + /* In create_qp() port is not set yet */ + if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port)) + return -EINVAL; + + ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num); + if (ret) + return -EMSGSIZE; + + if (!rdma_is_kernel_res(res) && + nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id)) + return -EMSGSIZE; + + ret = fill_res_name_pid(msg, res); + if (ret) + return -EMSGSIZE; + + return fill_res_qp_entry_query(msg, res, dev, qp); +} + +static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, + struct rdma_restrack_entry *res, uint32_t port) +{ + struct ib_qp *qp = container_of(res, struct ib_qp, res); + struct ib_device *dev = qp->device; + + if (port && port != qp->port) + return -EAGAIN; + if (!dev->ops.fill_res_qp_entry_raw) + return -EINVAL; + return dev->ops.fill_res_qp_entry_raw(msg, qp); +} + static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { @@ -565,34 +591,42 @@ static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin, struct ib_device *dev = cq->device; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe)) - goto err; + return -EMSGSIZE; if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT, atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD)) - goto err; + return -EMSGSIZE; /* Poll context is only valid for kernel CQs */ if (rdma_is_kernel_res(res) && nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx)) - goto err; + return -EMSGSIZE; if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL))) - goto err; + return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id)) - goto err; + return -EMSGSIZE; if (!rdma_is_kernel_res(res) && 
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, cq->uobject->uevent.uobject.context->res.id)) - goto err; + return -EMSGSIZE; if (fill_res_name_pid(msg, res)) - goto err; + return -EMSGSIZE; - if (dev->ops.fill_res_cq_entry) - return dev->ops.fill_res_cq_entry(msg, cq); - return 0; + return (dev->ops.fill_res_cq_entry) ? + dev->ops.fill_res_cq_entry(msg, cq) : 0; +} -err: return -EMSGSIZE; +static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, + struct rdma_restrack_entry *res, uint32_t port) +{ + struct ib_cq *cq = container_of(res, struct ib_cq, res); + struct ib_device *dev = cq->device; + + if (!dev->ops.fill_res_cq_entry_raw) + return -EINVAL; + return dev->ops.fill_res_cq_entry_raw(msg, cq); } static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, @@ -603,30 +637,39 @@ static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, if (has_cap_net_admin) { if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey)) - goto err; + return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey)) - goto err; + return -EMSGSIZE; } if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, RDMA_NLDEV_ATTR_PAD)) - goto err; + return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id)) - goto err; + return -EMSGSIZE; if (!rdma_is_kernel_res(res) && nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id)) - goto err; + return -EMSGSIZE; if (fill_res_name_pid(msg, res)) - goto err; + return -EMSGSIZE; - if (dev->ops.fill_res_mr_entry) - return dev->ops.fill_res_mr_entry(msg, mr); - return 0; + return (dev->ops.fill_res_mr_entry) ? + dev->ops.fill_res_mr_entry(msg, mr) : + 0; +} -err: return -EMSGSIZE; +static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, + struct rdma_restrack_entry *res, uint32_t port) +{ + struct ib_mr *mr = container_of(res, struct ib_mr, res); + struct ib_device *dev = mr->pd->device; + + if (!dev->ops.fill_res_mr_entry_raw) + return -EINVAL; + return dev->ops.fill_res_mr_entry_raw(msg, mr); } static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin, @@ -1149,7 +1192,6 @@ static int nldev_res_get_dumpit(struct sk_buff *skb, struct nldev_fill_res_entry { enum rdma_nldev_attr nldev_attr; - enum rdma_nldev_command nldev_cmd; u8 flags; u32 entry; u32 id; @@ -1161,40 +1203,34 @@ enum nldev_res_flags { static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { [RDMA_RESTRACK_QP] = { - .nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_QP, .entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY, .id = RDMA_NLDEV_ATTR_RES_LQPN, }, [RDMA_RESTRACK_CM_ID] = { - .nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID, .entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY, .id = RDMA_NLDEV_ATTR_RES_CM_IDN, }, [RDMA_RESTRACK_CQ] = { - .nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_CQ, .flags = NLDEV_PER_DEV, .entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY, .id = RDMA_NLDEV_ATTR_RES_CQN, }, [RDMA_RESTRACK_MR] = { - .nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_MR, .flags = NLDEV_PER_DEV, .entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY, .id = RDMA_NLDEV_ATTR_RES_MRN, }, [RDMA_RESTRACK_PD] = { - .nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_PD, .flags = NLDEV_PER_DEV, .entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY, .id = RDMA_NLDEV_ATTR_RES_PDN, }, [RDMA_RESTRACK_COUNTER] = { - .nldev_cmd = RDMA_NLDEV_CMD_STAT_GET, .nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER, .entry = 
RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY, .id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID, @@ -1253,7 +1289,8 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, - RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd), + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, + RDMA_NL_GET_OP(nlh->nlmsg_type)), 0, 0); if (fill_nldev_handle(msg, device)) { @@ -1331,7 +1368,8 @@ static int res_get_common_dumpit(struct sk_buff *skb, } nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd), + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, + RDMA_NL_GET_OP(cb->nlh->nlmsg_type)), 0, NLM_F_MULTI); if (fill_nldev_handle(skb, device)) { @@ -1413,26 +1451,29 @@ err_index: return ret; } -#define RES_GET_FUNCS(name, type) \ - static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \ +#define RES_GET_FUNCS(name, type) \ + static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \ struct netlink_callback *cb) \ - { \ - return res_get_common_dumpit(skb, cb, type, \ - fill_res_##name##_entry); \ - } \ - static int nldev_res_get_##name##_doit(struct sk_buff *skb, \ - struct nlmsghdr *nlh, \ + { \ + return res_get_common_dumpit(skb, cb, type, \ + fill_res_##name##_entry); \ + } \ + static int nldev_res_get_##name##_doit(struct sk_buff *skb, \ + struct nlmsghdr *nlh, \ struct netlink_ext_ack *extack) \ - { \ - return res_get_common_doit(skb, nlh, extack, type, \ - fill_res_##name##_entry); \ + { \ + return res_get_common_doit(skb, nlh, extack, type, \ + fill_res_##name##_entry); \ } RES_GET_FUNCS(qp, RDMA_RESTRACK_QP); +RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP); RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID); RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ); +RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ); RES_GET_FUNCS(pd, RDMA_RESTRACK_PD); RES_GET_FUNCS(mr, RDMA_RESTRACK_MR); +RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR); RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER); static LIST_HEAD(link_ops); @@ -2117,6 +2158,21 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { .doit = nldev_stat_del_doit, .flags = RDMA_NL_ADMIN_PERM, }, + [RDMA_NLDEV_CMD_RES_QP_GET_RAW] = { + .doit = nldev_res_get_qp_raw_doit, + .dump = nldev_res_get_qp_raw_dumpit, + .flags = RDMA_NL_ADMIN_PERM, + }, + [RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = { + .doit = nldev_res_get_cq_raw_doit, + .dump = nldev_res_get_cq_raw_dumpit, + .flags = RDMA_NL_ADMIN_PERM, + }, + [RDMA_NLDEV_CMD_RES_MR_GET_RAW] = { + .doit = nldev_res_get_mr_raw_doit, + .dump = nldev_res_get_mr_raw_dumpit, + .flags = RDMA_NL_ADMIN_PERM, + }, }; void __init nldev_init(void) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9127cffafccd..77106ff3cd26 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2583,8 +2583,11 @@ struct ib_device_ops { * Allows rdma drivers to add their own restrack attributes. 
*/ int (*fill_res_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr); + int (*fill_res_mr_entry_raw)(struct sk_buff *msg, struct ib_mr *ibmr); int (*fill_res_cq_entry)(struct sk_buff *msg, struct ib_cq *ibcq); + int (*fill_res_cq_entry_raw)(struct sk_buff *msg, struct ib_cq *ibcq); int (*fill_res_qp_entry)(struct sk_buff *msg, struct ib_qp *ibqp); + int (*fill_res_qp_entry_raw)(struct sk_buff *msg, struct ib_qp *ibqp); int (*fill_res_cm_id_entry)(struct sk_buff *msg, struct rdma_cm_id *id); /* Device lifecycle callbacks */ diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 8e277783fa96..3826143d420d 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -287,6 +287,12 @@ enum rdma_nldev_command { RDMA_NLDEV_CMD_STAT_DEL, + RDMA_NLDEV_CMD_RES_QP_GET_RAW, + + RDMA_NLDEV_CMD_RES_CQ_GET_RAW, + + RDMA_NLDEV_CMD_RES_MR_GET_RAW, + RDMA_NLDEV_NUM_OPS }; @@ -525,6 +531,8 @@ enum rdma_nldev_attr { */ RDMA_NLDEV_ATTR_DEV_DIM, /* u8 */ + RDMA_NLDEV_ATTR_RES_RAW, /* binary */ + /* * Always the end */ -- cgit From 1776dd234a14fbef5f8581f6374c8c6ccd69cb1c Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 23 Jun 2020 14:30:41 +0300 Subject: RDMA/mlx5: Add support to get QP resource in RAW format Add a generic function to use the resource dump mechanism to get the QP resource data. Link: https://lore.kernel.org/r/20200623113043.1228482-10-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 1 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/restrack.c | 77 +++++++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index fa9237a10ed8..7ac75935d3c2 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6599,6 +6599,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .drain_sq = mlx5_ib_drain_sq, .enable_driver = mlx5_ib_enable_driver, .fill_res_mr_entry = mlx5_ib_fill_res_mr_entry, + .fill_res_qp_entry_raw = mlx5_ib_fill_res_qp_entry_raw, .fill_stat_mr_entry = mlx5_ib_fill_stat_mr_entry, .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = mlx5_ib_get_dma_mr, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 37ead14eb317..7285c00e474d 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1376,6 +1376,7 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev, void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); int mlx5_ib_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr); +int mlx5_ib_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp); int mlx5_ib_fill_stat_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr); extern const struct uapi_definition mlx5_ib_devx_defs[]; diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c index 598a09796d09..c8b91d8e0f42 100644 --- a/drivers/infiniband/hw/mlx5/restrack.c +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -4,10 +4,79 @@ */ #include +#include #include #include #include "mlx5_ib.h" +#define MAX_DUMP_SIZE 1024 + +static int dump_rsc(struct mlx5_core_dev *dev, enum mlx5_sgmt_type type, + int index, void *data, int *data_len) +{ + struct mlx5_core_dev *mdev = dev; + struct mlx5_rsc_dump_cmd *cmd; + struct mlx5_rsc_key key = {}; + struct page *page; + int offset = 0; + 
int err = 0; + int cmd_err; + int size; + + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + key.size = PAGE_SIZE; + key.rsc = type; + key.index1 = index; + key.num_of_obj1 = 1; + + cmd = mlx5_rsc_dump_cmd_create(mdev, &key); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto free_page; + } + + do { + cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size); + if (cmd_err < 0 || size + offset > MAX_DUMP_SIZE) { + err = cmd_err; + goto destroy_cmd; + } + memcpy(data + offset, page_address(page), size); + offset += size; + } while (cmd_err > 0); + *data_len = offset; + +destroy_cmd: + mlx5_rsc_dump_cmd_destroy(cmd); +free_page: + __free_page(page); + return err; +} + +static int fill_res_raw(struct sk_buff *msg, struct mlx5_ib_dev *dev, + enum mlx5_sgmt_type type, u32 key) +{ + int len = 0; + void *data; + int err; + + data = kzalloc(MAX_DUMP_SIZE, GFP_KERNEL); + if (!data) + return -ENOMEM; + + err = dump_rsc(dev->mdev, type, key, data, &len); + if (err) + goto out; + + err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data); +out: + kfree(data); + return err; +} + int mlx5_ib_fill_stat_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr) { @@ -68,3 +137,11 @@ err: nla_nest_cancel(msg, table_attr); return -EMSGSIZE; } + +int mlx5_ib_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp) +{ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + + return fill_res_raw(msg, dev, MLX5_SGMT_TYPE_PRM_QUERY_QP, + ibqp->qp_num); +} -- cgit From 1ccecc88af33f6807b5cb7c227adb46b63582680 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 23 Jun 2020 14:30:42 +0300 Subject: RDMA/mlx5: Add support to get CQ resource in RAW format Add support to get CQ resource dump in RAW format. Link: https://lore.kernel.org/r/20200623113043.1228482-11-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 1 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/restrack.c | 8 ++++++++ 3 files changed, 10 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 7ac75935d3c2..30e0645ca3ba 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6598,6 +6598,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .drain_rq = mlx5_ib_drain_rq, .drain_sq = mlx5_ib_drain_sq, .enable_driver = mlx5_ib_enable_driver, + .fill_res_cq_entry_raw = mlx5_ib_fill_res_cq_entry_raw, .fill_res_mr_entry = mlx5_ib_fill_res_mr_entry, .fill_res_qp_entry_raw = mlx5_ib_fill_res_qp_entry_raw, .fill_stat_mr_entry = mlx5_ib_fill_stat_mr_entry, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 7285c00e474d..c6b2102a5a09 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1377,6 +1377,7 @@ void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); int mlx5_ib_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr); int mlx5_ib_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp); +int mlx5_ib_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ibcq); int mlx5_ib_fill_stat_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr); extern const struct uapi_definition mlx5_ib_devx_defs[]; diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c index c8b91d8e0f42..7db49650a2b6 100644 --- a/drivers/infiniband/hw/mlx5/restrack.c +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -138,6 +138,14 @@ err: 
return -EMSGSIZE; } +int mlx5_ib_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ibcq) +{ + struct mlx5_ib_dev *dev = to_mdev(ibcq->device); + struct mlx5_ib_cq *cq = to_mcq(ibcq); + + return fill_res_raw(msg, dev, MLX5_SGMT_TYPE_PRM_QUERY_CQ, cq->mcq.cqn); +} + int mlx5_ib_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp) { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); -- cgit From 28b5fa687f3a55cef1a9144ece9292fe08e57592 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 23 Jun 2020 14:30:43 +0300 Subject: RDMA/mlx5: Add support to get MR resource in RAW format Add support to get MR (mkey) resource dump in RAW format. Link: https://lore.kernel.org/r/20200623113043.1228482-12-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 1 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/restrack.c | 8 ++++++++ 3 files changed, 10 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 30e0645ca3ba..9dbc87c540e4 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6600,6 +6600,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .enable_driver = mlx5_ib_enable_driver, .fill_res_cq_entry_raw = mlx5_ib_fill_res_cq_entry_raw, .fill_res_mr_entry = mlx5_ib_fill_res_mr_entry, + .fill_res_mr_entry_raw = mlx5_ib_fill_res_mr_entry_raw, .fill_res_qp_entry_raw = mlx5_ib_fill_res_qp_entry_raw, .fill_stat_mr_entry = mlx5_ib_fill_stat_mr_entry, .get_dev_fw_str = get_dev_fw_str, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index c6b2102a5a09..2fd199c07dda 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1376,6 +1376,7 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev, void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); int mlx5_ib_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr); +int mlx5_ib_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr); int mlx5_ib_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp); int mlx5_ib_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ibcq); int mlx5_ib_fill_stat_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr); diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c index 7db49650a2b6..224a63975822 100644 --- a/drivers/infiniband/hw/mlx5/restrack.c +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -109,6 +109,14 @@ err: return -EMSGSIZE; } +int mlx5_ib_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ibmr) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + + return fill_res_raw(msg, mr->dev, MLX5_SGMT_TYPE_PRM_QUERY_MKEY, + mlx5_mkey_to_idx(mr->mmkey.key)); +} + int mlx5_ib_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr) { -- cgit From 95a5631f6c9f3045f26245e6045244652204dfdb Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 23 Jun 2020 13:52:36 +0300 Subject: RDMA/ipoib: Return void from ipoib_ib_dev_stop() The return value from ipoib_ib_dev_stop() is always 0 - change it to be void. 
Link: https://lore.kernel.org/r/20200623105236.18683-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib.h | 2 +- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 15f519ce7e0b..3440dc48d02c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -515,7 +515,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev); int ipoib_ib_dev_open_default(struct net_device *dev); int ipoib_ib_dev_open(struct net_device *dev); -int ipoib_ib_dev_stop(struct net_device *dev); +void ipoib_ib_dev_stop(struct net_device *dev); void ipoib_ib_dev_up(struct net_device *dev); void ipoib_ib_dev_down(struct net_device *dev); int ipoib_ib_dev_stop_default(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index da3c5315bbb5..6ee64c25aaff 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -846,7 +846,7 @@ timeout: return 0; } -int ipoib_ib_dev_stop(struct net_device *dev) +void ipoib_ib_dev_stop(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -854,8 +854,6 @@ int ipoib_ib_dev_stop(struct net_device *dev) clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); ipoib_flush_ah(dev); - - return 0; } int ipoib_ib_dev_open_default(struct net_device *dev) -- cgit From b9af0e2d5aeab77bef33bd3df073c3c96b69ec79 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 21 Jun 2020 13:47:36 +0300 Subject: IB/mad: Issue complete whenever decrements agent refcount Replace calls of atomic_dec() to mad_agent_priv->refcount with calls to deref_mad_agent() in order to issue complete. Most likely the refcount is > 1 at these points, but it is difficult to prove. Performance is not important on these paths, so be obviously correct. 
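For reference, deref_mad_agent() (as it exists in mad.c before the refcount_t conversion in the next patch) pairs the decrement with the completion that agent teardown waits on, roughly:

  static inline void
  deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
  {
          /* last reference dropped: wake the thread tearing the agent down */
          if (atomic_dec_and_test(&mad_agent_priv->refcount))
                  complete(&mad_agent_priv->comp);
  }

A bare atomic_dec() is correct only if it can never drop the last reference; if that assumption is ever wrong, complete() is never called and the unregistering thread waits forever.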
Link: https://lore.kernel.org/r/20200621104738.54850-3-leon@kernel.org Signed-off-by: Shay Drory Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/mad.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 186e0d652e8b..1135f2a5231a 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1148,7 +1148,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, spin_lock_irqsave(&mad_agent_priv->lock, flags); list_del(&mad_send_wr->agent_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - atomic_dec(&mad_agent_priv->refcount); + deref_mad_agent(mad_agent_priv); goto error; } } @@ -1831,7 +1831,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, mad_agent_priv->agent.recv_handler( &mad_agent_priv->agent, NULL, mad_recv_wc); - atomic_dec(&mad_agent_priv->refcount); + deref_mad_agent(mad_agent_priv); } else { /* not user rmpp, revert to normal behavior and * drop the mad */ @@ -1848,7 +1848,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, &mad_agent_priv->agent, &mad_send_wr->send_buf, mad_recv_wc); - atomic_dec(&mad_agent_priv->refcount); + deref_mad_agent(mad_agent_priv); mad_send_wc.status = IB_WC_SUCCESS; mad_send_wc.vendor_err = 0; @@ -2438,7 +2438,7 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) list_del(&mad_send_wr->agent_list); mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); - atomic_dec(&mad_agent_priv->refcount); + deref_mad_agent(mad_agent_priv); } } @@ -2572,7 +2572,7 @@ static void local_completions(struct work_struct *work) &local->mad_send_wr->send_buf, &local->mad_priv->header.recv_wc); spin_lock_irqsave(&recv_mad_agent->lock, flags); - atomic_dec(&recv_mad_agent->refcount); + deref_mad_agent(recv_mad_agent); spin_unlock_irqrestore(&recv_mad_agent->lock, flags); } @@ -2585,7 +2585,7 @@ local_send_completion: &mad_send_wc); spin_lock_irqsave(&mad_agent_priv->lock, flags); - atomic_dec(&mad_agent_priv->refcount); + deref_mad_agent(mad_agent_priv); if (free_mad) kfree(local->mad_priv); kfree(local); @@ -2671,7 +2671,7 @@ static void timeout_sends(struct work_struct *work) mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); - atomic_dec(&mad_agent_priv->refcount); + deref_mad_agent(mad_agent_priv); spin_lock_irqsave(&mad_agent_priv->lock, flags); } spin_unlock_irqrestore(&mad_agent_priv->lock, flags); -- cgit From e41c425349bc9fa5399717c21f77bb7cd911084f Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 21 Jun 2020 13:47:37 +0300 Subject: IB/mad: Change atomics to refcount API The refcount API provides better safety than atomics API. Therefore, change atomic functions to refcount functions. 
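The conversion is mechanical; illustratively:

  refcount_t refcount;                      /* was: atomic_t           */

  refcount_set(&refcount, 1);               /* atomic_set()            */
  refcount_inc(&refcount);                  /* atomic_inc()            */
  refcount_inc_not_zero(&refcount);         /* atomic_inc_not_zero()   */
  if (refcount_dec_and_test(&refcount))     /* atomic_dec_and_test()   */
          complete(&comp);

The gain is that refcount_t saturates instead of wrapping and warns on suspicious patterns such as incrementing a counter that has already reached zero.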
Link: https://lore.kernel.org/r/20200621104738.54850-4-leon@kernel.org Signed-off-by: Shay Drory Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/mad.c | 16 ++++++++-------- drivers/infiniband/core/mad_priv.h | 2 +- drivers/infiniband/core/mad_rmpp.c | 10 +++++----- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 1135f2a5231a..1a7f364e9677 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -402,7 +402,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends); INIT_LIST_HEAD(&mad_agent_priv->local_list); INIT_WORK(&mad_agent_priv->local_work, local_completions); - atomic_set(&mad_agent_priv->refcount, 1); + refcount_set(&mad_agent_priv->refcount, 1); init_completion(&mad_agent_priv->comp); ret2 = ib_mad_agent_security_setup(&mad_agent_priv->agent, qp_type); @@ -484,7 +484,7 @@ EXPORT_SYMBOL(ib_register_mad_agent); static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv) { - if (atomic_dec_and_test(&mad_agent_priv->refcount)) + if (refcount_dec_and_test(&mad_agent_priv->refcount)) complete(&mad_agent_priv->comp); } @@ -718,7 +718,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, * Reference MAD agent until receive * side of local completion handled */ - atomic_inc(&mad_agent_priv->refcount); + refcount_inc(&mad_agent_priv->refcount); } else kfree(mad_priv); break; @@ -758,7 +758,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, local->return_wc_byte_len = mad_size; } /* Reference MAD agent until send side of local completion handled */ - atomic_inc(&mad_agent_priv->refcount); + refcount_inc(&mad_agent_priv->refcount); /* Queue local completion to local list */ spin_lock_irqsave(&mad_agent_priv->lock, flags); list_add_tail(&local->completion_list, &mad_agent_priv->local_list); @@ -916,7 +916,7 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, } mad_send_wr->send_buf.mad_agent = mad_agent; - atomic_inc(&mad_agent_priv->refcount); + refcount_inc(&mad_agent_priv->refcount); return &mad_send_wr->send_buf; } EXPORT_SYMBOL(ib_create_send_mad); @@ -1131,7 +1131,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, mad_send_wr->status = IB_WC_SUCCESS; /* Reference MAD agent until send completes */ - atomic_inc(&mad_agent_priv->refcount); + refcount_inc(&mad_agent_priv->refcount); spin_lock_irqsave(&mad_agent_priv->lock, flags); list_add_tail(&mad_send_wr->agent_list, &mad_agent_priv->send_list); @@ -1554,7 +1554,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv, hi_tid = be64_to_cpu(mad_hdr->tid) >> 32; rcu_read_lock(); mad_agent = xa_load(&ib_mad_clients, hi_tid); - if (mad_agent && !atomic_inc_not_zero(&mad_agent->refcount)) + if (mad_agent && !refcount_inc_not_zero(&mad_agent->refcount)) mad_agent = NULL; rcu_read_unlock(); } else { @@ -1606,7 +1606,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv, } } if (mad_agent) - atomic_inc(&mad_agent->refcount); + refcount_inc(&mad_agent->refcount); out: spin_unlock_irqrestore(&port_priv->reg_lock, flags); } diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 403d8673a2f9..4aa16b35dad0 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -103,7 +103,7 @@ struct ib_mad_agent_private { 
struct work_struct local_work; struct list_head rmpp_list; - atomic_t refcount; + refcount_t refcount; union { struct completion comp; struct rcu_head rcu; diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index 5ec57abc0849..1bc9dfecae70 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -52,7 +52,7 @@ struct mad_rmpp_recv { struct completion comp; enum rmpp_state state; spinlock_t lock; - atomic_t refcount; + refcount_t refcount; struct ib_ah *ah; struct ib_mad_recv_wc *rmpp_wc; @@ -73,7 +73,7 @@ struct mad_rmpp_recv { static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv) { - if (atomic_dec_and_test(&rmpp_recv->refcount)) + if (refcount_dec_and_test(&rmpp_recv->refcount)) complete(&rmpp_recv->comp); } @@ -305,7 +305,7 @@ create_rmpp_recv(struct ib_mad_agent_private *agent, INIT_DELAYED_WORK(&rmpp_recv->cleanup_work, recv_cleanup_handler); spin_lock_init(&rmpp_recv->lock); rmpp_recv->state = RMPP_STATE_ACTIVE; - atomic_set(&rmpp_recv->refcount, 1); + refcount_set(&rmpp_recv->refcount, 1); rmpp_recv->rmpp_wc = mad_recv_wc; rmpp_recv->cur_seg_buf = &mad_recv_wc->recv_buf; @@ -357,7 +357,7 @@ acquire_rmpp_recv(struct ib_mad_agent_private *agent, spin_lock_irqsave(&agent->lock, flags); rmpp_recv = find_rmpp_recv(agent, mad_recv_wc); if (rmpp_recv) - atomic_inc(&rmpp_recv->refcount); + refcount_inc(&rmpp_recv->refcount); spin_unlock_irqrestore(&agent->lock, flags); return rmpp_recv; } @@ -553,7 +553,7 @@ start_rmpp(struct ib_mad_agent_private *agent, destroy_rmpp_recv(rmpp_recv); return continue_rmpp(agent, mad_recv_wc); } - atomic_inc(&rmpp_recv->refcount); + refcount_inc(&rmpp_recv->refcount); if (get_last_flag(&mad_recv_wc->recv_buf)) { rmpp_recv->state = RMPP_STATE_COMPLETE; -- cgit From 5611074a20fd17e05cda1f3897fce3528c4b26eb Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 21 Jun 2020 13:47:38 +0300 Subject: IB/mad: Delete RMPP_STATE_CANCELING state The cancel_delayed_work can be called under lock since it doesn't sleep. This makes the RMPP_STATE_CANCELING state not needed anymore, remove it. 
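The distinction that makes this safe, as a reminder:

  /* Only removes pending work from the queue, never sleeps:
   * fine while holding agent->lock. */
  cancel_delayed_work(&rmpp_recv->cleanup_work);

  /* Waits for an already-running callback to finish, may sleep:
   * must not be called under a spinlock. */
  cancel_delayed_work_sync(&rmpp_recv->cleanup_work);

A callback that was already running when the lock was taken is drained by the flush_workqueue() call that follows, so nothing is still executing when the rmpp_list is finally torn down.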
Link: https://lore.kernel.org/r/20200621104738.54850-5-leon@kernel.org Signed-off-by: Shay Drory Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/mad_rmpp.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index 1bc9dfecae70..e0573e4d0404 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -40,8 +40,7 @@ enum rmpp_state { RMPP_STATE_ACTIVE, RMPP_STATE_TIMEOUT, - RMPP_STATE_COMPLETE, - RMPP_STATE_CANCELING + RMPP_STATE_COMPLETE }; struct mad_rmpp_recv { @@ -91,23 +90,19 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent) unsigned long flags; spin_lock_irqsave(&agent->lock, flags); - list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { - if (rmpp_recv->state != RMPP_STATE_COMPLETE) - ib_free_recv_mad(rmpp_recv->rmpp_wc); - rmpp_recv->state = RMPP_STATE_CANCELING; - } - spin_unlock_irqrestore(&agent->lock, flags); - list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { cancel_delayed_work(&rmpp_recv->timeout_work); cancel_delayed_work(&rmpp_recv->cleanup_work); } + spin_unlock_irqrestore(&agent->lock, flags); flush_workqueue(agent->qp_info->port_priv->wq); list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv, &agent->rmpp_list, list) { list_del(&rmpp_recv->list); + if (rmpp_recv->state != RMPP_STATE_COMPLETE) + ib_free_recv_mad(rmpp_recv->rmpp_wc); destroy_rmpp_recv(rmpp_recv); } } @@ -272,10 +267,6 @@ static void recv_cleanup_handler(struct work_struct *work) unsigned long flags; spin_lock_irqsave(&rmpp_recv->agent->lock, flags); - if (rmpp_recv->state == RMPP_STATE_CANCELING) { - spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); - return; - } list_del(&rmpp_recv->list); spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); destroy_rmpp_recv(rmpp_recv); -- cgit From 14c2b89634a28577a242cb34f339d981da6d3ef6 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 24 Jun 2020 13:54:21 +0300 Subject: RDMA/core: Delete not-used create RWQ table function The RWQ table is used for RSS uverbs and not in used for the kernel consumers, delete ib_create_rwq_ind_table() routine that is not called at all. Link: https://lore.kernel.org/r/20200624105422.1452290-5-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 39 --------------------------------------- include/rdma/ib_verbs.h | 3 --- 2 files changed, 42 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index a3761d432a79..7232e6ec2e91 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2409,45 +2409,6 @@ int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, } EXPORT_SYMBOL(ib_modify_wq); -/* - * ib_create_rwq_ind_table - Creates a RQ Indirection Table. - * @device: The device on which to create the rwq indirection table. - * @ib_rwq_ind_table_init_attr: A list of initial attributes required to - * create the Indirection Table. - * - * Note: The life time of ib_rwq_ind_table_init_attr->ind_tbl is not less - * than the created ib_rwq_ind_table object and the caller is responsible - * for its memory allocation/free. 
- */ -struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device, - struct ib_rwq_ind_table_init_attr *init_attr) -{ - struct ib_rwq_ind_table *rwq_ind_table; - int i; - u32 table_size; - - if (!device->ops.create_rwq_ind_table) - return ERR_PTR(-EOPNOTSUPP); - - table_size = (1 << init_attr->log_ind_tbl_size); - rwq_ind_table = device->ops.create_rwq_ind_table(device, - init_attr, NULL); - if (IS_ERR(rwq_ind_table)) - return rwq_ind_table; - - rwq_ind_table->ind_tbl = init_attr->ind_tbl; - rwq_ind_table->log_ind_tbl_size = init_attr->log_ind_tbl_size; - rwq_ind_table->device = device; - rwq_ind_table->uobject = NULL; - atomic_set(&rwq_ind_table->usecnt, 0); - - for (i = 0; i < table_size; i++) - atomic_inc(&rwq_ind_table->ind_tbl[i]->usecnt); - - return rwq_ind_table; -} -EXPORT_SYMBOL(ib_create_rwq_ind_table); - /* * ib_destroy_rwq_ind_table - Destroys the specified Indirection Table. * @wq_ind_table: The Indirection Table to destroy. diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 77106ff3cd26..1e902a8f1713 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4422,9 +4422,6 @@ struct ib_wq *ib_create_wq(struct ib_pd *pd, int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr, u32 wq_attr_mask); -struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device, - struct ib_rwq_ind_table_init_attr* - wq_ind_table_init_attr); int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, -- cgit From c4334a99d3d6d8e2d1aa97655f44f637f17a3a11 Mon Sep 17 00:00:00 2001 From: Bolarinwa Olayemi Saheed Date: Mon, 15 Jun 2020 09:32:19 +0200 Subject: IB/hfi1: Convert PCIBIOS_* errors to generic -E* errors pcie_speeds() and restore_pci_variables() returns PCIBIOS_ error codes from PCIe capability accessors. PCIBIOS_ error codes have positive values. Passing on these values is inconsistent with functions which return only a negative value on failure. Before passing on the return value of PCIe capability accessors, call pcibios_err_to_errno() to convert any positive PCIBIOS_ error codes to negative generic error values. 
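The failure mode this guards against is a caller that reasonably tests only for negative values, e.g. (illustrative caller, not a quote from the driver):

  err = pcie_speeds(dd);
  if (err < 0)
          return err;     /* a positive PCIBIOS_* code would slip through */

pcibios_err_to_errno() passes zero and negative values through unchanged and maps the positive PCIBIOS_* codes to negative errnos, so the accessors' return values can now be propagated directly.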
Link: https://lore.kernel.org/r/20200615073225.24061-3-refactormyself@gmail.com Suggested-by: Bjorn Helgaas Signed-off-by: Bolarinwa Olayemi Saheed Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/pcie.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 1a6268d61977..18d32f053d26 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -306,7 +306,7 @@ int pcie_speeds(struct hfi1_devdata *dd) ret = pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap); if (ret) { dd_dev_err(dd, "Unable to read from PCI config\n"); - return ret; + return pcibios_err_to_errno(ret); } if ((linkcap & PCI_EXP_LNKCAP_SLS) != PCI_EXP_LNKCAP_SLS_8_0GB) { @@ -334,10 +334,14 @@ int pcie_speeds(struct hfi1_devdata *dd) return 0; } -/* restore command and BARs after a reset has wiped them out */ +/** + * Restore command and BARs after a reset has wiped them out + * + * Returns 0 on success, otherwise a negative error value + */ int restore_pci_variables(struct hfi1_devdata *dd) { - int ret = 0; + int ret; ret = pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command); if (ret) @@ -386,13 +390,17 @@ int restore_pci_variables(struct hfi1_devdata *dd) error: dd_dev_err(dd, "Unable to write to PCI config\n"); - return ret; + return pcibios_err_to_errno(ret); } -/* Save BARs and command to rewrite after device reset */ +/** + * Save BARs and command to rewrite after device reset + * + * Returns 0 on success, otherwise a negative error value + */ int save_pci_variables(struct hfi1_devdata *dd) { - int ret = 0; + int ret; ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, &dd->pcibar0); @@ -441,7 +449,7 @@ int save_pci_variables(struct hfi1_devdata *dd) error: dd_dev_err(dd, "Unable to read from PCI config\n"); - return ret; + return pcibios_err_to_errno(ret); } /* -- cgit From 65936bf25f90fe440bb2d11624c7d10fab266639 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 25 Jun 2020 20:42:19 +0300 Subject: RDMA/ipoib: Fix ABBA deadlock with ipoib_reap_ah() ipoib_mcast_carrier_on_task() insanely open codes a rtnl_lock() such that the only time flush_workqueue() can be called is if it also clears IPOIB_FLAG_OPER_UP. Thus the flush inside ipoib_flush_ah() will deadlock if it gets unlucky enough, and lockdep doesn't help us to find it early: CPU0 CPU1 CPU2 __ipoib_ib_dev_flush() down_read(vlan_rwsem) ipoib_vlan_add() rtnl_trylock() down_write(vlan_rwsem) ipoib_mcast_carrier_on_task() while (!rtnl_trylock()) msleep(20); ipoib_flush_ah() flush_workqueue(priv->wq) Clean up the ah_reaper related functions and lifecycle to make sense: - Start/Stop of the reaper should only be done in open/stop NDOs, not in any other places - cancel and flush of the reaper should only happen in the stop NDO. cancel is only functional when combined with IPOIB_STOP_REAPER. - Non-stop places were flushing the AH's just need to flush out dead AH's synchronously and ignore the background task completely. It is fully locked and harmless to leave running. Which ultimately fixes the ABBA deadlock by removing the unnecessary flush_workqueue() from the problematic place under the vlan_rwsem. 
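The resulting lifecycle, in brief:

  ipoib_ib_dev_open()     -> ipoib_start_ah_reaper(priv)
  ipoib_ib_dev_stop()     -> ipoib_stop_ah_reaper(priv)
  __ipoib_ib_dev_flush()  -> ipoib_reap_dead_ahs(priv)  /* synchronous reap only */
  ipoib_ib_dev_cleanup()  -> ipoib_reap_dead_ahs(priv)  /* final flush of dead AHs */

None of these paths calls flush_workqueue() while holding vlan_rwsem anymore, which is what breaks the cycle shown above.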
Fixes: efc82eeeae4e ("IB/ipoib: No longer use flush as a parameter") Link: https://lore.kernel.org/r/20200625174219.290842-1-kamalheib1@gmail.com Reported-by: Kamal Heib Tested-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 65 ++++++++++++++----------------- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 + 2 files changed, 31 insertions(+), 36 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 6ee64c25aaff..494f413dc3c6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -670,13 +670,12 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, return rc; } -static void __ipoib_reap_ah(struct net_device *dev) +static void ipoib_reap_dead_ahs(struct ipoib_dev_priv *priv) { - struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_ah *ah, *tah; unsigned long flags; - netif_tx_lock_bh(dev); + netif_tx_lock_bh(priv->dev); spin_lock_irqsave(&priv->lock, flags); list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list) @@ -687,37 +686,37 @@ static void __ipoib_reap_ah(struct net_device *dev) } spin_unlock_irqrestore(&priv->lock, flags); - netif_tx_unlock_bh(dev); + netif_tx_unlock_bh(priv->dev); } void ipoib_reap_ah(struct work_struct *work) { struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, ah_reap_task.work); - struct net_device *dev = priv->dev; - __ipoib_reap_ah(dev); + ipoib_reap_dead_ahs(priv); if (!test_bit(IPOIB_STOP_REAPER, &priv->flags)) queue_delayed_work(priv->wq, &priv->ah_reap_task, round_jiffies_relative(HZ)); } -static void ipoib_flush_ah(struct net_device *dev) +static void ipoib_start_ah_reaper(struct ipoib_dev_priv *priv) { - struct ipoib_dev_priv *priv = ipoib_priv(dev); - - cancel_delayed_work(&priv->ah_reap_task); - flush_workqueue(priv->wq); - ipoib_reap_ah(&priv->ah_reap_task.work); + clear_bit(IPOIB_STOP_REAPER, &priv->flags); + queue_delayed_work(priv->wq, &priv->ah_reap_task, + round_jiffies_relative(HZ)); } -static void ipoib_stop_ah(struct net_device *dev) +static void ipoib_stop_ah_reaper(struct ipoib_dev_priv *priv) { - struct ipoib_dev_priv *priv = ipoib_priv(dev); - set_bit(IPOIB_STOP_REAPER, &priv->flags); - ipoib_flush_ah(dev); + cancel_delayed_work(&priv->ah_reap_task); + /* + * After ipoib_stop_ah_reaper() we always go through + * ipoib_reap_dead_ahs() which ensures the work is really stopped and + * does a final flush out of the dead_ah's list + */ } static int recvs_pending(struct net_device *dev) @@ -846,16 +845,6 @@ timeout: return 0; } -void ipoib_ib_dev_stop(struct net_device *dev) -{ - struct ipoib_dev_priv *priv = ipoib_priv(dev); - - priv->rn_ops->ndo_stop(dev); - - clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); - ipoib_flush_ah(dev); -} - int ipoib_ib_dev_open_default(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -899,10 +888,7 @@ int ipoib_ib_dev_open(struct net_device *dev) return -1; } - clear_bit(IPOIB_STOP_REAPER, &priv->flags); - queue_delayed_work(priv->wq, &priv->ah_reap_task, - round_jiffies_relative(HZ)); - + ipoib_start_ah_reaper(priv); if (priv->rn_ops->ndo_open(dev)) { pr_warn("%s: Failed to open dev\n", dev->name); goto dev_stop; @@ -913,13 +899,20 @@ int ipoib_ib_dev_open(struct net_device *dev) return 0; dev_stop: - set_bit(IPOIB_STOP_REAPER, &priv->flags); - cancel_delayed_work(&priv->ah_reap_task); - set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); - ipoib_ib_dev_stop(dev); + ipoib_stop_ah_reaper(priv); return -1; } 
+void ipoib_ib_dev_stop(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + priv->rn_ops->ndo_stop(dev); + + clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); + ipoib_stop_ah_reaper(priv); +} + void ipoib_pkey_dev_check_presence(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -1230,7 +1223,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, ipoib_mcast_dev_flush(dev); if (oper_up) set_bit(IPOIB_FLAG_OPER_UP, &priv->flags); - ipoib_flush_ah(dev); + ipoib_reap_dead_ahs(priv); } if (level >= IPOIB_FLUSH_NORMAL) @@ -1305,7 +1298,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev) * the neighbor garbage collection is stopped and reaped. * That should all be done now, so make a final ah flush. */ - ipoib_stop_ah(dev); + ipoib_reap_dead_ahs(priv); clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 3cfb682b91b0..ef60e8e4ae67 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1976,6 +1976,8 @@ static void ipoib_ndo_uninit(struct net_device *dev) /* no more works over the priv->wq */ if (priv->wq) { + /* See ipoib_mcast_carrier_on_task() */ + WARN_ON(test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)); flush_workqueue(priv->wq); destroy_workqueue(priv->wq); priv->wq = NULL; -- cgit From 0cb42c0265837fafa2b4f302c8a7fed2631d7869 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 26 Jun 2020 14:49:10 -0300 Subject: RDMA/core: Fix bogus WARN_ON during ib_unregister_device_queued() ib_unregister_device_queued() can only be used by drivers using the new dealloc_device callback flow, and it has a safety WARN_ON to ensure drivers are using it properly. However, if unregister and register are raced there is a special destruction path that maintains the uniform error handling semantic of 'caller does ib_dealloc_device() on failure'. This requires disabling the dealloc_device callback which triggers the WARN_ON. Instead of using NULL to disable the callback use a special function pointer so the WARN_ON does not trigger. Fixes: d0899892edd0 ("RDMA/device: Provide APIs from the core code to help unregistration") Link: https://lore.kernel.org/r/0-v1-a36d512e0a99+762-syz_dealloc_driver_jgg@nvidia.com Reported-by: syzbot+4088ed905e4ae2b0e13b@syzkaller.appspotmail.com Suggested-by: Hillf Danton Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 1335ed1f1e4a..40cf07129f66 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1339,6 +1339,10 @@ out: return ret; } +static void prevent_dealloc_device(struct ib_device *ib_dev) +{ +} + /** * ib_register_device - Register an IB device with IB core * @device: Device to register @@ -1409,11 +1413,11 @@ int ib_register_device(struct ib_device *device, const char *name) * possibility for a parallel unregistration along with this * error flow. Since we have a refcount here we know any * parallel flow is stopped in disable_device and will see the - * NULL pointers, causing the responsibility to + * special dealloc_driver pointer, causing the responsibility to * ib_dealloc_device() to revert back to this thread. 
*/ dealloc_fn = device->ops.dealloc_driver; - device->ops.dealloc_driver = NULL; + device->ops.dealloc_driver = prevent_dealloc_device; ib_device_put(device); __ib_unregister_device(device); device->ops.dealloc_driver = dealloc_fn; @@ -1462,7 +1466,8 @@ static void __ib_unregister_device(struct ib_device *ib_dev) * Drivers using the new flow may not call ib_dealloc_device except * in error unwind prior to registration success. */ - if (ib_dev->ops.dealloc_driver) { + if (ib_dev->ops.dealloc_driver && + ib_dev->ops.dealloc_driver != prevent_dealloc_device) { WARN_ON(kref_read(&ib_dev->dev.kobj.kref) <= 1); ib_dealloc_device(ib_dev); } -- cgit From d473f4dc2f95c8c856b1659ced3502802b7d2fbe Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 21 Jun 2020 13:41:47 +0300 Subject: RDMA/mlx5: Introduce ODP prefetch counter For debugging purpose it will be easier to understand if prefetch works okay if it has its own counter. Introduce ODP prefetch counter and count per MR the total number of prefetched pages. In addition remove comment which is not relevant anymore and anyway not in the correct place. Link: https://lore.kernel.org/r/20200621104147.53795-1-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 19 ++++++++++--------- drivers/infiniband/hw/mlx5/restrack.c | 3 +++ include/rdma/ib_verbs.h | 1 + 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 7d2ec9ee5097..ee88b32d143d 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -913,11 +913,6 @@ next_mr: if (ret < 0) goto srcu_unlock; - /* - * When prefetching a page, page fault is generated - * in order to bring the page to the main memory. - * In the current flow, page faults are being counted. 
- */ mlx5_update_odp_stats(mr, faults, ret); npages += ret; @@ -1755,12 +1750,17 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) struct prefetch_mr_work *work = container_of(w, struct prefetch_mr_work, work); u32 bytes_mapped = 0; + int ret; u32 i; - for (i = 0; i < work->num_sge; ++i) - pagefault_mr(work->frags[i].mr, work->frags[i].io_virt, - work->frags[i].length, &bytes_mapped, - work->pf_flags); + for (i = 0; i < work->num_sge; ++i) { + ret = pagefault_mr(work->frags[i].mr, work->frags[i].io_virt, + work->frags[i].length, &bytes_mapped, + work->pf_flags); + if (ret <= 0) + continue; + mlx5_update_odp_stats(work->frags[i].mr, prefetch, ret); + } destroy_prefetch_work(work); } @@ -1818,6 +1818,7 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, &bytes_mapped, pf_flags); if (ret < 0) goto out; + mlx5_update_odp_stats(mr, prefetch, ret); } ret = 0; diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c index 224a63975822..32c6d0397946 100644 --- a/drivers/infiniband/hw/mlx5/restrack.c +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -99,6 +99,9 @@ int mlx5_ib_fill_stat_mr_entry(struct sk_buff *msg, msg, "page_invalidations", atomic64_read(&mr->odp_stats.invalidations))) goto err_table; + if (rdma_nl_stat_hwcounter_entry(msg, "page_prefetch", + atomic64_read(&mr->odp_stats.prefetch))) + goto err_table; nla_nest_end(msg, table_attr); return 0; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 1e902a8f1713..f6b51a709818 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2271,6 +2271,7 @@ struct rdma_netdev_alloc_params { struct ib_odp_counters { atomic64_t faults; atomic64_t invalidations; + atomic64_t prefetch; }; struct ib_counters { -- cgit From 4dca650991e4175b8d5bae7ff6f1637a0c42be4a Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Wed, 20 May 2020 13:59:06 +0300 Subject: net/mlx5: Enable QP number request when creating IPoIB underlay QP If in the process of creating the underlay QP for an IPoIB interface the user has set the address and specifically the 1st-3rd bytes representing the QP number, use the requested QP number when creating the underlay QP. For a user to be able to request a QP number on QP creation, the MKEY_BY_NAME NVCONFIG should be set. As mkey_by_name and qp_by_name are coupled in FW. This requires driver to query the mkey_by_name max cap during initialization and set the current cap if it was enabled in FW. 
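For context, the 20-byte IPoIB hardware address carries the QP number in bytes 1-3 and the port GID in bytes 4-19, so the underlay QP creation can simply extract the requested number:

  /* dev_addr[1..3]: requested QPN, dev_addr[4..19]: port GID */
  qpn = (dev_addr[1] << 16) + (dev_addr[2] << 8) + dev_addr[3];
  MLX5_SET(create_qp_in, in, input_qpn, qpn);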
Signed-off-by: Michael Guralnik Reviewed-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 7 +++++++ drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 +++ include/linux/mlx5/mlx5_ifc.h | 9 +++++++-- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 690b822c6152..d1266d8fed97 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -226,13 +226,20 @@ void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv) int mlx5i_create_underlay_qp(struct mlx5e_priv *priv) { + unsigned char *dev_addr = priv->netdev->dev_addr; u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; u32 in[MLX5_ST_SZ_DW(create_qp_in)] = {}; struct mlx5i_priv *ipriv = priv->ppriv; void *addr_path; + int qpn = 0; int ret = 0; void *qpc; + if (MLX5_CAP_GEN(priv->mdev, mkey_by_name)) { + qpn = (dev_addr[1] << 16) + (dev_addr[2] << 8) + dev_addr[3]; + MLX5_SET(create_qp_in, in, input_qpn, qpn); + } + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); MLX5_SET(qpc, qpc, st, MLX5_QP_ST_UD); MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 8b658908f044..623785fe74b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -557,6 +557,9 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) if (MLX5_CAP_GEN_MAX(dev, release_all_pages)) MLX5_SET(cmd_hca_cap, set_hca_cap, release_all_pages, 1); + if (MLX5_CAP_GEN_MAX(dev, mkey_by_name)) + MLX5_SET(cmd_hca_cap, set_hca_cap, mkey_by_name, 1); + return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index a227518c70cf..3786888cb1ba 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1392,7 +1392,10 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 bf[0x1]; u8 driver_version[0x1]; u8 pad_tx_eth_packet[0x1]; - u8 reserved_at_263[0x8]; + u8 reserved_at_263[0x3]; + u8 mkey_by_name[0x1]; + u8 reserved_at_267[0x4]; + u8 log_bf_reg_size[0x5]; u8 reserved_at_270[0x8]; @@ -7712,8 +7715,10 @@ struct mlx5_ifc_create_qp_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 reserved_at_40[0x8]; + u8 input_qpn[0x18]; + u8 reserved_at_60[0x20]; u8 opt_param_mask[0x20]; u8 ece[0x20]; -- cgit From 87fb5c1ccb856838f7f75b1cc85ed0691ee8eb79 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Tue, 23 Jun 2020 14:01:05 +0300 Subject: RDMA/ipoib: Handle user-supplied address when creating child Use the address supplied by user when creating a child interface. Previously, the address requested by the user was ignored and overridden with parent's GID and the random QP number assigned to the child. 
Link: https://lore.kernel.org/r/20200623110105.1225750-3-leon@kernel.org Signed-off-by: Michael Guralnik Reviewed-by: Feras Daoud Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 3cfb682b91b0..a9f1174f7320 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1892,8 +1892,15 @@ static void ipoib_child_init(struct net_device *ndev) priv->max_ib_mtu = ppriv->max_ib_mtu; set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags); - memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN); - memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid)); + if (memchr_inv(priv->dev->dev_addr, 0, INFINIBAND_ALEN)) + memcpy(&priv->local_gid, priv->dev->dev_addr + 4, + sizeof(priv->local_gid)); + else { + memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, + INFINIBAND_ALEN); + memcpy(&priv->local_gid, &ppriv->local_gid, + sizeof(priv->local_gid)); + } } static int ipoib_ndo_init(struct net_device *ndev) -- cgit From c367124e6cb32e548bb7a2823e00889d09d6bc87 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 2 Jul 2020 10:19:46 -0400 Subject: RDMA/core: Clean up tracepoint headers There's no need for core/trace.c to include rdma/ib_verbs.h twice. Link: https://lore.kernel.org/r/20200702141946.3775.51943.stgit@klimt.1015granger.net Signed-off-by: Chuck Lever Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/trace.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/core/trace.c b/drivers/infiniband/core/trace.c index 6c3514beac4d..31e7860d35bf 100644 --- a/drivers/infiniband/core/trace.c +++ b/drivers/infiniband/core/trace.c @@ -9,6 +9,4 @@ #define CREATE_TRACE_POINTS -#include - #include -- cgit From c5f42b21051517a2070a67d21be8e423d5fdf0d9 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 6 Jul 2020 15:03:41 +0300 Subject: RDMA/core: Check for error instead of success in alloc MR function The common kernel pattern is to check for error, not success. Flip the if statement accordingly and keep the main flow unindented. 
Link: https://lore.kernel.org/r/20200706120343.10816-2-galpress@amazon.com Signed-off-by: Gal Pressman Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 7232e6ec2e91..48d194ec15d9 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2133,18 +2133,19 @@ struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, } mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg, udata); - if (!IS_ERR(mr)) { - mr->device = pd->device; - mr->pd = pd; - mr->dm = NULL; - mr->uobject = NULL; - atomic_inc(&pd->usecnt); - mr->need_inval = false; - mr->res.type = RDMA_RESTRACK_MR; - rdma_restrack_kadd(&mr->res); - mr->type = mr_type; - mr->sig_attrs = NULL; - } + if (IS_ERR(mr)) + goto out; + + mr->device = pd->device; + mr->pd = pd; + mr->dm = NULL; + mr->uobject = NULL; + atomic_inc(&pd->usecnt); + mr->need_inval = false; + mr->res.type = RDMA_RESTRACK_MR; + rdma_restrack_kadd(&mr->res); + mr->type = mr_type; + mr->sig_attrs = NULL; out: trace_mr_alloc(pd, mr_type, max_num_sg, mr); -- cgit From b64b74b1d51cce8c9496b6a071c1d59786c2144d Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 6 Jul 2020 15:03:42 +0300 Subject: RDMA/core: Remove ib_alloc_mr_user function Allocating an MR flow can only be initiated by kernel users, and not from userspace. As a result, the udata parameter is always being passed as NULL. Rename ib_alloc_mr_user function to ib_alloc_mr and remove the udata parameter. Link: https://lore.kernel.org/r/20200706120343.10816-3-galpress@amazon.com Signed-off-by: Gal Pressman Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 11 +++++------ include/rdma/ib_verbs.h | 10 ++-------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 48d194ec15d9..e98d3ada0951 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2104,11 +2104,10 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata) EXPORT_SYMBOL(ib_dereg_mr_user); /** - * ib_alloc_mr_user() - Allocates a memory region + * ib_alloc_mr() - Allocates a memory region * @pd: protection domain associated with the region * @mr_type: memory region type * @max_num_sg: maximum sg entries available for registration. - * @udata: user data or null for kernel objects * * Notes: * Memory registeration page/sg lists must not exceed max_num_sg. @@ -2116,8 +2115,8 @@ EXPORT_SYMBOL(ib_dereg_mr_user); * max_num_sg * used_page_size. 
* */ -struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) +struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg) { struct ib_mr *mr; @@ -2132,7 +2131,7 @@ struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, goto out; } - mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg, udata); + mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg, NULL); if (IS_ERR(mr)) goto out; @@ -2151,7 +2150,7 @@ out: trace_mr_alloc(pd, mr_type, max_num_sg, mr); return mr; } -EXPORT_SYMBOL(ib_alloc_mr_user); +EXPORT_SYMBOL(ib_alloc_mr); /** * ib_alloc_mr_integrity() - Allocates an integrity memory region diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index f6b51a709818..084fe8f53ae8 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4269,14 +4269,8 @@ static inline int ib_dereg_mr(struct ib_mr *mr) return ib_dereg_mr_user(mr, NULL); } -struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); - -static inline struct ib_mr *ib_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, u32 max_num_sg) -{ - return ib_alloc_mr_user(pd, mr_type, max_num_sg, NULL); -} +struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg); struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, u32 max_num_data_sg, -- cgit From 42a3b153966c9cd9a90f6a669d1ffed7fef2d325 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 6 Jul 2020 15:03:43 +0300 Subject: RDMA: Remove the udata parameter from alloc_mr callback Allocating an MR flow can only be initiated by kernel users, and not from userspace so a udata parameter is redundant. Link: https://lore.kernel.org/r/20200706120343.10816-4-galpress@amazon.com Signed-off-by: Gal Pressman Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 2 +- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 +- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 2 +- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 2 +- drivers/infiniband/hw/cxgb4/mem.c | 2 +- drivers/infiniband/hw/hns/hns_roce_device.h | 2 +- drivers/infiniband/hw/hns/hns_roce_mr.c | 2 +- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 3 +-- drivers/infiniband/hw/mlx4/mlx4_ib.h | 2 +- drivers/infiniband/hw/mlx4/mr.c | 2 +- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- drivers/infiniband/hw/mlx5/mr.c | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 2 +- drivers/infiniband/hw/qedr/verbs.c | 2 +- drivers/infiniband/hw/qedr/verbs.h | 2 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c | 2 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 2 +- drivers/infiniband/sw/rdmavt/mr.c | 2 +- drivers/infiniband/sw/rdmavt/mr.h | 2 +- drivers/infiniband/sw/rxe/rxe_verbs.c | 2 +- drivers/infiniband/sw/siw/siw_verbs.c | 2 +- drivers/infiniband/sw/siw/siw_verbs.h | 2 +- include/rdma/ib_verbs.h | 2 +- 24 files changed, 24 insertions(+), 25 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index e98d3ada0951..f10dc00074a7 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2131,7 +2131,7 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, goto out; } - mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg, NULL); + mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg); if (IS_ERR(mr)) goto out; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c 
b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 8b6ad5cddfce..f32c7e85ae05 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -3569,7 +3569,7 @@ int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents, } struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index e5fbbeba6d28..b4a06b553b04 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -201,7 +201,7 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *pd, int mr_access_flags); int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); int bnxt_re_dereg_mr(struct ib_mr *mr, struct ib_udata *udata); struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 27da0705c88a..2b2b009b371a 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -980,7 +980,7 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); void c4iw_qp_add_ref(struct ib_qp *qp); void c4iw_qp_rem_ref(struct ib_qp *qp); struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int c4iw_dealloc_mw(struct ib_mw *mw); diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 962dc97a8ff2..ea6fb2c5b1a7 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -691,7 +691,7 @@ int c4iw_dealloc_mw(struct ib_mw *mw) } struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct c4iw_dev *rhp; struct c4iw_pd *php; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index a61f0c4d4dbb..5b946b5bd586 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1191,7 +1191,7 @@ int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, struct ib_udata *udata); struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 4c0bbb12770d..1380cdab5701 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -414,7 +414,7 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) } struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 
max_num_sg) { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct device *dev = hr_dev->dev; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 19af29a48c55..f9ef3ac2f4cd 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -1543,10 +1543,9 @@ static int i40iw_hw_alloc_stag(struct i40iw_device *iwdev, struct i40iw_mr *iwmr * @pd: ibpd pointer * @mr_type: memory for stag registrion * @max_num_sg: man number of pages - * @udata: user data or NULL for kernel objects */ static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct i40iw_pd *iwpd = to_iwpd(pd); struct i40iw_device *iwdev = to_iwdev(pd->device); diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 6f4ea1067095..38e87a700a2a 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -729,7 +729,7 @@ struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata); int mlx4_ib_dealloc_mw(struct ib_mw *mw); struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 7e0b205c05eb..6eecedeff2d3 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -655,7 +655,7 @@ int mlx4_ib_dealloc_mw(struct ib_mw *ibmw) } struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct mlx4_ib_dev *dev = to_mdev(pd->device); struct mlx4_ib_mr *mr; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 2fd199c07dda..93a471191a1d 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1210,7 +1210,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, struct ib_pd *pd, struct ib_udata *udata); int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd, u32 max_num_sg, u32 max_num_meta_sg); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 44683073be0c..3e6f2f9c6655 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1961,7 +1961,7 @@ err_free: } struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0); } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index d11c74390a12..6cdbec13756a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -2901,7 +2901,7 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags) } struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { int status; struct ocrdma_mr *mr; diff 
--git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index 3a5010881be5..df8e3b923a44 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -101,7 +101,7 @@ struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc); struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length, u64 virt, int acc, struct ib_udata *); struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 9b9e80266367..3d7d5617818f 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -3003,7 +3003,7 @@ err0: } struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct qedr_mr *mr; diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 5e02387e068d..39dd6286ba39 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -84,7 +84,7 @@ int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); struct ib_mr *qedr_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); int qedr_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc); int qedr_post_send(struct ib_qp *, const struct ib_send_wr *, const struct ib_send_wr **bad_wr); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c index b039f1f00e05..77a010e68208 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c @@ -202,7 +202,7 @@ err_umem: * @return: ib_mr pointer on success, otherwise returns an errno. */ struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct pvrdma_dev *dev = to_vdev(pd->device); struct pvrdma_user_mr *mr; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index 267702226f10..699b20849a7e 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -406,7 +406,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct ib_udata *udata); int pvrdma_dereg_mr(struct ib_mr *mr, struct ib_udata *udata); struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 60864e5ca7cb..2f7c25fea44a 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -576,7 +576,7 @@ out: * Return: the memory region on success, otherwise return an errno. 
*/ struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct rvt_mr *mr; diff --git a/drivers/infiniband/sw/rdmavt/mr.h b/drivers/infiniband/sw/rdmavt/mr.h index 780fc63af98b..b3aba359401b 100644 --- a/drivers/infiniband/sw/rdmavt/mr.h +++ b/drivers/infiniband/sw/rdmavt/mr.h @@ -71,7 +71,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct ib_udata *udata); int rvt_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index b8a22af724e8..0472df52d36d 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -975,7 +975,7 @@ static int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) } static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 987e2ba05dbc..0d509f7a10a6 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -1373,7 +1373,7 @@ err_out: } struct ib_mr *siw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_sge, struct ib_udata *udata) + u32 max_sge) { struct siw_device *sdev = to_siw_dev(pd->device); struct siw_mr *mr = NULL; diff --git a/drivers/infiniband/sw/siw/siw_verbs.h b/drivers/infiniband/sw/siw/siw_verbs.h index 1a731989fad6..9335c48c01de 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.h +++ b/drivers/infiniband/sw/siw/siw_verbs.h @@ -69,7 +69,7 @@ int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags); struct ib_mr *siw_reg_user_mr(struct ib_pd *base_pd, u64 start, u64 len, u64 rnic_va, int rights, struct ib_udata *udata); struct ib_mr *siw_alloc_mr(struct ib_pd *base_pd, enum ib_mr_type mr_type, - u32 max_sge, struct ib_udata *udata); + u32 max_sge); struct ib_mr *siw_get_dma_mr(struct ib_pd *base_pd, int rights); int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle, unsigned int *sg_off); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 084fe8f53ae8..204ec7516ef5 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2478,7 +2478,7 @@ struct ib_device_ops { struct ib_pd *pd, struct ib_udata *udata); int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata); struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); struct ib_mr *(*alloc_mr_integrity)(struct ib_pd *pd, u32 max_num_data_sg, u32 max_num_meta_sg); -- cgit From f4375443b7861ca5d93a10dba5ef5a478d5df96a Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 6 Jul 2020 15:27:14 +0300 Subject: RDMA/mlx5: Get XRCD number directly for the internal use The mlx5_ib creates XRC domain and uses for creating internal SRQ. However all that is needed is XRCD number and not full blown ib_xrcd objects. Update the code to get and store the number only. 
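A rough sketch of the resulting pattern (condensed from create_dev_resources() in the diff below; "dev" and "qpc" stand for the surrounding driver context and error handling is trimmed):

        u32 xrcdn;
        int err;

        /* allocate a bare XRCD number through the command interface
         * (uid 0 marks it as kernel owned) */
        err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcdn, 0);
        if (err)
                return err;

        /* the number is programmed directly into the QP/SRQ context */
        MLX5_SET(qpc, qpc, xrcd, xrcdn);

        /* teardown */
        mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0);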
Link: https://lore.kernel.org/r/20200706122716.647338-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 39 +++++++++++++----------------------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 4 ++-- drivers/infiniband/hw/mlx5/qp.c | 10 ++++----- drivers/infiniband/hw/mlx5/srq.c | 4 ++-- 4 files changed, 23 insertions(+), 34 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 9dbc87c540e4..ca6fac31a1b8 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5003,6 +5003,9 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) dev = container_of(devr, struct mlx5_ib_dev, devr); ibdev = &dev->ib_dev; + if (!MLX5_CAP_GEN(dev->mdev, xrc)) + return -EOPNOTSUPP; + mutex_init(&devr->mutex); devr->p0 = rdma_zalloc_drv_obj(ibdev, ib_pd); @@ -5030,34 +5033,19 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) if (ret) goto err_create_cq; - devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL); - if (IS_ERR(devr->x0)) { - ret = PTR_ERR(devr->x0); + ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn0, 0); + if (ret) goto error2; - } - devr->x0->device = &dev->ib_dev; - devr->x0->inode = NULL; - atomic_set(&devr->x0->usecnt, 0); - mutex_init(&devr->x0->tgt_qp_mutex); - INIT_LIST_HEAD(&devr->x0->tgt_qp_list); - devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL); - if (IS_ERR(devr->x1)) { - ret = PTR_ERR(devr->x1); + ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn1, 0); + if (ret) goto error3; - } - devr->x1->device = &dev->ib_dev; - devr->x1->inode = NULL; - atomic_set(&devr->x1->usecnt, 0); - mutex_init(&devr->x1->tgt_qp_mutex); - INIT_LIST_HEAD(&devr->x1->tgt_qp_list); memset(&attr, 0, sizeof(attr)); attr.attr.max_sge = 1; attr.attr.max_wr = 1; attr.srq_type = IB_SRQT_XRC; attr.ext.cq = devr->c0; - attr.ext.xrc.xrcd = devr->x0; devr->s0 = rdma_zalloc_drv_obj(ibdev, ib_srq); if (!devr->s0) { @@ -5068,13 +5056,11 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) devr->s0->device = &dev->ib_dev; devr->s0->pd = devr->p0; devr->s0->srq_type = IB_SRQT_XRC; - devr->s0->ext.xrc.xrcd = devr->x0; devr->s0->ext.cq = devr->c0; ret = mlx5_ib_create_srq(devr->s0, &attr, NULL); if (ret) goto err_create; - atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt); atomic_inc(&devr->s0->ext.cq->usecnt); atomic_inc(&devr->p0->usecnt); atomic_set(&devr->s0->usecnt, 0); @@ -5116,9 +5102,9 @@ error5: err_create: kfree(devr->s0); error4: - mlx5_ib_dealloc_xrcd(devr->x1, NULL); + mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0); error3: - mlx5_ib_dealloc_xrcd(devr->x0, NULL); + mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0); error2: mlx5_ib_destroy_cq(devr->c0, NULL); err_create_cq: @@ -5132,14 +5118,17 @@ error0: static void destroy_dev_resources(struct mlx5_ib_resources *devr) { + struct mlx5_ib_dev *dev; int port; + dev = container_of(devr, struct mlx5_ib_dev, devr); + mlx5_ib_destroy_srq(devr->s1, NULL); kfree(devr->s1); mlx5_ib_destroy_srq(devr->s0, NULL); kfree(devr->s0); - mlx5_ib_dealloc_xrcd(devr->x0, NULL); - mlx5_ib_dealloc_xrcd(devr->x1, NULL); + mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0); + mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0); mlx5_ib_destroy_cq(devr->c0, NULL); kfree(devr->c0); mlx5_ib_dealloc_pd(devr->p0, NULL); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 93a471191a1d..74d3507d6f80 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ 
b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -730,8 +730,8 @@ struct mlx5_ib_port_resources { struct mlx5_ib_resources { struct ib_cq *c0; - struct ib_xrcd *x0; - struct ib_xrcd *x1; + u32 xrcdn0; + u32 xrcdn1; struct ib_pd *p0; struct ib_srq *s0; struct ib_srq *s1; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 81bf6b975e0e..7489b0479e4f 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2035,15 +2035,15 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, switch (init_attr->qp_type) { case IB_QPT_XRC_INI: MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn); - MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn); + MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1); MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn); break; default: if (init_attr->srq) { - MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn); + MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0); MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(init_attr->srq)->msrq.srqn); } else { - MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn); + MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1); MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s1)->msrq.srqn); } } @@ -2183,11 +2183,11 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_SET(qpc, qpc, no_sq, 1); if (attr->srq) { - MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn); + MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0); MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(attr->srq)->msrq.srqn); } else { - MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn); + MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1); MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s1)->msrq.srqn); } diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 6d1ff13d2283..7e10cbcb6d5c 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -274,10 +274,10 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq, if (srq->wq_sig) in.flags |= MLX5_SRQ_FLAG_WQ_SIG; - if (init_attr->srq_type == IB_SRQT_XRC) + if (init_attr->srq_type == IB_SRQT_XRC && init_attr->ext.xrc.xrcd) in.xrcd = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn; else - in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn; + in.xrcd = dev->devr.xrcdn0; if (init_attr->srq_type == IB_SRQT_TM) { in.tm_log_list_size = -- cgit From b73efcb26e2c6d66797c57bed8df13c3718c58cb Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 6 Jul 2020 15:27:15 +0300 Subject: RDMA/core: Clean ib_alloc_xrcd() and reuse it to allocate XRC domain ib_alloc_xrcd() already does the required initialization, so move the uverbs to call it and save code duplication, while cleaning the function argument lists of that function. 
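A minimal sketch of the consolidated call path after this patch ("inode" and "udata" come from the caller and may be NULL for kernel objects, per the kernel-doc below; error handling trimmed):

        struct ib_xrcd *xrcd;

        xrcd = ib_alloc_xrcd_user(ib_dev, inode, udata);
        if (IS_ERR(xrcd))
                return PTR_ERR(xrcd);

        /* device, inode, usecnt and the target QP tracking are already
         * initialized by the core; no per-caller setup is needed */

        ib_dealloc_xrcd_user(xrcd, udata);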
Link: https://lore.kernel.org/r/20200706122716.647338-3-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 12 +++--------- drivers/infiniband/core/verbs.c | 24 ++++++++++++++++++------ include/rdma/ib_verbs.h | 18 +++--------------- 3 files changed, 24 insertions(+), 30 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index b48b3f6e632d..89ff5d06c5d7 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -614,17 +614,11 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs) } if (!xrcd) { - xrcd = ib_dev->ops.alloc_xrcd(ib_dev, &attrs->driver_udata); + xrcd = ib_alloc_xrcd_user(ib_dev, inode, &attrs->driver_udata); if (IS_ERR(xrcd)) { ret = PTR_ERR(xrcd); goto err; } - - xrcd->inode = inode; - xrcd->device = ib_dev; - atomic_set(&xrcd->usecnt, 0); - mutex_init(&xrcd->tgt_qp_mutex); - INIT_LIST_HEAD(&xrcd->tgt_qp_list); new_xrcd = 1; } @@ -663,7 +657,7 @@ err_copy: } err_dealloc_xrcd: - ib_dealloc_xrcd(xrcd, uverbs_get_cleared_udata(attrs)); + ib_dealloc_xrcd_user(xrcd, uverbs_get_cleared_udata(attrs)); err: uobj_alloc_abort(&obj->uobject, attrs); @@ -701,7 +695,7 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, if (inode && !atomic_dec_and_test(&xrcd->usecnt)) return 0; - ret = ib_dealloc_xrcd(xrcd, &attrs->driver_udata); + ret = ib_dealloc_xrcd_user(xrcd, &attrs->driver_udata); if (ib_is_destroy_retryable(ret, why, uobject)) { atomic_inc(&xrcd->usecnt); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index f10dc00074a7..a9b99c3ff25c 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2288,17 +2288,24 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) } EXPORT_SYMBOL(ib_detach_mcast); -struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller) +/** + * ib_alloc_xrcd_user - Allocates an XRC domain. + * @device: The device on which to allocate the XRC domain. + * @inode: inode to connect XRCD + * @udata: Valid user data or NULL for kernel object + */ +struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device, + struct inode *inode, struct ib_udata *udata) { struct ib_xrcd *xrcd; if (!device->ops.alloc_xrcd) return ERR_PTR(-EOPNOTSUPP); - xrcd = device->ops.alloc_xrcd(device, NULL); + xrcd = device->ops.alloc_xrcd(device, udata); if (!IS_ERR(xrcd)) { xrcd->device = device; - xrcd->inode = NULL; + xrcd->inode = inode; atomic_set(&xrcd->usecnt, 0); mutex_init(&xrcd->tgt_qp_mutex); INIT_LIST_HEAD(&xrcd->tgt_qp_list); @@ -2306,9 +2313,14 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller) return xrcd; } -EXPORT_SYMBOL(__ib_alloc_xrcd); +EXPORT_SYMBOL(ib_alloc_xrcd_user); -int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) +/** + * ib_dealloc_xrcd_user - Deallocates an XRC domain. + * @xrcd: The XRC domain to deallocate. 
+ * @udata: Valid user data or NULL for kernel object + */ +int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata) { struct ib_qp *qp; int ret; @@ -2326,7 +2338,7 @@ int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) return xrcd->device->ops.dealloc_xrcd(xrcd, udata); } -EXPORT_SYMBOL(ib_dealloc_xrcd); +EXPORT_SYMBOL(ib_dealloc_xrcd_user); /** * ib_create_wq - Creates a WQ associated with the specified protection diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 204ec7516ef5..db6f78c5394f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4321,21 +4321,9 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); */ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); -/** - * ib_alloc_xrcd - Allocates an XRC domain. - * @device: The device on which to allocate the XRC domain. - * @caller: Module name for kernel consumers - */ -struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller); -#define ib_alloc_xrcd(device) \ - __ib_alloc_xrcd((device), KBUILD_MODNAME) - -/** - * ib_dealloc_xrcd - Deallocates an XRC domain. - * @xrcd: The XRC domain to deallocate. - * @udata: Valid user data or NULL for kernel object - */ -int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); +struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device, + struct inode *inode, struct ib_udata *udata); +int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata); static inline int ib_check_mr_access(int flags) { -- cgit From 6f3ca6f4f5e05b52bf6851ada21026faa106de76 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 6 Jul 2020 15:27:16 +0300 Subject: RDMA/core: Optimize XRC target lookup Replace the mutex with read write semaphore and use xarray instead of linked list for XRC target QPs. This will give faster XRC target lookup. In addition, when QP is closed, don't insert it back to the xarray if the destroy command failed. 
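A sketch of the locking pattern this moves to (taken from the reworked ib_open_qp() and __ib_destroy_shared_qp() below; error paths trimmed):

        /* readers: shared semaphore plus direct index by QP number */
        down_read(&xrcd->tgt_qps_rwsem);
        real_qp = xa_load(&xrcd->tgt_qps, qp_num);
        if (real_qp)
                qp = __ib_open_qp(real_qp, event_handler, qp_context);
        up_read(&xrcd->tgt_qps_rwsem);

        /* writers: exclusive semaphore around insert/erase */
        down_write(&xrcd->tgt_qps_rwsem);
        xa_erase(&xrcd->tgt_qps, real_qp->qp_num);
        up_write(&xrcd->tgt_qps_rwsem);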
Link: https://lore.kernel.org/r/20200706122716.647338-4-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 57 +++++++++++++++-------------------------- include/rdma/ib_verbs.h | 5 ++-- 2 files changed, 23 insertions(+), 39 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index a9b99c3ff25c..b1b6cc21ca96 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1090,13 +1090,6 @@ static void __ib_shared_qp_event_handler(struct ib_event *event, void *context) spin_unlock_irqrestore(&qp->device->qp_open_list_lock, flags); } -static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp) -{ - mutex_lock(&xrcd->tgt_qp_mutex); - list_add(&qp->xrcd_list, &xrcd->tgt_qp_list); - mutex_unlock(&xrcd->tgt_qp_mutex); -} - static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp, void (*event_handler)(struct ib_event *, void *), void *qp_context) @@ -1139,16 +1132,15 @@ struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd, if (qp_open_attr->qp_type != IB_QPT_XRC_TGT) return ERR_PTR(-EINVAL); - qp = ERR_PTR(-EINVAL); - mutex_lock(&xrcd->tgt_qp_mutex); - list_for_each_entry(real_qp, &xrcd->tgt_qp_list, xrcd_list) { - if (real_qp->qp_num == qp_open_attr->qp_num) { - qp = __ib_open_qp(real_qp, qp_open_attr->event_handler, - qp_open_attr->qp_context); - break; - } + down_read(&xrcd->tgt_qps_rwsem); + real_qp = xa_load(&xrcd->tgt_qps, qp_open_attr->qp_num); + if (!real_qp) { + up_read(&xrcd->tgt_qps_rwsem); + return ERR_PTR(-EINVAL); } - mutex_unlock(&xrcd->tgt_qp_mutex); + qp = __ib_open_qp(real_qp, qp_open_attr->event_handler, + qp_open_attr->qp_context); + up_read(&xrcd->tgt_qps_rwsem); return qp; } EXPORT_SYMBOL(ib_open_qp); @@ -1157,6 +1149,7 @@ static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr) { struct ib_qp *real_qp = qp; + int err; qp->event_handler = __ib_shared_qp_event_handler; qp->qp_context = qp; @@ -1172,7 +1165,12 @@ static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp, if (IS_ERR(qp)) return qp; - __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp); + err = xa_err(xa_store(&qp_init_attr->xrcd->tgt_qps, real_qp->qp_num, + real_qp, GFP_KERNEL)); + if (err) { + ib_close_qp(qp); + return ERR_PTR(err); + } return qp; } @@ -1887,21 +1885,18 @@ static int __ib_destroy_shared_qp(struct ib_qp *qp) real_qp = qp->real_qp; xrcd = real_qp->xrcd; - - mutex_lock(&xrcd->tgt_qp_mutex); + down_write(&xrcd->tgt_qps_rwsem); ib_close_qp(qp); if (atomic_read(&real_qp->usecnt) == 0) - list_del(&real_qp->xrcd_list); + xa_erase(&xrcd->tgt_qps, real_qp->qp_num); else real_qp = NULL; - mutex_unlock(&xrcd->tgt_qp_mutex); + up_write(&xrcd->tgt_qps_rwsem); if (real_qp) { ret = ib_destroy_qp(real_qp); if (!ret) atomic_dec(&xrcd->usecnt); - else - __ib_insert_xrcd_qp(xrcd, real_qp); } return 0; @@ -2307,8 +2302,8 @@ struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device, xrcd->device = device; xrcd->inode = inode; atomic_set(&xrcd->usecnt, 0); - mutex_init(&xrcd->tgt_qp_mutex); - INIT_LIST_HEAD(&xrcd->tgt_qp_list); + init_rwsem(&xrcd->tgt_qps_rwsem); + xa_init(&xrcd->tgt_qps); } return xrcd; @@ -2322,20 +2317,10 @@ EXPORT_SYMBOL(ib_alloc_xrcd_user); */ int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata) { - struct ib_qp *qp; - int ret; - if (atomic_read(&xrcd->usecnt)) return -EBUSY; - while (!list_empty(&xrcd->tgt_qp_list)) { - qp = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, 
xrcd_list); - ret = ib_destroy_qp(qp); - if (ret) - return ret; - } - mutex_destroy(&xrcd->tgt_qp_mutex); - + WARN_ON(!xa_empty(&xrcd->tgt_qps)); return xrcd->device->ops.dealloc_xrcd(xrcd, udata); } EXPORT_SYMBOL(ib_dealloc_xrcd_user); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index db6f78c5394f..20c801730fed 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1567,9 +1567,8 @@ struct ib_xrcd { struct ib_device *device; atomic_t usecnt; /* count all exposed resources */ struct inode *inode; - - struct mutex tgt_qp_mutex; - struct list_head tgt_qp_list; + struct rw_semaphore tgt_qps_rwsem; + struct xarray tgt_qps; }; struct ib_ah { -- cgit From 263427526f0c00490c20e4940d1fd4277fb5f7b3 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 30 Jun 2020 12:39:10 +0300 Subject: IB/uverbs: Enable CQ ioctl commands by default Enable CQ ioctl commands by default, this functionality is fully mature to be used over ioctl, no reason to maintain any more the EXP KCONFIG entry to enable it. Link: https://lore.kernel.org/r/20200630093916.332097-2-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/Kconfig | 8 -------- drivers/infiniband/core/uverbs_std_types_cq.c | 3 --- 2 files changed, 11 deletions(-) diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index a83f9eb86bfe..91b023341b77 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -37,14 +37,6 @@ config INFINIBAND_USER_ACCESS libibverbs, libibcm and a hardware driver library from rdma-core . -config INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI - bool "Allow experimental legacy verbs in new ioctl uAPI (EXPERIMENTAL)" - depends on INFINIBAND_USER_ACCESS - help - IOCTL based uAPI support for Infiniband is enabled by default for - new verbs only. This allows userspace to invoke the IOCTL based uAPI - for current legacy verbs too. - config INFINIBAND_USER_MEM bool depends on INFINIBAND_USER_ACCESS != n diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 5dce2c7cc323..b1c7dacc02de 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -207,11 +207,8 @@ DECLARE_UVERBS_NAMED_METHOD( DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_CQ, UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), uverbs_free_cq), - -#if IS_ENABLED(CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI) &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE), &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY) -#endif ); const struct uapi_definition uverbs_def_obj_cq[] = { -- cgit From 04c0a5fcfcf65aade2fb238b6336445f1a99b646 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 30 Jun 2020 12:39:11 +0300 Subject: IB/uverbs: Set IOVA on IB MR in uverbs layer Set IOVA on IB MR in uverbs layer to let all drivers have it, this includes both reg/rereg MR flows. As part of this change cleaned-up this setting from the drivers that already did it by themselves in their user flows. 
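After this change a driver's user MR registration only fills the fields it owns; a hypothetical driver (hw_key and page_shift are illustrative placeholders) ends up with roughly:

        mr->ibmr.lkey = mr->ibmr.rkey = hw_key;
        mr->ibmr.length = length;
        mr->ibmr.page_size = 1UL << page_shift;
        /* mr->ibmr.iova is no longer set here; uverbs fills it from
         * cmd.hca_va for both reg and (with IB_MR_REREG_TRANS) rereg */
        return &mr->ibmr;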
Fixes: e6f0330106f4 ("mlx4_ib: set user mr attributes in struct ib_mr") Link: https://lore.kernel.org/r/20200630093916.332097-3-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 4 ++++ drivers/infiniband/hw/cxgb4/mem.c | 1 - drivers/infiniband/hw/mlx4/mr.c | 1 - 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 89ff5d06c5d7..68c9a0210220 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -764,6 +764,7 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) mr->uobject = uobj; atomic_inc(&pd->usecnt); mr->res.type = RDMA_RESTRACK_MR; + mr->iova = cmd.hca_va; rdma_restrack_uadd(&mr->res); uobj->object = mr; @@ -855,6 +856,9 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) atomic_dec(&old_pd->usecnt); } + if (cmd.flags & IB_MR_REREG_TRANS) + mr->iova = cmd.hca_va; + memset(&resp, 0, sizeof(resp)); resp.lkey = mr->lkey; resp.rkey = mr->rkey; diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index ea6fb2c5b1a7..73936c3341b7 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -399,7 +399,6 @@ static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag) mmid = stag >> 8; mhp->ibmr.rkey = mhp->ibmr.lkey = stag; mhp->ibmr.length = mhp->attr.len; - mhp->ibmr.iova = mhp->attr.va_fbo; mhp->ibmr.page_size = 1U << (mhp->attr.page_size + 12); pr_debug("mmid 0x%x mhp %p\n", mmid, mhp); return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 6eecedeff2d3..1d5ef0de12c9 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -439,7 +439,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; mr->ibmr.length = length; - mr->ibmr.iova = virt_addr; mr->ibmr.page_size = 1U << shift; return &mr->ibmr; -- cgit From 1c8fb1ea5a1dbd2159e78fa580aaffb001794cfa Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 30 Jun 2020 12:39:12 +0300 Subject: IB/uverbs: Expose UAPI to query ucontext Expose UAPI to query ucontext, this will let user space application that didn't allocate the ucontext but has access to by owning the matching command FD to retrieve the ucontext information. 
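A driver opts in by providing the new query_ucontext op; a hypothetical implementation (the foo_* names, FOO_ATTR_QUERY_CONTEXT_RESP and the resp field are illustrative only) would look roughly like:

static int foo_query_ucontext(struct ib_ucontext *uctx,
                              struct uverbs_attr_bundle *attrs)
{
        struct foo_ucontext *ctx = to_foo_ucontext(uctx);

        /* append the driver-specific response to the method's attrs;
         * the core handler has already copied the generic fields */
        return uverbs_copy_to(attrs, FOO_ATTR_QUERY_CONTEXT_RESP,
                              &ctx->resp, sizeof(ctx->resp));
}

static const struct ib_device_ops foo_dev_ops = {
        .query_ucontext = foo_query_ucontext,
};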
Link: https://lore.kernel.org/r/20200630093916.332097-4-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/uverbs_std_types_device.c | 41 ++++++++++++++++++++++- include/rdma/ib_verbs.h | 4 +++ include/uapi/rdma/ib_user_ioctl_cmds.h | 6 ++++ 4 files changed, 51 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 40cf07129f66..1900c0df3c8a 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2674,6 +2674,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, query_port); SET_DEVICE_OP(dev_ops, query_qp); SET_DEVICE_OP(dev_ops, query_srq); + SET_DEVICE_OP(dev_ops, query_ucontext); SET_DEVICE_OP(dev_ops, rdma_netdev_get_params); SET_DEVICE_OP(dev_ops, read_counters); SET_DEVICE_OP(dev_ops, reg_dm_mr); diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c index ae4a59d6f9b1..8e58605a17be 100644 --- a/drivers/infiniband/core/uverbs_std_types_device.c +++ b/drivers/infiniband/core/uverbs_std_types_device.c @@ -229,6 +229,37 @@ static int UVERBS_HANDLER(UVERBS_METHOD_GET_CONTEXT)( return 0; } +static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_CONTEXT)( + struct uverbs_attr_bundle *attrs) +{ + u64 core_support = IB_UVERBS_CORE_SUPPORT_OPTIONAL_MR_ACCESS; + struct ib_ucontext *ucontext; + struct ib_device *ib_dev; + u32 num_comp; + int ret; + + ucontext = ib_uverbs_get_ucontext(attrs); + if (IS_ERR(ucontext)) + return PTR_ERR(ucontext); + ib_dev = ucontext->device; + + if (!ib_dev->ops.query_ucontext) + return -EOPNOTSUPP; + + num_comp = attrs->ufile->device->num_comp_vectors; + ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_CONTEXT_NUM_COMP_VECTORS, + &num_comp, sizeof(num_comp)); + if (IS_UVERBS_COPY_ERR(ret)) + return ret; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_CONTEXT_CORE_SUPPORT, + &core_support, sizeof(core_support)); + if (IS_UVERBS_COPY_ERR(ret)) + return ret; + + return ucontext->device->ops.query_ucontext(ucontext, attrs); +} + DECLARE_UVERBS_NAMED_METHOD( UVERBS_METHOD_GET_CONTEXT, UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS, @@ -237,6 +268,13 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_TYPE(u64), UA_OPTIONAL), UVERBS_ATTR_UHW()); +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_QUERY_CONTEXT, + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_CONTEXT_NUM_COMP_VECTORS, + UVERBS_ATTR_TYPE(u32), UA_OPTIONAL), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_CONTEXT_CORE_SUPPORT, + UVERBS_ATTR_TYPE(u64), UA_OPTIONAL)); + DECLARE_UVERBS_NAMED_METHOD( UVERBS_METHOD_INFO_HANDLES, /* Also includes any device specific object ids */ @@ -260,7 +298,8 @@ DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE, &UVERBS_METHOD(UVERBS_METHOD_GET_CONTEXT), &UVERBS_METHOD(UVERBS_METHOD_INVOKE_WRITE), &UVERBS_METHOD(UVERBS_METHOD_INFO_HANDLES), - &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT)); + &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT), + &UVERBS_METHOD(UVERBS_METHOD_QUERY_CONTEXT)); const struct uapi_definition uverbs_def_obj_device[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DEVICE), diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 20c801730fed..6c72bb194148 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2645,6 +2645,10 @@ struct ib_device_ops { */ int (*fill_stat_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr); + /* query driver 
for its ucontext properties */ + int (*query_ucontext)(struct ib_ucontext *context, + struct uverbs_attr_bundle *attrs); + DECLARE_RDMA_OBJ_SIZE(ib_ah); DECLARE_RDMA_OBJ_SIZE(ib_cq); DECLARE_RDMA_OBJ_SIZE(ib_pd); diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h index 4961d5e858eb..83b6e71ea216 100644 --- a/include/uapi/rdma/ib_user_ioctl_cmds.h +++ b/include/uapi/rdma/ib_user_ioctl_cmds.h @@ -69,6 +69,7 @@ enum uverbs_methods_device { UVERBS_METHOD_INFO_HANDLES, UVERBS_METHOD_QUERY_PORT, UVERBS_METHOD_GET_CONTEXT, + UVERBS_METHOD_QUERY_CONTEXT, }; enum uverbs_attrs_invoke_write_cmd_attr_ids { @@ -87,6 +88,11 @@ enum uverbs_attrs_get_context_attr_ids { UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT, }; +enum uverbs_attrs_query_context_attr_ids { + UVERBS_ATTR_QUERY_CONTEXT_NUM_COMP_VECTORS, + UVERBS_ATTR_QUERY_CONTEXT_CORE_SUPPORT, +}; + enum uverbs_attrs_create_cq_cmd_attr_ids { UVERBS_ATTR_CREATE_CQ_HANDLE, UVERBS_ATTR_CREATE_CQ_CQE, -- cgit From 45ec21c971eddfb5e8e953e49a9dbe780f4a4997 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 30 Jun 2020 12:39:13 +0300 Subject: RDMA/mlx5: Refactor mlx5_ib_alloc_ucontext() response Refactor mlx5_ib_alloc_ucontext() to set its response fields in a cleaner way. It includes, - Move the relevant code to a self contained function. - Calculate the response length once and drop redundant code all around. - Reuse previously set ucontext fields once preparing the response. The self contained function will be used in next patch as part of implementing the query ucontext functionality. Link: https://lore.kernel.org/r/20200630093916.332097-5-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 196 ++++++++++++++++++-------------------- 1 file changed, 93 insertions(+), 103 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ca6fac31a1b8..56039fa6c530 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1765,6 +1765,92 @@ static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn, mlx5_ib_disable_lb(dev, true, false); } +static int set_ucontext_resp(struct ib_ucontext *uctx, + struct mlx5_ib_alloc_ucontext_resp *resp) +{ + struct ib_device *ibdev = uctx->device; + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_ib_ucontext *context = to_mucontext(uctx); + struct mlx5_bfreg_info *bfregi = &context->bfregi; + int err; + + if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { + err = mlx5_cmd_dump_fill_mkey(dev->mdev, + &resp->dump_fill_mkey); + if (err) + return err; + resp->comp_mask |= + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY; + } + + resp->qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); + if (dev->wc_support) + resp->bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, + log_bf_reg_size); + resp->cache_line_size = cache_line_size(); + resp->max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); + resp->max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq); + resp->max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); + resp->max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); + resp->max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); + resp->cqe_version = context->cqe_version; + resp->log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ? + MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT; + resp->num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? 
+ MLX5_CAP_GEN(dev->mdev, + num_of_uars_per_page) : 1; + + if (mlx5_accel_ipsec_device_caps(dev->mdev) & + MLX5_ACCEL_IPSEC_CAP_DEVICE) { + if (mlx5_get_flow_namespace(dev->mdev, + MLX5_FLOW_NAMESPACE_EGRESS)) + resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM; + if (mlx5_accel_ipsec_device_caps(dev->mdev) & + MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA) + resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA; + if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) + resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING; + if (mlx5_accel_ipsec_device_caps(dev->mdev) & + MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN) + resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN; + /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */ + } + + resp->tot_bfregs = bfregi->lib_uar_dyn ? 0 : + bfregi->total_num_bfregs - bfregi->num_dyn_bfregs; + resp->num_ports = dev->num_ports; + resp->cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE | + MLX5_USER_CMDS_SUPP_UHW_CREATE_AH; + + if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) { + mlx5_query_min_inline(dev->mdev, &resp->eth_min_inline); + resp->eth_min_inline++; + } + + if (dev->mdev->clock_info) + resp->clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1); + + /* + * We don't want to expose information from the PCI bar that is located + * after 4096 bytes, so if the arch only supports larger pages, let's + * pretend we don't support reading the HCA's core clock. This is also + * forced by mmap function. + */ + if (PAGE_SIZE <= 4096) { + resp->comp_mask |= + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET; + resp->hca_core_clock_offset = + offsetof(struct mlx5_init_seg, + internal_timer_h) % PAGE_SIZE; + } + + if (MLX5_CAP_GEN(dev->mdev, ece_support)) + resp->comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE; + + resp->num_dyn_bfregs = bfregi->num_dyn_bfregs; + return 0; +} + static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { @@ -1772,14 +1858,12 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_alloc_ucontext_req_v2 req = {}; struct mlx5_ib_alloc_ucontext_resp resp = {}; - struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_ucontext *context = to_mucontext(uctx); struct mlx5_bfreg_info *bfregi; int ver; int err; size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2, max_cqe_version); - u32 dump_fill_mkey; bool lib_uar_4k; bool lib_uar_dyn; @@ -1808,37 +1892,6 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, if (req.num_low_latency_bfregs > req.total_num_bfregs - 1) return -EINVAL; - resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); - if (dev->wc_support) - resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); - resp.cache_line_size = cache_line_size(); - resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); - resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq); - resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); - resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); - resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); - resp.cqe_version = min_t(__u8, - (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version), - req.max_cqe_version); - resp.log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ? 
- MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT; - resp.num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? - MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1; - resp.response_length = min(offsetof(typeof(resp), response_length) + - sizeof(resp.response_length), udata->outlen); - - if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) { - if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_EGRESS)) - resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM; - if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA) - resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA; - if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) - resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING; - if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN) - resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN; - /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */ - } - lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR; lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR; bfregi = &context->bfregi; @@ -1887,87 +1940,24 @@ uar_done: if (err) goto out_devx; - if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { - err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey); - if (err) - goto out_mdev; - } - INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); - resp.tot_bfregs = lib_uar_dyn ? 0 : req.total_num_bfregs; - resp.num_ports = dev->num_ports; - - if (offsetofend(typeof(resp), cqe_version) <= udata->outlen) - resp.response_length += sizeof(resp.cqe_version); - - if (offsetofend(typeof(resp), cmds_supp_uhw) <= udata->outlen) { - resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE | - MLX5_USER_CMDS_SUPP_UHW_CREATE_AH; - resp.response_length += sizeof(resp.cmds_supp_uhw); - } - - if (offsetofend(typeof(resp), eth_min_inline) <= udata->outlen) { - if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) { - mlx5_query_min_inline(dev->mdev, &resp.eth_min_inline); - resp.eth_min_inline++; - } - resp.response_length += sizeof(resp.eth_min_inline); - } - - if (offsetofend(typeof(resp), clock_info_versions) <= udata->outlen) { - if (mdev->clock_info) - resp.clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1); - resp.response_length += sizeof(resp.clock_info_versions); - } - - /* - * We don't want to expose information from the PCI bar that is located - * after 4096 bytes, so if the arch only supports larger pages, let's - * pretend we don't support reading the HCA's core clock. This is also - * forced by mmap function. 
- */ - if (offsetofend(typeof(resp), hca_core_clock_offset) <= udata->outlen) { - if (PAGE_SIZE <= 4096) { - resp.comp_mask |= - MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET; - resp.hca_core_clock_offset = - offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE; - } - resp.response_length += sizeof(resp.hca_core_clock_offset); - } - - if (offsetofend(typeof(resp), log_uar_size) <= udata->outlen) - resp.response_length += sizeof(resp.log_uar_size); - - if (offsetofend(typeof(resp), num_uars_per_page) <= udata->outlen) - resp.response_length += sizeof(resp.num_uars_per_page); - - if (offsetofend(typeof(resp), num_dyn_bfregs) <= udata->outlen) { - resp.num_dyn_bfregs = bfregi->num_dyn_bfregs; - resp.response_length += sizeof(resp.num_dyn_bfregs); - } - - if (offsetofend(typeof(resp), dump_fill_mkey) <= udata->outlen) { - if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { - resp.dump_fill_mkey = dump_fill_mkey; - resp.comp_mask |= - MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY; - } - resp.response_length += sizeof(resp.dump_fill_mkey); - } + context->cqe_version = min_t(__u8, + (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version), + req.max_cqe_version); - if (MLX5_CAP_GEN(dev->mdev, ece_support)) - resp.comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE; + err = set_ucontext_resp(uctx, &resp); + if (err) + goto out_mdev; + resp.response_length = min(udata->outlen, sizeof(resp)); err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) goto out_mdev; bfregi->ver = ver; bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs; - context->cqe_version = resp.cqe_version; context->lib_caps = req.lib_caps; print_lib_caps(dev, context->lib_caps); -- cgit From 0fb556b2b58d6b8336c94379042bad2a8512af1a Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 30 Jun 2020 12:39:14 +0300 Subject: RDMA/mlx5: Implement the query ucontext functionality Implement the query ucontext functionality by returning the original ucontext data as part of an extra mlx5 attribute that holds the driver UAPI response. 
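The length clamp, together with uverbs_copy_to_struct_or_zero() (exported in this patch), is what keeps differently sized userspace structs working; a sketch of the intent, with ATTR standing for MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX:

        /* never report more than userspace asked for ... */
        resp.response_length = min_t(size_t,
                                     uverbs_attr_get_len(attrs, ATTR),
                                     sizeof(resp));
        /* ... and if userspace supplied a larger (newer) struct, the helper
         * is expected to zero the tail rather than leave it uninitialized */
        return uverbs_copy_to_struct_or_zero(attrs, ATTR, &resp, sizeof(resp));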
Link: https://lore.kernel.org/r/20200630093916.332097-6-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_ioctl.c | 1 + drivers/infiniband/hw/mlx5/main.c | 35 ++++++++++++++++++++++++++++++++ include/uapi/rdma/mlx5_user_ioctl_cmds.h | 4 ++++ 3 files changed, 40 insertions(+) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 2d882c02387c..ef04a261097f 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -790,6 +790,7 @@ int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, } return uverbs_copy_to(bundle, idx, from, size); } +EXPORT_SYMBOL(uverbs_copy_to_struct_or_zero); /* Once called an abort will call through to the type's destroy_hw() */ void uverbs_finalize_uobj_create(const struct uverbs_attr_bundle *bundle, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 56039fa6c530..dc77388464c9 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1990,6 +1990,29 @@ out_ctx: return err; } +static int mlx5_ib_query_ucontext(struct ib_ucontext *ibcontext, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_alloc_ucontext_resp uctx_resp = {}; + int ret; + + ret = set_ucontext_resp(ibcontext, &uctx_resp); + if (ret) + return ret; + + uctx_resp.response_length = + min_t(size_t, + uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX), + sizeof(uctx_resp)); + + ret = uverbs_copy_to_struct_or_zero(attrs, + MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX, + &uctx_resp, + sizeof(uctx_resp)); + return ret; +} + static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) { struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); @@ -6364,6 +6387,16 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE( UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, enum mlx5_ib_uapi_flow_action_flags)); +ADD_UVERBS_ATTRIBUTES_SIMPLE( + mlx5_ib_query_context, + UVERBS_OBJECT_DEVICE, + UVERBS_METHOD_QUERY_CONTEXT, + UVERBS_ATTR_PTR_OUT( + MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX, + UVERBS_ATTR_STRUCT(struct mlx5_ib_alloc_ucontext_resp, + dump_fill_mkey), + UA_MANDATORY)); + static const struct uapi_definition mlx5_ib_defs[] = { UAPI_DEF_CHAIN(mlx5_ib_devx_defs), UAPI_DEF_CHAIN(mlx5_ib_flow_defs), @@ -6372,6 +6405,7 @@ static const struct uapi_definition mlx5_ib_defs[] = { UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, &mlx5_ib_flow_action), UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm), + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DEVICE, &mlx5_ib_query_context), UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR, UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)), UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_UAR), @@ -6605,6 +6639,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .query_pkey = mlx5_ib_query_pkey, .query_qp = mlx5_ib_query_qp, .query_srq = mlx5_ib_query_srq, + .query_ucontext = mlx5_ib_query_ucontext, .read_counters = mlx5_ib_read_counters, .reg_user_mr = mlx5_ib_reg_user_mr, .req_notify_cq = mlx5_ib_arm_cq, diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 8e316ef896b5..496309e8a856 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -228,6 +228,10 @@ enum mlx5_ib_flow_matcher_methods { MLX5_IB_METHOD_FLOW_MATCHER_DESTROY, }; +enum mlx5_ib_device_query_context_attrs { + MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX = (1U << 
UVERBS_ID_NS_SHIFT), +}; + #define MLX5_IB_DW_MATCH_PARAM 0x80 struct mlx5_ib_match_params { -- cgit From 05f71ef9797454b9384c740b8acd0d02eca1d1bc Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 30 Jun 2020 12:39:15 +0300 Subject: RDMA/mlx5: Introduce UAPI to query PD attributes Introduce UAPI to query PD attributes, this can be used to retrieve PD attributes by having the PD handle of the created one and owning the command FD for the ucontxet. Link: https://lore.kernel.org/r/20200630093916.332097-7-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/Makefile | 3 ++- drivers/infiniband/hw/mlx5/main.c | 1 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 ++ drivers/infiniband/hw/mlx5/std_types.c | 45 ++++++++++++++++++++++++++++++++ include/uapi/rdma/mlx5_user_ioctl_cmds.h | 10 +++++++ 5 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 drivers/infiniband/hw/mlx5/std_types.c diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 8cca61c671f8..9838719aacb9 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -23,4 +23,5 @@ mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o \ flow.o \ - qos.o + qos.o \ + std_types.o diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index dc77388464c9..324a98c77ad5 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6401,6 +6401,7 @@ static const struct uapi_definition mlx5_ib_defs[] = { UAPI_DEF_CHAIN(mlx5_ib_devx_defs), UAPI_DEF_CHAIN(mlx5_ib_flow_defs), UAPI_DEF_CHAIN(mlx5_ib_qos_defs), + UAPI_DEF_CHAIN(mlx5_ib_std_types_defs), UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, &mlx5_ib_flow_action), diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 74d3507d6f80..27b069ef63d9 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1384,6 +1384,8 @@ int mlx5_ib_fill_stat_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr); extern const struct uapi_definition mlx5_ib_devx_defs[]; extern const struct uapi_definition mlx5_ib_flow_defs[]; extern const struct uapi_definition mlx5_ib_qos_defs[]; +extern const struct uapi_definition mlx5_ib_std_types_defs[]; + #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); diff --git a/drivers/infiniband/hw/mlx5/std_types.c b/drivers/infiniband/hw/mlx5/std_types.c new file mode 100644 index 000000000000..16145fda68d0 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/std_types.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include "mlx5_ib.h" + +#define UVERBS_MODULE_NAME mlx5_ib +#include + +static int UVERBS_HANDLER(MLX5_IB_METHOD_PD_QUERY)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_pd *pd = + uverbs_attr_get_obj(attrs, MLX5_IB_ATTR_QUERY_PD_HANDLE); + struct mlx5_ib_pd *mpd = to_mpd(pd); + + return uverbs_copy_to(attrs, MLX5_IB_ATTR_QUERY_PD_RESP_PDN, + &mpd->pdn, sizeof(mpd->pdn)); +} + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_PD_QUERY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_QUERY_PD_HANDLE, + UVERBS_OBJECT_PD, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_QUERY_PD_RESP_PDN, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +ADD_UVERBS_METHODS(mlx5_ib_pd, + UVERBS_OBJECT_PD, + &UVERBS_METHOD(MLX5_IB_METHOD_PD_QUERY)); + +const struct uapi_definition mlx5_ib_std_types_defs[] = { + UAPI_DEF_CHAIN_OBJ_TREE( + UVERBS_OBJECT_PD, + &mlx5_ib_pd), + {}, +}; diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 496309e8a856..b330e6eee626 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -290,4 +290,14 @@ enum mlx5_ib_create_flow_action_create_packet_reformat_attrs { MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF, }; +enum mlx5_ib_query_pd_attrs { + MLX5_IB_ATTR_QUERY_PD_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_QUERY_PD_RESP_PDN, +}; + +enum mlx5_ib_pd_methods { + MLX5_IB_METHOD_PD_QUERY = (1U << UVERBS_ID_NS_SHIFT), + +}; + #endif -- cgit From 6c01e6b218aea09ec9947cbf88a4db97b4dd155c Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 30 Jun 2020 12:39:16 +0300 Subject: IB/uverbs: Expose UAPI to query MR Expose UAPI to query MR, this will let user space application that didn't allocate the MR but has access to by owning the matching command FD to retrieve its information. Link: https://lore.kernel.org/r/20200630093916.332097-8-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_std_types_mr.c | 52 ++++++++++++++++++++++++++- include/uapi/rdma/ib_user_ioctl_cmds.h | 9 +++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index a2722ef8496e..62f58ad56afd 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -148,6 +148,36 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( return ret; } +static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_MR)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_mr *mr = + uverbs_attr_get_obj(attrs, UVERBS_ATTR_QUERY_MR_HANDLE); + int ret; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_LKEY, &mr->lkey, + sizeof(mr->lkey)); + if (ret) + return ret; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_RKEY, + &mr->rkey, sizeof(mr->rkey)); + + if (ret) + return ret; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_LENGTH, + &mr->length, sizeof(mr->length)); + + if (ret) + return ret; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_IOVA, + &mr->iova, sizeof(mr->iova)); + + return IS_UVERBS_COPY_ERR(ret) ? 
ret : 0; +} + DECLARE_UVERBS_NAMED_METHOD( UVERBS_METHOD_ADVISE_MR, UVERBS_ATTR_IDR(UVERBS_ATTR_ADVISE_MR_PD_HANDLE, @@ -165,6 +195,25 @@ DECLARE_UVERBS_NAMED_METHOD( UA_MANDATORY, UA_ALLOC_AND_COPY)); +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_QUERY_MR, + UVERBS_ATTR_IDR(UVERBS_ATTR_QUERY_MR_HANDLE, + UVERBS_OBJECT_MR, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_RKEY, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_LKEY, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_LENGTH, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_IOVA, + UVERBS_ATTR_TYPE(u64), + UA_OPTIONAL)); + DECLARE_UVERBS_NAMED_METHOD( UVERBS_METHOD_DM_MR_REG, UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, @@ -206,7 +255,8 @@ DECLARE_UVERBS_NAMED_OBJECT( UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr), &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG), &UVERBS_METHOD(UVERBS_METHOD_MR_DESTROY), - &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR)); + &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR), + &UVERBS_METHOD(UVERBS_METHOD_QUERY_MR)); const struct uapi_definition uverbs_def_obj_mr[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MR, diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h index 83b6e71ea216..99dcabf61a71 100644 --- a/include/uapi/rdma/ib_user_ioctl_cmds.h +++ b/include/uapi/rdma/ib_user_ioctl_cmds.h @@ -248,6 +248,7 @@ enum uverbs_methods_mr { UVERBS_METHOD_DM_MR_REG, UVERBS_METHOD_MR_DESTROY, UVERBS_METHOD_ADVISE_MR, + UVERBS_METHOD_QUERY_MR, }; enum uverbs_attrs_mr_destroy_ids { @@ -261,6 +262,14 @@ enum uverbs_attrs_advise_mr_cmd_attr_ids { UVERBS_ATTR_ADVISE_MR_SGE_LIST, }; +enum uverbs_attrs_query_mr_cmd_attr_ids { + UVERBS_ATTR_QUERY_MR_HANDLE, + UVERBS_ATTR_QUERY_MR_RESP_LKEY, + UVERBS_ATTR_QUERY_MR_RESP_RKEY, + UVERBS_ATTR_QUERY_MR_RESP_LENGTH, + UVERBS_ATTR_QUERY_MR_RESP_IOVA, +}; + enum uverbs_attrs_create_counters_cmd_attr_ids { UVERBS_ATTR_CREATE_COUNTERS_HANDLE, }; -- cgit From 3b023e1b680a56e84c22d43486875a5aa4c78afe Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 30 Jun 2020 13:18:52 +0300 Subject: RDMA/core: Create and destroy counters in the ib_core Move allocation and destruction of counters under ib_core responsibility Link: https://lore.kernel.org/r/20200630101855.368895-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/uverbs_std_types_counters.c | 17 +++++++++-------- drivers/infiniband/hw/mlx5/main.c | 20 ++++++-------------- include/rdma/ib_verbs.h | 7 ++++--- 4 files changed, 20 insertions(+), 25 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 1900c0df3c8a..0a259f475e89 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2687,6 +2687,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, set_vf_link_state); SET_OBJ_SIZE(dev_ops, ib_ah); + SET_OBJ_SIZE(dev_ops, ib_counters); SET_OBJ_SIZE(dev_ops, ib_cq); SET_OBJ_SIZE(dev_ops, ib_pd); SET_OBJ_SIZE(dev_ops, ib_srq); diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index 9f013304e677..c7e7438752bc 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -46,7 +46,9 @@ static int 
uverbs_free_counters(struct ib_uobject *uobject, if (ret) return ret; - return counters->device->ops.destroy_counters(counters); + counters->device->ops.destroy_counters(counters); + kfree(counters); + return 0; } static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)( @@ -66,20 +68,19 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)( if (!ib_dev->ops.create_counters) return -EOPNOTSUPP; - counters = ib_dev->ops.create_counters(ib_dev, attrs); - if (IS_ERR(counters)) { - ret = PTR_ERR(counters); - goto err_create_counters; - } + counters = rdma_zalloc_drv_obj(ib_dev, ib_counters); + if (!counters) + return -ENOMEM; counters->device = ib_dev; counters->uobject = uobj; uobj->object = counters; atomic_set(&counters->usecnt, 0); - return 0; + ret = ib_dev->ops.create_counters(counters, attrs); + if (ret) + kfree(counters); -err_create_counters: return ret; } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 324a98c77ad5..6e6e126c39ef 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6455,7 +6455,7 @@ err_bound: return ret; } -static int mlx5_ib_destroy_counters(struct ib_counters *counters) +static void mlx5_ib_destroy_counters(struct ib_counters *counters) { struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); @@ -6463,24 +6463,15 @@ static int mlx5_ib_destroy_counters(struct ib_counters *counters) if (mcounters->hw_cntrs_hndl) mlx5_fc_destroy(to_mdev(counters->device)->mdev, mcounters->hw_cntrs_hndl); - - kfree(mcounters); - - return 0; } -static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device, - struct uverbs_attr_bundle *attrs) +static int mlx5_ib_create_counters(struct ib_counters *counters, + struct uverbs_attr_bundle *attrs) { - struct mlx5_ib_mcounters *mcounters; - - mcounters = kzalloc(sizeof(*mcounters), GFP_KERNEL); - if (!mcounters) - return ERR_PTR(-ENOMEM); + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); mutex_init(&mcounters->mcntrs_mutex); - - return &mcounters->ibcntrs; + return 0; } static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) @@ -6648,6 +6639,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .resize_cq = mlx5_ib_resize_cq, INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah), + INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs), INIT_RDMA_OBJ_SIZE(ib_cq, mlx5_ib_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq), diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6c72bb194148..ecaf299e0789 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2540,9 +2540,9 @@ struct ib_device_ops { struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm, struct ib_dm_mr_attr *attr, struct uverbs_attr_bundle *attrs); - struct ib_counters *(*create_counters)( - struct ib_device *device, struct uverbs_attr_bundle *attrs); - int (*destroy_counters)(struct ib_counters *counters); + int (*create_counters)(struct ib_counters *counters, + struct uverbs_attr_bundle *attrs); + void (*destroy_counters)(struct ib_counters *counters); int (*read_counters)(struct ib_counters *counters, struct ib_counters_read_attr *counters_read_attr, struct uverbs_attr_bundle *attrs); @@ -2650,6 +2650,7 @@ struct ib_device_ops { struct uverbs_attr_bundle *attrs); DECLARE_RDMA_OBJ_SIZE(ib_ah); + DECLARE_RDMA_OBJ_SIZE(ib_counters); DECLARE_RDMA_OBJ_SIZE(ib_cq); DECLARE_RDMA_OBJ_SIZE(ib_pd); DECLARE_RDMA_OBJ_SIZE(ib_srq); -- cgit From 
28ad5f65c314ffdd5888d6afa61772d3032a332c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 30 Jun 2020 13:18:54 +0300 Subject: RDMA: Move XRCD to be under ib_core responsibility Update the code to allocate and free ib_xrcd structure in the ib_core instead of inside drivers. Link: https://lore.kernel.org/r/20200630101855.368895-4-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/verbs.c | 28 ++++++++++++++++++--------- drivers/infiniband/hw/mlx4/main.c | 37 +++++++++++++++--------------------- drivers/infiniband/hw/mlx5/main.c | 2 ++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 ++--- drivers/infiniband/hw/mlx5/qp.c | 32 +++++++------------------------ include/rdma/ib_verbs.h | 6 +++--- 7 files changed, 49 insertions(+), 62 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 0a259f475e89..b2d617e599a1 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2692,6 +2692,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_OBJ_SIZE(dev_ops, ib_pd); SET_OBJ_SIZE(dev_ops, ib_srq); SET_OBJ_SIZE(dev_ops, ib_ucontext); + SET_OBJ_SIZE(dev_ops, ib_xrcd); } EXPORT_SYMBOL(ib_set_device_ops); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index b1b6cc21ca96..a92783105cea 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2293,20 +2293,28 @@ struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device, struct inode *inode, struct ib_udata *udata) { struct ib_xrcd *xrcd; + int ret; if (!device->ops.alloc_xrcd) return ERR_PTR(-EOPNOTSUPP); - xrcd = device->ops.alloc_xrcd(device, udata); - if (!IS_ERR(xrcd)) { - xrcd->device = device; - xrcd->inode = inode; - atomic_set(&xrcd->usecnt, 0); - init_rwsem(&xrcd->tgt_qps_rwsem); - xa_init(&xrcd->tgt_qps); - } + xrcd = rdma_zalloc_drv_obj(device, ib_xrcd); + if (!xrcd) + return ERR_PTR(-ENOMEM); + xrcd->device = device; + xrcd->inode = inode; + atomic_set(&xrcd->usecnt, 0); + init_rwsem(&xrcd->tgt_qps_rwsem); + xa_init(&xrcd->tgt_qps); + + ret = device->ops.alloc_xrcd(xrcd, udata); + if (ret) + goto err; return xrcd; +err: + kfree(xrcd); + return ERR_PTR(ret); } EXPORT_SYMBOL(ib_alloc_xrcd_user); @@ -2321,7 +2329,9 @@ int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata) return -EBUSY; WARN_ON(!xa_empty(&xrcd->tgt_qps)); - return xrcd->device->ops.dealloc_xrcd(xrcd, udata); + xrcd->device->ops.dealloc_xrcd(xrcd, udata); + kfree(xrcd); + return 0; } EXPORT_SYMBOL(ib_dealloc_xrcd_user); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 816d28854a8e..5e7910a517da 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1219,56 +1219,47 @@ static void mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn); } -static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, - struct ib_udata *udata) +static int mlx4_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata) { - struct mlx4_ib_xrcd *xrcd; + struct mlx4_ib_dev *dev = to_mdev(ibxrcd->device); + struct mlx4_ib_xrcd *xrcd = to_mxrcd(ibxrcd); struct ib_cq_init_attr cq_attr = {}; int err; - if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) - return ERR_PTR(-ENOSYS); - - xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL); - if (!xrcd) - return ERR_PTR(-ENOMEM); + if 
(!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) + return -EOPNOTSUPP; - err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn); + err = mlx4_xrcd_alloc(dev->dev, &xrcd->xrcdn); if (err) - goto err1; + return err; - xrcd->pd = ib_alloc_pd(ibdev, 0); + xrcd->pd = ib_alloc_pd(ibxrcd->device, 0); if (IS_ERR(xrcd->pd)) { err = PTR_ERR(xrcd->pd); goto err2; } cq_attr.cqe = 1; - xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, &cq_attr); + xrcd->cq = ib_create_cq(ibxrcd->device, NULL, NULL, xrcd, &cq_attr); if (IS_ERR(xrcd->cq)) { err = PTR_ERR(xrcd->cq); goto err3; } - return &xrcd->ibxrcd; + return 0; err3: ib_dealloc_pd(xrcd->pd); err2: - mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn); -err1: - kfree(xrcd); - return ERR_PTR(err); + mlx4_xrcd_free(dev->dev, xrcd->xrcdn); + return err; } -static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) +static void mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) { ib_destroy_cq(to_mxrcd(xrcd)->cq); ib_dealloc_pd(to_mxrcd(xrcd)->pd); mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn); - kfree(xrcd); - - return 0; } static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) @@ -2607,6 +2598,8 @@ static const struct ib_device_ops mlx4_ib_dev_mw_ops = { static const struct ib_device_ops mlx4_ib_dev_xrc_ops = { .alloc_xrcd = mlx4_ib_alloc_xrcd, .dealloc_xrcd = mlx4_ib_dealloc_xrcd, + + INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx4_ib_xrcd, ibxrcd), }; static const struct ib_device_ops mlx4_ib_dev_fs_ops = { diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 6e6e126c39ef..2da592054a3f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6671,6 +6671,8 @@ static const struct ib_device_ops mlx5_ib_dev_mw_ops = { static const struct ib_device_ops mlx5_ib_dev_xrc_ops = { .alloc_xrcd = mlx5_ib_alloc_xrcd, .dealloc_xrcd = mlx5_ib_dealloc_xrcd, + + INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx5_ib_xrcd, ibxrcd), }; static const struct ib_device_ops mlx5_ib_dev_dm_ops = { diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 27b069ef63d9..88e30ff77c16 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1224,9 +1224,8 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad *in, struct ib_mad *out, size_t *out_mad_size, u16 *out_mad_pkey_index); -struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, - struct ib_udata *udata); -int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); +int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); +void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7489b0479e4f..c6a4db9f6a0b 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4700,41 +4700,23 @@ out: return err; } -struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, - struct ib_udata *udata) +int mlx5_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata) { - struct mlx5_ib_dev *dev = to_mdev(ibdev); - struct mlx5_ib_xrcd *xrcd; - int err; + struct mlx5_ib_dev *dev = to_mdev(ibxrcd->device); + struct 
mlx5_ib_xrcd *xrcd = to_mxrcd(ibxrcd); if (!MLX5_CAP_GEN(dev->mdev, xrc)) - return ERR_PTR(-ENOSYS); - - xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL); - if (!xrcd) - return ERR_PTR(-ENOMEM); - - err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0); - if (err) { - kfree(xrcd); - return ERR_PTR(-ENOMEM); - } + return -EOPNOTSUPP; - return &xrcd->ibxrcd; + return mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0); } -int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) +void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(xrcd->device); u32 xrcdn = to_mxrcd(xrcd)->xrcdn; - int err; - err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0); - if (err) - mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn); - - kfree(xrcd); - return 0; + mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0); } static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index ecaf299e0789..087f001fd020 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2494,9 +2494,8 @@ struct ib_device_ops { int (*dealloc_mw)(struct ib_mw *mw); int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); - struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device, - struct ib_udata *udata); - int (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata); + int (*alloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata); + void (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata); struct ib_flow *(*create_flow)(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain, struct ib_udata *udata); @@ -2655,6 +2654,7 @@ struct ib_device_ops { DECLARE_RDMA_OBJ_SIZE(ib_pd); DECLARE_RDMA_OBJ_SIZE(ib_srq); DECLARE_RDMA_OBJ_SIZE(ib_ucontext); + DECLARE_RDMA_OBJ_SIZE(ib_xrcd); }; struct ib_core_device { -- cgit From 5c99274be8864519328aa74bc550ba410095bc1c Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Tue, 30 Jun 2020 15:36:05 +0300 Subject: RDMA/rxe: Skip dgid check in loopback mode In the loopback tests, the following call trace occurs. Call Trace: __rxe_do_task+0x1a/0x30 [rdma_rxe] rxe_qp_destroy+0x61/0xa0 [rdma_rxe] rxe_destroy_qp+0x20/0x60 [rdma_rxe] ib_destroy_qp_user+0xcc/0x220 [ib_core] uverbs_free_qp+0x3c/0xc0 [ib_uverbs] destroy_hw_idr_uobject+0x24/0x70 [ib_uverbs] uverbs_destroy_uobject+0x43/0x1b0 [ib_uverbs] uobj_destroy+0x41/0x70 [ib_uverbs] __uobj_get_destroy+0x39/0x70 [ib_uverbs] ib_uverbs_destroy_qp+0x88/0xc0 [ib_uverbs] ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0xb9/0xf0 [ib_uverbs] ib_uverbs_cmd_verbs+0xb16/0xc30 [ib_uverbs] The root cause is that the actual RDMA connection is not created in the loopback tests and the rxe_match_dgid will fail randomly. To fix this call trace which appear in the loopback tests, skip check of the dgid. 
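In outline, the guard added to rxe_match_dgid() looks like this (a simplified sketch only; the full hunk is in the diff below):

static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
{
	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);

	/* Loopback packets never went through real address resolution, so a
	 * dgid lookup can only fail spuriously; accept them unconditionally.
	 */
	if (pkt->mask & RXE_LOOPBACK_MASK)
		return 0;

	/* non-loopback packets keep the existing IPv4/IPv6 dgid matching */
	...
}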
Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20200630123605.446959-1-leon@kernel.org Signed-off-by: Zhu Yanjun Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_recv.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 831ad578a7b2..46e111c218fd 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -330,10 +330,14 @@ err1: static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) { + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); const struct ib_gid_attr *gid_attr; union ib_gid dgid; union ib_gid *pdgid; + if (pkt->mask & RXE_LOOPBACK_MASK) + return 0; + if (skb->protocol == htons(ETH_P_IP)) { ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, (struct in6_addr *)&dgid); @@ -366,7 +370,7 @@ void rxe_rcv(struct sk_buff *skb) if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES)) goto drop; - if (unlikely(rxe_match_dgid(rxe, skb) < 0)) { + if (rxe_match_dgid(rxe, skb) < 0) { pr_warn_ratelimited("failed matching dgid\n"); goto drop; } -- cgit From cc33b23e1e1cf54bcb9e5c193e3888a3b17a5479 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Tue, 30 Jun 2020 22:01:36 +0800 Subject: RDMA/hns: Optimize MTR level-0 addressing to access huge page If hns ROCEE is set to level-0 addressing, the length of the entire buffer can be used as the page size. The driver needn't to split the buffer into small units because all pages are continuous. Link: https://lore.kernel.org/r/1593525696-12570-1-git-send-email-liweihang@huawei.com Signed-off-by: Xi Wang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 20 +-- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 7 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 44 +----- drivers/infiniband/hw/hns/hns_roce_mr.c | 208 +++++++++++++++++----------- 4 files changed, 147 insertions(+), 132 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 5b946b5bd586..6e64931397ad 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -348,20 +348,22 @@ struct hns_roce_buf_attr { bool mtt_only; /* only alloc buffer-required MTT memory */ }; +struct hns_roce_hem_cfg { + dma_addr_t root_ba; /* root BA table's address */ + bool is_direct; /* addressing without BA table */ + unsigned int ba_pg_shift; /* BA table page shift */ + unsigned int buf_pg_shift; /* buffer page shift */ + unsigned int buf_pg_count; /* buffer page count */ + struct hns_roce_buf_region region[HNS_ROCE_MAX_BT_REGION]; + int region_count; +}; + /* memory translate region */ struct hns_roce_mtr { struct hns_roce_hem_list hem_list; /* multi-hop addressing resource */ struct ib_umem *umem; /* user space buffer */ struct hns_roce_buf *kmem; /* kernel space buffer */ - struct { - dma_addr_t root_ba; /* root BA table's address */ - bool is_direct; /* addressing without BA table */ - unsigned int ba_pg_shift; /* BA table page shift */ - unsigned int buf_pg_shift; /* buffer page shift */ - int buf_pg_count; /* buffer page count */ - struct hns_roce_buf_region region[HNS_ROCE_MAX_BT_REGION]; - unsigned int region_count; - } hem_cfg; /* config for hardware addressing */ + struct hns_roce_hem_cfg hem_cfg; /* config for hardware addressing */ }; struct hns_roce_mw { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c 
b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index d02207cd30df..ef7f8b3177ff 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -2483,7 +2483,6 @@ static int find_wqe_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, u64 *sq_ba, u64 *rq_ba, dma_addr_t *bt_ba) { struct ib_device *ibdev = &hr_dev->ib_dev; - int rq_pa_start; int count; count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, sq_ba, 1, bt_ba); @@ -2491,9 +2490,9 @@ static int find_wqe_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ibdev_err(ibdev, "Failed to find SQ ba\n"); return -ENOBUFS; } - rq_pa_start = hr_qp->rq.offset >> hr_qp->mtr.hem_cfg.buf_pg_shift; - count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, rq_pa_start, rq_ba, 1, - NULL); + + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, rq_ba, + 1, NULL); if (!count) { ibdev_err(ibdev, "Failed to find RQ ba\n"); return -ENOBUFS; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index c597d7281629..2b0b676e4c2f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3744,51 +3744,23 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, } } -static bool check_wqe_rq_mtt_count(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, int mtt_cnt, - u32 page_size) -{ - struct ib_device *ibdev = &hr_dev->ib_dev; - - if (hr_qp->rq.wqe_cnt < 1) - return true; - - if (mtt_cnt < 1) { - ibdev_err(ibdev, "failed to find RQWQE buf ba of QP(0x%lx)\n", - hr_qp->qpn); - return false; - } - - if (mtt_cnt < MTT_MIN_COUNT && - (hr_qp->rq.offset + page_size) < hr_qp->buff_size) { - ibdev_err(ibdev, - "failed to find next RQWQE buf ba of QP(0x%lx)\n", - hr_qp->qpn); - return false; - } - - return true; -} - static int config_qp_rq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { - struct ib_qp *ibqp = &hr_qp->ibqp; u64 mtts[MTT_MIN_COUNT] = { 0 }; u64 wqe_sge_ba; - u32 page_size; int count; /* Search qp buf's mtts */ - page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift; - count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, - hr_qp->rq.offset / page_size, mtts, + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, mtts, MTT_MIN_COUNT, &wqe_sge_ba); - if (!ibqp->srq) - if (!check_wqe_rq_mtt_count(hr_dev, hr_qp, count, page_size)) - return -EINVAL; + if (hr_qp->rq.wqe_cnt && count < 1) { + ibdev_err(&hr_dev->ib_dev, + "failed to find RQ WQE, QPN = 0x%lx.\n", hr_qp->qpn); + return -EINVAL; + } context->wqe_sge_ba = cpu_to_le32(wqe_sge_ba >> 3); qpc_mask->wqe_sge_ba = 0; @@ -3890,7 +3862,6 @@ static int config_qp_sq_buf(struct hns_roce_dev *hr_dev, struct ib_device *ibdev = &hr_dev->ib_dev; u64 sge_cur_blk = 0; u64 sq_cur_blk = 0; - u32 page_size; int count; /* search qp buf's mtts */ @@ -3901,9 +3872,8 @@ static int config_qp_sq_buf(struct hns_roce_dev *hr_dev, return -EINVAL; } if (hr_qp->sge.sge_cnt > 0) { - page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift; count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, - hr_qp->sge.offset / page_size, + hr_qp->sge.offset, &sge_cur_blk, 1, NULL); if (count < 1) { ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf.\n", diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 1380cdab5701..1655fcf21a76 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -870,6 +870,15 @@ int 
hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int err; int i; + /* + * Only use the first page address as root ba when hopnum is 0, this + * is because the addresses of all pages are consecutive in this case. + */ + if (mtr->hem_cfg.is_direct) { + mtr->hem_cfg.root_ba = pages[0]; + return 0; + } + for (i = 0; i < mtr->hem_cfg.region_count; i++) { r = &mtr->hem_cfg.region[i]; if (r->offset + r->count > page_cnt) { @@ -895,6 +904,8 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr) { + struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; + int start_index; int mtt_count; int total = 0; __le64 *mtts; @@ -906,26 +917,32 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, goto done; /* no mtt memory in direct mode, so just return the buffer address */ - if (mtr->hem_cfg.is_direct) { - npage = offset; - for (total = 0; total < mtt_max; total++, npage++) { - addr = mtr->hem_cfg.root_ba + - (npage << mtr->hem_cfg.buf_pg_shift); - + if (cfg->is_direct) { + start_index = offset >> HNS_HW_PAGE_SHIFT; + for (mtt_count = 0; mtt_count < cfg->region_count && + total < mtt_max; mtt_count++) { + npage = cfg->region[mtt_count].offset; + if (npage < start_index) + continue; + + addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT); if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) mtt_buf[total] = to_hr_hw_page_addr(addr); else mtt_buf[total] = addr; + + total++; } goto done; } + start_index = offset >> cfg->buf_pg_shift; left = mtt_max; while (left > 0) { mtt_count = 0; mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, - offset + total, + start_index + total, &mtt_count, NULL); if (!mtts || !mtt_count) goto done; @@ -938,104 +955,136 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, done: if (base_addr) - *base_addr = mtr->hem_cfg.root_ba; + *base_addr = cfg->root_ba; return total; } -/* convert buffer size to page index and page count */ -static unsigned int mtr_init_region(struct hns_roce_buf_attr *attr, - int page_cnt, - struct hns_roce_buf_region *regions, - int region_cnt, unsigned int page_shift) +static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev, + struct hns_roce_buf_attr *attr, + struct hns_roce_hem_cfg *cfg, + unsigned int *buf_page_shift) { - unsigned int page_size = 1 << page_shift; - int max_region = attr->region_count; struct hns_roce_buf_region *r; - unsigned int i = 0; - int page_idx = 0; - - for (; i < region_cnt && i < max_region && page_idx < page_cnt; i++) { - r = ®ions[i]; - r->hopnum = attr->region[i].hopnum == HNS_ROCE_HOP_NUM_0 ? - 0 : attr->region[i].hopnum; - r->offset = page_idx; - r->count = DIV_ROUND_UP(attr->region[i].size, page_size); - page_idx += r->count; + unsigned int page_shift = 0; + int page_cnt = 0; + size_t buf_size; + int region_cnt; + + if (cfg->is_direct) { + buf_size = cfg->buf_pg_count << cfg->buf_pg_shift; + page_cnt = DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE); + /* + * When HEM buffer use level-0 addressing, the page size equals + * the buffer size, and the the page size = 4K * 2^N. 
+ */ + cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT + order_base_2(page_cnt); + if (attr->region_count > 1) { + cfg->buf_pg_count = page_cnt; + page_shift = HNS_HW_PAGE_SHIFT; + } else { + cfg->buf_pg_count = 1; + page_shift = cfg->buf_pg_shift; + if (buf_size != 1 << page_shift) { + ibdev_err(&hr_dev->ib_dev, + "failed to check direct size %zu shift %d.\n", + buf_size, page_shift); + return -EINVAL; + } + } + } else { + page_shift = cfg->buf_pg_shift; + } + + /* convert buffer size to page index and page count */ + for (page_cnt = 0, region_cnt = 0; page_cnt < cfg->buf_pg_count && + region_cnt < attr->region_count && + region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) { + r = &cfg->region[region_cnt]; + r->offset = page_cnt; + buf_size = hr_hw_page_align(attr->region[region_cnt].size); + r->count = DIV_ROUND_UP(buf_size, 1 << page_shift); + page_cnt += r->count; + r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum, + r->count); + } + + if (region_cnt < 1) { + ibdev_err(&hr_dev->ib_dev, + "failed to check mtr region count, pages = %d.\n", + cfg->buf_pg_count); + return -ENOBUFS; } - return i; + cfg->region_count = region_cnt; + *buf_page_shift = page_shift; + + return page_cnt; } /** * hns_roce_mtr_create - Create hns memory translate region. * * @mtr: memory translate region - * @init_attr: init attribute for creating mtr - * @page_shift: page shift for multi-hop base address table + * @buf_attr: buffer attribute for creating mtr + * @ba_page_shift: page shift for multi-hop base address table * @udata: user space context, if it's NULL, means kernel space * @user_addr: userspace virtual address to start at - * @buf_alloced: mtr has private buffer, true means need to alloc */ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, struct hns_roce_buf_attr *buf_attr, - unsigned int page_shift, struct ib_udata *udata, + unsigned int ba_page_shift, struct ib_udata *udata, unsigned long user_addr) { + struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; struct ib_device *ibdev = &hr_dev->ib_dev; + unsigned int buf_page_shift = 0; dma_addr_t *pages = NULL; - int region_cnt = 0; int all_pg_cnt; int get_pg_cnt; - bool has_mtt; - int err = 0; + int ret = 0; + + /* if disable mtt, all pages must in a continuous address range */ + cfg->is_direct = !mtr_has_mtt(buf_attr); - has_mtt = mtr_has_mtt(buf_attr); /* if buffer only need mtt, just init the hem cfg */ if (buf_attr->mtt_only) { - mtr->hem_cfg.buf_pg_shift = buf_attr->page_shift; - mtr->hem_cfg.buf_pg_count = mtr_bufs_size(buf_attr) >> - buf_attr->page_shift; + cfg->buf_pg_shift = buf_attr->page_shift; + cfg->buf_pg_count = mtr_bufs_size(buf_attr) >> + buf_attr->page_shift; mtr->umem = NULL; mtr->kmem = NULL; } else { - err = mtr_alloc_bufs(hr_dev, mtr, buf_attr, !has_mtt, udata, - user_addr); - if (err) { - ibdev_err(ibdev, "Failed to alloc mtr bufs, err %d\n", - err); - return err; + ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, cfg->is_direct, + udata, user_addr); + if (ret) { + ibdev_err(ibdev, + "failed to alloc mtr bufs, ret = %d.\n", ret); + return ret; } } - /* alloc mtt memory */ - all_pg_cnt = mtr->hem_cfg.buf_pg_count; - hns_roce_hem_list_init(&mtr->hem_list); - mtr->hem_cfg.is_direct = !has_mtt; - mtr->hem_cfg.ba_pg_shift = page_shift; - mtr->hem_cfg.region_count = 0; - region_cnt = mtr_init_region(buf_attr, all_pg_cnt, - mtr->hem_cfg.region, - ARRAY_SIZE(mtr->hem_cfg.region), - mtr->hem_cfg.buf_pg_shift); - if (region_cnt < 1) { - err = -ENOBUFS; - ibdev_err(ibdev, "failed to init mtr region %d\n", region_cnt); + 
all_pg_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, cfg, &buf_page_shift); + if (all_pg_cnt < 1) { + ret = -ENOBUFS; + ibdev_err(ibdev, "failed to init mtr buf cfg.\n"); goto err_alloc_bufs; } - mtr->hem_cfg.region_count = region_cnt; - - if (has_mtt) { - err = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, - mtr->hem_cfg.region, region_cnt, - page_shift); - if (err) { - ibdev_err(ibdev, "Failed to request mtr hem, err %d\n", - err); + hns_roce_hem_list_init(&mtr->hem_list); + if (!cfg->is_direct) { + ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, + cfg->region, cfg->region_count, + ba_page_shift); + if (ret) { + ibdev_err(ibdev, "failed to request mtr hem, ret = %d.\n", + ret); goto err_alloc_bufs; } - mtr->hem_cfg.root_ba = mtr->hem_list.root_ba; + cfg->root_ba = mtr->hem_list.root_ba; + cfg->ba_pg_shift = ba_page_shift; + } else { + cfg->ba_pg_shift = cfg->buf_pg_shift; } /* no buffer to map */ @@ -1045,31 +1094,26 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, /* alloc a tmp array to store buffer's dma address */ pages = kvcalloc(all_pg_cnt, sizeof(dma_addr_t), GFP_KERNEL); if (!pages) { - err = -ENOMEM; - ibdev_err(ibdev, "Failed to alloc mtr page list %d\n", + ret = -ENOMEM; + ibdev_err(ibdev, "failed to alloc mtr page list %d.\n", all_pg_cnt); goto err_alloc_hem_list; } get_pg_cnt = mtr_get_pages(hr_dev, mtr, pages, all_pg_cnt, - mtr->hem_cfg.buf_pg_shift); + buf_page_shift); if (get_pg_cnt != all_pg_cnt) { - ibdev_err(ibdev, "Failed to get mtr page %d != %d\n", + ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", get_pg_cnt, all_pg_cnt); - err = -ENOBUFS; + ret = -ENOBUFS; goto err_alloc_page_list; } - if (!has_mtt) { - mtr->hem_cfg.root_ba = pages[0]; - } else { - /* write buffer's dma address to BA table */ - err = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt); - if (err) { - ibdev_err(ibdev, "Failed to map mtr pages, err %d\n", - err); - goto err_alloc_page_list; - } + /* write buffer's dma address to BA table */ + ret = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt); + if (ret) { + ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret); + goto err_alloc_page_list; } /* drop tmp array */ @@ -1081,7 +1125,7 @@ err_alloc_hem_list: hns_roce_hem_list_release(hr_dev, &mtr->hem_list); err_alloc_bufs: mtr_free_bufs(hr_dev, mtr); - return err; + return ret; } void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) -- cgit From ac47bf5ef122591aa561fc16176ef0d1c7d24726 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 2 Jul 2020 11:18:04 +0300 Subject: RDMA/mlx5: Limit the scope of mlx5_ib_enable_driver function The mlx5_ib_enable_driver() is local function and doesn't need to be shared in mlx5_ib, so change it's signature to have static keyword in it. 
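In short, the helper moves next to its only caller in main.c and becomes file-local (sketch of the result; the extern prototype is dropped from mlx5_ib.h):

/* main.c: now static, visible only to the ops table that references it */
static int mlx5_ib_enable_driver(struct ib_device *dev)
{
	struct mlx5_ib_dev *mdev = to_mdev(dev);
	int ret;

	ret = mlx5_ib_test_wc(mdev);
	mlx5_ib_dbg(mdev, "Write-Combining %s",
		    mdev->wc_support ? "supported" : "not supported");
	return ret;
}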
Link: https://lore.kernel.org/r/20200702081809.423482-2-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 24 ++++++++++++------------ drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 - 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 2da592054a3f..a30c080a1c20 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6569,6 +6569,18 @@ static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev) kfree(dev->flow_db); } +static int mlx5_ib_enable_driver(struct ib_device *dev) +{ + struct mlx5_ib_dev *mdev = to_mdev(dev); + int ret; + + ret = mlx5_ib_test_wc(mdev); + mlx5_ib_dbg(mdev, "Write-Combining %s", + mdev->wc_support ? "supported" : "not supported"); + + return ret; +} + static const struct ib_device_ops mlx5_ib_dev_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_MLX5, @@ -7092,18 +7104,6 @@ static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev) } } -int mlx5_ib_enable_driver(struct ib_device *dev) -{ - struct mlx5_ib_dev *mdev = to_mdev(dev); - int ret; - - ret = mlx5_ib_test_wc(mdev); - mlx5_ib_dbg(mdev, "Write-Combining %s", - mdev->wc_support ? "supported" : "not supported"); - - return ret; -} - void __mlx5_ib_remove(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile, int stage) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 88e30ff77c16..1f34f64c6f27 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1542,7 +1542,6 @@ static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev, return true; } -int mlx5_ib_enable_driver(struct ib_device *dev); int mlx5_ib_test_wc(struct mlx5_ib_dev *dev); static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev) -- cgit From b572ebe6675526be5d3b0e831e9d567c4fc0fd5d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 2 Jul 2020 11:18:05 +0300 Subject: RDMA/mlx5: Separate restrack callbacks initialization from main.c The restrack code has separate .c, so move callbacks initialization to that file to improve code locality. Link: https://lore.kernel.org/r/20200702081809.423482-3-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 43 ++++++++--------------------------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 38 +++---------------------------- drivers/infiniband/hw/mlx5/restrack.c | 29 ++++++++++++++++------- drivers/infiniband/hw/mlx5/restrack.h | 13 +++++++++++ 4 files changed, 46 insertions(+), 77 deletions(-) create mode 100644 drivers/infiniband/hw/mlx5/restrack.h diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a30c080a1c20..39dc5edcd7d7 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1,33 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. */ #include @@ -62,6 +35,7 @@ #include "srq.h" #include "qp.h" #include "wr.h" +#include "restrack.h" #include #include #include @@ -6615,11 +6589,6 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .drain_rq = mlx5_ib_drain_rq, .drain_sq = mlx5_ib_drain_sq, .enable_driver = mlx5_ib_enable_driver, - .fill_res_cq_entry_raw = mlx5_ib_fill_res_cq_entry_raw, - .fill_res_mr_entry = mlx5_ib_fill_res_mr_entry, - .fill_res_mr_entry_raw = mlx5_ib_fill_res_mr_entry_raw, - .fill_res_qp_entry_raw = mlx5_ib_fill_res_qp_entry_raw, - .fill_stat_mr_entry = mlx5_ib_fill_stat_mr_entry, .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = mlx5_ib_get_dma_mr, .get_link_layer = mlx5_ib_port_link_layer, @@ -7205,6 +7174,9 @@ static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP, mlx5_ib_stage_delay_drop_init, mlx5_ib_stage_delay_drop_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_RESTRACK, + mlx5_ib_restrack_init, + NULL), }; const struct mlx5_ib_profile raw_eth_profile = { @@ -7259,6 +7231,9 @@ const struct mlx5_ib_profile raw_eth_profile = { STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, mlx5_ib_stage_post_ib_reg_umr_init, NULL), + STAGE_CREATE(MLX5_IB_STAGE_RESTRACK, + mlx5_ib_restrack_init, + NULL), }; static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 1f34f64c6f27..4787cf41cc1d 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. */ #ifndef MLX5_IB_H @@ -850,7 +823,7 @@ enum mlx5_ib_stages { MLX5_IB_STAGE_IB_REG, MLX5_IB_STAGE_POST_IB_REG_UMR, MLX5_IB_STAGE_DELAY_DROP, - MLX5_IB_STAGE_CLASS_ATTR, + MLX5_IB_STAGE_RESTRACK, MLX5_IB_STAGE_MAX, }; @@ -1374,11 +1347,6 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev, u8 *native_port_num); void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); -int mlx5_ib_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr); -int mlx5_ib_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr); -int mlx5_ib_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp); -int mlx5_ib_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ibcq); -int mlx5_ib_fill_stat_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr); extern const struct uapi_definition mlx5_ib_devx_defs[]; extern const struct uapi_definition mlx5_ib_flow_defs[]; diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c index 32c6d0397946..887270dd3ce2 100644 --- a/drivers/infiniband/hw/mlx5/restrack.c +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2019-2020, Mellanox Technologies Ltd. All rights reserved. 
*/ #include @@ -8,6 +8,7 @@ #include #include #include "mlx5_ib.h" +#include "restrack.h" #define MAX_DUMP_SIZE 1024 @@ -77,8 +78,7 @@ out: return err; } -int mlx5_ib_fill_stat_mr_entry(struct sk_buff *msg, - struct ib_mr *ibmr) +static int fill_stat_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr) { struct mlx5_ib_mr *mr = to_mmr(ibmr); struct nlattr *table_attr; @@ -112,7 +112,7 @@ err: return -EMSGSIZE; } -int mlx5_ib_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ibmr) +static int fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ibmr) { struct mlx5_ib_mr *mr = to_mmr(ibmr); @@ -120,8 +120,7 @@ int mlx5_ib_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ibmr) mlx5_mkey_to_idx(mr->mmkey.key)); } -int mlx5_ib_fill_res_mr_entry(struct sk_buff *msg, - struct ib_mr *ibmr) +static int fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr) { struct mlx5_ib_mr *mr = to_mmr(ibmr); struct nlattr *table_attr; @@ -149,7 +148,7 @@ err: return -EMSGSIZE; } -int mlx5_ib_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ibcq) +static int fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ibcq) { struct mlx5_ib_dev *dev = to_mdev(ibcq->device); struct mlx5_ib_cq *cq = to_mcq(ibcq); @@ -157,10 +156,24 @@ int mlx5_ib_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ibcq) return fill_res_raw(msg, dev, MLX5_SGMT_TYPE_PRM_QUERY_CQ, cq->mcq.cqn); } -int mlx5_ib_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp) +static int fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp) { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); return fill_res_raw(msg, dev, MLX5_SGMT_TYPE_PRM_QUERY_QP, ibqp->qp_num); } + +static const struct ib_device_ops restrack_ops = { + .fill_res_cq_entry_raw = fill_res_cq_entry_raw, + .fill_res_mr_entry = fill_res_mr_entry, + .fill_res_mr_entry_raw = fill_res_mr_entry_raw, + .fill_res_qp_entry_raw = fill_res_qp_entry_raw, + .fill_stat_mr_entry = fill_stat_mr_entry, +}; + +int mlx5_ib_restrack_init(struct mlx5_ib_dev *dev) +{ + ib_set_device_ops(&dev->ib_dev, &restrack_ops); + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/restrack.h b/drivers/infiniband/hw/mlx5/restrack.h new file mode 100644 index 000000000000..e8d81270f1b6 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/restrack.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2013-2020, Mellanox Technologies Ltd. All rights reserved. + */ + +#ifndef _MLX5_IB_RESTRACK_H +#define _MLX5_IB_RESTRACK_H + +#include "mlx5_ib.h" + +int mlx5_ib_restrack_init(struct mlx5_ib_dev *dev); + +#endif /* _MLX5_IB_RESTRACK_H */ -- cgit From 64825827ae3a53e01fc9bb9483b1a2ed7cb2b657 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 2 Jul 2020 11:18:06 +0300 Subject: RDMA/mlx5: Separate counters from main.c There are number of counters types supported in mlx5_ib: HW counters, congestion counters, Q-counters and flow counters. Almost all supporting code was placed in main.c that made almost impossible to maintain the code anymore. Let's create separate code namespace for the counters to easy future generalization effort. 
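The pattern, in sketch form, is to keep the callbacks private to counters.c and register them from a small init helper (the real tables below also carry the HW-stats callbacks and the ib_counters object size):

static const struct ib_device_ops counters_ops = {
	.create_counters  = mlx5_ib_create_counters,
	.destroy_counters = mlx5_ib_destroy_counters,
	.read_counters    = mlx5_ib_read_counters,
};

int mlx5_ib_counters_init(struct mlx5_ib_dev *dev)
{
	/* merge the counters callbacks into the device's ops vector */
	ib_set_device_ops(&dev->ib_dev, &counters_ops);
	...
}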
Link: https://lore.kernel.org/r/20200702081809.423482-4-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/Makefile | 1 + drivers/infiniband/hw/mlx5/cmd.c | 12 - drivers/infiniband/hw/mlx5/cmd.h | 1 - drivers/infiniband/hw/mlx5/counters.c | 709 ++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/counters.h | 17 + drivers/infiniband/hw/mlx5/main.c | 701 +-------------------------------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 - drivers/infiniband/hw/mlx5/qp.c | 1 + drivers/infiniband/hw/mlx5/qp.h | 1 + 9 files changed, 737 insertions(+), 709 deletions(-) create mode 100644 drivers/infiniband/hw/mlx5/counters.c create mode 100644 drivers/infiniband/hw/mlx5/counters.h diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 9838719aacb9..34eaceb293b4 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o mlx5_ib-y := ah.o \ cmd.o \ cong.o \ + counters.o \ cq.o \ doorbell.o \ gsi.o \ diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index cc24c711e92a..ebb2f108b64f 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -148,18 +148,6 @@ void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length) spin_unlock(&dm->lock); } -int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out) -{ - u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; - int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); - - MLX5_SET(ppcnt_reg, in, local_port, 1); - - MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); - return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPCNT, - 0, 0); -} - void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid) { u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {}; diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index f4d8558db434..1d192a8ca87d 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -41,7 +41,6 @@ int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey); int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey); int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, void *out); -int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out); int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr, u64 length, u32 alignment); void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length); diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c new file mode 100644 index 000000000000..145f3cb40ccb --- /dev/null +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -0,0 +1,709 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. 
+ */ + +#include "mlx5_ib.h" +#include +#include "counters.h" +#include "ib_rep.h" +#include "qp.h" + +struct mlx5_ib_counter { + const char *name; + size_t offset; +}; + +#define INIT_Q_COUNTER(_name) \ + { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)} + +static const struct mlx5_ib_counter basic_q_cnts[] = { + INIT_Q_COUNTER(rx_write_requests), + INIT_Q_COUNTER(rx_read_requests), + INIT_Q_COUNTER(rx_atomic_requests), + INIT_Q_COUNTER(out_of_buffer), +}; + +static const struct mlx5_ib_counter out_of_seq_q_cnts[] = { + INIT_Q_COUNTER(out_of_sequence), +}; + +static const struct mlx5_ib_counter retrans_q_cnts[] = { + INIT_Q_COUNTER(duplicate_request), + INIT_Q_COUNTER(rnr_nak_retry_err), + INIT_Q_COUNTER(packet_seq_err), + INIT_Q_COUNTER(implied_nak_seq_err), + INIT_Q_COUNTER(local_ack_timeout_err), +}; + +#define INIT_CONG_COUNTER(_name) \ + { .name = #_name, .offset = \ + MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)} + +static const struct mlx5_ib_counter cong_cnts[] = { + INIT_CONG_COUNTER(rp_cnp_ignored), + INIT_CONG_COUNTER(rp_cnp_handled), + INIT_CONG_COUNTER(np_ecn_marked_roce_packets), + INIT_CONG_COUNTER(np_cnp_sent), +}; + +static const struct mlx5_ib_counter extended_err_cnts[] = { + INIT_Q_COUNTER(resp_local_length_error), + INIT_Q_COUNTER(resp_cqe_error), + INIT_Q_COUNTER(req_cqe_error), + INIT_Q_COUNTER(req_remote_invalid_request), + INIT_Q_COUNTER(req_remote_access_errors), + INIT_Q_COUNTER(resp_remote_access_errors), + INIT_Q_COUNTER(resp_cqe_flush_error), + INIT_Q_COUNTER(req_cqe_flush_error), +}; + +static const struct mlx5_ib_counter roce_accl_cnts[] = { + INIT_Q_COUNTER(roce_adp_retrans), + INIT_Q_COUNTER(roce_adp_retrans_to), + INIT_Q_COUNTER(roce_slow_restart), + INIT_Q_COUNTER(roce_slow_restart_cnps), + INIT_Q_COUNTER(roce_slow_restart_trans), +}; + +#define INIT_EXT_PPCNT_COUNTER(_name) \ + { .name = #_name, .offset = \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_extended_cntrs_grp_data_layout._name##_high)} + +static const struct mlx5_ib_counter ext_ppcnt_cnts[] = { + INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated), +}; + +static int mlx5_ib_read_counters(struct ib_counters *counters, + struct ib_counters_read_attr *read_attr, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); + struct mlx5_read_counters_attr mread_attr = {}; + struct mlx5_ib_flow_counters_desc *desc; + int ret, i; + + mutex_lock(&mcounters->mcntrs_mutex); + if (mcounters->cntrs_max_index > read_attr->ncounters) { + ret = -EINVAL; + goto err_bound; + } + + mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64), + GFP_KERNEL); + if (!mread_attr.out) { + ret = -ENOMEM; + goto err_bound; + } + + mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl; + mread_attr.flags = read_attr->flags; + ret = mcounters->read_counters(counters->device, &mread_attr); + if (ret) + goto err_read; + + /* do the pass over the counters data array to assign according to the + * descriptions and indexing pairs + */ + desc = mcounters->counters_data; + for (i = 0; i < mcounters->ncounters; i++) + read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description]; + +err_read: + kfree(mread_attr.out); +err_bound: + mutex_unlock(&mcounters->mcntrs_mutex); + return ret; +} + +static void mlx5_ib_destroy_counters(struct ib_counters *counters) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); + + mlx5_ib_counters_clear_description(counters); + if (mcounters->hw_cntrs_hndl) + 
mlx5_fc_destroy(to_mdev(counters->device)->mdev, + mcounters->hw_cntrs_hndl); +} + +static int mlx5_ib_create_counters(struct ib_counters *counters, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); + + mutex_init(&mcounters->mcntrs_mutex); + return 0; +} + + +static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev) +{ + return MLX5_ESWITCH_MANAGER(mdev) && + mlx5_ib_eswitch_mode(mdev->priv.eswitch) == + MLX5_ESWITCH_OFFLOADS; +} + +static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev, + u8 port_num) +{ + return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts : + &dev->port[port_num].cnts; +} + +/** + * mlx5_ib_get_counters_id - Returns counters id to use for device+port + * @dev: Pointer to mlx5 IB device + * @port_num: Zero based port number + * + * mlx5_ib_get_counters_id() Returns counters set id to use for given + * device port combination in switchdev and non switchdev mode of the + * parent device. + */ +u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num) +{ + const struct mlx5_ib_counters *cnts = get_counters(dev, port_num); + + return cnts->set_id; +} + +static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, + u8 port_num) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + const struct mlx5_ib_counters *cnts; + bool is_switchdev = is_mdev_switchdev_mode(dev->mdev); + + if ((is_switchdev && port_num) || (!is_switchdev && !port_num)) + return NULL; + + cnts = get_counters(dev, port_num - 1); + + return rdma_alloc_hw_stats_struct(cnts->names, + cnts->num_q_counters + + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters, + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev, + const struct mlx5_ib_counters *cnts, + struct rdma_hw_stats *stats, + u16 set_id) +{ + u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {}; + __be32 val; + int ret, i; + + MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); + MLX5_SET(query_q_counter_in, in, counter_set_id, set_id); + ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out); + if (ret) + return ret; + + for (i = 0; i < cnts->num_q_counters; i++) { + val = *(__be32 *)((void *)out + cnts->offsets[i]); + stats->value[i] = (u64)be32_to_cpu(val); + } + + return 0; +} + +static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev, + const struct mlx5_ib_counters *cnts, + struct rdma_hw_stats *stats) +{ + int offset = cnts->num_q_counters + cnts->num_cong_counters; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + int ret, i; + void *out; + + out = kvzalloc(sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); + ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT, + 0, 0); + if (ret) + goto free; + + for (i = 0; i < cnts->num_ext_ppcnt_counters; i++) + stats->value[i + offset] = + be64_to_cpup((__be64 *)(out + + cnts->offsets[i + offset])); +free: + kvfree(out); + return ret; +} + +static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u8 port_num, int index) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1); + struct mlx5_core_dev *mdev; + int ret, num_counters; + u8 mdev_port_num; + + if (!stats) + return -EINVAL; + + num_counters = 
cnts->num_q_counters + + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters; + + /* q_counters are per IB device, query the master mdev */ + ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id); + if (ret) + return ret; + + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { + ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats); + if (ret) + return ret; + } + + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { + mdev = mlx5_ib_get_native_port_mdev(dev, port_num, + &mdev_port_num); + if (!mdev) { + /* If port is not affiliated yet, its in down state + * which doesn't have any counters yet, so it would be + * zero. So no need to read from the HCA. + */ + goto done; + } + ret = mlx5_lag_query_cong_counters(dev->mdev, + stats->value + + cnts->num_q_counters, + cnts->num_cong_counters, + cnts->offsets + + cnts->num_q_counters); + + mlx5_ib_put_native_port_mdev(dev, port_num); + if (ret) + return ret; + } + +done: + return num_counters; +} + +static struct rdma_hw_stats * +mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) +{ + struct mlx5_ib_dev *dev = to_mdev(counter->device); + const struct mlx5_ib_counters *cnts = + get_counters(dev, counter->port - 1); + + return rdma_alloc_hw_stats_struct(cnts->names, + cnts->num_q_counters + + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters, + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static int mlx5_ib_counter_update_stats(struct rdma_counter *counter) +{ + struct mlx5_ib_dev *dev = to_mdev(counter->device); + const struct mlx5_ib_counters *cnts = + get_counters(dev, counter->port - 1); + + return mlx5_ib_query_q_counters(dev->mdev, cnts, + counter->stats, counter->id); +} + +static int mlx5_ib_counter_dealloc(struct rdma_counter *counter) +{ + struct mlx5_ib_dev *dev = to_mdev(counter->device); + u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; + + if (!counter->id) + return 0; + + MLX5_SET(dealloc_q_counter_in, in, opcode, + MLX5_CMD_OP_DEALLOC_Q_COUNTER); + MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id); + return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in); +} + +static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, + struct ib_qp *qp) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->device); + int err; + + if (!counter->id) { + u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; + + MLX5_SET(alloc_q_counter_in, in, opcode, + MLX5_CMD_OP_ALLOC_Q_COUNTER); + MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID); + err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out); + if (err) + return err; + counter->id = + MLX5_GET(alloc_q_counter_out, out, counter_set_id); + } + + err = mlx5_ib_qp_set_counter(qp, counter); + if (err) + goto fail_set_counter; + + return 0; + +fail_set_counter: + mlx5_ib_counter_dealloc(counter); + counter->id = 0; + + return err; +} + +static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp) +{ + return mlx5_ib_qp_set_counter(qp, NULL); +} + + +static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, + const char **names, + size_t *offsets) +{ + int i; + int j = 0; + + for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) { + names[j] = basic_q_cnts[i].name; + offsets[j] = basic_q_cnts[i].offset; + } + + if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) { + for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) { + names[j] = out_of_seq_q_cnts[i].name; + offsets[j] = out_of_seq_q_cnts[i].offset; + } + } + + if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { + for (i = 0; i < 
ARRAY_SIZE(retrans_q_cnts); i++, j++) { + names[j] = retrans_q_cnts[i].name; + offsets[j] = retrans_q_cnts[i].offset; + } + } + + if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) { + for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) { + names[j] = extended_err_cnts[i].name; + offsets[j] = extended_err_cnts[i].offset; + } + } + + if (MLX5_CAP_GEN(dev->mdev, roce_accl)) { + for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) { + names[j] = roce_accl_cnts[i].name; + offsets[j] = roce_accl_cnts[i].offset; + } + } + + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { + for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { + names[j] = cong_cnts[i].name; + offsets[j] = cong_cnts[i].offset; + } + } + + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { + for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) { + names[j] = ext_ppcnt_cnts[i].name; + offsets[j] = ext_ppcnt_cnts[i].offset; + } + } +} + + +static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, + struct mlx5_ib_counters *cnts) +{ + u32 num_counters; + + num_counters = ARRAY_SIZE(basic_q_cnts); + + if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) + num_counters += ARRAY_SIZE(out_of_seq_q_cnts); + + if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) + num_counters += ARRAY_SIZE(retrans_q_cnts); + + if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) + num_counters += ARRAY_SIZE(extended_err_cnts); + + if (MLX5_CAP_GEN(dev->mdev, roce_accl)) + num_counters += ARRAY_SIZE(roce_accl_cnts); + + cnts->num_q_counters = num_counters; + + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { + cnts->num_cong_counters = ARRAY_SIZE(cong_cnts); + num_counters += ARRAY_SIZE(cong_cnts); + } + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { + cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts); + num_counters += ARRAY_SIZE(ext_ppcnt_cnts); + } + cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL); + if (!cnts->names) + return -ENOMEM; + + cnts->offsets = kcalloc(num_counters, + sizeof(cnts->offsets), GFP_KERNEL); + if (!cnts->offsets) + goto err_names; + + return 0; + +err_names: + kfree(cnts->names); + cnts->names = NULL; + return -ENOMEM; +} + +static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; + int num_cnt_ports; + int i; + + num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; + + MLX5_SET(dealloc_q_counter_in, in, opcode, + MLX5_CMD_OP_DEALLOC_Q_COUNTER); + + for (i = 0; i < num_cnt_ports; i++) { + if (dev->port[i].cnts.set_id) { + MLX5_SET(dealloc_q_counter_in, in, counter_set_id, + dev->port[i].cnts.set_id); + mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in); + } + kfree(dev->port[i].cnts.names); + kfree(dev->port[i].cnts.offsets); + } +} + +static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; + int num_cnt_ports; + int err = 0; + int i; + bool is_shared; + + MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); + is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0; + num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; + + for (i = 0; i < num_cnt_ports; i++) { + err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts); + if (err) + goto err_alloc; + + mlx5_ib_fill_counters(dev, dev->port[i].cnts.names, + dev->port[i].cnts.offsets); + + MLX5_SET(alloc_q_counter_in, in, uid, + is_shared ? 
MLX5_SHARED_RESOURCE_UID : 0); + + err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out); + if (err) { + mlx5_ib_warn(dev, + "couldn't allocate queue counter for port %d, err %d\n", + i + 1, err); + goto err_alloc; + } + + dev->port[i].cnts.set_id = + MLX5_GET(alloc_q_counter_out, out, counter_set_id); + } + return 0; + +err_alloc: + mlx5_ib_dealloc_counters(dev); + return err; +} + +static int read_flow_counters(struct ib_device *ibdev, + struct mlx5_read_counters_attr *read_attr) +{ + struct mlx5_fc *fc = read_attr->hw_cntrs_hndl; + struct mlx5_ib_dev *dev = to_mdev(ibdev); + + return mlx5_fc_query(dev->mdev, fc, + &read_attr->out[IB_COUNTER_PACKETS], + &read_attr->out[IB_COUNTER_BYTES]); +} + +/* flow counters currently expose two counters packets and bytes */ +#define FLOW_COUNTERS_NUM 2 +static int counters_set_description( + struct ib_counters *counters, enum mlx5_ib_counters_type counters_type, + struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); + u32 cntrs_max_index = 0; + int i; + + if (counters_type != MLX5_IB_COUNTERS_FLOW) + return -EINVAL; + + /* init the fields for the object */ + mcounters->type = counters_type; + mcounters->read_counters = read_flow_counters; + mcounters->counters_num = FLOW_COUNTERS_NUM; + mcounters->ncounters = ncounters; + /* each counter entry have both description and index pair */ + for (i = 0; i < ncounters; i++) { + if (desc_data[i].description > IB_COUNTER_BYTES) + return -EINVAL; + + if (cntrs_max_index <= desc_data[i].index) + cntrs_max_index = desc_data[i].index + 1; + } + + mutex_lock(&mcounters->mcntrs_mutex); + mcounters->counters_data = desc_data; + mcounters->cntrs_max_index = cntrs_max_index; + mutex_unlock(&mcounters->mcntrs_mutex); + + return 0; +} + +#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2)) +int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters, + struct mlx5_ib_create_flow *ucmd) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters); + struct mlx5_ib_flow_counters_data *cntrs_data = NULL; + struct mlx5_ib_flow_counters_desc *desc_data = NULL; + bool hw_hndl = false; + int ret = 0; + + if (ucmd && ucmd->ncounters_data != 0) { + cntrs_data = ucmd->data; + if (cntrs_data->ncounters > MAX_COUNTERS_NUM) + return -EINVAL; + + desc_data = kcalloc(cntrs_data->ncounters, + sizeof(*desc_data), + GFP_KERNEL); + if (!desc_data) + return -ENOMEM; + + if (copy_from_user(desc_data, + u64_to_user_ptr(cntrs_data->counters_data), + sizeof(*desc_data) * cntrs_data->ncounters)) { + ret = -EFAULT; + goto free; + } + } + + if (!mcounters->hw_cntrs_hndl) { + mcounters->hw_cntrs_hndl = mlx5_fc_create( + to_mdev(ibcounters->device)->mdev, false); + if (IS_ERR(mcounters->hw_cntrs_hndl)) { + ret = PTR_ERR(mcounters->hw_cntrs_hndl); + goto free; + } + hw_hndl = true; + } + + if (desc_data) { + /* counters already bound to at least one flow */ + if (mcounters->cntrs_max_index) { + ret = -EINVAL; + goto free_hndl; + } + + ret = counters_set_description(ibcounters, + MLX5_IB_COUNTERS_FLOW, + desc_data, + cntrs_data->ncounters); + if (ret) + goto free_hndl; + + } else if (!mcounters->cntrs_max_index) { + /* counters not bound yet, must have udata passed */ + ret = -EINVAL; + goto free_hndl; + } + + return 0; + +free_hndl: + if (hw_hndl) { + mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev, + mcounters->hw_cntrs_hndl); + mcounters->hw_cntrs_hndl = NULL; + } +free: + kfree(desc_data); + return ret; +} + +void 
mlx5_ib_counters_clear_description(struct ib_counters *counters) +{ + struct mlx5_ib_mcounters *mcounters; + + if (!counters || atomic_read(&counters->usecnt) != 1) + return; + + mcounters = to_mcounters(counters); + + mutex_lock(&mcounters->mcntrs_mutex); + kfree(mcounters->counters_data); + mcounters->counters_data = NULL; + mcounters->cntrs_max_index = 0; + mutex_unlock(&mcounters->mcntrs_mutex); +} + +static const struct ib_device_ops hw_stats_ops = { + .alloc_hw_stats = mlx5_ib_alloc_hw_stats, + .get_hw_stats = mlx5_ib_get_hw_stats, + .counter_bind_qp = mlx5_ib_counter_bind_qp, + .counter_unbind_qp = mlx5_ib_counter_unbind_qp, + .counter_dealloc = mlx5_ib_counter_dealloc, + .counter_alloc_stats = mlx5_ib_counter_alloc_stats, + .counter_update_stats = mlx5_ib_counter_update_stats, +}; + +static const struct ib_device_ops counters_ops = { + .create_counters = mlx5_ib_create_counters, + .destroy_counters = mlx5_ib_destroy_counters, + .read_counters = mlx5_ib_read_counters, + + INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs), +}; + +int mlx5_ib_counters_init(struct mlx5_ib_dev *dev) +{ + ib_set_device_ops(&dev->ib_dev, &counters_ops); + + if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) + return 0; + + ib_set_device_ops(&dev->ib_dev, &hw_stats_ops); + return mlx5_ib_alloc_counters(dev); +} + +void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev) +{ + if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) + return; + + mlx5_ib_dealloc_counters(dev); +} diff --git a/drivers/infiniband/hw/mlx5/counters.h b/drivers/infiniband/hw/mlx5/counters.h new file mode 100644 index 000000000000..1aa30c2f3f4d --- /dev/null +++ b/drivers/infiniband/hw/mlx5/counters.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. 
+ */ + +#ifndef _MLX5_IB_COUNTERS_H +#define _MLX5_IB_COUNTERS_H + +#include "mlx5_ib.h" + +int mlx5_ib_counters_init(struct mlx5_ib_dev *dev); +void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev); +void mlx5_ib_counters_clear_description(struct ib_counters *counters); +int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters, + struct mlx5_ib_create_flow *ucmd); +u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num); +#endif /* _MLX5_IB_COUNTERS_H */ diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 39dc5edcd7d7..c14955061273 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -36,6 +36,7 @@ #include "qp.h" #include "wr.h" #include "restrack.h" +#include "counters.h" #include #include #include @@ -3249,17 +3250,6 @@ static void put_flow_table(struct mlx5_ib_dev *dev, } } -static void counters_clear_description(struct ib_counters *counters) -{ - struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); - - mutex_lock(&mcounters->mcntrs_mutex); - kfree(mcounters->counters_data); - mcounters->counters_data = NULL; - mcounters->cntrs_max_index = 0; - mutex_unlock(&mcounters->mcntrs_mutex); -} - static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) { struct mlx5_ib_flow_handler *handler = container_of(flow_id, @@ -3279,10 +3269,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) mlx5_del_flow_rules(handler->rule); put_flow_table(dev, handler->prio, true); - if (handler->ibcounters && - atomic_read(&handler->ibcounters->usecnt) == 1) - counters_clear_description(handler->ibcounters); - + mlx5_ib_counters_clear_description(handler->ibcounters); mutex_unlock(&dev->flow_db->lock); if (handler->flow_matcher) atomic_dec(&handler->flow_matcher->usecnt); @@ -3436,125 +3423,6 @@ static void set_underlay_qp(struct mlx5_ib_dev *dev, } } -static int read_flow_counters(struct ib_device *ibdev, - struct mlx5_read_counters_attr *read_attr) -{ - struct mlx5_fc *fc = read_attr->hw_cntrs_hndl; - struct mlx5_ib_dev *dev = to_mdev(ibdev); - - return mlx5_fc_query(dev->mdev, fc, - &read_attr->out[IB_COUNTER_PACKETS], - &read_attr->out[IB_COUNTER_BYTES]); -} - -/* flow counters currently expose two counters packets and bytes */ -#define FLOW_COUNTERS_NUM 2 -static int counters_set_description(struct ib_counters *counters, - enum mlx5_ib_counters_type counters_type, - struct mlx5_ib_flow_counters_desc *desc_data, - u32 ncounters) -{ - struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); - u32 cntrs_max_index = 0; - int i; - - if (counters_type != MLX5_IB_COUNTERS_FLOW) - return -EINVAL; - - /* init the fields for the object */ - mcounters->type = counters_type; - mcounters->read_counters = read_flow_counters; - mcounters->counters_num = FLOW_COUNTERS_NUM; - mcounters->ncounters = ncounters; - /* each counter entry have both description and index pair */ - for (i = 0; i < ncounters; i++) { - if (desc_data[i].description > IB_COUNTER_BYTES) - return -EINVAL; - - if (cntrs_max_index <= desc_data[i].index) - cntrs_max_index = desc_data[i].index + 1; - } - - mutex_lock(&mcounters->mcntrs_mutex); - mcounters->counters_data = desc_data; - mcounters->cntrs_max_index = cntrs_max_index; - mutex_unlock(&mcounters->mcntrs_mutex); - - return 0; -} - -#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2)) -static int flow_counters_set_data(struct ib_counters *ibcounters, - struct mlx5_ib_create_flow *ucmd) -{ - struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters); - struct 
mlx5_ib_flow_counters_data *cntrs_data = NULL; - struct mlx5_ib_flow_counters_desc *desc_data = NULL; - bool hw_hndl = false; - int ret = 0; - - if (ucmd && ucmd->ncounters_data != 0) { - cntrs_data = ucmd->data; - if (cntrs_data->ncounters > MAX_COUNTERS_NUM) - return -EINVAL; - - desc_data = kcalloc(cntrs_data->ncounters, - sizeof(*desc_data), - GFP_KERNEL); - if (!desc_data) - return -ENOMEM; - - if (copy_from_user(desc_data, - u64_to_user_ptr(cntrs_data->counters_data), - sizeof(*desc_data) * cntrs_data->ncounters)) { - ret = -EFAULT; - goto free; - } - } - - if (!mcounters->hw_cntrs_hndl) { - mcounters->hw_cntrs_hndl = mlx5_fc_create( - to_mdev(ibcounters->device)->mdev, false); - if (IS_ERR(mcounters->hw_cntrs_hndl)) { - ret = PTR_ERR(mcounters->hw_cntrs_hndl); - goto free; - } - hw_hndl = true; - } - - if (desc_data) { - /* counters already bound to at least one flow */ - if (mcounters->cntrs_max_index) { - ret = -EINVAL; - goto free_hndl; - } - - ret = counters_set_description(ibcounters, - MLX5_IB_COUNTERS_FLOW, - desc_data, - cntrs_data->ncounters); - if (ret) - goto free_hndl; - - } else if (!mcounters->cntrs_max_index) { - /* counters not bound yet, must have udata passed */ - ret = -EINVAL; - goto free_hndl; - } - - return 0; - -free_hndl: - if (hw_hndl) { - mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev, - mcounters->hw_cntrs_hndl); - mcounters->hw_cntrs_hndl = NULL; - } -free: - kfree(desc_data); - return ret; -} - static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev, struct mlx5_flow_spec *spec, struct mlx5_eswitch_rep *rep) @@ -3664,7 +3532,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { struct mlx5_ib_mcounters *mcounters; - err = flow_counters_set_data(flow_act.counters, ucmd); + err = mlx5_ib_flow_counters_set_data(flow_act.counters, ucmd); if (err) goto free; @@ -3714,9 +3582,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, ft_prio->flow_table = ft; free: if (err && handler) { - if (handler->ibcounters && - atomic_read(&handler->ibcounters->usecnt) == 1) - counters_clear_description(handler->ibcounters); + mlx5_ib_counters_clear_description(handler->ibcounters); kfree(handler); } kvfree(spec); @@ -5308,466 +5174,6 @@ static void mlx5_disable_eth(struct mlx5_ib_dev *dev) mlx5_nic_vport_disable_roce(dev->mdev); } -struct mlx5_ib_counter { - const char *name; - size_t offset; -}; - -#define INIT_Q_COUNTER(_name) \ - { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)} - -static const struct mlx5_ib_counter basic_q_cnts[] = { - INIT_Q_COUNTER(rx_write_requests), - INIT_Q_COUNTER(rx_read_requests), - INIT_Q_COUNTER(rx_atomic_requests), - INIT_Q_COUNTER(out_of_buffer), -}; - -static const struct mlx5_ib_counter out_of_seq_q_cnts[] = { - INIT_Q_COUNTER(out_of_sequence), -}; - -static const struct mlx5_ib_counter retrans_q_cnts[] = { - INIT_Q_COUNTER(duplicate_request), - INIT_Q_COUNTER(rnr_nak_retry_err), - INIT_Q_COUNTER(packet_seq_err), - INIT_Q_COUNTER(implied_nak_seq_err), - INIT_Q_COUNTER(local_ack_timeout_err), -}; - -#define INIT_CONG_COUNTER(_name) \ - { .name = #_name, .offset = \ - MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)} - -static const struct mlx5_ib_counter cong_cnts[] = { - INIT_CONG_COUNTER(rp_cnp_ignored), - INIT_CONG_COUNTER(rp_cnp_handled), - INIT_CONG_COUNTER(np_ecn_marked_roce_packets), - INIT_CONG_COUNTER(np_cnp_sent), -}; - -static const struct mlx5_ib_counter extended_err_cnts[] 
= { - INIT_Q_COUNTER(resp_local_length_error), - INIT_Q_COUNTER(resp_cqe_error), - INIT_Q_COUNTER(req_cqe_error), - INIT_Q_COUNTER(req_remote_invalid_request), - INIT_Q_COUNTER(req_remote_access_errors), - INIT_Q_COUNTER(resp_remote_access_errors), - INIT_Q_COUNTER(resp_cqe_flush_error), - INIT_Q_COUNTER(req_cqe_flush_error), -}; - -static const struct mlx5_ib_counter roce_accl_cnts[] = { - INIT_Q_COUNTER(roce_adp_retrans), - INIT_Q_COUNTER(roce_adp_retrans_to), - INIT_Q_COUNTER(roce_slow_restart), - INIT_Q_COUNTER(roce_slow_restart_cnps), - INIT_Q_COUNTER(roce_slow_restart_trans), -}; - -#define INIT_EXT_PPCNT_COUNTER(_name) \ - { .name = #_name, .offset = \ - MLX5_BYTE_OFF(ppcnt_reg, \ - counter_set.eth_extended_cntrs_grp_data_layout._name##_high)} - -static const struct mlx5_ib_counter ext_ppcnt_cnts[] = { - INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated), -}; - -static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev) -{ - return MLX5_ESWITCH_MANAGER(mdev) && - mlx5_ib_eswitch_mode(mdev->priv.eswitch) == - MLX5_ESWITCH_OFFLOADS; -} - -static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) -{ - u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; - int num_cnt_ports; - int i; - - num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; - - MLX5_SET(dealloc_q_counter_in, in, opcode, - MLX5_CMD_OP_DEALLOC_Q_COUNTER); - - for (i = 0; i < num_cnt_ports; i++) { - if (dev->port[i].cnts.set_id) { - MLX5_SET(dealloc_q_counter_in, in, counter_set_id, - dev->port[i].cnts.set_id); - mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in); - } - kfree(dev->port[i].cnts.names); - kfree(dev->port[i].cnts.offsets); - } -} - -static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, - struct mlx5_ib_counters *cnts) -{ - u32 num_counters; - - num_counters = ARRAY_SIZE(basic_q_cnts); - - if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) - num_counters += ARRAY_SIZE(out_of_seq_q_cnts); - - if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) - num_counters += ARRAY_SIZE(retrans_q_cnts); - - if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) - num_counters += ARRAY_SIZE(extended_err_cnts); - - if (MLX5_CAP_GEN(dev->mdev, roce_accl)) - num_counters += ARRAY_SIZE(roce_accl_cnts); - - cnts->num_q_counters = num_counters; - - if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { - cnts->num_cong_counters = ARRAY_SIZE(cong_cnts); - num_counters += ARRAY_SIZE(cong_cnts); - } - if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { - cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts); - num_counters += ARRAY_SIZE(ext_ppcnt_cnts); - } - cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL); - if (!cnts->names) - return -ENOMEM; - - cnts->offsets = kcalloc(num_counters, - sizeof(cnts->offsets), GFP_KERNEL); - if (!cnts->offsets) - goto err_names; - - return 0; - -err_names: - kfree(cnts->names); - cnts->names = NULL; - return -ENOMEM; -} - -static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, - const char **names, - size_t *offsets) -{ - int i; - int j = 0; - - for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) { - names[j] = basic_q_cnts[i].name; - offsets[j] = basic_q_cnts[i].offset; - } - - if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) { - for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) { - names[j] = out_of_seq_q_cnts[i].name; - offsets[j] = out_of_seq_q_cnts[i].offset; - } - } - - if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { - for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) { - names[j] = retrans_q_cnts[i].name; - 
offsets[j] = retrans_q_cnts[i].offset; - } - } - - if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) { - for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) { - names[j] = extended_err_cnts[i].name; - offsets[j] = extended_err_cnts[i].offset; - } - } - - if (MLX5_CAP_GEN(dev->mdev, roce_accl)) { - for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) { - names[j] = roce_accl_cnts[i].name; - offsets[j] = roce_accl_cnts[i].offset; - } - } - - if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { - for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { - names[j] = cong_cnts[i].name; - offsets[j] = cong_cnts[i].offset; - } - } - - if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { - for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) { - names[j] = ext_ppcnt_cnts[i].name; - offsets[j] = ext_ppcnt_cnts[i].offset; - } - } -} - -static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) -{ - u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; - u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; - int num_cnt_ports; - int err = 0; - int i; - bool is_shared; - - MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); - is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0; - num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; - - for (i = 0; i < num_cnt_ports; i++) { - err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts); - if (err) - goto err_alloc; - - mlx5_ib_fill_counters(dev, dev->port[i].cnts.names, - dev->port[i].cnts.offsets); - - MLX5_SET(alloc_q_counter_in, in, uid, - is_shared ? MLX5_SHARED_RESOURCE_UID : 0); - - err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out); - if (err) { - mlx5_ib_warn(dev, - "couldn't allocate queue counter for port %d, err %d\n", - i + 1, err); - goto err_alloc; - } - - dev->port[i].cnts.set_id = - MLX5_GET(alloc_q_counter_out, out, counter_set_id); - } - return 0; - -err_alloc: - mlx5_ib_dealloc_counters(dev); - return err; -} - -static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev, - u8 port_num) -{ - return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts : - &dev->port[port_num].cnts; -} - -/** - * mlx5_ib_get_counters_id - Returns counters id to use for device+port - * @dev: Pointer to mlx5 IB device - * @port_num: Zero based port number - * - * mlx5_ib_get_counters_id() Returns counters set id to use for given - * device port combination in switchdev and non switchdev mode of the - * parent device. 
- */ -u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num) -{ - const struct mlx5_ib_counters *cnts = get_counters(dev, port_num); - - return cnts->set_id; -} - -static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, - u8 port_num) -{ - struct mlx5_ib_dev *dev = to_mdev(ibdev); - const struct mlx5_ib_counters *cnts; - bool is_switchdev = is_mdev_switchdev_mode(dev->mdev); - - if ((is_switchdev && port_num) || (!is_switchdev && !port_num)) - return NULL; - - cnts = get_counters(dev, port_num - 1); - - return rdma_alloc_hw_stats_struct(cnts->names, - cnts->num_q_counters + - cnts->num_cong_counters + - cnts->num_ext_ppcnt_counters, - RDMA_HW_STATS_DEFAULT_LIFESPAN); -} - -static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev, - const struct mlx5_ib_counters *cnts, - struct rdma_hw_stats *stats, - u16 set_id) -{ - u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {}; - u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {}; - __be32 val; - int ret, i; - - MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); - MLX5_SET(query_q_counter_in, in, counter_set_id, set_id); - ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out); - if (ret) - return ret; - - for (i = 0; i < cnts->num_q_counters; i++) { - val = *(__be32 *)((void *)out + cnts->offsets[i]); - stats->value[i] = (u64)be32_to_cpu(val); - } - - return 0; -} - -static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev, - const struct mlx5_ib_counters *cnts, - struct rdma_hw_stats *stats) -{ - int offset = cnts->num_q_counters + cnts->num_cong_counters; - int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); - int ret, i; - void *out; - - out = kvzalloc(sz, GFP_KERNEL); - if (!out) - return -ENOMEM; - - ret = mlx5_cmd_query_ext_ppcnt_counters(dev->mdev, out); - if (ret) - goto free; - - for (i = 0; i < cnts->num_ext_ppcnt_counters; i++) - stats->value[i + offset] = - be64_to_cpup((__be64 *)(out + - cnts->offsets[i + offset])); -free: - kvfree(out); - return ret; -} - -static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, - struct rdma_hw_stats *stats, - u8 port_num, int index) -{ - struct mlx5_ib_dev *dev = to_mdev(ibdev); - const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1); - struct mlx5_core_dev *mdev; - int ret, num_counters; - u8 mdev_port_num; - - if (!stats) - return -EINVAL; - - num_counters = cnts->num_q_counters + - cnts->num_cong_counters + - cnts->num_ext_ppcnt_counters; - - /* q_counters are per IB device, query the master mdev */ - ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id); - if (ret) - return ret; - - if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { - ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats); - if (ret) - return ret; - } - - if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { - mdev = mlx5_ib_get_native_port_mdev(dev, port_num, - &mdev_port_num); - if (!mdev) { - /* If port is not affiliated yet, its in down state - * which doesn't have any counters yet, so it would be - * zero. So no need to read from the HCA. 
- */ - goto done; - } - ret = mlx5_lag_query_cong_counters(dev->mdev, - stats->value + - cnts->num_q_counters, - cnts->num_cong_counters, - cnts->offsets + - cnts->num_q_counters); - - mlx5_ib_put_native_port_mdev(dev, port_num); - if (ret) - return ret; - } - -done: - return num_counters; -} - -static struct rdma_hw_stats * -mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) -{ - struct mlx5_ib_dev *dev = to_mdev(counter->device); - const struct mlx5_ib_counters *cnts = - get_counters(dev, counter->port - 1); - - return rdma_alloc_hw_stats_struct(cnts->names, - cnts->num_q_counters + - cnts->num_cong_counters + - cnts->num_ext_ppcnt_counters, - RDMA_HW_STATS_DEFAULT_LIFESPAN); -} - -static int mlx5_ib_counter_update_stats(struct rdma_counter *counter) -{ - struct mlx5_ib_dev *dev = to_mdev(counter->device); - const struct mlx5_ib_counters *cnts = - get_counters(dev, counter->port - 1); - - return mlx5_ib_query_q_counters(dev->mdev, cnts, - counter->stats, counter->id); -} - -static int mlx5_ib_counter_dealloc(struct rdma_counter *counter) -{ - struct mlx5_ib_dev *dev = to_mdev(counter->device); - u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; - - if (!counter->id) - return 0; - - MLX5_SET(dealloc_q_counter_in, in, opcode, - MLX5_CMD_OP_DEALLOC_Q_COUNTER); - MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id); - return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in); -} - -static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, - struct ib_qp *qp) -{ - struct mlx5_ib_dev *dev = to_mdev(qp->device); - int err; - - if (!counter->id) { - u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; - u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; - - MLX5_SET(alloc_q_counter_in, in, opcode, - MLX5_CMD_OP_ALLOC_Q_COUNTER); - MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID); - err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out); - if (err) - return err; - counter->id = - MLX5_GET(alloc_q_counter_out, out, counter_set_id); - } - - err = mlx5_ib_qp_set_counter(qp, counter); - if (err) - goto fail_set_counter; - - return 0; - -fail_set_counter: - mlx5_ib_counter_dealloc(counter); - counter->id = 0; - - return err; -} - -static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp) -{ - return mlx5_ib_qp_set_counter(qp, NULL); -} - static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num, enum rdma_netdev_t type, struct rdma_netdev_alloc_params *params) @@ -6387,67 +5793,6 @@ static const struct uapi_definition mlx5_ib_defs[] = { {} }; -static int mlx5_ib_read_counters(struct ib_counters *counters, - struct ib_counters_read_attr *read_attr, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); - struct mlx5_read_counters_attr mread_attr = {}; - struct mlx5_ib_flow_counters_desc *desc; - int ret, i; - - mutex_lock(&mcounters->mcntrs_mutex); - if (mcounters->cntrs_max_index > read_attr->ncounters) { - ret = -EINVAL; - goto err_bound; - } - - mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64), - GFP_KERNEL); - if (!mread_attr.out) { - ret = -ENOMEM; - goto err_bound; - } - - mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl; - mread_attr.flags = read_attr->flags; - ret = mcounters->read_counters(counters->device, &mread_attr); - if (ret) - goto err_read; - - /* do the pass over the counters data array to assign according to the - * descriptions and indexing pairs - */ - desc = mcounters->counters_data; - for (i = 0; i < mcounters->ncounters; i++) - 
read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description]; - -err_read: - kfree(mread_attr.out); -err_bound: - mutex_unlock(&mcounters->mcntrs_mutex); - return ret; -} - -static void mlx5_ib_destroy_counters(struct ib_counters *counters) -{ - struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); - - counters_clear_description(counters); - if (mcounters->hw_cntrs_hndl) - mlx5_fc_destroy(to_mdev(counters->device)->mdev, - mcounters->hw_cntrs_hndl); -} - -static int mlx5_ib_create_counters(struct ib_counters *counters, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); - - mutex_init(&mcounters->mcntrs_mutex); - return 0; -} - static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); @@ -6568,7 +5913,6 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .attach_mcast = mlx5_ib_mcg_attach, .check_mr_status = mlx5_ib_check_mr_status, .create_ah = mlx5_ib_create_ah, - .create_counters = mlx5_ib_create_counters, .create_cq = mlx5_ib_create_cq, .create_flow = mlx5_ib_create_flow, .create_qp = mlx5_ib_create_qp, @@ -6578,7 +5922,6 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .del_gid = mlx5_ib_del_gid, .dereg_mr = mlx5_ib_dereg_mr, .destroy_ah = mlx5_ib_destroy_ah, - .destroy_counters = mlx5_ib_destroy_counters, .destroy_cq = mlx5_ib_destroy_cq, .destroy_flow = mlx5_ib_destroy_flow, .destroy_flow_action = mlx5_ib_destroy_flow_action, @@ -6613,7 +5956,6 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .query_qp = mlx5_ib_query_qp, .query_srq = mlx5_ib_query_srq, .query_ucontext = mlx5_ib_query_ucontext, - .read_counters = mlx5_ib_read_counters, .reg_user_mr = mlx5_ib_reg_user_mr, .req_notify_cq = mlx5_ib_arm_cq, .rereg_user_mr = mlx5_ib_rereg_user_mr, @@ -6930,33 +6272,6 @@ static void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev) mlx5_ib_odp_cleanup_one(dev); } -static const struct ib_device_ops mlx5_ib_dev_hw_stats_ops = { - .alloc_hw_stats = mlx5_ib_alloc_hw_stats, - .get_hw_stats = mlx5_ib_get_hw_stats, - .counter_bind_qp = mlx5_ib_counter_bind_qp, - .counter_unbind_qp = mlx5_ib_counter_unbind_qp, - .counter_dealloc = mlx5_ib_counter_dealloc, - .counter_alloc_stats = mlx5_ib_counter_alloc_stats, - .counter_update_stats = mlx5_ib_counter_update_stats, -}; - -static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) -{ - if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) { - ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_hw_stats_ops); - - return mlx5_ib_alloc_counters(dev); - } - - return 0; -} - -static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev) -{ - if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) - mlx5_ib_dealloc_counters(dev); -} - static int mlx5_ib_stage_cong_debugfs_init(struct mlx5_ib_dev *dev) { mlx5_ib_init_cong_debugfs(dev, @@ -7148,8 +6463,8 @@ static const struct mlx5_ib_profile pf_profile = { mlx5_ib_stage_odp_init, mlx5_ib_stage_odp_cleanup), STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, - mlx5_ib_stage_counters_init, - mlx5_ib_stage_counters_cleanup), + mlx5_ib_counters_init, + mlx5_ib_counters_cleanup), STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS, mlx5_ib_stage_cong_debugfs_init, mlx5_ib_stage_cong_debugfs_cleanup), @@ -7208,8 +6523,8 @@ const struct mlx5_ib_profile raw_eth_profile = { mlx5_ib_stage_dev_notifier_init, mlx5_ib_stage_dev_notifier_cleanup), STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, - mlx5_ib_stage_counters_init, - mlx5_ib_stage_counters_cleanup), + mlx5_ib_counters_init, + mlx5_ib_counters_cleanup), 
STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS, mlx5_ib_stage_cong_debugfs_init, mlx5_ib_stage_cong_debugfs_cleanup), diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 4787cf41cc1d..4444a09f6fbd 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1488,9 +1488,6 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, u32 bfregn, bool dyn_bfreg); -int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter); -u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num); - static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev, bool do_modify_atomic, int access_flags) { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index c6a4db9f6a0b..88236f84c99b 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -38,6 +38,7 @@ #include #include "mlx5_ib.h" #include "ib_rep.h" +#include "counters.h" #include "cmd.h" #include "qp.h" #include "wr.h" diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h index 82ea2b94dfa6..ba899df44c5b 100644 --- a/drivers/infiniband/hw/mlx5/qp.h +++ b/drivers/infiniband/hw/mlx5/qp.h @@ -43,4 +43,5 @@ void mlx5_core_res_put(struct mlx5_core_rsc_common *res); int mlx5_core_xrcd_alloc(struct mlx5_ib_dev *dev, u32 *xrcdn); int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn); +int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter); #endif /* _MLX5_IB_QP_H */ -- cgit From f7c4ffda0cbf7823915cbfebdbbe8460e7eeca67 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 2 Jul 2020 11:18:07 +0300 Subject: RDMA/mlx5: Separate flow steering logic from main.c Move the flow steering logic into a separate file, and rename flow.c to fs.c because the new name better describes its content.
Link: https://lore.kernel.org/r/20200702081809.423482-5-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/Makefile | 2 +- drivers/infiniband/hw/mlx5/devx.c | 59 +- drivers/infiniband/hw/mlx5/devx.h | 26 + drivers/infiniband/hw/mlx5/flow.c | 765 ----------- drivers/infiniband/hw/mlx5/fs.c | 2514 ++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/fs.h | 29 + drivers/infiniband/hw/mlx5/main.c | 1721 +---------------------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 20 +- 8 files changed, 2579 insertions(+), 2557 deletions(-) create mode 100644 drivers/infiniband/hw/mlx5/devx.h delete mode 100644 drivers/infiniband/hw/mlx5/flow.c create mode 100644 drivers/infiniband/hw/mlx5/fs.c create mode 100644 drivers/infiniband/hw/mlx5/fs.h diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 34eaceb293b4..b4c009bb0db6 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -23,6 +23,6 @@ mlx5_ib-y := ah.o \ mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o \ - flow.o \ + fs.o \ qos.o \ std_types.o diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 9454a66c12cc..c7dffd799d16 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -14,6 +14,7 @@ #include #include #include "mlx5_ib.h" +#include "devx.h" #include "qp.h" #include @@ -89,22 +90,6 @@ struct devx_async_event_file { u8 is_destroyed:1; }; -#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in) -struct devx_obj { - struct mlx5_ib_dev *ib_dev; - u64 obj_id; - u32 dinlen; /* destroy inbox length */ - u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; - u32 flags; - union { - struct mlx5_ib_devx_mr devx_mr; - struct mlx5_core_dct core_dct; - struct mlx5_core_cq core_cq; - u32 flow_counter_bulk_size; - }; - struct list_head event_sub; /* holds devx_event_subscription entries */ -}; - struct devx_umem { struct mlx5_core_dev *mdev; struct ib_umem *umem; @@ -171,48 +156,6 @@ void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); } -bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type) -{ - struct devx_obj *devx_obj = obj; - u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); - - switch (opcode) { - case MLX5_CMD_OP_DESTROY_TIR: - *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; - *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, - obj_id); - return true; - - case MLX5_CMD_OP_DESTROY_FLOW_TABLE: - *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox, - table_id); - return true; - default: - return false; - } -} - -bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id) -{ - struct devx_obj *devx_obj = obj; - u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); - - if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) { - - if (offset && offset >= devx_obj->flow_counter_bulk_size) - return false; - - *counter_id = MLX5_GET(dealloc_flow_counter_in, - devx_obj->dinbox, - flow_counter_id); - *counter_id += offset; - return true; - } - - return false; -} - static bool is_legacy_unaffiliated_event_num(u16 event_num) { switch (event_num) { diff --git a/drivers/infiniband/hw/mlx5/devx.h b/drivers/infiniband/hw/mlx5/devx.h new file mode 100644 
index 000000000000..9afaa5d22797 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/devx.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved. + */ + +#ifndef _MLX5_IB_DEVX_H +#define _MLX5_IB_DEVX_H + +#include "mlx5_ib.h" + +#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in) +struct devx_obj { + struct mlx5_ib_dev *ib_dev; + u64 obj_id; + u32 dinlen; /* destroy inbox length */ + u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; + u32 flags; + union { + struct mlx5_ib_devx_mr devx_mr; + struct mlx5_core_dct core_dct; + struct mlx5_core_cq core_cq; + u32 flow_counter_bulk_size; + }; + struct list_head event_sub; /* holds devx_event_subscription entries */ +}; +#endif /* _MLX5_IB_DEVX_H */ diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c deleted file mode 100644 index 216a1108ad34..000000000000 --- a/drivers/infiniband/hw/mlx5/flow.c +++ /dev/null @@ -1,765 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB -/* - * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "mlx5_ib.h" - -#define UVERBS_MODULE_NAME mlx5_ib -#include - -static int -mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type, - enum mlx5_flow_namespace_type *namespace) -{ - switch (table_type) { - case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX: - *namespace = MLX5_FLOW_NAMESPACE_BYPASS; - break; - case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX: - *namespace = MLX5_FLOW_NAMESPACE_EGRESS; - break; - case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB: - *namespace = MLX5_FLOW_NAMESPACE_FDB; - break; - case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX: - *namespace = MLX5_FLOW_NAMESPACE_RDMA_RX; - break; - case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX: - *namespace = MLX5_FLOW_NAMESPACE_RDMA_TX; - break; - default: - return -EINVAL; - } - - return 0; -} - -static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { - [MLX5_IB_FLOW_TYPE_NORMAL] = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - .u.ptr = { - .len = sizeof(u16), /* data is priority */ - .min_len = sizeof(u16), - } - }, - [MLX5_IB_FLOW_TYPE_SNIFFER] = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_NO_DATA(), - }, - [MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_NO_DATA(), - }, - [MLX5_IB_FLOW_TYPE_MC_DEFAULT] = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_NO_DATA(), - }, -}; - -static int get_dests(struct uverbs_attr_bundle *attrs, - struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id, - int *dest_type, struct ib_qp **qp, u32 *flags) -{ - bool dest_devx, dest_qp; - void *devx_obj; - int err; - - dest_devx = uverbs_attr_is_valid(attrs, - MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); - dest_qp = uverbs_attr_is_valid(attrs, - MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); - - *flags = 0; - err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS, - MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS | - MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP); - if (err) - return err; - - /* Both flags are not allowed */ - if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS && - *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP) - return -EINVAL; - - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { - if (dest_devx && (dest_qp || *flags)) - return -EINVAL; - else if (dest_qp && *flags) - return -EINVAL; - } - - /* Allow only DEVX object, drop as dest for FDB */ - if (fs_matcher->ns_type == 
MLX5_FLOW_NAMESPACE_FDB && !(dest_devx || - (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP))) - return -EINVAL; - - /* Allow only DEVX object or QP as dest when inserting to RDMA_RX */ - if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) && - ((!dest_devx && !dest_qp) || (dest_devx && dest_qp))) - return -EINVAL; - - *qp = NULL; - if (dest_devx) { - devx_obj = - uverbs_attr_get_obj(attrs, - MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); - - /* Verify that the given DEVX object is a flow - * steering destination. - */ - if (!mlx5_ib_devx_is_flow_dest(devx_obj, dest_id, dest_type)) - return -EINVAL; - /* Allow only flow table as dest when inserting to FDB or RDMA_RX */ - if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB || - fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) && - *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) - return -EINVAL; - } else if (dest_qp) { - struct mlx5_ib_qp *mqp; - - *qp = uverbs_attr_get_obj(attrs, - MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); - if (IS_ERR(*qp)) - return PTR_ERR(*qp); - - if ((*qp)->qp_type != IB_QPT_RAW_PACKET) - return -EINVAL; - - mqp = to_mqp(*qp); - if (mqp->is_rss) - *dest_id = mqp->rss_qp.tirn; - else - *dest_id = mqp->raw_packet_qp.rq.tirn; - *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) { - *dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT; - } - - if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR && - fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) - return -EINVAL; - - return 0; -} - -#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2 -static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_flow_context flow_context = {.flow_tag = - MLX5_FS_DEFAULT_FLOW_TAG}; - u32 *offset_attr, offset = 0, counter_id = 0; - int dest_id, dest_type, inlen, len, ret, i; - struct mlx5_ib_flow_handler *flow_handler; - struct mlx5_ib_flow_matcher *fs_matcher; - struct ib_uobject **arr_flow_actions; - struct ib_uflow_resources *uflow_res; - struct mlx5_flow_act flow_act = {}; - struct ib_qp *qp = NULL; - void *devx_obj, *cmd_in; - struct ib_uobject *uobj; - struct mlx5_ib_dev *dev; - u32 flags; - - if (!capable(CAP_NET_RAW)) - return -EPERM; - - fs_matcher = uverbs_attr_get_obj(attrs, - MLX5_IB_ATTR_CREATE_FLOW_MATCHER); - uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); - dev = mlx5_udata_to_mdev(&attrs->driver_udata); - - if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags)) - return -EINVAL; - - if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS) - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS; - - if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP) - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP; - - len = uverbs_attr_get_uobjs_arr(attrs, - MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions); - if (len) { - devx_obj = arr_flow_actions[0]->object; - - if (uverbs_attr_is_valid(attrs, - MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) { - - int num_offsets = uverbs_attr_ptr_get_array_size( - attrs, - MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, - sizeof(u32)); - - if (num_offsets != 1) - return -EINVAL; - - offset_attr = uverbs_attr_get_alloced_ptr( - attrs, - MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET); - offset = *offset_attr; - } - - if (!mlx5_ib_devx_is_flow_counter(devx_obj, offset, - &counter_id)) - return -EINVAL; - - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; - } - - cmd_in = uverbs_attr_get_alloced_ptr( - attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); - inlen 
= uverbs_attr_get_len(attrs, - MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); - - uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS); - if (!uflow_res) - return -ENOMEM; - - len = uverbs_attr_get_uobjs_arr(attrs, - MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions); - for (i = 0; i < len; i++) { - struct mlx5_ib_flow_action *maction = - to_mflow_act(arr_flow_actions[i]->object); - - ret = parse_flow_flow_action(maction, false, &flow_act); - if (ret) - goto err_out; - flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE, - arr_flow_actions[i]->object); - } - - ret = uverbs_copy_from(&flow_context.flow_tag, attrs, - MLX5_IB_ATTR_CREATE_FLOW_TAG); - if (!ret) { - if (flow_context.flow_tag >= BIT(24)) { - ret = -EINVAL; - goto err_out; - } - flow_context.flags |= FLOW_CONTEXT_HAS_TAG; - } - - flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, - &flow_context, - &flow_act, - counter_id, - cmd_in, inlen, - dest_id, dest_type); - if (IS_ERR(flow_handler)) { - ret = PTR_ERR(flow_handler); - goto err_out; - } - - ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res); - - return 0; -err_out: - ib_uverbs_flow_resources_free(uflow_res); - return ret; -} - -static int flow_matcher_cleanup(struct ib_uobject *uobject, - enum rdma_remove_reason why, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_flow_matcher *obj = uobject->object; - int ret; - - ret = ib_destroy_usecnt(&obj->usecnt, why, uobject); - if (ret) - return ret; - - kfree(obj); - return 0; -} - -static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs, - struct mlx5_ib_flow_matcher *obj) -{ - enum mlx5_ib_uapi_flow_table_type ft_type = - MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX; - u32 flags; - int err; - - /* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older - * users should switch to it. 
We leave this to not break userspace - */ - if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) && - uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) - return -EINVAL; - - if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) { - err = uverbs_get_const(&ft_type, attrs, - MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE); - if (err) - return err; - - err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type); - if (err) - return err; - - return 0; - } - - if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) { - err = uverbs_get_flags32(&flags, attrs, - MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, - IB_FLOW_ATTR_FLAGS_EGRESS); - if (err) - return err; - - if (flags) { - mlx5_ib_ft_type_to_namespace( - MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX, - &obj->ns_type); - return 0; - } - } - - obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS; - - return 0; -} - -static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( - struct uverbs_attr_bundle *attrs) -{ - struct ib_uobject *uobj = uverbs_attr_get_uobject( - attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); - struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata); - struct mlx5_ib_flow_matcher *obj; - int err; - - obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL); - if (!obj) - return -ENOMEM; - - obj->mask_len = uverbs_attr_get_len( - attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); - err = uverbs_copy_from(&obj->matcher_mask, - attrs, - MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); - if (err) - goto end; - - obj->flow_type = uverbs_attr_get_enum_id( - attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE); - - if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) { - err = uverbs_copy_from(&obj->priority, - attrs, - MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE); - if (err) - goto end; - } - - err = uverbs_copy_from(&obj->match_criteria_enable, - attrs, - MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA); - if (err) - goto end; - - err = mlx5_ib_matcher_ns(attrs, obj); - if (err) - goto end; - - uobj->object = obj; - obj->mdev = dev->mdev; - atomic_set(&obj->usecnt, 0); - return 0; - -end: - kfree(obj); - return err; -} - -void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) -{ - switch (maction->flow_action_raw.sub_type) { - case MLX5_IB_FLOW_ACTION_MODIFY_HEADER: - mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev, - maction->flow_action_raw.modify_hdr); - break; - case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT: - mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev, - maction->flow_action_raw.pkt_reformat); - break; - case MLX5_IB_FLOW_ACTION_DECAP: - break; - default: - break; - } -} - -static struct ib_flow_action * -mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev, - enum mlx5_ib_uapi_flow_table_type ft_type, - u8 num_actions, void *in) -{ - enum mlx5_flow_namespace_type namespace; - struct mlx5_ib_flow_action *maction; - int ret; - - ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace); - if (ret) - return ERR_PTR(-EINVAL); - - maction = kzalloc(sizeof(*maction), GFP_KERNEL); - if (!maction) - return ERR_PTR(-ENOMEM); - - maction->flow_action_raw.modify_hdr = - mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in); - - if (IS_ERR(maction->flow_action_raw.modify_hdr)) { - ret = PTR_ERR(maction->flow_action_raw.modify_hdr); - kfree(maction); - return ERR_PTR(ret); - } - maction->flow_action_raw.sub_type = - MLX5_IB_FLOW_ACTION_MODIFY_HEADER; - maction->flow_action_raw.dev = dev; - - return &maction->ib_action; -} - -static bool mlx5_ib_modify_header_supported(struct 
mlx5_ib_dev *dev) -{ - return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - max_modify_header_actions) || - MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, - max_modify_header_actions) || - MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, - max_modify_header_actions); -} - -static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( - struct uverbs_attr_bundle *attrs) -{ - struct ib_uobject *uobj = uverbs_attr_get_uobject( - attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE); - struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata); - enum mlx5_ib_uapi_flow_table_type ft_type; - struct ib_flow_action *action; - int num_actions; - void *in; - int ret; - - if (!mlx5_ib_modify_header_supported(mdev)) - return -EOPNOTSUPP; - - in = uverbs_attr_get_alloced_ptr(attrs, - MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM); - - num_actions = uverbs_attr_ptr_get_array_size( - attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, - MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)); - if (num_actions < 0) - return num_actions; - - ret = uverbs_get_const(&ft_type, attrs, - MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE); - if (ret) - return ret; - action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in); - if (IS_ERR(action)) - return PTR_ERR(action); - - uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev, - IB_FLOW_ACTION_UNSPECIFIED); - - return 0; -} - -static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev, - u8 packet_reformat_type, - u8 ft_type) -{ - switch (packet_reformat_type) { - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL: - if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX) - return MLX5_CAP_FLOWTABLE(ibdev->mdev, - encap_general_header); - break; - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL: - if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX) - return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev, - reformat_l2_to_l3_tunnel); - break; - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2: - if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX) - return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, - reformat_l3_tunnel_to_l2); - break; - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2: - if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX) - return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap); - break; - default: - break; - } - - return false; -} - -static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt) -{ - switch (dv_prt) { - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL: - *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL; - break; - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2: - *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; - break; - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL: - *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL; - break; - default: - return -EINVAL; - } - - return 0; -} - -static int mlx5_ib_flow_action_create_packet_reformat_ctx( - struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_action *maction, - u8 ft_type, u8 dv_prt, - void *in, size_t len) -{ - enum mlx5_flow_namespace_type namespace; - u8 prm_prt; - int ret; - - ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace); - if (ret) - return ret; - - ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt); - if (ret) - return ret; - - maction->flow_action_raw.pkt_reformat = - mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len, - in, namespace); - if (IS_ERR(maction->flow_action_raw.pkt_reformat)) { - ret = 
PTR_ERR(maction->flow_action_raw.pkt_reformat); - return ret; - } - - maction->flow_action_raw.sub_type = - MLX5_IB_FLOW_ACTION_PACKET_REFORMAT; - maction->flow_action_raw.dev = dev; - - return 0; -} - -static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)( - struct uverbs_attr_bundle *attrs) -{ - struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, - MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE); - struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata); - enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt; - enum mlx5_ib_uapi_flow_table_type ft_type; - struct mlx5_ib_flow_action *maction; - int ret; - - ret = uverbs_get_const(&ft_type, attrs, - MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE); - if (ret) - return ret; - - ret = uverbs_get_const(&dv_prt, attrs, - MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE); - if (ret) - return ret; - - if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type)) - return -EOPNOTSUPP; - - maction = kzalloc(sizeof(*maction), GFP_KERNEL); - if (!maction) - return -ENOMEM; - - if (dv_prt == - MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) { - maction->flow_action_raw.sub_type = - MLX5_IB_FLOW_ACTION_DECAP; - maction->flow_action_raw.dev = mdev; - } else { - void *in; - int len; - - in = uverbs_attr_get_alloced_ptr(attrs, - MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF); - if (IS_ERR(in)) { - ret = PTR_ERR(in); - goto free_maction; - } - - len = uverbs_attr_get_len(attrs, - MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF); - - ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev, - maction, ft_type, dv_prt, in, len); - if (ret) - goto free_maction; - } - - uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev, - IB_FLOW_ACTION_UNSPECIFIED); - return 0; - -free_maction: - kfree(maction); - return ret; -} - -DECLARE_UVERBS_NAMED_METHOD( - MLX5_IB_METHOD_CREATE_FLOW, - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, - UVERBS_OBJECT_FLOW, - UVERBS_ACCESS_NEW, - UA_MANDATORY), - UVERBS_ATTR_PTR_IN( - MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE, - UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)), - UA_MANDATORY, - UA_ALLOC_AND_COPY), - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER, - MLX5_IB_OBJECT_FLOW_MATCHER, - UVERBS_ACCESS_READ, - UA_MANDATORY), - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP, - UVERBS_OBJECT_QP, - UVERBS_ACCESS_READ), - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX, - MLX5_IB_OBJECT_DEVX_OBJ, - UVERBS_ACCESS_READ), - UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_READ, 1, - MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS, - UA_OPTIONAL), - UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG, - UVERBS_ATTR_TYPE(u32), - UA_OPTIONAL), - UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, - MLX5_IB_OBJECT_DEVX_OBJ, - UVERBS_ACCESS_READ, 1, 1, - UA_OPTIONAL), - UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, - UVERBS_ATTR_MIN_SIZE(sizeof(u32)), - UA_OPTIONAL, - UA_ALLOC_AND_COPY), - UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS, - enum mlx5_ib_create_flow_flags, - UA_OPTIONAL)); - -DECLARE_UVERBS_NAMED_METHOD_DESTROY( - MLX5_IB_METHOD_DESTROY_FLOW, - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, - UVERBS_OBJECT_FLOW, - UVERBS_ACCESS_DESTROY, - UA_MANDATORY)); - -ADD_UVERBS_METHODS(mlx5_ib_fs, - UVERBS_OBJECT_FLOW, - &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW), - &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW)); - -DECLARE_UVERBS_NAMED_METHOD( - 
MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER, - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_NEW, - UA_MANDATORY), - UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, - UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES( - set_add_copy_action_in_auto)), - UA_MANDATORY, - UA_ALLOC_AND_COPY), - UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE, - enum mlx5_ib_uapi_flow_table_type, - UA_MANDATORY)); - -DECLARE_UVERBS_NAMED_METHOD( - MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT, - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_NEW, - UA_MANDATORY), - UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF, - UVERBS_ATTR_MIN_SIZE(1), - UA_ALLOC_AND_COPY, - UA_OPTIONAL), - UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE, - enum mlx5_ib_uapi_flow_action_packet_reformat_type, - UA_MANDATORY), - UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE, - enum mlx5_ib_uapi_flow_table_type, - UA_MANDATORY)); - -ADD_UVERBS_METHODS( - mlx5_ib_flow_actions, - UVERBS_OBJECT_FLOW_ACTION, - &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER), - &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)); - -DECLARE_UVERBS_NAMED_METHOD( - MLX5_IB_METHOD_FLOW_MATCHER_CREATE, - UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE, - MLX5_IB_OBJECT_FLOW_MATCHER, - UVERBS_ACCESS_NEW, - UA_MANDATORY), - UVERBS_ATTR_PTR_IN( - MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK, - UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)), - UA_MANDATORY), - UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE, - mlx5_ib_flow_type, - UA_MANDATORY), - UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, - UVERBS_ATTR_TYPE(u8), - UA_MANDATORY), - UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, - enum ib_flow_flags, - UA_OPTIONAL), - UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE, - enum mlx5_ib_uapi_flow_table_type, - UA_OPTIONAL)); - -DECLARE_UVERBS_NAMED_METHOD_DESTROY( - MLX5_IB_METHOD_FLOW_MATCHER_DESTROY, - UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE, - MLX5_IB_OBJECT_FLOW_MATCHER, - UVERBS_ACCESS_DESTROY, - UA_MANDATORY)); - -DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER, - UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup), - &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE), - &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY)); - -const struct uapi_definition mlx5_ib_flow_defs[] = { - UAPI_DEF_CHAIN_OBJ_TREE_NAMED( - MLX5_IB_OBJECT_FLOW_MATCHER), - UAPI_DEF_CHAIN_OBJ_TREE( - UVERBS_OBJECT_FLOW, - &mlx5_ib_fs), - UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, - &mlx5_ib_flow_actions), - {}, -}; diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c new file mode 100644 index 000000000000..0d8abb7c3cdf --- /dev/null +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -0,0 +1,2514 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mlx5_ib.h" +#include "counters.h" +#include "devx.h" +#include "fs.h" + +#define UVERBS_MODULE_NAME mlx5_ib +#include + +enum { + MATCH_CRITERIA_ENABLE_OUTER_BIT, + MATCH_CRITERIA_ENABLE_MISC_BIT, + MATCH_CRITERIA_ENABLE_INNER_BIT, + MATCH_CRITERIA_ENABLE_MISC2_BIT +}; + +#define HEADER_IS_ZERO(match_criteria, headers) \ + !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \ + 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \ + +static u8 get_match_criteria_enable(u32 *match_criteria) +{ + u8 match_criteria_enable; + + match_criteria_enable = + (!HEADER_IS_ZERO(match_criteria, outer_headers)) << + MATCH_CRITERIA_ENABLE_OUTER_BIT; + match_criteria_enable |= + (!HEADER_IS_ZERO(match_criteria, misc_parameters)) << + MATCH_CRITERIA_ENABLE_MISC_BIT; + match_criteria_enable |= + (!HEADER_IS_ZERO(match_criteria, inner_headers)) << + MATCH_CRITERIA_ENABLE_INNER_BIT; + match_criteria_enable |= + (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) << + MATCH_CRITERIA_ENABLE_MISC2_BIT; + + return match_criteria_enable; +} + +static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) +{ + u8 entry_mask; + u8 entry_val; + int err = 0; + + if (!mask) + goto out; + + entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c, + ip_protocol); + entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v, + ip_protocol); + if (!entry_mask) { + MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask); + MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); + goto out; + } + /* Don't override existing ip protocol */ + if (mask != entry_mask || val != entry_val) + err = -EINVAL; +out: + return err; +} + +static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val, + bool inner) +{ + if (inner) { + MLX5_SET(fte_match_set_misc, + misc_c, inner_ipv6_flow_label, mask); + MLX5_SET(fte_match_set_misc, + misc_v, inner_ipv6_flow_label, val); + } else { + MLX5_SET(fte_match_set_misc, + misc_c, outer_ipv6_flow_label, mask); + MLX5_SET(fte_match_set_misc, + misc_v, outer_ipv6_flow_label, val); + } +} + +static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) +{ + MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask); + MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val); + MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2); + MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2); +} + +static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask) +{ + if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) && + !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL)) + return -EOPNOTSUPP; + + if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) && + !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP)) + return -EOPNOTSUPP; + + if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) && + !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS)) + return -EOPNOTSUPP; + + if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) && + !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL)) + return -EOPNOTSUPP; + + return 0; +} + +#define LAST_ETH_FIELD vlan_tag +#define LAST_IB_FIELD sl +#define LAST_IPV4_FIELD tos +#define LAST_IPV6_FIELD traffic_class +#define LAST_TCP_UDP_FIELD src_port +#define LAST_TUNNEL_FIELD tunnel_id +#define LAST_FLOW_TAG_FIELD tag_id +#define LAST_DROP_FIELD size +#define LAST_COUNTERS_FIELD counters + +/* Field is the last supported field */ +#define FIELDS_NOT_SUPPORTED(filter, field)\ + memchr_inv((void 
*)&filter.field +\ + sizeof(filter.field), 0,\ + sizeof(filter) -\ + offsetof(typeof(filter), field) -\ + sizeof(filter.field)) + +int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, + bool is_egress, + struct mlx5_flow_act *action) +{ + + switch (maction->ib_action.type) { + case IB_FLOW_ACTION_ESP: + if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_DECRYPT)) + return -EINVAL; + /* Currently only AES_GCM keymat is supported by the driver */ + action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx; + action->action |= is_egress ? + MLX5_FLOW_CONTEXT_ACTION_ENCRYPT : + MLX5_FLOW_CONTEXT_ACTION_DECRYPT; + return 0; + case IB_FLOW_ACTION_UNSPECIFIED: + if (maction->flow_action_raw.sub_type == + MLX5_IB_FLOW_ACTION_MODIFY_HEADER) { + if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + return -EINVAL; + action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + action->modify_hdr = + maction->flow_action_raw.modify_hdr; + return 0; + } + if (maction->flow_action_raw.sub_type == + MLX5_IB_FLOW_ACTION_DECAP) { + if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) + return -EINVAL; + action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + return 0; + } + if (maction->flow_action_raw.sub_type == + MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) { + if (action->action & + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) + return -EINVAL; + action->action |= + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + action->pkt_reformat = + maction->flow_action_raw.pkt_reformat; + return 0; + } + fallthrough; + default: + return -EOPNOTSUPP; + } +} + +static int parse_flow_attr(struct mlx5_core_dev *mdev, + struct mlx5_flow_spec *spec, + const union ib_flow_spec *ib_spec, + const struct ib_flow_attr *flow_attr, + struct mlx5_flow_act *action, u32 prev_type) +{ + struct mlx5_flow_context *flow_context = &spec->flow_context; + u32 *match_c = spec->match_criteria; + u32 *match_v = spec->match_value; + void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, + misc_parameters); + void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v, + misc_parameters); + void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c, + misc_parameters_2); + void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v, + misc_parameters_2); + void *headers_c; + void *headers_v; + int match_ipv; + int ret; + + if (ib_spec->type & IB_FLOW_SPEC_INNER) { + headers_c = MLX5_ADDR_OF(fte_match_param, match_c, + inner_headers); + headers_v = MLX5_ADDR_OF(fte_match_param, match_v, + inner_headers); + match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.inner_ip_version); + } else { + headers_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + headers_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers); + match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_ip_version); + } + + switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) { + case IB_FLOW_SPEC_ETH: + if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD)) + return -EOPNOTSUPP; + + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dmac_47_16), + ib_spec->eth.mask.dst_mac); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dmac_47_16), + ib_spec->eth.val.dst_mac); + + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + smac_47_16), + ib_spec->eth.mask.src_mac); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + smac_47_16), + ib_spec->eth.val.src_mac); + + if (ib_spec->eth.mask.vlan_tag) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + cvlan_tag, 1); + 
MLX5_SET(fte_match_set_lyr_2_4, headers_v, + cvlan_tag, 1); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + first_vid, ntohs(ib_spec->eth.mask.vlan_tag)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + first_vid, ntohs(ib_spec->eth.val.vlan_tag)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + first_cfi, + ntohs(ib_spec->eth.mask.vlan_tag) >> 12); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + first_cfi, + ntohs(ib_spec->eth.val.vlan_tag) >> 12); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + first_prio, + ntohs(ib_spec->eth.mask.vlan_tag) >> 13); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + first_prio, + ntohs(ib_spec->eth.val.vlan_tag) >> 13); + } + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + ethertype, ntohs(ib_spec->eth.mask.ether_type)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + ethertype, ntohs(ib_spec->eth.val.ether_type)); + break; + case IB_FLOW_SPEC_IPV4: + if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD)) + return -EOPNOTSUPP; + + if (match_ipv) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + ip_version, 0xf); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + ip_version, MLX5_FS_IPV4_VERSION); + } else { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + ethertype, 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + ethertype, ETH_P_IP); + } + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.mask.src_ip, + sizeof(ib_spec->ipv4.mask.src_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.val.src_ip, + sizeof(ib_spec->ipv4.val.src_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.mask.dst_ip, + sizeof(ib_spec->ipv4.mask.dst_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.val.dst_ip, + sizeof(ib_spec->ipv4.val.dst_ip)); + + set_tos(headers_c, headers_v, + ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos); + + if (set_proto(headers_c, headers_v, + ib_spec->ipv4.mask.proto, + ib_spec->ipv4.val.proto)) + return -EINVAL; + break; + case IB_FLOW_SPEC_IPV6: + if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD)) + return -EOPNOTSUPP; + + if (match_ipv) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + ip_version, 0xf); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + ip_version, MLX5_FS_IPV6_VERSION); + } else { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + ethertype, 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + ethertype, ETH_P_IPV6); + } + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &ib_spec->ipv6.mask.src_ip, + sizeof(ib_spec->ipv6.mask.src_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &ib_spec->ipv6.val.src_ip, + sizeof(ib_spec->ipv6.val.src_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &ib_spec->ipv6.mask.dst_ip, + sizeof(ib_spec->ipv6.mask.dst_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &ib_spec->ipv6.val.dst_ip, + sizeof(ib_spec->ipv6.val.dst_ip)); + + set_tos(headers_c, headers_v, + ib_spec->ipv6.mask.traffic_class, + ib_spec->ipv6.val.traffic_class); + + if (set_proto(headers_c, headers_v, + ib_spec->ipv6.mask.next_hdr, + ib_spec->ipv6.val.next_hdr)) + return -EINVAL; + + set_flow_label(misc_params_c, misc_params_v, + ntohl(ib_spec->ipv6.mask.flow_label), + 
ntohl(ib_spec->ipv6.val.flow_label), + ib_spec->type & IB_FLOW_SPEC_INNER); + break; + case IB_FLOW_SPEC_ESP: + if (ib_spec->esp.mask.seq) + return -EOPNOTSUPP; + + MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, + ntohl(ib_spec->esp.mask.spi)); + MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, + ntohl(ib_spec->esp.val.spi)); + break; + case IB_FLOW_SPEC_TCP: + if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, + LAST_TCP_UDP_FIELD)) + return -EOPNOTSUPP; + + if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP)) + return -EINVAL; + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport, + ntohs(ib_spec->tcp_udp.mask.src_port)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport, + ntohs(ib_spec->tcp_udp.val.src_port)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport, + ntohs(ib_spec->tcp_udp.mask.dst_port)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport, + ntohs(ib_spec->tcp_udp.val.dst_port)); + break; + case IB_FLOW_SPEC_UDP: + if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, + LAST_TCP_UDP_FIELD)) + return -EOPNOTSUPP; + + if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP)) + return -EINVAL; + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, + ntohs(ib_spec->tcp_udp.mask.src_port)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, + ntohs(ib_spec->tcp_udp.val.src_port)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, + ntohs(ib_spec->tcp_udp.mask.dst_port)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, + ntohs(ib_spec->tcp_udp.val.dst_port)); + break; + case IB_FLOW_SPEC_GRE: + if (ib_spec->gre.mask.c_ks_res0_ver) + return -EOPNOTSUPP; + + if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE)) + return -EINVAL; + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, + 0xff); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, + IPPROTO_GRE); + + MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol, + ntohs(ib_spec->gre.mask.protocol)); + MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol, + ntohs(ib_spec->gre.val.protocol)); + + memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c, + gre_key.nvgre.hi), + &ib_spec->gre.mask.key, + sizeof(ib_spec->gre.mask.key)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v, + gre_key.nvgre.hi), + &ib_spec->gre.val.key, + sizeof(ib_spec->gre.val.key)); + break; + case IB_FLOW_SPEC_MPLS: + switch (prev_type) { + case IB_FLOW_SPEC_UDP: + if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_first_mpls_over_udp), + &ib_spec->mpls.mask.tag)) + return -EOPNOTSUPP; + + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, + outer_first_mpls_over_udp), + &ib_spec->mpls.val.tag, + sizeof(ib_spec->mpls.val.tag)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, + outer_first_mpls_over_udp), + &ib_spec->mpls.mask.tag, + sizeof(ib_spec->mpls.mask.tag)); + break; + case IB_FLOW_SPEC_GRE: + if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_first_mpls_over_gre), + &ib_spec->mpls.mask.tag)) + return -EOPNOTSUPP; + + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, + outer_first_mpls_over_gre), + &ib_spec->mpls.val.tag, + sizeof(ib_spec->mpls.val.tag)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, + outer_first_mpls_over_gre), + &ib_spec->mpls.mask.tag, + sizeof(ib_spec->mpls.mask.tag)); + break; + default: + if (ib_spec->type & IB_FLOW_SPEC_INNER) { + if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.inner_first_mpls), + 
&ib_spec->mpls.mask.tag)) + return -EOPNOTSUPP; + + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, + inner_first_mpls), + &ib_spec->mpls.val.tag, + sizeof(ib_spec->mpls.val.tag)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, + inner_first_mpls), + &ib_spec->mpls.mask.tag, + sizeof(ib_spec->mpls.mask.tag)); + } else { + if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_first_mpls), + &ib_spec->mpls.mask.tag)) + return -EOPNOTSUPP; + + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, + outer_first_mpls), + &ib_spec->mpls.val.tag, + sizeof(ib_spec->mpls.val.tag)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, + outer_first_mpls), + &ib_spec->mpls.mask.tag, + sizeof(ib_spec->mpls.mask.tag)); + } + } + break; + case IB_FLOW_SPEC_VXLAN_TUNNEL: + if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask, + LAST_TUNNEL_FIELD)) + return -EOPNOTSUPP; + + MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni, + ntohl(ib_spec->tunnel.mask.tunnel_id)); + MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni, + ntohl(ib_spec->tunnel.val.tunnel_id)); + break; + case IB_FLOW_SPEC_ACTION_TAG: + if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag, + LAST_FLOW_TAG_FIELD)) + return -EOPNOTSUPP; + if (ib_spec->flow_tag.tag_id >= BIT(24)) + return -EINVAL; + + flow_context->flow_tag = ib_spec->flow_tag.tag_id; + flow_context->flags |= FLOW_CONTEXT_HAS_TAG; + break; + case IB_FLOW_SPEC_ACTION_DROP: + if (FIELDS_NOT_SUPPORTED(ib_spec->drop, + LAST_DROP_FIELD)) + return -EOPNOTSUPP; + action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + break; + case IB_FLOW_SPEC_ACTION_HANDLE: + ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act), + flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action); + if (ret) + return ret; + break; + case IB_FLOW_SPEC_ACTION_COUNT: + if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count, + LAST_COUNTERS_FIELD)) + return -EOPNOTSUPP; + + /* for now support only one counters spec per flow */ + if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) + return -EINVAL; + + action->counters = ib_spec->flow_count.counters; + action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + break; + default: + return -EINVAL; + } + + return 0; +} + +/* If a flow could catch both multicast and unicast packets, + * it won't fall into the multicast flow steering table and this rule + * could steal other multicast packets. 
+ */ +static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr) +{ + union ib_flow_spec *flow_spec; + + if (ib_attr->type != IB_FLOW_ATTR_NORMAL || + ib_attr->num_of_specs < 1) + return false; + + flow_spec = (union ib_flow_spec *)(ib_attr + 1); + if (flow_spec->type == IB_FLOW_SPEC_IPV4) { + struct ib_flow_spec_ipv4 *ipv4_spec; + + ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec; + if (ipv4_is_multicast(ipv4_spec->val.dst_ip)) + return true; + + return false; + } + + if (flow_spec->type == IB_FLOW_SPEC_ETH) { + struct ib_flow_spec_eth *eth_spec; + + eth_spec = (struct ib_flow_spec_eth *)flow_spec; + return is_multicast_ether_addr(eth_spec->mask.dst_mac) && + is_multicast_ether_addr(eth_spec->val.dst_mac); + } + + return false; +} + +enum valid_spec { + VALID_SPEC_INVALID, + VALID_SPEC_VALID, + VALID_SPEC_NA, +}; + +static enum valid_spec +is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev, + const struct mlx5_flow_spec *spec, + const struct mlx5_flow_act *flow_act, + bool egress) +{ + const u32 *match_c = spec->match_criteria; + bool is_crypto = + (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_DECRYPT)); + bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c); + bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP; + + /* + * Currently only crypto is supported in egress, when regular egress + * rules would be supported, always return VALID_SPEC_NA. + */ + if (!is_crypto) + return VALID_SPEC_NA; + + return is_crypto && is_ipsec && + (!egress || (!is_drop && + !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ? + VALID_SPEC_VALID : VALID_SPEC_INVALID; +} + +static bool is_valid_spec(struct mlx5_core_dev *mdev, + const struct mlx5_flow_spec *spec, + const struct mlx5_flow_act *flow_act, + bool egress) +{ + /* We curretly only support ipsec egress flow */ + return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID; +} + +static bool is_valid_ethertype(struct mlx5_core_dev *mdev, + const struct ib_flow_attr *flow_attr, + bool check_inner) +{ + union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1); + int match_ipv = check_inner ? + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.inner_ip_version) : + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_ip_version); + int inner_bit = check_inner ? 
IB_FLOW_SPEC_INNER : 0; + bool ipv4_spec_valid, ipv6_spec_valid; + unsigned int ip_spec_type = 0; + bool has_ethertype = false; + unsigned int spec_index; + bool mask_valid = true; + u16 eth_type = 0; + bool type_valid; + + /* Validate that ethertype is correct */ + for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { + if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) && + ib_spec->eth.mask.ether_type) { + mask_valid = (ib_spec->eth.mask.ether_type == + htons(0xffff)); + has_ethertype = true; + eth_type = ntohs(ib_spec->eth.val.ether_type); + } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) || + (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) { + ip_spec_type = ib_spec->type; + } + ib_spec = (void *)ib_spec + ib_spec->size; + } + + type_valid = (!has_ethertype) || (!ip_spec_type); + if (!type_valid && mask_valid) { + ipv4_spec_valid = (eth_type == ETH_P_IP) && + (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit)); + ipv6_spec_valid = (eth_type == ETH_P_IPV6) && + (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit)); + + type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) || + (((eth_type == ETH_P_MPLS_UC) || + (eth_type == ETH_P_MPLS_MC)) && match_ipv); + } + + return type_valid; +} + +static bool is_valid_attr(struct mlx5_core_dev *mdev, + const struct ib_flow_attr *flow_attr) +{ + return is_valid_ethertype(mdev, flow_attr, false) && + is_valid_ethertype(mdev, flow_attr, true); +} + +static void put_flow_table(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *prio, bool ft_added) +{ + prio->refcount -= !!ft_added; + if (!prio->refcount) { + mlx5_destroy_flow_table(prio->flow_table); + prio->flow_table = NULL; + } +} + +static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) +{ + struct mlx5_ib_flow_handler *handler = container_of(flow_id, + struct mlx5_ib_flow_handler, + ibflow); + struct mlx5_ib_flow_handler *iter, *tmp; + struct mlx5_ib_dev *dev = handler->dev; + + mutex_lock(&dev->flow_db->lock); + + list_for_each_entry_safe(iter, tmp, &handler->list, list) { + mlx5_del_flow_rules(iter->rule); + put_flow_table(dev, iter->prio, true); + list_del(&iter->list); + kfree(iter); + } + + mlx5_del_flow_rules(handler->rule); + put_flow_table(dev, handler->prio, true); + mlx5_ib_counters_clear_description(handler->ibcounters); + mutex_unlock(&dev->flow_db->lock); + if (handler->flow_matcher) + atomic_dec(&handler->flow_matcher->usecnt); + kfree(handler); + + return 0; +} + +static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap) +{ + priority *= 2; + if (!dont_trap) + priority++; + return priority; +} + +enum flow_table_type { + MLX5_IB_FT_RX, + MLX5_IB_FT_TX +}; + +#define MLX5_FS_MAX_TYPES 6 +#define MLX5_FS_MAX_ENTRIES BIT(16) + +static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns, + struct mlx5_ib_flow_prio *prio, + int priority, + int num_entries, int num_groups, + u32 flags) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_table *ft; + + ft_attr.prio = priority; + ft_attr.max_fte = num_entries; + ft_attr.flags = flags; + ft_attr.autogroup.max_num_groups = num_groups; + ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) + return ERR_CAST(ft); + + prio->flow_table = ft; + prio->refcount = 0; + return prio; +} + +static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, + struct ib_flow_attr *flow_attr, + enum flow_table_type ft_type) +{ + bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP; + struct mlx5_flow_namespace *ns = NULL; + struct 
mlx5_ib_flow_prio *prio; + struct mlx5_flow_table *ft; + int max_table_size; + int num_entries; + int num_groups; + bool esw_encap; + u32 flags = 0; + int priority; + + max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + log_max_ft_size)); + esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != + DEVLINK_ESWITCH_ENCAP_MODE_NONE; + if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { + enum mlx5_flow_namespace_type fn_type; + + if (flow_is_multicast_only(flow_attr) && + !dont_trap) + priority = MLX5_IB_FLOW_MCAST_PRIO; + else + priority = ib_prio_to_core_prio(flow_attr->priority, + dont_trap); + if (ft_type == MLX5_IB_FT_RX) { + fn_type = MLX5_FLOW_NAMESPACE_BYPASS; + prio = &dev->flow_db->prios[priority]; + if (!dev->is_rep && !esw_encap && + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap)) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; + if (!dev->is_rep && !esw_encap && + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + reformat_l3_tunnel_to_l2)) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + } else { + max_table_size = + BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, + log_max_ft_size)); + fn_type = MLX5_FLOW_NAMESPACE_EGRESS; + prio = &dev->flow_db->egress_prios[priority]; + if (!dev->is_rep && !esw_encap && + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat)) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + } + ns = mlx5_get_flow_namespace(dev->mdev, fn_type); + num_entries = MLX5_FS_MAX_ENTRIES; + num_groups = MLX5_FS_MAX_TYPES; + } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || + flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { + ns = mlx5_get_flow_namespace(dev->mdev, + MLX5_FLOW_NAMESPACE_LEFTOVERS); + build_leftovers_ft_param(&priority, + &num_entries, + &num_groups); + prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; + } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { + if (!MLX5_CAP_FLOWTABLE(dev->mdev, + allow_sniffer_and_nic_rx_shared_tir)) + return ERR_PTR(-EOPNOTSUPP); + + ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ? 
+ MLX5_FLOW_NAMESPACE_SNIFFER_RX : + MLX5_FLOW_NAMESPACE_SNIFFER_TX); + + prio = &dev->flow_db->sniffer[ft_type]; + priority = 0; + num_entries = 1; + num_groups = 1; + } + + if (!ns) + return ERR_PTR(-EOPNOTSUPP); + + max_table_size = min_t(int, num_entries, max_table_size); + + ft = prio->flow_table; + if (!ft) + return _get_prio(ns, prio, priority, max_table_size, num_groups, + flags); + + return prio; +} + +static void set_underlay_qp(struct mlx5_ib_dev *dev, + struct mlx5_flow_spec *spec, + u32 underlay_qpn) +{ + void *misc_params_c = MLX5_ADDR_OF(fte_match_param, + spec->match_criteria, + misc_parameters); + void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + if (underlay_qpn && + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + ft_field_support.bth_dst_qp)) { + MLX5_SET(fte_match_set_misc, + misc_params_v, bth_dst_qp, underlay_qpn); + MLX5_SET(fte_match_set_misc, + misc_params_c, bth_dst_qp, 0xffffff); + } +} + +static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev, + struct mlx5_flow_spec *spec, + struct mlx5_eswitch_rep *rep) +{ + struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; + void *misc; + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw, + rep->vport)); + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + } +} + +static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + const struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst, + u32 underlay_qpn, + struct mlx5_ib_create_flow *ucmd) +{ + struct mlx5_flow_table *ft = ft_prio->flow_table; + struct mlx5_ib_flow_handler *handler; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec; + struct mlx5_flow_destination dest_arr[2] = {}; + struct mlx5_flow_destination *rule_dst = dest_arr; + const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr); + unsigned int spec_index; + u32 prev_type = 0; + int err = 0; + int dest_num = 0; + bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; + + if (!is_valid_attr(dev->mdev, flow_attr)) + return ERR_PTR(-EINVAL); + + if (dev->is_rep && is_egress) + return ERR_PTR(-EINVAL); + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + handler = kzalloc(sizeof(*handler), GFP_KERNEL); + if (!handler || !spec) { + err = -ENOMEM; + goto free; + } + + INIT_LIST_HEAD(&handler->list); + + for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { + err = parse_flow_attr(dev->mdev, spec, + ib_flow, flow_attr, &flow_act, + prev_type); + if (err < 0) + goto free; + + prev_type = ((union ib_flow_spec *)ib_flow)->type; + ib_flow += ((union ib_flow_spec *)ib_flow)->size; + } + + if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) { + memcpy(&dest_arr[0], dst, sizeof(*dst)); + dest_num++; + } + + if (!flow_is_multicast_only(flow_attr)) + set_underlay_qp(dev, spec, underlay_qpn); + + if (dev->is_rep) { + struct mlx5_eswitch_rep 
*rep; + + rep = dev->port[flow_attr->port - 1].rep; + if (!rep) { + err = -EINVAL; + goto free; + } + + mlx5_ib_set_rule_source_port(dev, spec, rep); + } + + spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); + + if (is_egress && + !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) { + err = -EINVAL; + goto free; + } + + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + struct mlx5_ib_mcounters *mcounters; + + err = mlx5_ib_flow_counters_set_data(flow_act.counters, ucmd); + if (err) + goto free; + + mcounters = to_mcounters(flow_act.counters); + handler->ibcounters = flow_act.counters; + dest_arr[dest_num].type = + MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest_arr[dest_num].counter_id = + mlx5_fc_id(mcounters->hw_cntrs_hndl); + dest_num++; + } + + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) { + if (!dest_num) + rule_dst = NULL; + } else { + if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) + flow_act.action |= + MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; + if (is_egress) + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; + else if (dest_num) + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } + + if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) && + (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || + flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) { + mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n", + spec->flow_context.flow_tag, flow_attr->type); + err = -EINVAL; + goto free; + } + handler->rule = mlx5_add_flow_rules(ft, spec, + &flow_act, + rule_dst, dest_num); + + if (IS_ERR(handler->rule)) { + err = PTR_ERR(handler->rule); + goto free; + } + + ft_prio->refcount++; + handler->prio = ft_prio; + handler->dev = dev; + + ft_prio->flow_table = ft; +free: + if (err && handler) { + mlx5_ib_counters_clear_description(handler->ibcounters); + kfree(handler); + } + kvfree(spec); + return err ? 
ERR_PTR(err) : handler; +} + +static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + const struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst) +{ + return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL); +} + +enum { + LEFTOVERS_MC, + LEFTOVERS_UC, +}; + +static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst) +{ + struct mlx5_ib_flow_handler *handler_ucast = NULL; + struct mlx5_ib_flow_handler *handler = NULL; + + static struct { + struct ib_flow_attr flow_attr; + struct ib_flow_spec_eth eth_flow; + } leftovers_specs[] = { + [LEFTOVERS_MC] = { + .flow_attr = { + .num_of_specs = 1, + .size = sizeof(leftovers_specs[0]) + }, + .eth_flow = { + .type = IB_FLOW_SPEC_ETH, + .size = sizeof(struct ib_flow_spec_eth), + .mask = {.dst_mac = {0x1} }, + .val = {.dst_mac = {0x1} } + } + }, + [LEFTOVERS_UC] = { + .flow_attr = { + .num_of_specs = 1, + .size = sizeof(leftovers_specs[0]) + }, + .eth_flow = { + .type = IB_FLOW_SPEC_ETH, + .size = sizeof(struct ib_flow_spec_eth), + .mask = {.dst_mac = {0x1} }, + .val = {.dst_mac = {} } + } + } + }; + + handler = create_flow_rule(dev, ft_prio, + &leftovers_specs[LEFTOVERS_MC].flow_attr, + dst); + if (!IS_ERR(handler) && + flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) { + handler_ucast = create_flow_rule(dev, ft_prio, + &leftovers_specs[LEFTOVERS_UC].flow_attr, + dst); + if (IS_ERR(handler_ucast)) { + mlx5_del_flow_rules(handler->rule); + ft_prio->refcount--; + kfree(handler); + handler = handler_ucast; + } else { + list_add(&handler_ucast->list, &handler->list); + } + } + + return handler; +} + +static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_rx, + struct mlx5_ib_flow_prio *ft_tx, + struct mlx5_flow_destination *dst) +{ + struct mlx5_ib_flow_handler *handler_rx; + struct mlx5_ib_flow_handler *handler_tx; + int err; + static const struct ib_flow_attr flow_attr = { + .num_of_specs = 0, + .size = sizeof(flow_attr) + }; + + handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst); + if (IS_ERR(handler_rx)) { + err = PTR_ERR(handler_rx); + goto err; + } + + handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst); + if (IS_ERR(handler_tx)) { + err = PTR_ERR(handler_tx); + goto err_tx; + } + + list_add(&handler_tx->list, &handler_rx->list); + + return handler_rx; + +err_tx: + mlx5_del_flow_rules(handler_rx->rule); + ft_rx->refcount--; + kfree(handler_rx); +err: + return ERR_PTR(err); +} + + +static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, + struct ib_flow_attr *flow_attr, + int domain, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_ib_qp *mqp = to_mqp(qp); + struct mlx5_ib_flow_handler *handler = NULL; + struct mlx5_flow_destination *dst = NULL; + struct mlx5_ib_flow_prio *ft_prio_tx = NULL; + struct mlx5_ib_flow_prio *ft_prio; + bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; + struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr; + size_t min_ucmd_sz, required_ucmd_sz; + int err; + int underlay_qpn; + + if (udata && udata->inlen) { + min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) + + sizeof(ucmd_hdr.reserved); + if (udata->inlen < min_ucmd_sz) + return ERR_PTR(-EOPNOTSUPP); + + err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz); + if (err) + return ERR_PTR(err); + + /* currently supports only one 
counters data */ + if (ucmd_hdr.ncounters_data > 1) + return ERR_PTR(-EINVAL); + + required_ucmd_sz = min_ucmd_sz + + sizeof(struct mlx5_ib_flow_counters_data) * + ucmd_hdr.ncounters_data; + if (udata->inlen > required_ucmd_sz && + !ib_is_udata_cleared(udata, required_ucmd_sz, + udata->inlen - required_ucmd_sz)) + return ERR_PTR(-EOPNOTSUPP); + + ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL); + if (!ucmd) + return ERR_PTR(-ENOMEM); + + err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz); + if (err) + goto free_ucmd; + } + + if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) { + err = -ENOMEM; + goto free_ucmd; + } + + if (domain != IB_FLOW_DOMAIN_USER || + flow_attr->port > dev->num_ports || + (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | + IB_FLOW_ATTR_FLAGS_EGRESS))) { + err = -EINVAL; + goto free_ucmd; + } + + if (is_egress && + (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || + flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) { + err = -EINVAL; + goto free_ucmd; + } + + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (!dst) { + err = -ENOMEM; + goto free_ucmd; + } + + mutex_lock(&dev->flow_db->lock); + + ft_prio = get_flow_table(dev, flow_attr, + is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX); + if (IS_ERR(ft_prio)) { + err = PTR_ERR(ft_prio); + goto unlock; + } + if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { + ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX); + if (IS_ERR(ft_prio_tx)) { + err = PTR_ERR(ft_prio_tx); + ft_prio_tx = NULL; + goto destroy_ft; + } + } + + if (is_egress) { + dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT; + } else { + dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; + if (mqp->is_rss) + dst->tir_num = mqp->rss_qp.tirn; + else + dst->tir_num = mqp->raw_packet_qp.rq.tirn; + } + + if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { + underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ? 
+ mqp->underlay_qpn : + 0; + handler = _create_flow_rule(dev, ft_prio, flow_attr, dst, + underlay_qpn, ucmd); + } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || + flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { + handler = create_leftovers_rule(dev, ft_prio, flow_attr, + dst); + } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { + handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst); + } else { + err = -EINVAL; + goto destroy_ft; + } + + if (IS_ERR(handler)) { + err = PTR_ERR(handler); + handler = NULL; + goto destroy_ft; + } + + mutex_unlock(&dev->flow_db->lock); + kfree(dst); + kfree(ucmd); + + return &handler->ibflow; + +destroy_ft: + put_flow_table(dev, ft_prio, false); + if (ft_prio_tx) + put_flow_table(dev, ft_prio_tx, false); +unlock: + mutex_unlock(&dev->flow_db->lock); + kfree(dst); +free_ucmd: + kfree(ucmd); + return ERR_PTR(err); +} + +static struct mlx5_ib_flow_prio * +_get_flow_table(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_matcher *fs_matcher, + bool mcast) +{ + struct mlx5_flow_namespace *ns = NULL; + struct mlx5_ib_flow_prio *prio = NULL; + int max_table_size = 0; + bool esw_encap; + u32 flags = 0; + int priority; + + if (mcast) + priority = MLX5_IB_FLOW_MCAST_PRIO; + else + priority = ib_prio_to_core_prio(fs_matcher->priority, false); + + esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != + DEVLINK_ESWITCH_ENCAP_MODE_NONE; + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { + max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + log_max_ft_size)); + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + reformat_l3_tunnel_to_l2) && + !esw_encap) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) { + max_table_size = BIT( + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size)); + if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) { + max_table_size = BIT( + MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size)); + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) && + esw_encap) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + priority = FDB_BYPASS_PATH; + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) { + max_table_size = + BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, + log_max_ft_size)); + priority = fs_matcher->priority; + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) { + max_table_size = + BIT(MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, + log_max_ft_size)); + priority = fs_matcher->priority; + } + + max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES); + + ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type); + if (!ns) + return ERR_PTR(-EOPNOTSUPP); + + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) + prio = &dev->flow_db->prios[priority]; + else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) + prio = &dev->flow_db->egress_prios[priority]; + else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) + prio = &dev->flow_db->fdb; + else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) + prio = &dev->flow_db->rdma_rx[priority]; + else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) + prio = &dev->flow_db->rdma_tx[priority]; + + if (!prio) + return 
ERR_PTR(-EINVAL); + + if (prio->flow_table) + return prio; + + return _get_prio(ns, prio, priority, max_table_size, + MLX5_FS_MAX_TYPES, flags); +} + +static struct mlx5_ib_flow_handler * +_create_raw_flow_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + struct mlx5_flow_destination *dst, + struct mlx5_ib_flow_matcher *fs_matcher, + struct mlx5_flow_context *flow_context, + struct mlx5_flow_act *flow_act, + void *cmd_in, int inlen, + int dst_num) +{ + struct mlx5_ib_flow_handler *handler; + struct mlx5_flow_spec *spec; + struct mlx5_flow_table *ft = ft_prio->flow_table; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + handler = kzalloc(sizeof(*handler), GFP_KERNEL); + if (!handler || !spec) { + err = -ENOMEM; + goto free; + } + + INIT_LIST_HEAD(&handler->list); + + memcpy(spec->match_value, cmd_in, inlen); + memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params, + fs_matcher->mask_len); + spec->match_criteria_enable = fs_matcher->match_criteria_enable; + spec->flow_context = *flow_context; + + handler->rule = mlx5_add_flow_rules(ft, spec, + flow_act, dst, dst_num); + + if (IS_ERR(handler->rule)) { + err = PTR_ERR(handler->rule); + goto free; + } + + ft_prio->refcount++; + handler->prio = ft_prio; + handler->dev = dev; + ft_prio->flow_table = ft; + +free: + if (err) + kfree(handler); + kvfree(spec); + return err ? ERR_PTR(err) : handler; +} + +static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher, + void *match_v) +{ + void *match_c; + void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4; + void *dmac, *dmac_mask; + void *ipv4, *ipv4_mask; + + if (!(fs_matcher->match_criteria_enable & + (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT))) + return false; + + match_c = fs_matcher->matcher_mask.match_params; + match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers); + match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + + dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4, + dmac_47_16); + dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4, + dmac_47_16); + + if (is_multicast_ether_addr(dmac) && + is_multicast_ether_addr(dmac_mask)) + return true; + + ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + + ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + + if (ipv4_is_multicast(*(__be32 *)(ipv4)) && + ipv4_is_multicast(*(__be32 *)(ipv4_mask))) + return true; + + return false; +} + +static struct mlx5_ib_flow_handler *raw_fs_rule_add( + struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, + struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act, + u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type) +{ + struct mlx5_flow_destination *dst; + struct mlx5_ib_flow_prio *ft_prio; + struct mlx5_ib_flow_handler *handler; + int dst_num = 0; + bool mcast; + int err; + + if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL) + return ERR_PTR(-EOPNOTSUPP); + + if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO) + return ERR_PTR(-ENOMEM); + + dst = kcalloc(2, sizeof(*dst), GFP_KERNEL); + if (!dst) + return ERR_PTR(-ENOMEM); + + mcast = raw_fs_is_multicast(fs_matcher, cmd_in); + mutex_lock(&dev->flow_db->lock); + + ft_prio = _get_flow_table(dev, fs_matcher, mcast); + if (IS_ERR(ft_prio)) { + err = PTR_ERR(ft_prio); + goto unlock; + } + + if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) { + dst[dst_num].type = dest_type; + 
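/* [Editor's note, not part of the original patch] Each branch below fills one
 * slot of the two-entry dst[] array allocated earlier in this function; when
 * the rule also counts packets, a MLX5_FLOW_DESTINATION_TYPE_COUNTER entry is
 * appended afterwards. A sketch of the resulting array for a TIR destination
 * with a counter attached:
 *
 *	dst[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;     dst[0].tir_num = dest_id;
 *	dst[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dst[1].counter_id = counter_id;
 *	dst_num = 2;
 */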
dst[dst_num++].tir_num = dest_id; + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) { + dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; + dst[dst_num++].ft_num = dest_id; + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_PORT) { + dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT; + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; + } + + + if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dst[dst_num].counter_id = counter_id; + dst_num++; + } + + handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, + flow_context, flow_act, + cmd_in, inlen, dst_num); + + if (IS_ERR(handler)) { + err = PTR_ERR(handler); + goto destroy_ft; + } + + mutex_unlock(&dev->flow_db->lock); + atomic_inc(&fs_matcher->usecnt); + handler->flow_matcher = fs_matcher; + + kfree(dst); + + return handler; + +destroy_ft: + put_flow_table(dev, ft_prio, false); +unlock: + mutex_unlock(&dev->flow_db->lock); + kfree(dst); + + return ERR_PTR(err); +} + +static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags) +{ + u32 flags = 0; + + if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA) + flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA; + + return flags; +} + +#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED \ + MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA +static struct ib_flow_action * +mlx5_ib_create_flow_action_esp(struct ib_device *device, + const struct ib_flow_action_attrs_esp *attr, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_dev *mdev = to_mdev(device); + struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm; + struct mlx5_accel_esp_xfrm_attrs accel_attrs = {}; + struct mlx5_ib_flow_action *action; + u64 action_flags; + u64 flags; + int err = 0; + + err = uverbs_get_flags64( + &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, + ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1)); + if (err) + return ERR_PTR(err); + + flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags); + + /* We current only support a subset of the standard features. Only a + * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn + * (with overlap). Full offload mode isn't supported. 
+ */ + if (!attr->keymat || attr->replay || attr->encap || + attr->spi || attr->seq || attr->tfc_pad || + attr->hard_limit_pkts || + (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT))) + return ERR_PTR(-EOPNOTSUPP); + + if (attr->keymat->protocol != + IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM) + return ERR_PTR(-EOPNOTSUPP); + + aes_gcm = &attr->keymat->keymat.aes_gcm; + + if (aes_gcm->icv_len != 16 || + aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ) + return ERR_PTR(-EOPNOTSUPP); + + action = kmalloc(sizeof(*action), GFP_KERNEL); + if (!action) + return ERR_PTR(-ENOMEM); + + action->esp_aes_gcm.ib_flags = attr->flags; + memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key, + sizeof(accel_attrs.keymat.aes_gcm.aes_key)); + accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8; + memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt, + sizeof(accel_attrs.keymat.aes_gcm.salt)); + memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv, + sizeof(accel_attrs.keymat.aes_gcm.seq_iv)); + accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8; + accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ; + accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM; + + accel_attrs.esn = attr->esn; + if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) + accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED; + if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) + accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; + + if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT) + accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT; + + action->esp_aes_gcm.ctx = + mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags); + if (IS_ERR(action->esp_aes_gcm.ctx)) { + err = PTR_ERR(action->esp_aes_gcm.ctx); + goto err_parse; + } + + action->esp_aes_gcm.ib_flags = attr->flags; + + return &action->ib_action; + +err_parse: + kfree(action); + return ERR_PTR(err); +} + +static int +mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action, + const struct ib_flow_action_attrs_esp *attr, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_flow_action *maction = to_mflow_act(action); + struct mlx5_accel_esp_xfrm_attrs accel_attrs; + int err = 0; + + if (attr->keymat || attr->replay || attr->encap || + attr->spi || attr->seq || attr->tfc_pad || + attr->hard_limit_pkts || + (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | + IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS | + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))) + return -EOPNOTSUPP; + + /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can + * be modified. 
+ */ + if (!(maction->esp_aes_gcm.ib_flags & + IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) && + attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)) + return -EINVAL; + + memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs, + sizeof(accel_attrs)); + + accel_attrs.esn = attr->esn; + if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) + accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; + else + accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; + + err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx, + &accel_attrs); + if (err) + return err; + + maction->esp_aes_gcm.ib_flags &= + ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; + maction->esp_aes_gcm.ib_flags |= + attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; + + return 0; +} + +static void destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) +{ + switch (maction->flow_action_raw.sub_type) { + case MLX5_IB_FLOW_ACTION_MODIFY_HEADER: + mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev, + maction->flow_action_raw.modify_hdr); + break; + case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT: + mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev, + maction->flow_action_raw.pkt_reformat); + break; + case MLX5_IB_FLOW_ACTION_DECAP: + break; + default: + break; + } +} + +static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action) +{ + struct mlx5_ib_flow_action *maction = to_mflow_act(action); + + switch (action->type) { + case IB_FLOW_ACTION_ESP: + /* + * We only support aes_gcm by now, so we implicitly know this is + * the underline crypto. + */ + mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx); + break; + case IB_FLOW_ACTION_UNSPECIFIED: + destroy_flow_action_raw(maction); + break; + default: + WARN_ON(true); + break; + } + + kfree(maction); + return 0; +} + +static int +mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type, + enum mlx5_flow_namespace_type *namespace) +{ + switch (table_type) { + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX: + *namespace = MLX5_FLOW_NAMESPACE_BYPASS; + break; + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX: + *namespace = MLX5_FLOW_NAMESPACE_EGRESS; + break; + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB: + *namespace = MLX5_FLOW_NAMESPACE_FDB; + break; + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX: + *namespace = MLX5_FLOW_NAMESPACE_RDMA_RX; + break; + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX: + *namespace = MLX5_FLOW_NAMESPACE_RDMA_TX; + break; + default: + return -EINVAL; + } + + return 0; +} + +static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { + [MLX5_IB_FLOW_TYPE_NORMAL] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + .u.ptr = { + .len = sizeof(u16), /* data is priority */ + .min_len = sizeof(u16), + } + }, + [MLX5_IB_FLOW_TYPE_SNIFFER] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_NO_DATA(), + }, + [MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_NO_DATA(), + }, + [MLX5_IB_FLOW_TYPE_MC_DEFAULT] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_NO_DATA(), + }, +}; + +static bool is_flow_dest(void *obj, int *dest_id, int *dest_type) +{ + struct devx_obj *devx_obj = obj; + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); + + switch (opcode) { + case MLX5_CMD_OP_DESTROY_TIR: + *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; + *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, + obj_id); + return true; + + case MLX5_CMD_OP_DESTROY_FLOW_TABLE: + *dest_type = 
MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox, + table_id); + return true; + default: + return false; + } +} + +static int get_dests(struct uverbs_attr_bundle *attrs, + struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id, + int *dest_type, struct ib_qp **qp, u32 *flags) +{ + bool dest_devx, dest_qp; + void *devx_obj; + int err; + + dest_devx = uverbs_attr_is_valid(attrs, + MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); + dest_qp = uverbs_attr_is_valid(attrs, + MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); + + *flags = 0; + err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS, + MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS | + MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP); + if (err) + return err; + + /* Both flags are not allowed */ + if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS && + *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP) + return -EINVAL; + + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { + if (dest_devx && (dest_qp || *flags)) + return -EINVAL; + else if (dest_qp && *flags) + return -EINVAL; + } + + /* Allow only DEVX object, drop as dest for FDB */ + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !(dest_devx || + (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP))) + return -EINVAL; + + /* Allow only DEVX object or QP as dest when inserting to RDMA_RX */ + if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) && + ((!dest_devx && !dest_qp) || (dest_devx && dest_qp))) + return -EINVAL; + + *qp = NULL; + if (dest_devx) { + devx_obj = + uverbs_attr_get_obj(attrs, + MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); + + /* Verify that the given DEVX object is a flow + * steering destination. + */ + if (!is_flow_dest(devx_obj, dest_id, dest_type)) + return -EINVAL; + /* Allow only flow table as dest when inserting to FDB or RDMA_RX */ + if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB || + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) && + *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) + return -EINVAL; + } else if (dest_qp) { + struct mlx5_ib_qp *mqp; + + *qp = uverbs_attr_get_obj(attrs, + MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); + if (IS_ERR(*qp)) + return PTR_ERR(*qp); + + if ((*qp)->qp_type != IB_QPT_RAW_PACKET) + return -EINVAL; + + mqp = to_mqp(*qp); + if (mqp->is_rss) + *dest_id = mqp->rss_qp.tirn; + else + *dest_id = mqp->raw_packet_qp.rq.tirn; + *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) { + *dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT; + } + + if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR && + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) + return -EINVAL; + + return 0; +} + +static bool is_flow_counter(void *obj, u32 offset, u32 *counter_id) +{ + struct devx_obj *devx_obj = obj; + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); + + if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) { + + if (offset && offset >= devx_obj->flow_counter_bulk_size) + return false; + + *counter_id = MLX5_GET(dealloc_flow_counter_in, + devx_obj->dinbox, + flow_counter_id); + *counter_id += offset; + return true; + } + + return false; +} + +#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2 +static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_flow_context flow_context = {.flow_tag = + MLX5_FS_DEFAULT_FLOW_TAG}; + u32 *offset_attr, offset = 0, counter_id = 0; + int dest_id, dest_type, inlen, len, ret, i; + struct mlx5_ib_flow_handler *flow_handler; + struct mlx5_ib_flow_matcher 
*fs_matcher; + struct ib_uobject **arr_flow_actions; + struct ib_uflow_resources *uflow_res; + struct mlx5_flow_act flow_act = {}; + struct ib_qp *qp = NULL; + void *devx_obj, *cmd_in; + struct ib_uobject *uobj; + struct mlx5_ib_dev *dev; + u32 flags; + + if (!capable(CAP_NET_RAW)) + return -EPERM; + + fs_matcher = uverbs_attr_get_obj(attrs, + MLX5_IB_ATTR_CREATE_FLOW_MATCHER); + uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); + dev = mlx5_udata_to_mdev(&attrs->driver_udata); + + if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags)) + return -EINVAL; + + if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS) + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS; + + if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP) + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + + len = uverbs_attr_get_uobjs_arr(attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions); + if (len) { + devx_obj = arr_flow_actions[0]->object; + + if (uverbs_attr_is_valid(attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) { + + int num_offsets = uverbs_attr_ptr_get_array_size( + attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, + sizeof(u32)); + + if (num_offsets != 1) + return -EINVAL; + + offset_attr = uverbs_attr_get_alloced_ptr( + attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET); + offset = *offset_attr; + } + + if (!is_flow_counter(devx_obj, offset, &counter_id)) + return -EINVAL; + + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + } + + cmd_in = uverbs_attr_get_alloced_ptr( + attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); + inlen = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); + + uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS); + if (!uflow_res) + return -ENOMEM; + + len = uverbs_attr_get_uobjs_arr(attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions); + for (i = 0; i < len; i++) { + struct mlx5_ib_flow_action *maction = + to_mflow_act(arr_flow_actions[i]->object); + + ret = parse_flow_flow_action(maction, false, &flow_act); + if (ret) + goto err_out; + flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE, + arr_flow_actions[i]->object); + } + + ret = uverbs_copy_from(&flow_context.flow_tag, attrs, + MLX5_IB_ATTR_CREATE_FLOW_TAG); + if (!ret) { + if (flow_context.flow_tag >= BIT(24)) { + ret = -EINVAL; + goto err_out; + } + flow_context.flags |= FLOW_CONTEXT_HAS_TAG; + } + + flow_handler = + raw_fs_rule_add(dev, fs_matcher, &flow_context, &flow_act, + counter_id, cmd_in, inlen, dest_id, dest_type); + if (IS_ERR(flow_handler)) { + ret = PTR_ERR(flow_handler); + goto err_out; + } + + ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res); + + return 0; +err_out: + ib_uverbs_flow_resources_free(uflow_res); + return ret; +} + +static int flow_matcher_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_flow_matcher *obj = uobject->object; + int ret; + + ret = ib_destroy_usecnt(&obj->usecnt, why, uobject); + if (ret) + return ret; + + kfree(obj); + return 0; +} + +static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs, + struct mlx5_ib_flow_matcher *obj) +{ + enum mlx5_ib_uapi_flow_table_type ft_type = + MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX; + u32 flags; + int err; + + /* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older + * users should switch to it. 
We leave this to not break userspace + */ + if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) && + uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) + return -EINVAL; + + if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) { + err = uverbs_get_const(&ft_type, attrs, + MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE); + if (err) + return err; + + err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type); + if (err) + return err; + + return 0; + } + + if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) { + err = uverbs_get_flags32(&flags, attrs, + MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, + IB_FLOW_ATTR_FLAGS_EGRESS); + if (err) + return err; + + if (flags) { + mlx5_ib_ft_type_to_namespace( + MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX, + &obj->ns_type); + return 0; + } + } + + obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS; + + return 0; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); + struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata); + struct mlx5_ib_flow_matcher *obj; + int err; + + obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL); + if (!obj) + return -ENOMEM; + + obj->mask_len = uverbs_attr_get_len( + attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); + err = uverbs_copy_from(&obj->matcher_mask, + attrs, + MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); + if (err) + goto end; + + obj->flow_type = uverbs_attr_get_enum_id( + attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE); + + if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) { + err = uverbs_copy_from(&obj->priority, + attrs, + MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE); + if (err) + goto end; + } + + err = uverbs_copy_from(&obj->match_criteria_enable, + attrs, + MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA); + if (err) + goto end; + + err = mlx5_ib_matcher_ns(attrs, obj); + if (err) + goto end; + + uobj->object = obj; + obj->mdev = dev->mdev; + atomic_set(&obj->usecnt, 0); + return 0; + +end: + kfree(obj); + return err; +} + +static struct ib_flow_action * +mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev, + enum mlx5_ib_uapi_flow_table_type ft_type, + u8 num_actions, void *in) +{ + enum mlx5_flow_namespace_type namespace; + struct mlx5_ib_flow_action *maction; + int ret; + + ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace); + if (ret) + return ERR_PTR(-EINVAL); + + maction = kzalloc(sizeof(*maction), GFP_KERNEL); + if (!maction) + return ERR_PTR(-ENOMEM); + + maction->flow_action_raw.modify_hdr = + mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in); + + if (IS_ERR(maction->flow_action_raw.modify_hdr)) { + ret = PTR_ERR(maction->flow_action_raw.modify_hdr); + kfree(maction); + return ERR_PTR(ret); + } + maction->flow_action_raw.sub_type = + MLX5_IB_FLOW_ACTION_MODIFY_HEADER; + maction->flow_action_raw.dev = dev; + + return &maction->ib_action; +} + +static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev) +{ + return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + max_modify_header_actions) || + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, + max_modify_header_actions) || + MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, + max_modify_header_actions); +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE); + struct mlx5_ib_dev *mdev = 
mlx5_udata_to_mdev(&attrs->driver_udata); + enum mlx5_ib_uapi_flow_table_type ft_type; + struct ib_flow_action *action; + int num_actions; + void *in; + int ret; + + if (!mlx5_ib_modify_header_supported(mdev)) + return -EOPNOTSUPP; + + in = uverbs_attr_get_alloced_ptr(attrs, + MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM); + + num_actions = uverbs_attr_ptr_get_array_size( + attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, + MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)); + if (num_actions < 0) + return num_actions; + + ret = uverbs_get_const(&ft_type, attrs, + MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE); + if (ret) + return ret; + action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in); + if (IS_ERR(action)) + return PTR_ERR(action); + + uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev, + IB_FLOW_ACTION_UNSPECIFIED); + + return 0; +} + +static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev, + u8 packet_reformat_type, + u8 ft_type) +{ + switch (packet_reformat_type) { + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL: + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX) + return MLX5_CAP_FLOWTABLE(ibdev->mdev, + encap_general_header); + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL: + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX) + return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev, + reformat_l2_to_l3_tunnel); + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2: + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX) + return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, + reformat_l3_tunnel_to_l2); + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2: + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX) + return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap); + break; + default: + break; + } + + return false; +} + +static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt) +{ + switch (dv_prt) { + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL: + *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL; + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2: + *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL: + *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int mlx5_ib_flow_action_create_packet_reformat_ctx( + struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_action *maction, + u8 ft_type, u8 dv_prt, + void *in, size_t len) +{ + enum mlx5_flow_namespace_type namespace; + u8 prm_prt; + int ret; + + ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace); + if (ret) + return ret; + + ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt); + if (ret) + return ret; + + maction->flow_action_raw.pkt_reformat = + mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len, + in, namespace); + if (IS_ERR(maction->flow_action_raw.pkt_reformat)) { + ret = PTR_ERR(maction->flow_action_raw.pkt_reformat); + return ret; + } + + maction->flow_action_raw.sub_type = + MLX5_IB_FLOW_ACTION_PACKET_REFORMAT; + maction->flow_action_raw.dev = dev; + + return 0; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE); + struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata); + enum 
mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt; + enum mlx5_ib_uapi_flow_table_type ft_type; + struct mlx5_ib_flow_action *maction; + int ret; + + ret = uverbs_get_const(&ft_type, attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE); + if (ret) + return ret; + + ret = uverbs_get_const(&dv_prt, attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE); + if (ret) + return ret; + + if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type)) + return -EOPNOTSUPP; + + maction = kzalloc(sizeof(*maction), GFP_KERNEL); + if (!maction) + return -ENOMEM; + + if (dv_prt == + MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) { + maction->flow_action_raw.sub_type = + MLX5_IB_FLOW_ACTION_DECAP; + maction->flow_action_raw.dev = mdev; + } else { + void *in; + int len; + + in = uverbs_attr_get_alloced_ptr(attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF); + if (IS_ERR(in)) { + ret = PTR_ERR(in); + goto free_maction; + } + + len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF); + + ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev, + maction, ft_type, dv_prt, in, len); + if (ret) + goto free_maction; + } + + uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev, + IB_FLOW_ACTION_UNSPECIFIED); + return 0; + +free_maction: + kfree(maction); + return ret; +} + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_CREATE_FLOW, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, + UVERBS_OBJECT_FLOW, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE, + UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER, + MLX5_IB_OBJECT_FLOW_MATCHER, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP, + UVERBS_OBJECT_QP, + UVERBS_ACCESS_READ), + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_READ), + UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_READ, 1, + MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS, + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG, + UVERBS_ATTR_TYPE(u32), + UA_OPTIONAL), + UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_READ, 1, 1, + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, + UVERBS_ATTR_MIN_SIZE(sizeof(u32)), + UA_OPTIONAL, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS, + enum mlx5_ib_create_flow_flags, + UA_OPTIONAL)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_DESTROY_FLOW, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, + UVERBS_OBJECT_FLOW, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +ADD_UVERBS_METHODS(mlx5_ib_fs, + UVERBS_OBJECT_FLOW, + &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW), + &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, + UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES( + set_add_copy_action_in_auto)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE, + enum mlx5_ib_uapi_flow_table_type, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + 
MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF, + UVERBS_ATTR_MIN_SIZE(1), + UA_ALLOC_AND_COPY, + UA_OPTIONAL), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE, + enum mlx5_ib_uapi_flow_action_packet_reformat_type, + UA_MANDATORY), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE, + enum mlx5_ib_uapi_flow_table_type, + UA_MANDATORY)); + +ADD_UVERBS_METHODS( + mlx5_ib_flow_actions, + UVERBS_OBJECT_FLOW_ACTION, + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER), + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_FLOW_MATCHER_CREATE, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE, + MLX5_IB_OBJECT_FLOW_MATCHER, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK, + UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)), + UA_MANDATORY), + UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE, + mlx5_ib_flow_type, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, + UVERBS_ATTR_TYPE(u8), + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, + enum ib_flow_flags, + UA_OPTIONAL), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE, + enum mlx5_ib_uapi_flow_table_type, + UA_OPTIONAL)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_FLOW_MATCHER_DESTROY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE, + MLX5_IB_OBJECT_FLOW_MATCHER, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER, + UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup), + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE), + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY)); + +const struct uapi_definition mlx5_ib_flow_defs[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_FLOW_MATCHER), + UAPI_DEF_CHAIN_OBJ_TREE( + UVERBS_OBJECT_FLOW, + &mlx5_ib_fs), + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, + &mlx5_ib_flow_actions), + {}, +}; + +static const struct ib_device_ops flow_ops = { + .create_flow = mlx5_ib_create_flow, + .destroy_flow = mlx5_ib_destroy_flow, + .destroy_flow_action = mlx5_ib_destroy_flow_action, +}; + +static const struct ib_device_ops flow_ipsec_ops = { + .create_flow_action_esp = mlx5_ib_create_flow_action_esp, + .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp, +}; + +int mlx5_ib_fs_init(struct mlx5_ib_dev *dev) +{ + dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL); + + if (!dev->flow_db) + return -ENOMEM; + + mutex_init(&dev->flow_db->lock); + + ib_set_device_ops(&dev->ib_dev, &flow_ops); + if (mlx5_accel_ipsec_device_caps(dev->mdev) & + MLX5_ACCEL_IPSEC_CAP_DEVICE) + ib_set_device_ops(&dev->ib_dev, &flow_ipsec_ops); + + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/fs.h b/drivers/infiniband/hw/mlx5/fs.h new file mode 100644 index 000000000000..ad320adaf321 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/fs.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. 
+ */ + +#ifndef _MLX5_IB_FS_H +#define _MLX5_IB_FS_H + +#include "mlx5_ib.h" + +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) +int mlx5_ib_fs_init(struct mlx5_ib_dev *dev); +#else +static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev) +{ + dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL); + + if (!dev->flow_db) + return -ENOMEM; + + mutex_init(&dev->flow_db->lock); + return 0; +} +#endif +static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev) +{ + kfree(dev->flow_db); +} +#endif /* _MLX5_IB_FS_H */ diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index c14955061273..f6cb8c7f7183 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -32,12 +32,12 @@ #include "mlx5_ib.h" #include "ib_rep.h" #include "cmd.h" +#include "fs.h" #include "srq.h" #include "qp.h" #include "wr.h" #include "restrack.h" #include "counters.h" -#include #include #include #include @@ -2579,1685 +2579,6 @@ static void mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid); } -enum { - MATCH_CRITERIA_ENABLE_OUTER_BIT, - MATCH_CRITERIA_ENABLE_MISC_BIT, - MATCH_CRITERIA_ENABLE_INNER_BIT, - MATCH_CRITERIA_ENABLE_MISC2_BIT -}; - -#define HEADER_IS_ZERO(match_criteria, headers) \ - !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \ - 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \ - -static u8 get_match_criteria_enable(u32 *match_criteria) -{ - u8 match_criteria_enable; - - match_criteria_enable = - (!HEADER_IS_ZERO(match_criteria, outer_headers)) << - MATCH_CRITERIA_ENABLE_OUTER_BIT; - match_criteria_enable |= - (!HEADER_IS_ZERO(match_criteria, misc_parameters)) << - MATCH_CRITERIA_ENABLE_MISC_BIT; - match_criteria_enable |= - (!HEADER_IS_ZERO(match_criteria, inner_headers)) << - MATCH_CRITERIA_ENABLE_INNER_BIT; - match_criteria_enable |= - (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) << - MATCH_CRITERIA_ENABLE_MISC2_BIT; - - return match_criteria_enable; -} - -static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) -{ - u8 entry_mask; - u8 entry_val; - int err = 0; - - if (!mask) - goto out; - - entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c, - ip_protocol); - entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v, - ip_protocol); - if (!entry_mask) { - MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask); - MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); - goto out; - } - /* Don't override existing ip protocol */ - if (mask != entry_mask || val != entry_val) - err = -EINVAL; -out: - return err; -} - -static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val, - bool inner) -{ - if (inner) { - MLX5_SET(fte_match_set_misc, - misc_c, inner_ipv6_flow_label, mask); - MLX5_SET(fte_match_set_misc, - misc_v, inner_ipv6_flow_label, val); - } else { - MLX5_SET(fte_match_set_misc, - misc_c, outer_ipv6_flow_label, mask); - MLX5_SET(fte_match_set_misc, - misc_v, outer_ipv6_flow_label, val); - } -} - -static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) -{ - MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask); - MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val); - MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2); - MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2); -} - -static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask) -{ - if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) && - !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL)) - return 
-EOPNOTSUPP; - - if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) && - !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP)) - return -EOPNOTSUPP; - - if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) && - !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS)) - return -EOPNOTSUPP; - - if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) && - !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL)) - return -EOPNOTSUPP; - - return 0; -} - -#define LAST_ETH_FIELD vlan_tag -#define LAST_IB_FIELD sl -#define LAST_IPV4_FIELD tos -#define LAST_IPV6_FIELD traffic_class -#define LAST_TCP_UDP_FIELD src_port -#define LAST_TUNNEL_FIELD tunnel_id -#define LAST_FLOW_TAG_FIELD tag_id -#define LAST_DROP_FIELD size -#define LAST_COUNTERS_FIELD counters - -/* Field is the last supported field */ -#define FIELDS_NOT_SUPPORTED(filter, field)\ - memchr_inv((void *)&filter.field +\ - sizeof(filter.field), 0,\ - sizeof(filter) -\ - offsetof(typeof(filter), field) -\ - sizeof(filter.field)) - -int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, - bool is_egress, - struct mlx5_flow_act *action) -{ - - switch (maction->ib_action.type) { - case IB_FLOW_ACTION_ESP: - if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT)) - return -EINVAL; - /* Currently only AES_GCM keymat is supported by the driver */ - action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx; - action->action |= is_egress ? - MLX5_FLOW_CONTEXT_ACTION_ENCRYPT : - MLX5_FLOW_CONTEXT_ACTION_DECRYPT; - return 0; - case IB_FLOW_ACTION_UNSPECIFIED: - if (maction->flow_action_raw.sub_type == - MLX5_IB_FLOW_ACTION_MODIFY_HEADER) { - if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - return -EINVAL; - action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - action->modify_hdr = - maction->flow_action_raw.modify_hdr; - return 0; - } - if (maction->flow_action_raw.sub_type == - MLX5_IB_FLOW_ACTION_DECAP) { - if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) - return -EINVAL; - action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; - return 0; - } - if (maction->flow_action_raw.sub_type == - MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) { - if (action->action & - MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) - return -EINVAL; - action->action |= - MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - action->pkt_reformat = - maction->flow_action_raw.pkt_reformat; - return 0; - } - /* fall through */ - default: - return -EOPNOTSUPP; - } -} - -static int parse_flow_attr(struct mlx5_core_dev *mdev, - struct mlx5_flow_spec *spec, - const union ib_flow_spec *ib_spec, - const struct ib_flow_attr *flow_attr, - struct mlx5_flow_act *action, u32 prev_type) -{ - struct mlx5_flow_context *flow_context = &spec->flow_context; - u32 *match_c = spec->match_criteria; - u32 *match_v = spec->match_value; - void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, - misc_parameters); - void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v, - misc_parameters); - void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c, - misc_parameters_2); - void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v, - misc_parameters_2); - void *headers_c; - void *headers_v; - int match_ipv; - int ret; - - if (ib_spec->type & IB_FLOW_SPEC_INNER) { - headers_c = MLX5_ADDR_OF(fte_match_param, match_c, - inner_headers); - headers_v = MLX5_ADDR_OF(fte_match_param, match_v, - inner_headers); - match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.inner_ip_version); - } else { - headers_c = MLX5_ADDR_OF(fte_match_param, match_c, - outer_headers); - headers_v = 
MLX5_ADDR_OF(fte_match_param, match_v, - outer_headers); - match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.outer_ip_version); - } - - switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) { - case IB_FLOW_SPEC_ETH: - if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD)) - return -EOPNOTSUPP; - - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - dmac_47_16), - ib_spec->eth.mask.dst_mac); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dmac_47_16), - ib_spec->eth.val.dst_mac); - - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - smac_47_16), - ib_spec->eth.mask.src_mac); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - smac_47_16), - ib_spec->eth.val.src_mac); - - if (ib_spec->eth.mask.vlan_tag) { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - cvlan_tag, 1); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - cvlan_tag, 1); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - first_vid, ntohs(ib_spec->eth.mask.vlan_tag)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - first_vid, ntohs(ib_spec->eth.val.vlan_tag)); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - first_cfi, - ntohs(ib_spec->eth.mask.vlan_tag) >> 12); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - first_cfi, - ntohs(ib_spec->eth.val.vlan_tag) >> 12); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - first_prio, - ntohs(ib_spec->eth.mask.vlan_tag) >> 13); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - first_prio, - ntohs(ib_spec->eth.val.vlan_tag) >> 13); - } - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - ethertype, ntohs(ib_spec->eth.mask.ether_type)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ethertype, ntohs(ib_spec->eth.val.ether_type)); - break; - case IB_FLOW_SPEC_IPV4: - if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD)) - return -EOPNOTSUPP; - - if (match_ipv) { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - ip_version, 0xf); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ip_version, MLX5_FS_IPV4_VERSION); - } else { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - ethertype, 0xffff); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ethertype, ETH_P_IP); - } - - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - src_ipv4_src_ipv6.ipv4_layout.ipv4), - &ib_spec->ipv4.mask.src_ip, - sizeof(ib_spec->ipv4.mask.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - src_ipv4_src_ipv6.ipv4_layout.ipv4), - &ib_spec->ipv4.val.src_ip, - sizeof(ib_spec->ipv4.val.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4), - &ib_spec->ipv4.mask.dst_ip, - sizeof(ib_spec->ipv4.mask.dst_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4), - &ib_spec->ipv4.val.dst_ip, - sizeof(ib_spec->ipv4.val.dst_ip)); - - set_tos(headers_c, headers_v, - ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos); - - if (set_proto(headers_c, headers_v, - ib_spec->ipv4.mask.proto, - ib_spec->ipv4.val.proto)) - return -EINVAL; - break; - case IB_FLOW_SPEC_IPV6: - if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD)) - return -EOPNOTSUPP; - - if (match_ipv) { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - ip_version, 0xf); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ip_version, MLX5_FS_IPV6_VERSION); - } else { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - ethertype, 0xffff); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ethertype, ETH_P_IPV6); - } - - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - src_ipv4_src_ipv6.ipv6_layout.ipv6), - 
&ib_spec->ipv6.mask.src_ip, - sizeof(ib_spec->ipv6.mask.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - src_ipv4_src_ipv6.ipv6_layout.ipv6), - &ib_spec->ipv6.val.src_ip, - sizeof(ib_spec->ipv6.val.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &ib_spec->ipv6.mask.dst_ip, - sizeof(ib_spec->ipv6.mask.dst_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &ib_spec->ipv6.val.dst_ip, - sizeof(ib_spec->ipv6.val.dst_ip)); - - set_tos(headers_c, headers_v, - ib_spec->ipv6.mask.traffic_class, - ib_spec->ipv6.val.traffic_class); - - if (set_proto(headers_c, headers_v, - ib_spec->ipv6.mask.next_hdr, - ib_spec->ipv6.val.next_hdr)) - return -EINVAL; - - set_flow_label(misc_params_c, misc_params_v, - ntohl(ib_spec->ipv6.mask.flow_label), - ntohl(ib_spec->ipv6.val.flow_label), - ib_spec->type & IB_FLOW_SPEC_INNER); - break; - case IB_FLOW_SPEC_ESP: - if (ib_spec->esp.mask.seq) - return -EOPNOTSUPP; - - MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, - ntohl(ib_spec->esp.mask.spi)); - MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, - ntohl(ib_spec->esp.val.spi)); - break; - case IB_FLOW_SPEC_TCP: - if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, - LAST_TCP_UDP_FIELD)) - return -EOPNOTSUPP; - - if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP)) - return -EINVAL; - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport, - ntohs(ib_spec->tcp_udp.mask.src_port)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport, - ntohs(ib_spec->tcp_udp.val.src_port)); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport, - ntohs(ib_spec->tcp_udp.mask.dst_port)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport, - ntohs(ib_spec->tcp_udp.val.dst_port)); - break; - case IB_FLOW_SPEC_UDP: - if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, - LAST_TCP_UDP_FIELD)) - return -EOPNOTSUPP; - - if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP)) - return -EINVAL; - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, - ntohs(ib_spec->tcp_udp.mask.src_port)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, - ntohs(ib_spec->tcp_udp.val.src_port)); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, - ntohs(ib_spec->tcp_udp.mask.dst_port)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, - ntohs(ib_spec->tcp_udp.val.dst_port)); - break; - case IB_FLOW_SPEC_GRE: - if (ib_spec->gre.mask.c_ks_res0_ver) - return -EOPNOTSUPP; - - if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE)) - return -EINVAL; - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, - 0xff); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, - IPPROTO_GRE); - - MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol, - ntohs(ib_spec->gre.mask.protocol)); - MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol, - ntohs(ib_spec->gre.val.protocol)); - - memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c, - gre_key.nvgre.hi), - &ib_spec->gre.mask.key, - sizeof(ib_spec->gre.mask.key)); - memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v, - gre_key.nvgre.hi), - &ib_spec->gre.val.key, - sizeof(ib_spec->gre.val.key)); - break; - case IB_FLOW_SPEC_MPLS: - switch (prev_type) { - case IB_FLOW_SPEC_UDP: - if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.outer_first_mpls_over_udp), - &ib_spec->mpls.mask.tag)) - return -EOPNOTSUPP; - - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, - outer_first_mpls_over_udp), - 
&ib_spec->mpls.val.tag, - sizeof(ib_spec->mpls.val.tag)); - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, - outer_first_mpls_over_udp), - &ib_spec->mpls.mask.tag, - sizeof(ib_spec->mpls.mask.tag)); - break; - case IB_FLOW_SPEC_GRE: - if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.outer_first_mpls_over_gre), - &ib_spec->mpls.mask.tag)) - return -EOPNOTSUPP; - - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, - outer_first_mpls_over_gre), - &ib_spec->mpls.val.tag, - sizeof(ib_spec->mpls.val.tag)); - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, - outer_first_mpls_over_gre), - &ib_spec->mpls.mask.tag, - sizeof(ib_spec->mpls.mask.tag)); - break; - default: - if (ib_spec->type & IB_FLOW_SPEC_INNER) { - if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.inner_first_mpls), - &ib_spec->mpls.mask.tag)) - return -EOPNOTSUPP; - - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, - inner_first_mpls), - &ib_spec->mpls.val.tag, - sizeof(ib_spec->mpls.val.tag)); - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, - inner_first_mpls), - &ib_spec->mpls.mask.tag, - sizeof(ib_spec->mpls.mask.tag)); - } else { - if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.outer_first_mpls), - &ib_spec->mpls.mask.tag)) - return -EOPNOTSUPP; - - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, - outer_first_mpls), - &ib_spec->mpls.val.tag, - sizeof(ib_spec->mpls.val.tag)); - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, - outer_first_mpls), - &ib_spec->mpls.mask.tag, - sizeof(ib_spec->mpls.mask.tag)); - } - } - break; - case IB_FLOW_SPEC_VXLAN_TUNNEL: - if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask, - LAST_TUNNEL_FIELD)) - return -EOPNOTSUPP; - - MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni, - ntohl(ib_spec->tunnel.mask.tunnel_id)); - MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni, - ntohl(ib_spec->tunnel.val.tunnel_id)); - break; - case IB_FLOW_SPEC_ACTION_TAG: - if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag, - LAST_FLOW_TAG_FIELD)) - return -EOPNOTSUPP; - if (ib_spec->flow_tag.tag_id >= BIT(24)) - return -EINVAL; - - flow_context->flow_tag = ib_spec->flow_tag.tag_id; - flow_context->flags |= FLOW_CONTEXT_HAS_TAG; - break; - case IB_FLOW_SPEC_ACTION_DROP: - if (FIELDS_NOT_SUPPORTED(ib_spec->drop, - LAST_DROP_FIELD)) - return -EOPNOTSUPP; - action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; - break; - case IB_FLOW_SPEC_ACTION_HANDLE: - ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act), - flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action); - if (ret) - return ret; - break; - case IB_FLOW_SPEC_ACTION_COUNT: - if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count, - LAST_COUNTERS_FIELD)) - return -EOPNOTSUPP; - - /* for now support only one counters spec per flow */ - if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) - return -EINVAL; - - action->counters = ib_spec->flow_count.counters; - action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; - break; - default: - return -EINVAL; - } - - return 0; -} - -/* If a flow could catch both multicast and unicast packets, - * it won't fall into the multicast flow steering table and this rule - * could steal other multicast packets. 
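For reference, a minimal standalone sketch (an editorial illustration, not part of this patch) of the two multicast tests the classification code below relies on. The helper names here are hypothetical; the kernel itself uses is_multicast_ether_addr() and ipv4_is_multicast() for the same checks.

/* Illustrative sketch only, assuming userspace-style headers. */
#include <stdbool.h>
#include <stdint.h>
#include <arpa/inet.h>

/* Ethernet multicast: least-significant bit of the first address octet is set. */
static bool eth_addr_is_multicast(const uint8_t addr[6])
{
	return addr[0] & 0x01;
}

/* IPv4 multicast: destination address inside 224.0.0.0/4. */
static bool ipv4_addr_is_multicast(uint32_t be_addr)
{
	return (ntohl(be_addr) & 0xf0000000u) == 0xe0000000u;
}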
- */ -static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr) -{ - union ib_flow_spec *flow_spec; - - if (ib_attr->type != IB_FLOW_ATTR_NORMAL || - ib_attr->num_of_specs < 1) - return false; - - flow_spec = (union ib_flow_spec *)(ib_attr + 1); - if (flow_spec->type == IB_FLOW_SPEC_IPV4) { - struct ib_flow_spec_ipv4 *ipv4_spec; - - ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec; - if (ipv4_is_multicast(ipv4_spec->val.dst_ip)) - return true; - - return false; - } - - if (flow_spec->type == IB_FLOW_SPEC_ETH) { - struct ib_flow_spec_eth *eth_spec; - - eth_spec = (struct ib_flow_spec_eth *)flow_spec; - return is_multicast_ether_addr(eth_spec->mask.dst_mac) && - is_multicast_ether_addr(eth_spec->val.dst_mac); - } - - return false; -} - -enum valid_spec { - VALID_SPEC_INVALID, - VALID_SPEC_VALID, - VALID_SPEC_NA, -}; - -static enum valid_spec -is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev, - const struct mlx5_flow_spec *spec, - const struct mlx5_flow_act *flow_act, - bool egress) -{ - const u32 *match_c = spec->match_criteria; - bool is_crypto = - (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT)); - bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c); - bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP; - - /* - * Currently only crypto is supported in egress, when regular egress - * rules would be supported, always return VALID_SPEC_NA. - */ - if (!is_crypto) - return VALID_SPEC_NA; - - return is_crypto && is_ipsec && - (!egress || (!is_drop && - !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ? - VALID_SPEC_VALID : VALID_SPEC_INVALID; -} - -static bool is_valid_spec(struct mlx5_core_dev *mdev, - const struct mlx5_flow_spec *spec, - const struct mlx5_flow_act *flow_act, - bool egress) -{ - /* We curretly only support ipsec egress flow */ - return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID; -} - -static bool is_valid_ethertype(struct mlx5_core_dev *mdev, - const struct ib_flow_attr *flow_attr, - bool check_inner) -{ - union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1); - int match_ipv = check_inner ? - MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.inner_ip_version) : - MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.outer_ip_version); - int inner_bit = check_inner ? 
IB_FLOW_SPEC_INNER : 0; - bool ipv4_spec_valid, ipv6_spec_valid; - unsigned int ip_spec_type = 0; - bool has_ethertype = false; - unsigned int spec_index; - bool mask_valid = true; - u16 eth_type = 0; - bool type_valid; - - /* Validate that ethertype is correct */ - for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { - if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) && - ib_spec->eth.mask.ether_type) { - mask_valid = (ib_spec->eth.mask.ether_type == - htons(0xffff)); - has_ethertype = true; - eth_type = ntohs(ib_spec->eth.val.ether_type); - } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) || - (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) { - ip_spec_type = ib_spec->type; - } - ib_spec = (void *)ib_spec + ib_spec->size; - } - - type_valid = (!has_ethertype) || (!ip_spec_type); - if (!type_valid && mask_valid) { - ipv4_spec_valid = (eth_type == ETH_P_IP) && - (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit)); - ipv6_spec_valid = (eth_type == ETH_P_IPV6) && - (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit)); - - type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) || - (((eth_type == ETH_P_MPLS_UC) || - (eth_type == ETH_P_MPLS_MC)) && match_ipv); - } - - return type_valid; -} - -static bool is_valid_attr(struct mlx5_core_dev *mdev, - const struct ib_flow_attr *flow_attr) -{ - return is_valid_ethertype(mdev, flow_attr, false) && - is_valid_ethertype(mdev, flow_attr, true); -} - -static void put_flow_table(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *prio, bool ft_added) -{ - prio->refcount -= !!ft_added; - if (!prio->refcount) { - mlx5_destroy_flow_table(prio->flow_table); - prio->flow_table = NULL; - } -} - -static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) -{ - struct mlx5_ib_flow_handler *handler = container_of(flow_id, - struct mlx5_ib_flow_handler, - ibflow); - struct mlx5_ib_flow_handler *iter, *tmp; - struct mlx5_ib_dev *dev = handler->dev; - - mutex_lock(&dev->flow_db->lock); - - list_for_each_entry_safe(iter, tmp, &handler->list, list) { - mlx5_del_flow_rules(iter->rule); - put_flow_table(dev, iter->prio, true); - list_del(&iter->list); - kfree(iter); - } - - mlx5_del_flow_rules(handler->rule); - put_flow_table(dev, handler->prio, true); - mlx5_ib_counters_clear_description(handler->ibcounters); - mutex_unlock(&dev->flow_db->lock); - if (handler->flow_matcher) - atomic_dec(&handler->flow_matcher->usecnt); - kfree(handler); - - return 0; -} - -static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap) -{ - priority *= 2; - if (!dont_trap) - priority++; - return priority; -} - -enum flow_table_type { - MLX5_IB_FT_RX, - MLX5_IB_FT_TX -}; - -#define MLX5_FS_MAX_TYPES 6 -#define MLX5_FS_MAX_ENTRIES BIT(16) - -static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns, - struct mlx5_ib_flow_prio *prio, - int priority, - int num_entries, int num_groups, - u32 flags) -{ - struct mlx5_flow_table_attr ft_attr = {}; - struct mlx5_flow_table *ft; - - ft_attr.prio = priority; - ft_attr.max_fte = num_entries; - ft_attr.flags = flags; - ft_attr.autogroup.max_num_groups = num_groups; - ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); - if (IS_ERR(ft)) - return ERR_CAST(ft); - - prio->flow_table = ft; - prio->refcount = 0; - return prio; -} - -static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, - struct ib_flow_attr *flow_attr, - enum flow_table_type ft_type) -{ - bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP; - struct mlx5_flow_namespace *ns = NULL; - struct 
mlx5_ib_flow_prio *prio; - struct mlx5_flow_table *ft; - int max_table_size; - int num_entries; - int num_groups; - bool esw_encap; - u32 flags = 0; - int priority; - - max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - log_max_ft_size)); - esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != - DEVLINK_ESWITCH_ENCAP_MODE_NONE; - if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { - enum mlx5_flow_namespace_type fn_type; - - if (flow_is_multicast_only(flow_attr) && - !dont_trap) - priority = MLX5_IB_FLOW_MCAST_PRIO; - else - priority = ib_prio_to_core_prio(flow_attr->priority, - dont_trap); - if (ft_type == MLX5_IB_FT_RX) { - fn_type = MLX5_FLOW_NAMESPACE_BYPASS; - prio = &dev->flow_db->prios[priority]; - if (!dev->is_rep && !esw_encap && - MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap)) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; - if (!dev->is_rep && !esw_encap && - MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - reformat_l3_tunnel_to_l2)) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } else { - max_table_size = - BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, - log_max_ft_size)); - fn_type = MLX5_FLOW_NAMESPACE_EGRESS; - prio = &dev->flow_db->egress_prios[priority]; - if (!dev->is_rep && !esw_encap && - MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat)) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } - ns = mlx5_get_flow_namespace(dev->mdev, fn_type); - num_entries = MLX5_FS_MAX_ENTRIES; - num_groups = MLX5_FS_MAX_TYPES; - } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { - ns = mlx5_get_flow_namespace(dev->mdev, - MLX5_FLOW_NAMESPACE_LEFTOVERS); - build_leftovers_ft_param(&priority, - &num_entries, - &num_groups); - prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; - } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { - if (!MLX5_CAP_FLOWTABLE(dev->mdev, - allow_sniffer_and_nic_rx_shared_tir)) - return ERR_PTR(-ENOTSUPP); - - ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ? 
- MLX5_FLOW_NAMESPACE_SNIFFER_RX : - MLX5_FLOW_NAMESPACE_SNIFFER_TX); - - prio = &dev->flow_db->sniffer[ft_type]; - priority = 0; - num_entries = 1; - num_groups = 1; - } - - if (!ns) - return ERR_PTR(-ENOTSUPP); - - max_table_size = min_t(int, num_entries, max_table_size); - - ft = prio->flow_table; - if (!ft) - return _get_prio(ns, prio, priority, max_table_size, num_groups, - flags); - - return prio; -} - -static void set_underlay_qp(struct mlx5_ib_dev *dev, - struct mlx5_flow_spec *spec, - u32 underlay_qpn) -{ - void *misc_params_c = MLX5_ADDR_OF(fte_match_param, - spec->match_criteria, - misc_parameters); - void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); - - if (underlay_qpn && - MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - ft_field_support.bth_dst_qp)) { - MLX5_SET(fte_match_set_misc, - misc_params_v, bth_dst_qp, underlay_qpn); - MLX5_SET(fte_match_set_misc, - misc_params_c, bth_dst_qp, 0xffffff); - } -} - -static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev, - struct mlx5_flow_spec *spec, - struct mlx5_eswitch_rep *rep) -{ - struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; - void *misc; - - if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters_2); - - MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, - mlx5_eswitch_get_vport_metadata_for_match(esw, - rep->vport)); - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters_2); - - MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, - mlx5_eswitch_get_vport_metadata_mask()); - } else { - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); - - MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport); - - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters); - - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - } -} - -static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_prio, - const struct ib_flow_attr *flow_attr, - struct mlx5_flow_destination *dst, - u32 underlay_qpn, - struct mlx5_ib_create_flow *ucmd) -{ - struct mlx5_flow_table *ft = ft_prio->flow_table; - struct mlx5_ib_flow_handler *handler; - struct mlx5_flow_act flow_act = {}; - struct mlx5_flow_spec *spec; - struct mlx5_flow_destination dest_arr[2] = {}; - struct mlx5_flow_destination *rule_dst = dest_arr; - const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr); - unsigned int spec_index; - u32 prev_type = 0; - int err = 0; - int dest_num = 0; - bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; - - if (!is_valid_attr(dev->mdev, flow_attr)) - return ERR_PTR(-EINVAL); - - if (dev->is_rep && is_egress) - return ERR_PTR(-EINVAL); - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - handler = kzalloc(sizeof(*handler), GFP_KERNEL); - if (!handler || !spec) { - err = -ENOMEM; - goto free; - } - - INIT_LIST_HEAD(&handler->list); - - for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { - err = parse_flow_attr(dev->mdev, spec, - ib_flow, flow_attr, &flow_act, - prev_type); - if (err < 0) - goto free; - - prev_type = ((union ib_flow_spec *)ib_flow)->type; - ib_flow += ((union ib_flow_spec *)ib_flow)->size; - } - - if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) { - memcpy(&dest_arr[0], dst, sizeof(*dst)); - dest_num++; - } - - if (!flow_is_multicast_only(flow_attr)) - set_underlay_qp(dev, spec, underlay_qpn); - - if (dev->is_rep) { - struct mlx5_eswitch_rep 
*rep; - - rep = dev->port[flow_attr->port - 1].rep; - if (!rep) { - err = -EINVAL; - goto free; - } - - mlx5_ib_set_rule_source_port(dev, spec, rep); - } - - spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); - - if (is_egress && - !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) { - err = -EINVAL; - goto free; - } - - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - struct mlx5_ib_mcounters *mcounters; - - err = mlx5_ib_flow_counters_set_data(flow_act.counters, ucmd); - if (err) - goto free; - - mcounters = to_mcounters(flow_act.counters); - handler->ibcounters = flow_act.counters; - dest_arr[dest_num].type = - MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest_arr[dest_num].counter_id = - mlx5_fc_id(mcounters->hw_cntrs_hndl); - dest_num++; - } - - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) { - if (!dest_num) - rule_dst = NULL; - } else { - if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) - flow_act.action |= - MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; - if (is_egress) - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; - else if (dest_num) - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - } - - if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) && - (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) { - mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n", - spec->flow_context.flow_tag, flow_attr->type); - err = -EINVAL; - goto free; - } - handler->rule = mlx5_add_flow_rules(ft, spec, - &flow_act, - rule_dst, dest_num); - - if (IS_ERR(handler->rule)) { - err = PTR_ERR(handler->rule); - goto free; - } - - ft_prio->refcount++; - handler->prio = ft_prio; - handler->dev = dev; - - ft_prio->flow_table = ft; -free: - if (err && handler) { - mlx5_ib_counters_clear_description(handler->ibcounters); - kfree(handler); - } - kvfree(spec); - return err ? 
ERR_PTR(err) : handler; -} - -static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_prio, - const struct ib_flow_attr *flow_attr, - struct mlx5_flow_destination *dst) -{ - return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL); -} - -enum { - LEFTOVERS_MC, - LEFTOVERS_UC, -}; - -static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_prio, - struct ib_flow_attr *flow_attr, - struct mlx5_flow_destination *dst) -{ - struct mlx5_ib_flow_handler *handler_ucast = NULL; - struct mlx5_ib_flow_handler *handler = NULL; - - static struct { - struct ib_flow_attr flow_attr; - struct ib_flow_spec_eth eth_flow; - } leftovers_specs[] = { - [LEFTOVERS_MC] = { - .flow_attr = { - .num_of_specs = 1, - .size = sizeof(leftovers_specs[0]) - }, - .eth_flow = { - .type = IB_FLOW_SPEC_ETH, - .size = sizeof(struct ib_flow_spec_eth), - .mask = {.dst_mac = {0x1} }, - .val = {.dst_mac = {0x1} } - } - }, - [LEFTOVERS_UC] = { - .flow_attr = { - .num_of_specs = 1, - .size = sizeof(leftovers_specs[0]) - }, - .eth_flow = { - .type = IB_FLOW_SPEC_ETH, - .size = sizeof(struct ib_flow_spec_eth), - .mask = {.dst_mac = {0x1} }, - .val = {.dst_mac = {} } - } - } - }; - - handler = create_flow_rule(dev, ft_prio, - &leftovers_specs[LEFTOVERS_MC].flow_attr, - dst); - if (!IS_ERR(handler) && - flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) { - handler_ucast = create_flow_rule(dev, ft_prio, - &leftovers_specs[LEFTOVERS_UC].flow_attr, - dst); - if (IS_ERR(handler_ucast)) { - mlx5_del_flow_rules(handler->rule); - ft_prio->refcount--; - kfree(handler); - handler = handler_ucast; - } else { - list_add(&handler_ucast->list, &handler->list); - } - } - - return handler; -} - -static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_rx, - struct mlx5_ib_flow_prio *ft_tx, - struct mlx5_flow_destination *dst) -{ - struct mlx5_ib_flow_handler *handler_rx; - struct mlx5_ib_flow_handler *handler_tx; - int err; - static const struct ib_flow_attr flow_attr = { - .num_of_specs = 0, - .size = sizeof(flow_attr) - }; - - handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst); - if (IS_ERR(handler_rx)) { - err = PTR_ERR(handler_rx); - goto err; - } - - handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst); - if (IS_ERR(handler_tx)) { - err = PTR_ERR(handler_tx); - goto err_tx; - } - - list_add(&handler_tx->list, &handler_rx->list); - - return handler_rx; - -err_tx: - mlx5_del_flow_rules(handler_rx->rule); - ft_rx->refcount--; - kfree(handler_rx); -err: - return ERR_PTR(err); -} - -static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, - struct ib_flow_attr *flow_attr, - int domain, - struct ib_udata *udata) -{ - struct mlx5_ib_dev *dev = to_mdev(qp->device); - struct mlx5_ib_qp *mqp = to_mqp(qp); - struct mlx5_ib_flow_handler *handler = NULL; - struct mlx5_flow_destination *dst = NULL; - struct mlx5_ib_flow_prio *ft_prio_tx = NULL; - struct mlx5_ib_flow_prio *ft_prio; - bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; - struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr; - size_t min_ucmd_sz, required_ucmd_sz; - int err; - int underlay_qpn; - - if (udata && udata->inlen) { - min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) + - sizeof(ucmd_hdr.reserved); - if (udata->inlen < min_ucmd_sz) - return ERR_PTR(-EOPNOTSUPP); - - err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz); - if (err) - return ERR_PTR(err); - - /* currently supports only one 
counters data */ - if (ucmd_hdr.ncounters_data > 1) - return ERR_PTR(-EINVAL); - - required_ucmd_sz = min_ucmd_sz + - sizeof(struct mlx5_ib_flow_counters_data) * - ucmd_hdr.ncounters_data; - if (udata->inlen > required_ucmd_sz && - !ib_is_udata_cleared(udata, required_ucmd_sz, - udata->inlen - required_ucmd_sz)) - return ERR_PTR(-EOPNOTSUPP); - - ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL); - if (!ucmd) - return ERR_PTR(-ENOMEM); - - err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz); - if (err) - goto free_ucmd; - } - - if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) { - err = -ENOMEM; - goto free_ucmd; - } - - if (domain != IB_FLOW_DOMAIN_USER || - flow_attr->port > dev->num_ports || - (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | - IB_FLOW_ATTR_FLAGS_EGRESS))) { - err = -EINVAL; - goto free_ucmd; - } - - if (is_egress && - (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) { - err = -EINVAL; - goto free_ucmd; - } - - dst = kzalloc(sizeof(*dst), GFP_KERNEL); - if (!dst) { - err = -ENOMEM; - goto free_ucmd; - } - - mutex_lock(&dev->flow_db->lock); - - ft_prio = get_flow_table(dev, flow_attr, - is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX); - if (IS_ERR(ft_prio)) { - err = PTR_ERR(ft_prio); - goto unlock; - } - if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { - ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX); - if (IS_ERR(ft_prio_tx)) { - err = PTR_ERR(ft_prio_tx); - ft_prio_tx = NULL; - goto destroy_ft; - } - } - - if (is_egress) { - dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT; - } else { - dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; - if (mqp->is_rss) - dst->tir_num = mqp->rss_qp.tirn; - else - dst->tir_num = mqp->raw_packet_qp.rq.tirn; - } - - if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { - underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ? 
- mqp->underlay_qpn : - 0; - handler = _create_flow_rule(dev, ft_prio, flow_attr, dst, - underlay_qpn, ucmd); - } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { - handler = create_leftovers_rule(dev, ft_prio, flow_attr, - dst); - } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { - handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst); - } else { - err = -EINVAL; - goto destroy_ft; - } - - if (IS_ERR(handler)) { - err = PTR_ERR(handler); - handler = NULL; - goto destroy_ft; - } - - mutex_unlock(&dev->flow_db->lock); - kfree(dst); - kfree(ucmd); - - return &handler->ibflow; - -destroy_ft: - put_flow_table(dev, ft_prio, false); - if (ft_prio_tx) - put_flow_table(dev, ft_prio_tx, false); -unlock: - mutex_unlock(&dev->flow_db->lock); - kfree(dst); -free_ucmd: - kfree(ucmd); - return ERR_PTR(err); -} - -static struct mlx5_ib_flow_prio * -_get_flow_table(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_matcher *fs_matcher, - bool mcast) -{ - struct mlx5_flow_namespace *ns = NULL; - struct mlx5_ib_flow_prio *prio = NULL; - int max_table_size = 0; - bool esw_encap; - u32 flags = 0; - int priority; - - if (mcast) - priority = MLX5_IB_FLOW_MCAST_PRIO; - else - priority = ib_prio_to_core_prio(fs_matcher->priority, false); - - esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != - DEVLINK_ESWITCH_ENCAP_MODE_NONE; - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { - max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - log_max_ft_size)); - if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; - if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - reformat_l3_tunnel_to_l2) && - !esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) { - max_table_size = BIT( - MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size)); - if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) { - max_table_size = BIT( - MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size)); - if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; - if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) && - esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - priority = FDB_BYPASS_PATH; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) { - max_table_size = - BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, - log_max_ft_size)); - priority = fs_matcher->priority; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) { - max_table_size = - BIT(MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, - log_max_ft_size)); - priority = fs_matcher->priority; - } - - max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES); - - ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type); - if (!ns) - return ERR_PTR(-ENOTSUPP); - - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) - prio = &dev->flow_db->prios[priority]; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) - prio = &dev->flow_db->egress_prios[priority]; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) - prio = &dev->flow_db->fdb; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) - prio = &dev->flow_db->rdma_rx[priority]; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) - prio = &dev->flow_db->rdma_tx[priority]; - - if (!prio) - return 
ERR_PTR(-EINVAL); - - if (prio->flow_table) - return prio; - - return _get_prio(ns, prio, priority, max_table_size, - MLX5_FS_MAX_TYPES, flags); -} - -static struct mlx5_ib_flow_handler * -_create_raw_flow_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_prio, - struct mlx5_flow_destination *dst, - struct mlx5_ib_flow_matcher *fs_matcher, - struct mlx5_flow_context *flow_context, - struct mlx5_flow_act *flow_act, - void *cmd_in, int inlen, - int dst_num) -{ - struct mlx5_ib_flow_handler *handler; - struct mlx5_flow_spec *spec; - struct mlx5_flow_table *ft = ft_prio->flow_table; - int err = 0; - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - handler = kzalloc(sizeof(*handler), GFP_KERNEL); - if (!handler || !spec) { - err = -ENOMEM; - goto free; - } - - INIT_LIST_HEAD(&handler->list); - - memcpy(spec->match_value, cmd_in, inlen); - memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params, - fs_matcher->mask_len); - spec->match_criteria_enable = fs_matcher->match_criteria_enable; - spec->flow_context = *flow_context; - - handler->rule = mlx5_add_flow_rules(ft, spec, - flow_act, dst, dst_num); - - if (IS_ERR(handler->rule)) { - err = PTR_ERR(handler->rule); - goto free; - } - - ft_prio->refcount++; - handler->prio = ft_prio; - handler->dev = dev; - ft_prio->flow_table = ft; - -free: - if (err) - kfree(handler); - kvfree(spec); - return err ? ERR_PTR(err) : handler; -} - -static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher, - void *match_v) -{ - void *match_c; - void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4; - void *dmac, *dmac_mask; - void *ipv4, *ipv4_mask; - - if (!(fs_matcher->match_criteria_enable & - (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT))) - return false; - - match_c = fs_matcher->matcher_mask.match_params; - match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v, - outer_headers); - match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c, - outer_headers); - - dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4, - dmac_47_16); - dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4, - dmac_47_16); - - if (is_multicast_ether_addr(dmac) && - is_multicast_ether_addr(dmac_mask)) - return true; - - ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4); - - ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4); - - if (ipv4_is_multicast(*(__be32 *)(ipv4)) && - ipv4_is_multicast(*(__be32 *)(ipv4_mask))) - return true; - - return false; -} - -struct mlx5_ib_flow_handler * -mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_matcher *fs_matcher, - struct mlx5_flow_context *flow_context, - struct mlx5_flow_act *flow_act, - u32 counter_id, - void *cmd_in, int inlen, int dest_id, - int dest_type) -{ - struct mlx5_flow_destination *dst; - struct mlx5_ib_flow_prio *ft_prio; - struct mlx5_ib_flow_handler *handler; - int dst_num = 0; - bool mcast; - int err; - - if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL) - return ERR_PTR(-EOPNOTSUPP); - - if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO) - return ERR_PTR(-ENOMEM); - - dst = kcalloc(2, sizeof(*dst), GFP_KERNEL); - if (!dst) - return ERR_PTR(-ENOMEM); - - mcast = raw_fs_is_multicast(fs_matcher, cmd_in); - mutex_lock(&dev->flow_db->lock); - - ft_prio = _get_flow_table(dev, fs_matcher, mcast); - if (IS_ERR(ft_prio)) { - err = PTR_ERR(ft_prio); - goto unlock; - } - - if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) { - dst[dst_num].type = 
dest_type; - dst[dst_num++].tir_num = dest_id; - flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) { - dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; - dst[dst_num++].ft_num = dest_id; - flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_PORT) { - dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT; - flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; - } - - - if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dst[dst_num].counter_id = counter_id; - dst_num++; - } - - handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, - flow_context, flow_act, - cmd_in, inlen, dst_num); - - if (IS_ERR(handler)) { - err = PTR_ERR(handler); - goto destroy_ft; - } - - mutex_unlock(&dev->flow_db->lock); - atomic_inc(&fs_matcher->usecnt); - handler->flow_matcher = fs_matcher; - - kfree(dst); - - return handler; - -destroy_ft: - put_flow_table(dev, ft_prio, false); -unlock: - mutex_unlock(&dev->flow_db->lock); - kfree(dst); - - return ERR_PTR(err); -} - -static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags) -{ - u32 flags = 0; - - if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA) - flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA; - - return flags; -} - -#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA -static struct ib_flow_action * -mlx5_ib_create_flow_action_esp(struct ib_device *device, - const struct ib_flow_action_attrs_esp *attr, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_dev *mdev = to_mdev(device); - struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm; - struct mlx5_accel_esp_xfrm_attrs accel_attrs = {}; - struct mlx5_ib_flow_action *action; - u64 action_flags; - u64 flags; - int err = 0; - - err = uverbs_get_flags64( - &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, - ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1)); - if (err) - return ERR_PTR(err); - - flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags); - - /* We current only support a subset of the standard features. Only a - * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn - * (with overlap). Full offload mode isn't supported. 
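The AES-GCM keymat handling that follows converts byte-sized lengths from the uverbs interface into the bit-sized lengths the acceleration layer expects, and rejects anything other than a 16-byte ICV with a sequence-number IV. Below is a minimal standalone sketch of that conversion, using hypothetical stand-in structures rather than the driver's real types.

/* Illustrative sketch only; the struct and function names are hypothetical. */
#include <stdint.h>
#include <errno.h>

struct gcm_keymat_example {      /* stand-in for the uverbs AES-GCM keymat */
	uint16_t key_len;        /* key length in bytes, e.g. 16/24/32 */
	uint16_t icv_len;        /* ICV length in bytes */
};

struct accel_keymat_example {    /* stand-in for the accel xfrm keymat */
	uint32_t key_len_bits;
	uint32_t icv_len_bits;
};

/*
 * Mirror of the conversion performed in the flow-action code that follows:
 * only a 16-byte (128-bit) ICV is accepted, and byte counts from userspace
 * are converted to the bit counts the device interface expects.
 */
static int convert_keymat_example(const struct gcm_keymat_example *in,
				  struct accel_keymat_example *out)
{
	if (in->icv_len != 16)
		return -EOPNOTSUPP;
	out->key_len_bits = in->key_len * 8;  /* e.g. 32 bytes -> 256 bits */
	out->icv_len_bits = in->icv_len * 8;  /* 16 bytes -> 128 bits */
	return 0;
}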
- */ - if (!attr->keymat || attr->replay || attr->encap || - attr->spi || attr->seq || attr->tfc_pad || - attr->hard_limit_pkts || - (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | - IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT))) - return ERR_PTR(-EOPNOTSUPP); - - if (attr->keymat->protocol != - IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM) - return ERR_PTR(-EOPNOTSUPP); - - aes_gcm = &attr->keymat->keymat.aes_gcm; - - if (aes_gcm->icv_len != 16 || - aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ) - return ERR_PTR(-EOPNOTSUPP); - - action = kmalloc(sizeof(*action), GFP_KERNEL); - if (!action) - return ERR_PTR(-ENOMEM); - - action->esp_aes_gcm.ib_flags = attr->flags; - memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key, - sizeof(accel_attrs.keymat.aes_gcm.aes_key)); - accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8; - memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt, - sizeof(accel_attrs.keymat.aes_gcm.salt)); - memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv, - sizeof(accel_attrs.keymat.aes_gcm.seq_iv)); - accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8; - accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ; - accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM; - - accel_attrs.esn = attr->esn; - if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) - accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED; - if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) - accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; - - if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT) - accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT; - - action->esp_aes_gcm.ctx = - mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags); - if (IS_ERR(action->esp_aes_gcm.ctx)) { - err = PTR_ERR(action->esp_aes_gcm.ctx); - goto err_parse; - } - - action->esp_aes_gcm.ib_flags = attr->flags; - - return &action->ib_action; - -err_parse: - kfree(action); - return ERR_PTR(err); -} - -static int -mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action, - const struct ib_flow_action_attrs_esp *attr, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_flow_action *maction = to_mflow_act(action); - struct mlx5_accel_esp_xfrm_attrs accel_attrs; - int err = 0; - - if (attr->keymat || attr->replay || attr->encap || - attr->spi || attr->seq || attr->tfc_pad || - attr->hard_limit_pkts || - (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | - IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS | - IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))) - return -EOPNOTSUPP; - - /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can - * be modified. 
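The modify path below only lets userspace update the ESN and flip the window-overlap state. A compact sketch of that flag handling follows, with a hypothetical flag name standing in for the accel-layer MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP bit.

/* Illustrative sketch only; names are hypothetical. */
#include <stdbool.h>
#include <stdint.h>

#define EXAMPLE_FLAG_ESN_STATE_OVERLAP (1u << 0)

/* Update the ESN and set or clear the overlap flag, leaving other bits alone. */
static void update_esn_example(uint32_t *flags, uint32_t *esn,
			       uint32_t new_esn, bool new_window)
{
	*esn = new_esn;
	if (new_window)
		*flags |= EXAMPLE_FLAG_ESN_STATE_OVERLAP;
	else
		*flags &= ~EXAMPLE_FLAG_ESN_STATE_OVERLAP;
}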
- */ - if (!(maction->esp_aes_gcm.ib_flags & - IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) && - attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | - IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)) - return -EINVAL; - - memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs, - sizeof(accel_attrs)); - - accel_attrs.esn = attr->esn; - if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) - accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; - else - accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; - - err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx, - &accel_attrs); - if (err) - return err; - - maction->esp_aes_gcm.ib_flags &= - ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; - maction->esp_aes_gcm.ib_flags |= - attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; - - return 0; -} - -static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action) -{ - struct mlx5_ib_flow_action *maction = to_mflow_act(action); - - switch (action->type) { - case IB_FLOW_ACTION_ESP: - /* - * We only support aes_gcm by now, so we implicitly know this is - * the underline crypto. - */ - mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx); - break; - case IB_FLOW_ACTION_UNSPECIFIED: - mlx5_ib_destroy_flow_action_raw(maction); - break; - default: - WARN_ON(true); - break; - } - - kfree(maction); - return 0; -} - static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); @@ -5871,23 +4192,6 @@ err_mp: return -ENOMEM; } -static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev) -{ - dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL); - - if (!dev->flow_db) - return -ENOMEM; - - mutex_init(&dev->flow_db->lock); - - return 0; -} - -static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev) -{ - kfree(dev->flow_db); -} - static int mlx5_ib_enable_driver(struct ib_device *dev) { struct mlx5_ib_dev *mdev = to_mdev(dev); @@ -5914,7 +4218,6 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .check_mr_status = mlx5_ib_check_mr_status, .create_ah = mlx5_ib_create_ah, .create_cq = mlx5_ib_create_cq, - .create_flow = mlx5_ib_create_flow, .create_qp = mlx5_ib_create_qp, .create_srq = mlx5_ib_create_srq, .dealloc_pd = mlx5_ib_dealloc_pd, @@ -5923,8 +4226,6 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .dereg_mr = mlx5_ib_dereg_mr, .destroy_ah = mlx5_ib_destroy_ah, .destroy_cq = mlx5_ib_destroy_cq, - .destroy_flow = mlx5_ib_destroy_flow, - .destroy_flow_action = mlx5_ib_destroy_flow_action, .destroy_qp = mlx5_ib_destroy_qp, .destroy_srq = mlx5_ib_destroy_srq, .detach_mcast = mlx5_ib_mcg_detach, @@ -5969,11 +4270,6 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext), }; -static const struct ib_device_ops mlx5_ib_dev_flow_ipsec_ops = { - .create_flow_action_esp = mlx5_ib_create_flow_action_esp, - .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp, -}; - static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = { .rdma_netdev_get_params = mlx5_ib_rn_get_params, }; @@ -6104,9 +4400,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops); - if (mlx5_accel_ipsec_device_caps(dev->mdev) & - MLX5_ACCEL_IPSEC_CAP_DEVICE) - ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_flow_ipsec_ops); ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops); if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) @@ -6435,9 +4728,9 
@@ static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_INIT, mlx5_ib_stage_init_init, mlx5_ib_stage_init_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB, - mlx5_ib_stage_flow_db_init, - mlx5_ib_stage_flow_db_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_FS, + mlx5_ib_fs_init, + mlx5_ib_fs_cleanup), STAGE_CREATE(MLX5_IB_STAGE_CAPS, mlx5_ib_stage_caps_init, mlx5_ib_stage_caps_cleanup), @@ -6498,9 +4791,9 @@ const struct mlx5_ib_profile raw_eth_profile = { STAGE_CREATE(MLX5_IB_STAGE_INIT, mlx5_ib_stage_init_init, mlx5_ib_stage_init_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB, - mlx5_ib_stage_flow_db_init, - mlx5_ib_stage_flow_db_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_FS, + mlx5_ib_fs_init, + mlx5_ib_fs_cleanup), STAGE_CREATE(MLX5_IB_STAGE_CAPS, mlx5_ib_stage_caps_init, mlx5_ib_stage_caps_cleanup), diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 4444a09f6fbd..e4a5fb017e08 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -805,7 +805,7 @@ struct mlx5_ib_delay_drop { enum mlx5_ib_stages { MLX5_IB_STAGE_INIT, - MLX5_IB_STAGE_FLOW_DB, + MLX5_IB_STAGE_FS, MLX5_IB_STAGE_CAPS, MLX5_IB_STAGE_NON_DEFAULT_CB, MLX5_IB_STAGE_ROCE, @@ -1359,14 +1359,6 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid); void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev); void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev); -struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( - struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, - struct mlx5_flow_context *flow_context, - struct mlx5_flow_act *flow_act, u32 counter_id, - void *cmd_in, int inlen, int dest_id, int dest_type); -bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); -bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id); -void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction); #else static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, @@ -1374,16 +1366,6 @@ mlx5_ib_devx_create(struct mlx5_ib_dev *dev, static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {} static inline void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev) {} static inline void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev) {} -static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, - int *dest_type) -{ - return false; -} -static inline void -mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) -{ - return; -}; #endif static inline void init_query_mad(struct ib_smp *mad) { -- cgit From d8b7515e25ec0a294c18e366f98e85fcb4a035e3 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 2 Jul 2020 11:18:08 +0300 Subject: RDMA/mlx5: Cleanup DEVX initialization flow Move DEVX initialization and cleanup flows to the devx.c instead of having almost empty functions in main.c Link: https://lore.kernel.org/r/20200702081809.423482-6-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 43 ++++++++++++++++++++++-------------- drivers/infiniband/hw/mlx5/devx.h | 19 ++++++++++++++++ drivers/infiniband/hw/mlx5/main.c | 29 +++++------------------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 14 ------------ 4 files changed, 51 insertions(+), 54 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index c7dffd799d16..e9cf294f8529 100644 --- 
a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2362,17 +2362,24 @@ static int devx_event_notifier(struct notifier_block *nb, return NOTIFY_OK; } -void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev) +int mlx5_ib_devx_init(struct mlx5_ib_dev *dev) { struct mlx5_devx_event_table *table = &dev->devx_event_table; + int uid; + + uid = mlx5_ib_devx_create(dev, false); + if (uid > 0) { + dev->devx_whitelist_uid = uid; + xa_init(&table->event_xa); + mutex_init(&table->event_xa_lock); + MLX5_NB_INIT(&table->devx_nb, devx_event_notifier, NOTIFY_ANY); + mlx5_eq_notifier_register(dev->mdev, &table->devx_nb); + } - xa_init(&table->event_xa); - mutex_init(&table->event_xa_lock); - MLX5_NB_INIT(&table->devx_nb, devx_event_notifier, NOTIFY_ANY); - mlx5_eq_notifier_register(dev->mdev, &table->devx_nb); + return 0; } -void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev) +void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev) { struct mlx5_devx_event_table *table = &dev->devx_event_table; struct devx_event_subscription *sub, *tmp; @@ -2380,17 +2387,21 @@ void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev) void *entry; unsigned long id; - mlx5_eq_notifier_unregister(dev->mdev, &table->devx_nb); - mutex_lock(&dev->devx_event_table.event_xa_lock); - xa_for_each(&table->event_xa, id, entry) { - event = entry; - list_for_each_entry_safe(sub, tmp, &event->unaffiliated_list, - xa_list) - devx_cleanup_subscription(dev, sub); - kfree(entry); + if (dev->devx_whitelist_uid) { + mlx5_eq_notifier_unregister(dev->mdev, &table->devx_nb); + mutex_lock(&dev->devx_event_table.event_xa_lock); + xa_for_each(&table->event_xa, id, entry) { + event = entry; + list_for_each_entry_safe( + sub, tmp, &event->unaffiliated_list, xa_list) + devx_cleanup_subscription(dev, sub); + kfree(entry); + } + mutex_unlock(&dev->devx_event_table.event_xa_lock); + xa_destroy(&table->event_xa); + + mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid); } - mutex_unlock(&dev->devx_event_table.event_xa_lock); - xa_destroy(&table->event_xa); } static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf, diff --git a/drivers/infiniband/hw/mlx5/devx.h b/drivers/infiniband/hw/mlx5/devx.h index 9afaa5d22797..1f69866aed16 100644 --- a/drivers/infiniband/hw/mlx5/devx.h +++ b/drivers/infiniband/hw/mlx5/devx.h @@ -23,4 +23,23 @@ struct devx_obj { }; struct list_head event_sub; /* holds devx_event_subscription entries */ }; +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); +void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid); +int mlx5_ib_devx_init(struct mlx5_ib_dev *dev); +void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev); +#else +static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) +{ + return -EOPNOTSUPP; +} +static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {} +static inline int mlx5_ib_devx_init(struct mlx5_ib_dev *dev) +{ + return 0; +} +static inline void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev) +{ +} +#endif #endif /* _MLX5_IB_DEVX_H */ diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index f6cb8c7f7183..f529f32b4440 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -32,6 +32,7 @@ #include "mlx5_ib.h" #include "ib_rep.h" #include "cmd.h" +#include "devx.h" #include "fs.h" #include "srq.h" #include "qp.h" @@ -4661,26 +4662,6 @@ static void mlx5_ib_stage_dev_notifier_cleanup(struct 
mlx5_ib_dev *dev) mlx5_notifier_unregister(dev->mdev, &dev->mdev_events); } -static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev) -{ - int uid; - - uid = mlx5_ib_devx_create(dev, false); - if (uid > 0) { - dev->devx_whitelist_uid = uid; - mlx5_ib_devx_init_event_table(dev); - } - - return 0; -} -static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev) -{ - if (dev->devx_whitelist_uid) { - mlx5_ib_devx_cleanup_event_table(dev); - mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid); - } -} - void __mlx5_ib_remove(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile, int stage) @@ -4771,8 +4752,8 @@ static const struct mlx5_ib_profile pf_profile = { NULL, mlx5_ib_stage_pre_ib_reg_umr_cleanup), STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID, - mlx5_ib_stage_devx_init, - mlx5_ib_stage_devx_cleanup), + mlx5_ib_devx_init, + mlx5_ib_devx_cleanup), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), @@ -4831,8 +4812,8 @@ const struct mlx5_ib_profile raw_eth_profile = { NULL, mlx5_ib_stage_pre_ib_reg_umr_cleanup), STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID, - mlx5_ib_stage_devx_init, - mlx5_ib_stage_devx_cleanup), + mlx5_ib_devx_init, + mlx5_ib_devx_cleanup), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index e4a5fb017e08..13a2d1f3f14d 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1353,20 +1353,6 @@ extern const struct uapi_definition mlx5_ib_flow_defs[]; extern const struct uapi_definition mlx5_ib_qos_defs[]; extern const struct uapi_definition mlx5_ib_std_types_defs[]; - -#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) -int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); -void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid); -void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev); -void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev); -#else -static inline int -mlx5_ib_devx_create(struct mlx5_ib_dev *dev, - bool is_user) { return -EOPNOTSUPP; } -static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {} -static inline void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev) {} -static inline void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev) {} -#endif static inline void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; -- cgit From 1e2b5a90ded2852017e327e93861346ee70c5384 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 2 Jul 2020 11:18:09 +0300 Subject: RDMA/mlx5: Delete one-time used functions Merge them into their callers, usually the only thing the caller did was to call the one function, so this is clearer. 
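For a schematic sense of the pattern (the names foo_dev, foo_debugfs_cleanup and foo_stage_cleanup are invented for illustration and do not appear in the driver): a stage callback that only forwards to a single-use helper simply absorbs the helper's body.

#include <linux/debugfs.h>

struct foo_dev {
	struct dentry *dbg_dir;
};

/* Before: a single-use helper plus a stage callback that only calls it */
static void foo_debugfs_cleanup(struct foo_dev *dev)
{
	if (!dev->dbg_dir)
		return;
	debugfs_remove_recursive(dev->dbg_dir);
	dev->dbg_dir = NULL;
}

static void foo_stage_cleanup_old(struct foo_dev *dev)
{
	foo_debugfs_cleanup(dev);
}

/* After: the helper is deleted and its body moves into the callback */
static void foo_stage_cleanup(struct foo_dev *dev)
{
	if (!dev->dbg_dir)
		return;
	debugfs_remove_recursive(dev->dbg_dir);
	dev->dbg_dir = NULL;
}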
Link: https://lore.kernel.org/r/20200702081809.423482-7-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 475 ++++++++++++++++---------------------- 1 file changed, 198 insertions(+), 277 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index f529f32b4440..b730297b4d69 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3023,137 +3023,6 @@ static int get_port_caps(struct mlx5_ib_dev *dev, u8 port) return __get_port_caps(dev, port); } -static void destroy_umrc_res(struct mlx5_ib_dev *dev) -{ - int err; - - err = mlx5_mr_cache_cleanup(dev); - if (err) - mlx5_ib_warn(dev, "mr cache cleanup failed\n"); - - if (dev->umrc.qp) - mlx5_ib_destroy_qp(dev->umrc.qp, NULL); - if (dev->umrc.cq) - ib_free_cq(dev->umrc.cq); - if (dev->umrc.pd) - ib_dealloc_pd(dev->umrc.pd); -} - -enum { - MAX_UMR_WR = 128, -}; - -static int create_umr_res(struct mlx5_ib_dev *dev) -{ - struct ib_qp_init_attr *init_attr = NULL; - struct ib_qp_attr *attr = NULL; - struct ib_pd *pd; - struct ib_cq *cq; - struct ib_qp *qp; - int ret; - - attr = kzalloc(sizeof(*attr), GFP_KERNEL); - init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL); - if (!attr || !init_attr) { - ret = -ENOMEM; - goto error_0; - } - - pd = ib_alloc_pd(&dev->ib_dev, 0); - if (IS_ERR(pd)) { - mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); - ret = PTR_ERR(pd); - goto error_0; - } - - cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); - if (IS_ERR(cq)) { - mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); - ret = PTR_ERR(cq); - goto error_2; - } - - init_attr->send_cq = cq; - init_attr->recv_cq = cq; - init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; - init_attr->cap.max_send_wr = MAX_UMR_WR; - init_attr->cap.max_send_sge = 1; - init_attr->qp_type = MLX5_IB_QPT_REG_UMR; - init_attr->port_num = 1; - qp = mlx5_ib_create_qp(pd, init_attr, NULL); - if (IS_ERR(qp)) { - mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); - ret = PTR_ERR(qp); - goto error_3; - } - qp->device = &dev->ib_dev; - qp->real_qp = qp; - qp->uobject = NULL; - qp->qp_type = MLX5_IB_QPT_REG_UMR; - qp->send_cq = init_attr->send_cq; - qp->recv_cq = init_attr->recv_cq; - - attr->qp_state = IB_QPS_INIT; - attr->port_num = 1; - ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX | - IB_QP_PORT, NULL); - if (ret) { - mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); - goto error_4; - } - - memset(attr, 0, sizeof(*attr)); - attr->qp_state = IB_QPS_RTR; - attr->path_mtu = IB_MTU_256; - - ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); - if (ret) { - mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); - goto error_4; - } - - memset(attr, 0, sizeof(*attr)); - attr->qp_state = IB_QPS_RTS; - ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); - if (ret) { - mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n"); - goto error_4; - } - - dev->umrc.qp = qp; - dev->umrc.cq = cq; - dev->umrc.pd = pd; - - sema_init(&dev->umrc.sem, MAX_UMR_WR); - ret = mlx5_mr_cache_init(dev); - if (ret) { - mlx5_ib_warn(dev, "mr cache init failed %d\n", ret); - goto error_4; - } - - kfree(attr); - kfree(init_attr); - - return 0; - -error_4: - mlx5_ib_destroy_qp(qp, NULL); - dev->umrc.qp = NULL; - -error_3: - ib_free_cq(cq); - dev->umrc.cq = NULL; - -error_2: - ib_dealloc_pd(pd); - dev->umrc.pd = NULL; - -error_0: - kfree(attr); - kfree(init_attr); - return ret; -} - static u8 mlx5_get_umr_fence(u8 umr_fence_cap) { switch (umr_fence_cap) { @@ 
-3166,16 +3035,15 @@ static u8 mlx5_get_umr_fence(u8 umr_fence_cap) } } -static int create_dev_resources(struct mlx5_ib_resources *devr) +static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev) { + struct mlx5_ib_resources *devr = &dev->devr; struct ib_srq_init_attr attr; - struct mlx5_ib_dev *dev; struct ib_device *ibdev; struct ib_cq_init_attr cq_attr = {.cqe = 1}; int port; int ret = 0; - dev = container_of(devr, struct mlx5_ib_dev, devr); ibdev = &dev->ib_dev; if (!MLX5_CAP_GEN(dev->mdev, xrc)) @@ -3291,13 +3159,11 @@ error0: return ret; } -static void destroy_dev_resources(struct mlx5_ib_resources *devr) +static void mlx5_ib_dev_res_cleanup(struct mlx5_ib_dev *dev) { - struct mlx5_ib_dev *dev; + struct mlx5_ib_resources *devr = &dev->devr; int port; - dev = container_of(devr, struct mlx5_ib_dev, devr); - mlx5_ib_destroy_srq(devr->s1, NULL); kfree(devr->s1); mlx5_ib_destroy_srq(devr->s0, NULL); @@ -3506,23 +3372,6 @@ static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num, return mlx5_rdma_rn_get_params(to_mdev(device)->mdev, device, params); } -static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev) -{ - if (!dev->delay_drop.dir_debugfs) - return; - debugfs_remove_recursive(dev->delay_drop.dir_debugfs); - dev->delay_drop.dir_debugfs = NULL; -} - -static void cancel_delay_drop(struct mlx5_ib_dev *dev) -{ - if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) - return; - - cancel_work_sync(&dev->delay_drop.delay_drop_work); - delay_drop_debugfs_cleanup(dev); -} - static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { @@ -3562,40 +3411,6 @@ static const struct file_operations fops_delay_drop_timeout = { .read = delay_drop_timeout_read, }; -static void delay_drop_debugfs_init(struct mlx5_ib_dev *dev) -{ - struct dentry *root; - - if (!mlx5_debugfs_root) - return; - - root = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root); - dev->delay_drop.dir_debugfs = root; - - debugfs_create_atomic_t("num_timeout_events", 0400, root, - &dev->delay_drop.events_cnt); - debugfs_create_atomic_t("num_rqs", 0400, root, - &dev->delay_drop.rqs_cnt); - debugfs_create_file("timeout", 0600, root, &dev->delay_drop, - &fops_delay_drop_timeout); -} - -static void init_delay_drop(struct mlx5_ib_dev *dev) -{ - if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) - return; - - mutex_init(&dev->delay_drop.lock); - dev->delay_drop.dev = dev; - dev->delay_drop.activate = false; - dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000; - INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler); - atomic_set(&dev->delay_drop.rqs_cnt, 0); - atomic_set(&dev->delay_drop.events_cnt, 0); - - delay_drop_debugfs_init(dev); -} - static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, struct mlx5_ib_multiport_info *mpi) { @@ -4458,65 +4273,36 @@ static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = { .modify_wq = mlx5_ib_modify_wq, }; -static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev) -{ - u8 port_num; - - dev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); - ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops); - - port_num = mlx5_core_native_port_num(dev->mdev) - 1; - - /* Register only for native ports */ - return 
mlx5_add_netdev_notifier(dev, port_num); -} - -static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev) -{ - u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1; - - mlx5_remove_netdev_notifier(dev, port_num); -} - -static int mlx5_ib_stage_raw_eth_roce_init(struct mlx5_ib_dev *dev) -{ - struct mlx5_core_dev *mdev = dev->mdev; - enum rdma_link_layer ll; - int port_type_cap; - int err = 0; - - port_type_cap = MLX5_CAP_GEN(mdev, port_type); - ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); - - if (ll == IB_LINK_LAYER_ETHERNET) - err = mlx5_ib_stage_common_roce_init(dev); - - return err; -} - -static void mlx5_ib_stage_raw_eth_roce_cleanup(struct mlx5_ib_dev *dev) -{ - mlx5_ib_stage_common_roce_cleanup(dev); -} - -static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; int port_type_cap; + u8 port_num = 0; int err; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { - err = mlx5_ib_stage_common_roce_init(dev); - if (err) + dev->ib_dev.uverbs_ex_cmd_mask |= + (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops); + + port_num = mlx5_core_native_port_num(dev->mdev) - 1; + + /* Register only for native ports */ + err = mlx5_add_netdev_notifier(dev, port_num); + if (err || dev->is_rep || !mlx5_is_roce_enabled(mdev)) + /* + * We don't enable ETH interface for + * 1. IB representors + * 2. 
User disabled ROCE through devlink interface + */ return err; err = mlx5_enable_eth(dev); @@ -4526,44 +4312,27 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev) return 0; cleanup: - mlx5_ib_stage_common_roce_cleanup(dev); - + mlx5_remove_netdev_notifier(dev, port_num); return err; } -static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; int port_type_cap; + u8 port_num; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { - mlx5_disable_eth(dev); - mlx5_ib_stage_common_roce_cleanup(dev); - } -} - -static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev) -{ - return create_dev_resources(&dev->devr); -} - -static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev) -{ - destroy_dev_resources(&dev->devr); -} - -static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev) -{ - return mlx5_ib_odp_init_one(dev); -} + if (!dev->is_rep) + mlx5_disable_eth(dev); -static void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev) -{ - mlx5_ib_odp_cleanup_one(dev); + port_num = mlx5_core_native_port_num(dev->mdev) - 1; + mlx5_remove_netdev_notifier(dev, port_num); + } } static int mlx5_ib_stage_cong_debugfs_init(struct mlx5_ib_dev *dev) @@ -4625,7 +4394,18 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) { - destroy_umrc_res(dev); + int err; + + err = mlx5_mr_cache_cleanup(dev); + if (err) + mlx5_ib_warn(dev, "mr cache cleanup failed\n"); + + if (dev->umrc.qp) + mlx5_ib_destroy_qp(dev->umrc.qp, NULL); + if (dev->umrc.cq) + ib_free_cq(dev->umrc.cq); + if (dev->umrc.pd) + ib_dealloc_pd(dev->umrc.pd); } static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) @@ -4633,21 +4413,162 @@ static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) ib_unregister_device(&dev->ib_dev); } +enum { + MAX_UMR_WR = 128, +}; + static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev) { - return create_umr_res(dev); + struct ib_qp_init_attr *init_attr = NULL; + struct ib_qp_attr *attr = NULL; + struct ib_pd *pd; + struct ib_cq *cq; + struct ib_qp *qp; + int ret; + + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL); + if (!attr || !init_attr) { + ret = -ENOMEM; + goto error_0; + } + + pd = ib_alloc_pd(&dev->ib_dev, 0); + if (IS_ERR(pd)) { + mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); + ret = PTR_ERR(pd); + goto error_0; + } + + cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); + if (IS_ERR(cq)) { + mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); + ret = PTR_ERR(cq); + goto error_2; + } + + init_attr->send_cq = cq; + init_attr->recv_cq = cq; + init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; + init_attr->cap.max_send_wr = MAX_UMR_WR; + init_attr->cap.max_send_sge = 1; + init_attr->qp_type = MLX5_IB_QPT_REG_UMR; + init_attr->port_num = 1; + qp = mlx5_ib_create_qp(pd, init_attr, NULL); + if (IS_ERR(qp)) { + mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); + ret = PTR_ERR(qp); + goto error_3; + } + qp->device = &dev->ib_dev; + qp->real_qp = qp; + qp->uobject = NULL; + qp->qp_type = MLX5_IB_QPT_REG_UMR; + qp->send_cq = init_attr->send_cq; + qp->recv_cq = init_attr->recv_cq; + + attr->qp_state = IB_QPS_INIT; + attr->port_num = 1; + ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX | 
+ IB_QP_PORT, NULL); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); + goto error_4; + } + + memset(attr, 0, sizeof(*attr)); + attr->qp_state = IB_QPS_RTR; + attr->path_mtu = IB_MTU_256; + + ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); + goto error_4; + } + + memset(attr, 0, sizeof(*attr)); + attr->qp_state = IB_QPS_RTS; + ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n"); + goto error_4; + } + + dev->umrc.qp = qp; + dev->umrc.cq = cq; + dev->umrc.pd = pd; + + sema_init(&dev->umrc.sem, MAX_UMR_WR); + ret = mlx5_mr_cache_init(dev); + if (ret) { + mlx5_ib_warn(dev, "mr cache init failed %d\n", ret); + goto error_4; + } + + kfree(attr); + kfree(init_attr); + + return 0; + +error_4: + mlx5_ib_destroy_qp(qp, NULL); + dev->umrc.qp = NULL; + +error_3: + ib_free_cq(cq); + dev->umrc.cq = NULL; + +error_2: + ib_dealloc_pd(pd); + dev->umrc.pd = NULL; + +error_0: + kfree(attr); + kfree(init_attr); + return ret; } static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev) { - init_delay_drop(dev); + struct dentry *root; + + if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) + return 0; + + mutex_init(&dev->delay_drop.lock); + dev->delay_drop.dev = dev; + dev->delay_drop.activate = false; + dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000; + INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler); + atomic_set(&dev->delay_drop.rqs_cnt, 0); + atomic_set(&dev->delay_drop.events_cnt, 0); + + if (!mlx5_debugfs_root) + return 0; + root = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root); + dev->delay_drop.dir_debugfs = root; + + debugfs_create_atomic_t("num_timeout_events", 0400, root, + &dev->delay_drop.events_cnt); + debugfs_create_atomic_t("num_rqs", 0400, root, + &dev->delay_drop.rqs_cnt); + debugfs_create_file("timeout", 0600, root, &dev->delay_drop, + &fops_delay_drop_timeout); return 0; } static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev) { - cancel_delay_drop(dev); + if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) + return; + + cancel_work_sync(&dev->delay_drop.delay_drop_work); + if (!dev->delay_drop.dir_debugfs) + return; + + debugfs_remove_recursive(dev->delay_drop.dir_debugfs); + dev->delay_drop.dir_debugfs = NULL; } static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev) @@ -4719,8 +4640,8 @@ static const struct mlx5_ib_profile pf_profile = { mlx5_ib_stage_non_default_cb, NULL), STAGE_CREATE(MLX5_IB_STAGE_ROCE, - mlx5_ib_stage_roce_init, - mlx5_ib_stage_roce_cleanup), + mlx5_ib_roce_init, + mlx5_ib_roce_cleanup), STAGE_CREATE(MLX5_IB_STAGE_QP, mlx5_init_qp_table, mlx5_cleanup_qp_table), @@ -4728,14 +4649,14 @@ static const struct mlx5_ib_profile pf_profile = { mlx5_init_srq_table, mlx5_cleanup_srq_table), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, - mlx5_ib_stage_dev_res_init, - mlx5_ib_stage_dev_res_cleanup), + mlx5_ib_dev_res_init, + mlx5_ib_dev_res_cleanup), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER, mlx5_ib_stage_dev_notifier_init, mlx5_ib_stage_dev_notifier_cleanup), STAGE_CREATE(MLX5_IB_STAGE_ODP, - mlx5_ib_stage_odp_init, - mlx5_ib_stage_odp_cleanup), + mlx5_ib_odp_init_one, + mlx5_ib_odp_cleanup_one), STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, mlx5_ib_counters_init, mlx5_ib_counters_cleanup), @@ -4782,8 +4703,8 @@ const struct mlx5_ib_profile raw_eth_profile = { mlx5_ib_stage_raw_eth_non_default_cb, NULL), 
STAGE_CREATE(MLX5_IB_STAGE_ROCE, - mlx5_ib_stage_raw_eth_roce_init, - mlx5_ib_stage_raw_eth_roce_cleanup), + mlx5_ib_roce_init, + mlx5_ib_roce_cleanup), STAGE_CREATE(MLX5_IB_STAGE_QP, mlx5_init_qp_table, mlx5_cleanup_qp_table), @@ -4791,8 +4712,8 @@ const struct mlx5_ib_profile raw_eth_profile = { mlx5_init_srq_table, mlx5_cleanup_srq_table), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, - mlx5_ib_stage_dev_res_init, - mlx5_ib_stage_dev_res_cleanup), + mlx5_ib_dev_res_init, + mlx5_ib_dev_res_cleanup), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER, mlx5_ib_stage_dev_notifier_init, mlx5_ib_stage_dev_notifier_cleanup), -- cgit From 6c72a038bf4235180550dff9ddff7392e3cff893 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 5 Jul 2020 17:11:43 +0300 Subject: RDMA/mlx5: Remove unused to_mibmr function The to_mibmr function is unused, remove it. Link: https://lore.kernel.org/r/20200705141143.47303-1-galpress@amazon.com Signed-off-by: Gal Pressman Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 13a2d1f3f14d..b93f3aa9e218 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1051,11 +1051,6 @@ static inline struct mlx5_ib_rwq *to_mibrwq(struct mlx5_core_qp *core_qp) return container_of(core_qp, struct mlx5_ib_rwq, core_qp); } -static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mkey *mmkey) -{ - return container_of(mmkey, struct mlx5_ib_mr, mmkey); -} - static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd) { return container_of(ibpd, struct mlx5_ib_pd, ibpd); -- cgit From 7c97f3aded10aa86fc1944341288434117e9c926 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Thu, 2 Jul 2020 11:29:31 +0300 Subject: RDMA/counter: Add PID category support in auto mode With the "PID" category QPs have same PID will be bound to same counter; If this category is not set then QPs have different PIDs will be bound to same counter. This is implemented for 2 reasons: 1. The counter is a limited resource, while there may be dozens of applications, each of which creates several types of QPs, which means it may doesn't have enough counter. 2. The system administrator needs all QPs created by all applications with same type bound to one counter. The counter name and PID is only make sense when "PID" category are configured. This category can also be used in combine with others, e.g. QP type. 
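Put differently, the PID category is one more term in the predicate a new QP uses to find a matching auto-mode counter. A condensed sketch of that predicate (simplified from the patch below; locking and the surrounding bind logic are omitted):

#include <linux/sched.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_counter.h>

/* Simplified restatement of the auto-mode matching rule. */
static bool counter_matches_qp(struct rdma_counter *counter,
			       struct ib_qp *qp, u32 auto_mask)
{
	bool match = true;

	if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE)
		match &= (counter->mode.param.qp_type == qp->qp_type);

	/* Group QPs per process only when the PID category is selected */
	if (auto_mask & RDMA_COUNTER_MASK_PID)
		match &= (task_pid_nr(counter->res.task) ==
			  task_pid_nr(qp->res.task));

	return match;
}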
Link: https://lore.kernel.org/r/20200702082933.424537-2-leon@kernel.org Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/counters.c | 20 +++++--------------- drivers/infiniband/core/nldev.c | 8 ++++++-- include/uapi/rdma/rdma_netlink.h | 1 + 3 files changed, 12 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 2257d7f7810f..40204c6caa5b 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -8,7 +8,7 @@ #include "core_priv.h" #include "restrack.h" -#define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE) +#define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID) static int __counter_set_mode(struct rdma_counter_mode *curr, enum rdma_nl_counter_mode new_mode, @@ -149,23 +149,13 @@ static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, struct auto_mode_param *param = &counter->mode.param; bool match = true; - /* - * Ensure that counter belongs to the right PID. This operation can - * race with user space which kills the process and leaves QP and - * counters orphans. - * - * It is not a big deal because exitted task will leave both QP and - * counter in the same bucket of zombie process. Just ensure that - * process is still alive before procedding. - * - */ - if (task_pid_nr(counter->res.task) != task_pid_nr(qp->res.task) || - !task_pid_nr(qp->res.task)) - return false; - if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE) match &= (param->qp_type == qp->qp_type); + if (auto_mask & RDMA_COUNTER_MASK_PID) + match &= (task_pid_nr(counter->res.task) == + task_pid_nr(qp->res.task)); + return match; } diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 1051b5622b62..76af7ea2875d 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -711,11 +711,16 @@ static int fill_stat_counter_mode(struct sk_buff *msg, if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode)) return -EMSGSIZE; - if (m->mode == RDMA_COUNTER_MODE_AUTO) + if (m->mode == RDMA_COUNTER_MODE_AUTO) { if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) && nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type)) return -EMSGSIZE; + if ((m->mask & RDMA_COUNTER_MASK_PID) && + fill_res_name_pid(msg, &counter->res)) + return -EMSGSIZE; + } + return 0; } @@ -855,7 +860,6 @@ static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin, if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) || nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) || - fill_res_name_pid(msg, &counter->res) || fill_stat_counter_mode(msg, counter) || fill_stat_counter_qps(msg, counter) || fill_stat_counter_hwcounters(msg, counter)) diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 3826143d420d..d2f5b8396243 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -569,5 +569,6 @@ enum rdma_nl_counter_mode { */ enum rdma_nl_counter_mask { RDMA_COUNTER_MASK_QP_TYPE = 1, + RDMA_COUNTER_MASK_PID = 1 << 1, }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit From c9f557421e505f75da4234a6af8eff46bc08614b Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Thu, 2 Jul 2020 11:29:32 +0300 Subject: RDMA/counter: Only bind user QPs in auto mode In auto mode only bind user QPs to a dynamic counter, since this feature is mainly used for system statistic and diagnostic purpose, while there's no 
need to counter kernel QPs so far. Fixes: 99fa331dc862 ("RDMA/counter: Add "auto" configuration mode support") Link: https://lore.kernel.org/r/20200702082933.424537-3-leon@kernel.org Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/counters.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 40204c6caa5b..c88139fa859e 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -276,7 +276,7 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) struct rdma_counter *counter; int ret; - if (!qp->res.valid) + if (!qp->res.valid || rdma_is_kernel_res(&qp->res)) return 0; if (!rdma_is_port_valid(dev, port)) -- cgit From cbeb7d896c0f296451ffa7b67e7706786b8364c8 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Thu, 2 Jul 2020 11:29:33 +0300 Subject: RDMA/counter: Allow manually bind QPs with different pids to same counter In manual mode allow bind user QPs with different pids to same counter, since this is allowed in auto mode. Bind kernel QPs and user QPs to the same counter are not allowed. Fixes: 1bd8e0a9d0fd ("RDMA/counter: Allow manual mode configuration support") Link: https://lore.kernel.org/r/20200702082933.424537-4-leon@kernel.org Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/counters.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index c88139fa859e..edf77d061127 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -471,7 +471,7 @@ int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, goto err; } - if (counter->res.task != qp->res.task) { + if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) { ret = -EINVAL; goto err_task; } -- cgit From 1dcb6c36a5ebac46099b6363ccf8f4e7563d51e2 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 14 Jul 2020 21:28:32 -0700 Subject: net/mlx5: Support setting access rights of dma addresses mlx5_fill_page_frag_array() is used to populate dma addresses to resources that require it, such as QPs, RQs etc. When the resource is used, PA list permissions are ignored. For resources that use MTT list, the user is required to provide the access rights. Subsequent patches use resources that require MTT lists, so modify API and implementation to support that. 
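Because the fragment addresses passed to the device are page aligned, their two least-significant bits are free to carry the access rights. A caller that needs a writable MTT list might use the new helper roughly as below (an illustrative sketch only; the helper and the MLX5_MTT_PERM_* flags are introduced in the diff that follows):

#include <linux/mlx5/driver.h>
#include <linux/mlx5/mlx5_ifc.h>

/* Populate a PAS/MTT array, tagging every page-aligned address as RW. */
static void fill_mtt_rw(struct mlx5_frag_buf *buf, __be64 *pas)
{
	mlx5_fill_page_frag_array_perm(buf, pas, MLX5_MTT_PERM_RW);
}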
Signed-off-by: Eli Cohen Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/alloc.c | 11 +++++++++-- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 6 ++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index 42198e64a7f4..8db4b5f0f963 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -299,11 +299,18 @@ void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas) } EXPORT_SYMBOL_GPL(mlx5_fill_page_array); -void mlx5_fill_page_frag_array(struct mlx5_frag_buf *buf, __be64 *pas) +void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm) { int i; + WARN_ON(perm & 0xfc); for (i = 0; i < buf->npages; i++) - pas[i] = cpu_to_be64(buf->frags[i].map); + pas[i] = cpu_to_be64(buf->frags[i].map | perm); +} +EXPORT_SYMBOL_GPL(mlx5_fill_page_frag_array_perm); + +void mlx5_fill_page_frag_array(struct mlx5_frag_buf *buf, __be64 *pas) +{ + mlx5_fill_page_frag_array_perm(buf, pas, 0); } EXPORT_SYMBOL_GPL(mlx5_fill_page_frag_array); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 13c0e4556eda..f2557d7e1355 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -971,6 +971,7 @@ void mlx5_register_debugfs(void); void mlx5_unregister_debugfs(void); void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas); +void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, unsigned int *irqn); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3786888cb1ba..5890e5c9da77 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10653,4 +10653,10 @@ struct mlx5_ifc_tls_progress_params_bits { u8 hw_offset_record_number[0x18]; }; +enum { + MLX5_MTT_PERM_READ = 1 << 0, + MLX5_MTT_PERM_WRITE = 1 << 1, + MLX5_MTT_PERM_RW = MLX5_MTT_PERM_READ | MLX5_MTT_PERM_WRITE, +}; + #endif /* MLX5_IFC_H */ -- cgit From 2a913f23447ce7ef2a4dcaaa230ff43116cf5249 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 14 Jul 2020 21:28:33 -0700 Subject: net/mlx5: Add VDPA interface type to supported enumerations VDPA is a new interface that will be added in subsequent patches. It uses mlx5 core devices and resources. Add an interface type for it. Signed-off-by: Eli Cohen Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f2557d7e1355..5ecc48831ae8 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1054,6 +1054,7 @@ enum { enum { MLX5_INTERFACE_PROTOCOL_IB = 0, MLX5_INTERFACE_PROTOCOL_ETH = 1, + MLX5_INTERFACE_PROTOCOL_VDPA = 2, }; struct mlx5_interface { -- cgit From 8a06a79b0aa811eee6d56b3cfc738c5d08b0dc74 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 14 Jul 2020 21:28:34 -0700 Subject: net/mlx5: Add interface changes required for VDPA Rename mlx5_ifc_device_virtio_emulation_cap_bits to mlx5_ifc_virtio_emulation_cap_bits to match names produced by the tools producing these auto generated files. In addition missing capabilities that will be required by VDPA implementation. 
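For orientation, a VDPA consumer would read the newly exposed capabilities through the existing CAP accessors; the probe below is only an illustration (the accessor macros and field names come from the patch, the helper itself does not exist in the driver):

#include <linux/mlx5/device.h>
#include <linux/mlx5/driver.h>

/* Illustrative: read two of the virtio emulation capabilities. */
static void vdpa_query_emulation_caps(struct mlx5_core_dev *mdev,
				      u32 *max_vqs, u64 *db_bar_offset)
{
	*max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
	*db_bar_offset = MLX5_CAP64_DEV_VDPA_EMULATION(mdev,
						       doorbell_bar_offset);
}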
Signed-off-by: Eli Cohen Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 4 +- include/linux/mlx5/mlx5_ifc.h | 112 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 100 insertions(+), 16 deletions(-) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 57db125e5802..2aacf9a8ee4d 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1361,11 +1361,11 @@ enum mlx5_qcam_feature_groups { MLX5_ADDR_OF(device_event_cap, (mdev)->caps.hca_cur[MLX5_CAP_DEV_EVENT], cap) #define MLX5_CAP_DEV_VDPA_EMULATION(mdev, cap)\ - MLX5_GET(device_virtio_emulation_cap, \ + MLX5_GET(virtio_emulation_cap, \ (mdev)->caps.hca_cur[MLX5_CAP_VDPA_EMULATION], cap) #define MLX5_CAP64_DEV_VDPA_EMULATION(mdev, cap)\ - MLX5_GET64(device_virtio_emulation_cap, \ + MLX5_GET64(virtio_emulation_cap, \ (mdev)->caps.hca_cur[MLX5_CAP_VDPA_EMULATION], cap) #define MLX5_CAP_IPSEC(mdev, cap)\ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5890e5c9da77..435ab47d5362 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -93,6 +93,7 @@ enum { enum { MLX5_OBJ_TYPE_GENEVE_TLV_OPT = 0x000b, + MLX5_OBJ_TYPE_VIRTIO_NET_Q = 0x000d, MLX5_OBJ_TYPE_MKEY = 0xff01, MLX5_OBJ_TYPE_QP = 0xff02, MLX5_OBJ_TYPE_PSV = 0xff03, @@ -981,17 +982,40 @@ struct mlx5_ifc_device_event_cap_bits { u8 user_unaffiliated_events[4][0x40]; }; -struct mlx5_ifc_device_virtio_emulation_cap_bits { - u8 reserved_at_0[0x20]; +struct mlx5_ifc_virtio_emulation_cap_bits { + u8 desc_tunnel_offload_type[0x1]; + u8 eth_frame_offload_type[0x1]; + u8 virtio_version_1_0[0x1]; + u8 device_features_bits_mask[0xd]; + u8 event_mode[0x8]; + u8 virtio_queue_type[0x8]; - u8 reserved_at_20[0x13]; + u8 max_tunnel_desc[0x10]; + u8 reserved_at_30[0x3]; u8 log_doorbell_stride[0x5]; u8 reserved_at_38[0x3]; u8 log_doorbell_bar_size[0x5]; u8 doorbell_bar_offset[0x40]; - u8 reserved_at_80[0x780]; + u8 max_emulated_devices[0x8]; + u8 max_num_virtio_queues[0x18]; + + u8 reserved_at_a0[0x60]; + + u8 umem_1_buffer_param_a[0x20]; + + u8 umem_1_buffer_param_b[0x20]; + + u8 umem_2_buffer_param_a[0x20]; + + u8 umem_2_buffer_param_b[0x20]; + + u8 umem_3_buffer_param_a[0x20]; + + u8 umem_3_buffer_param_b[0x20]; + + u8 reserved_at_1c0[0x640]; }; enum { @@ -1216,7 +1240,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 max_sgl_for_optimized_performance[0x8]; u8 log_max_cq_sz[0x8]; - u8 reserved_at_d0[0xb]; + u8 reserved_at_d0[0x9]; + u8 virtio_net_device_emualtion_manager[0x1]; + u8 virtio_blk_device_emualtion_manager[0x1]; u8 log_max_cq[0x5]; u8 log_max_eq_sz[0x8]; @@ -2952,7 +2978,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_fpga_cap_bits fpga_cap; struct mlx5_ifc_tls_cap_bits tls_cap; struct mlx5_ifc_device_mem_cap_bits device_mem_cap; - struct mlx5_ifc_device_virtio_emulation_cap_bits virtio_emulation_cap; + struct mlx5_ifc_virtio_emulation_cap_bits virtio_emulation_cap; u8 reserved_at_0[0x8000]; }; @@ -3298,15 +3324,18 @@ struct mlx5_ifc_scheduling_context_bits { }; struct mlx5_ifc_rqtc_bits { - u8 reserved_at_0[0xa0]; + u8 reserved_at_0[0xa0]; - u8 reserved_at_a0[0x10]; - u8 rqt_max_size[0x10]; + u8 reserved_at_a0[0x5]; + u8 list_q_type[0x3]; + u8 reserved_at_a8[0x8]; + u8 rqt_max_size[0x10]; - u8 reserved_at_c0[0x10]; - u8 rqt_actual_size[0x10]; + u8 rq_vhca_id_format[0x1]; + u8 reserved_at_c1[0xf]; + u8 rqt_actual_size[0x10]; - u8 reserved_at_e0[0x6a0]; + u8 reserved_at_e0[0x6a0]; struct mlx5_ifc_rq_num_bits rq_num[]; }; @@ -7084,7 +7113,7 @@ 
struct mlx5_ifc_destroy_mkey_out_bits { struct mlx5_ifc_destroy_mkey_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7782,7 +7811,7 @@ struct mlx5_ifc_create_mkey_out_bits { struct mlx5_ifc_create_mkey_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -10312,6 +10341,40 @@ struct mlx5_ifc_create_umem_in_bits { struct mlx5_ifc_umem_bits umem; }; +struct mlx5_ifc_create_umem_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x8]; + u8 umem_id[0x18]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_destroy_umem_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x8]; + u8 umem_id[0x18]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_destroy_umem_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + struct mlx5_ifc_create_uctx_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; @@ -10324,6 +10387,18 @@ struct mlx5_ifc_create_uctx_in_bits { struct mlx5_ifc_uctx_bits uctx; }; +struct mlx5_ifc_create_uctx_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x10]; + u8 uid[0x10]; + + u8 reserved_at_60[0x20]; +}; + struct mlx5_ifc_destroy_uctx_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; @@ -10337,6 +10412,15 @@ struct mlx5_ifc_destroy_uctx_in_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_destroy_uctx_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + struct mlx5_ifc_create_sw_icm_in_bits { struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; struct mlx5_ifc_sw_icm_bits sw_icm; -- cgit From 4c2573e1f6c79525df9094f6ba085fcc35c18c45 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Tue, 14 Jul 2020 21:28:35 -0700 Subject: net/mlx5: Enable count action for rules with allow action Enable the creation of rules with allow and count actions. This enables using counters on egress flow tables. Signed-off-by: Michael Guralnik Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index e47a66983935..644fe4c2f0fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1598,6 +1598,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, static bool counter_is_valid(u32 action) { return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_ALLOW | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)); } -- cgit From 6112ef62826e91afbae5446d5d47b38e25f47e3f Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Sun, 5 Jul 2020 13:43:10 +0300 Subject: RDMA/rxe: Drop pointless checks in rxe_init_ports Both pkey_tbl_len and gid_tbl_len are set in rxe_init_port_param() - so no need to check if they aren't set. 
Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20200705104313.283034-2-kamalheib1@gmail.com Signed-off-by: Kamal Heib Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 5642eefb4ba1..c7191b5e04a5 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -147,9 +147,6 @@ static int rxe_init_ports(struct rxe_dev *rxe) rxe_init_port_param(port); - if (!port->attr.pkey_tbl_len || !port->attr.gid_tbl_len) - return -EINVAL; - port->pkey_tbl = kcalloc(port->attr.pkey_tbl_len, sizeof(*port->pkey_tbl), GFP_KERNEL); -- cgit From 9d576eac6380534081a5ce98b0776993f7bb6a2e Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Sun, 5 Jul 2020 13:43:11 +0300 Subject: RDMA/rxe: Return void from rxe_init_port_param() The return value from rxe_init_port_param() is always 0 - change it to be void. Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20200705104313.283034-3-kamalheib1@gmail.com Signed-off-by: Kamal Heib Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index c7191b5e04a5..efcb72c92be6 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -111,7 +111,7 @@ static void rxe_init_device_param(struct rxe_dev *rxe) } /* initialize port attributes */ -static int rxe_init_port_param(struct rxe_port *port) +static void rxe_init_port_param(struct rxe_port *port) { port->attr.state = IB_PORT_DOWN; port->attr.max_mtu = IB_MTU_4096; @@ -134,8 +134,6 @@ static int rxe_init_port_param(struct rxe_port *port) port->attr.phys_state = RXE_PORT_PHYS_STATE; port->mtu_cap = ib_mtu_enum_to_int(IB_MTU_256); port->subnet_prefix = cpu_to_be64(RXE_PORT_SUBNET_PREFIX); - - return 0; } /* initialize port state, note IB convention that HCA ports are always -- cgit From 293d8440a0bdd246d523802f62fb1d16c238a485 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Sun, 5 Jul 2020 13:43:12 +0300 Subject: RDMA/rxe: Return void from rxe_mem_init_dma() The return value from rxe_mem_init_dma() is always 0 - change it to be void and fix the callers accordingly. 
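The caller-side benefit is that dead error paths disappear, as the diff below shows for rxe_get_dma_mr(); schematically (bar and bar_init are invented names):

struct bar { int ready; };

/* Was "static int bar_init(...)" that could only ever return 0 */
static void bar_init(struct bar *b)
{
	b->ready = 1;
}

static int bar_create(struct bar *b)
{
	bar_init(b);	/* nothing to unwind, no return value to check */
	return 0;
}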
Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20200705104313.283034-4-kamalheib1@gmail.com Signed-off-by: Kamal Heib Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_loc.h | 4 ++-- drivers/infiniband/sw/rxe/rxe_mr.c | 6 ++---- drivers/infiniband/sw/rxe/rxe_verbs.c | 20 +++----------------- 3 files changed, 7 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 238d6a357aac..0688928cf2b1 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -103,8 +103,8 @@ enum copy_direction { from_mem_obj, }; -int rxe_mem_init_dma(struct rxe_pd *pd, - int access, struct rxe_mem *mem); +void rxe_mem_init_dma(struct rxe_pd *pd, + int access, struct rxe_mem *mem); int rxe_mem_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, int access, struct ib_udata *udata, diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index a63cb5fac01f..cdd811a45120 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -144,8 +144,8 @@ err1: return -ENOMEM; } -int rxe_mem_init_dma(struct rxe_pd *pd, - int access, struct rxe_mem *mem) +void rxe_mem_init_dma(struct rxe_pd *pd, + int access, struct rxe_mem *mem) { rxe_mem_init(access, mem); @@ -153,8 +153,6 @@ int rxe_mem_init_dma(struct rxe_pd *pd, mem->access = access; mem->state = RXE_MEM_STATE_VALID; mem->type = RXE_MEM_TYPE_DMA; - - return 0; } int rxe_mem_init_user(struct rxe_pd *pd, u64 start, diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 0472df52d36d..0f5b3c062f32 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -901,30 +901,16 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access) struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); struct rxe_mem *mr; - int err; mr = rxe_alloc(&rxe->mr_pool); - if (!mr) { - err = -ENOMEM; - goto err1; - } + if (!mr) + return ERR_PTR(-ENOMEM); rxe_add_index(mr); - rxe_add_ref(pd); - - err = rxe_mem_init_dma(pd, access, mr); - if (err) - goto err2; + rxe_mem_init_dma(pd, access, mr); return &mr->ibmr; - -err2: - rxe_drop_ref(pd); - rxe_drop_index(mr); - rxe_drop_ref(mr); -err1: - return ERR_PTR(err); } static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, -- cgit From 420bd9e2d93e4a4121d5bdf3a051b0ddefa9c1cc Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Sun, 5 Jul 2020 13:43:13 +0300 Subject: RDMA/rxe: Remove rxe_link_layer() Instead of returning IB_LINK_LAYER_ETHERNET from rxe_link_layer, return it directly from get_link_layer callback and remove rxe_link_layer(). 
Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20200705104313.283034-5-kamalheib1@gmail.com Signed-off-by: Kamal Heib Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_loc.h | 1 - drivers/infiniband/sw/rxe/rxe_net.c | 5 ----- drivers/infiniband/sw/rxe/rxe_verbs.c | 4 +--- 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 0688928cf2b1..39dc3bfa5d5d 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -142,7 +142,6 @@ int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb); struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, int paylen, struct rxe_pkt_info *pkt); int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc); -enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num); const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num); struct device *rxe_dma_device(struct rxe_dev *rxe); int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid); diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 312c2fc961c0..0c3808611f95 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -520,11 +520,6 @@ const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num) return rxe->ndev->name; } -enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num) -{ - return IB_LINK_LAYER_ETHERNET; -} - int rxe_net_add(const char *ibdev_name, struct net_device *ndev) { int err; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 0f5b3c062f32..74f071003690 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -141,9 +141,7 @@ static int rxe_modify_port(struct ib_device *dev, static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev, u8 port_num) { - struct rxe_dev *rxe = to_rdev(dev); - - return rxe_link_layer(rxe, port_num); + return IB_LINK_LAYER_ETHERNET; } static int rxe_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) -- cgit From 0829d2da608c6ce225052a99b1bc7a2a1c421173 Mon Sep 17 00:00:00 2001 From: Daria Velikovsky Date: Tue, 7 Jul 2020 14:02:59 +0300 Subject: RDMA/mlx5: Init dest_type when create flow When using action drop dest_type was never assigned to any value. Add initialization of dest_type to -1 since 0 is valid. 
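The general idiom, sketched with invented names: when 0 is itself a legal destination type, "not set" needs a value outside the valid range, checked explicitly before use.

#include <linux/errno.h>
#include <linux/types.h>

enum { DEST_VPORT = 0, DEST_FLOW_TABLE = 1 };	/* 0 is a legal type */

static int pick_dest(bool have_vport, bool have_table, int *dest_type)
{
	*dest_type = -1;		/* nothing parsed yet */

	if (have_vport)
		*dest_type = DEST_VPORT;
	else if (have_table)
		*dest_type = DEST_FLOW_TABLE;

	return *dest_type == -1 ? -EINVAL : 0;
}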
Fixes: f29de9eee782 ("RDMA/mlx5: Add support for drop action in DV steering") Link: https://lore.kernel.org/r/20200707110259.882276-1-leon@kernel.org Signed-off-by: Daria Velikovsky Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 0d8abb7c3cdf..1a7e6226f11a 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -1903,7 +1903,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( struct mlx5_flow_context flow_context = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; u32 *offset_attr, offset = 0, counter_id = 0; - int dest_id, dest_type, inlen, len, ret, i; + int dest_id, dest_type = -1, inlen, len, ret, i; struct mlx5_ib_flow_handler *flow_handler; struct mlx5_ib_flow_matcher *fs_matcher; struct ib_uobject **arr_flow_actions; -- cgit From 317000b926b07c07e7f6c4032bd3344639f5eb4c Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 8 Jul 2020 12:19:08 +0300 Subject: IB/isert: allocate RW ctxs according to max IO size Current iSER target code allocates MR pool budget based on queue size. Since there is no handshake between iSER initiator and target on max IO size, we'll set the iSER target to support upto 16MiB IO operations and allocate the correct number of RDMA ctxs according to the factor of MR's per IO operation. This would guarantee sufficient size of the MR pool for the required IO queue depth and IO size. Link: https://lore.kernel.org/r/20200708091908.162263-1-maxg@mellanox.com Reported-by: Krishnamraju Eraparaju Tested-by: Krishnamraju Eraparaju Signed-off-by: Max Gurtovoy Acked-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/isert/ib_isert.c | 6 ++++-- drivers/infiniband/ulp/isert/ib_isert.h | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index b7df38ee8ae0..49f5f053c7b2 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -119,7 +119,7 @@ isert_create_qp(struct isert_conn *isert_conn, { struct isert_device *device = isert_conn->device; struct ib_qp_init_attr attr; - int ret; + int ret, factor; memset(&attr, 0, sizeof(struct ib_qp_init_attr)); attr.event_handler = isert_qp_event_callback; @@ -128,7 +128,9 @@ isert_create_qp(struct isert_conn *isert_conn, attr.recv_cq = comp->cq; attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1; attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1; - attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX; + factor = rdma_rw_mr_factor(device->ib_device, cma_id->port_num, + ISCSI_ISER_MAX_SG_TABLESIZE); + attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX * factor; attr.cap.max_send_sge = device->ib_device->attrs.max_send_sge; attr.cap.max_recv_sge = 1; attr.sq_sig_type = IB_SIGNAL_REQ_WR; diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 3b296bac4f60..c9ccf1d87833 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -63,7 +63,8 @@ (ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge) + \ sizeof(struct ib_cqe) + sizeof(bool))) -#define ISCSI_ISER_SG_TABLESIZE 256 +/* Maximum support is 16MB I/O size */ +#define ISCSI_ISER_MAX_SG_TABLESIZE 4096 enum isert_desc_type { ISCSI_TX_CONTROL, -- cgit From acca72e2b031b9fbb4184511072bd246a0abcebc Mon 
Sep 17 00:00:00 2001 From: Yuval Basson Date: Wed, 8 Jul 2020 22:55:26 +0300 Subject: RDMA/qedr: SRQ's bug fixes QP's with the same SRQ, working on different CQs and running in parallel on different CPUs could lead to a race when maintaining the SRQ consumer count, and leads to FW running out of SRQs. Update the consumer atomically. Make sure the wqe_prod is updated after the sge_prod due to FW requirements. Fixes: 3491c9e799fb ("qedr: Add support for kernel mode SRQ's") Link: https://lore.kernel.org/r/20200708195526.31040-1-ybason@marvell.com Signed-off-by: Michal Kalderon Signed-off-by: Yuval Basson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/qedr.h | 4 ++-- drivers/infiniband/hw/qedr/verbs.c | 22 ++++++++++------------ 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index fdf90ecb2699..aa332027da86 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -344,10 +344,10 @@ struct qedr_srq_hwq_info { u32 wqe_prod; u32 sge_prod; u32 wr_prod_cnt; - u32 wr_cons_cnt; + atomic_t wr_cons_cnt; u32 num_elems; - u32 *virt_prod_pair_addr; + struct rdma_srq_producers *virt_prod_pair_addr; dma_addr_t phy_prod_pair_addr; }; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 3d7d5617818f..42273aa0b5e1 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -3686,7 +3686,7 @@ static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq) * count and consumer count and subtract it from max * work request supported so that we get elements left. */ - used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt; + used = hw_srq->wr_prod_cnt - (u32)atomic_read(&hw_srq->wr_cons_cnt); return hw_srq->max_wr - used; } @@ -3701,7 +3701,6 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, unsigned long flags; int status = 0; u32 num_sge; - u32 offset; spin_lock_irqsave(&srq->lock, flags); @@ -3714,7 +3713,8 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, if (!qedr_srq_elem_left(hw_srq) || wr->num_sge > srq->hw_srq.max_sges) { DP_ERR(dev, "Can't post WR (%d,%d) || (%d > %d)\n", - hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt, + hw_srq->wr_prod_cnt, + atomic_read(&hw_srq->wr_cons_cnt), wr->num_sge, srq->hw_srq.max_sges); status = -ENOMEM; *bad_wr = wr; @@ -3748,22 +3748,20 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, hw_srq->sge_prod++; } - /* Flush WQE and SGE information before + /* Update WQE and SGE information before * updating producer. */ - wmb(); + dma_wmb(); /* SRQ producer is 8 bytes. Need to update SGE producer index * in first 4 bytes and need to update WQE producer in * next 4 bytes. */ - *srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod; - offset = offsetof(struct rdma_srq_producers, wqe_prod); - *((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) = - hw_srq->wqe_prod; + srq->hw_srq.virt_prod_pair_addr->sge_prod = hw_srq->sge_prod; + /* Make sure sge producer is updated first */ + dma_wmb(); + srq->hw_srq.virt_prod_pair_addr->wqe_prod = hw_srq->wqe_prod; - /* Flush producer after updating it. 
*/ - wmb(); wr = wr->next; } @@ -4182,7 +4180,7 @@ static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp, } else { __process_resp_one(dev, qp, cq, wc, resp, wr_id); } - srq->hw_srq.wr_cons_cnt++; + atomic_inc(&srq->hw_srq.wr_cons_cnt); return 1; } -- cgit From 535ee8cdbcf8c570eccdfe9f8821ddda75fcaaff Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 9 Jul 2020 18:52:50 -0500 Subject: IB/hfi1: Remove unnecessary fall-through markings Reorganize the code a bit in a more standard way[1] and remove unnecessary fall-through markings. [1] https://lore.kernel.org/lkml/20200708054703.GR207186@unreal/ Link: https://lore.kernel.org/r/20200709235250.GA26678@embeddedor Signed-off-by: Gustavo A. R. Silva Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 15f9c635f292..7eaf99538216 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -7317,11 +7317,11 @@ static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width) case 1: return OPA_LINK_WIDTH_1X; case 2: return OPA_LINK_WIDTH_2X; case 3: return OPA_LINK_WIDTH_3X; + case 4: return OPA_LINK_WIDTH_4X; default: dd_dev_info(dd, "%s: invalid width %d, using 4\n", __func__, width); - /* fall through */ - case 4: return OPA_LINK_WIDTH_4X; + return OPA_LINK_WIDTH_4X; } } @@ -7376,12 +7376,13 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width, case 0: dd->pport[0].link_speed_active = OPA_LINK_SPEED_12_5G; break; + case 1: + dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G; + break; default: dd_dev_err(dd, "%s: unexpected max rate %d, using 25Gb\n", __func__, (int)max_rate); - /* fall through */ - case 1: dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G; break; } @@ -12878,11 +12879,6 @@ bail: static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate) { switch (chip_lstate) { - default: - dd_dev_err(dd, - "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n", - chip_lstate); - /* fall through */ case LSTATE_DOWN: return IB_PORT_DOWN; case LSTATE_INIT: @@ -12891,6 +12887,11 @@ static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate) return IB_PORT_ARMED; case LSTATE_ACTIVE: return IB_PORT_ACTIVE; + default: + dd_dev_err(dd, + "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n", + chip_lstate); + return IB_PORT_DOWN; } } @@ -12898,10 +12899,6 @@ u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate) { /* look at the HFI meta-states only */ switch (chip_pstate & 0xf0) { - default: - dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n", - chip_pstate); - /* fall through */ case PLS_DISABLED: return IB_PORTPHYSSTATE_DISABLED; case PLS_OFFLINE: @@ -12914,6 +12911,10 @@ u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate) return IB_PORTPHYSSTATE_LINKUP; case PLS_PHYTEST: return IB_PORTPHYSSTATE_PHY_TEST; + default: + dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n", + chip_pstate); + return IB_PORTPHYSSTATE_DISABLED; } } -- cgit From 3e9fed7fb63366ae125355b98653ed1c907ed85d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 11 Jul 2020 09:31:20 +0200 Subject: RDMA/usnic: switch from 'pci_' to 'dma_' API The wrappers in include/linux/pci-dma-compat.h should go away. The patch has been generated with the coccinelle script below. It has been compile tested.
When memory is allocated, GFP_ATOMIC should be used to be consistent with the surrounding code. @@ @@ - PCI_DMA_BIDIRECTIONAL + DMA_BIDIRECTIONAL @@ @@ - PCI_DMA_TODEVICE + DMA_TO_DEVICE @@ @@ - PCI_DMA_FROMDEVICE + DMA_FROM_DEVICE @@ @@ - PCI_DMA_NONE + DMA_NONE @@ expression e1, e2, e3; @@ - pci_alloc_consistent(e1, e2, e3) + dma_alloc_coherent(&e1->dev, e2, e3, GFP_) @@ expression e1, e2, e3; @@ - pci_zalloc_consistent(e1, e2, e3) + dma_alloc_coherent(&e1->dev, e2, e3, GFP_) @@ expression e1, e2, e3, e4; @@ - pci_free_consistent(e1, e2, e3, e4) + dma_free_coherent(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_map_single(e1, e2, e3, e4) + dma_map_single(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_unmap_single(e1, e2, e3, e4) + dma_unmap_single(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4, e5; @@ - pci_map_page(e1, e2, e3, e4, e5) + dma_map_page(&e1->dev, e2, e3, e4, e5) @@ expression e1, e2, e3, e4; @@ - pci_unmap_page(e1, e2, e3, e4) + dma_unmap_page(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_map_sg(e1, e2, e3, e4) + dma_map_sg(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_unmap_sg(e1, e2, e3, e4) + dma_unmap_sg(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_single_for_cpu(e1, e2, e3, e4) + dma_sync_single_for_cpu(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_single_for_device(e1, e2, e3, e4) + dma_sync_single_for_device(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_sg_for_cpu(e1, e2, e3, e4) + dma_sync_sg_for_cpu(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_sg_for_device(e1, e2, e3, e4) + dma_sync_sg_for_device(&e1->dev, e2, e3, e4) @@ expression e1, e2; @@ - pci_dma_mapping_error(e1, e2) + dma_mapping_error(&e1->dev, e2) @@ expression e1, e2; @@ - pci_set_dma_mask(e1, e2) + dma_set_mask(&e1->dev, e2) @@ expression e1, e2; @@ - pci_set_consistent_dma_mask(e1, e2) + dma_set_coherent_mask(&e1->dev, e2) Link: https://lore.kernel.org/r/20200711073120.249146-1-christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/usnic/usnic_fwd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.c b/drivers/infiniband/hw/usnic/usnic_fwd.c index 7875883621f4..398c4c00b932 100644 --- a/drivers/infiniband/hw/usnic/usnic_fwd.c +++ b/drivers/infiniband/hw/usnic/usnic_fwd.c @@ -214,7 +214,7 @@ usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter, if (!flow) return ERR_PTR(-ENOMEM); - tlv = pci_alloc_consistent(pdev, tlv_size, &tlv_pa); + tlv = dma_alloc_coherent(&pdev->dev, tlv_size, &tlv_pa, GFP_ATOMIC); if (!tlv) { usnic_err("Failed to allocate memory\n"); status = -ENOMEM; @@ -258,7 +258,7 @@ usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter, out_free_tlv: spin_unlock(&ufdev->lock); - pci_free_consistent(pdev, tlv_size, tlv, tlv_pa); + dma_free_coherent(&pdev->dev, tlv_size, tlv, tlv_pa); if (!status) return flow; out_free_flow: -- cgit From bbe4f4245271bd0f21bf826996c0c5d87a3529c9 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Tue, 7 Jul 2020 09:30:59 +0300 Subject: RDMA/qedr: Add EDPM mode type for user-fw compatibility In older FW versions the completion flag was treated as the ack flag in edpm messages. commit ff937b916eb6 ("qed: Add EDPM mode type for user-fw compatibility") exposed the FW option of setting which mode the QP is in by adding a flag to the qedr <-> qed API. 
This patch adds the qedr <-> libqedr interface so that the libqedr can set the flag appropriately and qedr can pass it down to FW. Flag is added for backward compatibility with libqedr. For older libs, this flag didn't exist and therefore set to zero. Fixes: ac1b36e55a51 ("qedr: Add support for user context verbs") Link: https://lore.kernel.org/r/20200707063100.3811-2-michal.kalderon@marvell.com Signed-off-by: Yuval Bason Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/qedr.h | 1 + drivers/infiniband/hw/qedr/verbs.c | 11 ++++++++--- include/uapi/rdma/qedr-abi.h | 4 ++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index aa332027da86..460292179b32 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -235,6 +235,7 @@ struct qedr_ucontext { u32 dpi_size; u16 dpi; bool db_rec; + u8 edpm_mode; }; union db_prod32 { diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 42273aa0b5e1..5008149ea116 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -275,7 +275,8 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) DP_ERR(dev, "Problem copying data from user space\n"); return -EFAULT; } - + ctx->edpm_mode = !!(ureq.context_flags & + QEDR_ALLOC_UCTX_EDPM_MODE); ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC); } @@ -316,7 +317,8 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) uresp.dpm_flags = QEDR_DPM_TYPE_IWARP_LEGACY; else uresp.dpm_flags = QEDR_DPM_TYPE_ROCE_ENHANCED | - QEDR_DPM_TYPE_ROCE_LEGACY; + QEDR_DPM_TYPE_ROCE_LEGACY | + QEDR_DPM_TYPE_ROCE_EDPM_MODE; uresp.dpm_flags |= QEDR_DPM_SIZES_SET; uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE; @@ -1750,7 +1752,7 @@ static int qedr_create_user_qp(struct qedr_dev *dev, struct qed_rdma_create_qp_out_params out_params; struct qedr_pd *pd = get_qedr_pd(ibpd); struct qedr_create_qp_uresp uresp; - struct qedr_ucontext *ctx = NULL; + struct qedr_ucontext *ctx = pd ? pd->uctx : NULL; struct qedr_create_qp_ureq ureq; int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1); int rc = -EINVAL; @@ -1788,6 +1790,9 @@ static int qedr_create_user_qp(struct qedr_dev *dev, in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa; } + if (ctx) + SET_FIELD(in_params.flags, QED_ROCE_EDPM_MODE, ctx->edpm_mode); + qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx, &in_params, &out_params); diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h index a0b83c9d4498..b261c9fca07b 100644 --- a/include/uapi/rdma/qedr-abi.h +++ b/include/uapi/rdma/qedr-abi.h @@ -39,7 +39,7 @@ /* user kernel communication data structures. */ enum qedr_alloc_ucontext_flags { - QEDR_ALLOC_UCTX_RESERVED = 1 << 0, + QEDR_ALLOC_UCTX_EDPM_MODE = 1 << 0, QEDR_ALLOC_UCTX_DB_REC = 1 << 1 }; @@ -56,7 +56,7 @@ enum qedr_rdma_dpm_type { QEDR_DPM_TYPE_ROCE_ENHANCED = 1 << 0, QEDR_DPM_TYPE_ROCE_LEGACY = 1 << 1, QEDR_DPM_TYPE_IWARP_LEGACY = 1 << 2, - QEDR_DPM_TYPE_RESERVED = 1 << 3, + QEDR_DPM_TYPE_ROCE_EDPM_MODE = 1 << 3, QEDR_DPM_SIZES_SET = 1 << 4, }; -- cgit From eb7f84e379daad69b4c92538baeaf93bbf493c14 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Tue, 7 Jul 2020 09:31:00 +0300 Subject: RDMA/qedr: Add EDPM max size to alloc ucontext response User space should receive the maximum edpm size from kernel driver, similar to other edpm/ldpm related limits. 
Add an additional parameter to the alloc_ucontext_resp structure for the edpm maximum size. In addition, pass an indication from user-space to kernel (and not just kernel to user) that the DPM sizes are supported. This is for supporting backward-forward compatibility between driver and lib for everything related to DPM transaction and limit sizes. This should have been part of the commit mentioned in the Fixes tag. Link: https://lore.kernel.org/r/20200707063100.3811-3-michal.kalderon@marvell.com Fixes: 93a3d05f9d68 ("RDMA/qedr: Add kernel capability flags for dpm enabled mode") Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/verbs.c | 9 ++++++--- include/uapi/rdma/qedr-abi.h | 6 +++++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 5008149ea116..fcf2eaa3b459 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -320,9 +320,12 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) QEDR_DPM_TYPE_ROCE_LEGACY | QEDR_DPM_TYPE_ROCE_EDPM_MODE; - uresp.dpm_flags |= QEDR_DPM_SIZES_SET; - uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE; - uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE; + if (ureq.context_flags & QEDR_SUPPORT_DPM_SIZES) { + uresp.dpm_flags |= QEDR_DPM_SIZES_SET; + uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE; + uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE; + uresp.edpm_limit_size = QEDR_EDPM_MAX_SIZE; + } uresp.wids_enabled = 1; uresp.wid_count = oparams.wid_count; diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h index b261c9fca07b..bf7333b2b5d7 100644 --- a/include/uapi/rdma/qedr-abi.h +++ b/include/uapi/rdma/qedr-abi.h @@ -40,7 +40,8 @@ /* user kernel communication data structures. */ enum qedr_alloc_ucontext_flags { QEDR_ALLOC_UCTX_EDPM_MODE = 1 << 0, - QEDR_ALLOC_UCTX_DB_REC = 1 << 1 + QEDR_ALLOC_UCTX_DB_REC = 1 << 1, + QEDR_SUPPORT_DPM_SIZES = 1 << 2, }; struct qedr_alloc_ucontext_req { @@ -50,6 +51,7 @@ struct qedr_alloc_ucontext_req { #define QEDR_LDPM_MAX_SIZE (8192) #define QEDR_EDPM_TRANS_SIZE (64) +#define QEDR_EDPM_MAX_SIZE (ROCE_REQ_MAX_INLINE_DATA_SIZE) enum qedr_rdma_dpm_type { QEDR_DPM_TYPE_NONE = 0, @@ -77,6 +79,8 @@ struct qedr_alloc_ucontext_resp { __u16 ldpm_limit_size; __u8 edpm_trans_size; __u8 reserved; + __u16 edpm_limit_size; + __u8 padding[6]; }; struct qedr_alloc_pd_ureq { -- cgit From 5f0b2a6093a4d9aab093964c65083fe801ef1e58 Mon Sep 17 00:00:00 2001 From: Mikhail Malygin Date: Thu, 16 Jul 2020 22:03:41 +0300 Subject: RDMA/rxe: Prevent access to wr->next ptr after wr is posted to send queue rxe_post_send_kernel() iterates over a linked list of wr's until the wr->next ptr is NULL. However, if an interrupt arrives after the last wr is posted, control may be returned to this code only after the send completion callback has run and the wr memory has been freed. As a result, the wr->next pointer may contain an incorrect value, leading to a panic. Store the wr->next on the stack before posting it.
Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20200716190340.23453-1-m.malygin@yadro.com Signed-off-by: Mikhail Malygin Signed-off-by: Sergey Kojushev Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_verbs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 74f071003690..c1649aec8c23 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -682,6 +682,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, unsigned int mask; unsigned int length = 0; int i; + struct ib_send_wr *next; while (wr) { mask = wr_opcode_mask(wr->opcode, qp); @@ -698,6 +699,8 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, break; } + next = wr->next; + length = 0; for (i = 0; i < wr->num_sge; i++) length += wr->sg_list[i].length; @@ -708,7 +711,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, *bad_wr = wr; break; } - wr = wr->next; + wr = next; } rxe_run_task(&qp->req.task, 1); -- cgit From 90efc8b2d4582feef270a5c9460178f59d812f67 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 14 Jul 2020 21:34:08 +0300 Subject: RDMA/core: Expose pkeys sysfs files only if pkey_tbl_len is set Expose the pkeys sysfs files only if the pkey_tbl_len is set by the providers. Link: https://lore.kernel.org/r/20200714183414.61069-2-kamalheib1@gmail.com Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/sysfs.c | 61 +++++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index defe9cd4c5ee..c11e50510e49 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -58,7 +58,7 @@ struct ib_port { struct ib_device *ibdev; struct gid_attr_group *gid_attr_group; struct attribute_group gid_group; - struct attribute_group pkey_group; + struct attribute_group *pkey_group; struct attribute_group *pma_table; struct attribute_group *hw_stats_ag; struct rdma_hw_stats *hw_stats; @@ -681,11 +681,16 @@ static void ib_port_release(struct kobject *kobj) kfree(p->gid_group.attrs); } - if (p->pkey_group.attrs) { - for (i = 0; (a = p->pkey_group.attrs[i]); ++i) - kfree(a); + if (p->pkey_group) { + if (p->pkey_group->attrs) { + for (i = 0; (a = p->pkey_group->attrs[i]); ++i) + kfree(a); + + kfree(p->pkey_group->attrs); + } - kfree(p->pkey_group.attrs); + kfree(p->pkey_group); + p->pkey_group = NULL; } kfree(p); @@ -1118,17 +1123,26 @@ static int add_port(struct ib_core_device *coredev, int port_num) if (ret) goto err_free_gid_type; - p->pkey_group.name = "pkeys"; - p->pkey_group.attrs = alloc_group_attrs(show_port_pkey, - attr.pkey_tbl_len); - if (!p->pkey_group.attrs) { - ret = -ENOMEM; - goto err_remove_gid_type; + if (attr.pkey_tbl_len) { + p->pkey_group = kzalloc(sizeof(*p->pkey_group), GFP_KERNEL); + if (!p->pkey_group) { + ret = -ENOMEM; + goto err_remove_gid_type; + } + + p->pkey_group->name = "pkeys"; + p->pkey_group->attrs = alloc_group_attrs(show_port_pkey, + attr.pkey_tbl_len); + if (!p->pkey_group->attrs) { + ret = -ENOMEM; + goto err_free_pkey_group; + } + + ret = sysfs_create_group(&p->kobj, p->pkey_group); + if (ret) + goto err_free_pkey; } - ret = sysfs_create_group(&p->kobj, &p->pkey_group); - if (ret) - goto err_free_pkey; if (device->ops.init_port && is_full_dev) { ret = 
device->ops.init_port(device, port_num, &p->kobj); @@ -1150,14 +1164,20 @@ static int add_port(struct ib_core_device *coredev, int port_num) return 0; err_remove_pkey: - sysfs_remove_group(&p->kobj, &p->pkey_group); + if (p->pkey_group) + sysfs_remove_group(&p->kobj, p->pkey_group); err_free_pkey: - for (i = 0; i < attr.pkey_tbl_len; ++i) - kfree(p->pkey_group.attrs[i]); + if (p->pkey_group) { + for (i = 0; i < attr.pkey_tbl_len; ++i) + kfree(p->pkey_group->attrs[i]); + + kfree(p->pkey_group->attrs); + p->pkey_group->attrs = NULL; + } - kfree(p->pkey_group.attrs); - p->pkey_group.attrs = NULL; +err_free_pkey_group: + kfree(p->pkey_group); err_remove_gid_type: sysfs_remove_group(&p->gid_attr_group->kobj, @@ -1317,7 +1337,8 @@ void ib_free_port_attrs(struct ib_core_device *coredev) if (port->pma_table) sysfs_remove_group(p, port->pma_table); - sysfs_remove_group(p, &port->pkey_group); + if (port->pkey_group) + sysfs_remove_group(p, port->pkey_group); sysfs_remove_group(p, &port->gid_group); sysfs_remove_group(&port->gid_attr_group->kobj, &port->gid_attr_group->ndev); -- cgit From 6f38efca9bf09b7e4faf8cc4d221e4336aa303ef Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 14 Jul 2020 21:34:09 +0300 Subject: RDMA/core: Allocate the pkey cache only if the pkey_tbl_len is set Allocate the pkey cache only if the pkey_tbl_len is set by the provider, also add checks to avoid accessing the pkey cache when it not initialized. Link: https://lore.kernel.org/r/20200714183414.61069-3-kamalheib1@gmail.com Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 45 ++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index a670209bbce6..ffad73bb40ff 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1054,7 +1054,7 @@ int ib_get_cached_pkey(struct ib_device *device, cache = device->port_data[port_num].cache.pkey; - if (index < 0 || index >= cache->table_len) + if (!cache || index < 0 || index >= cache->table_len) ret = -EINVAL; else *pkey = cache->table[index]; @@ -1099,6 +1099,10 @@ int ib_find_cached_pkey(struct ib_device *device, read_lock_irqsave(&device->cache_lock, flags); cache = device->port_data[port_num].cache.pkey; + if (!cache) { + ret = -EINVAL; + goto err; + } *index = -1; @@ -1117,6 +1121,7 @@ int ib_find_cached_pkey(struct ib_device *device, ret = 0; } +err: read_unlock_irqrestore(&device->cache_lock, flags); return ret; @@ -1139,6 +1144,10 @@ int ib_find_exact_cached_pkey(struct ib_device *device, read_lock_irqsave(&device->cache_lock, flags); cache = device->port_data[port_num].cache.pkey; + if (!cache) { + ret = -EINVAL; + goto err; + } *index = -1; @@ -1149,6 +1158,7 @@ int ib_find_exact_cached_pkey(struct ib_device *device, break; } +err: read_unlock_irqrestore(&device->cache_lock, flags); return ret; @@ -1425,23 +1435,26 @@ ib_cache_update(struct ib_device *device, u8 port, bool enforce_security) goto err; } - pkey_cache = kmalloc(struct_size(pkey_cache, table, - tprops->pkey_tbl_len), - GFP_KERNEL); - if (!pkey_cache) { - ret = -ENOMEM; - goto err; - } + if (tprops->pkey_tbl_len) { + pkey_cache = kmalloc(struct_size(pkey_cache, table, + tprops->pkey_tbl_len), + GFP_KERNEL); + if (!pkey_cache) { + ret = -ENOMEM; + goto err; + } - pkey_cache->table_len = tprops->pkey_tbl_len; + pkey_cache->table_len = tprops->pkey_tbl_len; - for (i = 0; i < pkey_cache->table_len; ++i) { - ret = 
ib_query_pkey(device, port, i, pkey_cache->table + i); - if (ret) { - dev_warn(&device->dev, - "ib_query_pkey failed (%d) for index %d\n", - ret, i); - goto err; + for (i = 0; i < pkey_cache->table_len; ++i) { + ret = ib_query_pkey(device, port, i, + pkey_cache->table + i); + if (ret) { + dev_warn(&device->dev, + "ib_query_pkey failed (%d) for index %d\n", + ret, i); + goto err; + } } } -- cgit From ab75a6cb8cb27ff7f65ab66962975be30507ee29 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 14 Jul 2020 21:34:10 +0300 Subject: RDMA/core: Remove query_pkey from the mandatory ops The query_pkey() isn't mandatory for the iwarp providers, so remove this requirement from the RDMA core. Link: https://lore.kernel.org/r/20200714183414.61069-4-kamalheib1@gmail.com Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index b2d617e599a1..d293b826acbc 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -272,7 +272,6 @@ static void ib_device_check_mandatory(struct ib_device *device) } mandatory_table[] = { IB_MANDATORY_FUNC(query_device), IB_MANDATORY_FUNC(query_port), - IB_MANDATORY_FUNC(query_pkey), IB_MANDATORY_FUNC(alloc_pd), IB_MANDATORY_FUNC(dealloc_pd), IB_MANDATORY_FUNC(create_qp), @@ -2362,6 +2361,9 @@ int ib_query_pkey(struct ib_device *device, if (!rdma_is_port_valid(device, port_num)) return -EINVAL; + if (!device->ops.query_pkey) + return -EOPNOTSUPP; + return device->ops.query_pkey(device, port_num, index, pkey); } EXPORT_SYMBOL(ib_query_pkey); -- cgit From c4995bd35471d00cd1409b3dba6955b45a08590b Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 14 Jul 2020 21:34:11 +0300 Subject: RDMA/siw: Remove the query_pkey callback Now that the query_pkey() isn't mandatory by the RDMA core for iwarp providers, this callback can be removed. Link: https://lore.kernel.org/r/20200714183414.61069-5-kamalheib1@gmail.com Signed-off-by: Kamal Heib Acked-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_main.c | 1 - drivers/infiniband/sw/siw/siw_verbs.c | 9 --------- drivers/infiniband/sw/siw/siw_verbs.h | 1 - 3 files changed, 11 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index a0b8cc643c5c..18c08259157f 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -288,7 +288,6 @@ static const struct ib_device_ops siw_device_ops = { .post_srq_recv = siw_post_srq_recv, .query_device = siw_query_device, .query_gid = siw_query_gid, - .query_pkey = siw_query_pkey, .query_port = siw_query_port, .query_qp = siw_query_qp, .query_srq = siw_query_srq, diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 0d509f7a10a6..adafa1b8bebe 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -176,7 +176,6 @@ int siw_query_port(struct ib_device *base_dev, u8 port, attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu); attr->phys_state = sdev->state == IB_PORT_ACTIVE ? 
IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED; - attr->pkey_tbl_len = 1; attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP; attr->state = sdev->state; /* @@ -204,20 +203,12 @@ int siw_get_port_immutable(struct ib_device *base_dev, u8 port, if (rv) return rv; - port_immutable->pkey_tbl_len = attr.pkey_tbl_len; port_immutable->gid_tbl_len = attr.gid_tbl_len; port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; return 0; } -int siw_query_pkey(struct ib_device *base_dev, u8 port, u16 idx, u16 *pkey) -{ - /* Report the default pkey */ - *pkey = 0xffff; - return 0; -} - int siw_query_gid(struct ib_device *base_dev, u8 port, int idx, union ib_gid *gid) { diff --git a/drivers/infiniband/sw/siw/siw_verbs.h b/drivers/infiniband/sw/siw/siw_verbs.h index 9335c48c01de..d9572275a6b6 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.h +++ b/drivers/infiniband/sw/siw/siw_verbs.h @@ -46,7 +46,6 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); int siw_query_port(struct ib_device *base_dev, u8 port, struct ib_port_attr *attr); -int siw_query_pkey(struct ib_device *base_dev, u8 port, u16 idx, u16 *pkey); int siw_query_gid(struct ib_device *base_dev, u8 port, int idx, union ib_gid *gid); int siw_alloc_pd(struct ib_pd *base_pd, struct ib_udata *udata); -- cgit From ce07f1c6a8be401e060acd601abab42c37dc7b7c Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 14 Jul 2020 21:34:12 +0300 Subject: RDMA/cxgb4: Remove the query_pkey callback Now that the query_pkey() isn't mandatory by the RDMA core for iwarp providers, this callback can be removed. Link: https://lore.kernel.org/r/20200714183414.61069-6-kamalheib1@gmail.com Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/provider.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 1d3ff59e4060..6c579d2d3997 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -236,14 +236,6 @@ static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_udata *udata) return 0; } -static int c4iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index, - u16 *pkey) -{ - pr_debug("ibdev %p\n", ibdev); - *pkey = 0; - return 0; -} - static int c4iw_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { @@ -317,7 +309,6 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port, IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; props->gid_tbl_len = 1; - props->pkey_tbl_len = 1; props->max_msg_sz = -1; return ret; @@ -439,7 +430,6 @@ static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num, if (err) return err; - immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; return 0; @@ -503,7 +493,6 @@ static const struct ib_device_ops c4iw_dev_ops = { .post_srq_recv = c4iw_post_srq_recv, .query_device = c4iw_query_device, .query_gid = c4iw_query_gid, - .query_pkey = c4iw_query_pkey, .query_port = c4iw_query_port, .query_qp = c4iw_ib_query_qp, .reg_user_mr = c4iw_reg_user_mr, -- cgit From c1c5e9fd3ab58732d40848a57236622af0a7e1db Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 14 Jul 2020 21:34:13 +0300 Subject: RDMA/i40iw: Remove the query_pkey callback Now that the query_pkey() isn't mandatory by the RDMA core for iwarp providers, this callback can be removed. 
Link: https://lore.kernel.org/r/20200714183414.61069-7-kamalheib1@gmail.com Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index f9ef3ac2f4cd..6957e4f3404b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -101,7 +101,6 @@ static int i40iw_query_port(struct ib_device *ibdev, props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; props->gid_tbl_len = 1; - props->pkey_tbl_len = 1; props->active_width = IB_WIDTH_4X; props->active_speed = 1; props->max_msg_sz = I40IW_MAX_OUTBOUND_MESSAGE_SIZE; @@ -2459,7 +2458,6 @@ static int i40iw_port_immutable(struct ib_device *ibdev, u8 port_num, if (err) return err; - immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; return 0; @@ -2615,22 +2613,6 @@ static int i40iw_query_gid(struct ib_device *ibdev, return 0; } -/** - * i40iw_query_pkey - Query partition key - * @ibdev: device pointer from stack - * @port: port number - * @index: index of pkey - * @pkey: pointer to store the pkey - */ -static int i40iw_query_pkey(struct ib_device *ibdev, - u8 port, - u16 index, - u16 *pkey) -{ - *pkey = 0; - return 0; -} - static const struct ib_device_ops i40iw_dev_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_I40IW, @@ -2670,7 +2652,6 @@ static const struct ib_device_ops i40iw_dev_ops = { .post_send = i40iw_post_send, .query_device = i40iw_query_device, .query_gid = i40iw_query_gid, - .query_pkey = i40iw_query_pkey, .query_port = i40iw_query_port, .query_qp = i40iw_query_qp, .reg_user_mr = i40iw_reg_user_mr, -- cgit From ca4beeee9895f14df22940d804aab4534c23f03f Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 14 Jul 2020 21:34:14 +0300 Subject: RDMA/qedr: Remove the query_pkey callback Now that the query_pkey() isn't mandatory by the RDMA core for iwarp providers, this callback can be removed from the common ops and moved to the RoCE only ops within the qedr driver. 
Link: https://lore.kernel.org/r/20200714183414.61069-8-kamalheib1@gmail.com Signed-off-by: Kamal Heib Acked-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/main.c | 3 +-- drivers/infiniband/hw/qedr/verbs.c | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index ccaedfd53e49..c9eeed25c662 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -110,7 +110,6 @@ static int qedr_iw_port_immutable(struct ib_device *ibdev, u8 port_num, if (err) return err; - immutable->pkey_tbl_len = 1; immutable->gid_tbl_len = 1; immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; immutable->max_mad_size = 0; @@ -179,6 +178,7 @@ static int qedr_iw_register_device(struct qedr_dev *dev) static const struct ib_device_ops qedr_roce_dev_ops = { .get_port_immutable = qedr_roce_port_immutable, + .query_pkey = qedr_query_pkey, }; static void qedr_roce_register_device(struct qedr_dev *dev) @@ -221,7 +221,6 @@ static const struct ib_device_ops qedr_dev_ops = { .post_srq_recv = qedr_post_srq_recv, .process_mad = qedr_process_mad, .query_device = qedr_query_device, - .query_pkey = qedr_query_pkey, .query_port = qedr_query_port, .query_qp = qedr_query_qp, .query_srq = qedr_query_srq, diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index fcf2eaa3b459..bd37eafb7cc4 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -239,7 +239,6 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) attr->ip_gids = true; if (rdma_protocol_iwarp(&dev->ibdev, 1)) { attr->gid_tbl_len = 1; - attr->pkey_tbl_len = 1; } else { attr->gid_tbl_len = QEDR_MAX_SGID; attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN; -- cgit From 1da968e0ef1f640e54ca9c86970b8093c1729753 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Wed, 15 Jul 2020 10:16:54 -0400 Subject: RDMA/bnxt_re: introduce wqe mode to select execution path The bnxt_re driver needs to decide how much SQ and RQ memory should be allocated and which wqe posting/polling algorithm to use. Make changes to set the wqe-mode to a default value during the device registration sequence. The wqe-mode is passed to the lower layer driver as well. Going forward, the lower layer driver will use the wqe-mode to decide the execution path. Initialize the wqe-mode to the static wqe type for now.
Link: https://lore.kernel.org/r/1594822619-4098-2-git-send-email-devesh.sharma@broadcom.com Signed-off-by: Devesh Sharma Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 1 + drivers/infiniband/hw/bnxt_re/main.c | 23 ++++++++++++++++------ drivers/infiniband/hw/bnxt_re/qplib_fp.h | 3 ++- drivers/infiniband/hw/bnxt_re/qplib_res.h | 32 +++++++++++++++++++++---------- 4 files changed, 42 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index f32c7e85ae05..e97bee3ff5de 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1183,6 +1183,7 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, goto out; } qplqp->type = (u8)qptype; + qplqp->wqe_mode = rdev->chip_ctx->modes.wqe_mode; if (init_attr->qp_type == IB_QPT_RC) { qplqp->max_rd_atomic = dev_attr->max_qp_rd_atom; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index b12fbc857f94..dad0df8a2467 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -82,6 +82,15 @@ static void bnxt_re_remove_device(struct bnxt_re_dev *rdev); static void bnxt_re_dealloc_driver(struct ib_device *ib_dev); static void bnxt_re_stop_irq(void *handle); +static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode) +{ + struct bnxt_qplib_chip_ctx *cctx; + + cctx = rdev->chip_ctx; + cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? + mode : BNXT_QPLIB_WQE_MODE_STATIC; +} + static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev) { struct bnxt_qplib_chip_ctx *chip_ctx; @@ -97,7 +106,7 @@ static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev) kfree(chip_ctx); } -static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev) +static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode) { struct bnxt_qplib_chip_ctx *chip_ctx; struct bnxt_en_dev *en_dev; @@ -117,6 +126,7 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev) rdev->qplib_res.cctx = rdev->chip_ctx; rdev->rcfw.res = &rdev->qplib_res; + bnxt_re_set_drv_mode(rdev, wqe_mode); return 0; } @@ -1386,7 +1396,7 @@ static void bnxt_re_worker(struct work_struct *work) schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000)); } -static int bnxt_re_dev_init(struct bnxt_re_dev *rdev) +static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) { struct bnxt_qplib_creq_ctx *creq; struct bnxt_re_ring_attr rattr; @@ -1406,7 +1416,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev) } set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); - rc = bnxt_re_setup_chip_ctx(rdev); + rc = bnxt_re_setup_chip_ctx(rdev, wqe_mode); if (rc) { ibdev_err(&rdev->ibdev, "Failed to get chip context\n"); return -EINVAL; @@ -1585,7 +1595,7 @@ static void bnxt_re_remove_device(struct bnxt_re_dev *rdev) } static int bnxt_re_add_device(struct bnxt_re_dev **rdev, - struct net_device *netdev) + struct net_device *netdev, u8 wqe_mode) { int rc; @@ -1599,7 +1609,7 @@ static int bnxt_re_add_device(struct bnxt_re_dev **rdev, } pci_dev_get((*rdev)->en_dev->pdev); - rc = bnxt_re_dev_init(*rdev); + rc = bnxt_re_dev_init(*rdev, wqe_mode); if (rc) { pci_dev_put((*rdev)->en_dev->pdev); bnxt_re_dev_unreg(*rdev); @@ -1711,7 +1721,8 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier, case NETDEV_REGISTER: if (rdev) break; - rc = bnxt_re_add_device(&rdev, real_dev); + rc = bnxt_re_add_device(&rdev, real_dev, + 
BNXT_QPLIB_WQE_MODE_STATIC); if (!rc) sch_work = true; release = false; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 568ca390322c..6146f7dc368c 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -224,9 +224,10 @@ struct bnxt_qplib_qp { u32 id; u8 type; u8 sig_type; - u32 modify_flags; + u8 wqe_mode; u8 state; u8 cur_qp_state; + u64 modify_flags; u32 max_inline_data; u32 mtu; u8 path_mtu; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index c29cbd3a2d7b..03a0f29e7432 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -41,6 +41,28 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero; +#define CHIP_NUM_57508 0x1750 +#define CHIP_NUM_57504 0x1751 +#define CHIP_NUM_57502 0x1752 + +enum bnxt_qplib_wqe_mode { + BNXT_QPLIB_WQE_MODE_STATIC = 0x00, + BNXT_QPLIB_WQE_MODE_VARIABLE = 0x01, + BNXT_QPLIB_WQE_MODE_INVALID = 0x02 +}; + +struct bnxt_qplib_drv_modes { + u8 wqe_mode; + /* Other modes to follow here */ +}; + +struct bnxt_qplib_chip_ctx { + u16 chip_num; + u8 chip_rev; + u8 chip_metal; + struct bnxt_qplib_drv_modes modes; +}; + #define PTR_CNT_PER_PG (PAGE_SIZE / sizeof(void *)) #define PTR_MAX_IDX_PER_PG (PTR_CNT_PER_PG - 1) #define PTR_PG(x) (((x) & ~PTR_MAX_IDX_PER_PG) / PTR_CNT_PER_PG) @@ -230,16 +252,6 @@ struct bnxt_qplib_ctx { u64 hwrm_intf_ver; }; -struct bnxt_qplib_chip_ctx { - u16 chip_num; - u8 chip_rev; - u8 chip_metal; -}; - -#define CHIP_NUM_57508 0x1750 -#define CHIP_NUM_57504 0x1751 -#define CHIP_NUM_57502 0x1752 - struct bnxt_qplib_res { struct pci_dev *pdev; struct bnxt_qplib_chip_ctx *cctx; -- cgit From 159fb4ceacd79b07bc4a78a897ef2a98dc1dcbf9 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Wed, 15 Jul 2020 10:16:55 -0400 Subject: RDMA/bnxt_re: introduce a function to allocate swq The bnxt_re driver now allocates a shadow sq and rq to maintain the per-wqe wr_id and a few other flags required to support variable wqe. Segregate the allocation of the shadow queue into a separate function and adjust the cqe polling logic. The new polling logic is based on shadow queue indices.
Link: https://lore.kernel.org/r/1594822619-4098-3-git-send-email-devesh.sharma@broadcom.com Signed-off-by: Devesh Sharma Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 348 +++++++++++++++--------------- drivers/infiniband/hw/bnxt_re/qplib_fp.h | 19 ++ drivers/infiniband/hw/bnxt_re/qplib_res.h | 11 + 3 files changed, 207 insertions(+), 171 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index c5e29577cd43..c9e7be3e5152 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -178,11 +178,11 @@ static void bnxt_qplib_free_qp_hdr_buf(struct bnxt_qplib_res *res, if (qp->rq_hdr_buf) dma_free_coherent(&res->pdev->dev, - rq->hwq.max_elements * qp->rq_hdr_buf_size, + rq->max_wqe * qp->rq_hdr_buf_size, qp->rq_hdr_buf, qp->rq_hdr_buf_map); if (qp->sq_hdr_buf) dma_free_coherent(&res->pdev->dev, - sq->hwq.max_elements * qp->sq_hdr_buf_size, + sq->max_wqe * qp->sq_hdr_buf_size, qp->sq_hdr_buf, qp->sq_hdr_buf_map); qp->rq_hdr_buf = NULL; qp->sq_hdr_buf = NULL; @@ -199,10 +199,9 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res, struct bnxt_qplib_q *sq = &qp->sq; int rc = 0; - if (qp->sq_hdr_buf_size && sq->hwq.max_elements) { + if (qp->sq_hdr_buf_size && sq->max_wqe) { qp->sq_hdr_buf = dma_alloc_coherent(&res->pdev->dev, - sq->hwq.max_elements * - qp->sq_hdr_buf_size, + sq->max_wqe * qp->sq_hdr_buf_size, &qp->sq_hdr_buf_map, GFP_KERNEL); if (!qp->sq_hdr_buf) { rc = -ENOMEM; @@ -212,9 +211,9 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res, } } - if (qp->rq_hdr_buf_size && rq->hwq.max_elements) { + if (qp->rq_hdr_buf_size && rq->max_wqe) { qp->rq_hdr_buf = dma_alloc_coherent(&res->pdev->dev, - rq->hwq.max_elements * + rq->max_wqe * qp->rq_hdr_buf_size, &qp->rq_hdr_buf_map, GFP_KERNEL); @@ -784,6 +783,30 @@ done: } /* QP */ + +static int bnxt_qplib_alloc_init_swq(struct bnxt_qplib_q *que) +{ + int rc = 0; + int indx; + + que->swq = kcalloc(que->max_wqe, sizeof(*que->swq), GFP_KERNEL); + if (!que->swq) { + rc = -ENOMEM; + goto out; + } + + que->swq_start = 0; + que->swq_last = que->max_wqe - 1; + for (indx = 0; indx < que->max_wqe; indx++) { + que->swq[indx].slots = 1; + que->swq[indx].next_idx = indx + 1; + } + que->swq[que->swq_last].next_idx = 0; /* Make it circular */ + que->swq_last = 0; +out: + return rc; +} + int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) { struct bnxt_qplib_hwq_attr hwq_attr = {}; @@ -815,20 +838,22 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) if (rc) goto exit; - sq->swq = kcalloc(sq->hwq.max_elements, sizeof(*sq->swq), GFP_KERNEL); - if (!sq->swq) { - rc = -ENOMEM; + rc = bnxt_qplib_alloc_init_swq(sq); + if (rc) goto fail_sq; - } + + req.sq_size = cpu_to_le32(sq->max_wqe); pbl = &sq->hwq.pbl[PBL_LVL_0]; req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) << CMDQ_CREATE_QP1_SQ_PG_SIZE_SFT); pg_sz_lvl |= (sq->hwq.level & CMDQ_CREATE_QP1_SQ_LVL_MASK); req.sq_pg_size_sq_lvl = pg_sz_lvl; + req.sq_fwo_sq_sge = + cpu_to_le16((sq->max_sge & CMDQ_CREATE_QP1_SQ_SGE_MASK) << + CMDQ_CREATE_QP1_SQ_SGE_SFT); + req.scq_cid = cpu_to_le32(qp->scq->id); - if (qp->scq) - req.scq_cid = cpu_to_le32(qp->scq->id); /* RQ */ if (rq->max_wqe) { hwq_attr.res = res; @@ -838,41 +863,31 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) hwq_attr.type = HWQ_TYPE_QUEUE; rc = 
bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr); if (rc) - goto fail_sq; - - rq->swq = kcalloc(rq->hwq.max_elements, sizeof(*rq->swq), - GFP_KERNEL); - if (!rq->swq) { - rc = -ENOMEM; + goto sq_swq; + rc = bnxt_qplib_alloc_init_swq(rq); + if (rc) goto fail_rq; - } + req.rq_size = cpu_to_le32(rq->max_wqe); pbl = &rq->hwq.pbl[PBL_LVL_0]; req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); pg_sz_lvl = (bnxt_qplib_base_pg_size(&rq->hwq) << CMDQ_CREATE_QP1_RQ_PG_SIZE_SFT); pg_sz_lvl |= (rq->hwq.level & CMDQ_CREATE_QP1_RQ_LVL_MASK); req.rq_pg_size_rq_lvl = pg_sz_lvl; - if (qp->rcq) - req.rcq_cid = cpu_to_le32(qp->rcq->id); + req.rq_fwo_rq_sge = + cpu_to_le16((rq->max_sge & + CMDQ_CREATE_QP1_RQ_SGE_MASK) << + CMDQ_CREATE_QP1_RQ_SGE_SFT); } + req.rcq_cid = cpu_to_le32(qp->rcq->id); /* Header buffer - allow hdr_buf pass in */ rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp); if (rc) { rc = -ENOMEM; - goto fail; + goto rq_rwq; } qp_flags |= CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE; req.qp_flags = cpu_to_le32(qp_flags); - req.sq_size = cpu_to_le32(sq->hwq.max_elements); - req.rq_size = cpu_to_le32(rq->hwq.max_elements); - - req.sq_fwo_sq_sge = - cpu_to_le16((sq->max_sge & CMDQ_CREATE_QP1_SQ_SGE_MASK) << - CMDQ_CREATE_QP1_SQ_SGE_SFT); - req.rq_fwo_rq_sge = - cpu_to_le16((rq->max_sge & CMDQ_CREATE_QP1_RQ_SGE_MASK) << - CMDQ_CREATE_QP1_RQ_SGE_SFT); - req.pd_id = cpu_to_le32(qp->pd->id); rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, @@ -898,12 +913,14 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) fail: bnxt_qplib_free_qp_hdr_buf(res, qp); +rq_rwq: + kfree(rq->swq); fail_rq: bnxt_qplib_free_hwq(res, &rq->hwq); - kfree(rq->swq); +sq_swq: + kfree(sq->swq); fail_sq: bnxt_qplib_free_hwq(res, &sq->hwq); - kfree(sq->swq); exit: return rc; } @@ -944,12 +961,12 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) struct creq_create_qp_resp resp; int rc, req_size, psn_sz = 0; struct bnxt_qplib_hwq *xrrq; - u16 cmd_flags = 0, max_ssge; struct bnxt_qplib_pbl *pbl; struct cmdq_create_qp req; + u16 cmd_flags = 0; u32 qp_flags = 0; u8 pg_sz_lvl; - u16 max_rsge; + u16 nsge; RCFW_CMD_PREP(req, CREATE_QP, cmd_flags); @@ -976,27 +993,27 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) if (rc) goto exit; - sq->swq = kcalloc(sq->hwq.max_elements, sizeof(*sq->swq), GFP_KERNEL); - if (!sq->swq) { - rc = -ENOMEM; + rc = bnxt_qplib_alloc_init_swq(sq); + if (rc) goto fail_sq; - } if (psn_sz) bnxt_qplib_init_psn_ptr(qp, psn_sz); + req.sq_size = cpu_to_le32(sq->max_wqe); pbl = &sq->hwq.pbl[PBL_LVL_0]; req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) << CMDQ_CREATE_QP_SQ_PG_SIZE_SFT); pg_sz_lvl |= (sq->hwq.level & CMDQ_CREATE_QP_SQ_LVL_MASK); req.sq_pg_size_sq_lvl = pg_sz_lvl; - - if (qp->scq) - req.scq_cid = cpu_to_le32(qp->scq->id); + req.sq_fwo_sq_sge = + cpu_to_le16(((sq->max_sge & CMDQ_CREATE_QP_SQ_SGE_MASK) << + CMDQ_CREATE_QP_SQ_SGE_SFT) | 0); + req.scq_cid = cpu_to_le32(qp->scq->id); /* RQ */ - if (rq->max_wqe) { + if (!qp->srq) { hwq_attr.res = res; hwq_attr.sginfo = &rq->sg_info; hwq_attr.stride = rq->wqe_size; @@ -1006,30 +1023,30 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr); if (rc) - goto fail_sq; - - rq->swq = kcalloc(rq->hwq.max_elements, sizeof(*rq->swq), - GFP_KERNEL); - if (!rq->swq) { - rc = -ENOMEM; + goto sq_swq; + rc = bnxt_qplib_alloc_init_swq(rq); + 
if (rc) goto fail_rq; - } + + req.rq_size = cpu_to_le32(rq->max_wqe); pbl = &rq->hwq.pbl[PBL_LVL_0]; req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); pg_sz_lvl = (bnxt_qplib_base_pg_size(&rq->hwq) << CMDQ_CREATE_QP_RQ_PG_SIZE_SFT); pg_sz_lvl |= (rq->hwq.level & CMDQ_CREATE_QP_RQ_LVL_MASK); req.rq_pg_size_rq_lvl = pg_sz_lvl; + nsge = (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? + 6 : rq->max_sge; + req.rq_fwo_rq_sge = + cpu_to_le16(((nsge & + CMDQ_CREATE_QP_RQ_SGE_MASK) << + CMDQ_CREATE_QP_RQ_SGE_SFT) | 0); } else { /* SRQ */ - if (qp->srq) { - qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_SRQ_USED; - req.srq_cid = cpu_to_le32(qp->srq->id); - } + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_SRQ_USED; + req.srq_cid = cpu_to_le32(qp->srq->id); } - - if (qp->rcq) - req.rcq_cid = cpu_to_le32(qp->rcq->id); + req.rcq_cid = cpu_to_le32(qp->rcq->id); qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE; qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED; @@ -1037,27 +1054,6 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION; req.qp_flags = cpu_to_le32(qp_flags); - req.sq_size = cpu_to_le32(sq->hwq.max_elements); - req.rq_size = cpu_to_le32(rq->hwq.max_elements); - qp->sq_hdr_buf = NULL; - qp->rq_hdr_buf = NULL; - - rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp); - if (rc) - goto fail_rq; - - /* CTRL-22434: Irrespective of the requested SGE count on the SQ - * always create the QP with max send sges possible if the requested - * inline size is greater than 0. - */ - max_ssge = qp->max_inline_data ? 6 : sq->max_sge; - req.sq_fwo_sq_sge = cpu_to_le16( - ((max_ssge & CMDQ_CREATE_QP_SQ_SGE_MASK) - << CMDQ_CREATE_QP_SQ_SGE_SFT) | 0); - max_rsge = bnxt_qplib_is_chip_gen_p5(res->cctx) ? 6 : rq->max_sge; - req.rq_fwo_rq_sge = cpu_to_le16( - ((max_rsge & CMDQ_CREATE_QP_RQ_SGE_MASK) - << CMDQ_CREATE_QP_RQ_SGE_SFT) | 0); /* ORRQ and IRRQ */ if (psn_sz) { xrrq = &qp->orrq; @@ -1078,7 +1074,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) hwq_attr.type = HWQ_TYPE_CTX; rc = bnxt_qplib_alloc_init_hwq(xrrq, &hwq_attr); if (rc) - goto fail_buf_free; + goto rq_swq; pbl = &xrrq->pbl[PBL_LVL_0]; req.orrq_addr = cpu_to_le64(pbl->pg_map_arr[0]); @@ -1122,21 +1118,18 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) rcfw->qp_tbl[qp->id].qp_handle = (void *)qp; return 0; - fail: - if (qp->irrq.max_elements) - bnxt_qplib_free_hwq(res, &qp->irrq); + bnxt_qplib_free_hwq(res, &qp->irrq); fail_orrq: - if (qp->orrq.max_elements) - bnxt_qplib_free_hwq(res, &qp->orrq); -fail_buf_free: - bnxt_qplib_free_qp_hdr_buf(res, qp); + bnxt_qplib_free_hwq(res, &qp->orrq); +rq_swq: + kfree(rq->swq); fail_rq: bnxt_qplib_free_hwq(res, &rq->hwq); - kfree(rq->swq); +sq_swq: + kfree(sq->swq); fail_sq: bnxt_qplib_free_hwq(res, &sq->hwq); - kfree(sq->swq); exit: return rc; } @@ -1512,7 +1505,7 @@ void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp, memset(sge, 0, sizeof(*sge)); if (qp->sq_hdr_buf) { - sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); + sw_prod = sq->swq_start; sge->addr = (dma_addr_t)(qp->sq_hdr_buf_map + sw_prod * qp->sq_hdr_buf_size); sge->lkey = 0xFFFFFFFF; @@ -1526,7 +1519,7 @@ u32 bnxt_qplib_get_rq_prod_index(struct bnxt_qplib_qp *qp) { struct bnxt_qplib_q *rq = &qp->rq; - return HWQ_CMP(rq->hwq.prod, &rq->hwq); + return rq->swq_start; } dma_addr_t bnxt_qplib_get_qp_buf_from_index(struct bnxt_qplib_qp *qp, u32 index) @@ -1543,7 +1536,7 @@ void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp, 
memset(sge, 0, sizeof(*sge)); if (qp->rq_hdr_buf) { - sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); + sw_prod = rq->swq_start; sge->addr = (dma_addr_t)(qp->rq_hdr_buf_map + sw_prod * qp->rq_hdr_buf_size); sge->lkey = 0xFFFFFFFF; @@ -1620,8 +1613,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, rc = -ENOMEM; goto done; } - sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); - swq = &sq->swq[sw_prod]; + sw_prod = sq->hwq.prod; + swq = bnxt_qplib_get_swqe(sq, NULL); swq->wr_id = wqe->wr_id; swq->type = wqe->type; swq->flags = wqe->flags; @@ -1831,7 +1824,8 @@ queue_err: swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP; swq->start_psn = sq->psn & BTH_PSN_MASK; } - sq->hwq.prod++; + bnxt_qplib_swq_mod_start(sq, sw_prod); + bnxt_qplib_hwq_incr_prod(&sq->hwq, 1); qp->wqe_cnt++; done: @@ -1863,6 +1857,7 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, { struct bnxt_qplib_nq_work *nq_work = NULL; struct bnxt_qplib_q *rq = &qp->rq; + struct bnxt_qplib_swq *swq; bool sch_handler = false; struct sq_sge *hw_sge; struct rq_wqe *rqe; @@ -1881,8 +1876,9 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, rc = -EINVAL; goto done; } - sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); - rq->swq[sw_prod].wr_id = wqe->wr_id; + sw_prod = rq->hwq.prod; + swq = bnxt_qplib_get_swqe(rq, NULL); + swq->wr_id = wqe->wr_id; rqe = bnxt_qplib_get_qe(&rq->hwq, sw_prod, NULL); memset(rqe, 0, rq->wqe_size); @@ -1911,10 +1907,12 @@ queue_err: if (sch_handler) { /* Store the ULP info in the software structures */ sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); - rq->swq[sw_prod].wr_id = wqe->wr_id; + swq = bnxt_qplib_get_swqe(rq, NULL); + swq->wr_id = wqe->wr_id; } - rq->hwq.prod++; + bnxt_qplib_swq_mod_start(rq, sw_prod); + bnxt_qplib_hwq_incr_prod(&rq->hwq, 1); if (sch_handler) { nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC); if (nq_work) { @@ -2026,20 +2024,19 @@ int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp, struct bnxt_qplib_cqe **pcqe, int *budget) { - u32 sw_prod, sw_cons; struct bnxt_qplib_cqe *cqe; + u32 start, last; int rc = 0; /* Now complete all outstanding SQEs with FLUSHED_ERR */ - sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); + start = sq->swq_start; cqe = *pcqe; while (*budget) { - sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq); - if (sw_cons == sw_prod) { + last = sq->swq_last; + if (start == last) break; - } /* Skip the FENCE WQE completions */ - if (sq->swq[sw_cons].wr_id == BNXT_QPLIB_FENCE_WRID) { + if (sq->swq[last].wr_id == BNXT_QPLIB_FENCE_WRID) { bnxt_qplib_cancel_phantom_processing(qp); goto skip_compl; } @@ -2047,16 +2044,17 @@ static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp, cqe->status = CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR; cqe->opcode = CQ_BASE_CQE_TYPE_REQ; cqe->qp_handle = (u64)(unsigned long)qp; - cqe->wr_id = sq->swq[sw_cons].wr_id; + cqe->wr_id = sq->swq[last].wr_id; cqe->src_qp = qp->id; - cqe->type = sq->swq[sw_cons].type; + cqe->type = sq->swq[last].type; cqe++; (*budget)--; skip_compl: - sq->hwq.cons++; + bnxt_qplib_hwq_incr_cons(&sq->hwq, sq->swq[last].slots); + sq->swq_last = sq->swq[last].next_idx; } *pcqe = cqe; - if (!(*budget) && HWQ_CMP(sq->hwq.cons, &sq->hwq) != sw_prod) + if (!(*budget) && sq->swq_last != start) /* Out of budget */ rc = -EAGAIN; @@ -2067,9 +2065,9 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp, struct bnxt_qplib_cqe **pcqe, int *budget) { struct bnxt_qplib_cqe *cqe; - u32 sw_prod, sw_cons; - int rc = 0; + u32 start, last; int 
opcode = 0; + int rc = 0; switch (qp->type) { case CMDQ_CREATE_QP1_TYPE_GSI: @@ -2085,24 +2083,25 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp, } /* Flush the rest of the RQ */ - sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); + start = rq->swq_start; cqe = *pcqe; while (*budget) { - sw_cons = HWQ_CMP(rq->hwq.cons, &rq->hwq); - if (sw_cons == sw_prod) + last = rq->swq_last; + if (last == start) break; memset(cqe, 0, sizeof(*cqe)); cqe->status = CQ_RES_RC_STATUS_WORK_REQUEST_FLUSHED_ERR; cqe->opcode = opcode; cqe->qp_handle = (unsigned long)qp; - cqe->wr_id = rq->swq[sw_cons].wr_id; + cqe->wr_id = rq->swq[last].wr_id; cqe++; (*budget)--; - rq->hwq.cons++; + bnxt_qplib_hwq_incr_cons(&rq->hwq, rq->swq[last].slots); + rq->swq_last = rq->swq[last].next_idx; } *pcqe = cqe; - if (!*budget && HWQ_CMP(rq->hwq.cons, &rq->hwq) != sw_prod) + if (!*budget && rq->swq_last != start) /* Out of budget */ rc = -EAGAIN; @@ -2125,7 +2124,7 @@ void bnxt_qplib_mark_qp_error(void *qp_handle) * CQE is track from sw_cq_cons to max_element but valid only if VALID=1 */ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, - u32 cq_cons, u32 sw_sq_cons, u32 cqe_sq_cons) + u32 cq_cons, u32 swq_last, u32 cqe_sq_cons) { u32 peek_sw_cq_cons, peek_raw_cq_cons, peek_sq_cons_idx; struct bnxt_qplib_q *sq = &qp->sq; @@ -2138,7 +2137,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, /* Normal mode */ /* Check for the psn_search marking before completing */ - swq = &sq->swq[sw_sq_cons]; + swq = &sq->swq[swq_last]; if (swq->psn_search && le32_to_cpu(swq->psn_search->flags_next_psn) & 0x80000000) { /* Unmark */ @@ -2147,7 +2146,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, & ~0x80000000); dev_dbg(&cq->hwq.pdev->dev, "FP: Process Req cq_cons=0x%x qp=0x%x sq cons sw=0x%x cqe=0x%x marked!\n", - cq_cons, qp->id, sw_sq_cons, cqe_sq_cons); + cq_cons, qp->id, swq_last, cqe_sq_cons); sq->condition = true; sq->send_phantom = true; @@ -2184,9 +2183,10 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, le64_to_cpu (peek_req_hwcqe->qp_handle)); peek_sq = &peek_qp->sq; - peek_sq_cons_idx = HWQ_CMP(le16_to_cpu( - peek_req_hwcqe->sq_cons_idx) - 1 - , &sq->hwq); + peek_sq_cons_idx = + ((le16_to_cpu( + peek_req_hwcqe->sq_cons_idx) + - 1) % sq->max_wqe); /* If the hwcqe's sq's wr_id matches */ if (peek_sq == sq && sq->swq[peek_sq_cons_idx].wr_id == @@ -2214,7 +2214,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, } dev_err(&cq->hwq.pdev->dev, "Should not have come here! 
cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x\n", - cq_cons, qp->id, sw_sq_cons, cqe_sq_cons); + cq_cons, qp->id, swq_last, cqe_sq_cons); rc = -EINVAL; } out: @@ -2226,11 +2226,11 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe **pcqe, int *budget, u32 cq_cons, struct bnxt_qplib_qp **lib_qp) { - u32 sw_sq_cons, cqe_sq_cons; struct bnxt_qplib_swq *swq; struct bnxt_qplib_cqe *cqe; struct bnxt_qplib_qp *qp; struct bnxt_qplib_q *sq; + u32 cqe_sq_cons; int rc = 0; qp = (struct bnxt_qplib_qp *)((unsigned long) @@ -2242,14 +2242,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, } sq = &qp->sq; - cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq); - if (cqe_sq_cons > sq->hwq.max_elements) { - dev_err(&cq->hwq.pdev->dev, - "FP: CQ Process req reported sq_cons_idx 0x%x which exceeded max 0x%x\n", - cqe_sq_cons, sq->hwq.max_elements); - return -EINVAL; - } - + cqe_sq_cons = le16_to_cpu(hwcqe->sq_cons_idx) % sq->max_wqe; if (qp->sq.flushed) { dev_dbg(&cq->hwq.pdev->dev, "%s: QP in Flush QP = %p\n", __func__, qp); @@ -2261,12 +2254,11 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, */ cqe = *pcqe; while (*budget) { - sw_sq_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq); - if (sw_sq_cons == cqe_sq_cons) + if (sq->swq_last == cqe_sq_cons) /* Done */ break; - swq = &sq->swq[sw_sq_cons]; + swq = &sq->swq[sq->swq_last]; memset(cqe, 0, sizeof(*cqe)); cqe->opcode = CQ_BASE_CQE_TYPE_REQ; cqe->qp_handle = (u64)(unsigned long)qp; @@ -2280,12 +2272,12 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, * of the request being signaled or not, it must complete with * the hwcqe error status */ - if (HWQ_CMP((sw_sq_cons + 1), &sq->hwq) == cqe_sq_cons && + if (swq->next_idx == cqe_sq_cons && hwcqe->status != CQ_REQ_STATUS_OK) { cqe->status = hwcqe->status; dev_err(&cq->hwq.pdev->dev, "FP: CQ Processed Req wr_id[%d] = 0x%llx with status 0x%x\n", - sw_sq_cons, cqe->wr_id, cqe->status); + sq->swq_last, cqe->wr_id, cqe->status); cqe++; (*budget)--; bnxt_qplib_mark_qp_error(qp); @@ -2293,7 +2285,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, bnxt_qplib_add_flush_qp(qp); } else { /* Before we complete, do WA 9060 */ - if (do_wa9060(qp, cq, cq_cons, sw_sq_cons, + if (do_wa9060(qp, cq, cq_cons, sq->swq_last, cqe_sq_cons)) { *lib_qp = qp; goto out; @@ -2305,13 +2297,14 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, } } skip: - sq->hwq.cons++; + bnxt_qplib_hwq_incr_cons(&sq->hwq, swq->slots); + sq->swq_last = swq->next_idx; if (sq->single) break; } out: *pcqe = cqe; - if (HWQ_CMP(sq->hwq.cons, &sq->hwq) != cqe_sq_cons) { + if (sq->swq_last != cqe_sq_cons) { /* Out of budget */ rc = -EAGAIN; goto done; @@ -2386,17 +2379,23 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, (*budget)--; *pcqe = cqe; } else { + struct bnxt_qplib_swq *swq; + rq = &qp->rq; - if (wr_id_idx >= rq->hwq.max_elements) { + if (wr_id_idx > (rq->max_wqe - 1)) { dev_err(&cq->hwq.pdev->dev, "FP: CQ Process RC wr_id idx 0x%x exceeded RQ max 0x%x\n", - wr_id_idx, rq->hwq.max_elements); + wr_id_idx, rq->max_wqe); return -EINVAL; } - cqe->wr_id = rq->swq[wr_id_idx].wr_id; + if (wr_id_idx != rq->swq_last) + return -EINVAL; + swq = &rq->swq[rq->swq_last]; + cqe->wr_id = swq->wr_id; cqe++; (*budget)--; - rq->hwq.cons++; + bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots); + rq->swq_last = swq->next_idx; *pcqe = cqe; if (hwcqe->status != CQ_RES_RC_STATUS_OK) { @@ -2467,18 +2466,24 @@ static int 
bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, (*budget)--; *pcqe = cqe; } else { + struct bnxt_qplib_swq *swq; + rq = &qp->rq; - if (wr_id_idx >= rq->hwq.max_elements) { + if (wr_id_idx > (rq->max_wqe - 1)) { dev_err(&cq->hwq.pdev->dev, "FP: CQ Process UD wr_id idx 0x%x exceeded RQ max 0x%x\n", - wr_id_idx, rq->hwq.max_elements); + wr_id_idx, rq->max_wqe); return -EINVAL; } - cqe->wr_id = rq->swq[wr_id_idx].wr_id; + if (rq->swq_last != wr_id_idx) + return -EINVAL; + swq = &rq->swq[rq->swq_last]; + cqe->wr_id = swq->wr_id; cqe++; (*budget)--; - rq->hwq.cons++; + bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots); + rq->swq_last = swq->next_idx; *pcqe = cqe; if (hwcqe->status != CQ_RES_RC_STATUS_OK) { @@ -2569,17 +2574,23 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, (*budget)--; *pcqe = cqe; } else { + struct bnxt_qplib_swq *swq; + rq = &qp->rq; - if (wr_id_idx >= rq->hwq.max_elements) { + if (wr_id_idx > (rq->max_wqe - 1)) { dev_err(&cq->hwq.pdev->dev, "FP: CQ Process Raw/QP1 RQ wr_id idx 0x%x exceeded RQ max 0x%x\n", - wr_id_idx, rq->hwq.max_elements); + wr_id_idx, rq->max_wqe); return -EINVAL; } - cqe->wr_id = rq->swq[wr_id_idx].wr_id; + if (rq->swq_last != wr_id_idx) + return -EINVAL; + swq = &rq->swq[rq->swq_last]; + cqe->wr_id = swq->wr_id; cqe++; (*budget)--; - rq->hwq.cons++; + bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots); + rq->swq_last = swq->next_idx; *pcqe = cqe; if (hwcqe->status != CQ_RES_RC_STATUS_OK) { @@ -2601,7 +2612,7 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, struct bnxt_qplib_qp *qp; struct bnxt_qplib_q *sq, *rq; struct bnxt_qplib_cqe *cqe; - u32 sw_cons = 0, cqe_cons; + u32 swq_last = 0, cqe_cons; int rc = 0; /* Check the Status */ @@ -2627,13 +2638,7 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, cqe_cons = le16_to_cpu(hwcqe->sq_cons_idx); if (cqe_cons == 0xFFFF) goto do_rq; - - if (cqe_cons > sq->hwq.max_elements) { - dev_err(&cq->hwq.pdev->dev, - "FP: CQ Process terminal reported sq_cons_idx 0x%x which exceeded max 0x%x\n", - cqe_cons, sq->hwq.max_elements); - goto do_rq; - } + cqe_cons %= sq->max_wqe; if (qp->sq.flushed) { dev_dbg(&cq->hwq.pdev->dev, @@ -2647,24 +2652,25 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, */ cqe = *pcqe; while (*budget) { - sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq); - if (sw_cons == cqe_cons) + swq_last = sq->swq_last; + if (swq_last == cqe_cons) break; - if (sq->swq[sw_cons].flags & SQ_SEND_FLAGS_SIGNAL_COMP) { + if (sq->swq[swq_last].flags & SQ_SEND_FLAGS_SIGNAL_COMP) { memset(cqe, 0, sizeof(*cqe)); cqe->status = CQ_REQ_STATUS_OK; cqe->opcode = CQ_BASE_CQE_TYPE_REQ; cqe->qp_handle = (u64)(unsigned long)qp; cqe->src_qp = qp->id; - cqe->wr_id = sq->swq[sw_cons].wr_id; - cqe->type = sq->swq[sw_cons].type; + cqe->wr_id = sq->swq[swq_last].wr_id; + cqe->type = sq->swq[swq_last].type; cqe++; (*budget)--; } - sq->hwq.cons++; + bnxt_qplib_hwq_incr_cons(&sq->hwq, sq->swq[swq_last].slots); + sq->swq_last = sq->swq[swq_last].next_idx; } *pcqe = cqe; - if (!(*budget) && sw_cons != cqe_cons) { + if (!(*budget) && swq_last != cqe_cons) { /* Out of budget */ rc = -EAGAIN; goto sq_done; @@ -2676,10 +2682,10 @@ do_rq: cqe_cons = le16_to_cpu(hwcqe->rq_cons_idx); if (cqe_cons == 0xFFFF) { goto done; - } else if (cqe_cons > rq->hwq.max_elements) { + } else if (cqe_cons > rq->max_wqe - 1) { dev_err(&cq->hwq.pdev->dev, "FP: CQ Processed terminal reported rq_cons_idx 0x%x exceeds max 0x%x\n", - cqe_cons, rq->hwq.max_elements); + cqe_cons, 
rq->max_wqe); goto done; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 6146f7dc368c..50e4f79d87d3 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -74,6 +74,7 @@ struct bnxt_qplib_swq { u8 flags; u32 start_psn; u32 next_psn; + u8 slots; struct sq_psn_search *psn_search; struct sq_psn_search_ext *psn_ext; }; @@ -213,6 +214,8 @@ struct bnxt_qplib_q { u32 phantom_cqe_cnt; u32 next_cq_cons; bool flushed; + u32 swq_start; + u32 swq_last; }; struct bnxt_qplib_qp { @@ -490,4 +493,20 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, int num_cqes); void bnxt_qplib_flush_cqn_wq(struct bnxt_qplib_qp *qp); + +static inline void *bnxt_qplib_get_swqe(struct bnxt_qplib_q *que, u32 *swq_idx) +{ + u32 idx; + + idx = que->swq_start; + if (swq_idx) + *swq_idx = idx; + return &que->swq[idx]; +} + +static inline void bnxt_qplib_swq_mod_start(struct bnxt_qplib_q *que, u32 idx) +{ + que->swq_start = que->swq[idx].next_idx; +} + #endif /* __BNXT_QPLIB_FP_H__ */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 03a0f29e7432..98df68a475af 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -363,6 +363,17 @@ int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res, struct bnxt_qplib_ctx *ctx, bool virt_fn, bool is_p5); +static inline void bnxt_qplib_hwq_incr_prod(struct bnxt_qplib_hwq *hwq, u32 cnt) +{ + hwq->prod = (hwq->prod + cnt) % hwq->depth; +} + +static inline void bnxt_qplib_hwq_incr_cons(struct bnxt_qplib_hwq *hwq, + u32 cnt) +{ + hwq->cons = (hwq->cons + cnt) % hwq->depth; +} + static inline void bnxt_qplib_ring_db32(struct bnxt_qplib_db_info *info, bool arm) { -- cgit From 5ac5396a6c6a8a701de305f209c57038b167a727 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Wed, 15 Jul 2020 10:16:56 -0400 Subject: RDMA/bnxt_re: Pull psn buffer dynamically based on prod Changing the PSN management memory buffers from statically initialized to dynamic pull scheme. During create qp only the start pointers are initialized and during post-send the psn buffer is pulled based on current producer index. Adjusting post_send code to accommodate dynamic psn-pull and changing post_recv code to match post-send code wrt pseudo flush wqe generation. 
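As an illustration of the pull scheme described above, the following stand-alone sketch models the page/offset arithmetic that the new bnxt_qplib_pull_psn_buff() helper applies to the producer index. The pad_pg/pad_stride/pad_pgofft names mirror the new hwq fields added in the patch below; the struct, the constants and main() are purely illustrative scaffolding, not driver code.

#include <stdint.h>
#include <stdio.h>

#define PG_SIZE 4096u

/* Simplified model of the PSN pad-page bookkeeping kept in the hwq. */
struct psn_pad {
	uint64_t pad_pg[4];   /* base address of each PSN page */
	uint32_t pad_stride;  /* size of one psn_search entry */
	uint32_t pad_pgofft;  /* entries of padding before the first slot */
};

/* Resolve the PSN buffer for the WQE at producer index 'tail'. */
static uint64_t pull_psn_buff(const struct psn_pad *p, uint32_t tail)
{
	uint32_t per_pg = PG_SIZE / p->pad_stride;
	uint32_t pg_num = (tail + p->pad_pgofft) / per_pg;
	uint32_t pg_indx = (tail + p->pad_pgofft) % per_pg;

	return p->pad_pg[pg_num] + (uint64_t)pg_indx * p->pad_stride;
}

int main(void)
{
	struct psn_pad p = {
		.pad_pg = { 0x1000, 0x2000, 0x3000, 0x4000 },
		.pad_stride = 16,	/* illustrative entry size only */
		.pad_pgofft = 2,
	};
	uint32_t tail;

	for (tail = 0; tail < 4; tail++)
		printf("tail %u -> psn buff at 0x%llx\n", tail,
		       (unsigned long long)pull_psn_buff(&p, tail));
	return 0;
}

The point of the change is visible in the arithmetic: the buffer is derived from the current producer index at post-send time, so no per-WQE pointer array has to be precomputed at create-qp time.
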
Link: https://lore.kernel.org/r/1594822619-4098-4-git-send-email-devesh.sharma@broadcom.com Signed-off-by: Devesh Sharma Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 129 ++++++++++++++++-------------- drivers/infiniband/hw/bnxt_re/qplib_res.h | 3 + 2 files changed, 74 insertions(+), 58 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index c9e7be3e5152..e1896d32987f 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -929,26 +929,18 @@ static void bnxt_qplib_init_psn_ptr(struct bnxt_qplib_qp *qp, int size) { struct bnxt_qplib_hwq *hwq; struct bnxt_qplib_q *sq; - u64 fpsne, psne, psn_pg; - u16 indx_pad = 0, indx; - u16 pg_num, pg_indx; - u64 *page; + u64 fpsne, psn_pg; + u16 indx_pad = 0; sq = &qp->sq; hwq = &sq->hwq; - - fpsne = (u64)bnxt_qplib_get_qe(hwq, hwq->max_elements, &psn_pg); + fpsne = (u64)bnxt_qplib_get_qe(hwq, hwq->depth, &psn_pg); if (!IS_ALIGNED(fpsne, PAGE_SIZE)) indx_pad = ALIGN(fpsne, PAGE_SIZE) / size; - page = (u64 *)psn_pg; - for (indx = 0; indx < hwq->max_elements; indx++) { - pg_num = (indx + indx_pad) / (PAGE_SIZE / size); - pg_indx = (indx + indx_pad) % (PAGE_SIZE / size); - psne = page[pg_num] + pg_indx * size; - sq->swq[indx].psn_ext = (struct sq_psn_search_ext *)psne; - sq->swq[indx].psn_search = (struct sq_psn_search *)psne; - } + hwq->pad_pgofft = indx_pad; + hwq->pad_pg = (u64 *)psn_pg; + hwq->pad_stride = size; } int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) @@ -1555,6 +1547,8 @@ static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp, u32 flg_npsn; u32 op_spsn; + if (!swq->psn_search) + return; psns = swq->psn_search; psns_ext = swq->psn_ext; @@ -1574,6 +1568,23 @@ static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp, } } +static void bnxt_qplib_pull_psn_buff(struct bnxt_qplib_q *sq, + struct bnxt_qplib_swq *swq, u32 tail) +{ + struct bnxt_qplib_hwq *hwq; + u32 pg_num, pg_indx; + void *buff; + + hwq = &sq->hwq; + if (!hwq->pad_pg) + return; + pg_num = (tail + hwq->pad_pgofft) / (PAGE_SIZE / hwq->pad_stride); + pg_indx = (tail + hwq->pad_pgofft) % (PAGE_SIZE / hwq->pad_stride); + buff = (void *)(hwq->pad_pg[pg_num] + pg_indx * hwq->pad_stride); + swq->psn_ext = buff; + swq->psn_search = buff; +} + void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp) { struct bnxt_qplib_q *sq = &qp->sq; @@ -1588,6 +1599,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, int i, rc = 0, data_len = 0, pkt_num = 0; struct bnxt_qplib_q *sq = &qp->sq; struct sq_send *hw_sq_send_hdr; + struct bnxt_qplib_hwq *hwq; struct bnxt_qplib_swq *swq; bool sch_handler = false; struct sq_sge *hw_sge; @@ -1595,40 +1607,48 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, __le32 temp32; u32 sw_prod; - if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS) { - if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { - sch_handler = true; - dev_dbg(&sq->hwq.pdev->dev, - "%s Error QP. 
Scheduling for poll_cq\n", - __func__); - goto queue_err; - } + hwq = &sq->hwq; + if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS && + qp->state != CMDQ_MODIFY_QP_NEW_STATE_ERR) { + dev_err(&hwq->pdev->dev, + "QPLIB: FP: QP (0x%x) is in the 0x%x state", + qp->id, qp->state); + rc = -EINVAL; + goto done; } if (bnxt_qplib_queue_full(sq)) { - dev_err(&sq->hwq.pdev->dev, + dev_err(&hwq->pdev->dev, "prod = %#x cons = %#x qdepth = %#x delta = %#x\n", - sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements, + hwq->prod, hwq->cons, hwq->max_elements, sq->q_full_delta); rc = -ENOMEM; goto done; } + sw_prod = sq->hwq.prod; swq = bnxt_qplib_get_swqe(sq, NULL); + bnxt_qplib_pull_psn_buff(sq, swq, sw_prod); swq->wr_id = wqe->wr_id; swq->type = wqe->type; swq->flags = wqe->flags; + swq->start_psn = sq->psn & BTH_PSN_MASK; if (qp->sig_type) swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP; - swq->start_psn = sq->psn & BTH_PSN_MASK; - hw_sq_send_hdr = bnxt_qplib_get_qe(&sq->hwq, sw_prod, NULL); + hw_sq_send_hdr = bnxt_qplib_get_qe(hwq, sw_prod, NULL); memset(hw_sq_send_hdr, 0, sq->wqe_size); + if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { + sch_handler = true; + dev_dbg(&hwq->pdev->dev, + "%s Error QP. Scheduling for poll_cq\n", __func__); + goto queue_err; + } if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) { /* Copy the inline data */ if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) { - dev_warn(&sq->hwq.pdev->dev, + dev_warn(&hwq->pdev->dev, "Inline data length > 96 detected\n"); data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH; } else { @@ -1810,24 +1830,11 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, goto done; } swq->next_psn = sq->psn & BTH_PSN_MASK; - if (qp->type == CMDQ_CREATE_QP_TYPE_RC) - bnxt_qplib_fill_psn_search(qp, wqe, swq); + bnxt_qplib_fill_psn_search(qp, wqe, swq); queue_err: - if (sch_handler) { - /* Store the ULP info in the software structures */ - sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); - swq = &sq->swq[sw_prod]; - swq->wr_id = wqe->wr_id; - swq->type = wqe->type; - swq->flags = wqe->flags; - if (qp->sig_type) - swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP; - swq->start_psn = sq->psn & BTH_PSN_MASK; - } bnxt_qplib_swq_mod_start(sq, sw_prod); bnxt_qplib_hwq_incr_prod(&sq->hwq, 1); qp->wqe_cnt++; - done: if (sch_handler) { nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC); @@ -1837,7 +1844,7 @@ done: INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task); queue_work(qp->scq->nq->cqn_wq, &nq_work->work); } else { - dev_err(&sq->hwq.pdev->dev, + dev_err(&hwq->pdev->dev, "FP: Failed to allocate SQ nq_work!\n"); rc = -ENOMEM; } @@ -1858,29 +1865,41 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, struct bnxt_qplib_nq_work *nq_work = NULL; struct bnxt_qplib_q *rq = &qp->rq; struct bnxt_qplib_swq *swq; + struct bnxt_qplib_hwq *hwq; bool sch_handler = false; struct sq_sge *hw_sge; struct rq_wqe *rqe; int i, rc = 0; u32 sw_prod; - if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { - sch_handler = true; - dev_dbg(&rq->hwq.pdev->dev, - "%s: Error QP. 
Scheduling for poll_cq\n", __func__); - goto queue_err; + hwq = &rq->hwq; + if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_RESET) { + dev_err(&hwq->pdev->dev, + "QPLIB: FP: QP (0x%x) is in the 0x%x state", + qp->id, qp->state); + rc = -EINVAL; + goto done; } + if (bnxt_qplib_queue_full(rq)) { - dev_err(&rq->hwq.pdev->dev, + dev_err(&hwq->pdev->dev, "FP: QP (0x%x) RQ is full!\n", qp->id); rc = -EINVAL; goto done; } + sw_prod = rq->hwq.prod; swq = bnxt_qplib_get_swqe(rq, NULL); swq->wr_id = wqe->wr_id; - rqe = bnxt_qplib_get_qe(&rq->hwq, sw_prod, NULL); + if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { + sch_handler = true; + dev_dbg(&hwq->pdev->dev, + "%s: Error QP. Scheduling for poll_cq\n", __func__); + goto queue_err; + } + + rqe = bnxt_qplib_get_qe(hwq, sw_prod, NULL); memset(rqe, 0, rq->wqe_size); /* Calculate wqe_size16 and data_len */ @@ -1904,15 +1923,9 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, rqe->wr_id[0] = cpu_to_le32(sw_prod); queue_err: - if (sch_handler) { - /* Store the ULP info in the software structures */ - sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); - swq = bnxt_qplib_get_swqe(rq, NULL); - swq->wr_id = wqe->wr_id; - } - bnxt_qplib_swq_mod_start(rq, sw_prod); bnxt_qplib_hwq_incr_prod(&rq->hwq, 1); +done: if (sch_handler) { nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC); if (nq_work) { @@ -1921,12 +1934,12 @@ queue_err: INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task); queue_work(qp->rcq->nq->cqn_wq, &nq_work->work); } else { - dev_err(&rq->hwq.pdev->dev, + dev_err(&hwq->pdev->dev, "FP: Failed to allocate RQ nq_work!\n"); rc = -ENOMEM; } } -done: + return rc; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 98df68a475af..b29c2ad6a47c 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -163,6 +163,9 @@ struct bnxt_qplib_hwq { u32 cons; /* raw */ u8 cp_bit; u8 is_user; + u64 *pad_pg; + u32 pad_stride; + u32 pad_pgofft; }; struct bnxt_qplib_db_info { -- cgit From 54ace98443ab9b8611b288359e20168e3e1571c1 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Wed, 15 Jul 2020 10:16:57 -0400 Subject: RDMA/bnxt_re: Add helper data structures Adding few helper data structure which are useful to initialize hardware send wqe in variable wqe mode. Adding a qp flag in HSI to indicate variable wqe is enabled for this qp. 
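The design point behind these helpers is that each extension header fills exactly one 16-byte WQE slot, the same size as struct sq_sge, so a send WQE can be assembled slot by slot regardless of opcode. A small stand-alone sketch can check this at compile time; the field layouts are copied from the structures added below, while the *_model names, the plain integer stand-ins for __le32/__le64 and main() are illustrative only.

#include <stdint.h>
#include <assert.h>

struct sq_sge_model        { uint64_t va_or_pa; uint32_t l_key; uint32_t size; };
struct sq_ud_ext_model     { uint32_t dst_qp; uint32_t avid; uint64_t rsvd; };
struct sq_raw_ext_model    { uint32_t cfa_meta; uint32_t rsvd0; uint64_t rsvd1; };
struct sq_rdma_ext_model   { uint64_t remote_va; uint32_t remote_key; uint32_t rsvd; };
struct sq_atomic_ext_model { uint64_t swap_data; uint64_t cmp_data; };
struct sq_fr_pmr_ext_model { uint64_t pblptr; uint64_t va; };
struct sq_bind_ext_model   { uint64_t va; uint32_t length_lo; uint32_t length_hi; };
struct rq_ext_model        { uint64_t rsvd1; uint64_t rsvd2; };

#define SLOT_SIZE sizeof(struct sq_sge_model)

static_assert(SLOT_SIZE == 16, "a WQE slot is 16 bytes");
static_assert(sizeof(struct sq_ud_ext_model)     == SLOT_SIZE, "UD ext hdr is one slot");
static_assert(sizeof(struct sq_raw_ext_model)    == SLOT_SIZE, "raw QP1 ext hdr is one slot");
static_assert(sizeof(struct sq_rdma_ext_model)   == SLOT_SIZE, "RDMA ext hdr is one slot");
static_assert(sizeof(struct sq_atomic_ext_model) == SLOT_SIZE, "atomic ext hdr is one slot");
static_assert(sizeof(struct sq_fr_pmr_ext_model) == SLOT_SIZE, "FR-PMR ext hdr is one slot");
static_assert(sizeof(struct sq_bind_ext_model)   == SLOT_SIZE, "bind ext hdr is one slot");
static_assert(sizeof(struct rq_ext_model)        == SLOT_SIZE, "RQ ext hdr is one slot");

int main(void) { return 0; }
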
Link: https://lore.kernel.org/r/1594822619-4098-5-git-send-email-devesh.sharma@broadcom.com Signed-off-by: Devesh Sharma Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_fp.h | 45 ++++++++++++++++++++++++++++++++ drivers/infiniband/hw/bnxt_re/roce_hsi.h | 1 + 2 files changed, 46 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 50e4f79d87d3..bf96a74098d3 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -39,6 +39,51 @@ #ifndef __BNXT_QPLIB_FP_H__ #define __BNXT_QPLIB_FP_H__ +/* Few helper structures temporarily defined here + * should get rid of these when roce_hsi.h is updated + * in original code base + */ +struct sq_ud_ext_hdr { + __le32 dst_qp; + __le32 avid; + __le64 rsvd; +}; + +struct sq_raw_ext_hdr { + __le32 cfa_meta; + __le32 rsvd0; + __le64 rsvd1; +}; + +struct sq_rdma_ext_hdr { + __le64 remote_va; + __le32 remote_key; + __le32 rsvd; +}; + +struct sq_atomic_ext_hdr { + __le64 swap_data; + __le64 cmp_data; +}; + +struct sq_fr_pmr_ext_hdr { + __le64 pblptr; + __le64 va; +}; + +struct sq_bind_ext_hdr { + __le64 va; + __le32 length_lo; + __le32 length_hi; +}; + +struct rq_ext_hdr { + __le64 rsvd1; + __le64 rsvd2; +}; + +/* Helper structures end */ + struct bnxt_qplib_srq { struct bnxt_qplib_pd *pd; struct bnxt_qplib_dpi *dpi; diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h index 6f00f07420b7..3e40e0d76efd 100644 --- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h +++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h @@ -1126,6 +1126,7 @@ struct cmdq_create_qp { #define CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION 0x2UL #define CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE 0x4UL #define CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED 0x8UL + #define CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED 0x10UL u8 type; #define CMDQ_CREATE_QP_TYPE_RC 0x2UL #define CMDQ_CREATE_QP_TYPE_UD 0x4UL -- cgit From 2bb3c32c5c5fe98b7407a0befc32f16c959a0a92 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Wed, 15 Jul 2020 10:16:58 -0400 Subject: RDMA/bnxt_re: Change wr posting logic to accommodate variable wqes Modifying the post-send and post-recv to initialize the wqes slot by slot dynamically depending on the number of max sges requested by consumer at the time of QP creation. 
Changed the QP creation logic to determine the size of SQ and RQ in 16B slots based on the number of wqe and number of SGEs requested by consumer Link: https://lore.kernel.org/r/1594822619-4098-6-git-send-email-devesh.sharma@broadcom.com Signed-off-by: Devesh Sharma Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 167 +++++++++++---- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 8 +- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 324 +++++++++++++++++++----------- drivers/infiniband/hw/bnxt_re/qplib_fp.h | 60 +++++- drivers/infiniband/hw/bnxt_re/qplib_res.h | 12 +- 5 files changed, 398 insertions(+), 173 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index e97bee3ff5de..3f18efc0c297 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -842,16 +842,79 @@ static u8 __from_ib_qp_type(enum ib_qp_type type) } } +static u16 bnxt_re_setup_rwqe_size(struct bnxt_qplib_qp *qplqp, + int rsge, int max) +{ + if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) + rsge = max; + return bnxt_re_get_rwqe_size(rsge); +} + +static u16 bnxt_re_get_wqe_size(int ilsize, int nsge) +{ + u16 wqe_size, calc_ils; + + wqe_size = bnxt_re_get_swqe_size(nsge); + if (ilsize) { + calc_ils = sizeof(struct sq_send_hdr) + ilsize; + wqe_size = max_t(u16, calc_ils, wqe_size); + wqe_size = ALIGN(wqe_size, sizeof(struct sq_send_hdr)); + } + return wqe_size; +} + +static int bnxt_re_setup_swqe_size(struct bnxt_re_qp *qp, + struct ib_qp_init_attr *init_attr) +{ + struct bnxt_qplib_dev_attr *dev_attr; + struct bnxt_qplib_qp *qplqp; + struct bnxt_re_dev *rdev; + struct bnxt_qplib_q *sq; + int align, ilsize; + + rdev = qp->rdev; + qplqp = &qp->qplib_qp; + sq = &qplqp->sq; + dev_attr = &rdev->dev_attr; + + align = sizeof(struct sq_send_hdr); + ilsize = ALIGN(init_attr->cap.max_inline_data, align); + + sq->wqe_size = bnxt_re_get_wqe_size(ilsize, sq->max_sge); + if (sq->wqe_size > bnxt_re_get_swqe_size(dev_attr->max_qp_sges)) + return -EINVAL; + /* For gen p4 and gen p5 backward compatibility mode + * wqe size is fixed to 128 bytes + */ + if (sq->wqe_size < bnxt_re_get_swqe_size(dev_attr->max_qp_sges) && + qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) + sq->wqe_size = bnxt_re_get_swqe_size(dev_attr->max_qp_sges); + + if (init_attr->cap.max_inline_data) { + qplqp->max_inline_data = sq->wqe_size - + sizeof(struct sq_send_hdr); + init_attr->cap.max_inline_data = qplqp->max_inline_data; + if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) + sq->max_sge = qplqp->max_inline_data / + sizeof(struct sq_sge); + } + + return 0; +} + static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, struct bnxt_re_qp *qp, struct ib_udata *udata) { + struct bnxt_qplib_qp *qplib_qp; + struct bnxt_re_ucontext *cntx; struct bnxt_re_qp_req ureq; - struct bnxt_qplib_qp *qplib_qp = &qp->qplib_qp; - struct ib_umem *umem; int bytes = 0, psn_sz; - struct bnxt_re_ucontext *cntx = rdma_udata_to_drv_context( - udata, struct bnxt_re_ucontext, ib_uctx); + struct ib_umem *umem; + int psn_nume; + qplib_qp = &qp->qplib_qp; + cntx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, + ib_uctx); if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) return -EFAULT; @@ -859,10 +922,15 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, /* Consider mapping PSN search memory only for RC QPs. 
*/ if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) { psn_sz = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? - sizeof(struct sq_psn_search_ext) : - sizeof(struct sq_psn_search); - bytes += (qplib_qp->sq.max_wqe * psn_sz); + sizeof(struct sq_psn_search_ext) : + sizeof(struct sq_psn_search); + psn_nume = (qplib_qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? + qplib_qp->sq.max_wqe : + ((qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size) / + sizeof(struct bnxt_qplib_sge)); + bytes += (psn_nume * psn_sz); } + bytes = PAGE_ALIGN(bytes); umem = ib_umem_get(&rdev->ibdev, ureq.qpsva, bytes, IB_ACCESS_LOCAL_WRITE); @@ -975,7 +1043,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp qp->qplib_qp.sig_type = true; /* Shadow QP SQ depth should be same as QP1 RQ depth */ - qp->qplib_qp.sq.wqe_size = bnxt_re_get_swqe_size(); + qp->qplib_qp.sq.wqe_size = bnxt_re_get_wqe_size(0, 6); qp->qplib_qp.sq.max_wqe = qp1_qp->rq.max_wqe; qp->qplib_qp.sq.max_sge = 2; /* Q full delta can be 1 since it is internal QP */ @@ -986,7 +1054,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp qp->qplib_qp.scq = qp1_qp->scq; qp->qplib_qp.rcq = qp1_qp->rcq; - qp->qplib_qp.rq.wqe_size = bnxt_re_get_rwqe_size(); + qp->qplib_qp.rq.wqe_size = bnxt_re_get_rwqe_size(6); qp->qplib_qp.rq.max_wqe = qp1_qp->rq.max_wqe; qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge; /* Q full delta can be 1 since it is internal QP */ @@ -1041,19 +1109,21 @@ static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp, qplqp->srq = &srq->qplib_srq; rq->max_wqe = 0; } else { - rq->wqe_size = bnxt_re_get_rwqe_size(); + rq->max_sge = init_attr->cap.max_recv_sge; + if (rq->max_sge > dev_attr->max_qp_sges) + rq->max_sge = dev_attr->max_qp_sges; + init_attr->cap.max_recv_sge = rq->max_sge; + rq->wqe_size = bnxt_re_setup_rwqe_size(qplqp, rq->max_sge, + dev_attr->max_qp_sges); /* Allocate 1 more than what's provided so posting max doesn't * mean empty. 
*/ entries = roundup_pow_of_two(init_attr->cap.max_recv_wr + 1); rq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); - rq->q_full_delta = rq->max_wqe - init_attr->cap.max_recv_wr; - rq->max_sge = init_attr->cap.max_recv_sge; - if (rq->max_sge > dev_attr->max_qp_sges) - rq->max_sge = dev_attr->max_qp_sges; + rq->q_full_delta = 0; + rq->sg_info.pgsize = PAGE_SIZE; + rq->sg_info.pgshft = PAGE_SHIFT; } - rq->sg_info.pgsize = PAGE_SIZE; - rq->sg_info.pgshft = PAGE_SHIFT; return 0; } @@ -1068,41 +1138,48 @@ static void bnxt_re_adjust_gsi_rq_attr(struct bnxt_re_qp *qp) qplqp = &qp->qplib_qp; dev_attr = &rdev->dev_attr; - qplqp->rq.max_sge = dev_attr->max_qp_sges; - if (qplqp->rq.max_sge > dev_attr->max_qp_sges) + if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { qplqp->rq.max_sge = dev_attr->max_qp_sges; - qplqp->rq.max_sge = 6; + if (qplqp->rq.max_sge > dev_attr->max_qp_sges) + qplqp->rq.max_sge = dev_attr->max_qp_sges; + qplqp->rq.max_sge = 6; + } } -static void bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) +static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) { struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_qp *qplqp; struct bnxt_re_dev *rdev; struct bnxt_qplib_q *sq; int entries; + int diff; + int rc; rdev = qp->rdev; qplqp = &qp->qplib_qp; sq = &qplqp->sq; dev_attr = &rdev->dev_attr; - sq->wqe_size = bnxt_re_get_swqe_size(); sq->max_sge = init_attr->cap.max_send_sge; - if (sq->max_sge > dev_attr->max_qp_sges) + if (sq->max_sge > dev_attr->max_qp_sges) { sq->max_sge = dev_attr->max_qp_sges; - /* - * Change the SQ depth if user has requested minimum using - * configfs. Only supported for kernel consumers - */ + init_attr->cap.max_send_sge = sq->max_sge; + } + + rc = bnxt_re_setup_swqe_size(qp, init_attr); + if (rc) + return rc; + entries = init_attr->cap.max_send_wr; /* Allocate 128 + 1 more than what's provided */ - entries = roundup_pow_of_two(entries + BNXT_QPLIB_RESERVED_QP_WRS + 1); - sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + - BNXT_QPLIB_RESERVED_QP_WRS + 1); - sq->q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1; + diff = (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) ? + 0 : BNXT_QPLIB_RESERVED_QP_WRS; + entries = roundup_pow_of_two(entries + diff + 1); + sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + diff + 1); + sq->q_full_delta = diff + 1; /* * Reserving one slot for Phantom WQE. Application can * post one extra entry in this case. 
But allowing this to avoid @@ -1111,6 +1188,8 @@ static void bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, qplqp->sq.q_full_delta -= 1; qplqp->sq.sg_info.pgsize = PAGE_SIZE; qplqp->sq.sg_info.pgshft = PAGE_SHIFT; + + return 0; } static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp, @@ -1125,13 +1204,16 @@ static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp, qplqp = &qp->qplib_qp; dev_attr = &rdev->dev_attr; - entries = roundup_pow_of_two(init_attr->cap.max_send_wr + 1); - qplqp->sq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); - qplqp->sq.q_full_delta = qplqp->sq.max_wqe - - init_attr->cap.max_send_wr; - qplqp->sq.max_sge++; /* Need one extra sge to put UD header */ - if (qplqp->sq.max_sge > dev_attr->max_qp_sges) - qplqp->sq.max_sge = dev_attr->max_qp_sges; + if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { + entries = roundup_pow_of_two(init_attr->cap.max_send_wr + 1); + qplqp->sq.max_wqe = min_t(u32, entries, + dev_attr->max_qp_wqes + 1); + qplqp->sq.q_full_delta = qplqp->sq.max_wqe - + init_attr->cap.max_send_wr; + qplqp->sq.max_sge++; /* Need one extra sge to put UD header */ + if (qplqp->sq.max_sge > dev_attr->max_qp_sges) + qplqp->sq.max_sge = dev_attr->max_qp_sges; + } } static int bnxt_re_init_qp_type(struct bnxt_re_dev *rdev, @@ -1227,7 +1309,9 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, bnxt_re_adjust_gsi_rq_attr(qp); /* Setup SQ */ - bnxt_re_init_sq_attr(qp, init_attr, udata); + rc = bnxt_re_init_sq_attr(qp, init_attr, udata); + if (rc) + goto out; if (init_attr->qp_type == IB_QPT_GSI) bnxt_re_adjust_gsi_sq_attr(qp, init_attr); @@ -1575,8 +1659,9 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, entries = dev_attr->max_srq_wqes + 1; srq->qplib_srq.max_wqe = entries; - srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size(); srq->qplib_srq.max_sge = srq_init_attr->attr.max_sge; + srq->qplib_srq.wqe_size = + bnxt_re_get_rwqe_size(srq->qplib_srq.max_sge); srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit; srq->srq_limit = srq_init_attr->attr.srq_limit; srq->qplib_srq.eventq_hw_ring_id = rdev->nq[0].ring_id; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index b4a06b553b04..1daeb30e06fd 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -136,14 +136,14 @@ struct bnxt_re_ucontext { spinlock_t sh_lock; /* protect shpg */ }; -static inline u16 bnxt_re_get_swqe_size(void) +static inline u16 bnxt_re_get_swqe_size(int nsge) { - return sizeof(struct sq_send); + return sizeof(struct sq_send_hdr) + nsge * sizeof(struct sq_sge); } -static inline u16 bnxt_re_get_rwqe_size(void) +static inline u16 bnxt_re_get_rwqe_size(int nsge) { - return sizeof(struct rq_wqe); + return sizeof(struct rq_wqe_hdr) + (nsge * sizeof(struct sq_sge)); } int bnxt_re_query_device(struct ib_device *ibdev, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index e1896d32987f..117b42349a28 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -660,6 +660,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, srq->dbinfo.hwq = &srq->hwq; srq->dbinfo.xid = srq->id; srq->dbinfo.db = srq->dpi->dbr; + srq->dbinfo.max_slot = 1; srq->dbinfo.priv_db = res->dpi_tbl.dbr_bar_reg_iomem; if (srq->threshold) bnxt_qplib_armen_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ_ARMENA); @@ -797,10 +798,8 @@ static int bnxt_qplib_alloc_init_swq(struct bnxt_qplib_q *que) que->swq_start = 0; 
que->swq_last = que->max_wqe - 1; - for (indx = 0; indx < que->max_wqe; indx++) { - que->swq[indx].slots = 1; + for (indx = 0; indx < que->max_wqe; indx++) que->swq[indx].next_idx = indx + 1; - } que->swq[que->swq_last].next_idx = 0; /* Make it circular */ que->swq_last = 0; out: @@ -831,8 +830,8 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) /* SQ */ hwq_attr.res = res; hwq_attr.sginfo = &sq->sg_info; - hwq_attr.depth = sq->max_wqe; - hwq_attr.stride = sq->wqe_size; + hwq_attr.stride = sizeof(struct sq_sge); + hwq_attr.depth = bnxt_qplib_get_depth(sq); hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr); if (rc) @@ -842,7 +841,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) if (rc) goto fail_sq; - req.sq_size = cpu_to_le32(sq->max_wqe); + req.sq_size = cpu_to_le32(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)); pbl = &sq->hwq.pbl[PBL_LVL_0]; req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) << @@ -858,8 +857,8 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) if (rq->max_wqe) { hwq_attr.res = res; hwq_attr.sginfo = &rq->sg_info; - hwq_attr.stride = rq->wqe_size; - hwq_attr.depth = qp->rq.max_wqe; + hwq_attr.stride = sizeof(struct sq_sge); + hwq_attr.depth = bnxt_qplib_get_depth(rq); hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr); if (rc) @@ -901,10 +900,12 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) sq->dbinfo.hwq = &sq->hwq; sq->dbinfo.xid = qp->id; sq->dbinfo.db = qp->dpi->dbr; + sq->dbinfo.max_slot = bnxt_qplib_set_sq_max_slot(qp->wqe_mode); if (rq->max_wqe) { rq->dbinfo.hwq = &rq->hwq; rq->dbinfo.xid = qp->id; rq->dbinfo.db = qp->dpi->dbr; + rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size); } rcfw->qp_tbl[qp->id].qp_id = qp->id; rcfw->qp_tbl[qp->id].qp_handle = (void *)qp; @@ -976,10 +977,10 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) hwq_attr.res = res; hwq_attr.sginfo = &sq->sg_info; - hwq_attr.stride = sq->wqe_size; - hwq_attr.depth = sq->max_wqe; + hwq_attr.stride = sizeof(struct sq_sge); + hwq_attr.depth = bnxt_qplib_get_depth(sq); hwq_attr.aux_stride = psn_sz; - hwq_attr.aux_depth = hwq_attr.depth; + hwq_attr.aux_depth = bnxt_qplib_set_sq_size(sq, qp->wqe_mode); hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr); if (rc) @@ -992,7 +993,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) if (psn_sz) bnxt_qplib_init_psn_ptr(qp, psn_sz); - req.sq_size = cpu_to_le32(sq->max_wqe); + req.sq_size = cpu_to_le32(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)); pbl = &sq->hwq.pbl[PBL_LVL_0]; req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) << @@ -1008,8 +1009,8 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) if (!qp->srq) { hwq_attr.res = res; hwq_attr.sginfo = &rq->sg_info; - hwq_attr.stride = rq->wqe_size; - hwq_attr.depth = rq->max_wqe; + hwq_attr.stride = sizeof(struct sq_sge); + hwq_attr.depth = bnxt_qplib_get_depth(rq); hwq_attr.aux_stride = 0; hwq_attr.aux_depth = 0; hwq_attr.type = HWQ_TYPE_QUEUE; @@ -1044,6 +1045,8 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED; if (qp->sig_type) qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION; + if (qp->wqe_mode == 
BNXT_QPLIB_WQE_MODE_VARIABLE) + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED; req.qp_flags = cpu_to_le32(qp_flags); /* ORRQ and IRRQ */ @@ -1101,10 +1104,12 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) sq->dbinfo.hwq = &sq->hwq; sq->dbinfo.xid = qp->id; sq->dbinfo.db = qp->dpi->dbr; + sq->dbinfo.max_slot = bnxt_qplib_set_sq_max_slot(qp->wqe_mode); if (rq->max_wqe) { rq->dbinfo.hwq = &rq->hwq; rq->dbinfo.xid = qp->id; rq->dbinfo.db = qp->dpi->dbr; + rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size); } rcfw->qp_tbl[qp->id].qp_id = qp->id; rcfw->qp_tbl[qp->id].qp_handle = (void *)qp; @@ -1562,22 +1567,115 @@ static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp, if (bnxt_qplib_is_chip_gen_p5(qp->cctx)) { psns_ext->opcode_start_psn = cpu_to_le32(op_spsn); psns_ext->flags_next_psn = cpu_to_le32(flg_npsn); + psns_ext->start_slot_idx = cpu_to_le16(swq->slot_idx); } else { psns->opcode_start_psn = cpu_to_le32(op_spsn); psns->flags_next_psn = cpu_to_le32(flg_npsn); } } +static int bnxt_qplib_put_inline(struct bnxt_qplib_qp *qp, + struct bnxt_qplib_swqe *wqe, + u16 *idx) +{ + struct bnxt_qplib_hwq *hwq; + int len, t_len, offt; + bool pull_dst = true; + void *il_dst = NULL; + void *il_src = NULL; + int t_cplen, cplen; + int indx; + + hwq = &qp->sq.hwq; + t_len = 0; + for (indx = 0; indx < wqe->num_sge; indx++) { + len = wqe->sg_list[indx].size; + il_src = (void *)wqe->sg_list[indx].addr; + t_len += len; + if (t_len > qp->max_inline_data) + goto bad; + while (len) { + if (pull_dst) { + pull_dst = false; + il_dst = bnxt_qplib_get_prod_qe(hwq, *idx); + (*idx)++; + t_cplen = 0; + offt = 0; + } + cplen = min_t(int, len, sizeof(struct sq_sge)); + cplen = min_t(int, cplen, + (sizeof(struct sq_sge) - offt)); + memcpy(il_dst, il_src, cplen); + t_cplen += cplen; + il_src += cplen; + il_dst += cplen; + offt += cplen; + len -= cplen; + if (t_cplen == sizeof(struct sq_sge)) + pull_dst = true; + } + } + + return t_len; +bad: + return -ENOMEM; +} + +static u32 bnxt_qplib_put_sges(struct bnxt_qplib_hwq *hwq, + struct bnxt_qplib_sge *ssge, + u16 nsge, u16 *idx) +{ + struct sq_sge *dsge; + int indx, len = 0; + + for (indx = 0; indx < nsge; indx++, (*idx)++) { + dsge = bnxt_qplib_get_prod_qe(hwq, *idx); + dsge->va_or_pa = cpu_to_le64(ssge[indx].addr); + dsge->l_key = cpu_to_le32(ssge[indx].lkey); + dsge->size = cpu_to_le32(ssge[indx].size); + len += ssge[indx].size; + } + + return len; +} + +static u16 bnxt_qplib_required_slots(struct bnxt_qplib_qp *qp, + struct bnxt_qplib_swqe *wqe, + u16 *wqe_sz, u16 *qdf, u8 mode) +{ + u32 ilsize, bytes; + u16 nsge; + u16 slot; + + nsge = wqe->num_sge; + /* Adding sq_send_hdr is a misnomer, for rq also hdr size is same. 
*/ + bytes = sizeof(struct sq_send_hdr) + nsge * sizeof(struct sq_sge); + if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) { + ilsize = bnxt_qplib_calc_ilsize(wqe, qp->max_inline_data); + bytes = ALIGN(ilsize, sizeof(struct sq_sge)); + bytes += sizeof(struct sq_send_hdr); + } + + *qdf = __xlate_qfd(qp->sq.q_full_delta, bytes); + slot = bytes >> 4; + *wqe_sz = slot; + if (mode == BNXT_QPLIB_WQE_MODE_STATIC) + slot = 8; + return slot; +} + static void bnxt_qplib_pull_psn_buff(struct bnxt_qplib_q *sq, - struct bnxt_qplib_swq *swq, u32 tail) + struct bnxt_qplib_swq *swq) { struct bnxt_qplib_hwq *hwq; u32 pg_num, pg_indx; void *buff; + u32 tail; hwq = &sq->hwq; if (!hwq->pad_pg) return; + tail = swq->slot_idx / sq->dbinfo.max_slot; pg_num = (tail + hwq->pad_pgofft) / (PAGE_SIZE / hwq->pad_stride); pg_indx = (tail + hwq->pad_pgofft) % (PAGE_SIZE / hwq->pad_stride); buff = (void *)(hwq->pad_pg[pg_num] + pg_indx * hwq->pad_stride); @@ -1598,14 +1696,16 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, struct bnxt_qplib_nq_work *nq_work = NULL; int i, rc = 0, data_len = 0, pkt_num = 0; struct bnxt_qplib_q *sq = &qp->sq; - struct sq_send *hw_sq_send_hdr; struct bnxt_qplib_hwq *hwq; struct bnxt_qplib_swq *swq; bool sch_handler = false; - struct sq_sge *hw_sge; - u8 wqe_size16; + u16 wqe_sz, qdf = 0; + void *base_hdr; + void *ext_hdr; __le32 temp32; - u32 sw_prod; + u32 wqe_idx; + u32 slots; + u16 idx; hwq = &sq->hwq; if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS && @@ -1617,18 +1717,21 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, goto done; } - if (bnxt_qplib_queue_full(sq)) { + slots = bnxt_qplib_required_slots(qp, wqe, &wqe_sz, &qdf, qp->wqe_mode); + if (bnxt_qplib_queue_full(sq, slots + qdf)) { dev_err(&hwq->pdev->dev, "prod = %#x cons = %#x qdepth = %#x delta = %#x\n", - hwq->prod, hwq->cons, hwq->max_elements, - sq->q_full_delta); + hwq->prod, hwq->cons, hwq->depth, sq->q_full_delta); rc = -ENOMEM; goto done; } - sw_prod = sq->hwq.prod; - swq = bnxt_qplib_get_swqe(sq, NULL); - bnxt_qplib_pull_psn_buff(sq, swq, sw_prod); + swq = bnxt_qplib_get_swqe(sq, &wqe_idx); + bnxt_qplib_pull_psn_buff(sq, swq); + + idx = 0; + swq->slot_idx = hwq->prod; + swq->slots = slots; swq->wr_id = wqe->wr_id; swq->type = wqe->type; swq->flags = wqe->flags; @@ -1636,8 +1739,6 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, if (qp->sig_type) swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP; - hw_sq_send_hdr = bnxt_qplib_get_qe(hwq, sw_prod, NULL); - memset(hw_sq_send_hdr, 0, sq->wqe_size); if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { sch_handler = true; dev_dbg(&hwq->pdev->dev, @@ -1645,50 +1746,34 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, goto queue_err; } - if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) { - /* Copy the inline data */ - if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) { - dev_warn(&hwq->pdev->dev, - "Inline data length > 96 detected\n"); - data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH; - } else { - data_len = wqe->inline_len; - } - memcpy(hw_sq_send_hdr->data, wqe->inline_data, data_len); - wqe_size16 = (data_len + 15) >> 4; - } else { - for (i = 0, hw_sge = (struct sq_sge *)hw_sq_send_hdr->data; - i < wqe->num_sge; i++, hw_sge++) { - hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr); - hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey); - hw_sge->size = cpu_to_le32(wqe->sg_list[i].size); - data_len += wqe->sg_list[i].size; - } - /* Each SGE entry = 1 WQE size16 */ - wqe_size16 = wqe->num_sge; - /* HW requires wqe size has room for atleast one SGE even if - * none was 
supplied by ULP - */ - if (!wqe->num_sge) - wqe_size16++; - } + base_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); + ext_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); + memset(base_hdr, 0, sizeof(struct sq_sge)); + memset(ext_hdr, 0, sizeof(struct sq_sge)); + if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) + /* Copy the inline data */ + data_len = bnxt_qplib_put_inline(qp, wqe, &idx); + else + data_len = bnxt_qplib_put_sges(hwq, wqe->sg_list, wqe->num_sge, + &idx); + if (data_len < 0) + goto queue_err; /* Specifics */ switch (wqe->type) { case BNXT_QPLIB_SWQE_TYPE_SEND: if (qp->type == CMDQ_CREATE_QP1_TYPE_GSI) { + struct sq_send_raweth_qp1_hdr *sqe = base_hdr; + struct sq_raw_ext_hdr *ext_sqe = ext_hdr; /* Assemble info for Raw Ethertype QPs */ - struct sq_send_raweth_qp1 *sqe = - (struct sq_send_raweth_qp1 *)hw_sq_send_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; - sqe->wqe_size = wqe_size16 + - ((offsetof(typeof(*sqe), data) + 15) >> 4); + sqe->wqe_size = wqe_sz; sqe->cfa_action = cpu_to_le16(wqe->rawqp1.cfa_action); sqe->lflags = cpu_to_le16(wqe->rawqp1.lflags); sqe->length = cpu_to_le32(data_len); - sqe->cfa_meta = cpu_to_le32((wqe->rawqp1.cfa_meta & + ext_sqe->cfa_meta = cpu_to_le32((wqe->rawqp1.cfa_meta & SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_MASK) << SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_SFT); @@ -1698,27 +1783,24 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM: case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV: { - struct sq_send *sqe = (struct sq_send *)hw_sq_send_hdr; + struct sq_ud_ext_hdr *ext_sqe = ext_hdr; + struct sq_send_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; - sqe->wqe_size = wqe_size16 + - ((offsetof(typeof(*sqe), data) + 15) >> 4); - sqe->inv_key_or_imm_data = cpu_to_le32( - wqe->send.inv_key); + sqe->wqe_size = wqe_sz; + sqe->inv_key_or_imm_data = cpu_to_le32(wqe->send.inv_key); if (qp->type == CMDQ_CREATE_QP_TYPE_UD || qp->type == CMDQ_CREATE_QP_TYPE_GSI) { sqe->q_key = cpu_to_le32(wqe->send.q_key); - sqe->dst_qp = cpu_to_le32( - wqe->send.dst_qp & SQ_SEND_DST_QP_MASK); sqe->length = cpu_to_le32(data_len); - sqe->avid = cpu_to_le32(wqe->send.avid & - SQ_SEND_AVID_MASK); sq->psn = (sq->psn + 1) & BTH_PSN_MASK; + ext_sqe->dst_qp = cpu_to_le32(wqe->send.dst_qp & + SQ_SEND_DST_QP_MASK); + ext_sqe->avid = cpu_to_le32(wqe->send.avid & + SQ_SEND_AVID_MASK); } else { sqe->length = cpu_to_le32(data_len); - sqe->dst_qp = 0; - sqe->avid = 0; if (qp->mtu) pkt_num = (data_len + qp->mtu - 1) / qp->mtu; if (!pkt_num) @@ -1731,16 +1813,16 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM: case BNXT_QPLIB_SWQE_TYPE_RDMA_READ: { - struct sq_rdma *sqe = (struct sq_rdma *)hw_sq_send_hdr; + struct sq_rdma_ext_hdr *ext_sqe = ext_hdr; + struct sq_rdma_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; - sqe->wqe_size = wqe_size16 + - ((offsetof(typeof(*sqe), data) + 15) >> 4); + sqe->wqe_size = wqe_sz; sqe->imm_data = cpu_to_le32(wqe->rdma.inv_key); sqe->length = cpu_to_le32((u32)data_len); - sqe->remote_va = cpu_to_le64(wqe->rdma.remote_va); - sqe->remote_key = cpu_to_le32(wqe->rdma.r_key); + ext_sqe->remote_va = cpu_to_le64(wqe->rdma.remote_va); + ext_sqe->remote_key = cpu_to_le32(wqe->rdma.r_key); if (qp->mtu) pkt_num = (data_len + qp->mtu - 1) / qp->mtu; if (!pkt_num) @@ -1751,14 +1833,15 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, case BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP: case BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD: { - struct 
sq_atomic *sqe = (struct sq_atomic *)hw_sq_send_hdr; + struct sq_atomic_ext_hdr *ext_sqe = ext_hdr; + struct sq_atomic_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; sqe->remote_key = cpu_to_le32(wqe->atomic.r_key); sqe->remote_va = cpu_to_le64(wqe->atomic.remote_va); - sqe->swap_data = cpu_to_le64(wqe->atomic.swap_data); - sqe->cmp_data = cpu_to_le64(wqe->atomic.cmp_data); + ext_sqe->swap_data = cpu_to_le64(wqe->atomic.swap_data); + ext_sqe->cmp_data = cpu_to_le64(wqe->atomic.cmp_data); if (qp->mtu) pkt_num = (data_len + qp->mtu - 1) / qp->mtu; if (!pkt_num) @@ -1768,8 +1851,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, } case BNXT_QPLIB_SWQE_TYPE_LOCAL_INV: { - struct sq_localinvalidate *sqe = - (struct sq_localinvalidate *)hw_sq_send_hdr; + struct sq_localinvalidate *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; @@ -1779,7 +1861,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, } case BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR: { - struct sq_fr_pmr *sqe = (struct sq_fr_pmr *)hw_sq_send_hdr; + struct sq_fr_pmr_ext_hdr *ext_sqe = ext_hdr; + struct sq_fr_pmr_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; @@ -1803,14 +1886,15 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, wqe->frmr.pbl_ptr[i] = cpu_to_le64( wqe->frmr.page_list[i] | PTU_PTE_VALID); - sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr); - sqe->va = cpu_to_le64(wqe->frmr.va); + ext_sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr); + ext_sqe->va = cpu_to_le64(wqe->frmr.va); break; } case BNXT_QPLIB_SWQE_TYPE_BIND_MW: { - struct sq_bind *sqe = (struct sq_bind *)hw_sq_send_hdr; + struct sq_bind_ext_hdr *ext_sqe = ext_hdr; + struct sq_bind_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; @@ -1819,9 +1903,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, (wqe->bind.zero_based ? 
SQ_BIND_ZERO_BASED : 0); sqe->parent_l_key = cpu_to_le32(wqe->bind.parent_l_key); sqe->l_key = cpu_to_le32(wqe->bind.r_key); - sqe->va = cpu_to_le64(wqe->bind.va); - temp32 = cpu_to_le32(wqe->bind.length); - memcpy(&sqe->length, &temp32, sizeof(wqe->bind.length)); + ext_sqe->va = cpu_to_le64(wqe->bind.va); + ext_sqe->length_lo = cpu_to_le32(wqe->bind.length); break; } default: @@ -1832,8 +1915,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, swq->next_psn = sq->psn & BTH_PSN_MASK; bnxt_qplib_fill_psn_search(qp, wqe, swq); queue_err: - bnxt_qplib_swq_mod_start(sq, sw_prod); - bnxt_qplib_hwq_incr_prod(&sq->hwq, 1); + bnxt_qplib_swq_mod_start(sq, wqe_idx); + bnxt_qplib_hwq_incr_prod(hwq, swq->slots); qp->wqe_cnt++; done: if (sch_handler) { @@ -1864,13 +1947,14 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, { struct bnxt_qplib_nq_work *nq_work = NULL; struct bnxt_qplib_q *rq = &qp->rq; - struct bnxt_qplib_swq *swq; + struct rq_wqe_hdr *base_hdr; + struct rq_ext_hdr *ext_hdr; struct bnxt_qplib_hwq *hwq; + struct bnxt_qplib_swq *swq; bool sch_handler = false; - struct sq_sge *hw_sge; - struct rq_wqe *rqe; - int i, rc = 0; - u32 sw_prod; + u16 wqe_sz, idx; + u32 wqe_idx; + int rc = 0; hwq = &rq->hwq; if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_RESET) { @@ -1881,16 +1965,16 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, goto done; } - if (bnxt_qplib_queue_full(rq)) { + if (bnxt_qplib_queue_full(rq, rq->dbinfo.max_slot)) { dev_err(&hwq->pdev->dev, "FP: QP (0x%x) RQ is full!\n", qp->id); rc = -EINVAL; goto done; } - sw_prod = rq->hwq.prod; - swq = bnxt_qplib_get_swqe(rq, NULL); + swq = bnxt_qplib_get_swqe(rq, &wqe_idx); swq->wr_id = wqe->wr_id; + swq->slots = rq->dbinfo.max_slot; if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { sch_handler = true; @@ -1899,32 +1983,28 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, goto queue_err; } - rqe = bnxt_qplib_get_qe(hwq, sw_prod, NULL); - memset(rqe, 0, rq->wqe_size); - - /* Calculate wqe_size16 and data_len */ - for (i = 0, hw_sge = (struct sq_sge *)rqe->data; - i < wqe->num_sge; i++, hw_sge++) { - hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr); - hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey); - hw_sge->size = cpu_to_le32(wqe->sg_list[i].size); - } - rqe->wqe_type = wqe->type; - rqe->flags = wqe->flags; - rqe->wqe_size = wqe->num_sge + - ((offsetof(typeof(*rqe), data) + 15) >> 4); - /* HW requires wqe size has room for atleast one SGE even if none - * was supplied by ULP - */ - if (!wqe->num_sge) - rqe->wqe_size++; - - /* Supply the rqe->wr_id index to the wr_id_tbl for now */ - rqe->wr_id[0] = cpu_to_le32(sw_prod); - + idx = 0; + base_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); + ext_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); + memset(base_hdr, 0, sizeof(struct sq_sge)); + memset(ext_hdr, 0, sizeof(struct sq_sge)); + wqe_sz = (sizeof(struct rq_wqe_hdr) + + wqe->num_sge * sizeof(struct sq_sge)) >> 4; + bnxt_qplib_put_sges(hwq, wqe->sg_list, wqe->num_sge, &idx); + if (!wqe->num_sge) { + struct sq_sge *sge; + + sge = bnxt_qplib_get_prod_qe(hwq, idx++); + sge->size = 0; + wqe_sz++; + } + base_hdr->wqe_type = wqe->type; + base_hdr->flags = wqe->flags; + base_hdr->wqe_size = wqe_sz; + base_hdr->wr_id[0] = cpu_to_le32(wqe_idx); queue_err: - bnxt_qplib_swq_mod_start(rq, sw_prod); - bnxt_qplib_hwq_incr_prod(&rq->hwq, 1); + bnxt_qplib_swq_mod_start(rq, wqe_idx); + bnxt_qplib_hwq_incr_prod(hwq, swq->slots); done: if (sch_handler) { nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h 
b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index bf96a74098d3..f50784405e27 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -119,6 +119,7 @@ struct bnxt_qplib_swq { u8 flags; u32 start_psn; u32 next_psn; + u32 slot_idx; u8 slots; struct sq_psn_search *psn_search; struct sq_psn_search_ext *psn_ext; @@ -349,11 +350,18 @@ struct bnxt_qplib_qp { (!!((hdr)->cqe_type_toggle & CQ_BASE_TOGGLE) == \ !((raw_cons) & (cp_bit))) -static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *qplib_q) +static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *que, + u8 slots) { - return HWQ_CMP((qplib_q->hwq.prod + qplib_q->q_full_delta), - &qplib_q->hwq) == HWQ_CMP(qplib_q->hwq.cons, - &qplib_q->hwq); + struct bnxt_qplib_hwq *hwq; + int avail; + + hwq = &que->hwq; + /* False full is possible, retrying post-send makes sense */ + avail = hwq->cons - hwq->prod; + if (hwq->cons <= hwq->prod) + avail += hwq->depth; + return avail <= slots; } struct bnxt_qplib_cqe { @@ -554,4 +562,48 @@ static inline void bnxt_qplib_swq_mod_start(struct bnxt_qplib_q *que, u32 idx) que->swq_start = que->swq[idx].next_idx; } +static inline u32 bnxt_qplib_get_depth(struct bnxt_qplib_q *que) +{ + return (que->wqe_size * que->max_wqe) / sizeof(struct sq_sge); +} + +static inline u32 bnxt_qplib_set_sq_size(struct bnxt_qplib_q *que, u8 wqe_mode) +{ + return (wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? + que->max_wqe : bnxt_qplib_get_depth(que); +} + +static inline u32 bnxt_qplib_set_sq_max_slot(u8 wqe_mode) +{ + return (wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? + sizeof(struct sq_send) / sizeof(struct sq_sge) : 1; +} + +static inline u32 bnxt_qplib_set_rq_max_slot(u32 wqe_size) +{ + return (wqe_size / sizeof(struct sq_sge)); +} + +static inline u16 __xlate_qfd(u16 delta, u16 wqe_bytes) +{ + /* For Cu/Wh delta = 128, stride = 16, wqe_bytes = 128 + * For Gen-p5 B/C mode delta = 0, stride = 16, wqe_bytes = 128. + * For Gen-p5 delta = 0, stride = 16, 32 <= wqe_bytes <= 512. + * when 8916 is disabled. 
+ */ + return (delta * wqe_bytes) / sizeof(struct sq_sge); +} + +static inline u16 bnxt_qplib_calc_ilsize(struct bnxt_qplib_swqe *wqe, u16 max) +{ + u16 size = 0; + int indx; + + for (indx = 0; indx < wqe->num_sge; indx++) + size += wqe->sg_list[indx].size; + if (size > max) + size = max; + + return size; +} #endif /* __BNXT_QPLIB_FP_H__ */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index b29c2ad6a47c..9da470d1e4a3 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -173,6 +173,7 @@ struct bnxt_qplib_db_info { void __iomem *priv_db; struct bnxt_qplib_hwq *hwq; u32 xid; + u32 max_slot; }; /* Tables */ @@ -332,6 +333,14 @@ static inline void *bnxt_qplib_get_qe(struct bnxt_qplib_hwq *hwq, return (void *)(hwq->pbl_ptr[pg_num] + hwq->element_size * pg_idx); } +static inline void *bnxt_qplib_get_prod_qe(struct bnxt_qplib_hwq *hwq, u32 idx) +{ + idx += hwq->prod; + if (idx >= hwq->depth) + idx -= hwq->depth; + return bnxt_qplib_get_qe(hwq, idx, NULL); +} + #define to_bnxt_qplib(ptr, type, member) \ container_of(ptr, type, member) @@ -409,8 +418,7 @@ static inline void bnxt_qplib_ring_prod_db(struct bnxt_qplib_db_info *info, key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type; key <<= 32; - key |= (info->hwq->prod & (info->hwq->max_elements - 1)) & - DBC_DBC_INDEX_MASK; + key |= ((info->hwq->prod / info->max_slot)) & DBC_DBC_INDEX_MASK; writeq(key, info->db); } -- cgit From e25c52c7ccf784df6cd711c82d302b29da4e0730 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Wed, 15 Jul 2020 10:16:59 -0400 Subject: RDMA/bnxt_re: Update maintainers for Broadcom rdma driver Adding a new co-maintainer for Broadcom's RDMA driver. Link: https://lore.kernel.org/r/1594822619-4098-7-git-send-email-devesh.sharma@broadcom.com Signed-off-by: Devesh Sharma Signed-off-by: Jason Gunthorpe --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 7b5ffd646c6b..96d64055a841 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3582,6 +3582,7 @@ M: Selvin Xavier M: Devesh Sharma M: Somnath Kotur M: Sriharsha Basavapatna +M: Naresh Kumar PBS L: linux-rdma@vger.kernel.org S: Supported W: http://www.broadcom.com -- cgit From 8e7eafb816ab7e5047b74cb8eb1db2f8f14f7d7a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 18 Jul 2020 17:32:20 -0700 Subject: RDMA: rdma_user_ioctl.h: fix a duplicated word + clarify Change the repeated word "it" in a comment to "it to". Also insert a dash in the sentence to add clarity. Link: https://lore.kernel.org/r/20200719003220.21250-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/rdma_user_ioctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/rdma/rdma_user_ioctl.h b/include/uapi/rdma/rdma_user_ioctl.h index d92d2721b28c..53c55188dd2a 100644 --- a/include/uapi/rdma/rdma_user_ioctl.h +++ b/include/uapi/rdma/rdma_user_ioctl.h @@ -43,7 +43,7 @@ /* * General blocks assignments - * It is closed on purpose do not expose it it user space + * It is closed on purpose - do not expose it to user space * #define MAD_CMD_BASE 0x00 * #define HFI1_CMD_BAS 0xE0 */ -- cgit From c94e272b57abb0cade9addcafc93ac1f566305cf Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 16 Jul 2020 13:54:16 +0300 Subject: RDMA/mlx5: Allow SQ modification Currently the SQ is set to a ready state when the RAW QP is modified to INIT. When the TIS is modified, e.g. 
to change the lag_tx_affinity, then SQs which are already in the ready state will not be affected. Open a window to modify the SQ behavior by setting the SQ as ready only when QP was modified to RTS. Link: https://lore.kernel.org/r/20200716105416.1423826-1-leon@kernel.org Signed-off-by: Maor Gottlieb Reviewed-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 88236f84c99b..3da8af9e4ee4 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3544,7 +3544,7 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, switch (raw_qp_param->operation) { case MLX5_CMD_OP_RST2INIT_QP: rq_state = MLX5_RQC_STATE_RDY; - sq_state = MLX5_SQC_STATE_RDY; + sq_state = MLX5_SQC_STATE_RST; break; case MLX5_CMD_OP_2ERR_QP: rq_state = MLX5_RQC_STATE_ERR; @@ -3556,13 +3556,11 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, break; case MLX5_CMD_OP_RTR2RTS_QP: case MLX5_CMD_OP_RTS2RTS_QP: - if (raw_qp_param->set_mask == - MLX5_RAW_QP_RATE_LIMIT) { - modify_rq = 0; - sq_state = sq->state; - } else { - return raw_qp_param->set_mask ? -EINVAL : 0; - } + if (raw_qp_param->set_mask & ~MLX5_RAW_QP_RATE_LIMIT) + return -EINVAL; + + modify_rq = 0; + sq_state = MLX5_SQC_STATE_RDY; break; case MLX5_CMD_OP_INIT2INIT_QP: case MLX5_CMD_OP_INIT2RTR_QP: @@ -4443,7 +4441,7 @@ static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state, [MLX5_SQ_STATE_NA] = IB_QPS_RESET, }, [MLX5_RQC_STATE_RDY] = { - [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD, + [MLX5_SQC_STATE_RST] = MLX5_QP_STATE, [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE, [MLX5_SQC_STATE_ERR] = IB_QPS_SQE, [MLX5_SQ_STATE_NA] = MLX5_QP_STATE, @@ -4455,7 +4453,7 @@ static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state, [MLX5_SQ_STATE_NA] = IB_QPS_ERR, }, [MLX5_RQ_STATE_NA] = { - [MLX5_SQC_STATE_RST] = IB_QPS_RESET, + [MLX5_SQC_STATE_RST] = MLX5_QP_STATE, [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE, [MLX5_SQC_STATE_ERR] = MLX5_QP_STATE, [MLX5_SQ_STATE_NA] = MLX5_QP_STATE_BAD, -- cgit From 0f63ef1dd581de1655075e2dbdf2ef302514a983 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 19 Jul 2020 08:22:22 +0300 Subject: RDMA/core: Align abort/commit object scheme for write() and ioctl() paths Create the same logic flow for the write() interface as we have for the ioctl() path by making sure that the object is committed or aborted automatically after HW object creation. 
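The intended control flow can be made concrete with a small userspace model: a create-style handler publishes the object it created in the bundle, and the dispatcher, not the handler, decides to commit or abort once the handler returns. In the kernel this decision is taken by uverbs_finalize_object() on attrs->uobject, as added in the patch below; the structures, names and printf() calls in the sketch are simplified stand-ins for illustration only.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct uobject { const char *name; };

struct bundle {
	struct uobject *uobject;	/* set by the handler for newly created objects */
};

static void finalize_object(struct uobject *uobj, bool commit)
{
	/* the kernel would expose the handle or destroy the object; the
	 * model just reports the decision and releases the memory */
	printf("%s %s\n", commit ? "commit" : "abort", uobj->name);
	free(uobj);
}

/* Allocate, initialize, then publish the object for the core to finalize. */
static int create_handler(struct bundle *b, bool fail)
{
	struct uobject *uobj = malloc(sizeof(*uobj));

	if (!uobj)
		return -1;
	uobj->name = "example-object";
	b->uobject = uobj;
	return fail ? -1 : 0;	/* e.g. copying the response may still fail */
}

static int dispatch(struct bundle *b, bool fail)
{
	int ret;

	b->uobject = NULL;
	ret = create_handler(b, fail);
	if (b->uobject)		/* commit on success, abort on error */
		finalize_object(b->uobject, ret == 0);
	return ret;
}

int main(void)
{
	struct bundle b;

	dispatch(&b, false);	/* commit path */
	dispatch(&b, true);	/* abort path */
	return 0;
}
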
Link: https://lore.kernel.org/r/20200719052223.75245-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_main.c | 4 ++++ drivers/infiniband/core/uverbs_std_types_device.c | 7 ++++++- include/rdma/uverbs_ioctl.h | 1 + include/rdma/uverbs_std_types.h | 14 ++++++++++++++ 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 69e4755cc04b..37794d88b1f3 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -601,6 +601,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, memset(bundle.attr_present, 0, sizeof(bundle.attr_present)); bundle.ufile = file; bundle.context = NULL; /* only valid if bundle has uobject */ + bundle.uobject = NULL; if (!method_elm->is_ex) { size_t in_len = hdr.in_words * 4 - sizeof(hdr); size_t out_len = hdr.out_words * 4; @@ -664,6 +665,9 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, } ret = method_elm->handler(&bundle); + if (bundle.uobject) + uverbs_finalize_object(bundle.uobject, UVERBS_ACCESS_NEW, true, + !ret, &bundle); out_unlock: srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); return (ret) ? : count; diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c index 8e58605a17be..75df2094a010 100644 --- a/drivers/infiniband/core/uverbs_std_types_device.c +++ b/drivers/infiniband/core/uverbs_std_types_device.c @@ -38,7 +38,12 @@ static int UVERBS_HANDLER(UVERBS_METHOD_INVOKE_WRITE)( attrs->ucore.outlen < method_elm->resp_size) return -ENOSPC; - return method_elm->handler(attrs); + attrs->uobject = NULL; + rc = method_elm->handler(attrs); + if (attrs->uobject) + uverbs_finalize_object(attrs->uobject, UVERBS_ACCESS_NEW, true, + !rc, attrs); + return rc; } DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_INVOKE_WRITE, diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 86de10ea30af..db419c8dbd10 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -652,6 +652,7 @@ struct uverbs_attr_bundle { struct ib_udata ucore; struct ib_uverbs_file *ufile; struct ib_ucontext *context; + struct ib_uobject *uobject; DECLARE_BITMAP(attr_present, UVERBS_API_ATTR_BKEY_LEN); struct uverbs_attr attrs[]; }; diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index bf0392ae15eb..8451b19103ee 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -110,6 +110,20 @@ static inline void uobj_alloc_abort(struct ib_uobject *uobj, rdma_alloc_abort_uobject(uobj, attrs, false); } +static inline void uobj_finalize_uobj_create(struct ib_uobject *uobj, + struct uverbs_attr_bundle *attrs) +{ + /* + * Tell the core code that the write() handler has completed + * initializing the object and that the core should commit or + * abort this object based upon the return code from the write() + * method. 
Similar to what uverbs_finalize_uobj_create() does for + * ioctl() + */ + WARN_ON(attrs->uobject); + attrs->uobject = uobj; +} + static inline struct ib_uobject * __uobj_alloc(const struct uverbs_api_object *obj, struct uverbs_attr_bundle *attrs, struct ib_device **ib_dev) -- cgit From 16e51f78a9db3645d2d2dd7c0a78e7e7776b630e Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 19 Jul 2020 08:22:23 +0300 Subject: RDMA/core: Update write interface to use automatic object lifetime The automatic object lifetime model allows us to change the write() interface to have the same logic as the ioctl() path. Update the create/alloc functions to be in the following format, so the code flow will be the same: * Allocate objects * Initialize them * Call to the drivers, this is last step that is allowed to fail * Finalize object * Return response and allow to core code to handle abort/commit respectively. Link: https://lore.kernel.org/r/20200719052223.75245-3-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 293 ++++++++++------------------------- 1 file changed, 86 insertions(+), 207 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 68c9a0210220..479895db72e2 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -415,8 +415,8 @@ static int ib_uverbs_query_port(struct uverbs_attr_bundle *attrs) static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) { + struct ib_uverbs_alloc_pd_resp resp = {}; struct ib_uverbs_alloc_pd cmd; - struct ib_uverbs_alloc_pd_resp resp; struct ib_uobject *uobj; struct ib_pd *pd; int ret; @@ -438,29 +438,20 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) pd->device = ib_dev; pd->uobject = uobj; - pd->__internal_mr = NULL; atomic_set(&pd->usecnt, 0); pd->res.type = RDMA_RESTRACK_PD; ret = ib_dev->ops.alloc_pd(pd, &attrs->driver_udata); if (ret) goto err_alloc; - - uobj->object = pd; - memset(&resp, 0, sizeof resp); - resp.pd_handle = uobj->id; rdma_restrack_uadd(&pd->res); - ret = uverbs_response(attrs, &resp, sizeof(resp)); - if (ret) - goto err_copy; + uobj->object = pd; + uobj_finalize_uobj_create(uobj, attrs); - rdma_alloc_commit_uobject(uobj, attrs); - return 0; + resp.pd_handle = uobj->id; + return uverbs_response(attrs, &resp, sizeof(resp)); -err_copy: - ib_dealloc_pd_user(pd, uverbs_get_cleared_udata(attrs)); - pd = NULL; err_alloc: kfree(pd); err: @@ -568,8 +559,8 @@ static void xrcd_table_delete(struct ib_uverbs_device *dev, static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_device *ibudev = attrs->ufile->device; + struct ib_uverbs_open_xrcd_resp resp = {}; struct ib_uverbs_open_xrcd cmd; - struct ib_uverbs_open_xrcd_resp resp; struct ib_uxrcd_object *obj; struct ib_xrcd *xrcd = NULL; struct fd f = {NULL, 0}; @@ -624,8 +615,6 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs) atomic_set(&obj->refcnt, 0); obj->uobject.object = xrcd; - memset(&resp, 0, sizeof resp); - resp.xrcd_handle = obj->uobject.id; if (inode) { if (new_xrcd) { @@ -637,24 +626,14 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs) atomic_inc(&xrcd->usecnt); } - ret = uverbs_response(attrs, &resp, sizeof(resp)); - if (ret) - goto err_copy; - if (f.file) fdput(f); mutex_unlock(&ibudev->xrcd_tree_mutex); + uobj_finalize_uobj_create(&obj->uobject, attrs); - rdma_alloc_commit_uobject(&obj->uobject, attrs); - return 0; - -err_copy: - if 
(inode) { - if (new_xrcd) - xrcd_table_delete(ibudev, inode); - atomic_dec(&xrcd->usecnt); - } + resp.xrcd_handle = obj->uobject.id; + return uverbs_response(attrs, &resp, sizeof(resp)); err_dealloc_xrcd: ib_dealloc_xrcd_user(xrcd, uverbs_get_cleared_udata(attrs)); @@ -710,8 +689,8 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) { + struct ib_uverbs_reg_mr_resp resp = {}; struct ib_uverbs_reg_mr cmd; - struct ib_uverbs_reg_mr_resp resp; struct ib_uobject *uobj; struct ib_pd *pd; struct ib_mr *mr; @@ -768,27 +747,16 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) rdma_restrack_uadd(&mr->res); uobj->object = mr; - - memset(&resp, 0, sizeof resp); - resp.lkey = mr->lkey; - resp.rkey = mr->rkey; - resp.mr_handle = uobj->id; - - ret = uverbs_response(attrs, &resp, sizeof(resp)); - if (ret) - goto err_copy; - uobj_put_obj_read(pd); + uobj_finalize_uobj_create(uobj, attrs); - rdma_alloc_commit_uobject(uobj, attrs); - return 0; - -err_copy: - ib_dereg_mr_user(mr, uverbs_get_cleared_udata(attrs)); + resp.lkey = mr->lkey; + resp.rkey = mr->rkey; + resp.mr_handle = uobj->id; + return uverbs_response(attrs, &resp, sizeof(resp)); err_put: uobj_put_obj_read(pd); - err_free: uobj_alloc_abort(uobj, attrs); return ret; @@ -928,21 +896,13 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs) atomic_inc(&pd->usecnt); uobj->object = mw; + uobj_put_obj_read(pd); + uobj_finalize_uobj_create(uobj, attrs); - memset(&resp, 0, sizeof(resp)); - resp.rkey = mw->rkey; + resp.rkey = mw->rkey; resp.mw_handle = uobj->id; + return uverbs_response(attrs, &resp, sizeof(resp)); - ret = uverbs_response(attrs, &resp, sizeof(resp)); - if (ret) - goto err_copy; - - uobj_put_obj_read(pd); - rdma_alloc_commit_uobject(uobj, attrs); - return 0; - -err_copy: - uverbs_dealloc_mw(mw); err_put: uobj_put_obj_read(pd); err_free: @@ -979,40 +939,33 @@ static int ib_uverbs_create_comp_channel(struct uverbs_attr_bundle *attrs) if (IS_ERR(uobj)) return PTR_ERR(uobj); - resp.fd = uobj->id; - ev_file = container_of(uobj, struct ib_uverbs_completion_event_file, uobj); ib_uverbs_init_event_queue(&ev_file->ev_queue); + uobj_finalize_uobj_create(uobj, attrs); - ret = uverbs_response(attrs, &resp, sizeof(resp)); - if (ret) { - uobj_alloc_abort(uobj, attrs); - return ret; - } - - rdma_alloc_commit_uobject(uobj, attrs); - return 0; + resp.fd = uobj->id; + return uverbs_response(attrs, &resp, sizeof(resp)); } -static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, - struct ib_uverbs_ex_create_cq *cmd) +static int create_cq(struct uverbs_attr_bundle *attrs, + struct ib_uverbs_ex_create_cq *cmd) { struct ib_ucq_object *obj; struct ib_uverbs_completion_event_file *ev_file = NULL; struct ib_cq *cq; int ret; - struct ib_uverbs_ex_create_cq_resp resp; + struct ib_uverbs_ex_create_cq_resp resp = {}; struct ib_cq_init_attr attr = {}; struct ib_device *ib_dev; if (cmd->comp_vector >= attrs->ufile->device->num_comp_vectors) - return ERR_PTR(-EINVAL); + return -EINVAL; obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, attrs, &ib_dev); if (IS_ERR(obj)) - return obj; + return PTR_ERR(obj); if (cmd->comp_channel >= 0) { ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, attrs); @@ -1041,53 +994,38 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, cq->event_handler = ib_uverbs_cq_event_handler; cq->cq_context = ev_file ? 
&ev_file->ev_queue : NULL; atomic_set(&cq->usecnt, 0); + cq->res.type = RDMA_RESTRACK_CQ; ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata); if (ret) goto err_free; + rdma_restrack_uadd(&cq->res); obj->uevent.uobject.object = cq; obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file); if (obj->uevent.event_file) uverbs_uobject_get(&obj->uevent.event_file->uobj); + uobj_finalize_uobj_create(&obj->uevent.uobject, attrs); - memset(&resp, 0, sizeof resp); resp.base.cq_handle = obj->uevent.uobject.id; - resp.base.cqe = cq->cqe; + resp.base.cqe = cq->cqe; resp.response_length = uverbs_response_length(attrs, sizeof(resp)); + return uverbs_response(attrs, &resp, sizeof(resp)); - cq->res.type = RDMA_RESTRACK_CQ; - rdma_restrack_uadd(&cq->res); - - ret = uverbs_response(attrs, &resp, sizeof(resp)); - if (ret) - goto err_cb; - - rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); - return obj; - -err_cb: - if (obj->uevent.event_file) - uverbs_uobject_put(&obj->uevent.event_file->uobj); - ib_destroy_cq_user(cq, uverbs_get_cleared_udata(attrs)); - cq = NULL; err_free: kfree(cq); err_file: if (ev_file) ib_uverbs_release_ucq(ev_file, obj); - err: uobj_alloc_abort(&obj->uevent.uobject, attrs); - - return ERR_PTR(ret); + return ret; } static int ib_uverbs_create_cq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_cq cmd; struct ib_uverbs_ex_create_cq cmd_ex; - struct ib_ucq_object *obj; int ret; ret = uverbs_request(attrs, &cmd, sizeof(cmd)); @@ -1100,14 +1038,12 @@ static int ib_uverbs_create_cq(struct uverbs_attr_bundle *attrs) cmd_ex.comp_vector = cmd.comp_vector; cmd_ex.comp_channel = cmd.comp_channel; - obj = create_cq(attrs, &cmd_ex); - return PTR_ERR_OR_ZERO(obj); + return create_cq(attrs, &cmd_ex); } static int ib_uverbs_ex_create_cq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_ex_create_cq cmd; - struct ib_ucq_object *obj; int ret; ret = uverbs_request(attrs, &cmd, sizeof(cmd)); @@ -1120,8 +1056,7 @@ static int ib_uverbs_ex_create_cq(struct uverbs_attr_bundle *attrs) if (cmd.reserved) return -EINVAL; - obj = create_cq(attrs, &cmd); - return PTR_ERR_OR_ZERO(obj); + return create_cq(attrs, &cmd); } static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs) @@ -1296,7 +1231,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs, struct ib_srq *srq = NULL; struct ib_qp *qp; struct ib_qp_init_attr attr = {}; - struct ib_uverbs_ex_create_qp_resp resp; + struct ib_uverbs_ex_create_qp_resp resp = {}; int ret; struct ib_rwq_ind_table *ind_tbl = NULL; bool has_sq = true; @@ -1466,20 +1401,6 @@ static int create_qp(struct uverbs_attr_bundle *attrs, if (obj->uevent.event_file) uverbs_uobject_get(&obj->uevent.event_file->uobj); - memset(&resp, 0, sizeof resp); - resp.base.qpn = qp->qp_num; - resp.base.qp_handle = obj->uevent.uobject.id; - resp.base.max_recv_sge = attr.cap.max_recv_sge; - resp.base.max_send_sge = attr.cap.max_send_sge; - resp.base.max_recv_wr = attr.cap.max_recv_wr; - resp.base.max_send_wr = attr.cap.max_send_wr; - resp.base.max_inline_data = attr.cap.max_inline_data; - resp.response_length = uverbs_response_length(attrs, sizeof(resp)); - - ret = uverbs_response(attrs, &resp, sizeof(resp)); - if (ret) - goto err_uevent; - if (xrcd) { obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); @@ -1500,12 +1421,18 @@ static int create_qp(struct uverbs_attr_bundle *attrs, UVERBS_LOOKUP_READ); if (ind_tbl) uobj_put_obj_read(ind_tbl); + uobj_finalize_uobj_create(&obj->uevent.uobject, attrs); + + resp.base.qpn = qp->qp_num; + 
resp.base.qp_handle = obj->uevent.uobject.id; + resp.base.max_recv_sge = attr.cap.max_recv_sge; + resp.base.max_send_sge = attr.cap.max_send_sge; + resp.base.max_recv_wr = attr.cap.max_recv_wr; + resp.base.max_send_wr = attr.cap.max_send_wr; + resp.base.max_inline_data = attr.cap.max_inline_data; + resp.response_length = uverbs_response_length(attrs, sizeof(resp)); + return uverbs_response(attrs, &resp, sizeof(resp)); - rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); - return 0; -err_uevent: - if (obj->uevent.event_file) - uverbs_uobject_put(&obj->uevent.event_file->uobj); err_cb: ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs)); @@ -1578,8 +1505,8 @@ static int ib_uverbs_ex_create_qp(struct uverbs_attr_bundle *attrs) static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs) { + struct ib_uverbs_create_qp_resp resp = {}; struct ib_uverbs_open_qp cmd; - struct ib_uverbs_create_qp_resp resp; struct ib_uqp_object *obj; struct ib_xrcd *xrcd; struct ib_uobject *uninitialized_var(xrcd_uobj); @@ -1625,24 +1552,16 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs) obj->uevent.uobject.object = qp; obj->uevent.uobject.user_handle = cmd.user_handle; - memset(&resp, 0, sizeof resp); - resp.qpn = qp->qp_num; - resp.qp_handle = obj->uevent.uobject.id; - - ret = uverbs_response(attrs, &resp, sizeof(resp)); - if (ret) - goto err_destroy; - obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); qp->uobject = obj; uobj_put_read(xrcd_uobj); + uobj_finalize_uobj_create(&obj->uevent.uobject, attrs); - rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); - return 0; + resp.qpn = qp->qp_num; + resp.qp_handle = obj->uevent.uobject.id; + return uverbs_response(attrs, &resp, sizeof(resp)); -err_destroy: - ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs)); err_xrcd: uobj_put_read(xrcd_uobj); err_put: @@ -2478,24 +2397,14 @@ static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs) ah->uobject = uobj; uobj->user_handle = cmd.user_handle; uobj->object = ah; - - resp.ah_handle = uobj->id; - - ret = uverbs_response(attrs, &resp, sizeof(resp)); - if (ret) - goto err_copy; - uobj_put_obj_read(pd); - rdma_alloc_commit_uobject(uobj, attrs); - return 0; + uobj_finalize_uobj_create(uobj, attrs); -err_copy: - rdma_destroy_ah_user(ah, RDMA_DESTROY_AH_SLEEPABLE, - uverbs_get_cleared_udata(attrs)); + resp.ah_handle = uobj->id; + return uverbs_response(attrs, &resp, sizeof(resp)); err_put: uobj_put_obj_read(pd); - err: uobj_alloc_abort(uobj, attrs); return ret; @@ -2987,26 +2896,18 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) if (obj->uevent.event_file) uverbs_uobject_get(&obj->uevent.event_file->uobj); - memset(&resp, 0, sizeof(resp)); + uobj_put_obj_read(pd); + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); + uobj_finalize_uobj_create(&obj->uevent.uobject, attrs); + resp.wq_handle = obj->uevent.uobject.id; resp.max_sge = wq_init_attr.max_sge; resp.max_wr = wq_init_attr.max_wr; resp.wqn = wq->wq_num; resp.response_length = uverbs_response_length(attrs, sizeof(resp)); - err = uverbs_response(attrs, &resp, sizeof(resp)); - if (err) - goto err_copy; - - uobj_put_obj_read(pd); - rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, - UVERBS_LOOKUP_READ); - rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); - return 0; + return uverbs_response(attrs, &resp, sizeof(resp)); -err_copy: - if (obj->uevent.event_file) - uverbs_uobject_put(&obj->uevent.event_file->uobj); - 
ib_destroy_wq(wq, uverbs_get_cleared_udata(attrs)); err_put_cq: rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, UVERBS_LOOKUP_READ); @@ -3091,7 +2992,7 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs) struct ib_wq **wqs = NULL; u32 *wqs_handles = NULL; struct ib_wq *wq = NULL; - int i, j, num_read_wqs; + int i, num_read_wqs; u32 num_wq_handles; struct uverbs_req_iter iter; struct ib_device *ib_dev; @@ -3137,6 +3038,7 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs) } wqs[num_read_wqs] = wq; + atomic_inc(&wqs[num_read_wqs]->usecnt); } uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, attrs, &ib_dev); @@ -3164,33 +3066,24 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs) atomic_set(&rwq_ind_tbl->usecnt, 0); for (i = 0; i < num_wq_handles; i++) - atomic_inc(&wqs[i]->usecnt); + rdma_lookup_put_uobject(&wqs[i]->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); + kfree(wqs_handles); + uobj_finalize_uobj_create(uobj, attrs); resp.ind_tbl_handle = uobj->id; resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num; resp.response_length = uverbs_response_length(attrs, sizeof(resp)); + return uverbs_response(attrs, &resp, sizeof(resp)); - err = uverbs_response(attrs, &resp, sizeof(resp)); - if (err) - goto err_copy; - - kfree(wqs_handles); - - for (j = 0; j < num_read_wqs; j++) - rdma_lookup_put_uobject(&wqs[j]->uobject->uevent.uobject, - UVERBS_LOOKUP_READ); - - rdma_alloc_commit_uobject(uobj, attrs); - return 0; - -err_copy: - ib_destroy_rwq_ind_table(rwq_ind_tbl); err_uobj: uobj_alloc_abort(uobj, attrs); put_wqs: - for (j = 0; j < num_read_wqs; j++) - rdma_lookup_put_uobject(&wqs[j]->uobject->uevent.uobject, + for (i = 0; i < num_read_wqs; i++) { + rdma_lookup_put_uobject(&wqs[i]->uobject->uevent.uobject, UVERBS_LOOKUP_READ); + atomic_dec(&wqs[i]->usecnt); + } err_free: kfree(wqs_handles); kfree(wqs); @@ -3216,7 +3109,7 @@ static int ib_uverbs_ex_destroy_rwq_ind_table(struct uverbs_attr_bundle *attrs) static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_flow cmd; - struct ib_uverbs_create_flow_resp resp; + struct ib_uverbs_create_flow_resp resp = {}; struct ib_uobject *uobj; struct ib_flow *flow_id; struct ib_uverbs_flow_attr *kern_flow_attr; @@ -3349,23 +3242,17 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs) ib_set_flow(uobj, flow_id, qp, qp->device, uflow_res); - memset(&resp, 0, sizeof(resp)); - resp.flow_handle = uobj->id; - - err = uverbs_response(attrs, &resp, sizeof(resp)); - if (err) - goto err_copy; - rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, UVERBS_LOOKUP_READ); kfree(flow_attr); + if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); - rdma_alloc_commit_uobject(uobj, attrs); - return 0; -err_copy: - if (!qp->device->ops.destroy_flow(flow_id)) - atomic_dec(&qp->usecnt); + uobj_finalize_uobj_create(uobj, attrs); + + resp.flow_handle = uobj->id; + return uverbs_response(attrs, &resp, sizeof(resp)); + err_free: ib_uverbs_flow_resources_free(uflow_res); err_free_flow_attr: @@ -3400,7 +3287,7 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, struct ib_uverbs_create_xsrq *cmd, struct ib_udata *udata) { - struct ib_uverbs_create_srq_resp resp; + struct ib_uverbs_create_srq_resp resp = {}; struct ib_usrq_object *obj; struct ib_pd *pd; struct ib_srq *srq; @@ -3471,17 +3358,9 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, if (obj->uevent.event_file) 
uverbs_uobject_get(&obj->uevent.event_file->uobj); - memset(&resp, 0, sizeof resp); - resp.srq_handle = obj->uevent.uobject.id; - resp.max_wr = attr.attr.max_wr; - resp.max_sge = attr.attr.max_sge; if (cmd->srq_type == IB_SRQT_XRC) resp.srqn = srq->ext.xrc.srq_num; - ret = uverbs_response(attrs, &resp, sizeof(resp)); - if (ret) - goto err_copy; - if (cmd->srq_type == IB_SRQT_XRC) uobj_put_read(xrcd_uobj); @@ -3490,13 +3369,13 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, UVERBS_LOOKUP_READ); uobj_put_obj_read(pd); - rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); - return 0; + uobj_finalize_uobj_create(&obj->uevent.uobject, attrs); + + resp.srq_handle = obj->uevent.uobject.id; + resp.max_wr = attr.attr.max_wr; + resp.max_sge = attr.attr.max_sge; + return uverbs_response(attrs, &resp, sizeof(resp)); -err_copy: - if (obj->uevent.event_file) - uverbs_uobject_put(&obj->uevent.event_file->uobj); - ib_destroy_srq_user(srq, uverbs_get_cleared_udata(attrs)); err_put_pd: uobj_put_obj_read(pd); err_put_cq: -- cgit From 042dd05bddbd84e6a52b337a65d1994003c9b9bb Mon Sep 17 00:00:00 2001 From: Meir Lichtinger Date: Thu, 4 Jun 2020 08:49:38 +0300 Subject: RDMA/mlx5: ConnectX-7 new capabilities to set relaxed ordering by UMR Up to ConnectX-7 setting mkey relaxed ordering read/write attributes by UMR is not supported. ConnectX-7 supports this option, which is indicated by two new HCA capabilities - relaxed_ordering_write_umr and relaxed_ordering_read_umr. Signed-off-by: Meir Lichtinger Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 435ab47d5362..0257329431e8 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1240,7 +1240,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 max_sgl_for_optimized_performance[0x8]; u8 log_max_cq_sz[0x8]; - u8 reserved_at_d0[0x9]; + u8 relaxed_ordering_write_umr[0x1]; + u8 relaxed_ordering_read_umr[0x1]; + u8 reserved_at_d2[0x7]; u8 virtio_net_device_emualtion_manager[0x1]; u8 virtio_blk_device_emualtion_manager[0x1]; u8 log_max_cq[0x5]; -- cgit From d4d7f59643f795ae2f3233e3dddce3b4f549dcdc Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 19 Jul 2020 09:57:47 +0300 Subject: RDMA/mlx5: Add missing srcu_read_lock in ODP implicit flow According to the locking scheme, mlx5_ib_update_xlt() should be called with srcu_read_lock(dev->odp->srcu). Prefetch missed this. 
This fixes the below WARN from lockdep_assert_held(): WARNING: CPU: 1 PID: 1130 at drivers/infiniband/hw/mlx5/odp.c:132 mlx5_odp_populate_xlt+0x175/0x180 [mlx5_ib] Modules linked in: xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 br_netfilter overlay ib_srp scsi_transport_srp rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi rdma_cm iw_cm ib_umad ib_ipoib ib_cm mlx5_ib ib_uverbs ib_core mlx5_core mlxfw ptp pps_core CPU: 1 PID: 1130 Comm: kworker/u16:11 Tainted: G W 5.8.0-rc5_for_upstream_debug_2020_07_13_11_04 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 Workqueue: events_unbound mlx5_ib_prefetch_mr_work [mlx5_ib] RIP: 0010:mlx5_odp_populate_xlt+0x175/0x180 [mlx5_ib] Code: 08 e2 85 c0 0f 84 65 ff ff ff 49 8b 87 60 01 00 00 be ff ff ff ff 48 8d b8 b0 39 00 00 e8 93 e0 50 e1 85 c0 0f 85 45 ff ff ff <0f> 0b e9 3e ff ff ff 0f 0b eb c7 0f 1f 44 00 00 48 8b 87 98 0f 00 RSP: 0018:ffff88840f44fc68 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff88840cc9d000 RCX: ffff88840efcd940 RDX: 0000000000000000 RSI: ffff88844871b9b0 RDI: ffff88840efce100 RBP: ffff88840cc9d040 R08: 0000000000000040 R09: 0000000000000001 R10: ffff88846ced3068 R11: 0000000000000000 R12: 00000000000156ec R13: 0000000000000004 R14: 0000000000000004 R15: ffff888439941000 FS: 0000000000000000(0000) GS:ffff88846fa80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f8536d12430 CR3: 0000000437a5e006 CR4: 0000000000360ea0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: mlx5_ib_update_xlt+0x37c/0x7c0 [mlx5_ib] pagefault_mr+0x315/0x440 [mlx5_ib] mlx5_ib_prefetch_mr_work+0x56/0xa0 [mlx5_ib] process_one_work+0x215/0x5c0 worker_thread+0x3c/0x380 ? process_one_work+0x5c0/0x5c0 kthread+0x133/0x150 ? kthread_park+0x90/0x90 ret_from_fork+0x1f/0x30 Hold the SRCU during prefetch, even though it strictly isn't needed since prefetch is holding the num_deferred_work it does make it easier to reason about. 
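The fix itself is the usual SRCU read-side pattern wrapped around the prefetch loop; trimmed down from the odp.c hunk below (statistics handling and the early-exit on error omitted):

    struct mlx5_ib_dev *dev = work->frags[0].mr->dev;
    u32 bytes_mapped = 0;
    int srcu_key;
    u32 i;

    /* pagefault_mr() now asserts lockdep_assert_held(&mr->dev->odp_srcu) */
    srcu_key = srcu_read_lock(&dev->odp_srcu);
    for (i = 0; i < work->num_sge; ++i)
        pagefault_mr(work->frags[i].mr, work->frags[i].io_virt,
                     work->frags[i].length, &bytes_mapped,
                     work->frags[i].flags);
    srcu_read_unlock(&dev->odp_srcu, srcu_key);
    destroy_prefetch_work(work);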
Fixes: 5256edcb98a1 ("RDMA/mlx5: Rework implicit ODP destroy") Link: https://lore.kernel.org/r/20200719065747.131157-1-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_std_types_mr.c | 2 +- drivers/infiniband/core/verbs.c | 3 +++ drivers/infiniband/hw/mlx5/odp.c | 9 +++++++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index 62f58ad56afd..9b22bb553e8b 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -69,7 +69,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_ADVISE_MR)( num_sge = uverbs_attr_ptr_get_array_size( attrs, UVERBS_ATTR_ADVISE_MR_SGE_LIST, sizeof(struct ib_sge)); - if (num_sge < 0) + if (num_sge <= 0) return num_sge; sg_list = uverbs_attr_get_alloced_ptr(attrs, diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index a92783105cea..10f650a17ae7 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2072,6 +2072,9 @@ int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, if (!pd->device->ops.advise_mr) return -EOPNOTSUPP; + if (!num_sge) + return 0; + return pd->device->ops.advise_mr(pd, advice, flags, sg_list, num_sge, NULL); } diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index ee88b32d143d..8f4426496dc7 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -800,6 +800,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + lockdep_assert_held(&mr->dev->odp_srcu); if (unlikely(io_virt < mr->mmkey.iova)) return -EFAULT; @@ -1749,10 +1750,17 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) { struct prefetch_mr_work *work = container_of(w, struct prefetch_mr_work, work); + struct mlx5_ib_dev *dev; u32 bytes_mapped = 0; + int srcu_key; int ret; u32 i; + /* We rely on IB/core that work is executed if we have num_sge != 0 only. */ + WARN_ON(!work->num_sge); + dev = work->frags[0].mr->dev; + /* SRCU should be held when calling to mlx5_odp_populate_xlt() */ + srcu_key = srcu_read_lock(&dev->odp_srcu); for (i = 0; i < work->num_sge; ++i) { ret = pagefault_mr(work->frags[i].mr, work->frags[i].io_virt, work->frags[i].length, &bytes_mapped, @@ -1761,6 +1769,7 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) continue; mlx5_update_odp_stats(work->frags[i].mr, prefetch, ret); } + srcu_read_unlock(&dev->odp_srcu, srcu_key); destroy_prefetch_work(work); } -- cgit From 29f3fe1d6854053b32432d01f9810d6fff152ece Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 20 Jul 2020 20:56:26 +0300 Subject: RDMA/uverbs: Remove redundant assignments The kbuild reported the following warning, so clean whole uverbs_cmd.c file. drivers/infiniband/core/uverbs_cmd.c:1066:6: warning: Variable 'ret' is reassigned a value before the old one has been used. [redundantAssignment] ret = uverbs_request(attrs, &cmd, sizeof(cmd)); ^ drivers/infiniband/core/uverbs_cmd.c:1064:0: note: Variable 'ret' is reassigned a value before the old one has been used. 
int ret = -EINVAL; ^ Link: https://lore.kernel.org/r/20200720175627.1273096-2-leon@kernel.org Reported-by: kernel test robot Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 479895db72e2..119646478fa7 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -563,11 +563,11 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs) struct ib_uverbs_open_xrcd cmd; struct ib_uxrcd_object *obj; struct ib_xrcd *xrcd = NULL; - struct fd f = {NULL, 0}; struct inode *inode = NULL; - int ret = 0; int new_xrcd = 0; struct ib_device *ib_dev; + struct fd f = {}; + int ret; ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) @@ -1064,7 +1064,7 @@ static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs) struct ib_uverbs_resize_cq cmd; struct ib_uverbs_resize_cq_resp resp = {}; struct ib_cq *cq; - int ret = -EINVAL; + int ret; ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) @@ -1509,10 +1509,10 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs) struct ib_uverbs_open_qp cmd; struct ib_uqp_object *obj; struct ib_xrcd *xrcd; - struct ib_uobject *uninitialized_var(xrcd_uobj); struct ib_qp *qp; struct ib_qp_open_attr attr = {}; int ret; + struct ib_uobject *xrcd_uobj; struct ib_device *ib_dev; ret = uverbs_request(attrs, &cmd, sizeof(cmd)); @@ -3291,9 +3291,9 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, struct ib_usrq_object *obj; struct ib_pd *pd; struct ib_srq *srq; - struct ib_uobject *uninitialized_var(xrcd_uobj); struct ib_srq_init_attr attr; int ret; + struct ib_uobject *xrcd_uobj; struct ib_device *ib_dev; obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, attrs, -- cgit From 9b8d846924856570625b93f83ae0624391193bce Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 20 Jul 2020 20:56:27 +0300 Subject: RDMA/uverbs: Silence shiftTooManyBitsSigned warning Fix reported by kbuild warning. drivers/infiniband/core/uverbs_cmd.c:1897:47: warning: Shifting signed 32-bit value by 31 bits is undefined behaviour [shiftTooManyBitsSigned] BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31)); ^ Link: https://lore.kernel.org/r/20200720175627.1273096-3-leon@kernel.org Reported-by: kernel test robot Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 119646478fa7..2fbc583d5bdd 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1897,7 +1897,7 @@ static int ib_uverbs_ex_modify_qp(struct uverbs_attr_bundle *attrs) * Last bit is reserved for extending the attr_mask by * using another field. */ - BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31)); + BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1ULL << 31)); if (cmd.base.attr_mask & ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1)) -- cgit From 6f24b15925ec83e65d4a143ef560368b5b6bd8ad Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 21 Jul 2020 08:34:55 -0500 Subject: IB/hfi1: Use fallthrough pseudo-keyword Replace the existing /* fall through */ comments and its variants with the new pseudo-keyword macro fallthrough[1]. Also, remove unnecessary fall-through markings when it is the case. 
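For example, in the mad.c hunk further down the change is purely mechanical; the comment the compiler cannot verify becomes the pseudo-keyword it can check:

    before:
        if (ibp->rvp.mkeyprot < 2)
            break;
        /* fall through */
    case IB_MGMT_METHOD_SET:

    after:
        if (ibp->rvp.mkeyprot < 2)
            break;
        fallthrough;
    case IB_MGMT_METHOD_SET:

Cases that never needed a marker in the first place, such as a chain of empty case labels in firmware.c, simply have the comment removed.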
[1] https://www.kernel.org/doc/html/v5.7-rc7/process/deprecated.html?highlight=fallthrough#implicit-switch-case-fall-through Link: https://lore.kernel.org/r/20200721133455.GA14363@embeddedor Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/firmware.c | 16 ---------------- drivers/infiniband/hw/hfi1/mad.c | 9 ++++----- drivers/infiniband/hw/hfi1/pio.c | 2 +- drivers/infiniband/hw/hfi1/pio_copy.c | 12 ++++++------ drivers/infiniband/hw/hfi1/platform.c | 10 +++++----- drivers/infiniband/hw/hfi1/qp.c | 2 +- drivers/infiniband/hw/hfi1/qsfp.c | 4 ++-- drivers/infiniband/hw/hfi1/rc.c | 25 ++++++++++++------------- drivers/infiniband/hw/hfi1/sdma.c | 9 ++++----- drivers/infiniband/hw/hfi1/tid_rdma.c | 4 ++-- drivers/infiniband/hw/hfi1/uc.c | 8 ++++---- 11 files changed, 41 insertions(+), 60 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 2b57ba70ddd6..0e83d4b61e46 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -1868,11 +1868,8 @@ int parse_platform_config(struct hfi1_devdata *dd) 2; break; case PLATFORM_CONFIG_RX_PRESET_TABLE: - /* fall through */ case PLATFORM_CONFIG_TX_PRESET_TABLE: - /* fall through */ case PLATFORM_CONFIG_QSFP_ATTEN_TABLE: - /* fall through */ case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE: pcfgcache->config_tables[table_type].num_table = table_length_dwords; @@ -1890,15 +1887,10 @@ int parse_platform_config(struct hfi1_devdata *dd) /* metadata table */ switch (table_type) { case PLATFORM_CONFIG_SYSTEM_TABLE: - /* fall through */ case PLATFORM_CONFIG_PORT_TABLE: - /* fall through */ case PLATFORM_CONFIG_RX_PRESET_TABLE: - /* fall through */ case PLATFORM_CONFIG_TX_PRESET_TABLE: - /* fall through */ case PLATFORM_CONFIG_QSFP_ATTEN_TABLE: - /* fall through */ case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE: break; default: @@ -2027,15 +2019,10 @@ static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table, switch (table) { case PLATFORM_CONFIG_SYSTEM_TABLE: - /* fall through */ case PLATFORM_CONFIG_PORT_TABLE: - /* fall through */ case PLATFORM_CONFIG_RX_PRESET_TABLE: - /* fall through */ case PLATFORM_CONFIG_TX_PRESET_TABLE: - /* fall through */ case PLATFORM_CONFIG_QSFP_ATTEN_TABLE: - /* fall through */ case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE: if (field && field < platform_config_table_limits[table]) src_ptr = @@ -2138,11 +2125,8 @@ int get_platform_config_field(struct hfi1_devdata *dd, pcfgcache->config_tables[table_type].table; break; case PLATFORM_CONFIG_RX_PRESET_TABLE: - /* fall through */ case PLATFORM_CONFIG_TX_PRESET_TABLE: - /* fall through */ case PLATFORM_CONFIG_QSFP_ATTEN_TABLE: - /* fall through */ case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE: src_ptr = pcfgcache->config_tables[table_type].table; diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 7073f237a949..3222e3acb79c 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -721,7 +721,7 @@ static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad, /* Bad mkey not a violation below level 2 */ if (ibp->rvp.mkeyprot < 2) break; - /* fall through */ + fallthrough; case IB_MGMT_METHOD_SET: case IB_MGMT_METHOD_TRAP_REPRESS: if (ibp->rvp.mkey_violations != 0xFFFF) @@ -1272,7 +1272,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, case IB_PORT_NOP: if (phys_state == IB_PORTPHYSSTATE_NOP) break; - /* FALLTHROUGH */ + fallthrough; case 
IB_PORT_DOWN: if (phys_state == IB_PORTPHYSSTATE_NOP) { link_state = HLS_DN_DOWNDEF; @@ -2300,7 +2300,6 @@ static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data, * can be changed from the default values */ case OPA_VLARB_PREEMPT_ELEMENTS: - /* FALLTHROUGH */ case OPA_VLARB_PREEMPT_MATRIX: smp->status |= IB_SMP_UNSUP_METH_ATTR; break; @@ -4170,7 +4169,7 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; if (ibp->rvp.port_cap_flags & IB_PORT_SM) return IB_MAD_RESULT_SUCCESS; - /* FALLTHROUGH */ + fallthrough; default: smp->status |= IB_SMP_UNSUP_METH_ATTR; ret = reply((struct ib_mad_hdr *)smp); @@ -4240,7 +4239,7 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; if (ibp->rvp.port_cap_flags & IB_PORT_SM) return IB_MAD_RESULT_SUCCESS; - /* FALLTHROUGH */ + fallthrough; default: smp->status |= IB_SMP_UNSUP_METH_ATTR; ret = reply((struct ib_mad_hdr *)smp); diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 79126b2b14ab..ff864f6f0266 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -86,7 +86,7 @@ void pio_send_control(struct hfi1_devdata *dd, int op) switch (op) { case PSC_GLOBAL_ENABLE: reg |= SEND_CTRL_SEND_ENABLE_SMASK; - /* Fall through */ + fallthrough; case PSC_DATA_VL_ENABLE: mask = 0; for (i = 0; i < ARRAY_SIZE(dd->vld); i++) diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c index 03024cec78dd..b12e4665c9ab 100644 --- a/drivers/infiniband/hw/hfi1/pio_copy.c +++ b/drivers/infiniband/hw/hfi1/pio_copy.c @@ -191,22 +191,22 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n) switch (n) { case 7: *dest++ = *src++; - /* fall through */ + fallthrough; case 6: *dest++ = *src++; - /* fall through */ + fallthrough; case 5: *dest++ = *src++; - /* fall through */ + fallthrough; case 4: *dest++ = *src++; - /* fall through */ + fallthrough; case 3: *dest++ = *src++; - /* fall through */ + fallthrough; case 2: *dest++ = *src++; - /* fall through */ + fallthrough; case 1: *dest++ = *src++; /* fall through */ diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 36593f2efe26..4642d6ceb890 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -668,8 +668,8 @@ static u8 aoc_low_power_setting(struct hfi1_pportdata *ppd) /* active optical cables only */ switch ((cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4) { - case 0x0 ... 0x9: /* fallthrough */ - case 0xC: /* fallthrough */ + case 0x0 ... 0x9: fallthrough; + case 0xC: fallthrough; case 0xE: /* active AOC */ power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]); @@ -899,8 +899,8 @@ static int tune_qsfp(struct hfi1_pportdata *ppd, *ptr_tuning_method = OPA_PASSIVE_TUNING; break; - case 0x0 ... 0x9: /* fallthrough */ - case 0xC: /* fallthrough */ + case 0x0 ... 
0x9: fallthrough; + case 0xC: fallthrough; case 0xE: ret = tune_active_qsfp(ppd, ptr_tx_preset, ptr_rx_preset, ptr_total_atten); @@ -909,7 +909,7 @@ static int tune_qsfp(struct hfi1_pportdata *ppd, *ptr_tuning_method = OPA_ACTIVE_TUNING; break; - case 0xD: /* fallthrough */ + case 0xD: fallthrough; case 0xF: default: dd_dev_warn(ppd->dd, "%s: Unknown/unsupported cable\n", diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 0c2ae9f7b3e8..b1175c514cd8 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -312,7 +312,7 @@ int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send) switch (qp->ibqp.qp_type) { case IB_QPT_RC: hfi1_setup_tid_rdma_wqe(qp, wqe); - /* fall through */ + fallthrough; case IB_QPT_UC: if (wqe->length > 0x80000000U) return -EINVAL; diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index b5966991d647..8386c84c2d92 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c @@ -231,7 +231,7 @@ static int i2c_bus_write(struct hfi1_devdata *dd, struct hfi1_i2c_bus *i2c, break; case 2: offset_bytes[1] = (offset >> 8) & 0xff; - /* fall through */ + fallthrough; case 1: num_msgs = 2; offset_bytes[0] = offset & 0xff; @@ -279,7 +279,7 @@ static int i2c_bus_read(struct hfi1_devdata *dd, struct hfi1_i2c_bus *bus, break; case 2: offset_bytes[1] = (offset >> 8) & 0xff; - /* fall through */ + fallthrough; case 1: num_msgs = 2; offset_bytes[0] = offset & 0xff; diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index f1734e5e9ac4..1bb5f57152d3 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -141,7 +141,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, case OP(RDMA_READ_RESPONSE_ONLY): e = &qp->s_ack_queue[qp->s_tail_ack_queue]; release_rdma_sge_mr(e); - /* FALLTHROUGH */ + fallthrough; case OP(ATOMIC_ACKNOWLEDGE): /* * We can increment the tail pointer now that the last @@ -160,7 +160,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, qp->s_acked_ack_queue = next; qp->s_tail_ack_queue = next; trace_hfi1_rsp_make_rc_ack(qp, e->psn); - /* FALLTHROUGH */ + fallthrough; case OP(SEND_ONLY): case OP(ACKNOWLEDGE): /* Check for no next entry in the queue. */ @@ -267,7 +267,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, case OP(RDMA_READ_RESPONSE_FIRST): qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); - /* FALLTHROUGH */ + fallthrough; case OP(RDMA_READ_RESPONSE_MIDDLE): ps->s_txreq->ss = &qp->s_ack_rdma_sge; ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr; @@ -881,8 +881,7 @@ no_flow_control: goto bail; } qp->s_num_rd_atomic++; - - /* FALLTHROUGH */ + fallthrough; case IB_WR_OPFN: if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; @@ -946,10 +945,10 @@ no_flow_control: * See restart_rc(). */ qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu); - /* FALLTHROUGH */ + fallthrough; case OP(SEND_FIRST): qp->s_state = OP(SEND_MIDDLE); - /* FALLTHROUGH */ + fallthrough; case OP(SEND_MIDDLE): bth2 = mask_psn(qp->s_psn++); ss = &qp->s_sge; @@ -991,10 +990,10 @@ no_flow_control: * See restart_rc(). 
*/ qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu); - /* FALLTHROUGH */ + fallthrough; case OP(RDMA_WRITE_FIRST): qp->s_state = OP(RDMA_WRITE_MIDDLE); - /* FALLTHROUGH */ + fallthrough; case OP(RDMA_WRITE_MIDDLE): bth2 = mask_psn(qp->s_psn++); ss = &qp->s_sge; @@ -2901,7 +2900,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) if (!ret) goto rnr_nak; qp->r_rcv_len = 0; - /* FALLTHROUGH */ + fallthrough; case OP(SEND_MIDDLE): case OP(RDMA_WRITE_MIDDLE): send_middle: @@ -2941,7 +2940,7 @@ send_middle: goto no_immediate_data; if (opcode == OP(SEND_ONLY_WITH_INVALIDATE)) goto send_last_inv; - /* FALLTHROUGH -- for SEND_ONLY_WITH_IMMEDIATE */ + fallthrough; /* for SEND_ONLY_WITH_IMMEDIATE */ case OP(SEND_LAST_WITH_IMMEDIATE): send_last_imm: wc.ex.imm_data = ohdr->u.imm_data; @@ -2957,7 +2956,7 @@ send_last_inv: goto send_last; case OP(RDMA_WRITE_LAST): copy_last = rvt_is_user_qp(qp); - /* fall through */ + fallthrough; case OP(SEND_LAST): no_immediate_data: wc.wc_flags = 0; @@ -3010,7 +3009,7 @@ send_last: case OP(RDMA_WRITE_ONLY): copy_last = rvt_is_user_qp(qp); - /* fall through */ + fallthrough; case OP(RDMA_WRITE_FIRST): case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index c93ea021cf49..04575c9afd61 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -2584,7 +2584,7 @@ static void __sdma_process_event(struct sdma_engine *sde, * 7220, e.g. */ ss->go_s99_running = 1; - /* fall through -- and start dma engine */ + fallthrough; /* and start dma engine */ case sdma_event_e10_go_hw_start: /* This reference means the state machine is started */ sdma_get(&sde->state); @@ -2726,7 +2726,6 @@ static void __sdma_process_event(struct sdma_engine *sde, case sdma_event_e70_go_idle: break; case sdma_event_e85_link_down: - /* fall through */ case sdma_event_e80_hw_freeze: sdma_set_state(sde, sdma_state_s80_hw_freeze); atomic_dec(&sde->dd->sdma_unfreeze_count); @@ -3007,7 +3006,7 @@ static void __sdma_process_event(struct sdma_engine *sde, case sdma_event_e60_hw_halted: need_progress = 1; sdma_err_progress_check_schedule(sde); - /* fall through */ + fallthrough; case sdma_event_e90_sw_halted: /* * SW initiated halt does not perform engines @@ -3021,7 +3020,7 @@ static void __sdma_process_event(struct sdma_engine *sde, break; case sdma_event_e85_link_down: ss->go_s99_running = 0; - /* fall through */ + fallthrough; case sdma_event_e80_hw_freeze: sdma_set_state(sde, sdma_state_s80_hw_freeze); atomic_dec(&sde->dd->sdma_unfreeze_count); @@ -3252,7 +3251,7 @@ void _sdma_txreq_ahgadd( tx->num_desc++; tx->descs[2].qw[0] = 0; tx->descs[2].qw[1] = 0; - /* FALLTHROUGH */ + fallthrough; case SDMA_AHG_APPLY_UPDATE2: tx->num_desc++; tx->descs[1].qw[0] = 0; diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 243b4ba0b6f6..62b6c1bf267d 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -3227,7 +3227,7 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe) case IB_WR_RDMA_READ: if (prev->wr.opcode != IB_WR_TID_RDMA_WRITE) break; - /* fall through */ + fallthrough; case IB_WR_TID_RDMA_READ: switch (prev->wr.opcode) { case IB_WR_RDMA_READ: @@ -5067,7 +5067,7 @@ int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (priv->s_state == TID_OP(WRITE_REQ)) hfi1_tid_rdma_restart_req(qp, wqe, &bth2); priv->s_state = 
TID_OP(WRITE_DATA); - /* fall through */ + fallthrough; case TID_OP(WRITE_DATA): /* diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 0c77f18120ed..1fb918399da0 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -216,7 +216,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) case OP(SEND_FIRST): qp->s_state = OP(SEND_MIDDLE); - /* FALLTHROUGH */ + fallthrough; case OP(SEND_MIDDLE): len = qp->s_len; if (len > pmtu) { @@ -241,7 +241,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) case OP(RDMA_WRITE_FIRST): qp->s_state = OP(RDMA_WRITE_MIDDLE); - /* FALLTHROUGH */ + fallthrough; case OP(RDMA_WRITE_MIDDLE): len = qp->s_len; if (len > pmtu) { @@ -414,7 +414,7 @@ send_first: goto no_immediate_data; else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE)) goto send_last_imm; - /* FALLTHROUGH */ + fallthrough; case OP(SEND_MIDDLE): /* Check for invalid length PMTU or posted rwqe len. */ /* @@ -515,7 +515,7 @@ rdma_first: wc.ex.imm_data = ohdr->u.rc.imm_data; goto rdma_last_imm; } - /* FALLTHROUGH */ + fallthrough; case OP(RDMA_WRITE_MIDDLE): /* Check for invalid length PMTU or posted rwqe len. */ if (unlikely(tlen != (hdrsize + pmtu + 4))) -- cgit From 8b603d0715a372f5827d3a6b19d9568bf854b687 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Fri, 24 Jul 2020 10:41:12 +0200 Subject: RDMA/mlx5: Fix typo in enum name Nnothing uses the enum name, so this is harmless. Fixes: 322694412400 ("IB/mlx5: Introduce driver create and destroy flow methods") Link: https://lore.kernel.org/r/20200724084112.GC31930@amd Signed-off-by: Pavel Machek (CIP) Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index b330e6eee626..e24d66d278cf 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -263,7 +263,7 @@ enum mlx5_ib_create_flow_attrs { MLX5_IB_ATTR_CREATE_FLOW_FLAGS, }; -enum mlx5_ib_destoy_flow_attrs { +enum mlx5_ib_destroy_flow_attrs { MLX5_IB_ATTR_DESTROY_FLOW_HANDLE = (1U << UVERBS_ID_NS_SHIFT), }; -- cgit From 2224635938814fc63004e30f7c41943812bd6f1c Mon Sep 17 00:00:00 2001 From: Meir Lichtinger Date: Thu, 16 Jul 2020 13:52:47 +0300 Subject: RDMA/mlx5: Use MLX5_SET macro instead of local structure Use generic mlx5 structure defined in mlx5_ifc.h to represent ConnectX device data structures instead of using structure defined specifically for mlx5_ib module. Link: https://lore.kernel.org/r/20200716105248.1423452-3-leon@kernel.org Signed-off-by: Meir Lichtinger Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 9 --------- drivers/infiniband/hw/mlx5/wr.c | 26 ++++++++++++++++---------- include/linux/mlx5/device.h | 1 - 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 5dbe3eb0d9cb..d9dfe38f4160 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1423,15 +1423,6 @@ static inline void init_query_mad(struct ib_smp *mad) mad->method = IB_MGMT_METHOD_GET; } -static inline u8 convert_access(int acc) -{ - return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) | - (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) | - (acc & IB_ACCESS_REMOTE_READ ? 
MLX5_PERM_REMOTE_READ : 0) | - (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) | - MLX5_PERM_LOCAL_READ; -} - static inline int is_qp1(enum ib_qp_type qp_type) { return qp_type == MLX5_IB_QPT_HW_GSI; diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c index 2c6df1c43b55..e58ecb46f8e3 100644 --- a/drivers/infiniband/hw/mlx5/wr.c +++ b/drivers/infiniband/hw/mlx5/wr.c @@ -383,20 +383,26 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, memset(seg, 0, sizeof(*seg)); if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR) - seg->status = MLX5_MKEY_STATUS_FREE; - - seg->flags = convert_access(umrwr->access_flags); + MLX5_SET(mkc, seg, free, 1); + + MLX5_SET(mkc, seg, a, + !!(umrwr->access_flags & IB_ACCESS_REMOTE_ATOMIC)); + MLX5_SET(mkc, seg, rw, + !!(umrwr->access_flags & IB_ACCESS_REMOTE_WRITE)); + MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ)); + MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE)); + MLX5_SET(mkc, seg, lr, 1); if (umrwr->pd) - seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn); + MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn); if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION && !umrwr->length) - seg->flags_pd |= cpu_to_be32(MLX5_MKEY_LEN64); + MLX5_SET(mkc, seg, length64, 1); - seg->start_addr = cpu_to_be64(umrwr->virt_addr); - seg->len = cpu_to_be64(umrwr->length); - seg->log2_page_size = umrwr->page_shift; - seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 | - mlx5_mkey_variant(umrwr->mkey)); + MLX5_SET64(mkc, seg, start_addr, umrwr->virt_addr); + MLX5_SET64(mkc, seg, len, umrwr->length); + MLX5_SET(mkc, seg, log_page_size, umrwr->page_shift); + MLX5_SET(mkc, seg, qpn, 0xffffff); + MLX5_SET(mkc, seg, mkey_7_0, mlx5_mkey_variant(umrwr->mkey)); } static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 2aacf9a8ee4d..d184b579617f 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1007,7 +1007,6 @@ enum { MLX5_MKEY_REMOTE_INVAL = 1 << 24, MLX5_MKEY_FLAG_SYNC_UMR = 1 << 29, MLX5_MKEY_BSF_EN = 1 << 30, - MLX5_MKEY_LEN64 = 1 << 31, }; struct mlx5_mkey_seg { -- cgit From 896ec9735336f5adb576d372ed7e411bce2fc74c Mon Sep 17 00:00:00 2001 From: Meir Lichtinger Date: Thu, 16 Jul 2020 13:52:48 +0300 Subject: RDMA/mlx5: Set mkey relaxed ordering by UMR with ConnectX-7 Up to ConnectX-7 UMR is not used when user passes relaxed ordering access flag. ConnectX-7 supports setting relaxed ordering read/write mkey attribute by UMR, indicated by new HCA capabilities. With ConnectX-7 driver uses UMR when user set relaxed ordering access flag, in contrast to previous silicon models. Specifically it includes setting relvant flags of mkey context mask in UMR control segment, and relaxed ordering write and read flags in UMR mkey context segment. 
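The resulting policy in mlx5_ib_can_use_umr(), taken from the mlx5_ib.h hunk below, is: if the caller requested relaxed ordering and the device would actually apply it, UMR may only be used when the matching *_umr capability is also set.

    if (access_flags & IB_ACCESS_RELAXED_ORDERING &&
        MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) &&
        !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
        return false;

    if (access_flags & IB_ACCESS_RELAXED_ORDERING &&
        MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) &&
        !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
        return false;

On silicon before ConnectX-7 the *_umr bits are clear, so relaxed-ordering MRs keep taking the non-UMR path as before.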
Link: https://lore.kernel.org/r/20200716105248.1423452-4-leon@kernel.org Signed-off-by: Meir Lichtinger Reviewed-by: Michael Guralnik Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 9 ++++++-- drivers/infiniband/hw/mlx5/wr.c | 44 ++++++++++++++++++++++++++++-------- include/linux/mlx5/device.h | 4 +++- 3 files changed, 45 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d9dfe38f4160..cdd04b3d7d51 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1524,8 +1524,13 @@ static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev, return false; if (access_flags & IB_ACCESS_RELAXED_ORDERING && - (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) || - MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))) + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) + return false; + + if (access_flags & IB_ACCESS_RELAXED_ORDERING && + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) return false; return true; diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c index e58ecb46f8e3..4d4f8c22b3e6 100644 --- a/drivers/infiniband/hw/mlx5/wr.c +++ b/drivers/infiniband/hw/mlx5/wr.c @@ -263,7 +263,9 @@ static __be64 get_umr_update_translation_mask(void) return cpu_to_be64(result); } -static __be64 get_umr_update_access_mask(int atomic) +static __be64 get_umr_update_access_mask(int atomic, + int relaxed_ordering_write, + int relaxed_ordering_read) { u64 result; @@ -275,6 +277,12 @@ static __be64 get_umr_update_access_mask(int atomic) if (atomic) result |= MLX5_MKEY_MASK_A; + if (relaxed_ordering_write) + result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE; + + if (relaxed_ordering_read) + result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ; + return cpu_to_be64(result); } @@ -289,17 +297,28 @@ static __be64 get_umr_update_pd_mask(void) static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask) { - if ((mask & MLX5_MKEY_MASK_PAGE_SIZE && - MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) || - (mask & MLX5_MKEY_MASK_A && - MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))) + if (mask & MLX5_MKEY_MASK_PAGE_SIZE && + MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) + return -EPERM; + + if (mask & MLX5_MKEY_MASK_A && + MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) + return -EPERM; + + if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) + return -EPERM; + + if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) return -EPERM; + return 0; } static int set_reg_umr_segment(struct mlx5_ib_dev *dev, struct mlx5_wqe_umr_ctrl_seg *umr, - const struct ib_send_wr *wr, int atomic) + const struct ib_send_wr *wr) { const struct mlx5_umr_wr *umrwr = umr_wr(wr); @@ -325,7 +344,10 @@ static int set_reg_umr_segment(struct mlx5_ib_dev *dev, if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION) umr->mkey_mask |= get_umr_update_translation_mask(); if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) { - umr->mkey_mask |= get_umr_update_access_mask(atomic); + umr->mkey_mask |= get_umr_update_access_mask( + !!(MLX5_CAP_GEN(dev->mdev, atomic)), + !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)), + !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))); umr->mkey_mask |= 
get_umr_update_pd_mask(); } if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR) @@ -392,6 +414,11 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ)); MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE)); MLX5_SET(mkc, seg, lr, 1); + MLX5_SET(mkc, seg, relaxed_ordering_write, + !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); + MLX5_SET(mkc, seg, relaxed_ordering_read, + !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); + if (umrwr->pd) MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn); if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION && @@ -1230,8 +1257,7 @@ static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; (*ctrl)->imm = cpu_to_be32(umr_wr(wr)->mkey); - err = set_reg_umr_segment(dev, *seg, wr, - !!(MLX5_CAP_GEN(dev->mdev, atomic))); + err = set_reg_umr_segment(dev, *seg, wr); if (unlikely(err)) goto out; *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index d184b579617f..4d3376e20f5e 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -276,7 +276,9 @@ enum { MLX5_MKEY_MASK_RW = 1ull << 20, MLX5_MKEY_MASK_A = 1ull << 21, MLX5_MKEY_MASK_SMALL_FENCE = 1ull << 23, - MLX5_MKEY_MASK_FREE = 1ull << 29, + MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE = 1ull << 25, + MLX5_MKEY_MASK_FREE = 1ull << 29, + MLX5_MKEY_MASK_RELAXED_ORDERING_READ = 1ull << 47, }; enum { -- cgit From 47fda651d5af2506deac57d54887cf55ce26e244 Mon Sep 17 00:00:00 2001 From: Li Heng Date: Sat, 25 Jul 2020 10:56:27 +0800 Subject: RDMA/core: Fix return error value in _ib_modify_qp() to negative The error codes in _ib_modify_qp() are supposed to be negative errno. Fixes: 7a5c938b9ed0 ("IB/core: Check for rdma_protocol_ib only after validating port_num") Link: https://lore.kernel.org/r/1595645787-20375-1-git-send-email-liheng40@huawei.com Reported-by: Hulk Robot Signed-off-by: Li Heng Reviewed-by: Parav Pandit Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 10f650a17ae7..3096e73797b7 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1710,7 +1710,7 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, if (!(rdma_protocol_ib(qp->device, attr->alt_ah_attr.port_num) && rdma_protocol_ib(qp->device, port))) { - ret = EINVAL; + ret = -EINVAL; goto out; } } -- cgit From 71cab8ef5c9e53ae2359c4130f4365428ba10136 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Jul 2020 12:57:46 +0300 Subject: RDMA/mlx5: Delete unreachable code Delete two occurrences of unreachable code discovered by the Coverity. 
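Both removals follow from local reasoning rather than a behaviour change. In mlx5_ib_get_native_port_mdev() the pointer is the address of an array element, so the NULL check that followed it could never fire:

    port = &ibdev->port[ib_port_num - 1];
    if (!port)          /* always false: &array[i] cannot be NULL */
        return NULL;

Likewise, in mlx5_ib_modify_dct() the error variable is already known to be zero once the final state check has passed, so the trailing branch that set IB_QPS_ERR was dead and the function can simply record the new state and return 0, as the qp.c hunk below shows.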
Link: https://lore.kernel.org/r/20200727095746.495915-1-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 3 --- drivers/infiniband/hw/mlx5/qp.c | 10 ++++------ 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b730297b4d69..a973008286fd 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -287,9 +287,6 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev, *native_port_num = 1; port = &ibdev->port[ib_port_num - 1]; - if (!port) - return NULL; - spin_lock(&port->mp.mpi_lock); mpi = ibdev->port[ib_port_num - 1].mp.mpi; if (mpi && !mpi->unaffiliate) { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 3da8af9e4ee4..9fbe0583f579 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4102,9 +4102,9 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_ib_dev *dev = to_mdev(ibqp->device); enum ib_qp_state cur_state, new_state; - int err = 0; int required = IB_QP_STATE; void *dctc; + int err; if (!(attr_mask & IB_QP_STATE)) return -EINVAL; @@ -4198,11 +4198,9 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, mlx5_ib_warn(dev, "Modify DCT: Invalid transition from %d to %d\n", cur_state, new_state); return -EINVAL; } - if (err) - qp->state = IB_QPS_ERR; - else - qp->state = new_state; - return err; + + qp->state = new_state; + return 0; } int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, -- cgit From d56a7852ec4d194aa07e45adf5586343a4ed0b59 Mon Sep 17 00:00:00 2001 From: Yamin Friedman Date: Wed, 22 Jul 2020 16:56:27 +0300 Subject: IB/iser: use new shared CQ mechanism Have the driver use shared CQs provided by the rdma core driver. Since this provides similar functionality to iser_comp it has been removed. Now there is no reason to allocate very large CQs when the driver is loaded while gaining the advantage of shared CQs. 
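The heart of the conversion, visible in the iser_verbs.c diff below, is to stop managing a private array of per-vector CQs and instead ask the core CQ pool for an appropriately sized shared CQ per connection:

    cq_size = max_send_wr + ISER_QP_MAX_RECV_DTOS;
    ib_conn->cq = ib_cq_pool_get(ib_dev, cq_size, -1 /* any vector */,
                                 IB_POLL_SOFTIRQ);
    if (IS_ERR(ib_conn->cq)) {
        ret = PTR_ERR(ib_conn->cq);
        goto cq_err;
    }
    ib_conn->cq_size = cq_size;

    init_attr.send_cq = ib_conn->cq;
    init_attr.recv_cq = ib_conn->cq;

The teardown side of the same patch is expected to hand the CQ back with the pool's counterpart call, ib_cq_pool_put(ib_conn->cq, ib_conn->cq_size), rather than freeing a privately owned CQ.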
Link: https://lore.kernel.org/r/20200722135629.49467-1-maxg@mellanox.com Signed-off-by: Yamin Friedman Acked-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/iser/iscsi_iser.h | 23 ++----- drivers/infiniband/ulp/iser/iser_verbs.c | 112 +++++++------------------------ 2 files changed, 29 insertions(+), 106 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 0fd8c243ec67..78ee9445f801 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -299,18 +299,6 @@ struct iser_conn; struct ib_conn; struct iscsi_iser_task; -/** - * struct iser_comp - iSER completion context - * - * @cq: completion queue - * @active_qps: Number of active QPs attached - * to completion context - */ -struct iser_comp { - struct ib_cq *cq; - int active_qps; -}; - /** * struct iser_device - iSER device handle * @@ -320,9 +308,6 @@ struct iser_comp { * @event_handler: IB events handle routine * @ig_list: entry in devices list * @refcount: Reference counter, dominated by open iser connections - * @comps_used: Number of completion contexts used, Min between online - * cpus and device max completion vectors - * @comps: Dinamically allocated array of completion handlers */ struct iser_device { struct ib_device *ib_device; @@ -330,8 +315,6 @@ struct iser_device { struct ib_event_handler event_handler; struct list_head ig_list; int refcount; - int comps_used; - struct iser_comp *comps; }; /** @@ -382,11 +365,12 @@ struct iser_fr_pool { * * @cma_id: rdma_cm connection maneger handle * @qp: Connection Queue-pair + * @cq: Connection completion queue + * @cq_size: The number of max outstanding completions * @post_recv_buf_count: post receive counter * @sig_count: send work request signal count * @rx_wr: receive work request for batch posts * @device: reference to iser device - * @comp: iser completion context * @fr_pool: connection fast registration poool * @pi_support: Indicate device T10-PI support * @reg_cqe: completion handler @@ -394,11 +378,12 @@ struct iser_fr_pool { struct ib_conn { struct rdma_cm_id *cma_id; struct ib_qp *qp; + struct ib_cq *cq; + u32 cq_size; int post_recv_buf_count; u8 sig_count; struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; struct iser_device *device; - struct iser_comp *comp; struct iser_fr_pool fr_pool; bool pi_support; struct ib_cqe reg_cqe; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index c1f44c41f501..699e075ae1b3 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -68,59 +68,23 @@ static void iser_event_handler(struct ib_event_handler *handler, static int iser_create_device_ib_res(struct iser_device *device) { struct ib_device *ib_dev = device->ib_device; - int i, max_cqe; if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) { iser_err("IB device does not support memory registrations\n"); return -1; } - device->comps_used = min_t(int, num_online_cpus(), - ib_dev->num_comp_vectors); - - device->comps = kcalloc(device->comps_used, sizeof(*device->comps), - GFP_KERNEL); - if (!device->comps) - goto comps_err; - - max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe); - - iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n", - device->comps_used, dev_name(&ib_dev->dev), - ib_dev->num_comp_vectors, max_cqe); - device->pd = ib_alloc_pd(ib_dev, iser_always_reg ? 
0 : IB_PD_UNSAFE_GLOBAL_RKEY); if (IS_ERR(device->pd)) goto pd_err; - for (i = 0; i < device->comps_used; i++) { - struct iser_comp *comp = &device->comps[i]; - - comp->cq = ib_alloc_cq(ib_dev, comp, max_cqe, i, - IB_POLL_SOFTIRQ); - if (IS_ERR(comp->cq)) { - comp->cq = NULL; - goto cq_err; - } - } - INIT_IB_EVENT_HANDLER(&device->event_handler, ib_dev, iser_event_handler); ib_register_event_handler(&device->event_handler); return 0; -cq_err: - for (i = 0; i < device->comps_used; i++) { - struct iser_comp *comp = &device->comps[i]; - - if (comp->cq) - ib_free_cq(comp->cq); - } - ib_dealloc_pd(device->pd); pd_err: - kfree(device->comps); -comps_err: iser_err("failed to allocate an IB resource\n"); return -1; } @@ -131,20 +95,9 @@ comps_err: */ static void iser_free_device_ib_res(struct iser_device *device) { - int i; - - for (i = 0; i < device->comps_used; i++) { - struct iser_comp *comp = &device->comps[i]; - - ib_free_cq(comp->cq); - comp->cq = NULL; - } - ib_unregister_event_handler(&device->event_handler); ib_dealloc_pd(device->pd); - kfree(device->comps); - device->comps = NULL; device->pd = NULL; } @@ -287,70 +240,57 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn) struct ib_device *ib_dev; struct ib_qp_init_attr init_attr; int ret = -ENOMEM; - int index, min_index = 0; + unsigned int max_send_wr, cq_size; BUG_ON(ib_conn->device == NULL); device = ib_conn->device; ib_dev = device->ib_device; - memset(&init_attr, 0, sizeof init_attr); + if (ib_conn->pi_support) + max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1; + else + max_send_wr = ISER_QP_MAX_REQ_DTOS + 1; + max_send_wr = min_t(unsigned int, max_send_wr, + (unsigned int)ib_dev->attrs.max_qp_wr); - mutex_lock(&ig.connlist_mutex); - /* select the CQ with the minimal number of usages */ - for (index = 0; index < device->comps_used; index++) { - if (device->comps[index].active_qps < - device->comps[min_index].active_qps) - min_index = index; + cq_size = max_send_wr + ISER_QP_MAX_RECV_DTOS; + ib_conn->cq = ib_cq_pool_get(ib_dev, cq_size, -1, IB_POLL_SOFTIRQ); + if (IS_ERR(ib_conn->cq)) { + ret = PTR_ERR(ib_conn->cq); + goto cq_err; } - ib_conn->comp = &device->comps[min_index]; - ib_conn->comp->active_qps++; - mutex_unlock(&ig.connlist_mutex); - iser_info("cq index %d used for ib_conn %p\n", min_index, ib_conn); + ib_conn->cq_size = cq_size; + + memset(&init_attr, 0, sizeof(init_attr)); init_attr.event_handler = iser_qp_event_callback; init_attr.qp_context = (void *)ib_conn; - init_attr.send_cq = ib_conn->comp->cq; - init_attr.recv_cq = ib_conn->comp->cq; + init_attr.send_cq = ib_conn->cq; + init_attr.recv_cq = ib_conn->cq; init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; init_attr.cap.max_send_sge = 2; init_attr.cap.max_recv_sge = 1; init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; init_attr.qp_type = IB_QPT_RC; - if (ib_conn->pi_support) { - init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1; + init_attr.cap.max_send_wr = max_send_wr; + if (ib_conn->pi_support) init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN; - iser_conn->max_cmds = - ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS); - } else { - if (ib_dev->attrs.max_qp_wr > ISER_QP_MAX_REQ_DTOS) { - init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1; - iser_conn->max_cmds = - ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS); - } else { - init_attr.cap.max_send_wr = ib_dev->attrs.max_qp_wr; - iser_conn->max_cmds = - ISER_GET_MAX_XMIT_CMDS(ib_dev->attrs.max_qp_wr); - iser_dbg("device %s supports max_send_wr %d\n", - dev_name(&device->ib_device->dev), - 
ib_dev->attrs.max_qp_wr); - } - } + iser_conn->max_cmds = ISER_GET_MAX_XMIT_CMDS(max_send_wr - 1); ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr); if (ret) goto out_err; ib_conn->qp = ib_conn->cma_id->qp; - iser_info("setting conn %p cma_id %p qp %p\n", + iser_info("setting conn %p cma_id %p qp %p max_send_wr %d\n", ib_conn, ib_conn->cma_id, - ib_conn->cma_id->qp); + ib_conn->cma_id->qp, max_send_wr); return ret; out_err: - mutex_lock(&ig.connlist_mutex); - ib_conn->comp->active_qps--; - mutex_unlock(&ig.connlist_mutex); + ib_cq_pool_put(ib_conn->cq, ib_conn->cq_size); +cq_err: iser_err("unable to alloc mem or create resource, err %d\n", ret); return ret; @@ -462,10 +402,8 @@ static void iser_free_ib_conn_res(struct iser_conn *iser_conn, iser_conn, ib_conn->cma_id, ib_conn->qp); if (ib_conn->qp != NULL) { - mutex_lock(&ig.connlist_mutex); - ib_conn->comp->active_qps--; - mutex_unlock(&ig.connlist_mutex); rdma_destroy_qp(ib_conn->cma_id); + ib_cq_pool_put(ib_conn->cq, ib_conn->cq_size); ib_conn->qp = NULL; } -- cgit From c6e663072333581364a9833a8bc895c699fb85ec Mon Sep 17 00:00:00 2001 From: Yamin Friedman Date: Wed, 22 Jul 2020 16:56:28 +0300 Subject: IB/isert: use new shared CQ mechanism Have the driver use shared CQs provided by the rdma core driver. Since this provides similar functionality to iser_comp it has been removed. Now there is no reason to allocate very large CQs when the driver is loaded while gaining the advantage of shared CQs. Previously when a single connection was opened a CQ was opened for every core with enough space for eight connections, this is a very large overhead that in most cases will not be utilized. Link: https://lore.kernel.org/r/20200722135629.49467-2-maxg@mellanox.com Signed-off-by: Yamin Friedman Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/isert/ib_isert.c | 169 ++++++-------------------------- drivers/infiniband/ulp/isert/ib_isert.h | 18 +--- 2 files changed, 34 insertions(+), 153 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 49f5f053c7b2..61e2f7fc513d 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -24,13 +24,6 @@ #include "ib_isert.h" -#define ISERT_MAX_CONN 8 -#define ISER_MAX_RX_CQ_LEN (ISERT_QP_MAX_RECV_DTOS * ISERT_MAX_CONN) -#define ISER_MAX_TX_CQ_LEN \ - ((ISERT_QP_MAX_REQ_DTOS + ISCSI_DEF_XMIT_CMDS_MAX) * ISERT_MAX_CONN) -#define ISER_MAX_CQ_LEN (ISER_MAX_RX_CQ_LEN + ISER_MAX_TX_CQ_LEN + \ - ISERT_MAX_CONN) - static int isert_debug_level; module_param_named(debug_level, isert_debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:0)"); @@ -82,50 +75,29 @@ isert_qp_event_callback(struct ib_event *e, void *context) } } -static struct isert_comp * -isert_comp_get(struct isert_conn *isert_conn) -{ - struct isert_device *device = isert_conn->device; - struct isert_comp *comp; - int i, min = 0; - - mutex_lock(&device_list_mutex); - for (i = 0; i < device->comps_used; i++) - if (device->comps[i].active_qps < - device->comps[min].active_qps) - min = i; - comp = &device->comps[min]; - comp->active_qps++; - mutex_unlock(&device_list_mutex); - - isert_info("conn %p, using comp %p min_index: %d\n", - isert_conn, comp, min); - - return comp; -} - -static void -isert_comp_put(struct isert_comp *comp) -{ - mutex_lock(&device_list_mutex); - comp->active_qps--; - mutex_unlock(&device_list_mutex); -} - static struct ib_qp * 
isert_create_qp(struct isert_conn *isert_conn, - struct isert_comp *comp, struct rdma_cm_id *cma_id) { + u32 cq_size = ISERT_QP_MAX_REQ_DTOS + ISERT_QP_MAX_RECV_DTOS + 2; struct isert_device *device = isert_conn->device; + struct ib_device *ib_dev = device->ib_device; struct ib_qp_init_attr attr; int ret, factor; + isert_conn->cq = ib_cq_pool_get(ib_dev, cq_size, -1, IB_POLL_WORKQUEUE); + if (IS_ERR(isert_conn->cq)) { + isert_err("Unable to allocate cq\n"); + ret = PTR_ERR(isert_conn->cq); + return ERR_PTR(ret); + } + isert_conn->cq_size = cq_size; + memset(&attr, 0, sizeof(struct ib_qp_init_attr)); attr.event_handler = isert_qp_event_callback; attr.qp_context = isert_conn; - attr.send_cq = comp->cq; - attr.recv_cq = comp->cq; + attr.send_cq = isert_conn->cq; + attr.recv_cq = isert_conn->cq; attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1; attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1; factor = rdma_rw_mr_factor(device->ib_device, cma_id->port_num, @@ -141,31 +113,14 @@ isert_create_qp(struct isert_conn *isert_conn, ret = rdma_create_qp(cma_id, device->pd, &attr); if (ret) { isert_err("rdma_create_qp failed for cma_id %d\n", ret); + ib_cq_pool_put(isert_conn->cq, isert_conn->cq_size); + return ERR_PTR(ret); } return cma_id->qp; } -static int -isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id) -{ - struct isert_comp *comp; - int ret; - - comp = isert_comp_get(isert_conn); - isert_conn->qp = isert_create_qp(isert_conn, comp, cma_id); - if (IS_ERR(isert_conn->qp)) { - ret = PTR_ERR(isert_conn->qp); - goto err; - } - - return 0; -err: - isert_comp_put(comp); - return ret; -} - static int isert_alloc_rx_descriptors(struct isert_conn *isert_conn) { @@ -233,61 +188,6 @@ isert_free_rx_descriptors(struct isert_conn *isert_conn) isert_conn->rx_descs = NULL; } -static void -isert_free_comps(struct isert_device *device) -{ - int i; - - for (i = 0; i < device->comps_used; i++) { - struct isert_comp *comp = &device->comps[i]; - - if (comp->cq) - ib_free_cq(comp->cq); - } - kfree(device->comps); -} - -static int -isert_alloc_comps(struct isert_device *device) -{ - int i, max_cqe, ret = 0; - - device->comps_used = min(ISERT_MAX_CQ, min_t(int, num_online_cpus(), - device->ib_device->num_comp_vectors)); - - isert_info("Using %d CQs, %s supports %d vectors support " - "pi_capable %d\n", - device->comps_used, dev_name(&device->ib_device->dev), - device->ib_device->num_comp_vectors, - device->pi_capable); - - device->comps = kcalloc(device->comps_used, sizeof(struct isert_comp), - GFP_KERNEL); - if (!device->comps) - return -ENOMEM; - - max_cqe = min(ISER_MAX_CQ_LEN, device->ib_device->attrs.max_cqe); - - for (i = 0; i < device->comps_used; i++) { - struct isert_comp *comp = &device->comps[i]; - - comp->device = device; - comp->cq = ib_alloc_cq(device->ib_device, comp, max_cqe, i, - IB_POLL_WORKQUEUE); - if (IS_ERR(comp->cq)) { - isert_err("Unable to allocate cq\n"); - ret = PTR_ERR(comp->cq); - comp->cq = NULL; - goto out_cq; - } - } - - return 0; -out_cq: - isert_free_comps(device); - return ret; -} - static int isert_create_device_ib_res(struct isert_device *device) { @@ -298,16 +198,12 @@ isert_create_device_ib_res(struct isert_device *device) ib_dev->attrs.max_send_sge, ib_dev->attrs.max_recv_sge); isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd); - ret = isert_alloc_comps(device); - if (ret) - goto out; - device->pd = ib_alloc_pd(ib_dev, 0); if (IS_ERR(device->pd)) { ret = PTR_ERR(device->pd); isert_err("failed to allocate pd, device %p, ret=%d\n", device, 
ret); - goto out_cq; + return ret; } /* Check signature cap */ @@ -315,13 +211,6 @@ isert_create_device_ib_res(struct isert_device *device) IB_DEVICE_INTEGRITY_HANDOVER ? true : false; return 0; - -out_cq: - isert_free_comps(device); -out: - if (ret > 0) - ret = -EINVAL; - return ret; } static void @@ -330,7 +219,6 @@ isert_free_device_ib_res(struct isert_device *device) isert_info("device %p\n", device); ib_dealloc_pd(device->pd); - isert_free_comps(device); } static void @@ -492,6 +380,13 @@ isert_set_nego_params(struct isert_conn *isert_conn, } } +static void +isert_destroy_qp(struct isert_conn *isert_conn) +{ + ib_destroy_qp(isert_conn->qp); + ib_cq_pool_put(isert_conn->cq, isert_conn->cq_size); +} + static int isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { @@ -532,17 +427,19 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) isert_set_nego_params(isert_conn, &event->param.conn); - ret = isert_conn_setup_qp(isert_conn, cma_id); - if (ret) + isert_conn->qp = isert_create_qp(isert_conn, cma_id); + if (IS_ERR(isert_conn->qp)) { + ret = PTR_ERR(isert_conn->qp); goto out_conn_dev; + } ret = isert_login_post_recv(isert_conn); if (ret) - goto out_conn_dev; + goto out_destroy_qp; ret = isert_rdma_accept(isert_conn); if (ret) - goto out_conn_dev; + goto out_destroy_qp; mutex_lock(&isert_np->mutex); list_add_tail(&isert_conn->node, &isert_np->accepted); @@ -550,6 +447,8 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) return 0; +out_destroy_qp: + isert_destroy_qp(isert_conn); out_conn_dev: isert_device_put(device); out_rsp_dma_map: @@ -574,12 +473,8 @@ isert_connect_release(struct isert_conn *isert_conn) !isert_conn->dev_removed) rdma_destroy_id(isert_conn->cm_id); - if (isert_conn->qp) { - struct isert_comp *comp = isert_conn->qp->recv_cq->cq_context; - - isert_comp_put(comp); - ib_destroy_qp(isert_conn->qp); - } + if (isert_conn->qp) + isert_destroy_qp(isert_conn); if (isert_conn->login_req_buf) isert_free_login_buf(isert_conn); diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index c9ccf1d87833..c55f7d9bfced 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -156,6 +156,8 @@ struct isert_conn { struct iser_tx_desc login_tx_desc; struct rdma_cm_id *cm_id; struct ib_qp *qp; + struct ib_cq *cq; + u32 cq_size; struct isert_device *device; struct mutex mutex; struct kref kref; @@ -166,22 +168,6 @@ struct isert_conn { bool dev_removed; }; -#define ISERT_MAX_CQ 64 - -/** - * struct isert_comp - iSER completion context - * - * @device: pointer to device handle - * @cq: completion queue - * @active_qps: Number of active QPs attached - * to completion context - */ -struct isert_comp { - struct isert_device *device; - struct ib_cq *cq; - int active_qps; -}; - struct isert_device { bool pi_capable; int refcount; -- cgit From c804af2c1d3152c0cf877eeb50d60c2d49ac0cf0 Mon Sep 17 00:00:00 2001 From: Yamin Friedman Date: Wed, 22 Jul 2020 16:56:29 +0300 Subject: IB/srpt: use new shared CQ mechanism Have the driver use shared CQs provided by the rdma core driver. This provides the advantage of improved efficiency handling interrupts. 
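One consequence of sharing CQs, visible in the diff below, is that cq->cq_context can no longer identify a single channel, so the completion handlers recover the per-channel pointer from the QP instead. A simplified sketch (my_ch and the handler names are illustrative):

#include <rdma/ib_verbs.h>

struct my_ch;

static void my_handle_completion(struct my_ch *ch, struct ib_wc *wc);

static void my_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	/* qp_context was set through ib_qp_init_attr.qp_context at QP creation */
	struct my_ch *ch = wc->qp->qp_context;

	/* the pooled 'cq' is shared and carries no per-channel state */
	my_handle_completion(ch, wc);
}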
Link: https://lore.kernel.org/r/20200722135629.49467-3-maxg@mellanox.com Signed-off-by: Yamin Friedman Reviewed-by: Max Gurtovoy Reviewed-by: Bart Van Assche Reviewed-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 17 +++++++++-------- drivers/infiniband/ulp/srpt/ib_srpt.h | 1 + 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 0fa65c683f09..0065eb17ae36 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -869,7 +869,7 @@ static int srpt_zerolength_write(struct srpt_rdma_ch *ch) static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc) { - struct srpt_rdma_ch *ch = cq->cq_context; + struct srpt_rdma_ch *ch = wc->qp->qp_context; pr_debug("%s-%d wc->status %d\n", ch->sess_name, ch->qp->qp_num, wc->status); @@ -1322,7 +1322,7 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) */ static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc) { - struct srpt_rdma_ch *ch = cq->cq_context; + struct srpt_rdma_ch *ch = wc->qp->qp_context; struct srpt_send_ioctx *ioctx = container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe); @@ -1683,7 +1683,7 @@ push: static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc) { - struct srpt_rdma_ch *ch = cq->cq_context; + struct srpt_rdma_ch *ch = wc->qp->qp_context; struct srpt_recv_ioctx *ioctx = container_of(wc->wr_cqe, struct srpt_recv_ioctx, ioctx.cqe); @@ -1744,7 +1744,7 @@ static void srpt_process_wait_list(struct srpt_rdma_ch *ch) */ static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc) { - struct srpt_rdma_ch *ch = cq->cq_context; + struct srpt_rdma_ch *ch = wc->qp->qp_context; struct srpt_send_ioctx *ioctx = container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe); enum srpt_command_state state; @@ -1791,7 +1791,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) goto out; retry: - ch->cq = ib_alloc_cq_any(sdev->device, ch, ch->rq_size + sq_size, + ch->cq = ib_cq_pool_get(sdev->device, ch->rq_size + sq_size, -1, IB_POLL_WORKQUEUE); if (IS_ERR(ch->cq)) { ret = PTR_ERR(ch->cq); @@ -1799,6 +1799,7 @@ retry: ch->rq_size + sq_size, ret); goto out; } + ch->cq_size = ch->rq_size + sq_size; qp_init->qp_context = (void *)ch; qp_init->event_handler @@ -1843,7 +1844,7 @@ retry: if (retry) { pr_debug("failed to create queue pair with sq_size = %d (%d) - retrying\n", sq_size, ret); - ib_free_cq(ch->cq); + ib_cq_pool_put(ch->cq, ch->cq_size); sq_size = max(sq_size / 2, MIN_SRPT_SQ_SIZE); goto retry; } else { @@ -1869,14 +1870,14 @@ out: err_destroy_cq: ch->qp = NULL; - ib_free_cq(ch->cq); + ib_cq_pool_put(ch->cq, ch->cq_size); goto out; } static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch) { ib_destroy_qp(ch->qp); - ib_free_cq(ch->cq); + ib_cq_pool_put(ch->cq, ch->cq_size); } /** diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index f31c349d07a1..41435a699b53 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -300,6 +300,7 @@ struct srpt_rdma_ch { } rdma_cm; }; struct ib_cq *cq; + u32 cq_size; struct ib_cqe zw_cqe; struct rcu_head rcu; struct kref kref; -- cgit From 556c811f24b30cc883733a2eaf9e939817589231 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 22 Jul 2020 17:03:09 +0300 Subject: RDMA/efa: Expose maximum TX doorbell batch The device reports the maximum number of bytes to be written before ringing the doorbell (zero 
means unlimited). This patch queries the max batch size and reports it back to the userspace library. Link: https://lore.kernel.org/r/20200722140312.3651-2-galpress@amazon.com Reviewed-by: Daniel Kranzdorf Reviewed-by: Firas JahJah Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_admin_cmds_defs.h | 11 +++++++++++ drivers/infiniband/hw/efa/efa_com_cmd.c | 1 + drivers/infiniband/hw/efa/efa_com_cmd.h | 1 + drivers/infiniband/hw/efa/efa_verbs.c | 1 + include/uapi/rdma/efa-abi.h | 4 +++- 5 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index bef2bd291054..03e7388af06e 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -632,6 +632,17 @@ struct efa_admin_feature_queue_attr_desc { /* Maximum number of SGEs for a single RDMA read WQE */ u16 max_wr_rdma_sges; + + /* + * Maximum number of bytes that can be written to SQ between two + * consecutive doorbells (in units of 64B). Driver must ensure that only + * complete WQEs are written to queue before issuing a doorbell. + * Examples: max_tx_batch=16 and WQE size = 64B, means up to 16 WQEs can + * be written to SQ between two consecutive doorbells. max_tx_batch=11 + * and WQE size = 128B, means up to 5 WQEs can be written to SQ between + * two consecutive doorbells. Zero means unlimited. + */ + u16 max_tx_batch; }; struct efa_admin_feature_aenq_desc { diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index fabd8df2e78f..53cfde5c43d8 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -480,6 +480,7 @@ int efa_com_get_device_attr(struct efa_com_dev *edev, result->max_llq_size = resp.u.queue_attr.max_llq_size; result->sub_cqs_per_cq = resp.u.queue_attr.sub_cqs_per_cq; result->max_wr_rdma_sge = resp.u.queue_attr.max_wr_rdma_sges; + result->max_tx_batch = resp.u.queue_attr.max_tx_batch; err = efa_com_get_feature(edev, &resp, EFA_ADMIN_NETWORK_ATTR); if (err) { diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index 41ce4a476ee6..8df2a26d57d4 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -127,6 +127,7 @@ struct efa_com_get_device_attr_result { u16 max_sq_sge; u16 max_rq_sge; u16 max_wr_rdma_sge; + u16 max_tx_batch; u8 db_bar; }; diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 08313f7c73bc..f49d14cebe4a 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1525,6 +1525,7 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq; resp.inline_buf_size = dev->dev_attr.inline_buf_size; resp.max_llq_size = dev->dev_attr.max_llq_size; + resp.max_tx_batch = dev->dev_attr.max_tx_batch; if (udata && udata->outlen) { err = ib_copy_to_udata(udata, &resp, diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h index 53b6e2036a9b..10781763da37 100644 --- a/include/uapi/rdma/efa-abi.h +++ b/include/uapi/rdma/efa-abi.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #ifndef EFA_ABI_USER_H @@ -31,6 +31,8 @@ struct efa_ibv_alloc_ucontext_resp { __u16 sub_cqs_per_cq; __u16 inline_buf_size; __u32 max_llq_size; /* bytes */ + __u16 max_tx_batch; /* units of 64 bytes */ + __u8 reserved_90[6]; }; struct efa_ibv_alloc_pd_resp { -- cgit From da2924bdca99768442c5e0ed0a9024ae79d62765 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 22 Jul 2020 17:03:10 +0300 Subject: RDMA/efa: Expose minimum SQ size The device reports the minimum SQ size required for creation. This patch queries the min SQ size and reports it back to the userspace library. Link: https://lore.kernel.org/r/20200722140312.3651-3-galpress@amazon.com Reviewed-by: Firas JahJah Reviewed-by: Shadi Ammouri Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_admin_cmds_defs.h | 4 ++-- drivers/infiniband/hw/efa/efa_com_cmd.c | 1 + drivers/infiniband/hw/efa/efa_com_cmd.h | 1 + drivers/infiniband/hw/efa/efa_verbs.c | 1 + include/uapi/rdma/efa-abi.h | 3 ++- 5 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index 03e7388af06e..5484b08bbc5d 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -606,8 +606,8 @@ struct efa_admin_feature_queue_attr_desc { /* Number of sub-CQs to be created for each CQ */ u16 sub_cqs_per_cq; - /* MBZ */ - u16 reserved; + /* Minimum number of WQEs per SQ */ + u16 min_sq_depth; /* Maximum number of SGEs (buffers) allowed for a single send WQE */ u16 max_wr_send_sges; diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index 53cfde5c43d8..6ac23627f65a 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -481,6 +481,7 @@ int efa_com_get_device_attr(struct efa_com_dev *edev, result->sub_cqs_per_cq = resp.u.queue_attr.sub_cqs_per_cq; result->max_wr_rdma_sge = resp.u.queue_attr.max_wr_rdma_sges; result->max_tx_batch = resp.u.queue_attr.max_tx_batch; + result->min_sq_depth = resp.u.queue_attr.min_sq_depth; err = efa_com_get_feature(edev, &resp, EFA_ADMIN_NETWORK_ATTR); if (err) { diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index 8df2a26d57d4..190bac23f585 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -128,6 +128,7 @@ struct efa_com_get_device_attr_result { u16 max_rq_sge; u16 max_wr_rdma_sge; u16 max_tx_batch; + u16 min_sq_depth; u8 db_bar; }; diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index f49d14cebe4a..26102ab333b2 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1526,6 +1526,7 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) resp.inline_buf_size = dev->dev_attr.inline_buf_size; resp.max_llq_size = dev->dev_attr.max_llq_size; resp.max_tx_batch = dev->dev_attr.max_tx_batch; + resp.min_sq_wr = dev->dev_attr.min_sq_depth; if (udata && udata->outlen) { err = ib_copy_to_udata(udata, &resp, diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h index 10781763da37..7ef2306f8dd4 100644 --- a/include/uapi/rdma/efa-abi.h +++ b/include/uapi/rdma/efa-abi.h @@ -32,7 +32,8 @@ struct efa_ibv_alloc_ucontext_resp { __u16 inline_buf_size; __u32 max_llq_size; /* bytes */ __u16 max_tx_batch; /* units of 64 bytes */ - __u8 reserved_90[6]; + __u16 
min_sq_wr; + __u8 reserved_a0[4]; }; struct efa_ibv_alloc_pd_resp { -- cgit From a5d87b698547233321466b2dc91271f5855a4df6 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 22 Jul 2020 17:03:11 +0300 Subject: RDMA/efa: User/kernel compatibility handshake mechanism Introduce a mechanism that performs an handshake between the userspace provider and kernel driver which verifies that the user supports all required features in order to operate correctly. The handshake verifies the needed functionality by comparing the reported device caps and the provider caps. If the device reports a non-zero capability the appropriate comp mask is required from the userspace provider in order to allocate the context. Link: https://lore.kernel.org/r/20200722140312.3651-4-galpress@amazon.com Reviewed-by: Shadi Ammouri Reviewed-by: Yossi Leybovich Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_verbs.c | 40 +++++++++++++++++++++++++++++++++++ include/uapi/rdma/efa-abi.h | 10 +++++++++ 2 files changed, 50 insertions(+) diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 26102ab333b2..fda175836fb6 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1501,11 +1501,39 @@ static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn) return efa_com_dealloc_uar(&dev->edev, ¶ms); } +#define EFA_CHECK_USER_COMP(_dev, _comp_mask, _attr, _mask, _attr_str) \ + (_attr_str = (!(_dev)->dev_attr._attr || ((_comp_mask) & (_mask))) ? \ + NULL : #_attr) + +static int efa_user_comp_handshake(const struct ib_ucontext *ibucontext, + const struct efa_ibv_alloc_ucontext_cmd *cmd) +{ + struct efa_dev *dev = to_edev(ibucontext->device); + char *attr_str; + + if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, max_tx_batch, + EFA_ALLOC_UCONTEXT_CMD_COMP_TX_BATCH, attr_str)) + goto err; + + if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, min_sq_depth, + EFA_ALLOC_UCONTEXT_CMD_COMP_MIN_SQ_WR, + attr_str)) + goto err; + + return 0; + +err: + ibdev_dbg(&dev->ibdev, "Userspace handshake failed for %s attribute\n", + attr_str); + return -EOPNOTSUPP; +} + int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) { struct efa_ucontext *ucontext = to_eucontext(ibucontext); struct efa_dev *dev = to_edev(ibucontext->device); struct efa_ibv_alloc_ucontext_resp resp = {}; + struct efa_ibv_alloc_ucontext_cmd cmd = {}; struct efa_com_alloc_uar_result result; int err; @@ -1514,6 +1542,18 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) * we will ack input fields in our response. */ + err = ib_copy_from_udata(&cmd, udata, + min(sizeof(cmd), udata->inlen)); + if (err) { + ibdev_dbg(&dev->ibdev, + "Cannot copy udata for alloc_ucontext\n"); + goto err_out; + } + + err = efa_user_comp_handshake(ibucontext, &cmd); + if (err) + goto err_out; + err = efa_com_alloc_uar(&dev->edev, &result); if (err) goto err_out; diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h index 7ef2306f8dd4..507a2862bedb 100644 --- a/include/uapi/rdma/efa-abi.h +++ b/include/uapi/rdma/efa-abi.h @@ -20,6 +20,16 @@ * hex bit offset of the field. 
*/ +enum { + EFA_ALLOC_UCONTEXT_CMD_COMP_TX_BATCH = 1 << 0, + EFA_ALLOC_UCONTEXT_CMD_COMP_MIN_SQ_WR = 1 << 1, +}; + +struct efa_ibv_alloc_ucontext_cmd { + __u32 comp_mask; + __u8 reserved_20[4]; +}; + enum efa_ibv_user_cmds_supp_udata { EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE = 1 << 0, EFA_USER_CMDS_SUPP_UDATA_CREATE_AH = 1 << 1, -- cgit From d4f9cb5c5b224dca3ff752c1bb854250bf114944 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 22 Jul 2020 17:03:12 +0300 Subject: RDMA/efa: Add EFA 0xefa1 PCI ID Add support for 0xefa1 devices. Link: https://lore.kernel.org/r/20200722140312.3651-5-galpress@amazon.com Reviewed-by: Shadi Ammouri Reviewed-by: Yossi Leybovich Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 82145574c928..92d701146320 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -12,10 +12,12 @@ #include "efa.h" -#define PCI_DEV_ID_EFA_VF 0xefa0 +#define PCI_DEV_ID_EFA0_VF 0xefa0 +#define PCI_DEV_ID_EFA1_VF 0xefa1 static const struct pci_device_id efa_pci_tbl[] = { - { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA_VF) }, + { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA0_VF) }, + { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA1_VF) }, { } }; -- cgit From d54f23c09ec62670901f1a2a4712a5218522ca2b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 23 Jul 2020 10:07:04 +0300 Subject: RDMA/cma: Simplify DEVICE_REMOVAL for internal_id cma_process_remove() triggers an unconditional rdma_destroy_id() for internal_id's and skips the event deliver and transition through RDMA_CM_DEVICE_REMOVAL. This is confusing and unnecessary. internal_id always has cma_listen_handler() as the handler, have it catch the RDMA_CM_DEVICE_REMOVAL event and directly consume it and signal removal. This way the FSM sequence never skips the DEVICE_REMOVAL case and the logic in this hard to test area is simplified. Link: https://lore.kernel.org/r/20200723070707.1771101-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 3d7cc9f0f3d4..77743ce5216b 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2478,6 +2478,10 @@ static int cma_listen_handler(struct rdma_cm_id *id, { struct rdma_id_private *id_priv = id->context; + /* Listening IDs are always destroyed on removal */ + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) + return -1; + id->context = id_priv->id.context; id->event_handler = id_priv->id.event_handler; trace_cm_event_handler(id_priv, event); @@ -4811,7 +4815,7 @@ static void cma_process_remove(struct cma_device *cma_dev) cma_id_get(id_priv); mutex_unlock(&lock); - ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv); + ret = cma_remove_id_dev(id_priv); cma_id_put(id_priv); if (ret) rdma_destroy_id(&id_priv->id); -- cgit From 3647a28de1ada8708efc78d956619b9df5004478 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 23 Jul 2020 10:07:05 +0300 Subject: RDMA/cma: Using the standard locking pattern when delivering the removal event Whenever an event is delivered to the handler it should be done under the handler_mutex and upon any non-zero return from the handler it should trigger destruction of the cm_id. 
cma_process_remove() skips some steps here, it is not necessarily wrong since the state change should prevent any races, but it is confusing and unnecessary. Follow the standard pattern here, with the slight twist that the transition to RDMA_CM_DEVICE_REMOVAL includes a cma_cancel_operation(). Link: https://lore.kernel.org/r/20200723070707.1771101-3-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 62 +++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 77743ce5216b..b51a0acd672b 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1921,6 +1921,8 @@ static int cma_cm_event_handler(struct rdma_id_private *id_priv, { int ret; + lockdep_assert_held(&id_priv->handler_mutex); + trace_cm_event_handler(id_priv, event); ret = id_priv->id.event_handler(&id_priv->id, event); trace_cm_event_done(id_priv, event, ret); @@ -4775,50 +4777,58 @@ free_cma_dev: return ret; } -static int cma_remove_id_dev(struct rdma_id_private *id_priv) +static void cma_send_device_removal_put(struct rdma_id_private *id_priv) { - struct rdma_cm_event event = {}; + struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL }; enum rdma_cm_state state; - int ret = 0; - - /* Record that we want to remove the device */ - state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL); - if (state == RDMA_CM_DESTROYING) - return 0; + unsigned long flags; - cma_cancel_operation(id_priv, state); mutex_lock(&id_priv->handler_mutex); + /* Record that we want to remove the device */ + spin_lock_irqsave(&id_priv->lock, flags); + state = id_priv->state; + if (state == RDMA_CM_DESTROYING || state == RDMA_CM_DEVICE_REMOVAL) { + spin_unlock_irqrestore(&id_priv->lock, flags); + mutex_unlock(&id_priv->handler_mutex); + cma_id_put(id_priv); + return; + } + id_priv->state = RDMA_CM_DEVICE_REMOVAL; + spin_unlock_irqrestore(&id_priv->lock, flags); - /* Check for destruction from another callback. */ - if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL)) - goto out; - - event.event = RDMA_CM_EVENT_DEVICE_REMOVAL; - ret = cma_cm_event_handler(id_priv, &event); -out: + if (cma_cm_event_handler(id_priv, &event)) { + /* + * At this point the ULP promises it won't call + * rdma_destroy_id() concurrently + */ + cma_id_put(id_priv); + mutex_unlock(&id_priv->handler_mutex); + rdma_destroy_id(&id_priv->id); + return; + } mutex_unlock(&id_priv->handler_mutex); - return ret; + + /* + * If this races with destroy then the thread that first assigns state + * to a destroying does the cancel. 
+ */ + cma_cancel_operation(id_priv, state); + cma_id_put(id_priv); } static void cma_process_remove(struct cma_device *cma_dev) { - struct rdma_id_private *id_priv; - int ret; - mutex_lock(&lock); while (!list_empty(&cma_dev->id_list)) { - id_priv = list_entry(cma_dev->id_list.next, - struct rdma_id_private, list); + struct rdma_id_private *id_priv = list_first_entry( + &cma_dev->id_list, struct rdma_id_private, list); list_del(&id_priv->listen_list); list_del_init(&id_priv->list); cma_id_get(id_priv); mutex_unlock(&lock); - ret = cma_remove_id_dev(id_priv); - cma_id_put(id_priv); - if (ret) - rdma_destroy_id(&id_priv->id); + cma_send_device_removal_put(id_priv); mutex_lock(&lock); } -- cgit From cc9c037343898eb7a775e6b81d092ee21eeff218 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 23 Jul 2020 10:07:06 +0300 Subject: RDMA/cma: Remove unneeded locking for req paths The REQ flows are concerned that once the handler is called on the new cm_id the ULP can choose to trigger a rdma_destroy_id() concurrently at any time. However, this is not true, while the ULP can call rdma_destroy_id(), it immediately blocks on the handler_mutex which prevents anything harmful from running concurrently. Remove the confusing extra locking and refcounts and make the handler_mutex protecting state during destroy more clear. Link: https://lore.kernel.org/r/20200723070707.1771101-4-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 31 ++++++------------------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index b51a0acd672b..e07498dceb59 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1827,21 +1827,21 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv) void rdma_destroy_id(struct rdma_cm_id *id) { - struct rdma_id_private *id_priv; + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); enum rdma_cm_state state; - id_priv = container_of(id, struct rdma_id_private, id); - trace_cm_id_destroy(id_priv); - state = cma_exch(id_priv, RDMA_CM_DESTROYING); - cma_cancel_operation(id_priv, state); - /* * Wait for any active callback to finish. New callbacks will find * the id_priv state set to destroying and abort. */ mutex_lock(&id_priv->handler_mutex); + trace_cm_id_destroy(id_priv); + state = cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); + cma_cancel_operation(id_priv, state); + rdma_restrack_del(&id_priv->res); if (id_priv->cma_dev) { if (rdma_cap_ib_cm(id_priv->id.device, 1)) { @@ -2201,19 +2201,9 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id, cm_id->context = conn_id; cm_id->cm_handler = cma_ib_handler; - /* - * Protect against the user destroying conn_id from another thread - * until we're done accessing it. - */ - cma_id_get(conn_id); ret = cma_cm_event_handler(conn_id, &event); if (ret) goto err3; - /* - * Acquire mutex to prevent user executing rdma_destroy_id() - * while we're accessing the cm_id. 
- */ - mutex_lock(&lock); if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD)) { trace_cm_send_mra(cm_id->context); @@ -2222,13 +2212,11 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id, mutex_unlock(&lock); mutex_unlock(&conn_id->handler_mutex); mutex_unlock(&listen_id->handler_mutex); - cma_id_put(conn_id); if (net_dev) dev_put(net_dev); return 0; err3: - cma_id_put(conn_id); /* Destroy the CM ID by returning a non-zero value. */ conn_id->cm_id.ib = NULL; err2: @@ -2405,11 +2393,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); - /* - * Protect against the user destroying conn_id from another thread - * until we're done accessing it. - */ - cma_id_get(conn_id); ret = cma_cm_event_handler(conn_id, &event); if (ret) { /* User wants to destroy the CM ID */ @@ -2417,13 +2400,11 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); mutex_unlock(&listen_id->handler_mutex); - cma_id_put(conn_id); rdma_destroy_id(&conn_id->id); return ret; } mutex_unlock(&conn_id->handler_mutex); - cma_id_put(conn_id); out: mutex_unlock(&listen_id->handler_mutex); -- cgit From f6a9d47ae6854980fc4b1676f1fe9f9fa45ea4e2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 23 Jul 2020 10:07:07 +0300 Subject: RDMA/cma: Execute rdma_cm destruction from a handler properly When a rdma_cm_id needs to be destroyed after a handler callback fails, part of the destruction pattern is open coded into each call site. Unfortunately the blind assignment to state discards important information needed to do cma_cancel_operation(). This results in active operations being left running after rdma_destroy_id() completes, and the use-after-free bugs from KASAN. Consolidate this entire pattern into destroy_id_handler_unlock() and manage the locking correctly. The state should be set to RDMA_CM_DESTROYING under the handler_lock to atomically ensure no futher handlers are called. 
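Stripped of the tracepoint and comments, the new destroy_id_handler_unlock() helper added by this patch reduces to the following sequence (a sketch, not the full function):

	lockdep_assert_held(&id_priv->handler_mutex);
	spin_lock_irqsave(&id_priv->lock, flags);
	state = id_priv->state;			/* kept for cma_cancel_operation() */
	id_priv->state = RDMA_CM_DESTROYING;
	spin_unlock_irqrestore(&id_priv->lock, flags);
	mutex_unlock(&id_priv->handler_mutex);	/* no further handler can run */
	_destroy_id(id_priv, state);		/* teardown proceeds unlocked */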
Link: https://lore.kernel.org/r/20200723070707.1771101-5-leon@kernel.org Reported-by: syzbot+08092148130652a6faae@syzkaller.appspotmail.com Reported-by: syzbot+a929647172775e335941@syzkaller.appspotmail.com Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 174 ++++++++++++++++++++---------------------- 1 file changed, 84 insertions(+), 90 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e07498dceb59..172cc16bd231 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -428,19 +428,6 @@ static int cma_comp_exch(struct rdma_id_private *id_priv, return ret; } -static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv, - enum rdma_cm_state exch) -{ - unsigned long flags; - enum rdma_cm_state old; - - spin_lock_irqsave(&id_priv->lock, flags); - old = id_priv->state; - id_priv->state = exch; - spin_unlock_irqrestore(&id_priv->lock, flags); - return old; -} - static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr) { return hdr->ip_version >> 4; @@ -1825,21 +1812,9 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv) } } -void rdma_destroy_id(struct rdma_cm_id *id) +static void _destroy_id(struct rdma_id_private *id_priv, + enum rdma_cm_state state) { - struct rdma_id_private *id_priv = - container_of(id, struct rdma_id_private, id); - enum rdma_cm_state state; - - /* - * Wait for any active callback to finish. New callbacks will find - * the id_priv state set to destroying and abort. - */ - mutex_lock(&id_priv->handler_mutex); - trace_cm_id_destroy(id_priv); - state = cma_exch(id_priv, RDMA_CM_DESTROYING); - mutex_unlock(&id_priv->handler_mutex); - cma_cancel_operation(id_priv, state); rdma_restrack_del(&id_priv->res); @@ -1870,6 +1845,42 @@ void rdma_destroy_id(struct rdma_cm_id *id) put_net(id_priv->id.route.addr.dev_addr.net); kfree(id_priv); } + +/* + * destroy an ID from within the handler_mutex. This ensures that no other + * handlers can start running concurrently. + */ +static void destroy_id_handler_unlock(struct rdma_id_private *id_priv) + __releases(&idprv->handler_mutex) +{ + enum rdma_cm_state state; + unsigned long flags; + + trace_cm_id_destroy(id_priv); + + /* + * Setting the state to destroyed under the handler mutex provides a + * fence against calling handler callbacks. If this is invoked due to + * the failure of a handler callback then it guarentees that no future + * handlers will be called. + */ + lockdep_assert_held(&id_priv->handler_mutex); + spin_lock_irqsave(&id_priv->lock, flags); + state = id_priv->state; + id_priv->state = RDMA_CM_DESTROYING; + spin_unlock_irqrestore(&id_priv->lock, flags); + mutex_unlock(&id_priv->handler_mutex); + _destroy_id(id_priv, state); +} + +void rdma_destroy_id(struct rdma_cm_id *id) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + + mutex_lock(&id_priv->handler_mutex); + destroy_id_handler_unlock(id_priv); +} EXPORT_SYMBOL(rdma_destroy_id); static int cma_rep_recv(struct rdma_id_private *id_priv) @@ -1934,7 +1945,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, { struct rdma_id_private *id_priv = cm_id->context; struct rdma_cm_event event = {}; - int ret = 0; + int ret; mutex_lock(&id_priv->handler_mutex); if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && @@ -2003,14 +2014,12 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, if (ret) { /* Destroy the CM ID by returning a non-zero value. 
*/ id_priv->cm_id.ib = NULL; - cma_exch(id_priv, RDMA_CM_DESTROYING); - mutex_unlock(&id_priv->handler_mutex); - rdma_destroy_id(&id_priv->id); + destroy_id_handler_unlock(id_priv); return ret; } out: mutex_unlock(&id_priv->handler_mutex); - return ret; + return 0; } static struct rdma_id_private * @@ -2172,7 +2181,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id, mutex_lock(&listen_id->handler_mutex); if (listen_id->state != RDMA_CM_LISTEN) { ret = -ECONNABORTED; - goto err1; + goto err_unlock; } offset = cma_user_data_offset(listen_id); @@ -2189,43 +2198,38 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id, } if (!conn_id) { ret = -ENOMEM; - goto err1; + goto err_unlock; } mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); ret = cma_ib_acquire_dev(conn_id, listen_id, &req); - if (ret) - goto err2; + if (ret) { + destroy_id_handler_unlock(conn_id); + goto err_unlock; + } conn_id->cm_id.ib = cm_id; cm_id->context = conn_id; cm_id->cm_handler = cma_ib_handler; ret = cma_cm_event_handler(conn_id, &event); - if (ret) - goto err3; + if (ret) { + /* Destroy the CM ID by returning a non-zero value. */ + conn_id->cm_id.ib = NULL; + mutex_unlock(&listen_id->handler_mutex); + destroy_id_handler_unlock(conn_id); + goto net_dev_put; + } + if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD)) { trace_cm_send_mra(cm_id->context); ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); } - mutex_unlock(&lock); mutex_unlock(&conn_id->handler_mutex); - mutex_unlock(&listen_id->handler_mutex); - if (net_dev) - dev_put(net_dev); - return 0; -err3: - /* Destroy the CM ID by returning a non-zero value. */ - conn_id->cm_id.ib = NULL; -err2: - cma_exch(conn_id, RDMA_CM_DESTROYING); - mutex_unlock(&conn_id->handler_mutex); -err1: +err_unlock: mutex_unlock(&listen_id->handler_mutex); - if (conn_id) - rdma_destroy_id(&conn_id->id); net_dev_put: if (net_dev) @@ -2325,9 +2329,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) if (ret) { /* Destroy the CM ID by returning a non-zero value. 
*/ id_priv->cm_id.iw = NULL; - cma_exch(id_priv, RDMA_CM_DESTROYING); - mutex_unlock(&id_priv->handler_mutex); - rdma_destroy_id(&id_priv->id); + destroy_id_handler_unlock(id_priv); return ret; } @@ -2374,16 +2376,16 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr); if (ret) { - mutex_unlock(&conn_id->handler_mutex); - rdma_destroy_id(new_cm_id); - goto out; + mutex_unlock(&listen_id->handler_mutex); + destroy_id_handler_unlock(conn_id); + return ret; } ret = cma_iw_acquire_dev(conn_id, listen_id); if (ret) { - mutex_unlock(&conn_id->handler_mutex); - rdma_destroy_id(new_cm_id); - goto out; + mutex_unlock(&listen_id->handler_mutex); + destroy_id_handler_unlock(conn_id); + return ret; } conn_id->cm_id.iw = cm_id; @@ -2397,10 +2399,8 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, if (ret) { /* User wants to destroy the CM ID */ conn_id->cm_id.iw = NULL; - cma_exch(conn_id, RDMA_CM_DESTROYING); - mutex_unlock(&conn_id->handler_mutex); mutex_unlock(&listen_id->handler_mutex); - rdma_destroy_id(&conn_id->id); + destroy_id_handler_unlock(conn_id); return ret; } @@ -2638,21 +2638,21 @@ static void cma_work_handler(struct work_struct *_work) { struct cma_work *work = container_of(_work, struct cma_work, work); struct rdma_id_private *id_priv = work->id; - int destroy = 0; mutex_lock(&id_priv->handler_mutex); if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) - goto out; + goto out_unlock; if (cma_cm_event_handler(id_priv, &work->event)) { - cma_exch(id_priv, RDMA_CM_DESTROYING); - destroy = 1; + cma_id_put(id_priv); + destroy_id_handler_unlock(id_priv); + goto out_free; } -out: + +out_unlock: mutex_unlock(&id_priv->handler_mutex); cma_id_put(id_priv); - if (destroy) - rdma_destroy_id(&id_priv->id); +out_free: kfree(work); } @@ -2660,23 +2660,22 @@ static void cma_ndev_work_handler(struct work_struct *_work) { struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work); struct rdma_id_private *id_priv = work->id; - int destroy = 0; mutex_lock(&id_priv->handler_mutex); if (id_priv->state == RDMA_CM_DESTROYING || id_priv->state == RDMA_CM_DEVICE_REMOVAL) - goto out; + goto out_unlock; if (cma_cm_event_handler(id_priv, &work->event)) { - cma_exch(id_priv, RDMA_CM_DESTROYING); - destroy = 1; + cma_id_put(id_priv); + destroy_id_handler_unlock(id_priv); + goto out_free; } -out: +out_unlock: mutex_unlock(&id_priv->handler_mutex); cma_id_put(id_priv); - if (destroy) - rdma_destroy_id(&id_priv->id); +out_free: kfree(work); } @@ -3152,9 +3151,7 @@ static void addr_handler(int status, struct sockaddr *src_addr, event.event = RDMA_CM_EVENT_ADDR_RESOLVED; if (cma_cm_event_handler(id_priv, &event)) { - cma_exch(id_priv, RDMA_CM_DESTROYING); - mutex_unlock(&id_priv->handler_mutex); - rdma_destroy_id(&id_priv->id); + destroy_id_handler_unlock(id_priv); return; } out: @@ -3759,7 +3756,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, struct rdma_cm_event event = {}; const struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; - int ret = 0; + int ret; mutex_lock(&id_priv->handler_mutex); if (id_priv->state != RDMA_CM_CONNECT) @@ -3809,14 +3806,12 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, if (ret) { /* Destroy the CM ID by returning a non-zero value. 
*/ id_priv->cm_id.ib = NULL; - cma_exch(id_priv, RDMA_CM_DESTROYING); - mutex_unlock(&id_priv->handler_mutex); - rdma_destroy_id(&id_priv->id); + destroy_id_handler_unlock(id_priv); return ret; } out: mutex_unlock(&id_priv->handler_mutex); - return ret; + return 0; } static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, @@ -4341,9 +4336,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) rdma_destroy_ah_attr(&event.param.ud.ah_attr); if (ret) { - cma_exch(id_priv, RDMA_CM_DESTROYING); - mutex_unlock(&id_priv->handler_mutex); - rdma_destroy_id(&id_priv->id); + destroy_id_handler_unlock(id_priv); return 0; } @@ -4784,7 +4777,8 @@ static void cma_send_device_removal_put(struct rdma_id_private *id_priv) */ cma_id_put(id_priv); mutex_unlock(&id_priv->handler_mutex); - rdma_destroy_id(&id_priv->id); + trace_cm_id_destroy(id_priv); + _destroy_id(id_priv, state); return; } mutex_unlock(&id_priv->handler_mutex); -- cgit From 09e0dbbeed82e35ce2cd21e086a6fac934163e2a Mon Sep 17 00:00:00 2001 From: Danil Kipnis Date: Fri, 24 Jul 2020 16:45:06 +0530 Subject: RDMA/rtrs-clt: add an additional random 8 seconds before reconnecting In order to avoid all the clients to start reconnecting at the same time schedule the reconnect dwork with a random jitter of +[0,8] seconds. Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Link: https://lore.kernel.org/r/20200724111508.15734-2-haris.iqbal@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Md Haris Iqbal Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 564388a85603..5b31d3b03737 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -12,6 +12,7 @@ #include #include +#include #include "rtrs-clt.h" #include "rtrs-log.h" @@ -23,6 +24,12 @@ * leads to "false positives" failed reconnect attempts */ #define RTRS_RECONNECT_BACKOFF 1000 +/* + * Wait for additional random time between 0 and 8 seconds + * before starting to reconnect to avoid clients reconnecting + * all at once in case of a major network outage + */ +#define RTRS_RECONNECT_SEED 8 MODULE_DESCRIPTION("RDMA Transport Client"); MODULE_LICENSE("GPL"); @@ -306,7 +313,8 @@ static void rtrs_rdma_error_recovery(struct rtrs_clt_con *con) */ delay_ms = clt->reconnect_delay_sec * 1000; queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, - msecs_to_jiffies(delay_ms)); + msecs_to_jiffies(delay_ms + + prandom_u32() % RTRS_RECONNECT_SEED)); } else { /* * Error can happen just on establishing new connection, @@ -2503,7 +2511,9 @@ reconnect_again: sess->stats->reconnects.fail_cnt++; delay_ms = clt->reconnect_delay_sec * 1000; queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, - msecs_to_jiffies(delay_ms)); + msecs_to_jiffies(delay_ms + + prandom_u32() % + RTRS_RECONNECT_SEED)); } } -- cgit From 03ed5a8cda659e3c71d106b0dd4ce6520e4dcd6e Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Fri, 24 Jul 2020 16:45:08 +0530 Subject: RDMA/rtrs: remove WQ_MEM_RECLAIM for rtrs_wq lockdep triggers a warning from time to time when running a regression test: rnbd_client L685: Device disconnected. 
rnbd_client L1756: Unloading module workqueue: WQ_MEM_RECLAIM rtrs_client_wq:rtrs_clt_reconnect_work [rtrs_client] is flushing !WQ_MEM_RECLAIM ib_addr:process_one_req [ib_core] WARNING: CPU: 2 PID: 18824 at kernel/workqueue.c:2517 check_flush_dependency+0xad/0x130 The root cause is workqueue core expect flushing should not be done for a !WQ_MEM_RECLAIM wq from a WQ_MEM_RECLAIM workqueue. In above case ib_addr workqueue without WQ_MEM_RECLAIM, but rtrs_wq WQ_MEM_RECLAIM. To avoid the warning, remove the WQ_MEM_RECLAIM flag. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Link: https://lore.kernel.org/r/20200724111508.15734-4-haris.iqbal@cloud.ionos.com Signed-off-by: Jack Wang Signed-off-by: Md Haris Iqbal Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 2 +- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 5b31d3b03737..776e89231c52 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -2982,7 +2982,7 @@ static int __init rtrs_client_init(void) pr_err("Failed to create rtrs-client dev class\n"); return PTR_ERR(rtrs_clt_dev_class); } - rtrs_wq = alloc_workqueue("rtrs_client_wq", WQ_MEM_RECLAIM, 0); + rtrs_wq = alloc_workqueue("rtrs_client_wq", 0, 0); if (!rtrs_wq) { class_destroy(rtrs_clt_dev_class); return -ENOMEM; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 0d9241f5d9e6..a219bd1bdbc2 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -2150,7 +2150,7 @@ static int __init rtrs_server_init(void) err = PTR_ERR(rtrs_dev_class); goto out_chunk_pool; } - rtrs_wq = alloc_workqueue("rtrs_server_wq", WQ_MEM_RECLAIM, 0); + rtrs_wq = alloc_workqueue("rtrs_server_wq", 0, 0); if (!rtrs_wq) { err = -ENOMEM; goto out_dev_class; -- cgit From 6bf9d8f6f0df3f7aa852dc111c960bc04578c7c5 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 19 Jul 2020 10:25:21 +0300 Subject: RDMA/include: Replace license text with SPDX tags The header files in RDMA subsystem are dual licensed and can be described by simple SPDX tag, so replace all of them at once together with making them use the same coding style for header guard defines. 
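As an illustration of the resulting layout, include/rdma/ib.h (the first file in the diff) ends up looking like this; the trailing #endif is implied by the existing guard and is not part of the hunks shown:

/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
 * Copyright (c) 2010 Intel Corporation. All rights reserved.
 */
#ifndef _RDMA_IB_H
#define _RDMA_IB_H

/* ... declarations unchanged ... */

#endif /* _RDMA_IB_H */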
Link: https://lore.kernel.org/r/20200719072521.135260-1-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib.h | 31 ++--------------------- include/rdma/ib_addr.h | 31 ++--------------------- include/rdma/ib_cache.h | 29 +-------------------- include/rdma/ib_cm.h | 1 + include/rdma/ib_hdrs.h | 44 +------------------------------- include/rdma/ib_mad.h | 31 ++--------------------- include/rdma/ib_marshall.h | 31 ++--------------------- include/rdma/ib_pack.h | 29 +-------------------- include/rdma/ib_pma.h | 31 ++--------------------- include/rdma/ib_sa.h | 29 +-------------------- include/rdma/ib_smi.h | 31 ++--------------------- include/rdma/ib_umem.h | 29 +-------------------- include/rdma/ib_umem_odp.h | 29 +-------------------- include/rdma/ib_verbs.h | 31 ++--------------------- include/rdma/iw_cm.h | 30 ++-------------------- include/rdma/iw_portmap.h | 30 ++-------------------- include/rdma/opa_addr.h | 44 +------------------------------- include/rdma/opa_port_info.h | 31 ++--------------------- include/rdma/opa_smi.h | 31 ++--------------------- include/rdma/opa_vnic.h | 49 +++--------------------------------- include/rdma/rdma_cm.h | 31 ++--------------------- include/rdma/rdma_cm_ib.h | 31 ++--------------------- include/rdma/rdma_netlink.h | 2 +- include/rdma/rdma_vt.h | 50 +++--------------------------------- include/rdma/rdmavt_cq.h | 53 +++------------------------------------ include/rdma/rdmavt_mr.h | 50 +++--------------------------------- include/rdma/rdmavt_qp.h | 50 +++--------------------------------- include/rdma/uverbs_ioctl.h | 29 +-------------------- include/rdma/uverbs_named_ioctl.h | 29 +-------------------- include/rdma/uverbs_std_types.h | 29 +-------------------- include/rdma/uverbs_types.h | 29 +-------------------- 31 files changed, 59 insertions(+), 946 deletions(-) diff --git a/include/rdma/ib.h b/include/rdma/ib.h index fe2fc9e91588..83139b9ce409 100644 --- a/include/rdma/ib.h +++ b/include/rdma/ib.h @@ -1,36 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2010 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
*/ -#if !defined(_RDMA_IB_H) +#ifndef _RDMA_IB_H #define _RDMA_IB_H #include diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 2734c895c1bf..b0e636ac6690 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -1,37 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ -#if !defined(IB_ADDR_H) +#ifndef IB_ADDR_H #define IB_ADDR_H #include diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index e06d13388ae7..66a8f369a2fa 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -1,35 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef _IB_CACHE_H diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 0f1ea5f2d01c..382427add677 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -6,6 +6,7 @@ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. */ + #ifndef IB_CM_H #define IB_CM_H diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h index 9a90bd031e8c..57c1ac881d08 100644 --- a/include/rdma/ib_hdrs.h +++ b/include/rdma/ib_hdrs.h @@ -1,48 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright(c) 2016 - 2018 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #ifndef IB_HDRS_H diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 8c093fc1bb9f..8dfb1ddf345a 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -1,40 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2004 Infinicon Corporation. All rights reserved. * Copyright (c) 2004 Intel Corporation. All rights reserved. 
* Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004-2006 Voltaire Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ -#if !defined(IB_MAD_H) +#ifndef IB_MAD_H #define IB_MAD_H #include diff --git a/include/rdma/ib_marshall.h b/include/rdma/ib_marshall.h index 8ebf84ae9ed1..1838869aad28 100644 --- a/include/rdma/ib_marshall.h +++ b/include/rdma/ib_marshall.h @@ -1,36 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
*/ -#if !defined(IB_USER_MARSHALL_H) +#ifndef IB_USER_MARSHALL_H #define IB_USER_MARSHALL_H #include diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index 7ea1382ad0e5..a9162f25beaf 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2004 Topspin Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef IB_PACK_H diff --git a/include/rdma/ib_pma.h b/include/rdma/ib_pma.h index 2f8a65c1fca7..44c618203785 100644 --- a/include/rdma/ib_pma.h +++ b/include/rdma/ib_pma.h @@ -1,38 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. * All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
*/ -#if !defined(IB_PMA_H) +#ifndef IB_PMA_H #define IB_PMA_H #include diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 19520979b84c..693285e76f13 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -1,35 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2006 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef IB_SA_H diff --git a/include/rdma/ib_smi.h b/include/rdma/ib_smi.h index 7be0028f155c..fdb8633cbaff 100644 --- a/include/rdma/ib_smi.h +++ b/include/rdma/ib_smi.h @@ -1,40 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2004 Infinicon Corporation. All rights reserved. * Copyright (c) 2004 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ -#if !defined(IB_SMI_H) +#ifndef IB_SMI_H #define IB_SMI_H #include diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index e3518fd6b95b..71f573a418bf 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2007 Cisco Systems. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef IB_UMEM_H diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 64314ff76612..d16d2c17e733 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2014 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef IB_UMEM_ODP_H diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 087f001fd020..c0b2fa7e9b95 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2004 Infinicon Corporation. All rights reserved. @@ -6,37 +7,9 @@ * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ -#if !defined(IB_VERBS_H) +#ifndef IB_VERBS_H #define IB_VERBS_H #include diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h index 5aa8a9c76aa0..91975400e1b3 100644 --- a/include/rdma/iw_cm.h +++ b/include/rdma/iw_cm.h @@ -1,35 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ + #ifndef IW_CM_H #define IW_CM_H diff --git a/include/rdma/iw_portmap.h b/include/rdma/iw_portmap.h index c89535047c42..6dbc1a235974 100644 --- a/include/rdma/iw_portmap.h +++ b/include/rdma/iw_portmap.h @@ -1,35 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2014 Intel Corporation. All rights reserved. * Copyright (c) 2014 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ + #ifndef _IW_PORTMAP_H #define _IW_PORTMAP_H diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h index 66d4393d339c..c6bfa2640bac 100644 --- a/include/rdma/opa_addr.h +++ b/include/rdma/opa_addr.h @@ -1,48 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright(c) 2017 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #ifndef OPA_ADDR_H diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h index 0d9e6d74c385..73bcac90a048 100644 --- a/include/rdma/opa_port_info.h +++ b/include/rdma/opa_port_info.h @@ -1,36 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2014-2020 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ -#if !defined(OPA_PORT_INFO_H) +#ifndef OPA_PORT_INFO_H #define OPA_PORT_INFO_H #include diff --git a/include/rdma/opa_smi.h b/include/rdma/opa_smi.h index c7b2ef12792d..a9f1b5700e98 100644 --- a/include/rdma/opa_smi.h +++ b/include/rdma/opa_smi.h @@ -1,36 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2014 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ -#if !defined(OPA_SMI_H) +#ifndef OPA_SMI_H #define OPA_SMI_H #include diff --git a/include/rdma/opa_vnic.h b/include/rdma/opa_vnic.h index 6f244e759b4f..cbe3c2811455 100644 --- a/include/rdma/opa_vnic.h +++ b/include/rdma/opa_vnic.h @@ -1,52 +1,11 @@ -#ifndef _OPA_VNIC_H -#define _OPA_VNIC_H +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright(c) 2017 - 2020 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ +#ifndef _OPA_VNIC_H +#define _OPA_VNIC_H + /* * This file contains Intel Omni-Path (OPA) Virtual Network Interface * Controller (VNIC) specific declarations. diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 939d7abe026f..cf5da2ae49bf 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -1,37 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ -#if !defined(RDMA_CM_H) +#ifndef RDMA_CM_H #define RDMA_CM_H #include diff --git a/include/rdma/rdma_cm_ib.h b/include/rdma/rdma_cm_ib.h index 6a69d71a21a5..8354e7de7815 100644 --- a/include/rdma/rdma_cm_ib.h +++ b/include/rdma/rdma_cm_ib.h @@ -1,36 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2006 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ -#if !defined(RDMA_CM_IB_H) +#ifndef RDMA_CM_IB_H #define RDMA_CM_IB_H #include diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index ab22759de7ea..2758d9df71ee 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ + #ifndef _RDMA_NETLINK_H #define _RDMA_NETLINK_H - #include #include diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index ac5a9430abb6..9fd217b24916 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -1,53 +1,11 @@ -#ifndef DEF_RDMA_VT_H -#define DEF_RDMA_VT_H - +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright(c) 2016 - 2019 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ +#ifndef DEF_RDMA_VT_H +#define DEF_RDMA_VT_H + /* * Structure that low level drivers will populate in order to register with the * rdmavt layer. diff --git a/include/rdma/rdmavt_cq.h b/include/rdma/rdmavt_cq.h index 574eb7278f46..1fe2bb5a63b0 100644 --- a/include/rdma/rdmavt_cq.h +++ b/include/rdma/rdmavt_cq.h @@ -1,56 +1,11 @@ -#ifndef DEF_RDMAVT_INCCQ_H -#define DEF_RDMAVT_INCCQ_H - +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * * Copyright(c) 2016 - 2018 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Copyright(c) 2015 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * */ +#ifndef DEF_RDMAVT_INCCQ_H +#define DEF_RDMAVT_INCCQ_H + #include #include #include diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h index ce6c888f7fe7..c3367e9833ef 100644 --- a/include/rdma/rdmavt_mr.h +++ b/include/rdma/rdmavt_mr.h @@ -1,53 +1,11 @@ -#ifndef DEF_RDMAVT_INCMR_H -#define DEF_RDMAVT_INCMR_H - +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright(c) 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ +#ifndef DEF_RDMAVT_INCMR_H +#define DEF_RDMAVT_INCMR_H + /* * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once * drivers no longer need access to the MR directly. diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index c4369a6c2951..f88392204ae9 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -1,53 +1,11 @@ -#ifndef DEF_RDMAVT_INCQP_H -#define DEF_RDMAVT_INCQP_H - +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright(c) 2016 - 2020 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ +#ifndef DEF_RDMAVT_INCQP_H +#define DEF_RDMAVT_INCQP_H + #include #include #include diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index db419c8dbd10..b00270c72740 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
*/ #ifndef _UVERBS_IOCTL_ diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h index 6ae6cf8e4c2e..f04f5126f61e 100644 --- a/include/rdma/uverbs_named_ioctl.h +++ b/include/rdma/uverbs_named_ioctl.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef _UVERBS_NAMED_IOCTL_ diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 8451b19103ee..fe0512116958 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
*/ #ifndef _UVERBS_STD_TYPES__ diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index c15b298aa62f..06db27e35f40 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef _UVERBS_TYPES_ -- cgit From 1d70ad0f85435a7262de802b104e49e6598c50ff Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 27 Jul 2020 12:58:28 +0300 Subject: RDMA/netlink: Remove CAP_NET_RAW check when dump a raw QP When dumping QPs bound to a counter, raw QPs should be allowed to dump without the CAP_NET_RAW privilege. This is consistent with what "rdma res show qp" does. Fixes: c4ffee7c9bdb ("RDMA/netlink: Implement counter dumpit calback") Link: https://lore.kernel.org/r/20200727095828.496195-1-leon@kernel.org Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/nldev.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 76af7ea2875d..12d29d54a081 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -759,9 +759,6 @@ static int fill_stat_counter_qps(struct sk_buff *msg, xa_lock(&rt->xa); xa_for_each(&rt->xa, id, res) { qp = container_of(res, struct ib_qp, res); - if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) - continue; - if (!qp->counter || (qp->counter->id != counter->id)) continue; -- cgit From 57005c96b7cb21f66c4e31446547694eb7f3d894 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Tue, 28 Jul 2020 18:42:15 +0800 Subject: RDMA/hns: Remove redundant hardware opcode definitions HNS_ROCE_SQ_OPCODE_XXXs and HNS_ROCE_V2_WQE_OP_XXXs have same values, so remove a set of redundant definitions. In addition, remove the suffix of HNS_ROCE_V2_WQE_OP_BIND_MW_TYPE. 
Link: https://lore.kernel.org/r/1595932941-40613-2-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 26 +++++++++++++------------- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 18 +----------------- 2 files changed, 14 insertions(+), 30 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 2b0b676e4c2f..35d46b7e56b8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3169,51 +3169,51 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, /* SQ corresponding to CQE */ switch (roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_OPCODE_M, V2_CQE_BYTE_4_OPCODE_S) & 0x1f) { - case HNS_ROCE_SQ_OPCODE_SEND: + case HNS_ROCE_V2_WQE_OP_SEND: wc->opcode = IB_WC_SEND; break; - case HNS_ROCE_SQ_OPCODE_SEND_WITH_INV: + case HNS_ROCE_V2_WQE_OP_SEND_WITH_INV: wc->opcode = IB_WC_SEND; break; - case HNS_ROCE_SQ_OPCODE_SEND_WITH_IMM: + case HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM: wc->opcode = IB_WC_SEND; wc->wc_flags |= IB_WC_WITH_IMM; break; - case HNS_ROCE_SQ_OPCODE_RDMA_READ: + case HNS_ROCE_V2_WQE_OP_RDMA_READ: wc->opcode = IB_WC_RDMA_READ; wc->byte_len = le32_to_cpu(cqe->byte_cnt); break; - case HNS_ROCE_SQ_OPCODE_RDMA_WRITE: + case HNS_ROCE_V2_WQE_OP_RDMA_WRITE: wc->opcode = IB_WC_RDMA_WRITE; break; - case HNS_ROCE_SQ_OPCODE_RDMA_WRITE_WITH_IMM: + case HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM: wc->opcode = IB_WC_RDMA_WRITE; wc->wc_flags |= IB_WC_WITH_IMM; break; - case HNS_ROCE_SQ_OPCODE_LOCAL_INV: + case HNS_ROCE_V2_WQE_OP_LOCAL_INV: wc->opcode = IB_WC_LOCAL_INV; wc->wc_flags |= IB_WC_WITH_INVALIDATE; break; - case HNS_ROCE_SQ_OPCODE_ATOMIC_COMP_AND_SWAP: + case HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP: wc->opcode = IB_WC_COMP_SWAP; wc->byte_len = 8; break; - case HNS_ROCE_SQ_OPCODE_ATOMIC_FETCH_AND_ADD: + case HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD: wc->opcode = IB_WC_FETCH_ADD; wc->byte_len = 8; break; - case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_COMP_AND_SWAP: + case HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP: wc->opcode = IB_WC_MASKED_COMP_SWAP; wc->byte_len = 8; break; - case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_FETCH_AND_ADD: + case HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD: wc->opcode = IB_WC_MASKED_FETCH_ADD; wc->byte_len = 8; break; - case HNS_ROCE_SQ_OPCODE_FAST_REG_WR: + case HNS_ROCE_V2_WQE_OP_FAST_REG_PMR: wc->opcode = IB_WC_REG_MR; break; - case HNS_ROCE_SQ_OPCODE_BIND_MW: + case HNS_ROCE_V2_WQE_OP_BIND_MW: wc->opcode = IB_WC_REG_MR; break; default: diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index e176b0aaa4ac..53c26f380f25 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -179,26 +179,10 @@ enum { HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD = 0x9, HNS_ROCE_V2_WQE_OP_FAST_REG_PMR = 0xa, HNS_ROCE_V2_WQE_OP_LOCAL_INV = 0xb, - HNS_ROCE_V2_WQE_OP_BIND_MW_TYPE = 0xc, + HNS_ROCE_V2_WQE_OP_BIND_MW = 0xc, HNS_ROCE_V2_WQE_OP_MASK = 0x1f, }; -enum { - HNS_ROCE_SQ_OPCODE_SEND = 0x0, - HNS_ROCE_SQ_OPCODE_SEND_WITH_INV = 0x1, - HNS_ROCE_SQ_OPCODE_SEND_WITH_IMM = 0x2, - HNS_ROCE_SQ_OPCODE_RDMA_WRITE = 0x3, - HNS_ROCE_SQ_OPCODE_RDMA_WRITE_WITH_IMM = 0x4, - HNS_ROCE_SQ_OPCODE_RDMA_READ = 0x5, - HNS_ROCE_SQ_OPCODE_ATOMIC_COMP_AND_SWAP = 0x6, - HNS_ROCE_SQ_OPCODE_ATOMIC_FETCH_AND_ADD = 0x7, - HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_COMP_AND_SWAP = 0x8, - 
HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_FETCH_AND_ADD = 0x9, - HNS_ROCE_SQ_OPCODE_FAST_REG_WR = 0xa, - HNS_ROCE_SQ_OPCODE_LOCAL_INV = 0xb, - HNS_ROCE_SQ_OPCODE_BIND_MW = 0xc, -}; - enum { /* rq operations */ HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM = 0x0, -- cgit From cdc1f3e9465640ceb2cae106732ede0812e15f3a Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Tue, 28 Jul 2020 18:42:16 +0800 Subject: RDMA/hns: Refactor hns_roce_v2_set_hem() The parts about preparing and sending mailbox to hardware is not strongly related to other codes in hns_roce_v2_set_hem(), and can be encapsulated into a separate function. Link: https://lore.kernel.org/r/1595932941-40613-3-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 45 +++++++++++++++++------------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 35d46b7e56b8..516e246de571 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3373,11 +3373,33 @@ static int get_op_for_set_hem(struct hns_roce_dev *hr_dev, u32 type, return op + step_idx; } +static int set_hem_to_hw(struct hns_roce_dev *hr_dev, int obj, u64 bt_ba, + u32 hem_type, int step_idx) +{ + struct hns_roce_cmd_mailbox *mailbox; + int ret; + int op; + + op = get_op_for_set_hem(hr_dev, hem_type, step_idx); + if (op < 0) + return 0; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + ret = hns_roce_cmd_mbox(hr_dev, bt_ba, mailbox->dma, obj, + 0, op, HNS_ROCE_CMD_TIMEOUT_MSECS); + + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + + return ret; +} + static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, int obj, int step_idx) { - struct hns_roce_cmd_mailbox *mailbox; struct hns_roce_hem_iter iter; struct hns_roce_hem_mhop mhop; struct hns_roce_hem *hem; @@ -3389,7 +3411,6 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, u64 bt_ba = 0; u32 chunk_ba_num; u32 hop_num; - int op; if (!hns_roce_check_whether_mhop(hr_dev, table->type)) return 0; @@ -3411,14 +3432,6 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, hem_idx = i; } - op = get_op_for_set_hem(hr_dev, table->type, step_idx); - if (op == -EINVAL) - return 0; - - mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); - if (table->type == HEM_TYPE_SCCC) obj = mhop.l0_idx; @@ -3427,11 +3440,8 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, for (hns_roce_hem_first(hem, &iter); !hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) { bt_ba = hns_roce_hem_addr(&iter); - - /* configure the ba, tag, and op */ - ret = hns_roce_cmd_mbox(hr_dev, bt_ba, mailbox->dma, - obj, 0, op, - HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = set_hem_to_hw(hr_dev, obj, bt_ba, table->type, + step_idx); } } else { if (step_idx == 0) @@ -3439,12 +3449,9 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, else if (step_idx == 1 && hop_num == 2) bt_ba = table->bt_l1_dma_addr[l1_idx]; - /* configure the ba, tag, and op */ - ret = hns_roce_cmd_mbox(hr_dev, bt_ba, mailbox->dma, obj, - 0, op, HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = set_hem_to_hw(hr_dev, obj, bt_ba, table->type, step_idx); } - hns_roce_free_cmd_mailbox(hr_dev, mailbox); return ret; } -- cgit From a247fd28c19bdc97530ce147a23266eff9a39786 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Tue, 28 Jul 2020 18:42:17 +0800 
Subject: RDMA/hns: Remove support for HIP08_A HIP08_A is an temporary version and all features of it are supported by HIP08_B. So remove the relevant code. Link: https://lore.kernel.org/r/1595932941-40613-4-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Yangyang Li Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 5 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 97 +++++++++++++---------------- drivers/infiniband/hw/hns/hns_roce_qp.c | 10 --- 3 files changed, 47 insertions(+), 65 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 6e64931397ad..846954eab65c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -37,9 +37,8 @@ #define DRV_NAME "hns_roce" -/* hip08 is a pci device, it includes two version according pci version id */ -#define PCI_REVISION_ID_HIP08_A 0x20 -#define PCI_REVISION_ID_HIP08_B 0x21 +/* hip08 is a pci device */ +#define PCI_REVISION_ID_HIP08 0x21 #define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 516e246de571..ff738db335a0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1744,27 +1744,25 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->max_srq_wrs = HNS_ROCE_V2_MAX_SRQ_WR; caps->max_srq_sges = HNS_ROCE_V2_MAX_SRQ_SGE; - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) { - caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW | - HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR | - HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL; - - caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM; - caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ; - caps->qpc_timer_ba_pg_sz = 0; - caps->qpc_timer_buf_pg_sz = 0; - caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; - caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM; - caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ; - caps->cqc_timer_ba_pg_sz = 0; - caps->cqc_timer_buf_pg_sz = 0; - caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; - - caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ; - caps->sccc_ba_pg_sz = 0; - caps->sccc_buf_pg_sz = 0; - caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM; - } + caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW | + HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR | + HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL; + + caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM; + caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ; + caps->qpc_timer_ba_pg_sz = 0; + caps->qpc_timer_buf_pg_sz = 0; + caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; + caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM; + caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ; + caps->cqc_timer_ba_pg_sz = 0; + caps->cqc_timer_buf_pg_sz = 0; + caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; + + caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ; + caps->sccc_ba_pg_sz = 0; + caps->sccc_buf_pg_sz = 0; + caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM; } static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num, @@ -1995,20 +1993,18 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->srqc_bt_num, &caps->srqc_buf_pg_sz, &caps->srqc_ba_pg_sz, HEM_TYPE_SRQC); - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) { - caps->sccc_hop_num = ctx_hop_num; - caps->qpc_timer_hop_num = 
HNS_ROCE_HOP_NUM_0; - caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; + caps->sccc_hop_num = ctx_hop_num; + caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; + caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; - calc_pg_sz(caps->num_qps, caps->sccc_entry_sz, - caps->sccc_hop_num, caps->sccc_bt_num, - &caps->sccc_buf_pg_sz, &caps->sccc_ba_pg_sz, - HEM_TYPE_SCCC); - calc_pg_sz(caps->num_cqc_timer, caps->cqc_timer_entry_sz, - caps->cqc_timer_hop_num, caps->cqc_timer_bt_num, - &caps->cqc_timer_buf_pg_sz, - &caps->cqc_timer_ba_pg_sz, HEM_TYPE_CQC_TIMER); - } + calc_pg_sz(caps->num_qps, caps->sccc_entry_sz, + caps->sccc_hop_num, caps->sccc_bt_num, + &caps->sccc_buf_pg_sz, &caps->sccc_ba_pg_sz, + HEM_TYPE_SCCC); + calc_pg_sz(caps->num_cqc_timer, caps->cqc_timer_entry_sz, + caps->cqc_timer_hop_num, caps->cqc_timer_bt_num, + &caps->cqc_timer_buf_pg_sz, + &caps->cqc_timer_ba_pg_sz, HEM_TYPE_CQC_TIMER); calc_pg_sz(caps->num_cqe_segs, caps->mtt_entry_sz, caps->cqe_hop_num, 1, &caps->cqe_buf_pg_sz, &caps->cqe_ba_pg_sz, HEM_TYPE_CQE); @@ -2055,22 +2051,19 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) return ret; } - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) { - ret = hns_roce_query_pf_timer_resource(hr_dev); - if (ret) { - dev_err(hr_dev->dev, - "Query pf timer resource fail, ret = %d.\n", - ret); - return ret; - } + ret = hns_roce_query_pf_timer_resource(hr_dev); + if (ret) { + dev_err(hr_dev->dev, + "failed to query pf timer resource, ret = %d.\n", ret); + return ret; + } - ret = hns_roce_set_vf_switch_param(hr_dev, 0); - if (ret) { - dev_err(hr_dev->dev, - "Set function switch param fail, ret = %d.\n", - ret); - return ret; - } + ret = hns_roce_set_vf_switch_param(hr_dev, 0); + if (ret) { + dev_err(hr_dev->dev, + "failed to set function switch param, ret = %d.\n", + ret); + return ret; } hr_dev->vendor_part_id = hr_dev->pci_dev->device; @@ -2336,8 +2329,7 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev) { struct hns_roce_v2_priv *priv = hr_dev->priv; - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) - hns_roce_function_clear(hr_dev); + hns_roce_function_clear(hr_dev); hns_roce_free_link_table(hr_dev, &priv->tpq); hns_roce_free_link_table(hr_dev, &priv->tsq); @@ -4231,12 +4223,13 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M, V2_QPC_BYTE_24_HOP_LIMIT_S, 0); - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B && is_udp) + if (is_udp) roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S, grh->traffic_class >> 2); else roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S, grh->traffic_class); + roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S, 0); roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index a0a47bd66975..e94ca130ff5e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -411,7 +411,6 @@ static int set_extend_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap) { - struct ib_device *ibdev = &hr_dev->ib_dev; u32 cnt; cnt = max(1U, cap->max_send_sge); @@ -431,15 +430,6 @@ static int set_extend_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, } else if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) { cnt = roundup_pow_of_two(sq_wqe_cnt * (hr_qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE)); - 
- if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) { - if (cnt > hr_dev->caps.max_extend_sg) { - ibdev_err(ibdev, - "failed to check exSGE num, exSGE num = %d.\n", - cnt); - return -EINVAL; - } - } } else { cnt = 0; } -- cgit From eaaa98dedf280afea15ad3552460bf142fecb9af Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Tue, 28 Jul 2020 18:42:18 +0800 Subject: RDMA/hns: Remove redundant parameters in set_rc_wqe() There are some functions called by set_rc_wqe() use two parameters: "void *wqe" and "struct hns_roce_v2_rc_send_wqe *rc_sq_wqe", but the first one can be got from the second one. So remove the redundant wqe from related functions. Link: https://lore.kernel.org/r/1595932941-40613-5-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index ff738db335a0..8cda4a9b48d2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -91,10 +91,11 @@ static u32 to_hr_opcode(u32 ib_opcode) } static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, - void *wqe, const struct ib_reg_wr *wr) + const struct ib_reg_wr *wr) { + struct hns_roce_wqe_frmr_seg *fseg = + (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe); struct hns_roce_mr *mr = to_hr_mr(wr->mr); - struct hns_roce_wqe_frmr_seg *fseg = wqe; u64 pbl_ba; /* use ib_access_flags */ @@ -128,14 +129,16 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0); } -static void set_atomic_seg(const struct ib_send_wr *wr, void *wqe, +static void set_atomic_seg(const struct ib_send_wr *wr, struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, unsigned int valid_num_sge) { - struct hns_roce_wqe_atomic_seg *aseg; + struct hns_roce_v2_wqe_data_seg *dseg = + (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe); + struct hns_roce_wqe_atomic_seg *aseg = + (void *)dseg + sizeof(struct hns_roce_v2_wqe_data_seg); - set_data_seg_v2(wqe, wr->sg_list); - aseg = wqe + sizeof(struct hns_roce_v2_wqe_data_seg); + set_data_seg_v2(dseg, wr->sg_list); if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { aseg->fetchadd_swap_data = cpu_to_le64(atomic_wr(wr)->swap); @@ -143,7 +146,7 @@ static void set_atomic_seg(const struct ib_send_wr *wr, void *wqe, } else { aseg->fetchadd_swap_data = cpu_to_le64(atomic_wr(wr)->compare_add); - aseg->cmp_data = 0; + aseg->cmp_data = 0; } roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, @@ -176,13 +179,15 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, - void *wqe, unsigned int *sge_ind, + unsigned int *sge_ind, unsigned int valid_num_sge) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); - struct hns_roce_v2_wqe_data_seg *dseg = wqe; + struct hns_roce_v2_wqe_data_seg *dseg = + (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe); struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_qp *qp = to_hr_qp(ibqp); + void *wqe = dseg; int j = 0; int i; @@ -438,7 +443,6 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit); - wqe += sizeof(struct hns_roce_v2_rc_send_wqe); switch (wr->opcode) { case 
IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: @@ -451,7 +455,7 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey); break; case IB_WR_REG_MR: - set_frmr_seg(rc_sq_wqe, wqe, reg_wr(wr)); + set_frmr_seg(rc_sq_wqe, reg_wr(wr)); break; case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: @@ -468,10 +472,10 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) - set_atomic_seg(wr, wqe, rc_sq_wqe, valid_num_sge); + set_atomic_seg(wr, rc_sq_wqe, valid_num_sge); else if (wr->opcode != IB_WR_REG_MR) ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe, - wqe, &curr_idx, valid_num_sge); + &curr_idx, valid_num_sge); *sge_idx = curr_idx; -- cgit From a5531e9b70ffa086cfad553be189baf4c2673220 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Tue, 28 Jul 2020 18:42:19 +0800 Subject: RDMA/hns: Delete unnecessary memset when allocating VF resource The hns_roce_cmq_setup_basic_desc() can clear the whole desc, so removes these redundant memset operations. Link: https://lore.kernel.org/r/1595932941-40613-6-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 8cda4a9b48d2..9d64804750d6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1514,8 +1514,6 @@ static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev) req_a = (struct hns_roce_vf_res_a *)desc[0].data; req_b = (struct hns_roce_vf_res_b *)desc[1].data; - memset(req_a, 0, sizeof(*req_a)); - memset(req_b, 0, sizeof(*req_b)); for (i = 0; i < 2; i++) { hns_roce_cmq_setup_basic_desc(&desc[i], HNS_ROCE_OPC_ALLOC_VF_RES, false); -- cgit From 4327bd2c41412657ee2c8c0d8d3d1945268b4238 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Tue, 28 Jul 2020 18:42:20 +0800 Subject: RDMA/hns: Fix error during modify qp RTS2RTS One qp state migrations legal configuration was deleted mistakenly. 
Fixes: 357f34294686 ("RDMA/hns: Simplify the state judgment code of qp") Link: https://lore.kernel.org/r/1595932941-40613-7-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 9d64804750d6..0c81100667fa 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4262,7 +4262,9 @@ static bool check_qp_state(enum ib_qp_state cur_state, [IB_QPS_RTR] = { [IB_QPS_RESET] = true, [IB_QPS_RTS] = true, [IB_QPS_ERR] = true }, - [IB_QPS_RTS] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true }, + [IB_QPS_RTS] = { [IB_QPS_RESET] = true, + [IB_QPS_RTS] = true, + [IB_QPS_ERR] = true }, [IB_QPS_SQD] = {}, [IB_QPS_SQE] = {}, [IB_QPS_ERR] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true } -- cgit From 395f2e8fd340c5bfad026f5968b56ec34cf20dd1 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Tue, 28 Jul 2020 18:42:21 +0800 Subject: RDMA/hns: Fix the unneeded process when getting a general type of CQE error If the hns ROCEE reports a general error CQE (types not specified by the IB General Specifications), it's no need to change the QP state to error, and the driver should just skip it. Fixes: 7c044adca272 ("RDMA/hns: Simplify the cqe code of poll cq") Link: https://lore.kernel.org/r/1595932941-40613-8-git-send-email-liweihang@huawei.com Signed-off-by: Xi Wang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 9 +++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 1 + 2 files changed, 10 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 0c81100667fa..d51b332ece5b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3046,6 +3046,7 @@ static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, IB_WC_RETRY_EXC_ERR }, { HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR, IB_WC_RNR_RETRY_EXC_ERR }, { HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR, IB_WC_REM_ABORT_ERR }, + { HNS_ROCE_CQE_V2_GENERAL_ERR, IB_WC_GENERAL_ERR} }; u32 cqe_status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M, @@ -3067,6 +3068,14 @@ static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, print_hex_dump(KERN_ERR, "", DUMP_PREFIX_NONE, 16, 4, cqe, sizeof(*cqe), false); + /* + * For hns ROCEE, GENERAL_ERR is an error type that is not defined in + * the standard protocol, the driver must ignore it and needn't to set + * the QP to an error state. + */ + if (cqe_status == HNS_ROCE_CQE_V2_GENERAL_ERR) + return; + /* * Hip08 hardware cannot flush the WQEs in SQ/RQ if the QP state gets * into errored mode. 
Hence, as a workaround to this hardware diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 53c26f380f25..1fb1c583d0f8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -214,6 +214,7 @@ enum { HNS_ROCE_CQE_V2_TRANSPORT_RETRY_EXC_ERR = 0x15, HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR = 0x16, HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR = 0x22, + HNS_ROCE_CQE_V2_GENERAL_ERR = 0x23, HNS_ROCE_V2_CQE_STATUS_MASK = 0xff, }; -- cgit From 928da37a229f344424ffc89c9a58feb2368bb018 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Jul 2020 18:57:55 -0700 Subject: RDMA/umem: Add a schedule point in ib_umem_get() Mapping as little as 64GB can take more than 10 seconds, triggering issues on kernels with CONFIG_PREEMPT_NONE=y. ib_umem_get() already splits the work in 2MB units on x86_64, adding a cond_resched() in the long-lasting loop is enough to solve the issue. Note that sg_alloc_table() can still use more than 100 ms, which is also problematic. This might be addressed later in ib_umem_add_sg_table(), adding new blocks in sgl on demand. Link: https://lore.kernel.org/r/20200730015755.1827498-1-edumazet@google.com Signed-off-by: Eric Dumazet Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 82455a1392f1..831bff8d52e5 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -261,6 +261,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, sg = umem->sg_head.sgl; while (npages) { + cond_resched(); ret = pin_user_pages_fast(cur_base, min_t(unsigned long, npages, PAGE_SIZE / -- cgit From 76251e15ea73898b62ed0da3210189e50e1fe3c9 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 21 Jul 2020 13:16:18 +0300 Subject: RDMA/rxe: Remove pkey table The RoCE spec requires RoCE devices to support only the default pkey. However the rxe driver maintains a 64 enties pkey table and uses only the first entry. Remove the pkey table and hard code a table of length one hard wired with the default pkey. Replace all checks of the pkey_table with a comparison to the default_pkey instead. 
Link: https://lore.kernel.org/r/20200721101618.686110-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe.c | 34 +++------------------------------- drivers/infiniband/sw/rxe/rxe_param.h | 4 ++-- drivers/infiniband/sw/rxe/rxe_recv.c | 29 ++++------------------------- drivers/infiniband/sw/rxe/rxe_req.c | 5 +---- drivers/infiniband/sw/rxe/rxe_verbs.c | 17 +++-------------- drivers/infiniband/sw/rxe/rxe_verbs.h | 1 - 6 files changed, 13 insertions(+), 77 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index efcb72c92be6..907203afbd99 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -40,14 +40,6 @@ MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib"); MODULE_DESCRIPTION("Soft RDMA transport"); MODULE_LICENSE("Dual BSD/GPL"); -/* free resources for all ports on a device */ -static void rxe_cleanup_ports(struct rxe_dev *rxe) -{ - kfree(rxe->port.pkey_tbl); - rxe->port.pkey_tbl = NULL; - -} - /* free resources for a rxe device all objects created for this device must * have been destroyed */ @@ -66,8 +58,6 @@ void rxe_dealloc(struct ib_device *ib_dev) rxe_pool_cleanup(&rxe->mc_grp_pool); rxe_pool_cleanup(&rxe->mc_elem_pool); - rxe_cleanup_ports(rxe); - if (rxe->tfm) crypto_free_shash(rxe->tfm); } @@ -139,25 +129,14 @@ static void rxe_init_port_param(struct rxe_port *port) /* initialize port state, note IB convention that HCA ports are always * numbered from 1 */ -static int rxe_init_ports(struct rxe_dev *rxe) +static void rxe_init_ports(struct rxe_dev *rxe) { struct rxe_port *port = &rxe->port; rxe_init_port_param(port); - - port->pkey_tbl = kcalloc(port->attr.pkey_tbl_len, - sizeof(*port->pkey_tbl), GFP_KERNEL); - - if (!port->pkey_tbl) - return -ENOMEM; - - port->pkey_tbl[0] = 0xffff; addrconf_addr_eui48((unsigned char *)&port->port_guid, rxe->ndev->dev_addr); - spin_lock_init(&port->port_lock); - - return 0; } /* init pools of managed objects */ @@ -247,13 +226,11 @@ static int rxe_init(struct rxe_dev *rxe) /* init default device parameters */ rxe_init_device_param(rxe); - err = rxe_init_ports(rxe); - if (err) - goto err1; + rxe_init_ports(rxe); err = rxe_init_pools(rxe); if (err) - goto err2; + return err; /* init pending mmap list */ spin_lock_init(&rxe->mmap_offset_lock); @@ -263,11 +240,6 @@ static int rxe_init(struct rxe_dev *rxe) mutex_init(&rxe->usdev_lock); return 0; - -err2: - rxe_cleanup_ports(rxe); -err1: - return err; } void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu) diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index 99e9d8ba9767..2f381aeafcb5 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -100,7 +100,7 @@ enum rxe_device_param { RXE_MAX_SRQ_SGE = 27, RXE_MIN_SRQ_SGE = 1, RXE_MAX_FMR_PAGE_LIST_LEN = 512, - RXE_MAX_PKEYS = 64, + RXE_MAX_PKEYS = 1, RXE_LOCAL_CA_ACK_DELAY = 15, RXE_MAX_UCONTEXT = 512, @@ -148,7 +148,7 @@ enum rxe_port_param { RXE_PORT_INIT_TYPE_REPLY = 0, RXE_PORT_ACTIVE_WIDTH = IB_WIDTH_1X, RXE_PORT_ACTIVE_SPEED = 1, - RXE_PORT_PKEY_TBL_LEN = 64, + RXE_PORT_PKEY_TBL_LEN = 1, RXE_PORT_PHYS_STATE = IB_PORT_PHYS_STATE_POLLING, RXE_PORT_SUBNET_PREFIX = 0xfe80000000000000ULL, }; diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 46e111c218fd..7e123d3c4d09 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -101,36 +101,15 @@ 
static void set_qkey_viol_cntr(struct rxe_port *port) static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, u32 qpn, struct rxe_qp *qp) { - int i; - int found_pkey = 0; struct rxe_port *port = &rxe->port; u16 pkey = bth_pkey(pkt); pkt->pkey_index = 0; - if (qpn == 1) { - for (i = 0; i < port->attr.pkey_tbl_len; i++) { - if (pkey_match(pkey, port->pkey_tbl[i])) { - pkt->pkey_index = i; - found_pkey = 1; - break; - } - } - - if (!found_pkey) { - pr_warn_ratelimited("bad pkey = 0x%x\n", pkey); - set_bad_pkey_cntr(port); - goto err1; - } - } else { - if (unlikely(!pkey_match(pkey, - port->pkey_tbl[qp->attr.pkey_index] - ))) { - pr_warn_ratelimited("bad pkey = 0x%0x\n", pkey); - set_bad_pkey_cntr(port); - goto err1; - } - pkt->pkey_index = qp->attr.pkey_index; + if (!pkey_match(pkey, IB_DEFAULT_PKEY_FULL)) { + pr_warn_ratelimited("bad pkey = 0x%x\n", pkey); + set_bad_pkey_cntr(port); + goto err1; } if ((qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) && diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index e5031172c019..34df2b55e650 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -381,7 +381,6 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { struct rxe_dev *rxe = to_rdev(qp->ibqp.device); - struct rxe_port *port = &rxe->port; struct sk_buff *skb; struct rxe_send_wr *ibwr = &wqe->wr; struct rxe_av *av; @@ -419,9 +418,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, (pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) == (RXE_WRITE_MASK | RXE_IMMDT_MASK)); - pkey = (qp_type(qp) == IB_QPT_GSI) ? - port->pkey_tbl[ibwr->wr.ud.pkey_index] : - port->pkey_tbl[qp->attr.pkey_index]; + pkey = IB_DEFAULT_PKEY_FULL; qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn : qp->attr.dest_qp_num; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index c1649aec8c23..bb61e534e468 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -83,22 +83,11 @@ static int rxe_query_port(struct ib_device *dev, static int rxe_query_pkey(struct ib_device *device, u8 port_num, u16 index, u16 *pkey) { - struct rxe_dev *rxe = to_rdev(device); - struct rxe_port *port; - - port = &rxe->port; - - if (unlikely(index >= port->attr.pkey_tbl_len)) { - dev_warn(device->dev.parent, "invalid index = %d\n", - index); - goto err1; - } + if (index > 0) + return -EINVAL; - *pkey = port->pkey_tbl[index]; + *pkey = IB_DEFAULT_PKEY_FULL; return 0; - -err1: - return -EINVAL; } static int rxe_modify_device(struct ib_device *dev, diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 92de39c4a7c1..c664c7f36ab5 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -371,7 +371,6 @@ struct rxe_mc_elem { struct rxe_port { struct ib_port_attr attr; - u16 *pkey_tbl; __be64 port_guid; __be64 subnet_prefix; spinlock_t port_lock; /* guard port */ -- cgit From 23fcc7dee2c6aba1060558683988263851e74bab Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Mon, 3 Aug 2020 08:58:49 +0300 Subject: RDMA/mlx5: Fix flow destination setting for RDMA TX flow table For RDMA TX flow table, set destination type to be 'port' and prevent creation of flows with TIR destination. As RDMA TX is an egress flow table the rules on this flow table should not forward traffic back to the NIC and should set the destination to be the port. 
Without the setting of this destination type flow rules on the RDMA TX flow tables are not created as FW invokes a syndrome for undefined destination for the rule. Fixes: 24670b1a3166 ("net/mlx5: Add support for RDMA TX steering") Link: https://lore.kernel.org/r/20200803055849.14947-1-leon@kernel.org Signed-off-by: Michael Guralnik Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/fs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 1a7e6226f11a..e9cfb9a2ef41 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -1865,12 +1865,14 @@ static int get_dests(struct uverbs_attr_bundle *attrs, else *dest_id = mqp->raw_packet_qp.rq.tirn; *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) { + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS || + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) { *dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT; } if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR && - fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) + (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS || + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX)) return -EINVAL; return 0; -- cgit
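The RTS2RTS fix earlier in this series turns on the legality of a QP state transition via a nested designated-initializer lookup table in check_qp_state(). A minimal standalone sketch of that pattern is below; it uses simplified stand-in state names rather than the kernel's enum ib_qp_state and is an illustration of the table idiom, not the hns driver code. It shows how leaving out the [RTS][RTS] entry silently rejects an otherwise legal RTS-to-RTS modify.

/*
 * Standalone illustration (userspace, not driver code) of the
 * transition-table pattern: a legal transition is simply a 'true'
 * entry indexed by [current state][next state].
 */
#include <stdbool.h>
#include <stdio.h>

enum qp_state { QPS_RESET, QPS_INIT, QPS_RTR, QPS_RTS, QPS_ERR, QPS_MAX };

static bool check_qp_state(enum qp_state cur, enum qp_state next)
{
	static const bool sm[QPS_MAX][QPS_MAX] = {
		[QPS_RESET] = { [QPS_RESET] = true, [QPS_INIT] = true },
		[QPS_INIT]  = { [QPS_RESET] = true, [QPS_INIT] = true,
				[QPS_RTR]   = true, [QPS_ERR]  = true },
		[QPS_RTR]   = { [QPS_RESET] = true, [QPS_RTS]  = true,
				[QPS_ERR]   = true },
		/* Dropping the [QPS_RTS] = true entry below reproduces the
		 * RTS2RTS regression: a legal RTS->RTS modify is refused. */
		[QPS_RTS]   = { [QPS_RESET] = true, [QPS_RTS]  = true,
				[QPS_ERR]   = true },
		[QPS_ERR]   = { [QPS_RESET] = true, [QPS_ERR]  = true },
	};

	return sm[cur][next];
}

int main(void)
{
	/* Prints 1 once the RTS->RTS entry is present in the table. */
	printf("RTS->RTS legal: %d\n", check_qp_state(QPS_RTS, QPS_RTS));
	return 0;
}

The appeal of the table over chained conditionals is that every legal edge is visible in one place, so an accidental deletion shows up as a one-line diff, exactly as in the fix above.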