-rw-r--r--  drivers/infiniband/core/Makefile | 2
-rw-r--r--  drivers/infiniband/core/addr.c | 4
-rw-r--r--  drivers/infiniband/core/cm.c | 263
-rw-r--r--  drivers/infiniband/core/cma.c | 55
-rw-r--r--  drivers/infiniband/core/device.c | 16
-rw-r--r--  drivers/infiniband/core/lag.c | 138
-rw-r--r--  drivers/infiniband/core/mad.c | 255
-rw-r--r--  drivers/infiniband/core/multicast.c | 12
-rw-r--r--  drivers/infiniband/core/rw.c | 2
-rw-r--r--  drivers/infiniband/core/sa_query.c | 37
-rw-r--r--  drivers/infiniband/core/ucma.c | 16
-rw-r--r--  drivers/infiniband/core/user_mad.c | 22
-rw-r--r--  drivers/infiniband/core/uverbs_main.c | 26
-rw-r--r--  drivers/infiniband/core/verbs.c | 73
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 73
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.h | 12
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.c | 357
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.h | 42
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 88
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 91
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_res.c | 1
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_res.h | 53
-rw-r--r--  drivers/infiniband/hw/bnxt_re/roce_hsi.h | 106
-rw-r--r--  drivers/infiniband/hw/efa/efa.h | 6
-rw-r--r--  drivers/infiniband/hw/efa/efa_com.c | 5
-rw-r--r--  drivers/infiniband/hw/efa/efa_com.h | 3
-rw-r--r--  drivers/infiniband/hw/efa/efa_verbs.c | 19
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_ah.c | 5
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_alloc.c | 146
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cq.c | 351
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_device.h | 223
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hem.c | 114
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hem.h | 11
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 180
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 1308
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 9
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_main.c | 70
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_mr.c | 1638
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_qp.c | 495
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_srq.c | 376
-rw-r--r--  drivers/infiniband/hw/mlx4/ah.c | 11
-rw-r--r--  drivers/infiniband/hw/mlx4/mlx4_ib.h | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/Makefile | 3
-rw-r--r--  drivers/infiniband/hw/mlx5/ah.c | 35
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.c | 114
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.h | 4
-rw-r--r--  drivers/infiniband/hw/mlx5/cong.c | 4
-rw-r--r--  drivers/infiniband/hw/mlx5/devx.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/flow.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/gsi.c | 38
-rw-r--r--  drivers/infiniband/hw/mlx5/ib_rep.h | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c | 23
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h | 57
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c | 10
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c | 3262
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.c | 10
-rw-r--r--  drivers/infiniband/hw/mlx5/srq_cmd.c | 115
-rw-r--r--  drivers/infiniband/hw/mlx5/wr.c | 1504
-rw-r--r--  drivers/infiniband/hw/mlx5/wr.h | 76
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.c | 9
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 3
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_ah.h | 2
-rw-r--r--  drivers/infiniband/hw/qedr/verbs.c | 4
-rw-r--r--  drivers/infiniband/hw/qedr/verbs.h | 2
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7322.c | 7
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 5
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 2
-rw-r--r--  drivers/infiniband/sw/rdmavt/ah.c | 11
-rw-r--r--  drivers/infiniband/sw/rdmavt/ah.h | 4
-rw-r--r--  drivers/infiniband/sw/rxe/rxe.c | 1
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_param.h | 3
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.c | 9
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c | 15
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c | 12
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c | 21
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.c | 32
-rw-r--r--  include/linux/mlx5/mlx5_ifc.h | 4
-rw-r--r--  include/linux/mlx5/qp.h | 2
-rw-r--r--  include/rdma/ib_mad.h | 49
-rw-r--r--  include/rdma/ib_verbs.h | 58
-rw-r--r--  include/rdma/lag.h | 23
-rw-r--r--  include/rdma/rdma_cm.h | 8
-rw-r--r--  include/rdma/rdmavt_qp.h | 2
-rw-r--r--  include/uapi/rdma/rdma_user_cm.h | 4
-rw-r--r--  net/rds/ib.c | 21
-rw-r--r--  net/smc/smc_ib.c | 13
86 files changed, 5558 insertions, 6745 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index d1b14887960e..870f0fcd54d5 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -12,7 +12,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
multicast.o mad.o smi.o agent.o mad_rmpp.o \
nldev.o restrack.o counters.o ib_core_uverbs.o \
- trace.o
+ trace.o lag.o
ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 1753a9801b70..3a98439bba83 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -371,6 +371,8 @@ static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
(const void *)&dst_in6->sin6_addr;
sa_family_t family = dst_in->sa_family;
+ might_sleep();
+
/* If we have a gateway in IB mode then it must be an IB network */
if (has_gateway(dst, family) && dev_addr->network == RDMA_NETWORK_IB)
return ib_nl_fetch_ha(dev_addr, daddr, seq, family);
@@ -727,6 +729,8 @@ int roce_resolve_route_from_path(struct sa_path_rec *rec,
struct rdma_dev_addr dev_addr = {};
int ret;
+ might_sleep();
+
if (rec->roce.route_resolved)
return 0;
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 4794113ecd59..fb47cd55ce42 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -81,8 +81,11 @@ const char *__attribute_const__ ibcm_reject_msg(int reason)
EXPORT_SYMBOL(ibcm_reject_msg);
struct cm_id_private;
-static void cm_add_one(struct ib_device *device);
+struct cm_work;
+static int cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device, void *client_data);
+static void cm_process_work(struct cm_id_private *cm_id_priv,
+ struct cm_work *work);
static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
struct ib_cm_sidr_rep_param *param);
static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
@@ -474,24 +477,19 @@ static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
grh, &av->ah_attr);
}
-static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
- struct cm_av *av,
- struct cm_port *port)
+static void add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
+ struct cm_av *av, struct cm_port *port)
{
unsigned long flags;
- int ret = 0;
spin_lock_irqsave(&cm.lock, flags);
-
if (&cm_id_priv->av == av)
list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
else if (&cm_id_priv->alt_av == av)
list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
else
- ret = -EINVAL;
-
+ WARN_ON(true);
spin_unlock_irqrestore(&cm.lock, flags);
- return ret;
}
static struct cm_port *
@@ -572,12 +570,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path,
return ret;
av->timeout = path->packet_life_time + 1;
-
- ret = add_cm_id_to_port_list(cm_id_priv, av, port);
- if (ret) {
- rdma_destroy_ah_attr(&new_ah_attr);
- return ret;
- }
+ add_cm_id_to_port_list(cm_id_priv, av, port);
rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
return 0;
}
@@ -587,11 +580,6 @@ static u32 cm_local_id(__be32 local_id)
return (__force u32) (local_id ^ cm.random_id_operand);
}
-static void cm_free_id(__be32 local_id)
-{
- xa_erase_irq(&cm.local_id_table, cm_local_id(local_id));
-}
-
static struct cm_id_private *cm_acquire_id(__be32 local_id, __be32 remote_id)
{
struct cm_id_private *cm_id_priv;
@@ -698,9 +686,10 @@ static struct cm_id_private * cm_find_listen(struct ib_device *device,
cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
if ((cm_id_priv->id.service_mask & service_id) ==
cm_id_priv->id.service_id &&
- (cm_id_priv->id.device == device))
+ (cm_id_priv->id.device == device)) {
+ refcount_inc(&cm_id_priv->refcount);
return cm_id_priv;
-
+ }
if (device < cm_id_priv->id.device)
node = node->rb_left;
else if (device > cm_id_priv->id.device)
@@ -745,12 +734,14 @@ static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
return NULL;
}
-static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
- __be32 remote_id)
+static struct cm_id_private *cm_find_remote_id(__be64 remote_ca_guid,
+ __be32 remote_id)
{
struct rb_node *node = cm.remote_id_table.rb_node;
struct cm_timewait_info *timewait_info;
+ struct cm_id_private *res = NULL;
+ spin_lock_irq(&cm.lock);
while (node) {
timewait_info = rb_entry(node, struct cm_timewait_info,
remote_id_node);
@@ -762,10 +753,14 @@ static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
node = node->rb_left;
else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
node = node->rb_right;
- else
- return timewait_info;
+ else {
+ res = cm_acquire_id(timewait_info->work.local_id,
+ timewait_info->work.remote_id);
+ break;
+ }
}
- return NULL;
+ spin_unlock_irq(&cm.lock);
+ return res;
}
static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
@@ -917,6 +912,35 @@ static void cm_free_work(struct cm_work *work)
kfree(work);
}
+static void cm_queue_work_unlock(struct cm_id_private *cm_id_priv,
+ struct cm_work *work)
+{
+ bool immediate;
+
+ /*
+ * To deliver the event to the user callback we have to drop the
+ * spinlock, however, we need to ensure that the user callback is single
+ * threaded and receives events in the temporal order. If there are
+ * already events being processed then thread new events onto a list,
+ * the thread currently processing will pick them up.
+ */
+ immediate = atomic_inc_and_test(&cm_id_priv->work_count);
+ if (!immediate) {
+ list_add_tail(&work->list, &cm_id_priv->work_list);
+ /*
+ * This routine always consumes incoming reference. Once queued
+ * to the work_list then a reference is held by the thread
+ * currently running cm_process_work() and this reference is not
+ * needed.
+ */
+ cm_deref_id(cm_id_priv);
+ }
+ spin_unlock_irq(&cm_id_priv->lock);
+
+ if (immediate)
+ cm_process_work(cm_id_priv, work);
+}
+
static inline int cm_convert_to_ms(int iba_time)
{
/* approximate conversion to ms from 4.096us x 2^iba_time */
@@ -942,8 +966,10 @@ static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
return min(31, ack_timeout);
}
-static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
+static void cm_remove_remote(struct cm_id_private *cm_id_priv)
{
+ struct cm_timewait_info *timewait_info = cm_id_priv->timewait_info;
+
if (timewait_info->inserted_remote_id) {
rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
timewait_info->inserted_remote_id = 0;
@@ -982,7 +1008,7 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
return;
spin_lock_irqsave(&cm.lock, flags);
- cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cm_remove_remote(cm_id_priv);
list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
spin_unlock_irqrestore(&cm.lock, flags);
@@ -1001,6 +1027,11 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
msecs_to_jiffies(wait_time));
spin_unlock_irqrestore(&cm.lock, flags);
+ /*
+ * The timewait_info is converted into a work and gets freed during
+ * cm_free_work() in cm_timewait_handler().
+ */
+ BUILD_BUG_ON(offsetof(struct cm_timewait_info, work) != 0);
cm_id_priv->timewait_info = NULL;
}
@@ -1013,7 +1044,7 @@ static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
cm_id_priv->id.state = IB_CM_IDLE;
if (cm_id_priv->timewait_info) {
spin_lock_irqsave(&cm.lock, flags);
- cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cm_remove_remote(cm_id_priv);
spin_unlock_irqrestore(&cm.lock, flags);
kfree(cm_id_priv->timewait_info);
cm_id_priv->timewait_info = NULL;
@@ -1101,7 +1132,7 @@ retest:
case IB_CM_TIMEWAIT:
/*
* The cm_acquire_id in cm_timewait_handler will stop working
- * once we do cm_free_id() below, so just move to idle here for
+ * once we do xa_erase below, so just move to idle here for
* consistency.
*/
cm_id->state = IB_CM_IDLE;
@@ -1114,7 +1145,7 @@ retest:
spin_lock(&cm.lock);
/* Required for cleanup paths related cm_req_handler() */
if (cm_id_priv->timewait_info) {
- cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cm_remove_remote(cm_id_priv);
kfree(cm_id_priv->timewait_info);
cm_id_priv->timewait_info = NULL;
}
@@ -1131,7 +1162,7 @@ retest:
spin_unlock(&cm.lock);
spin_unlock_irq(&cm_id_priv->lock);
- cm_free_id(cm_id->local_id);
+ xa_erase_irq(&cm.local_id_table, cm_local_id(cm_id->local_id));
cm_deref_id(cm_id_priv);
wait_for_completion(&cm_id_priv->comp);
while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
@@ -1928,7 +1959,6 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
struct cm_timewait_info *timewait_info;
struct cm_req_msg *req_msg;
- struct ib_cm_id *cm_id;
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1949,7 +1979,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
/* Check for stale connections. */
timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
if (timewait_info) {
- cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cm_remove_remote(cm_id_priv);
cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
timewait_info->work.remote_id);
@@ -1958,8 +1988,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
NULL, 0);
if (cur_cm_id_priv) {
- cm_id = &cur_cm_id_priv->id;
- ib_send_cm_dreq(cm_id, NULL, 0);
+ ib_send_cm_dreq(&cur_cm_id_priv->id, NULL, 0);
cm_deref_id(cur_cm_id_priv);
}
return NULL;
@@ -1970,14 +1999,13 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
cm_id_priv->id.device,
cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg)));
if (!listen_cm_id_priv) {
- cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cm_remove_remote(cm_id_priv);
spin_unlock_irq(&cm.lock);
cm_issue_rej(work->port, work->mad_recv_wc,
IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
NULL, 0);
return NULL;
}
- refcount_inc(&listen_cm_id_priv->refcount);
spin_unlock_irq(&cm.lock);
return listen_cm_id_priv;
}
@@ -2154,9 +2182,7 @@ static int cm_req_handler(struct cm_work *work)
/* Refcount belongs to the event, pairs with cm_process_work() */
refcount_inc(&cm_id_priv->refcount);
- atomic_inc(&cm_id_priv->work_count);
- spin_unlock_irq(&cm_id_priv->lock);
- cm_process_work(cm_id_priv, work);
+ cm_queue_work_unlock(cm_id_priv, work);
/*
* Since this ID was just created and was not made visible to other MAD
* handlers until the cm_finalize_id() above we know that the
@@ -2405,7 +2431,6 @@ static int cm_rep_handler(struct cm_work *work)
struct cm_rep_msg *rep_msg;
int ret;
struct cm_id_private *cur_cm_id_priv;
- struct ib_cm_id *cm_id;
struct cm_timewait_info *timewait_info;
rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -2455,9 +2480,7 @@ static int cm_rep_handler(struct cm_work *work)
/* Check for a stale connection. */
timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
if (timewait_info) {
- rb_erase(&cm_id_priv->timewait_info->remote_id_node,
- &cm.remote_id_table);
- cm_id_priv->timewait_info->inserted_remote_id = 0;
+ cm_remove_remote(cm_id_priv);
cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
timewait_info->work.remote_id);
@@ -2473,8 +2496,7 @@ static int cm_rep_handler(struct cm_work *work)
IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
if (cur_cm_id_priv) {
- cm_id = &cur_cm_id_priv->id;
- ib_send_cm_dreq(cm_id, NULL, 0);
+ ib_send_cm_dreq(&cur_cm_id_priv->id, NULL, 0);
cm_deref_id(cur_cm_id_priv);
}
@@ -2502,15 +2524,7 @@ static int cm_rep_handler(struct cm_work *work)
cm_id_priv->alt_av.timeout - 1);
ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
error:
@@ -2521,7 +2535,6 @@ error:
static int cm_establish_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
- int ret;
/* See comment in cm_establish about lookup. */
cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
@@ -2535,15 +2548,7 @@ static int cm_establish_handler(struct cm_work *work)
}
ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -2554,7 +2559,6 @@ static int cm_rtu_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_rtu_msg *rtu_msg;
- int ret;
rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_id(
@@ -2577,15 +2581,7 @@ static int cm_rtu_handler(struct cm_work *work)
cm_id_priv->id.state = IB_CM_ESTABLISHED;
ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -2778,7 +2774,6 @@ static int cm_dreq_handler(struct cm_work *work)
struct cm_id_private *cm_id_priv;
struct cm_dreq_msg *dreq_msg;
struct ib_mad_send_buf *msg = NULL;
- int ret;
dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_id(
@@ -2843,15 +2838,7 @@ static int cm_dreq_handler(struct cm_work *work)
}
cm_id_priv->id.state = IB_CM_DREQ_RCVD;
cm_id_priv->tid = dreq_msg->hdr.tid;
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
unlock: spin_unlock_irq(&cm_id_priv->lock);
@@ -2863,7 +2850,6 @@ static int cm_drep_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_drep_msg *drep_msg;
- int ret;
drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_id(
@@ -2884,15 +2870,7 @@ static int cm_drep_handler(struct cm_work *work)
cm_enter_timewait(cm_id_priv);
ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -2985,24 +2963,15 @@ static void cm_format_rej_event(struct cm_work *work)
static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
{
- struct cm_timewait_info *timewait_info;
struct cm_id_private *cm_id_priv;
__be32 remote_id;
remote_id = cpu_to_be32(IBA_GET(CM_REJ_LOCAL_COMM_ID, rej_msg));
if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_TIMEOUT) {
- spin_lock_irq(&cm.lock);
- timewait_info = cm_find_remote_id(
+ cm_id_priv = cm_find_remote_id(
*((__be64 *)IBA_GET_MEM_PTR(CM_REJ_ARI, rej_msg)),
remote_id);
- if (!timewait_info) {
- spin_unlock_irq(&cm.lock);
- return NULL;
- }
- cm_id_priv =
- cm_acquire_id(timewait_info->work.local_id, remote_id);
- spin_unlock_irq(&cm.lock);
} else if (IBA_GET(CM_REJ_MESSAGE_REJECTED, rej_msg) ==
CM_MSG_RESPONSE_REQ)
cm_id_priv = cm_acquire_id(
@@ -3020,7 +2989,6 @@ static int cm_rej_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_rej_msg *rej_msg;
- int ret;
rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_rejected_id(rej_msg);
@@ -3066,19 +3034,10 @@ static int cm_rej_handler(struct cm_work *work)
__func__, be32_to_cpu(cm_id_priv->id.local_id),
cm_id_priv->id.state);
spin_unlock_irq(&cm_id_priv->lock);
- ret = -EINVAL;
goto out;
}
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -3188,7 +3147,7 @@ static int cm_mra_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_mra_msg *mra_msg;
- int timeout, ret;
+ int timeout;
mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_mraed_id(mra_msg);
@@ -3248,15 +3207,7 @@ static int cm_mra_handler(struct cm_work *work)
cm_id_priv->msg->context[1] = (void *) (unsigned long)
cm_id_priv->id.state;
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
spin_unlock_irq(&cm_id_priv->lock);
@@ -3391,15 +3342,7 @@ static int cm_lap_handler(struct cm_work *work)
cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
cm_id_priv->tid = lap_msg->hdr.tid;
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
unlock: spin_unlock_irq(&cm_id_priv->lock);
@@ -3411,7 +3354,6 @@ static int cm_apr_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_apr_msg *apr_msg;
- int ret;
/* Currently Alternate path messages are not supported for
* RoCE link layer.
@@ -3446,16 +3388,7 @@ static int cm_apr_handler(struct cm_work *work)
cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
cm_id_priv->msg = NULL;
-
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -3466,7 +3399,6 @@ static int cm_timewait_handler(struct cm_work *work)
{
struct cm_timewait_info *timewait_info;
struct cm_id_private *cm_id_priv;
- int ret;
timewait_info = container_of(work, struct cm_timewait_info, work);
spin_lock_irq(&cm.lock);
@@ -3485,15 +3417,7 @@ static int cm_timewait_handler(struct cm_work *work)
goto out;
}
cm_id_priv->id.state = IB_CM_IDLE;
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -3640,7 +3564,6 @@ static int cm_sidr_req_handler(struct cm_work *work)
.status = IB_SIDR_UNSUPPORTED });
goto out; /* No match. */
}
- refcount_inc(&listen_cm_id_priv->refcount);
spin_unlock_irq(&cm.lock);
cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
@@ -4382,7 +4305,7 @@ static void cm_remove_port_fs(struct cm_port *port)
}
-static void cm_add_one(struct ib_device *ib_device)
+static int cm_add_one(struct ib_device *ib_device)
{
struct cm_device *cm_dev;
struct cm_port *port;
@@ -4401,7 +4324,7 @@ static void cm_add_one(struct ib_device *ib_device)
cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt),
GFP_KERNEL);
if (!cm_dev)
- return;
+ return -ENOMEM;
cm_dev->ib_device = ib_device;
cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
@@ -4413,8 +4336,10 @@ static void cm_add_one(struct ib_device *ib_device)
continue;
port = kzalloc(sizeof *port, GFP_KERNEL);
- if (!port)
+ if (!port) {
+ ret = -ENOMEM;
goto error1;
+ }
cm_dev->port[i-1] = port;
port->cm_dev = cm_dev;
@@ -4435,8 +4360,10 @@ static void cm_add_one(struct ib_device *ib_device)
cm_recv_handler,
port,
0);
- if (IS_ERR(port->mad_agent))
+ if (IS_ERR(port->mad_agent)) {
+ ret = PTR_ERR(port->mad_agent);
goto error2;
+ }
ret = ib_modify_port(ib_device, i, 0, &port_modify);
if (ret)
@@ -4445,15 +4372,17 @@ static void cm_add_one(struct ib_device *ib_device)
count++;
}
- if (!count)
+ if (!count) {
+ ret = -EOPNOTSUPP;
goto free;
+ }
ib_set_client_data(ib_device, &cm_client, cm_dev);
write_lock_irqsave(&cm.device_lock, flags);
list_add_tail(&cm_dev->list, &cm.device_list);
write_unlock_irqrestore(&cm.device_lock, flags);
- return;
+ return 0;
error3:
ib_unregister_mad_agent(port->mad_agent);
@@ -4475,6 +4404,7 @@ error1:
}
free:
kfree(cm_dev);
+ return ret;
}
static void cm_remove_one(struct ib_device *ib_device, void *client_data)
@@ -4489,9 +4419,6 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
unsigned long flags;
int i;
- if (!cm_dev)
- return;
-
write_lock_irqsave(&cm.device_lock, flags);
list_del(&cm_dev->list);
write_unlock_irqrestore(&cm.device_lock, flags);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 26e6f7df247b..432eec472164 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -91,7 +91,13 @@ const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
}
EXPORT_SYMBOL(rdma_reject_msg);
-bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
+/**
+ * rdma_is_consumer_reject - return true if the consumer rejected the connect
+ * request.
+ * @id: Communication identifier that received the REJECT event.
+ * @reason: Value returned in the REJECT event status field.
+ */
+static bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
{
if (rdma_ib_or_roce(id->device, id->port_num))
return reason == IB_CM_REJ_CONSUMER_DEFINED;
@@ -102,7 +108,6 @@ bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
WARN_ON_ONCE(1);
return false;
}
-EXPORT_SYMBOL(rdma_is_consumer_reject);
const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
struct rdma_cm_event *ev, u8 *data_len)
@@ -148,7 +153,7 @@ struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res)
}
EXPORT_SYMBOL(rdma_res_to_id);
-static void cma_add_one(struct ib_device *device);
+static int cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device, void *client_data);
static struct ib_client cma_client = {
@@ -2904,6 +2909,24 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos)
return 0;
}
+static __be32 cma_get_roce_udp_flow_label(struct rdma_id_private *id_priv)
+{
+ struct sockaddr_in6 *addr6;
+ u16 dport, sport;
+ u32 hash, fl;
+
+ addr6 = (struct sockaddr_in6 *)cma_src_addr(id_priv);
+ fl = be32_to_cpu(addr6->sin6_flowinfo) & IB_GRH_FLOWLABEL_MASK;
+ if ((cma_family(id_priv) != AF_INET6) || !fl) {
+ dport = be16_to_cpu(cma_port(cma_dst_addr(id_priv)));
+ sport = be16_to_cpu(cma_port(cma_src_addr(id_priv)));
+ hash = (u32)sport * 31 + dport;
+ fl = hash & IB_GRH_FLOWLABEL_MASK;
+ }
+
+ return cpu_to_be32(fl);
+}
+
static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
{
struct rdma_route *route = &id_priv->id.route;
@@ -2970,6 +2993,11 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
goto err2;
}
+ if (rdma_protocol_roce_udp_encap(id_priv->id.device,
+ id_priv->id.port_num))
+ route->path_rec->flow_label =
+ cma_get_roce_udp_flow_label(id_priv);
+
cma_init_resolve_route_work(work, id_priv);
queue_work(cma_wq, &work->work);
@@ -4633,29 +4661,34 @@ static struct notifier_block cma_nb = {
.notifier_call = cma_netdev_callback
};
-static void cma_add_one(struct ib_device *device)
+static int cma_add_one(struct ib_device *device)
{
struct cma_device *cma_dev;
struct rdma_id_private *id_priv;
unsigned int i;
unsigned long supported_gids = 0;
+ int ret;
cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
if (!cma_dev)
- return;
+ return -ENOMEM;
cma_dev->device = device;
cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
sizeof(*cma_dev->default_gid_type),
GFP_KERNEL);
- if (!cma_dev->default_gid_type)
+ if (!cma_dev->default_gid_type) {
+ ret = -ENOMEM;
goto free_cma_dev;
+ }
cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
sizeof(*cma_dev->default_roce_tos),
GFP_KERNEL);
- if (!cma_dev->default_roce_tos)
+ if (!cma_dev->default_roce_tos) {
+ ret = -ENOMEM;
goto free_gid_type;
+ }
rdma_for_each_port (device, i) {
supported_gids = roce_gid_type_mask_support(device, i);
@@ -4681,15 +4714,14 @@ static void cma_add_one(struct ib_device *device)
mutex_unlock(&lock);
trace_cm_add_one(device);
- return;
+ return 0;
free_gid_type:
kfree(cma_dev->default_gid_type);
free_cma_dev:
kfree(cma_dev);
-
- return;
+ return ret;
}
static int cma_remove_id_dev(struct rdma_id_private *id_priv)
@@ -4751,9 +4783,6 @@ static void cma_remove_one(struct ib_device *device, void *client_data)
trace_cm_remove_one(device);
- if (!cma_dev)
- return;
-
mutex_lock(&lock);
list_del(&cma_dev->list);
mutex_unlock(&lock);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index d0b3d35ad3e4..d9f565a779df 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -677,8 +677,20 @@ static int add_client_context(struct ib_device *device,
if (ret)
goto out;
downgrade_write(&device->client_data_rwsem);
- if (client->add)
- client->add(device);
+ if (client->add) {
+ if (client->add(device)) {
+ /*
+ * If a client fails to add then the error code is
+ * ignored, but we won't call any more ops on this
+ * client.
+ */
+ xa_erase(&device->client_data, client->client_id);
+ up_read(&device->client_data_rwsem);
+ ib_device_put(device);
+ ib_client_put(client);
+ return 0;
+ }
+ }
/* Readers shall not see a client until add has been completed */
xa_set_mark(&device->client_data, client->client_id,
diff --git a/drivers/infiniband/core/lag.c b/drivers/infiniband/core/lag.c
new file mode 100644
index 000000000000..7063e41eaf26
--- /dev/null
+++ b/drivers/infiniband/core/lag.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
+ */
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_cache.h>
+#include <rdma/lag.h>
+
+static struct sk_buff *rdma_build_skb(struct ib_device *device,
+ struct net_device *netdev,
+ struct rdma_ah_attr *ah_attr,
+ gfp_t flags)
+{
+ struct ipv6hdr *ip6h;
+ struct sk_buff *skb;
+ struct ethhdr *eth;
+ struct iphdr *iph;
+ struct udphdr *uh;
+ u8 smac[ETH_ALEN];
+ bool is_ipv4;
+ int hdr_len;
+
+ is_ipv4 = ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw);
+ hdr_len = ETH_HLEN + sizeof(struct udphdr) + LL_RESERVED_SPACE(netdev);
+ hdr_len += is_ipv4 ? sizeof(struct iphdr) : sizeof(struct ipv6hdr);
+
+ skb = alloc_skb(hdr_len, flags);
+ if (!skb)
+ return NULL;
+
+ skb->dev = netdev;
+ skb_reserve(skb, hdr_len);
+ skb_push(skb, sizeof(struct udphdr));
+ skb_reset_transport_header(skb);
+ uh = udp_hdr(skb);
+ uh->source =
+ htons(rdma_flow_label_to_udp_sport(ah_attr->grh.flow_label));
+ uh->dest = htons(ROCE_V2_UDP_DPORT);
+ uh->len = htons(sizeof(struct udphdr));
+
+ if (is_ipv4) {
+ skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
+ iph->frag_off = 0;
+ iph->version = 4;
+ iph->protocol = IPPROTO_UDP;
+ iph->ihl = 0x5;
+ iph->tot_len = htons(sizeof(struct udphdr) + sizeof(struct
+ iphdr));
+ memcpy(&iph->saddr, ah_attr->grh.sgid_attr->gid.raw + 12,
+ sizeof(struct in_addr));
+ memcpy(&iph->daddr, ah_attr->grh.dgid.raw + 12,
+ sizeof(struct in_addr));
+ } else {
+ skb_push(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ ip6h = ipv6_hdr(skb);
+ ip6h->version = 6;
+ ip6h->nexthdr = IPPROTO_UDP;
+ memcpy(&ip6h->flow_lbl, &ah_attr->grh.flow_label,
+ sizeof(*ip6h->flow_lbl));
+ memcpy(&ip6h->saddr, ah_attr->grh.sgid_attr->gid.raw,
+ sizeof(struct in6_addr));
+ memcpy(&ip6h->daddr, ah_attr->grh.dgid.raw,
+ sizeof(struct in6_addr));
+ }
+
+ skb_push(skb, sizeof(struct ethhdr));
+ skb_reset_mac_header(skb);
+ eth = eth_hdr(skb);
+ skb->protocol = eth->h_proto = htons(is_ipv4 ? ETH_P_IP : ETH_P_IPV6);
+ rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr, NULL, smac);
+ memcpy(eth->h_source, smac, ETH_ALEN);
+ memcpy(eth->h_dest, ah_attr->roce.dmac, ETH_ALEN);
+
+ return skb;
+}
+
+static struct net_device *rdma_get_xmit_slave_udp(struct ib_device *device,
+ struct net_device *master,
+ struct rdma_ah_attr *ah_attr,
+ gfp_t flags)
+{
+ struct net_device *slave;
+ struct sk_buff *skb;
+
+ skb = rdma_build_skb(device, master, ah_attr, flags);
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+
+ rcu_read_lock();
+ slave = netdev_get_xmit_slave(master, skb,
+ !!(device->lag_flags &
+ RDMA_LAG_FLAGS_HASH_ALL_SLAVES));
+ if (slave)
+ dev_hold(slave);
+ rcu_read_unlock();
+ kfree_skb(skb);
+ return slave;
+}
+
+void rdma_lag_put_ah_roce_slave(struct net_device *xmit_slave)
+{
+ if (xmit_slave)
+ dev_put(xmit_slave);
+}
+
+struct net_device *rdma_lag_get_ah_roce_slave(struct ib_device *device,
+ struct rdma_ah_attr *ah_attr,
+ gfp_t flags)
+{
+ struct net_device *slave = NULL;
+ struct net_device *master;
+
+ if (!(ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE &&
+ ah_attr->grh.sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
+ ah_attr->grh.flow_label))
+ return NULL;
+
+ rcu_read_lock();
+ master = rdma_read_gid_attr_ndev_rcu(ah_attr->grh.sgid_attr);
+ if (IS_ERR(master)) {
+ rcu_read_unlock();
+ return master;
+ }
+ dev_hold(master);
+ rcu_read_unlock();
+
+ if (!netif_is_bond_master(master))
+ goto put;
+
+ slave = rdma_get_xmit_slave_udp(device, master, ah_attr, flags);
+put:
+ dev_put(master);
+ return slave;
+}
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index c54db13fa9b0..186e0d652e8b 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -85,7 +85,6 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests
module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
-/* Client ID 0 is used for snoop-only clients */
static DEFINE_XARRAY_ALLOC1(ib_mad_clients);
static u32 ib_mad_client_next;
static struct list_head ib_mad_port_list;
@@ -483,141 +482,12 @@ error1:
}
EXPORT_SYMBOL(ib_register_mad_agent);
-static inline int is_snooping_sends(int mad_snoop_flags)
-{
- return (mad_snoop_flags &
- (/*IB_MAD_SNOOP_POSTED_SENDS |
- IB_MAD_SNOOP_RMPP_SENDS |*/
- IB_MAD_SNOOP_SEND_COMPLETIONS /*|
- IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS*/));
-}
-
-static inline int is_snooping_recvs(int mad_snoop_flags)
-{
- return (mad_snoop_flags &
- (IB_MAD_SNOOP_RECVS /*|
- IB_MAD_SNOOP_RMPP_RECVS*/));
-}
-
-static int register_snoop_agent(struct ib_mad_qp_info *qp_info,
- struct ib_mad_snoop_private *mad_snoop_priv)
-{
- struct ib_mad_snoop_private **new_snoop_table;
- unsigned long flags;
- int i;
-
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- /* Check for empty slot in array. */
- for (i = 0; i < qp_info->snoop_table_size; i++)
- if (!qp_info->snoop_table[i])
- break;
-
- if (i == qp_info->snoop_table_size) {
- /* Grow table. */
- new_snoop_table = krealloc(qp_info->snoop_table,
- sizeof mad_snoop_priv *
- (qp_info->snoop_table_size + 1),
- GFP_ATOMIC);
- if (!new_snoop_table) {
- i = -ENOMEM;
- goto out;
- }
-
- qp_info->snoop_table = new_snoop_table;
- qp_info->snoop_table_size++;
- }
- qp_info->snoop_table[i] = mad_snoop_priv;
- atomic_inc(&qp_info->snoop_count);
-out:
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
- return i;
-}
-
-struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
- u8 port_num,
- enum ib_qp_type qp_type,
- int mad_snoop_flags,
- ib_mad_snoop_handler snoop_handler,
- ib_mad_recv_handler recv_handler,
- void *context)
-{
- struct ib_mad_port_private *port_priv;
- struct ib_mad_agent *ret;
- struct ib_mad_snoop_private *mad_snoop_priv;
- int qpn;
- int err;
-
- /* Validate parameters */
- if ((is_snooping_sends(mad_snoop_flags) && !snoop_handler) ||
- (is_snooping_recvs(mad_snoop_flags) && !recv_handler)) {
- ret = ERR_PTR(-EINVAL);
- goto error1;
- }
- qpn = get_spl_qp_index(qp_type);
- if (qpn == -1) {
- ret = ERR_PTR(-EINVAL);
- goto error1;
- }
- port_priv = ib_get_mad_port(device, port_num);
- if (!port_priv) {
- ret = ERR_PTR(-ENODEV);
- goto error1;
- }
- /* Allocate structures */
- mad_snoop_priv = kzalloc(sizeof *mad_snoop_priv, GFP_KERNEL);
- if (!mad_snoop_priv) {
- ret = ERR_PTR(-ENOMEM);
- goto error1;
- }
-
- /* Now, fill in the various structures */
- mad_snoop_priv->qp_info = &port_priv->qp_info[qpn];
- mad_snoop_priv->agent.device = device;
- mad_snoop_priv->agent.recv_handler = recv_handler;
- mad_snoop_priv->agent.snoop_handler = snoop_handler;
- mad_snoop_priv->agent.context = context;
- mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp;
- mad_snoop_priv->agent.port_num = port_num;
- mad_snoop_priv->mad_snoop_flags = mad_snoop_flags;
- init_completion(&mad_snoop_priv->comp);
-
- err = ib_mad_agent_security_setup(&mad_snoop_priv->agent, qp_type);
- if (err) {
- ret = ERR_PTR(err);
- goto error2;
- }
-
- mad_snoop_priv->snoop_index = register_snoop_agent(
- &port_priv->qp_info[qpn],
- mad_snoop_priv);
- if (mad_snoop_priv->snoop_index < 0) {
- ret = ERR_PTR(mad_snoop_priv->snoop_index);
- goto error3;
- }
-
- atomic_set(&mad_snoop_priv->refcount, 1);
- return &mad_snoop_priv->agent;
-error3:
- ib_mad_agent_security_cleanup(&mad_snoop_priv->agent);
-error2:
- kfree(mad_snoop_priv);
-error1:
- return ret;
-}
-EXPORT_SYMBOL(ib_register_mad_snoop);
-
static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
{
if (atomic_dec_and_test(&mad_agent_priv->refcount))
complete(&mad_agent_priv->comp);
}
-static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv)
-{
- if (atomic_dec_and_test(&mad_snoop_priv->refcount))
- complete(&mad_snoop_priv->comp);
-}
-
static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
{
struct ib_mad_port_private *port_priv;
@@ -650,25 +520,6 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
kfree_rcu(mad_agent_priv, rcu);
}
-static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
-{
- struct ib_mad_qp_info *qp_info;
- unsigned long flags;
-
- qp_info = mad_snoop_priv->qp_info;
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- qp_info->snoop_table[mad_snoop_priv->snoop_index] = NULL;
- atomic_dec(&qp_info->snoop_count);
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
-
- deref_snoop_agent(mad_snoop_priv);
- wait_for_completion(&mad_snoop_priv->comp);
-
- ib_mad_agent_security_cleanup(&mad_snoop_priv->agent);
-
- kfree(mad_snoop_priv);
-}
-
/*
* ib_unregister_mad_agent - Unregisters a client from using MAD services
*
@@ -677,20 +528,11 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
{
struct ib_mad_agent_private *mad_agent_priv;
- struct ib_mad_snoop_private *mad_snoop_priv;
-
- /* If the TID is zero, the agent can only snoop. */
- if (mad_agent->hi_tid) {
- mad_agent_priv = container_of(mad_agent,
- struct ib_mad_agent_private,
- agent);
- unregister_mad_agent(mad_agent_priv);
- } else {
- mad_snoop_priv = container_of(mad_agent,
- struct ib_mad_snoop_private,
- agent);
- unregister_mad_snoop(mad_snoop_priv);
- }
+
+ mad_agent_priv = container_of(mad_agent,
+ struct ib_mad_agent_private,
+ agent);
+ unregister_mad_agent(mad_agent_priv);
}
EXPORT_SYMBOL(ib_unregister_mad_agent);
@@ -706,57 +548,6 @@ static void dequeue_mad(struct ib_mad_list_head *mad_list)
spin_unlock_irqrestore(&mad_queue->lock, flags);
}
-static void snoop_send(struct ib_mad_qp_info *qp_info,
- struct ib_mad_send_buf *send_buf,
- struct ib_mad_send_wc *mad_send_wc,
- int mad_snoop_flags)
-{
- struct ib_mad_snoop_private *mad_snoop_priv;
- unsigned long flags;
- int i;
-
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- for (i = 0; i < qp_info->snoop_table_size; i++) {
- mad_snoop_priv = qp_info->snoop_table[i];
- if (!mad_snoop_priv ||
- !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
- continue;
-
- atomic_inc(&mad_snoop_priv->refcount);
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
- mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent,
- send_buf, mad_send_wc);
- deref_snoop_agent(mad_snoop_priv);
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- }
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
-}
-
-static void snoop_recv(struct ib_mad_qp_info *qp_info,
- struct ib_mad_recv_wc *mad_recv_wc,
- int mad_snoop_flags)
-{
- struct ib_mad_snoop_private *mad_snoop_priv;
- unsigned long flags;
- int i;
-
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- for (i = 0; i < qp_info->snoop_table_size; i++) {
- mad_snoop_priv = qp_info->snoop_table[i];
- if (!mad_snoop_priv ||
- !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
- continue;
-
- atomic_inc(&mad_snoop_priv->refcount);
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
- mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, NULL,
- mad_recv_wc);
- deref_snoop_agent(mad_snoop_priv);
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- }
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
-}
-
static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid,
u16 pkey_index, u8 port_num, struct ib_wc *wc)
{
@@ -2289,9 +2080,6 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad;
recv->header.recv_wc.recv_buf.grh = &recv->grh;
- if (atomic_read(&qp_info->snoop_count))
- snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS);
-
/* Validate MAD */
if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa))
goto out;
@@ -2538,9 +2326,6 @@ retry:
mad_send_wc.send_buf = &mad_send_wr->send_buf;
mad_send_wc.status = wc->status;
mad_send_wc.vendor_err = wc->vendor_err;
- if (atomic_read(&qp_info->snoop_count))
- snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc,
- IB_MAD_SNOOP_SEND_COMPLETIONS);
ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
if (queued_send_wr) {
@@ -2782,10 +2567,6 @@ static void local_completions(struct work_struct *work)
local->mad_priv->header.recv_wc.recv_buf.grh = NULL;
local->mad_priv->header.recv_wc.recv_buf.mad =
(struct ib_mad *)local->mad_priv->mad;
- if (atomic_read(&recv_mad_agent->qp_info->snoop_count))
- snoop_recv(recv_mad_agent->qp_info,
- &local->mad_priv->header.recv_wc,
- IB_MAD_SNOOP_RECVS);
recv_mad_agent->agent.recv_handler(
&recv_mad_agent->agent,
&local->mad_send_wr->send_buf,
@@ -2800,10 +2581,6 @@ local_send_completion:
mad_send_wc.status = IB_WC_SUCCESS;
mad_send_wc.vendor_err = 0;
mad_send_wc.send_buf = &local->mad_send_wr->send_buf;
- if (atomic_read(&mad_agent_priv->qp_info->snoop_count))
- snoop_send(mad_agent_priv->qp_info,
- &local->mad_send_wr->send_buf,
- &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS);
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
&mad_send_wc);
@@ -3119,10 +2896,6 @@ static void init_mad_qp(struct ib_mad_port_private *port_priv,
init_mad_queue(qp_info, &qp_info->send_queue);
init_mad_queue(qp_info, &qp_info->recv_queue);
INIT_LIST_HEAD(&qp_info->overflow_list);
- spin_lock_init(&qp_info->snoop_lock);
- qp_info->snoop_table = NULL;
- qp_info->snoop_table_size = 0;
- atomic_set(&qp_info->snoop_count, 0);
}
static int create_mad_qp(struct ib_mad_qp_info *qp_info,
@@ -3166,7 +2939,6 @@ static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
return;
ib_destroy_qp(qp_info->qp);
- kfree(qp_info->snoop_table);
}
/*
@@ -3304,9 +3076,11 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
return 0;
}
-static void ib_mad_init_device(struct ib_device *device)
+static int ib_mad_init_device(struct ib_device *device)
{
int start, i;
+ unsigned int count = 0;
+ int ret;
start = rdma_start_port(device);
@@ -3314,17 +3088,23 @@ static void ib_mad_init_device(struct ib_device *device)
if (!rdma_cap_ib_mad(device, i))
continue;
- if (ib_mad_port_open(device, i)) {
+ ret = ib_mad_port_open(device, i);
+ if (ret) {
dev_err(&device->dev, "Couldn't open port %d\n", i);
goto error;
}
- if (ib_agent_port_open(device, i)) {
+ ret = ib_agent_port_open(device, i);
+ if (ret) {
dev_err(&device->dev,
"Couldn't open port %d for agents\n", i);
goto error_agent;
}
+ count++;
}
- return;
+ if (!count)
+ return -EOPNOTSUPP;
+
+ return 0;
error_agent:
if (ib_mad_port_close(device, i))
@@ -3341,6 +3121,7 @@ error:
if (ib_mad_port_close(device, i))
dev_err(&device->dev, "Couldn't close port %d\n", i);
}
+ return ret;
}
static void ib_mad_remove_device(struct ib_device *device, void *client_data)
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 9c2d8b7f1af9..740f03ecc05d 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -42,7 +42,7 @@
#include <rdma/ib_cache.h>
#include "sa.h"
-static void mcast_add_one(struct ib_device *device);
+static int mcast_add_one(struct ib_device *device);
static void mcast_remove_one(struct ib_device *device, void *client_data);
static struct ib_client mcast_client = {
@@ -815,7 +815,7 @@ static void mcast_event_handler(struct ib_event_handler *handler,
}
}
-static void mcast_add_one(struct ib_device *device)
+static int mcast_add_one(struct ib_device *device)
{
struct mcast_device *dev;
struct mcast_port *port;
@@ -825,7 +825,7 @@ static void mcast_add_one(struct ib_device *device)
dev = kmalloc(struct_size(dev, port, device->phys_port_cnt),
GFP_KERNEL);
if (!dev)
- return;
+ return -ENOMEM;
dev->start_port = rdma_start_port(device);
dev->end_port = rdma_end_port(device);
@@ -845,7 +845,7 @@ static void mcast_add_one(struct ib_device *device)
if (!count) {
kfree(dev);
- return;
+ return -EOPNOTSUPP;
}
dev->device = device;
@@ -853,6 +853,7 @@ static void mcast_add_one(struct ib_device *device)
INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler);
ib_register_event_handler(&dev->event_handler);
+ return 0;
}
static void mcast_remove_one(struct ib_device *device, void *client_data)
@@ -861,9 +862,6 @@ static void mcast_remove_one(struct ib_device *device, void *client_data)
struct mcast_port *port;
int i;
- if (!dev)
- return;
-
ib_unregister_event_handler(&dev->event_handler);
flush_workqueue(mcast_wq);
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index 557efbf29197..614cff89fc71 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -129,7 +129,7 @@ static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
qp->integrity_en);
int i, j, ret = 0, count = 0;
- ctx->nr_ops = (sg_cnt + pages_per_mr - 1) / pages_per_mr;
+ ctx->nr_ops = DIV_ROUND_UP(sg_cnt, pages_per_mr);
ctx->reg = kcalloc(ctx->nr_ops, sizeof(*ctx->reg), GFP_KERNEL);
if (!ctx->reg) {
ret = -ENOMEM;
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 74e0058fcf9e..5c878646ff62 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -174,7 +174,7 @@ static const struct nla_policy ib_nl_policy[LS_NLA_TYPE_MAX] = {
};
-static void ib_sa_add_one(struct ib_device *device);
+static int ib_sa_add_one(struct ib_device *device);
static void ib_sa_remove_one(struct ib_device *device, void *client_data);
static struct ib_client sa_client = {
@@ -1412,17 +1412,13 @@ void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute)
EXPORT_SYMBOL(ib_sa_pack_path);
static bool ib_sa_opa_pathrecord_support(struct ib_sa_client *client,
- struct ib_device *device,
+ struct ib_sa_device *sa_dev,
u8 port_num)
{
- struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
struct ib_sa_port *port;
unsigned long flags;
bool ret = false;
- if (!sa_dev)
- return ret;
-
port = &sa_dev->port[port_num - sa_dev->start_port];
spin_lock_irqsave(&port->classport_lock, flags);
if (!port->classport_info.valid)
@@ -1450,8 +1446,8 @@ enum opa_pr_supported {
* query is possible.
*/
static int opa_pr_query_possible(struct ib_sa_client *client,
- struct ib_device *device,
- u8 port_num,
+ struct ib_sa_device *sa_dev,
+ struct ib_device *device, u8 port_num,
struct sa_path_rec *rec)
{
struct ib_port_attr port_attr;
@@ -1459,7 +1455,7 @@ static int opa_pr_query_possible(struct ib_sa_client *client,
if (ib_query_port(device, port_num, &port_attr))
return PR_NOT_SUPPORTED;
- if (ib_sa_opa_pathrecord_support(client, device, port_num))
+ if (ib_sa_opa_pathrecord_support(client, sa_dev, port_num))
return PR_OPA_SUPPORTED;
if (port_attr.lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
@@ -1574,7 +1570,8 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
query->sa_query.port = port;
if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
- status = opa_pr_query_possible(client, device, port_num, rec);
+ status = opa_pr_query_possible(client, sa_dev, device, port_num,
+ rec);
if (status == PR_NOT_SUPPORTED) {
ret = -EINVAL;
goto err1;
@@ -2325,18 +2322,19 @@ static void ib_sa_event(struct ib_event_handler *handler,
}
}
-static void ib_sa_add_one(struct ib_device *device)
+static int ib_sa_add_one(struct ib_device *device)
{
struct ib_sa_device *sa_dev;
int s, e, i;
int count = 0;
+ int ret;
s = rdma_start_port(device);
e = rdma_end_port(device);
sa_dev = kzalloc(struct_size(sa_dev, port, e - s + 1), GFP_KERNEL);
if (!sa_dev)
- return;
+ return -ENOMEM;
sa_dev->start_port = s;
sa_dev->end_port = e;
@@ -2356,8 +2354,10 @@ static void ib_sa_add_one(struct ib_device *device)
ib_register_mad_agent(device, i + s, IB_QPT_GSI,
NULL, 0, send_handler,
recv_handler, sa_dev, 0);
- if (IS_ERR(sa_dev->port[i].agent))
+ if (IS_ERR(sa_dev->port[i].agent)) {
+ ret = PTR_ERR(sa_dev->port[i].agent);
goto err;
+ }
INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work,
@@ -2366,8 +2366,10 @@ static void ib_sa_add_one(struct ib_device *device)
count++;
}
- if (!count)
+ if (!count) {
+ ret = -EOPNOTSUPP;
goto free;
+ }
ib_set_client_data(device, &sa_client, sa_dev);
@@ -2386,7 +2388,7 @@ static void ib_sa_add_one(struct ib_device *device)
update_sm_ah(&sa_dev->port[i].update_task);
}
- return;
+ return 0;
err:
while (--i >= 0) {
@@ -2395,7 +2397,7 @@ err:
}
free:
kfree(sa_dev);
- return;
+ return ret;
}
static void ib_sa_remove_one(struct ib_device *device, void *client_data)
@@ -2403,9 +2405,6 @@ static void ib_sa_remove_one(struct ib_device *device, void *client_data)
struct ib_sa_device *sa_dev = client_data;
int i;
- if (!sa_dev)
- return;
-
ib_unregister_event_handler(&sa_dev->event_handler);
flush_workqueue(ib_wq);
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 16b6cf57fa85..06127c800a49 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -845,7 +845,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
struct sockaddr *addr;
int ret = 0;
- if (out_len < sizeof(resp))
+ if (out_len < offsetof(struct rdma_ucm_query_route_resp, ibdev_index))
return -ENOSPC;
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
@@ -869,6 +869,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
goto out;
resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
+ resp.ibdev_index = ctx->cm_id->device->index;
resp.port_num = ctx->cm_id->port_num;
if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num))
@@ -880,8 +881,8 @@ static ssize_t ucma_query_route(struct ucma_file *file,
out:
mutex_unlock(&ctx->mutex);
- if (copy_to_user(u64_to_user_ptr(cmd.response),
- &resp, sizeof(resp)))
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp,
+ min_t(size_t, out_len, sizeof(resp))))
ret = -EFAULT;
ucma_put_ctx(ctx);
@@ -895,6 +896,7 @@ static void ucma_query_device_addr(struct rdma_cm_id *cm_id,
return;
resp->node_guid = (__force __u64) cm_id->device->node_guid;
+ resp->ibdev_index = cm_id->device->index;
resp->port_num = cm_id->port_num;
resp->pkey = (__force __u16) cpu_to_be16(
ib_addr_get_pkey(&cm_id->route.addr.dev_addr));
@@ -907,7 +909,7 @@ static ssize_t ucma_query_addr(struct ucma_context *ctx,
struct sockaddr *addr;
int ret = 0;
- if (out_len < sizeof(resp))
+ if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index))
return -ENOSPC;
memset(&resp, 0, sizeof resp);
@@ -922,7 +924,7 @@ static ssize_t ucma_query_addr(struct ucma_context *ctx,
ucma_query_device_addr(ctx->cm_id, &resp);
- if (copy_to_user(response, &resp, sizeof(resp)))
+ if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp))))
ret = -EFAULT;
return ret;
@@ -974,7 +976,7 @@ static ssize_t ucma_query_gid(struct ucma_context *ctx,
struct sockaddr_ib *addr;
int ret = 0;
- if (out_len < sizeof(resp))
+ if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index))
return -ENOSPC;
memset(&resp, 0, sizeof resp);
@@ -1007,7 +1009,7 @@ static ssize_t ucma_query_gid(struct ucma_context *ctx,
&ctx->cm_id->route.addr.dst_addr);
}
- if (copy_to_user(response, &resp, sizeof(resp)))
+ if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp))))
ret = -EFAULT;
return ret;
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index da229eab5903..b0d0b522cc76 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -142,7 +142,7 @@ static dev_t dynamic_issm_dev;
static DEFINE_IDA(umad_ida);
-static void ib_umad_add_one(struct ib_device *device);
+static int ib_umad_add_one(struct ib_device *device);
static void ib_umad_remove_one(struct ib_device *device, void *client_data);
static void ib_umad_dev_free(struct kref *kref)
@@ -1352,37 +1352,41 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
put_device(&port->dev);
}
-static void ib_umad_add_one(struct ib_device *device)
+static int ib_umad_add_one(struct ib_device *device)
{
struct ib_umad_device *umad_dev;
int s, e, i;
int count = 0;
+ int ret;
s = rdma_start_port(device);
e = rdma_end_port(device);
umad_dev = kzalloc(struct_size(umad_dev, ports, e - s + 1), GFP_KERNEL);
if (!umad_dev)
- return;
+ return -ENOMEM;
kref_init(&umad_dev->kref);
for (i = s; i <= e; ++i) {
if (!rdma_cap_ib_mad(device, i))
continue;
- if (ib_umad_init_port(device, i, umad_dev,
- &umad_dev->ports[i - s]))
+ ret = ib_umad_init_port(device, i, umad_dev,
+ &umad_dev->ports[i - s]);
+ if (ret)
goto err;
count++;
}
- if (!count)
+ if (!count) {
+ ret = -EOPNOTSUPP;
goto free;
+ }
ib_set_client_data(device, &umad_client, umad_dev);
- return;
+ return 0;
err:
while (--i >= s) {
@@ -1394,6 +1398,7 @@ err:
free:
/* balances kref_init */
ib_umad_dev_put(umad_dev);
+ return ret;
}
static void ib_umad_remove_one(struct ib_device *device, void *client_data)
@@ -1401,9 +1406,6 @@ static void ib_umad_remove_one(struct ib_device *device, void *client_data)
struct ib_umad_device *umad_dev = client_data;
unsigned int i;
- if (!umad_dev)
- return;
-
rdma_for_each_port (device, i) {
if (rdma_cap_ib_mad(device, i))
ib_umad_kill_port(
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 2d4083bf4a04..d52eb870533b 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -75,7 +75,7 @@ static dev_t dynamic_uverbs_dev;
static struct class *uverbs_class;
static DEFINE_IDA(uverbs_ida);
-static void ib_uverbs_add_one(struct ib_device *device);
+static int ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
/*
@@ -296,6 +296,8 @@ static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
spin_lock_irq(&ev_queue->lock);
if (!list_empty(&ev_queue->event_list))
pollflags = EPOLLIN | EPOLLRDNORM;
+ else if (ev_queue->is_closed)
+ pollflags = EPOLLERR;
spin_unlock_irq(&ev_queue->lock);
return pollflags;
@@ -1089,7 +1091,7 @@ static int ib_uverbs_create_uapi(struct ib_device *device,
return 0;
}
-static void ib_uverbs_add_one(struct ib_device *device)
+static int ib_uverbs_add_one(struct ib_device *device)
{
int devnum;
dev_t base;
@@ -1097,16 +1099,16 @@ static void ib_uverbs_add_one(struct ib_device *device)
int ret;
if (!device->ops.alloc_ucontext)
- return;
+ return -EOPNOTSUPP;
uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL);
if (!uverbs_dev)
- return;
+ return -ENOMEM;
ret = init_srcu_struct(&uverbs_dev->disassociate_srcu);
if (ret) {
kfree(uverbs_dev);
- return;
+ return -ENOMEM;
}
device_initialize(&uverbs_dev->dev);
@@ -1126,15 +1128,18 @@ static void ib_uverbs_add_one(struct ib_device *device)
devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1,
GFP_KERNEL);
- if (devnum < 0)
+ if (devnum < 0) {
+ ret = -ENOMEM;
goto err;
+ }
uverbs_dev->devnum = devnum;
if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
else
base = IB_UVERBS_BASE_DEV + devnum;
- if (ib_uverbs_create_uapi(device, uverbs_dev))
+ ret = ib_uverbs_create_uapi(device, uverbs_dev);
+ if (ret)
goto err_uapi;
uverbs_dev->dev.devt = base;
@@ -1149,7 +1154,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
goto err_uapi;
ib_set_client_data(device, &uverbs_client, uverbs_dev);
- return;
+ return 0;
err_uapi:
ida_free(&uverbs_ida, devnum);
@@ -1158,7 +1163,7 @@ err:
ib_uverbs_comp_dev(uverbs_dev);
wait_for_completion(&uverbs_dev->comp);
put_device(&uverbs_dev->dev);
- return;
+ return ret;
}
static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
@@ -1201,9 +1206,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
struct ib_uverbs_device *uverbs_dev = client_data;
int wait_clients = 1;
- if (!uverbs_dev)
- return;
-
cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev);
ida_free(&uverbs_ida, uverbs_dev->devnum);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 56a71337112c..bf0249f76ae9 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -50,6 +50,7 @@
#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>
#include <rdma/rw.h>
+#include <rdma/lag.h>
#include "core_priv.h"
#include <trace/events/rdma_core.h>
@@ -500,8 +501,10 @@ rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr,
static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
u32 flags,
- struct ib_udata *udata)
+ struct ib_udata *udata,
+ struct net_device *xmit_slave)
{
+ struct rdma_ah_init_attr init_attr = {};
struct ib_device *device = pd->device;
struct ib_ah *ah;
int ret;
@@ -521,8 +524,11 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
ah->pd = pd;
ah->type = ah_attr->type;
ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL);
+ init_attr.ah_attr = ah_attr;
+ init_attr.flags = flags;
+ init_attr.xmit_slave = xmit_slave;
- ret = device->ops.create_ah(ah, ah_attr, flags, udata);
+ ret = device->ops.create_ah(ah, &init_attr, udata);
if (ret) {
kfree(ah);
return ERR_PTR(ret);
@@ -547,15 +553,22 @@ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
u32 flags)
{
const struct ib_gid_attr *old_sgid_attr;
+ struct net_device *slave;
struct ib_ah *ah;
int ret;
ret = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
if (ret)
return ERR_PTR(ret);
-
- ah = _rdma_create_ah(pd, ah_attr, flags, NULL);
-
+ slave = rdma_lag_get_ah_roce_slave(pd->device, ah_attr,
+ (flags & RDMA_CREATE_AH_SLEEPABLE) ?
+ GFP_KERNEL : GFP_ATOMIC);
+ if (IS_ERR(slave)) {
+ rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
+ return (void *)slave;
+ }
+ ah = _rdma_create_ah(pd, ah_attr, flags, NULL, slave);
+ rdma_lag_put_ah_roce_slave(slave);
rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
return ah;
}
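
The "return (void *)slave;" above works because the LAG helper reports failure through an error-encoded pointer, which can then be forwarded through a differently typed pointer return value. A simplified userspace stand-in for the ERR_PTR()/IS_ERR() convention (not the kernel's exact definitions):

#include <errno.h>

#define DEMO_MAX_ERRNO		4095
#define DEMO_ERR_PTR(err)	((void *)(long)(err))
#define DEMO_IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-DEMO_MAX_ERRNO)

static void *demo_get_slave(int fail)
{
	return fail ? DEMO_ERR_PTR(-ENODEV) : (void *)0;	/* NULL means "no slave", not an error */
}

static void *demo_create_handle(void)
{
	void *slave = demo_get_slave(0);

	if (DEMO_IS_ERR(slave))
		return slave;	/* the errno travels through an unrelated pointer type */
	/* ... otherwise create the handle; slave may legitimately be NULL ... */
	return (void *)0;
}
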
@@ -594,7 +607,8 @@ struct ib_ah *rdma_create_user_ah(struct ib_pd *pd,
}
}
- ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE, udata);
+ ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE,
+ udata, NULL);
out:
rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
@@ -1633,11 +1647,35 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
const struct ib_gid_attr *old_sgid_attr_alt_av;
int ret;
+ attr->xmit_slave = NULL;
if (attr_mask & IB_QP_AV) {
ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr,
&old_sgid_attr_av);
if (ret)
return ret;
+
+ if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
+ is_qp_type_connected(qp)) {
+ struct net_device *slave;
+
+ /*
+ * If the user provided the qp_attr then we have to
+ * resolve it. Kerne users have to provide already
+			 * resolve it. Kernel users have to provide already

+ */
+ if (udata) {
+ ret = ib_resolve_eth_dmac(qp->device,
+ &attr->ah_attr);
+ if (ret)
+ goto out_av;
+ }
+ slave = rdma_lag_get_ah_roce_slave(qp->device,
+ &attr->ah_attr,
+ GFP_KERNEL);
+ if (IS_ERR(slave))
+ goto out_av;
+ attr->xmit_slave = slave;
+ }
}
if (attr_mask & IB_QP_ALT_PATH) {
/*
@@ -1664,18 +1702,6 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
}
}
- /*
- * If the user provided the qp_attr then we have to resolve it. Kernel
- * users have to provide already resolved rdma_ah_attr's
- */
- if (udata && (attr_mask & IB_QP_AV) &&
- attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
- is_qp_type_connected(qp)) {
- ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
- if (ret)
- goto out;
- }
-
if (rdma_ib_or_roce(qp->device, port)) {
if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
dev_warn(&qp->device->dev,
@@ -1717,8 +1743,10 @@ out:
if (attr_mask & IB_QP_ALT_PATH)
rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av);
out_av:
- if (attr_mask & IB_QP_AV)
+ if (attr_mask & IB_QP_AV) {
+ rdma_lag_put_ah_roce_slave(attr->xmit_slave);
rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av);
+ }
return ret;
}
@@ -2574,6 +2602,7 @@ EXPORT_SYMBOL(ib_map_mr_sg_pi);
* @page_size: page vector desired page size
*
* Constraints:
+ *
* - The first sg element is allowed to have an offset.
* - Each sg element must either be aligned to page_size or virtually
* contiguous to the previous element. In case an sg element has a
@@ -2607,10 +2636,12 @@ EXPORT_SYMBOL(ib_map_mr_sg);
* @mr: memory region
* @sgl: dma mapped scatterlist
* @sg_nents: number of entries in sg
- * @sg_offset_p: IN: start offset in bytes into sg
- * OUT: offset in bytes for element n of the sg of the first
+ * @sg_offset_p: ==== =======================================================
+ * IN start offset in bytes into sg
+ * OUT offset in bytes for element n of the sg of the first
* byte that has not been processed where n is the return
* value of this function.
+ * ==== =======================================================
* @set_page: driver page assignment function pointer
*
* Core service helper for drivers to convert the largest
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 95f6d493d1b9..5a7c090204c5 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -631,11 +631,12 @@ static u8 bnxt_re_stack_to_dev_nw_type(enum rdma_network_type ntype)
return nw_type;
}
-int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr,
- u32 flags, struct ib_udata *udata)
+int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
{
struct ib_pd *ib_pd = ib_ah->pd;
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
struct bnxt_re_dev *rdev = pd->rdev;
const struct ib_gid_attr *sgid_attr;
@@ -673,7 +674,8 @@ int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr,
memcpy(ah->qplib_ah.dmac, ah_attr->roce.dmac, ETH_ALEN);
rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah,
- !(flags & RDMA_CREATE_AH_SLEEPABLE));
+ !(init_attr->flags &
+ RDMA_CREATE_AH_SLEEPABLE));
if (rc) {
ibdev_err(&rdev->ibdev, "Failed to allocate HW AH");
return rc;
@@ -856,7 +858,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
return -EFAULT;
- bytes = (qplib_qp->sq.max_wqe * BNXT_QPLIB_MAX_SQE_ENTRY_SIZE);
+ bytes = (qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size);
/* Consider mapping PSN search memory only for RC QPs. */
if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) {
psn_sz = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ?
@@ -879,7 +881,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
qplib_qp->qp_handle = ureq.qp_handle;
if (!qp->qplib_qp.srq) {
- bytes = (qplib_qp->rq.max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
+ bytes = (qplib_qp->rq.max_wqe * qplib_qp->rq.wqe_size);
bytes = PAGE_ALIGN(bytes);
umem = ib_umem_get(&rdev->ibdev, ureq.qprva, bytes,
IB_ACCESS_LOCAL_WRITE);
@@ -976,6 +978,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp
qp->qplib_qp.sig_type = true;
/* Shadow QP SQ depth should be same as QP1 RQ depth */
+ qp->qplib_qp.sq.wqe_size = bnxt_re_get_swqe_size();
qp->qplib_qp.sq.max_wqe = qp1_qp->rq.max_wqe;
qp->qplib_qp.sq.max_sge = 2;
/* Q full delta can be 1 since it is internal QP */
@@ -986,6 +989,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp
qp->qplib_qp.scq = qp1_qp->scq;
qp->qplib_qp.rcq = qp1_qp->rcq;
+ qp->qplib_qp.rq.wqe_size = bnxt_re_get_rwqe_size();
qp->qplib_qp.rq.max_wqe = qp1_qp->rq.max_wqe;
qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge;
/* Q full delta can be 1 since it is internal QP */
@@ -1021,10 +1025,12 @@ static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp,
struct bnxt_qplib_dev_attr *dev_attr;
struct bnxt_qplib_qp *qplqp;
struct bnxt_re_dev *rdev;
+ struct bnxt_qplib_q *rq;
int entries;
rdev = qp->rdev;
qplqp = &qp->qplib_qp;
+ rq = &qplqp->rq;
dev_attr = &rdev->dev_attr;
if (init_attr->srq) {
@@ -1036,23 +1042,21 @@ static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp,
return -EINVAL;
}
qplqp->srq = &srq->qplib_srq;
- qplqp->rq.max_wqe = 0;
+ rq->max_wqe = 0;
} else {
+ rq->wqe_size = bnxt_re_get_rwqe_size();
/* Allocate 1 more than what's provided so posting max doesn't
* mean empty.
*/
entries = roundup_pow_of_two(init_attr->cap.max_recv_wr + 1);
- qplqp->rq.max_wqe = min_t(u32, entries,
- dev_attr->max_qp_wqes + 1);
-
- qplqp->rq.q_full_delta = qplqp->rq.max_wqe -
- init_attr->cap.max_recv_wr;
- qplqp->rq.max_sge = init_attr->cap.max_recv_sge;
- if (qplqp->rq.max_sge > dev_attr->max_qp_sges)
- qplqp->rq.max_sge = dev_attr->max_qp_sges;
+ rq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1);
+ rq->q_full_delta = rq->max_wqe - init_attr->cap.max_recv_wr;
+ rq->max_sge = init_attr->cap.max_recv_sge;
+ if (rq->max_sge > dev_attr->max_qp_sges)
+ rq->max_sge = dev_attr->max_qp_sges;
}
- qplqp->rq.sg_info.pgsize = PAGE_SIZE;
- qplqp->rq.sg_info.pgshft = PAGE_SHIFT;
+ rq->sg_info.pgsize = PAGE_SIZE;
+ rq->sg_info.pgshft = PAGE_SHIFT;
return 0;
}
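
The RQ sizing above can be checked with plain arithmetic: one extra slot is reserved so that posting the full max_recv_wr never makes the ring look empty, and q_full_delta records the slack. A standalone sketch with illustrative numbers (the real limit comes from dev_attr->max_qp_wqes):

#include <stdio.h>

static unsigned int roundup_pow_of_two_u32(unsigned int v)
{
	unsigned int r = 1;

	while (r < v)
		r <<= 1;
	return r;
}

int main(void)
{
	unsigned int max_recv_wr = 1000;	/* requested depth */
	unsigned int max_qp_wqes = 65535;	/* illustrative device limit */
	unsigned int entries = roundup_pow_of_two_u32(max_recv_wr + 1);
	unsigned int max_wqe = entries < max_qp_wqes + 1 ? entries : max_qp_wqes + 1;
	unsigned int q_full_delta = max_wqe - max_recv_wr;

	printf("max_wqe=%u q_full_delta=%u\n", max_wqe, q_full_delta);	/* 1024 and 24 */
	return 0;
}
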
@@ -1080,15 +1084,18 @@ static void bnxt_re_init_sq_attr(struct bnxt_re_qp *qp,
struct bnxt_qplib_dev_attr *dev_attr;
struct bnxt_qplib_qp *qplqp;
struct bnxt_re_dev *rdev;
+ struct bnxt_qplib_q *sq;
int entries;
rdev = qp->rdev;
qplqp = &qp->qplib_qp;
+ sq = &qplqp->sq;
dev_attr = &rdev->dev_attr;
- qplqp->sq.max_sge = init_attr->cap.max_send_sge;
- if (qplqp->sq.max_sge > dev_attr->max_qp_sges)
- qplqp->sq.max_sge = dev_attr->max_qp_sges;
+ sq->wqe_size = bnxt_re_get_swqe_size();
+ sq->max_sge = init_attr->cap.max_send_sge;
+ if (sq->max_sge > dev_attr->max_qp_sges)
+ sq->max_sge = dev_attr->max_qp_sges;
/*
* Change the SQ depth if user has requested minimum using
* configfs. Only supported for kernel consumers
@@ -1096,9 +1103,9 @@ static void bnxt_re_init_sq_attr(struct bnxt_re_qp *qp,
entries = init_attr->cap.max_send_wr;
/* Allocate 128 + 1 more than what's provided */
entries = roundup_pow_of_two(entries + BNXT_QPLIB_RESERVED_QP_WRS + 1);
- qplqp->sq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes +
- BNXT_QPLIB_RESERVED_QP_WRS + 1);
- qplqp->sq.q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1;
+ sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes +
+ BNXT_QPLIB_RESERVED_QP_WRS + 1);
+ sq->q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1;
/*
* Reserving one slot for Phantom WQE. Application can
* post one extra entry in this case. But allowing this to avoid
@@ -1511,7 +1518,7 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev,
if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
return -EFAULT;
- bytes = (qplib_srq->max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
+ bytes = (qplib_srq->max_wqe * qplib_srq->wqe_size);
bytes = PAGE_ALIGN(bytes);
umem = ib_umem_get(&rdev->ibdev, ureq.srqva, bytes,
IB_ACCESS_LOCAL_WRITE);
@@ -1534,15 +1541,20 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata)
{
- struct ib_pd *ib_pd = ib_srq->pd;
- struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
- struct bnxt_re_dev *rdev = pd->rdev;
- struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
- struct bnxt_re_srq *srq =
- container_of(ib_srq, struct bnxt_re_srq, ib_srq);
+ struct bnxt_qplib_dev_attr *dev_attr;
struct bnxt_qplib_nq *nq = NULL;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_srq *srq;
+ struct bnxt_re_pd *pd;
+ struct ib_pd *ib_pd;
int rc, entries;
+ ib_pd = ib_srq->pd;
+ pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ rdev = pd->rdev;
+ dev_attr = &rdev->dev_attr;
+ srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq);
+
if (srq_init_attr->attr.max_wr >= dev_attr->max_srq_wqes) {
ibdev_err(&rdev->ibdev, "Create CQ failed - max exceeded");
rc = -EINVAL;
@@ -1563,8 +1575,9 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
entries = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1);
if (entries > dev_attr->max_srq_wqes + 1)
entries = dev_attr->max_srq_wqes + 1;
-
srq->qplib_srq.max_wqe = entries;
+
+ srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size();
srq->qplib_srq.max_sge = srq_init_attr->attr.max_sge;
srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit;
srq->srq_limit = srq_init_attr->attr.srq_limit;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index 23d972da5652..204c0849ba28 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -142,6 +142,16 @@ struct bnxt_re_ucontext {
spinlock_t sh_lock; /* protect shpg */
};
+static inline u16 bnxt_re_get_swqe_size(void)
+{
+ return sizeof(struct sq_send);
+}
+
+static inline u16 bnxt_re_get_rwqe_size(void)
+{
+ return sizeof(struct rq_wqe);
+}
+
int bnxt_re_query_device(struct ib_device *ibdev,
struct ib_device_attr *ib_attr,
struct ib_udata *udata);
@@ -160,7 +170,7 @@ enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
u8 port_num);
int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
void bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags,
+int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
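
The new helpers above feed the user-memory sizing earlier in the file, where the mapped length is now computed from the per-queue stride rather than a single driver-wide constant. A small sketch of that arithmetic; the page-size macro and the 128-byte stride are illustrative stand-ins for PAGE_ALIGN() and sizeof(struct sq_send):

#include <stdio.h>

#define DEMO_PAGE_SIZE		4096UL
#define DEMO_PAGE_ALIGN(x)	(((x) + DEMO_PAGE_SIZE - 1) & ~(DEMO_PAGE_SIZE - 1))

int main(void)
{
	unsigned long max_wqe = 1024;	/* queue depth */
	unsigned long wqe_size = 128;	/* per-queue WQE stride */
	unsigned long bytes = DEMO_PAGE_ALIGN(max_wqe * wqe_size);

	printf("bytes to pin/map: %lu\n", bytes);	/* 131072 */
	return 0;
}
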
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 899a5d2c100e..c5e29577cd43 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -300,12 +300,12 @@ static void bnxt_qplib_service_nq(unsigned long data)
{
struct bnxt_qplib_nq *nq = (struct bnxt_qplib_nq *)data;
struct bnxt_qplib_hwq *hwq = &nq->hwq;
- struct nq_base *nqe, **nq_ptr;
- struct bnxt_qplib_cq *cq;
- int num_cqne_processed = 0;
int num_srqne_processed = 0;
+ int num_cqne_processed = 0;
+ struct bnxt_qplib_cq *cq;
int budget = nq->budget;
u32 sw_cons, raw_cons;
+ struct nq_base *nqe;
uintptr_t q_handle;
u16 type;
@@ -314,8 +314,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
raw_cons = hwq->cons;
while (budget--) {
sw_cons = HWQ_CMP(raw_cons, hwq);
- nq_ptr = (struct nq_base **)hwq->pbl_ptr;
- nqe = &nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)];
+ nqe = bnxt_qplib_get_qe(hwq, sw_cons, NULL);
if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements))
break;
@@ -392,13 +391,11 @@ static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance)
{
struct bnxt_qplib_nq *nq = dev_instance;
struct bnxt_qplib_hwq *hwq = &nq->hwq;
- struct nq_base **nq_ptr;
u32 sw_cons;
/* Prefetch the NQ element */
sw_cons = HWQ_CMP(hwq->cons, hwq);
- nq_ptr = (struct nq_base **)nq->hwq.pbl_ptr;
- prefetch(&nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)]);
+ prefetch(bnxt_qplib_get_qe(hwq, sw_cons, NULL));
/* Fan out to CPU affinitized kthreads? */
tasklet_schedule(&nq->nq_tasklet);
@@ -612,12 +609,13 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
struct cmdq_create_srq req;
struct bnxt_qplib_pbl *pbl;
u16 cmd_flags = 0;
+ u16 pg_sz_lvl;
int rc, idx;
hwq_attr.res = res;
hwq_attr.sginfo = &srq->sg_info;
hwq_attr.depth = srq->max_wqe;
- hwq_attr.stride = BNXT_QPLIB_MAX_RQE_ENTRY_SIZE;
+ hwq_attr.stride = srq->wqe_size;
hwq_attr.type = HWQ_TYPE_QUEUE;
rc = bnxt_qplib_alloc_init_hwq(&srq->hwq, &hwq_attr);
if (rc)
@@ -638,22 +636,11 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
req.srq_size = cpu_to_le16((u16)srq->hwq.max_elements);
pbl = &srq->hwq.pbl[PBL_LVL_0];
- req.pg_size_lvl = cpu_to_le16((((u16)srq->hwq.level &
- CMDQ_CREATE_SRQ_LVL_MASK) <<
- CMDQ_CREATE_SRQ_LVL_SFT) |
- (pbl->pg_size == ROCE_PG_SIZE_4K ?
- CMDQ_CREATE_SRQ_PG_SIZE_PG_4K :
- pbl->pg_size == ROCE_PG_SIZE_8K ?
- CMDQ_CREATE_SRQ_PG_SIZE_PG_8K :
- pbl->pg_size == ROCE_PG_SIZE_64K ?
- CMDQ_CREATE_SRQ_PG_SIZE_PG_64K :
- pbl->pg_size == ROCE_PG_SIZE_2M ?
- CMDQ_CREATE_SRQ_PG_SIZE_PG_2M :
- pbl->pg_size == ROCE_PG_SIZE_8M ?
- CMDQ_CREATE_SRQ_PG_SIZE_PG_8M :
- pbl->pg_size == ROCE_PG_SIZE_1G ?
- CMDQ_CREATE_SRQ_PG_SIZE_PG_1G :
- CMDQ_CREATE_SRQ_PG_SIZE_PG_4K));
+ pg_sz_lvl = ((u16)bnxt_qplib_base_pg_size(&srq->hwq) <<
+ CMDQ_CREATE_SRQ_PG_SIZE_SFT);
+ pg_sz_lvl |= (srq->hwq.level & CMDQ_CREATE_SRQ_LVL_MASK) <<
+ CMDQ_CREATE_SRQ_LVL_SFT;
+ req.pg_size_lvl = cpu_to_le16(pg_sz_lvl);
req.pbl = cpu_to_le64(pbl->pg_map_arr[0]);
req.pd_id = cpu_to_le32(srq->pd->id);
req.eventq_id = cpu_to_le16(srq->eventq_hw_ring_id);
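
The pg_sz_lvl encoding above replaces a long conditional chain with a plain shift-and-mask packing: the HWRM page-size code goes into the upper bits and the PBL level into the lower ones. A toy version with illustrative shift and mask values (the real ones are the CMDQ_CREATE_SRQ_* constants from roce_hsi.h):

#include <stdio.h>

#define DEMO_LVL_MASK		0x3U
#define DEMO_PG_SIZE_SFT	4

int main(void)
{
	unsigned int level = 2;		/* PBL indirection level */
	unsigned int pg_size_code = 1;	/* e.g. the 8K page-size code */
	unsigned int pg_sz_lvl = (pg_size_code << DEMO_PG_SIZE_SFT) |
				 (level & DEMO_LVL_MASK);

	printf("pg_size_lvl = 0x%x\n", pg_sz_lvl);	/* 0x12 */
	return 0;
}
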
@@ -740,7 +727,7 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
struct bnxt_qplib_swqe *wqe)
{
struct bnxt_qplib_hwq *srq_hwq = &srq->hwq;
- struct rq_wqe *srqe, **srqe_ptr;
+ struct rq_wqe *srqe;
struct sq_sge *hw_sge;
u32 sw_prod, sw_cons, count = 0;
int i, rc = 0, next;
@@ -758,9 +745,8 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
spin_unlock(&srq_hwq->lock);
sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq);
- srqe_ptr = (struct rq_wqe **)srq_hwq->pbl_ptr;
- srqe = &srqe_ptr[RQE_PG(sw_prod)][RQE_IDX(sw_prod)];
- memset(srqe, 0, BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
+ srqe = bnxt_qplib_get_qe(srq_hwq, sw_prod, NULL);
+ memset(srqe, 0, srq->wqe_size);
/* Calculate wqe_size16 and data_len */
for (i = 0, hw_sge = (struct sq_sge *)srqe->data;
i < wqe->num_sge; i++, hw_sge++) {
@@ -809,6 +795,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
struct bnxt_qplib_pbl *pbl;
u16 cmd_flags = 0;
u32 qp_flags = 0;
+ u8 pg_sz_lvl;
int rc;
RCFW_CMD_PREP(req, CREATE_QP1, cmd_flags);
@@ -822,7 +809,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
hwq_attr.res = res;
hwq_attr.sginfo = &sq->sg_info;
hwq_attr.depth = sq->max_wqe;
- hwq_attr.stride = BNXT_QPLIB_MAX_SQE_ENTRY_SIZE;
+ hwq_attr.stride = sq->wqe_size;
hwq_attr.type = HWQ_TYPE_QUEUE;
rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr);
if (rc)
@@ -835,33 +822,18 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
}
pbl = &sq->hwq.pbl[PBL_LVL_0];
req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
- req.sq_pg_size_sq_lvl =
- ((sq->hwq.level & CMDQ_CREATE_QP1_SQ_LVL_MASK)
- << CMDQ_CREATE_QP1_SQ_LVL_SFT) |
- (pbl->pg_size == ROCE_PG_SIZE_4K ?
- CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_4K :
- pbl->pg_size == ROCE_PG_SIZE_8K ?
- CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8K :
- pbl->pg_size == ROCE_PG_SIZE_64K ?
- CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_64K :
- pbl->pg_size == ROCE_PG_SIZE_2M ?
- CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_2M :
- pbl->pg_size == ROCE_PG_SIZE_8M ?
- CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8M :
- pbl->pg_size == ROCE_PG_SIZE_1G ?
- CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_1G :
- CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_4K);
+ pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) <<
+ CMDQ_CREATE_QP1_SQ_PG_SIZE_SFT);
+ pg_sz_lvl |= (sq->hwq.level & CMDQ_CREATE_QP1_SQ_LVL_MASK);
+ req.sq_pg_size_sq_lvl = pg_sz_lvl;
if (qp->scq)
req.scq_cid = cpu_to_le32(qp->scq->id);
-
- qp_flags |= CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE;
-
/* RQ */
if (rq->max_wqe) {
hwq_attr.res = res;
hwq_attr.sginfo = &rq->sg_info;
- hwq_attr.stride = BNXT_QPLIB_MAX_RQE_ENTRY_SIZE;
+ hwq_attr.stride = rq->wqe_size;
hwq_attr.depth = qp->rq.max_wqe;
hwq_attr.type = HWQ_TYPE_QUEUE;
rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr);
@@ -876,32 +848,20 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
}
pbl = &rq->hwq.pbl[PBL_LVL_0];
req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
- req.rq_pg_size_rq_lvl =
- ((rq->hwq.level & CMDQ_CREATE_QP1_RQ_LVL_MASK) <<
- CMDQ_CREATE_QP1_RQ_LVL_SFT) |
- (pbl->pg_size == ROCE_PG_SIZE_4K ?
- CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_4K :
- pbl->pg_size == ROCE_PG_SIZE_8K ?
- CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8K :
- pbl->pg_size == ROCE_PG_SIZE_64K ?
- CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_64K :
- pbl->pg_size == ROCE_PG_SIZE_2M ?
- CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_2M :
- pbl->pg_size == ROCE_PG_SIZE_8M ?
- CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8M :
- pbl->pg_size == ROCE_PG_SIZE_1G ?
- CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_1G :
- CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_4K);
+ pg_sz_lvl = (bnxt_qplib_base_pg_size(&rq->hwq) <<
+ CMDQ_CREATE_QP1_RQ_PG_SIZE_SFT);
+ pg_sz_lvl |= (rq->hwq.level & CMDQ_CREATE_QP1_RQ_LVL_MASK);
+ req.rq_pg_size_rq_lvl = pg_sz_lvl;
if (qp->rcq)
req.rcq_cid = cpu_to_le32(qp->rcq->id);
}
-
/* Header buffer - allow hdr_buf pass in */
rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp);
if (rc) {
rc = -ENOMEM;
goto fail;
}
+ qp_flags |= CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE;
req.qp_flags = cpu_to_le32(qp_flags);
req.sq_size = cpu_to_le32(sq->hwq.max_elements);
req.rq_size = cpu_to_le32(rq->hwq.max_elements);
@@ -948,23 +908,47 @@ exit:
return rc;
}
+static void bnxt_qplib_init_psn_ptr(struct bnxt_qplib_qp *qp, int size)
+{
+ struct bnxt_qplib_hwq *hwq;
+ struct bnxt_qplib_q *sq;
+ u64 fpsne, psne, psn_pg;
+ u16 indx_pad = 0, indx;
+ u16 pg_num, pg_indx;
+ u64 *page;
+
+ sq = &qp->sq;
+ hwq = &sq->hwq;
+
+ fpsne = (u64)bnxt_qplib_get_qe(hwq, hwq->max_elements, &psn_pg);
+ if (!IS_ALIGNED(fpsne, PAGE_SIZE))
+ indx_pad = ALIGN(fpsne, PAGE_SIZE) / size;
+
+ page = (u64 *)psn_pg;
+ for (indx = 0; indx < hwq->max_elements; indx++) {
+ pg_num = (indx + indx_pad) / (PAGE_SIZE / size);
+ pg_indx = (indx + indx_pad) % (PAGE_SIZE / size);
+ psne = page[pg_num] + pg_indx * size;
+ sq->swq[indx].psn_ext = (struct sq_psn_search_ext *)psne;
+ sq->swq[indx].psn_search = (struct sq_psn_search *)psne;
+ }
+}
+
int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct bnxt_qplib_hwq_attr hwq_attr = {};
- unsigned long int psn_search, poff = 0;
struct bnxt_qplib_sg_info sginfo = {};
- struct sq_psn_search **psn_search_ptr;
struct bnxt_qplib_q *sq = &qp->sq;
struct bnxt_qplib_q *rq = &qp->rq;
- int i, rc, req_size, psn_sz = 0;
- struct sq_send **hw_sq_send_ptr;
struct creq_create_qp_resp resp;
+ int rc, req_size, psn_sz = 0;
struct bnxt_qplib_hwq *xrrq;
u16 cmd_flags = 0, max_ssge;
- struct cmdq_create_qp req;
struct bnxt_qplib_pbl *pbl;
+ struct cmdq_create_qp req;
u32 qp_flags = 0;
+ u8 pg_sz_lvl;
u16 max_rsge;
RCFW_CMD_PREP(req, CREATE_QP, cmd_flags);
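
The loop in bnxt_qplib_init_psn_ptr() above places one fixed-size PSN search entry per SQ slot into the pages that follow the SQ itself, skipping indx_pad entries when the first entry does not start on a page boundary. The placement reduces to a division and a modulo; the 8-byte entry size below is only an example:

#include <stdio.h>

#define DEMO_PAGE_SIZE	4096U

int main(void)
{
	unsigned int size = 8;				/* bytes per PSN search entry */
	unsigned int per_page = DEMO_PAGE_SIZE / size;	/* 512 entries per page */
	unsigned int indx_pad = 5;			/* entries skipped on page 0 */
	unsigned int indx = 1000;			/* SQ slot of interest */
	unsigned int pg_num = (indx + indx_pad) / per_page;
	unsigned int pg_indx = (indx + indx_pad) % per_page;

	printf("slot %u -> page %u, entry %u\n", indx, pg_num, pg_indx);	/* page 1, entry 493 */
	return 0;
}
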
@@ -983,7 +967,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
hwq_attr.res = res;
hwq_attr.sginfo = &sq->sg_info;
- hwq_attr.stride = BNXT_QPLIB_MAX_SQE_ENTRY_SIZE;
+ hwq_attr.stride = sq->wqe_size;
hwq_attr.depth = sq->max_wqe;
hwq_attr.aux_stride = psn_sz;
hwq_attr.aux_depth = hwq_attr.depth;
@@ -997,64 +981,25 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
rc = -ENOMEM;
goto fail_sq;
}
- hw_sq_send_ptr = (struct sq_send **)sq->hwq.pbl_ptr;
- if (psn_sz) {
- psn_search_ptr = (struct sq_psn_search **)
- &hw_sq_send_ptr[get_sqe_pg
- (sq->hwq.max_elements)];
- psn_search = (unsigned long int)
- &hw_sq_send_ptr[get_sqe_pg(sq->hwq.max_elements)]
- [get_sqe_idx(sq->hwq.max_elements)];
- if (psn_search & ~PAGE_MASK) {
- /* If the psn_search does not start on a page boundary,
- * then calculate the offset
- */
- poff = (psn_search & ~PAGE_MASK) /
- BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE;
- }
- for (i = 0; i < sq->hwq.max_elements; i++) {
- sq->swq[i].psn_search =
- &psn_search_ptr[get_psne_pg(i + poff)]
- [get_psne_idx(i + poff)];
- /*psns_ext will be used only for P5 chips. */
- sq->swq[i].psn_ext =
- (struct sq_psn_search_ext *)
- &psn_search_ptr[get_psne_pg(i + poff)]
- [get_psne_idx(i + poff)];
- }
- }
+
+ if (psn_sz)
+ bnxt_qplib_init_psn_ptr(qp, psn_sz);
+
pbl = &sq->hwq.pbl[PBL_LVL_0];
req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
- req.sq_pg_size_sq_lvl =
- ((sq->hwq.level & CMDQ_CREATE_QP_SQ_LVL_MASK)
- << CMDQ_CREATE_QP_SQ_LVL_SFT) |
- (pbl->pg_size == ROCE_PG_SIZE_4K ?
- CMDQ_CREATE_QP_SQ_PG_SIZE_PG_4K :
- pbl->pg_size == ROCE_PG_SIZE_8K ?
- CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8K :
- pbl->pg_size == ROCE_PG_SIZE_64K ?
- CMDQ_CREATE_QP_SQ_PG_SIZE_PG_64K :
- pbl->pg_size == ROCE_PG_SIZE_2M ?
- CMDQ_CREATE_QP_SQ_PG_SIZE_PG_2M :
- pbl->pg_size == ROCE_PG_SIZE_8M ?
- CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8M :
- pbl->pg_size == ROCE_PG_SIZE_1G ?
- CMDQ_CREATE_QP_SQ_PG_SIZE_PG_1G :
- CMDQ_CREATE_QP_SQ_PG_SIZE_PG_4K);
+ pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) <<
+ CMDQ_CREATE_QP_SQ_PG_SIZE_SFT);
+ pg_sz_lvl |= (sq->hwq.level & CMDQ_CREATE_QP_SQ_LVL_MASK);
+ req.sq_pg_size_sq_lvl = pg_sz_lvl;
if (qp->scq)
req.scq_cid = cpu_to_le32(qp->scq->id);
- qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE;
- qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED;
- if (qp->sig_type)
- qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION;
-
/* RQ */
if (rq->max_wqe) {
hwq_attr.res = res;
hwq_attr.sginfo = &rq->sg_info;
- hwq_attr.stride = BNXT_QPLIB_MAX_RQE_ENTRY_SIZE;
+ hwq_attr.stride = rq->wqe_size;
hwq_attr.depth = rq->max_wqe;
hwq_attr.aux_stride = 0;
hwq_attr.aux_depth = 0;
@@ -1071,22 +1016,10 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
}
pbl = &rq->hwq.pbl[PBL_LVL_0];
req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
- req.rq_pg_size_rq_lvl =
- ((rq->hwq.level & CMDQ_CREATE_QP_RQ_LVL_MASK) <<
- CMDQ_CREATE_QP_RQ_LVL_SFT) |
- (pbl->pg_size == ROCE_PG_SIZE_4K ?
- CMDQ_CREATE_QP_RQ_PG_SIZE_PG_4K :
- pbl->pg_size == ROCE_PG_SIZE_8K ?
- CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8K :
- pbl->pg_size == ROCE_PG_SIZE_64K ?
- CMDQ_CREATE_QP_RQ_PG_SIZE_PG_64K :
- pbl->pg_size == ROCE_PG_SIZE_2M ?
- CMDQ_CREATE_QP_RQ_PG_SIZE_PG_2M :
- pbl->pg_size == ROCE_PG_SIZE_8M ?
- CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8M :
- pbl->pg_size == ROCE_PG_SIZE_1G ?
- CMDQ_CREATE_QP_RQ_PG_SIZE_PG_1G :
- CMDQ_CREATE_QP_RQ_PG_SIZE_PG_4K);
+ pg_sz_lvl = (bnxt_qplib_base_pg_size(&rq->hwq) <<
+ CMDQ_CREATE_QP_RQ_PG_SIZE_SFT);
+ pg_sz_lvl |= (rq->hwq.level & CMDQ_CREATE_QP_RQ_LVL_MASK);
+ req.rq_pg_size_rq_lvl = pg_sz_lvl;
} else {
/* SRQ */
if (qp->srq) {
@@ -1097,7 +1030,13 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
if (qp->rcq)
req.rcq_cid = cpu_to_le32(qp->rcq->id);
+
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE;
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED;
+ if (qp->sig_type)
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION;
req.qp_flags = cpu_to_le32(qp_flags);
+
req.sq_size = cpu_to_le32(sq->hwq.max_elements);
req.rq_size = cpu_to_le32(rq->hwq.max_elements);
qp->sq_hdr_buf = NULL;
@@ -1483,12 +1422,11 @@ bail:
static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp)
{
struct bnxt_qplib_hwq *cq_hwq = &cq->hwq;
- struct cq_base *hw_cqe, **hw_cqe_ptr;
+ struct cq_base *hw_cqe;
int i;
for (i = 0; i < cq_hwq->max_elements; i++) {
- hw_cqe_ptr = (struct cq_base **)cq_hwq->pbl_ptr;
- hw_cqe = &hw_cqe_ptr[CQE_PG(i)][CQE_IDX(i)];
+ hw_cqe = bnxt_qplib_get_qe(cq_hwq, i, NULL);
if (!CQE_CMP_VALID(hw_cqe, i, cq_hwq->max_elements))
continue;
/*
@@ -1615,6 +1553,34 @@ void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp,
return NULL;
}
+static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe,
+ struct bnxt_qplib_swq *swq)
+{
+ struct sq_psn_search_ext *psns_ext;
+ struct sq_psn_search *psns;
+ u32 flg_npsn;
+ u32 op_spsn;
+
+ psns = swq->psn_search;
+ psns_ext = swq->psn_ext;
+
+ op_spsn = ((swq->start_psn << SQ_PSN_SEARCH_START_PSN_SFT) &
+ SQ_PSN_SEARCH_START_PSN_MASK);
+ op_spsn |= ((wqe->type << SQ_PSN_SEARCH_OPCODE_SFT) &
+ SQ_PSN_SEARCH_OPCODE_MASK);
+ flg_npsn = ((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) &
+ SQ_PSN_SEARCH_NEXT_PSN_MASK);
+
+ if (bnxt_qplib_is_chip_gen_p5(qp->cctx)) {
+ psns_ext->opcode_start_psn = cpu_to_le32(op_spsn);
+ psns_ext->flags_next_psn = cpu_to_le32(flg_npsn);
+ } else {
+ psns->opcode_start_psn = cpu_to_le32(op_spsn);
+ psns->flags_next_psn = cpu_to_le32(flg_npsn);
+ }
+}
+
void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp)
{
struct bnxt_qplib_q *sq = &qp->sq;
@@ -1625,16 +1591,16 @@ void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp)
int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
struct bnxt_qplib_swqe *wqe)
{
+ struct bnxt_qplib_nq_work *nq_work = NULL;
+ int i, rc = 0, data_len = 0, pkt_num = 0;
struct bnxt_qplib_q *sq = &qp->sq;
+ struct sq_send *hw_sq_send_hdr;
struct bnxt_qplib_swq *swq;
- struct sq_send *hw_sq_send_hdr, **hw_sq_send_ptr;
- struct sq_sge *hw_sge;
- struct bnxt_qplib_nq_work *nq_work = NULL;
bool sch_handler = false;
- u32 sw_prod;
+ struct sq_sge *hw_sge;
u8 wqe_size16;
- int i, rc = 0, data_len = 0, pkt_num = 0;
__le32 temp32;
+ u32 sw_prod;
if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS) {
if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
@@ -1663,11 +1629,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP;
swq->start_psn = sq->psn & BTH_PSN_MASK;
- hw_sq_send_ptr = (struct sq_send **)sq->hwq.pbl_ptr;
- hw_sq_send_hdr = &hw_sq_send_ptr[get_sqe_pg(sw_prod)]
- [get_sqe_idx(sw_prod)];
-
- memset(hw_sq_send_hdr, 0, BNXT_QPLIB_MAX_SQE_ENTRY_SIZE);
+ hw_sq_send_hdr = bnxt_qplib_get_qe(&sq->hwq, sw_prod, NULL);
+ memset(hw_sq_send_hdr, 0, sq->wqe_size);
if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) {
/* Copy the inline data */
@@ -1854,28 +1817,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
goto done;
}
swq->next_psn = sq->psn & BTH_PSN_MASK;
- if (swq->psn_search) {
- u32 opcd_spsn;
- u32 flg_npsn;
-
- opcd_spsn = ((swq->start_psn << SQ_PSN_SEARCH_START_PSN_SFT) &
- SQ_PSN_SEARCH_START_PSN_MASK);
- opcd_spsn |= ((wqe->type << SQ_PSN_SEARCH_OPCODE_SFT) &
- SQ_PSN_SEARCH_OPCODE_MASK);
- flg_npsn = ((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) &
- SQ_PSN_SEARCH_NEXT_PSN_MASK);
- if (bnxt_qplib_is_chip_gen_p5(qp->cctx)) {
- swq->psn_ext->opcode_start_psn =
- cpu_to_le32(opcd_spsn);
- swq->psn_ext->flags_next_psn =
- cpu_to_le32(flg_npsn);
- } else {
- swq->psn_search->opcode_start_psn =
- cpu_to_le32(opcd_spsn);
- swq->psn_search->flags_next_psn =
- cpu_to_le32(flg_npsn);
- }
- }
+ if (qp->type == CMDQ_CREATE_QP_TYPE_RC)
+ bnxt_qplib_fill_psn_search(qp, wqe, swq);
queue_err:
if (sch_handler) {
/* Store the ULP info in the software structures */
@@ -1918,13 +1861,13 @@ void bnxt_qplib_post_recv_db(struct bnxt_qplib_qp *qp)
int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
struct bnxt_qplib_swqe *wqe)
{
- struct bnxt_qplib_q *rq = &qp->rq;
- struct rq_wqe *rqe, **rqe_ptr;
- struct sq_sge *hw_sge;
struct bnxt_qplib_nq_work *nq_work = NULL;
+ struct bnxt_qplib_q *rq = &qp->rq;
bool sch_handler = false;
- u32 sw_prod;
+ struct sq_sge *hw_sge;
+ struct rq_wqe *rqe;
int i, rc = 0;
+ u32 sw_prod;
if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
sch_handler = true;
@@ -1941,10 +1884,8 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
rq->swq[sw_prod].wr_id = wqe->wr_id;
- rqe_ptr = (struct rq_wqe **)rq->hwq.pbl_ptr;
- rqe = &rqe_ptr[RQE_PG(sw_prod)][RQE_IDX(sw_prod)];
-
- memset(rqe, 0, BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
+ rqe = bnxt_qplib_get_qe(&rq->hwq, sw_prod, NULL);
+ memset(rqe, 0, rq->wqe_size);
/* Calculate wqe_size16 and data_len */
for (i = 0, hw_sge = (struct sq_sge *)rqe->data;
@@ -1997,9 +1938,10 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct bnxt_qplib_hwq_attr hwq_attr = {};
struct creq_create_cq_resp resp;
- struct cmdq_create_cq req;
struct bnxt_qplib_pbl *pbl;
+ struct cmdq_create_cq req;
u16 cmd_flags = 0;
+ u32 pg_sz_lvl;
int rc;
hwq_attr.res = res;
@@ -2020,22 +1962,13 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
}
req.dpi = cpu_to_le32(cq->dpi->dpi);
req.cq_handle = cpu_to_le64(cq->cq_handle);
-
req.cq_size = cpu_to_le32(cq->hwq.max_elements);
pbl = &cq->hwq.pbl[PBL_LVL_0];
- req.pg_size_lvl = cpu_to_le32(
- ((cq->hwq.level & CMDQ_CREATE_CQ_LVL_MASK) <<
- CMDQ_CREATE_CQ_LVL_SFT) |
- (pbl->pg_size == ROCE_PG_SIZE_4K ? CMDQ_CREATE_CQ_PG_SIZE_PG_4K :
- pbl->pg_size == ROCE_PG_SIZE_8K ? CMDQ_CREATE_CQ_PG_SIZE_PG_8K :
- pbl->pg_size == ROCE_PG_SIZE_64K ? CMDQ_CREATE_CQ_PG_SIZE_PG_64K :
- pbl->pg_size == ROCE_PG_SIZE_2M ? CMDQ_CREATE_CQ_PG_SIZE_PG_2M :
- pbl->pg_size == ROCE_PG_SIZE_8M ? CMDQ_CREATE_CQ_PG_SIZE_PG_8M :
- pbl->pg_size == ROCE_PG_SIZE_1G ? CMDQ_CREATE_CQ_PG_SIZE_PG_1G :
- CMDQ_CREATE_CQ_PG_SIZE_PG_4K));
-
+ pg_sz_lvl = (bnxt_qplib_base_pg_size(&cq->hwq) <<
+ CMDQ_CREATE_CQ_PG_SIZE_SFT);
+ pg_sz_lvl |= (cq->hwq.level & CMDQ_CREATE_CQ_LVL_MASK);
+ req.pg_size_lvl = cpu_to_le32(pg_sz_lvl);
req.pbl = cpu_to_le64(pbl->pg_map_arr[0]);
-
req.cq_fco_cnq_id = cpu_to_le32(
(cq->cnq_hw_ring_id & CMDQ_CREATE_CQ_CNQ_ID_MASK) <<
CMDQ_CREATE_CQ_CNQ_ID_SFT);
@@ -2194,13 +2127,13 @@ void bnxt_qplib_mark_qp_error(void *qp_handle)
static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
u32 cq_cons, u32 sw_sq_cons, u32 cqe_sq_cons)
{
- struct bnxt_qplib_q *sq = &qp->sq;
- struct bnxt_qplib_swq *swq;
u32 peek_sw_cq_cons, peek_raw_cq_cons, peek_sq_cons_idx;
- struct cq_base *peek_hwcqe, **peek_hw_cqe_ptr;
+ struct bnxt_qplib_q *sq = &qp->sq;
struct cq_req *peek_req_hwcqe;
struct bnxt_qplib_qp *peek_qp;
struct bnxt_qplib_q *peek_sq;
+ struct bnxt_qplib_swq *swq;
+ struct cq_base *peek_hwcqe;
int i, rc = 0;
/* Normal mode */
@@ -2230,9 +2163,8 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
i = cq->hwq.max_elements;
while (i--) {
peek_sw_cq_cons = HWQ_CMP((peek_sw_cq_cons), &cq->hwq);
- peek_hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr;
- peek_hwcqe = &peek_hw_cqe_ptr[CQE_PG(peek_sw_cq_cons)]
- [CQE_IDX(peek_sw_cq_cons)];
+ peek_hwcqe = bnxt_qplib_get_qe(&cq->hwq,
+ peek_sw_cq_cons, NULL);
/* If the next hwcqe is VALID */
if (CQE_CMP_VALID(peek_hwcqe, peek_raw_cq_cons,
cq->hwq.max_elements)) {
@@ -2294,11 +2226,11 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
struct bnxt_qplib_cqe **pcqe, int *budget,
u32 cq_cons, struct bnxt_qplib_qp **lib_qp)
{
- struct bnxt_qplib_qp *qp;
- struct bnxt_qplib_q *sq;
- struct bnxt_qplib_cqe *cqe;
u32 sw_sq_cons, cqe_sq_cons;
struct bnxt_qplib_swq *swq;
+ struct bnxt_qplib_cqe *cqe;
+ struct bnxt_qplib_qp *qp;
+ struct bnxt_qplib_q *sq;
int rc = 0;
qp = (struct bnxt_qplib_qp *)((unsigned long)
@@ -2408,10 +2340,10 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
struct bnxt_qplib_cqe **pcqe,
int *budget)
{
- struct bnxt_qplib_qp *qp;
- struct bnxt_qplib_q *rq;
struct bnxt_qplib_srq *srq;
struct bnxt_qplib_cqe *cqe;
+ struct bnxt_qplib_qp *qp;
+ struct bnxt_qplib_q *rq;
u32 wr_id_idx;
int rc = 0;
@@ -2483,10 +2415,10 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
struct bnxt_qplib_cqe **pcqe,
int *budget)
{
- struct bnxt_qplib_qp *qp;
- struct bnxt_qplib_q *rq;
struct bnxt_qplib_srq *srq;
struct bnxt_qplib_cqe *cqe;
+ struct bnxt_qplib_qp *qp;
+ struct bnxt_qplib_q *rq;
u32 wr_id_idx;
int rc = 0;
@@ -2561,15 +2493,13 @@ done:
bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq)
{
- struct cq_base *hw_cqe, **hw_cqe_ptr;
+ struct cq_base *hw_cqe;
u32 sw_cons, raw_cons;
bool rc = true;
raw_cons = cq->hwq.cons;
sw_cons = HWQ_CMP(raw_cons, &cq->hwq);
- hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr;
- hw_cqe = &hw_cqe_ptr[CQE_PG(sw_cons)][CQE_IDX(sw_cons)];
-
+ hw_cqe = bnxt_qplib_get_qe(&cq->hwq, sw_cons, NULL);
/* Check for Valid bit. If the CQE is valid, return false */
rc = !CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements);
return rc;
@@ -2813,7 +2743,7 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
int num_cqes, struct bnxt_qplib_qp **lib_qp)
{
- struct cq_base *hw_cqe, **hw_cqe_ptr;
+ struct cq_base *hw_cqe;
u32 sw_cons, raw_cons;
int budget, rc = 0;
@@ -2822,8 +2752,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
while (budget) {
sw_cons = HWQ_CMP(raw_cons, &cq->hwq);
- hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr;
- hw_cqe = &hw_cqe_ptr[CQE_PG(sw_cons)][CQE_IDX(sw_cons)];
+ hw_cqe = bnxt_qplib_get_qe(&cq->hwq, sw_cons, NULL);
/* Check for Valid bit */
if (!CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements))
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index 7edb70b6bb16..568ca390322c 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -45,6 +45,7 @@ struct bnxt_qplib_srq {
struct bnxt_qplib_db_info dbinfo;
u64 srq_handle;
u32 id;
+ u16 wqe_size;
u32 max_wqe;
u32 max_sge;
u32 threshold;
@@ -65,38 +66,7 @@ struct bnxt_qplib_sge {
u32 size;
};
-#define BNXT_QPLIB_MAX_SQE_ENTRY_SIZE sizeof(struct sq_send)
-
-#define SQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_SQE_ENTRY_SIZE)
-#define SQE_MAX_IDX_PER_PG (SQE_CNT_PER_PG - 1)
-
-static inline u32 get_sqe_pg(u32 val)
-{
- return ((val & ~SQE_MAX_IDX_PER_PG) / SQE_CNT_PER_PG);
-}
-
-static inline u32 get_sqe_idx(u32 val)
-{
- return (val & SQE_MAX_IDX_PER_PG);
-}
-
-#define BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE sizeof(struct sq_psn_search)
-
-#define PSNE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE)
-#define PSNE_MAX_IDX_PER_PG (PSNE_CNT_PER_PG - 1)
-
-static inline u32 get_psne_pg(u32 val)
-{
- return ((val & ~PSNE_MAX_IDX_PER_PG) / PSNE_CNT_PER_PG);
-}
-
-static inline u32 get_psne_idx(u32 val)
-{
- return (val & PSNE_MAX_IDX_PER_PG);
-}
-
#define BNXT_QPLIB_QP_MAX_SGL 6
-
struct bnxt_qplib_swq {
u64 wr_id;
int next_idx;
@@ -226,19 +196,13 @@ struct bnxt_qplib_swqe {
};
};
-#define BNXT_QPLIB_MAX_RQE_ENTRY_SIZE sizeof(struct rq_wqe)
-
-#define RQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_RQE_ENTRY_SIZE)
-#define RQE_MAX_IDX_PER_PG (RQE_CNT_PER_PG - 1)
-#define RQE_PG(x) (((x) & ~RQE_MAX_IDX_PER_PG) / RQE_CNT_PER_PG)
-#define RQE_IDX(x) ((x) & RQE_MAX_IDX_PER_PG)
-
struct bnxt_qplib_q {
struct bnxt_qplib_hwq hwq;
struct bnxt_qplib_swq *swq;
struct bnxt_qplib_db_info dbinfo;
struct bnxt_qplib_sg_info sg_info;
u32 max_wqe;
+ u16 wqe_size;
u16 q_full_delta;
u16 max_sge;
u32 psn;
@@ -256,7 +220,7 @@ struct bnxt_qplib_qp {
struct bnxt_qplib_dpi *dpi;
struct bnxt_qplib_chip_ctx *cctx;
u64 qp_handle;
-#define BNXT_QPLIB_QP_ID_INVALID 0xFFFFFFFF
+#define BNXT_QPLIB_QP_ID_INVALID 0xFFFFFFFF
u32 id;
u8 type;
u8 sig_type;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index f01e864bb611..4e211162acee 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -89,10 +89,9 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
struct creq_base *resp, void *sb, u8 is_block)
{
struct bnxt_qplib_cmdq_ctx *cmdq = &rcfw->cmdq;
- struct bnxt_qplib_cmdqe *cmdqe, **hwq_ptr;
struct bnxt_qplib_hwq *hwq = &cmdq->hwq;
struct bnxt_qplib_crsqe *crsqe;
- u32 cmdq_depth = rcfw->cmdq_depth;
+ struct bnxt_qplib_cmdqe *cmdqe;
u32 sw_prod, cmdq_prod;
struct pci_dev *pdev;
unsigned long flags;
@@ -163,13 +162,11 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
BNXT_QPLIB_CMDQE_UNITS;
}
- hwq_ptr = (struct bnxt_qplib_cmdqe **)hwq->pbl_ptr;
preq = (u8 *)req;
do {
/* Locate the next cmdq slot */
sw_prod = HWQ_CMP(hwq->prod, hwq);
- cmdqe = &hwq_ptr[get_cmdq_pg(sw_prod, cmdq_depth)]
- [get_cmdq_idx(sw_prod, cmdq_depth)];
+ cmdqe = bnxt_qplib_get_qe(hwq, sw_prod, NULL);
if (!cmdqe) {
dev_err(&pdev->dev,
"RCFW request failed with no cmdqe!\n");
@@ -378,7 +375,7 @@ static void bnxt_qplib_service_creq(unsigned long data)
struct bnxt_qplib_creq_ctx *creq = &rcfw->creq;
u32 type, budget = CREQ_ENTRY_POLL_BUDGET;
struct bnxt_qplib_hwq *hwq = &creq->hwq;
- struct creq_base *creqe, **hwq_ptr;
+ struct creq_base *creqe;
u32 sw_cons, raw_cons;
unsigned long flags;
@@ -387,8 +384,7 @@ static void bnxt_qplib_service_creq(unsigned long data)
raw_cons = hwq->cons;
while (budget > 0) {
sw_cons = HWQ_CMP(raw_cons, hwq);
- hwq_ptr = (struct creq_base **)hwq->pbl_ptr;
- creqe = &hwq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)];
+ creqe = bnxt_qplib_get_qe(hwq, sw_cons, NULL);
if (!CREQ_CMP_VALID(creqe, raw_cons, hwq->max_elements))
break;
/* The valid test of the entry must be done first before
@@ -434,7 +430,6 @@ static irqreturn_t bnxt_qplib_creq_irq(int irq, void *dev_instance)
{
struct bnxt_qplib_rcfw *rcfw = dev_instance;
struct bnxt_qplib_creq_ctx *creq;
- struct creq_base **creq_ptr;
struct bnxt_qplib_hwq *hwq;
u32 sw_cons;
@@ -442,8 +437,7 @@ static irqreturn_t bnxt_qplib_creq_irq(int irq, void *dev_instance)
hwq = &creq->hwq;
/* Prefetch the CREQ element */
sw_cons = HWQ_CMP(hwq->cons, hwq);
- creq_ptr = (struct creq_base **)creq->hwq.pbl_ptr;
- prefetch(&creq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)]);
+ prefetch(bnxt_qplib_get_qe(hwq, sw_cons, NULL));
tasklet_schedule(&creq->creq_tasklet);
@@ -468,29 +462,13 @@ int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw)
return 0;
}
-static int __get_pbl_pg_idx(struct bnxt_qplib_pbl *pbl)
-{
- return (pbl->pg_size == ROCE_PG_SIZE_4K ?
- CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_4K :
- pbl->pg_size == ROCE_PG_SIZE_8K ?
- CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_8K :
- pbl->pg_size == ROCE_PG_SIZE_64K ?
- CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_64K :
- pbl->pg_size == ROCE_PG_SIZE_2M ?
- CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_2M :
- pbl->pg_size == ROCE_PG_SIZE_8M ?
- CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_8M :
- pbl->pg_size == ROCE_PG_SIZE_1G ?
- CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_1G :
- CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_4K);
-}
-
int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
struct bnxt_qplib_ctx *ctx, int is_virtfn)
{
- struct cmdq_initialize_fw req;
struct creq_initialize_fw_resp resp;
- u16 cmd_flags = 0, level;
+ struct cmdq_initialize_fw req;
+ u16 cmd_flags = 0;
+ u8 pgsz, lvl;
int rc;
RCFW_CMD_PREP(req, INITIALIZE_FW, cmd_flags);
@@ -511,32 +489,30 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
if (bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx))
goto config_vf_res;
- level = ctx->qpc_tbl.level;
- req.qpc_pg_size_qpc_lvl = (level << CMDQ_INITIALIZE_FW_QPC_LVL_SFT) |
- __get_pbl_pg_idx(&ctx->qpc_tbl.pbl[level]);
- level = ctx->mrw_tbl.level;
- req.mrw_pg_size_mrw_lvl = (level << CMDQ_INITIALIZE_FW_MRW_LVL_SFT) |
- __get_pbl_pg_idx(&ctx->mrw_tbl.pbl[level]);
- level = ctx->srqc_tbl.level;
- req.srq_pg_size_srq_lvl = (level << CMDQ_INITIALIZE_FW_SRQ_LVL_SFT) |
- __get_pbl_pg_idx(&ctx->srqc_tbl.pbl[level]);
- level = ctx->cq_tbl.level;
- req.cq_pg_size_cq_lvl = (level << CMDQ_INITIALIZE_FW_CQ_LVL_SFT) |
- __get_pbl_pg_idx(&ctx->cq_tbl.pbl[level]);
- level = ctx->srqc_tbl.level;
- req.srq_pg_size_srq_lvl = (level << CMDQ_INITIALIZE_FW_SRQ_LVL_SFT) |
- __get_pbl_pg_idx(&ctx->srqc_tbl.pbl[level]);
- level = ctx->cq_tbl.level;
- req.cq_pg_size_cq_lvl = (level << CMDQ_INITIALIZE_FW_CQ_LVL_SFT) |
- __get_pbl_pg_idx(&ctx->cq_tbl.pbl[level]);
- level = ctx->tim_tbl.level;
- req.tim_pg_size_tim_lvl = (level << CMDQ_INITIALIZE_FW_TIM_LVL_SFT) |
- __get_pbl_pg_idx(&ctx->tim_tbl.pbl[level]);
- level = ctx->tqm_ctx.pde.level;
- req.tqm_pg_size_tqm_lvl =
- (level << CMDQ_INITIALIZE_FW_TQM_LVL_SFT) |
- __get_pbl_pg_idx(&ctx->tqm_ctx.pde.pbl[level]);
-
+ lvl = ctx->qpc_tbl.level;
+ pgsz = bnxt_qplib_base_pg_size(&ctx->qpc_tbl);
+ req.qpc_pg_size_qpc_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) |
+ lvl;
+ lvl = ctx->mrw_tbl.level;
+ pgsz = bnxt_qplib_base_pg_size(&ctx->mrw_tbl);
+ req.mrw_pg_size_mrw_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) |
+ lvl;
+ lvl = ctx->srqc_tbl.level;
+ pgsz = bnxt_qplib_base_pg_size(&ctx->srqc_tbl);
+ req.srq_pg_size_srq_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) |
+ lvl;
+ lvl = ctx->cq_tbl.level;
+ pgsz = bnxt_qplib_base_pg_size(&ctx->cq_tbl);
+ req.cq_pg_size_cq_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) |
+ lvl;
+ lvl = ctx->tim_tbl.level;
+ pgsz = bnxt_qplib_base_pg_size(&ctx->tim_tbl);
+ req.tim_pg_size_tim_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) |
+ lvl;
+ lvl = ctx->tqm_ctx.pde.level;
+ pgsz = bnxt_qplib_base_pg_size(&ctx->tqm_ctx.pde);
+ req.tqm_pg_size_tqm_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) |
+ lvl;
req.qpc_page_dir =
cpu_to_le64(ctx->qpc_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
req.mrw_page_dir =
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 411fce3493b6..157387636d00 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -87,12 +87,6 @@ static inline u32 bnxt_qplib_cmdqe_page_size(u32 depth)
return (bnxt_qplib_cmdqe_npages(depth) * PAGE_SIZE);
}
-static inline u32 bnxt_qplib_cmdqe_cnt_per_pg(u32 depth)
-{
- return (bnxt_qplib_cmdqe_page_size(depth) /
- BNXT_QPLIB_CMDQE_UNITS);
-}
-
/* Set the cmd_size to a factor of CMDQE unit */
static inline void bnxt_qplib_set_cmd_slots(struct cmdq_base *req)
{
@@ -100,30 +94,12 @@ static inline void bnxt_qplib_set_cmd_slots(struct cmdq_base *req)
BNXT_QPLIB_CMDQE_UNITS;
}
-#define MAX_CMDQ_IDX(depth) ((depth) - 1)
-
-static inline u32 bnxt_qplib_max_cmdq_idx_per_pg(u32 depth)
-{
- return (bnxt_qplib_cmdqe_cnt_per_pg(depth) - 1);
-}
-
#define RCFW_MAX_COOKIE_VALUE 0x7FFF
#define RCFW_CMD_IS_BLOCKING 0x8000
#define RCFW_BLOCKED_CMD_WAIT_COUNT 0x4E20
#define HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK 0x1000900020011ULL
-static inline u32 get_cmdq_pg(u32 val, u32 depth)
-{
- return (val & ~(bnxt_qplib_max_cmdq_idx_per_pg(depth))) /
- (bnxt_qplib_cmdqe_cnt_per_pg(depth));
-}
-
-static inline u32 get_cmdq_idx(u32 val, u32 depth)
-{
- return val & (bnxt_qplib_max_cmdq_idx_per_pg(depth));
-}
-
/* Crsq buf is 1024-Byte */
struct bnxt_qplib_crsbe {
u8 data[1024];
@@ -133,76 +109,9 @@ struct bnxt_qplib_crsbe {
/* Allocate 1 per QP for async error notification for now */
#define BNXT_QPLIB_CREQE_MAX_CNT (64 * 1024)
#define BNXT_QPLIB_CREQE_UNITS 16 /* 16-Bytes per prod unit */
-#define BNXT_QPLIB_CREQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_CREQE_UNITS)
-
-#define MAX_CREQ_IDX (BNXT_QPLIB_CREQE_MAX_CNT - 1)
-#define MAX_CREQ_IDX_PER_PG (BNXT_QPLIB_CREQE_CNT_PER_PG - 1)
-
-static inline u32 get_creq_pg(u32 val)
-{
- return (val & ~MAX_CREQ_IDX_PER_PG) / BNXT_QPLIB_CREQE_CNT_PER_PG;
-}
-
-static inline u32 get_creq_idx(u32 val)
-{
- return val & MAX_CREQ_IDX_PER_PG;
-}
-
-#define BNXT_QPLIB_CREQE_PER_PG (PAGE_SIZE / sizeof(struct creq_base))
-
#define CREQ_CMP_VALID(hdr, raw_cons, cp_bit) \
(!!((hdr)->v & CREQ_BASE_V) == \
!((raw_cons) & (cp_bit)))
-
-#define CREQ_DB_KEY_CP (0x2 << CMPL_DOORBELL_KEY_SFT)
-#define CREQ_DB_IDX_VALID CMPL_DOORBELL_IDX_VALID
-#define CREQ_DB_IRQ_DIS CMPL_DOORBELL_MASK
-#define CREQ_DB_CP_FLAGS_REARM (CREQ_DB_KEY_CP | \
- CREQ_DB_IDX_VALID)
-#define CREQ_DB_CP_FLAGS (CREQ_DB_KEY_CP | \
- CREQ_DB_IDX_VALID | \
- CREQ_DB_IRQ_DIS)
-
-static inline void bnxt_qplib_ring_creq_db64(void __iomem *db, u32 index,
- u32 xid, bool arm)
-{
- u64 val = 0;
-
- val = xid & DBC_DBC_XID_MASK;
- val |= DBC_DBC_PATH_ROCE;
- val |= arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ;
- val <<= 32;
- val |= index & DBC_DBC_INDEX_MASK;
-
- writeq(val, db);
-}
-
-static inline void bnxt_qplib_ring_creq_db_rearm(void __iomem *db, u32 raw_cons,
- u32 max_elements, u32 xid,
- bool gen_p5)
-{
- u32 index = raw_cons & (max_elements - 1);
-
- if (gen_p5)
- bnxt_qplib_ring_creq_db64(db, index, xid, true);
- else
- writel(CREQ_DB_CP_FLAGS_REARM | (index & DBC_DBC32_XID_MASK),
- db);
-}
-
-static inline void bnxt_qplib_ring_creq_db(void __iomem *db, u32 raw_cons,
- u32 max_elements, u32 xid,
- bool gen_p5)
-{
- u32 index = raw_cons & (max_elements - 1);
-
- if (gen_p5)
- bnxt_qplib_ring_creq_db64(db, index, xid, true);
- else
- writel(CREQ_DB_CP_FLAGS | (index & DBC_DBC32_XID_MASK),
- db);
-}
-
#define CREQ_ENTRY_POLL_BUDGET 0x100
/* HWQ */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index cab1adf1fed9..7efa6e5dce62 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -347,6 +347,7 @@ done:
hwq->depth = hwq_attr->depth;
hwq->max_elements = depth;
hwq->element_size = stride;
+ hwq->qe_ppg = pg_size / stride;
/* For direct access to the elements */
lvl = hwq->level;
if (hwq_attr->sginfo->nopte && hwq->level)
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index 95b645dbbc2d..c29cbd3a2d7b 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -80,6 +80,15 @@ enum bnxt_qplib_pbl_lvl {
#define ROCE_PG_SIZE_8M (8 * 1024 * 1024)
#define ROCE_PG_SIZE_1G (1024 * 1024 * 1024)
+enum bnxt_qplib_hwrm_pg_size {
+ BNXT_QPLIB_HWRM_PG_SIZE_4K = 0,
+ BNXT_QPLIB_HWRM_PG_SIZE_8K = 1,
+ BNXT_QPLIB_HWRM_PG_SIZE_64K = 2,
+ BNXT_QPLIB_HWRM_PG_SIZE_2M = 3,
+ BNXT_QPLIB_HWRM_PG_SIZE_8M = 4,
+ BNXT_QPLIB_HWRM_PG_SIZE_1G = 5,
+};
+
struct bnxt_qplib_reg_desc {
u8 bar_id;
resource_size_t bar_base;
@@ -126,6 +135,7 @@ struct bnxt_qplib_hwq {
u32 max_elements;
u32 depth;
u16 element_size; /* Size of each entry */
+	u16				qe_ppg;	/* queue entries per page */
u32 prod; /* raw */
u32 cons; /* raw */
@@ -263,6 +273,49 @@ static inline u8 bnxt_qplib_get_ring_type(struct bnxt_qplib_chip_ctx *cctx)
RING_ALLOC_REQ_RING_TYPE_ROCE_CMPL;
}
+static inline u8 bnxt_qplib_base_pg_size(struct bnxt_qplib_hwq *hwq)
+{
+ u8 pg_size = BNXT_QPLIB_HWRM_PG_SIZE_4K;
+ struct bnxt_qplib_pbl *pbl;
+
+ pbl = &hwq->pbl[PBL_LVL_0];
+ switch (pbl->pg_size) {
+ case ROCE_PG_SIZE_4K:
+ pg_size = BNXT_QPLIB_HWRM_PG_SIZE_4K;
+ break;
+ case ROCE_PG_SIZE_8K:
+ pg_size = BNXT_QPLIB_HWRM_PG_SIZE_8K;
+ break;
+ case ROCE_PG_SIZE_64K:
+ pg_size = BNXT_QPLIB_HWRM_PG_SIZE_64K;
+ break;
+ case ROCE_PG_SIZE_2M:
+ pg_size = BNXT_QPLIB_HWRM_PG_SIZE_2M;
+ break;
+ case ROCE_PG_SIZE_8M:
+ pg_size = BNXT_QPLIB_HWRM_PG_SIZE_8M;
+ break;
+ case ROCE_PG_SIZE_1G:
+ pg_size = BNXT_QPLIB_HWRM_PG_SIZE_1G;
+ break;
+ default:
+ break;
+ }
+
+ return pg_size;
+}
+
+static inline void *bnxt_qplib_get_qe(struct bnxt_qplib_hwq *hwq,
+ u32 indx, u64 *pg)
+{
+ u32 pg_num, pg_idx;
+
+ pg_num = (indx / hwq->qe_ppg);
+ pg_idx = (indx % hwq->qe_ppg);
+ if (pg)
+ *pg = (u64)&hwq->pbl_ptr[pg_num];
+ return (void *)(hwq->pbl_ptr[pg_num] + hwq->element_size * pg_idx);
+}
#define to_bnxt_qplib(ptr, type, member) \
container_of(ptr, type, member)
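
bnxt_qplib_get_qe() above is the single lookup that replaces the per-queue SQE_PG/RQE_PG/CQE_PG macro families: with qe_ppg precomputed as page size divided by element size, any element index maps to a page number plus an in-page offset. A standalone model of that lookup:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* pages[] models hwq->pbl_ptr: one virtual address per backing page */
static void *demo_get_qe(void **pages, uint16_t element_size, uint16_t qe_ppg,
			 uint32_t indx)
{
	uint32_t pg_num = indx / qe_ppg;
	uint32_t pg_idx = indx % qe_ppg;

	return (uint8_t *)pages[pg_num] + (size_t)element_size * pg_idx;
}

int main(void)
{
	static uint8_t page0[4096], page1[4096];
	void *pages[] = { page0, page1 };
	uint8_t *qe = demo_get_qe(pages, 128, 4096 / 128, 40);	/* 32 elements fit per page */

	printf("element 40 sits %td bytes into page 1\n", qe - page1);	/* 1024 */
	return 0;
}
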
diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
index e4b09e7c2175..6f00f07420b7 100644
--- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -210,6 +210,20 @@ struct sq_send {
__le32 data[24];
};
+/* sq_send_hdr (size:256b/32B) */
+struct sq_send_hdr {
+ u8 wqe_type;
+ u8 flags;
+ u8 wqe_size;
+ u8 reserved8_1;
+ __le32 inv_key_or_imm_data;
+ __le32 length;
+ __le32 q_key;
+ __le32 dst_qp;
+ __le32 avid;
+ __le64 reserved64;
+};
+
/* Send Raw Ethernet and QP1 SQ WQE (40 bytes) */
struct sq_send_raweth_qp1 {
u8 wqe_type;
@@ -265,6 +279,21 @@ struct sq_send_raweth_qp1 {
__le32 data[24];
};
+/* sq_send_raweth_qp1_hdr (size:256b/32B) */
+struct sq_send_raweth_qp1_hdr {
+ u8 wqe_type;
+ u8 flags;
+ u8 wqe_size;
+ u8 reserved8;
+ __le16 lflags;
+ __le16 cfa_action;
+ __le32 length;
+ __le32 reserved32_1;
+ __le32 cfa_meta;
+ __le32 reserved32_2;
+ __le64 reserved64;
+};
+
/* RDMA SQ WQE (40 bytes) */
struct sq_rdma {
u8 wqe_type;
@@ -288,6 +317,20 @@ struct sq_rdma {
__le32 data[24];
};
+/* sq_rdma_hdr (size:256b/32B) */
+struct sq_rdma_hdr {
+ u8 wqe_type;
+ u8 flags;
+ u8 wqe_size;
+ u8 reserved8;
+ __le32 imm_data;
+ __le32 length;
+ __le32 reserved32_1;
+ __le64 remote_va;
+ __le32 remote_key;
+ __le32 reserved32_2;
+};
+
/* Atomic SQ WQE (40 bytes) */
struct sq_atomic {
u8 wqe_type;
@@ -307,6 +350,17 @@ struct sq_atomic {
__le32 data[24];
};
+/* sq_atomic_hdr (size:256b/32B) */
+struct sq_atomic_hdr {
+ u8 wqe_type;
+ u8 flags;
+ __le16 reserved16;
+ __le32 remote_key;
+ __le64 remote_va;
+ __le64 swap_data;
+ __le64 cmp_data;
+};
+
/* Local Invalidate SQ WQE (40 bytes) */
struct sq_localinvalidate {
u8 wqe_type;
@@ -324,6 +378,16 @@ struct sq_localinvalidate {
__le32 data[24];
};
+/* sq_localinvalidate_hdr (size:256b/32B) */
+struct sq_localinvalidate_hdr {
+ u8 wqe_type;
+ u8 flags;
+ __le16 reserved16;
+ __le32 inv_l_key;
+ __le64 reserved64;
+ u8 reserved128[16];
+};
+
/* FR-PMR SQ WQE (40 bytes) */
struct sq_fr_pmr {
u8 wqe_type;
@@ -380,6 +444,21 @@ struct sq_fr_pmr {
__le32 data[24];
};
+/* sq_fr_pmr_hdr (size:256b/32B) */
+struct sq_fr_pmr_hdr {
+ u8 wqe_type;
+ u8 flags;
+ u8 access_cntl;
+ u8 zero_based_page_size_log;
+ __le32 l_key;
+ u8 length[5];
+ u8 reserved8_1;
+ u8 reserved8_2;
+ u8 numlevels_pbl_page_size_log;
+ __le64 pblptr;
+ __le64 va;
+};
+
/* Bind SQ WQE (40 bytes) */
struct sq_bind {
u8 wqe_type;
@@ -417,6 +496,22 @@ struct sq_bind {
#define SQ_BIND_DATA_SFT 0
};
+/* sq_bind_hdr (size:256b/32B) */
+struct sq_bind_hdr {
+ u8 wqe_type;
+ u8 flags;
+ u8 access_cntl;
+ u8 reserved8_1;
+ u8 mw_type_zero_based;
+ u8 reserved8_2;
+ __le16 reserved16;
+ __le32 parent_l_key;
+ __le32 l_key;
+ __le64 va;
+ u8 length[5];
+ u8 reserved24[3];
+};
+
/* RQ/SRQ WQE Structures */
/* RQ/SRQ WQE (40 bytes) */
struct rq_wqe {
@@ -435,6 +530,17 @@ struct rq_wqe {
__le32 data[24];
};
+/* rq_wqe_hdr (size:256b/32B) */
+struct rq_wqe_hdr {
+ u8 wqe_type;
+ u8 flags;
+ u8 wqe_size;
+ u8 reserved8;
+ __le32 reserved32;
+ __le32 wr_id[2];
+ u8 reserved128[16];
+};
+
/* CQ CQE Structures */
/* Base CQE (32 bytes) */
struct cq_base {
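
The *_hdr structures added above each describe only the fixed 32-byte header portion of a WQE (the "size:256b/32B" in their comments), so header and payload layout can be handled separately. Their size can be sanity-checked with a userspace mirror; the types below substitute plain integers for the kernel's __le32/__le64:

#include <assert.h>
#include <stdint.h>

struct demo_sq_send_hdr {
	uint8_t  wqe_type;
	uint8_t  flags;
	uint8_t  wqe_size;
	uint8_t  reserved8_1;
	uint32_t inv_key_or_imm_data;
	uint32_t length;
	uint32_t q_key;
	uint32_t dst_qp;
	uint32_t avid;
	uint64_t reserved64;
};

static_assert(sizeof(struct demo_sq_send_hdr) == 32,
	      "WQE header slice must stay 32 bytes");
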
diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h
index aa7396a1588a..1889dd172a25 100644
--- a/drivers/infiniband/hw/efa/efa.h
+++ b/drivers/infiniband/hw/efa/efa.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_H_
@@ -40,6 +40,7 @@ struct efa_sw_stats {
atomic64_t reg_mr_err;
atomic64_t alloc_ucontext_err;
atomic64_t create_ah_err;
+ atomic64_t mmap_err;
};
/* Don't use anything other than atomic64 */
@@ -153,8 +154,7 @@ int efa_mmap(struct ib_ucontext *ibucontext,
struct vm_area_struct *vma);
void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
int efa_create_ah(struct ib_ah *ibah,
- struct rdma_ah_attr *ah_attr,
- u32 flags,
+ struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
void efa_destroy_ah(struct ib_ah *ibah, u32 flags);
int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c
index 7fce69f5568f..336bc2c57bb1 100644
--- a/drivers/infiniband/hw/efa/efa_com.c
+++ b/drivers/infiniband/hw/efa/efa_com.c
@@ -631,17 +631,20 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
cmd->aq_common_descriptor.opcode, PTR_ERR(comp_ctx));
up(&aq->avail_cmds);
+ atomic64_inc(&aq->stats.cmd_err);
return PTR_ERR(comp_ctx);
}
err = efa_com_wait_and_process_admin_cq(comp_ctx, aq);
- if (err)
+ if (err) {
ibdev_err_ratelimited(
aq->efa_dev,
"Failed to process command %s (opcode %u) comp_status %d err %d\n",
efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
cmd->aq_common_descriptor.opcode, comp_ctx->comp_status,
err);
+ atomic64_inc(&aq->stats.cmd_err);
+ }
up(&aq->avail_cmds);
diff --git a/drivers/infiniband/hw/efa/efa_com.h b/drivers/infiniband/hw/efa/efa_com.h
index c67dd8109d1c..5e4c88877ddb 100644
--- a/drivers/infiniband/hw/efa/efa_com.h
+++ b/drivers/infiniband/hw/efa/efa_com.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_COM_H_
@@ -47,6 +47,7 @@ struct efa_com_admin_sq {
struct efa_com_stats_admin {
atomic64_t submitted_cmd;
atomic64_t completed_cmd;
+ atomic64_t cmd_err;
atomic64_t no_completion;
};
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index 5c57098a4aee..08313f7c73bc 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -37,13 +37,16 @@ struct efa_user_mmap_entry {
op(EFA_RX_DROPS, "rx_drops") \
op(EFA_SUBMITTED_CMDS, "submitted_cmds") \
op(EFA_COMPLETED_CMDS, "completed_cmds") \
+ op(EFA_CMDS_ERR, "cmds_err") \
op(EFA_NO_COMPLETION_CMDS, "no_completion_cmds") \
op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \
op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \
op(EFA_CREATE_QP_ERR, "create_qp_err") \
+ op(EFA_CREATE_CQ_ERR, "create_cq_err") \
op(EFA_REG_MR_ERR, "reg_mr_err") \
op(EFA_ALLOC_UCONTEXT_ERR, "alloc_ucontext_err") \
- op(EFA_CREATE_AH_ERR, "create_ah_err")
+ op(EFA_CREATE_AH_ERR, "create_ah_err") \
+ op(EFA_MMAP_ERR, "mmap_err")
#define EFA_STATS_ENUM(ename, name) ename,
#define EFA_STATS_STR(ename, name) [ename] = name,
@@ -1568,6 +1571,7 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
ibdev_dbg(&dev->ibdev,
"pgoff[%#lx] does not have valid entry\n",
vma->vm_pgoff);
+ atomic64_inc(&dev->stats.sw_stats.mmap_err);
return -EINVAL;
}
entry = to_emmap(rdma_entry);
@@ -1603,12 +1607,14 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
err = -EINVAL;
}
- if (err)
+ if (err) {
ibdev_dbg(
&dev->ibdev,
"Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
entry->address, rdma_entry->npages * PAGE_SIZE,
entry->mmap_flag, err);
+ atomic64_inc(&dev->stats.sw_stats.mmap_err);
+ }
rdma_user_mmap_entry_put(rdma_entry);
return err;
@@ -1639,10 +1645,10 @@ static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
}
int efa_create_ah(struct ib_ah *ibah,
- struct rdma_ah_attr *ah_attr,
- u32 flags,
+ struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata)
{
+ struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
struct efa_dev *dev = to_edev(ibah->device);
struct efa_com_create_ah_params params = {};
struct efa_ibv_create_ah_resp resp = {};
@@ -1650,7 +1656,7 @@ int efa_create_ah(struct ib_ah *ibah,
struct efa_ah *ah = to_eah(ibah);
int err;
- if (!(flags & RDMA_CREATE_AH_SLEEPABLE)) {
+ if (!(init_attr->flags & RDMA_CREATE_AH_SLEEPABLE)) {
ibdev_dbg(&dev->ibdev,
"Create address handle is not supported in atomic context\n");
err = -EOPNOTSUPP;
@@ -1747,15 +1753,18 @@ int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
as = &dev->edev.aq.stats;
stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd);
stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd);
+ stats->value[EFA_CMDS_ERR] = atomic64_read(&as->cmd_err);
stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion);
s = &dev->stats;
stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd);
stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->sw_stats.alloc_pd_err);
stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->sw_stats.create_qp_err);
+ stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->sw_stats.create_cq_err);
stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->sw_stats.reg_mr_err);
stats->value[EFA_ALLOC_UCONTEXT_ERR] = atomic64_read(&s->sw_stats.alloc_ucontext_err);
stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->sw_stats.create_ah_err);
+ stats->value[EFA_MMAP_ERR] = atomic64_read(&s->sw_stats.mmap_err);
return ARRAY_SIZE(efa_stats_names);
}
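
For readers skimming the EFA hunks above: the new counters (cmd_err, mmap_err, create_cq_err) follow one pattern -- each failure path bumps its counter, and efa_get_hw_stats() only reads the values. A standalone userspace sketch of that pattern (illustrative only, with invented names and C11 atomics standing in for the kernel's atomic64_t):

/* Illustrative only -- not driver code. Mirrors the pattern above:
 * bump an error counter on each failure path, snapshot it when
 * reporting stats. C11 atomics stand in for atomic64_t.
 */
#include <stdatomic.h>
#include <stdio.h>

struct demo_stats {
	atomic_llong submitted;
	atomic_llong completed;
	atomic_llong cmd_err;	/* analogous to the new stats.cmd_err */
};

static int demo_exec_cmd(struct demo_stats *s, int simulate_failure)
{
	atomic_fetch_add(&s->submitted, 1);
	if (simulate_failure) {
		/* every error path increments exactly one counter */
		atomic_fetch_add(&s->cmd_err, 1);
		return -1;
	}
	atomic_fetch_add(&s->completed, 1);
	return 0;
}

int main(void)
{
	struct demo_stats s = {0};

	demo_exec_cmd(&s, 0);
	demo_exec_cmd(&s, 1);
	/* the reporting side only reads, never resets, matching efa_get_hw_stats() */
	printf("submitted=%lld completed=%lld cmd_err=%lld\n",
	       (long long)atomic_load(&s.submitted),
	       (long long)atomic_load(&s.completed),
	       (long long)atomic_load(&s.cmd_err));
	return 0;
}
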
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index 8a522e14ef62..5b2f9314edd3 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -39,13 +39,14 @@
#define HNS_ROCE_VLAN_SL_BIT_MASK 7
#define HNS_ROCE_VLAN_SL_SHIFT 13
-int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr,
- u32 flags, struct ib_udata *udata)
+int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device);
const struct ib_gid_attr *gid_attr;
struct device *dev = hr_dev->dev;
struct hns_roce_ah *ah = to_hr_ah(ibah);
+ struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
u16 vlan_id = 0xffff;
bool vlan_en = false;
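
The AH hunks above show the tree-wide switch from passing (ah_attr, flags) as separate arguments to passing a single rdma_ah_init_attr that the driver unpacks. A tiny standalone sketch of that calling convention (illustrative only; all struct and macro names here are invented for the demo):

/* Illustrative only -- not driver code. The separate (ah_attr, flags)
 * arguments are folded into one init-attr container and read back out,
 * as in the efa_create_ah()/hns_roce_create_ah() hunks above.
 */
#include <stdio.h>

#define DEMO_AH_SLEEPABLE 0x1

struct demo_ah_attr { int sl; };

struct demo_ah_init_attr {
	struct demo_ah_attr *ah_attr;	/* what used to be the 2nd argument */
	unsigned int flags;		/* what used to be the 3rd argument */
};

static int demo_create_ah(const struct demo_ah_init_attr *init_attr)
{
	const struct demo_ah_attr *ah_attr = init_attr->ah_attr;

	if (!(init_attr->flags & DEMO_AH_SLEEPABLE))
		return -1;	/* e.g. reject creation in atomic context */

	printf("creating AH with sl=%d\n", ah_attr->sl);
	return 0;
}

int main(void)
{
	struct demo_ah_attr attr = { .sl = 3 };
	struct demo_ah_init_attr init = { .ah_attr = &attr,
					  .flags = DEMO_AH_SLEEPABLE };

	return demo_create_ah(&init);
}
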
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index da574c26e063..365e7db6c498 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -157,84 +157,78 @@ void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap)
kfree(bitmap->table);
}
-void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
- struct hns_roce_buf *buf)
+void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf)
{
- int i;
struct device *dev = hr_dev->dev;
+ u32 size = buf->size;
+ int i;
+
+ if (size == 0)
+ return;
- if (buf->nbufs == 1) {
+ buf->size = 0;
+
+ if (hns_roce_buf_is_direct(buf)) {
dma_free_coherent(dev, size, buf->direct.buf, buf->direct.map);
} else {
- for (i = 0; i < buf->nbufs; ++i)
+ for (i = 0; i < buf->npages; ++i)
if (buf->page_list[i].buf)
dma_free_coherent(dev, 1 << buf->page_shift,
buf->page_list[i].buf,
buf->page_list[i].map);
kfree(buf->page_list);
+ buf->page_list = NULL;
}
}
int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
struct hns_roce_buf *buf, u32 page_shift)
{
- int i = 0;
- dma_addr_t t;
+ struct hns_roce_buf_list *buf_list;
struct device *dev = hr_dev->dev;
- u32 page_size = 1 << page_shift;
- u32 order;
+ u32 page_size;
+ int i;
- /* SQ/RQ buf lease than one page, SQ + RQ = 8K */
+ /* The minimum shift of the page accessed by hw is PAGE_ADDR_SHIFT */
+ buf->page_shift = max_t(int, PAGE_ADDR_SHIFT, page_shift);
+
+ page_size = 1 << buf->page_shift;
+ buf->npages = DIV_ROUND_UP(size, page_size);
+
+ /* required size is not bigger than one trunk size */
if (size <= max_direct) {
- buf->nbufs = 1;
- /* Npages calculated by page_size */
- order = get_order(size);
- if (order <= page_shift - PAGE_SHIFT)
- order = 0;
- else
- order -= page_shift - PAGE_SHIFT;
- buf->npages = 1 << order;
- buf->page_shift = page_shift;
- /* MTT PA must be recorded in 4k alignment, t is 4k aligned */
- buf->direct.buf = dma_alloc_coherent(dev, size, &t,
+ buf->page_list = NULL;
+ buf->direct.buf = dma_alloc_coherent(dev, size,
+ &buf->direct.map,
GFP_KERNEL);
if (!buf->direct.buf)
return -ENOMEM;
-
- buf->direct.map = t;
-
- while (t & ((1 << buf->page_shift) - 1)) {
- --buf->page_shift;
- buf->npages *= 2;
- }
} else {
- buf->nbufs = (size + page_size - 1) / page_size;
- buf->npages = buf->nbufs;
- buf->page_shift = page_shift;
- buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list),
- GFP_KERNEL);
-
- if (!buf->page_list)
+ buf_list = kcalloc(buf->npages, sizeof(*buf_list), GFP_KERNEL);
+ if (!buf_list)
return -ENOMEM;
- for (i = 0; i < buf->nbufs; ++i) {
- buf->page_list[i].buf = dma_alloc_coherent(dev,
- page_size,
- &t,
- GFP_KERNEL);
-
- if (!buf->page_list[i].buf)
- goto err_free;
+ for (i = 0; i < buf->npages; i++) {
+ buf_list[i].buf = dma_alloc_coherent(dev, page_size,
+ &buf_list[i].map,
+ GFP_KERNEL);
+ if (!buf_list[i].buf)
+ break;
+ }
- buf->page_list[i].map = t;
+ if (i != buf->npages && i > 0) {
+ while (i-- > 0)
+ dma_free_coherent(dev, page_size,
+ buf_list[i].buf,
+ buf_list[i].map);
+ kfree(buf_list);
+ return -ENOMEM;
}
+ buf->page_list = buf_list;
}
+ buf->size = size;
return 0;
-
-err_free:
- hns_roce_buf_free(hr_dev, size, buf);
- return -ENOMEM;
}
int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
@@ -246,18 +240,14 @@ int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
end = start + buf_cnt;
if (end > buf->npages) {
dev_err(hr_dev->dev,
- "invalid kmem region,offset %d,buf_cnt %d,total %d!\n",
+ "Failed to check kmem bufs, end %d + %d total %d!\n",
start, buf_cnt, buf->npages);
return -EINVAL;
}
total = 0;
for (i = start; i < end; i++)
- if (buf->nbufs == 1)
- bufs[total++] = buf->direct.map +
- ((dma_addr_t)i << buf->page_shift);
- else
- bufs[total++] = buf->page_list[i].map;
+ bufs[total++] = hns_roce_buf_page(buf, i);
return total;
}
@@ -271,8 +261,9 @@ int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
int idx = 0;
u64 addr;
- if (page_shift < PAGE_SHIFT) {
- dev_err(hr_dev->dev, "invalid page shift %d!\n", page_shift);
+ if (page_shift < PAGE_ADDR_SHIFT) {
+ dev_err(hr_dev->dev, "Failed to check umem page shift %d!\n",
+ page_shift);
return -EINVAL;
}
@@ -292,49 +283,6 @@ done:
return total;
}
-void hns_roce_init_buf_region(struct hns_roce_buf_region *region, int hopnum,
- int offset, int buf_cnt)
-{
- if (hopnum == HNS_ROCE_HOP_NUM_0)
- region->hopnum = 0;
- else
- region->hopnum = hopnum;
-
- region->offset = offset;
- region->count = buf_cnt;
-}
-
-void hns_roce_free_buf_list(dma_addr_t **bufs, int region_cnt)
-{
- int i;
-
- for (i = 0; i < region_cnt; i++) {
- kfree(bufs[i]);
- bufs[i] = NULL;
- }
-}
-
-int hns_roce_alloc_buf_list(struct hns_roce_buf_region *regions,
- dma_addr_t **bufs, int region_cnt)
-{
- struct hns_roce_buf_region *r;
- int i;
-
- for (i = 0; i < region_cnt; i++) {
- r = &regions[i];
- bufs[i] = kcalloc(r->count, sizeof(dma_addr_t), GFP_KERNEL);
- if (!bufs[i])
- goto err_alloc;
- }
-
- return 0;
-
-err_alloc:
- hns_roce_free_buf_list(bufs, i);
-
- return -ENOMEM;
-}
-
void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev)
{
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
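
The reworked hns_roce_buf_alloc() above now clamps the page shift to the hardware minimum, derives the page count with DIV_ROUND_UP, and only builds a page list when the request exceeds max_direct. A minimal userspace sketch of that sizing decision (illustrative only; DEMO_PAGE_ADDR_SHIFT is an assumed stand-in for the driver's PAGE_ADDR_SHIFT):

/* Illustrative only -- not driver code. Shows the sizing decision the
 * reworked hns_roce_buf_alloc() makes: clamp the page shift to the 4K
 * minimum the hardware can address, derive the page count, and fall
 * back to a page list when the request exceeds the direct limit.
 */
#include <stdbool.h>
#include <stdio.h>

#define DEMO_PAGE_ADDR_SHIFT 12	/* assumed value of the driver macro */

struct demo_buf_layout {
	unsigned int page_shift;
	unsigned int npages;
	bool direct;
};

static struct demo_buf_layout demo_plan_buf(unsigned int size,
					    unsigned int max_direct,
					    unsigned int page_shift)
{
	struct demo_buf_layout l;
	unsigned int page_size;

	l.page_shift = page_shift > DEMO_PAGE_ADDR_SHIFT ?
		       page_shift : DEMO_PAGE_ADDR_SHIFT;
	page_size = 1U << l.page_shift;
	l.npages = (size + page_size - 1) / page_size;	/* DIV_ROUND_UP */
	l.direct = size <= max_direct;	/* one DMA chunk vs. page list */
	return l;
}

int main(void)
{
	struct demo_buf_layout l = demo_plan_buf(64 * 1024, 8192, 12);

	printf("page_shift=%u npages=%u direct=%d\n",
	       l.page_shift, l.npages, l.direct);
	return 0;
}
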
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 5bfb52ffd590..d2d7074bbe69 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -39,51 +39,40 @@
#include <rdma/hns-abi.h>
#include "hns_roce_common.h"
-static int hns_roce_alloc_cqc(struct hns_roce_dev *hr_dev,
- struct hns_roce_cq *hr_cq)
+static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
{
struct hns_roce_cmd_mailbox *mailbox;
- struct hns_roce_hem_table *mtt_table;
struct hns_roce_cq_table *cq_table;
- struct device *dev = hr_dev->dev;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u64 mtts[MTT_MIN_COUNT] = { 0 };
dma_addr_t dma_handle;
- u64 *mtts;
int ret;
- cq_table = &hr_dev->cq_table;
-
- /* Get the physical address of cq buf */
- if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
- mtt_table = &hr_dev->mr_table.mtt_cqe_table;
- else
- mtt_table = &hr_dev->mr_table.mtt_table;
-
- mtts = hns_roce_table_find(hr_dev, mtt_table, hr_cq->mtt.first_seg,
- &dma_handle);
-
- if (!mtts) {
- dev_err(dev, "Failed to find mtt for CQ buf.\n");
+ ret = hns_roce_mtr_find(hr_dev, &hr_cq->mtr, 0, mtts, ARRAY_SIZE(mtts),
+ &dma_handle);
+ if (ret < 1) {
+ ibdev_err(ibdev, "Failed to find CQ mtr\n");
return -EINVAL;
}
+ cq_table = &hr_dev->cq_table;
ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn);
if (ret) {
- dev_err(dev, "Num of CQ out of range.\n");
+ ibdev_err(ibdev, "Failed to alloc CQ bitmap, err %d\n", ret);
return ret;
}
/* Get CQC memory HEM(Hardware Entry Memory) table */
ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn);
if (ret) {
- dev_err(dev,
- "Get context mem failed(%d) when CQ(0x%lx) alloc.\n",
- ret, hr_cq->cqn);
+ ibdev_err(ibdev, "Failed to get CQ(0x%lx) context, err %d\n",
+ hr_cq->cqn, ret);
goto err_out;
}
ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL));
if (ret) {
- dev_err(dev, "Failed to xa_store CQ.\n");
+ ibdev_err(ibdev, "Failed to xa_store CQ\n");
goto err_put;
}
@@ -101,9 +90,9 @@ static int hns_roce_alloc_cqc(struct hns_roce_dev *hr_dev,
HNS_ROCE_CMD_CREATE_CQC, HNS_ROCE_CMD_TIMEOUT_MSECS);
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
if (ret) {
- dev_err(dev,
- "Send cmd mailbox failed(%d) when CQ(0x%lx) alloc.\n",
- ret, hr_cq->cqn);
+ ibdev_err(ibdev,
+ "Failed to send create cmd for CQ(0x%lx), err %d\n",
+ hr_cq->cqn, ret);
goto err_xa;
}
@@ -126,7 +115,7 @@ err_out:
return ret;
}
-void hns_roce_free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
+static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
{
struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
struct device *dev = hr_dev->dev;
@@ -153,190 +142,86 @@ void hns_roce_free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR);
}
-static int get_cq_umem(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
- struct hns_roce_ib_create_cq ucmd,
- struct ib_udata *udata)
-{
- struct hns_roce_buf *buf = &hr_cq->buf;
- struct hns_roce_mtt *mtt = &hr_cq->mtt;
- struct ib_umem **umem = &hr_cq->umem;
- u32 npages;
- int ret;
-
- *umem = ib_umem_get(&hr_dev->ib_dev, ucmd.buf_addr, buf->size,
- IB_ACCESS_LOCAL_WRITE);
- if (IS_ERR(*umem))
- return PTR_ERR(*umem);
-
- if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
- mtt->mtt_type = MTT_TYPE_CQE;
- else
- mtt->mtt_type = MTT_TYPE_WQE;
-
- npages = DIV_ROUND_UP(ib_umem_page_count(*umem),
- 1 << hr_dev->caps.cqe_buf_pg_sz);
- ret = hns_roce_mtt_init(hr_dev, npages, buf->page_shift, mtt);
- if (ret)
- goto err_buf;
-
- ret = hns_roce_ib_umem_write_mtt(hr_dev, mtt, *umem);
- if (ret)
- goto err_mtt;
-
- return 0;
-
-err_mtt:
- hns_roce_mtt_cleanup(hr_dev, mtt);
-
-err_buf:
- ib_umem_release(*umem);
- return ret;
-}
-
-static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
+static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
+ struct ib_udata *udata, unsigned long addr)
{
- struct hns_roce_buf *buf = &hr_cq->buf;
- struct hns_roce_mtt *mtt = &hr_cq->mtt;
- int ret;
-
- ret = hns_roce_buf_alloc(hr_dev, buf->size, (1 << buf->page_shift) * 2,
- buf, buf->page_shift);
- if (ret)
- goto out;
-
- if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
- mtt->mtt_type = MTT_TYPE_CQE;
- else
- mtt->mtt_type = MTT_TYPE_WQE;
-
- ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, mtt);
- if (ret)
- goto err_buf;
-
- ret = hns_roce_buf_write_mtt(hr_dev, mtt, buf);
- if (ret)
- goto err_mtt;
-
- return 0;
-
-err_mtt:
- hns_roce_mtt_cleanup(hr_dev, mtt);
-
-err_buf:
- hns_roce_buf_free(hr_dev, buf->size, buf);
-
-out:
- return ret;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_buf_attr buf_attr = {};
+ int err;
+
+ buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + PAGE_ADDR_SHIFT;
+ buf_attr.region[0].size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz;
+ buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num;
+ buf_attr.region_count = 1;
+ buf_attr.fixed_page = true;
+
+ err = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr,
+ hr_dev->caps.cqe_ba_pg_sz + PAGE_ADDR_SHIFT,
+ udata, addr);
+ if (err)
+ ibdev_err(ibdev, "Failed to alloc CQ mtr, err %d\n", err);
+
+ return err;
}
static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
{
- hns_roce_buf_free(hr_dev, hr_cq->buf.size, &hr_cq->buf);
+ hns_roce_mtr_destroy(hr_dev, &hr_cq->mtr);
}
-static int create_user_cq(struct hns_roce_dev *hr_dev,
- struct hns_roce_cq *hr_cq,
- struct ib_udata *udata,
- struct hns_roce_ib_create_cq_resp *resp)
+static int alloc_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
+ struct ib_udata *udata, unsigned long addr,
+ struct hns_roce_ib_create_cq_resp *resp)
{
- struct hns_roce_ib_create_cq ucmd;
- struct device *dev = hr_dev->dev;
- int ret;
- struct hns_roce_ucontext *context = rdma_udata_to_drv_context(
- udata, struct hns_roce_ucontext, ibucontext);
-
- if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
- dev_err(dev, "Failed to copy_from_udata.\n");
- return -EFAULT;
- }
+ bool has_db = hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB;
+ struct hns_roce_ucontext *uctx;
+ int err;
- /* Get user space address, write it into mtt table */
- ret = get_cq_umem(hr_dev, hr_cq, ucmd, udata);
- if (ret) {
- dev_err(dev, "Failed to get_cq_umem.\n");
- return ret;
- }
-
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB &&
- udata->outlen >= offsetofend(typeof(*resp), cap_flags)) {
- ret = hns_roce_db_map_user(context, udata, ucmd.db_addr,
- &hr_cq->db);
- if (ret) {
- dev_err(dev, "cq record doorbell map failed!\n");
- goto err_mtt;
+ if (udata) {
+ if (has_db &&
+ udata->outlen >= offsetofend(typeof(*resp), cap_flags)) {
+ uctx = rdma_udata_to_drv_context(udata,
+ struct hns_roce_ucontext, ibucontext);
+ err = hns_roce_db_map_user(uctx, udata, addr,
+ &hr_cq->db);
+ if (err)
+ return err;
+ hr_cq->db_en = 1;
+ resp->cap_flags |= HNS_ROCE_SUPPORT_CQ_RECORD_DB;
}
- hr_cq->db_en = 1;
- resp->cap_flags |= HNS_ROCE_SUPPORT_CQ_RECORD_DB;
- }
-
- return 0;
-
-err_mtt:
- hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt);
- ib_umem_release(hr_cq->umem);
-
- return ret;
-}
-
-static int create_kernel_cq(struct hns_roce_dev *hr_dev,
- struct hns_roce_cq *hr_cq)
-{
- struct device *dev = hr_dev->dev;
- int ret;
-
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) {
- ret = hns_roce_alloc_db(hr_dev, &hr_cq->db, 1);
- if (ret)
- return ret;
-
- hr_cq->set_ci_db = hr_cq->db.db_record;
- *hr_cq->set_ci_db = 0;
- hr_cq->db_en = 1;
- }
-
- /* Init mtt table and write buff address to mtt table */
- ret = alloc_cq_buf(hr_dev, hr_cq);
- if (ret) {
- dev_err(dev, "Failed to alloc_cq_buf.\n");
- goto err_db;
+ } else {
+ if (has_db) {
+ err = hns_roce_alloc_db(hr_dev, &hr_cq->db, 1);
+ if (err)
+ return err;
+ hr_cq->set_ci_db = hr_cq->db.db_record;
+ *hr_cq->set_ci_db = 0;
+ hr_cq->db_en = 1;
+ }
+ hr_cq->cq_db_l = hr_dev->reg_base + hr_dev->odb_offset +
+ DB_REG_OFFSET * hr_dev->priv_uar.index;
}
- hr_cq->cq_db_l = hr_dev->reg_base + hr_dev->odb_offset +
- DB_REG_OFFSET * hr_dev->priv_uar.index;
-
return 0;
-
-err_db:
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)
- hns_roce_free_db(hr_dev, &hr_cq->db);
-
- return ret;
}
-static void destroy_user_cq(struct hns_roce_dev *hr_dev,
- struct hns_roce_cq *hr_cq,
- struct ib_udata *udata,
- struct hns_roce_ib_create_cq_resp *resp)
+static void free_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
+ struct ib_udata *udata)
{
- struct hns_roce_ucontext *context = rdma_udata_to_drv_context(
- udata, struct hns_roce_ucontext, ibucontext);
+ struct hns_roce_ucontext *uctx;
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB &&
- udata->outlen >= offsetofend(typeof(*resp), cap_flags))
- hns_roce_db_unmap_user(context, &hr_cq->db);
-
- hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt);
- ib_umem_release(hr_cq->umem);
-}
-
-static void destroy_kernel_cq(struct hns_roce_dev *hr_dev,
- struct hns_roce_cq *hr_cq)
-{
- hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt);
- free_cq_buf(hr_dev, hr_cq);
+ if (!hr_cq->db_en)
+ return;
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)
+ hr_cq->db_en = 0;
+ if (udata) {
+ uctx = rdma_udata_to_drv_context(udata,
+ struct hns_roce_ucontext,
+ ibucontext);
+ hns_roce_db_unmap_user(uctx, &hr_cq->db);
+ } else {
hns_roce_free_db(hr_dev, &hr_cq->db);
+ }
}
int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
@@ -345,20 +230,21 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
struct hns_roce_ib_create_cq_resp resp = {};
struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
- struct device *dev = hr_dev->dev;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_ib_create_cq ucmd = {};
int vector = attr->comp_vector;
u32 cq_entries = attr->cqe;
int ret;
if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) {
- dev_err(dev, "Create CQ failed. entries=%d, max=%d\n",
- cq_entries, hr_dev->caps.max_cqes);
+ ibdev_err(ibdev, "Failed to check CQ count %d max=%d\n",
+ cq_entries, hr_dev->caps.max_cqes);
return -EINVAL;
}
if (vector >= hr_dev->caps.num_comp_vectors) {
- dev_err(dev, "Create CQ failed, vector=%d, max=%d\n",
- vector, hr_dev->caps.num_comp_vectors);
+ ibdev_err(ibdev, "Failed to check CQ vector=%d max=%d\n",
+ vector, hr_dev->caps.num_comp_vectors);
return -EINVAL;
}
@@ -367,30 +253,35 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
hr_cq->ib_cq.cqe = cq_entries - 1; /* used as cqe index */
hr_cq->cq_depth = cq_entries;
hr_cq->vector = vector;
- hr_cq->buf.size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz;
- hr_cq->buf.page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz;
spin_lock_init(&hr_cq->lock);
INIT_LIST_HEAD(&hr_cq->sq_list);
INIT_LIST_HEAD(&hr_cq->rq_list);
if (udata) {
- ret = create_user_cq(hr_dev, hr_cq, udata, &resp);
- if (ret) {
- dev_err(dev, "Create cq failed in user mode!\n");
- goto err_cq;
- }
- } else {
- ret = create_kernel_cq(hr_dev, hr_cq);
+ ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
if (ret) {
- dev_err(dev, "Create cq failed in kernel mode!\n");
- goto err_cq;
+ ibdev_err(ibdev, "Failed to copy CQ udata, err %d\n",
+ ret);
+ return ret;
}
}
- ret = hns_roce_alloc_cqc(hr_dev, hr_cq);
+ ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr);
+ if (ret) {
+ ibdev_err(ibdev, "Failed to alloc CQ buf, err %d\n", ret);
+ return ret;
+ }
+
+ ret = alloc_cq_db(hr_dev, hr_cq, udata, ucmd.db_addr, &resp);
if (ret) {
- dev_err(dev, "Alloc CQ failed(%d).\n", ret);
- goto err_dbmap;
+ ibdev_err(ibdev, "Failed to alloc CQ db, err %d\n", ret);
+ goto err_cq_buf;
+ }
+
+ ret = alloc_cqc(hr_dev, hr_cq);
+ if (ret) {
+ ibdev_err(ibdev, "Failed to alloc CQ context, err %d\n", ret);
+ goto err_cq_db;
}
/*
@@ -412,15 +303,11 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
return 0;
err_cqc:
- hns_roce_free_cqc(hr_dev, hr_cq);
-
-err_dbmap:
- if (udata)
- destroy_user_cq(hr_dev, hr_cq, udata, &resp);
- else
- destroy_kernel_cq(hr_dev, hr_cq);
-
-err_cq:
+ free_cqc(hr_dev, hr_cq);
+err_cq_db:
+ free_cq_db(hr_dev, hr_cq, udata);
+err_cq_buf:
+ free_cq_buf(hr_dev, hr_cq);
return ret;
}
@@ -429,28 +316,12 @@ void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
- if (hr_dev->hw->destroy_cq) {
+ if (hr_dev->hw->destroy_cq)
hr_dev->hw->destroy_cq(ib_cq, udata);
- return;
- }
-
- hns_roce_free_cqc(hr_dev, hr_cq);
- hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt);
- ib_umem_release(hr_cq->umem);
- if (udata) {
- if (hr_cq->db_en == 1)
- hns_roce_db_unmap_user(rdma_udata_to_drv_context(
- udata,
- struct hns_roce_ucontext,
- ibucontext),
- &hr_cq->db);
- } else {
- /* Free the buff of stored cq */
- free_cq_buf(hr_dev, hr_cq);
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)
- hns_roce_free_db(hr_dev, &hr_cq->db);
- }
+ free_cq_buf(hr_dev, hr_cq);
+ free_cq_db(hr_dev, hr_cq, udata);
+ free_cqc(hr_dev, hr_cq);
}
void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index f6b3cf6b95d6..4fcd608ee55f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -131,8 +131,8 @@ enum {
};
enum {
- HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0,
- HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1,
+ HNS_ROCE_QP_CAP_RQ_RECORD_DB = BIT(0),
+ HNS_ROCE_QP_CAP_SQ_RECORD_DB = BIT(1),
};
enum {
@@ -209,6 +209,8 @@ enum {
HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE = 0x07,
};
+#define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12
+
enum {
HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0),
HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1),
@@ -222,13 +224,6 @@ enum {
HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10),
};
-enum hns_roce_mtt_type {
- MTT_TYPE_WQE,
- MTT_TYPE_CQE,
- MTT_TYPE_SRQWQE,
- MTT_TYPE_IDX
-};
-
#define HNS_ROCE_DB_TYPE_COUNT 2
#define HNS_ROCE_DB_UNIT_SIZE 4
@@ -267,10 +262,11 @@ enum {
#define HNS_ROCE_PORT_DOWN 0
#define HNS_ROCE_PORT_UP 1
-#define HNS_ROCE_MTT_ENTRY_PER_SEG 8
-
#define PAGE_ADDR_SHIFT 12
+/* The minimum page count for hardware access page directly. */
+#define HNS_HW_DIRECT_PAGE_COUNT 2
+
struct hns_roce_uar {
u64 pfn;
unsigned long index;
@@ -300,22 +296,6 @@ struct hns_roce_bitmap {
unsigned long *table;
};
-/* Order bitmap length -- bit num compute formula: 1 << (max_order - order) */
-/* Order = 0: bitmap is biggest, order = max bitmap is least (only a bit) */
-/* Every bit repesent to a partner free/used status in bitmap */
-/*
- * Initial, bits of other bitmap are all 0 except that a bit of max_order is 1
- * Bit = 1 represent to idle and available; bit = 0: not available
- */
-struct hns_roce_buddy {
- /* Members point to every order level bitmap */
- unsigned long **bits;
- /* Represent to avail bits of the order level bitmap */
- u32 *num_free;
- int max_order;
- spinlock_t lock;
-};
-
/* For Hardware Entry Memory */
struct hns_roce_hem_table {
/* HEM type: 0 = qpc, 1 = mtt, 2 = cqc, 3 = srq, 4 = other */
@@ -336,13 +316,6 @@ struct hns_roce_hem_table {
dma_addr_t *bt_l0_dma_addr;
};
-struct hns_roce_mtt {
- unsigned long first_seg;
- int order;
- int page_shift;
- enum hns_roce_mtt_type mtt_type;
-};
-
struct hns_roce_buf_region {
int offset; /* page offset */
u32 count; /* page count */
@@ -357,13 +330,32 @@ struct hns_roce_hem_list {
struct list_head mid_bt[HNS_ROCE_MAX_BT_REGION][HNS_ROCE_MAX_BT_LEVEL];
struct list_head btm_bt; /* link all bottom bt in @mid_bt */
dma_addr_t root_ba; /* pointer to the root ba table */
- int bt_pg_shift;
+};
+
+struct hns_roce_buf_attr {
+ struct {
+ size_t size; /* region size */
+ int hopnum; /* multi-hop addressing hop num */
+ } region[HNS_ROCE_MAX_BT_REGION];
+ int region_count; /* valid region count */
+ int page_shift; /* buffer page shift */
+ bool fixed_page; /* decide page shift is fixed-size or maximum size */
+ int user_access; /* umem access flag */
+ bool mtt_only; /* only alloc buffer-required MTT memory */
};
/* memory translate region */
struct hns_roce_mtr {
- struct hns_roce_hem_list hem_list;
- int buf_pg_shift;
+ struct hns_roce_hem_list hem_list; /* multi-hop addressing resource */
+ struct ib_umem *umem; /* user space buffer */
+ struct hns_roce_buf *kmem; /* kernel space buffer */
+ struct {
+ dma_addr_t root_ba; /* root BA table's address */
+ bool is_direct; /* addressing without BA table */
+ int ba_pg_shift; /* BA table page shift */
+ int buf_pg_shift; /* buffer page shift */
+ int buf_pg_count; /* buffer page count */
+ } hem_cfg; /* config for hardware addressing */
};
struct hns_roce_mw {
@@ -381,43 +373,22 @@ struct hns_roce_mw {
struct hns_roce_mr {
struct ib_mr ibmr;
- struct ib_umem *umem;
u64 iova; /* MR's virtual orignal addr */
u64 size; /* Address range of MR */
u32 key; /* Key of MR */
u32 pd; /* PD num of MR */
u32 access; /* Access permission of MR */
- u32 npages;
int enabled; /* MR's active status */
int type; /* MR's register type */
- u64 *pbl_buf; /* MR's PBL space */
- dma_addr_t pbl_dma_addr; /* MR's PBL space PA */
- u32 pbl_size; /* PA number in the PBL */
- u64 pbl_ba; /* page table address */
- u32 l0_chunk_last_num; /* L0 last number */
- u32 l1_chunk_last_num; /* L1 last number */
- u64 **pbl_bt_l2; /* PBL BT L2 */
- u64 **pbl_bt_l1; /* PBL BT L1 */
- u64 *pbl_bt_l0; /* PBL BT L0 */
- dma_addr_t *pbl_l2_dma_addr; /* PBL BT L2 dma addr */
- dma_addr_t *pbl_l1_dma_addr; /* PBL BT L1 dma addr */
- dma_addr_t pbl_l0_dma_addr; /* PBL BT L0 dma addr */
- u32 pbl_ba_pg_sz; /* BT chunk page size */
- u32 pbl_buf_pg_sz; /* buf chunk page size */
u32 pbl_hop_num; /* multi-hop number */
+ struct hns_roce_mtr pbl_mtr;
+ u32 npages;
+ dma_addr_t *page_list;
};
struct hns_roce_mr_table {
struct hns_roce_bitmap mtpt_bitmap;
- struct hns_roce_buddy mtt_buddy;
- struct hns_roce_hem_table mtt_table;
struct hns_roce_hem_table mtpt_table;
- struct hns_roce_buddy mtt_cqe_buddy;
- struct hns_roce_hem_table mtt_cqe_table;
- struct hns_roce_buddy mtt_srqwqe_buddy;
- struct hns_roce_hem_table mtt_srqwqe_table;
- struct hns_roce_buddy mtt_idx_buddy;
- struct hns_roce_hem_table mtt_idx_table;
};
struct hns_roce_wq {
@@ -446,7 +417,6 @@ struct hns_roce_buf_list {
struct hns_roce_buf {
struct hns_roce_buf_list direct;
struct hns_roce_buf_list *page_list;
- int nbufs;
u32 npages;
u32 size;
int page_shift;
@@ -482,12 +452,10 @@ struct hns_roce_db {
struct hns_roce_cq {
struct ib_cq ib_cq;
- struct hns_roce_buf buf;
- struct hns_roce_mtt mtt;
+ struct hns_roce_mtr mtr;
struct hns_roce_db db;
u8 db_en;
spinlock_t lock;
- struct ib_umem *umem;
u32 cq_depth;
u32 cons_index;
u32 *set_ci_db;
@@ -505,11 +473,8 @@ struct hns_roce_cq {
};
struct hns_roce_idx_que {
- struct hns_roce_buf idx_buf;
- int entry_sz;
- u32 buf_size;
- struct ib_umem *umem;
- struct hns_roce_mtt mtt;
+ struct hns_roce_mtr mtr;
+ int entry_shift;
unsigned long *bitmap;
};
@@ -524,10 +489,9 @@ struct hns_roce_srq {
atomic_t refcount;
struct completion free;
- struct hns_roce_buf buf;
+ struct hns_roce_mtr buf_mtr;
+
u64 *wrid;
- struct ib_umem *umem;
- struct hns_roce_mtt mtt;
struct hns_roce_idx_que idx_que;
spinlock_t lock;
int head;
@@ -656,20 +620,15 @@ struct hns_roce_work {
struct hns_roce_qp {
struct ib_qp ibqp;
- struct hns_roce_buf hr_buf;
struct hns_roce_wq rq;
struct hns_roce_db rdb;
struct hns_roce_db sdb;
- u8 rdb_en;
- u8 sdb_en;
+ unsigned long en_flags;
u32 doorbell_qpn;
u32 sq_signal_bits;
struct hns_roce_wq sq;
- struct ib_umem *umem;
- struct hns_roce_mtt mtt;
struct hns_roce_mtr mtr;
- int wqe_bt_pg_shift;
u32 buff_size;
struct mutex mutex;
@@ -769,17 +728,11 @@ struct hns_roce_eq {
int over_ignore;
int coalesce;
int arm_st;
- u64 eqe_ba;
- int eqe_ba_pg_sz;
- int eqe_buf_pg_sz;
int hop_num;
struct hns_roce_mtr mtr;
- struct hns_roce_buf buf;
int eq_max_cnt;
int eq_period;
int shift;
- dma_addr_t cur_eqe_ba;
- dma_addr_t nxt_eqe_ba;
int event_type;
int sub_type;
};
@@ -1102,15 +1055,64 @@ static inline struct hns_roce_qp
return xa_load(&hr_dev->qp_table_xa, qpn & (hr_dev->caps.num_qps - 1));
}
+static inline bool hns_roce_buf_is_direct(struct hns_roce_buf *buf)
+{
+ if (buf->page_list)
+ return false;
+
+ return true;
+}
+
static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset)
{
- u32 page_size = 1 << buf->page_shift;
+ if (hns_roce_buf_is_direct(buf))
+ return (char *)(buf->direct.buf) + (offset & (buf->size - 1));
- if (buf->nbufs == 1)
- return (char *)(buf->direct.buf) + offset;
+ return (char *)(buf->page_list[offset >> buf->page_shift].buf) +
+ (offset & ((1 << buf->page_shift) - 1));
+}
+
+static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, int idx)
+{
+ if (hns_roce_buf_is_direct(buf))
+ return buf->direct.map + ((dma_addr_t)idx << buf->page_shift);
else
- return (char *)(buf->page_list[offset >> buf->page_shift].buf) +
- (offset & (page_size - 1));
+ return buf->page_list[idx].map;
+}
+
+#define hr_hw_page_align(x) ALIGN(x, 1 << PAGE_ADDR_SHIFT)
+
+static inline u64 to_hr_hw_page_addr(u64 addr)
+{
+ return addr >> PAGE_ADDR_SHIFT;
+}
+
+static inline u32 to_hr_hw_page_shift(u32 page_shift)
+{
+ return page_shift - PAGE_ADDR_SHIFT;
+}
+
+static inline u32 to_hr_hem_hopnum(u32 hopnum, u32 count)
+{
+ if (count > 0)
+ return hopnum == HNS_ROCE_HOP_NUM_0 ? 0 : hopnum;
+
+ return 0;
+}
+
+static inline u32 to_hr_hem_entries_size(u32 count, u32 buf_shift)
+{
+ return hr_hw_page_align(count << buf_shift);
+}
+
+static inline u32 to_hr_hem_entries_count(u32 count, u32 buf_shift)
+{
+ return hr_hw_page_align(count << buf_shift) >> buf_shift;
+}
+
+static inline u32 to_hr_hem_entries_shift(u32 count, u32 buf_shift)
+{
+ return ilog2(to_hr_hem_entries_count(count, buf_shift));
}
int hns_roce_init_uar_table(struct hns_roce_dev *dev);
@@ -1125,25 +1127,18 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status,
int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev);
void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev);
-int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift,
- struct hns_roce_mtt *mtt);
-void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt);
-int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt, struct hns_roce_buf *buf);
-
-void hns_roce_mtr_init(struct hns_roce_mtr *mtr, int bt_pg_shift,
- int buf_pg_shift);
-int hns_roce_mtr_attach(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
- dma_addr_t **bufs, struct hns_roce_buf_region *regions,
- int region_cnt);
-void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtr *mtr);
-
/* hns roce hw need current block and next block addr from mtt */
#define MTT_MIN_COUNT 2
int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr);
+int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *buf_attr, int page_shift,
+ struct ib_udata *udata, unsigned long user_addr);
+void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr);
+int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_region *regions, int region_cnt,
+ dma_addr_t *pages, int page_cnt);
int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev);
@@ -1171,8 +1166,8 @@ void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap,
unsigned long obj, int cnt,
int rr);
-int hns_roce_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr,
- u32 flags, struct ib_udata *udata);
+int hns_roce_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
void hns_roce_destroy_ah(struct ib_ah *ah, u32 flags);
@@ -1200,20 +1195,10 @@ struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type,
struct ib_udata *udata);
int hns_roce_dealloc_mw(struct ib_mw *ibmw);
-void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
- struct hns_roce_buf *buf);
+void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf);
int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
struct hns_roce_buf *buf, u32 page_shift);
-int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt, struct ib_umem *umem);
-
-void hns_roce_init_buf_region(struct hns_roce_buf_region *region, int hopnum,
- int offset, int buf_cnt);
-int hns_roce_alloc_buf_list(struct hns_roce_buf_region *regions,
- dma_addr_t **bufs, int count);
-void hns_roce_free_buf_list(dma_addr_t **bufs, int count);
-
int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
int buf_cnt, int start, struct hns_roce_buf *buf);
int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
@@ -1254,8 +1239,6 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
-void hns_roce_free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq);
-
int hns_roce_db_map_user(struct hns_roce_ucontext *context,
struct ib_udata *udata, unsigned long virt,
struct hns_roce_db *db);
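
The new static inline helpers above (hr_hw_page_align(), to_hr_hem_entries_size()/to_hr_hem_entries_count()) all reduce to aligning a byte count to the 4K hardware page before converting back to entries. A standalone restatement with a worked number (illustrative only, userspace types):

/* Illustrative only -- not driver code. Userspace restatement of the
 * new hns_roce_device.h helpers: round a queue size up to the 4K page
 * granularity the hardware addresses, then recompute the entry count.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_PAGE_ADDR_SHIFT 12

static uint32_t demo_hw_page_align(uint32_t x)
{
	uint32_t page = 1U << DEMO_PAGE_ADDR_SHIFT;

	return (x + page - 1) & ~(page - 1);	/* ALIGN(x, 4K) */
}

static uint32_t demo_hem_entries_size(uint32_t count, uint32_t buf_shift)
{
	return demo_hw_page_align(count << buf_shift);
}

static uint32_t demo_hem_entries_count(uint32_t count, uint32_t buf_shift)
{
	return demo_hem_entries_size(count, buf_shift) >> buf_shift;
}

int main(void)
{
	/* e.g. a queue with 100 entries of 32 bytes (buf_shift = 5) */
	uint32_t size = demo_hem_entries_size(100, 5);
	uint32_t count = demo_hem_entries_count(100, 5);

	printf("aligned size=%u bytes, effective entries=%u\n", size, count);
	/* 3200 bytes pad up to one 4096-byte page, so the usable entry
	 * count grows from 100 to 128. */
	return 0;
}
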
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index 263338b90d7a..37d101eec181 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -75,18 +75,6 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type)
case HEM_TYPE_CQC_TIMER:
hop_num = hr_dev->caps.cqc_timer_hop_num;
break;
- case HEM_TYPE_CQE:
- hop_num = hr_dev->caps.cqe_hop_num;
- break;
- case HEM_TYPE_MTT:
- hop_num = hr_dev->caps.mtt_hop_num;
- break;
- case HEM_TYPE_SRQWQE:
- hop_num = hr_dev->caps.srqwqe_hop_num;
- break;
- case HEM_TYPE_IDX:
- hop_num = hr_dev->caps.idx_hop_num;
- break;
default:
return false;
}
@@ -195,38 +183,6 @@ static int get_hem_table_config(struct hns_roce_dev *hr_dev,
mhop->ba_l0_num = hr_dev->caps.srqc_bt_num;
mhop->hop_num = hr_dev->caps.srqc_hop_num;
break;
- case HEM_TYPE_MTT:
- mhop->buf_chunk_size = 1 << (hr_dev->caps.mtt_buf_pg_sz
- + PAGE_SHIFT);
- mhop->bt_chunk_size = 1 << (hr_dev->caps.mtt_ba_pg_sz
- + PAGE_SHIFT);
- mhop->ba_l0_num = mhop->bt_chunk_size / BA_BYTE_LEN;
- mhop->hop_num = hr_dev->caps.mtt_hop_num;
- break;
- case HEM_TYPE_CQE:
- mhop->buf_chunk_size = 1 << (hr_dev->caps.cqe_buf_pg_sz
- + PAGE_SHIFT);
- mhop->bt_chunk_size = 1 << (hr_dev->caps.cqe_ba_pg_sz
- + PAGE_SHIFT);
- mhop->ba_l0_num = mhop->bt_chunk_size / BA_BYTE_LEN;
- mhop->hop_num = hr_dev->caps.cqe_hop_num;
- break;
- case HEM_TYPE_SRQWQE:
- mhop->buf_chunk_size = 1 << (hr_dev->caps.srqwqe_buf_pg_sz
- + PAGE_SHIFT);
- mhop->bt_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz
- + PAGE_SHIFT);
- mhop->ba_l0_num = mhop->bt_chunk_size / BA_BYTE_LEN;
- mhop->hop_num = hr_dev->caps.srqwqe_hop_num;
- break;
- case HEM_TYPE_IDX:
- mhop->buf_chunk_size = 1 << (hr_dev->caps.idx_buf_pg_sz
- + PAGE_SHIFT);
- mhop->bt_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz
- + PAGE_SHIFT);
- mhop->ba_l0_num = mhop->bt_chunk_size / BA_BYTE_LEN;
- mhop->hop_num = hr_dev->caps.idx_hop_num;
- break;
default:
dev_err(dev, "Table %d not support multi-hop addressing!\n",
type);
@@ -899,57 +855,6 @@ out:
return addr;
}
-int hns_roce_table_get_range(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table,
- unsigned long start, unsigned long end)
-{
- struct hns_roce_hem_mhop mhop;
- unsigned long inc = table->table_chunk_size / table->obj_size;
- unsigned long i = 0;
- int ret;
-
- if (hns_roce_check_whether_mhop(hr_dev, table->type)) {
- ret = hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop);
- if (ret)
- goto fail;
- inc = mhop.bt_chunk_size / table->obj_size;
- }
-
- /* Allocate MTT entry memory according to chunk(128K) */
- for (i = start; i <= end; i += inc) {
- ret = hns_roce_table_get(hr_dev, table, i);
- if (ret)
- goto fail;
- }
-
- return 0;
-
-fail:
- while (i > start) {
- i -= inc;
- hns_roce_table_put(hr_dev, table, i);
- }
- return ret;
-}
-
-void hns_roce_table_put_range(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table,
- unsigned long start, unsigned long end)
-{
- struct hns_roce_hem_mhop mhop;
- unsigned long inc = table->table_chunk_size / table->obj_size;
- unsigned long i;
-
- if (hns_roce_check_whether_mhop(hr_dev, table->type)) {
- if (hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop))
- return;
- inc = mhop.bt_chunk_size / table->obj_size;
- }
-
- for (i = start; i <= end; i += inc)
- hns_roce_table_put(hr_dev, table, i);
-}
-
int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, u32 type,
unsigned long obj_size, unsigned long nobj,
@@ -1112,12 +1017,6 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev)
{
- if ((hr_dev->caps.num_idx_segs))
- hns_roce_cleanup_hem_table(hr_dev,
- &hr_dev->mr_table.mtt_idx_table);
- if (hr_dev->caps.num_srqwqe_segs)
- hns_roce_cleanup_hem_table(hr_dev,
- &hr_dev->mr_table.mtt_srqwqe_table);
if (hr_dev->caps.srqc_entry_sz)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->srq_table.table);
@@ -1137,10 +1036,6 @@ void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev)
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table);
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table);
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table);
- if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
- hns_roce_cleanup_hem_table(hr_dev,
- &hr_dev->mr_table.mtt_cqe_table);
- hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table);
}
struct roce_hem_item {
@@ -1505,7 +1400,7 @@ err_exit:
int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_list *hem_list,
const struct hns_roce_buf_region *regions,
- int region_cnt)
+ int region_cnt, int bt_pg_shift)
{
const struct hns_roce_buf_region *r;
int ofs, end;
@@ -1519,7 +1414,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
return -EINVAL;
}
- unit = (1 << hem_list->bt_pg_shift) / BA_BYTE_LEN;
+ unit = (1 << bt_pg_shift) / BA_BYTE_LEN;
for (i = 0; i < region_cnt; i++) {
r = &regions[i];
if (!r->count)
@@ -1566,8 +1461,7 @@ void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev,
hem_list->root_ba = 0;
}
-void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list,
- int bt_page_order)
+void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list)
{
int i, j;
@@ -1576,8 +1470,6 @@ void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list,
for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++)
for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++)
INIT_LIST_HEAD(&hem_list->mid_bt[i][j]);
-
- hem_list->bt_pg_shift = bt_page_order;
}
void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h
index 3bb8f78fb7b0..1fa0bdcb1989 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.h
@@ -115,12 +115,6 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev,
void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, unsigned long obj,
dma_addr_t *dma_handle);
-int hns_roce_table_get_range(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table,
- unsigned long start, unsigned long end);
-void hns_roce_table_put_range(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table,
- unsigned long start, unsigned long end);
int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, u32 type,
unsigned long obj_size, unsigned long nobj,
@@ -133,14 +127,13 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_mhop *mhop);
bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type);
-void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list,
- int bt_page_order);
+void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list);
int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions,
int region_cnt, int unit);
int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_list *hem_list,
const struct hns_roce_buf_region *regions,
- int region_cnt);
+ int region_cnt, int bt_pg_shift);
void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_list *hem_list);
void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 5ff028d77be3..b4b98e818328 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -1099,7 +1099,6 @@ static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev,
struct completion comp;
long end = HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS;
unsigned long start = jiffies;
- int npages;
int ret = 0;
priv = (struct hns_roce_v1_priv *)hr_dev->priv;
@@ -1146,17 +1145,9 @@ free_mr:
dev_dbg(dev, "Free mr 0x%x use 0x%x us.\n",
mr->key, jiffies_to_usecs(jiffies) - jiffies_to_usecs(start));
- if (mr->size != ~0ULL) {
- npages = ib_umem_page_count(mr->umem);
- dma_free_coherent(dev, npages * 8, mr->pbl_buf,
- mr->pbl_dma_addr);
- }
-
hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
key_to_hw_index(mr->key), 0);
-
- ib_umem_release(mr->umem);
-
+ hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr);
kfree(mr);
return ret;
@@ -1826,9 +1817,12 @@ static void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port,
static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
unsigned long mtpt_idx)
{
+ struct hns_roce_dev *hr_dev = to_hr_dev(mr->ibmr.device);
+ u64 pages[HNS_ROCE_MAX_INNER_MTPT_NUM] = { 0 };
+ struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_v1_mpt_entry *mpt_entry;
- struct sg_dma_page_iter sg_iter;
- u64 *pages;
+ dma_addr_t pbl_ba;
+ int count;
int i;
/* MPT filled into mailbox buf */
@@ -1878,22 +1872,15 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
if (mr->type == MR_TYPE_DMA)
return 0;
- pages = (u64 *) __get_free_page(GFP_KERNEL);
- if (!pages)
- return -ENOMEM;
-
- i = 0;
- for_each_sg_dma_page(mr->umem->sg_head.sgl, &sg_iter, mr->umem->nmap, 0) {
- pages[i] = ((u64)sg_page_iter_dma_address(&sg_iter)) >> 12;
-
- /* Directly record to MTPT table firstly 7 entry */
- if (i >= HNS_ROCE_MAX_INNER_MTPT_NUM)
- break;
- i++;
+ count = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages,
+ ARRAY_SIZE(pages), &pbl_ba);
+ if (count < 1) {
+ ibdev_err(ibdev, "failed to find PBL mtr, count = %d.", count);
+ return -ENOBUFS;
}
/* Register user mr */
- for (i = 0; i < HNS_ROCE_MAX_INNER_MTPT_NUM; i++) {
+ for (i = 0; i < count; i++) {
switch (i) {
case 0:
mpt_entry->pa0_l = cpu_to_le32((u32)(pages[i]));
@@ -1959,20 +1946,17 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
}
}
- free_page((unsigned long) pages);
-
- mpt_entry->pbl_addr_l = cpu_to_le32((u32)(mr->pbl_dma_addr));
-
+ mpt_entry->pbl_addr_l = cpu_to_le32(pbl_ba);
roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M,
- MPT_BYTE_12_PBL_ADDR_H_S,
- ((u32)(mr->pbl_dma_addr >> 32)));
+ MPT_BYTE_12_PBL_ADDR_H_S, upper_32_bits(pbl_ba));
return 0;
}
static void *get_cqe(struct hns_roce_cq *hr_cq, int n)
{
- return hns_roce_buf_offset(&hr_cq->buf, n * HNS_ROCE_V1_CQE_ENTRY_SIZE);
+ return hns_roce_buf_offset(hr_cq->mtr.kmem,
+ n * HNS_ROCE_V1_CQE_ENTRY_SIZE);
}
static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n)
@@ -2479,7 +2463,6 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev,
}
static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt,
enum hns_roce_qp_state cur_state,
enum hns_roce_qp_state new_state,
struct hns_roce_qp_context *context,
@@ -2560,6 +2543,29 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev,
return ret;
}
+static int find_wqe_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+ u64 *sq_ba, u64 *rq_ba, dma_addr_t *bt_ba)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int rq_pa_start;
+ int count;
+
+ count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, sq_ba, 1, bt_ba);
+ if (count < 1) {
+ ibdev_err(ibdev, "Failed to find SQ ba\n");
+ return -ENOBUFS;
+ }
+ rq_pa_start = hr_qp->rq.offset >> hr_qp->mtr.hem_cfg.buf_pg_shift;
+ count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, rq_pa_start, rq_ba, 1,
+ NULL);
+ if (!count) {
+ ibdev_err(ibdev, "Failed to find RQ ba\n");
+ return -ENOBUFS;
+ }
+
+ return 0;
+}
+
static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
int attr_mask, enum ib_qp_state cur_state,
enum ib_qp_state new_state)
@@ -2567,25 +2573,20 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
struct hns_roce_sqp_context *context;
- struct device *dev = &hr_dev->pdev->dev;
dma_addr_t dma_handle = 0;
u32 __iomem *addr;
- int rq_pa_start;
+ u64 sq_ba = 0;
+ u64 rq_ba = 0;
__le32 tmp;
u32 reg_val;
- u64 *mtts;
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return -ENOMEM;
/* Search QP buf's MTTs */
- mtts = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_table,
- hr_qp->mtt.first_seg, &dma_handle);
- if (!mtts) {
- dev_err(dev, "qp buf pa find failed\n");
+ if (find_wqe_mtt(hr_dev, hr_qp, &sq_ba, &rq_ba, &dma_handle))
goto out;
- }
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
roce_set_field(context->qp1c_bytes_4,
@@ -2599,11 +2600,11 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
roce_set_field(context->qp1c_bytes_4, QP1C_BYTES_4_PD_M,
QP1C_BYTES_4_PD_S, to_hr_pd(ibqp->pd)->pdn);
- context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle));
+ context->sq_rq_bt_l = cpu_to_le32(dma_handle);
roce_set_field(context->qp1c_bytes_12,
QP1C_BYTES_12_SQ_RQ_BT_H_M,
QP1C_BYTES_12_SQ_RQ_BT_H_S,
- ((u32)(dma_handle >> 32)));
+ upper_32_bits(dma_handle));
roce_set_field(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_HEAD_M,
QP1C_BYTES_16_RQ_HEAD_S, hr_qp->rq.head);
@@ -2624,14 +2625,12 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
roce_set_field(context->qp1c_bytes_20, QP1C_BYTES_20_PKEY_IDX_M,
QP1C_BYTES_20_PKEY_IDX_S, attr->pkey_index);
- rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE;
- context->cur_rq_wqe_ba_l =
- cpu_to_le32((u32)(mtts[rq_pa_start]));
+ context->cur_rq_wqe_ba_l = cpu_to_le32(rq_ba);
roce_set_field(context->qp1c_bytes_28,
QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M,
QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S,
- (mtts[rq_pa_start]) >> 32);
+ upper_32_bits(rq_ba));
roce_set_field(context->qp1c_bytes_28,
QP1C_BYTES_28_RQ_CUR_IDX_M,
QP1C_BYTES_28_RQ_CUR_IDX_S, 0);
@@ -2645,12 +2644,12 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
QP1C_BYTES_32_TX_CQ_NUM_S,
to_hr_cq(ibqp->send_cq)->cqn);
- context->cur_sq_wqe_ba_l = cpu_to_le32((u32)mtts[0]);
+ context->cur_sq_wqe_ba_l = cpu_to_le32(sq_ba);
roce_set_field(context->qp1c_bytes_40,
QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M,
QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S,
- (mtts[0]) >> 32);
+ upper_32_bits(sq_ba));
roce_set_field(context->qp1c_bytes_40,
QP1C_BYTES_40_SQ_CUR_IDX_M,
QP1C_BYTES_40_SQ_CUR_IDX_S, 0);
@@ -2704,6 +2703,28 @@ out:
return -EINVAL;
}
+static bool check_qp_state(enum ib_qp_state cur_state,
+ enum ib_qp_state new_state)
+{
+ static const bool sm[][IB_QPS_ERR + 1] = {
+ [IB_QPS_RESET] = { [IB_QPS_RESET] = true,
+ [IB_QPS_INIT] = true },
+ [IB_QPS_INIT] = { [IB_QPS_RESET] = true,
+ [IB_QPS_INIT] = true,
+ [IB_QPS_RTR] = true,
+ [IB_QPS_ERR] = true },
+ [IB_QPS_RTR] = { [IB_QPS_RESET] = true,
+ [IB_QPS_RTS] = true,
+ [IB_QPS_ERR] = true },
+ [IB_QPS_RTS] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true },
+ [IB_QPS_SQD] = {},
+ [IB_QPS_SQE] = {},
+ [IB_QPS_ERR] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true }
+ };
+
+ return sm[cur_state][new_state];
+}
+
static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
int attr_mask, enum ib_qp_state cur_state,
enum ib_qp_state new_state)
@@ -2716,26 +2737,29 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
dma_addr_t dma_handle_2 = 0;
dma_addr_t dma_handle = 0;
__le32 doorbell[2] = {0};
- int rq_pa_start = 0;
u64 *mtts_2 = NULL;
int ret = -EINVAL;
- u64 *mtts = NULL;
+ u64 sq_ba = 0;
+ u64 rq_ba = 0;
int port;
u8 port_num;
u8 *dmac;
u8 *smac;
+ if (!check_qp_state(cur_state, new_state)) {
+ ibdev_err(ibqp->device,
+ "not support QP(%u) status from %d to %d\n",
+ ibqp->qp_num, cur_state, new_state);
+ return -EINVAL;
+ }
+
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return -ENOMEM;
/* Search qp buf's mtts */
- mtts = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_table,
- hr_qp->mtt.first_seg, &dma_handle);
- if (mtts == NULL) {
- dev_err(dev, "qp buf pa find failed\n");
+ if (find_wqe_mtt(hr_dev, hr_qp, &sq_ba, &rq_ba, &dma_handle))
goto out;
- }
/* Search IRRL's mtts */
mtts_2 = hns_roce_table_find(hr_dev, &hr_dev->qp_table.irrl_table,
@@ -2890,11 +2914,11 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
dmac = (u8 *)attr->ah_attr.roce.dmac;
- context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle));
+ context->sq_rq_bt_l = cpu_to_le32(dma_handle);
roce_set_field(context->qpc_bytes_24,
QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M,
QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S,
- ((u32)(dma_handle >> 32)));
+ upper_32_bits(dma_handle));
roce_set_bit(context->qpc_bytes_24,
QP_CONTEXT_QPC_BYTE_24_REMOTE_ENABLE_E2E_CREDITS_S,
1);
@@ -2993,14 +3017,12 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_M,
QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S, 0);
- rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE;
- context->cur_rq_wqe_ba_l =
- cpu_to_le32((u32)(mtts[rq_pa_start]));
+ context->cur_rq_wqe_ba_l = cpu_to_le32(rq_ba);
roce_set_field(context->qpc_bytes_76,
QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M,
QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S,
- mtts[rq_pa_start] >> 32);
+ upper_32_bits(rq_ba));
roce_set_field(context->qpc_bytes_76,
QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_M,
QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S, 0);
@@ -3062,8 +3084,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
QP_CONTEXT_QPC_BYTES_156_SL_S,
rdma_ah_get_sl(&attr->ah_attr));
hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
- } else if (cur_state == IB_QPS_RTR &&
- new_state == IB_QPS_RTS) {
+ } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) {
/* If exist optional param, return error */
if ((attr_mask & IB_QP_ALT_PATH) ||
(attr_mask & IB_QP_ACCESS_FLAGS) ||
@@ -3075,12 +3096,12 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
goto out;
}
- context->rx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0]));
+ context->rx_cur_sq_wqe_ba_l = cpu_to_le32(sq_ba);
roce_set_field(context->qpc_bytes_120,
QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M,
QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S,
- (mtts[0]) >> 32);
+ upper_32_bits(sq_ba));
roce_set_field(context->qpc_bytes_124,
QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_M,
@@ -3223,28 +3244,18 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M,
QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S, 0);
- context->tx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0]));
+ context->tx_cur_sq_wqe_ba_l = cpu_to_le32(sq_ba);
roce_set_field(context->qpc_bytes_188,
QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M,
QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S,
- (mtts[0]) >> 32);
+ upper_32_bits(sq_ba));
roce_set_bit(context->qpc_bytes_188,
QP_CONTEXT_QPC_BYTES_188_PKT_RETRY_FLG_S, 0);
roce_set_field(context->qpc_bytes_188,
QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_M,
QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S,
0);
- } else if (!((cur_state == IB_QPS_INIT && new_state == IB_QPS_RESET) ||
- (cur_state == IB_QPS_INIT && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_RTR && new_state == IB_QPS_RESET) ||
- (cur_state == IB_QPS_RTR && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_RTS && new_state == IB_QPS_RESET) ||
- (cur_state == IB_QPS_RTS && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_ERR && new_state == IB_QPS_RESET) ||
- (cur_state == IB_QPS_ERR && new_state == IB_QPS_ERR))) {
- dev_err(dev, "not support this status migration\n");
- goto out;
}
/* Every status migrate must change state */
@@ -3253,8 +3264,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, new_state);
/* SW pass context to HW */
- ret = hns_roce_v1_qp_modify(hr_dev, &hr_qp->mtt,
- to_hns_roce_state(cur_state),
+ ret = hns_roce_v1_qp_modify(hr_dev, to_hns_roce_state(cur_state),
to_hns_roce_state(new_state), context,
hr_qp);
if (ret) {
@@ -3636,8 +3646,6 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
u32 cqe_cnt_cur;
int wait_time = 0;
- hns_roce_free_cqc(hr_dev, hr_cq);
-
/*
* Before freeing cq buffer, we need to ensure that the outstanding CQE
* have been written by checking the CQE counter.
@@ -3660,14 +3668,6 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
}
wait_time++;
}
-
- hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt);
-
- ib_umem_release(hr_cq->umem);
- if (!udata) {
- /* Free the buff of stored cq */
- hns_roce_buf_free(hr_dev, hr_cq->buf.size, &hr_cq->buf);
- }
}
static void set_eq_cons_index_v1(struct hns_roce_eq *eq, int req_not)
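
The check_qp_state() helper added in the hns_roce_hw_v1.c hunk above replaces the removed chain of explicit (cur_state, new_state) comparisons with a two-dimensional validity table. A self-contained sketch of the same technique (illustrative only; names are invented):

/* Illustrative only -- not driver code. Standalone version of the
 * transition-table idea: a 2-D bool array answers "is this QP state
 * change allowed?" in one lookup.
 */
#include <stdbool.h>
#include <stdio.h>

enum demo_qp_state { D_RESET, D_INIT, D_RTR, D_RTS, D_SQD, D_SQE, D_ERR, D_NR };

static bool demo_check_qp_state(enum demo_qp_state cur, enum demo_qp_state next)
{
	static const bool sm[D_NR][D_NR] = {
		[D_RESET] = { [D_RESET] = true, [D_INIT] = true },
		[D_INIT]  = { [D_RESET] = true, [D_INIT] = true,
			      [D_RTR] = true, [D_ERR] = true },
		[D_RTR]   = { [D_RESET] = true, [D_RTS] = true, [D_ERR] = true },
		[D_RTS]   = { [D_RESET] = true, [D_ERR] = true },
		[D_ERR]   = { [D_RESET] = true, [D_ERR] = true },
	};	/* rows for D_SQD/D_SQE default to all-false, i.e. unsupported */

	return sm[cur][next];
}

int main(void)
{
	printf("INIT -> RTR allowed: %d\n", demo_check_qp_state(D_INIT, D_RTR));
	printf("RTS  -> RTR allowed: %d\n", demo_check_qp_state(D_RTS, D_RTR));
	return 0;
}
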
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index c3316672b70e..ebe570aa2323 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -95,6 +95,7 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
{
struct hns_roce_mr *mr = to_hr_mr(wr->mr);
struct hns_roce_wqe_frmr_seg *fseg = wqe;
+ u64 pbl_ba;
/* use ib_access_flags */
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S,
@@ -109,19 +110,20 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
wr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0);
/* Data structure reuse may lead to confusion */
- rc_sq_wqe->msg_len = cpu_to_le32(mr->pbl_ba & 0xffffffff);
- rc_sq_wqe->inv_key = cpu_to_le32(mr->pbl_ba >> 32);
+ pbl_ba = mr->pbl_mtr.hem_cfg.root_ba;
+ rc_sq_wqe->msg_len = cpu_to_le32(lower_32_bits(pbl_ba));
+ rc_sq_wqe->inv_key = cpu_to_le32(upper_32_bits(pbl_ba));
rc_sq_wqe->byte_16 = cpu_to_le32(wr->mr->length & 0xffffffff);
rc_sq_wqe->byte_20 = cpu_to_le32(wr->mr->length >> 32);
rc_sq_wqe->rkey = cpu_to_le32(wr->key);
rc_sq_wqe->va = cpu_to_le64(wr->mr->iova);
- fseg->pbl_size = cpu_to_le32(mr->pbl_size);
+ fseg->pbl_size = cpu_to_le32(mr->npages);
roce_set_field(fseg->mode_buf_pg_sz,
V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M,
V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S,
- mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
roce_set_bit(fseg->mode_buf_pg_sz,
V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0);
}
@@ -152,47 +154,24 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
unsigned int *sge_ind, int valid_num_sge)
{
struct hns_roce_v2_wqe_data_seg *dseg;
- struct ib_sge *sg;
- int num_in_wqe = 0;
- int extend_sge_num;
- int fi_sge_num;
- int se_sge_num;
- int shift;
- int i;
+ struct ib_sge *sge = wr->sg_list;
+ unsigned int idx = *sge_ind;
+ int cnt = valid_num_sge;
- if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC)
- num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE;
- extend_sge_num = valid_num_sge - num_in_wqe;
- sg = wr->sg_list + num_in_wqe;
- shift = qp->hr_buf.page_shift;
+ if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
+ cnt -= HNS_ROCE_SGE_IN_WQE;
+ sge += HNS_ROCE_SGE_IN_WQE;
+ }
- /*
- * Check whether wr->num_sge sges are in the same page. If not, we
- * should calculate how many sges in the first page and the second
- * page.
- */
- dseg = hns_roce_get_extend_sge(qp, (*sge_ind) & (qp->sge.sge_cnt - 1));
- fi_sge_num = (round_up((uintptr_t)dseg, 1 << shift) -
- (uintptr_t)dseg) /
- sizeof(struct hns_roce_v2_wqe_data_seg);
- if (extend_sge_num > fi_sge_num) {
- se_sge_num = extend_sge_num - fi_sge_num;
- for (i = 0; i < fi_sge_num; i++) {
- set_data_seg_v2(dseg++, sg + i);
- (*sge_ind)++;
- }
- dseg = hns_roce_get_extend_sge(qp,
- (*sge_ind) & (qp->sge.sge_cnt - 1));
- for (i = 0; i < se_sge_num; i++) {
- set_data_seg_v2(dseg++, sg + fi_sge_num + i);
- (*sge_ind)++;
- }
- } else {
- for (i = 0; i < extend_sge_num; i++) {
- set_data_seg_v2(dseg++, sg + i);
- (*sge_ind)++;
- }
+ while (cnt > 0) {
+ dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1));
+ set_data_seg_v2(dseg, sge);
+ idx++;
+ sge++;
+ cnt--;
}
+
+ *sge_ind = idx;
}
static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
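/*
 * Editor's illustrative sketch (not part of the patch): the rewritten
 * set_extend_sge() above walks the extended-SGE ring with
 * "idx & (qp->sge.sge_cnt - 1)", which only wraps correctly when the
 * ring size is a power of two. A stand-alone demonstration of that wrap
 * arithmetic, with a hypothetical wrap() helper and queue depth:
 */
#include <assert.h>

static unsigned int wrap(unsigned int idx, unsigned int cnt)
{
	return idx & (cnt - 1);	/* valid only when cnt is a power of two */
}

int main(void)
{
	unsigned int sge_cnt = 8;	/* hypothetical ring depth */

	assert(wrap(7, sge_cnt) == 7);
	assert(wrap(8, sge_cnt) == 0);	/* index 8 wraps back to slot 0 */
	assert(wrap(13, sge_cnt) == 5);
	return 0;
}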
@@ -230,7 +209,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S,
1);
} else {
- if (valid_num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) {
+ if (valid_num_sge <= HNS_ROCE_SGE_IN_WQE) {
for (i = 0; i < wr->num_sge; i++) {
if (likely(wr->sg_list[i].length)) {
set_data_seg_v2(dseg, wr->sg_list + i);
@@ -243,8 +222,8 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
(*sge_ind) & (qp->sge.sge_cnt - 1));
- for (i = 0; i < wr->num_sge &&
- j < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) {
+ for (i = 0; i < wr->num_sge && j < HNS_ROCE_SGE_IN_WQE;
+ i++) {
if (likely(wr->sg_list[i].length)) {
set_data_seg_v2(dseg, wr->sg_list + i);
dseg++;
@@ -673,7 +652,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
}
/* rq support inline data */
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
+ if (hr_qp->rq_inl_buf.wqe_cnt) {
sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list;
hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt =
(u32)wr->num_sge;
@@ -715,6 +694,128 @@ out:
return ret;
}
+static void *get_srq_wqe(struct hns_roce_srq *srq, int n)
+{
+ return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift);
+}
+
+static void *get_idx_buf(struct hns_roce_idx_que *idx_que, int n)
+{
+ return hns_roce_buf_offset(idx_que->mtr.kmem,
+ n << idx_que->entry_shift);
+}
+
+static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index)
+{
+ /* always called with interrupts disabled. */
+ spin_lock(&srq->lock);
+
+ bitmap_clear(srq->idx_que.bitmap, wqe_index, 1);
+ srq->tail++;
+
+ spin_unlock(&srq->lock);
+}
+
+static int find_empty_entry(struct hns_roce_idx_que *idx_que,
+ unsigned long size)
+{
+ int wqe_idx;
+
+ if (unlikely(bitmap_full(idx_que->bitmap, size)))
+ return -ENOSPC;
+
+ wqe_idx = find_first_zero_bit(idx_que->bitmap, size);
+
+ bitmap_set(idx_que->bitmap, wqe_idx, 1);
+
+ return wqe_idx;
+}
+
+static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
+ const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+ struct hns_roce_v2_wqe_data_seg *dseg;
+ struct hns_roce_v2_db srq_db;
+ unsigned long flags;
+ __le32 *srq_idx;
+ int ret = 0;
+ int wqe_idx;
+ void *wqe;
+ int nreq;
+ int ind;
+ int i;
+
+ spin_lock_irqsave(&srq->lock, flags);
+
+ ind = srq->head & (srq->wqe_cnt - 1);
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (unlikely(wr->num_sge >= srq->max_gs)) {
+ ret = -EINVAL;
+ *bad_wr = wr;
+ break;
+ }
+
+ if (unlikely(srq->head == srq->tail)) {
+ ret = -ENOMEM;
+ *bad_wr = wr;
+ break;
+ }
+
+ wqe_idx = find_empty_entry(&srq->idx_que, srq->wqe_cnt);
+ if (wqe_idx < 0) {
+ ret = -ENOMEM;
+ *bad_wr = wr;
+ break;
+ }
+
+ wqe = get_srq_wqe(srq, wqe_idx);
+ dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ dseg[i].len = cpu_to_le32(wr->sg_list[i].length);
+ dseg[i].lkey = cpu_to_le32(wr->sg_list[i].lkey);
+ dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr);
+ }
+
+ if (i < srq->max_gs) {
+ dseg[i].len = 0;
+ dseg[i].lkey = cpu_to_le32(0x100);
+ dseg[i].addr = 0;
+ }
+
+ srq_idx = get_idx_buf(&srq->idx_que, ind);
+ *srq_idx = cpu_to_le32(wqe_idx);
+
+ srq->wrid[wqe_idx] = wr->wr_id;
+ ind = (ind + 1) & (srq->wqe_cnt - 1);
+ }
+
+ if (likely(nreq)) {
+ srq->head += nreq;
+
+ /*
+ * Make sure that descriptors are written before
+ * doorbell record.
+ */
+ wmb();
+
+ srq_db.byte_4 =
+ cpu_to_le32(HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S |
+ (srq->srqn & V2_DB_BYTE_4_TAG_M));
+ srq_db.parameter = cpu_to_le32(srq->head);
+
+ hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l);
+ }
+
+ spin_unlock_irqrestore(&srq->lock, flags);
+
+ return ret;
+}
+
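/*
 * Editor's illustration (not part of the patch): hns_roce_v2_post_srq_recv()
 * above is the kernel-side handler reached when a consumer posts receive
 * work requests to a shared receive queue. A minimal libibverbs caller,
 * assuming "srq", "mr" and "buf" were already set up elsewhere (e.g. via
 * ibv_create_srq() and ibv_reg_mr()), might look like this sketch:
 */
#include <stdint.h>
#include <infiniband/verbs.h>

static int post_one_srq_recv(struct ibv_srq *srq, struct ibv_mr *mr,
			     void *buf, uint32_t len, uint64_t wr_id)
{
	struct ibv_sge sge = {
		.addr = (uintptr_t)buf,
		.length = len,
		.lkey = mr->lkey,
	};
	struct ibv_recv_wr wr = {
		.wr_id = wr_id,
		.sg_list = &sge,
		.num_sge = 1,
	};
	struct ibv_recv_wr *bad_wr;

	/* Returns 0 on success, an errno value on failure. */
	return ibv_post_srq_recv(srq, &wr, &bad_wr);
}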
static int hns_roce_v2_cmd_hw_reseted(struct hns_roce_dev *hr_dev,
unsigned long instance_stage,
unsigned long reset_stage)
@@ -1786,6 +1887,9 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
caps->flags = roce_get_field(resp_c->cap_flags_num_pds,
V2_QUERY_PF_CAPS_C_CAP_FLAGS_M,
V2_QUERY_PF_CAPS_C_CAP_FLAGS_S);
+ caps->flags |= le16_to_cpu(resp_d->cap_flags_ex) <<
+ HNS_ROCE_CAP_FLAGS_EX_SHIFT;
+
caps->num_cqs = 1 << roce_get_field(resp_c->max_gid_num_cqs,
V2_QUERY_PF_CAPS_C_NUM_CQS_M,
V2_QUERY_PF_CAPS_C_NUM_CQS_S);
@@ -1978,11 +2082,6 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
hr_dev->vendor_part_id = hr_dev->pci_dev->device;
hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid);
- caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS;
- caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS;
- caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS;
- caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS;
-
caps->pbl_ba_pg_sz = HNS_ROCE_BA_PG_SZ_SUPPORTED_16K;
caps->pbl_buf_pg_sz = 0;
caps->pbl_hop_num = HNS_ROCE_PBL_HOP_NUM;
@@ -2040,8 +2139,6 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev,
page_num = link_tbl->npages;
entry = link_tbl->table.buf;
- memset(req_a, 0, sizeof(*req_a));
- memset(req_b, 0, sizeof(*req_b));
for (i = 0; i < 2; i++) {
hns_roce_cmq_setup_basic_desc(&desc[i], opcode, false);
@@ -2050,39 +2147,30 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev,
desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
else
desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
-
- if (i == 0) {
- req_a->base_addr_l =
- cpu_to_le32(link_tbl->table.map & 0xffffffff);
- req_a->base_addr_h =
- cpu_to_le32(link_tbl->table.map >> 32);
- roce_set_field(req_a->depth_pgsz_init_en,
- CFG_LLM_QUE_DEPTH_M, CFG_LLM_QUE_DEPTH_S,
- link_tbl->npages);
- roce_set_field(req_a->depth_pgsz_init_en,
- CFG_LLM_QUE_PGSZ_M, CFG_LLM_QUE_PGSZ_S,
- link_tbl->pg_sz);
- req_a->head_ba_l = cpu_to_le32(entry[0].blk_ba0);
- req_a->head_ba_h_nxtptr =
- cpu_to_le32(entry[0].blk_ba1_nxt_ptr);
- roce_set_field(req_a->head_ptr, CFG_LLM_HEAD_PTR_M,
- CFG_LLM_HEAD_PTR_S, 0);
- } else {
- req_b->tail_ba_l =
- cpu_to_le32(entry[page_num - 1].blk_ba0);
- roce_set_field(req_b->tail_ba_h, CFG_LLM_TAIL_BA_H_M,
- CFG_LLM_TAIL_BA_H_S,
- entry[page_num - 1].blk_ba1_nxt_ptr &
- HNS_ROCE_LINK_TABLE_BA1_M);
- roce_set_field(req_b->tail_ptr, CFG_LLM_TAIL_PTR_M,
- CFG_LLM_TAIL_PTR_S,
- (entry[page_num - 2].blk_ba1_nxt_ptr &
- HNS_ROCE_LINK_TABLE_NXT_PTR_M) >>
- HNS_ROCE_LINK_TABLE_NXT_PTR_S);
- }
}
+
+ req_a->base_addr_l = cpu_to_le32(link_tbl->table.map & 0xffffffff);
+ req_a->base_addr_h = cpu_to_le32(link_tbl->table.map >> 32);
+ roce_set_field(req_a->depth_pgsz_init_en, CFG_LLM_QUE_DEPTH_M,
+ CFG_LLM_QUE_DEPTH_S, link_tbl->npages);
+ roce_set_field(req_a->depth_pgsz_init_en, CFG_LLM_QUE_PGSZ_M,
+ CFG_LLM_QUE_PGSZ_S, link_tbl->pg_sz);
roce_set_field(req_a->depth_pgsz_init_en, CFG_LLM_INIT_EN_M,
CFG_LLM_INIT_EN_S, 1);
+ req_a->head_ba_l = cpu_to_le32(entry[0].blk_ba0);
+ req_a->head_ba_h_nxtptr = cpu_to_le32(entry[0].blk_ba1_nxt_ptr);
+ roce_set_field(req_a->head_ptr, CFG_LLM_HEAD_PTR_M, CFG_LLM_HEAD_PTR_S,
+ 0);
+
+ req_b->tail_ba_l = cpu_to_le32(entry[page_num - 1].blk_ba0);
+ roce_set_field(req_b->tail_ba_h, CFG_LLM_TAIL_BA_H_M,
+ CFG_LLM_TAIL_BA_H_S,
+ entry[page_num - 1].blk_ba1_nxt_ptr &
+ HNS_ROCE_LINK_TABLE_BA1_M);
+ roce_set_field(req_b->tail_ptr, CFG_LLM_TAIL_PTR_M, CFG_LLM_TAIL_PTR_S,
+ (entry[page_num - 2].blk_ba1_nxt_ptr &
+ HNS_ROCE_LINK_TABLE_NXT_PTR_M) >>
+ HNS_ROCE_LINK_TABLE_NXT_PTR_S);
return hns_roce_cmq_send(hr_dev, desc, 2);
}
@@ -2438,12 +2526,9 @@ static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
reg_smac_l = *(u32 *)(&addr[0]);
reg_smac_h = *(u16 *)(&addr[4]);
- memset(smac_tb, 0, sizeof(*smac_tb));
- roce_set_field(smac_tb->tb_idx_rsv,
- CFG_SMAC_TB_IDX_M,
+ roce_set_field(smac_tb->tb_idx_rsv, CFG_SMAC_TB_IDX_M,
CFG_SMAC_TB_IDX_S, phy_port);
- roce_set_field(smac_tb->vf_smac_h_rsv,
- CFG_SMAC_TB_VF_SMAC_H_M,
+ roce_set_field(smac_tb->vf_smac_h_rsv, CFG_SMAC_TB_VF_SMAC_H_M,
CFG_SMAC_TB_VF_SMAC_H_S, reg_smac_h);
smac_tb->vf_smac_l = cpu_to_le32(reg_smac_l);
@@ -2453,32 +2538,30 @@ static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
static int set_mtpt_pbl(struct hns_roce_v2_mpt_entry *mpt_entry,
struct hns_roce_mr *mr)
{
- struct sg_dma_page_iter sg_iter;
- u64 page_addr;
- u64 *pages;
- int i;
+ struct hns_roce_dev *hr_dev = to_hr_dev(mr->ibmr.device);
+ u64 pages[HNS_ROCE_V2_MAX_INNER_MTPT_NUM] = { 0 };
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ dma_addr_t pbl_ba;
+ int i, count;
- mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size);
- mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3));
- roce_set_field(mpt_entry->byte_48_mode_ba,
- V2_MPT_BYTE_48_PBL_BA_H_M, V2_MPT_BYTE_48_PBL_BA_H_S,
- upper_32_bits(mr->pbl_ba >> 3));
+ count = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages,
+ ARRAY_SIZE(pages), &pbl_ba);
+ if (count < 1) {
+ ibdev_err(ibdev, "failed to find PBL mtr, count = %d.\n",
+ count);
+ return -ENOBUFS;
+ }
- pages = (u64 *)__get_free_page(GFP_KERNEL);
- if (!pages)
- return -ENOMEM;
+ /* Aligned to the hardware address access unit */
+ for (i = 0; i < count; i++)
+ pages[i] >>= 6;
- i = 0;
- for_each_sg_dma_page(mr->umem->sg_head.sgl, &sg_iter, mr->umem->nmap, 0) {
- page_addr = sg_page_iter_dma_address(&sg_iter);
- pages[i] = page_addr >> 6;
+ mpt_entry->pbl_size = cpu_to_le32(mr->npages);
+ mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> 3);
+ roce_set_field(mpt_entry->byte_48_mode_ba,
+ V2_MPT_BYTE_48_PBL_BA_H_M, V2_MPT_BYTE_48_PBL_BA_H_S,
+ upper_32_bits(pbl_ba >> 3));
- /* Record the first 2 entry directly to MTPT table */
- if (i >= HNS_ROCE_V2_MAX_INNER_MTPT_NUM - 1)
- goto found;
- i++;
- }
-found:
mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0]));
roce_set_field(mpt_entry->byte_56_pa0_h, V2_MPT_BYTE_56_PA0_H_M,
V2_MPT_BYTE_56_PA0_H_S, upper_32_bits(pages[0]));
@@ -2489,9 +2572,7 @@ found:
roce_set_field(mpt_entry->byte_64_buf_pa1,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
- mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
-
- free_page((unsigned long)pages);
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
return 0;
}
@@ -2513,7 +2594,7 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
roce_set_field(mpt_entry->byte_4_pd_hop_st,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
- mr->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
V2_MPT_BYTE_4_PD_S, mr->pd);
@@ -2599,11 +2680,19 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr)
{
+ struct hns_roce_dev *hr_dev = to_hr_dev(mr->ibmr.device);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_v2_mpt_entry *mpt_entry;
+ dma_addr_t pbl_ba = 0;
mpt_entry = mb_buf;
memset(mpt_entry, 0, sizeof(*mpt_entry));
+ if (hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, NULL, 0, &pbl_ba) < 0) {
+ ibdev_err(ibdev, "failed to find frmr mtr.\n");
+ return -ENOBUFS;
+ }
+
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
@@ -2611,7 +2700,7 @@ static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr)
roce_set_field(mpt_entry->byte_4_pd_hop_st,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
- mr->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
V2_MPT_BYTE_4_PD_S, mr->pd);
@@ -2624,17 +2713,17 @@ static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr)
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 0);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
- mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size);
+ mpt_entry->pbl_size = cpu_to_le32(mr->npages);
- mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3));
+ mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(pbl_ba >> 3));
roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M,
V2_MPT_BYTE_48_PBL_BA_H_S,
- upper_32_bits(mr->pbl_ba >> 3));
+ upper_32_bits(pbl_ba >> 3));
roce_set_field(mpt_entry->byte_64_buf_pa1,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
- mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
return 0;
}
@@ -2680,7 +2769,8 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n)
{
- return hns_roce_buf_offset(&hr_cq->buf, n * HNS_ROCE_V2_CQE_ENTRY_SIZE);
+ return hns_roce_buf_offset(hr_cq->mtr.kmem,
+ n * HNS_ROCE_V2_CQE_ENTRY_SIZE);
}
static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n)
@@ -2697,22 +2787,6 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq)
return get_sw_cqe_v2(hr_cq, hr_cq->cons_index);
}
-static void *get_srq_wqe(struct hns_roce_srq *srq, int n)
-{
- return hns_roce_buf_offset(&srq->buf, n << srq->wqe_shift);
-}
-
-static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index)
-{
- /* always called with interrupts disabled. */
- spin_lock(&srq->lock);
-
- bitmap_clear(srq->idx_que.bitmap, wqe_index, 1);
- srq->tail++;
-
- spin_unlock(&srq->lock);
-}
-
static void hns_roce_v2_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index)
{
*hr_cq->set_ci_db = cons_index & V2_CQ_DB_PARAMETER_CONS_IDX_M;
@@ -2801,30 +2875,30 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQN_M,
V2_CQC_BYTE_8_CQN_S, hr_cq->cqn);
- cq_context->cqe_cur_blk_addr = cpu_to_le32(mtts[0] >> PAGE_ADDR_SHIFT);
+ cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0]));
roce_set_field(cq_context->byte_16_hop_addr,
V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_M,
V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_S,
- mtts[0] >> (32 + PAGE_ADDR_SHIFT));
+ upper_32_bits(to_hr_hw_page_addr(mtts[0])));
roce_set_field(cq_context->byte_16_hop_addr,
V2_CQC_BYTE_16_CQE_HOP_NUM_M,
V2_CQC_BYTE_16_CQE_HOP_NUM_S, hr_dev->caps.cqe_hop_num ==
HNS_ROCE_HOP_NUM_0 ? 0 : hr_dev->caps.cqe_hop_num);
- cq_context->cqe_nxt_blk_addr = cpu_to_le32(mtts[1] >> PAGE_ADDR_SHIFT);
+ cq_context->cqe_nxt_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[1]));
roce_set_field(cq_context->byte_24_pgsz_addr,
V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_M,
V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_S,
- mtts[1] >> (32 + PAGE_ADDR_SHIFT));
+ upper_32_bits(to_hr_hw_page_addr(mtts[1])));
roce_set_field(cq_context->byte_24_pgsz_addr,
V2_CQC_BYTE_24_CQE_BA_PG_SZ_M,
V2_CQC_BYTE_24_CQE_BA_PG_SZ_S,
- hr_dev->caps.cqe_ba_pg_sz + PG_SHIFT_OFFSET);
+ to_hr_hw_page_shift(hr_cq->mtr.hem_cfg.ba_pg_shift));
roce_set_field(cq_context->byte_24_pgsz_addr,
V2_CQC_BYTE_24_CQE_BUF_PG_SZ_M,
V2_CQC_BYTE_24_CQE_BUF_PG_SZ_S,
- hr_dev->caps.cqe_buf_pg_sz + PG_SHIFT_OFFSET);
+ to_hr_hw_page_shift(hr_cq->mtr.hem_cfg.buf_pg_shift));
cq_context->cqe_ba = cpu_to_le32(dma_handle >> 3);
@@ -2968,6 +3042,61 @@ out:
return npolled;
}
+static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
+ struct hns_roce_v2_cqe *cqe, struct ib_wc *wc)
+{
+ static const struct {
+ u32 cqe_status;
+ enum ib_wc_status wc_status;
+ } map[] = {
+ { HNS_ROCE_CQE_V2_SUCCESS, IB_WC_SUCCESS },
+ { HNS_ROCE_CQE_V2_LOCAL_LENGTH_ERR, IB_WC_LOC_LEN_ERR },
+ { HNS_ROCE_CQE_V2_LOCAL_QP_OP_ERR, IB_WC_LOC_QP_OP_ERR },
+ { HNS_ROCE_CQE_V2_LOCAL_PROT_ERR, IB_WC_LOC_PROT_ERR },
+ { HNS_ROCE_CQE_V2_WR_FLUSH_ERR, IB_WC_WR_FLUSH_ERR },
+ { HNS_ROCE_CQE_V2_MW_BIND_ERR, IB_WC_MW_BIND_ERR },
+ { HNS_ROCE_CQE_V2_BAD_RESP_ERR, IB_WC_BAD_RESP_ERR },
+ { HNS_ROCE_CQE_V2_LOCAL_ACCESS_ERR, IB_WC_LOC_ACCESS_ERR },
+ { HNS_ROCE_CQE_V2_REMOTE_INVAL_REQ_ERR, IB_WC_REM_INV_REQ_ERR },
+ { HNS_ROCE_CQE_V2_REMOTE_ACCESS_ERR, IB_WC_REM_ACCESS_ERR },
+ { HNS_ROCE_CQE_V2_REMOTE_OP_ERR, IB_WC_REM_OP_ERR },
+ { HNS_ROCE_CQE_V2_TRANSPORT_RETRY_EXC_ERR,
+ IB_WC_RETRY_EXC_ERR },
+ { HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR, IB_WC_RNR_RETRY_EXC_ERR },
+ { HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR, IB_WC_REM_ABORT_ERR },
+ };
+
+ u32 cqe_status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M,
+ V2_CQE_BYTE_4_STATUS_S);
+ int i;
+
+ wc->status = IB_WC_GENERAL_ERR;
+ for (i = 0; i < ARRAY_SIZE(map); i++)
+ if (cqe_status == map[i].cqe_status) {
+ wc->status = map[i].wc_status;
+ break;
+ }
+
+ if (wc->status == IB_WC_SUCCESS || wc->status == IB_WC_WR_FLUSH_ERR)
+ return;
+
+ ibdev_err(&hr_dev->ib_dev, "error cqe status 0x%x:\n", cqe_status);
+ print_hex_dump(KERN_ERR, "", DUMP_PREFIX_NONE, 16, 4, cqe,
+ sizeof(*cqe), false);
+
+ /*
+ * Hip08 hardware cannot flush the WQEs in SQ/RQ if the QP state gets
+ * into errored mode. Hence, as a workaround to this hardware
+ * limitation, driver needs to assist in flushing. But the flushing
+ * operation uses mailbox to convey the QP state to the hardware and
+ * which can sleep due to the mutex protection around the mailbox calls.
+ * Hence, use the deferred flush for now. Once wc error detected, the
+ * flushing operation is needed.
+ */
+ if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG, &qp->flush_flag))
+ init_flush_work(hr_dev, qp);
+}
+
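/*
 * Editor's sketch (not part of the patch): the comment in get_cqe_status()
 * explains that the flush must be deferred because the mailbox path can
 * sleep, and the test_and_set_bit() guard ensures only the first error CQE
 * schedules it. The same "set a flag once, defer the heavy work" pattern in
 * stand-alone form, using C11 atomics and a hypothetical queue_flush_work()
 * standing in for init_flush_work():
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_flag flush_pending = ATOMIC_FLAG_INIT;

static void queue_flush_work(void)
{
	/* In the driver this would queue work on the device workqueue. */
	puts("flush work queued");
}

static void on_error_cqe(void)
{
	/* Only the first caller sees the flag clear and queues the work. */
	if (!atomic_flag_test_and_set(&flush_pending))
		queue_flush_work();
}

int main(void)
{
	on_error_cqe();	/* queues the flush */
	on_error_cqe();	/* no-op: flush already pending */
	return 0;
}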
static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
struct hns_roce_qp **cur_qp, struct ib_wc *wc)
{
@@ -2979,7 +3108,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
int is_send;
u16 wqe_ctr;
u32 opcode;
- u32 status;
int qpn;
int ret;
@@ -3009,7 +3137,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
*cur_qp = hr_qp;
}
- hr_qp = *cur_qp;
wc->qp = &(*cur_qp)->ibqp;
wc->vendor_err = 0;
@@ -3044,77 +3171,8 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
++wq->tail;
}
- status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M,
- V2_CQE_BYTE_4_STATUS_S);
- switch (status & HNS_ROCE_V2_CQE_STATUS_MASK) {
- case HNS_ROCE_CQE_V2_SUCCESS:
- wc->status = IB_WC_SUCCESS;
- break;
- case HNS_ROCE_CQE_V2_LOCAL_LENGTH_ERR:
- wc->status = IB_WC_LOC_LEN_ERR;
- break;
- case HNS_ROCE_CQE_V2_LOCAL_QP_OP_ERR:
- wc->status = IB_WC_LOC_QP_OP_ERR;
- break;
- case HNS_ROCE_CQE_V2_LOCAL_PROT_ERR:
- wc->status = IB_WC_LOC_PROT_ERR;
- break;
- case HNS_ROCE_CQE_V2_WR_FLUSH_ERR:
- wc->status = IB_WC_WR_FLUSH_ERR;
- break;
- case HNS_ROCE_CQE_V2_MW_BIND_ERR:
- wc->status = IB_WC_MW_BIND_ERR;
- break;
- case HNS_ROCE_CQE_V2_BAD_RESP_ERR:
- wc->status = IB_WC_BAD_RESP_ERR;
- break;
- case HNS_ROCE_CQE_V2_LOCAL_ACCESS_ERR:
- wc->status = IB_WC_LOC_ACCESS_ERR;
- break;
- case HNS_ROCE_CQE_V2_REMOTE_INVAL_REQ_ERR:
- wc->status = IB_WC_REM_INV_REQ_ERR;
- break;
- case HNS_ROCE_CQE_V2_REMOTE_ACCESS_ERR:
- wc->status = IB_WC_REM_ACCESS_ERR;
- break;
- case HNS_ROCE_CQE_V2_REMOTE_OP_ERR:
- wc->status = IB_WC_REM_OP_ERR;
- break;
- case HNS_ROCE_CQE_V2_TRANSPORT_RETRY_EXC_ERR:
- wc->status = IB_WC_RETRY_EXC_ERR;
- break;
- case HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR:
- wc->status = IB_WC_RNR_RETRY_EXC_ERR;
- break;
- case HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR:
- wc->status = IB_WC_REM_ABORT_ERR;
- break;
- default:
- wc->status = IB_WC_GENERAL_ERR;
- break;
- }
-
- /*
- * Hip08 hardware cannot flush the WQEs in SQ/RQ if the QP state gets
- * into errored mode. Hence, as a workaround to this hardware
- * limitation, driver needs to assist in flushing. But the flushing
- * operation uses mailbox to convey the QP state to the hardware and
- * which can sleep due to the mutex protection around the mailbox calls.
- * Hence, use the deferred flush for now. Once wc error detected, the
- * flushing operation is needed.
- */
- if (wc->status != IB_WC_SUCCESS &&
- wc->status != IB_WC_WR_FLUSH_ERR) {
- ibdev_err(&hr_dev->ib_dev, "error cqe status is: 0x%x\n",
- status & HNS_ROCE_V2_CQE_STATUS_MASK);
-
- if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG, &hr_qp->flush_flag))
- init_flush_work(hr_dev, hr_qp);
-
- return 0;
- }
-
- if (wc->status == IB_WC_WR_FLUSH_ERR)
+ get_cqe_status(hr_dev, *cur_qp, cqe, wc);
+ if (wc->status != IB_WC_SUCCESS)
return 0;
if (is_send) {
@@ -3514,29 +3572,18 @@ static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
struct hns_roce_v2_qp_context *context,
struct hns_roce_v2_qp_context *qpc_mask)
{
- if (hr_qp->ibqp.qp_type == IB_QPT_GSI)
- roce_set_field(context->byte_4_sqpn_tst,
- V2_QPC_BYTE_4_SGE_SHIFT_M,
- V2_QPC_BYTE_4_SGE_SHIFT_S,
- ilog2((unsigned int)hr_qp->sge.sge_cnt));
- else
- roce_set_field(context->byte_4_sqpn_tst,
- V2_QPC_BYTE_4_SGE_SHIFT_M,
- V2_QPC_BYTE_4_SGE_SHIFT_S,
- hr_qp->sq.max_gs >
- HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE ?
- ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0);
+ roce_set_field(context->byte_4_sqpn_tst,
+ V2_QPC_BYTE_4_SGE_SHIFT_M, V2_QPC_BYTE_4_SGE_SHIFT_S,
+ to_hr_hem_entries_shift(hr_qp->sge.sge_cnt,
+ hr_qp->sge.sge_shift));
roce_set_field(context->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S,
- ilog2((unsigned int)hr_qp->sq.wqe_cnt));
+ ilog2(hr_qp->sq.wqe_cnt));
roce_set_field(context->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
- (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
- hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT ||
- hr_qp->ibqp.srq) ? 0 :
- ilog2((unsigned int)hr_qp->rq.wqe_cnt));
+ ilog2(hr_qp->rq.wqe_cnt));
}
static void modify_qp_reset_to_init(struct ib_qp *ibqp,
@@ -3572,7 +3619,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M,
V2_QPC_BYTE_24_VLAN_ID_S, 0xfff);
- if (hr_qp->rdb_en)
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
roce_set_bit(context->byte_68_rq_db,
V2_QPC_BYTE_68_RQ_RECORD_EN_S, 1);
@@ -3757,7 +3804,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
int port;
/* Search qp buf's mtts */
- page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
+ page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift;
count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr,
hr_qp->rq.offset / page_size, mtts,
MTT_MIN_COUNT, &wqe_sge_ba);
@@ -3804,17 +3851,16 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
roce_set_field(context->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M,
V2_QPC_BYTE_12_SQ_HOP_NUM_S,
- hr_dev->caps.wqe_sq_hop_num == HNS_ROCE_HOP_NUM_0 ?
- 0 : hr_dev->caps.wqe_sq_hop_num);
+ to_hr_hem_hopnum(hr_dev->caps.wqe_sq_hop_num,
+ hr_qp->sq.wqe_cnt));
roce_set_field(qpc_mask->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M,
V2_QPC_BYTE_12_SQ_HOP_NUM_S, 0);
roce_set_field(context->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_SGE_HOP_NUM_M,
V2_QPC_BYTE_20_SGE_HOP_NUM_S,
- ((ibqp->qp_type == IB_QPT_GSI) ||
- hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ?
- hr_dev->caps.wqe_sge_hop_num : 0);
+ to_hr_hem_hopnum(hr_dev->caps.wqe_sge_hop_num,
+ hr_qp->sge.sge_cnt));
roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_SGE_HOP_NUM_M,
V2_QPC_BYTE_20_SGE_HOP_NUM_S, 0);
@@ -3822,8 +3868,9 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
roce_set_field(context->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_RQ_HOP_NUM_M,
V2_QPC_BYTE_20_RQ_HOP_NUM_S,
- hr_dev->caps.wqe_rq_hop_num == HNS_ROCE_HOP_NUM_0 ?
- 0 : hr_dev->caps.wqe_rq_hop_num);
+ to_hr_hem_hopnum(hr_dev->caps.wqe_rq_hop_num,
+ hr_qp->rq.wqe_cnt));
+
roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_RQ_HOP_NUM_M,
V2_QPC_BYTE_20_RQ_HOP_NUM_S, 0);
@@ -3831,7 +3878,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
roce_set_field(context->byte_16_buf_ba_pg_sz,
V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M,
V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S,
- hr_qp->wqe_bt_pg_shift + PG_SHIFT_OFFSET);
+ to_hr_hw_page_shift(hr_qp->mtr.hem_cfg.ba_pg_shift));
roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz,
V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M,
V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S, 0);
@@ -3839,29 +3886,29 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
roce_set_field(context->byte_16_buf_ba_pg_sz,
V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M,
V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S,
- hr_dev->caps.mtt_buf_pg_sz + PG_SHIFT_OFFSET);
+ to_hr_hw_page_shift(hr_qp->mtr.hem_cfg.buf_pg_shift));
roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz,
V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M,
V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, 0);
- context->rq_cur_blk_addr = cpu_to_le32(mtts[0] >> PAGE_ADDR_SHIFT);
+ context->rq_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0]));
qpc_mask->rq_cur_blk_addr = 0;
roce_set_field(context->byte_92_srq_info,
V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_M,
V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_S,
- mtts[0] >> (32 + PAGE_ADDR_SHIFT));
+ upper_32_bits(to_hr_hw_page_addr(mtts[0])));
roce_set_field(qpc_mask->byte_92_srq_info,
V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_M,
V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_S, 0);
- context->rq_nxt_blk_addr = cpu_to_le32(mtts[1] >> PAGE_ADDR_SHIFT);
+ context->rq_nxt_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[1]));
qpc_mask->rq_nxt_blk_addr = 0;
roce_set_field(context->byte_104_rq_sge,
V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_M,
V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_S,
- mtts[1] >> (32 + PAGE_ADDR_SHIFT));
+ upper_32_bits(to_hr_hw_page_addr(mtts[1])));
roce_set_field(qpc_mask->byte_104_rq_sge,
V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_M,
V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_S, 0);
@@ -3928,7 +3975,8 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
/* mtu*(2^LP_PKTN_INI) should not bigger than 1 message length 64kb */
roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M,
- V2_QPC_BYTE_56_LP_PKTN_INI_S, 0);
+ V2_QPC_BYTE_56_LP_PKTN_INI_S,
+ ilog2(hr_dev->caps.max_sq_inline / IB_MTU_4096));
roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M,
V2_QPC_BYTE_56_LP_PKTN_INI_S, 0);
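/*
 * Editor's worked example (illustration, not part of the patch): reading
 * the comment above with a 4096-byte path MTU, the constraint
 * mtu * 2^LP_PKTN_INI <= 64 KB gives 2^LP_PKTN_INI <= 65536 / 4096 = 16,
 * i.e. an LP_PKTN_INI of at most 4 for that MTU.
 */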
@@ -3995,18 +4043,18 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
/* Search qp buf's mtts */
count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, &sq_cur_blk, 1, NULL);
if (count < 1) {
- ibdev_err(ibdev, "failed to find buf pa of QP(0x%lx)\n",
+ ibdev_err(ibdev, "failed to find QP(0x%lx) SQ buf\n",
hr_qp->qpn);
return -EINVAL;
}
- if (hr_qp->sge.offset) {
- page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
+ if (hr_qp->sge.sge_cnt > 0) {
+ page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift;
count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr,
hr_qp->sge.offset / page_size,
&sge_cur_blk, 1, NULL);
if (count < 1) {
- ibdev_err(ibdev, "failed to find sge pa of QP(0x%lx)\n",
+ ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf\n",
hr_qp->qpn);
return -EINVAL;
}
@@ -4024,38 +4072,33 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
* we should set all bits of the relevant fields in context mask to
* 0 at the same time, else set them to 0x1.
*/
- context->sq_cur_blk_addr = cpu_to_le32(sq_cur_blk >> PAGE_ADDR_SHIFT);
+ context->sq_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(sq_cur_blk));
roce_set_field(context->byte_168_irrl_idx,
V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M,
V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S,
- sq_cur_blk >> (32 + PAGE_ADDR_SHIFT));
+ upper_32_bits(to_hr_hw_page_addr(sq_cur_blk)));
qpc_mask->sq_cur_blk_addr = 0;
roce_set_field(qpc_mask->byte_168_irrl_idx,
V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M,
V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S, 0);
- context->sq_cur_sge_blk_addr = ((ibqp->qp_type == IB_QPT_GSI) ||
- hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ?
- cpu_to_le32(sge_cur_blk >>
- PAGE_ADDR_SHIFT) : 0;
+ context->sq_cur_sge_blk_addr =
+ cpu_to_le32(to_hr_hw_page_addr(sge_cur_blk));
roce_set_field(context->byte_184_irrl_idx,
V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M,
V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S,
- ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs >
- HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ?
- (sge_cur_blk >>
- (32 + PAGE_ADDR_SHIFT)) : 0);
+ upper_32_bits(to_hr_hw_page_addr(sge_cur_blk)));
qpc_mask->sq_cur_sge_blk_addr = 0;
roce_set_field(qpc_mask->byte_184_irrl_idx,
V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M,
V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S, 0);
context->rx_sq_cur_blk_addr =
- cpu_to_le32(sq_cur_blk >> PAGE_ADDR_SHIFT);
+ cpu_to_le32(to_hr_hw_page_addr(sq_cur_blk));
roce_set_field(context->byte_232_irrl_sge,
V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_M,
V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_S,
- sq_cur_blk >> (32 + PAGE_ADDR_SHIFT));
+ upper_32_bits(to_hr_hw_page_addr(sq_cur_blk)));
qpc_mask->rx_sq_cur_blk_addr = 0;
roce_set_field(qpc_mask->byte_232_irrl_sge,
V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_M,
@@ -4108,21 +4151,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
return 0;
}
-static inline bool hns_roce_v2_check_qp_stat(enum ib_qp_state cur_state,
- enum ib_qp_state new_state)
-{
-
- if ((cur_state != IB_QPS_RESET &&
- (new_state == IB_QPS_ERR || new_state == IB_QPS_RESET)) ||
- ((cur_state == IB_QPS_RTS || cur_state == IB_QPS_SQD) &&
- (new_state == IB_QPS_RTS || new_state == IB_QPS_SQD)) ||
- (cur_state == IB_QPS_SQE && new_state == IB_QPS_RTS))
- return true;
-
- return false;
-
-}
-
static int hns_roce_v2_set_path(struct ib_qp *ibqp,
const struct ib_qp_attr *attr,
int attr_mask,
@@ -4226,6 +4254,28 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
return 0;
}
+static bool check_qp_state(enum ib_qp_state cur_state,
+ enum ib_qp_state new_state)
+{
+ static const bool sm[][IB_QPS_ERR + 1] = {
+ [IB_QPS_RESET] = { [IB_QPS_RESET] = true,
+ [IB_QPS_INIT] = true },
+ [IB_QPS_INIT] = { [IB_QPS_RESET] = true,
+ [IB_QPS_INIT] = true,
+ [IB_QPS_RTR] = true,
+ [IB_QPS_ERR] = true },
+ [IB_QPS_RTR] = { [IB_QPS_RESET] = true,
+ [IB_QPS_RTS] = true,
+ [IB_QPS_ERR] = true },
+ [IB_QPS_RTS] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true },
+ [IB_QPS_SQD] = {},
+ [IB_QPS_SQE] = {},
+ [IB_QPS_ERR] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true }
+ };
+
+ return sm[cur_state][new_state];
+}
+
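/*
 * Editor's sketch (not part of the patch): check_qp_state() above encodes
 * the legal QP transitions as a two-dimensional lookup table instead of a
 * chain of comparisons. The same technique in stand-alone form, with a
 * reduced, hypothetical three-state machine:
 */
#include <stdbool.h>
#include <stdio.h>

enum state { ST_RESET, ST_INIT, ST_RTR, ST_MAX };

static bool transition_ok(enum state cur, enum state next)
{
	static const bool ok[ST_MAX][ST_MAX] = {
		[ST_RESET] = { [ST_RESET] = true, [ST_INIT] = true },
		[ST_INIT]  = { [ST_RESET] = true, [ST_INIT] = true,
			       [ST_RTR]   = true },
		[ST_RTR]   = { [ST_RESET] = true },
	};

	return ok[cur][next];
}

int main(void)
{
	printf("%d\n", transition_ok(ST_RESET, ST_INIT));	/* 1: allowed */
	printf("%d\n", transition_ok(ST_RTR, ST_INIT));		/* 0: rejected */
	return 0;
}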
static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
const struct ib_qp_attr *attr,
int attr_mask,
@@ -4237,6 +4287,11 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
int ret = 0;
+ if (!check_qp_state(cur_state, new_state)) {
+ ibdev_err(&hr_dev->ib_dev, "Illegal state for QP!\n");
+ return -EINVAL;
+ }
+
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
memset(qpc_mask, 0, sizeof(*qpc_mask));
modify_qp_reset_to_init(ibqp, attr, attr_mask, context,
@@ -4247,23 +4302,11 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
ret = modify_qp_init_to_rtr(ibqp, attr, attr_mask, context,
qpc_mask);
- if (ret)
- goto out;
} else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) {
ret = modify_qp_rtr_to_rts(ibqp, attr, attr_mask, context,
qpc_mask);
- if (ret)
- goto out;
- } else if (hns_roce_v2_check_qp_stat(cur_state, new_state)) {
- /* Nothing */
- ;
- } else {
- ibdev_err(&hr_dev->ib_dev, "Illegal state for QP!\n");
- ret = -EINVAL;
- goto out;
}
-out:
return ret;
}
@@ -4554,19 +4597,20 @@ out:
return ret;
}
-static inline enum ib_qp_state to_ib_qp_st(enum hns_roce_v2_qp_state state)
+static int to_ib_qp_st(enum hns_roce_v2_qp_state state)
{
- switch (state) {
- case HNS_ROCE_QP_ST_RST: return IB_QPS_RESET;
- case HNS_ROCE_QP_ST_INIT: return IB_QPS_INIT;
- case HNS_ROCE_QP_ST_RTR: return IB_QPS_RTR;
- case HNS_ROCE_QP_ST_RTS: return IB_QPS_RTS;
- case HNS_ROCE_QP_ST_SQ_DRAINING:
- case HNS_ROCE_QP_ST_SQD: return IB_QPS_SQD;
- case HNS_ROCE_QP_ST_SQER: return IB_QPS_SQE;
- case HNS_ROCE_QP_ST_ERR: return IB_QPS_ERR;
- default: return -1;
- }
+ static const enum ib_qp_state map[] = {
+ [HNS_ROCE_QP_ST_RST] = IB_QPS_RESET,
+ [HNS_ROCE_QP_ST_INIT] = IB_QPS_INIT,
+ [HNS_ROCE_QP_ST_RTR] = IB_QPS_RTR,
+ [HNS_ROCE_QP_ST_RTS] = IB_QPS_RTS,
+ [HNS_ROCE_QP_ST_SQD] = IB_QPS_SQD,
+ [HNS_ROCE_QP_ST_SQER] = IB_QPS_SQE,
+ [HNS_ROCE_QP_ST_ERR] = IB_QPS_ERR,
+ [HNS_ROCE_QP_ST_SQ_DRAINING] = IB_QPS_SQD
+ };
+
+ return (state < ARRAY_SIZE(map)) ? map[state] : -1;
}
static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev,
@@ -4838,6 +4882,186 @@ out:
return ret;
}
+static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq, u32 pdn, u16 xrcd,
+ u32 cqn, void *mb_buf, u64 *mtts_wqe,
+ u64 *mtts_idx, dma_addr_t dma_handle_wqe,
+ dma_addr_t dma_handle_idx)
+{
+ struct hns_roce_srq_context *srq_context;
+
+ srq_context = mb_buf;
+ memset(srq_context, 0, sizeof(*srq_context));
+
+ roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M,
+ SRQC_BYTE_4_SRQ_ST_S, 1);
+
+ roce_set_field(srq_context->byte_4_srqn_srqst,
+ SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M,
+ SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S,
+ to_hr_hem_hopnum(hr_dev->caps.srqwqe_hop_num,
+ srq->wqe_cnt));
+ roce_set_field(srq_context->byte_4_srqn_srqst,
+ SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S,
+ ilog2(srq->wqe_cnt));
+
+ roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M,
+ SRQC_BYTE_4_SRQN_S, srq->srqn);
+
+ roce_set_field(srq_context->byte_8_limit_wl, SRQC_BYTE_8_SRQ_LIMIT_WL_M,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0);
+
+ roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M,
+ SRQC_BYTE_12_SRQ_XRCD_S, xrcd);
+
+ srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3));
+
+ roce_set_field(srq_context->byte_24_wqe_bt_ba,
+ SRQC_BYTE_24_SRQ_WQE_BT_BA_M,
+ SRQC_BYTE_24_SRQ_WQE_BT_BA_S,
+ dma_handle_wqe >> 35);
+
+ roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M,
+ SRQC_BYTE_28_PD_S, pdn);
+ roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M,
+ SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 :
+ fls(srq->max_gs - 1));
+
+ srq_context->idx_bt_ba = cpu_to_le32(dma_handle_idx >> 3);
+ roce_set_field(srq_context->rsv_idx_bt_ba,
+ SRQC_BYTE_36_SRQ_IDX_BT_BA_M,
+ SRQC_BYTE_36_SRQ_IDX_BT_BA_S,
+ dma_handle_idx >> 35);
+
+ srq_context->idx_cur_blk_addr =
+ cpu_to_le32(to_hr_hw_page_addr(mtts_idx[0]));
+ roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
+ SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M,
+ SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S,
+ upper_32_bits(to_hr_hw_page_addr(mtts_idx[0])));
+ roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
+ SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M,
+ SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S,
+ to_hr_hem_hopnum(hr_dev->caps.idx_hop_num,
+ srq->wqe_cnt));
+
+ roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
+ SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M,
+ SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S,
+ to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.ba_pg_shift));
+ roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
+ SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M,
+ SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S,
+ to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.buf_pg_shift));
+
+ srq_context->idx_nxt_blk_addr =
+ cpu_to_le32(to_hr_hw_page_addr(mtts_idx[1]));
+ roce_set_field(srq_context->rsv_idxnxtblkaddr,
+ SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M,
+ SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S,
+ upper_32_bits(to_hr_hw_page_addr(mtts_idx[1])));
+ roce_set_field(srq_context->byte_56_xrc_cqn,
+ SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S,
+ cqn);
+ roce_set_field(srq_context->byte_56_xrc_cqn,
+ SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M,
+ SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S,
+ to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift));
+ roce_set_field(srq_context->byte_56_xrc_cqn,
+ SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M,
+ SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S,
+ to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift));
+
+ roce_set_bit(srq_context->db_record_addr_record_en,
+ SRQC_BYTE_60_SRQ_RECORD_EN_S, 0);
+}
+
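/*
 * Editor's sketch (not part of the patch): hns_roce_v2_write_srqc() above
 * stores a 64-bit base address in two context fields, shifting by 3 for the
 * low part (bits [34:3]) and by 35 for the high part (bits [63:35]). A
 * stand-alone round-trip check of that split, with a hypothetical
 * 8-byte-aligned address standing in for dma_handle_wqe:
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t ba = 0x1234567000ULL;		/* hypothetical, 8-byte aligned */
	uint32_t lo = (uint32_t)(ba >> 3);	/* bits [34:3] */
	uint32_t hi = (uint32_t)(ba >> 35);	/* bits [63:35] */
	uint64_t back = ((uint64_t)hi << 35) | ((uint64_t)lo << 3);

	assert(back == ba);	/* the two fields recombine losslessly */
	printf("lo=0x%x hi=0x%x\n", lo, hi);
	return 0;
}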
+static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
+ struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+ struct hns_roce_srq_context *srq_context;
+ struct hns_roce_srq_context *srqc_mask;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ if (srq_attr_mask & IB_SRQ_LIMIT) {
+ if (srq_attr->srq_limit >= srq->wqe_cnt)
+ return -EINVAL;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ srq_context = mailbox->buf;
+ srqc_mask = (struct hns_roce_srq_context *)mailbox->buf + 1;
+
+ memset(srqc_mask, 0xff, sizeof(*srqc_mask));
+
+ roce_set_field(srq_context->byte_8_limit_wl,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_M,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_S, srq_attr->srq_limit);
+ roce_set_field(srqc_mask->byte_8_limit_wl,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_M,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0);
+
+ ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, srq->srqn, 0,
+ HNS_ROCE_CMD_MODIFY_SRQC,
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ if (ret) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to handle cmd of modifying SRQ, ret = %d.\n",
+ ret);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+ struct hns_roce_srq_context *srq_context;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int limit_wl;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ srq_context = mailbox->buf;
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, srq->srqn, 0,
+ HNS_ROCE_CMD_QUERY_SRQC,
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
+ if (ret) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to process cmd of querying SRQ, ret = %d.\n",
+ ret);
+ goto out;
+ }
+
+ limit_wl = roce_get_field(srq_context->byte_8_limit_wl,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_M,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_S);
+
+ attr->srq_limit = limit_wl;
+ attr->max_wr = srq->wqe_cnt;
+ attr->max_sge = srq->max_gs;
+
+ memcpy(srq_context, mailbox->buf, sizeof(*srq_context));
+
+out:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
struct hns_roce_dev *hr_dev = to_hr_dev(cq->device);
@@ -4989,24 +5213,14 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
hns_roce_write64(hr_dev, doorbell, eq->doorbell);
}
-static inline void *get_eqe_buf(struct hns_roce_eq *eq, unsigned long offset)
-{
- u32 buf_chk_sz;
-
- buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
- if (eq->buf.nbufs == 1)
- return eq->buf.direct.buf + offset % buf_chk_sz;
- else
- return eq->buf.page_list[offset / buf_chk_sz].buf +
- offset % buf_chk_sz;
-}
-
static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
{
struct hns_roce_aeqe *aeqe;
- aeqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) *
- HNS_ROCE_AEQ_ENTRY_SIZE);
+ aeqe = hns_roce_buf_offset(eq->mtr.kmem,
+ (eq->cons_index & (eq->entries - 1)) *
+ HNS_ROCE_AEQ_ENTRY_SIZE);
+
return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^
!!(eq->cons_index & eq->entries)) ? aeqe : NULL;
}
@@ -5103,8 +5317,9 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
{
struct hns_roce_ceqe *ceqe;
- ceqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) *
- HNS_ROCE_CEQ_ENTRY_SIZE);
+ ceqe = hns_roce_buf_offset(eq->mtr.kmem,
+ (eq->cons_index & (eq->entries - 1)) *
+ HNS_ROCE_CEQ_ENTRY_SIZE);
return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^
(!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
}
@@ -5263,17 +5478,15 @@ static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, int eqn)
static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
{
- if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0)
- hns_roce_mtr_cleanup(hr_dev, &eq->mtr);
- hns_roce_buf_free(hr_dev, eq->buf.size, &eq->buf);
+ hns_roce_mtr_destroy(hr_dev, &eq->mtr);
}
-static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq,
- void *mb_buf)
+static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
+ void *mb_buf)
{
+ u64 eqe_ba[MTT_MIN_COUNT] = { 0 };
struct hns_roce_eq_context *eqc;
- u64 ba[MTT_MIN_COUNT] = { 0 };
+ u64 bt_ba = 0;
int count;
eqc = mb_buf;
@@ -5281,31 +5494,18 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
/* init eqc */
eq->doorbell = hr_dev->reg_base + ROCEE_VF_EQ_DB_CFG0_REG;
- eq->hop_num = hr_dev->caps.eqe_hop_num;
eq->cons_index = 0;
eq->over_ignore = HNS_ROCE_V2_EQ_OVER_IGNORE_0;
eq->coalesce = HNS_ROCE_V2_EQ_COALESCE_0;
eq->arm_st = HNS_ROCE_V2_EQ_ALWAYS_ARMED;
- eq->eqe_ba_pg_sz = hr_dev->caps.eqe_ba_pg_sz;
- eq->eqe_buf_pg_sz = hr_dev->caps.eqe_buf_pg_sz;
eq->shift = ilog2((unsigned int)eq->entries);
- /* if not muti-hop, eqe buffer only use one trunk */
- if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) {
- eq->eqe_ba = eq->buf.direct.map;
- eq->cur_eqe_ba = eq->eqe_ba;
- if (eq->buf.npages > 1)
- eq->nxt_eqe_ba = eq->eqe_ba + (1 << eq->eqe_buf_pg_sz);
- else
- eq->nxt_eqe_ba = eq->eqe_ba;
- } else {
- count = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, ba,
- MTT_MIN_COUNT, &eq->eqe_ba);
- eq->cur_eqe_ba = ba[0];
- if (count > 1)
- eq->nxt_eqe_ba = ba[1];
- else
- eq->nxt_eqe_ba = ba[0];
+ /* if not multi-hop, eqe buffer only use one trunk */
+ count = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, eqe_ba, MTT_MIN_COUNT,
+ &bt_ba);
+ if (count < 1) {
+ dev_err(hr_dev->dev, "failed to find EQE mtr\n");
+ return -ENOBUFS;
}
/* set eqc state */
@@ -5339,12 +5539,12 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
/* set eqe_ba_pg_sz */
roce_set_field(eqc->byte_8, HNS_ROCE_EQC_BA_PG_SZ_M,
HNS_ROCE_EQC_BA_PG_SZ_S,
- eq->eqe_ba_pg_sz + PG_SHIFT_OFFSET);
+ to_hr_hw_page_shift(eq->mtr.hem_cfg.ba_pg_shift));
/* set eqe_buf_pg_sz */
roce_set_field(eqc->byte_8, HNS_ROCE_EQC_BUF_PG_SZ_M,
HNS_ROCE_EQC_BUF_PG_SZ_S,
- eq->eqe_buf_pg_sz + PG_SHIFT_OFFSET);
+ to_hr_hw_page_shift(eq->mtr.hem_cfg.buf_pg_shift));
/* set eq_producer_idx */
roce_set_field(eqc->byte_8, HNS_ROCE_EQC_PROD_INDX_M,
@@ -5363,13 +5563,13 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
HNS_ROCE_EQC_REPORT_TIMER_S,
HNS_ROCE_EQ_INIT_REPORT_TIMER);
- /* set eqe_ba [34:3] */
+ /* set bt_ba [34:3] */
roce_set_field(eqc->eqe_ba0, HNS_ROCE_EQC_EQE_BA_L_M,
- HNS_ROCE_EQC_EQE_BA_L_S, eq->eqe_ba >> 3);
+ HNS_ROCE_EQC_EQE_BA_L_S, bt_ba >> 3);
- /* set eqe_ba [64:35] */
+ /* set bt_ba [64:35] */
roce_set_field(eqc->eqe_ba1, HNS_ROCE_EQC_EQE_BA_H_M,
- HNS_ROCE_EQC_EQE_BA_H_S, eq->eqe_ba >> 35);
+ HNS_ROCE_EQC_EQE_BA_H_S, bt_ba >> 35);
/* set eq shift */
roce_set_field(eqc->byte_28, HNS_ROCE_EQC_SHIFT_M, HNS_ROCE_EQC_SHIFT_S,
@@ -5381,15 +5581,15 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
/* set cur_eqe_ba [27:12] */
roce_set_field(eqc->byte_28, HNS_ROCE_EQC_CUR_EQE_BA_L_M,
- HNS_ROCE_EQC_CUR_EQE_BA_L_S, eq->cur_eqe_ba >> 12);
+ HNS_ROCE_EQC_CUR_EQE_BA_L_S, eqe_ba[0] >> 12);
/* set cur_eqe_ba [59:28] */
roce_set_field(eqc->byte_32, HNS_ROCE_EQC_CUR_EQE_BA_M_M,
- HNS_ROCE_EQC_CUR_EQE_BA_M_S, eq->cur_eqe_ba >> 28);
+ HNS_ROCE_EQC_CUR_EQE_BA_M_S, eqe_ba[0] >> 28);
/* set cur_eqe_ba [63:60] */
roce_set_field(eqc->byte_36, HNS_ROCE_EQC_CUR_EQE_BA_H_M,
- HNS_ROCE_EQC_CUR_EQE_BA_H_S, eq->cur_eqe_ba >> 60);
+ HNS_ROCE_EQC_CUR_EQE_BA_H_S, eqe_ba[0] >> 60);
/* set eq consumer idx */
roce_set_field(eqc->byte_36, HNS_ROCE_EQC_CONS_INDX_M,
@@ -5397,97 +5597,38 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
/* set nex_eqe_ba[43:12] */
roce_set_field(eqc->nxt_eqe_ba0, HNS_ROCE_EQC_NXT_EQE_BA_L_M,
- HNS_ROCE_EQC_NXT_EQE_BA_L_S, eq->nxt_eqe_ba >> 12);
+ HNS_ROCE_EQC_NXT_EQE_BA_L_S, eqe_ba[1] >> 12);
/* set nex_eqe_ba[63:44] */
roce_set_field(eqc->nxt_eqe_ba1, HNS_ROCE_EQC_NXT_EQE_BA_H_M,
- HNS_ROCE_EQC_NXT_EQE_BA_H_S, eq->nxt_eqe_ba >> 44);
-}
-
-static int map_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
- u32 page_shift)
-{
- struct hns_roce_buf_region region = {};
- dma_addr_t *buf_list = NULL;
- int ba_num;
- int ret;
-
- ba_num = DIV_ROUND_UP(PAGE_ALIGN(eq->entries * eq->eqe_size),
- 1 << page_shift);
- hns_roce_init_buf_region(&region, hr_dev->caps.eqe_hop_num, 0, ba_num);
-
- /* alloc a tmp list for storing eq buf address */
- ret = hns_roce_alloc_buf_list(&region, &buf_list, 1);
- if (ret) {
- dev_err(hr_dev->dev, "alloc eq buf_list error\n");
- return ret;
- }
+ HNS_ROCE_EQC_NXT_EQE_BA_H_S, eqe_ba[1] >> 44);
- ba_num = hns_roce_get_kmem_bufs(hr_dev, buf_list, region.count,
- region.offset, &eq->buf);
- if (ba_num != region.count) {
- dev_err(hr_dev->dev, "get eqe buf err,expect %d,ret %d.\n",
- region.count, ba_num);
- ret = -ENOBUFS;
- goto done;
- }
-
- hns_roce_mtr_init(&eq->mtr, PAGE_SHIFT + hr_dev->caps.eqe_ba_pg_sz,
- page_shift);
- ret = hns_roce_mtr_attach(hr_dev, &eq->mtr, &buf_list, &region, 1);
- if (ret)
- dev_err(hr_dev->dev, "mtr attach error for eqe\n");
-
- goto done;
-
- hns_roce_mtr_cleanup(hr_dev, &eq->mtr);
-done:
- hns_roce_free_buf_list(&buf_list, 1);
-
- return ret;
+ return 0;
}
static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
{
- struct hns_roce_buf *buf = &eq->buf;
- bool is_mhop = false;
- u32 page_shift;
- u32 mhop_num;
- u32 max_size;
- int ret;
+ struct hns_roce_buf_attr buf_attr = {};
+ int err;
- page_shift = PAGE_SHIFT + hr_dev->caps.eqe_buf_pg_sz;
- mhop_num = hr_dev->caps.eqe_hop_num;
- if (!mhop_num) {
- max_size = 1 << page_shift;
- buf->size = max_size;
- } else if (mhop_num == HNS_ROCE_HOP_NUM_0) {
- max_size = eq->entries * eq->eqe_size;
- buf->size = max_size;
- } else {
- max_size = 1 << page_shift;
- buf->size = PAGE_ALIGN(eq->entries * eq->eqe_size);
- is_mhop = true;
- }
+ if (hr_dev->caps.eqe_hop_num == HNS_ROCE_HOP_NUM_0)
+ eq->hop_num = 0;
+ else
+ eq->hop_num = hr_dev->caps.eqe_hop_num;
- ret = hns_roce_buf_alloc(hr_dev, buf->size, max_size, buf, page_shift);
- if (ret) {
- dev_err(hr_dev->dev, "alloc eq buf error\n");
- return ret;
- }
+ buf_attr.page_shift = hr_dev->caps.eqe_buf_pg_sz + PAGE_ADDR_SHIFT;
+ buf_attr.region[0].size = eq->entries * eq->eqe_size;
+ buf_attr.region[0].hopnum = eq->hop_num;
+ buf_attr.region_count = 1;
+ buf_attr.fixed_page = true;
- if (is_mhop) {
- ret = map_eq_buf(hr_dev, eq, page_shift);
- if (ret) {
- dev_err(hr_dev->dev, "map roce buf error\n");
- goto err_alloc;
- }
- }
+ err = hns_roce_mtr_create(hr_dev, &eq->mtr, &buf_attr,
+ hr_dev->caps.srqwqe_ba_pg_sz +
+ PAGE_ADDR_SHIFT, NULL, 0);
+ if (err)
+ dev_err(hr_dev->dev, "Failed to alloc EQE mtr, err %d\n", err);
- return 0;
-err_alloc:
- hns_roce_buf_free(hr_dev, buf->size, buf);
- return ret;
+ return err;
}
static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
@@ -5499,15 +5640,16 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
/* Allocate mailbox memory */
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
+ if (IS_ERR_OR_NULL(mailbox))
+ return -ENOMEM;
ret = alloc_eq_buf(hr_dev, eq);
- if (ret) {
- ret = -ENOMEM;
+ if (ret)
goto free_cmd_mbox;
- }
- hns_roce_config_eqc(hr_dev, eq, mailbox->buf);
+
+ ret = config_eqc(hr_dev, eq, mailbox->buf);
+ if (ret)
+ goto err_cmd_mbox;
ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, eq->eqn, 0,
eq_cmd, HNS_ROCE_CMD_TIMEOUT_MSECS);
@@ -5731,294 +5873,6 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
destroy_workqueue(hr_dev->irq_workq);
}
-static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
- struct hns_roce_srq *srq, u32 pdn, u16 xrcd,
- u32 cqn, void *mb_buf, u64 *mtts_wqe,
- u64 *mtts_idx, dma_addr_t dma_handle_wqe,
- dma_addr_t dma_handle_idx)
-{
- struct hns_roce_srq_context *srq_context;
-
- srq_context = mb_buf;
- memset(srq_context, 0, sizeof(*srq_context));
-
- roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M,
- SRQC_BYTE_4_SRQ_ST_S, 1);
-
- roce_set_field(srq_context->byte_4_srqn_srqst,
- SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M,
- SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S,
- (hr_dev->caps.srqwqe_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
- hr_dev->caps.srqwqe_hop_num));
- roce_set_field(srq_context->byte_4_srqn_srqst,
- SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S,
- ilog2(srq->wqe_cnt));
-
- roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M,
- SRQC_BYTE_4_SRQN_S, srq->srqn);
-
- roce_set_field(srq_context->byte_8_limit_wl, SRQC_BYTE_8_SRQ_LIMIT_WL_M,
- SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0);
-
- roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M,
- SRQC_BYTE_12_SRQ_XRCD_S, xrcd);
-
- srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3));
-
- roce_set_field(srq_context->byte_24_wqe_bt_ba,
- SRQC_BYTE_24_SRQ_WQE_BT_BA_M,
- SRQC_BYTE_24_SRQ_WQE_BT_BA_S,
- dma_handle_wqe >> 35);
-
- roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M,
- SRQC_BYTE_28_PD_S, pdn);
- roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M,
- SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 :
- fls(srq->max_gs - 1));
-
- srq_context->idx_bt_ba = cpu_to_le32(dma_handle_idx >> 3);
- roce_set_field(srq_context->rsv_idx_bt_ba,
- SRQC_BYTE_36_SRQ_IDX_BT_BA_M,
- SRQC_BYTE_36_SRQ_IDX_BT_BA_S,
- dma_handle_idx >> 35);
-
- srq_context->idx_cur_blk_addr =
- cpu_to_le32(mtts_idx[0] >> PAGE_ADDR_SHIFT);
- roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
- SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M,
- SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S,
- mtts_idx[0] >> (32 + PAGE_ADDR_SHIFT));
- roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
- SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M,
- SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S,
- hr_dev->caps.idx_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
- hr_dev->caps.idx_hop_num);
-
- roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
- SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M,
- SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S,
- hr_dev->caps.idx_ba_pg_sz + PG_SHIFT_OFFSET);
- roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
- SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M,
- SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S,
- hr_dev->caps.idx_buf_pg_sz + PG_SHIFT_OFFSET);
-
- srq_context->idx_nxt_blk_addr =
- cpu_to_le32(mtts_idx[1] >> PAGE_ADDR_SHIFT);
- roce_set_field(srq_context->rsv_idxnxtblkaddr,
- SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M,
- SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S,
- mtts_idx[1] >> (32 + PAGE_ADDR_SHIFT));
- roce_set_field(srq_context->byte_56_xrc_cqn,
- SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S,
- cqn);
- roce_set_field(srq_context->byte_56_xrc_cqn,
- SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M,
- SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S,
- hr_dev->caps.srqwqe_ba_pg_sz + PG_SHIFT_OFFSET);
- roce_set_field(srq_context->byte_56_xrc_cqn,
- SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M,
- SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S,
- hr_dev->caps.srqwqe_buf_pg_sz + PG_SHIFT_OFFSET);
-
- roce_set_bit(srq_context->db_record_addr_record_en,
- SRQC_BYTE_60_SRQ_RECORD_EN_S, 0);
-}
-
-static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
- struct ib_srq_attr *srq_attr,
- enum ib_srq_attr_mask srq_attr_mask,
- struct ib_udata *udata)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
- struct hns_roce_srq *srq = to_hr_srq(ibsrq);
- struct hns_roce_srq_context *srq_context;
- struct hns_roce_srq_context *srqc_mask;
- struct hns_roce_cmd_mailbox *mailbox;
- int ret;
-
- if (srq_attr_mask & IB_SRQ_LIMIT) {
- if (srq_attr->srq_limit >= srq->wqe_cnt)
- return -EINVAL;
-
- mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
-
- srq_context = mailbox->buf;
- srqc_mask = (struct hns_roce_srq_context *)mailbox->buf + 1;
-
- memset(srqc_mask, 0xff, sizeof(*srqc_mask));
-
- roce_set_field(srq_context->byte_8_limit_wl,
- SRQC_BYTE_8_SRQ_LIMIT_WL_M,
- SRQC_BYTE_8_SRQ_LIMIT_WL_S, srq_attr->srq_limit);
- roce_set_field(srqc_mask->byte_8_limit_wl,
- SRQC_BYTE_8_SRQ_LIMIT_WL_M,
- SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0);
-
- ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, srq->srqn, 0,
- HNS_ROCE_CMD_MODIFY_SRQC,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
- if (ret) {
- ibdev_err(&hr_dev->ib_dev,
- "failed to process cmd when modifying SRQ, ret = %d\n",
- ret);
- return ret;
- }
- }
-
- return 0;
-}
-
-static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
- struct hns_roce_srq *srq = to_hr_srq(ibsrq);
- struct hns_roce_srq_context *srq_context;
- struct hns_roce_cmd_mailbox *mailbox;
- int limit_wl;
- int ret;
-
- mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
-
- srq_context = mailbox->buf;
- ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, srq->srqn, 0,
- HNS_ROCE_CMD_QUERY_SRQC,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
- if (ret) {
- ibdev_err(&hr_dev->ib_dev,
- "failed to process cmd when querying SRQ, ret = %d\n",
- ret);
- goto out;
- }
-
- limit_wl = roce_get_field(srq_context->byte_8_limit_wl,
- SRQC_BYTE_8_SRQ_LIMIT_WL_M,
- SRQC_BYTE_8_SRQ_LIMIT_WL_S);
-
- attr->srq_limit = limit_wl;
- attr->max_wr = srq->wqe_cnt - 1;
- attr->max_sge = srq->max_gs;
-
- memcpy(srq_context, mailbox->buf, sizeof(*srq_context));
-
-out:
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
- return ret;
-}
-
-static int find_empty_entry(struct hns_roce_idx_que *idx_que,
- unsigned long size)
-{
- int wqe_idx;
-
- if (unlikely(bitmap_full(idx_que->bitmap, size)))
- return -ENOSPC;
-
- wqe_idx = find_first_zero_bit(idx_que->bitmap, size);
-
- bitmap_set(idx_que->bitmap, wqe_idx, 1);
-
- return wqe_idx;
-}
-
-static void fill_idx_queue(struct hns_roce_idx_que *idx_que,
- int cur_idx, int wqe_idx)
-{
- unsigned int *addr;
-
- addr = (unsigned int *)hns_roce_buf_offset(&idx_que->idx_buf,
- cur_idx * idx_que->entry_sz);
- *addr = wqe_idx;
-}
-
-static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
- const struct ib_recv_wr *wr,
- const struct ib_recv_wr **bad_wr)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
- struct hns_roce_srq *srq = to_hr_srq(ibsrq);
- struct hns_roce_v2_wqe_data_seg *dseg;
- struct hns_roce_v2_db srq_db;
- unsigned long flags;
- int ret = 0;
- int wqe_idx;
- void *wqe;
- int nreq;
- int ind;
- int i;
-
- spin_lock_irqsave(&srq->lock, flags);
-
- ind = srq->head & (srq->wqe_cnt - 1);
-
- for (nreq = 0; wr; ++nreq, wr = wr->next) {
- if (unlikely(wr->num_sge > srq->max_gs)) {
- ret = -EINVAL;
- *bad_wr = wr;
- break;
- }
-
- if (unlikely(srq->head == srq->tail)) {
- ret = -ENOMEM;
- *bad_wr = wr;
- break;
- }
-
- wqe_idx = find_empty_entry(&srq->idx_que, srq->wqe_cnt);
- if (wqe_idx < 0) {
- ret = -ENOMEM;
- *bad_wr = wr;
- break;
- }
-
- fill_idx_queue(&srq->idx_que, ind, wqe_idx);
- wqe = get_srq_wqe(srq, wqe_idx);
- dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
-
- for (i = 0; i < wr->num_sge; ++i) {
- dseg[i].len = cpu_to_le32(wr->sg_list[i].length);
- dseg[i].lkey = cpu_to_le32(wr->sg_list[i].lkey);
- dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr);
- }
-
- if (i < srq->max_gs) {
- dseg[i].len = 0;
- dseg[i].lkey = cpu_to_le32(0x100);
- dseg[i].addr = 0;
- }
-
- srq->wrid[wqe_idx] = wr->wr_id;
- ind = (ind + 1) & (srq->wqe_cnt - 1);
- }
-
- if (likely(nreq)) {
- srq->head += nreq;
-
- /*
- * Make sure that descriptors are written before
- * doorbell record.
- */
- wmb();
-
- srq_db.byte_4 =
- cpu_to_le32(HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S |
- (srq->srqn & V2_DB_BYTE_4_TAG_M));
- srq_db.parameter = cpu_to_le32(srq->head);
-
- hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l);
-
- }
-
- spin_unlock_irqrestore(&srq->lock, flags);
-
- return ret;
-}
-
static const struct hns_roce_dfx_hw hns_roce_dfx_hw_v2 = {
.query_cqc_info = hns_roce_v2_query_cqc_info,
};
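The hns_roce_v2_post_srq_recv() removed above is a compact example of the ordering rule every post path in this driver follows: fill the WQE and the index-queue entry in memory, issue a write barrier, and only then ring the doorbell so the hardware cannot fetch a half-written descriptor. A minimal sketch of that contract, using the kernel's wmb()/writel() and hypothetical names rather than the driver's real structures:

struct demo_wqe {
	u64 addr;
	u32 len;
	u32 lkey;
};

static void demo_post_one(struct demo_wqe *slot, u32 new_head,
			  void __iomem *db_reg)
{
	slot->addr = 0x1000;	/* made-up payload */
	slot->len  = 64;
	slot->lkey = 0x100;

	/* make the WQE visible before the doorbell write */
	wmb();

	writel(new_head, db_reg);
}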
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 82dd9f6f4845..938b7b522faf 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -1241,10 +1241,9 @@ struct hns_roce_func_clear {
};
#define FUNC_CLEAR_RST_FUN_DONE_S 0
-/* Each physical function manages up to 248 virtual functions；
- * it takes up to 100ms for each function to execute clear；
- * if an abnormal reset occurs, it is executed twice at most;
- * so it takes up to 249 * 2 * 100ms.
+/* Each physical function manages up to 248 virtual functions; it takes up to
+ * 100ms for each function to execute clear. If an abnormal reset occurs, it is
+ * executed twice at most, so it takes up to 249 * 2 * 100ms.
*/
#define HNS_ROCE_V2_FUNC_CLEAR_TIMEOUT_MSECS (249 * 2 * 100)
#define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL 40
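For scale, the bound in the reworked comment works out to 249 * 2 * 100 ms = 49,800 ms, i.e. just under 50 seconds of worst-case wait; the 249 presumably counts the PF itself plus its up-to-248 VFs.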
@@ -1648,7 +1647,7 @@ struct hns_roce_query_pf_caps_c {
struct hns_roce_query_pf_caps_d {
__le32 wq_hop_num_max_srqs;
__le16 srq_depth;
- __le16 rsv;
+ __le16 cap_flags_ex;
__le32 num_ceqs_ceq_depth;
__le32 arm_st_aeq_depth;
__le32 num_uars_rsv_pds;
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index d0031d559213..fd3581efe9a8 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -579,33 +579,12 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
int ret;
struct device *dev = hr_dev->dev;
- ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtt_table,
- HEM_TYPE_MTT, hr_dev->caps.mtt_entry_sz,
- hr_dev->caps.num_mtt_segs, 1);
- if (ret) {
- dev_err(dev, "Failed to init MTT context memory, aborting.\n");
- return ret;
- }
-
- if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) {
- ret = hns_roce_init_hem_table(hr_dev,
- &hr_dev->mr_table.mtt_cqe_table,
- HEM_TYPE_CQE,
- hr_dev->caps.mtt_entry_sz,
- hr_dev->caps.num_cqe_segs, 1);
- if (ret) {
- dev_err(dev,
- "Failed to init CQE context memory, aborting.\n");
- goto err_unmap_cqe;
- }
- }
-
ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table,
HEM_TYPE_MTPT, hr_dev->caps.mtpt_entry_sz,
hr_dev->caps.num_mtpts, 1);
if (ret) {
dev_err(dev, "Failed to init MTPT context memory, aborting.\n");
- goto err_unmap_mtt;
+ return ret;
}
ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.qp_table,
@@ -660,32 +639,6 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
}
}
- if (hr_dev->caps.num_srqwqe_segs) {
- ret = hns_roce_init_hem_table(hr_dev,
- &hr_dev->mr_table.mtt_srqwqe_table,
- HEM_TYPE_SRQWQE,
- hr_dev->caps.mtt_entry_sz,
- hr_dev->caps.num_srqwqe_segs, 1);
- if (ret) {
- dev_err(dev,
- "Failed to init MTT srqwqe memory, aborting.\n");
- goto err_unmap_srq;
- }
- }
-
- if (hr_dev->caps.num_idx_segs) {
- ret = hns_roce_init_hem_table(hr_dev,
- &hr_dev->mr_table.mtt_idx_table,
- HEM_TYPE_IDX,
- hr_dev->caps.idx_entry_sz,
- hr_dev->caps.num_idx_segs, 1);
- if (ret) {
- dev_err(dev,
- "Failed to init MTT idx memory, aborting.\n");
- goto err_unmap_srqwqe;
- }
- }
-
if (hr_dev->caps.sccc_entry_sz) {
ret = hns_roce_init_hem_table(hr_dev,
&hr_dev->qp_table.sccc_table,
@@ -695,7 +648,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
if (ret) {
dev_err(dev,
"Failed to init SCC context memory, aborting.\n");
- goto err_unmap_idx;
+ goto err_unmap_srq;
}
}
@@ -733,17 +686,6 @@ err_unmap_ctx:
if (hr_dev->caps.sccc_entry_sz)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->qp_table.sccc_table);
-
-err_unmap_idx:
- if (hr_dev->caps.num_idx_segs)
- hns_roce_cleanup_hem_table(hr_dev,
- &hr_dev->mr_table.mtt_idx_table);
-
-err_unmap_srqwqe:
- if (hr_dev->caps.num_srqwqe_segs)
- hns_roce_cleanup_hem_table(hr_dev,
- &hr_dev->mr_table.mtt_srqwqe_table);
-
err_unmap_srq:
if (hr_dev->caps.srqc_entry_sz)
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table);
@@ -765,14 +707,6 @@ err_unmap_qp:
err_unmap_dmpt:
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table);
-err_unmap_mtt:
- if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
- hns_roce_cleanup_hem_table(hr_dev,
- &hr_dev->mr_table.mtt_cqe_table);
-
-err_unmap_cqe:
- hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table);
-
return ret;
}
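The label shuffling above (err_unmap_idx and err_unmap_srqwqe disappear, and their users are re-pointed at err_unmap_srq) is the standard kernel unwind idiom: one label per successfully acquired resource, released in reverse order, so removing an allocation means deleting its label and retargeting the gotos that referenced it. A minimal sketch of the idiom with placeholder init/cleanup functions (illustrative only, not driver code):

struct demo_ctx;

int demo_init_a(struct demo_ctx *c);
int demo_init_b(struct demo_ctx *c);
int demo_init_c(struct demo_ctx *c);
void demo_cleanup_a(struct demo_ctx *c);
void demo_cleanup_b(struct demo_ctx *c);

static int demo_init_all(struct demo_ctx *c)
{
	int ret;

	ret = demo_init_a(c);
	if (ret)
		return ret;

	ret = demo_init_b(c);
	if (ret)
		goto err_a;

	ret = demo_init_c(c);
	if (ret)
		goto err_b;

	return 0;

err_b:
	demo_cleanup_b(c);
err_a:
	demo_cleanup_a(c);
	return ret;
}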
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 176f34692f88..ecd76759d47a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -66,645 +66,89 @@ int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev,
HNS_ROCE_CMD_TIMEOUT_MSECS);
}
-static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order,
- unsigned long *seg)
+static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
+ u32 pd, u64 iova, u64 size, u32 access)
{
- int o;
- u32 m;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ unsigned long obj = 0;
+ int err;
- spin_lock(&buddy->lock);
-
- for (o = order; o <= buddy->max_order; ++o) {
- if (buddy->num_free[o]) {
- m = 1 << (buddy->max_order - o);
- *seg = find_first_bit(buddy->bits[o], m);
- if (*seg < m)
- goto found;
- }
- }
- spin_unlock(&buddy->lock);
- return -EINVAL;
-
- found:
- clear_bit(*seg, buddy->bits[o]);
- --buddy->num_free[o];
-
- while (o > order) {
- --o;
- *seg <<= 1;
- set_bit(*seg ^ 1, buddy->bits[o]);
- ++buddy->num_free[o];
- }
-
- spin_unlock(&buddy->lock);
-
- *seg <<= order;
- return 0;
-}
-
-static void hns_roce_buddy_free(struct hns_roce_buddy *buddy, unsigned long seg,
- int order)
-{
- seg >>= order;
-
- spin_lock(&buddy->lock);
-
- while (test_bit(seg ^ 1, buddy->bits[order])) {
- clear_bit(seg ^ 1, buddy->bits[order]);
- --buddy->num_free[order];
- seg >>= 1;
- ++order;
- }
-
- set_bit(seg, buddy->bits[order]);
- ++buddy->num_free[order];
-
- spin_unlock(&buddy->lock);
-}
-
-static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order)
-{
- int i, s;
-
- buddy->max_order = max_order;
- spin_lock_init(&buddy->lock);
- buddy->bits = kcalloc(buddy->max_order + 1,
- sizeof(*buddy->bits),
- GFP_KERNEL);
- buddy->num_free = kcalloc(buddy->max_order + 1,
- sizeof(*buddy->num_free),
- GFP_KERNEL);
- if (!buddy->bits || !buddy->num_free)
- goto err_out;
-
- for (i = 0; i <= buddy->max_order; ++i) {
- s = BITS_TO_LONGS(1 << (buddy->max_order - i));
- buddy->bits[i] = kcalloc(s, sizeof(long), GFP_KERNEL |
- __GFP_NOWARN);
- if (!buddy->bits[i]) {
- buddy->bits[i] = vzalloc(array_size(s, sizeof(long)));
- if (!buddy->bits[i])
- goto err_out_free;
- }
- }
-
- set_bit(0, buddy->bits[buddy->max_order]);
- buddy->num_free[buddy->max_order] = 1;
-
- return 0;
-
-err_out_free:
- for (i = 0; i <= buddy->max_order; ++i)
- kvfree(buddy->bits[i]);
-
-err_out:
- kfree(buddy->bits);
- kfree(buddy->num_free);
- return -ENOMEM;
-}
-
-static void hns_roce_buddy_cleanup(struct hns_roce_buddy *buddy)
-{
- int i;
-
- for (i = 0; i <= buddy->max_order; ++i)
- kvfree(buddy->bits[i]);
-
- kfree(buddy->bits);
- kfree(buddy->num_free);
-}
-
-static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order,
- unsigned long *seg, u32 mtt_type)
-{
- struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
- struct hns_roce_hem_table *table;
- struct hns_roce_buddy *buddy;
- int ret;
-
- switch (mtt_type) {
- case MTT_TYPE_WQE:
- buddy = &mr_table->mtt_buddy;
- table = &mr_table->mtt_table;
- break;
- case MTT_TYPE_CQE:
- buddy = &mr_table->mtt_cqe_buddy;
- table = &mr_table->mtt_cqe_table;
- break;
- case MTT_TYPE_SRQWQE:
- buddy = &mr_table->mtt_srqwqe_buddy;
- table = &mr_table->mtt_srqwqe_table;
- break;
- case MTT_TYPE_IDX:
- buddy = &mr_table->mtt_idx_buddy;
- table = &mr_table->mtt_idx_table;
- break;
- default:
- dev_err(hr_dev->dev, "Unsupport MTT table type: %d\n",
- mtt_type);
- return -EINVAL;
- }
-
- ret = hns_roce_buddy_alloc(buddy, order, seg);
- if (ret)
- return ret;
-
- ret = hns_roce_table_get_range(hr_dev, table, *seg,
- *seg + (1 << order) - 1);
- if (ret) {
- hns_roce_buddy_free(buddy, *seg, order);
- return ret;
- }
-
- return 0;
-}
-
-int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift,
- struct hns_roce_mtt *mtt)
-{
- int ret;
- int i;
-
- /* Page num is zero, correspond to DMA memory register */
- if (!npages) {
- mtt->order = -1;
- mtt->page_shift = HNS_ROCE_HEM_PAGE_SHIFT;
- return 0;
- }
-
- /* Note: if page_shift is zero, FAST memory register */
- mtt->page_shift = page_shift;
-
- /* Compute MTT entry necessary */
- for (mtt->order = 0, i = HNS_ROCE_MTT_ENTRY_PER_SEG; i < npages;
- i <<= 1)
- ++mtt->order;
-
- /* Allocate MTT entry */
- ret = hns_roce_alloc_mtt_range(hr_dev, mtt->order, &mtt->first_seg,
- mtt->mtt_type);
- if (ret)
+ /* Allocate a key for mr from mr_table */
+ err = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &obj);
+ if (err) {
+ ibdev_err(ibdev,
+ "failed to alloc bitmap for MR key, ret = %d.\n",
+ err);
return -ENOMEM;
-
- return 0;
-}
-
-void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt)
-{
- struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
-
- if (mtt->order < 0)
- return;
-
- switch (mtt->mtt_type) {
- case MTT_TYPE_WQE:
- hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg,
- mtt->order);
- hns_roce_table_put_range(hr_dev, &mr_table->mtt_table,
- mtt->first_seg,
- mtt->first_seg + (1 << mtt->order) - 1);
- break;
- case MTT_TYPE_CQE:
- hns_roce_buddy_free(&mr_table->mtt_cqe_buddy, mtt->first_seg,
- mtt->order);
- hns_roce_table_put_range(hr_dev, &mr_table->mtt_cqe_table,
- mtt->first_seg,
- mtt->first_seg + (1 << mtt->order) - 1);
- break;
- case MTT_TYPE_SRQWQE:
- hns_roce_buddy_free(&mr_table->mtt_srqwqe_buddy, mtt->first_seg,
- mtt->order);
- hns_roce_table_put_range(hr_dev, &mr_table->mtt_srqwqe_table,
- mtt->first_seg,
- mtt->first_seg + (1 << mtt->order) - 1);
- break;
- case MTT_TYPE_IDX:
- hns_roce_buddy_free(&mr_table->mtt_idx_buddy, mtt->first_seg,
- mtt->order);
- hns_roce_table_put_range(hr_dev, &mr_table->mtt_idx_table,
- mtt->first_seg,
- mtt->first_seg + (1 << mtt->order) - 1);
- break;
- default:
- dev_err(hr_dev->dev,
- "Unsupport mtt type %d, clean mtt failed\n",
- mtt->mtt_type);
- break;
- }
-}
-
-static void hns_roce_loop_free(struct hns_roce_dev *hr_dev,
- struct hns_roce_mr *mr, int err_loop_index,
- int loop_i, int loop_j)
-{
- struct device *dev = hr_dev->dev;
- u32 mhop_num;
- u32 pbl_bt_sz;
- u64 bt_idx;
- int i, j;
-
- pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
- mhop_num = hr_dev->caps.pbl_hop_num;
-
- i = loop_i;
- if (mhop_num == 3 && err_loop_index == 2) {
- for (; i >= 0; i--) {
- dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
- mr->pbl_l1_dma_addr[i]);
-
- for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) {
- if (i == loop_i && j >= loop_j)
- break;
-
- bt_idx = i * pbl_bt_sz / BA_BYTE_LEN + j;
- dma_free_coherent(dev, pbl_bt_sz,
- mr->pbl_bt_l2[bt_idx],
- mr->pbl_l2_dma_addr[bt_idx]);
- }
- }
- } else if (mhop_num == 3 && err_loop_index == 1) {
- for (i -= 1; i >= 0; i--) {
- dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
- mr->pbl_l1_dma_addr[i]);
-
- for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) {
- bt_idx = i * pbl_bt_sz / BA_BYTE_LEN + j;
- dma_free_coherent(dev, pbl_bt_sz,
- mr->pbl_bt_l2[bt_idx],
- mr->pbl_l2_dma_addr[bt_idx]);
- }
- }
- } else if (mhop_num == 2 && err_loop_index == 1) {
- for (i -= 1; i >= 0; i--)
- dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
- mr->pbl_l1_dma_addr[i]);
- } else {
- dev_warn(dev, "not support: mhop_num=%d, err_loop_index=%d.",
- mhop_num, err_loop_index);
- return;
}
- dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0, mr->pbl_l0_dma_addr);
- mr->pbl_bt_l0 = NULL;
- mr->pbl_l0_dma_addr = 0;
-}
-static int pbl_1hop_alloc(struct hns_roce_dev *hr_dev, int npages,
- struct hns_roce_mr *mr, u32 pbl_bt_sz)
-{
- struct device *dev = hr_dev->dev;
-
- if (npages > pbl_bt_sz / 8) {
- dev_err(dev, "npages %d is larger than buf_pg_sz!",
- npages);
- return -EINVAL;
- }
- mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
- &(mr->pbl_dma_addr),
- GFP_KERNEL);
- if (!mr->pbl_buf)
- return -ENOMEM;
-
- mr->pbl_size = npages;
- mr->pbl_ba = mr->pbl_dma_addr;
- mr->pbl_hop_num = 1;
- mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
- mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
- return 0;
-
-}
-
-
-static int pbl_2hop_alloc(struct hns_roce_dev *hr_dev, int npages,
- struct hns_roce_mr *mr, u32 pbl_bt_sz)
-{
- struct device *dev = hr_dev->dev;
- int npages_allocated;
- u64 pbl_last_bt_num;
- u64 pbl_bt_cnt = 0;
- u64 size;
- int i;
-
- pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
-
- /* alloc L1 BT */
- for (i = 0; i < pbl_bt_sz / 8; i++) {
- if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
- size = pbl_bt_sz;
- } else {
- npages_allocated = i * (pbl_bt_sz / 8);
- size = (npages - npages_allocated) * 8;
- }
- mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size,
- &(mr->pbl_l1_dma_addr[i]),
- GFP_KERNEL);
- if (!mr->pbl_bt_l1[i]) {
- hns_roce_loop_free(hr_dev, mr, 1, i, 0);
- return -ENOMEM;
- }
-
- *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
+ mr->iova = iova; /* MR va starting addr */
+ mr->size = size; /* MR addr range */
+ mr->pd = pd; /* MR num */
+ mr->access = access; /* MR access permit */
+ mr->enabled = 0; /* MR active status */
+ mr->key = hw_index_to_key(obj); /* MR key */
- pbl_bt_cnt++;
- if (pbl_bt_cnt >= pbl_last_bt_num)
- break;
+ err = hns_roce_table_get(hr_dev, &hr_dev->mr_table.mtpt_table, obj);
+ if (err) {
+ ibdev_err(ibdev, "failed to alloc mtpt, ret = %d.\n", err);
+ goto err_free_bitmap;
}
- mr->l0_chunk_last_num = i + 1;
-
return 0;
+err_free_bitmap:
+ hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, obj, BITMAP_NO_RR);
+ return err;
}
-static int pbl_3hop_alloc(struct hns_roce_dev *hr_dev, int npages,
- struct hns_roce_mr *mr, u32 pbl_bt_sz)
+static void free_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
- struct device *dev = hr_dev->dev;
- int mr_alloc_done = 0;
- int npages_allocated;
- u64 pbl_last_bt_num;
- u64 pbl_bt_cnt = 0;
- u64 bt_idx;
- u64 size;
- int i;
- int j = 0;
+ unsigned long obj = key_to_hw_index(mr->key);
- pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
-
- mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num,
- sizeof(*mr->pbl_l2_dma_addr),
- GFP_KERNEL);
- if (!mr->pbl_l2_dma_addr)
- return -ENOMEM;
-
- mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num,
- sizeof(*mr->pbl_bt_l2),
- GFP_KERNEL);
- if (!mr->pbl_bt_l2)
- goto err_kcalloc_bt_l2;
-
- /* alloc L1, L2 BT */
- for (i = 0; i < pbl_bt_sz / 8; i++) {
- mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz,
- &(mr->pbl_l1_dma_addr[i]),
- GFP_KERNEL);
- if (!mr->pbl_bt_l1[i]) {
- hns_roce_loop_free(hr_dev, mr, 1, i, 0);
- goto err_dma_alloc_l0;
- }
-
- *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
-
- for (j = 0; j < pbl_bt_sz / 8; j++) {
- bt_idx = i * pbl_bt_sz / 8 + j;
-
- if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
- size = pbl_bt_sz;
- } else {
- npages_allocated = bt_idx *
- (pbl_bt_sz / 8);
- size = (npages - npages_allocated) * 8;
- }
- mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent(
- dev, size,
- &(mr->pbl_l2_dma_addr[bt_idx]),
- GFP_KERNEL);
- if (!mr->pbl_bt_l2[bt_idx]) {
- hns_roce_loop_free(hr_dev, mr, 2, i, j);
- goto err_dma_alloc_l0;
- }
-
- *(mr->pbl_bt_l1[i] + j) =
- mr->pbl_l2_dma_addr[bt_idx];
-
- pbl_bt_cnt++;
- if (pbl_bt_cnt >= pbl_last_bt_num) {
- mr_alloc_done = 1;
- break;
- }
- }
-
- if (mr_alloc_done)
- break;
- }
-
- mr->l0_chunk_last_num = i + 1;
- mr->l1_chunk_last_num = j + 1;
-
-
- return 0;
-
-err_dma_alloc_l0:
- kfree(mr->pbl_bt_l2);
- mr->pbl_bt_l2 = NULL;
-
-err_kcalloc_bt_l2:
- kfree(mr->pbl_l2_dma_addr);
- mr->pbl_l2_dma_addr = NULL;
-
- return -ENOMEM;
+ hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, obj);
+ hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, obj, BITMAP_NO_RR);
}
-
-/* PBL multi hop addressing */
-static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
- struct hns_roce_mr *mr)
+static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
+ size_t length, struct ib_udata *udata, u64 start,
+ int access)
{
- struct device *dev = hr_dev->dev;
- u32 pbl_bt_sz;
- u32 mhop_num;
-
- mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num);
- pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
-
- if (mhop_num == HNS_ROCE_HOP_NUM_0)
- return 0;
-
- if (mhop_num == 1)
- return pbl_1hop_alloc(hr_dev, npages, mr, pbl_bt_sz);
-
- mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8,
- sizeof(*mr->pbl_l1_dma_addr),
- GFP_KERNEL);
- if (!mr->pbl_l1_dma_addr)
- return -ENOMEM;
-
- mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1),
- GFP_KERNEL);
- if (!mr->pbl_bt_l1)
- goto err_kcalloc_bt_l1;
-
- /* alloc L0 BT */
- mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz,
- &(mr->pbl_l0_dma_addr),
- GFP_KERNEL);
- if (!mr->pbl_bt_l0)
- goto err_kcalloc_l2_dma;
-
- if (mhop_num == 2) {
- if (pbl_2hop_alloc(hr_dev, npages, mr, pbl_bt_sz))
- goto err_kcalloc_l2_dma;
- }
-
- if (mhop_num == 3) {
- if (pbl_3hop_alloc(hr_dev, npages, mr, pbl_bt_sz))
- goto err_kcalloc_l2_dma;
- }
-
-
- mr->pbl_size = npages;
- mr->pbl_ba = mr->pbl_l0_dma_addr;
- mr->pbl_hop_num = hr_dev->caps.pbl_hop_num;
- mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
- mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
-
- return 0;
-
-err_kcalloc_l2_dma:
- kfree(mr->pbl_bt_l1);
- mr->pbl_bt_l1 = NULL;
-
-err_kcalloc_bt_l1:
- kfree(mr->pbl_l1_dma_addr);
- mr->pbl_l1_dma_addr = NULL;
-
- return -ENOMEM;
-}
-
-static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
- u64 size, u32 access, int npages,
- struct hns_roce_mr *mr)
-{
- struct device *dev = hr_dev->dev;
- unsigned long index = 0;
- int ret;
-
- /* Allocate a key for mr from mr_table */
- ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
- if (ret)
- return -ENOMEM;
-
- mr->iova = iova; /* MR va starting addr */
- mr->size = size; /* MR addr range */
- mr->pd = pd; /* MR num */
- mr->access = access; /* MR access permit */
- mr->enabled = 0; /* MR active status */
- mr->key = hw_index_to_key(index); /* MR key */
-
- if (size == ~0ull) {
- mr->pbl_buf = NULL;
- mr->pbl_dma_addr = 0;
- /* PBL multi-hop addressing parameters */
- mr->pbl_bt_l2 = NULL;
- mr->pbl_bt_l1 = NULL;
- mr->pbl_bt_l0 = NULL;
- mr->pbl_l2_dma_addr = NULL;
- mr->pbl_l1_dma_addr = NULL;
- mr->pbl_l0_dma_addr = 0;
- } else {
- if (!hr_dev->caps.pbl_hop_num) {
- mr->pbl_buf = dma_alloc_coherent(dev,
- npages * BA_BYTE_LEN,
- &(mr->pbl_dma_addr),
- GFP_KERNEL);
- if (!mr->pbl_buf)
- return -ENOMEM;
- } else {
- ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
- }
- }
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ bool is_fast = mr->type == MR_TYPE_FRMR;
+ struct hns_roce_buf_attr buf_attr = {};
+ int err;
+
+ mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num;
+ buf_attr.page_shift = is_fast ? PAGE_SHIFT :
+ hr_dev->caps.pbl_buf_pg_sz + PAGE_ADDR_SHIFT;
+ buf_attr.region[0].size = length;
+ buf_attr.region[0].hopnum = mr->pbl_hop_num;
+ buf_attr.region_count = 1;
+ buf_attr.fixed_page = true;
+ buf_attr.user_access = access;
+ /* fast MR's buffer is allocated before mapping, not at creation */
+ buf_attr.mtt_only = is_fast;
+
+ err = hns_roce_mtr_create(hr_dev, &mr->pbl_mtr, &buf_attr,
+ hr_dev->caps.pbl_ba_pg_sz + PAGE_ADDR_SHIFT,
+ udata, start);
+ if (err)
+ ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err);
+ else
+ mr->npages = mr->pbl_mtr.hem_cfg.buf_pg_count;
- return ret;
+ return err;
}
-static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev,
- struct hns_roce_mr *mr)
+static void free_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
- struct device *dev = hr_dev->dev;
- int npages_allocated;
- int npages;
- int i, j;
- u32 pbl_bt_sz;
- u32 mhop_num;
- u64 bt_idx;
-
- npages = mr->pbl_size;
- pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
- mhop_num = (mr->type == MR_TYPE_FRMR) ? 1 : hr_dev->caps.pbl_hop_num;
-
- if (mhop_num == HNS_ROCE_HOP_NUM_0)
- return;
-
- if (mhop_num == 1) {
- dma_free_coherent(dev, (unsigned int)(npages * BA_BYTE_LEN),
- mr->pbl_buf, mr->pbl_dma_addr);
- return;
- }
-
- dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0,
- mr->pbl_l0_dma_addr);
-
- if (mhop_num == 2) {
- for (i = 0; i < mr->l0_chunk_last_num; i++) {
- if (i == mr->l0_chunk_last_num - 1) {
- npages_allocated =
- i * (pbl_bt_sz / BA_BYTE_LEN);
-
- dma_free_coherent(dev,
- (npages - npages_allocated) * BA_BYTE_LEN,
- mr->pbl_bt_l1[i],
- mr->pbl_l1_dma_addr[i]);
-
- break;
- }
-
- dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
- mr->pbl_l1_dma_addr[i]);
- }
- } else if (mhop_num == 3) {
- for (i = 0; i < mr->l0_chunk_last_num; i++) {
- dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
- mr->pbl_l1_dma_addr[i]);
-
- for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) {
- bt_idx = i * (pbl_bt_sz / BA_BYTE_LEN) + j;
-
- if ((i == mr->l0_chunk_last_num - 1)
- && j == mr->l1_chunk_last_num - 1) {
- npages_allocated = bt_idx *
- (pbl_bt_sz / BA_BYTE_LEN);
-
- dma_free_coherent(dev,
- (npages - npages_allocated) *
- BA_BYTE_LEN,
- mr->pbl_bt_l2[bt_idx],
- mr->pbl_l2_dma_addr[bt_idx]);
-
- break;
- }
-
- dma_free_coherent(dev, pbl_bt_sz,
- mr->pbl_bt_l2[bt_idx],
- mr->pbl_l2_dma_addr[bt_idx]);
- }
- }
- }
-
- kfree(mr->pbl_bt_l1);
- kfree(mr->pbl_l1_dma_addr);
- mr->pbl_bt_l1 = NULL;
- mr->pbl_l1_dma_addr = NULL;
- if (mhop_num == 3) {
- kfree(mr->pbl_bt_l2);
- kfree(mr->pbl_l2_dma_addr);
- mr->pbl_bt_l2 = NULL;
- mr->pbl_l2_dma_addr = NULL;
- }
+ hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr);
}
static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
struct hns_roce_mr *mr)
{
- struct device *dev = hr_dev->dev;
- int npages = 0;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
int ret;
if (mr->enabled) {
@@ -712,27 +156,12 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
key_to_hw_index(mr->key) &
(hr_dev->caps.num_mtpts - 1));
if (ret)
- dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret);
+ ibdev_warn(ibdev, "failed to destroy mpt, ret = %d.\n",
+ ret);
}
- if (mr->size != ~0ULL) {
- if (mr->type == MR_TYPE_MR)
- npages = ib_umem_page_count(mr->umem);
-
- if (!hr_dev->caps.pbl_hop_num)
- dma_free_coherent(dev,
- (unsigned int)(npages * BA_BYTE_LEN),
- mr->pbl_buf, mr->pbl_dma_addr);
- else
- hns_roce_mhop_free(hr_dev, mr);
- }
-
- if (mr->enabled)
- hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
- key_to_hw_index(mr->key));
-
- hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
- key_to_hw_index(mr->key), BITMAP_NO_RR);
+ free_mr_pbl(hr_dev, mr);
+ free_mr_key(hr_dev, mr);
}
static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
@@ -742,18 +171,12 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
unsigned long mtpt_idx = key_to_hw_index(mr->key);
struct device *dev = hr_dev->dev;
struct hns_roce_cmd_mailbox *mailbox;
- struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
-
- /* Prepare HEM entry memory */
- ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
- if (ret)
- return ret;
/* Allocate mailbox memory */
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
if (IS_ERR(mailbox)) {
ret = PTR_ERR(mailbox);
- goto err_table;
+ return ret;
}
if (mr->type != MR_TYPE_FRMR)
@@ -780,137 +203,6 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
err_page:
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
-err_table:
- hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
- return ret;
-}
-
-static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt, u32 start_index,
- u32 npages, u64 *page_list)
-{
- struct hns_roce_hem_table *table;
- dma_addr_t dma_handle;
- __le64 *mtts;
- u32 bt_page_size;
- u32 i;
-
- switch (mtt->mtt_type) {
- case MTT_TYPE_WQE:
- table = &hr_dev->mr_table.mtt_table;
- bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
- break;
- case MTT_TYPE_CQE:
- table = &hr_dev->mr_table.mtt_cqe_table;
- bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
- break;
- case MTT_TYPE_SRQWQE:
- table = &hr_dev->mr_table.mtt_srqwqe_table;
- bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT);
- break;
- case MTT_TYPE_IDX:
- table = &hr_dev->mr_table.mtt_idx_table;
- bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT);
- break;
- default:
- return -EINVAL;
- }
-
- /* All MTTs must fit in the same page */
- if (start_index / (bt_page_size / sizeof(u64)) !=
- (start_index + npages - 1) / (bt_page_size / sizeof(u64)))
- return -EINVAL;
-
- if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1))
- return -EINVAL;
-
- mtts = hns_roce_table_find(hr_dev, table,
- mtt->first_seg +
- start_index / HNS_ROCE_MTT_ENTRY_PER_SEG,
- &dma_handle);
- if (!mtts)
- return -ENOMEM;
-
- /* Save page addr, low 12 bits : 0 */
- for (i = 0; i < npages; ++i) {
- if (!hr_dev->caps.mtt_hop_num)
- mtts[i] = cpu_to_le64(page_list[i] >> PAGE_ADDR_SHIFT);
- else
- mtts[i] = cpu_to_le64(page_list[i]);
- }
-
- return 0;
-}
-
-static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt, u32 start_index,
- u32 npages, u64 *page_list)
-{
- int chunk;
- int ret;
- u32 bt_page_size;
-
- if (mtt->order < 0)
- return -EINVAL;
-
- switch (mtt->mtt_type) {
- case MTT_TYPE_WQE:
- bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
- break;
- case MTT_TYPE_CQE:
- bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
- break;
- case MTT_TYPE_SRQWQE:
- bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT);
- break;
- case MTT_TYPE_IDX:
- bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT);
- break;
- default:
- dev_err(hr_dev->dev,
- "Unsupport mtt type %d, write mtt failed\n",
- mtt->mtt_type);
- return -EINVAL;
- }
-
- while (npages > 0) {
- chunk = min_t(int, bt_page_size / sizeof(u64), npages);
-
- ret = hns_roce_write_mtt_chunk(hr_dev, mtt, start_index, chunk,
- page_list);
- if (ret)
- return ret;
-
- npages -= chunk;
- start_index += chunk;
- page_list += chunk;
- }
-
- return 0;
-}
-
-int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt, struct hns_roce_buf *buf)
-{
- u64 *page_list;
- int ret;
- u32 i;
-
- page_list = kmalloc_array(buf->npages, sizeof(*page_list), GFP_KERNEL);
- if (!page_list)
- return -ENOMEM;
-
- for (i = 0; i < buf->npages; ++i) {
- if (buf->nbufs == 1)
- page_list[i] = buf->direct.map + (i << buf->page_shift);
- else
- page_list[i] = buf->page_list[i].map;
-
- }
- ret = hns_roce_write_mtt(hr_dev, mtt, 0, buf->npages, page_list);
-
- kfree(page_list);
-
return ret;
}
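alloc_mr_key() and free_mr_key() earlier in this hunk reduce MR key management to two steps: grab a free index from the mtpt bitmap and take a reference on the matching MTPT HEM entry, undone in reverse order on failure. The bitmap half of that is the usual first-fit pattern; a sketch of it against the generic kernel bitmap helpers (the driver's hns_roce_bitmap_alloc() adds locking and a rotating search start, omitted here):

#include <linux/bitmap.h>
#include <linux/errno.h>

/* first-fit index allocation from a fixed-size bitmap */
static int demo_alloc_index(unsigned long *bits, unsigned int size)
{
	unsigned int idx = find_first_zero_bit(bits, size);

	if (idx >= size)
		return -ENOSPC;

	set_bit(idx, bits);
	return idx;
}

static void demo_free_index(unsigned long *bits, unsigned int idx)
{
	clear_bit(idx, bits);
}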
@@ -923,50 +215,6 @@ int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev)
hr_dev->caps.num_mtpts,
hr_dev->caps.num_mtpts - 1,
hr_dev->caps.reserved_mrws, 0);
- if (ret)
- return ret;
-
- ret = hns_roce_buddy_init(&mr_table->mtt_buddy,
- ilog2(hr_dev->caps.num_mtt_segs));
- if (ret)
- goto err_buddy;
-
- if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) {
- ret = hns_roce_buddy_init(&mr_table->mtt_cqe_buddy,
- ilog2(hr_dev->caps.num_cqe_segs));
- if (ret)
- goto err_buddy_cqe;
- }
-
- if (hr_dev->caps.num_srqwqe_segs) {
- ret = hns_roce_buddy_init(&mr_table->mtt_srqwqe_buddy,
- ilog2(hr_dev->caps.num_srqwqe_segs));
- if (ret)
- goto err_buddy_srqwqe;
- }
-
- if (hr_dev->caps.num_idx_segs) {
- ret = hns_roce_buddy_init(&mr_table->mtt_idx_buddy,
- ilog2(hr_dev->caps.num_idx_segs));
- if (ret)
- goto err_buddy_idx;
- }
-
- return 0;
-
-err_buddy_idx:
- if (hr_dev->caps.num_srqwqe_segs)
- hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy);
-
-err_buddy_srqwqe:
- if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
- hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
-
-err_buddy_cqe:
- hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
-
-err_buddy:
- hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
return ret;
}
@@ -974,30 +222,24 @@ void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev)
{
struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
- if (hr_dev->caps.num_idx_segs)
- hns_roce_buddy_cleanup(&mr_table->mtt_idx_buddy);
- if (hr_dev->caps.num_srqwqe_segs)
- hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy);
- hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
- if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
- hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
}
struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
{
+ struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
struct hns_roce_mr *mr;
int ret;
- mr = kmalloc(sizeof(*mr), GFP_KERNEL);
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (mr == NULL)
return ERR_PTR(-ENOMEM);
mr->type = MR_TYPE_DMA;
/* Allocate memory region key */
- ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0,
- ~0ULL, acc, 0, mr);
+ hns_roce_hem_list_init(&mr->pbl_mtr.hem_list);
+ ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, 0, acc);
if (ret)
goto err_free;
@@ -1006,203 +248,51 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
goto err_mr;
mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
- mr->umem = NULL;
return &mr->ibmr;
-
err_mr:
- hns_roce_mr_free(to_hr_dev(pd->device), mr);
+ free_mr_key(hr_dev, mr);
err_free:
kfree(mr);
return ERR_PTR(ret);
}
-int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt, struct ib_umem *umem)
-{
- struct device *dev = hr_dev->dev;
- struct sg_dma_page_iter sg_iter;
- unsigned int order;
- int npage = 0;
- int ret = 0;
- int i;
- u64 page_addr;
- u64 *pages;
- u32 bt_page_size;
- u32 n;
-
- switch (mtt->mtt_type) {
- case MTT_TYPE_WQE:
- order = hr_dev->caps.mtt_ba_pg_sz;
- break;
- case MTT_TYPE_CQE:
- order = hr_dev->caps.cqe_ba_pg_sz;
- break;
- case MTT_TYPE_SRQWQE:
- order = hr_dev->caps.srqwqe_ba_pg_sz;
- break;
- case MTT_TYPE_IDX:
- order = hr_dev->caps.idx_ba_pg_sz;
- break;
- default:
- dev_err(dev, "Unsupport mtt type %d, write mtt failed\n",
- mtt->mtt_type);
- return -EINVAL;
- }
-
- bt_page_size = 1 << (order + PAGE_SHIFT);
-
- pages = (u64 *) __get_free_pages(GFP_KERNEL, order);
- if (!pages)
- return -ENOMEM;
-
- i = n = 0;
-
- for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
- page_addr = sg_page_iter_dma_address(&sg_iter);
- if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) {
- if (page_addr & ((1 << mtt->page_shift) - 1)) {
- dev_err(dev,
- "page_addr is not page_shift %d alignment!\n",
- mtt->page_shift);
- ret = -EINVAL;
- goto out;
- }
- pages[i++] = page_addr;
- }
- npage++;
- if (i == bt_page_size / sizeof(u64)) {
- ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages);
- if (ret)
- goto out;
- n += i;
- i = 0;
- }
- }
-
- if (i)
- ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages);
-
-out:
- free_pages((unsigned long) pages, order);
- return ret;
-}
-
-static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev,
- struct hns_roce_mr *mr,
- struct ib_umem *umem)
-{
- struct sg_dma_page_iter sg_iter;
- int i = 0, j = 0;
- u64 page_addr;
- u32 pbl_bt_sz;
-
- if (hr_dev->caps.pbl_hop_num == HNS_ROCE_HOP_NUM_0)
- return 0;
-
- pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
- for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
- page_addr = sg_page_iter_dma_address(&sg_iter);
- if (!hr_dev->caps.pbl_hop_num) {
- /* for hip06, page addr is aligned to 4K */
- mr->pbl_buf[i++] = page_addr >> 12;
- } else if (hr_dev->caps.pbl_hop_num == 1) {
- mr->pbl_buf[i++] = page_addr;
- } else {
- if (hr_dev->caps.pbl_hop_num == 2)
- mr->pbl_bt_l1[i][j] = page_addr;
- else if (hr_dev->caps.pbl_hop_num == 3)
- mr->pbl_bt_l2[i][j] = page_addr;
-
- j++;
- if (j >= (pbl_bt_sz / BA_BYTE_LEN)) {
- i++;
- j = 0;
- }
- }
- }
-
- /* Memory barrier */
- mb();
-
- return 0;
-}
-
struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
- struct device *dev = hr_dev->dev;
struct hns_roce_mr *mr;
- int bt_size;
int ret;
- int n;
- int i;
- mr = kmalloc(sizeof(*mr), GFP_KERNEL);
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
- mr->umem = ib_umem_get(pd->device, start, length, access_flags);
- if (IS_ERR(mr->umem)) {
- ret = PTR_ERR(mr->umem);
- goto err_free;
- }
-
- n = ib_umem_page_count(mr->umem);
-
- if (!hr_dev->caps.pbl_hop_num) {
- if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) {
- dev_err(dev,
- " MR len %lld err. MR is limited to 4G at most!\n",
- length);
- ret = -EINVAL;
- goto err_umem;
- }
- } else {
- u64 pbl_size = 1;
-
- bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) /
- BA_BYTE_LEN;
- for (i = 0; i < hr_dev->caps.pbl_hop_num; i++)
- pbl_size *= bt_size;
- if (n > pbl_size) {
- dev_err(dev,
- " MR len %lld err. MR page num is limited to %lld!\n",
- length, pbl_size);
- ret = -EINVAL;
- goto err_umem;
- }
- }
-
mr->type = MR_TYPE_MR;
-
- ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length,
- access_flags, n, mr);
+ ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, virt_addr, length,
+ access_flags);
if (ret)
- goto err_umem;
+ goto err_alloc_mr;
- ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
+ ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, access_flags);
if (ret)
- goto err_mr;
+ goto err_alloc_key;
ret = hns_roce_mr_enable(hr_dev, mr);
if (ret)
- goto err_mr;
+ goto err_alloc_pbl;
mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
return &mr->ibmr;
-err_mr:
- hns_roce_mr_free(hr_dev, mr);
-
-err_umem:
- ib_umem_release(mr->umem);
-
-err_free:
+err_alloc_pbl:
+ free_mr_pbl(hr_dev, mr);
+err_alloc_key:
+ free_mr_key(hr_dev, mr);
+err_alloc_mr:
kfree(mr);
return ERR_PTR(ret);
}
@@ -1214,84 +304,36 @@ static int rereg_mr_trans(struct ib_mr *ibmr, int flags,
u32 pdn, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_mr *mr = to_hr_mr(ibmr);
- struct device *dev = hr_dev->dev;
- int npages;
int ret;
- if (mr->size != ~0ULL) {
- npages = ib_umem_page_count(mr->umem);
-
- if (hr_dev->caps.pbl_hop_num)
- hns_roce_mhop_free(hr_dev, mr);
- else
- dma_free_coherent(dev, npages * 8,
- mr->pbl_buf, mr->pbl_dma_addr);
- }
- ib_umem_release(mr->umem);
-
- mr->umem = ib_umem_get(ibmr->device, start, length, mr_access_flags);
- if (IS_ERR(mr->umem)) {
- ret = PTR_ERR(mr->umem);
- mr->umem = NULL;
- return -ENOMEM;
- }
- npages = ib_umem_page_count(mr->umem);
-
- if (hr_dev->caps.pbl_hop_num) {
- ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
- if (ret)
- goto release_umem;
- } else {
- mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
- &(mr->pbl_dma_addr),
- GFP_KERNEL);
- if (!mr->pbl_buf) {
- ret = -ENOMEM;
- goto release_umem;
- }
+ free_mr_pbl(hr_dev, mr);
+ ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, mr_access_flags);
+ if (ret) {
+ ibdev_err(ibdev, "failed to create mr PBL, ret = %d.\n", ret);
+ return ret;
}
ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
mr_access_flags, virt_addr,
length, mailbox->buf);
- if (ret)
- goto release_umem;
-
-
- ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
if (ret) {
- if (mr->size != ~0ULL) {
- npages = ib_umem_page_count(mr->umem);
-
- if (hr_dev->caps.pbl_hop_num)
- hns_roce_mhop_free(hr_dev, mr);
- else
- dma_free_coherent(dev, npages * 8,
- mr->pbl_buf,
- mr->pbl_dma_addr);
- }
-
- goto release_umem;
+ ibdev_err(ibdev, "failed to write mtpt, ret = %d.\n", ret);
+ free_mr_pbl(hr_dev, mr);
}
- return 0;
-
-release_umem:
- ib_umem_release(mr->umem);
return ret;
-
}
-
int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
+ struct ib_device *ib_dev = &hr_dev->ib_dev;
struct hns_roce_mr *mr = to_hr_mr(ibmr);
struct hns_roce_cmd_mailbox *mailbox;
- struct device *dev = hr_dev->dev;
unsigned long mtpt_idx;
u32 pdn = 0;
int ret;
@@ -1312,7 +354,7 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, mtpt_idx);
if (ret)
- dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret);
+ ibdev_warn(ib_dev, "failed to destroy MPT, ret = %d.\n", ret);
mr->enabled = 0;
@@ -1336,8 +378,7 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx);
if (ret) {
- dev_err(dev, "CREATE_MPT failed (%d)\n", ret);
- ib_umem_release(mr->umem);
+ ibdev_err(ib_dev, "failed to create MPT, ret = %d.\n", ret);
goto free_cmd_mbox;
}
@@ -1365,8 +406,6 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
ret = hr_dev->hw->dereg_mr(hr_dev, mr, udata);
} else {
hns_roce_mr_free(hr_dev, mr);
-
- ib_umem_release(mr->umem);
kfree(mr);
}
@@ -1380,12 +419,8 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
struct device *dev = hr_dev->dev;
struct hns_roce_mr *mr;
u64 length;
- u32 page_size;
int ret;
- page_size = 1 << (hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT);
- length = max_num_sg * page_size;
-
if (mr_type != IB_MR_TYPE_MEM_REG)
return ERR_PTR(-EINVAL);
@@ -1402,23 +437,27 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
mr->type = MR_TYPE_FRMR;
/* Allocate memory region key */
- ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, 0, length,
- 0, max_num_sg, mr);
+ length = max_num_sg * (1 << PAGE_SHIFT);
+ ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, length, 0);
if (ret)
goto err_free;
+ ret = alloc_mr_pbl(hr_dev, mr, length, NULL, 0, 0);
+ if (ret)
+ goto err_key;
+
ret = hns_roce_mr_enable(hr_dev, mr);
if (ret)
- goto err_mr;
+ goto err_pbl;
mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
- mr->umem = NULL;
return &mr->ibmr;
-err_mr:
- hns_roce_mr_free(to_hr_dev(pd->device), mr);
-
+err_pbl:
+ free_mr_pbl(hr_dev, mr);
+err_key:
+ free_mr_key(hr_dev, mr);
err_free:
kfree(mr);
return ERR_PTR(ret);
@@ -1428,19 +467,54 @@ static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
{
struct hns_roce_mr *mr = to_hr_mr(ibmr);
- mr->pbl_buf[mr->npages++] = addr;
+ if (likely(mr->npages < mr->pbl_mtr.hem_cfg.buf_pg_count)) {
+ mr->page_list[mr->npages++] = addr;
+ return 0;
+ }
- return 0;
+ return -ENOBUFS;
}
int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset)
{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_mr *mr = to_hr_mr(ibmr);
+ struct hns_roce_buf_region region = {};
+ int ret = 0;
mr->npages = 0;
+ mr->page_list = kvcalloc(mr->pbl_mtr.hem_cfg.buf_pg_count,
+ sizeof(dma_addr_t), GFP_KERNEL);
+ if (!mr->page_list)
+ return ret;
- return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
+ ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
+ if (ret < 1) {
+ ibdev_err(ibdev, "failed to store sg pages %d %d, cnt = %d.\n",
+ mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, ret);
+ goto err_page_list;
+ }
+
+ region.offset = 0;
+ region.count = mr->npages;
+ region.hopnum = mr->pbl_hop_num;
+ ret = hns_roce_mtr_map(hr_dev, &mr->pbl_mtr, &region, 1, mr->page_list,
+ mr->npages);
+ if (ret) {
+ ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret);
+ ret = 0;
+ } else {
+ mr->pbl_mtr.hem_cfg.buf_pg_shift = ilog2(ibmr->page_size);
+ ret = mr->npages;
+ }
+
+err_page_list:
+ kvfree(mr->page_list);
+ mr->page_list = NULL;
+
+ return ret;
}
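The reworked hns_roce_map_mr_sg() above is what a kernel consumer reaches through ib_map_mr_sg() when doing fast memory registration: the core walks the scatterlist and calls back into hns_roce_set_page() for every MR page, after which the driver writes the collected addresses into the PBL. For context, a sketch of the usual caller-side sequence (error handling trimmed; the access flags and the assumption that the MR key needs no refresh are illustrative):

#include <rdma/ib_verbs.h>

static int demo_fast_reg(struct ib_qp *qp, struct ib_mr *mr,
			 struct scatterlist *sg, int sg_nents)
{
	struct ib_reg_wr wr = {};
	int n;

	n = ib_map_mr_sg(mr, sg, sg_nents, NULL, PAGE_SIZE);
	if (n != sg_nents)
		return n < 0 ? n : -EINVAL;

	wr.wr.opcode = IB_WR_REG_MR;
	wr.mr = mr;
	wr.key = mr->rkey;
	wr.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;

	/* posting with a NULL bad_wr pointer is accepted by the core */
	return ib_post_send(qp, &wr.wr, NULL);
}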
static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
@@ -1564,32 +638,23 @@ int hns_roce_dealloc_mw(struct ib_mw *ibmw)
return 0;
}
-void hns_roce_mtr_init(struct hns_roce_mtr *mtr, int bt_pg_shift,
- int buf_pg_shift)
-{
- hns_roce_hem_list_init(&mtr->hem_list, bt_pg_shift);
- mtr->buf_pg_shift = buf_pg_shift;
-}
-
-void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtr *mtr)
-{
- hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
-}
-
-static int hns_roce_write_mtr(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtr *mtr, dma_addr_t *bufs,
- struct hns_roce_buf_region *r)
+static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ dma_addr_t *pages, struct hns_roce_buf_region *region)
{
+ __le64 *mtts;
int offset;
int count;
int npage;
- u64 *mtts;
+ u64 addr;
int end;
int i;
- offset = r->offset;
- end = offset + r->count;
+ /* if hopnum is 0, the buffer cannot store BAs, so skip writing the mtt */
+ if (!region->hopnum)
+ return 0;
+
+ offset = region->offset;
+ end = offset + region->count;
npage = 0;
while (offset < end) {
mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
@@ -1597,13 +662,13 @@ static int hns_roce_write_mtr(struct hns_roce_dev *hr_dev,
if (!mtts)
return -ENOBUFS;
- /* Save page addr, low 12 bits : 0 */
for (i = 0; i < count; i++) {
if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
- mtts[i] = bufs[npage] >> PAGE_ADDR_SHIFT;
+ addr = to_hr_hw_page_addr(pages[npage]);
else
- mtts[i] = bufs[npage];
+ addr = pages[npage];
+ mtts[i] = cpu_to_le64(addr);
npage++;
}
offset += count;
@@ -1612,69 +677,412 @@ static int hns_roce_write_mtr(struct hns_roce_dev *hr_dev,
return 0;
}
-int hns_roce_mtr_attach(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
- dma_addr_t **bufs, struct hns_roce_buf_region *regions,
- int region_cnt)
+static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr)
{
- struct hns_roce_buf_region *r;
- int ret;
int i;
- ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, regions,
- region_cnt);
- if (ret)
- return ret;
+ for (i = 0; i < attr->region_count; i++)
+ if (attr->region[i].hopnum != HNS_ROCE_HOP_NUM_0 &&
+ attr->region[i].hopnum > 0)
+ return true;
- for (i = 0; i < region_cnt; i++) {
- r = &regions[i];
- ret = hns_roce_write_mtr(hr_dev, mtr, bufs[i], r);
+ /* because the mtr has only one root base address, hopnum 0 means the
+ * root base address equals the first buffer address; thus all allocated
+ * memory must be in one continuous space accessed in direct mode.
+ */
+ return false;
+}
+
+static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr)
+{
+ size_t size = 0;
+ int i;
+
+ for (i = 0; i < attr->region_count; i++)
+ size += attr->region[i].size;
+
+ return size;
+}
+
+static inline int mtr_umem_page_count(struct ib_umem *umem, int page_shift)
+{
+ int count = ib_umem_page_count(umem);
+
+ if (page_shift >= PAGE_SHIFT)
+ count >>= page_shift - PAGE_SHIFT;
+ else
+ count <<= PAGE_SHIFT - page_shift;
+
+ return count;
+}
+
+static inline size_t mtr_kmem_direct_size(bool is_direct, size_t alloc_size,
+ int page_shift)
+{
+ if (is_direct)
+ return ALIGN(alloc_size, 1 << page_shift);
+ else
+ return HNS_HW_DIRECT_PAGE_COUNT << page_shift;
+}
+
+/*
+ * Check whether the given pages lie in a continuous address space.
+ * Returns 0 on success, or the index of the first non-contiguous page.
+ */
+static inline int mtr_check_direct_pages(dma_addr_t *pages, int page_count,
+ int page_shift)
+{
+ size_t page_size = 1 << page_shift;
+ int i;
+
+ for (i = 1; i < page_count; i++)
+ if (pages[i] - pages[i - 1] != page_size)
+ return i;
+
+ return 0;
+}
+
+static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
+{
+ /* release user buffers */
+ if (mtr->umem) {
+ ib_umem_release(mtr->umem);
+ mtr->umem = NULL;
+ }
+
+ /* release kernel buffers */
+ if (mtr->kmem) {
+ hns_roce_buf_free(hr_dev, mtr->kmem);
+ kfree(mtr->kmem);
+ mtr->kmem = NULL;
+ }
+}
+
+static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *buf_attr, bool is_direct,
+ struct ib_udata *udata, unsigned long user_addr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int max_pg_shift = buf_attr->page_shift;
+ int best_pg_shift = 0;
+ int all_pg_count = 0;
+ size_t direct_size;
+ size_t total_size;
+ unsigned long tmp;
+ int ret = 0;
+
+ total_size = mtr_bufs_size(buf_attr);
+ if (total_size < 1) {
+ ibdev_err(ibdev, "Failed to check mtr size\n");
+ return -EINVAL;
+ }
+
+ if (udata) {
+ mtr->kmem = NULL;
+ mtr->umem = ib_umem_get(ibdev, user_addr, total_size,
+ buf_attr->user_access);
+ if (IS_ERR_OR_NULL(mtr->umem)) {
+ ibdev_err(ibdev, "Failed to get umem, ret %ld\n",
+ PTR_ERR(mtr->umem));
+ return -ENOMEM;
+ }
+ if (buf_attr->fixed_page) {
+ best_pg_shift = max_pg_shift;
+ } else {
+ tmp = GENMASK(max_pg_shift, 0);
+ ret = ib_umem_find_best_pgsz(mtr->umem, tmp, user_addr);
+ best_pg_shift = (ret <= PAGE_SIZE) ?
+ PAGE_SHIFT : ilog2(ret);
+ }
+ all_pg_count = mtr_umem_page_count(mtr->umem, best_pg_shift);
+ ret = 0;
+ } else {
+ mtr->umem = NULL;
+ mtr->kmem = kzalloc(sizeof(*mtr->kmem), GFP_KERNEL);
+ if (!mtr->kmem) {
+ ibdev_err(ibdev, "Failed to alloc kmem\n");
+ return -ENOMEM;
+ }
+ direct_size = mtr_kmem_direct_size(is_direct, total_size,
+ max_pg_shift);
+ ret = hns_roce_buf_alloc(hr_dev, total_size, direct_size,
+ mtr->kmem, max_pg_shift);
if (ret) {
- dev_err(hr_dev->dev,
- "write mtr[%d/%d] err %d,offset=%d.\n",
- i, region_cnt, ret, r->offset);
- goto err_write;
+ ibdev_err(ibdev, "Failed to alloc kmem, ret %d\n", ret);
+ goto err_alloc_mem;
+ } else {
+ best_pg_shift = max_pg_shift;
+ all_pg_count = mtr->kmem->npages;
}
}
- return 0;
+ /* must be bigger than the minimum hardware page shift */
+ if (best_pg_shift < PAGE_ADDR_SHIFT || all_pg_count < 1) {
+ ret = -EINVAL;
+ ibdev_err(ibdev, "Failed to check mtr page shift %d count %d\n",
+ best_pg_shift, all_pg_count);
+ goto err_alloc_mem;
+ }
-err_write:
- hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
+ mtr->hem_cfg.buf_pg_shift = best_pg_shift;
+ mtr->hem_cfg.buf_pg_count = all_pg_count;
+ return 0;
+err_alloc_mem:
+ mtr_free_bufs(hr_dev, mtr);
return ret;
}
+static int mtr_get_pages(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ dma_addr_t *pages, int count, int page_shift)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int npage;
+ int err;
+
+ if (mtr->umem)
+ npage = hns_roce_get_umem_bufs(hr_dev, pages, count, 0,
+ mtr->umem, page_shift);
+ else
+ npage = hns_roce_get_kmem_bufs(hr_dev, pages, count, 0,
+ mtr->kmem);
+
+ if (mtr->hem_cfg.is_direct && npage > 1) {
+ err = mtr_check_direct_pages(pages, npage, page_shift);
+ if (err) {
+ ibdev_err(ibdev, "Failed to check %s direct page-%d\n",
+ mtr->umem ? "user" : "kernel", err);
+ npage = err;
+ }
+ }
+
+ return npage;
+}
+
+int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_region *regions, int region_cnt,
+ dma_addr_t *pages, int page_cnt)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_buf_region *r;
+ int err;
+ int i;
+
+ for (i = 0; i < region_cnt; i++) {
+ r = &regions[i];
+ if (r->offset + r->count > page_cnt) {
+ err = -EINVAL;
+ ibdev_err(ibdev,
+ "Failed to check mtr%d end %d + %d, max %d\n",
+ i, r->offset, r->count, page_cnt);
+ return err;
+ }
+
+ err = mtr_map_region(hr_dev, mtr, &pages[r->offset], r);
+ if (err) {
+ ibdev_err(ibdev,
+ "Failed to map mtr%d offset %d, err %d\n",
+ i, r->offset, err);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr)
{
- u64 *mtts = mtt_buf;
int mtt_count;
int total = 0;
- u64 *addr;
+ __le64 *mtts;
int npage;
+ u64 addr;
int left;
- if (mtts == NULL || mtt_max < 1)
+ if (!mtt_buf || mtt_max < 1)
goto done;
+ /* no mtt memory in direct mode, so just return the buffer address */
+ if (mtr->hem_cfg.is_direct) {
+ npage = offset;
+ for (total = 0; total < mtt_max; total++, npage++) {
+ addr = mtr->hem_cfg.root_ba +
+ (npage << mtr->hem_cfg.buf_pg_shift);
+
+ if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
+ mtt_buf[total] = to_hr_hw_page_addr(addr);
+ else
+ mtt_buf[total] = addr;
+ }
+
+ goto done;
+ }
+
left = mtt_max;
while (left > 0) {
mtt_count = 0;
- addr = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
+ mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
offset + total,
&mtt_count, NULL);
- if (!addr || !mtt_count)
+ if (!mtts || !mtt_count)
goto done;
npage = min(mtt_count, left);
- memcpy(&mtts[total], addr, BA_BYTE_LEN * npage);
left -= npage;
- total += npage;
+ for (mtt_count = 0; mtt_count < npage; mtt_count++)
+ mtt_buf[total++] = le64_to_cpu(mtts[mtt_count]);
}
done:
if (base_addr)
- *base_addr = mtr->hem_list.root_ba;
+ *base_addr = mtr->hem_cfg.root_ba;
return total;
}
+
+/* convert buffer size to page index and page count */
+static int mtr_init_region(struct hns_roce_buf_attr *attr, int page_cnt,
+ struct hns_roce_buf_region *regions, int region_cnt,
+ int page_shift)
+{
+ unsigned int page_size = 1 << page_shift;
+ int max_region = attr->region_count;
+ struct hns_roce_buf_region *r;
+ int page_idx = 0;
+ int i = 0;
+
+ for (; i < region_cnt && i < max_region && page_idx < page_cnt; i++) {
+ r = &regions[i];
+ r->hopnum = attr->region[i].hopnum == HNS_ROCE_HOP_NUM_0 ?
+ 0 : attr->region[i].hopnum;
+ r->offset = page_idx;
+ r->count = DIV_ROUND_UP(attr->region[i].size, page_size);
+ page_idx += r->count;
+ }
+
+ return i;
+}
+
+/**
+ * hns_roce_mtr_create - Create hns memory translate region.
+ *
+ * @hr_dev: hns roce device
+ * @mtr: memory translate region
+ * @buf_attr: buffer attribute for creating mtr
+ * @page_shift: page shift for multi-hop base address table
+ * @udata: user space context, if it's NULL, means kernel space
+ * @user_addr: userspace virtual address to start at
+ */
+int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *buf_attr, int page_shift,
+ struct ib_udata *udata, unsigned long user_addr)
+{
+ struct hns_roce_buf_region regions[HNS_ROCE_MAX_BT_REGION] = {};
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ dma_addr_t *pages = NULL;
+ int region_cnt = 0;
+ int all_pg_cnt;
+ int get_pg_cnt;
+ bool has_mtt;
+ int err = 0;
+
+ has_mtt = mtr_has_mtt(buf_attr);
+ /* if buffer only need mtt, just init the hem cfg */
+ if (buf_attr->mtt_only) {
+ mtr->hem_cfg.buf_pg_shift = buf_attr->page_shift;
+ mtr->hem_cfg.buf_pg_count = mtr_bufs_size(buf_attr) >>
+ buf_attr->page_shift;
+ mtr->umem = NULL;
+ mtr->kmem = NULL;
+ } else {
+ err = mtr_alloc_bufs(hr_dev, mtr, buf_attr, !has_mtt, udata,
+ user_addr);
+ if (err) {
+ ibdev_err(ibdev, "Failed to alloc mtr bufs, err %d\n",
+ err);
+ return err;
+ }
+ }
+
+ /* alloc mtt memory */
+ all_pg_cnt = mtr->hem_cfg.buf_pg_count;
+ hns_roce_hem_list_init(&mtr->hem_list);
+ mtr->hem_cfg.is_direct = !has_mtt;
+ mtr->hem_cfg.ba_pg_shift = page_shift;
+ if (has_mtt) {
+ region_cnt = mtr_init_region(buf_attr, all_pg_cnt,
+ regions, ARRAY_SIZE(regions),
+ mtr->hem_cfg.buf_pg_shift);
+ if (region_cnt < 1) {
+ err = -ENOBUFS;
+ ibdev_err(ibdev, "Failed to init mtr region %d\n",
+ region_cnt);
+ goto err_alloc_bufs;
+ }
+ err = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
+ regions, region_cnt,
+ page_shift);
+ if (err) {
+ ibdev_err(ibdev, "Failed to request mtr hem, err %d\n",
+ err);
+ goto err_alloc_bufs;
+ }
+ mtr->hem_cfg.root_ba = mtr->hem_list.root_ba;
+ }
+
+ /* no buffer to map */
+ if (buf_attr->mtt_only)
+ return 0;
+
+ /* alloc a tmp array to store buffer's dma address */
+ pages = kvcalloc(all_pg_cnt, sizeof(dma_addr_t), GFP_KERNEL);
+ if (!pages) {
+ err = -ENOMEM;
+ ibdev_err(ibdev, "Failed to alloc mtr page list %d\n",
+ all_pg_cnt);
+ goto err_alloc_hem_list;
+ }
+
+ get_pg_cnt = mtr_get_pages(hr_dev, mtr, pages, all_pg_cnt,
+ mtr->hem_cfg.buf_pg_shift);
+ if (get_pg_cnt != all_pg_cnt) {
+ ibdev_err(ibdev, "Failed to get mtr page %d != %d\n",
+ get_pg_cnt, all_pg_cnt);
+ err = -ENOBUFS;
+ goto err_alloc_page_list;
+ }
+
+ if (!has_mtt) {
+ mtr->hem_cfg.root_ba = pages[0];
+ } else {
+ /* write buffer's dma address to BA table */
+ err = hns_roce_mtr_map(hr_dev, mtr, regions, region_cnt, pages,
+ all_pg_cnt);
+ if (err) {
+ ibdev_err(ibdev, "Failed to map mtr pages, err %d\n",
+ err);
+ goto err_alloc_page_list;
+ }
+ }
+
+ /* drop tmp array */
+ kvfree(pages);
+ return 0;
+err_alloc_page_list:
+ kvfree(pages);
+err_alloc_hem_list:
+ hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
+err_alloc_bufs:
+ mtr_free_bufs(hr_dev, mtr);
+ return err;
+}
+
+void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
+{
+ /* release multi-hop addressing resource */
+ hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
+
+ /* free buffers */
+ mtr_free_bufs(hr_dev, mtr);
+}
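Most of the new mtr code is bookkeeping for multi-hop base-address (BA) tables: a root BA page points at lower-level BA pages, the lowest level points at the buffer pages, and hopnum 0 means there is no table at all because the buffer is physically contiguous (root_ba is simply pages[0]). Purely as an illustration of the addressing scheme, the per-level index of a linear buffer-page index can be derived like this (entries_per_ba_page would come from ba_pg_shift in the real driver):

/* split a linear buffer-page index into one table index per hop level */
static void demo_hop_indexes(unsigned long page_idx,
			     unsigned int entries_per_ba_page,
			     unsigned int hopnum,
			     unsigned long *idx /* array of hopnum entries */)
{
	int hop;

	for (hop = hopnum - 1; hop >= 0; hop--) {
		idx[hop] = page_idx % entries_per_ba_page;
		page_idx /= entries_per_ba_page;
	}
}

For example, with 512 BAs per page and hopnum 2, buffer page 1234 sits at slot 1234 % 512 = 210 of L1 table 1234 / 512 = 2.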
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 6317901c4b4f..dca979d8c345 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -355,16 +355,16 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
hns_roce_bitmap_free_range(&qp_table->bitmap, hr_qp->qpn, 1, BITMAP_RR);
}
-static int set_rq_size(struct hns_roce_dev *hr_dev,
- struct ib_qp_cap *cap, bool is_user, int has_rq,
- struct hns_roce_qp *hr_qp)
+static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
+ struct hns_roce_qp *hr_qp, int has_rq)
{
- u32 max_cnt;
+ u32 cnt;
/* If srq exist, set zero for relative number of rq */
if (!has_rq) {
hr_qp->rq.wqe_cnt = 0;
hr_qp->rq.max_gs = 0;
+ hr_qp->rq_inl_buf.wqe_cnt = 0;
cap->max_recv_wr = 0;
cap->max_recv_sge = 0;
@@ -379,17 +379,14 @@ static int set_rq_size(struct hns_roce_dev *hr_dev,
return -EINVAL;
}
- max_cnt = max(cap->max_recv_wr, hr_dev->caps.min_wqes);
-
- hr_qp->rq.wqe_cnt = roundup_pow_of_two(max_cnt);
- if ((u32)hr_qp->rq.wqe_cnt > hr_dev->caps.max_wqes) {
+ cnt = roundup_pow_of_two(max(cap->max_recv_wr, hr_dev->caps.min_wqes));
+ if (cnt > hr_dev->caps.max_wqes) {
ibdev_err(&hr_dev->ib_dev, "rq depth %u too large\n",
cap->max_recv_wr);
return -EINVAL;
}
- max_cnt = max(1U, cap->max_recv_sge);
- hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt);
+ hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge));
if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE)
hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz);
@@ -397,12 +394,61 @@ static int set_rq_size(struct hns_roce_dev *hr_dev,
hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz *
hr_qp->rq.max_gs);
- cap->max_recv_wr = hr_qp->rq.wqe_cnt;
+ hr_qp->rq.wqe_cnt = cnt;
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE)
+ hr_qp->rq_inl_buf.wqe_cnt = cnt;
+ else
+ hr_qp->rq_inl_buf.wqe_cnt = 0;
+
+ cap->max_recv_wr = cnt;
cap->max_recv_sge = hr_qp->rq.max_gs;
return 0;
}
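A quick worked example of the reworked RQ sizing, with made-up capability values: a request of max_recv_wr = 100 against min_wqes = 64 yields wqe_cnt = roundup_pow_of_two(max(100, 64)) = 128, and max_recv_sge = 3 yields max_gs = roundup_pow_of_two(3) = 4; if max_rq_sg exceeds HNS_ROCE_SGE_IN_WQE and max_rq_desc_sz is 16, the stride becomes wqe_shift = ilog2(16 * 4) = 6, i.e. 64-byte RQ WQEs, and the adjusted values (128, 4) are written back to the caller's ib_qp_cap.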
+static int set_extend_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt,
+ struct hns_roce_qp *hr_qp,
+ struct ib_qp_cap *cap)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u32 cnt;
+
+ cnt = max(1U, cap->max_send_sge);
+ if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) {
+ hr_qp->sq.max_gs = roundup_pow_of_two(cnt);
+ hr_qp->sge.sge_cnt = 0;
+
+ return 0;
+ }
+
+ hr_qp->sq.max_gs = cnt;
+
+ /* UD sqwqe's sges use the extended sge space */
+ if (hr_qp->ibqp.qp_type == IB_QPT_GSI ||
+ hr_qp->ibqp.qp_type == IB_QPT_UD) {
+ cnt = roundup_pow_of_two(sq_wqe_cnt * hr_qp->sq.max_gs);
+ } else if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) {
+ cnt = roundup_pow_of_two(sq_wqe_cnt *
+ (hr_qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE));
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) {
+ if (cnt > hr_dev->caps.max_extend_sg) {
+ ibdev_err(ibdev,
+ "failed to check exSGE num, exSGE num = %d.\n",
+ cnt);
+ return -EINVAL;
+ }
+ }
+ } else {
+ cnt = 0;
+ }
+
+ hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT;
+ hr_qp->sge.sge_cnt = cnt;
+
+ return 0;
+}
+
static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev,
struct ib_qp_cap *cap,
struct hns_roce_ib_create_qp *ucmd)
@@ -430,174 +476,79 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev,
struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp,
struct hns_roce_ib_create_qp *ucmd)
{
- u32 ex_sge_num;
- u32 page_size;
- u32 max_cnt;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u32 cnt = 0;
int ret;
- if (check_shl_overflow(1, ucmd->log_sq_bb_count, &hr_qp->sq.wqe_cnt) ||
- hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes)
+ if (check_shl_overflow(1, ucmd->log_sq_bb_count, &cnt) ||
+ cnt > hr_dev->caps.max_wqes)
return -EINVAL;
ret = check_sq_size_with_integrity(hr_dev, cap, ucmd);
if (ret) {
- ibdev_err(&hr_dev->ib_dev, "Failed to check user SQ size limit\n");
+ ibdev_err(ibdev, "failed to check user SQ size, ret = %d.\n",
+ ret);
return ret;
}
- hr_qp->sq.wqe_shift = ucmd->log_sq_stride;
-
- max_cnt = max(1U, cap->max_send_sge);
- if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
- hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt);
- else
- hr_qp->sq.max_gs = max_cnt;
-
- if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE)
- hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
- (hr_qp->sq.max_gs - 2));
-
- if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE &&
- hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) {
- if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
- ibdev_err(&hr_dev->ib_dev,
- "Failed to check extended SGE size limit %d\n",
- hr_qp->sge.sge_cnt);
- return -EINVAL;
- }
- }
-
- hr_qp->sge.sge_shift = 4;
- ex_sge_num = hr_qp->sge.sge_cnt;
+ ret = set_extend_sge_param(hr_dev, cnt, hr_qp, cap);
+ if (ret)
+ return ret;
- /* Get buf size, SQ and RQ are aligned to page_szie */
- if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) {
- hr_qp->buff_size = round_up((hr_qp->rq.wqe_cnt <<
- hr_qp->rq.wqe_shift), PAGE_SIZE) +
- round_up((hr_qp->sq.wqe_cnt <<
- hr_qp->sq.wqe_shift), PAGE_SIZE);
-
- hr_qp->sq.offset = 0;
- hr_qp->rq.offset = round_up((hr_qp->sq.wqe_cnt <<
- hr_qp->sq.wqe_shift), PAGE_SIZE);
- } else {
- page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
- hr_qp->sge.sge_cnt = ex_sge_num ?
- max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num) : 0;
- hr_qp->buff_size = round_up((hr_qp->rq.wqe_cnt <<
- hr_qp->rq.wqe_shift), page_size) +
- round_up((hr_qp->sge.sge_cnt <<
- hr_qp->sge.sge_shift), page_size) +
- round_up((hr_qp->sq.wqe_cnt <<
- hr_qp->sq.wqe_shift), page_size);
-
- hr_qp->sq.offset = 0;
- if (ex_sge_num) {
- hr_qp->sge.offset = round_up((hr_qp->sq.wqe_cnt <<
- hr_qp->sq.wqe_shift),
- page_size);
- hr_qp->rq.offset = hr_qp->sge.offset +
- round_up((hr_qp->sge.sge_cnt <<
- hr_qp->sge.sge_shift),
- page_size);
- } else {
- hr_qp->rq.offset = round_up((hr_qp->sq.wqe_cnt <<
- hr_qp->sq.wqe_shift),
- page_size);
- }
- }
+ hr_qp->sq.wqe_shift = ucmd->log_sq_stride;
+ hr_qp->sq.wqe_cnt = cnt;
return 0;
}
static int split_wqe_buf_region(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp,
- struct hns_roce_buf_region *regions,
- int region_max, int page_shift)
+ struct hns_roce_buf_attr *buf_attr)
{
- int page_size = 1 << page_shift;
- bool is_extend_sge;
- int region_cnt = 0;
int buf_size;
- int buf_cnt;
+ int idx = 0;
- if (hr_qp->buff_size < 1 || region_max < 1)
- return region_cnt;
+ hr_qp->buff_size = 0;
- if (hr_qp->sge.sge_cnt > 0)
- is_extend_sge = true;
- else
- is_extend_sge = false;
-
- /* sq region */
- if (is_extend_sge)
- buf_size = hr_qp->sge.offset - hr_qp->sq.offset;
- else
- buf_size = hr_qp->rq.offset - hr_qp->sq.offset;
-
- if (buf_size > 0 && region_cnt < region_max) {
- buf_cnt = DIV_ROUND_UP(buf_size, page_size);
- hns_roce_init_buf_region(&regions[region_cnt],
- hr_dev->caps.wqe_sq_hop_num,
- hr_qp->sq.offset / page_size,
- buf_cnt);
- region_cnt++;
- }
-
- /* sge region */
- if (is_extend_sge) {
- buf_size = hr_qp->rq.offset - hr_qp->sge.offset;
- if (buf_size > 0 && region_cnt < region_max) {
- buf_cnt = DIV_ROUND_UP(buf_size, page_size);
- hns_roce_init_buf_region(&regions[region_cnt],
- hr_dev->caps.wqe_sge_hop_num,
- hr_qp->sge.offset / page_size,
- buf_cnt);
- region_cnt++;
- }
- }
-
- /* rq region */
- buf_size = hr_qp->buff_size - hr_qp->rq.offset;
- if (buf_size > 0) {
- buf_cnt = DIV_ROUND_UP(buf_size, page_size);
- hns_roce_init_buf_region(&regions[region_cnt],
- hr_dev->caps.wqe_rq_hop_num,
- hr_qp->rq.offset / page_size,
- buf_cnt);
- region_cnt++;
- }
-
- return region_cnt;
-}
-
-static int set_extend_sge_param(struct hns_roce_dev *hr_dev,
- struct hns_roce_qp *hr_qp)
-{
- struct device *dev = hr_dev->dev;
-
- if (hr_qp->sq.max_gs > 2) {
- hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
- (hr_qp->sq.max_gs - 2));
- hr_qp->sge.sge_shift = 4;
- }
-
- /* ud sqwqe's sge use extend sge */
- if (hr_dev->hw_rev != HNS_ROCE_HW_VER1 &&
- hr_qp->ibqp.qp_type == IB_QPT_GSI) {
- hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
- hr_qp->sq.max_gs);
- hr_qp->sge.sge_shift = 4;
- }
+ /* SQ WQE */
+ hr_qp->sq.offset = 0;
+ buf_size = to_hr_hem_entries_size(hr_qp->sq.wqe_cnt,
+ hr_qp->sq.wqe_shift);
+ if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
+ buf_attr->region[idx].size = buf_size;
+ buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sq_hop_num;
+ idx++;
+ hr_qp->buff_size += buf_size;
+ }
+
+ /* extended SGE space in the SQ */
+ hr_qp->sge.offset = hr_qp->buff_size;
+ buf_size = to_hr_hem_entries_size(hr_qp->sge.sge_cnt,
+ hr_qp->sge.sge_shift);
+ if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
+ buf_attr->region[idx].size = buf_size;
+ buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sge_hop_num;
+ idx++;
+ hr_qp->buff_size += buf_size;
+ }
+
+ /* RQ WQE */
+ hr_qp->rq.offset = hr_qp->buff_size;
+ buf_size = to_hr_hem_entries_size(hr_qp->rq.wqe_cnt,
+ hr_qp->rq.wqe_shift);
+ if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
+ buf_attr->region[idx].size = buf_size;
+ buf_attr->region[idx].hopnum = hr_dev->caps.wqe_rq_hop_num;
+ idx++;
+ hr_qp->buff_size += buf_size;
+ }
+
+ if (hr_qp->buff_size < 1)
+ return -EINVAL;
- if (hr_qp->sq.max_gs > 2 &&
- hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) {
- if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
- dev_err(dev, "The extended sge cnt error! sge_cnt=%d\n",
- hr_qp->sge.sge_cnt);
- return -EINVAL;
- }
- }
+ buf_attr->page_shift = PAGE_ADDR_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
+ buf_attr->fixed_page = true;
+ buf_attr->region_count = idx;
return 0;
}
@@ -605,62 +556,35 @@ static int set_extend_sge_param(struct hns_roce_dev *hr_dev,
static int set_kernel_sq_size(struct hns_roce_dev *hr_dev,
struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp)
{
- u32 page_size;
- u32 max_cnt;
- int size;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u32 cnt;
int ret;
if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes ||
cap->max_send_sge > hr_dev->caps.max_sq_sg ||
cap->max_inline_data > hr_dev->caps.max_sq_inline) {
- ibdev_err(&hr_dev->ib_dev,
- "SQ WR or sge or inline data error!\n");
+ ibdev_err(ibdev,
+ "failed to check SQ WR, SGE or inline num, ret = %d.\n",
+ -EINVAL);
return -EINVAL;
}
- hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz);
-
- max_cnt = max(cap->max_send_wr, hr_dev->caps.min_wqes);
-
- hr_qp->sq.wqe_cnt = roundup_pow_of_two(max_cnt);
- if ((u32)hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) {
- ibdev_err(&hr_dev->ib_dev,
- "while setting kernel sq size, sq.wqe_cnt too large\n");
+ cnt = roundup_pow_of_two(max(cap->max_send_wr, hr_dev->caps.min_wqes));
+ if (cnt > hr_dev->caps.max_wqes) {
+ ibdev_err(ibdev, "failed to check WQE num, WQE num = %d.\n",
+ cnt);
return -EINVAL;
}
- /* Get data_seg numbers */
- max_cnt = max(1U, cap->max_send_sge);
- if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
- hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt);
- else
- hr_qp->sq.max_gs = max_cnt;
+ hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz);
+ hr_qp->sq.wqe_cnt = cnt;
- ret = set_extend_sge_param(hr_dev, hr_qp);
- if (ret) {
- ibdev_err(&hr_dev->ib_dev, "set extend sge parameters fail\n");
+ ret = set_extend_sge_param(hr_dev, cnt, hr_qp, cap);
+ if (ret)
return ret;
- }
- /* Get buf size, SQ and RQ are aligned to PAGE_SIZE */
- page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
- hr_qp->sq.offset = 0;
- size = round_up(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, page_size);
-
- if (hr_dev->hw_rev != HNS_ROCE_HW_VER1 && hr_qp->sge.sge_cnt) {
- hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift),
- (u32)hr_qp->sge.sge_cnt);
- hr_qp->sge.offset = size;
- size += round_up(hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift,
- page_size);
- }
-
- hr_qp->rq.offset = size;
- size += round_up((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), page_size);
- hr_qp->buff_size = size;
-
- /* Get wr and sge number which send */
- cap->max_send_wr = hr_qp->sq.wqe_cnt;
+ /* sync the parameters of the kernel QP back to the caller's configuration */
+ cap->max_send_wr = cnt;
cap->max_send_sge = hr_qp->sq.max_gs;
/* We don't support inline sends for kernel QPs (yet) */
@@ -691,8 +615,8 @@ static int alloc_rq_inline_buf(struct hns_roce_qp *hr_qp,
struct ib_qp_init_attr *init_attr)
{
u32 max_recv_sge = init_attr->cap.max_recv_sge;
+ u32 wqe_cnt = hr_qp->rq_inl_buf.wqe_cnt;
struct hns_roce_rinl_wqe *wqe_list;
- u32 wqe_cnt = hr_qp->rq.wqe_cnt;
int i;
/* allocate recv inline buf */
@@ -714,7 +638,6 @@ static int alloc_rq_inline_buf(struct hns_roce_qp *hr_qp,
wqe_list[i].sg_list = &wqe_list[0].sg_list[i * max_recv_sge];
hr_qp->rq_inl_buf.wqe_list = wqe_list;
- hr_qp->rq_inl_buf.wqe_cnt = wqe_cnt;
return 0;
@@ -727,140 +650,55 @@ err:
static void free_rq_inline_buf(struct hns_roce_qp *hr_qp)
{
- kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
+ if (hr_qp->rq_inl_buf.wqe_list)
+ kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
kfree(hr_qp->rq_inl_buf.wqe_list);
}
-static int map_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
- u32 page_shift, bool is_user)
-{
-/* WQE buffer include 3 parts: SQ, extend SGE and RQ. */
-#define HNS_ROCE_WQE_REGION_MAX 3
- struct hns_roce_buf_region regions[HNS_ROCE_WQE_REGION_MAX] = {};
- dma_addr_t *buf_list[HNS_ROCE_WQE_REGION_MAX] = {};
- struct ib_device *ibdev = &hr_dev->ib_dev;
- struct hns_roce_buf_region *r;
- int region_count;
- int buf_count;
- int ret;
- int i;
-
- region_count = split_wqe_buf_region(hr_dev, hr_qp, regions,
- ARRAY_SIZE(regions), page_shift);
-
- /* alloc a tmp list to store WQE buffers address */
- ret = hns_roce_alloc_buf_list(regions, buf_list, region_count);
- if (ret) {
- ibdev_err(ibdev, "Failed to alloc WQE buffer list\n");
- return ret;
- }
-
- for (i = 0; i < region_count; i++) {
- r = &regions[i];
- if (is_user)
- buf_count = hns_roce_get_umem_bufs(hr_dev, buf_list[i],
- r->count, r->offset, hr_qp->umem,
- page_shift);
- else
- buf_count = hns_roce_get_kmem_bufs(hr_dev, buf_list[i],
- r->count, r->offset, &hr_qp->hr_buf);
-
- if (buf_count != r->count) {
- ibdev_err(ibdev, "Failed to get %s WQE buf, expect %d = %d.\n",
- is_user ? "user" : "kernel",
- r->count, buf_count);
- ret = -ENOBUFS;
- goto done;
- }
- }
-
- hr_qp->wqe_bt_pg_shift = hr_dev->caps.mtt_ba_pg_sz;
- hns_roce_mtr_init(&hr_qp->mtr, PAGE_SHIFT + hr_qp->wqe_bt_pg_shift,
- page_shift);
- ret = hns_roce_mtr_attach(hr_dev, &hr_qp->mtr, buf_list, regions,
- region_count);
- if (ret)
- ibdev_err(ibdev, "Failed to attach WQE's mtr\n");
-
- goto done;
-
- hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr);
-done:
- hns_roce_free_buf_list(buf_list, region_count);
-
- return ret;
-}
-
static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata, unsigned long addr)
{
- u32 page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
struct ib_device *ibdev = &hr_dev->ib_dev;
- bool is_rq_buf_inline;
+ struct hns_roce_buf_attr buf_attr = {};
int ret;
- is_rq_buf_inline = (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
- hns_roce_qp_has_rq(init_attr);
- if (is_rq_buf_inline) {
+ if (!udata && hr_qp->rq_inl_buf.wqe_cnt) {
ret = alloc_rq_inline_buf(hr_qp, init_attr);
if (ret) {
- ibdev_err(ibdev, "Failed to alloc inline RQ buffer\n");
+ ibdev_err(ibdev,
+ "failed to alloc inline buf, ret = %d.\n",
+ ret);
return ret;
}
- }
-
- if (udata) {
- hr_qp->umem = ib_umem_get(ibdev, addr, hr_qp->buff_size, 0);
- if (IS_ERR(hr_qp->umem)) {
- ret = PTR_ERR(hr_qp->umem);
- goto err_inline;
- }
} else {
- ret = hns_roce_buf_alloc(hr_dev, hr_qp->buff_size,
- (1 << page_shift) * 2,
- &hr_qp->hr_buf, page_shift);
- if (ret)
- goto err_inline;
+ hr_qp->rq_inl_buf.wqe_list = NULL;
}
- ret = map_wqe_buf(hr_dev, hr_qp, page_shift, udata);
- if (ret)
- goto err_alloc;
+ ret = split_wqe_buf_region(hr_dev, hr_qp, &buf_attr);
+ if (ret) {
+ ibdev_err(ibdev, "failed to split WQE buf, ret = %d.\n", ret);
+ goto err_inline;
+ }
+ ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, &buf_attr,
+ PAGE_ADDR_SHIFT + hr_dev->caps.mtt_ba_pg_sz,
+ udata, addr);
+ if (ret) {
+ ibdev_err(ibdev, "failed to create WQE mtr, ret = %d.\n", ret);
+ goto err_inline;
+ }
return 0;
-
err_inline:
- if (is_rq_buf_inline)
- free_rq_inline_buf(hr_qp);
-
-err_alloc:
- if (udata) {
- ib_umem_release(hr_qp->umem);
- hr_qp->umem = NULL;
- } else {
- hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
- }
-
- ibdev_err(ibdev, "Failed to alloc WQE buffer, ret %d.\n", ret);
+ free_rq_inline_buf(hr_qp);
return ret;
}
static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
{
- hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr);
- if (hr_qp->umem) {
- ib_umem_release(hr_qp->umem);
- hr_qp->umem = NULL;
- }
-
- if (hr_qp->hr_buf.nbufs > 0)
- hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
-
- if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
- hr_qp->rq.wqe_cnt)
- free_rq_inline_buf(hr_qp);
+ hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr);
+ free_rq_inline_buf(hr_qp);
}
static inline bool user_qp_has_sdb(struct hns_roce_dev *hr_dev,
@@ -912,8 +750,8 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
"Failed to map user SQ doorbell\n");
goto err_out;
}
- hr_qp->sdb_en = 1;
- resp->cap_flags |= HNS_ROCE_SUPPORT_SQ_RECORD_DB;
+ hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB;
+ resp->cap_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB;
}
if (user_qp_has_rdb(hr_dev, init_attr, udata, resp)) {
@@ -924,8 +762,8 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
"Failed to map user RQ doorbell\n");
goto err_sdb;
}
- hr_qp->rdb_en = 1;
- resp->cap_flags |= HNS_ROCE_SUPPORT_RQ_RECORD_DB;
+ hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB;
+ resp->cap_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB;
}
} else {
/* QP doorbell register address */
@@ -942,13 +780,13 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
goto err_out;
}
*hr_qp->rdb.db_record = 0;
- hr_qp->rdb_en = 1;
+ hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB;
}
}
return 0;
err_sdb:
- if (udata && hr_qp->sdb_en)
+ if (udata && hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
err_out:
return ret;
@@ -961,12 +799,12 @@ static void free_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
udata, struct hns_roce_ucontext, ibucontext);
if (udata) {
- if (hr_qp->rdb_en)
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
hns_roce_db_unmap_user(uctx, &hr_qp->rdb);
- if (hr_qp->sdb_en)
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
} else {
- if (hr_qp->rdb_en)
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
hns_roce_free_db(hr_dev, &hr_qp->rdb);
}
}
@@ -1025,10 +863,11 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
else
hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR;
- ret = set_rq_size(hr_dev, &init_attr->cap, udata,
- hns_roce_qp_has_rq(init_attr), hr_qp);
+ ret = set_rq_size(hr_dev, &init_attr->cap, hr_qp,
+ hns_roce_qp_has_rq(init_attr));
if (ret) {
- ibdev_err(ibdev, "Failed to set user RQ size\n");
+ ibdev_err(ibdev, "failed to set user RQ size, ret = %d.\n",
+ ret);
return ret;
}
@@ -1339,10 +1178,10 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (ibqp->uobject &&
(attr_mask & IB_QP_STATE) && new_state == IB_QPS_ERR) {
- if (hr_qp->sdb_en == 1) {
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB) {
hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr);
- if (hr_qp->rdb_en == 1)
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr);
} else {
ibdev_warn(&hr_dev->ib_dev,
@@ -1431,10 +1270,9 @@ void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq,
}
}
-static void *get_wqe(struct hns_roce_qp *hr_qp, int offset)
+static inline void *get_wqe(struct hns_roce_qp *hr_qp, int offset)
{
-
- return hns_roce_buf_offset(&hr_qp->hr_buf, offset);
+ return hns_roce_buf_offset(hr_qp->mtr.kmem, offset);
}
void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, int n)
@@ -1449,8 +1287,7 @@ void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, int n)
void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, int n)
{
- return hns_roce_buf_offset(&hr_qp->hr_buf, hr_qp->sge.offset +
- (n << hr_qp->sge.sge_shift));
+ return get_wqe(hr_qp, hr_qp->sge.offset + (n << hr_qp->sge.sge_shift));
}
bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq,
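
The reworked split_wqe_buf_region() above drops the per-revision offset arithmetic and simply lays the three WQE areas out back to back, recording one buf_attr region per non-empty area. A standalone sketch of that contiguous layout follows; the entry-size helper is a stand-in for to_hr_hem_entries_size() and the counts and shifts are made-up example values.

#include <stdio.h>

struct wqe_region {
	unsigned int offset;	/* byte offset inside the QP buffer */
	unsigned int size;	/* bytes occupied by this area */
};

/* Stand-in for to_hr_hem_entries_size(): count << shift, no HEM rounding. */
static unsigned int entries_size(unsigned int cnt, unsigned int shift)
{
	return cnt << shift;
}

int main(void)
{
	/* Example: 128 SQ WQEs of 64 B, 256 ext. SGEs of 16 B, 128 RQ WQEs of 64 B. */
	struct wqe_region sq  = { 0, entries_size(128, 6) };
	struct wqe_region sge = { sq.offset + sq.size, entries_size(256, 4) };
	struct wqe_region rq  = { sge.offset + sge.size, entries_size(128, 6) };
	unsigned int buff_size = rq.offset + rq.size;

	/* prints: sq@0 sge@8192 rq@12288 total=20480 */
	printf("sq@%u sge@%u rq@%u total=%u\n",
	       sq.offset, sge.offset, rq.offset, buff_size);
	return 0;
}
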
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 5b3dd1a337d4..6e5a2adc2ab2 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -77,56 +77,56 @@ static int hns_roce_hw_destroy_srq(struct hns_roce_dev *dev,
HNS_ROCE_CMD_TIMEOUT_MSECS);
}
-static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn,
- u16 xrcd, struct hns_roce_mtt *hr_mtt,
- u64 db_rec_addr, struct hns_roce_srq *srq)
+static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
+ u32 pdn, u32 cqn, u16 xrcd, u64 db_rec_addr)
{
struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_cmd_mailbox *mailbox;
- dma_addr_t dma_handle_wqe;
- dma_addr_t dma_handle_idx;
- u64 *mtts_wqe;
- u64 *mtts_idx;
+ u64 mtts_wqe[MTT_MIN_COUNT] = { 0 };
+ u64 mtts_idx[MTT_MIN_COUNT] = { 0 };
+ dma_addr_t dma_handle_wqe = 0;
+ dma_addr_t dma_handle_idx = 0;
int ret;
/* Get the physical address of srq buf */
- mtts_wqe = hns_roce_table_find(hr_dev,
- &hr_dev->mr_table.mtt_srqwqe_table,
- srq->mtt.first_seg,
- &dma_handle_wqe);
- if (!mtts_wqe) {
- dev_err(hr_dev->dev, "Failed to find mtt for srq buf.\n");
- return -EINVAL;
+ ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe,
+ ARRAY_SIZE(mtts_wqe), &dma_handle_wqe);
+ if (ret < 1) {
+ ibdev_err(ibdev, "Failed to find mtr for SRQ WQE\n");
+ return -ENOBUFS;
}
/* Get physical address of idx que buf */
- mtts_idx = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_idx_table,
- srq->idx_que.mtt.first_seg,
- &dma_handle_idx);
- if (!mtts_idx) {
- dev_err(hr_dev->dev,
- "Failed to find mtt for srq idx queue buf.\n");
- return -EINVAL;
+ ret = hns_roce_mtr_find(hr_dev, &srq->idx_que.mtr, 0, mtts_idx,
+ ARRAY_SIZE(mtts_idx), &dma_handle_idx);
+ if (ret < 1) {
+ ibdev_err(ibdev, "Failed to find mtr for SRQ idx\n");
+ return -ENOBUFS;
}
ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn);
if (ret) {
- dev_err(hr_dev->dev,
- "Failed to alloc a bit from srq bitmap.\n");
+ ibdev_err(ibdev, "Failed to alloc SRQ number, err %d\n", ret);
return -ENOMEM;
}
ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn);
- if (ret)
+ if (ret) {
+ ibdev_err(ibdev, "Failed to get SRQC table, err %d\n", ret);
goto err_out;
+ }
ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL));
- if (ret)
+ if (ret) {
+ ibdev_err(ibdev, "Failed to store SRQC, err %d\n", ret);
goto err_put;
+ }
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox)) {
- ret = PTR_ERR(mailbox);
+ if (IS_ERR_OR_NULL(mailbox)) {
+ ret = -ENOMEM;
+ ibdev_err(ibdev, "Failed to alloc mailbox for SRQC\n");
goto err_xa;
}
@@ -136,8 +136,10 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn,
ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn);
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
- if (ret)
+ if (ret) {
+ ibdev_err(ibdev, "Failed to config SRQC, err %d\n", ret);
goto err_xa;
+ }
atomic_set(&srq->refcount, 1);
init_completion(&srq->free);
@@ -154,8 +156,7 @@ err_out:
return ret;
}
-static void hns_roce_srq_free(struct hns_roce_dev *hr_dev,
- struct hns_roce_srq *srq)
+static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
{
struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
int ret;
@@ -175,187 +176,104 @@ static void hns_roce_srq_free(struct hns_roce_dev *hr_dev,
hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR);
}
-static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata,
- int srq_buf_size)
+static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
+ struct ib_udata *udata, unsigned long addr)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
- struct hns_roce_ib_create_srq ucmd;
- struct hns_roce_buf *buf;
- int ret;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_buf_attr buf_attr = {};
+ int err;
+
+ srq->wqe_shift = ilog2(roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE,
+ HNS_ROCE_SGE_SIZE *
+ srq->max_gs)));
+
+ buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + PAGE_ADDR_SHIFT;
+ buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt,
+ srq->wqe_shift);
+ buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num;
+ buf_attr.region_count = 1;
+ buf_attr.fixed_page = true;
+
+ err = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr,
+ hr_dev->caps.srqwqe_ba_pg_sz +
+ PAGE_ADDR_SHIFT, udata, addr);
+ if (err)
+ ibdev_err(ibdev, "Failed to alloc SRQ buf mtr, err %d\n", err);
+
+ return err;
+}
- if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
- return -EFAULT;
-
- srq->umem =
- ib_umem_get(srq->ibsrq.device, ucmd.buf_addr, srq_buf_size, 0);
- if (IS_ERR(srq->umem))
- return PTR_ERR(srq->umem);
-
- buf = &srq->buf;
- buf->npages = (ib_umem_page_count(srq->umem) +
- (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) /
- (1 << hr_dev->caps.srqwqe_buf_pg_sz);
- buf->page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
- ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift,
- &srq->mtt);
- if (ret)
- goto err_user_buf;
+static void free_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+ hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr);
+}
- ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->mtt, srq->umem);
- if (ret)
- goto err_user_srq_mtt;
-
- /* config index queue BA */
- srq->idx_que.umem = ib_umem_get(srq->ibsrq.device, ucmd.que_addr,
- srq->idx_que.buf_size, 0);
- if (IS_ERR(srq->idx_que.umem)) {
- dev_err(hr_dev->dev, "ib_umem_get error for index queue\n");
- ret = PTR_ERR(srq->idx_que.umem);
- goto err_user_srq_mtt;
+static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
+ struct ib_udata *udata, unsigned long addr)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_buf_attr buf_attr = {};
+ int err;
+
+ srq->idx_que.entry_shift = ilog2(HNS_ROCE_IDX_QUE_ENTRY_SZ);
+
+ buf_attr.page_shift = hr_dev->caps.idx_buf_pg_sz + PAGE_ADDR_SHIFT;
+ buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt,
+ srq->idx_que.entry_shift);
+ buf_attr.region[0].hopnum = hr_dev->caps.idx_hop_num;
+ buf_attr.region_count = 1;
+ buf_attr.fixed_page = true;
+
+ err = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr,
+ hr_dev->caps.idx_ba_pg_sz + PAGE_ADDR_SHIFT,
+ udata, addr);
+ if (err) {
+ ibdev_err(ibdev, "Failed to alloc SRQ idx mtr, err %d\n", err);
+ return err;
}
- buf = &srq->idx_que.idx_buf;
- buf->npages = DIV_ROUND_UP(ib_umem_page_count(srq->idx_que.umem),
- 1 << hr_dev->caps.idx_buf_pg_sz);
- buf->page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
- ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift,
- &srq->idx_que.mtt);
- if (ret) {
- dev_err(hr_dev->dev, "hns_roce_mtt_init error for idx que\n");
- goto err_user_idx_mtt;
- }
+ if (!udata) {
+ idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL);
+ if (!idx_que->bitmap) {
+ ibdev_err(ibdev, "Failed to alloc SRQ idx bitmap\n");
+ err = -ENOMEM;
+ goto err_idx_mtr;
+ }
- ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->idx_que.mtt,
- srq->idx_que.umem);
- if (ret) {
- dev_err(hr_dev->dev,
- "hns_roce_ib_umem_write_mtt error for idx que\n");
- goto err_user_idx_buf;
}
return 0;
+err_idx_mtr:
+ hns_roce_mtr_destroy(hr_dev, &idx_que->mtr);
-err_user_idx_buf:
- hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
-
-err_user_idx_mtt:
- ib_umem_release(srq->idx_que.umem);
-
-err_user_srq_mtt:
- hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
-
-err_user_buf:
- ib_umem_release(srq->umem);
-
- return ret;
+ return err;
}
-static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq,
- u32 page_shift)
+static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
struct hns_roce_idx_que *idx_que = &srq->idx_que;
- idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL);
- if (!idx_que->bitmap)
- return -ENOMEM;
-
- idx_que->buf_size = srq->idx_que.buf_size;
-
- if (hns_roce_buf_alloc(hr_dev, idx_que->buf_size, (1 << page_shift) * 2,
- &idx_que->idx_buf, page_shift)) {
- bitmap_free(idx_que->bitmap);
- return -ENOMEM;
- }
-
- return 0;
+ bitmap_free(idx_que->bitmap);
+ idx_que->bitmap = NULL;
+ hns_roce_mtr_destroy(hr_dev, &idx_que->mtr);
}
-static int create_kernel_srq(struct hns_roce_srq *srq, int srq_buf_size)
+static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
- u32 page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
- int ret;
-
- if (hns_roce_buf_alloc(hr_dev, srq_buf_size, (1 << page_shift) * 2,
- &srq->buf, page_shift))
- return -ENOMEM;
-
srq->head = 0;
srq->tail = srq->wqe_cnt - 1;
-
- ret = hns_roce_mtt_init(hr_dev, srq->buf.npages, srq->buf.page_shift,
- &srq->mtt);
- if (ret)
- goto err_kernel_buf;
-
- ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, &srq->buf);
- if (ret)
- goto err_kernel_srq_mtt;
-
- page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
- ret = hns_roce_create_idx_que(srq->ibsrq.pd, srq, page_shift);
- if (ret) {
- dev_err(hr_dev->dev, "Create idx queue fail(%d)!\n", ret);
- goto err_kernel_srq_mtt;
- }
-
- /* Init mtt table for idx_que */
- ret = hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf.npages,
- srq->idx_que.idx_buf.page_shift,
- &srq->idx_que.mtt);
- if (ret)
- goto err_kernel_create_idx;
-
- /* Write buffer address into the mtt table */
- ret = hns_roce_buf_write_mtt(hr_dev, &srq->idx_que.mtt,
- &srq->idx_que.idx_buf);
- if (ret)
- goto err_kernel_idx_buf;
-
srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL);
- if (!srq->wrid) {
- ret = -ENOMEM;
- goto err_kernel_idx_buf;
- }
+ if (!srq->wrid)
+ return -ENOMEM;
return 0;
-
-err_kernel_idx_buf:
- hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
-
-err_kernel_create_idx:
- hns_roce_buf_free(hr_dev, srq->idx_que.buf_size,
- &srq->idx_que.idx_buf);
- kfree(srq->idx_que.bitmap);
-
-err_kernel_srq_mtt:
- hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
-
-err_kernel_buf:
- hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf);
-
- return ret;
-}
-
-static void destroy_user_srq(struct hns_roce_dev *hr_dev,
- struct hns_roce_srq *srq)
-{
- hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
- ib_umem_release(srq->idx_que.umem);
- hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
- ib_umem_release(srq->umem);
}
-static void destroy_kernel_srq(struct hns_roce_dev *hr_dev,
- struct hns_roce_srq *srq, int srq_buf_size)
+static void free_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
{
- kvfree(srq->wrid);
- hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
- hns_roce_buf_free(hr_dev, srq->idx_que.buf_size, &srq->idx_que.idx_buf);
- kfree(srq->idx_que.bitmap);
- hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
- hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf);
+ kfree(srq->wrid);
+ srq->wrid = NULL;
}
int hns_roce_create_srq(struct ib_srq *ib_srq,
@@ -365,8 +283,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device);
struct hns_roce_ib_create_srq_resp resp = {};
struct hns_roce_srq *srq = to_hr_srq(ib_srq);
- int srq_desc_size;
- int srq_buf_size;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_ib_create_srq ucmd = {};
int ret = 0;
u32 cqn;
@@ -381,41 +299,45 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1);
srq->max_gs = init_attr->attr.max_sge;
- srq_desc_size = roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE,
- HNS_ROCE_SGE_SIZE * srq->max_gs));
-
- srq->wqe_shift = ilog2(srq_desc_size);
-
- srq_buf_size = srq->wqe_cnt * srq_desc_size;
-
- srq->idx_que.entry_sz = HNS_ROCE_IDX_QUE_ENTRY_SZ;
- srq->idx_que.buf_size = srq->wqe_cnt * srq->idx_que.entry_sz;
- srq->mtt.mtt_type = MTT_TYPE_SRQWQE;
- srq->idx_que.mtt.mtt_type = MTT_TYPE_IDX;
-
if (udata) {
- ret = create_user_srq(srq, udata, srq_buf_size);
+ ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
if (ret) {
- dev_err(hr_dev->dev, "Create user srq failed\n");
- goto err_srq;
+ ibdev_err(ibdev, "Failed to copy SRQ udata, err %d\n",
+ ret);
+ return ret;
}
- } else {
- ret = create_kernel_srq(srq, srq_buf_size);
+ }
+
+ ret = alloc_srq_buf(hr_dev, srq, udata, ucmd.buf_addr);
+ if (ret) {
+ ibdev_err(ibdev, "Failed to alloc SRQ buffer, err %d\n", ret);
+ return ret;
+ }
+
+ ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr);
+ if (ret) {
+ ibdev_err(ibdev, "Failed to alloc SRQ idx, err %d\n", ret);
+ goto err_buf_alloc;
+ }
+
+ if (!udata) {
+ ret = alloc_srq_wrid(hr_dev, srq);
if (ret) {
- dev_err(hr_dev->dev, "Create kernel srq failed\n");
- goto err_srq;
+ ibdev_err(ibdev, "Failed to alloc SRQ wrid, err %d\n",
+ ret);
+ goto err_idx_alloc;
}
}
cqn = ib_srq_has_cq(init_attr->srq_type) ?
to_hr_cq(init_attr->ext.cq)->cqn : 0;
-
srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG;
- ret = hns_roce_srq_alloc(hr_dev, to_hr_pd(ib_srq->pd)->pdn, cqn, 0,
- &srq->mtt, 0, srq);
- if (ret)
- goto err_wrid;
+ ret = alloc_srqc(hr_dev, srq, to_hr_pd(ib_srq->pd)->pdn, cqn, 0, 0);
+ if (ret) {
+ ibdev_err(ibdev, "Failed to alloc SRQ context, err %d\n", ret);
+ goto err_wrid_alloc;
+ }
srq->event = hns_roce_ib_srq_event;
resp.srqn = srq->srqn;
@@ -431,15 +353,13 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
return 0;
err_srqc_alloc:
- hns_roce_srq_free(hr_dev, srq);
-
-err_wrid:
- if (udata)
- destroy_user_srq(hr_dev, srq);
- else
- destroy_kernel_srq(hr_dev, srq, srq_buf_size);
-
-err_srq:
+ free_srqc(hr_dev, srq);
+err_wrid_alloc:
+ free_srq_wrid(hr_dev, srq);
+err_idx_alloc:
+ free_srq_idx(hr_dev, srq);
+err_buf_alloc:
+ free_srq_buf(hr_dev, srq);
return ret;
}
@@ -448,18 +368,10 @@ void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
struct hns_roce_srq *srq = to_hr_srq(ibsrq);
- hns_roce_srq_free(hr_dev, srq);
- hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
-
- if (udata) {
- hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
- } else {
- kvfree(srq->wrid);
- hns_roce_buf_free(hr_dev, srq->wqe_cnt << srq->wqe_shift,
- &srq->buf);
- }
- ib_umem_release(srq->idx_que.umem);
- ib_umem_release(srq->umem);
+ free_srqc(hr_dev, srq);
+ free_srq_idx(hr_dev, srq);
+ free_srq_wrid(hr_dev, srq);
+ free_srq_buf(hr_dev, srq);
}
int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev)
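
alloc_srq_buf() above derives the SRQ WQE stride from the SGE count: the WQE size is max_gs times the SGE size rounded up to a power of two, and wqe_shift is its base-2 logarithm. A quick standalone illustration, assuming the usual 16-byte SGE; the two helpers are simplified local stand-ins for the kernel's roundup_pow_of_two() and ilog2():

#include <stdio.h>

static unsigned int pow2_roundup(unsigned int n)
{
	unsigned int p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

static unsigned int log2_floor(unsigned int n)
{
	unsigned int l = 0;

	while (n >>= 1)
		l++;
	return l;
}

int main(void)
{
	unsigned int sge_size = 16;	/* assumed HNS_ROCE_SGE_SIZE */
	unsigned int max_gs = 5;	/* example max_sge from the caller */
	unsigned int wqe_size = pow2_roundup(sge_size * max_gs);

	/* 5 * 16 = 80 rounds up to 128, so wqe_shift = 7 */
	printf("wqe_size=%u wqe_shift=%u\n", wqe_size, log2_floor(wqe_size));
	return 0;
}
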
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 02a169f8027b..5f8f8d5c0ce0 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -141,10 +141,11 @@ static int create_iboe_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr)
return 0;
}
-int mlx4_ib_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr,
- u32 flags, struct ib_udata *udata)
-
+int mlx4_ib_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
{
+ struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
+
if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
return -EINVAL;
@@ -167,12 +168,14 @@ int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr,
int slave_sgid_index, u8 *s_mac, u16 vlan_tag)
{
struct rdma_ah_attr slave_attr = *ah_attr;
+ struct rdma_ah_init_attr init_attr = {};
struct mlx4_ib_ah *mah = to_mah(ah);
int ret;
slave_attr.grh.sgid_attr = NULL;
slave_attr.grh.sgid_index = slave_sgid_index;
- ret = mlx4_ib_create_ah(ah, &slave_attr, 0, NULL);
+ init_attr.ah_attr = &slave_attr;
+ ret = mlx4_ib_create_ah(ah, &init_attr, NULL);
if (ret)
return ret;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index d188573187fa..182a237b87f7 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -752,7 +752,7 @@ int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
-int mlx4_ib_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags,
+int mlx4_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr,
int slave_sgid_index, u8 *s_mac, u16 vlan_tag);
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 228be05fbaf8..8cca61c671f8 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -16,7 +16,8 @@ mlx5_ib-y := ah.o \
qpc.o \
restrack.o \
srq.o \
- srq_cmd.o
+ srq_cmd.o \
+ wr.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index 80642dd359bc..59e5ec39b447 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -32,9 +32,28 @@
#include "mlx5_ib.h"
+static __be16 mlx5_ah_get_udp_sport(const struct mlx5_ib_dev *dev,
+ const struct rdma_ah_attr *ah_attr)
+{
+ enum ib_gid_type gid_type = ah_attr->grh.sgid_attr->gid_type;
+ __be16 sport;
+
+ if ((gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) &&
+ (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) &&
+ (ah_attr->grh.flow_label & IB_GRH_FLOWLABEL_MASK))
+ sport = cpu_to_be16(
+ rdma_flow_label_to_udp_sport(ah_attr->grh.flow_label));
+ else
+ sport = mlx5_get_roce_udp_sport_min(dev,
+ ah_attr->grh.sgid_attr);
+
+ return sport;
+}
+
static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
- struct rdma_ah_attr *ah_attr)
+ struct rdma_ah_init_attr *init_attr)
{
+ struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
enum ib_gid_type gid_type;
if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
@@ -51,12 +70,15 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4);
if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
+ if (init_attr->xmit_slave)
+ ah->xmit_port =
+ mlx5_lag_get_slave_port(dev->mdev,
+ init_attr->xmit_slave);
gid_type = ah_attr->grh.sgid_attr->gid_type;
memcpy(ah->av.rmac, ah_attr->roce.dmac,
sizeof(ah_attr->roce.dmac));
- ah->av.udp_sport =
- mlx5_get_roce_udp_sport(dev, ah_attr->grh.sgid_attr);
+ ah->av.udp_sport = mlx5_ah_get_udp_sport(dev, ah_attr);
ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0x7) << 1;
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
#define MLX5_ECN_ENABLED BIT(1)
@@ -68,10 +90,11 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
}
}
-int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr,
- u32 flags, struct ib_udata *udata)
+int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
{
+ struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
struct mlx5_ib_ah *ah = to_mah(ibah);
struct mlx5_ib_dev *dev = to_mdev(ibah->device);
enum rdma_ah_attr_type ah_type = ah_attr->type;
@@ -97,7 +120,7 @@ int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr,
return err;
}
- create_ib_ah(dev, ah, ah_attr);
+ create_ib_ah(dev, ah, init_attr);
return 0;
}
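
create_ib_ah() now prefers deriving the RoCE v2 UDP source port from the GRH flow label via rdma_flow_label_to_udp_sport(), falling back to the GID-based minimum port otherwise. Roughly, the 20-bit flow label is folded into 14 bits and placed in the ephemeral range starting at 0xC000. The sketch below approximates that folding for illustration only; the exact masks of the core helper may differ.

#include <stdio.h>
#include <stdint.h>

/* Approximate fold: XOR the top 6 bits of the 20-bit flow label into the
 * low 14 bits, then force the result into the 0xC000..0xFFFF range. */
static uint16_t flow_label_to_udp_sport(uint32_t fl)
{
	uint32_t fl_low = fl & 0x3fff;
	uint32_t fl_high = (fl >> 14) & 0x3f;

	return (uint16_t)((fl_low ^ fl_high) | 0xC000);
}

int main(void)
{
	/* prints 0xe341 for flow label 0x12345 */
	printf("0x%04x\n", flow_label_to_udp_sport(0x12345));
	return 0;
}
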
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index a2fcbc49131e..cc24c711e92a 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -1,46 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
- * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2017-2020, Mellanox Technologies inc. All rights reserved.
*/
#include "cmd.h"
int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey)
{
- u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {};
int err;
MLX5_SET(query_special_contexts_in, in, opcode,
MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ err = mlx5_cmd_exec_inout(dev, query_special_contexts, in, out);
if (!err)
*mkey = MLX5_GET(query_special_contexts_out, out,
dump_fill_mkey);
@@ -50,12 +23,12 @@ int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey)
int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey)
{
u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {};
- u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {};
int err;
MLX5_SET(query_special_contexts_in, in, opcode,
MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ err = mlx5_cmd_exec_inout(dev, query_special_contexts, in, out);
if (!err)
*null_mkey = MLX5_GET(query_special_contexts_out, out,
null_mkey);
@@ -63,23 +36,15 @@ int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey)
}
int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
- void *out, int out_size)
+ void *out)
{
- u32 in[MLX5_ST_SZ_DW(query_cong_params_in)] = { };
+ u32 in[MLX5_ST_SZ_DW(query_cong_params_in)] = {};
MLX5_SET(query_cong_params_in, in, opcode,
MLX5_CMD_OP_QUERY_CONG_PARAMS);
MLX5_SET(query_cong_params_in, in, cong_protocol, cong_point);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
-}
-
-int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev,
- void *in, int in_size)
-{
- u32 out[MLX5_ST_SZ_DW(modify_cong_params_out)] = { };
-
- return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out));
+ return mlx5_cmd_exec_inout(dev, query_cong_params, in, out);
}
int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
@@ -133,7 +98,7 @@ int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
MLX5_SET64(alloc_memic_in, in, range_start_addr,
hw_start_addr + (page_idx * PAGE_SIZE));
- ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ ret = mlx5_cmd_exec_inout(dev, alloc_memic, in, out);
if (ret) {
spin_lock(&dm->lock);
bitmap_clear(dm->memic_alloc_pages,
@@ -162,8 +127,7 @@ void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
struct mlx5_core_dev *dev = dm->dev;
u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE);
- u32 out[MLX5_ST_SZ_DW(dealloc_memic_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(dealloc_memic_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(dealloc_memic_in)] = {};
u64 start_page_idx;
int err;
@@ -174,7 +138,7 @@ void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
MLX5_SET64(dealloc_memic_in, in, memic_start_addr, addr);
MLX5_SET(dealloc_memic_in, in, memic_size, length);
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ err = mlx5_cmd_exec_in(dev, dealloc_memic, in);
if (err)
return;
@@ -198,49 +162,46 @@ int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid)
{
- u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {};
- u32 out[MLX5_ST_SZ_DW(destroy_tir_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {};
MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR);
MLX5_SET(destroy_tir_in, in, tirn, tirn);
MLX5_SET(destroy_tir_in, in, uid, uid);
- mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ mlx5_cmd_exec_in(dev, destroy_tir, in);
}
void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid)
{
- u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_tis_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {};
MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS);
MLX5_SET(destroy_tis_in, in, tisn, tisn);
MLX5_SET(destroy_tis_in, in, uid, uid);
- mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ mlx5_cmd_exec_in(dev, destroy_tis, in);
}
void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid)
{
- u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {};
- u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {};
MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
MLX5_SET(destroy_rqt_in, in, rqtn, rqtn);
MLX5_SET(destroy_rqt_in, in, uid, uid);
- mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ mlx5_cmd_exec_in(dev, destroy_rqt, in);
}
int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn,
u16 uid)
{
- u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {};
int err;
MLX5_SET(alloc_transport_domain_in, in, opcode,
MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN);
MLX5_SET(alloc_transport_domain_in, in, uid, uid);
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ err = mlx5_cmd_exec_inout(dev, alloc_transport_domain, in, out);
if (!err)
*tdn = MLX5_GET(alloc_transport_domain_out, out,
transport_domain);
@@ -251,32 +212,29 @@ int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn,
void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn,
u16 uid)
{
- u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {};
MLX5_SET(dealloc_transport_domain_in, in, opcode,
MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
MLX5_SET(dealloc_transport_domain_in, in, uid, uid);
MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn);
- mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ mlx5_cmd_exec_in(dev, dealloc_transport_domain, in);
}
void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid)
{
- u32 out[MLX5_ST_SZ_DW(dealloc_pd_out)] = {};
- u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {};
+ u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {};
MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD);
MLX5_SET(dealloc_pd_in, in, pd, pdn);
MLX5_SET(dealloc_pd_in, in, uid, uid);
- mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ mlx5_cmd_exec_in(dev, dealloc_pd, in);
}
int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
u32 qpn, u16 uid)
{
- u32 out[MLX5_ST_SZ_DW(attach_to_mcg_out)] = {};
- u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {};
+ u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {};
void *gid;
MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG);
@@ -284,14 +242,13 @@ int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
MLX5_SET(attach_to_mcg_in, in, uid, uid);
gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid);
memcpy(gid, mgid, sizeof(*mgid));
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, attach_to_mcg, in);
}
int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
u32 qpn, u16 uid)
{
- u32 out[MLX5_ST_SZ_DW(detach_from_mcg_out)] = {};
- u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {};
+ u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {};
void *gid;
MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
@@ -299,18 +256,18 @@ int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
MLX5_SET(detach_from_mcg_in, in, uid, uid);
gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid);
memcpy(gid, mgid, sizeof(*mgid));
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, detach_from_mcg, in);
}
int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid)
{
u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {};
- u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)] = {};
int err;
MLX5_SET(alloc_xrcd_in, in, opcode, MLX5_CMD_OP_ALLOC_XRCD);
MLX5_SET(alloc_xrcd_in, in, uid, uid);
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ err = mlx5_cmd_exec_inout(dev, alloc_xrcd, in, out);
if (!err)
*xrcdn = MLX5_GET(alloc_xrcd_out, out, xrcd);
return err;
@@ -318,13 +275,12 @@ int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid)
int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid)
{
- u32 out[MLX5_ST_SZ_DW(dealloc_xrcd_out)] = {};
- u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)] = {};
+ u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)] = {};
MLX5_SET(dealloc_xrcd_in, in, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
MLX5_SET(dealloc_xrcd_in, in, xrcd, xrcdn);
MLX5_SET(dealloc_xrcd_in, in, uid, uid);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, dealloc_xrcd, in);
}
int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
@@ -350,7 +306,7 @@ int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
data = MLX5_ADDR_OF(mad_ifc_in, in, mad);
memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad));
- err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
+ err = mlx5_cmd_exec_inout(dev, mad_ifc, in, out);
if (err)
goto out;
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 43079b18d9b4..f4d8558db434 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -40,10 +40,8 @@
int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey);
int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
- void *out, int out_size);
+ void *out);
int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out);
-int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
- void *in, int in_size);
int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
u64 length, u32 alignment);
void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c
index de4da92b81a6..b9291e482428 100644
--- a/drivers/infiniband/hw/mlx5/cong.c
+++ b/drivers/infiniband/hw/mlx5/cong.c
@@ -290,7 +290,7 @@ static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, u8 port_num,
node = mlx5_ib_param_to_node(offset);
- err = mlx5_cmd_query_cong_params(mdev, node, out, outlen);
+ err = mlx5_cmd_query_cong_params(mdev, node, out);
if (err)
goto free;
@@ -339,7 +339,7 @@ static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, u8 port_num,
MLX5_SET(field_select_r_roce_rp, field, field_select_r_roce_rp,
attr_mask);
- err = mlx5_cmd_modify_cong_params(mdev, in, inlen);
+ err = mlx5_cmd_exec_in(dev->mdev, modify_cong_params, in);
kvfree(in);
alloc_err:
mlx5_ib_put_native_port_mdev(dev, port_num + 1);
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 35b98c2d64d5..1d7feed6d3cb 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -615,7 +615,7 @@ static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs,
enum ib_qp_type qp_type = qp->ibqp.qp_type;
if (qp_type == IB_QPT_RAW_PACKET ||
- (qp->flags & MLX5_IB_QP_UNDERLAY)) {
+ (qp->flags & IB_QP_CREATE_SOURCE_QPN)) {
struct mlx5_ib_raw_packet_qp *raw_packet_qp =
&qp->raw_packet_qp;
struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
index 69cb7e6e8955..08fd6a650868 100644
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ b/drivers/infiniband/hw/mlx5/flow.c
@@ -142,7 +142,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
return -EINVAL;
mqp = to_mqp(qp);
- if (mqp->flags & MLX5_IB_QP_RSS)
+ if (mqp->is_rss)
dest_id = mqp->rss_qp.tirn;
else
dest_id = mqp->raw_packet_qp.rq.tirn;
diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c
index 1ae6fd95acaa..40d418153891 100644
--- a/drivers/infiniband/hw/mlx5/gsi.c
+++ b/drivers/infiniband/hw/mlx5/gsi.c
@@ -119,17 +119,15 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
struct mlx5_ib_gsi_qp *gsi;
struct ib_qp_init_attr hw_init_attr = *init_attr;
const u8 port_num = init_attr->port_num;
- const int num_pkeys = pd->device->attrs.max_pkeys;
- const int num_qps = mlx5_ib_deth_sqpn_cap(dev) ? num_pkeys : 0;
+ int num_qps = 0;
int ret;
- mlx5_ib_dbg(dev, "creating GSI QP\n");
-
- if (port_num > ARRAY_SIZE(dev->devr.ports) || port_num < 1) {
- mlx5_ib_warn(dev,
- "invalid port number %d during GSI QP creation\n",
- port_num);
- return ERR_PTR(-EINVAL);
+ if (mlx5_ib_deth_sqpn_cap(dev)) {
+ if (MLX5_CAP_GEN(dev->mdev,
+ port_type) == MLX5_CAP_PORT_TYPE_IB)
+ num_qps = pd->device->attrs.max_pkeys;
+ else if (dev->lag_active)
+ num_qps = MLX5_MAX_PORTS;
}
gsi = kzalloc(sizeof(*gsi), GFP_KERNEL);
@@ -270,7 +268,7 @@ static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi)
}
static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp,
- u16 qp_index)
+ u16 pkey_index)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
struct ib_qp_attr attr;
@@ -279,7 +277,7 @@ static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp,
mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY | IB_QP_PORT;
attr.qp_state = IB_QPS_INIT;
- attr.pkey_index = qp_index;
+ attr.pkey_index = pkey_index;
attr.qkey = IB_QP1_QKEY;
attr.port_num = gsi->port_num;
ret = ib_modify_qp(qp, &attr, mask);
@@ -313,12 +311,17 @@ static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index)
{
struct ib_device *device = gsi->rx_qp->device;
struct mlx5_ib_dev *dev = to_mdev(device);
+ int pkey_index = qp_index;
+ struct mlx5_ib_qp *mqp;
struct ib_qp *qp;
unsigned long flags;
u16 pkey;
int ret;
- ret = ib_query_pkey(device, gsi->port_num, qp_index, &pkey);
+ if (MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_IB)
+ pkey_index = 0;
+
+ ret = ib_query_pkey(device, gsi->port_num, pkey_index, &pkey);
if (ret) {
mlx5_ib_warn(dev, "unable to read P_Key at port %d, index %d\n",
gsi->port_num, qp_index);
@@ -347,7 +350,10 @@ static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index)
return;
}
- ret = modify_to_rts(gsi, qp, qp_index);
+ mqp = to_mqp(qp);
+ if (dev->lag_active)
+ mqp->gsi_lag_port = qp_index + 1;
+ ret = modify_to_rts(gsi, qp, pkey_index);
if (ret)
goto err_destroy_qp;
@@ -466,11 +472,15 @@ static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi,
static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr)
{
struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
+ struct mlx5_ib_ah *ah = to_mah(wr->ah);
int qp_index = wr->pkey_index;
- if (!mlx5_ib_deth_sqpn_cap(dev))
+ if (!gsi->num_qps)
return gsi->rx_qp;
+ if (dev->lag_active && ah->xmit_port)
+ qp_index = ah->xmit_port - 1;
+
if (qp_index >= gsi->num_qps)
return NULL;
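
With LAG active, get_tx_qp() above picks the GSI tx QP by the AH's transmit slave port instead of the P_Key index, so QP1 traffic follows the bond's slave selection. A small sketch of that selection rule; the struct names and return-code convention are invented for the example.

#include <stdio.h>
#include <stdbool.h>

#define USE_RX_QP	-1	/* fall back to the plain GSI receive QP */
#define NO_TX_QP	-2	/* index out of range: no matching tx QP */

struct gsi_state { int num_qps; };
struct ah_state  { int xmit_port; };	/* 0 = not pinned to a LAG slave */

static int pick_tx_qp(const struct gsi_state *gsi, const struct ah_state *ah,
		      int pkey_index, bool lag_active)
{
	int idx = pkey_index;

	if (!gsi->num_qps)
		return USE_RX_QP;

	if (lag_active && ah->xmit_port)
		idx = ah->xmit_port - 1;	/* one tx QP per slave port */

	return idx < gsi->num_qps ? idx : NO_TX_QP;
}

int main(void)
{
	struct gsi_state gsi = { .num_qps = 2 };
	struct ah_state ah = { .xmit_port = 2 };

	/* AH pinned to slave port 2, so tx QP index 1 is chosen */
	printf("%d\n", pick_tx_qp(&gsi, &ah, 0, true));
	return 0;
}
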
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h
index 3b6750cba796..5b30d3fa8f8d 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.h
+++ b/drivers/infiniband/hw/mlx5/ib_rep.h
@@ -9,9 +9,9 @@
#include <linux/mlx5/eswitch.h>
#include "mlx5_ib.h"
-#ifdef CONFIG_MLX5_ESWITCH
extern const struct mlx5_ib_profile raw_eth_profile;
+#ifdef CONFIG_MLX5_ESWITCH
u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw);
struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
u16 vport_num);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 566b42f3fb18..3af57dfe7224 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -53,6 +53,7 @@
#include <linux/list.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
+#include <rdma/lag.h>
#include <linux/in.h>
#include <linux/etherdevice.h>
#include "mlx5_ib.h"
@@ -60,6 +61,7 @@
#include "cmd.h"
#include "srq.h"
#include "qp.h"
+#include "wr.h"
#include <linux/mlx5/fs_helpers.h>
#include <linux/mlx5/accel.h>
#include <rdma/uverbs_std_types.h>
@@ -628,8 +630,8 @@ static int mlx5_ib_del_gid(const struct ib_gid_attr *attr,
attr->index, NULL, NULL);
}
-__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev,
- const struct ib_gid_attr *attr)
+__be16 mlx5_get_roce_udp_sport_min(const struct mlx5_ib_dev *dev,
+ const struct ib_gid_attr *attr)
{
if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
return 0;
@@ -2561,7 +2563,7 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
struct mlx5_ib_alloc_pd_resp resp;
int err;
u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {};
- u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {};
u16 uid = 0;
struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
udata, struct mlx5_ib_ucontext, ibucontext);
@@ -2569,8 +2571,7 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
uid = context ? context->devx_uid : 0;
MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD);
MLX5_SET(alloc_pd_in, in, uid, uid);
- err = mlx5_cmd_exec(to_mdev(ibdev)->mdev, in, sizeof(in),
- out, sizeof(out));
+ err = mlx5_cmd_exec_inout(to_mdev(ibdev)->mdev, alloc_pd, in, out);
if (err)
return err;
@@ -3944,7 +3945,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
} else {
dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- if (mqp->flags & MLX5_IB_QP_RSS)
+ if (mqp->is_rss)
dst->tir_num = mqp->rss_qp.tirn;
else
dst->tir_num = mqp->raw_packet_qp.rq.tirn;
@@ -4420,7 +4421,7 @@ static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
uid = ibqp->pd ?
to_mpd(ibqp->pd)->uid : 0;
- if (mqp->flags & MLX5_IB_QP_UNDERLAY) {
+ if (mqp->flags & IB_QP_CREATE_SOURCE_QPN) {
mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n");
return -EOPNOTSUPP;
}
@@ -6540,6 +6541,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
dev->ib_dev.phys_port_cnt = dev->num_ports;
dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev);
dev->ib_dev.dev.parent = mdev->device;
+ dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES;
mutex_init(&dev->cap_mask_mutex);
INIT_LIST_HEAD(&dev->qp_list);
@@ -6629,8 +6631,8 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.modify_qp = mlx5_ib_modify_qp,
.modify_srq = mlx5_ib_modify_srq,
.poll_cq = mlx5_ib_poll_cq,
- .post_recv = mlx5_ib_post_recv,
- .post_send = mlx5_ib_post_send,
+ .post_recv = mlx5_ib_post_recv_nodrain,
+ .post_send = mlx5_ib_post_send_nodrain,
.post_srq_recv = mlx5_ib_post_srq_recv,
.process_mad = mlx5_ib_process_mad,
.query_ah = mlx5_ib_query_ah,
@@ -7131,6 +7133,8 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
int err;
int i;
+ dev->profile = profile;
+
for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
if (profile->stage[i].init) {
err = profile->stage[i].init(dev);
@@ -7139,7 +7143,6 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
}
}
- dev->profile = profile;
dev->ib_active = true;
return dev;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index aaabb8a98eed..482b54eb9764 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -337,7 +337,6 @@ struct mlx5_ib_rwq {
struct ib_umem *umem;
size_t buf_size;
unsigned int page_shift;
- int create_type;
struct mlx5_db db;
u32 user_index;
u32 wqe_count;
@@ -346,17 +345,6 @@ struct mlx5_ib_rwq {
u32 create_flags; /* Use enum mlx5_ib_wq_flags */
};
-enum {
- MLX5_QP_USER,
- MLX5_QP_KERNEL,
- MLX5_QP_EMPTY
-};
-
-enum {
- MLX5_WQ_USER,
- MLX5_WQ_KERNEL
-};
-
struct mlx5_ib_rwq_ind_table {
struct ib_rwq_ind_table ib_rwq_ind_tbl;
u32 rqtn;
@@ -443,34 +431,37 @@ struct mlx5_ib_qp {
/* serialize qp state modifications
*/
struct mutex mutex;
+ /* cached variant of create_flags from struct ib_qp_init_attr */
u32 flags;
u8 port;
u8 state;
- int wq_sig;
- int scat_cqe;
int max_inline_data;
struct mlx5_bf bf;
- int has_rq;
+ u8 has_rq:1;
+ u8 is_rss:1;
/* only for user space QPs. For kernel
* we have it from the bf object
*/
int bfregn;
- int create_type;
-
struct list_head qps_list;
struct list_head cq_recv_list;
struct list_head cq_send_list;
struct mlx5_rate_limit rl;
u32 underlay_qpn;
u32 flags_en;
- /* storage for qp sub type when core qp type is IB_QPT_DRIVER */
- enum ib_qp_type qp_sub_type;
+ /*
+ * IB/core doesn't store low-level QP types, so
+ * store both MLX and IBTA types in the field below.
+ * IB_QPT_DRIVER is broken down into DCI/DCT subtypes.
+ */
+ enum ib_qp_type type;
/* A flag to indicate that a new counter has been configured
* but has not yet taken effect
*/
u32 counter_pending;
+ u16 gsi_lag_port;
};
struct mlx5_ib_cq_buf {
@@ -481,24 +472,6 @@ struct mlx5_ib_cq_buf {
int nent;
};
-enum mlx5_ib_qp_flags {
- MLX5_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
- MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
- MLX5_IB_QP_CROSS_CHANNEL = IB_QP_CREATE_CROSS_CHANNEL,
- MLX5_IB_QP_MANAGED_SEND = IB_QP_CREATE_MANAGED_SEND,
- MLX5_IB_QP_MANAGED_RECV = IB_QP_CREATE_MANAGED_RECV,
- MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 5,
- /* QP uses 1 as its source QP number */
- MLX5_IB_QP_SQPN_QP1 = 1 << 6,
- MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7,
- MLX5_IB_QP_RSS = 1 << 8,
- MLX5_IB_QP_CVLAN_STRIPPING = 1 << 9,
- MLX5_IB_QP_UNDERLAY = 1 << 10,
- MLX5_IB_QP_PCI_WRITE_END_PADDING = 1 << 11,
- MLX5_IB_QP_TUNNEL_OFFLOAD = 1 << 12,
- MLX5_IB_QP_PACKET_BASED_CREDIT = 1 << 13,
-};
-
struct mlx5_umr_wr {
struct ib_send_wr wr;
u64 virt_addr;
@@ -1181,7 +1154,7 @@ void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db)
void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
-int mlx5_ib_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags,
+int mlx5_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags);
@@ -1205,10 +1178,6 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata);
void mlx5_ib_drain_sq(struct ib_qp *qp);
void mlx5_ib_drain_rq(struct ib_qp *qp);
-int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
- const struct ib_send_wr **bad_wr);
-int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
- const struct ib_recv_wr **bad_wr);
int mlx5_ib_read_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
size_t buflen, size_t *bc);
int mlx5_ib_read_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
@@ -1383,8 +1352,8 @@ int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u8 port,
int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
u64 guid, int type);
-__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev,
- const struct ib_gid_attr *attr);
+__be16 mlx5_get_roce_udp_sport_min(const struct mlx5_ib_dev *dev,
+ const struct ib_gid_attr *attr);
void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
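With the reworked struct mlx5_ib_qp above, the private MLX5_IB_QP_* enum is gone: IB-core create flags are cached verbatim in qp->flags, vendor uAPI bits (MLX5_QP_FLAG_*) live in qp->flags_en, and one-bit state such as has_rq and is_rss becomes a bitfield. A sketch of how call sites test the new layout (the helper names below are hypothetical, used only for illustration):

static inline bool qp_is_underlay(const struct mlx5_ib_qp *qp)
{
	/* was: qp->flags & MLX5_IB_QP_UNDERLAY */
	return qp->flags & IB_QP_CREATE_SOURCE_QPN;
}

static inline bool qp_has_wq_sig(const struct mlx5_ib_qp *qp)
{
	/* was: qp->wq_sig */
	return qp->flags_en & MLX5_QP_FLAG_SIGNATURE;
}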
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 16af1105cfcf..7d2ec9ee5097 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -447,8 +447,7 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
{
int wq_num = pfault->event_subtype == MLX5_PFAULT_SUBTYPE_WQE ?
pfault->wqe.wq_num : pfault->token;
- u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = { };
- u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = { };
+ u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {};
int err;
MLX5_SET(page_fault_resume_in, in, opcode, MLX5_CMD_OP_PAGE_FAULT_RESUME);
@@ -457,7 +456,7 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
MLX5_SET(page_fault_resume_in, in, error, !!error);
- err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+ err = mlx5_cmd_exec_in(dev->mdev, page_fault_resume, in);
if (err)
mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x err %d\n",
wq_num, err);
@@ -1136,8 +1135,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
if (qp->ibqp.qp_type == IB_QPT_XRC_INI)
*wqe += sizeof(struct mlx5_wqe_xrc_seg);
- if (qp->ibqp.qp_type == IB_QPT_UD ||
- qp->qp_sub_type == MLX5_IB_QPT_DCI) {
+ if (qp->type == IB_QPT_UD || qp->type == MLX5_IB_QPT_DCI) {
av = *wqe;
if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV))
*wqe += sizeof(struct mlx5_av);
@@ -1190,7 +1188,7 @@ static int mlx5_ib_mr_responder_pfault_handler_rq(struct mlx5_ib_dev *dev,
struct mlx5_ib_wq *wq = &qp->rq;
int wqe_size = 1 << wq->wqe_shift;
- if (qp->wq_sig) {
+ if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) {
mlx5_ib_err(dev, "ODP fault with WQE signatures is not supported\n");
return -EFAULT;
}
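The qp.c diff below splits the old create_qp_common() into per-flavour helpers. A hypothetical outline of the resulting dispatch, pieced together from the helpers introduced in the hunks that follow (not code from the patch itself):

static int create_qp_dispatch(struct mlx5_ib_dev *dev, struct ib_pd *pd,
			      struct mlx5_ib_qp *qp,
			      struct mlx5_create_qp_params *params)
{
	if (qp->type == MLX5_IB_QPT_DCT)
		return create_dct(pd, qp, params);
	if (qp->type == IB_QPT_XRC_TGT)
		return create_xrc_tgt_qp(dev, qp, params);
	if (params->is_rss_raw)
		return create_rss_raw_qp_tir(dev, pd, qp, params);
	return params->udata ? create_user_qp(dev, pd, qp, params) :
			       create_kernel_qp(dev, pd, qp, params);
}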
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index af599c8b88aa..c571b7a97f10 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -40,9 +40,7 @@
#include "ib_rep.h"
#include "cmd.h"
#include "qp.h"
-
-/* not supported currently */
-static int wq_signature;
+#include "wr.h"
enum {
MLX5_IB_ACK_REQ_FREQ = 8,
@@ -55,32 +53,6 @@ enum {
MLX5_IB_LINK_TYPE_ETH = 1
};
-enum {
- MLX5_IB_SQ_STRIDE = 6,
- MLX5_IB_SQ_UMR_INLINE_THRESHOLD = 64,
-};
-
-static const u32 mlx5_ib_opcode[] = {
- [IB_WR_SEND] = MLX5_OPCODE_SEND,
- [IB_WR_LSO] = MLX5_OPCODE_LSO,
- [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM,
- [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE,
- [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM,
- [IB_WR_RDMA_READ] = MLX5_OPCODE_RDMA_READ,
- [IB_WR_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_CS,
- [IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA,
- [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL,
- [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR,
- [IB_WR_REG_MR] = MLX5_OPCODE_UMR,
- [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS,
- [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA,
- [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR,
-};
-
-struct mlx5_wqe_eth_pad {
- u8 rsvd0[16];
-};
-
enum raw_qp_set_mask_map {
MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID = 1UL << 0,
MLX5_RAW_QP_RATE_LIMIT = 1UL << 1,
@@ -392,17 +364,26 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
cap->max_recv_wr = 0;
cap->max_recv_sge = 0;
} else {
+ int wq_sig = !!(qp->flags_en & MLX5_QP_FLAG_SIGNATURE);
+
if (ucmd) {
qp->rq.wqe_cnt = ucmd->rq_wqe_count;
if (ucmd->rq_wqe_shift > BITS_PER_BYTE * sizeof(ucmd->rq_wqe_shift))
return -EINVAL;
qp->rq.wqe_shift = ucmd->rq_wqe_shift;
- if ((1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) < qp->wq_sig)
+ if ((1 << qp->rq.wqe_shift) /
+ sizeof(struct mlx5_wqe_data_seg) <
+ wq_sig)
return -EINVAL;
- qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
+ qp->rq.max_gs =
+ (1 << qp->rq.wqe_shift) /
+ sizeof(struct mlx5_wqe_data_seg) -
+ wq_sig;
qp->rq.max_post = qp->rq.wqe_cnt;
} else {
- wqe_size = qp->wq_sig ? sizeof(struct mlx5_wqe_signature_seg) : 0;
+ wqe_size =
+ wq_sig ? sizeof(struct mlx5_wqe_signature_seg) :
+ 0;
wqe_size += cap->max_recv_sge * sizeof(struct mlx5_wqe_data_seg);
wqe_size = roundup_pow_of_two(wqe_size);
wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size;
@@ -416,7 +397,10 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
return -EINVAL;
}
qp->rq.wqe_shift = ilog2(wqe_size);
- qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
+ qp->rq.max_gs =
+ (1 << qp->rq.wqe_shift) /
+ sizeof(struct mlx5_wqe_data_seg) -
+ wq_sig;
qp->rq.max_post = qp->rq.wqe_cnt;
}
}
@@ -596,7 +580,7 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev,
}
if (attr->qp_type == IB_QPT_RAW_PACKET ||
- qp->flags & MLX5_IB_QP_UNDERLAY) {
+ qp->flags & IB_QP_CREATE_SOURCE_QPN) {
base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6;
} else {
@@ -751,10 +735,7 @@ static int to_mlx5_st(enum ib_qp_type type)
case IB_QPT_SMI: return MLX5_QP_ST_QP0;
case MLX5_IB_QPT_HW_GSI: return MLX5_QP_ST_QP1;
case MLX5_IB_QPT_DCI: return MLX5_QP_ST_DCI;
- case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6;
- case IB_QPT_RAW_PACKET:
- case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE;
- case IB_QPT_MAX:
+ case IB_QPT_RAW_PACKET: return MLX5_QP_ST_RAW_ETHERTYPE;
default: return -EINVAL;
}
}
@@ -891,7 +872,6 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
goto err_umem;
}
- rwq->create_type = MLX5_WQ_USER;
return 0;
err_umem:
@@ -906,15 +886,14 @@ static int adjust_bfregn(struct mlx5_ib_dev *dev,
bfregn % MLX5_NON_FP_BFREGS_PER_UAR;
}
-static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
- struct mlx5_ib_qp *qp, struct ib_udata *udata,
- struct ib_qp_init_attr *attr,
- u32 **in,
- struct mlx5_ib_create_qp_resp *resp, int *inlen,
- struct mlx5_ib_qp_base *base)
+static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_qp *qp, struct ib_udata *udata,
+ struct ib_qp_init_attr *attr, u32 **in,
+ struct mlx5_ib_create_qp_resp *resp, int *inlen,
+ struct mlx5_ib_qp_base *base,
+ struct mlx5_ib_create_qp *ucmd)
{
struct mlx5_ib_ucontext *context;
- struct mlx5_ib_create_qp ucmd;
struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer;
int page_shift = 0;
int uar_index = 0;
@@ -928,30 +907,24 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
u16 uid;
u32 uar_flags;
- err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
- if (err) {
- mlx5_ib_dbg(dev, "copy failed\n");
- return err;
- }
-
context = rdma_udata_to_drv_context(udata, struct mlx5_ib_ucontext,
ibucontext);
- uar_flags = ucmd.flags & (MLX5_QP_FLAG_UAR_PAGE_INDEX |
- MLX5_QP_FLAG_BFREG_INDEX);
+ uar_flags = qp->flags_en &
+ (MLX5_QP_FLAG_UAR_PAGE_INDEX | MLX5_QP_FLAG_BFREG_INDEX);
switch (uar_flags) {
case MLX5_QP_FLAG_UAR_PAGE_INDEX:
- uar_index = ucmd.bfreg_index;
+ uar_index = ucmd->bfreg_index;
bfregn = MLX5_IB_INVALID_BFREG;
break;
case MLX5_QP_FLAG_BFREG_INDEX:
uar_index = bfregn_to_uar_index(dev, &context->bfregi,
- ucmd.bfreg_index, true);
+ ucmd->bfreg_index, true);
if (uar_index < 0)
return uar_index;
bfregn = MLX5_IB_INVALID_BFREG;
break;
case 0:
- if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+ if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL)
return -EINVAL;
bfregn = alloc_bfreg(dev, &context->bfregi);
if (bfregn < 0)
@@ -970,12 +943,12 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
- err = set_user_buf_size(dev, qp, &ucmd, base, attr);
+ err = set_user_buf_size(dev, qp, ucmd, base, attr);
if (err)
goto err_bfreg;
- if (ucmd.buf_addr && ubuffer->buf_size) {
- ubuffer->buf_addr = ucmd.buf_addr;
+ if (ucmd->buf_addr && ubuffer->buf_size) {
+ ubuffer->buf_addr = ucmd->buf_addr;
err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr,
ubuffer->buf_size, &ubuffer->umem,
&npages, &page_shift, &ncont, &offset);
@@ -993,8 +966,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
goto err_umem;
}
- uid = (attr->qp_type != IB_QPT_XRC_TGT &&
- attr->qp_type != IB_QPT_XRC_INI) ? to_mpd(pd)->uid : 0;
+ uid = (attr->qp_type != IB_QPT_XRC_INI) ? to_mpd(pd)->uid : 0;
MLX5_SET(create_qp_in, *in, uid, uid);
pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
if (ubuffer->umem)
@@ -1012,24 +984,14 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
resp->bfreg_index = MLX5_IB_INVALID_BFREG;
qp->bfregn = bfregn;
- err = mlx5_ib_db_map_user(context, udata, ucmd.db_addr, &qp->db);
+ err = mlx5_ib_db_map_user(context, udata, ucmd->db_addr, &qp->db);
if (err) {
mlx5_ib_dbg(dev, "map failed\n");
goto err_free;
}
- err = ib_copy_to_udata(udata, resp, min(udata->outlen, sizeof(*resp)));
- if (err) {
- mlx5_ib_dbg(dev, "copy failed\n");
- goto err_unmap;
- }
- qp->create_type = MLX5_QP_USER;
-
return 0;
-err_unmap:
- mlx5_ib_db_unmap_user(context, &qp->db);
-
err_free:
kvfree(*in);
@@ -1042,72 +1004,50 @@ err_bfreg:
return err;
}
-static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd,
- struct mlx5_ib_qp *qp, struct mlx5_ib_qp_base *base,
- struct ib_udata *udata)
+static void destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct mlx5_ib_qp_base *base, struct ib_udata *udata)
{
- struct mlx5_ib_ucontext *context =
- rdma_udata_to_drv_context(
- udata,
- struct mlx5_ib_ucontext,
- ibucontext);
-
- mlx5_ib_db_unmap_user(context, &qp->db);
- ib_umem_release(base->ubuffer.umem);
-
- /*
- * Free only the BFREGs which are handled by the kernel.
- * BFREGs of UARs allocated dynamically are handled by user.
- */
- if (qp->bfregn != MLX5_IB_INVALID_BFREG)
- mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn);
-}
+ struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
-/* get_sq_edge - Get the next nearby edge.
- *
- * An 'edge' is defined as the first following address after the end
- * of the fragment or the SQ. Accordingly, during the WQE construction
- * which repetitively increases the pointer to write the next data, it
- * simply should check if it gets to an edge.
- *
- * @sq - SQ buffer.
- * @idx - Stride index in the SQ buffer.
- *
- * Return:
- * The new edge.
- */
-static void *get_sq_edge(struct mlx5_ib_wq *sq, u32 idx)
-{
- void *fragment_end;
+ if (udata) {
+ /* User QP */
+ mlx5_ib_db_unmap_user(context, &qp->db);
+ ib_umem_release(base->ubuffer.umem);
- fragment_end = mlx5_frag_buf_get_wqe
- (&sq->fbc,
- mlx5_frag_buf_get_idx_last_contig_stride(&sq->fbc, idx));
+ /*
+ * Free only the BFREGs which are handled by the kernel.
+ * BFREGs of UARs allocated dynamically are handled by user.
+ */
+ if (qp->bfregn != MLX5_IB_INVALID_BFREG)
+ mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn);
+ return;
+ }
- return fragment_end + MLX5_SEND_WQE_BB;
+ /* Kernel QP */
+ kvfree(qp->sq.wqe_head);
+ kvfree(qp->sq.w_list);
+ kvfree(qp->sq.wrid);
+ kvfree(qp->sq.wr_data);
+ kvfree(qp->rq.wrid);
+ if (qp->db.db)
+ mlx5_db_free(dev->mdev, &qp->db);
+ if (qp->buf.frags)
+ mlx5_frag_buf_free(dev->mdev, &qp->buf);
}
-static int create_kernel_qp(struct mlx5_ib_dev *dev,
- struct ib_qp_init_attr *init_attr,
- struct mlx5_ib_qp *qp,
- u32 **in, int *inlen,
- struct mlx5_ib_qp_base *base)
+static int _create_kernel_qp(struct mlx5_ib_dev *dev,
+ struct ib_qp_init_attr *init_attr,
+ struct mlx5_ib_qp *qp, u32 **in, int *inlen,
+ struct mlx5_ib_qp_base *base)
{
int uar_index;
void *qpc;
int err;
- if (init_attr->create_flags & ~(IB_QP_CREATE_INTEGRITY_EN |
- IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
- IB_QP_CREATE_IPOIB_UD_LSO |
- IB_QP_CREATE_NETIF_QP |
- MLX5_IB_QP_CREATE_SQPN_QP1 |
- MLX5_IB_QP_CREATE_WC_TEST))
- return -EINVAL;
-
if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
qp->bf.bfreg = &dev->fp_bfreg;
- else if (init_attr->create_flags & MLX5_IB_QP_CREATE_WC_TEST)
+ else if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST)
qp->bf.bfreg = &dev->wc_bfreg;
else
qp->bf.bfreg = &dev->bfreg;
@@ -1167,10 +1107,8 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
MLX5_SET(qpc, qpc, fre, 1);
MLX5_SET(qpc, qpc, rlky, 1);
- if (init_attr->create_flags & MLX5_IB_QP_CREATE_SQPN_QP1) {
+ if (qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)
MLX5_SET(qpc, qpc, deth_sqpn, 1);
- qp->flags |= MLX5_IB_QP_SQPN_QP1;
- }
mlx5_fill_page_frag_array(&qp->buf,
(__be64 *)MLX5_ADDR_OF(create_qp_in,
@@ -1198,7 +1136,6 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
err = -ENOMEM;
goto err_wrid;
}
- qp->create_type = MLX5_QP_KERNEL;
return 0;
@@ -1218,36 +1155,15 @@ err_buf:
return err;
}
-static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
-{
- kvfree(qp->sq.wqe_head);
- kvfree(qp->sq.w_list);
- kvfree(qp->sq.wrid);
- kvfree(qp->sq.wr_data);
- kvfree(qp->rq.wrid);
- mlx5_db_free(dev->mdev, &qp->db);
- mlx5_frag_buf_free(dev->mdev, &qp->buf);
-}
-
static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
{
- if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) ||
- (attr->qp_type == MLX5_IB_QPT_DCI) ||
- (attr->qp_type == IB_QPT_XRC_INI))
+ if (attr->srq || (qp->type == IB_QPT_XRC_TGT) ||
+ (qp->type == MLX5_IB_QPT_DCI) || (qp->type == IB_QPT_XRC_INI))
return MLX5_SRQ_RQ;
else if (!qp->has_rq)
return MLX5_ZERO_LEN_RQ;
- else
- return MLX5_NON_ZERO_RQ;
-}
-
-static int is_connected(enum ib_qp_type qp_type)
-{
- if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC ||
- qp_type == MLX5_IB_QPT_DCI)
- return 1;
- return 0;
+ return MLX5_NON_ZERO_RQ;
}
static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
@@ -1260,7 +1176,7 @@ static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid);
MLX5_SET(tisc, tisc, transport_domain, tdn);
- if (qp->flags & MLX5_IB_QP_UNDERLAY)
+ if (qp->flags & IB_QP_CREATE_SOURCE_QPN)
MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn);
return mlx5_core_create_tis(dev->mdev, in, &sq->tisn);
@@ -1409,7 +1325,7 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index));
MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv));
- if (mqp->flags & MLX5_IB_QP_CAP_SCATTER_FCS)
+ if (mqp->flags & IB_QP_CREATE_SCATTER_FCS)
MLX5_SET(rqc, rqc, scatter_fcs, 1);
wq = MLX5_ADDR_OF(rqc, rqc, wq);
@@ -1440,13 +1356,6 @@ static void destroy_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
mlx5_core_destroy_rq_tracked(dev, &rq->base.mqp);
}
-static bool tunnel_offload_supported(struct mlx5_core_dev *dev)
-{
- return (MLX5_CAP_ETH(dev, tunnel_stateless_vxlan) ||
- MLX5_CAP_ETH(dev, tunnel_stateless_gre) ||
- MLX5_CAP_ETH(dev, tunnel_stateless_geneve_rx));
-}
-
static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
struct mlx5_ib_rq *rq,
u32 qp_flags_en,
@@ -1524,6 +1433,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
u16 uid = to_mpd(pd)->uid;
u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {};
+ if (!qp->sq.wqe_cnt && !qp->rq.wqe_cnt)
+ return -EINVAL;
if (qp->sq.wqe_cnt) {
err = create_raw_packet_qp_tis(dev, qp, sq, tdn, pd);
if (err)
@@ -1547,9 +1458,9 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (qp->rq.wqe_cnt) {
rq->base.container_mibqp = qp;
- if (qp->flags & MLX5_IB_QP_CVLAN_STRIPPING)
+ if (qp->flags & IB_QP_CREATE_CVLAN_STRIPPING)
rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING;
- if (qp->flags & MLX5_IB_QP_PCI_WRITE_END_PADDING)
+ if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING)
rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING;
err = create_raw_packet_qp_rq(dev, rq, in, inlen, pd);
if (err)
@@ -1584,14 +1495,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? sq->base.mqp.qpn :
rq->base.mqp.qpn;
- err = ib_copy_to_udata(udata, resp, min(udata->outlen, sizeof(*resp)));
- if (err)
- goto err_destroy_tir;
-
return 0;
-err_destroy_tir:
- destroy_raw_packet_qp_tir(dev, rq, qp->flags_en, pd);
err_destroy_rq:
destroy_raw_packet_qp_rq(dev, rq);
err_destroy_sq:
@@ -1643,14 +1548,26 @@ static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *q
to_mpd(qp->ibqp.pd)->uid);
}
-static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
- struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+struct mlx5_create_qp_params {
+ struct ib_udata *udata;
+ size_t inlen;
+ size_t outlen;
+ void *ucmd;
+ u8 is_rss_raw : 1;
+ struct ib_qp_init_attr *attr;
+ u32 uidx;
+ struct mlx5_ib_create_qp_resp resp;
+};
+
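A hypothetical example of how the entry point might fill the new parameter bundle before calling the helpers (illustration only; the real caller is not shown in this part of the diff):

	struct mlx5_create_qp_params params = {
		.udata = udata,
		.attr = init_attr,
		.uidx = MLX5_IB_DEFAULT_UIDX,
		.is_rss_raw = !!init_attr->rwq_ind_tbl,
	};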
+static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_params *params)
{
+ struct ib_qp_init_attr *init_attr = params->attr;
+ struct mlx5_ib_create_qp_rss *ucmd = params->ucmd;
+ struct ib_udata *udata = params->udata;
struct mlx5_ib_ucontext *mucontext = rdma_udata_to_drv_context(
udata, struct mlx5_ib_ucontext, ibucontext);
- struct mlx5_ib_create_qp_resp resp = {};
int inlen;
int outlen;
int err;
@@ -1660,79 +1577,28 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
void *hfso;
u32 selected_fields = 0;
u32 outer_l4;
- size_t min_resp_len;
u32 tdn = mucontext->tdn;
- struct mlx5_ib_create_qp_rss ucmd = {};
- size_t required_cmd_sz;
u8 lb_flag = 0;
- if (init_attr->qp_type != IB_QPT_RAW_PACKET)
- return -EOPNOTSUPP;
-
- if (init_attr->create_flags || init_attr->send_cq)
- return -EINVAL;
-
- min_resp_len = offsetof(typeof(resp), bfreg_index) + sizeof(resp.bfreg_index);
- if (udata->outlen < min_resp_len)
- return -EINVAL;
-
- required_cmd_sz = offsetof(typeof(ucmd), flags) + sizeof(ucmd.flags);
- if (udata->inlen < required_cmd_sz) {
- mlx5_ib_dbg(dev, "invalid inlen\n");
- return -EINVAL;
- }
-
- if (udata->inlen > sizeof(ucmd) &&
- !ib_is_udata_cleared(udata, sizeof(ucmd),
- udata->inlen - sizeof(ucmd))) {
- mlx5_ib_dbg(dev, "inlen is not supported\n");
- return -EOPNOTSUPP;
- }
-
- if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) {
- mlx5_ib_dbg(dev, "copy failed\n");
- return -EFAULT;
- }
-
- if (ucmd.comp_mask) {
+ if (ucmd->comp_mask) {
mlx5_ib_dbg(dev, "invalid comp mask\n");
return -EOPNOTSUPP;
}
- if (ucmd.flags & ~(MLX5_QP_FLAG_TUNNEL_OFFLOADS |
- MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
- MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)) {
- mlx5_ib_dbg(dev, "invalid flags\n");
- return -EOPNOTSUPP;
- }
-
- if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS &&
- !tunnel_offload_supported(dev->mdev)) {
- mlx5_ib_dbg(dev, "tunnel offloads isn't supported\n");
- return -EOPNOTSUPP;
- }
-
- if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_INNER &&
- !(ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)) {
+ if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_INNER &&
+ !(ucmd->flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)) {
mlx5_ib_dbg(dev, "Tunnel offloads must be set for inner RSS\n");
return -EOPNOTSUPP;
}
- if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC || dev->is_rep) {
- lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
+ if (dev->is_rep)
qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
- }
- if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) {
- lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;
- qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC;
- }
+ if (qp->flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC)
+ lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
- err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
- if (err) {
- mlx5_ib_dbg(dev, "copy failed\n");
- return -EINVAL;
- }
+ if (qp->flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)
+ lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;
inlen = MLX5_ST_SZ_BYTES(create_tir_in);
outlen = MLX5_ST_SZ_BYTES(create_tir_out);
@@ -1751,29 +1617,29 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
- if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)
+ if (ucmd->flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)
MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
MLX5_SET(tirc, tirc, self_lb_block, lb_flag);
- if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_INNER)
+ if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_INNER)
hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner);
else
hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
- switch (ucmd.rx_hash_function) {
+ switch (ucmd->rx_hash_function) {
case MLX5_RX_HASH_FUNC_TOEPLITZ:
{
void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key);
- if (len != ucmd.rx_key_len) {
+ if (len != ucmd->rx_key_len) {
err = -EINVAL;
goto err;
}
MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
- memcpy(rss_key, ucmd.rx_hash_key, len);
+ memcpy(rss_key, ucmd->rx_hash_key, len);
break;
}
default:
@@ -1781,7 +1647,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
goto err;
}
- if (!ucmd.rx_hash_fields_mask) {
+ if (!ucmd->rx_hash_fields_mask) {
/* special case when this TIR serves as steering entry without hashing */
if (!init_attr->rwq_ind_tbl->log_ind_tbl_size)
goto create_tir;
@@ -1789,29 +1655,31 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
goto err;
}
- if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) &&
- ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))) {
+ if (((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) &&
+ ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))) {
err = -EINVAL;
goto err;
}
/* If none of IPV4 & IPV6 SRC/DST was set - this bit field is ignored */
- if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4))
+ if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4))
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV4);
- else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
+ else if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV6);
- outer_l4 = ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) << 0 |
- ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) << 1 |
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) << 2;
+ outer_l4 = ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP))
+ << 0 |
+ ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
+ << 1 |
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) << 2;
/* Check that only one l4 protocol is set */
if (outer_l4 & (outer_l4 - 1)) {
@@ -1820,32 +1688,32 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
}
/* If none of TCP & UDP SRC/DST was set - this bit field is ignored */
- if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP))
+ if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP))
MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
MLX5_L4_PROT_TYPE_TCP);
- else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
+ else if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
MLX5_L4_PROT_TYPE_UDP);
- if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6))
+ if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6))
selected_fields |= MLX5_HASH_FIELD_SEL_SRC_IP;
- if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
+ if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
selected_fields |= MLX5_HASH_FIELD_SEL_DST_IP;
- if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP))
+ if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP))
selected_fields |= MLX5_HASH_FIELD_SEL_L4_SPORT;
- if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
+ if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
selected_fields |= MLX5_HASH_FIELD_SEL_L4_DPORT;
- if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI)
+ if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI)
selected_fields |= MLX5_HASH_FIELD_SEL_IPSEC_SPI;
MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
@@ -1867,73 +1735,43 @@ create_tir:
goto err;
if (mucontext->devx_uid) {
- resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
- resp.tirn = qp->rss_qp.tirn;
+ params->resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
+ params->resp.tirn = qp->rss_qp.tirn;
if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner)) {
- resp.tir_icm_addr =
+ params->resp.tir_icm_addr =
MLX5_GET(create_tir_out, out, icm_address_31_0);
- resp.tir_icm_addr |= (u64)MLX5_GET(create_tir_out, out,
- icm_address_39_32)
- << 32;
- resp.tir_icm_addr |= (u64)MLX5_GET(create_tir_out, out,
- icm_address_63_40)
- << 40;
- resp.comp_mask |=
+ params->resp.tir_icm_addr |=
+ (u64)MLX5_GET(create_tir_out, out,
+ icm_address_39_32)
+ << 32;
+ params->resp.tir_icm_addr |=
+ (u64)MLX5_GET(create_tir_out, out,
+ icm_address_63_40)
+ << 40;
+ params->resp.comp_mask |=
MLX5_IB_CREATE_QP_RESP_MASK_TIR_ICM_ADDR;
}
}
- err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
- if (err)
- goto err_copy;
-
kvfree(in);
/* qpn is reserved for that QP */
qp->trans_qp.base.mqp.qpn = 0;
- qp->flags |= MLX5_IB_QP_RSS;
+ qp->is_rss = true;
return 0;
-err_copy:
- mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn, mucontext->devx_uid);
err:
kvfree(in);
return err;
}
-static void configure_responder_scat_cqe(struct ib_qp_init_attr *init_attr,
- void *qpc)
-{
- int rcqe_sz;
-
- if (init_attr->qp_type == MLX5_IB_QPT_DCI)
- return;
-
- rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq);
-
- if (init_attr->qp_type == MLX5_IB_QPT_DCT) {
- if (rcqe_sz == 128)
- MLX5_SET(dctc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
-
- return;
- }
-
- MLX5_SET(qpc, qpc, cs_res,
- rcqe_sz == 128 ? MLX5_RES_SCAT_DATA64_CQE :
- MLX5_RES_SCAT_DATA32_CQE);
-}
-
static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev,
struct ib_qp_init_attr *init_attr,
struct mlx5_ib_create_qp *ucmd,
void *qpc)
{
- enum ib_qp_type qpt = init_attr->qp_type;
int scqe_sz;
bool allow_scat_cqe = false;
- if (qpt == IB_QPT_UC || qpt == IB_QPT_UD)
- return;
-
if (ucmd)
allow_scat_cqe = ucmd->flags & MLX5_QP_FLAG_ALLOW_SCATTER_CQE;
@@ -1998,234 +1836,137 @@ static int get_atomic_mode(struct mlx5_ib_dev *dev,
return atomic_mode;
}
-static inline bool check_flags_mask(uint64_t input, uint64_t supported)
-{
- return (input & ~supported) == 0;
-}
-
-static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata, struct mlx5_ib_qp *qp)
+static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_params *params)
{
+ struct ib_qp_init_attr *attr = params->attr;
+ u32 uidx = params->uidx;
struct mlx5_ib_resources *devr = &dev->devr;
int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
struct mlx5_core_dev *mdev = dev->mdev;
- struct mlx5_ib_create_qp_resp resp = {};
- struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
- udata, struct mlx5_ib_ucontext, ibucontext);
- struct mlx5_ib_cq *send_cq;
- struct mlx5_ib_cq *recv_cq;
- unsigned long flags;
- u32 uidx = MLX5_IB_DEFAULT_UIDX;
- struct mlx5_ib_create_qp ucmd;
struct mlx5_ib_qp_base *base;
- int mlx5_st;
+ unsigned long flags;
void *qpc;
u32 *in;
int err;
mutex_init(&qp->mutex);
- spin_lock_init(&qp->sq.lock);
- spin_lock_init(&qp->rq.lock);
- mlx5_st = to_mlx5_st(init_attr->qp_type);
- if (mlx5_st < 0)
- return -EINVAL;
+ if (attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+ qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
- if (init_attr->rwq_ind_tbl) {
- if (!udata)
- return -ENOSYS;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
- err = create_rss_raw_qp_tir(dev, qp, pd, init_attr, udata);
- return err;
- }
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
- if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
- if (!MLX5_CAP_GEN(mdev, block_lb_mc)) {
- mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n");
- return -EINVAL;
- } else {
- qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK;
- }
- }
+ MLX5_SET(qpc, qpc, st, MLX5_QP_ST_XRC);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+ MLX5_SET(qpc, qpc, pd, to_mpd(devr->p0)->pdn);
- if (init_attr->create_flags &
- (IB_QP_CREATE_CROSS_CHANNEL |
- IB_QP_CREATE_MANAGED_SEND |
- IB_QP_CREATE_MANAGED_RECV)) {
- if (!MLX5_CAP_GEN(mdev, cd)) {
- mlx5_ib_dbg(dev, "cross-channel isn't supported\n");
- return -EINVAL;
- }
- if (init_attr->create_flags & IB_QP_CREATE_CROSS_CHANNEL)
- qp->flags |= MLX5_IB_QP_CROSS_CHANNEL;
- if (init_attr->create_flags & IB_QP_CREATE_MANAGED_SEND)
- qp->flags |= MLX5_IB_QP_MANAGED_SEND;
- if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV)
- qp->flags |= MLX5_IB_QP_MANAGED_RECV;
- }
-
- if (init_attr->qp_type == IB_QPT_UD &&
- (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO))
- if (!MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
- mlx5_ib_dbg(dev, "ipoib UD lso qp isn't supported\n");
- return -EOPNOTSUPP;
- }
+ if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
+ MLX5_SET(qpc, qpc, block_lb_mc, 1);
+ if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL)
+ MLX5_SET(qpc, qpc, cd_master, 1);
+ if (qp->flags & IB_QP_CREATE_MANAGED_SEND)
+ MLX5_SET(qpc, qpc, cd_slave_send, 1);
+ if (qp->flags & IB_QP_CREATE_MANAGED_RECV)
+ MLX5_SET(qpc, qpc, cd_slave_receive, 1);
- if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS) {
- if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
- mlx5_ib_dbg(dev, "Scatter FCS is supported only for Raw Packet QPs");
- return -EOPNOTSUPP;
- }
- if (!MLX5_CAP_GEN(dev->mdev, eth_net_offloads) ||
- !MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
- mlx5_ib_dbg(dev, "Scatter FCS isn't supported\n");
- return -EOPNOTSUPP;
- }
- qp->flags |= MLX5_IB_QP_CAP_SCATTER_FCS;
- }
+ MLX5_SET(qpc, qpc, rq_type, MLX5_SRQ_RQ);
+ MLX5_SET(qpc, qpc, no_sq, 1);
+ MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
+ MLX5_SET(qpc, qpc, cqn_snd, to_mcq(devr->c0)->mcq.cqn);
+ MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
+ MLX5_SET(qpc, qpc, xrcd, to_mxrcd(attr->xrcd)->xrcdn);
+ MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
- if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
- qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
+ /* 0xffffff means we ask to work with cqe version 0 */
+ if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
+ MLX5_SET(qpc, qpc, user_index, uidx);
- if (init_attr->create_flags & IB_QP_CREATE_CVLAN_STRIPPING) {
- if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
- MLX5_CAP_ETH(dev->mdev, vlan_cap)) ||
- (init_attr->qp_type != IB_QPT_RAW_PACKET))
- return -EOPNOTSUPP;
- qp->flags |= MLX5_IB_QP_CVLAN_STRIPPING;
+ if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) {
+ MLX5_SET(qpc, qpc, end_padding_mode,
+ MLX5_WQ_END_PAD_MODE_ALIGN);
+ /* Special case to clear the flag */
+ qp->flags &= ~IB_QP_CREATE_PCI_WRITE_END_PADDING;
}
- if (udata) {
- if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
- mlx5_ib_dbg(dev, "copy failed\n");
- return -EFAULT;
- }
+ base = &qp->trans_qp.base;
+ err = mlx5_core_create_qp(dev, &base->mqp, in, inlen);
+ kvfree(in);
+ if (err)
+ return err;
- if (!check_flags_mask(ucmd.flags,
- MLX5_QP_FLAG_ALLOW_SCATTER_CQE |
- MLX5_QP_FLAG_BFREG_INDEX |
- MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE |
- MLX5_QP_FLAG_SCATTER_CQE |
- MLX5_QP_FLAG_SIGNATURE |
- MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC |
- MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
- MLX5_QP_FLAG_TUNNEL_OFFLOADS |
- MLX5_QP_FLAG_UAR_PAGE_INDEX |
- MLX5_QP_FLAG_TYPE_DCI |
- MLX5_QP_FLAG_TYPE_DCT))
- return -EINVAL;
+ base->container_mibqp = qp;
+ base->mqp.event = mlx5_ib_qp_event;
- err = get_qp_user_index(ucontext, &ucmd, udata->inlen, &uidx);
- if (err)
- return err;
+ spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+ list_add_tail(&qp->qps_list, &dev->qp_list);
+ spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
- qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
- if (MLX5_CAP_GEN(dev->mdev, sctr_data_cqe))
- qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
- if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) {
- if (init_attr->qp_type != IB_QPT_RAW_PACKET ||
- !tunnel_offload_supported(mdev)) {
- mlx5_ib_dbg(dev, "Tunnel offload isn't supported\n");
- return -EOPNOTSUPP;
- }
- qp->flags_en |= MLX5_QP_FLAG_TUNNEL_OFFLOADS;
- }
+ qp->trans_qp.xrcdn = to_mxrcd(attr->xrcd)->xrcdn;
+ return 0;
+}
- if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC) {
- if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
- mlx5_ib_dbg(dev, "Self-LB UC isn't supported\n");
- return -EOPNOTSUPP;
- }
- qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
- }
+static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_params *params)
+{
+ struct ib_qp_init_attr *init_attr = params->attr;
+ struct mlx5_ib_create_qp *ucmd = params->ucmd;
+ struct ib_udata *udata = params->udata;
+ u32 uidx = params->uidx;
+ struct mlx5_ib_resources *devr = &dev->devr;
+ int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_ib_cq *send_cq;
+ struct mlx5_ib_cq *recv_cq;
+ unsigned long flags;
+ struct mlx5_ib_qp_base *base;
+ int mlx5_st;
+ void *qpc;
+ u32 *in;
+ int err;
- if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) {
- if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
- mlx5_ib_dbg(dev, "Self-LB UM isn't supported\n");
- return -EOPNOTSUPP;
- }
- qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC;
- }
+ mutex_init(&qp->mutex);
+ spin_lock_init(&qp->sq.lock);
+ spin_lock_init(&qp->rq.lock);
- if (ucmd.flags & MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE) {
- if (init_attr->qp_type != IB_QPT_RC ||
- !MLX5_CAP_GEN(dev->mdev, qp_packet_based)) {
- mlx5_ib_dbg(dev, "packet based credit mode isn't supported\n");
- return -EOPNOTSUPP;
- }
- qp->flags |= MLX5_IB_QP_PACKET_BASED_CREDIT;
- }
+ mlx5_st = to_mlx5_st(qp->type);
+ if (mlx5_st < 0)
+ return -EINVAL;
- if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) {
- if (init_attr->qp_type != IB_QPT_UD ||
- (MLX5_CAP_GEN(dev->mdev, port_type) !=
- MLX5_CAP_PORT_TYPE_IB) ||
- !mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS)) {
- mlx5_ib_dbg(dev, "Source QP option isn't supported\n");
- return -EOPNOTSUPP;
- }
+ if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+ qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
- qp->flags |= MLX5_IB_QP_UNDERLAY;
- qp->underlay_qpn = init_attr->source_qpn;
- }
- } else {
- qp->wq_sig = !!wq_signature;
- }
+ if (qp->flags & IB_QP_CREATE_SOURCE_QPN)
+ qp->underlay_qpn = init_attr->source_qpn;
base = (init_attr->qp_type == IB_QPT_RAW_PACKET ||
- qp->flags & MLX5_IB_QP_UNDERLAY) ?
+ qp->flags & IB_QP_CREATE_SOURCE_QPN) ?
&qp->raw_packet_qp.rq.base :
&qp->trans_qp.base;
qp->has_rq = qp_has_rq(init_attr);
- err = set_rq_size(dev, &init_attr->cap, qp->has_rq,
- qp, udata ? &ucmd : NULL);
+ err = set_rq_size(dev, &init_attr->cap, qp->has_rq, qp, ucmd);
if (err) {
mlx5_ib_dbg(dev, "err %d\n", err);
return err;
}
- if (pd) {
- if (udata) {
- __u32 max_wqes =
- 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
- mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count);
- if (ucmd.rq_wqe_shift != qp->rq.wqe_shift ||
- ucmd.rq_wqe_count != qp->rq.wqe_cnt) {
- mlx5_ib_dbg(dev, "invalid rq params\n");
- return -EINVAL;
- }
- if (ucmd.sq_wqe_count > max_wqes) {
- mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n",
- ucmd.sq_wqe_count, max_wqes);
- return -EINVAL;
- }
- if (init_attr->create_flags &
- MLX5_IB_QP_CREATE_SQPN_QP1) {
- mlx5_ib_dbg(dev, "user-space is not allowed to create UD QPs spoofing as QP1\n");
- return -EINVAL;
- }
- err = create_user_qp(dev, pd, qp, udata, init_attr, &in,
- &resp, &inlen, base);
- if (err)
- mlx5_ib_dbg(dev, "err %d\n", err);
- } else {
- err = create_kernel_qp(dev, init_attr, qp, &in, &inlen,
- base);
- if (err)
- mlx5_ib_dbg(dev, "err %d\n", err);
- }
+ if (ucmd->rq_wqe_shift != qp->rq.wqe_shift ||
+ ucmd->rq_wqe_count != qp->rq.wqe_cnt)
+ return -EINVAL;
- if (err)
- return err;
- } else {
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
+ if (ucmd->sq_wqe_count > (1 << MLX5_CAP_GEN(mdev, log_max_qp_sz)))
+ return -EINVAL;
- qp->create_type = MLX5_QP_EMPTY;
- }
+ err = _create_user_qp(dev, pd, qp, udata, init_attr, &in, &params->resp,
+ &inlen, base, ucmd);
+ if (err)
+ return err;
if (is_sqp(init_attr->qp_type))
qp->port = init_attr->port_num;
@@ -2234,33 +1975,34 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
MLX5_SET(qpc, qpc, st, mlx5_st);
MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+ MLX5_SET(qpc, qpc, pd, to_mpd(pd)->pdn);
- if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR)
- MLX5_SET(qpc, qpc, pd, to_mpd(pd ? pd : devr->p0)->pdn);
- else
- MLX5_SET(qpc, qpc, latency_sensitive, 1);
-
-
- if (qp->wq_sig)
+ if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE)
MLX5_SET(qpc, qpc, wq_signature, 1);
- if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
+ if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
MLX5_SET(qpc, qpc, block_lb_mc, 1);
- if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+ if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL)
MLX5_SET(qpc, qpc, cd_master, 1);
- if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
+ if (qp->flags & IB_QP_CREATE_MANAGED_SEND)
MLX5_SET(qpc, qpc, cd_slave_send, 1);
- if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
+ if (qp->flags & IB_QP_CREATE_MANAGED_RECV)
MLX5_SET(qpc, qpc, cd_slave_receive, 1);
- if (qp->flags & MLX5_IB_QP_PACKET_BASED_CREDIT)
+ if (qp->flags_en & MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE)
MLX5_SET(qpc, qpc, req_e2e_credit_mode, 1);
- if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
- configure_responder_scat_cqe(init_attr, qpc);
- configure_requester_scat_cqe(dev, init_attr,
- udata ? &ucmd : NULL,
- qpc);
+ if ((qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) &&
+ (init_attr->qp_type == IB_QPT_RC ||
+ init_attr->qp_type == IB_QPT_UC)) {
+ int rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq);
+
+ MLX5_SET(qpc, qpc, cs_res,
+ rcqe_sz == 128 ? MLX5_RES_SCAT_DATA64_CQE :
+ MLX5_RES_SCAT_DATA32_CQE);
}
+ if ((qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) &&
+ (qp->type == MLX5_IB_QPT_DCI || qp->type == IB_QPT_RC))
+ configure_requester_scat_cqe(dev, init_attr, ucmd, qpc);
if (qp->rq.wqe_cnt) {
MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
@@ -2281,12 +2023,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
/* Set default resources */
switch (init_attr->qp_type) {
- case IB_QPT_XRC_TGT:
- MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
- MLX5_SET(qpc, qpc, cqn_snd, to_mcq(devr->c0)->mcq.cqn);
- MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
- MLX5_SET(qpc, qpc, xrcd, to_mxrcd(init_attr->xrcd)->xrcdn);
- break;
case IB_QPT_XRC_INI:
MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
@@ -2314,52 +2050,160 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
MLX5_SET(qpc, qpc, user_index, uidx);
- /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicates ipoib qp */
- if (init_attr->qp_type == IB_QPT_UD &&
- (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) {
- MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1);
- qp->flags |= MLX5_IB_QP_LSO;
- }
-
- if (init_attr->create_flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) {
- if (!MLX5_CAP_GEN(dev->mdev, end_pad)) {
- mlx5_ib_dbg(dev, "scatter end padding is not supported\n");
- err = -EOPNOTSUPP;
- goto err;
- } else if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
- MLX5_SET(qpc, qpc, end_padding_mode,
- MLX5_WQ_END_PAD_MODE_ALIGN);
- } else {
- qp->flags |= MLX5_IB_QP_PCI_WRITE_END_PADDING;
- }
- }
-
- if (inlen < 0) {
- err = -EINVAL;
- goto err;
+ if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING &&
+ init_attr->qp_type != IB_QPT_RAW_PACKET) {
+ MLX5_SET(qpc, qpc, end_padding_mode,
+ MLX5_WQ_END_PAD_MODE_ALIGN);
+ /* Special case to clear the flag */
+ qp->flags &= ~IB_QP_CREATE_PCI_WRITE_END_PADDING;
}
if (init_attr->qp_type == IB_QPT_RAW_PACKET ||
- qp->flags & MLX5_IB_QP_UNDERLAY) {
- qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
+ qp->flags & IB_QP_CREATE_SOURCE_QPN) {
+ qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd->sq_buf_addr;
raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata,
- &resp);
- } else {
+ &params->resp);
+ } else
err = mlx5_core_create_qp(dev, &base->mqp, in, inlen);
- }
- if (err) {
- mlx5_ib_dbg(dev, "create qp failed\n");
+ kvfree(in);
+ if (err)
goto err_create;
+
+ base->container_mibqp = qp;
+ base->mqp.event = mlx5_ib_qp_event;
+
+ get_cqs(qp->type, init_attr->send_cq, init_attr->recv_cq,
+ &send_cq, &recv_cq);
+ spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+ mlx5_ib_lock_cqs(send_cq, recv_cq);
+ /* Maintain device to QPs access, needed for further handling via reset
+ * flow
+ */
+ list_add_tail(&qp->qps_list, &dev->qp_list);
+ /* Maintain CQ to QPs access, needed for further handling via reset flow
+ */
+ if (send_cq)
+ list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
+ if (recv_cq)
+ list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
+ mlx5_ib_unlock_cqs(send_cq, recv_cq);
+ spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
+
+ return 0;
+
+err_create:
+ destroy_qp(dev, qp, base, udata);
+ return err;
+}
+
+static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_params *params)
+{
+ struct ib_qp_init_attr *attr = params->attr;
+ u32 uidx = params->uidx;
+ struct mlx5_ib_resources *devr = &dev->devr;
+ int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_ib_cq *send_cq;
+ struct mlx5_ib_cq *recv_cq;
+ unsigned long flags;
+ struct mlx5_ib_qp_base *base;
+ int mlx5_st;
+ void *qpc;
+ u32 *in;
+ int err;
+
+ mutex_init(&qp->mutex);
+ spin_lock_init(&qp->sq.lock);
+ spin_lock_init(&qp->rq.lock);
+
+ mlx5_st = to_mlx5_st(qp->type);
+ if (mlx5_st < 0)
+ return -EINVAL;
+
+ if (attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+ qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
+
+ base = &qp->trans_qp.base;
+
+ qp->has_rq = qp_has_rq(attr);
+ err = set_rq_size(dev, &attr->cap, qp->has_rq, qp, NULL);
+ if (err) {
+ mlx5_ib_dbg(dev, "err %d\n", err);
+ return err;
}
+ err = _create_kernel_qp(dev, attr, qp, &in, &inlen, base);
+ if (err)
+ return err;
+
+ if (is_sqp(attr->qp_type))
+ qp->port = attr->port_num;
+
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+
+ MLX5_SET(qpc, qpc, st, mlx5_st);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+
+ if (attr->qp_type != MLX5_IB_QPT_REG_UMR)
+ MLX5_SET(qpc, qpc, pd, to_mpd(pd ? pd : devr->p0)->pdn);
+ else
+ MLX5_SET(qpc, qpc, latency_sensitive, 1);
+
+
+ if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
+ MLX5_SET(qpc, qpc, block_lb_mc, 1);
+
+ if (qp->rq.wqe_cnt) {
+ MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
+ MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt));
+ }
+
+ MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, attr));
+
+ if (qp->sq.wqe_cnt)
+ MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt));
+ else
+ MLX5_SET(qpc, qpc, no_sq, 1);
+
+ if (attr->srq) {
+ MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn);
+ MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
+ to_msrq(attr->srq)->msrq.srqn);
+ } else {
+ MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
+ MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
+ to_msrq(devr->s1)->msrq.srqn);
+ }
+
+ if (attr->send_cq)
+ MLX5_SET(qpc, qpc, cqn_snd, to_mcq(attr->send_cq)->mcq.cqn);
+
+ if (attr->recv_cq)
+ MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(attr->recv_cq)->mcq.cqn);
+
+ MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
+
+ /* 0xffffff means we ask to work with cqe version 0 */
+ if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
+ MLX5_SET(qpc, qpc, user_index, uidx);
+
+ /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicate an ipoib qp */
+ if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO)
+ MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1);
+
+ err = mlx5_core_create_qp(dev, &base->mqp, in, inlen);
kvfree(in);
+ if (err)
+ goto err_create;
base->container_mibqp = qp;
base->mqp.event = mlx5_ib_qp_event;
- get_cqs(init_attr->qp_type, init_attr->send_cq, init_attr->recv_cq,
+ get_cqs(qp->type, attr->send_cq, attr->recv_cq,
&send_cq, &recv_cq);
spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
mlx5_ib_lock_cqs(send_cq, recv_cq);
@@ -2379,13 +2223,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
return 0;
err_create:
- if (qp->create_type == MLX5_QP_USER)
- destroy_qp_user(dev, pd, qp, base, udata);
- else if (qp->create_type == MLX5_QP_KERNEL)
- destroy_qp_kernel(dev, qp);
-
-err:
- kvfree(in);
+ destroy_qp(dev, qp, base, NULL);
return err;
}
@@ -2447,11 +2285,6 @@ static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *re
}
}
-static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp)
-{
- return to_mpd(qp->ibqp.pd);
-}
-
static void get_cqs(enum ib_qp_type qp_type,
struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq)
@@ -2472,14 +2305,10 @@ static void get_cqs(enum ib_qp_type qp_type,
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_UD:
- case IB_QPT_RAW_IPV6:
- case IB_QPT_RAW_ETHERTYPE:
case IB_QPT_RAW_PACKET:
*send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
*recv_cq = ib_recv_cq ? to_mcq(ib_recv_cq) : NULL;
break;
-
- case IB_QPT_MAX:
default:
*send_cq = NULL;
*recv_cq = NULL;
@@ -2505,13 +2334,13 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
}
base = (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
- qp->flags & MLX5_IB_QP_UNDERLAY) ?
+ qp->flags & IB_QP_CREATE_SOURCE_QPN) ?
&qp->raw_packet_qp.rq.base :
&qp->trans_qp.base;
if (qp->state != IB_QPS_RESET) {
if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET &&
- !(qp->flags & MLX5_IB_QP_UNDERLAY)) {
+ !(qp->flags & IB_QP_CREATE_SOURCE_QPN)) {
err = mlx5_core_qp_modify(dev, MLX5_CMD_OP_2RST_QP, 0,
NULL, &base->mqp);
} else {
@@ -2539,7 +2368,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (recv_cq)
list_del(&qp->cq_recv_list);
- if (qp->create_type == MLX5_QP_KERNEL) {
+ if (!udata) {
__mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
if (send_cq != recv_cq)
@@ -2550,7 +2379,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
- qp->flags & MLX5_IB_QP_UNDERLAY) {
+ qp->flags & IB_QP_CREATE_SOURCE_QPN) {
destroy_raw_packet_qp(dev, qp);
} else {
err = mlx5_core_destroy_qp(dev, &base->mqp);
@@ -2559,254 +2388,446 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
base->mqp.qpn);
}
- if (qp->create_type == MLX5_QP_KERNEL)
- destroy_qp_kernel(dev, qp);
- else if (qp->create_type == MLX5_QP_USER)
- destroy_qp_user(dev, &get_pd(qp)->ibpd, qp, base, udata);
+ destroy_qp(dev, qp, base, udata);
}
-static const char *ib_qp_type_str(enum ib_qp_type type)
+static int create_dct(struct ib_pd *pd, struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_params *params)
{
- switch (type) {
- case IB_QPT_SMI:
- return "IB_QPT_SMI";
- case IB_QPT_GSI:
- return "IB_QPT_GSI";
+ struct ib_qp_init_attr *attr = params->attr;
+ struct mlx5_ib_create_qp *ucmd = params->ucmd;
+ u32 uidx = params->uidx;
+ void *dctc;
+
+ qp->dct.in = kzalloc(MLX5_ST_SZ_BYTES(create_dct_in), GFP_KERNEL);
+ if (!qp->dct.in)
+ return -ENOMEM;
+
+ MLX5_SET(create_dct_in, qp->dct.in, uid, to_mpd(pd)->uid);
+ dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry);
+ MLX5_SET(dctc, dctc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(dctc, dctc, srqn_xrqn, to_msrq(attr->srq)->msrq.srqn);
+ MLX5_SET(dctc, dctc, cqn, to_mcq(attr->recv_cq)->mcq.cqn);
+ MLX5_SET64(dctc, dctc, dc_access_key, ucmd->access_key);
+ MLX5_SET(dctc, dctc, user_index, uidx);
+
+ if (qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) {
+ int rcqe_sz = mlx5_ib_get_cqe_size(attr->recv_cq);
+
+ if (rcqe_sz == 128)
+ MLX5_SET(dctc, dctc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
+ }
+
+ qp->state = IB_QPS_RESET;
+
+ return 0;
+}
+
+static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
+ enum ib_qp_type *type)
+{
+ if (attr->qp_type == IB_QPT_DRIVER && !MLX5_CAP_GEN(dev->mdev, dct))
+ goto out;
+
+ switch (attr->qp_type) {
+ case IB_QPT_XRC_TGT:
+ case IB_QPT_XRC_INI:
+ if (!MLX5_CAP_GEN(dev->mdev, xrc))
+ goto out;
+ fallthrough;
case IB_QPT_RC:
- return "IB_QPT_RC";
case IB_QPT_UC:
- return "IB_QPT_UC";
- case IB_QPT_UD:
- return "IB_QPT_UD";
- case IB_QPT_RAW_IPV6:
- return "IB_QPT_RAW_IPV6";
- case IB_QPT_RAW_ETHERTYPE:
- return "IB_QPT_RAW_ETHERTYPE";
- case IB_QPT_XRC_INI:
- return "IB_QPT_XRC_INI";
- case IB_QPT_XRC_TGT:
- return "IB_QPT_XRC_TGT";
+ case IB_QPT_SMI:
+ case MLX5_IB_QPT_HW_GSI:
+ case IB_QPT_DRIVER:
+ case IB_QPT_GSI:
+ if (dev->profile == &raw_eth_profile)
+ goto out;
case IB_QPT_RAW_PACKET:
- return "IB_QPT_RAW_PACKET";
+ case IB_QPT_UD:
case MLX5_IB_QPT_REG_UMR:
- return "MLX5_IB_QPT_REG_UMR";
- case IB_QPT_DRIVER:
- return "IB_QPT_DRIVER";
- case IB_QPT_MAX:
+ break;
default:
- return "Invalid QP type";
+ goto out;
}
+
+ *type = attr->qp_type;
+ return 0;
+
+out:
+ mlx5_ib_dbg(dev, "Unsupported QP type %d\n", attr->qp_type);
+ return -EOPNOTSUPP;
}
-static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd,
- struct ib_qp_init_attr *attr,
- struct mlx5_ib_create_qp *ucmd,
- struct ib_udata *udata)
+static int check_valid_flow(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct ib_qp_init_attr *attr,
+ struct ib_udata *udata)
{
struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
udata, struct mlx5_ib_ucontext, ibucontext);
- struct mlx5_ib_qp *qp;
- int err = 0;
- u32 uidx = MLX5_IB_DEFAULT_UIDX;
- void *dctc;
- if (!attr->srq || !attr->recv_cq)
- return ERR_PTR(-EINVAL);
+ if (!udata) {
+ /* Kernel create_qp callers */
+ if (attr->rwq_ind_tbl)
+ return -EOPNOTSUPP;
- err = get_qp_user_index(ucontext, ucmd, sizeof(*ucmd), &uidx);
- if (err)
- return ERR_PTR(err);
+ switch (attr->qp_type) {
+ case IB_QPT_RAW_PACKET:
+ case IB_QPT_DRIVER:
+ return -EOPNOTSUPP;
+ default:
+ return 0;
+ }
+ }
- qp = kzalloc(sizeof(*qp), GFP_KERNEL);
- if (!qp)
- return ERR_PTR(-ENOMEM);
+ /* Userspace create_qp callers */
+ if (attr->qp_type == IB_QPT_RAW_PACKET && !ucontext->cqe_version) {
+ mlx5_ib_dbg(dev,
+ "Raw Packet QP is only supported for CQE version > 0\n");
+ return -EINVAL;
+ }
- qp->dct.in = kzalloc(MLX5_ST_SZ_BYTES(create_dct_in), GFP_KERNEL);
- if (!qp->dct.in) {
- err = -ENOMEM;
- goto err_free;
+ if (attr->qp_type != IB_QPT_RAW_PACKET && attr->rwq_ind_tbl) {
+ mlx5_ib_dbg(dev,
+ "Wrong QP type %d for the RWQ indirect table\n",
+ attr->qp_type);
+ return -EINVAL;
}
- MLX5_SET(create_dct_in, qp->dct.in, uid, to_mpd(pd)->uid);
- dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry);
- qp->qp_sub_type = MLX5_IB_QPT_DCT;
- MLX5_SET(dctc, dctc, pd, to_mpd(pd)->pdn);
- MLX5_SET(dctc, dctc, srqn_xrqn, to_msrq(attr->srq)->msrq.srqn);
- MLX5_SET(dctc, dctc, cqn, to_mcq(attr->recv_cq)->mcq.cqn);
- MLX5_SET64(dctc, dctc, dc_access_key, ucmd->access_key);
- MLX5_SET(dctc, dctc, user_index, uidx);
+ switch (attr->qp_type) {
+ case IB_QPT_SMI:
+ case MLX5_IB_QPT_HW_GSI:
+ case MLX5_IB_QPT_REG_UMR:
+ case IB_QPT_GSI:
+ mlx5_ib_dbg(dev, "Kernel doesn't support QP type %d\n",
+ attr->qp_type);
+ return -EINVAL;
+ default:
+ break;
+ }
- if (ucmd->flags & MLX5_QP_FLAG_SCATTER_CQE)
- configure_responder_scat_cqe(attr, dctc);
+ /*
+ * We should never see this warning; it means that kernel code
+ * is missing an ib_pd. Placed here to catch developers' mistakes.
+ */
+ WARN_ONCE(!pd && attr->qp_type != IB_QPT_XRC_TGT,
+ "There is a missing PD pointer assignment\n");
+ return 0;
+}
- qp->state = IB_QPS_RESET;
+static void process_vendor_flag(struct mlx5_ib_dev *dev, int *flags, int flag,
+ bool cond, struct mlx5_ib_qp *qp)
+{
+ if (!(*flags & flag))
+ return;
- return &qp->ibqp;
-err_free:
- kfree(qp);
- return ERR_PTR(err);
+ if (cond) {
+ qp->flags_en |= flag;
+ *flags &= ~flag;
+ return;
+ }
+
+ if (flag == MLX5_QP_FLAG_SCATTER_CQE) {
+ /*
+ * We don't return an error if this flag was provided
+ * but mlx5 doesn't have the right capability.
+ */
+ *flags &= ~MLX5_QP_FLAG_SCATTER_CQE;
+ return;
+ }
+ mlx5_ib_dbg(dev, "Vendor create QP flag 0x%X is not supported\n", flag);
}
-static int set_mlx_qp_type(struct mlx5_ib_dev *dev,
- struct ib_qp_init_attr *init_attr,
- struct mlx5_ib_create_qp *ucmd,
- struct ib_udata *udata)
+static int process_vendor_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ void *ucmd, struct ib_qp_init_attr *attr)
{
- enum { MLX_QP_FLAGS = MLX5_QP_FLAG_TYPE_DCT | MLX5_QP_FLAG_TYPE_DCI };
- int err;
+ struct mlx5_core_dev *mdev = dev->mdev;
+ bool cond;
+ int flags;
- if (!udata)
+ if (attr->rwq_ind_tbl)
+ flags = ((struct mlx5_ib_create_qp_rss *)ucmd)->flags;
+ else
+ flags = ((struct mlx5_ib_create_qp *)ucmd)->flags;
+
+ switch (flags & (MLX5_QP_FLAG_TYPE_DCT | MLX5_QP_FLAG_TYPE_DCI)) {
+ case MLX5_QP_FLAG_TYPE_DCI:
+ qp->type = MLX5_IB_QPT_DCI;
+ break;
+ case MLX5_QP_FLAG_TYPE_DCT:
+ qp->type = MLX5_IB_QPT_DCT;
+ break;
+ default:
+ if (qp->type != IB_QPT_DRIVER)
+ break;
+ /*
+ * It is IB_QPT_DRIVER and either no subtype or a wrong
+ * subtype was provided.
+ */
return -EINVAL;
+ }
- if (udata->inlen < sizeof(*ucmd)) {
- mlx5_ib_dbg(dev, "create_qp user command is smaller than expected\n");
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TYPE_DCI, true, qp);
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TYPE_DCT, true, qp);
+
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SIGNATURE, true, qp);
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SCATTER_CQE,
+ MLX5_CAP_GEN(mdev, sctr_data_cqe), qp);
+
+ if (qp->type == IB_QPT_RAW_PACKET) {
+ cond = MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) ||
+ MLX5_CAP_ETH(mdev, tunnel_stateless_gre) ||
+ MLX5_CAP_ETH(mdev, tunnel_stateless_geneve_rx);
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TUNNEL_OFFLOADS,
+ cond, qp);
+ process_vendor_flag(dev, &flags,
+ MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC, true,
+ qp);
+ process_vendor_flag(dev, &flags,
+ MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC, true,
+ qp);
+ }
+
+ if (qp->type == IB_QPT_RC)
+ process_vendor_flag(dev, &flags,
+ MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE,
+ MLX5_CAP_GEN(mdev, qp_packet_based), qp);
+
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_BFREG_INDEX, true, qp);
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_UAR_PAGE_INDEX, true, qp);
+
+ cond = qp->flags_en & ~(MLX5_QP_FLAG_TUNNEL_OFFLOADS |
+ MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
+ MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC);
+ if (attr->rwq_ind_tbl && cond) {
+ mlx5_ib_dbg(dev, "RSS RAW QP has unsupported flags 0x%X\n",
+ cond);
return -EINVAL;
}
- err = ib_copy_from_udata(ucmd, udata, sizeof(*ucmd));
- if (err)
- return err;
- if ((ucmd->flags & MLX_QP_FLAGS) == MLX5_QP_FLAG_TYPE_DCI) {
- init_attr->qp_type = MLX5_IB_QPT_DCI;
- } else {
- if ((ucmd->flags & MLX_QP_FLAGS) == MLX5_QP_FLAG_TYPE_DCT) {
- init_attr->qp_type = MLX5_IB_QPT_DCT;
- } else {
- mlx5_ib_dbg(dev, "Invalid QP flags\n");
- return -EINVAL;
- }
+ if (flags)
+ mlx5_ib_dbg(dev, "udata has unsupported flags 0x%X\n", flags);
+
+ return (flags) ? -EINVAL : 0;
}
- if (!MLX5_CAP_GEN(dev->mdev, dct)) {
- mlx5_ib_dbg(dev, "DC transport is not supported\n");
- return -EOPNOTSUPP;
+static void process_create_flag(struct mlx5_ib_dev *dev, int *flags, int flag,
+ bool cond, struct mlx5_ib_qp *qp)
+{
+ if (!(*flags & flag))
+ return;
+
+ if (cond) {
+ qp->flags |= flag;
+ *flags &= ~flag;
+ return;
}
- return 0;
+ if (flag == MLX5_IB_QP_CREATE_WC_TEST) {
+ /*
+ * Special case: if the condition isn't met, it is not an error,
+ * just a different in-kernel flow.
+ */
+ *flags &= ~MLX5_IB_QP_CREATE_WC_TEST;
+ return;
+ }
+ mlx5_ib_dbg(dev, "Verbs create QP flag 0x%X is not supported\n", flag);
}
-struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *verbs_init_attr,
- struct ib_udata *udata)
+static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct ib_qp_init_attr *attr)
{
- struct mlx5_ib_dev *dev;
- struct mlx5_ib_qp *qp;
- u16 xrcdn = 0;
- int err;
- struct ib_qp_init_attr mlx_init_attr;
- struct ib_qp_init_attr *init_attr = verbs_init_attr;
- struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
- udata, struct mlx5_ib_ucontext, ibucontext);
+ enum ib_qp_type qp_type = qp->type;
+ struct mlx5_core_dev *mdev = dev->mdev;
+ int create_flags = attr->create_flags;
+ bool cond;
- if (pd) {
- dev = to_mdev(pd->device);
+ if (qp->type == IB_QPT_UD && dev->profile == &raw_eth_profile)
+ if (create_flags & ~MLX5_IB_QP_CREATE_WC_TEST)
+ return -EINVAL;
- if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
- if (!ucontext) {
- mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n");
- return ERR_PTR(-EINVAL);
- } else if (!ucontext->cqe_version) {
- mlx5_ib_dbg(dev, "Raw Packet QP is only supported for CQE version > 0\n");
- return ERR_PTR(-EINVAL);
- }
- }
- } else {
- /* being cautious here */
- if (init_attr->qp_type != IB_QPT_XRC_TGT &&
- init_attr->qp_type != MLX5_IB_QPT_REG_UMR) {
- pr_warn("%s: no PD for transport %s\n", __func__,
- ib_qp_type_str(init_attr->qp_type));
- return ERR_PTR(-EINVAL);
- }
- dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
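+ /* DCT and RSS raw packet QPs do not accept any IB create flags. */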
+ if (qp_type == MLX5_IB_QPT_DCT)
+ return (create_flags) ? -EINVAL : 0;
+
+ if (qp_type == IB_QPT_RAW_PACKET && attr->rwq_ind_tbl)
+ return (create_flags) ? -EINVAL : 0;
+
+ process_create_flag(dev, &create_flags,
+ IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
+ MLX5_CAP_GEN(mdev, block_lb_mc), qp);
+ process_create_flag(dev, &create_flags, IB_QP_CREATE_CROSS_CHANNEL,
+ MLX5_CAP_GEN(mdev, cd), qp);
+ process_create_flag(dev, &create_flags, IB_QP_CREATE_MANAGED_SEND,
+ MLX5_CAP_GEN(mdev, cd), qp);
+ process_create_flag(dev, &create_flags, IB_QP_CREATE_MANAGED_RECV,
+ MLX5_CAP_GEN(mdev, cd), qp);
+
+ if (qp_type == IB_QPT_UD) {
+ process_create_flag(dev, &create_flags,
+ IB_QP_CREATE_IPOIB_UD_LSO,
+ MLX5_CAP_GEN(mdev, ipoib_basic_offloads),
+ qp);
+ cond = MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_IB;
+ process_create_flag(dev, &create_flags, IB_QP_CREATE_SOURCE_QPN,
+ cond, qp);
+ }
+
+ if (qp_type == IB_QPT_RAW_PACKET) {
+ cond = MLX5_CAP_GEN(mdev, eth_net_offloads) &&
+ MLX5_CAP_ETH(mdev, scatter_fcs);
+ process_create_flag(dev, &create_flags,
+ IB_QP_CREATE_SCATTER_FCS, cond, qp);
+
+ cond = MLX5_CAP_GEN(mdev, eth_net_offloads) &&
+ MLX5_CAP_ETH(mdev, vlan_cap);
+ process_create_flag(dev, &create_flags,
+ IB_QP_CREATE_CVLAN_STRIPPING, cond, qp);
+ }
+
+ process_create_flag(dev, &create_flags,
+ IB_QP_CREATE_PCI_WRITE_END_PADDING,
+ MLX5_CAP_GEN(mdev, end_pad), qp);
+
+ process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_WC_TEST,
+ qp_type != MLX5_IB_QPT_REG_UMR, qp);
+ process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1,
+ true, qp);
+
+ if (create_flags)
+ mlx5_ib_dbg(dev, "Create QP has unsupported flags 0x%X\n",
+ create_flags);
+
+ return (create_flags) ? -EINVAL : 0;
+}
+
+static int process_udata_size(struct mlx5_ib_dev *dev,
+ struct mlx5_create_qp_params *params)
+{
+ size_t ucmd = sizeof(struct mlx5_ib_create_qp);
+ struct ib_qp_init_attr *attr = params->attr;
+ struct ib_udata *udata = params->udata;
+ size_t outlen = udata->outlen;
+ size_t inlen = udata->inlen;
+
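+ /* Never report more response bytes than the driver can fill. */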
+ params->outlen = min(outlen, sizeof(struct mlx5_ib_create_qp_resp));
+ if (attr->qp_type == IB_QPT_DRIVER) {
+ params->inlen = (inlen < ucmd) ? 0 : ucmd;
+ goto out;
}
- if (init_attr->qp_type == IB_QPT_DRIVER) {
- struct mlx5_ib_create_qp ucmd;
+ if (!params->is_rss_raw) {
+ params->inlen = ucmd;
+ goto out;
+ }
- init_attr = &mlx_init_attr;
- memcpy(init_attr, verbs_init_attr, sizeof(*verbs_init_attr));
- err = set_mlx_qp_type(dev, init_attr, &ucmd, udata);
- if (err)
- return ERR_PTR(err);
+ /* RSS RAW QP */
+ if (inlen < offsetofend(struct mlx5_ib_create_qp_rss, flags))
+ return -EINVAL;
- if (init_attr->qp_type == MLX5_IB_QPT_DCI) {
- if (init_attr->cap.max_recv_wr ||
- init_attr->cap.max_recv_sge) {
- mlx5_ib_dbg(dev, "DCI QP requires zero size receive queue\n");
- return ERR_PTR(-EINVAL);
- }
- } else {
- return mlx5_ib_create_dct(pd, init_attr, &ucmd, udata);
- }
+ if (outlen < offsetofend(struct mlx5_ib_create_qp_resp, bfreg_index))
+ return -EINVAL;
+
+ ucmd = sizeof(struct mlx5_ib_create_qp_rss);
+ if (inlen > ucmd && !ib_is_udata_cleared(udata, ucmd, inlen - ucmd))
+ return -EINVAL;
+
+ params->inlen = min(ucmd, inlen);
+out:
+ if (!params->inlen)
+ mlx5_ib_dbg(dev, "udata is too small or not cleared\n");
+
+ return (params->inlen) ? 0 : -EINVAL;
+}
+
+static int create_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_params *params)
+{
+ int err;
+
+ if (params->is_rss_raw) {
+ err = create_rss_raw_qp_tir(dev, pd, qp, params);
+ goto out;
}
- switch (init_attr->qp_type) {
- case IB_QPT_XRC_TGT:
- case IB_QPT_XRC_INI:
- if (!MLX5_CAP_GEN(dev->mdev, xrc)) {
- mlx5_ib_dbg(dev, "XRC not supported\n");
- return ERR_PTR(-ENOSYS);
- }
- init_attr->recv_cq = NULL;
- if (init_attr->qp_type == IB_QPT_XRC_TGT) {
- xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
- init_attr->send_cq = NULL;
- }
+ if (qp->type == MLX5_IB_QPT_DCT) {
+ err = create_dct(pd, qp, params);
+ goto out;
+ }
- /* fall through */
- case IB_QPT_RAW_PACKET:
- case IB_QPT_RC:
- case IB_QPT_UC:
- case IB_QPT_UD:
- case IB_QPT_SMI:
- case MLX5_IB_QPT_HW_GSI:
- case MLX5_IB_QPT_REG_UMR:
- case MLX5_IB_QPT_DCI:
- qp = kzalloc(sizeof(*qp), GFP_KERNEL);
- if (!qp)
- return ERR_PTR(-ENOMEM);
+ if (qp->type == IB_QPT_XRC_TGT) {
+ err = create_xrc_tgt_qp(dev, qp, params);
+ goto out;
+ }
- err = create_qp_common(dev, pd, init_attr, udata, qp);
- if (err) {
- mlx5_ib_dbg(dev, "create_qp_common failed\n");
- kfree(qp);
- return ERR_PTR(err);
- }
+ if (params->udata)
+ err = create_user_qp(dev, pd, qp, params);
+ else
+ err = create_kernel_qp(dev, pd, qp, params);
- if (is_qp0(init_attr->qp_type))
- qp->ibqp.qp_num = 0;
- else if (is_qp1(init_attr->qp_type))
- qp->ibqp.qp_num = 1;
- else
- qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn;
+out:
+ if (err) {
+ mlx5_ib_err(dev, "Create QP type %d failed\n", qp->type);
+ return err;
+ }
- mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
- qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn,
- init_attr->recv_cq ? to_mcq(init_attr->recv_cq)->mcq.cqn : -1,
- init_attr->send_cq ? to_mcq(init_attr->send_cq)->mcq.cqn : -1);
+ if (is_qp0(qp->type))
+ qp->ibqp.qp_num = 0;
+ else if (is_qp1(qp->type))
+ qp->ibqp.qp_num = 1;
+ else
+ qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn;
- qp->trans_qp.xrcdn = xrcdn;
+ mlx5_ib_dbg(dev,
+ "QP type %d, ib qpn 0x%X, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
+ qp->type, qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn,
+ params->attr->recv_cq ? to_mcq(params->attr->recv_cq)->mcq.cqn :
+ -1,
+ params->attr->send_cq ? to_mcq(params->attr->send_cq)->mcq.cqn :
+ -1);
- break;
+ return 0;
+}
- case IB_QPT_GSI:
- return mlx5_ib_gsi_create_qp(pd, init_attr);
+static int check_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct ib_qp_init_attr *attr)
+{
+ int ret = 0;
- case IB_QPT_RAW_IPV6:
- case IB_QPT_RAW_ETHERTYPE:
- case IB_QPT_MAX:
+ switch (qp->type) {
+ case MLX5_IB_QPT_DCT:
+ ret = (!attr->srq || !attr->recv_cq) ? -EINVAL : 0;
+ break;
+ case MLX5_IB_QPT_DCI:
+ ret = (attr->cap.max_recv_wr || attr->cap.max_recv_sge) ?
+ -EINVAL :
+ 0;
+ break;
+ case IB_QPT_RAW_PACKET:
+ ret = (attr->rwq_ind_tbl && attr->send_cq) ? -EINVAL : 0;
+ break;
default:
- mlx5_ib_dbg(dev, "unsupported qp type %d\n",
- init_attr->qp_type);
- /* Don't support raw QPs */
- return ERR_PTR(-EOPNOTSUPP);
+ break;
}
- if (verbs_init_attr->qp_type == IB_QPT_DRIVER)
- qp->qp_sub_type = init_attr->qp_type;
+ if (ret)
+ mlx5_ib_dbg(dev, "QP type %d has wrong attributes\n", qp->type);
- return &qp->ibqp;
+ return ret;
+}
+
+static int get_qp_uidx(struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_params *params)
+{
+ struct mlx5_ib_create_qp *ucmd = params->ucmd;
+ struct ib_udata *udata = params->udata;
+ struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
+
+ if (params->is_rss_raw)
+ return 0;
+
+ return get_qp_user_index(ucontext, ucmd, sizeof(*ucmd), &params->uidx);
}
static int mlx5_ib_destroy_dct(struct mlx5_ib_qp *mqp)
@@ -2828,6 +2849,104 @@ static int mlx5_ib_destroy_dct(struct mlx5_ib_qp *mqp)
return 0;
}
+struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct mlx5_create_qp_params params = {};
+ struct mlx5_ib_dev *dev;
+ struct mlx5_ib_qp *qp;
+ enum ib_qp_type type;
+ int err;
+
+ dev = pd ? to_mdev(pd->device) :
+ to_mdev(to_mxrcd(attr->xrcd)->ibxrcd.device);
+
+ err = check_qp_type(dev, attr, &type);
+ if (err)
+ return ERR_PTR(err);
+
+ err = check_valid_flow(dev, pd, attr, udata);
+ if (err)
+ return ERR_PTR(err);
+
+ if (attr->qp_type == IB_QPT_GSI)
+ return mlx5_ib_gsi_create_qp(pd, attr);
+
+ params.udata = udata;
+ params.uidx = MLX5_IB_DEFAULT_UIDX;
+ params.attr = attr;
+ params.is_rss_raw = !!attr->rwq_ind_tbl;
+
+ if (udata) {
+ err = process_udata_size(dev, &params);
+ if (err)
+ return ERR_PTR(err);
+
+ params.ucmd = kzalloc(params.inlen, GFP_KERNEL);
+ if (!params.ucmd)
+ return ERR_PTR(-ENOMEM);
+
+ err = ib_copy_from_udata(params.ucmd, udata, params.inlen);
+ if (err)
+ goto free_ucmd;
+ }
+
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp) {
+ err = -ENOMEM;
+ goto free_ucmd;
+ }
+
+ qp->type = type;
+ if (udata) {
+ err = process_vendor_flags(dev, qp, params.ucmd, attr);
+ if (err)
+ goto free_qp;
+
+ err = get_qp_uidx(qp, &params);
+ if (err)
+ goto free_qp;
+ }
+ err = process_create_flags(dev, qp, attr);
+ if (err)
+ goto free_qp;
+
+ err = check_qp_attr(dev, qp, attr);
+ if (err)
+ goto free_qp;
+
+ err = create_qp(dev, pd, qp, &params);
+ if (err)
+ goto free_qp;
+
+ kfree(params.ucmd);
+ params.ucmd = NULL;
+
+ if (udata)
+ /*
+ * It is safe to copy the response for all user create QP flows,
+ * including MLX5_IB_QPT_DCT, which doesn't need it.
+ * In that case, resp will be filled with zeros.
+ */
+ err = ib_copy_to_udata(udata, &params.resp, params.outlen);
+ if (err)
+ goto destroy_qp;
+
+ return &qp->ibqp;
+
+destroy_qp:
+ if (qp->type == MLX5_IB_QPT_DCT)
+ mlx5_ib_destroy_dct(qp);
+ else
+ destroy_qp_common(dev, qp, udata);
+ qp = NULL;
+free_qp:
+ kfree(qp);
+free_ucmd:
+ kfree(params.ucmd);
+ return ERR_PTR(err);
+}
+
int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
@@ -2836,7 +2955,7 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
if (unlikely(qp->qp_type == IB_QPT_GSI))
return mlx5_ib_gsi_destroy_qp(qp);
- if (mqp->qp_sub_type == MLX5_IB_QPT_DCT)
+ if (mqp->type == MLX5_IB_QPT_DCT)
return mlx5_ib_destroy_dct(mqp);
destroy_qp_common(dev, mqp, udata);
@@ -2965,6 +3084,21 @@ static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
return err;
}
+static void mlx5_set_path_udp_sport(struct mlx5_qp_path *path,
+ const struct rdma_ah_attr *ah,
+ u32 lqpn, u32 rqpn)
+{
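+ /*
+ * Derive the RoCE v2 UDP source port from the flow label; when no
+ * flow label was given, compute one from the local/remote QPNs.
+ */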
+ u32 fl = ah->grh.flow_label;
+ u16 sport;
+
+ if (!fl)
+ fl = rdma_calc_flow_label(lqpn, rqpn);
+
+ sport = rdma_flow_label_to_udp_sport(fl);
+ path->udp_sport = cpu_to_be16(sport);
+}
+
static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
const struct rdma_ah_attr *ah,
struct mlx5_qp_path *path, u8 port, int attr_mask,
@@ -2996,12 +3130,15 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
return -EINVAL;
memcpy(path->rmac, ah->roce.dmac, sizeof(ah->roce.dmac));
- if (qp->ibqp.qp_type == IB_QPT_RC ||
- qp->ibqp.qp_type == IB_QPT_UC ||
- qp->ibqp.qp_type == IB_QPT_XRC_INI ||
- qp->ibqp.qp_type == IB_QPT_XRC_TGT)
- path->udp_sport =
- mlx5_get_roce_udp_sport(dev, ah->grh.sgid_attr);
+ if ((qp->ibqp.qp_type == IB_QPT_RC ||
+ qp->ibqp.qp_type == IB_QPT_UC ||
+ qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+ qp->ibqp.qp_type == IB_QPT_XRC_TGT) &&
+ (grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) &&
+ (attr_mask & IB_QP_DEST_QPN))
+ mlx5_set_path_udp_sport(path, ah,
+ qp->ibqp.qp_num,
+ attr->dest_qp_num);
path->dci_cfi_prio_sl = (sl & 0x7) << 4;
gid_type = ah->grh.sgid_attr->gid_type;
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
@@ -3050,10 +3187,12 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_PKEY_INDEX |
- MLX5_QP_OPTPAR_PRI_PORT,
+ MLX5_QP_OPTPAR_PRI_PORT |
+ MLX5_QP_OPTPAR_LAG_TX_AFF,
[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_PKEY_INDEX |
- MLX5_QP_OPTPAR_PRI_PORT,
+ MLX5_QP_OPTPAR_PRI_PORT |
+ MLX5_QP_OPTPAR_LAG_TX_AFF,
[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX |
MLX5_QP_OPTPAR_Q_KEY |
MLX5_QP_OPTPAR_PRI_PORT,
@@ -3061,17 +3200,20 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_PKEY_INDEX |
- MLX5_QP_OPTPAR_PRI_PORT,
+ MLX5_QP_OPTPAR_PRI_PORT |
+ MLX5_QP_OPTPAR_LAG_TX_AFF,
},
[MLX5_QP_STATE_RTR] = {
[MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
MLX5_QP_OPTPAR_RRE |
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RWE |
- MLX5_QP_OPTPAR_PKEY_INDEX,
+ MLX5_QP_OPTPAR_PKEY_INDEX |
+ MLX5_QP_OPTPAR_LAG_TX_AFF,
[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
MLX5_QP_OPTPAR_RWE |
- MLX5_QP_OPTPAR_PKEY_INDEX,
+ MLX5_QP_OPTPAR_PKEY_INDEX |
+ MLX5_QP_OPTPAR_LAG_TX_AFF,
[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX |
MLX5_QP_OPTPAR_Q_KEY,
[MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX |
@@ -3080,7 +3222,8 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
MLX5_QP_OPTPAR_RRE |
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RWE |
- MLX5_QP_OPTPAR_PKEY_INDEX,
+ MLX5_QP_OPTPAR_PKEY_INDEX |
+ MLX5_QP_OPTPAR_LAG_TX_AFF,
},
},
[MLX5_QP_STATE_RTR] = {
@@ -3414,33 +3557,68 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
return 0;
}
-static unsigned int get_tx_affinity(struct mlx5_ib_dev *dev,
- struct mlx5_ib_pd *pd,
- struct mlx5_ib_qp_base *qp_base,
- u8 port_num, struct ib_udata *udata)
+static unsigned int get_tx_affinity_rr(struct mlx5_ib_dev *dev,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
+ u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ atomic_t *tx_port_affinity;
+
+ if (ucontext)
+ tx_port_affinity = &ucontext->tx_port_affinity;
+ else
+ tx_port_affinity = &dev->port[port_num].roce.tx_port_affinity;
+
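+ /* Round-robin: the running counter maps onto ports 1..MLX5_MAX_PORTS. */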
+ return (unsigned int)atomic_add_return(1, tx_port_affinity) %
+ MLX5_MAX_PORTS + 1;
+}
+
+static bool qp_supports_affinity(struct ib_qp *qp)
+{
+ if ((qp->qp_type == IB_QPT_RC) ||
+ (qp->qp_type == IB_QPT_UD) ||
+ (qp->qp_type == IB_QPT_UC) ||
+ (qp->qp_type == IB_QPT_RAW_PACKET) ||
+ (qp->qp_type == IB_QPT_XRC_INI) ||
+ (qp->qp_type == IB_QPT_XRC_TGT))
+ return true;
+ return false;
+}
+
+static unsigned int get_tx_affinity(struct ib_qp *qp,
+ const struct ib_qp_attr *attr,
+ int attr_mask, u8 init,
+ struct ib_udata *udata)
{
struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
udata, struct mlx5_ib_ucontext, ibucontext);
- unsigned int tx_port_affinity;
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_qp_base *qp_base;
+ unsigned int tx_affinity;
+
+ if (!(dev->lag_active && qp_supports_affinity(qp)))
+ return 0;
- if (ucontext) {
- tx_port_affinity = (unsigned int)atomic_add_return(
- 1, &ucontext->tx_port_affinity) %
- MLX5_MAX_PORTS +
- 1;
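+ /*
+ * QP1 reuses the stored gsi_lag_port; a RESET->INIT transition
+ * picks a port round-robin; an AV that carries an xmit_slave maps
+ * that LAG slave to its port.
+ */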
+ if (mqp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)
+ tx_affinity = mqp->gsi_lag_port;
+ else if (init)
+ tx_affinity = get_tx_affinity_rr(dev, udata);
+ else if ((attr_mask & IB_QP_AV) && attr->xmit_slave)
+ tx_affinity =
+ mlx5_lag_get_slave_port(dev->mdev, attr->xmit_slave);
+ else
+ return 0;
+
+ qp_base = &mqp->trans_qp.base;
+ if (ucontext)
mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x ucontext %p\n",
- tx_port_affinity, qp_base->mqp.qpn, ucontext);
- } else {
- tx_port_affinity =
- (unsigned int)atomic_add_return(
- 1, &dev->port[port_num].roce.tx_port_affinity) %
- MLX5_MAX_PORTS +
- 1;
+ tx_affinity, qp_base->mqp.qpn, ucontext);
+ else
mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x\n",
- tx_port_affinity, qp_base->mqp.qpn);
- }
-
- return tx_port_affinity;
+ tx_affinity, qp_base->mqp.qpn);
+ return tx_affinity;
}
static int __mlx5_ib_qp_set_counter(struct ib_qp *qp,
@@ -3516,15 +3694,14 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
struct mlx5_qp_context *context;
struct mlx5_ib_pd *pd;
enum mlx5_qp_state mlx5_cur, mlx5_new;
- enum mlx5_qp_optpar optpar;
+ enum mlx5_qp_optpar optpar = 0;
u32 set_id = 0;
int mlx5_st;
int err;
u16 op;
u8 tx_affinity = 0;
- mlx5_st = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ?
- qp->qp_sub_type : ibqp->qp_type);
+ mlx5_st = to_mlx5_st(qp->type);
if (mlx5_st < 0)
return -EINVAL;
@@ -3532,7 +3709,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (!context)
return -ENOMEM;
- pd = get_pd(qp);
+ pd = to_mpd(qp->ibqp.pd);
context->flags = cpu_to_be32(mlx5_st << 16);
if (!(attr_mask & IB_QP_PATH_MIG_STATE)) {
@@ -3551,27 +3728,20 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
}
}
- if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) {
- if ((ibqp->qp_type == IB_QPT_RC) ||
- (ibqp->qp_type == IB_QPT_UD &&
- !(qp->flags & MLX5_IB_QP_SQPN_QP1)) ||
- (ibqp->qp_type == IB_QPT_UC) ||
- (ibqp->qp_type == IB_QPT_RAW_PACKET) ||
- (ibqp->qp_type == IB_QPT_XRC_INI) ||
- (ibqp->qp_type == IB_QPT_XRC_TGT)) {
- if (dev->lag_active) {
- u8 p = mlx5_core_native_port_num(dev->mdev) - 1;
- tx_affinity = get_tx_affinity(dev, pd, base, p,
- udata);
- context->flags |= cpu_to_be32(tx_affinity << 24);
- }
- }
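+ /*
+ * A non-zero tx_affinity pins the QP to one LAG port; on the
+ * transition to RTR it may be re-applied as an optional parameter
+ * when the device supports init2_lag_tx_port_affinity.
+ */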
+ tx_affinity = get_tx_affinity(ibqp, attr, attr_mask,
+ cur_state == IB_QPS_RESET &&
+ new_state == IB_QPS_INIT, udata);
+ if (tx_affinity) {
+ context->flags |= cpu_to_be32(tx_affinity << 24);
+ if (new_state == IB_QPS_RTR &&
+ MLX5_CAP_GEN(dev->mdev, init2_lag_tx_port_affinity))
+ optpar |= MLX5_QP_OPTPAR_LAG_TX_AFF;
}
if (is_sqp(ibqp->qp_type)) {
context->mtu_msgmax = (IB_MTU_256 << 5) | 8;
} else if ((ibqp->qp_type == IB_QPT_UD &&
- !(qp->flags & MLX5_IB_QP_UNDERLAY)) ||
+ !(qp->flags & IB_QP_CREATE_SOURCE_QPN)) ||
ibqp->qp_type == MLX5_IB_QPT_REG_UMR) {
context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
} else if (attr_mask & IB_QP_PATH_MTU) {
@@ -3676,7 +3846,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
qp->port) - 1;
/* Underlay port should be used - index 0 function per port */
- if (qp->flags & MLX5_IB_QP_UNDERLAY)
+ if (qp->flags & IB_QP_CREATE_SOURCE_QPN)
port_num = 0;
if (ibqp->counter)
@@ -3690,7 +3860,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
context->sq_crq_size |= cpu_to_be16(1 << 4);
- if (qp->flags & MLX5_IB_QP_SQPN_QP1)
+ if (qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)
context->deth_sqpn = cpu_to_be32(1);
mlx5_cur = to_mlx5_state(cur_state);
@@ -3703,11 +3873,11 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
}
op = optab[mlx5_cur][mlx5_new];
- optpar = ib_mask_to_mlx5_opt(attr_mask);
+ optpar |= ib_mask_to_mlx5_opt(attr_mask);
optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
- qp->flags & MLX5_IB_QP_UNDERLAY) {
+ qp->flags & IB_QP_CREATE_SOURCE_QPN) {
struct mlx5_modify_raw_qp_param raw_qp_param = {};
raw_qp_param.operation = op;
@@ -3985,11 +4155,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (unlikely(ibqp->qp_type == IB_QPT_GSI))
return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask);
- if (ibqp->qp_type == IB_QPT_DRIVER)
- qp_type = qp->qp_sub_type;
- else
- qp_type = (unlikely(ibqp->qp_type == MLX5_IB_QPT_HW_GSI)) ?
- IB_QPT_GSI : ibqp->qp_type;
+ qp_type = (unlikely(ibqp->qp_type == MLX5_IB_QPT_HW_GSI)) ? IB_QPT_GSI :
+ qp->type;
if (qp_type == MLX5_IB_QPT_DCT)
return mlx5_ib_modify_dct(ibqp, attr, attr_mask, udata);
@@ -4003,7 +4170,7 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
}
- if (qp->flags & MLX5_IB_QP_UNDERLAY) {
+ if (qp->flags & IB_QP_CREATE_SOURCE_QPN) {
if (attr_mask & ~(IB_QP_STATE | IB_QP_CUR_STATE)) {
mlx5_ib_dbg(dev, "invalid attr_mask 0x%x when underlay QP is used\n",
attr_mask);
@@ -4070,1432 +4237,6 @@ out:
return err;
}
-static void _handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
- u32 wqe_sz, void **cur_edge)
-{
- u32 idx;
-
- idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1);
- *cur_edge = get_sq_edge(sq, idx);
-
- *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx);
-}
-
-/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the
- * next nearby edge and get new address translation for current WQE position.
- * @sq - SQ buffer.
- * @seg: Current WQE position (16B aligned).
- * @wqe_sz: Total current WQE size [16B].
- * @cur_edge: Updated current edge.
- */
-static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
- u32 wqe_sz, void **cur_edge)
-{
- if (likely(*seg != *cur_edge))
- return;
-
- _handle_post_send_edge(sq, seg, wqe_sz, cur_edge);
-}
-
-/* memcpy_send_wqe - copy data from src to WQE and update the relevant WQ's
- * pointers. At the end @seg is aligned to 16B regardless the copied size.
- * @sq - SQ buffer.
- * @cur_edge: Updated current edge.
- * @seg: Current WQE position (16B aligned).
- * @wqe_sz: Total current WQE size [16B].
- * @src: Pointer to copy from.
- * @n: Number of bytes to copy.
- */
-static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge,
- void **seg, u32 *wqe_sz, const void *src,
- size_t n)
-{
- while (likely(n)) {
- size_t leftlen = *cur_edge - *seg;
- size_t copysz = min_t(size_t, leftlen, n);
- size_t stride;
-
- memcpy(*seg, src, copysz);
-
- n -= copysz;
- src += copysz;
- stride = !n ? ALIGN(copysz, 16) : copysz;
- *seg += stride;
- *wqe_sz += stride >> 4;
- handle_post_send_edge(sq, seg, *wqe_sz, cur_edge);
- }
-}
-
-static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
-{
- struct mlx5_ib_cq *cq;
- unsigned cur;
-
- cur = wq->head - wq->tail;
- if (likely(cur + nreq < wq->max_post))
- return 0;
-
- cq = to_mcq(ib_cq);
- spin_lock(&cq->lock);
- cur = wq->head - wq->tail;
- spin_unlock(&cq->lock);
-
- return cur + nreq >= wq->max_post;
-}
-
-static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
- u64 remote_addr, u32 rkey)
-{
- rseg->raddr = cpu_to_be64(remote_addr);
- rseg->rkey = cpu_to_be32(rkey);
- rseg->reserved = 0;
-}
-
-static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
- void **seg, int *size, void **cur_edge)
-{
- struct mlx5_wqe_eth_seg *eseg = *seg;
-
- memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg));
-
- if (wr->send_flags & IB_SEND_IP_CSUM)
- eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM |
- MLX5_ETH_WQE_L4_CSUM;
-
- if (wr->opcode == IB_WR_LSO) {
- struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
- size_t left, copysz;
- void *pdata = ud_wr->header;
- size_t stride;
-
- left = ud_wr->hlen;
- eseg->mss = cpu_to_be16(ud_wr->mss);
- eseg->inline_hdr.sz = cpu_to_be16(left);
-
- /* memcpy_send_wqe should get a 16B align address. Hence, we
- * first copy up to the current edge and then, if needed,
- * fall-through to memcpy_send_wqe.
- */
- copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start,
- left);
- memcpy(eseg->inline_hdr.start, pdata, copysz);
- stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) -
- sizeof(eseg->inline_hdr.start) + copysz, 16);
- *size += stride / 16;
- *seg += stride;
-
- if (copysz < left) {
- handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
- left -= copysz;
- pdata += copysz;
- memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata,
- left);
- }
-
- return;
- }
-
- *seg += sizeof(struct mlx5_wqe_eth_seg);
- *size += sizeof(struct mlx5_wqe_eth_seg) / 16;
-}
-
-static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
- const struct ib_send_wr *wr)
-{
- memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av));
- dseg->av.dqp_dct = cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV);
- dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey);
-}
-
-static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
-{
- dseg->byte_count = cpu_to_be32(sg->length);
- dseg->lkey = cpu_to_be32(sg->lkey);
- dseg->addr = cpu_to_be64(sg->addr);
-}
-
-static u64 get_xlt_octo(u64 bytes)
-{
- return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) /
- MLX5_IB_UMR_OCTOWORD;
-}
-
-static __be64 frwr_mkey_mask(bool atomic)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_LEN |
- MLX5_MKEY_MASK_PAGE_SIZE |
- MLX5_MKEY_MASK_START_ADDR |
- MLX5_MKEY_MASK_EN_RINVAL |
- MLX5_MKEY_MASK_KEY |
- MLX5_MKEY_MASK_LR |
- MLX5_MKEY_MASK_LW |
- MLX5_MKEY_MASK_RR |
- MLX5_MKEY_MASK_RW |
- MLX5_MKEY_MASK_SMALL_FENCE |
- MLX5_MKEY_MASK_FREE;
-
- if (atomic)
- result |= MLX5_MKEY_MASK_A;
-
- return cpu_to_be64(result);
-}
-
-static __be64 sig_mkey_mask(void)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_LEN |
- MLX5_MKEY_MASK_PAGE_SIZE |
- MLX5_MKEY_MASK_START_ADDR |
- MLX5_MKEY_MASK_EN_SIGERR |
- MLX5_MKEY_MASK_EN_RINVAL |
- MLX5_MKEY_MASK_KEY |
- MLX5_MKEY_MASK_LR |
- MLX5_MKEY_MASK_LW |
- MLX5_MKEY_MASK_RR |
- MLX5_MKEY_MASK_RW |
- MLX5_MKEY_MASK_SMALL_FENCE |
- MLX5_MKEY_MASK_FREE |
- MLX5_MKEY_MASK_BSF_EN;
-
- return cpu_to_be64(result);
-}
-
-static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
- struct mlx5_ib_mr *mr, u8 flags, bool atomic)
-{
- int size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size;
-
- memset(umr, 0, sizeof(*umr));
-
- umr->flags = flags;
- umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
- umr->mkey_mask = frwr_mkey_mask(atomic);
-}
-
-static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
-{
- memset(umr, 0, sizeof(*umr));
- umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
- umr->flags = MLX5_UMR_INLINE;
-}
-
-static __be64 get_umr_enable_mr_mask(void)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_KEY |
- MLX5_MKEY_MASK_FREE;
-
- return cpu_to_be64(result);
-}
-
-static __be64 get_umr_disable_mr_mask(void)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_FREE;
-
- return cpu_to_be64(result);
-}
-
-static __be64 get_umr_update_translation_mask(void)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_LEN |
- MLX5_MKEY_MASK_PAGE_SIZE |
- MLX5_MKEY_MASK_START_ADDR;
-
- return cpu_to_be64(result);
-}
-
-static __be64 get_umr_update_access_mask(int atomic)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_LR |
- MLX5_MKEY_MASK_LW |
- MLX5_MKEY_MASK_RR |
- MLX5_MKEY_MASK_RW;
-
- if (atomic)
- result |= MLX5_MKEY_MASK_A;
-
- return cpu_to_be64(result);
-}
-
-static __be64 get_umr_update_pd_mask(void)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_PD;
-
- return cpu_to_be64(result);
-}
-
-static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
-{
- if ((mask & MLX5_MKEY_MASK_PAGE_SIZE &&
- MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) ||
- (mask & MLX5_MKEY_MASK_A &&
- MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)))
- return -EPERM;
- return 0;
-}
-
-static int set_reg_umr_segment(struct mlx5_ib_dev *dev,
- struct mlx5_wqe_umr_ctrl_seg *umr,
- const struct ib_send_wr *wr, int atomic)
-{
- const struct mlx5_umr_wr *umrwr = umr_wr(wr);
-
- memset(umr, 0, sizeof(*umr));
-
- if (!umrwr->ignore_free_state) {
- if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE)
- /* fail if free */
- umr->flags = MLX5_UMR_CHECK_FREE;
- else
- /* fail if not free */
- umr->flags = MLX5_UMR_CHECK_NOT_FREE;
- }
-
- umr->xlt_octowords = cpu_to_be16(get_xlt_octo(umrwr->xlt_size));
- if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) {
- u64 offset = get_xlt_octo(umrwr->offset);
-
- umr->xlt_offset = cpu_to_be16(offset & 0xffff);
- umr->xlt_offset_47_16 = cpu_to_be32(offset >> 16);
- umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
- }
- if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
- umr->mkey_mask |= get_umr_update_translation_mask();
- if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) {
- umr->mkey_mask |= get_umr_update_access_mask(atomic);
- umr->mkey_mask |= get_umr_update_pd_mask();
- }
- if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR)
- umr->mkey_mask |= get_umr_enable_mr_mask();
- if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
- umr->mkey_mask |= get_umr_disable_mr_mask();
-
- if (!wr->num_sge)
- umr->flags |= MLX5_UMR_INLINE;
-
- return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask));
-}
-
-static u8 get_umr_flags(int acc)
-{
- return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) |
- (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) |
- (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) |
- (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) |
- MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
-}
-
-static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg,
- struct mlx5_ib_mr *mr,
- u32 key, int access)
-{
- int ndescs = ALIGN(mr->ndescs + mr->meta_ndescs, 8) >> 1;
-
- memset(seg, 0, sizeof(*seg));
-
- if (mr->access_mode == MLX5_MKC_ACCESS_MODE_MTT)
- seg->log2_page_size = ilog2(mr->ibmr.page_size);
- else if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
- /* KLMs take twice the size of MTTs */
- ndescs *= 2;
-
- seg->flags = get_umr_flags(access) | mr->access_mode;
- seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00);
- seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
- seg->start_addr = cpu_to_be64(mr->ibmr.iova);
- seg->len = cpu_to_be64(mr->ibmr.length);
- seg->xlt_oct_size = cpu_to_be32(ndescs);
-}
-
-static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
-{
- memset(seg, 0, sizeof(*seg));
- seg->status = MLX5_MKEY_STATUS_FREE;
-}
-
-static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg,
- const struct ib_send_wr *wr)
-{
- const struct mlx5_umr_wr *umrwr = umr_wr(wr);
-
- memset(seg, 0, sizeof(*seg));
- if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
- seg->status = MLX5_MKEY_STATUS_FREE;
-
- seg->flags = convert_access(umrwr->access_flags);
- if (umrwr->pd)
- seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn);
- if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION &&
- !umrwr->length)
- seg->flags_pd |= cpu_to_be32(MLX5_MKEY_LEN64);
-
- seg->start_addr = cpu_to_be64(umrwr->virt_addr);
- seg->len = cpu_to_be64(umrwr->length);
- seg->log2_page_size = umrwr->page_shift;
- seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
- mlx5_mkey_variant(umrwr->mkey));
-}
-
-static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
- struct mlx5_ib_mr *mr,
- struct mlx5_ib_pd *pd)
-{
- int bcount = mr->desc_size * (mr->ndescs + mr->meta_ndescs);
-
- dseg->addr = cpu_to_be64(mr->desc_map);
- dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64));
- dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
-}
-
-static __be32 send_ieth(const struct ib_send_wr *wr)
-{
- switch (wr->opcode) {
- case IB_WR_SEND_WITH_IMM:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- return wr->ex.imm_data;
-
- case IB_WR_SEND_WITH_INV:
- return cpu_to_be32(wr->ex.invalidate_rkey);
-
- default:
- return 0;
- }
-}
-
-static u8 calc_sig(void *wqe, int size)
-{
- u8 *p = wqe;
- u8 res = 0;
- int i;
-
- for (i = 0; i < size; i++)
- res ^= p[i];
-
- return ~res;
-}
-
-static u8 wq_sig(void *wqe)
-{
- return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4);
-}
-
-static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
- void **wqe, int *wqe_sz, void **cur_edge)
-{
- struct mlx5_wqe_inline_seg *seg;
- size_t offset;
- int inl = 0;
- int i;
-
- seg = *wqe;
- *wqe += sizeof(*seg);
- offset = sizeof(*seg);
-
- for (i = 0; i < wr->num_sge; i++) {
- size_t len = wr->sg_list[i].length;
- void *addr = (void *)(unsigned long)(wr->sg_list[i].addr);
-
- inl += len;
-
- if (unlikely(inl > qp->max_inline_data))
- return -ENOMEM;
-
- while (likely(len)) {
- size_t leftlen;
- size_t copysz;
-
- handle_post_send_edge(&qp->sq, wqe,
- *wqe_sz + (offset >> 4),
- cur_edge);
-
- leftlen = *cur_edge - *wqe;
- copysz = min_t(size_t, leftlen, len);
-
- memcpy(*wqe, addr, copysz);
- len -= copysz;
- addr += copysz;
- *wqe += copysz;
- offset += copysz;
- }
- }
-
- seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
-
- *wqe_sz += ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
-
- return 0;
-}
-
-static u16 prot_field_size(enum ib_signature_type type)
-{
- switch (type) {
- case IB_SIG_TYPE_T10_DIF:
- return MLX5_DIF_SIZE;
- default:
- return 0;
- }
-}
-
-static u8 bs_selector(int block_size)
-{
- switch (block_size) {
- case 512: return 0x1;
- case 520: return 0x2;
- case 4096: return 0x3;
- case 4160: return 0x4;
- case 1073741824: return 0x5;
- default: return 0;
- }
-}
-
-static void mlx5_fill_inl_bsf(struct ib_sig_domain *domain,
- struct mlx5_bsf_inl *inl)
-{
- /* Valid inline section and allow BSF refresh */
- inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID |
- MLX5_BSF_REFRESH_DIF);
- inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag);
- inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag);
- /* repeating block */
- inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK;
- inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ?
- MLX5_DIF_CRC : MLX5_DIF_IPCS;
-
- if (domain->sig.dif.ref_remap)
- inl->dif_inc_ref_guard_check |= MLX5_BSF_INC_REFTAG;
-
- if (domain->sig.dif.app_escape) {
- if (domain->sig.dif.ref_escape)
- inl->dif_inc_ref_guard_check |= MLX5_BSF_APPREF_ESCAPE;
- else
- inl->dif_inc_ref_guard_check |= MLX5_BSF_APPTAG_ESCAPE;
- }
-
- inl->dif_app_bitmask_check =
- cpu_to_be16(domain->sig.dif.apptag_check_mask);
-}
-
-static int mlx5_set_bsf(struct ib_mr *sig_mr,
- struct ib_sig_attrs *sig_attrs,
- struct mlx5_bsf *bsf, u32 data_size)
-{
- struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig;
- struct mlx5_bsf_basic *basic = &bsf->basic;
- struct ib_sig_domain *mem = &sig_attrs->mem;
- struct ib_sig_domain *wire = &sig_attrs->wire;
-
- memset(bsf, 0, sizeof(*bsf));
-
- /* Basic + Extended + Inline */
- basic->bsf_size_sbs = 1 << 7;
- /* Input domain check byte mask */
- basic->check_byte_mask = sig_attrs->check_mask;
- basic->raw_data_size = cpu_to_be32(data_size);
-
- /* Memory domain */
- switch (sig_attrs->mem.sig_type) {
- case IB_SIG_TYPE_NONE:
- break;
- case IB_SIG_TYPE_T10_DIF:
- basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval);
- basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx);
- mlx5_fill_inl_bsf(mem, &bsf->m_inl);
- break;
- default:
- return -EINVAL;
- }
-
- /* Wire domain */
- switch (sig_attrs->wire.sig_type) {
- case IB_SIG_TYPE_NONE:
- break;
- case IB_SIG_TYPE_T10_DIF:
- if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval &&
- mem->sig_type == wire->sig_type) {
- /* Same block structure */
- basic->bsf_size_sbs |= 1 << 4;
- if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
- basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK;
- if (mem->sig.dif.app_tag == wire->sig.dif.app_tag)
- basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK;
- if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag)
- basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK;
- } else
- basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval);
-
- basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx);
- mlx5_fill_inl_bsf(wire, &bsf->w_inl);
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int set_sig_data_segment(const struct ib_send_wr *send_wr,
- struct ib_mr *sig_mr,
- struct ib_sig_attrs *sig_attrs,
- struct mlx5_ib_qp *qp, void **seg, int *size,
- void **cur_edge)
-{
- struct mlx5_bsf *bsf;
- u32 data_len;
- u32 data_key;
- u64 data_va;
- u32 prot_len = 0;
- u32 prot_key = 0;
- u64 prot_va = 0;
- bool prot = false;
- int ret;
- int wqe_size;
- struct mlx5_ib_mr *mr = to_mmr(sig_mr);
- struct mlx5_ib_mr *pi_mr = mr->pi_mr;
-
- data_len = pi_mr->data_length;
- data_key = pi_mr->ibmr.lkey;
- data_va = pi_mr->data_iova;
- if (pi_mr->meta_ndescs) {
- prot_len = pi_mr->meta_length;
- prot_key = pi_mr->ibmr.lkey;
- prot_va = pi_mr->pi_iova;
- prot = true;
- }
-
- if (!prot || (data_key == prot_key && data_va == prot_va &&
- data_len == prot_len)) {
- /**
- * Source domain doesn't contain signature information
- * or data and protection are interleaved in memory.
- * So need construct:
- * ------------------
- * | data_klm |
- * ------------------
- * | BSF |
- * ------------------
- **/
- struct mlx5_klm *data_klm = *seg;
-
- data_klm->bcount = cpu_to_be32(data_len);
- data_klm->key = cpu_to_be32(data_key);
- data_klm->va = cpu_to_be64(data_va);
- wqe_size = ALIGN(sizeof(*data_klm), 64);
- } else {
- /**
- * Source domain contains signature information
- * So need construct a strided block format:
- * ---------------------------
- * | stride_block_ctrl |
- * ---------------------------
- * | data_klm |
- * ---------------------------
- * | prot_klm |
- * ---------------------------
- * | BSF |
- * ---------------------------
- **/
- struct mlx5_stride_block_ctrl_seg *sblock_ctrl;
- struct mlx5_stride_block_entry *data_sentry;
- struct mlx5_stride_block_entry *prot_sentry;
- u16 block_size = sig_attrs->mem.sig.dif.pi_interval;
- int prot_size;
-
- sblock_ctrl = *seg;
- data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl);
- prot_sentry = (void *)data_sentry + sizeof(*data_sentry);
-
- prot_size = prot_field_size(sig_attrs->mem.sig_type);
- if (!prot_size) {
- pr_err("Bad block size given: %u\n", block_size);
- return -EINVAL;
- }
- sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size +
- prot_size);
- sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP);
- sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size);
- sblock_ctrl->num_entries = cpu_to_be16(2);
-
- data_sentry->bcount = cpu_to_be16(block_size);
- data_sentry->key = cpu_to_be32(data_key);
- data_sentry->va = cpu_to_be64(data_va);
- data_sentry->stride = cpu_to_be16(block_size);
-
- prot_sentry->bcount = cpu_to_be16(prot_size);
- prot_sentry->key = cpu_to_be32(prot_key);
- prot_sentry->va = cpu_to_be64(prot_va);
- prot_sentry->stride = cpu_to_be16(prot_size);
-
- wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) +
- sizeof(*prot_sentry), 64);
- }
-
- *seg += wqe_size;
- *size += wqe_size / 16;
- handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
-
- bsf = *seg;
- ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len);
- if (ret)
- return -EINVAL;
-
- *seg += sizeof(*bsf);
- *size += sizeof(*bsf) / 16;
- handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
-
- return 0;
-}
-
-static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
- struct ib_mr *sig_mr, int access_flags,
- u32 size, u32 length, u32 pdn)
-{
- u32 sig_key = sig_mr->rkey;
- u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
-
- memset(seg, 0, sizeof(*seg));
-
- seg->flags = get_umr_flags(access_flags) | MLX5_MKC_ACCESS_MODE_KLMS;
- seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
- seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
- MLX5_MKEY_BSF_EN | pdn);
- seg->len = cpu_to_be64(length);
- seg->xlt_oct_size = cpu_to_be32(get_xlt_octo(size));
- seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
-}
-
-static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
- u32 size)
-{
- memset(umr, 0, sizeof(*umr));
-
- umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE;
- umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
- umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE);
- umr->mkey_mask = sig_mkey_mask();
-}
-
-static int set_pi_umr_wr(const struct ib_send_wr *send_wr,
- struct mlx5_ib_qp *qp, void **seg, int *size,
- void **cur_edge)
-{
- const struct ib_reg_wr *wr = reg_wr(send_wr);
- struct mlx5_ib_mr *sig_mr = to_mmr(wr->mr);
- struct mlx5_ib_mr *pi_mr = sig_mr->pi_mr;
- struct ib_sig_attrs *sig_attrs = sig_mr->ibmr.sig_attrs;
- u32 pdn = get_pd(qp)->pdn;
- u32 xlt_size;
- int region_len, ret;
-
- if (unlikely(send_wr->num_sge != 0) ||
- unlikely(wr->access & IB_ACCESS_REMOTE_ATOMIC) ||
- unlikely(!sig_mr->sig) || unlikely(!qp->ibqp.integrity_en) ||
- unlikely(!sig_mr->sig->sig_status_checked))
- return -EINVAL;
-
- /* length of the protected region, data + protection */
- region_len = pi_mr->ibmr.length;
-
- /**
- * KLM octoword size - if protection was provided
- * then we use strided block format (3 octowords),
- * else we use single KLM (1 octoword)
- **/
- if (sig_attrs->mem.sig_type != IB_SIG_TYPE_NONE)
- xlt_size = 0x30;
- else
- xlt_size = sizeof(struct mlx5_klm);
-
- set_sig_umr_segment(*seg, xlt_size);
- *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
- *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
-
- set_sig_mkey_segment(*seg, wr->mr, wr->access, xlt_size, region_len,
- pdn);
- *seg += sizeof(struct mlx5_mkey_seg);
- *size += sizeof(struct mlx5_mkey_seg) / 16;
- handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
-
- ret = set_sig_data_segment(send_wr, wr->mr, sig_attrs, qp, seg, size,
- cur_edge);
- if (ret)
- return ret;
-
- sig_mr->sig->sig_status_checked = false;
- return 0;
-}
-
-static int set_psv_wr(struct ib_sig_domain *domain,
- u32 psv_idx, void **seg, int *size)
-{
- struct mlx5_seg_set_psv *psv_seg = *seg;
-
- memset(psv_seg, 0, sizeof(*psv_seg));
- psv_seg->psv_num = cpu_to_be32(psv_idx);
- switch (domain->sig_type) {
- case IB_SIG_TYPE_NONE:
- break;
- case IB_SIG_TYPE_T10_DIF:
- psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 |
- domain->sig.dif.app_tag);
- psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
- break;
- default:
- pr_err("Bad signature type (%d) is given.\n",
- domain->sig_type);
- return -EINVAL;
- }
-
- *seg += sizeof(*psv_seg);
- *size += sizeof(*psv_seg) / 16;
-
- return 0;
-}
-
-static int set_reg_wr(struct mlx5_ib_qp *qp,
- const struct ib_reg_wr *wr,
- void **seg, int *size, void **cur_edge,
- bool check_not_free)
-{
- struct mlx5_ib_mr *mr = to_mmr(wr->mr);
- struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
- struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device);
- int mr_list_size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size;
- bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD;
- bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC;
- u8 flags = 0;
-
- if (!mlx5_ib_can_use_umr(dev, atomic, wr->access)) {
- mlx5_ib_warn(to_mdev(qp->ibqp.device),
- "Fast update of %s for MR is disabled\n",
- (MLX5_CAP_GEN(dev->mdev,
- umr_modify_entity_size_disabled)) ?
- "entity size" :
- "atomic access");
- return -EINVAL;
- }
-
- if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
- mlx5_ib_warn(to_mdev(qp->ibqp.device),
- "Invalid IB_SEND_INLINE send flag\n");
- return -EINVAL;
- }
-
- if (check_not_free)
- flags |= MLX5_UMR_CHECK_NOT_FREE;
- if (umr_inline)
- flags |= MLX5_UMR_INLINE;
-
- set_reg_umr_seg(*seg, mr, flags, atomic);
- *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
- *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
-
- set_reg_mkey_seg(*seg, mr, wr->key, wr->access);
- *seg += sizeof(struct mlx5_mkey_seg);
- *size += sizeof(struct mlx5_mkey_seg) / 16;
- handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
-
- if (umr_inline) {
- memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs,
- mr_list_size);
- *size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4);
- } else {
- set_reg_data_seg(*seg, mr, pd);
- *seg += sizeof(struct mlx5_wqe_data_seg);
- *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
- }
- return 0;
-}
-
-static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size,
- void **cur_edge)
-{
- set_linv_umr_seg(*seg);
- *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
- *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
- set_linv_mkey_seg(*seg);
- *seg += sizeof(struct mlx5_mkey_seg);
- *size += sizeof(struct mlx5_mkey_seg) / 16;
- handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
-}
-
-static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16)
-{
- __be32 *p = NULL;
- int i, j;
-
- pr_debug("dump WQE index %u:\n", idx);
- for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
- if ((i & 0xf) == 0) {
- p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx);
- pr_debug("WQBB at %p:\n", (void *)p);
- j = 0;
- idx = (idx + 1) & (qp->sq.wqe_cnt - 1);
- }
- pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
- be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]),
- be32_to_cpu(p[j + 3]));
- }
-}
-
-static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
- struct mlx5_wqe_ctrl_seg **ctrl,
- const struct ib_send_wr *wr, unsigned int *idx,
- int *size, void **cur_edge, int nreq,
- bool send_signaled, bool solicited)
-{
- if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
- return -ENOMEM;
-
- *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
- *seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx);
- *ctrl = *seg;
- *(uint32_t *)(*seg + 8) = 0;
- (*ctrl)->imm = send_ieth(wr);
- (*ctrl)->fm_ce_se = qp->sq_signal_bits |
- (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) |
- (solicited ? MLX5_WQE_CTRL_SOLICITED : 0);
-
- *seg += sizeof(**ctrl);
- *size = sizeof(**ctrl) / 16;
- *cur_edge = qp->sq.cur_edge;
-
- return 0;
-}
-
-static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
- struct mlx5_wqe_ctrl_seg **ctrl,
- const struct ib_send_wr *wr, unsigned *idx,
- int *size, void **cur_edge, int nreq)
-{
- return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq,
- wr->send_flags & IB_SEND_SIGNALED,
- wr->send_flags & IB_SEND_SOLICITED);
-}
-
-static void finish_wqe(struct mlx5_ib_qp *qp,
- struct mlx5_wqe_ctrl_seg *ctrl,
- void *seg, u8 size, void *cur_edge,
- unsigned int idx, u64 wr_id, int nreq, u8 fence,
- u32 mlx5_opcode)
-{
- u8 opmod = 0;
-
- ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
- mlx5_opcode | ((u32)opmod << 24));
- ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
- ctrl->fm_ce_se |= fence;
- if (unlikely(qp->wq_sig))
- ctrl->signature = wq_sig(ctrl);
-
- qp->sq.wrid[idx] = wr_id;
- qp->sq.w_list[idx].opcode = mlx5_opcode;
- qp->sq.wqe_head[idx] = qp->sq.head + nreq;
- qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
- qp->sq.w_list[idx].next = qp->sq.cur_post;
-
- /* We save the edge which was possibly updated during the WQE
- * construction, into SQ's cache.
- */
- seg = PTR_ALIGN(seg, MLX5_SEND_WQE_BB);
- qp->sq.cur_edge = (unlikely(seg == cur_edge)) ?
- get_sq_edge(&qp->sq, qp->sq.cur_post &
- (qp->sq.wqe_cnt - 1)) :
- cur_edge;
-}
-
-static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
- const struct ib_send_wr **bad_wr, bool drain)
-{
- struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */
- struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
- struct mlx5_core_dev *mdev = dev->mdev;
- struct ib_reg_wr reg_pi_wr;
- struct mlx5_ib_qp *qp;
- struct mlx5_ib_mr *mr;
- struct mlx5_ib_mr *pi_mr;
- struct mlx5_ib_mr pa_pi_mr;
- struct ib_sig_attrs *sig_attrs;
- struct mlx5_wqe_xrc_seg *xrc;
- struct mlx5_bf *bf;
- void *cur_edge;
- int uninitialized_var(size);
- unsigned long flags;
- unsigned idx;
- int err = 0;
- int num_sge;
- void *seg;
- int nreq;
- int i;
- u8 next_fence = 0;
- u8 fence;
-
- if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
- !drain)) {
- *bad_wr = wr;
- return -EIO;
- }
-
- if (unlikely(ibqp->qp_type == IB_QPT_GSI))
- return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
-
- qp = to_mqp(ibqp);
- bf = &qp->bf;
-
- spin_lock_irqsave(&qp->sq.lock, flags);
-
- for (nreq = 0; wr; nreq++, wr = wr->next) {
- if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
- mlx5_ib_warn(dev, "\n");
- err = -EINVAL;
- *bad_wr = wr;
- goto out;
- }
-
- num_sge = wr->num_sge;
- if (unlikely(num_sge > qp->sq.max_gs)) {
- mlx5_ib_warn(dev, "\n");
- err = -EINVAL;
- *bad_wr = wr;
- goto out;
- }
-
- err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge,
- nreq);
- if (err) {
- mlx5_ib_warn(dev, "\n");
- err = -ENOMEM;
- *bad_wr = wr;
- goto out;
- }
-
- if (wr->opcode == IB_WR_REG_MR ||
- wr->opcode == IB_WR_REG_MR_INTEGRITY) {
- fence = dev->umr_fence;
- next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
- } else {
- if (wr->send_flags & IB_SEND_FENCE) {
- if (qp->next_fence)
- fence = MLX5_FENCE_MODE_SMALL_AND_FENCE;
- else
- fence = MLX5_FENCE_MODE_FENCE;
- } else {
- fence = qp->next_fence;
- }
- }
-
- switch (ibqp->qp_type) {
- case IB_QPT_XRC_INI:
- xrc = seg;
- seg += sizeof(*xrc);
- size += sizeof(*xrc) / 16;
- /* fall through */
- case IB_QPT_RC:
- switch (wr->opcode) {
- case IB_WR_RDMA_READ:
- case IB_WR_RDMA_WRITE:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- set_raddr_seg(seg, rdma_wr(wr)->remote_addr,
- rdma_wr(wr)->rkey);
- seg += sizeof(struct mlx5_wqe_raddr_seg);
- size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
- break;
-
- case IB_WR_ATOMIC_CMP_AND_SWP:
- case IB_WR_ATOMIC_FETCH_AND_ADD:
- case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
- mlx5_ib_warn(dev, "Atomic operations are not supported yet\n");
- err = -ENOSYS;
- *bad_wr = wr;
- goto out;
-
- case IB_WR_LOCAL_INV:
- qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
- ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
- set_linv_wr(qp, &seg, &size, &cur_edge);
- num_sge = 0;
- break;
-
- case IB_WR_REG_MR:
- qp->sq.wr_data[idx] = IB_WR_REG_MR;
- ctrl->imm = cpu_to_be32(reg_wr(wr)->key);
- err = set_reg_wr(qp, reg_wr(wr), &seg, &size,
- &cur_edge, true);
- if (err) {
- *bad_wr = wr;
- goto out;
- }
- num_sge = 0;
- break;
-
- case IB_WR_REG_MR_INTEGRITY:
- qp->sq.wr_data[idx] = IB_WR_REG_MR_INTEGRITY;
-
- mr = to_mmr(reg_wr(wr)->mr);
- pi_mr = mr->pi_mr;
-
- if (pi_mr) {
- memset(&reg_pi_wr, 0,
- sizeof(struct ib_reg_wr));
-
- reg_pi_wr.mr = &pi_mr->ibmr;
- reg_pi_wr.access = reg_wr(wr)->access;
- reg_pi_wr.key = pi_mr->ibmr.rkey;
-
- ctrl->imm = cpu_to_be32(reg_pi_wr.key);
- /* UMR for data + prot registration */
- err = set_reg_wr(qp, &reg_pi_wr, &seg,
- &size, &cur_edge,
- false);
- if (err) {
- *bad_wr = wr;
- goto out;
- }
- finish_wqe(qp, ctrl, seg, size,
- cur_edge, idx, wr->wr_id,
- nreq, fence,
- MLX5_OPCODE_UMR);
-
- err = begin_wqe(qp, &seg, &ctrl, wr,
- &idx, &size, &cur_edge,
- nreq);
- if (err) {
- mlx5_ib_warn(dev, "\n");
- err = -ENOMEM;
- *bad_wr = wr;
- goto out;
- }
- } else {
- memset(&pa_pi_mr, 0,
- sizeof(struct mlx5_ib_mr));
- /* No UMR, use local_dma_lkey */
- pa_pi_mr.ibmr.lkey =
- mr->ibmr.pd->local_dma_lkey;
-
- pa_pi_mr.ndescs = mr->ndescs;
- pa_pi_mr.data_length = mr->data_length;
- pa_pi_mr.data_iova = mr->data_iova;
- if (mr->meta_ndescs) {
- pa_pi_mr.meta_ndescs =
- mr->meta_ndescs;
- pa_pi_mr.meta_length =
- mr->meta_length;
- pa_pi_mr.pi_iova = mr->pi_iova;
- }
-
- pa_pi_mr.ibmr.length = mr->ibmr.length;
- mr->pi_mr = &pa_pi_mr;
- }
- ctrl->imm = cpu_to_be32(mr->ibmr.rkey);
- /* UMR for sig MR */
- err = set_pi_umr_wr(wr, qp, &seg, &size,
- &cur_edge);
- if (err) {
- mlx5_ib_warn(dev, "\n");
- *bad_wr = wr;
- goto out;
- }
- finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
- wr->wr_id, nreq, fence,
- MLX5_OPCODE_UMR);
-
- /*
- * SET_PSV WQEs are not signaled and solicited
- * on error
- */
- sig_attrs = mr->ibmr.sig_attrs;
- err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
- &size, &cur_edge, nreq, false,
- true);
- if (err) {
- mlx5_ib_warn(dev, "\n");
- err = -ENOMEM;
- *bad_wr = wr;
- goto out;
- }
- err = set_psv_wr(&sig_attrs->mem,
- mr->sig->psv_memory.psv_idx,
- &seg, &size);
- if (err) {
- mlx5_ib_warn(dev, "\n");
- *bad_wr = wr;
- goto out;
- }
- finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
- wr->wr_id, nreq, next_fence,
- MLX5_OPCODE_SET_PSV);
-
- err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
- &size, &cur_edge, nreq, false,
- true);
- if (err) {
- mlx5_ib_warn(dev, "\n");
- err = -ENOMEM;
- *bad_wr = wr;
- goto out;
- }
- err = set_psv_wr(&sig_attrs->wire,
- mr->sig->psv_wire.psv_idx,
- &seg, &size);
- if (err) {
- mlx5_ib_warn(dev, "\n");
- *bad_wr = wr;
- goto out;
- }
- finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
- wr->wr_id, nreq, next_fence,
- MLX5_OPCODE_SET_PSV);
-
- qp->next_fence =
- MLX5_FENCE_MODE_INITIATOR_SMALL;
- num_sge = 0;
- goto skip_psv;
-
- default:
- break;
- }
- break;
-
- case IB_QPT_UC:
- switch (wr->opcode) {
- case IB_WR_RDMA_WRITE:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- set_raddr_seg(seg, rdma_wr(wr)->remote_addr,
- rdma_wr(wr)->rkey);
- seg += sizeof(struct mlx5_wqe_raddr_seg);
- size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
- break;
-
- default:
- break;
- }
- break;
-
- case IB_QPT_SMI:
- if (unlikely(!mdev->port_caps[qp->port - 1].has_smi)) {
- mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n");
- err = -EPERM;
- *bad_wr = wr;
- goto out;
- }
- /* fall through */
- case MLX5_IB_QPT_HW_GSI:
- set_datagram_seg(seg, wr);
- seg += sizeof(struct mlx5_wqe_datagram_seg);
- size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
- handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
-
- break;
- case IB_QPT_UD:
- set_datagram_seg(seg, wr);
- seg += sizeof(struct mlx5_wqe_datagram_seg);
- size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
- handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
-
- /* handle qp that supports ud offload */
- if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) {
- struct mlx5_wqe_eth_pad *pad;
-
- pad = seg;
- memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad));
- seg += sizeof(struct mlx5_wqe_eth_pad);
- size += sizeof(struct mlx5_wqe_eth_pad) / 16;
- set_eth_seg(wr, qp, &seg, &size, &cur_edge);
- handle_post_send_edge(&qp->sq, &seg, size,
- &cur_edge);
- }
- break;
- case MLX5_IB_QPT_REG_UMR:
- if (wr->opcode != MLX5_IB_WR_UMR) {
- err = -EINVAL;
- mlx5_ib_warn(dev, "bad opcode\n");
- goto out;
- }
- qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
- ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey);
- err = set_reg_umr_segment(dev, seg, wr, !!(MLX5_CAP_GEN(mdev, atomic)));
- if (unlikely(err))
- goto out;
- seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
- size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
- set_reg_mkey_segment(seg, wr);
- seg += sizeof(struct mlx5_mkey_seg);
- size += sizeof(struct mlx5_mkey_seg) / 16;
- handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
- break;
-
- default:
- break;
- }
-
- if (wr->send_flags & IB_SEND_INLINE && num_sge) {
- err = set_data_inl_seg(qp, wr, &seg, &size, &cur_edge);
- if (unlikely(err)) {
- mlx5_ib_warn(dev, "\n");
- *bad_wr = wr;
- goto out;
- }
- } else {
- for (i = 0; i < num_sge; i++) {
- handle_post_send_edge(&qp->sq, &seg, size,
- &cur_edge);
- if (likely(wr->sg_list[i].length)) {
- set_data_ptr_seg
- ((struct mlx5_wqe_data_seg *)seg,
- wr->sg_list + i);
- size += sizeof(struct mlx5_wqe_data_seg) / 16;
- seg += sizeof(struct mlx5_wqe_data_seg);
- }
- }
- }
-
- qp->next_fence = next_fence;
- finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq,
- fence, mlx5_ib_opcode[wr->opcode]);
-skip_psv:
- if (0)
- dump_wqe(qp, idx, size);
- }
-
-out:
- if (likely(nreq)) {
- qp->sq.head += nreq;
-
- /* Make sure that descriptors are written before
- * updating doorbell record and ringing the doorbell
- */
- wmb();
-
- qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
-
- /* Make sure doorbell record is visible to the HCA before
- * we hit doorbell */
- wmb();
-
- mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset);
- /* Make sure doorbells don't leak out of SQ spinlock
- * and reach the HCA out of order.
- */
- bf->offset ^= bf->buf_size;
- }
-
- spin_unlock_irqrestore(&qp->sq.lock, flags);
-
- return err;
-}
-
-int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
- const struct ib_send_wr **bad_wr)
-{
- return _mlx5_ib_post_send(ibqp, wr, bad_wr, false);
-}
-
-static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size)
-{
- sig->signature = calc_sig(sig, size);
-}
-
-static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
- const struct ib_recv_wr **bad_wr, bool drain)
-{
- struct mlx5_ib_qp *qp = to_mqp(ibqp);
- struct mlx5_wqe_data_seg *scat;
- struct mlx5_rwqe_sig *sig;
- struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
- struct mlx5_core_dev *mdev = dev->mdev;
- unsigned long flags;
- int err = 0;
- int nreq;
- int ind;
- int i;
-
- if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
- !drain)) {
- *bad_wr = wr;
- return -EIO;
- }
-
- if (unlikely(ibqp->qp_type == IB_QPT_GSI))
- return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr);
-
- spin_lock_irqsave(&qp->rq.lock, flags);
-
- ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
-
- for (nreq = 0; wr; nreq++, wr = wr->next) {
- if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
- err = -ENOMEM;
- *bad_wr = wr;
- goto out;
- }
-
- if (unlikely(wr->num_sge > qp->rq.max_gs)) {
- err = -EINVAL;
- *bad_wr = wr;
- goto out;
- }
-
- scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind);
- if (qp->wq_sig)
- scat++;
-
- for (i = 0; i < wr->num_sge; i++)
- set_data_ptr_seg(scat + i, wr->sg_list + i);
-
- if (i < qp->rq.max_gs) {
- scat[i].byte_count = 0;
- scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
- scat[i].addr = 0;
- }
-
- if (qp->wq_sig) {
- sig = (struct mlx5_rwqe_sig *)scat;
- set_sig_seg(sig, (qp->rq.max_gs + 1) << 2);
- }
-
- qp->rq.wrid[ind] = wr->wr_id;
-
- ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
- }
-
-out:
- if (likely(nreq)) {
- qp->rq.head += nreq;
-
- /* Make sure that descriptors are written before
- * doorbell record.
- */
- wmb();
-
- *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
- }
-
- spin_unlock_irqrestore(&qp->rq.lock, flags);
-
- return err;
-}
-
-int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
- const struct ib_recv_wr **bad_wr)
-{
- return _mlx5_ib_post_recv(ibqp, wr, bad_wr, false);
-}
-
static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state)
{
switch (mlx5_state) {
@@ -5828,14 +4569,14 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
memset(qp_init_attr, 0, sizeof(*qp_init_attr));
memset(qp_attr, 0, sizeof(*qp_attr));
- if (unlikely(qp->qp_sub_type == MLX5_IB_QPT_DCT))
+ if (unlikely(qp->type == MLX5_IB_QPT_DCT))
return mlx5_ib_dct_query_qp(dev, qp, qp_attr,
qp_attr_mask, qp_init_attr);
mutex_lock(&qp->mutex);
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
- qp->flags & MLX5_IB_QP_UNDERLAY) {
+ qp->flags & IB_QP_CREATE_SOURCE_QPN) {
err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state);
if (err)
goto out;
@@ -5869,18 +4610,7 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
qp_init_attr->cap = qp_attr->cap;
- qp_init_attr->create_flags = 0;
- if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
- qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
-
- if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
- qp_init_attr->create_flags |= IB_QP_CREATE_CROSS_CHANNEL;
- if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
- qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND;
- if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
- qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV;
- if (qp->flags & MLX5_IB_QP_SQPN_QP1)
- qp_init_attr->create_flags |= MLX5_IB_QP_CREATE_SQPN_QP1;
+ qp_init_attr->create_flags = qp->flags;
qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
@@ -6541,7 +5271,7 @@ void mlx5_ib_drain_sq(struct ib_qp *qp)
sdrain.cqe.done = mlx5_ib_drain_qp_done;
init_completion(&sdrain.done);
- ret = _mlx5_ib_post_send(qp, &swr.wr, &bad_swr, true);
+ ret = mlx5_ib_post_send_drain(qp, &swr.wr, &bad_swr);
if (ret) {
WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
return;
@@ -6571,7 +5301,7 @@ void mlx5_ib_drain_rq(struct ib_qp *qp)
rdrain.cqe.done = mlx5_ib_drain_qp_done;
init_completion(&rdrain.done);
- ret = _mlx5_ib_post_recv(qp, &rwr, &bad_rwr, true);
+ ret = mlx5_ib_post_recv_drain(qp, &rwr, &bad_rwr);
if (ret) {
WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
return;
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index b1a8a9175040..6d1ff13d2283 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -310,12 +310,18 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq,
srq->msrq.event = mlx5_ib_srq_event;
srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
- if (udata)
- if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) {
+ if (udata) {
+ struct mlx5_ib_create_srq_resp resp = {
+ .srqn = srq->msrq.srqn,
+ };
+
+ if (ib_copy_to_udata(udata, &resp, min(udata->outlen,
+ sizeof(resp)))) {
mlx5_ib_dbg(dev, "copy to user failed\n");
err = -EFAULT;
goto err_core;
}
+ }
init_attr->attr.max_wr = srq->msrq.max - 1;
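The reworked copy above bounds the response with min(udata->outlen, sizeof(resp)), so an older userspace that only knows a shorter response layout still receives the prefix it expects. A small userspace sketch of that pattern; the struct name and sizes are invented for illustration, only the min()-bounded copy mirrors the change:

#include <stdio.h>
#include <string.h>

/* hypothetical "current" response layout; only srqn existed in the old ABI */
struct create_srq_resp {
    unsigned int srqn;
    unsigned int reserved;
};

int main(void)
{
    struct create_srq_resp resp = { .srqn = 0x1234, .reserved = 0 };
    unsigned int old_user_resp;            /* old ABI: 4 bytes, srqn only */
    size_t outlen = sizeof(old_user_resp); /* length userspace advertised */
    size_t n = outlen < sizeof(resp) ? outlen : sizeof(resp);

    memcpy(&old_user_resp, &resp, n);      /* ib_copy_to_udata() analogue */
    printf("copied %zu bytes, srqn=0x%x\n", n, old_user_resp);
    return 0;
}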
diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c
index c851570791af..bc50a712bf2e 100644
--- a/drivers/infiniband/hw/mlx5/srq_cmd.c
+++ b/drivers/infiniband/hw/mlx5/srq_cmd.c
@@ -132,38 +132,33 @@ static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
static int destroy_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
{
- u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0};
- u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_srq_in)] = {};
- MLX5_SET(destroy_srq_in, srq_in, opcode,
- MLX5_CMD_OP_DESTROY_SRQ);
- MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn);
- MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid);
+ MLX5_SET(destroy_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_SRQ);
+ MLX5_SET(destroy_srq_in, in, srqn, srq->srqn);
+ MLX5_SET(destroy_srq_in, in, uid, srq->uid);
- return mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out,
- sizeof(srq_out));
+ return mlx5_cmd_exec_in(dev->mdev, destroy_srq, in);
}
static int arm_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
u16 lwm, int is_srq)
{
- u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0};
- u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {};
- MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ);
- MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ);
- MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn);
- MLX5_SET(arm_rq_in, srq_in, lwm, lwm);
- MLX5_SET(arm_rq_in, srq_in, uid, srq->uid);
+ MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ);
+ MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ);
+ MLX5_SET(arm_rq_in, in, srq_number, srq->srqn);
+ MLX5_SET(arm_rq_in, in, lwm, lwm);
+ MLX5_SET(arm_rq_in, in, uid, srq->uid);
- return mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out,
- sizeof(srq_out));
+ return mlx5_cmd_exec_in(dev->mdev, arm_rq, in);
}
static int query_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *out)
{
- u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(query_srq_in)] = {};
u32 *srq_out;
void *srqc;
int err;
@@ -172,20 +167,18 @@ static int query_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
if (!srq_out)
return -ENOMEM;
- MLX5_SET(query_srq_in, srq_in, opcode,
- MLX5_CMD_OP_QUERY_SRQ);
- MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn);
- err = mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out,
- MLX5_ST_SZ_BYTES(query_srq_out));
+ MLX5_SET(query_srq_in, in, opcode, MLX5_CMD_OP_QUERY_SRQ);
+ MLX5_SET(query_srq_in, in, srqn, srq->srqn);
+	err = mlx5_cmd_exec_inout(dev->mdev, query_srq, in, srq_out);
if (err)
goto out;
srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry);
get_srqc(srqc, out);
if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD)
out->flags |= MLX5_SRQ_FLAG_ERR;
out:
kvfree(srq_out);
return err;
}
@@ -234,39 +227,35 @@ out:
static int destroy_xrc_srq_cmd(struct mlx5_ib_dev *dev,
struct mlx5_core_srq *srq)
{
- u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0};
- u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {};
- MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode,
- MLX5_CMD_OP_DESTROY_XRC_SRQ);
- MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
- MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid);
+ MLX5_SET(destroy_xrc_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ);
+ MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, srq->srqn);
+ MLX5_SET(destroy_xrc_srq_in, in, uid, srq->uid);
- return mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in),
- xrcsrq_out, sizeof(xrcsrq_out));
+ return mlx5_cmd_exec_in(dev->mdev, destroy_xrc_srq, in);
}
static int arm_xrc_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
u16 lwm)
{
- u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0};
- u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {};
- MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ);
- MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
- MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
- MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm);
- MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid);
+ MLX5_SET(arm_xrc_srq_in, in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ);
+ MLX5_SET(arm_xrc_srq_in, in, op_mod,
+ MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
+ MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, srq->srqn);
+ MLX5_SET(arm_xrc_srq_in, in, lwm, lwm);
+ MLX5_SET(arm_xrc_srq_in, in, uid, srq->uid);
- return mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in),
- xrcsrq_out, sizeof(xrcsrq_out));
+ return mlx5_cmd_exec_in(dev->mdev, arm_xrc_srq, in);
}
static int query_xrc_srq_cmd(struct mlx5_ib_dev *dev,
struct mlx5_core_srq *srq,
struct mlx5_srq_attr *out)
{
- u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)];
+ u32 in[MLX5_ST_SZ_DW(query_xrc_srq_in)] = {};
u32 *xrcsrq_out;
void *xrc_srqc;
int err;
@@ -274,14 +263,11 @@ static int query_xrc_srq_cmd(struct mlx5_ib_dev *dev,
xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL);
if (!xrcsrq_out)
return -ENOMEM;
- memset(xrcsrq_in, 0, sizeof(xrcsrq_in));
- MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode,
- MLX5_CMD_OP_QUERY_XRC_SRQ);
- MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
+ MLX5_SET(query_xrc_srq_in, in, opcode, MLX5_CMD_OP_QUERY_XRC_SRQ);
+ MLX5_SET(query_xrc_srq_in, in, xrc_srqn, srq->srqn);
- err = mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in),
- xrcsrq_out, MLX5_ST_SZ_BYTES(query_xrc_srq_out));
+ err = mlx5_cmd_exec_inout(dev->mdev, query_xrc_srq, in, xrcsrq_out);
if (err)
goto out;
@@ -341,13 +327,12 @@ out:
static int destroy_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
{
- u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {};
- u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {};
MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn);
MLX5_SET(destroy_rmp_in, in, uid, srq->uid);
- return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev->mdev, destroy_rmp, in);
}
static int arm_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
@@ -384,7 +369,7 @@ static int arm_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP);
- err = mlx5_cmd_exec(dev->mdev, in, inlen, out, outlen);
+ err = mlx5_cmd_exec_inout(dev->mdev, modify_rmp, in, out);
out:
kvfree(in);
@@ -414,7 +399,7 @@ static int query_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
MLX5_SET(query_rmp_in, rmp_in, opcode, MLX5_CMD_OP_QUERY_RMP);
MLX5_SET(query_rmp_in, rmp_in, rmpn, srq->srqn);
- err = mlx5_cmd_exec(dev->mdev, rmp_in, inlen, rmp_out, outlen);
+ err = mlx5_cmd_exec_inout(dev->mdev, query_rmp, rmp_in, rmp_out);
if (err)
goto out;
@@ -477,36 +462,34 @@ static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
static int destroy_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
{
- u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {};
MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ);
-	MLX5_SET(destroy_xrq_in, in, xrqn,   srq->srqn);
+	MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn);
MLX5_SET(destroy_xrq_in, in, uid, srq->uid);
- return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev->mdev, destroy_xrq, in);
}
static int arm_xrq_cmd(struct mlx5_ib_dev *dev,
struct mlx5_core_srq *srq,
u16 lwm)
{
- u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {};
-	MLX5_SET(arm_rq_in, in, opcode,     MLX5_CMD_OP_ARM_RQ);
-	MLX5_SET(arm_rq_in, in, op_mod,     MLX5_ARM_RQ_IN_OP_MOD_XRQ);
+	MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ);
+	MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ);
MLX5_SET(arm_rq_in, in, srq_number, srq->srqn);
-	MLX5_SET(arm_rq_in, in, lwm,        lwm);
+	MLX5_SET(arm_rq_in, in, lwm, lwm);
MLX5_SET(arm_rq_in, in, uid, srq->uid);
- return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev->mdev, arm_rq, in);
}
static int query_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *out)
{
- u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {};
u32 *xrq_out;
int outlen = MLX5_ST_SZ_BYTES(query_xrq_out);
void *xrqc;
@@ -519,7 +502,7 @@ static int query_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ);
MLX5_SET(query_xrq_in, in, xrqn, srq->srqn);
- err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), xrq_out, outlen);
+ err = mlx5_cmd_exec_inout(dev->mdev, query_xrq, in, xrq_out);
if (err)
goto out;
diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c
new file mode 100644
index 000000000000..2c6df1c43b55
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/wr.c
@@ -0,0 +1,1504 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <linux/gfp.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/driver.h>
+#include "wr.h"
+
+static const u32 mlx5_ib_opcode[] = {
+ [IB_WR_SEND] = MLX5_OPCODE_SEND,
+ [IB_WR_LSO] = MLX5_OPCODE_LSO,
+ [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM,
+ [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE,
+ [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM,
+ [IB_WR_RDMA_READ] = MLX5_OPCODE_RDMA_READ,
+ [IB_WR_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_CS,
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA,
+ [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL,
+ [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR,
+ [IB_WR_REG_MR] = MLX5_OPCODE_UMR,
+ [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS,
+ [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA,
+ [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR,
+};
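The translation table above is a sparse array indexed directly by the IB work-request opcode; entries never posted through this path simply read back as zero. A minimal sketch of the same designated-initializer lookup, with invented opcode numbers rather than the real IB/MLX5 values:

#include <stdio.h>

enum { WR_SEND = 2, WR_RDMA_READ = 4, WR_REG_MR = 32, WR_MAX };

static const unsigned int hw_opcode[WR_MAX + 1] = {
    [WR_SEND]      = 0x0a,
    [WR_RDMA_READ] = 0x10,
    [WR_REG_MR]    = 0x25, /* several WR types may map to one HW opcode */
};

int main(void)
{
    printf("SEND -> 0x%02x, unused slot -> 0x%02x\n",
           hw_opcode[WR_SEND], hw_opcode[3]);
    return 0;
}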
+
+/* handle_post_send_edge - Check if we reached the SQ edge. If so, update to
+ * the next nearby edge and re-resolve the address of the current WQE position.
+ * @sq: SQ buffer.
+ * @seg: Current WQE position (16B aligned).
+ * @wqe_sz: Total current WQE size [16B].
+ * @cur_edge: Updated current edge.
+ */
+static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
+ u32 wqe_sz, void **cur_edge)
+{
+ u32 idx;
+
+ if (likely(*seg != *cur_edge))
+ return;
+
+ idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1);
+ *cur_edge = get_sq_edge(sq, idx);
+
+ *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx);
+}
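This helper exists because the SQ lives in a fragmented buffer: when the running WQE pointer reaches the end of a fragment (an "edge"), the next stride sits at a different address and must be re-resolved. A self-contained sketch of the same bookkeeping, with made-up fragment and stride sizes standing in for MLX5_SEND_WQE_BB and the mlx5_frag_buf layout:

#include <stdio.h>

#define STRIDE   64  /* one send WQE basic block, by analogy */
#define PER_FRAG 8   /* strides per fragment (assumed power of two) */
#define NFRAGS   4

static char frags[NFRAGS][PER_FRAG * STRIDE];

/* resolve a stride index to its address inside the right fragment */
static void *get_wqe(unsigned int idx)
{
    return &frags[idx / PER_FRAG][(idx % PER_FRAG) * STRIDE];
}

/* "edge" = first address past the last contiguous stride of idx's fragment */
static void *get_edge(unsigned int idx)
{
    unsigned int last = idx | (PER_FRAG - 1);

    return (char *)get_wqe(last) + STRIDE;
}

int main(void)
{
    unsigned int idx = 5;

    printf("wqe  %p\nedge %p\nnext fragment starts at %p\n",
           get_wqe(idx), get_edge(idx), (void *)frags[1]);
    return 0;
}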
+
+/* memcpy_send_wqe - copy data from src to the WQE and update the relevant
+ * WQ's pointers. At the end @seg is aligned to 16B regardless of the copied size.
+ * @sq: SQ buffer.
+ * @cur_edge: Updated current edge.
+ * @seg: Current WQE position (16B aligned).
+ * @wqe_sz: Total current WQE size [16B].
+ * @src: Pointer to copy from.
+ * @n: Number of bytes to copy.
+ */
+static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge,
+ void **seg, u32 *wqe_sz, const void *src,
+ size_t n)
+{
+ while (likely(n)) {
+ size_t leftlen = *cur_edge - *seg;
+ size_t copysz = min_t(size_t, leftlen, n);
+ size_t stride;
+
+ memcpy(*seg, src, copysz);
+
+ n -= copysz;
+ src += copysz;
+ stride = !n ? ALIGN(copysz, 16) : copysz;
+ *seg += stride;
+ *wqe_sz += stride >> 4;
+ handle_post_send_edge(sq, seg, *wqe_sz, cur_edge);
+ }
+}
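The subtle step in this copy loop is the final chunk: its stride is rounded up to 16 bytes so that *seg stays 16B aligned and *wqe_sz keeps counting whole 16-byte units. A tiny check of that rounding, with ALIGN() spelled out because the kernel macro is assumed rather than available here:

#include <stdio.h>

static size_t align16(size_t x)
{
    return (x + 15) & ~(size_t)15; /* same as ALIGN(x, 16) */
}

int main(void)
{
    size_t copied = 42; /* last chunk of an inline header, say */

    /* 42 bytes advance the WQE pointer by 48 bytes == 3 sixteen-byte units */
    printf("stride=%zu, wqe_sz+=%zu\n", align16(copied), align16(copied) >> 4);
    return 0;
}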
+
+static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq,
+ struct ib_cq *ib_cq)
+{
+ struct mlx5_ib_cq *cq;
+ unsigned int cur;
+
+ cur = wq->head - wq->tail;
+ if (likely(cur + nreq < wq->max_post))
+ return 0;
+
+ cq = to_mcq(ib_cq);
+ spin_lock(&cq->lock);
+ cur = wq->head - wq->tail;
+ spin_unlock(&cq->lock);
+
+ return cur + nreq >= wq->max_post;
+}
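head and tail are free-running counters, so the occupancy test relies on unsigned wraparound: head - tail stays correct even after head wraps around. For example, assuming a 32-bit unsigned int:

#include <stdio.h>

int main(void)
{
    unsigned int head = 0x0000000a; /* head has wrapped past UINT_MAX */
    unsigned int tail = 0xfffffff0;
    unsigned int max_post = 64;

    unsigned int cur = head - tail; /* 0x1a == 26 outstanding WQEs */

    printf("in flight: %u, room for %u more\n", cur, max_post - cur);
    return 0;
}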
+
+static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
+ u64 remote_addr, u32 rkey)
+{
+ rseg->raddr = cpu_to_be64(remote_addr);
+ rseg->rkey = cpu_to_be32(rkey);
+ rseg->reserved = 0;
+}
+
+static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
+ void **seg, int *size, void **cur_edge)
+{
+ struct mlx5_wqe_eth_seg *eseg = *seg;
+
+ memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg));
+
+ if (wr->send_flags & IB_SEND_IP_CSUM)
+ eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM |
+ MLX5_ETH_WQE_L4_CSUM;
+
+ if (wr->opcode == IB_WR_LSO) {
+ struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
+ size_t left, copysz;
+ void *pdata = ud_wr->header;
+ size_t stride;
+
+ left = ud_wr->hlen;
+ eseg->mss = cpu_to_be16(ud_wr->mss);
+ eseg->inline_hdr.sz = cpu_to_be16(left);
+
+ /* memcpy_send_wqe should get a 16B align address. Hence, we
+ * first copy up to the current edge and then, if needed,
+ * continue to memcpy_send_wqe.
+ */
+ copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start,
+ left);
+ memcpy(eseg->inline_hdr.start, pdata, copysz);
+ stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) -
+ sizeof(eseg->inline_hdr.start) + copysz, 16);
+ *size += stride / 16;
+ *seg += stride;
+
+ if (copysz < left) {
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+ left -= copysz;
+ pdata += copysz;
+ memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata,
+ left);
+ }
+
+ return;
+ }
+
+ *seg += sizeof(struct mlx5_wqe_eth_seg);
+ *size += sizeof(struct mlx5_wqe_eth_seg) / 16;
+}
+
+static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
+ const struct ib_send_wr *wr)
+{
+ memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av));
+ dseg->av.dqp_dct =
+ cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV);
+ dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey);
+}
+
+static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
+{
+ dseg->byte_count = cpu_to_be32(sg->length);
+ dseg->lkey = cpu_to_be32(sg->lkey);
+ dseg->addr = cpu_to_be64(sg->addr);
+}
+
+static u64 get_xlt_octo(u64 bytes)
+{
+ return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) /
+ MLX5_IB_UMR_OCTOWORD;
+}
+
+static __be64 frwr_mkey_mask(bool atomic)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_LEN |
+ MLX5_MKEY_MASK_PAGE_SIZE |
+ MLX5_MKEY_MASK_START_ADDR |
+ MLX5_MKEY_MASK_EN_RINVAL |
+ MLX5_MKEY_MASK_KEY |
+ MLX5_MKEY_MASK_LR |
+ MLX5_MKEY_MASK_LW |
+ MLX5_MKEY_MASK_RR |
+ MLX5_MKEY_MASK_RW |
+ MLX5_MKEY_MASK_SMALL_FENCE |
+ MLX5_MKEY_MASK_FREE;
+
+ if (atomic)
+ result |= MLX5_MKEY_MASK_A;
+
+ return cpu_to_be64(result);
+}
+
+static __be64 sig_mkey_mask(void)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_LEN |
+ MLX5_MKEY_MASK_PAGE_SIZE |
+ MLX5_MKEY_MASK_START_ADDR |
+ MLX5_MKEY_MASK_EN_SIGERR |
+ MLX5_MKEY_MASK_EN_RINVAL |
+ MLX5_MKEY_MASK_KEY |
+ MLX5_MKEY_MASK_LR |
+ MLX5_MKEY_MASK_LW |
+ MLX5_MKEY_MASK_RR |
+ MLX5_MKEY_MASK_RW |
+ MLX5_MKEY_MASK_SMALL_FENCE |
+ MLX5_MKEY_MASK_FREE |
+ MLX5_MKEY_MASK_BSF_EN;
+
+ return cpu_to_be64(result);
+}
+
+static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
+ struct mlx5_ib_mr *mr, u8 flags, bool atomic)
+{
+ int size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size;
+
+ memset(umr, 0, sizeof(*umr));
+
+ umr->flags = flags;
+ umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
+ umr->mkey_mask = frwr_mkey_mask(atomic);
+}
+
+static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
+{
+ memset(umr, 0, sizeof(*umr));
+ umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
+ umr->flags = MLX5_UMR_INLINE;
+}
+
+static __be64 get_umr_enable_mr_mask(void)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_KEY |
+ MLX5_MKEY_MASK_FREE;
+
+ return cpu_to_be64(result);
+}
+
+static __be64 get_umr_disable_mr_mask(void)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_FREE;
+
+ return cpu_to_be64(result);
+}
+
+static __be64 get_umr_update_translation_mask(void)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_LEN |
+ MLX5_MKEY_MASK_PAGE_SIZE |
+ MLX5_MKEY_MASK_START_ADDR;
+
+ return cpu_to_be64(result);
+}
+
+static __be64 get_umr_update_access_mask(int atomic)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_LR |
+ MLX5_MKEY_MASK_LW |
+ MLX5_MKEY_MASK_RR |
+ MLX5_MKEY_MASK_RW;
+
+ if (atomic)
+ result |= MLX5_MKEY_MASK_A;
+
+ return cpu_to_be64(result);
+}
+
+static __be64 get_umr_update_pd_mask(void)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_PD;
+
+ return cpu_to_be64(result);
+}
+
+static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
+{
+ if ((mask & MLX5_MKEY_MASK_PAGE_SIZE &&
+ MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) ||
+ (mask & MLX5_MKEY_MASK_A &&
+ MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)))
+ return -EPERM;
+ return 0;
+}
+
+static int set_reg_umr_segment(struct mlx5_ib_dev *dev,
+ struct mlx5_wqe_umr_ctrl_seg *umr,
+ const struct ib_send_wr *wr, int atomic)
+{
+ const struct mlx5_umr_wr *umrwr = umr_wr(wr);
+
+ memset(umr, 0, sizeof(*umr));
+
+ if (!umrwr->ignore_free_state) {
+ if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE)
+ /* fail if free */
+ umr->flags = MLX5_UMR_CHECK_FREE;
+ else
+ /* fail if not free */
+ umr->flags = MLX5_UMR_CHECK_NOT_FREE;
+ }
+
+ umr->xlt_octowords = cpu_to_be16(get_xlt_octo(umrwr->xlt_size));
+ if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) {
+ u64 offset = get_xlt_octo(umrwr->offset);
+
+ umr->xlt_offset = cpu_to_be16(offset & 0xffff);
+ umr->xlt_offset_47_16 = cpu_to_be32(offset >> 16);
+ umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
+ }
+ if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
+ umr->mkey_mask |= get_umr_update_translation_mask();
+ if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) {
+ umr->mkey_mask |= get_umr_update_access_mask(atomic);
+ umr->mkey_mask |= get_umr_update_pd_mask();
+ }
+ if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR)
+ umr->mkey_mask |= get_umr_enable_mr_mask();
+ if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
+ umr->mkey_mask |= get_umr_disable_mr_mask();
+
+ if (!wr->num_sge)
+ umr->flags |= MLX5_UMR_INLINE;
+
+ return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask));
+}
+
+static u8 get_umr_flags(int acc)
+{
+ return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) |
+ (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) |
+ (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) |
+ (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) |
+ MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
+}
+
+static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg,
+ struct mlx5_ib_mr *mr,
+ u32 key, int access)
+{
+ int ndescs = ALIGN(mr->ndescs + mr->meta_ndescs, 8) >> 1;
+
+ memset(seg, 0, sizeof(*seg));
+
+ if (mr->access_mode == MLX5_MKC_ACCESS_MODE_MTT)
+ seg->log2_page_size = ilog2(mr->ibmr.page_size);
+ else if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
+ /* KLMs take twice the size of MTTs */
+ ndescs *= 2;
+
+ seg->flags = get_umr_flags(access) | mr->access_mode;
+ seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00);
+ seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
+ seg->start_addr = cpu_to_be64(mr->ibmr.iova);
+ seg->len = cpu_to_be64(mr->ibmr.length);
+ seg->xlt_oct_size = cpu_to_be32(ndescs);
+}
+
+static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
+{
+ memset(seg, 0, sizeof(*seg));
+ seg->status = MLX5_MKEY_STATUS_FREE;
+}
+
+static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg,
+ const struct ib_send_wr *wr)
+{
+ const struct mlx5_umr_wr *umrwr = umr_wr(wr);
+
+ memset(seg, 0, sizeof(*seg));
+ if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
+ seg->status = MLX5_MKEY_STATUS_FREE;
+
+ seg->flags = convert_access(umrwr->access_flags);
+ if (umrwr->pd)
+ seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn);
+ if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION &&
+ !umrwr->length)
+ seg->flags_pd |= cpu_to_be32(MLX5_MKEY_LEN64);
+
+ seg->start_addr = cpu_to_be64(umrwr->virt_addr);
+ seg->len = cpu_to_be64(umrwr->length);
+ seg->log2_page_size = umrwr->page_shift;
+ seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
+ mlx5_mkey_variant(umrwr->mkey));
+}
+
+static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
+ struct mlx5_ib_mr *mr,
+ struct mlx5_ib_pd *pd)
+{
+ int bcount = mr->desc_size * (mr->ndescs + mr->meta_ndescs);
+
+ dseg->addr = cpu_to_be64(mr->desc_map);
+ dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64));
+ dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
+}
+
+static __be32 send_ieth(const struct ib_send_wr *wr)
+{
+ switch (wr->opcode) {
+ case IB_WR_SEND_WITH_IMM:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ return wr->ex.imm_data;
+
+ case IB_WR_SEND_WITH_INV:
+ return cpu_to_be32(wr->ex.invalidate_rkey);
+
+ default:
+ return 0;
+ }
+}
+
+static u8 calc_sig(void *wqe, int size)
+{
+ u8 *p = wqe;
+ u8 res = 0;
+ int i;
+
+ for (i = 0; i < size; i++)
+ res ^= p[i];
+
+ return ~res;
+}
+
+static u8 wq_sig(void *wqe)
+{
+ return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4);
+}
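calc_sig() is the bitwise complement of an XOR over the WQE bytes; once that byte is written into a previously zeroed signature field, recomputing the same function over the whole WQE yields zero, which is what a checker can look for. A userspace sketch of that property (the signature byte offset is arbitrary here, not the real mlx5_rwqe_sig layout):

#include <stdio.h>
#include <string.h>

static unsigned char calc_sig(const void *wqe, int size)
{
    const unsigned char *p = wqe;
    unsigned char res = 0;
    int i;

    for (i = 0; i < size; i++)
        res ^= p[i];

    return (unsigned char)~res;
}

int main(void)
{
    unsigned char wqe[64];

    memset(wqe, 0xab, sizeof(wqe));
    wqe[0] = 0;                        /* signature byte, zeroed first */
    wqe[0] = calc_sig(wqe, sizeof(wqe));

    /* with the signature in place, re-running calc_sig() returns 0 */
    printf("sig=0x%02x recheck=0x%02x\n",
           (unsigned)wqe[0], (unsigned)calc_sig(wqe, sizeof(wqe)));
    return 0;
}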
+
+static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
+ void **wqe, int *wqe_sz, void **cur_edge)
+{
+ struct mlx5_wqe_inline_seg *seg;
+ size_t offset;
+ int inl = 0;
+ int i;
+
+ seg = *wqe;
+ *wqe += sizeof(*seg);
+ offset = sizeof(*seg);
+
+ for (i = 0; i < wr->num_sge; i++) {
+ size_t len = wr->sg_list[i].length;
+ void *addr = (void *)(unsigned long)(wr->sg_list[i].addr);
+
+ inl += len;
+
+ if (unlikely(inl > qp->max_inline_data))
+ return -ENOMEM;
+
+ while (likely(len)) {
+ size_t leftlen;
+ size_t copysz;
+
+ handle_post_send_edge(&qp->sq, wqe,
+ *wqe_sz + (offset >> 4),
+ cur_edge);
+
+ leftlen = *cur_edge - *wqe;
+ copysz = min_t(size_t, leftlen, len);
+
+ memcpy(*wqe, addr, copysz);
+ len -= copysz;
+ addr += copysz;
+ *wqe += copysz;
+ offset += copysz;
+ }
+ }
+
+ seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
+
+ *wqe_sz += ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
+
+ return 0;
+}
+
+static u16 prot_field_size(enum ib_signature_type type)
+{
+ switch (type) {
+ case IB_SIG_TYPE_T10_DIF:
+ return MLX5_DIF_SIZE;
+ default:
+ return 0;
+ }
+}
+
+static u8 bs_selector(int block_size)
+{
+ switch (block_size) {
+ case 512: return 0x1;
+ case 520: return 0x2;
+ case 4096: return 0x3;
+ case 4160: return 0x4;
+ case 1073741824: return 0x5;
+ default: return 0;
+ }
+}
+
+static void mlx5_fill_inl_bsf(struct ib_sig_domain *domain,
+ struct mlx5_bsf_inl *inl)
+{
+ /* Valid inline section and allow BSF refresh */
+ inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID |
+ MLX5_BSF_REFRESH_DIF);
+ inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag);
+ inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag);
+ /* repeating block */
+ inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK;
+ inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ?
+ MLX5_DIF_CRC : MLX5_DIF_IPCS;
+
+ if (domain->sig.dif.ref_remap)
+ inl->dif_inc_ref_guard_check |= MLX5_BSF_INC_REFTAG;
+
+ if (domain->sig.dif.app_escape) {
+ if (domain->sig.dif.ref_escape)
+ inl->dif_inc_ref_guard_check |= MLX5_BSF_APPREF_ESCAPE;
+ else
+ inl->dif_inc_ref_guard_check |= MLX5_BSF_APPTAG_ESCAPE;
+ }
+
+ inl->dif_app_bitmask_check =
+ cpu_to_be16(domain->sig.dif.apptag_check_mask);
+}
+
+static int mlx5_set_bsf(struct ib_mr *sig_mr,
+ struct ib_sig_attrs *sig_attrs,
+ struct mlx5_bsf *bsf, u32 data_size)
+{
+ struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig;
+ struct mlx5_bsf_basic *basic = &bsf->basic;
+ struct ib_sig_domain *mem = &sig_attrs->mem;
+ struct ib_sig_domain *wire = &sig_attrs->wire;
+
+ memset(bsf, 0, sizeof(*bsf));
+
+ /* Basic + Extended + Inline */
+ basic->bsf_size_sbs = 1 << 7;
+ /* Input domain check byte mask */
+ basic->check_byte_mask = sig_attrs->check_mask;
+ basic->raw_data_size = cpu_to_be32(data_size);
+
+ /* Memory domain */
+ switch (sig_attrs->mem.sig_type) {
+ case IB_SIG_TYPE_NONE:
+ break;
+ case IB_SIG_TYPE_T10_DIF:
+ basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval);
+ basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx);
+ mlx5_fill_inl_bsf(mem, &bsf->m_inl);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* Wire domain */
+ switch (sig_attrs->wire.sig_type) {
+ case IB_SIG_TYPE_NONE:
+ break;
+ case IB_SIG_TYPE_T10_DIF:
+ if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval &&
+ mem->sig_type == wire->sig_type) {
+ /* Same block structure */
+ basic->bsf_size_sbs |= 1 << 4;
+ if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
+ basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK;
+ if (mem->sig.dif.app_tag == wire->sig.dif.app_tag)
+ basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK;
+ if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag)
+ basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK;
+ } else
+ basic->wire.bs_selector =
+ bs_selector(wire->sig.dif.pi_interval);
+
+ basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx);
+ mlx5_fill_inl_bsf(wire, &bsf->w_inl);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+
+static int set_sig_data_segment(const struct ib_send_wr *send_wr,
+ struct ib_mr *sig_mr,
+ struct ib_sig_attrs *sig_attrs,
+ struct mlx5_ib_qp *qp, void **seg, int *size,
+ void **cur_edge)
+{
+ struct mlx5_bsf *bsf;
+ u32 data_len;
+ u32 data_key;
+ u64 data_va;
+ u32 prot_len = 0;
+ u32 prot_key = 0;
+ u64 prot_va = 0;
+ bool prot = false;
+ int ret;
+ int wqe_size;
+ struct mlx5_ib_mr *mr = to_mmr(sig_mr);
+ struct mlx5_ib_mr *pi_mr = mr->pi_mr;
+
+ data_len = pi_mr->data_length;
+ data_key = pi_mr->ibmr.lkey;
+ data_va = pi_mr->data_iova;
+ if (pi_mr->meta_ndescs) {
+ prot_len = pi_mr->meta_length;
+ prot_key = pi_mr->ibmr.lkey;
+ prot_va = pi_mr->pi_iova;
+ prot = true;
+ }
+
+ if (!prot || (data_key == prot_key && data_va == prot_va &&
+ data_len == prot_len)) {
+ /**
+ * Source domain doesn't contain signature information
+ * or data and protection are interleaved in memory.
+ * So need construct:
+ * ------------------
+ * | data_klm |
+ * ------------------
+ * | BSF |
+ * ------------------
+ **/
+ struct mlx5_klm *data_klm = *seg;
+
+ data_klm->bcount = cpu_to_be32(data_len);
+ data_klm->key = cpu_to_be32(data_key);
+ data_klm->va = cpu_to_be64(data_va);
+ wqe_size = ALIGN(sizeof(*data_klm), 64);
+ } else {
+ /**
+ * Source domain contains signature information
+ * So need construct a strided block format:
+ * ---------------------------
+ * | stride_block_ctrl |
+ * ---------------------------
+ * | data_klm |
+ * ---------------------------
+ * | prot_klm |
+ * ---------------------------
+ * | BSF |
+ * ---------------------------
+ **/
+ struct mlx5_stride_block_ctrl_seg *sblock_ctrl;
+ struct mlx5_stride_block_entry *data_sentry;
+ struct mlx5_stride_block_entry *prot_sentry;
+ u16 block_size = sig_attrs->mem.sig.dif.pi_interval;
+ int prot_size;
+
+ sblock_ctrl = *seg;
+ data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl);
+ prot_sentry = (void *)data_sentry + sizeof(*data_sentry);
+
+ prot_size = prot_field_size(sig_attrs->mem.sig_type);
+ if (!prot_size) {
+ pr_err("Bad block size given: %u\n", block_size);
+ return -EINVAL;
+ }
+ sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size +
+ prot_size);
+ sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP);
+ sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size);
+ sblock_ctrl->num_entries = cpu_to_be16(2);
+
+ data_sentry->bcount = cpu_to_be16(block_size);
+ data_sentry->key = cpu_to_be32(data_key);
+ data_sentry->va = cpu_to_be64(data_va);
+ data_sentry->stride = cpu_to_be16(block_size);
+
+ prot_sentry->bcount = cpu_to_be16(prot_size);
+ prot_sentry->key = cpu_to_be32(prot_key);
+ prot_sentry->va = cpu_to_be64(prot_va);
+ prot_sentry->stride = cpu_to_be16(prot_size);
+
+ wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) +
+ sizeof(*prot_sentry), 64);
+ }
+
+ *seg += wqe_size;
+ *size += wqe_size / 16;
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+
+ bsf = *seg;
+ ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len);
+ if (ret)
+ return -EINVAL;
+
+ *seg += sizeof(*bsf);
+ *size += sizeof(*bsf) / 16;
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+
+ return 0;
+}
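In the strided (non-interleaved) branch above, the hardware walks repeat_count cycles of block_size data bytes followed by prot_size protection bytes. With typical T10-DIF numbers, chosen here purely as an example, the fields come out as:

#include <stdio.h>

int main(void)
{
    unsigned int block_size = 512;  /* pi_interval, e.g. one 512B sector */
    unsigned int prot_size  = 8;    /* one T10-DIF tuple */
    unsigned int data_len   = 4096; /* data bytes covered by the region */

    printf("bcount_per_cycle=%u repeat_count=%u\n",
           block_size + prot_size, data_len / block_size);
    return 0;
}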
+
+static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
+ struct ib_mr *sig_mr, int access_flags,
+ u32 size, u32 length, u32 pdn)
+{
+ u32 sig_key = sig_mr->rkey;
+ u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
+
+ memset(seg, 0, sizeof(*seg));
+
+ seg->flags = get_umr_flags(access_flags) | MLX5_MKC_ACCESS_MODE_KLMS;
+ seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
+ seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
+ MLX5_MKEY_BSF_EN | pdn);
+ seg->len = cpu_to_be64(length);
+ seg->xlt_oct_size = cpu_to_be32(get_xlt_octo(size));
+ seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
+}
+
+static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
+ u32 size)
+{
+ memset(umr, 0, sizeof(*umr));
+
+ umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE;
+ umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
+ umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE);
+ umr->mkey_mask = sig_mkey_mask();
+}
+
+static int set_pi_umr_wr(const struct ib_send_wr *send_wr,
+ struct mlx5_ib_qp *qp, void **seg, int *size,
+ void **cur_edge)
+{
+ const struct ib_reg_wr *wr = reg_wr(send_wr);
+ struct mlx5_ib_mr *sig_mr = to_mmr(wr->mr);
+ struct mlx5_ib_mr *pi_mr = sig_mr->pi_mr;
+ struct ib_sig_attrs *sig_attrs = sig_mr->ibmr.sig_attrs;
+ u32 pdn = to_mpd(qp->ibqp.pd)->pdn;
+ u32 xlt_size;
+ int region_len, ret;
+
+ if (unlikely(send_wr->num_sge != 0) ||
+ unlikely(wr->access & IB_ACCESS_REMOTE_ATOMIC) ||
+ unlikely(!sig_mr->sig) || unlikely(!qp->ibqp.integrity_en) ||
+ unlikely(!sig_mr->sig->sig_status_checked))
+ return -EINVAL;
+
+ /* length of the protected region, data + protection */
+ region_len = pi_mr->ibmr.length;
+
+ /**
+ * KLM octoword size - if protection was provided
+ * then we use strided block format (3 octowords),
+ * else we use single KLM (1 octoword)
+ **/
+ if (sig_attrs->mem.sig_type != IB_SIG_TYPE_NONE)
+ xlt_size = 0x30;
+ else
+ xlt_size = sizeof(struct mlx5_klm);
+
+ set_sig_umr_segment(*seg, xlt_size);
+ *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
+ *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+
+ set_sig_mkey_segment(*seg, wr->mr, wr->access, xlt_size, region_len,
+ pdn);
+ *seg += sizeof(struct mlx5_mkey_seg);
+ *size += sizeof(struct mlx5_mkey_seg) / 16;
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+
+ ret = set_sig_data_segment(send_wr, wr->mr, sig_attrs, qp, seg, size,
+ cur_edge);
+ if (ret)
+ return ret;
+
+ sig_mr->sig->sig_status_checked = false;
+ return 0;
+}
+
+static int set_psv_wr(struct ib_sig_domain *domain,
+ u32 psv_idx, void **seg, int *size)
+{
+ struct mlx5_seg_set_psv *psv_seg = *seg;
+
+ memset(psv_seg, 0, sizeof(*psv_seg));
+ psv_seg->psv_num = cpu_to_be32(psv_idx);
+ switch (domain->sig_type) {
+ case IB_SIG_TYPE_NONE:
+ break;
+ case IB_SIG_TYPE_T10_DIF:
+ psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 |
+ domain->sig.dif.app_tag);
+ psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
+ break;
+ default:
+ pr_err("Bad signature type (%d) is given.\n",
+ domain->sig_type);
+ return -EINVAL;
+ }
+
+ *seg += sizeof(*psv_seg);
+ *size += sizeof(*psv_seg) / 16;
+
+ return 0;
+}
+
+static int set_reg_wr(struct mlx5_ib_qp *qp,
+ const struct ib_reg_wr *wr,
+ void **seg, int *size, void **cur_edge,
+ bool check_not_free)
+{
+ struct mlx5_ib_mr *mr = to_mmr(wr->mr);
+ struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
+ struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device);
+ int mr_list_size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size;
+ bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD;
+ bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC;
+ u8 flags = 0;
+
+ if (!mlx5_ib_can_use_umr(dev, atomic, wr->access)) {
+ mlx5_ib_warn(to_mdev(qp->ibqp.device),
+ "Fast update of %s for MR is disabled\n",
+ (MLX5_CAP_GEN(dev->mdev,
+ umr_modify_entity_size_disabled)) ?
+ "entity size" :
+ "atomic access");
+ return -EINVAL;
+ }
+
+ if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
+ mlx5_ib_warn(to_mdev(qp->ibqp.device),
+ "Invalid IB_SEND_INLINE send flag\n");
+ return -EINVAL;
+ }
+
+ if (check_not_free)
+ flags |= MLX5_UMR_CHECK_NOT_FREE;
+ if (umr_inline)
+ flags |= MLX5_UMR_INLINE;
+
+ set_reg_umr_seg(*seg, mr, flags, atomic);
+ *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
+ *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+
+ set_reg_mkey_seg(*seg, mr, wr->key, wr->access);
+ *seg += sizeof(struct mlx5_mkey_seg);
+ *size += sizeof(struct mlx5_mkey_seg) / 16;
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+
+ if (umr_inline) {
+ memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs,
+ mr_list_size);
+ *size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4);
+ } else {
+ set_reg_data_seg(*seg, mr, pd);
+ *seg += sizeof(struct mlx5_wqe_data_seg);
+ *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
+ }
+ return 0;
+}
+
+static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size,
+ void **cur_edge)
+{
+ set_linv_umr_seg(*seg);
+ *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
+ *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+ set_linv_mkey_seg(*seg);
+ *seg += sizeof(struct mlx5_mkey_seg);
+ *size += sizeof(struct mlx5_mkey_seg) / 16;
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+}
+
+static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16)
+{
+ __be32 *p = NULL;
+ int i, j;
+
+ pr_debug("dump WQE index %u:\n", idx);
+ for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
+ if ((i & 0xf) == 0) {
+ p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx);
+ pr_debug("WQBB at %p:\n", (void *)p);
+ j = 0;
+ idx = (idx + 1) & (qp->sq.wqe_cnt - 1);
+ }
+ pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
+ be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]),
+ be32_to_cpu(p[j + 3]));
+ }
+}
+
+static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
+ struct mlx5_wqe_ctrl_seg **ctrl,
+ const struct ib_send_wr *wr, unsigned int *idx,
+ int *size, void **cur_edge, int nreq,
+ bool send_signaled, bool solicited)
+{
+ if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
+ return -ENOMEM;
+
+ *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
+ *seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx);
+ *ctrl = *seg;
+ *(uint32_t *)(*seg + 8) = 0;
+ (*ctrl)->imm = send_ieth(wr);
+ (*ctrl)->fm_ce_se = qp->sq_signal_bits |
+ (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) |
+ (solicited ? MLX5_WQE_CTRL_SOLICITED : 0);
+
+ *seg += sizeof(**ctrl);
+ *size = sizeof(**ctrl) / 16;
+ *cur_edge = qp->sq.cur_edge;
+
+ return 0;
+}
+
+static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
+ struct mlx5_wqe_ctrl_seg **ctrl,
+ const struct ib_send_wr *wr, unsigned int *idx, int *size,
+ void **cur_edge, int nreq)
+{
+ return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq,
+ wr->send_flags & IB_SEND_SIGNALED,
+ wr->send_flags & IB_SEND_SOLICITED);
+}
+
+static void finish_wqe(struct mlx5_ib_qp *qp,
+ struct mlx5_wqe_ctrl_seg *ctrl,
+ void *seg, u8 size, void *cur_edge,
+ unsigned int idx, u64 wr_id, int nreq, u8 fence,
+ u32 mlx5_opcode)
+{
+ u8 opmod = 0;
+
+ ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
+ mlx5_opcode | ((u32)opmod << 24));
+ ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
+ ctrl->fm_ce_se |= fence;
+ if (unlikely(qp->flags_en & MLX5_QP_FLAG_SIGNATURE))
+ ctrl->signature = wq_sig(ctrl);
+
+ qp->sq.wrid[idx] = wr_id;
+ qp->sq.w_list[idx].opcode = mlx5_opcode;
+ qp->sq.wqe_head[idx] = qp->sq.head + nreq;
+ qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
+ qp->sq.w_list[idx].next = qp->sq.cur_post;
+
+ /* We save the edge which was possibly updated during the WQE
+ * construction, into SQ's cache.
+ */
+ seg = PTR_ALIGN(seg, MLX5_SEND_WQE_BB);
+ qp->sq.cur_edge = (unlikely(seg == cur_edge)) ?
+ get_sq_edge(&qp->sq, qp->sq.cur_post &
+ (qp->sq.wqe_cnt - 1)) :
+ cur_edge;
+}
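cur_post advances by the WQE size expressed in 64-byte basic blocks, DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB), so the producer index written to the doorbell counts WQEBBs rather than WQEs. A quick check of that arithmetic:

#include <stdio.h>

#define SEND_WQE_BB 64 /* basic block size, as in mlx5 */

int main(void)
{
    int size = 7; /* WQE size in 16-byte units */
    int bbs = (size * 16 + SEND_WQE_BB - 1) / SEND_WQE_BB;

    /* a 112-byte WQE consumes two 64-byte basic blocks */
    printf("WQE of %d*16B -> %d basic block(s)\n", size, bbs);
    return 0;
}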
+
+static void handle_rdma_op(const struct ib_send_wr *wr, void **seg, int *size)
+{
+ set_raddr_seg(*seg, rdma_wr(wr)->remote_addr, rdma_wr(wr)->rkey);
+ *seg += sizeof(struct mlx5_wqe_raddr_seg);
+ *size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
+}
+
+static void handle_local_inv(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
+ struct mlx5_wqe_ctrl_seg **ctrl, void **seg,
+ int *size, void **cur_edge, unsigned int idx)
+{
+ qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
+ (*ctrl)->imm = cpu_to_be32(wr->ex.invalidate_rkey);
+ set_linv_wr(qp, seg, size, cur_edge);
+}
+
+static int handle_reg_mr(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
+ struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size,
+ void **cur_edge, unsigned int idx)
+{
+ qp->sq.wr_data[idx] = IB_WR_REG_MR;
+ (*ctrl)->imm = cpu_to_be32(reg_wr(wr)->key);
+ return set_reg_wr(qp, reg_wr(wr), seg, size, cur_edge, true);
+}
+
+static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ const struct ib_send_wr *wr,
+ struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size,
+ void **cur_edge, unsigned int *idx, int nreq,
+ struct ib_sig_domain *domain, u32 psv_index,
+ u8 next_fence)
+{
+ int err;
+
+ /*
+ * SET_PSV WQEs are not signaled and solicited on error.
+ */
+ err = __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq,
+ false, true);
+ if (unlikely(err)) {
+ mlx5_ib_warn(dev, "\n");
+ err = -ENOMEM;
+ goto out;
+ }
+ err = set_psv_wr(domain, psv_index, seg, size);
+ if (unlikely(err)) {
+ mlx5_ib_warn(dev, "\n");
+ goto out;
+ }
+ finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq,
+ next_fence, MLX5_OPCODE_SET_PSV);
+
+out:
+ return err;
+}
+
+static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp,
+ const struct ib_send_wr *wr,
+ struct mlx5_wqe_ctrl_seg **ctrl, void **seg,
+ int *size, void **cur_edge,
+ unsigned int *idx, int nreq, u8 fence,
+ u8 next_fence)
+{
+ struct mlx5_ib_mr *mr;
+ struct mlx5_ib_mr *pi_mr;
+ struct mlx5_ib_mr pa_pi_mr;
+ struct ib_sig_attrs *sig_attrs;
+ struct ib_reg_wr reg_pi_wr;
+ int err;
+
+ qp->sq.wr_data[*idx] = IB_WR_REG_MR_INTEGRITY;
+
+ mr = to_mmr(reg_wr(wr)->mr);
+ pi_mr = mr->pi_mr;
+
+ if (pi_mr) {
+ memset(&reg_pi_wr, 0,
+ sizeof(struct ib_reg_wr));
+
+ reg_pi_wr.mr = &pi_mr->ibmr;
+ reg_pi_wr.access = reg_wr(wr)->access;
+ reg_pi_wr.key = pi_mr->ibmr.rkey;
+
+ (*ctrl)->imm = cpu_to_be32(reg_pi_wr.key);
+ /* UMR for data + prot registration */
+ err = set_reg_wr(qp, &reg_pi_wr, seg, size, cur_edge, false);
+ if (unlikely(err))
+ goto out;
+
+ finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id,
+ nreq, fence, MLX5_OPCODE_UMR);
+
+ err = begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq);
+ if (unlikely(err)) {
+ mlx5_ib_warn(dev, "\n");
+ err = -ENOMEM;
+ goto out;
+ }
+ } else {
+ memset(&pa_pi_mr, 0, sizeof(struct mlx5_ib_mr));
+ /* No UMR, use local_dma_lkey */
+ pa_pi_mr.ibmr.lkey = mr->ibmr.pd->local_dma_lkey;
+ pa_pi_mr.ndescs = mr->ndescs;
+ pa_pi_mr.data_length = mr->data_length;
+ pa_pi_mr.data_iova = mr->data_iova;
+ if (mr->meta_ndescs) {
+ pa_pi_mr.meta_ndescs = mr->meta_ndescs;
+ pa_pi_mr.meta_length = mr->meta_length;
+ pa_pi_mr.pi_iova = mr->pi_iova;
+ }
+
+ pa_pi_mr.ibmr.length = mr->ibmr.length;
+ mr->pi_mr = &pa_pi_mr;
+ }
+ (*ctrl)->imm = cpu_to_be32(mr->ibmr.rkey);
+ /* UMR for sig MR */
+ err = set_pi_umr_wr(wr, qp, seg, size, cur_edge);
+ if (unlikely(err)) {
+ mlx5_ib_warn(dev, "\n");
+ goto out;
+ }
+ finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq,
+ fence, MLX5_OPCODE_UMR);
+
+ sig_attrs = mr->ibmr.sig_attrs;
+ err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq,
+ &sig_attrs->mem, mr->sig->psv_memory.psv_idx,
+ next_fence);
+ if (unlikely(err))
+ goto out;
+
+ err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq,
+ &sig_attrs->wire, mr->sig->psv_wire.psv_idx,
+ next_fence);
+ if (unlikely(err))
+ goto out;
+
+ qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
+
+out:
+ return err;
+}
+
+static int handle_qpt_rc(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ const struct ib_send_wr *wr,
+ struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size,
+ void **cur_edge, unsigned int *idx, int nreq, u8 fence,
+ u8 next_fence, int *num_sge)
+{
+ int err = 0;
+
+ switch (wr->opcode) {
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ handle_rdma_op(wr, seg, size);
+ break;
+
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
+ mlx5_ib_warn(dev, "Atomic operations are not supported yet\n");
+ err = -EOPNOTSUPP;
+ goto out;
+
+ case IB_WR_LOCAL_INV:
+ handle_local_inv(qp, wr, ctrl, seg, size, cur_edge, *idx);
+ *num_sge = 0;
+ break;
+
+ case IB_WR_REG_MR:
+ err = handle_reg_mr(qp, wr, ctrl, seg, size, cur_edge, *idx);
+ if (unlikely(err))
+ goto out;
+ *num_sge = 0;
+ break;
+
+ case IB_WR_REG_MR_INTEGRITY:
+ err = handle_reg_mr_integrity(dev, qp, wr, ctrl, seg, size,
+ cur_edge, idx, nreq, fence,
+ next_fence);
+ if (unlikely(err))
+ goto out;
+ *num_sge = 0;
+ break;
+
+ default:
+ break;
+ }
+
+out:
+ return err;
+}
+
+static void handle_qpt_uc(const struct ib_send_wr *wr, void **seg, int *size)
+{
+ switch (wr->opcode) {
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ handle_rdma_op(wr, seg, size);
+ break;
+ default:
+ break;
+ }
+}
+
+static void handle_qpt_hw_gsi(struct mlx5_ib_qp *qp,
+ const struct ib_send_wr *wr, void **seg,
+ int *size, void **cur_edge)
+{
+ set_datagram_seg(*seg, wr);
+ *seg += sizeof(struct mlx5_wqe_datagram_seg);
+ *size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+}
+
+static void handle_qpt_ud(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
+ void **seg, int *size, void **cur_edge)
+{
+ set_datagram_seg(*seg, wr);
+ *seg += sizeof(struct mlx5_wqe_datagram_seg);
+ *size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+
+ /* handle qp that supports ud offload */
+ if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) {
+ struct mlx5_wqe_eth_pad *pad;
+
+ pad = *seg;
+ memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad));
+ *seg += sizeof(struct mlx5_wqe_eth_pad);
+ *size += sizeof(struct mlx5_wqe_eth_pad) / 16;
+ set_eth_seg(wr, qp, seg, size, cur_edge);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+ }
+}
+
+static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ const struct ib_send_wr *wr,
+ struct mlx5_wqe_ctrl_seg **ctrl, void **seg,
+ int *size, void **cur_edge, unsigned int idx)
+{
+ int err = 0;
+
+ if (unlikely(wr->opcode != MLX5_IB_WR_UMR)) {
+ err = -EINVAL;
+ mlx5_ib_warn(dev, "bad opcode %d\n", wr->opcode);
+ goto out;
+ }
+
+ qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
+ (*ctrl)->imm = cpu_to_be32(umr_wr(wr)->mkey);
+ err = set_reg_umr_segment(dev, *seg, wr,
+ !!(MLX5_CAP_GEN(dev->mdev, atomic)));
+ if (unlikely(err))
+ goto out;
+ *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
+ *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+ set_reg_mkey_segment(*seg, wr);
+ *seg += sizeof(struct mlx5_mkey_seg);
+ *size += sizeof(struct mlx5_mkey_seg) / 16;
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+out:
+ return err;
+}
+
+int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr, bool drain)
+{
+ struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_ib_qp *qp;
+ struct mlx5_wqe_xrc_seg *xrc;
+ struct mlx5_bf *bf;
+ void *cur_edge;
+ int uninitialized_var(size);
+ unsigned long flags;
+ unsigned int idx;
+ int err = 0;
+ int num_sge;
+ void *seg;
+ int nreq;
+ int i;
+ u8 next_fence = 0;
+ u8 fence;
+
+ if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
+ !drain)) {
+ *bad_wr = wr;
+ return -EIO;
+ }
+
+ if (unlikely(ibqp->qp_type == IB_QPT_GSI))
+ return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
+
+ qp = to_mqp(ibqp);
+ bf = &qp->bf;
+
+ spin_lock_irqsave(&qp->sq.lock, flags);
+
+ for (nreq = 0; wr; nreq++, wr = wr->next) {
+ if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
+ mlx5_ib_warn(dev, "\n");
+ err = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ num_sge = wr->num_sge;
+ if (unlikely(num_sge > qp->sq.max_gs)) {
+ mlx5_ib_warn(dev, "\n");
+ err = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge,
+ nreq);
+ if (err) {
+ mlx5_ib_warn(dev, "\n");
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ if (wr->opcode == IB_WR_REG_MR ||
+ wr->opcode == IB_WR_REG_MR_INTEGRITY) {
+ fence = dev->umr_fence;
+ next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
+ } else {
+ if (wr->send_flags & IB_SEND_FENCE) {
+ if (qp->next_fence)
+ fence = MLX5_FENCE_MODE_SMALL_AND_FENCE;
+ else
+ fence = MLX5_FENCE_MODE_FENCE;
+ } else {
+ fence = qp->next_fence;
+ }
+ }
+
+ switch (ibqp->qp_type) {
+ case IB_QPT_XRC_INI:
+ xrc = seg;
+ seg += sizeof(*xrc);
+ size += sizeof(*xrc) / 16;
+ fallthrough;
+ case IB_QPT_RC:
+ err = handle_qpt_rc(dev, qp, wr, &ctrl, &seg, &size,
+ &cur_edge, &idx, nreq, fence,
+ next_fence, &num_sge);
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ goto out;
+ } else if (wr->opcode == IB_WR_REG_MR_INTEGRITY) {
+ goto skip_psv;
+ }
+ break;
+
+ case IB_QPT_UC:
+ handle_qpt_uc(wr, &seg, &size);
+ break;
+ case IB_QPT_SMI:
+ if (unlikely(!mdev->port_caps[qp->port - 1].has_smi)) {
+ mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n");
+ err = -EPERM;
+ *bad_wr = wr;
+ goto out;
+ }
+ fallthrough;
+ case MLX5_IB_QPT_HW_GSI:
+ handle_qpt_hw_gsi(qp, wr, &seg, &size, &cur_edge);
+ break;
+ case IB_QPT_UD:
+ handle_qpt_ud(qp, wr, &seg, &size, &cur_edge);
+ break;
+ case MLX5_IB_QPT_REG_UMR:
+ err = handle_qpt_reg_umr(dev, qp, wr, &ctrl, &seg,
+ &size, &cur_edge, idx);
+ if (unlikely(err))
+ goto out;
+ break;
+
+ default:
+ break;
+ }
+
+ if (wr->send_flags & IB_SEND_INLINE && num_sge) {
+ err = set_data_inl_seg(qp, wr, &seg, &size, &cur_edge);
+ if (unlikely(err)) {
+ mlx5_ib_warn(dev, "\n");
+ *bad_wr = wr;
+ goto out;
+ }
+ } else {
+ for (i = 0; i < num_sge; i++) {
+ handle_post_send_edge(&qp->sq, &seg, size,
+ &cur_edge);
+ if (unlikely(!wr->sg_list[i].length))
+ continue;
+
+ set_data_ptr_seg(
+ (struct mlx5_wqe_data_seg *)seg,
+ wr->sg_list + i);
+ size += sizeof(struct mlx5_wqe_data_seg) / 16;
+ seg += sizeof(struct mlx5_wqe_data_seg);
+ }
+ }
+
+ qp->next_fence = next_fence;
+ finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq,
+ fence, mlx5_ib_opcode[wr->opcode]);
+skip_psv:
+ if (0)
+ dump_wqe(qp, idx, size);
+ }
+
+out:
+ if (likely(nreq)) {
+ qp->sq.head += nreq;
+
+ /* Make sure that descriptors are written before
+ * updating doorbell record and ringing the doorbell
+ */
+ wmb();
+
+ qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
+
+ /* Make sure doorbell record is visible to the HCA before
+ * we hit doorbell.
+ */
+ wmb();
+
+ mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset);
+ /* Make sure doorbells don't leak out of SQ spinlock
+ * and reach the HCA out of order.
+ */
+ bf->offset ^= bf->buf_size;
+ }
+
+ spin_unlock_irqrestore(&qp->sq.lock, flags);
+
+ return err;
+}
+
+static void set_sig_seg(struct mlx5_rwqe_sig *sig, int max_gs)
+{
+ sig->signature = calc_sig(sig, (max_gs + 1) << 2);
+}
+
+int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr, bool drain)
+{
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct mlx5_wqe_data_seg *scat;
+ struct mlx5_rwqe_sig *sig;
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ unsigned long flags;
+ int err = 0;
+ int nreq;
+ int ind;
+ int i;
+
+ if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
+ !drain)) {
+ *bad_wr = wr;
+ return -EIO;
+ }
+
+ if (unlikely(ibqp->qp_type == IB_QPT_GSI))
+ return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr);
+
+ spin_lock_irqsave(&qp->rq.lock, flags);
+
+ ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
+
+ for (nreq = 0; wr; nreq++, wr = wr->next) {
+ if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ if (unlikely(wr->num_sge > qp->rq.max_gs)) {
+ err = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind);
+ if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE)
+ scat++;
+
+ for (i = 0; i < wr->num_sge; i++)
+ set_data_ptr_seg(scat + i, wr->sg_list + i);
+
+ if (i < qp->rq.max_gs) {
+ scat[i].byte_count = 0;
+ scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
+ scat[i].addr = 0;
+ }
+
+ if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) {
+ sig = (struct mlx5_rwqe_sig *)scat;
+ set_sig_seg(sig, qp->rq.max_gs);
+ }
+
+ qp->rq.wrid[ind] = wr->wr_id;
+
+ ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
+ }
+
+out:
+ if (likely(nreq)) {
+ qp->rq.head += nreq;
+
+ /* Make sure that descriptors are written before
+ * doorbell record.
+ */
+ wmb();
+
+ *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
+ }
+
+ spin_unlock_irqrestore(&qp->rq.lock, flags);
+
+ return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/wr.h b/drivers/infiniband/hw/mlx5/wr.h
new file mode 100644
index 000000000000..4f0057516402
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/wr.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_WR_H
+#define _MLX5_IB_WR_H
+
+#include "mlx5_ib.h"
+
+enum {
+ MLX5_IB_SQ_UMR_INLINE_THRESHOLD = 64,
+};
+
+struct mlx5_wqe_eth_pad {
+ u8 rsvd0[16];
+};
+
+
+/* get_sq_edge - Get the next nearby edge.
+ *
+ * An 'edge' is defined as the first address past the end of the
+ * fragment or the SQ. Accordingly, WQE construction, which repeatedly
+ * advances the pointer while writing data, only needs to check whether
+ * it has reached an edge.
+ *
+ * @sq - SQ buffer.
+ * @idx - Stride index in the SQ buffer.
+ *
+ * Return:
+ * The new edge.
+ */
+static inline void *get_sq_edge(struct mlx5_ib_wq *sq, u32 idx)
+{
+ void *fragment_end;
+
+ fragment_end = mlx5_frag_buf_get_wqe
+ (&sq->fbc,
+ mlx5_frag_buf_get_idx_last_contig_stride(&sq->fbc, idx));
+
+ return fragment_end + MLX5_SEND_WQE_BB;
+}
+
+int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr, bool drain);
+int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr, bool drain);
+
+static inline int mlx5_ib_post_send_nodrain(struct ib_qp *ibqp,
+ const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
+{
+ return mlx5_ib_post_send(ibqp, wr, bad_wr, false);
+}
+
+static inline int mlx5_ib_post_send_drain(struct ib_qp *ibqp,
+ const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
+{
+ return mlx5_ib_post_send(ibqp, wr, bad_wr, true);
+}
+
+static inline int mlx5_ib_post_recv_nodrain(struct ib_qp *ibqp,
+ const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ return mlx5_ib_post_recv(ibqp, wr, bad_wr, false);
+}
+
+static inline int mlx5_ib_post_recv_drain(struct ib_qp *ibqp,
+ const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ return mlx5_ib_post_recv(ibqp, wr, bad_wr, true);
+}
+#endif /* _MLX5_IB_WR_H */
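Presumably the nodrain variants above are what get wired into the driver's ib_device_ops, while the drain variants back ib_drain_sq()/ib_drain_rq(), which must still be able to post when the device is in internal error state (see the MLX5_DEVICE_STATE_INTERNAL_ERROR checks in wr.c). A sketch of that wiring, under that assumption:

static const struct ib_device_ops example_mlx5_ib_dev_ops = {
	/* Regular verbs traffic refuses to post on internal error. */
	.post_send = mlx5_ib_post_send_nodrain,
	.post_recv = mlx5_ib_post_recv_nodrain,
	/* ... remaining ops elided ... */
};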
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 69a3e4f62fb1..bc3e3d741ca3 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -388,14 +388,15 @@ static void mthca_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
mthca_pd_free(to_mdev(pd->device), to_mpd(pd));
}
-static int mthca_ah_create(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr,
- u32 flags, struct ib_udata *udata)
+static int mthca_ah_create(struct ib_ah *ibah,
+ struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
{
struct mthca_ah *ah = to_mah(ibah);
- return mthca_create_ah(to_mdev(ibah->device), to_mpd(ibah->pd), ah_attr,
- ah);
+ return mthca_create_ah(to_mdev(ibah->device), to_mpd(ibah->pd),
+ init_attr->ah_attr, ah);
}
static void mthca_ah_destroy(struct ib_ah *ah, u32 flags)
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index 2b7f00ac41b0..6eea02b18968 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -155,7 +155,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
return status;
}
-int ocrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags,
+int ocrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata)
{
u32 *ahid_addr;
@@ -165,6 +165,7 @@ int ocrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags,
u16 vlan_tag = 0xffff;
const struct ib_gid_attr *sgid_attr;
struct ocrdma_pd *pd = get_ocrdma_pd(ibah->pd);
+ struct rdma_ah_attr *attr = init_attr->ah_attr;
struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device);
if ((attr->type != RDMA_AH_ATTR_TYPE_ROCE) ||
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
index 9780afcde780..8b73b3489f3a 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
@@ -51,7 +51,7 @@ enum {
OCRDMA_AH_L3_TYPE_SHIFT = 0x1D /* 29 bits */
};
-int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags,
+int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
void ocrdma_destroy_ah(struct ib_ah *ah, u32 flags);
int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index a5bd3adaf90a..d6b94a713573 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -2750,12 +2750,12 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
return 0;
}
-int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags,
+int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata)
{
struct qedr_ah *ah = get_qedr_ah(ibah);
- rdma_copy_ah_attr(&ah->attr, attr);
+ rdma_copy_ah_attr(&ah->attr, init_attr->ah_attr);
return 0;
}
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 18027844eb87..5e02387e068d 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -70,7 +70,7 @@ int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_recv_wr);
-int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags,
+int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
void qedr_destroy_ah(struct ib_ah *ibah, u32 flags);
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 91d64dd71a8a..8bcbc884e5b6 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -2375,7 +2375,6 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd)
struct qib_devdata *dd = ppd->dd;
u64 val, guid, ibc;
unsigned long flags;
- int ret = 0;
/*
* SerDes model not in Pd, but still need to
@@ -2510,7 +2509,7 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd)
val | ERR_MASK_N(IBStatusChanged));
/* Always zero until we start messing with SerDes for real */
- return ret;
+ return 0;
}
/**
@@ -6875,7 +6874,7 @@ static int init_sdma_7322_regs(struct qib_pportdata *ppd)
struct qib_devdata *dd = ppd->dd;
unsigned lastbuf, erstbuf;
u64 senddmabufmask[3] = { 0 };
- int n, ret = 0;
+ int n;
qib_write_kreg_port(ppd, krp_senddmabase, ppd->sdma_descq_phys);
qib_sdma_7322_setlengen(ppd);
@@ -6904,7 +6903,7 @@ static int init_sdma_7322_regs(struct qib_pportdata *ppd)
qib_write_kreg_port(ppd, krp_senddmabufmask0, senddmabufmask[0]);
qib_write_kreg_port(ppd, krp_senddmabufmask1, senddmabufmask[1]);
qib_write_kreg_port(ppd, krp_senddmabufmask2, senddmabufmask[2]);
- return ret;
+ return 0;
}
/* sdma_lock must be held */
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
index faf7ecd7b3fa..ccbded2d26ce 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -509,9 +509,10 @@ void pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
*
* @return: 0 on success, otherwise errno.
*/
-int pvrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr,
- u32 flags, struct ib_udata *udata)
+int pvrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
{
+ struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
struct pvrdma_dev *dev = to_vdev(ibah->device);
struct pvrdma_ah *ah = to_vah(ibah);
const struct ib_global_route *grh;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
index e4a48f5c0c85..267702226f10 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
@@ -414,7 +414,7 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
-int pvrdma_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags,
+int pvrdma_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
void pvrdma_destroy_ah(struct ib_ah *ah, u32 flags);
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c
index ee02c6176007..40480add7dd3 100644
--- a/drivers/infiniband/sw/rdmavt/ah.c
+++ b/drivers/infiniband/sw/rdmavt/ah.c
@@ -98,14 +98,14 @@ EXPORT_SYMBOL(rvt_check_ah);
*
* Return: 0 on success
*/
-int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr,
- u32 create_flags, struct ib_udata *udata)
+int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
{
struct rvt_ah *ah = ibah_to_rvtah(ibah);
struct rvt_dev_info *dev = ib_to_rvt(ibah->device);
unsigned long flags;
- if (rvt_check_ah(ibah->device, ah_attr))
+ if (rvt_check_ah(ibah->device, init_attr->ah_attr))
return -EINVAL;
spin_lock_irqsave(&dev->n_ahs_lock, flags);
@@ -117,10 +117,11 @@ int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr,
dev->n_ahs_allocated++;
spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
- rdma_copy_ah_attr(&ah->attr, ah_attr);
+ rdma_copy_ah_attr(&ah->attr, init_attr->ah_attr);
if (dev->driver_f.notify_new_ah)
- dev->driver_f.notify_new_ah(ibah->device, ah_attr, ah);
+ dev->driver_f.notify_new_ah(ibah->device,
+ init_attr->ah_attr, ah);
return 0;
}
diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h
index bbb4d3bdec4e..40b7123fec76 100644
--- a/drivers/infiniband/sw/rdmavt/ah.h
+++ b/drivers/infiniband/sw/rdmavt/ah.h
@@ -50,8 +50,8 @@
#include <rdma/rdma_vt.h>
-int rvt_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr,
- u32 create_flags, struct ib_udata *udata);
+int rvt_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags);
int rvt_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
int rvt_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 4afdd2e20883..5642eefb4ba1 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -77,6 +77,7 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
{
rxe->max_inline_data = RXE_MAX_INLINE_DATA;
+ rxe->attr.vendor_id = RXE_VENDOR_ID;
rxe->attr.max_mr_size = RXE_MAX_MR_SIZE;
rxe->attr.page_size_cap = RXE_PAGE_SIZE_CAP;
rxe->attr.max_qp = RXE_MAX_QP;
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index f59616b02477..99e9d8ba9767 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -127,6 +127,9 @@ enum rxe_device_param {
/* Delay before calling arbiter timer */
RXE_NSEC_ARB_TIMER_DELAY = 200,
+
+ /* IBTA v1.4 A3.3.1 VENDOR INFORMATION section */
+ RXE_VENDOR_ID = 0XFFFFFF,
};
/* default/initial rxe port parameters */
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 9dd4bd7aea92..b8a22af724e8 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -195,15 +195,16 @@ static void rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
rxe_drop_ref(pd);
}
-static int rxe_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr,
- u32 flags, struct ib_udata *udata)
+static int rxe_create_ah(struct ib_ah *ibah,
+ struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
{
int err;
struct rxe_dev *rxe = to_rdev(ibah->device);
struct rxe_ah *ah = to_rah(ibah);
- err = rxe_av_chk_attr(rxe, attr);
+ err = rxe_av_chk_attr(rxe, init_attr->ah_attr);
if (err)
return err;
@@ -211,7 +212,7 @@ static int rxe_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr,
if (err)
return err;
- rxe_init_av(attr, &ah->av);
+ rxe_init_av(init_attr->ah_attr, &ah->av);
return 0;
}
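The mthca, ocrdma, qedr, pvrdma, rdmavt and rxe hunks above all apply the same mechanical conversion. A hypothetical driver callback showing the resulting shape (all "example_" names are placeholders):

struct example_ah {
	struct ib_ah ibah;
	struct rdma_ah_attr attr;
};

static int example_create_ah(struct ib_ah *ibah,
			     struct rdma_ah_init_attr *init_attr,
			     struct ib_udata *udata)
{
	struct example_ah *ah = container_of(ibah, struct example_ah, ibah);
	/* The attributes and the old 'u32 flags' argument now both live in
	 * init_attr (init_attr->ah_attr, init_attr->flags); the LAG transmit
	 * slave, when the core resolved one, is in init_attr->xmit_slave.
	 */
	struct rdma_ah_attr *ah_attr = init_attr->ah_attr;

	if (ah_attr->type == RDMA_AH_ATTR_TYPE_UNDEFINED)
		return -EINVAL;

	rdma_copy_ah_attr(&ah->attr, ah_attr);
	return 0;
}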
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 81b8227214f1..d4c6a97ce4c0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -86,7 +86,7 @@ struct workqueue_struct *ipoib_workqueue;
struct ib_sa_client ipoib_sa_client;
-static void ipoib_add_one(struct ib_device *device);
+static int ipoib_add_one(struct ib_device *device);
static void ipoib_remove_one(struct ib_device *device, void *client_data);
static void ipoib_neigh_reclaim(struct rcu_head *rp);
static struct net_device *ipoib_get_net_dev_by_params(
@@ -479,9 +479,6 @@ static struct net_device *ipoib_get_net_dev_by_params(
if (ret)
return NULL;
- if (!dev_list)
- return NULL;
-
/* See if we can find a unique device matching the L2 parameters */
matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index,
gid, NULL, &net_dev);
@@ -2514,7 +2511,7 @@ sysfs_failed:
return ERR_PTR(-ENOMEM);
}
-static void ipoib_add_one(struct ib_device *device)
+static int ipoib_add_one(struct ib_device *device)
{
struct list_head *dev_list;
struct net_device *dev;
@@ -2524,7 +2521,7 @@ static void ipoib_add_one(struct ib_device *device)
dev_list = kmalloc(sizeof(*dev_list), GFP_KERNEL);
if (!dev_list)
- return;
+ return -ENOMEM;
INIT_LIST_HEAD(dev_list);
@@ -2541,10 +2538,11 @@ static void ipoib_add_one(struct ib_device *device)
if (!count) {
kfree(dev_list);
- return;
+ return -EOPNOTSUPP;
}
ib_set_client_data(device, &ipoib_client, dev_list);
+ return 0;
}
static void ipoib_remove_one(struct ib_device *device, void *client_data)
@@ -2552,9 +2550,6 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
struct ipoib_dev_priv *priv, *tmp, *cpriv, *tcpriv;
struct list_head *dev_list = client_data;
- if (!dev_list)
- return;
-
list_for_each_entry_safe(priv, tmp, dev_list, list) {
LIST_HEAD(head);
ipoib_parent_unregister_pre(priv->dev);
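The ipoib conversion above (and the opa_vnic, srp, srpt, rds and smc ones below) all follow the new ib_client contract: ->add() reports an errno to the core instead of silently leaving client data unset, which is what allows the NULL checks in ->remove() to be dropped. A minimal, hypothetical client showing that shape:

struct example_client_data {
	struct ib_device *device;
};

static struct ib_client example_client;

static int example_add_one(struct ib_device *device)
{
	struct example_client_data *cd;

	if (device->node_type != RDMA_NODE_IB_CA)
		return -EOPNOTSUPP;	/* not a device this client handles */

	cd = kzalloc(sizeof(*cd), GFP_KERNEL);
	if (!cd)
		return -ENOMEM;

	cd->device = device;
	ib_set_client_data(device, &example_client, cd);
	return 0;
}

static void example_remove_one(struct ib_device *device, void *client_data)
{
	/* Only called after a successful ->add(), so client_data is valid. */
	kfree(client_data);
}

static struct ib_client example_client = {
	.name   = "example",
	.add    = example_add_one,
	.remove = example_remove_one,
};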
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
index 6e8d650c17c7..874a8eb7638c 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
@@ -113,7 +113,7 @@ struct opa_vnic_vema_port {
struct mutex lock;
};
-static void opa_vnic_vema_add_one(struct ib_device *device);
+static int opa_vnic_vema_add_one(struct ib_device *device);
static void opa_vnic_vema_rem_one(struct ib_device *device,
void *client_data);
@@ -989,18 +989,18 @@ static void opa_vnic_ctrl_config_dev(struct opa_vnic_ctrl_port *cport, bool en)
*
* Allocate the vnic control port and initialize it.
*/
-static void opa_vnic_vema_add_one(struct ib_device *device)
+static int opa_vnic_vema_add_one(struct ib_device *device)
{
struct opa_vnic_ctrl_port *cport;
int rc, size = sizeof(*cport);
if (!rdma_cap_opa_vnic(device))
- return;
+ return -EOPNOTSUPP;
size += device->phys_port_cnt * sizeof(struct opa_vnic_vema_port);
cport = kzalloc(size, GFP_KERNEL);
if (!cport)
- return;
+ return -ENOMEM;
cport->num_ports = device->phys_port_cnt;
cport->ibdev = device;
@@ -1012,6 +1012,7 @@ static void opa_vnic_vema_add_one(struct ib_device *device)
ib_set_client_data(device, &opa_vnic_client, cport);
opa_vnic_ctrl_config_dev(cport, true);
+ return 0;
}
/**
@@ -1026,9 +1027,6 @@ static void opa_vnic_vema_rem_one(struct ib_device *device,
{
struct opa_vnic_ctrl_port *cport = client_data;
- if (!cport)
- return;
-
c_info("removing VNIC client\n");
opa_vnic_ctrl_config_dev(cport, false);
vema_unregister(cport);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index cd1181c39ed2..00b4f88b113e 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -146,7 +146,7 @@ module_param(ch_count, uint, 0444);
MODULE_PARM_DESC(ch_count,
"Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
-static void srp_add_one(struct ib_device *device);
+static int srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device, void *client_data);
static void srp_rename_dev(struct ib_device *device, void *client_data);
static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
@@ -4132,7 +4132,7 @@ static void srp_rename_dev(struct ib_device *device, void *client_data)
}
}
-static void srp_add_one(struct ib_device *device)
+static int srp_add_one(struct ib_device *device)
{
struct srp_device *srp_dev;
struct ib_device_attr *attr = &device->attrs;
@@ -4144,7 +4144,7 @@ static void srp_add_one(struct ib_device *device)
srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
if (!srp_dev)
- return;
+ return -ENOMEM;
/*
* Use the smallest page size supported by the HCA, down to a
@@ -4197,8 +4197,12 @@ static void srp_add_one(struct ib_device *device)
srp_dev->dev = device;
srp_dev->pd = ib_alloc_pd(device, flags);
- if (IS_ERR(srp_dev->pd))
- goto free_dev;
+ if (IS_ERR(srp_dev->pd)) {
+ int ret = PTR_ERR(srp_dev->pd);
+
+ kfree(srp_dev);
+ return ret;
+ }
if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
srp_dev->global_rkey = srp_dev->pd->unsafe_global_rkey;
@@ -4212,10 +4216,7 @@ static void srp_add_one(struct ib_device *device)
}
ib_set_client_data(device, &srp_client, srp_dev);
- return;
-
-free_dev:
- kfree(srp_dev);
+ return 0;
}
static void srp_remove_one(struct ib_device *device, void *client_data)
@@ -4225,8 +4226,6 @@ static void srp_remove_one(struct ib_device *device, void *client_data)
struct srp_target_port *target;
srp_dev = client_data;
- if (!srp_dev)
- return;
list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
device_unregister(&host->dev);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 98552749d71c..7ed38d1cb997 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -135,14 +135,11 @@ static bool srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new)
static void srpt_event_handler(struct ib_event_handler *handler,
struct ib_event *event)
{
- struct srpt_device *sdev;
+ struct srpt_device *sdev =
+ container_of(handler, struct srpt_device, event_handler);
struct srpt_port *sport;
u8 port_num;
- sdev = ib_get_client_data(event->device, &srpt_client);
- if (!sdev || sdev->device != event->device)
- return;
-
pr_debug("ASYNC event= %d on device= %s\n", event->event,
dev_name(&sdev->device->dev));
@@ -3104,7 +3101,7 @@ static int srpt_use_srq(struct srpt_device *sdev, bool use_srq)
* srpt_add_one - InfiniBand device addition callback function
* @device: Describes a HCA.
*/
-static void srpt_add_one(struct ib_device *device)
+static int srpt_add_one(struct ib_device *device)
{
struct srpt_device *sdev;
struct srpt_port *sport;
@@ -3115,14 +3112,16 @@ static void srpt_add_one(struct ib_device *device)
sdev = kzalloc(struct_size(sdev, port, device->phys_port_cnt),
GFP_KERNEL);
if (!sdev)
- goto err;
+ return -ENOMEM;
sdev->device = device;
mutex_init(&sdev->sdev_mutex);
sdev->pd = ib_alloc_pd(device, 0);
- if (IS_ERR(sdev->pd))
+ if (IS_ERR(sdev->pd)) {
+ ret = PTR_ERR(sdev->pd);
goto free_dev;
+ }
sdev->lkey = sdev->pd->local_dma_lkey;
@@ -3138,6 +3137,7 @@ static void srpt_add_one(struct ib_device *device)
if (IS_ERR(sdev->cm_id)) {
pr_info("ib_create_cm_id() failed: %ld\n",
PTR_ERR(sdev->cm_id));
+ ret = PTR_ERR(sdev->cm_id);
sdev->cm_id = NULL;
if (!rdma_cm_id)
goto err_ring;
@@ -3182,7 +3182,8 @@ static void srpt_add_one(struct ib_device *device)
mutex_init(&sport->port_gid_id.mutex);
INIT_LIST_HEAD(&sport->port_gid_id.tpg_list);
- if (srpt_refresh_port(sport)) {
+ ret = srpt_refresh_port(sport);
+ if (ret) {
pr_err("MAD registration failed for %s-%d.\n",
dev_name(&sdev->device->dev), i);
goto err_event;
@@ -3193,10 +3194,9 @@ static void srpt_add_one(struct ib_device *device)
list_add_tail(&sdev->list, &srpt_dev_list);
spin_unlock(&srpt_dev_lock);
-out:
ib_set_client_data(device, &srpt_client, sdev);
pr_debug("added %s.\n", dev_name(&device->dev));
- return;
+ return 0;
err_event:
ib_unregister_event_handler(&sdev->event_handler);
@@ -3208,10 +3208,8 @@ err_ring:
ib_dealloc_pd(sdev->pd);
free_dev:
kfree(sdev);
-err:
- sdev = NULL;
pr_info("%s(%s) failed.\n", __func__, dev_name(&device->dev));
- goto out;
+ return ret;
}
/**
@@ -3224,12 +3222,6 @@ static void srpt_remove_one(struct ib_device *device, void *client_data)
struct srpt_device *sdev = client_data;
int i;
- if (!sdev) {
- pr_info("%s(%s): nothing to do.\n", __func__,
- dev_name(&device->dev));
- return;
- }
-
srpt_unregister_mad_agent(sdev);
ib_unregister_event_handler(&sdev->event_handler);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index c9dd6e99ad56..6b22e5a96f10 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1321,7 +1321,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 stat_rate_support[0x10];
u8 reserved_at_1f0[0x1];
u8 pci_sync_for_fw_update_event[0x1];
- u8 reserved_at_1f2[0xa];
+ u8 reserved_at_1f2[0x6];
+ u8 init2_lag_tx_port_affinity[0x1];
+ u8 reserved_at_1fa[0x3];
u8 cqe_version[0x4];
u8 compact_address_vector[0x1];
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 1af5e460b5f6..def601199a1a 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -66,6 +66,7 @@ enum mlx5_qp_optpar {
MLX5_QP_OPTPAR_RETRY_COUNT = 1 << 12,
MLX5_QP_OPTPAR_RNR_RETRY = 1 << 13,
MLX5_QP_OPTPAR_ACK_TIMEOUT = 1 << 14,
+ MLX5_QP_OPTPAR_LAG_TX_AFF = 1 << 15,
MLX5_QP_OPTPAR_PRI_PORT = 1 << 16,
MLX5_QP_OPTPAR_SRQN = 1 << 18,
MLX5_QP_OPTPAR_CQN_RCV = 1 << 19,
@@ -321,6 +322,7 @@ struct mlx5_av {
struct mlx5_ib_ah {
struct ib_ah ibah;
struct mlx5_av av;
+ u8 xmit_port;
};
static inline struct mlx5_ib_ah *to_mah(struct ib_ah *ibah)
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 4e62650e2127..8c093fc1bb9f 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -559,20 +559,6 @@ typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_send_wc);
/**
- * ib_mad_snoop_handler - Callback handler for snooping sent MADs.
- * @mad_agent: MAD agent that snooped the MAD.
- * @send_buf: send MAD data buffer.
- * @mad_send_wc: Work completion information on the sent MAD. Valid
- * only for snooping that occurs on a send completion.
- *
- * Clients snooping MADs should not modify data referenced by the @send_buf
- * or @mad_send_wc.
- */
-typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
- struct ib_mad_send_buf *send_buf,
- struct ib_mad_send_wc *mad_send_wc);
-
-/**
* ib_mad_recv_handler - callback handler for a received MAD.
* @mad_agent: MAD agent requesting the received MAD.
* @send_buf: Send buffer if found, else NULL
@@ -581,8 +567,7 @@ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
* MADs received in response to a send request operation will be handed to
* the user before the send operation completes. All data buffers given
* to registered agents through this routine are owned by the receiving
- * client, except for snooping agents. Clients snooping MADs should not
- * modify the data referenced by @mad_recv_wc.
+ * client.
*/
typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent,
struct ib_mad_send_buf *send_buf,
@@ -595,7 +580,6 @@ typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent,
* @mr: Memory region for system memory usable for DMA.
* @recv_handler: Callback handler for a received MAD.
* @send_handler: Callback handler for a sent MAD.
- * @snoop_handler: Callback handler for snooped sent MADs.
* @context: User-specified context associated with this registration.
* @hi_tid: Access layer assigned transaction ID for this client.
* Unsolicited MADs sent by this client will have the upper 32-bits
@@ -612,7 +596,6 @@ struct ib_mad_agent {
struct ib_qp *qp;
ib_mad_recv_handler recv_handler;
ib_mad_send_handler send_handler;
- ib_mad_snoop_handler snoop_handler;
void *context;
u32 hi_tid;
u32 flags;
@@ -720,36 +703,6 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
ib_mad_recv_handler recv_handler,
void *context,
u32 registration_flags);
-
-enum ib_mad_snoop_flags {
- /*IB_MAD_SNOOP_POSTED_SENDS = 1,*/
- /*IB_MAD_SNOOP_RMPP_SENDS = (1<<1),*/
- IB_MAD_SNOOP_SEND_COMPLETIONS = (1<<2),
- /*IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS = (1<<3),*/
- IB_MAD_SNOOP_RECVS = (1<<4)
- /*IB_MAD_SNOOP_RMPP_RECVS = (1<<5),*/
- /*IB_MAD_SNOOP_REDIRECTED_QPS = (1<<6)*/
-};
-
-/**
- * ib_register_mad_snoop - Register to snoop sent and received MADs.
- * @device: The device to register with.
- * @port_num: The port on the specified device to use.
- * @qp_type: Specifies which QP traffic to snoop. Must be either
- * IB_QPT_SMI or IB_QPT_GSI.
- * @mad_snoop_flags: Specifies information where snooping occurs.
- * @send_handler: The callback routine invoked for a snooped send.
- * @recv_handler: The callback routine invoked for a snooped receive.
- * @context: User specified context associated with the registration.
- */
-struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
- u8 port_num,
- enum ib_qp_type qp_type,
- int mad_snoop_flags,
- ib_mad_snoop_handler snoop_handler,
- ib_mad_recv_handler recv_handler,
- void *context);
-
/**
* ib_unregister_mad_agent - Unregisters a client from using MAD services.
* @mad_agent: Corresponding MAD registration request to deregister.
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index bbc5cfb57cd2..4c488cade70f 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -880,6 +880,12 @@ struct ib_mr_status {
*/
__attribute_const__ enum ib_rate mult_to_ib_rate(int mult);
+struct rdma_ah_init_attr {
+ struct rdma_ah_attr *ah_attr;
+ u32 flags;
+ struct net_device *xmit_slave;
+};
+
enum rdma_ah_attr_type {
RDMA_AH_ATTR_TYPE_UNDEFINED,
RDMA_AH_ATTR_TYPE_IB,
@@ -1267,6 +1273,7 @@ struct ib_qp_attr {
u8 alt_port_num;
u8 alt_timeout;
u32 rate_limit;
+ struct net_device *xmit_slave;
};
enum ib_wr_opcode {
@@ -2403,8 +2410,8 @@ struct ib_device_ops {
void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
- int (*create_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr,
- u32 flags, struct ib_udata *udata);
+ int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr,
+ struct ib_udata *udata);
int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
void (*destroy_ah)(struct ib_ah *ah, u32 flags);
@@ -2709,12 +2716,13 @@ struct ib_device {
/* Used by iWarp CM */
char iw_ifname[IFNAMSIZ];
u32 iw_driver_flags;
+ u32 lag_flags;
};
struct ib_client_nl_info;
struct ib_client {
const char *name;
- void (*add) (struct ib_device *);
+ int (*add)(struct ib_device *ibdev);
void (*remove)(struct ib_device *, void *client_data);
void (*rename)(struct ib_device *dev, void *client_data);
int (*get_nl_info)(struct ib_device *ibdev, void *client_data,
@@ -4701,4 +4709,48 @@ static inline struct ib_device *rdma_device_to_ibdev(struct device *device)
bool rdma_dev_access_netns(const struct ib_device *device,
const struct net *net);
+
+#define IB_ROCE_UDP_ENCAP_VALID_PORT_MIN (0xC000)
+#define IB_GRH_FLOWLABEL_MASK (0x000FFFFF)
+
+/**
+ * rdma_flow_label_to_udp_sport - generate a RoCE v2 UDP src port value based
+ * on the flow_label
+ *
+ * This function converts the 20-bit flow_label input to a valid 14-bit
+ * RoCE v2 UDP src port value. All RoCE v2 drivers should use this same
+ * convention.
+ */
+static inline u16 rdma_flow_label_to_udp_sport(u32 fl)
+{
+ u32 fl_low = fl & 0x03fff, fl_high = fl & 0xFC000;
+
+ fl_low ^= fl_high >> 14;
+ return (u16)(fl_low | IB_ROCE_UDP_ENCAP_VALID_PORT_MIN);
+}
+
+/**
+ * rdma_calc_flow_label - generate an RDMA symmetric flow label value based
+ * on the local and remote qpn values
+ *
+ * This function folds the product of the two 24-bit QPN values and converts
+ * it to a 20-bit result.
+ *
+ * It creates a symmetric flow_label value based on the local and remote qpn
+ * values. This allows both the requester and the responder to calculate the
+ * same flow_label for a given connection.
+ *
+ * This helper should be used by drivers when the upper layer provides a zero
+ * flow_label value, to improve the entropy of RDMA traffic in the network.
+ */
+static inline u32 rdma_calc_flow_label(u32 lqpn, u32 rqpn)
+{
+ u64 v = (u64)lqpn * rqpn;
+
+ v ^= v >> 20;
+ v ^= v >> 40;
+
+ return (u32)(v & IB_GRH_FLOWLABEL_MASK);
+}
#endif /* IB_VERBS_H */
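A hedged usage sketch of the two helpers above, in the spirit of how a RoCE v2 driver is expected to use them (the function name and parameters are placeholders):

static u16 example_roce_udp_sport(u32 fl, u32 lqpn, u32 rqpn)
{
	/* If the ULP left flow_label at zero, derive a symmetric one from
	 * the QPNs; since the 64-bit product lqpn * rqpn is commutative,
	 * both sides of the connection compute the same value.
	 */
	if (!fl)
		fl = rdma_calc_flow_label(lqpn, rqpn);

	/* Fold the 20-bit flow label into the 14-bit UDP source port range
	 * above IB_ROCE_UDP_ENCAP_VALID_PORT_MIN.
	 */
	return rdma_flow_label_to_udp_sport(fl);
}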
diff --git a/include/rdma/lag.h b/include/rdma/lag.h
new file mode 100644
index 000000000000..7c06ec9b2eef
--- /dev/null
+++ b/include/rdma/lag.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _RDMA_LAG_H_
+#define _RDMA_LAG_H_
+
+#include <net/lag.h>
+
+struct ib_device;
+struct rdma_ah_attr;
+
+enum rdma_lag_flags {
+ RDMA_LAG_FLAGS_HASH_ALL_SLAVES = 1 << 0
+};
+
+void rdma_lag_put_ah_roce_slave(struct net_device *xmit_slave);
+struct net_device *rdma_lag_get_ah_roce_slave(struct ib_device *device,
+ struct rdma_ah_attr *ah_attr,
+ gfp_t flags);
+
+#endif /* _RDMA_LAG_H_ */
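Presumably the core AH-creation path is the consumer of this header: it resolves the bonding transmit slave before invoking the driver's create_ah() and releases the reference afterwards. A sketch under that assumption (the wrapper name is hypothetical):

static int example_create_ah_on_lag(struct ib_device *device,
				    struct ib_ah *ah,
				    struct rdma_ah_attr *ah_attr,
				    u32 flags, struct ib_udata *udata)
{
	struct rdma_ah_init_attr init_attr = {};
	struct net_device *slave;
	int ret;

	/* May sleep only when the caller allows it. */
	slave = rdma_lag_get_ah_roce_slave(device, ah_attr,
					   (flags & RDMA_CREATE_AH_SLEEPABLE) ?
					   GFP_KERNEL : GFP_ATOMIC);
	if (IS_ERR(slave))
		return PTR_ERR(slave);

	/* Assumption: slave may legitimately be NULL when no LAG applies. */
	init_attr.ah_attr = ah_attr;
	init_attr.flags = flags;
	init_attr.xmit_slave = slave;

	ret = device->ops.create_ah(ah, &init_attr, udata);

	/* The driver takes what it needs from xmit_slave during create_ah;
	 * drop the reference afterwards.
	 */
	rdma_lag_put_ah_roce_slave(slave);
	return ret;
}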
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 71f48cfdc24c..ea8e794785ed 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -390,14 +390,6 @@ __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr);
const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
int reason);
/**
- * rdma_is_consumer_reject - return true if the consumer rejected the connect
- * request.
- * @id: Communication identifier that received the REJECT event.
- * @reason: Value returned in the REJECT event status field.
- */
-bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason);
-
-/**
* rdma_consumer_reject_data - return the consumer reject private data and
* length, if any.
* @id: Communication identifier that received the REJECT event.
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 5fc10108703a..982bf2340840 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -440,7 +440,7 @@ struct rvt_qp {
/*
* This sge list MUST be last. Do not add anything below here.
*/
- struct rvt_sge r_sg_list[0] /* verified SGEs */
+ struct rvt_sge r_sg_list[] /* verified SGEs */
____cacheline_aligned_in_smp;
};
diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h
index e42940a215a3..1bb6e75d254b 100644
--- a/include/uapi/rdma/rdma_user_cm.h
+++ b/include/uapi/rdma/rdma_user_cm.h
@@ -164,6 +164,8 @@ struct rdma_ucm_query_route_resp {
__u32 num_paths;
__u8 port_num;
__u8 reserved[3];
+ __u32 ibdev_index;
+ __u32 reserved1;
};
struct rdma_ucm_query_addr_resp {
@@ -175,6 +177,8 @@ struct rdma_ucm_query_addr_resp {
__u16 dst_size;
struct __kernel_sockaddr_storage src_addr;
struct __kernel_sockaddr_storage dst_addr;
+ __u32 ibdev_index;
+ __u32 reserved1;
};
struct rdma_ucm_query_path_resp {
diff --git a/net/rds/ib.c b/net/rds/ib.c
index a792d8a3872a..90212ed3edf1 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -127,19 +127,20 @@ void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
queue_work(rds_wq, &rds_ibdev->free_work);
}
-static void rds_ib_add_one(struct ib_device *device)
+static int rds_ib_add_one(struct ib_device *device)
{
struct rds_ib_device *rds_ibdev;
bool has_fr, has_fmr;
+ int ret;
/* Only handle IB (no iWARP) devices */
if (device->node_type != RDMA_NODE_IB_CA)
- return;
+ return -EOPNOTSUPP;
rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL,
ibdev_to_node(device));
if (!rds_ibdev)
- return;
+ return -ENOMEM;
spin_lock_init(&rds_ibdev->spinlock);
refcount_set(&rds_ibdev->refcount, 1);
@@ -182,12 +183,14 @@ static void rds_ib_add_one(struct ib_device *device)
if (!rds_ibdev->vector_load) {
pr_err("RDS/IB: %s failed to allocate vector memory\n",
__func__);
+ ret = -ENOMEM;
goto put_dev;
}
rds_ibdev->dev = device;
rds_ibdev->pd = ib_alloc_pd(device, 0);
if (IS_ERR(rds_ibdev->pd)) {
+ ret = PTR_ERR(rds_ibdev->pd);
rds_ibdev->pd = NULL;
goto put_dev;
}
@@ -195,12 +198,15 @@ static void rds_ib_add_one(struct ib_device *device)
device->dma_device,
sizeof(struct rds_header),
L1_CACHE_BYTES, 0);
- if (!rds_ibdev->rid_hdrs_pool)
+ if (!rds_ibdev->rid_hdrs_pool) {
+ ret = -ENOMEM;
goto put_dev;
+ }
rds_ibdev->mr_1m_pool =
rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL);
if (IS_ERR(rds_ibdev->mr_1m_pool)) {
+ ret = PTR_ERR(rds_ibdev->mr_1m_pool);
rds_ibdev->mr_1m_pool = NULL;
goto put_dev;
}
@@ -208,6 +214,7 @@ static void rds_ib_add_one(struct ib_device *device)
rds_ibdev->mr_8k_pool =
rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_8K_POOL);
if (IS_ERR(rds_ibdev->mr_8k_pool)) {
+ ret = PTR_ERR(rds_ibdev->mr_8k_pool);
rds_ibdev->mr_8k_pool = NULL;
goto put_dev;
}
@@ -227,12 +234,13 @@ static void rds_ib_add_one(struct ib_device *device)
refcount_inc(&rds_ibdev->refcount);
ib_set_client_data(device, &rds_ib_client, rds_ibdev);
- refcount_inc(&rds_ibdev->refcount);
rds_ib_nodev_connect();
+ return 0;
put_dev:
rds_ib_dev_put(rds_ibdev);
+ return ret;
}
/*
@@ -274,9 +282,6 @@ static void rds_ib_remove_one(struct ib_device *device, void *client_data)
{
struct rds_ib_device *rds_ibdev = client_data;
- if (!rds_ibdev)
- return;
-
rds_ib_dev_shutdown(rds_ibdev);
/* stop connection attempts from getting a reference to this device. */
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 04b6fefb8bce..2fad5f3fe093 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -547,18 +547,18 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev)
static struct ib_client smc_ib_client;
/* callback function for ib_register_client() */
-static void smc_ib_add_dev(struct ib_device *ibdev)
+static int smc_ib_add_dev(struct ib_device *ibdev)
{
struct smc_ib_device *smcibdev;
u8 port_cnt;
int i;
if (ibdev->node_type != RDMA_NODE_IB_CA)
- return;
+ return -EOPNOTSUPP;
smcibdev = kzalloc(sizeof(*smcibdev), GFP_KERNEL);
if (!smcibdev)
- return;
+ return -ENOMEM;
smcibdev->ibdev = ibdev;
INIT_WORK(&smcibdev->port_event_work, smc_ib_port_event_work);
@@ -583,17 +583,14 @@ static void smc_ib_add_dev(struct ib_device *ibdev)
smcibdev->pnetid[i]);
}
schedule_work(&smcibdev->port_event_work);
+ return 0;
}
/* callback function for ib_unregister_client() */
static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
{
- struct smc_ib_device *smcibdev;
+ struct smc_ib_device *smcibdev = client_data;
- smcibdev = ib_get_client_data(ibdev, &smc_ib_client);
- if (!smcibdev || smcibdev->ibdev != ibdev)
- return;
- ib_set_client_data(ibdev, &smc_ib_client, NULL);
spin_lock(&smc_ib_devices.lock);
list_del_init(&smcibdev->list); /* remove from smc_ib_devices */
spin_unlock(&smc_ib_devices.lock);