Diffstat (limited to 'drivers/infiniband/hw/mlx5/ib_rep.c')
-rw-r--r--  drivers/infiniband/hw/mlx5/ib_rep.c  215
1 file changed, 194 insertions(+), 21 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index b25e0b33a11a..bbecca405171 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -8,20 +8,95 @@
#include "srq.h"
static int
-mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
+ struct mlx5_eswitch_rep *rep,
+ int vport_index)
{
struct mlx5_ib_dev *ibdev;
- int vport_index;
+ struct net_device *ndev;
ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB);
- vport_index = rep->vport_index;
+ if (!ibdev)
+ return -EINVAL;
ibdev->port[vport_index].rep = rep;
rep->rep_data[REP_IB].priv = ibdev;
- write_lock(&ibdev->port[vport_index].roce.netdev_lock);
- ibdev->port[vport_index].roce.netdev =
- mlx5_ib_get_rep_netdev(rep->esw, rep->vport);
- write_unlock(&ibdev->port[vport_index].roce.netdev_lock);
+ ndev = mlx5_ib_get_rep_netdev(rep->esw, rep->vport);
+
+ return ib_device_set_netdev(&ibdev->ib_dev, ndev, vport_index + 1);
+}
+
+static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);
+
+static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
+{
+ struct mlx5_core_dev *peer_dev;
+ int i;
+
+ mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+ u32 peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
+
+ if (mlx5_lag_is_mpesw(peer_dev))
+ *num_ports += peer_num_ports;
+ else
+ /* Only 1 ib port is the representor for all uplinks */
+ *num_ports += peer_num_ports - 1;
+ }
+}
+
+static int mlx5_ib_set_owner_transport(struct mlx5_core_dev *cur_owner,
+ struct mlx5_core_dev *new_owner)
+{
+ int ret;
+
+ if (!MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(cur_owner, ft_support) ||
+ !MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(cur_owner, ft_support))
+ return 0;
+
+ if (!MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager) ||
+ !MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager_other_eswitch))
+ return 0;
+
+ ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
+ FS_FT_RDMA_TRANSPORT_TX);
+ if (ret)
+ return ret;
+
+ ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
+ FS_FT_RDMA_TRANSPORT_RX);
+ if (ret) {
+ mlx5_fs_set_root_dev(cur_owner, cur_owner,
+ FS_FT_RDMA_TRANSPORT_TX);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void mlx5_ib_release_transport(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_dev *peer_dev;
+ int i, ret;
+
+ mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+ ret = mlx5_ib_set_owner_transport(peer_dev, peer_dev);
+ WARN_ON_ONCE(ret);
+ }
+}
+
+static int mlx5_ib_take_transport(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_dev *peer_dev;
+ int ret;
+ int i;
+
+ mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+ ret = mlx5_ib_set_owner_transport(peer_dev, dev);
+ if (ret) {
+ mlx5_ib_release_transport(dev);
+ return ret;
+ }
+ }
return 0;
}
@@ -30,19 +105,58 @@ static int
mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
u32 num_ports = mlx5_eswitch_get_total_vports(dev);
+ struct mlx5_core_dev *lag_master = dev;
const struct mlx5_ib_profile *profile;
+ struct mlx5_core_dev *peer_dev;
struct mlx5_ib_dev *ibdev;
+ int new_uplink = false;
int vport_index;
int ret;
+ int i;
+
+ vport_index = rep->vport_index;
+
+ if (mlx5_lag_is_shared_fdb(dev)) {
+ if (mlx5_lag_is_master(dev)) {
+ mlx5_ib_num_ports_update(dev, &num_ports);
+ } else {
+ if (rep->vport == MLX5_VPORT_UPLINK) {
+ if (!mlx5_lag_is_mpesw(dev))
+ return 0;
+ new_uplink = true;
+ }
+ mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+ u32 peer_n_ports = mlx5_eswitch_get_total_vports(peer_dev);
+
+ if (mlx5_lag_is_master(peer_dev))
+ lag_master = peer_dev;
+ else if (!mlx5_lag_is_mpesw(dev))
+ /* Only 1 ib port is the representor for all uplinks */
+ peer_n_ports--;
+
+ if (mlx5_get_dev_index(peer_dev) < mlx5_get_dev_index(dev))
+ vport_index += peer_n_ports;
+ }
+ }
+ }
- if (rep->vport == MLX5_VPORT_UPLINK)
+ if (rep->vport == MLX5_VPORT_UPLINK && !new_uplink)
profile = &raw_eth_profile;
else
- return mlx5_ib_set_vport_rep(dev, rep);
+ return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);
- ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev);
- if (!ibdev)
- return -ENOMEM;
+ if (mlx5_lag_is_shared_fdb(dev)) {
+ ret = mlx5_ib_take_transport(lag_master);
+ if (ret)
+ return ret;
+ }
+
+ ibdev = ib_alloc_device_with_net(mlx5_ib_dev, ib_dev,
+ mlx5_core_net(lag_master));
+ if (!ibdev) {
+ ret = -ENOMEM;
+ goto release_transport;
+ }
ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port),
GFP_KERNEL);
@@ -54,16 +168,23 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
ibdev->is_rep = true;
vport_index = rep->vport_index;
ibdev->port[vport_index].rep = rep;
- ibdev->port[vport_index].roce.netdev =
- mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport);
- ibdev->mdev = dev;
+ ibdev->mdev = lag_master;
ibdev->num_ports = num_ports;
+ ibdev->ib_dev.phys_port_cnt = num_ports;
+ ret = ib_device_set_netdev(&ibdev->ib_dev,
+ mlx5_ib_get_rep_netdev(lag_master->priv.eswitch,
+ rep->vport),
+ vport_index + 1);
+ if (ret)
+ goto fail_add;
ret = __mlx5_ib_add(ibdev, profile);
if (ret)
goto fail_add;
rep->rep_data[REP_IB].priv = ibdev;
+ if (mlx5_lag_is_shared_fdb(lag_master))
+ mlx5_ib_register_peer_vport_reps(lag_master);
return 0;
@@ -71,6 +192,10 @@ fail_add:
kfree(ibdev->port);
fail_port:
ib_dealloc_device(&ibdev->ib_dev);
+release_transport:
+ if (mlx5_lag_is_shared_fdb(lag_master))
+ mlx5_ib_release_transport(lag_master);
+
return ret;
}
@@ -82,18 +207,54 @@ static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
static void
mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
{
+ struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(rep->esw);
struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
+ int vport_index = rep->vport_index;
struct mlx5_ib_port *port;
+ int i;
+
+ if (WARN_ON(!mdev))
+ return;
+
+ if (!dev)
+ return;
+
+ if (mlx5_lag_is_shared_fdb(mdev) &&
+ !mlx5_lag_is_master(mdev)) {
+ if (rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(mdev))
+ return;
+ for (i = 0; i < dev->num_ports; i++) {
+ if (dev->port[i].rep == rep)
+ break;
+ }
+ if (WARN_ON(i == dev->num_ports))
+ return;
+ vport_index = i;
+ }
+
+ port = &dev->port[vport_index];
- port = &dev->port[rep->vport_index];
- write_lock(&port->roce.netdev_lock);
- port->roce.netdev = NULL;
- write_unlock(&port->roce.netdev_lock);
+ ib_device_set_netdev(&dev->ib_dev, NULL, vport_index + 1);
rep->rep_data[REP_IB].priv = NULL;
port->rep = NULL;
- if (rep->vport == MLX5_VPORT_UPLINK)
+ if (rep->vport == MLX5_VPORT_UPLINK) {
+
+ if (mlx5_lag_is_shared_fdb(mdev) && !mlx5_lag_is_master(mdev))
+ return;
+
+ if (mlx5_lag_is_shared_fdb(mdev)) {
+ struct mlx5_core_dev *peer_mdev;
+ struct mlx5_eswitch *esw;
+
+ mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
+ esw = peer_mdev->priv.eswitch;
+ mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
+ }
+ mlx5_ib_release_transport(mdev);
+ }
__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+ }
}
static const struct mlx5_eswitch_rep_ops rep_ops = {
@@ -102,6 +263,18 @@ static const struct mlx5_eswitch_rep_ops rep_ops = {
.get_proto_dev = mlx5_ib_rep_to_dev,
};
+static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_core_dev *peer_mdev;
+ struct mlx5_eswitch *esw;
+ int i;
+
+ mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
+ esw = peer_mdev->priv.eswitch;
+ mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
+ }
+}
+
struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
u16 vport_num)
{
@@ -123,7 +296,7 @@ struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
rep = dev->port[port - 1].rep;
- return mlx5_eswitch_add_send_to_vport_rule(esw, rep, sq->base.mqp.qpn);
+ return mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, sq->base.mqp.qpn);
}
static int mlx5r_rep_probe(struct auxiliary_device *adev,