Diffstat (limited to 'drivers/net/ethernet/mellanox')
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_tx.c | 8
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/Makefile | 11
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 6
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/dev.c | 14
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 73
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/devlink.h | 13
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c | 125
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.h | 16
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en.h | 114
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 87
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/params.h | 3
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/port.c | 157
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/port.h | 14
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 22
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c | 16
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 64
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c | 46
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 38
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c | 20
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h | 8
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c | 66
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c | 6
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c | 20
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c | 11
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 170
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h | 31
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h | 11
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h | 3
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 37
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c | 24
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c | 72
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 21
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 311
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h | 55
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c | 54
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c | 12
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 593
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h | 71
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c | 790
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c | 236
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 42
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c | 12
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 12
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 5
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 270
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 54
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 660
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 20
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 357
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eq.c | 225
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c | 287
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h | 17
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c | 1126
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h | 181
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c | 198
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h | 35
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c | 12
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 20
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 22
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 83
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c | 32
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 7
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/health.c | 8
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c | 42
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 89
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h | 9
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/main.c | 47
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 3
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 249
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/port.c | 151
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c | 92
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_arg.c | 273
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c | 60
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c | 46
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c | 58
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c | 82
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c | 241
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c | 270
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c | 57
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c | 120
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h | 76
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/thermal.c | 108
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/thermal.h | 20
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 165
109 files changed, 6774 insertions, 2763 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 2f79378fbf6e..65cb63f6c465 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -228,7 +228,9 @@ void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
static inline bool mlx4_en_is_tx_ring_full(struct mlx4_en_tx_ring *ring)
{
- return ring->prod - ring->cons > ring->full_size;
+ u32 used = READ_ONCE(ring->prod) - READ_ONCE(ring->cons);
+
+ return used > ring->full_size;
}
static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv,
@@ -1083,7 +1085,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP);
}
- ring->prod += nr_txbb;
+ WRITE_ONCE(ring->prod, ring->prod + nr_txbb);
/* If we used a bounce buffer then copy descriptor back into place */
if (unlikely(bounce))
@@ -1214,7 +1216,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
rx_ring->xdp_tx++;
- ring->prod += MLX4_EN_XDP_TX_NRTXBB;
+ WRITE_ONCE(ring->prod, ring->prod + MLX4_EN_XDP_TX_NRTXBB);
/* Ensure new descriptor hits memory
* before setting ownership of this descriptor to HW
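The en_tx.c hunks above pair WRITE_ONCE() on ring->prod with READ_ONCE() in the fullness check so that a reader running locklessly on another CPU gets untorn, non-cached samples of both counters; the unsigned subtraction also stays correct across counter wraparound. A minimal sketch of the same pattern, using a hypothetical ring type rather than the mlx4 structures (needs <linux/compiler.h> and <linux/types.h>):

/* Hypothetical ring, not the mlx4 definitions. */
struct demo_tx_ring {
	u32 prod;		/* advanced by the transmit path */
	u32 cons;		/* advanced by the completion path */
	u32 full_size;
};

static inline bool demo_tx_ring_is_full(const struct demo_tx_ring *ring)
{
	/* Single untorn read of each counter; wrap-safe unsigned math. */
	u32 used = READ_ONCE(ring->prod) - READ_ONCE(ring->cons);

	return used > ring->full_size;
}

static inline void demo_tx_ring_publish(struct demo_tx_ring *ring, u32 nr_txbb)
{
	/* Publish the new producer value in one store. */
	WRITE_ONCE(ring->prod, ring->prod + nr_txbb);
}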
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 4ac4d883047b..321f801c1d7c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -323,7 +323,7 @@ struct mlx4_en_tx_ring {
struct mlx4_en_rx_desc {
/* actual number of entries depends on rx ring stride */
- struct mlx4_wqe_data_seg data[0];
+ DECLARE_FLEX_ARRAY(struct mlx4_wqe_data_seg, data);
};
struct mlx4_en_rx_ring {
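The mlx4_en.h hunk converts the old data[0] zero-length array (a GNU extension) to DECLARE_FLEX_ARRAY(), which keeps the layout identical while allowing a flexible array member to be the sole member of a struct, something a plain C99 flexible array forbids. A small sketch with hypothetical types, not the mlx4 definitions:

#include <linux/stddef.h>	/* DECLARE_FLEX_ARRAY() */
#include <linux/types.h>

struct demo_seg {
	__be64 addr;
	__be32 byte_count;
};

struct demo_rx_desc {
	/* was: struct demo_seg data[0];  (zero-length array GNU extension) */
	DECLARE_FLEX_ARRAY(struct demo_seg, data);
};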
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 8d4e25cc54ea..ddf1e352f51d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -16,7 +16,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \
lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
- diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
+ diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o diag/reporter_vnic.o \
fw_reset.o qos.o lib/tout.o lib/aso.o
#
@@ -69,14 +69,15 @@ mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o
#
mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \
ecpf.o rdma.o esw/legacy.o \
- esw/debugfs.o esw/devlink_port.o esw/vporttbl.o esw/qos.o
+ esw/devlink_port.o esw/vporttbl.o esw/qos.o
mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \
esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \
esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o
-mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o en/rep/bridge.o
+mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o esw/bridge_mcast.o en/rep/bridge.o
+mlx5_core-$(CONFIG_THERMAL) += thermal.o
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
@@ -111,8 +112,8 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o
steering/dr_ste_v2.o \
steering/dr_cmd.o steering/dr_fw.o \
steering/dr_action.o steering/fs_dr.o \
- steering/dr_definer.o \
- steering/dr_dbg.o lib/smfs.o
+ steering/dr_definer.o steering/dr_ptrn.o \
+ steering/dr_arg.o steering/dr_dbg.o lib/smfs.o
#
# SF device
#
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index b00e33ed05e9..d53de39539a8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1802,7 +1802,7 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size,
if (in_size <= 16)
goto cache_miss;
- for (i = 0; i < MLX5_NUM_COMMAND_CACHES; i++) {
+ for (i = 0; i < dev->profile.num_cmd_caches; i++) {
ch = &cmd->cache[i];
if (in_size > ch->max_inbox_size)
continue;
@@ -2097,7 +2097,7 @@ static void destroy_msg_cache(struct mlx5_core_dev *dev)
struct mlx5_cmd_msg *n;
int i;
- for (i = 0; i < MLX5_NUM_COMMAND_CACHES; i++) {
+ for (i = 0; i < dev->profile.num_cmd_caches; i++) {
ch = &dev->cmd.cache[i];
list_for_each_entry_safe(msg, n, &ch->head, list) {
list_del(&msg->list);
@@ -2127,7 +2127,7 @@ static void create_msg_cache(struct mlx5_core_dev *dev)
int k;
/* Initialize and fill the caches with initial entries */
- for (k = 0; k < MLX5_NUM_COMMAND_CACHES; k++) {
+ for (k = 0; k < dev->profile.num_cmd_caches; k++) {
ch = &cmd->cache[k];
spin_lock_init(&ch->lock);
INIT_LIST_HEAD(&ch->head);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index 2e7806001fdc..1b33533b15de 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -35,6 +35,7 @@
#include <linux/mlx5/mlx5_ifc_vdpa.h>
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
+#include "devlink.h"
/* intf dev list mutex */
static DEFINE_MUTEX(mlx5_intf_mutex);
@@ -106,17 +107,6 @@ bool mlx5_eth_supported(struct mlx5_core_dev *dev)
return true;
}
-static bool is_eth_enabled(struct mlx5_core_dev *dev)
-{
- union devlink_param_value val;
- int err;
-
- err = devl_param_driverinit_value_get(priv_to_devlink(dev),
- DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
- &val);
- return err ? false : val.vbool;
-}
-
bool mlx5_vnet_supported(struct mlx5_core_dev *dev)
{
if (!IS_ENABLED(CONFIG_MLX5_VDPA_NET))
@@ -245,7 +235,7 @@ static const struct mlx5_adev_device {
.is_enabled = &is_ib_enabled },
[MLX5_INTERFACE_PROTOCOL_ETH] = { .suffix = "eth",
.is_supported = &mlx5_eth_supported,
- .is_enabled = &is_eth_enabled },
+ .is_enabled = &mlx5_core_is_eth_enabled },
[MLX5_INTERFACE_PROTOCOL_ETH_REP] = { .suffix = "eth-rep",
.is_supported = &is_eth_rep_supported },
[MLX5_INTERFACE_PROTOCOL_IB_REP] = { .suffix = "rdma-rep",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index c5d2fdcabd56..4b607785d694 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -202,7 +202,7 @@ static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a
break;
/* On fw_activate action, also driver is reloaded and reinit performed */
*actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
- ret = mlx5_load_one_devl_locked(dev, false);
+ ret = mlx5_load_one_devl_locked(dev, true);
break;
default:
/* Unsupported action should not get to this function */
@@ -494,6 +494,61 @@ static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id,
return (val.vu32 >= 64 && val.vu32 <= 4096) ? 0 : -EINVAL;
}
+static int
+mlx5_devlink_hairpin_num_queues_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ return val.vu32 ? 0 : -EINVAL;
+}
+
+static int
+mlx5_devlink_hairpin_queue_size_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ u32 val32 = val.vu32;
+
+ if (!is_power_of_2(val32)) {
+ NL_SET_ERR_MSG_MOD(extack, "Value is not power of two");
+ return -EINVAL;
+ }
+
+ if (val32 > BIT(MLX5_CAP_GEN(dev, log_max_hairpin_num_packets))) {
+ NL_SET_ERR_MSG_FMT_MOD(
+ extack, "Maximum hairpin queue size is %lu",
+ BIT(MLX5_CAP_GEN(dev, log_max_hairpin_num_packets)));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void mlx5_devlink_hairpin_params_init_values(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ union devlink_param_value value;
+ u32 link_speed = 0;
+ u64 link_speed64;
+
+ /* set one hairpin pair per 50Gbps share of the link */
+ mlx5_port_max_linkspeed(dev, &link_speed);
+ link_speed = max_t(u32, link_speed, 50000);
+ link_speed64 = link_speed;
+ do_div(link_speed64, 50000);
+
+ value.vu32 = link_speed64;
+ devl_param_driverinit_value_set(
+ devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, value);
+
+ value.vu32 =
+ BIT(min_t(u32, 16 - MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(dev),
+ MLX5_CAP_GEN(dev, log_max_hairpin_num_packets)));
+ devl_param_driverinit_value_set(
+ devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, value);
+}
+
static const struct devlink_param mlx5_devlink_params[] = {
DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
NULL, NULL, mlx5_devlink_enable_roce_validate),
@@ -547,6 +602,14 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink)
static const struct devlink_param mlx5_devlink_eth_params[] = {
DEVLINK_PARAM_GENERIC(ENABLE_ETH, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
NULL, NULL, NULL),
+ DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES,
+ "hairpin_num_queues", DEVLINK_PARAM_TYPE_U32,
+ BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL,
+ mlx5_devlink_hairpin_num_queues_validate),
+ DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE,
+ "hairpin_queue_size", DEVLINK_PARAM_TYPE_U32,
+ BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL,
+ mlx5_devlink_hairpin_queue_size_validate),
};
static int mlx5_devlink_eth_params_register(struct devlink *devlink)
@@ -567,6 +630,9 @@ static int mlx5_devlink_eth_params_register(struct devlink *devlink)
devl_param_driverinit_value_set(devlink,
DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
value);
+
+ mlx5_devlink_hairpin_params_init_values(devlink);
+
return 0;
}
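A worked example of the hairpin defaults set above, assuming a port whose maximum supported speed is 100 Gbps and illustrative capability values (log_max_hairpin_num_packets = 15, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(dev) = 6; the real numbers depend on the device):

	hairpin_num_queues = max(100000, 50000) / 50000      = 2
	hairpin_queue_size = 1 << min(16 - 6, 15) = 1 << 10  = 1024

Both parameters use the driverinit cmode, so a value set through devlink only takes effect after a devlink reload; hairpin_queue_size must additionally be a power of two no larger than 1 << log_max_hairpin_num_packets, as enforced by the validators above.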
@@ -805,6 +871,11 @@ int mlx5_devlink_params_register(struct devlink *devlink)
{
int err;
+ /* Here only the driver init params should be registered.
+ * Runtime params should be registered by the code whose
+ * behaviour they configure.
+ */
+
err = devl_params_register(devlink, mlx5_devlink_params,
ARRAY_SIZE(mlx5_devlink_params));
if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
index 212b12424146..defba5bd91d9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
@@ -12,6 +12,8 @@ enum mlx5_devlink_param_id {
MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM,
MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA,
MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT,
+ MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES,
+ MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE,
};
struct mlx5_trap_ctx {
@@ -44,4 +46,15 @@ void mlx5_devlink_free(struct devlink *devlink);
int mlx5_devlink_params_register(struct devlink *devlink);
void mlx5_devlink_params_unregister(struct devlink *devlink);
+static inline bool mlx5_core_is_eth_enabled(struct mlx5_core_dev *dev)
+{
+ union devlink_param_value val;
+ int err;
+
+ err = devl_param_driverinit_value_get(priv_to_devlink(dev),
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
+ &val);
+ return err ? false : val.vbool;
+}
+
#endif /* __MLX5_DEVLINK_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c
new file mode 100644
index 000000000000..9114661cd967
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. */
+
+#include "reporter_vnic.h"
+#include "devlink.h"
+
+#define VNIC_ENV_GET64(vnic_env_stats, c) \
+ MLX5_GET64(query_vnic_env_out, (vnic_env_stats)->query_vnic_env_out, \
+ vport_env.c)
+
+struct mlx5_vnic_diag_stats {
+ __be64 query_vnic_env_out[MLX5_ST_SZ_QW(query_vnic_env_out)];
+};
+
+int mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev,
+ struct devlink_fmsg *fmsg,
+ u16 vport_num, bool other_vport)
+{
+ u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
+ struct mlx5_vnic_diag_stats vnic;
+ int err;
+
+ MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV);
+ MLX5_SET(query_vnic_env_in, in, vport_number, vport_num);
+ MLX5_SET(query_vnic_env_in, in, other_vport, !!other_vport);
+
+ err = mlx5_cmd_exec_inout(dev, query_vnic_env, in, &vnic.query_vnic_env_out);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, "vNIC env counters");
+ if (err)
+ return err;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u64_pair_put(fmsg, "total_error_queues",
+ VNIC_ENV_GET64(&vnic, total_error_queues));
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u64_pair_put(fmsg, "send_queue_priority_update_flow",
+ VNIC_ENV_GET64(&vnic, send_queue_priority_update_flow));
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u64_pair_put(fmsg, "comp_eq_overrun",
+ VNIC_ENV_GET64(&vnic, comp_eq_overrun));
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u64_pair_put(fmsg, "async_eq_overrun",
+ VNIC_ENV_GET64(&vnic, async_eq_overrun));
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u64_pair_put(fmsg, "cq_overrun",
+ VNIC_ENV_GET64(&vnic, cq_overrun));
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u64_pair_put(fmsg, "invalid_command",
+ VNIC_ENV_GET64(&vnic, invalid_command));
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u64_pair_put(fmsg, "quota_exceeded_command",
+ VNIC_ENV_GET64(&vnic, quota_exceeded_command));
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u64_pair_put(fmsg, "nic_receive_steering_discard",
+ VNIC_ENV_GET64(&vnic, nic_receive_steering_discard));
+ if (err)
+ return err;
+
+ err = devlink_fmsg_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_pair_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int mlx5_reporter_vnic_diagnose(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+
+ return mlx5_reporter_vnic_diagnose_counters(dev, fmsg, 0, false);
+}
+
+static const struct devlink_health_reporter_ops mlx5_reporter_vnic_ops = {
+ .name = "vnic",
+ .diagnose = mlx5_reporter_vnic_diagnose,
+};
+
+void mlx5_reporter_vnic_create(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ struct devlink *devlink = priv_to_devlink(dev);
+
+ health->vnic_reporter =
+ devlink_health_reporter_create(devlink,
+ &mlx5_reporter_vnic_ops,
+ 0, dev);
+ if (IS_ERR(health->vnic_reporter))
+ mlx5_core_warn(dev,
+ "Failed to create vnic reporter, err = %ld\n",
+ PTR_ERR(health->vnic_reporter));
+}
+
+void mlx5_reporter_vnic_destroy(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+
+ if (!IS_ERR_OR_NULL(health->vnic_reporter))
+ devlink_health_reporter_destroy(health->vnic_reporter);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.h
new file mode 100644
index 000000000000..eba87a39e9b1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+ * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES.
+ */
+#ifndef __MLX5_REPORTER_VNIC_H
+#define __MLX5_REPORTER_VNIC_H
+
+#include "mlx5_core.h"
+
+void mlx5_reporter_vnic_create(struct mlx5_core_dev *dev);
+void mlx5_reporter_vnic_destroy(struct mlx5_core_dev *dev);
+
+int mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev,
+ struct devlink_fmsg *fmsg,
+ u16 vport_num, bool other_vport);
+
+#endif /* __MLX5_REPORTER_VNIC_H */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 4a19ef4a9811..b8987a404d75 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -335,15 +335,20 @@ static inline u8 mlx5e_get_dcb_num_tc(struct mlx5e_params *params)
params->mqprio.num_tc : 1;
}
+/* Keep this enum consistent with the corresponding strings array
+ * declared in en/reporter_rx.c
+ */
enum {
- MLX5E_RQ_STATE_ENABLED,
+ MLX5E_RQ_STATE_ENABLED = 0,
MLX5E_RQ_STATE_RECOVERING,
- MLX5E_RQ_STATE_AM,
+ MLX5E_RQ_STATE_DIM,
MLX5E_RQ_STATE_NO_CSUM_COMPLETE,
MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */
MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, /* set when mini_cqe_resp_stride_index cap is used */
MLX5E_RQ_STATE_SHAMPO, /* set when SHAMPO cap is used */
MLX5E_RQ_STATE_MINI_CQE_ENHANCED, /* set when enhanced mini_cqe_cap is used */
+ MLX5E_RQ_STATE_XSK, /* set to indicate an xsk rq */
+ MLX5E_NUM_RQ_STATES, /* Must be kept last */
};
struct mlx5e_cq {
@@ -384,16 +389,20 @@ struct mlx5e_sq_dma {
enum mlx5e_dma_map_type type;
};
+/* Keep this enum consistent with the corresponding strings array
+ * declared in en/reporter_tx.c
+ */
enum {
- MLX5E_SQ_STATE_ENABLED,
+ MLX5E_SQ_STATE_ENABLED = 0,
MLX5E_SQ_STATE_MPWQE,
MLX5E_SQ_STATE_RECOVERING,
MLX5E_SQ_STATE_IPSEC,
- MLX5E_SQ_STATE_AM,
+ MLX5E_SQ_STATE_DIM,
MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE,
MLX5E_SQ_STATE_PENDING_XSK_TX,
MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC,
MLX5E_SQ_STATE_XDP_MULTIBUF,
+ MLX5E_NUM_SQ_STATES, /* Must be kept last */
};
struct mlx5e_tx_mpwqe {
@@ -466,64 +475,18 @@ struct mlx5e_txqsq {
cqe_ts_to_ns ptp_cyc2time;
} ____cacheline_aligned_in_smp;
-union mlx5e_alloc_unit {
- struct page *page;
- struct xdp_buff *xsk;
-};
-
-/* XDP packets can be transmitted in different ways. On completion, we need to
- * distinguish between them to clean up things in a proper way.
- */
-enum mlx5e_xdp_xmit_mode {
- /* An xdp_frame was transmitted due to either XDP_REDIRECT from another
- * device or XDP_TX from an XSK RQ. The frame has to be unmapped and
- * returned.
- */
- MLX5E_XDP_XMIT_MODE_FRAME,
-
- /* The xdp_frame was created in place as a result of XDP_TX from a
- * regular RQ. No DMA remapping happened, and the page belongs to us.
- */
- MLX5E_XDP_XMIT_MODE_PAGE,
-
- /* No xdp_frame was created at all, the transmit happened from a UMEM
- * page. The UMEM Completion Ring producer pointer has to be increased.
- */
- MLX5E_XDP_XMIT_MODE_XSK,
-};
-
-struct mlx5e_xdp_info {
- enum mlx5e_xdp_xmit_mode mode;
- union {
- struct {
- struct xdp_frame *xdpf;
- dma_addr_t dma_addr;
- } frame;
- struct {
- struct mlx5e_rq *rq;
- struct page *page;
- } page;
- };
-};
-
-struct mlx5e_xmit_data {
- dma_addr_t dma_addr;
- void *data;
- u32 len;
-};
-
struct mlx5e_xdp_info_fifo {
- struct mlx5e_xdp_info *xi;
+ union mlx5e_xdp_info *xi;
u32 *cc;
u32 *pc;
u32 mask;
};
struct mlx5e_xdpsq;
+struct mlx5e_xmit_data;
typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *);
typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *,
struct mlx5e_xmit_data *,
- struct skb_shared_info *,
int);
struct mlx5e_xdpsq {
@@ -596,16 +559,36 @@ struct mlx5e_icosq {
struct work_struct recover_work;
} ____cacheline_aligned_in_smp;
+struct mlx5e_frag_page {
+ struct page *page;
+ u16 frags;
+};
+
+enum mlx5e_wqe_frag_flag {
+ MLX5E_WQE_FRAG_LAST_IN_PAGE,
+ MLX5E_WQE_FRAG_SKIP_RELEASE,
+};
+
struct mlx5e_wqe_frag_info {
- union mlx5e_alloc_unit *au;
+ union {
+ struct mlx5e_frag_page *frag_page;
+ struct xdp_buff **xskp;
+ };
u32 offset;
- bool last_in_page;
+ u8 flags;
+};
+
+union mlx5e_alloc_units {
+ DECLARE_FLEX_ARRAY(struct mlx5e_frag_page, frag_pages);
+ DECLARE_FLEX_ARRAY(struct page *, pages);
+ DECLARE_FLEX_ARRAY(struct xdp_buff *, xsk_buffs);
};
struct mlx5e_mpw_info {
u16 consumed_strides;
- DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE);
- union mlx5e_alloc_unit alloc_units[];
+ DECLARE_BITMAP(skip_release_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE);
+ struct mlx5e_frag_page linear_page;
+ union mlx5e_alloc_units alloc_units;
};
#define MLX5E_MAX_RX_FRAGS 4
@@ -616,11 +599,6 @@ struct mlx5e_mpw_info {
#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_MAX_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \
MLX5_MPWRQ_MAX_PAGES_PER_WQE : NAPI_POLL_WEIGHT)
#define MLX5E_CACHE_SIZE (4 * roundup_pow_of_two(MLX5E_CACHE_UNIT))
-struct mlx5e_page_cache {
- u32 head;
- u32 tail;
- struct page *page_cache[MLX5E_CACHE_SIZE];
-};
struct mlx5e_rq;
typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*);
@@ -652,19 +630,24 @@ struct mlx5e_rq_frags_info {
struct mlx5e_rq_frag_info arr[MLX5E_MAX_RX_FRAGS];
u8 num_frags;
u8 log_num_frags;
- u8 wqe_bulk;
+ u16 wqe_bulk;
+ u16 refill_unit;
u8 wqe_index_mask;
};
struct mlx5e_dma_info {
dma_addr_t addr;
- struct page *page;
+ union {
+ struct mlx5e_frag_page *frag_page;
+ struct page *page;
+ };
};
struct mlx5e_shampo_hd {
u32 mkey;
struct mlx5e_dma_info *info;
- struct page *last_page;
+ struct mlx5e_frag_page *pages;
+ u16 curr_page_index;
u16 hd_per_wq;
u16 hd_per_wqe;
unsigned long *bitmap;
@@ -693,7 +676,7 @@ struct mlx5e_rq {
struct {
struct mlx5_wq_cyc wq;
struct mlx5e_wqe_frag_info *frags;
- union mlx5e_alloc_unit *alloc_units;
+ union mlx5e_alloc_units *alloc_units;
struct mlx5e_rq_frags_info info;
mlx5e_fp_skb_from_cqe skb_from_cqe;
} wqe;
@@ -729,7 +712,6 @@ struct mlx5e_rq {
struct mlx5e_rq_stats *stats;
struct mlx5e_cq cq;
struct mlx5e_cq_decomp cqd;
- struct mlx5e_page_cache page_cache;
struct hwtstamp_config *tstamp;
struct mlx5_clock *clock;
struct mlx5e_icosq *icosq;
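The en.h changes above drop the per-RQ page cache and the old alloc_unit union in favor of struct mlx5e_frag_page, which pairs each page with a fragment counter so a page is only released once every WQE fragment that references it has been consumed. A rough sketch of that bookkeeping idea with hypothetical helpers, not the mlx5 implementation (which defers the release to the page_pool):

/* Hypothetical page/fragment pairing, not the mlx5e structures. */
struct demo_frag_page {
	struct page *page;
	u16 frags;		/* outstanding fragments on this page */
};

static void demo_frag_get(struct demo_frag_page *fp)
{
	fp->frags++;		/* a new RX fragment points into the page */
}

static void demo_frag_put(struct demo_frag_page *fp)
{
	if (--fp->frags == 0) {
		put_page(fp->page);	/* last user gone: release the page */
		fp->page = NULL;
	}
}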
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index a21bd1179477..ef546ed8b4d9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -253,17 +253,20 @@ static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev,
struct mlx5e_xsk_param *xsk,
bool mpwqe)
{
+ u32 sz;
+
/* XSK frames are mapped as individual pages, because frames may come in
* an arbitrary order from random locations in the UMEM.
*/
if (xsk)
return mpwqe ? 1 << mlx5e_mpwrq_page_shift(mdev, xsk) : PAGE_SIZE;
- /* XDP in mlx5e doesn't support multiple packets per page. */
- if (params->xdp_prog)
- return PAGE_SIZE;
+ sz = roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false));
- return roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false));
+ /* XDP in mlx5e doesn't support multiple packets per page.
+ * Do not assume sz <= PAGE_SIZE if params->xdp_prog is set.
+ */
+ return params->xdp_prog && sz < PAGE_SIZE ? PAGE_SIZE : sz;
}
static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5_core_dev *mdev,
@@ -320,6 +323,20 @@ static bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
return log_num_strides >= MLX5_MPWQE_LOG_NUM_STRIDES_BASE;
}
+bool mlx5e_verify_params_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
+ u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+
+ return mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size,
+ log_wqe_num_of_strides,
+ page_shift, umr_mode);
+}
+
bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
@@ -402,6 +419,10 @@ u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
return order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true));
+ /* XDP in mlx5e doesn't support multiple packets per page. */
+ if (params->xdp_prog)
+ return PAGE_SHIFT;
+
return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
}
@@ -553,7 +574,7 @@ bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
u32 link_speed = 0;
u32 pci_bw = 0;
- mlx5e_port_max_linkspeed(mdev, &link_speed);
+ mlx5_port_max_linkspeed(mdev, &link_speed);
pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL);
mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n",
link_speed, pci_bw);
@@ -572,9 +593,6 @@ int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params
if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode))
return -EOPNOTSUPP;
- if (params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
- return -EINVAL;
-
return 0;
}
@@ -667,6 +685,48 @@ static int mlx5e_max_nonlinear_mtu(int first_frag_size, int frag_size, bool xdp)
return first_frag_size + (MLX5E_MAX_RX_FRAGS - 2) * frag_size + PAGE_SIZE;
}
+static void mlx5e_rx_compute_wqe_bulk_params(struct mlx5e_params *params,
+ struct mlx5e_rq_frags_info *info)
+{
+ u16 bulk_bound_rq_size = (1 << params->log_rq_mtu_frames) / 4;
+ u32 bulk_bound_rq_size_in_bytes;
+ u32 sum_frag_strides = 0;
+ u32 wqe_bulk_in_bytes;
+ u16 split_factor;
+ u32 wqe_bulk;
+ int i;
+
+ for (i = 0; i < info->num_frags; i++)
+ sum_frag_strides += info->arr[i].frag_stride;
+
+ /* For MTUs larger than PAGE_SIZE, align to PAGE_SIZE to reflect
+ * the number of pages consumed per WQE, expressed in bytes.
+ */
+ if (sum_frag_strides > PAGE_SIZE)
+ sum_frag_strides = ALIGN(sum_frag_strides, PAGE_SIZE);
+
+ bulk_bound_rq_size_in_bytes = bulk_bound_rq_size * sum_frag_strides;
+
+#define MAX_WQE_BULK_BYTES(xdp) ((xdp ? 256 : 512) * 1024)
+
+ /* A WQE bulk should not exceed min(512KB, 1/4 of rq size). For XDP
+ * keep bulk size smaller to avoid filling the page_pool cache on
+ * every bulk refill.
+ */
+ wqe_bulk_in_bytes = min_t(u32, MAX_WQE_BULK_BYTES(params->xdp_prog),
+ bulk_bound_rq_size_in_bytes);
+ wqe_bulk = DIV_ROUND_UP(wqe_bulk_in_bytes, sum_frag_strides);
+
+ /* Make sure that allocations don't start when the page is still used
+ * by older WQEs.
+ */
+ info->wqe_bulk = max_t(u16, info->wqe_index_mask + 1, wqe_bulk);
+
+ split_factor = DIV_ROUND_UP(MAX_WQE_BULK_BYTES(params->xdp_prog),
+ PP_ALLOC_CACHE_REFILL * PAGE_SIZE);
+ info->refill_unit = DIV_ROUND_UP(info->wqe_bulk, split_factor);
+}
+
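For reference, a worked pass through mlx5e_rx_compute_wqe_bulk_params() above with assumed inputs (a non-XDP queue, 4 KB pages, PP_ALLOC_CACHE_REFILL taken as 64; real values vary with the configuration):

	log_rq_mtu_frames = 10  -> bulk_bound_rq_size = 1024 / 4 = 256 WQEs
	num_frags = 1, frag_stride = 4096 B -> sum_frag_strides = 4096 B
	bulk_bound_rq_size_in_bytes = 256 * 4096        = 1 MB
	wqe_bulk_in_bytes = min(512 KB, 1 MB)           = 512 KB
	wqe_bulk = DIV_ROUND_UP(512 KB, 4096)           = 128
	           (then clamped to at least wqe_index_mask + 1)
	split_factor = DIV_ROUND_UP(512 KB, 64 * 4 KB)  = 2
	refill_unit  = DIV_ROUND_UP(128, 2)             = 64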
#define DEFAULT_FRAG_SIZE (2048)
static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
@@ -774,11 +834,14 @@ static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
}
out:
- /* Bulking optimization to skip allocation until at least 8 WQEs can be
- * allocated in a row. At the same time, never start allocation when
- * the page is still used by older WQEs.
+ /* Bulking optimization to skip allocation until a large enough number
+ * of WQEs can be allocated in a row. Bulking also influences how well
+ * deferred page release works.
*/
- info->wqe_bulk = max_t(u8, info->wqe_index_mask + 1, 8);
+ mlx5e_rx_compute_wqe_bulk_params(params, info);
+
+ mlx5_core_dbg(mdev, "%s: wqe_bulk = %u, wqe_bulk_refill_unit = %u\n",
+ __func__, info->wqe_bulk, info->refill_unit);
info->log_num_frags = order_base_2(info->num_frags);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index c9be6eb88012..a5d20f6d6d9c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -153,6 +153,9 @@ int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+bool mlx5e_verify_params_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
static inline void mlx5e_params_print_info(struct mlx5_core_dev *mdev,
struct mlx5e_params *params)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
index 505ba41195b9..dbe2b19a9570 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -32,101 +32,6 @@
#include "port.h"
-/* speed in units of 1Mb */
-static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = {
- [MLX5E_1000BASE_CX_SGMII] = 1000,
- [MLX5E_1000BASE_KX] = 1000,
- [MLX5E_10GBASE_CX4] = 10000,
- [MLX5E_10GBASE_KX4] = 10000,
- [MLX5E_10GBASE_KR] = 10000,
- [MLX5E_20GBASE_KR2] = 20000,
- [MLX5E_40GBASE_CR4] = 40000,
- [MLX5E_40GBASE_KR4] = 40000,
- [MLX5E_56GBASE_R4] = 56000,
- [MLX5E_10GBASE_CR] = 10000,
- [MLX5E_10GBASE_SR] = 10000,
- [MLX5E_10GBASE_ER] = 10000,
- [MLX5E_40GBASE_SR4] = 40000,
- [MLX5E_40GBASE_LR4] = 40000,
- [MLX5E_50GBASE_SR2] = 50000,
- [MLX5E_100GBASE_CR4] = 100000,
- [MLX5E_100GBASE_SR4] = 100000,
- [MLX5E_100GBASE_KR4] = 100000,
- [MLX5E_100GBASE_LR4] = 100000,
- [MLX5E_100BASE_TX] = 100,
- [MLX5E_1000BASE_T] = 1000,
- [MLX5E_10GBASE_T] = 10000,
- [MLX5E_25GBASE_CR] = 25000,
- [MLX5E_25GBASE_KR] = 25000,
- [MLX5E_25GBASE_SR] = 25000,
- [MLX5E_50GBASE_CR2] = 50000,
- [MLX5E_50GBASE_KR2] = 50000,
-};
-
-static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = {
- [MLX5E_SGMII_100M] = 100,
- [MLX5E_1000BASE_X_SGMII] = 1000,
- [MLX5E_5GBASE_R] = 5000,
- [MLX5E_10GBASE_XFI_XAUI_1] = 10000,
- [MLX5E_40GBASE_XLAUI_4_XLPPI_4] = 40000,
- [MLX5E_25GAUI_1_25GBASE_CR_KR] = 25000,
- [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = 50000,
- [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR] = 50000,
- [MLX5E_CAUI_4_100GBASE_CR4_KR4] = 100000,
- [MLX5E_100GAUI_2_100GBASE_CR2_KR2] = 100000,
- [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000,
- [MLX5E_400GAUI_8] = 400000,
- [MLX5E_100GAUI_1_100GBASE_CR_KR] = 100000,
- [MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 200000,
- [MLX5E_400GAUI_4_400GBASE_CR4_KR4] = 400000,
-};
-
-bool mlx5e_ptys_ext_supported(struct mlx5_core_dev *mdev)
-{
- struct mlx5e_port_eth_proto eproto;
- int err;
-
- if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet))
- return true;
-
- err = mlx5_port_query_eth_proto(mdev, 1, true, &eproto);
- if (err)
- return false;
-
- return !!eproto.cap;
-}
-
-static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev,
- const u32 **arr, u32 *size,
- bool force_legacy)
-{
- bool ext = force_legacy ? false : mlx5e_ptys_ext_supported(mdev);
-
- *size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) :
- ARRAY_SIZE(mlx5e_link_speed);
- *arr = ext ? mlx5e_ext_link_speed : mlx5e_link_speed;
-}
-
-int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext,
- struct mlx5e_port_eth_proto *eproto)
-{
- u32 out[MLX5_ST_SZ_DW(ptys_reg)];
- int err;
-
- if (!eproto)
- return -EINVAL;
-
- err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port);
- if (err)
- return err;
-
- eproto->cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
- eth_proto_capability);
- eproto->admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin);
- eproto->oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper);
- return 0;
-}
-
void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status,
u8 *an_disable_cap, u8 *an_disable_admin)
{
@@ -172,30 +77,14 @@ int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable,
sizeof(out), MLX5_REG_PTYS, 0, 1);
}
-u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper,
- bool force_legacy)
-{
- unsigned long temp = eth_proto_oper;
- const u32 *table;
- u32 speed = 0;
- u32 max_size;
- int i;
-
- mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy);
- i = find_first_bit(&temp, max_size);
- if (i < max_size)
- speed = table[i];
- return speed;
-}
-
int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
{
- struct mlx5e_port_eth_proto eproto;
+ struct mlx5_port_eth_proto eproto;
bool force_legacy = false;
bool ext;
int err;
- ext = mlx5e_ptys_ext_supported(mdev);
+ ext = mlx5_ptys_ext_supported(mdev);
err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
if (err)
goto out;
@@ -205,7 +94,7 @@ int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
if (err)
goto out;
}
- *speed = mlx5e_port_ptys2speed(mdev, eproto.oper, force_legacy);
+ *speed = mlx5_port_ptys2speed(mdev, eproto.oper, force_legacy);
if (!(*speed))
err = -EINVAL;
@@ -213,46 +102,6 @@ out:
return err;
}
-int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
-{
- struct mlx5e_port_eth_proto eproto;
- u32 max_speed = 0;
- const u32 *table;
- u32 max_size;
- bool ext;
- int err;
- int i;
-
- ext = mlx5e_ptys_ext_supported(mdev);
- err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
- if (err)
- return err;
-
- mlx5e_port_get_speed_arr(mdev, &table, &max_size, false);
- for (i = 0; i < max_size; ++i)
- if (eproto.cap & MLX5E_PROT_MASK(i))
- max_speed = max(max_speed, table[i]);
-
- *speed = max_speed;
- return 0;
-}
-
-u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed,
- bool force_legacy)
-{
- u32 link_modes = 0;
- const u32 *table;
- u32 max_size;
- int i;
-
- mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy);
- for (i = 0; i < max_size; ++i) {
- if (table[i] == speed)
- link_modes |= MLX5E_PROT_MASK(i);
- }
- return link_modes;
-}
-
int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out)
{
int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
index 3f474e370828..d1da225f35da 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
@@ -36,25 +36,11 @@
#include <linux/mlx5/driver.h>
#include "en.h"
-struct mlx5e_port_eth_proto {
- u32 cap;
- u32 admin;
- u32 oper;
-};
-
-int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext,
- struct mlx5e_port_eth_proto *eproto);
void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status,
u8 *an_disable_cap, u8 *an_disable_admin);
int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable,
u32 proto_admin, bool ext);
-u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper,
- bool force_legacy);
int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
-int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
-u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed,
- bool force_legacy);
-bool mlx5e_ptys_ext_supported(struct mlx5_core_dev *mdev);
int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out);
int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in);
int mlx5e_port_query_sbpr(struct mlx5_core_dev *mdev, u32 desc, u8 dir,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index eb5aeba3addf..eb5abd0e55d9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -81,23 +81,23 @@ void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type,
#define PTP_WQE_CTR2IDX(val) ((val) & ptpsq->ts_cqe_ctr_mask)
-static bool mlx5e_ptp_ts_cqe_drop(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id)
+static bool mlx5e_ptp_ts_cqe_drop(struct mlx5e_ptpsq *ptpsq, u16 skb_ci, u16 skb_id)
{
- return (ptpsq->ts_cqe_ctr_mask && (skb_cc != skb_id));
+ return (ptpsq->ts_cqe_ctr_mask && (skb_ci != skb_id));
}
static bool mlx5e_ptp_ts_cqe_ooo(struct mlx5e_ptpsq *ptpsq, u16 skb_id)
{
- u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
- u16 skb_pc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc);
+ u16 skb_ci = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
+ u16 skb_pi = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc);
- if (PTP_WQE_CTR2IDX(skb_id - skb_cc) >= PTP_WQE_CTR2IDX(skb_pc - skb_cc))
+ if (PTP_WQE_CTR2IDX(skb_id - skb_ci) >= PTP_WQE_CTR2IDX(skb_pi - skb_ci))
return true;
return false;
}
-static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc,
+static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_ci,
u16 skb_id, int budget)
{
struct skb_shared_hwtstamps hwts = {};
@@ -105,13 +105,13 @@ static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_
ptpsq->cq_stats->resync_event++;
- while (skb_cc != skb_id) {
+ while (skb_ci != skb_id) {
skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
skb_tstamp_tx(skb, &hwts);
ptpsq->cq_stats->resync_cqe++;
napi_consume_skb(skb, budget);
- skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
+ skb_ci = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
}
}
@@ -120,7 +120,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
int budget)
{
u16 skb_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter));
- u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
+ u16 skb_ci = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
struct mlx5e_txqsq *sq = &ptpsq->txqsq;
struct sk_buff *skb;
ktime_t hwtstamp;
@@ -131,13 +131,13 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
goto out;
}
- if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id)) {
+ if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_ci, skb_id)) {
if (mlx5e_ptp_ts_cqe_ooo(ptpsq, skb_id)) {
/* already handled by a previous resync */
ptpsq->cq_stats->ooo_cqe_drop++;
return;
}
- mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id, budget);
+ mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_ci, skb_id, budget);
}
skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
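The rename from skb_cc/skb_pc to skb_ci/skb_pi above makes it explicit that these are masked fifo indices. A quick walk-through of the out-of-order check in mlx5e_ptp_ts_cqe_ooo(), with an assumed 8-bit ts_cqe_ctr_mask so PTP_WQE_CTR2IDX() reduces modulo 256:

	skb_ci = 250, skb_pi = 4   -> 10 outstanding entries, window [250..3]
	skb_id = 252: (252 - 250) & 0xff = 2  <  (4 - 250) & 0xff = 10
	              -> inside the window, handled normally (resync if needed)
	skb_id = 10:  (10 - 250) & 0xff = 16  >= 10
	              -> outside the window: already covered by a previous
	                 resync, counted as ooo_cqe_drop and ignored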
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
index ce85b48d327d..fd191925ab4b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
@@ -220,6 +220,7 @@ mlx5_esw_bridge_port_obj_add(struct net_device *dev,
struct netlink_ext_ack *extack = switchdev_notifier_info_to_extack(&port_obj_info->info);
const struct switchdev_obj *obj = port_obj_info->obj;
const struct switchdev_obj_port_vlan *vlan;
+ const struct switchdev_obj_port_mdb *mdb;
u16 vport_num, esw_owner_vhca_id;
int err;
@@ -235,6 +236,11 @@ mlx5_esw_bridge_port_obj_add(struct net_device *dev,
err = mlx5_esw_bridge_port_vlan_add(vport_num, esw_owner_vhca_id, vlan->vid,
vlan->flags, br_offloads, extack);
break;
+ case SWITCHDEV_OBJ_ID_PORT_MDB:
+ mdb = SWITCHDEV_OBJ_PORT_MDB(obj);
+ err = mlx5_esw_bridge_port_mdb_add(dev, vport_num, esw_owner_vhca_id, mdb->addr,
+ mdb->vid, br_offloads, extack);
+ break;
default:
return -EOPNOTSUPP;
}
@@ -248,6 +254,7 @@ mlx5_esw_bridge_port_obj_del(struct net_device *dev,
{
const struct switchdev_obj *obj = port_obj_info->obj;
const struct switchdev_obj_port_vlan *vlan;
+ const struct switchdev_obj_port_mdb *mdb;
u16 vport_num, esw_owner_vhca_id;
if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num,
@@ -261,6 +268,11 @@ mlx5_esw_bridge_port_obj_del(struct net_device *dev,
vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
mlx5_esw_bridge_port_vlan_del(vport_num, esw_owner_vhca_id, vlan->vid, br_offloads);
break;
+ case SWITCHDEV_OBJ_ID_PORT_MDB:
+ mdb = SWITCHDEV_OBJ_PORT_MDB(obj);
+ mlx5_esw_bridge_port_mdb_del(dev, vport_num, esw_owner_vhca_id, mdb->addr, mdb->vid,
+ br_offloads);
+ break;
default:
return -EOPNOTSUPP;
}
@@ -306,6 +318,10 @@ mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev,
attr->u.vlan_protocol,
br_offloads);
break;
+ case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED:
+ err = mlx5_esw_bridge_mcast_set(vport_num, esw_owner_vhca_id,
+ !attr->u.mc_disabled, br_offloads);
+ break;
default:
err = -EOPNOTSUPP;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index 8f7452dc00ee..b5c773ffc763 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -426,39 +426,58 @@ static bool mlx5e_rep_macvlan_mode_supported(const struct net_device *dev)
return macvlan->mode == MACVLAN_MODE_PASSTHRU;
}
-static int
-mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch,
- struct mlx5e_rep_priv *rpriv,
- struct flow_block_offload *f,
- flow_setup_cb_t *setup_cb,
- void *data,
- void (*cleanup)(struct flow_block_cb *block_cb))
+static bool
+mlx5e_rep_check_indr_block_supported(struct mlx5e_rep_priv *rpriv,
+ struct net_device *netdev,
+ struct flow_block_offload *f)
{
struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- bool is_ovs_int_port = netif_is_ovs_master(netdev);
- struct mlx5e_rep_indr_block_priv *indr_priv;
- struct flow_block_cb *block_cb;
+ struct net_device *macvlan_real_dev;
- if (!mlx5e_tc_tun_device_to_offload(priv, netdev) &&
- !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev) &&
- !is_ovs_int_port) {
- if (!(netif_is_macvlan(netdev) && macvlan_dev_real_dev(netdev) == rpriv->netdev))
- return -EOPNOTSUPP;
+ if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
+ f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
+ return false;
+
+ if (mlx5e_tc_tun_device_to_offload(priv, netdev))
+ return true;
+
+ if (is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev)
+ return true;
+
+ if (netif_is_macvlan(netdev)) {
if (!mlx5e_rep_macvlan_mode_supported(netdev)) {
netdev_warn(netdev, "Offloading ingress filter is supported only with macvlan passthru mode");
- return -EOPNOTSUPP;
+ return false;
}
+
+ macvlan_real_dev = macvlan_dev_real_dev(netdev);
+
+ if (macvlan_real_dev == rpriv->netdev)
+ return true;
+ if (netif_is_bond_master(macvlan_real_dev))
+ return true;
}
- if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
- f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
- return -EOPNOTSUPP;
+ if (netif_is_ovs_master(netdev) && f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS &&
+ mlx5e_tc_int_port_supported(esw))
+ return true;
- if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS && !is_ovs_int_port)
- return -EOPNOTSUPP;
+ return false;
+}
- if (is_ovs_int_port && !mlx5e_tc_int_port_supported(esw))
+static int
+mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch,
+ struct mlx5e_rep_priv *rpriv,
+ struct flow_block_offload *f,
+ flow_setup_cb_t *setup_cb,
+ void *data,
+ void (*cleanup)(struct flow_block_cb *block_cb))
+{
+ struct mlx5e_rep_indr_block_priv *indr_priv;
+ struct flow_block_cb *block_cb;
+
+ if (!mlx5e_rep_check_indr_block_supported(rpriv, netdev, f))
return -EOPNOTSUPP;
f->unlocked_driver_cb = true;
@@ -715,5 +734,6 @@ forward:
return;
free_skb:
+ dev_put(tc_priv.fwd_dev);
dev_kfree_skb_any(skb);
}
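The one-line dev_put() added on the free_skb path above presumably balances a reference taken earlier when the forwarding device was resolved (not visible in this hunk), so the error path no longer leaks a netdev refcount. A minimal sketch of that pairing with hypothetical helpers, not the mlx5 code (needs <linux/netdevice.h> and <linux/skbuff.h>):

static void demo_deliver(struct sk_buff *skb, struct net_device *fwd_dev)
{
	dev_hold(fwd_dev);		/* reference taken when fwd_dev is resolved */

	if (!netif_running(fwd_dev))
		goto free_skb;

	skb->dev = fwd_dev;
	netif_rx(skb);
	dev_put(fwd_dev);
	return;

free_skb:
	dev_put(fwd_dev);		/* mirrors the added line: no leak on error */
	dev_kfree_skb_any(skb);
}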
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index c462fe76495b..e8eea9ffd5eb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -8,6 +8,19 @@
#include "ptp.h"
#include "lib/tout.h"
+/* Keep this string array consistent with the MLX5E_RQ_STATE_* enums in en.h */
+static const char * const rq_sw_state_type_name[] = {
+ [MLX5E_RQ_STATE_ENABLED] = "enabled",
+ [MLX5E_RQ_STATE_RECOVERING] = "recovering",
+ [MLX5E_RQ_STATE_DIM] = "dim",
+ [MLX5E_RQ_STATE_NO_CSUM_COMPLETE] = "no_csum_complete",
+ [MLX5E_RQ_STATE_CSUM_FULL] = "csum_full",
+ [MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX] = "mini_cqe_hw_stridx",
+ [MLX5E_RQ_STATE_SHAMPO] = "shampo",
+ [MLX5E_RQ_STATE_MINI_CQE_ENHANCED] = "mini_cqe_enhanced",
+ [MLX5E_RQ_STATE_XSK] = "xsk",
+};
+
static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state)
{
int outlen = MLX5_ST_SZ_BYTES(query_rq_out);
@@ -108,9 +121,9 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
mlx5e_reset_icosq_cc_pc(icosq);
- mlx5e_free_rx_in_progress_descs(rq);
+ mlx5e_free_rx_missing_descs(rq);
if (xskrq)
- mlx5e_free_rx_in_progress_descs(xskrq);
+ mlx5e_free_rx_missing_descs(xskrq);
clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
mlx5e_activate_icosq(icosq);
@@ -239,6 +252,27 @@ static int mlx5e_reporter_icosq_diagnose(struct mlx5e_icosq *icosq, u8 hw_state,
return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
}
+static int mlx5e_health_rq_put_sw_state(struct devlink_fmsg *fmsg, struct mlx5e_rq *rq)
+{
+ int err;
+ int i;
+
+ BUILD_BUG_ON_MSG(ARRAY_SIZE(rq_sw_state_type_name) != MLX5E_NUM_RQ_STATES,
+ "rq_sw_state_type_name string array must be consistent with MLX5E_RQ_STATE_* enum in en.h");
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SW State");
+ if (err)
+ return err;
+
+ for (i = 0; i < ARRAY_SIZE(rq_sw_state_type_name); ++i) {
+ err = devlink_fmsg_u32_pair_put(fmsg, rq_sw_state_type_name[i],
+ test_bit(i, &rq->state));
+ if (err)
+ return err;
+ }
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
static int
mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq *rq,
struct devlink_fmsg *fmsg)
@@ -265,10 +299,6 @@ mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq *rq,
if (err)
return err;
- err = devlink_fmsg_u8_pair_put(fmsg, "SW state", rq->state);
- if (err)
- return err;
-
err = devlink_fmsg_u32_pair_put(fmsg, "WQE counter", wqe_counter);
if (err)
return err;
@@ -281,6 +311,10 @@ mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq *rq,
if (err)
return err;
+ err = mlx5e_health_rq_put_sw_state(fmsg, rq);
+ if (err)
+ return err;
+
err = mlx5e_health_cq_diag_fmsg(&rq->cq, fmsg);
if (err)
return err;
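Both the RX reporter above and the TX reporter below now dump every SW state bit by name, and the BUILD_BUG_ON_MSG() ties the string array length to the new MLX5E_NUM_*_STATES sentinel so that adding an enum value without a matching string fails the build. A generic sketch of that pattern with hypothetical names (needs <linux/build_bug.h> and <linux/kernel.h> for ARRAY_SIZE()):

enum demo_state {
	DEMO_STATE_ENABLED = 0,
	DEMO_STATE_RECOVERING,
	DEMO_NUM_STATES,	/* must be kept last */
};

static const char * const demo_state_name[] = {
	[DEMO_STATE_ENABLED]    = "enabled",
	[DEMO_STATE_RECOVERING] = "recovering",
};

static void demo_check_names(void)
{
	/* Fails at compile time if a state is added without a string. */
	BUILD_BUG_ON_MSG(ARRAY_SIZE(demo_state_name) != DEMO_NUM_STATES,
			 "demo_state_name must match enum demo_state");
}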
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 34666e2b3871..b35ff289af49 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -6,6 +6,19 @@
#include "en/devlink.h"
#include "lib/tout.h"
+/* Keep this string array consistent with the MLX5E_SQ_STATE_* enums in en.h */
+static const char * const sq_sw_state_type_name[] = {
+ [MLX5E_SQ_STATE_ENABLED] = "enabled",
+ [MLX5E_SQ_STATE_MPWQE] = "mpwqe",
+ [MLX5E_SQ_STATE_RECOVERING] = "recovering",
+ [MLX5E_SQ_STATE_IPSEC] = "ipsec",
+ [MLX5E_SQ_STATE_DIM] = "dim",
+ [MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE] = "vlan_need_l2_inline",
+ [MLX5E_SQ_STATE_PENDING_XSK_TX] = "pending_xsk_tx",
+ [MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC] = "pending_tls_rx_resync",
+ [MLX5E_SQ_STATE_XDP_MULTIBUF] = "xdp_multibuf",
+};
+
static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
{
struct mlx5_core_dev *dev = sq->mdev;
@@ -37,6 +50,27 @@ static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
sq->pc = 0;
}
+static int mlx5e_health_sq_put_sw_state(struct devlink_fmsg *fmsg, struct mlx5e_txqsq *sq)
+{
+ int err;
+ int i;
+
+ BUILD_BUG_ON_MSG(ARRAY_SIZE(sq_sw_state_type_name) != MLX5E_NUM_SQ_STATES,
+ "sq_sw_state_type_name string array must be consistent with MLX5E_SQ_STATE_* enum in en.h");
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SW State");
+ if (err)
+ return err;
+
+ for (i = 0; i < ARRAY_SIZE(sq_sw_state_type_name); ++i) {
+ err = devlink_fmsg_u32_pair_put(fmsg, sq_sw_state_type_name[i],
+ test_bit(i, &sq->state));
+ if (err)
+ return err;
+ }
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
{
struct mlx5_core_dev *mdev;
@@ -190,6 +224,10 @@ mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg,
if (err)
return err;
+ err = mlx5e_health_sq_put_sw_state(fmsg, sq);
+ if (err)
+ return err;
+
err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c
index a278f52d52b0..9db1b5307a8d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c
@@ -4,15 +4,6 @@
#include "act.h"
#include "en/tc_priv.h"
-static bool
-tc_act_can_offload_accept(struct mlx5e_tc_act_parse_state *parse_state,
- const struct flow_action_entry *act,
- int act_index,
- struct mlx5_flow_attr *attr)
-{
- return true;
-}
-
static int
tc_act_parse_accept(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
@@ -26,7 +17,6 @@ tc_act_parse_accept(struct mlx5e_tc_act_parse_state *parse_state,
}
struct mlx5e_tc_act mlx5e_tc_act_accept = {
- .can_offload = tc_act_can_offload_accept,
.parse_action = tc_act_parse_accept,
.is_terminating_action = true,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
index eba0c8698926..fc923a99b6a4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
@@ -82,26 +82,6 @@ mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state,
parse_state->flow_action = flow_action;
}
-void
-mlx5e_tc_act_reorder_flow_actions(struct flow_action *flow_action,
- struct mlx5e_tc_flow_action *flow_action_reorder)
-{
- struct flow_action_entry *act;
- int i, j = 0;
-
- flow_action_for_each(i, act, flow_action) {
- /* Add CT action to be first. */
- if (act->id == FLOW_ACTION_CT)
- flow_action_reorder->entries[j++] = act;
- }
-
- flow_action_for_each(i, act, flow_action) {
- if (act->id == FLOW_ACTION_CT)
- continue;
- flow_action_reorder->entries[j++] = act;
- }
-}
-
int
mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state,
struct flow_action *flow_action,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
index 8346557eeaf6..0e6e1872ac62 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
@@ -17,8 +17,6 @@ struct mlx5e_tc_act_parse_state {
struct mlx5e_tc_flow *flow;
struct netlink_ext_ack *extack;
u32 actions;
- bool ct;
- bool ct_clear;
bool encap;
bool decap;
bool mpls_push;
@@ -56,6 +54,8 @@ struct mlx5e_tc_act {
const struct flow_action_entry *act,
struct mlx5_flow_attr *attr);
+ bool (*is_missable)(const struct flow_action_entry *act);
+
int (*offload_action)(struct mlx5e_priv *priv,
struct flow_offload_action *fl_act,
struct flow_action_entry *act);
@@ -110,10 +110,6 @@ mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state,
struct flow_action *flow_action,
struct netlink_ext_ack *extack);
-void
-mlx5e_tc_act_reorder_flow_actions(struct flow_action *flow_action,
- struct mlx5e_tc_flow_action *flow_action_reorder);
-
int
mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state,
struct flow_action *flow_action,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
index a829c94289c1..92d3952dfa8b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
@@ -5,53 +5,22 @@
#include "en/tc_priv.h"
#include "en/tc_ct.h"
-static bool
-tc_act_can_offload_ct(struct mlx5e_tc_act_parse_state *parse_state,
- const struct flow_action_entry *act,
- int act_index,
- struct mlx5_flow_attr *attr)
-{
- bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
- struct netlink_ext_ack *extack = parse_state->extack;
-
- if (parse_state->ct && !clear_action) {
- NL_SET_ERR_MSG_MOD(extack, "Multiple CT actions are not supported");
- return false;
- }
-
- return true;
-}
-
static int
tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
struct mlx5e_priv *priv,
struct mlx5_flow_attr *attr)
{
- bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
int err;
- /* It's redundant to do ct clear more than once. */
- if (clear_action && parse_state->ct_clear)
- return 0;
-
- err = mlx5_tc_ct_parse_action(parse_state->ct_priv, attr,
- &attr->parse_attr->mod_hdr_acts,
- act, parse_state->extack);
+ err = mlx5_tc_ct_parse_action(parse_state->ct_priv, attr, act, parse_state->extack);
if (err)
return err;
-
if (mlx5e_is_eswitch_flow(parse_state->flow))
attr->esw_attr->split_count = attr->esw_attr->out_count;
- if (clear_action) {
- parse_state->ct_clear = true;
- } else {
- attr->flags |= MLX5_ATTR_FLAG_CT;
- flow_flag_set(parse_state->flow, CT);
- parse_state->ct = true;
- }
+ attr->flags |= MLX5_ATTR_FLAG_CT;
return 0;
}
@@ -61,27 +30,10 @@ tc_act_post_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
struct mlx5e_priv *priv,
struct mlx5_flow_attr *attr)
{
- struct mlx5e_tc_mod_hdr_acts *mod_acts = &attr->parse_attr->mod_hdr_acts;
- int err;
-
- /* If ct action exist, we can ignore previous ct_clear actions */
- if (parse_state->ct)
+ if (!(attr->flags & MLX5_ATTR_FLAG_CT))
return 0;
- if (parse_state->ct_clear) {
- err = mlx5_tc_ct_set_ct_clear_regs(parse_state->ct_priv, mod_acts);
- if (err) {
- NL_SET_ERR_MSG_MOD(parse_state->extack,
- "Failed to set registers for ct clear");
- return err;
- }
- attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
-
- /* Prevent handling of additional, redundant clear actions */
- parse_state->ct_clear = false;
- }
-
- return 0;
+ return mlx5_tc_ct_flow_offload(parse_state->ct_priv, attr);
}
static bool
@@ -95,10 +47,16 @@ tc_act_is_multi_table_act_ct(struct mlx5e_priv *priv,
return true;
}
+static bool
+tc_act_is_missable_ct(const struct flow_action_entry *act)
+{
+ return !(act->ct.action & TCA_CT_ACT_CLEAR);
+}
+
struct mlx5e_tc_act mlx5e_tc_act_ct = {
- .can_offload = tc_act_can_offload_ct,
.parse_action = tc_act_parse_ct,
- .is_multi_table_act = tc_act_is_multi_table_act_ct,
.post_parse = tc_act_post_parse_ct,
+ .is_multi_table_act = tc_act_is_multi_table_act_ct,
+ .is_missable = tc_act_is_missable_ct,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c
index 7d16aeabb119..5dc81715d625 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c
@@ -4,15 +4,6 @@
#include "act.h"
#include "en/tc_priv.h"
-static bool
-tc_act_can_offload_drop(struct mlx5e_tc_act_parse_state *parse_state,
- const struct flow_action_entry *act,
- int act_index,
- struct mlx5_flow_attr *attr)
-{
- return true;
-}
-
static int
tc_act_parse_drop(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
@@ -25,7 +16,6 @@ tc_act_parse_drop(struct mlx5e_tc_act_parse_state *parse_state,
}
struct mlx5e_tc_act mlx5e_tc_act_drop = {
- .can_offload = tc_act_can_offload_drop,
.parse_action = tc_act_parse_drop,
.is_terminating_action = true,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
index 07cc65596f89..291193f7120d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
@@ -234,6 +234,9 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
if (mlx5_lag_mpesw_do_mirred(priv->mdev, out_dev, extack))
return -EOPNOTSUPP;
+ if (netif_is_macvlan(out_dev))
+ out_dev = macvlan_dev_real_dev(out_dev);
+
out_dev = get_fdb_out_dev(uplink_dev, out_dev);
if (!out_dev)
return -ENODEV;
@@ -250,9 +253,6 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
return err;
}
- if (netif_is_macvlan(out_dev))
- out_dev = macvlan_dev_real_dev(out_dev);
-
err = verify_uplink_forwarding(priv, attr, out_dev, extack);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
index 47597c524e59..3b272bbf4c53 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
@@ -78,15 +78,6 @@ out_err:
return err;
}
-static bool
-tc_act_can_offload_pedit(struct mlx5e_tc_act_parse_state *parse_state,
- const struct flow_action_entry *act,
- int act_index,
- struct mlx5_flow_attr *attr)
-{
- return true;
-}
-
static int
tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
@@ -114,6 +105,5 @@ tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state,
}
struct mlx5e_tc_act mlx5e_tc_act_pedit = {
- .can_offload = tc_act_can_offload_pedit,
.parse_action = tc_act_parse_pedit,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c
index 6454b031ff7a..80b4bc64380a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c
@@ -4,15 +4,6 @@
#include "act.h"
#include "en/tc_priv.h"
-static bool
-tc_act_can_offload_ptype(struct mlx5e_tc_act_parse_state *parse_state,
- const struct flow_action_entry *act,
- int act_index,
- struct mlx5_flow_attr *attr)
-{
- return true;
-}
-
static int
tc_act_parse_ptype(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
@@ -31,6 +22,5 @@ tc_act_parse_ptype(struct mlx5e_tc_act_parse_state *parse_state,
}
struct mlx5e_tc_act mlx5e_tc_act_ptype = {
- .can_offload = tc_act_can_offload_ptype,
.parse_action = tc_act_parse_ptype,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c
index 2c0196431302..2df02f99cecf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c
@@ -6,25 +6,6 @@
#include "en/tc_priv.h"
#include "en/tc/act/sample.h"
-static bool
-tc_act_can_offload_sample(struct mlx5e_tc_act_parse_state *parse_state,
- const struct flow_action_entry *act,
- int act_index,
- struct mlx5_flow_attr *attr)
-{
- struct netlink_ext_ack *extack = parse_state->extack;
- bool ct_nat;
-
- ct_nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
-
- if (flow_flag_test(parse_state->flow, CT) && ct_nat) {
- NL_SET_ERR_MSG_MOD(extack, "Sample action with CT NAT is not supported");
- return false;
- }
-
- return true;
-}
-
static int
tc_act_parse_sample(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
@@ -65,7 +46,6 @@ tc_act_is_multi_table_act_sample(struct mlx5e_priv *priv,
}
struct mlx5e_tc_act mlx5e_tc_act_sample = {
- .can_offload = tc_act_can_offload_sample,
.parse_action = tc_act_parse_sample,
.is_multi_table_act = tc_act_is_multi_table_act_sample,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c
index 915ce201aeb2..1b78bd9c106a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c
@@ -5,15 +5,6 @@
#include "en/tc_priv.h"
#include "eswitch.h"
-static bool
-tc_act_can_offload_trap(struct mlx5e_tc_act_parse_state *parse_state,
- const struct flow_action_entry *act,
- int act_index,
- struct mlx5_flow_attr *attr)
-{
- return true;
-}
-
static int
tc_act_parse_trap(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
@@ -27,6 +18,5 @@ tc_act_parse_trap(struct mlx5e_tc_act_parse_state *parse_state,
}
struct mlx5e_tc_act mlx5e_tc_act_trap = {
- .can_offload = tc_act_can_offload_trap,
.parse_action = tc_act_parse_trap,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c
index b4fa2de9711d..f1cae21c2c37 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c
@@ -32,15 +32,6 @@ tc_act_parse_tun_encap(struct mlx5e_tc_act_parse_state *parse_state,
return 0;
}
-static bool
-tc_act_can_offload_tun_decap(struct mlx5e_tc_act_parse_state *parse_state,
- const struct flow_action_entry *act,
- int act_index,
- struct mlx5_flow_attr *attr)
-{
- return true;
-}
-
static int
tc_act_parse_tun_decap(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
@@ -58,6 +49,5 @@ struct mlx5e_tc_act mlx5e_tc_act_tun_encap = {
};
struct mlx5e_tc_act mlx5e_tc_act_tun_decap = {
- .can_offload = tc_act_can_offload_tun_decap,
.parse_action = tc_act_parse_tun_decap,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
index 2e0d88b513aa..c8a3eaf189f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
@@ -141,15 +141,6 @@ mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv,
return err;
}
-static bool
-tc_act_can_offload_vlan(struct mlx5e_tc_act_parse_state *parse_state,
- const struct flow_action_entry *act,
- int act_index,
- struct mlx5_flow_attr *attr)
-{
- return true;
-}
-
static int
tc_act_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
@@ -205,7 +196,6 @@ tc_act_post_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state,
}
struct mlx5e_tc_act mlx5e_tc_act_vlan = {
- .can_offload = tc_act_can_offload_vlan,
.parse_action = tc_act_parse_vlan,
.post_parse = tc_act_post_parse_vlan,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
index 9a8a1a6bd99e..310b99230760 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
@@ -50,15 +50,6 @@ mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace,
return err;
}
-static bool
-tc_act_can_offload_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state,
- const struct flow_action_entry *act,
- int act_index,
- struct mlx5_flow_attr *attr)
-{
- return true;
-}
-
static int
tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
@@ -81,6 +72,5 @@ tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state,
}
struct mlx5e_tc_act mlx5e_tc_act_vlan_mangle = {
- .can_offload = tc_act_can_offload_vlan_mangle,
.parse_action = tc_act_parse_vlan_mangle,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
index 4e48946c4c2a..0290e0dea539 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
@@ -106,22 +106,17 @@ err_rule:
}
struct mlx5e_post_act_handle *
-mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *attr)
+mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *post_attr)
{
- u32 attr_sz = ns_to_attr_sz(post_act->ns_type);
struct mlx5e_post_act_handle *handle;
- struct mlx5_flow_attr *post_attr;
int err;
handle = kzalloc(sizeof(*handle), GFP_KERNEL);
- post_attr = mlx5_alloc_flow_attr(post_act->ns_type);
- if (!handle || !post_attr) {
- kfree(post_attr);
+ if (!handle) {
kfree(handle);
return ERR_PTR(-ENOMEM);
}
- memcpy(post_attr, attr, attr_sz);
post_attr->chain = 0;
post_attr->prio = 0;
post_attr->ft = post_act->ft;
@@ -145,7 +140,6 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *at
return handle;
err_xarray:
- kfree(post_attr);
kfree(handle);
return ERR_PTR(err);
}
@@ -164,7 +158,6 @@ mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_han
if (!IS_ERR_OR_NULL(handle->rule))
mlx5e_tc_post_act_unoffload(post_act, handle);
xa_erase(&post_act->ids, handle->id);
- kfree(handle->attr);
kfree(handle);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h
index f476774c0b75..40b8df184af5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h
@@ -19,7 +19,7 @@ void
mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act);
struct mlx5e_post_act_handle *
-mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *attr);
+mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *post_attr);
void
mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
index 558a776359af..5db239cae814 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
@@ -14,10 +14,10 @@
#define MLX5_ESW_VPORT_TBL_SIZE_SAMPLE (64 * 1024)
-static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_sample_ns = {
+static struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_sample_ns = {
.max_fte = MLX5_ESW_VPORT_TBL_SIZE_SAMPLE,
.max_num_groups = 0, /* default num of groups */
- .flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | MLX5_FLOW_TABLE_TUNNEL_EN_DECAP,
+ .flags = 0,
};
struct mlx5e_tc_psample {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index 314983bc6f08..ead38ef69483 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -83,12 +83,6 @@ struct mlx5_tc_ct_priv {
struct mlx5_tc_ct_debugfs debugfs;
};
-struct mlx5_ct_flow {
- struct mlx5_flow_attr *pre_ct_attr;
- struct mlx5_flow_handle *pre_ct_rule;
- struct mlx5_ct_ft *ft;
-};
-
struct mlx5_ct_zone_rule {
struct mlx5_ct_fs_rule *rule;
struct mlx5e_mod_hdr_handle *mh;
@@ -598,12 +592,6 @@ mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
return 0;
}
-int mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
- struct mlx5e_tc_mod_hdr_acts *mod_acts)
-{
- return mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0);
-}
-
static int
mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
char *modact)
@@ -920,6 +908,7 @@ mlx5_tc_ct_entry_replace_rule(struct mlx5_tc_ct_priv *ct_priv,
zone_rule->rule = rule;
mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, old_attr, zone_rule->mh);
zone_rule->mh = mh;
+ mlx5_put_label_mapping(ct_priv, old_attr->ct_attr.ct_labels_id);
kfree(old_attr);
kvfree(spec);
@@ -1545,7 +1534,6 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
int
mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
struct mlx5_flow_attr *attr,
- struct mlx5e_tc_mod_hdr_acts *mod_acts,
const struct flow_action_entry *act,
struct netlink_ext_ack *extack)
{
@@ -1555,8 +1543,8 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
return -EOPNOTSUPP;
}
+ attr->ct_attr.ct_action |= act->ct.action; /* So we can have clear + ct */
attr->ct_attr.zone = act->ct.zone;
- attr->ct_attr.ct_action = act->ct.action;
attr->ct_attr.nf_ft = act->ct.flow_table;
attr->ct_attr.act_miss_cookie = act->miss_cookie;
@@ -1892,14 +1880,14 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
/* We translate the tc filter with CT action to the following HW model:
*
- * +---------------------+
- * + ft prio (tc chain) +
- * + original match +
- * +---------------------+
+ * +-----------------------+
+ * + rule (either original +
+ * + or post_act rule) +
+ * +-----------------------+
* | set act_miss_cookie mapping
* | set fte_id
* | set tunnel_id
- * | do decap
+ * | rest of actions before the CT action (for this orig/post_act rule)
* |
* +-------------+
* | Chain 0 |
@@ -1924,32 +1912,21 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
* | do nat (if needed)
* v
* +--------------+
- * + post_act + original filter actions
+ * + post_act + rest of parsed filter's actions
* + fte_id match +------------------------>
* +--------------+
*
*/
-static struct mlx5_flow_handle *
+static int
__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
- struct mlx5_flow_spec *orig_spec,
struct mlx5_flow_attr *attr)
{
bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
- struct mlx5e_tc_mod_hdr_acts *pre_mod_acts;
- u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
- struct mlx5_flow_attr *pre_ct_attr;
- struct mlx5_modify_hdr *mod_hdr;
- struct mlx5_ct_flow *ct_flow;
int act_miss_mapping = 0, err;
struct mlx5_ct_ft *ft;
u16 zone;
- ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
- if (!ct_flow) {
- return ERR_PTR(-ENOMEM);
- }
-
/* Register for CT established events */
ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
attr->ct_attr.nf_ft);
@@ -1958,23 +1935,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
ct_dbg("Failed to register to ft callback");
goto err_ft;
}
- ct_flow->ft = ft;
-
- /* Base flow attributes of both rules on original rule attribute */
- ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
- if (!ct_flow->pre_ct_attr) {
- err = -ENOMEM;
- goto err_alloc_pre;
- }
-
- pre_ct_attr = ct_flow->pre_ct_attr;
- memcpy(pre_ct_attr, attr, attr_sz);
- pre_mod_acts = &pre_ct_attr->parse_attr->mod_hdr_acts;
-
- /* Modify the original rule's action to fwd and modify, leave decap */
- pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
- pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ attr->ct_attr.ft = ft;
err = mlx5e_tc_action_miss_mapping_get(ct_priv->priv, attr, attr->ct_attr.act_miss_cookie,
&act_miss_mapping);
@@ -1982,136 +1943,89 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
ct_dbg("Failed to get register mapping for act miss");
goto err_get_act_miss;
}
- attr->ct_attr.act_miss_mapping = act_miss_mapping;
- err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type,
- MAPPED_OBJ_TO_REG, act_miss_mapping);
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, &attr->parse_attr->mod_hdr_acts,
+ ct_priv->ns_type, MAPPED_OBJ_TO_REG, act_miss_mapping);
if (err) {
ct_dbg("Failed to set act miss register mapping");
goto err_mapping;
}
- /* If original flow is decap, we do it before going into ct table
- * so add a rewrite for the tunnel match_id.
- */
- if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
- attr->chain == 0) {
- err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts,
- ct_priv->ns_type,
- TUNNEL_TO_REG,
- attr->tunnel_id);
- if (err) {
- ct_dbg("Failed to set tunnel register mapping");
- goto err_mapping;
- }
- }
-
- /* Change original rule point to ct table
- * Chain 0 sets the zone and jumps to ct table
+ /* Chain 0 sets the zone and jumps to ct table
* Other chains jump to pre_ct table to align with act_ct cached logic
*/
- pre_ct_attr->dest_chain = 0;
if (!attr->chain) {
zone = ft->zone & MLX5_CT_ZONE_MASK;
- err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type,
- ZONE_TO_REG, zone);
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, &attr->parse_attr->mod_hdr_acts,
+ ct_priv->ns_type, ZONE_TO_REG, zone);
if (err) {
ct_dbg("Failed to set zone register mapping");
goto err_mapping;
}
- pre_ct_attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct;
+ attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct;
} else {
- pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
- }
-
- mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
- pre_mod_acts->num_actions,
- pre_mod_acts->actions);
- if (IS_ERR(mod_hdr)) {
- err = PTR_ERR(mod_hdr);
- ct_dbg("Failed to create pre ct mod hdr");
- goto err_mapping;
- }
- pre_ct_attr->modify_hdr = mod_hdr;
- ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec,
- pre_ct_attr);
- if (IS_ERR(ct_flow->pre_ct_rule)) {
- err = PTR_ERR(ct_flow->pre_ct_rule);
- ct_dbg("Failed to add pre ct rule");
- goto err_insert_orig;
+ attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
}
- attr->ct_attr.ct_flow = ct_flow;
- mlx5e_mod_hdr_dealloc(pre_mod_acts);
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ attr->ct_attr.act_miss_mapping = act_miss_mapping;
- return ct_flow->pre_ct_rule;
+ return 0;
-err_insert_orig:
- mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
err_mapping:
- mlx5e_mod_hdr_dealloc(pre_mod_acts);
mlx5e_tc_action_miss_mapping_put(ct_priv->priv, attr, act_miss_mapping);
err_get_act_miss:
- kfree(ct_flow->pre_ct_attr);
-err_alloc_pre:
mlx5_tc_ct_del_ft_cb(ct_priv, ft);
err_ft:
- kfree(ct_flow);
netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
- return ERR_PTR(err);
+ return err;
}
-struct mlx5_flow_handle *
-mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
- struct mlx5_flow_spec *spec,
- struct mlx5_flow_attr *attr,
- struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+int
+mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr)
{
- struct mlx5_flow_handle *rule;
+ int err;
if (!priv)
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
+
+ if (attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR) {
+ err = mlx5_tc_ct_entry_set_registers(priv, &attr->parse_attr->mod_hdr_acts,
+ 0, 0, 0, 0);
+ if (err)
+ return err;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ }
+
+ if (!attr->ct_attr.nf_ft) /* means only ct clear action, and not ct_clear,ct() */
+ return 0;
mutex_lock(&priv->control_lock);
- rule = __mlx5_tc_ct_flow_offload(priv, spec, attr);
+ err = __mlx5_tc_ct_flow_offload(priv, attr);
mutex_unlock(&priv->control_lock);
- return rule;
+ return err;
}
static void
__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
- struct mlx5_ct_flow *ct_flow,
struct mlx5_flow_attr *attr)
{
- struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
- struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
-
- mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr);
- mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
-
mlx5e_tc_action_miss_mapping_put(ct_priv->priv, attr, attr->ct_attr.act_miss_mapping);
- mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
-
- kfree(ct_flow->pre_ct_attr);
- kfree(ct_flow);
+ mlx5_tc_ct_del_ft_cb(ct_priv, attr->ct_attr.ft);
}
void
mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
struct mlx5_flow_attr *attr)
{
- struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;
-
- /* We are called on error to clean up stuff from parsing
- * but we don't have anything for now
- */
- if (!ct_flow)
+ if (!attr->ct_attr.nf_ft) /* means only ct clear action, and not ct_clear,ct() */
return;
mutex_lock(&priv->control_lock);
- __mlx5_tc_ct_delete_flow(priv, ct_flow, attr);
+ __mlx5_tc_ct_delete_flow(priv, attr);
mutex_unlock(&priv->control_lock);
}
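
The reworked CT offload no longer clones the rule into a separate pre_ct attribute: the original (or post_act) rule itself sets the registers and forwards to the CT table, and post_act executes whatever actions follow, as the comment block above describes. The toy pipeline below is only a sketch of that three-stage model under invented names and stubbed lookups; it is not the driver's datapath.

/* Toy model of the table chain in the comment above: the rule sets
 * registers and forwards to the CT table, the CT table restores
 * conntrack state for the tuple, post_act runs the remaining actions.
 * All names and the lookup stub are illustrative only. */
#include <stdio.h>
#include <stdbool.h>

struct pkt_regs {
	unsigned int zone;
	unsigned int act_miss_cookie;
	bool ct_established;
};

/* "pre-CT" part of the rule: set registers, then jump to the CT table */
static void rule_pre_ct(struct pkt_regs *regs, unsigned int zone, unsigned int cookie)
{
	regs->zone = zone;
	regs->act_miss_cookie = cookie;
}

/* CT table: per-tuple lookup; here a stub that marks the flow established */
static bool ct_table(struct pkt_regs *regs)
{
	regs->ct_established = true;   /* pretend the tuple is known */
	return regs->ct_established;
}

/* post_act: the rest of the parsed filter's actions */
static void post_act(const struct pkt_regs *regs)
{
	printf("zone=%u established=%d -> run remaining actions\n",
	       regs->zone, regs->ct_established);
}

int main(void)
{
	struct pkt_regs regs = { 0 };

	rule_pre_ct(&regs, 7, 42);
	if (ct_table(&regs))
		post_act(&regs);
	else
		printf("miss: recirculate with cookie %u\n", regs.act_miss_cookie);
	return 0;
}
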
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
index 5c5ddaa83055..8e9316fa46d4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
@@ -25,11 +25,11 @@ struct nf_flowtable;
struct mlx5_ct_attr {
u16 zone;
u16 ct_action;
- struct mlx5_ct_flow *ct_flow;
struct nf_flowtable *nf_ft;
u32 ct_labels_id;
u32 act_miss_mapping;
u64 act_miss_cookie;
+ struct mlx5_ct_ft *ft;
};
#define zone_to_reg_ct {\
@@ -113,15 +113,12 @@ int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec);
int
mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
struct mlx5_flow_attr *attr,
- struct mlx5e_tc_mod_hdr_acts *mod_acts,
const struct flow_action_entry *act,
struct netlink_ext_ack *extack);
-struct mlx5_flow_handle *
-mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
- struct mlx5_flow_spec *spec,
- struct mlx5_flow_attr *attr,
- struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
+int
+mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr);
+
void
mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
struct mlx5_flow_attr *attr);
@@ -130,10 +127,6 @@ bool
mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
struct sk_buff *skb, u8 zone_restore_id);
-int
-mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
- struct mlx5e_tc_mod_hdr_acts *mod_acts);
-
#else /* CONFIG_MLX5_TC_CT */
static inline struct mlx5_tc_ct_priv *
@@ -176,16 +169,8 @@ mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
}
static inline int
-mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
- struct mlx5e_tc_mod_hdr_acts *mod_acts)
-{
- return -EOPNOTSUPP;
-}
-
-static inline int
mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
struct mlx5_flow_attr *attr,
- struct mlx5e_tc_mod_hdr_acts *mod_acts,
const struct flow_action_entry *act,
struct netlink_ext_ack *extack)
{
@@ -193,13 +178,11 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
return -EOPNOTSUPP;
}
-static inline struct mlx5_flow_handle *
+static inline int
mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
- struct mlx5_flow_spec *spec,
- struct mlx5_flow_attr *attr,
- struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+ struct mlx5_flow_attr *attr)
{
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
}
static inline void
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
index 451fd4342a5a..ba2b1f24ff14 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
@@ -25,12 +25,11 @@ enum {
MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4,
MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5,
MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6,
- MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7,
- MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8,
- MLX5E_TC_FLOW_FLAG_TUN_RX = MLX5E_TC_FLOW_BASE + 9,
- MLX5E_TC_FLOW_FLAG_FAILED = MLX5E_TC_FLOW_BASE + 10,
- MLX5E_TC_FLOW_FLAG_SAMPLE = MLX5E_TC_FLOW_BASE + 11,
- MLX5E_TC_FLOW_FLAG_USE_ACT_STATS = MLX5E_TC_FLOW_BASE + 12,
+ MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 7,
+ MLX5E_TC_FLOW_FLAG_TUN_RX = MLX5E_TC_FLOW_BASE + 8,
+ MLX5E_TC_FLOW_FLAG_FAILED = MLX5E_TC_FLOW_BASE + 9,
+ MLX5E_TC_FLOW_FLAG_SAMPLE = MLX5E_TC_FLOW_BASE + 10,
+ MLX5E_TC_FLOW_FLAG_USE_ACT_STATS = MLX5E_TC_FLOW_BASE + 11,
};
struct mlx5e_tc_flow_parse_attr {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
index b38f693bbb52..92065568bb19 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
@@ -115,6 +115,9 @@ int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv,
bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
struct mlx5e_encap_key *b);
+bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a,
+ struct mlx5e_encap_key *b,
+ __be16 tun_flags);
#endif /* CONFIG_MLX5_ESWITCH */
#endif //__MLX5_EN_TC_TUNNEL_H__
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
index 780224fd67a1..20c2d2ecaf93 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -3,6 +3,7 @@
#include <net/fib_notifier.h>
#include <net/nexthop.h>
+#include <net/ip_tunnels.h>
#include "tc_tun_encap.h"
#include "en_tc.h"
#include "tc_tun.h"
@@ -97,7 +98,6 @@ int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
else if (ip_version == 6) {
int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
- struct in6_addr zerov6 = {};
daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
@@ -105,8 +105,8 @@ int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
- if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) ||
- !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6)))
+ if (ipv6_addr_any(&tun_attr->dst_ip.v6) ||
+ ipv6_addr_any(&tun_attr->src_ip.v6))
return 0;
}
#endif
@@ -571,6 +571,37 @@ bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
}
+bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a,
+ struct mlx5e_encap_key *b,
+ __be16 tun_flags)
+{
+ struct ip_tunnel_info *a_info;
+ struct ip_tunnel_info *b_info;
+ bool a_has_opts, b_has_opts;
+
+ if (!mlx5e_tc_tun_encap_info_equal_generic(a, b))
+ return false;
+
+ a_has_opts = !!(a->ip_tun_key->tun_flags & tun_flags);
+ b_has_opts = !!(b->ip_tun_key->tun_flags & tun_flags);
+
+ /* keys are equal when both don't have any options attached */
+ if (!a_has_opts && !b_has_opts)
+ return true;
+
+ if (a_has_opts != b_has_opts)
+ return false;
+
+ /* options stored in memory next to ip_tunnel_info struct */
+ a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key);
+ b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key);
+
+ return a_info->options_len == b_info->options_len &&
+ !memcmp(ip_tunnel_info_opts(a_info),
+ ip_tunnel_info_opts(b_info),
+ a_info->options_len);
+}
+
static int cmp_decap_info(struct mlx5e_decap_key *a,
struct mlx5e_decap_key *b)
{
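
mlx5e_tc_tun_encap_info_equal_options() generalizes the former GENEVE-only comparison: generic fields first, then option presence, then option length and bytes. A minimal userspace model of that ordering, with simplified stand-in structs rather than the kernel ip_tunnel_info layout:

/* Minimal model of mlx5e_tc_tun_encap_info_equal_options(); the struct
 * and its fields are simplified stand-ins, not the kernel layout. */
#include <stdbool.h>
#include <string.h>
#include <stdio.h>

struct tun_key_model {
	unsigned short tun_flags;   /* e.g. a GENEVE_OPT or VXLAN_OPT bit */
	unsigned short options_len;
	unsigned char  options[64];
};

static bool options_equal(const struct tun_key_model *a,
			  const struct tun_key_model *b,
			  unsigned short opt_flag)
{
	bool a_has = a->tun_flags & opt_flag;
	bool b_has = b->tun_flags & opt_flag;

	if (!a_has && !b_has)          /* neither carries options */
		return true;
	if (a_has != b_has)            /* only one side has options */
		return false;
	return a->options_len == b->options_len &&
	       !memcmp(a->options, b->options, a->options_len);
}

int main(void)
{
	struct tun_key_model x = { .tun_flags = 0x1, .options_len = 4, .options = "gbp" };
	struct tun_key_model y = x;

	printf("equal: %d\n", options_equal(&x, &y, 0x1)); /* 1 */
	y.options[0] ^= 0xff;
	printf("equal: %d\n", options_equal(&x, &y, 0x1)); /* 0 */
	return 0;
}
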
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
index 054d80c4e65c..2bcd10b6d653 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
@@ -337,29 +337,7 @@ static int mlx5e_tc_tun_parse_geneve(struct mlx5e_priv *priv,
static bool mlx5e_tc_tun_encap_info_equal_geneve(struct mlx5e_encap_key *a,
struct mlx5e_encap_key *b)
{
- struct ip_tunnel_info *a_info;
- struct ip_tunnel_info *b_info;
- bool a_has_opts, b_has_opts;
-
- if (!mlx5e_tc_tun_encap_info_equal_generic(a, b))
- return false;
-
- a_has_opts = !!(a->ip_tun_key->tun_flags & TUNNEL_GENEVE_OPT);
- b_has_opts = !!(b->ip_tun_key->tun_flags & TUNNEL_GENEVE_OPT);
-
- /* keys are equal when both don't have any options attached */
- if (!a_has_opts && !b_has_opts)
- return true;
-
- if (a_has_opts != b_has_opts)
- return false;
-
- /* geneve options stored in memory next to ip_tunnel_info struct */
- a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key);
- b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key);
-
- return a_info->options_len == b_info->options_len &&
- memcmp(a_info + 1, b_info + 1, a_info->options_len) == 0;
+ return mlx5e_tc_tun_encap_info_equal_options(a, b, TUNNEL_GENEVE_OPT);
}
struct mlx5e_tc_tunnel geneve_tunnel = {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
index 1f62c702b625..a184d739d5f8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2018 Mellanox Technologies. */
+#include <net/ip_tunnels.h>
#include <net/vxlan.h>
#include "lib/vxlan.h"
#include "en/tc_tun.h"
@@ -86,9 +87,11 @@ static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[],
const struct ip_tunnel_key *tun_key = &e->tun_info->key;
__be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
struct udphdr *udp = (struct udphdr *)(buf);
+ const struct vxlan_metadata *md;
struct vxlanhdr *vxh;
- if (tun_key->tun_flags & TUNNEL_VXLAN_OPT)
+ if ((tun_key->tun_flags & TUNNEL_VXLAN_OPT) &&
+ e->tun_info->options_len != sizeof(*md))
return -EOPNOTSUPP;
vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
*ip_proto = IPPROTO_UDP;
@@ -96,6 +99,57 @@ static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[],
udp->dest = tun_key->tp_dst;
vxh->vx_flags = VXLAN_HF_VNI;
vxh->vx_vni = vxlan_vni_field(tun_id);
+ if (tun_key->tun_flags & TUNNEL_VXLAN_OPT) {
+ md = ip_tunnel_info_opts(e->tun_info);
+ vxlan_build_gbp_hdr(vxh, md);
+ }
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_vxlan_gbp_option(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct flow_match_enc_opts enc_opts;
+ void *misc5_c, *misc5_v;
+ u32 *gbp, *gbp_mask;
+
+ flow_rule_match_enc_opts(rule, &enc_opts);
+
+ if (memchr_inv(&enc_opts.mask->data, 0, sizeof(enc_opts.mask->data)) &&
+ !MLX5_CAP_ESW_FT_FIELD_SUPPORT_2(priv->mdev, tunnel_header_0_1)) {
+ NL_SET_ERR_MSG_MOD(extack, "Matching on VxLAN GBP is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (enc_opts.key->dst_opt_type != TUNNEL_VXLAN_OPT) {
+ NL_SET_ERR_MSG_MOD(extack, "Wrong VxLAN option type: not GBP");
+ return -EOPNOTSUPP;
+ }
+
+ if (enc_opts.key->len != sizeof(*gbp) ||
+ enc_opts.mask->len != sizeof(*gbp_mask)) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN GBP option/mask len is not 32 bits");
+ return -EINVAL;
+ }
+
+ gbp = (u32 *)&enc_opts.key->data[0];
+ gbp_mask = (u32 *)&enc_opts.mask->data[0];
+
+ if (*gbp_mask & ~VXLAN_GBP_MASK) {
+ NL_SET_ERR_MSG_FMT_MOD(extack, "Wrong VxLAN GBP mask(0x%08X)\n", *gbp_mask);
+ return -EINVAL;
+ }
+
+ misc5_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_5);
+ misc5_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_5);
+ MLX5_SET(fte_match_set_misc5, misc5_c, tunnel_header_0, *gbp_mask);
+ MLX5_SET(fte_match_set_misc5, misc5_v, tunnel_header_0, *gbp);
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5;
return 0;
}
@@ -122,6 +176,14 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv,
if (!enc_keyid.mask->keyid)
return 0;
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) {
+ int err;
+
+ err = mlx5e_tc_tun_parse_vxlan_gbp_option(priv, spec, f);
+ if (err)
+ return err;
+ }
+
/* match on VNI is required */
if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
@@ -143,6 +205,12 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv,
return 0;
}
+static bool mlx5e_tc_tun_encap_info_equal_vxlan(struct mlx5e_encap_key *a,
+ struct mlx5e_encap_key *b)
+{
+ return mlx5e_tc_tun_encap_info_equal_options(a, b, TUNNEL_VXLAN_OPT);
+}
+
static int mlx5e_tc_tun_get_remote_ifindex(struct net_device *mirred_dev)
{
const struct vxlan_dev *vxlan = netdev_priv(mirred_dev);
@@ -160,6 +228,6 @@ struct mlx5e_tc_tunnel vxlan_tunnel = {
.generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_vxlan,
.parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_vxlan,
.parse_tunnel = mlx5e_tc_tun_parse_vxlan,
- .encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic,
+ .encap_info_equal = mlx5e_tc_tun_encap_info_equal_vxlan,
.get_remote_ifindex = mlx5e_tc_tun_get_remote_ifindex,
};
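
mlx5e_tc_tun_parse_vxlan_gbp_option() accepts exactly one 32-bit GBP option: the option type must be TUNNEL_VXLAN_OPT, key and mask must each be 4 bytes, and the mask may only cover GBP bits before the value is programmed into tunnel_header_0. A hedged validation sketch follows; ALLOWED_GBP_MASK is a placeholder for the demo, not the real VXLAN_GBP_MASK value.

/* Sketch of the sanity checks done by mlx5e_tc_tun_parse_vxlan_gbp_option().
 * ALLOWED_GBP_MASK is an assumption for the demo only. */
#include <stdint.h>
#include <stdio.h>

#define ALLOWED_GBP_MASK 0x0000ffffu   /* placeholder, not VXLAN_GBP_MASK */

static int validate_gbp_match(uint32_t key_len, uint32_t mask_len,
			      uint32_t gbp, uint32_t gbp_mask)
{
	if (key_len != sizeof(uint32_t) || mask_len != sizeof(uint32_t))
		return -1;              /* option/mask must be 32 bits wide */
	if (gbp_mask & ~ALLOWED_GBP_MASK)
		return -1;              /* mask touches non-GBP bits */
	(void)gbp;                      /* would be written into tunnel_header_0 */
	return 0;
}

int main(void)
{
	printf("%d\n", validate_gbp_match(4, 4, 0x0001, 0x00ff));      /* 0  */
	printf("%d\n", validate_gbp_match(4, 4, 0x0001, 0xff000000));  /* -1 */
	return 0;
}
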
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index b9c2f67d3794..47381e949f1f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -65,13 +65,11 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget);
int mlx5e_poll_ico_cq(struct mlx5e_cq *cq);
/* RX */
-void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page);
-void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle);
INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq));
INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq));
int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
-void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq);
+void mlx5e_free_rx_missing_descs(struct mlx5e_rq *rq);
static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
{
@@ -79,6 +77,19 @@ static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
}
/* TX */
+struct mlx5e_xmit_data {
+ dma_addr_t dma_addr;
+ void *data;
+ u32 len : 31;
+ u32 has_frags : 1;
+};
+
+struct mlx5e_xmit_data_frags {
+ struct mlx5e_xmit_data xd;
+ struct skb_shared_info *sinfo;
+ dma_addr_t *dma_arr;
+};
+
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
@@ -86,7 +97,7 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
static inline bool
mlx5e_skb_fifo_has_room(struct mlx5e_skb_fifo *fifo)
{
- return (u16)(*fifo->pc - *fifo->cc) < fifo->mask;
+ return (u16)(*fifo->pc - *fifo->cc) <= fifo->mask;
}
static inline bool
@@ -489,7 +500,7 @@ static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size
static inline struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int i)
{
- size_t isz = struct_size(rq->mpwqe.info, alloc_units, rq->mpwqe.pages_per_wqe);
+ size_t isz = struct_size(rq->mpwqe.info, alloc_units.frag_pages, rq->mpwqe.pages_per_wqe);
return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz));
}
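
mlx5e_xmit_data_frags embeds mlx5e_xmit_data as its first member, so the TX path keeps passing the small base descriptor and, when has_frags is set, recovers the fragment state with container_of(). The standalone demo below shows only that embedding/upcast idiom, with invented field names and the usual offsetof-based container_of spelled out.

/* Userspace model of the mlx5e_xmit_data / mlx5e_xmit_data_frags
 * embedding: the has_frags bit tells the consumer it may container_of()
 * back to the outer struct. Field names are illustrative. */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct xmit_data {
	unsigned int len : 31;
	unsigned int has_frags : 1;
};

struct xmit_data_frags {
	struct xmit_data xd;    /* base descriptor, passed to the TX path */
	int nr_frags;           /* extra state only multi-buffer packets need */
};

static void transmit(struct xmit_data *xd)
{
	if (xd->has_frags) {
		struct xmit_data_frags *xdf = container_of(xd, struct xmit_data_frags, xd);

		printf("multi-buffer: len=%u nr_frags=%d\n", (unsigned)xd->len, xdf->nr_frags);
	} else {
		printf("linear: len=%u\n", (unsigned)xd->len);
	}
}

int main(void)
{
	struct xmit_data_frags xdf = { .xd = { .len = 1500, .has_frags = 1 }, .nr_frags = 2 };
	struct xmit_data linear = { .len = 64 };

	transmit(&xdf.xd);
	transmit(&linear);
	return 0;
}
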
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index d9d3b9e1f15a..f0e6095809fa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -61,9 +61,8 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
struct xdp_buff *xdp)
{
struct page *page = virt_to_page(xdp->data);
- struct skb_shared_info *sinfo = NULL;
- struct mlx5e_xmit_data xdptxd;
- struct mlx5e_xdp_info xdpi;
+ struct mlx5e_xmit_data_frags xdptxdf = {};
+ struct mlx5e_xmit_data *xdptxd;
struct xdp_frame *xdpf;
dma_addr_t dma_addr;
int i;
@@ -72,8 +71,10 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
if (unlikely(!xdpf))
return false;
- xdptxd.data = xdpf->data;
- xdptxd.len = xdpf->len;
+ xdptxd = &xdptxdf.xd;
+ xdptxd->data = xdpf->data;
+ xdptxd->len = xdpf->len;
+ xdptxd->has_frags = xdp_frame_has_frags(xdpf);
if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
/* The xdp_buff was in the UMEM and was copied into a newly
@@ -88,24 +89,29 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
*/
__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
- xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
+ if (unlikely(xdptxd->has_frags))
+ return false;
- dma_addr = dma_map_single(sq->pdev, xdptxd.data, xdptxd.len,
+ dma_addr = dma_map_single(sq->pdev, xdptxd->data, xdptxd->len,
DMA_TO_DEVICE);
if (dma_mapping_error(sq->pdev, dma_addr)) {
xdp_return_frame(xdpf);
return false;
}
- xdptxd.dma_addr = dma_addr;
- xdpi.frame.xdpf = xdpf;
- xdpi.frame.dma_addr = dma_addr;
+ xdptxd->dma_addr = dma_addr;
if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
- mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0)))
+ mlx5e_xmit_xdp_frame, sq, xdptxd, 0)))
return false;
- mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+ /* xmit_mode == MLX5E_XDP_XMIT_MODE_FRAME */
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_FRAME });
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info) { .frame.xdpf = xdpf });
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info) { .frame.dma_addr = dma_addr });
return true;
}
@@ -115,17 +121,15 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
* mode.
*/
- xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;
- xdpi.page.rq = rq;
-
dma_addr = page_pool_get_dma_addr(page) + (xdpf->data - (void *)xdpf);
- dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, DMA_BIDIRECTIONAL);
+ dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd->len, DMA_BIDIRECTIONAL);
- if (unlikely(xdp_frame_has_frags(xdpf))) {
- sinfo = xdp_get_shared_info_from_frame(xdpf);
+ if (xdptxd->has_frags) {
+ xdptxdf.sinfo = xdp_get_shared_info_from_frame(xdpf);
+ xdptxdf.dma_arr = NULL;
- for (i = 0; i < sinfo->nr_frags; i++) {
- skb_frag_t *frag = &sinfo->frags[i];
+ for (i = 0; i < xdptxdf.sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &xdptxdf.sinfo->frags[i];
dma_addr_t addr;
u32 len;
@@ -137,22 +141,34 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
}
}
- xdptxd.dma_addr = dma_addr;
+ xdptxd->dma_addr = dma_addr;
if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
- mlx5e_xmit_xdp_frame, sq, &xdptxd, sinfo, 0)))
+ mlx5e_xmit_xdp_frame, sq, xdptxd, 0)))
return false;
- xdpi.page.page = page;
- mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
-
- if (unlikely(xdp_frame_has_frags(xdpf))) {
- for (i = 0; i < sinfo->nr_frags; i++) {
- skb_frag_t *frag = &sinfo->frags[i];
-
- xdpi.page.page = skb_frag_page(frag);
- mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+ /* xmit_mode == MLX5E_XDP_XMIT_MODE_PAGE */
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_PAGE });
+
+ if (xdptxd->has_frags) {
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info)
+ { .page.num = 1 + xdptxdf.sinfo->nr_frags });
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info) { .page.page = page });
+ for (i = 0; i < xdptxdf.sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &xdptxdf.sinfo->frags[i];
+
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info)
+ { .page.page = skb_frag_page(frag) });
}
+ } else {
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info) { .page.num = 1 });
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info) { .page.page = page });
}
return true;
@@ -268,8 +284,6 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq,
goto xdp_abort;
__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
- if (xdp->rxq->mem.type != MEM_TYPE_XSK_BUFF_POOL)
- mlx5e_page_dma_unmap(rq, virt_to_page(xdp->data));
rq->stats->xdp_redirect++;
return true;
default:
@@ -383,26 +397,43 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq
INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
- struct skb_shared_info *sinfo, int check_result);
+ int check_result);
INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
- struct skb_shared_info *sinfo, int check_result)
+ int check_result)
{
struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
struct mlx5e_xdpsq_stats *stats = sq->stats;
+ struct mlx5e_xmit_data *p = xdptxd;
+ struct mlx5e_xmit_data tmp;
- if (unlikely(sinfo)) {
- /* MPWQE is enabled, but a multi-buffer packet is queued for
- * transmission. MPWQE can't send fragmented packets, so close
- * the current session and fall back to a regular WQE.
- */
- if (unlikely(sq->mpwqe.wqe))
- mlx5e_xdp_mpwqe_complete(sq);
- return mlx5e_xmit_xdp_frame(sq, xdptxd, sinfo, 0);
+ if (xdptxd->has_frags) {
+ struct mlx5e_xmit_data_frags *xdptxdf =
+ container_of(xdptxd, struct mlx5e_xmit_data_frags, xd);
+
+ if (!!xdptxd->len + xdptxdf->sinfo->nr_frags > 1) {
+ /* MPWQE is enabled, but a multi-buffer packet is queued for
+ * transmission. MPWQE can't send fragmented packets, so close
+ * the current session and fall back to a regular WQE.
+ */
+ if (unlikely(sq->mpwqe.wqe))
+ mlx5e_xdp_mpwqe_complete(sq);
+ return mlx5e_xmit_xdp_frame(sq, xdptxd, 0);
+ }
+ if (!xdptxd->len) {
+ skb_frag_t *frag = &xdptxdf->sinfo->frags[0];
+
+ tmp.data = skb_frag_address(frag);
+ tmp.len = skb_frag_size(frag);
+ tmp.dma_addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[0] :
+ page_pool_get_dma_addr(skb_frag_page(frag)) +
+ skb_frag_off(frag);
+ p = &tmp;
+ }
}
- if (unlikely(xdptxd->len > sq->hw_mtu)) {
+ if (unlikely(p->len > sq->hw_mtu)) {
stats->err++;
return false;
}
@@ -420,7 +451,7 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptx
mlx5e_xdp_mpwqe_session_start(sq);
}
- mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);
+ mlx5e_xdp_mpwqe_add_dseg(sq, p, stats);
if (unlikely(mlx5e_xdp_mpwqe_is_full(session, sq->max_sq_mpw_wqebbs)))
mlx5e_xdp_mpwqe_complete(sq);
@@ -448,8 +479,10 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
- struct skb_shared_info *sinfo, int check_result)
+ int check_result)
{
+ struct mlx5e_xmit_data_frags *xdptxdf =
+ container_of(xdptxd, struct mlx5e_xmit_data_frags, xd);
struct mlx5_wq_cyc *wq = &sq->wq;
struct mlx5_wqe_ctrl_seg *cseg;
struct mlx5_wqe_data_seg *dseg;
@@ -461,26 +494,34 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
u16 ds_cnt, inline_hdr_sz;
u8 num_wqebbs = 1;
int num_frags = 0;
+ bool inline_ok;
+ bool linear;
u16 pi;
struct mlx5e_xdpsq_stats *stats = sq->stats;
- if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
+ inline_ok = sq->min_inline_mode == MLX5_INLINE_MODE_NONE ||
+ dma_len >= MLX5E_XDP_MIN_INLINE;
+
+ if (unlikely(!inline_ok || sq->hw_mtu < dma_len)) {
stats->err++;
return false;
}
- ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1;
+ inline_hdr_sz = 0;
if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE)
- ds_cnt++;
+ inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
+
+ linear = !!(dma_len - inline_hdr_sz);
+ ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + linear + !!inline_hdr_sz;
/* check_result must be 0 if sinfo is passed. */
if (!check_result) {
int stop_room = 1;
- if (unlikely(sinfo)) {
- ds_cnt += sinfo->nr_frags;
- num_frags = sinfo->nr_frags;
+ if (xdptxd->has_frags) {
+ ds_cnt += xdptxdf->sinfo->nr_frags;
+ num_frags = xdptxdf->sinfo->nr_frags;
num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
/* Assuming MLX5_CAP_GEN(mdev, max_wqe_sz_sq) is big
* enough to hold all fragments.
@@ -501,53 +542,53 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
eseg = &wqe->eth;
dseg = wqe->data;
- inline_hdr_sz = 0;
-
/* copy the inline part if required */
- if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
+ if (inline_hdr_sz) {
memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start));
memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start),
- MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start));
- dma_len -= MLX5E_XDP_MIN_INLINE;
- dma_addr += MLX5E_XDP_MIN_INLINE;
- inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
+ inline_hdr_sz - sizeof(eseg->inline_hdr.start));
+ dma_len -= inline_hdr_sz;
+ dma_addr += inline_hdr_sz;
dseg++;
}
/* write the dma part */
- dseg->addr = cpu_to_be64(dma_addr);
- dseg->byte_count = cpu_to_be32(dma_len);
+ if (linear) {
+ dseg->addr = cpu_to_be64(dma_addr);
+ dseg->byte_count = cpu_to_be32(dma_len);
+ dseg->lkey = sq->mkey_be;
+ dseg++;
+ }
cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
- if (unlikely(test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state))) {
- u8 num_pkts = 1 + num_frags;
+ if (test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) {
int i;
memset(&cseg->trailer, 0, sizeof(cseg->trailer));
memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer));
eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
- dseg->lkey = sq->mkey_be;
for (i = 0; i < num_frags; i++) {
- skb_frag_t *frag = &sinfo->frags[i];
+ skb_frag_t *frag = &xdptxdf->sinfo->frags[i];
dma_addr_t addr;
- addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
+ addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[i] :
+ page_pool_get_dma_addr(skb_frag_page(frag)) +
skb_frag_off(frag);
- dseg++;
dseg->addr = cpu_to_be64(addr);
dseg->byte_count = cpu_to_be32(skb_frag_size(frag));
dseg->lkey = sq->mkey_be;
+ dseg++;
}
cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) {
.num_wqebbs = num_wqebbs,
- .num_pkts = num_pkts,
+ .num_pkts = 1,
};
sq->pc += num_wqebbs;
@@ -566,26 +607,67 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
struct mlx5e_xdp_wqe_info *wi,
u32 *xsk_frames,
- bool recycle,
struct xdp_frame_bulk *bq)
{
struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
u16 i;
for (i = 0; i < wi->num_pkts; i++) {
- struct mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
+ union mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
switch (xdpi.mode) {
- case MLX5E_XDP_XMIT_MODE_FRAME:
+ case MLX5E_XDP_XMIT_MODE_FRAME: {
/* XDP_TX from the XSK RQ and XDP_REDIRECT */
- dma_unmap_single(sq->pdev, xdpi.frame.dma_addr,
- xdpi.frame.xdpf->len, DMA_TO_DEVICE);
- xdp_return_frame_bulk(xdpi.frame.xdpf, bq);
+ struct xdp_frame *xdpf;
+ dma_addr_t dma_addr;
+
+ xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
+ xdpf = xdpi.frame.xdpf;
+ xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
+ dma_addr = xdpi.frame.dma_addr;
+
+ dma_unmap_single(sq->pdev, dma_addr,
+ xdpf->len, DMA_TO_DEVICE);
+ if (xdp_frame_has_frags(xdpf)) {
+ struct skb_shared_info *sinfo;
+ int j;
+
+ sinfo = xdp_get_shared_info_from_frame(xdpf);
+ for (j = 0; j < sinfo->nr_frags; j++) {
+ skb_frag_t *frag = &sinfo->frags[j];
+
+ xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
+ dma_addr = xdpi.frame.dma_addr;
+
+ dma_unmap_single(sq->pdev, dma_addr,
+ skb_frag_size(frag), DMA_TO_DEVICE);
+ }
+ }
+ xdp_return_frame_bulk(xdpf, bq);
break;
- case MLX5E_XDP_XMIT_MODE_PAGE:
+ }
+ case MLX5E_XDP_XMIT_MODE_PAGE: {
/* XDP_TX from the regular RQ */
- mlx5e_page_release_dynamic(xdpi.page.rq, xdpi.page.page, recycle);
+ u8 num, n = 0;
+
+ xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
+ num = xdpi.page.num;
+
+ do {
+ struct page *page;
+
+ xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
+ page = xdpi.page.page;
+
+ /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE)
+ * as we know this is a page_pool page.
+ */
+ page_pool_put_defragged_page(page->pp,
+ page, -1, true);
+ } while (++n < num);
+
break;
+ }
case MLX5E_XDP_XMIT_MODE_XSK:
/* AF_XDP send */
(*xsk_frames)++;
@@ -638,7 +720,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
sqcc += wi->num_wqebbs;
- mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true, &bq);
+ mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq);
} while (!last_wqe);
if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
@@ -685,7 +767,7 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
sq->cc += wi->num_wqebbs;
- mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false, &bq);
+ mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq);
}
xdp_flush_frame_bulk(&bq);
@@ -719,34 +801,79 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
sq = &priv->channels.c[sq_num]->xdpsq;
for (i = 0; i < n; i++) {
+ struct mlx5e_xmit_data_frags xdptxdf = {};
struct xdp_frame *xdpf = frames[i];
- struct mlx5e_xmit_data xdptxd;
- struct mlx5e_xdp_info xdpi;
+ dma_addr_t dma_arr[MAX_SKB_FRAGS];
+ struct mlx5e_xmit_data *xdptxd;
bool ret;
- xdptxd.data = xdpf->data;
- xdptxd.len = xdpf->len;
- xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data,
- xdptxd.len, DMA_TO_DEVICE);
+ xdptxd = &xdptxdf.xd;
+ xdptxd->data = xdpf->data;
+ xdptxd->len = xdpf->len;
+ xdptxd->has_frags = xdp_frame_has_frags(xdpf);
+ xdptxd->dma_addr = dma_map_single(sq->pdev, xdptxd->data,
+ xdptxd->len, DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr)))
+ if (unlikely(dma_mapping_error(sq->pdev, xdptxd->dma_addr)))
break;
- xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
- xdpi.frame.xdpf = xdpf;
- xdpi.frame.dma_addr = xdptxd.dma_addr;
+ if (xdptxd->has_frags) {
+ int j;
+
+ xdptxdf.sinfo = xdp_get_shared_info_from_frame(xdpf);
+ xdptxdf.dma_arr = dma_arr;
+ for (j = 0; j < xdptxdf.sinfo->nr_frags; j++) {
+ skb_frag_t *frag = &xdptxdf.sinfo->frags[j];
+
+ dma_arr[j] = dma_map_single(sq->pdev, skb_frag_address(frag),
+ skb_frag_size(frag), DMA_TO_DEVICE);
+
+ if (!dma_mapping_error(sq->pdev, dma_arr[j]))
+ continue;
+ /* mapping error */
+ while (--j >= 0)
+ dma_unmap_single(sq->pdev, dma_arr[j],
+ skb_frag_size(&xdptxdf.sinfo->frags[j]),
+ DMA_TO_DEVICE);
+ goto out;
+ }
+ }
ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
- mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0);
+ mlx5e_xmit_xdp_frame, sq, xdptxd, 0);
if (unlikely(!ret)) {
- dma_unmap_single(sq->pdev, xdptxd.dma_addr,
- xdptxd.len, DMA_TO_DEVICE);
+ int j;
+
+ dma_unmap_single(sq->pdev, xdptxd->dma_addr,
+ xdptxd->len, DMA_TO_DEVICE);
+ if (!xdptxd->has_frags)
+ break;
+ for (j = 0; j < xdptxdf.sinfo->nr_frags; j++)
+ dma_unmap_single(sq->pdev, dma_arr[j],
+ skb_frag_size(&xdptxdf.sinfo->frags[j]),
+ DMA_TO_DEVICE);
break;
}
- mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+
+ /* xmit_mode == MLX5E_XDP_XMIT_MODE_FRAME */
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_FRAME });
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info) { .frame.xdpf = xdpf });
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info) { .frame.dma_addr = xdptxd->dma_addr });
+ if (xdptxd->has_frags) {
+ int j;
+
+ for (j = 0; j < xdptxdf.sinfo->nr_frags; j++)
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info)
+ { .frame.dma_addr = dma_arr[j] });
+ }
nxmit++;
}
+out:
if (flags & XDP_XMIT_FLUSH) {
if (sq->mpwqe.wqe)
mlx5e_xdp_mpwqe_complete(sq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index 10bcfa6f88c1..9e8e6184f9e4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -50,6 +50,53 @@ struct mlx5e_xdp_buff {
struct mlx5e_rq *rq;
};
+/* XDP packets can be transmitted in different ways. On completion, we need to
+ * distinguish between them to clean up things in a proper way.
+ */
+enum mlx5e_xdp_xmit_mode {
+ /* An xdp_frame was transmitted due to either XDP_REDIRECT from another
+ * device or XDP_TX from an XSK RQ. The frame has to be unmapped and
+ * returned.
+ */
+ MLX5E_XDP_XMIT_MODE_FRAME,
+
+ /* The xdp_frame was created in place as a result of XDP_TX from a
+ * regular RQ. No DMA remapping happened, and the page belongs to us.
+ */
+ MLX5E_XDP_XMIT_MODE_PAGE,
+
+ /* No xdp_frame was created at all, the transmit happened from a UMEM
+ * page. The UMEM Completion Ring producer pointer has to be increased.
+ */
+ MLX5E_XDP_XMIT_MODE_XSK,
+};
+
+/* xmit_mode entry is pushed to the fifo per packet, followed by multiple
+ * entries, as follows:
+ *
+ * MLX5E_XDP_XMIT_MODE_FRAME:
+ * xdpf, dma_addr_1, dma_addr_2, ... , dma_addr_num.
+ * 'num' is derived from xdpf.
+ *
+ * MLX5E_XDP_XMIT_MODE_PAGE:
+ * num, page_1, page_2, ... , page_num.
+ *
+ * MLX5E_XDP_XMIT_MODE_XSK:
+ * none.
+ */
+union mlx5e_xdp_info {
+ enum mlx5e_xdp_xmit_mode mode;
+ union {
+ struct xdp_frame *xdpf;
+ dma_addr_t dma_addr;
+ } frame;
+ union {
+ struct mlx5e_rq *rq;
+ u8 num;
+ struct page *page;
+ } page;
+};
+
struct mlx5e_xsk_param;
int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
bool mlx5e_xdp_handle(struct mlx5e_rq *rq,
@@ -66,11 +113,9 @@ extern const struct xdp_metadata_ops mlx5e_xdp_metadata_ops;
INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
struct mlx5e_xmit_data *xdptxd,
- struct skb_shared_info *sinfo,
int check_result));
INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
struct mlx5e_xmit_data *xdptxd,
- struct skb_shared_info *sinfo,
int check_result));
INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq));
INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq));
@@ -179,14 +224,14 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
static inline void
mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo,
- struct mlx5e_xdp_info *xi)
+ union mlx5e_xdp_info xi)
{
u32 i = (*fifo->pc)++ & fifo->mask;
- fifo->xi[i] = *xi;
+ fifo->xi[i] = xi;
}
-static inline struct mlx5e_xdp_info
+static inline union mlx5e_xdp_info
mlx5e_xdpi_fifo_pop(struct mlx5e_xdp_info_fifo *fifo)
{
return fifo->xi[(*fifo->cc)++ & fifo->mask];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
index fab787600459..d97e6df66f45 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -22,6 +22,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
struct mlx5e_icosq *icosq = rq->icosq;
struct mlx5_wq_cyc *wq = &icosq->wq;
struct mlx5e_umr_wqe *umr_wqe;
+ struct xdp_buff **xsk_buffs;
int batch, i;
u32 offset; /* 17-bit value with MTT. */
u16 pi;
@@ -29,9 +30,9 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe)))
goto err;
- BUILD_BUG_ON(sizeof(wi->alloc_units[0]) != sizeof(wi->alloc_units[0].xsk));
XSK_CHECK_PRIV_TYPE(struct mlx5e_xdp_buff);
- batch = xsk_buff_alloc_batch(rq->xsk_pool, (struct xdp_buff **)wi->alloc_units,
+ xsk_buffs = (struct xdp_buff **)wi->alloc_units.xsk_buffs;
+ batch = xsk_buff_alloc_batch(rq->xsk_pool, xsk_buffs,
rq->mpwqe.pages_per_wqe);
/* If batch < pages_per_wqe, either:
@@ -41,8 +42,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
* the first error, which will mean there are no more valid descriptors.
*/
for (; batch < rq->mpwqe.pages_per_wqe; batch++) {
- wi->alloc_units[batch].xsk = xsk_buff_alloc(rq->xsk_pool);
- if (unlikely(!wi->alloc_units[batch].xsk))
+ xsk_buffs[batch] = xsk_buff_alloc(rq->xsk_pool);
+ if (unlikely(!xsk_buffs[batch]))
goto err_reuse_batch;
}
@@ -52,8 +53,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) {
for (i = 0; i < batch; i++) {
- struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk);
- dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+ struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);
umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
.ptag = cpu_to_be64(addr | MLX5_EN_WR),
@@ -62,8 +63,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
}
} else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) {
for (i = 0; i < batch; i++) {
- struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk);
- dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+ struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);
umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
.key = rq->mkey_be,
@@ -75,8 +76,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2);
for (i = 0; i < batch; i++) {
- struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk);
- dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+ struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);
umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) {
.key = rq->mkey_be,
@@ -102,8 +103,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
__be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size);
for (i = 0; i < batch; i++) {
- struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk);
- dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+ struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);
umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) {
.key = rq->mkey_be,
@@ -119,7 +120,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
}
}
- bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
+ bitmap_zero(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe);
wi->consumed_strides = 0;
umr_wqe->ctrl.opmod_idx_opcode =
@@ -149,7 +150,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
err_reuse_batch:
while (--batch >= 0)
- xsk_buff_free(wi->alloc_units[batch].xsk);
+ xsk_buff_free(xsk_buffs[batch]);
err:
rq->stats->buff_alloc_err++;
@@ -163,13 +164,10 @@ int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
u32 contig, alloc;
int i;
- /* mlx5e_init_frags_partition creates a 1:1 mapping between
- * rq->wqe.frags and rq->wqe.alloc_units, which allows us to
- * allocate XDP buffers straight into alloc_units.
+	/* Each rq->wqe.frags->xskp is mapped 1:1 to an element inside the
+ * rq->wqe.alloc_units->xsk_buffs array allocated here.
*/
- BUILD_BUG_ON(sizeof(rq->wqe.alloc_units[0]) !=
- sizeof(rq->wqe.alloc_units[0].xsk));
- buffs = (struct xdp_buff **)rq->wqe.alloc_units;
+ buffs = rq->wqe.alloc_units->xsk_buffs;
contig = mlx5_wq_cyc_get_size(wq) - ix;
if (wqe_bulk <= contig) {
alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk);
@@ -189,8 +187,9 @@ int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
/* Assumes log_num_frags == 0. */
frag = &rq->wqe.frags[j];
- addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk);
+ addr = xsk_buff_xdp_get_frame_dma(*frag->xskp);
wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
+ frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
}
return alloc;
@@ -211,12 +210,13 @@ int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
/* Assumes log_num_frags == 0. */
frag = &rq->wqe.frags[j];
- frag->au->xsk = xsk_buff_alloc(rq->xsk_pool);
- if (unlikely(!frag->au->xsk))
+ *frag->xskp = xsk_buff_alloc(rq->xsk_pool);
+ if (unlikely(!*frag->xskp))
return i;
- addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk);
+ addr = xsk_buff_xdp_get_frame_dma(*frag->xskp);
wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
+ frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
}
return wqe_bulk;
@@ -251,7 +251,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
u32 head_offset,
u32 page_idx)
{
- struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[page_idx].xsk);
+ struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units.xsk_buffs[page_idx]);
struct bpf_prog *prog;
/* Check packet size. Note LRO doesn't use linear SKB */
@@ -291,7 +291,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
prog = rcu_dereference(rq->xdp_prog);
if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) {
if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
- __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
+ __set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */
return NULL; /* page/packet was consumed by XDP */
}
@@ -306,7 +306,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
struct mlx5_cqe64 *cqe,
u32 cqe_bcnt)
{
- struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->au->xsk);
+ struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(*wi->xskp);
struct bpf_prog *prog;
/* wi->offset is not used in this function, because xdp->data and the
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index 81a567e17264..ed279f450976 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -93,13 +93,19 @@ static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *param
struct mlx5e_rq_param *rq_params, struct xsk_buff_pool *pool,
struct mlx5e_xsk_param *xsk)
{
+ struct mlx5e_rq *xskrq = &c->xskrq;
int err;
- err = mlx5e_init_xsk_rq(c, params, pool, xsk, &c->xskrq);
+ err = mlx5e_init_xsk_rq(c, params, pool, xsk, xskrq);
if (err)
return err;
- return mlx5e_open_rq(params, rq_params, xsk, cpu_to_node(c->cpu), &c->xskrq);
+ err = mlx5e_open_rq(params, rq_params, xsk, cpu_to_node(c->cpu), xskrq);
+ if (err)
+ return err;
+
+ __set_bit(MLX5E_RQ_STATE_XSK, &xskrq->state);
+ return 0;
}
int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
index 367a9505ca4f..597f319d4770 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -44,7 +44,7 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
* same.
*/
static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq,
- struct mlx5e_xdp_info *xdpi)
+ union mlx5e_xdp_info *xdpi)
{
u16 pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];
@@ -54,15 +54,14 @@ static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq,
wi->num_pkts = 1;
nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc);
- mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, *xdpi);
sq->doorbell_cseg = &nopwqe->ctrl;
}
bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
{
struct xsk_buff_pool *pool = sq->xsk_pool;
- struct mlx5e_xmit_data xdptxd;
- struct mlx5e_xdp_info xdpi;
+ union mlx5e_xdp_info xdpi;
bool work_done = true;
bool flush = false;
@@ -73,6 +72,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
mlx5e_xmit_xdp_frame_check_mpwqe,
mlx5e_xmit_xdp_frame_check,
sq);
+ struct mlx5e_xmit_data xdptxd = {};
struct xdp_desc desc;
bool ret;
@@ -97,7 +97,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
xsk_buff_raw_dma_sync_for_device(pool, xdptxd.dma_addr, xdptxd.len);
ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
- mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL,
+ mlx5e_xmit_xdp_frame, sq, &xdptxd,
check_result);
if (unlikely(!ret)) {
if (sq->mpwqe.wqe)
@@ -105,7 +105,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
mlx5e_xsk_tx_post_err(sq, &xdpi);
} else {
- mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
}
flush = true;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 7b0d3de0ec6c..55b38544422f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -35,11 +35,15 @@
#include <crypto/aead.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
+#include <net/netevent.h>
#include "en.h"
#include "ipsec.h"
#include "ipsec_rxtx.h"
+#define MLX5_IPSEC_RESCHED msecs_to_jiffies(1000)
+#define MLX5E_IPSEC_TUNNEL_SA XA_MARK_1
+
static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
{
return (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
@@ -50,27 +54,71 @@ static struct mlx5e_ipsec_pol_entry *to_ipsec_pol_entry(struct xfrm_policy *x)
return (struct mlx5e_ipsec_pol_entry *)x->xdo.offload_handle;
}
+static void mlx5e_ipsec_handle_tx_limit(struct work_struct *_work)
+{
+ struct mlx5e_ipsec_dwork *dwork =
+ container_of(_work, struct mlx5e_ipsec_dwork, dwork.work);
+ struct mlx5e_ipsec_sa_entry *sa_entry = dwork->sa_entry;
+ struct xfrm_state *x = sa_entry->x;
+
+ spin_lock(&x->lock);
+ xfrm_state_check_expire(x);
+ if (x->km.state == XFRM_STATE_EXPIRED) {
+ sa_entry->attrs.drop = true;
+ mlx5e_accel_ipsec_fs_modify(sa_entry);
+ }
+ spin_unlock(&x->lock);
+
+ if (sa_entry->attrs.drop)
+ return;
+
+ queue_delayed_work(sa_entry->ipsec->wq, &dwork->dwork,
+ MLX5_IPSEC_RESCHED);
+}
+
static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
{
- struct xfrm_replay_state_esn *replay_esn;
+ struct xfrm_state *x = sa_entry->x;
u32 seq_bottom = 0;
+ u32 esn, esn_msb;
u8 overlap;
- if (!(sa_entry->x->props.flags & XFRM_STATE_ESN)) {
- sa_entry->esn_state.trigger = 0;
+ switch (x->xso.type) {
+ case XFRM_DEV_OFFLOAD_PACKET:
+ switch (x->xso.dir) {
+ case XFRM_DEV_OFFLOAD_IN:
+ esn = x->replay_esn->seq;
+ esn_msb = x->replay_esn->seq_hi;
+ break;
+ case XFRM_DEV_OFFLOAD_OUT:
+ esn = x->replay_esn->oseq;
+ esn_msb = x->replay_esn->oseq_hi;
+ break;
+ default:
+ WARN_ON(true);
+ return false;
+ }
+ break;
+ case XFRM_DEV_OFFLOAD_CRYPTO:
+ /* Already parsed by XFRM core */
+ esn = x->replay_esn->seq;
+ break;
+ default:
+ WARN_ON(true);
return false;
}
- replay_esn = sa_entry->x->replay_esn;
- if (replay_esn->seq >= replay_esn->replay_window)
- seq_bottom = replay_esn->seq - replay_esn->replay_window + 1;
-
overlap = sa_entry->esn_state.overlap;
- sa_entry->esn_state.esn = xfrm_replay_seqhi(sa_entry->x,
- htonl(seq_bottom));
+ if (esn >= x->replay_esn->replay_window)
+ seq_bottom = esn - x->replay_esn->replay_window + 1;
+
+ if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO)
+ esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom));
+
+ sa_entry->esn_state.esn = esn;
+ sa_entry->esn_state.esn_msb = esn_msb;
- sa_entry->esn_state.trigger = 1;
if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) {
sa_entry->esn_state.overlap = 0;
return true;
@@ -87,25 +135,161 @@ static void mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry *sa_entry,
struct mlx5_accel_esp_xfrm_attrs *attrs)
{
struct xfrm_state *x = sa_entry->x;
+ s64 start_value, n;
- attrs->hard_packet_limit = x->lft.hard_packet_limit;
+ attrs->lft.hard_packet_limit = x->lft.hard_packet_limit;
+ attrs->lft.soft_packet_limit = x->lft.soft_packet_limit;
if (x->lft.soft_packet_limit == XFRM_INF)
return;
- /* Hardware decrements hard_packet_limit counter through
- * the operation. While fires an event when soft_packet_limit
- * is reached. It emans that we need substitute the numbers
- * in order to properly count soft limit.
+ /* Compute hard limit initial value and number of rounds.
+ *
+ * The counting pattern of hardware counter goes:
+ * value -> 2^31-1
+ * 2^31 | (2^31-1) -> 2^31-1
+ * 2^31 | (2^31-1) -> 2^31-1
+ * [..]
+ * 2^31 | (2^31-1) -> 0
*
- * As an example:
- * XFRM user sets soft limit is 2 and hard limit is 9 and
- * expects to see soft event after 2 packets and hard event
- * after 9 packets. In our case, the hard limit will be set
- * to 9 and soft limit is comparator to 7 so user gets the
- * soft event after 2 packeta
+ * The pattern is created by using an ASO operation to atomically set
+ * bit 31 after the down counter clears bit 31. This is effectively an
+	 * atomic addition of 2^31 to the counter.
+ *
+ * We wish to configure the counter, within the above pattern, so that
+ * when it reaches 0, it has hit the hard limit. This is defined by this
+ * system of equations:
+ *
+ * hard_limit == start_value + n * 2^31
+ * n >= 0
+ * start_value < 2^32, start_value >= 0
+ *
+	 * These equations are not single-solution; there are often two choices:
+ * hard_limit == start_value + n * 2^31
+ * hard_limit == (start_value+2^31) + (n-1) * 2^31
+ *
+ * The algorithm selects the solution that keeps the counter value
+ * above 2^31 until the final iteration.
+ */
+
+	/* Start by estimating n and computing start_value */
+ n = attrs->lft.hard_packet_limit / BIT_ULL(31);
+ start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31);
+
+ /* Choose the best of the two solutions: */
+ if (n >= 1)
+ n -= 1;
+
+ /* Computed values solve the system of equations: */
+ start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31);
+
+ /* The best solution means: when there are multiple iterations we must
+	 * start above 2^31 and count down to 2^31 to get the interrupt.
+ */
+ attrs->lft.hard_packet_limit = lower_32_bits(start_value);
+ attrs->lft.numb_rounds_hard = (u64)n;
+
+ /* Compute soft limit initial value and number of rounds.
+ *
+ * The soft_limit is achieved by adjusting the counter's
+ * interrupt_value. This is embedded in the counting pattern created by
+ * hard packet calculations above.
+ *
+ * We wish to compute the interrupt_value for the soft_limit. This is
+ * defined by this system of equations:
+ *
+ * soft_limit == start_value - soft_value + n * 2^31
+ * n >= 0
+ * soft_value < 2^32, soft_value >= 0
+ * for n == 0 start_value > soft_value
+ *
+	 * As with compute_hard_n_value(), the equations are not single-solution.
+ * The algorithm selects the solution that has:
+ * 2^30 <= soft_limit < 2^31 + 2^30
+ * for the interior iterations, which guarantees a large guard band
+ * around the counter hard limit and next interrupt.
+ */
+
+	/* Start by estimating n and computing soft_value */
+ n = (x->lft.soft_packet_limit - attrs->lft.hard_packet_limit) / BIT_ULL(31);
+ start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) -
+ x->lft.soft_packet_limit;
+
+ /* Compare against constraints and adjust n */
+ if (n < 0)
+ n = 0;
+ else if (start_value >= BIT_ULL(32))
+ n -= 1;
+ else if (start_value < 0)
+ n += 1;
+
+ /* Choose the best of the two solutions: */
+ start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value;
+ if (n != attrs->lft.numb_rounds_hard && start_value < BIT_ULL(30))
+ n += 1;
+
+ /* Note that the upper limit of soft_value happens naturally because we
+ * always select the lowest soft_value.
*/
- attrs->soft_packet_limit =
- x->lft.hard_packet_limit - x->lft.soft_packet_limit;
+
+ /* Computed values solve the system of equations: */
+ start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value;
+
+	/* The best solution means: when there are multiple iterations we must
+	 * not fall below 2^30, as that would get too close to the false
+	 * hard_limit. When we reach an interior iteration for soft_limit, it
+	 * has to be far away from 2^32-1, which is the counter reset point
+	 * after the +2^31, to accommodate latency.
+ */
+ attrs->lft.soft_packet_limit = lower_32_bits(start_value);
+ attrs->lft.numb_rounds_soft = (u64)n;
+}
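
For illustration, a hedged, self-checking sketch of the hard-limit split described in the comment above (the helper name and the example value are hypothetical, not part of the driver):

/* Hedged sketch: verify the invariant start_value + n * 2^31 == hard_limit
 * for the hard-limit split computed above. Example: hard_limit = 5 * 2^31 +
 * 1000 yields n = 4 and start_value = 2^31 + 1000, so the 32-bit counter
 * starts above 2^31 and the ASO adds 2^31 four more times before it reaches
 * zero exactly at the hard limit.
 */
static void example_check_hard_limit_split(u64 hard_limit)
{
	u64 n = hard_limit / BIT_ULL(31);
	u64 start_value;

	if (n >= 1)
		n -= 1; /* keep the counter above 2^31 until the last round */

	start_value = hard_limit - n * BIT_ULL(31);

	WARN_ON(start_value + n * BIT_ULL(31) != hard_limit);
	WARN_ON(start_value >= BIT_ULL(32));
}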
+
+static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry,
+ struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
+ struct xfrm_state *x = sa_entry->x;
+ struct net_device *netdev;
+ struct neighbour *n;
+ u8 addr[ETH_ALEN];
+ const void *pkey;
+ u8 *dst, *src;
+
+ if (attrs->mode != XFRM_MODE_TUNNEL ||
+ attrs->type != XFRM_DEV_OFFLOAD_PACKET)
+ return;
+
+ netdev = x->xso.real_dev;
+
+ mlx5_query_mac_address(mdev, addr);
+ switch (attrs->dir) {
+ case XFRM_DEV_OFFLOAD_IN:
+ src = attrs->dmac;
+ dst = attrs->smac;
+ pkey = &attrs->saddr.a4;
+ break;
+ case XFRM_DEV_OFFLOAD_OUT:
+ src = attrs->smac;
+ dst = attrs->dmac;
+ pkey = &attrs->daddr.a4;
+ break;
+ default:
+ return;
+ }
+
+ ether_addr_copy(src, addr);
+ n = neigh_lookup(&arp_tbl, pkey, netdev);
+ if (!n) {
+ n = neigh_create(&arp_tbl, pkey, netdev);
+ if (IS_ERR(n))
+ return;
+ neigh_event_send(n, NULL);
+ attrs->drop = true;
+ } else {
+ neigh_ha_snapshot(addr, n, netdev);
+ ether_addr_copy(dst, addr);
+ }
+ neigh_release(n);
}
void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
@@ -141,11 +325,11 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
aes_gcm->icv_len = x->aead->alg_icv_len;
/* esn */
- if (sa_entry->esn_state.trigger) {
- attrs->esn_trigger = true;
- attrs->esn = sa_entry->esn_state.esn;
- attrs->esn_overlap = sa_entry->esn_state.overlap;
- attrs->replay_window = x->replay_esn->replay_window;
+ if (x->props.flags & XFRM_STATE_ESN) {
+ attrs->replay_esn.trigger = true;
+ attrs->replay_esn.esn = sa_entry->esn_state.esn;
+ attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb;
+ attrs->replay_esn.overlap = sa_entry->esn_state.overlap;
}
attrs->dir = x->xso.dir;
@@ -163,8 +347,10 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
attrs->upspec.sport = ntohs(x->sel.sport);
attrs->upspec.sport_mask = ntohs(x->sel.sport_mask);
attrs->upspec.proto = x->sel.proto;
+ attrs->mode = x->props.mode;
mlx5e_ipsec_init_limits(sa_entry, attrs);
+ mlx5e_ipsec_init_macs(sa_entry, attrs);
}
static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
@@ -233,6 +419,11 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
return -EINVAL;
}
+ if (x->props.mode != XFRM_MODE_TRANSPORT && x->props.mode != XFRM_MODE_TUNNEL) {
+ NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded");
+ return -EINVAL;
+ }
+
switch (x->xso.type) {
case XFRM_DEV_OFFLOAD_CRYPTO:
if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO)) {
@@ -240,11 +431,6 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
return -EINVAL;
}
- if (x->props.mode != XFRM_MODE_TRANSPORT &&
- x->props.mode != XFRM_MODE_TUNNEL) {
- NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded");
- return -EINVAL;
- }
break;
case XFRM_DEV_OFFLOAD_PACKET:
if (!(mlx5_ipsec_device_caps(mdev) &
@@ -253,8 +439,9 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
return -EINVAL;
}
- if (x->props.mode != XFRM_MODE_TRANSPORT) {
- NL_SET_ERR_MSG_MOD(extack, "Only transport xfrm states may be offloaded in packet mode");
+ if (x->props.mode == XFRM_MODE_TUNNEL &&
+ !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)) {
+ NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported for tunnel mode");
return -EINVAL;
}
@@ -283,6 +470,11 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
NL_SET_ERR_MSG_MOD(extack, "Hard packet limit must be greater than soft one");
return -EINVAL;
}
+
+ if (!x->lft.soft_packet_limit || !x->lft.hard_packet_limit) {
+ NL_SET_ERR_MSG_MOD(extack, "Soft/hard packet limits can't be 0");
+ return -EINVAL;
+ }
break;
default:
NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type");
@@ -291,14 +483,134 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
return 0;
}
-static void _update_xfrm_state(struct work_struct *work)
+static void mlx5e_ipsec_modify_state(struct work_struct *_work)
+{
+ struct mlx5e_ipsec_work *work =
+ container_of(_work, struct mlx5e_ipsec_work, work);
+ struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry;
+ struct mlx5_accel_esp_xfrm_attrs *attrs;
+
+ attrs = &((struct mlx5e_ipsec_sa_entry *)work->data)->attrs;
+
+ mlx5_accel_esp_modify_xfrm(sa_entry, attrs);
+}
+
+static void mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct xfrm_state *x = sa_entry->x;
+
+ if (x->xso.type != XFRM_DEV_OFFLOAD_CRYPTO ||
+ x->xso.dir != XFRM_DEV_OFFLOAD_OUT)
+ return;
+
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sa_entry->set_iv_op = mlx5e_ipsec_set_iv_esn;
+ return;
+ }
+
+ sa_entry->set_iv_op = mlx5e_ipsec_set_iv;
+}
+
+static void mlx5e_ipsec_handle_netdev_event(struct work_struct *_work)
+{
+ struct mlx5e_ipsec_work *work =
+ container_of(_work, struct mlx5e_ipsec_work, work);
+ struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry;
+ struct mlx5e_ipsec_netevent_data *data = work->data;
+ struct mlx5_accel_esp_xfrm_attrs *attrs;
+
+ attrs = &sa_entry->attrs;
+
+ switch (attrs->dir) {
+ case XFRM_DEV_OFFLOAD_IN:
+ ether_addr_copy(attrs->smac, data->addr);
+ break;
+ case XFRM_DEV_OFFLOAD_OUT:
+ ether_addr_copy(attrs->dmac, data->addr);
+ break;
+ default:
+ WARN_ON_ONCE(true);
+ }
+ attrs->drop = false;
+ mlx5e_accel_ipsec_fs_modify(sa_entry);
+}
+
+static int mlx5_ipsec_create_work(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct xfrm_state *x = sa_entry->x;
+ struct mlx5e_ipsec_work *work;
+ void *data = NULL;
+
+ switch (x->xso.type) {
+ case XFRM_DEV_OFFLOAD_CRYPTO:
+ if (!(x->props.flags & XFRM_STATE_ESN))
+ return 0;
+ break;
+ case XFRM_DEV_OFFLOAD_PACKET:
+ if (x->props.mode != XFRM_MODE_TUNNEL)
+ return 0;
+ break;
+ default:
+ break;
+ }
+
+ work = kzalloc(sizeof(*work), GFP_KERNEL);
+ if (!work)
+ return -ENOMEM;
+
+ switch (x->xso.type) {
+ case XFRM_DEV_OFFLOAD_CRYPTO:
+ data = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
+ if (!data)
+ goto free_work;
+
+ INIT_WORK(&work->work, mlx5e_ipsec_modify_state);
+ break;
+ case XFRM_DEV_OFFLOAD_PACKET:
+ data = kzalloc(sizeof(struct mlx5e_ipsec_netevent_data),
+ GFP_KERNEL);
+ if (!data)
+ goto free_work;
+
+ INIT_WORK(&work->work, mlx5e_ipsec_handle_netdev_event);
+ break;
+ default:
+ break;
+ }
+
+ work->data = data;
+ work->sa_entry = sa_entry;
+ sa_entry->work = work;
+ return 0;
+
+free_work:
+ kfree(work);
+ return -ENOMEM;
+}
+
+static int mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry *sa_entry)
{
- struct mlx5e_ipsec_modify_state_work *modify_work =
- container_of(work, struct mlx5e_ipsec_modify_state_work, work);
- struct mlx5e_ipsec_sa_entry *sa_entry = container_of(
- modify_work, struct mlx5e_ipsec_sa_entry, modify_work);
+ struct xfrm_state *x = sa_entry->x;
+ struct mlx5e_ipsec_dwork *dwork;
+
+ if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
+ return 0;
+
+ if (x->xso.dir != XFRM_DEV_OFFLOAD_OUT)
+ return 0;
+
+ if (x->lft.soft_packet_limit == XFRM_INF &&
+ x->lft.hard_packet_limit == XFRM_INF)
+ return 0;
- mlx5_accel_esp_modify_xfrm(sa_entry, &modify_work->attrs);
+ dwork = kzalloc(sizeof(*dwork), GFP_KERNEL);
+ if (!dwork)
+ return -ENOMEM;
+
+ dwork->sa_entry = sa_entry;
+ INIT_DELAYED_WORK(&dwork->dwork, mlx5e_ipsec_handle_tx_limit);
+ sa_entry->dwork = dwork;
+ return 0;
}
static int mlx5e_xfrm_add_state(struct xfrm_state *x,
@@ -308,6 +620,7 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
struct net_device *netdev = x->xso.real_dev;
struct mlx5e_ipsec *ipsec;
struct mlx5e_priv *priv;
+ gfp_t gfp;
int err;
priv = netdev_priv(netdev);
@@ -315,30 +628,52 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
return -EOPNOTSUPP;
ipsec = priv->ipsec;
- err = mlx5e_xfrm_validate_state(priv->mdev, x, extack);
- if (err)
- return err;
-
- sa_entry = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
+ gfp = (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) ? GFP_ATOMIC : GFP_KERNEL;
+ sa_entry = kzalloc(sizeof(*sa_entry), gfp);
if (!sa_entry)
return -ENOMEM;
sa_entry->x = x;
sa_entry->ipsec = ipsec;
+	/* Check if this SA originated from an acquire flow temporary SA */
+ if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
+ goto out;
+
+ err = mlx5e_xfrm_validate_state(priv->mdev, x, extack);
+ if (err)
+ goto err_xfrm;
/* check esn */
- mlx5e_ipsec_update_esn_state(sa_entry);
+ if (x->props.flags & XFRM_STATE_ESN)
+ mlx5e_ipsec_update_esn_state(sa_entry);
mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs);
+
+ err = mlx5_ipsec_create_work(sa_entry);
+ if (err)
+ goto err_xfrm;
+
+ err = mlx5e_ipsec_create_dwork(sa_entry);
+ if (err)
+ goto release_work;
+
/* create hw context */
err = mlx5_ipsec_create_sa_ctx(sa_entry);
if (err)
- goto err_xfrm;
+ goto release_dwork;
err = mlx5e_accel_ipsec_fs_add_rule(sa_entry);
if (err)
goto err_hw_ctx;
+ if (x->props.mode == XFRM_MODE_TUNNEL &&
+ x->xso.type == XFRM_DEV_OFFLOAD_PACKET &&
+ !mlx5e_ipsec_fs_tunnel_enabled(sa_entry)) {
+ NL_SET_ERR_MSG_MOD(extack, "Packet offload tunnel mode is disabled due to encap settings");
+ err = -EINVAL;
+ goto err_add_rule;
+ }
+
/* We use *_bh() variant because xfrm_timer_handler(), which runs
* in softirq context, can reach our state delete logic and we need
* xa_erase_bh() there.
@@ -348,11 +683,18 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
if (err)
goto err_add_rule;
- if (x->xso.dir == XFRM_DEV_OFFLOAD_OUT)
- sa_entry->set_iv_op = (x->props.flags & XFRM_STATE_ESN) ?
- mlx5e_ipsec_set_iv_esn : mlx5e_ipsec_set_iv;
+ mlx5e_ipsec_set_esn_ops(sa_entry);
+
+ if (sa_entry->dwork)
+ queue_delayed_work(ipsec->wq, &sa_entry->dwork->dwork,
+ MLX5_IPSEC_RESCHED);
- INIT_WORK(&sa_entry->modify_work.work, _update_xfrm_state);
+ if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET &&
+ x->props.mode == XFRM_MODE_TUNNEL)
+ xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id,
+ MLX5E_IPSEC_TUNNEL_SA);
+
+out:
x->xso.offload_handle = (unsigned long)sa_entry;
return 0;
@@ -360,32 +702,101 @@ err_add_rule:
mlx5e_accel_ipsec_fs_del_rule(sa_entry);
err_hw_ctx:
mlx5_ipsec_free_sa_ctx(sa_entry);
+release_dwork:
+ kfree(sa_entry->dwork);
+release_work:
+ if (sa_entry->work)
+ kfree(sa_entry->work->data);
+ kfree(sa_entry->work);
err_xfrm:
kfree(sa_entry);
- NL_SET_ERR_MSG_MOD(extack, "Device failed to offload this policy");
+ NL_SET_ERR_MSG_WEAK_MOD(extack, "Device failed to offload this state");
return err;
}
static void mlx5e_xfrm_del_state(struct xfrm_state *x)
{
struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
+ struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
struct mlx5e_ipsec_sa_entry *old;
+ if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
+ return;
+
old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id);
WARN_ON(old != sa_entry);
+
+ if (attrs->mode == XFRM_MODE_TUNNEL &&
+ attrs->type == XFRM_DEV_OFFLOAD_PACKET)
+ /* Make sure that no ARP requests are running in parallel */
+ flush_workqueue(ipsec->wq);
}
static void mlx5e_xfrm_free_state(struct xfrm_state *x)
{
struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
- cancel_work_sync(&sa_entry->modify_work.work);
+ if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
+ goto sa_entry_free;
+
+ if (sa_entry->work)
+ cancel_work_sync(&sa_entry->work->work);
+
+ if (sa_entry->dwork)
+ cancel_delayed_work_sync(&sa_entry->dwork->dwork);
+
mlx5e_accel_ipsec_fs_del_rule(sa_entry);
mlx5_ipsec_free_sa_ctx(sa_entry);
+ kfree(sa_entry->dwork);
+ if (sa_entry->work)
+ kfree(sa_entry->work->data);
+ kfree(sa_entry->work);
+sa_entry_free:
kfree(sa_entry);
}
+static int mlx5e_ipsec_netevent_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlx5_accel_esp_xfrm_attrs *attrs;
+ struct mlx5e_ipsec_netevent_data *data;
+ struct mlx5e_ipsec_sa_entry *sa_entry;
+ struct mlx5e_ipsec *ipsec;
+ struct neighbour *n = ptr;
+ struct net_device *netdev;
+ struct xfrm_state *x;
+ unsigned long idx;
+
+ if (event != NETEVENT_NEIGH_UPDATE || !(n->nud_state & NUD_VALID))
+ return NOTIFY_DONE;
+
+ ipsec = container_of(nb, struct mlx5e_ipsec, netevent_nb);
+ xa_for_each_marked(&ipsec->sadb, idx, sa_entry, MLX5E_IPSEC_TUNNEL_SA) {
+ attrs = &sa_entry->attrs;
+
+ if (attrs->family == AF_INET) {
+ if (!neigh_key_eq32(n, &attrs->saddr.a4) &&
+ !neigh_key_eq32(n, &attrs->daddr.a4))
+ continue;
+ } else {
+ if (!neigh_key_eq128(n, &attrs->saddr.a4) &&
+ !neigh_key_eq128(n, &attrs->daddr.a4))
+ continue;
+ }
+
+ x = sa_entry->x;
+ netdev = x->xso.real_dev;
+ data = sa_entry->work->data;
+
+ neigh_ha_snapshot(data->addr, n, netdev);
+ queue_work(ipsec->wq, &sa_entry->work->work);
+ }
+
+ return NOTIFY_DONE;
+}
+
void mlx5e_ipsec_init(struct mlx5e_priv *priv)
{
struct mlx5e_ipsec *ipsec;
@@ -402,8 +813,8 @@ void mlx5e_ipsec_init(struct mlx5e_priv *priv)
xa_init_flags(&ipsec->sadb, XA_FLAGS_ALLOC);
ipsec->mdev = priv->mdev;
- ipsec->wq = alloc_ordered_workqueue("mlx5e_ipsec: %s", 0,
- priv->netdev->name);
+ ipsec->wq = alloc_workqueue("mlx5e_ipsec: %s", WQ_UNBOUND, 0,
+ priv->netdev->name);
if (!ipsec->wq)
goto err_wq;
@@ -414,6 +825,13 @@ void mlx5e_ipsec_init(struct mlx5e_priv *priv)
goto err_aso;
}
+ if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) {
+ ipsec->netevent_nb.notifier_call = mlx5e_ipsec_netevent_event;
+ ret = register_netevent_notifier(&ipsec->netevent_nb);
+ if (ret)
+ goto clear_aso;
+ }
+
ret = mlx5e_accel_ipsec_fs_init(ipsec);
if (ret)
goto err_fs_init;
@@ -424,6 +842,9 @@ void mlx5e_ipsec_init(struct mlx5e_priv *priv)
return;
err_fs_init:
+ if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL)
+ unregister_netevent_notifier(&ipsec->netevent_nb);
+clear_aso:
if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
mlx5e_ipsec_aso_cleanup(ipsec);
err_aso:
@@ -442,6 +863,8 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
return;
mlx5e_accel_ipsec_fs_cleanup(ipsec);
+ if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL)
+ unregister_netevent_notifier(&ipsec->netevent_nb);
if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
mlx5e_ipsec_aso_cleanup(ipsec);
destroy_workqueue(ipsec->wq);
@@ -467,41 +890,43 @@ static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x)
{
struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
- struct mlx5e_ipsec_modify_state_work *modify_work =
- &sa_entry->modify_work;
+ struct mlx5e_ipsec_work *work = sa_entry->work;
+ struct mlx5e_ipsec_sa_entry *sa_entry_shadow;
bool need_update;
need_update = mlx5e_ipsec_update_esn_state(sa_entry);
if (!need_update)
return;
- mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &modify_work->attrs);
- queue_work(sa_entry->ipsec->wq, &modify_work->work);
+ sa_entry_shadow = work->data;
+ memset(sa_entry_shadow, 0x00, sizeof(*sa_entry_shadow));
+ mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry_shadow->attrs);
+ queue_work(sa_entry->ipsec->wq, &work->work);
}
static void mlx5e_xfrm_update_curlft(struct xfrm_state *x)
{
struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
- int err;
+ struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
+ u64 packets, bytes, lastuse;
- lockdep_assert_held(&x->lock);
+ lockdep_assert(lockdep_is_held(&x->lock) ||
+ lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex));
- if (sa_entry->attrs.soft_packet_limit == XFRM_INF)
- /* Limits are not configured, as soft limit
- * must be lowever than hard limit.
- */
+ if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
return;
- err = mlx5e_ipsec_aso_query(sa_entry, NULL);
- if (err)
- return;
-
- mlx5e_ipsec_aso_update_curlft(sa_entry, &x->curlft.packets);
+ mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse);
+ x->curlft.packets += packets;
+ x->curlft.bytes += bytes;
}
-static int mlx5e_xfrm_validate_policy(struct xfrm_policy *x,
+static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev,
+ struct xfrm_policy *x,
struct netlink_ext_ack *extack)
{
+ struct xfrm_selector *sel = &x->selector;
+
if (x->type != XFRM_POLICY_TYPE_MAIN) {
NL_SET_ERR_MSG_MOD(extack, "Cannot offload non-main policy types");
return -EINVAL;
@@ -519,8 +944,9 @@ static int mlx5e_xfrm_validate_policy(struct xfrm_policy *x,
return -EINVAL;
}
- if (!x->xfrm_vec[0].reqid) {
- NL_SET_ERR_MSG_MOD(extack, "Cannot offload policy without reqid");
+ if (!x->xfrm_vec[0].reqid && sel->proto == IPPROTO_IP &&
+ addr6_all_zero(sel->saddr.a6) && addr6_all_zero(sel->daddr.a6)) {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported policy with reqid 0 without at least one of upper protocol or ip addr(s) different than 0");
return -EINVAL;
}
@@ -529,12 +955,24 @@ static int mlx5e_xfrm_validate_policy(struct xfrm_policy *x,
return -EINVAL;
}
- if (x->selector.proto != IPPROTO_IP &&
- (x->selector.proto != IPPROTO_UDP || x->xdo.dir != XFRM_DEV_OFFLOAD_OUT)) {
+ if (sel->proto != IPPROTO_IP &&
+ (sel->proto != IPPROTO_UDP || x->xdo.dir != XFRM_DEV_OFFLOAD_OUT)) {
NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than UDP, and only Tx direction");
return -EINVAL;
}
+ if (x->priority) {
+ if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO)) {
+ NL_SET_ERR_MSG_MOD(extack, "Device does not support policy priority");
+ return -EINVAL;
+ }
+
+ if (x->priority == U32_MAX) {
+ NL_SET_ERR_MSG_MOD(extack, "Device does not support requested policy priority");
+ return -EINVAL;
+ }
+ }
+
return 0;
}
@@ -560,6 +998,7 @@ mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry,
attrs->upspec.sport = ntohs(sel->sport);
attrs->upspec.sport_mask = ntohs(sel->sport_mask);
attrs->upspec.proto = sel->proto;
+ attrs->prio = x->priority;
}
static int mlx5e_xfrm_add_policy(struct xfrm_policy *x,
@@ -576,7 +1015,7 @@ static int mlx5e_xfrm_add_policy(struct xfrm_policy *x,
return -EOPNOTSUPP;
}
- err = mlx5e_xfrm_validate_policy(x, extack);
+ err = mlx5e_xfrm_validate_policy(priv->mdev, x, extack);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 12f044330639..4e9887171508 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -60,10 +60,24 @@ struct upspec {
u8 proto;
};
+struct mlx5_ipsec_lft {
+ u64 hard_packet_limit;
+ u64 soft_packet_limit;
+ u64 numb_rounds_hard;
+ u64 numb_rounds_soft;
+};
+
+struct mlx5_replay_esn {
+ u32 replay_window;
+ u32 esn;
+ u32 esn_msb;
+ u8 overlap : 1;
+ u8 trigger : 1;
+};
+
struct mlx5_accel_esp_xfrm_attrs {
- u32 esn;
u32 spi;
- u32 flags;
+ u32 mode;
struct aes_gcm_keymat aes_gcm;
union {
@@ -78,15 +92,15 @@ struct mlx5_accel_esp_xfrm_attrs {
struct upspec upspec;
u8 dir : 2;
- u8 esn_overlap : 1;
- u8 esn_trigger : 1;
u8 type : 2;
+ u8 drop : 1;
u8 family;
- u32 replay_window;
+ struct mlx5_replay_esn replay_esn;
u32 authsize;
u32 reqid;
- u64 hard_packet_limit;
- u64 soft_packet_limit;
+ struct mlx5_ipsec_lft lft;
+ u8 smac[ETH_ALEN];
+ u8 dmac[ETH_ALEN];
};
enum mlx5_ipsec_cap {
@@ -94,6 +108,8 @@ enum mlx5_ipsec_cap {
MLX5_IPSEC_CAP_ESN = 1 << 1,
MLX5_IPSEC_CAP_PACKET_OFFLOAD = 1 << 2,
MLX5_IPSEC_CAP_ROCE = 1 << 3,
+ MLX5_IPSEC_CAP_PRIO = 1 << 4,
+ MLX5_IPSEC_CAP_TUNNEL = 1 << 5,
};
struct mlx5e_priv;
@@ -124,8 +140,17 @@ struct mlx5e_ipsec_tx;
struct mlx5e_ipsec_work {
struct work_struct work;
- struct mlx5e_ipsec *ipsec;
- u32 id;
+ struct mlx5e_ipsec_sa_entry *sa_entry;
+ void *data;
+};
+
+struct mlx5e_ipsec_netevent_data {
+ u8 addr[ETH_ALEN];
+};
+
+struct mlx5e_ipsec_dwork {
+ struct delayed_work dwork;
+ struct mlx5e_ipsec_sa_entry *sa_entry;
};
struct mlx5e_ipsec_aso {
@@ -148,12 +173,13 @@ struct mlx5e_ipsec {
struct mlx5e_ipsec_tx *tx;
struct mlx5e_ipsec_aso *aso;
struct notifier_block nb;
+ struct notifier_block netevent_nb;
struct mlx5_ipsec_fs *roce;
};
struct mlx5e_ipsec_esn_state {
u32 esn;
- u8 trigger: 1;
+ u32 esn_msb;
u8 overlap: 1;
};
@@ -161,11 +187,13 @@ struct mlx5e_ipsec_rule {
struct mlx5_flow_handle *rule;
struct mlx5_modify_hdr *modify_hdr;
struct mlx5_pkt_reformat *pkt_reformat;
+ struct mlx5_fc *fc;
};
-struct mlx5e_ipsec_modify_state_work {
- struct work_struct work;
- struct mlx5_accel_esp_xfrm_attrs attrs;
+struct mlx5e_ipsec_limits {
+ u64 round;
+ u8 soft_limit_hit : 1;
+ u8 fix_limit : 1;
};
struct mlx5e_ipsec_sa_entry {
@@ -178,7 +206,9 @@ struct mlx5e_ipsec_sa_entry {
u32 ipsec_obj_id;
u32 enc_key_id;
struct mlx5e_ipsec_rule ipsec_rule;
- struct mlx5e_ipsec_modify_state_work modify_work;
+ struct mlx5e_ipsec_work *work;
+ struct mlx5e_ipsec_dwork *dwork;
+ struct mlx5e_ipsec_limits limits;
};
struct mlx5_accel_pol_xfrm_attrs {
@@ -198,6 +228,7 @@ struct mlx5_accel_pol_xfrm_attrs {
u8 type : 2;
u8 dir : 2;
u32 reqid;
+ u32 prio;
};
struct mlx5e_ipsec_pol_entry {
@@ -219,6 +250,8 @@ int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry);
void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry);
int mlx5e_accel_ipsec_fs_add_pol(struct mlx5e_ipsec_pol_entry *pol_entry);
void mlx5e_accel_ipsec_fs_del_pol(struct mlx5e_ipsec_pol_entry *pol_entry);
+void mlx5e_accel_ipsec_fs_modify(struct mlx5e_ipsec_sa_entry *sa_entry);
+bool mlx5e_ipsec_fs_tunnel_enabled(struct mlx5e_ipsec_sa_entry *sa_entry);
int mlx5_ipsec_create_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry);
void mlx5_ipsec_free_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry);
@@ -233,9 +266,6 @@ void mlx5e_ipsec_aso_cleanup(struct mlx5e_ipsec *ipsec);
int mlx5e_ipsec_aso_query(struct mlx5e_ipsec_sa_entry *sa_entry,
struct mlx5_wqe_aso_ctrl_seg *data);
-void mlx5e_ipsec_aso_update_curlft(struct mlx5e_ipsec_sa_entry *sa_entry,
- u64 *packets);
-
void mlx5e_accel_ipsec_fs_read_stats(struct mlx5e_priv *priv,
void *ipsec_stats);
@@ -252,6 +282,13 @@ mlx5e_ipsec_pol2dev(struct mlx5e_ipsec_pol_entry *pol_entry)
{
return pol_entry->ipsec->mdev;
}
+
+static inline bool addr6_all_zero(__be32 *addr6)
+{
+ static const __be32 zaddr6[4] = {};
+
+ return !memcmp(addr6, zaddr6, sizeof(zaddr6));
+}
#else
static inline void mlx5e_ipsec_init(struct mlx5e_priv *priv)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index 9871ba1b25ff..dbe87bf89c0d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -4,11 +4,15 @@
#include <linux/netdevice.h>
#include "en.h"
#include "en/fs.h"
+#include "eswitch.h"
#include "ipsec.h"
#include "fs_core.h"
#include "lib/ipsec_fs_roce.h"
+#include "lib/fs_chains.h"
#define NUM_IPSEC_FTE BIT(15)
+#define MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE 16
+#define IPSEC_TUNNEL_DEFAULT_TTL 0x40
struct mlx5e_ipsec_fc {
struct mlx5_fc *cnt;
@@ -34,13 +38,18 @@ struct mlx5e_ipsec_rx {
struct mlx5e_ipsec_miss sa;
struct mlx5e_ipsec_rule status;
struct mlx5e_ipsec_fc *fc;
+ struct mlx5_fs_chains *chains;
+ u8 allow_tunnel_mode : 1;
};
struct mlx5e_ipsec_tx {
struct mlx5e_ipsec_ft ft;
struct mlx5e_ipsec_miss pol;
+ struct mlx5e_ipsec_rule status;
struct mlx5_flow_namespace *ns;
struct mlx5e_ipsec_fc *fc;
+ struct mlx5_fs_chains *chains;
+ u8 allow_tunnel_mode : 1;
};
/* IPsec RX flow steering */
@@ -51,9 +60,70 @@ static enum mlx5_traffic_types family2tt(u32 family)
return MLX5_TT_IPV6_IPSEC_ESP;
}
+static struct mlx5e_ipsec_rx *ipsec_rx(struct mlx5e_ipsec *ipsec, u32 family)
+{
+ if (family == AF_INET)
+ return ipsec->rx_ipv4;
+
+ return ipsec->rx_ipv6;
+}
+
+static struct mlx5_fs_chains *
+ipsec_chains_create(struct mlx5_core_dev *mdev, struct mlx5_flow_table *miss_ft,
+ enum mlx5_flow_namespace_type ns, int base_prio,
+ int base_level, struct mlx5_flow_table **root_ft)
+{
+ struct mlx5_chains_attr attr = {};
+ struct mlx5_fs_chains *chains;
+ struct mlx5_flow_table *ft;
+ int err;
+
+ attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
+ MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
+ attr.max_grp_num = 2;
+ attr.default_ft = miss_ft;
+ attr.ns = ns;
+ attr.fs_base_prio = base_prio;
+ attr.fs_base_level = base_level;
+ chains = mlx5_chains_create(mdev, &attr);
+ if (IS_ERR(chains))
+ return chains;
+
+ /* Create chain 0, prio 1, level 0 to connect chains to prev in fs_core */
+ ft = mlx5_chains_get_table(chains, 0, 1, 0);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ goto err_chains_get;
+ }
+
+ *root_ft = ft;
+ return chains;
+
+err_chains_get:
+ mlx5_chains_destroy(chains);
+ return ERR_PTR(err);
+}
+
+static void ipsec_chains_destroy(struct mlx5_fs_chains *chains)
+{
+ mlx5_chains_put_table(chains, 0, 1, 0);
+ mlx5_chains_destroy(chains);
+}
+
+static struct mlx5_flow_table *
+ipsec_chains_get_table(struct mlx5_fs_chains *chains, u32 prio)
+{
+ return mlx5_chains_get_table(chains, 0, prio + 1, 0);
+}
+
+static void ipsec_chains_put_table(struct mlx5_fs_chains *chains, u32 prio)
+{
+ mlx5_chains_put_table(chains, 0, prio + 1, 0);
+}
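
A short usage sketch of these helpers (hypothetical function and priorities, not part of the patch): a policy priority p is translated to chains prio p + 1, so priority 0 shares the connector table created at prio 1 in ipsec_chains_create(), while higher priorities get their own auto-created tables.

/* Hedged sketch: grab per-priority policy tables from the chains object and
 * release them in reverse order. Priorities 1 and 10 are arbitrary examples.
 */
static int example_use_policy_chains(struct mlx5_fs_chains *chains)
{
	struct mlx5_flow_table *ft_hi, *ft_lo;
	int err;

	ft_hi = ipsec_chains_get_table(chains, 1);  /* chain 0, prio 2, level 0 */
	if (IS_ERR(ft_hi))
		return PTR_ERR(ft_hi);

	ft_lo = ipsec_chains_get_table(chains, 10); /* chain 0, prio 11, level 0 */
	if (IS_ERR(ft_lo)) {
		err = PTR_ERR(ft_lo);
		ipsec_chains_put_table(chains, 1);
		return err;
	}

	/* ... add policy rules to ft_hi / ft_lo ... */

	ipsec_chains_put_table(chains, 10);
	ipsec_chains_put_table(chains, 1);
	return 0;
}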
+
static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_flow_namespace *ns,
int level, int prio,
- int max_num_groups)
+ int max_num_groups, u32 flags)
{
struct mlx5_flow_table_attr ft_attr = {};
@@ -62,6 +132,7 @@ static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_flow_namespace *ns,
ft_attr.max_fte = NUM_IPSEC_FTE;
ft_attr.level = level;
ft_attr.prio = prio;
+ ft_attr.flags = flags;
return mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
}
@@ -170,14 +241,24 @@ out:
static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
struct mlx5e_ipsec_rx *rx, u32 family)
{
- mlx5_del_flow_rules(rx->pol.rule);
- mlx5_destroy_flow_group(rx->pol.group);
- mlx5_destroy_flow_table(rx->ft.pol);
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(ipsec->fs, false);
+
+ /* disconnect */
+ mlx5_ttc_fwd_default_dest(ttc, family2tt(family));
+
+ if (rx->chains) {
+ ipsec_chains_destroy(rx->chains);
+ } else {
+ mlx5_del_flow_rules(rx->pol.rule);
+ mlx5_destroy_flow_group(rx->pol.group);
+ mlx5_destroy_flow_table(rx->ft.pol);
+ }
mlx5_del_flow_rules(rx->sa.rule);
mlx5_destroy_flow_group(rx->sa.group);
mlx5_destroy_flow_table(rx->ft.sa);
-
+ if (rx->allow_tunnel_mode)
+ mlx5_eswitch_unblock_encap(mdev);
mlx5_del_flow_rules(rx->status.rule);
mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr);
mlx5_destroy_flow_table(rx->ft.status);
@@ -193,6 +274,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
struct mlx5_flow_destination default_dest;
struct mlx5_flow_destination dest[2];
struct mlx5_flow_table *ft;
+ u32 flags = 0;
int err;
default_dest = mlx5_ttc_get_default_dest(ttc, family2tt(family));
@@ -203,7 +285,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
return err;
ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL,
- MLX5E_NIC_PRIO, 1);
+ MLX5E_NIC_PRIO, 1, 0);
if (IS_ERR(ft)) {
err = PTR_ERR(ft);
goto err_fs_ft_status;
@@ -226,8 +308,12 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
goto err_add;
/* Create FT */
- ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_ESP_FT_LEVEL, MLX5E_NIC_PRIO,
- 2);
+ if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)
+ rx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev);
+ if (rx->allow_tunnel_mode)
+ flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_ESP_FT_LEVEL, MLX5E_NIC_PRIO, 2,
+ flags);
if (IS_ERR(ft)) {
err = PTR_ERR(ft);
goto err_fs_ft;
@@ -238,8 +324,22 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
if (err)
goto err_fs;
+ if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO) {
+ rx->chains = ipsec_chains_create(mdev, rx->ft.sa,
+ MLX5_FLOW_NAMESPACE_KERNEL,
+ MLX5E_NIC_PRIO,
+ MLX5E_ACCEL_FS_POL_FT_LEVEL,
+ &rx->ft.pol);
+ if (IS_ERR(rx->chains)) {
+ err = PTR_ERR(rx->chains);
+ goto err_pol_ft;
+ }
+
+ goto connect;
+ }
+
ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_POL_FT_LEVEL, MLX5E_NIC_PRIO,
- 2);
+ 2, 0);
if (IS_ERR(ft)) {
err = PTR_ERR(ft);
goto err_pol_ft;
@@ -252,6 +352,12 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
if (err)
goto err_pol_miss;
+connect:
+ /* connect */
+ memset(dest, 0x00, sizeof(*dest));
+ dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[0].ft = rx->ft.pol;
+ mlx5_ttc_fwd_dest(ttc, family2tt(family), &dest[0]);
return 0;
err_pol_miss:
@@ -262,6 +368,8 @@ err_pol_ft:
err_fs:
mlx5_destroy_flow_table(rx->ft.sa);
err_fs_ft:
+ if (rx->allow_tunnel_mode)
+ mlx5_eswitch_unblock_encap(mdev);
mlx5_del_flow_rules(rx->status.rule);
mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr);
err_add:
@@ -271,83 +379,191 @@ err_fs_ft_status:
return err;
}
-static struct mlx5e_ipsec_rx *rx_ft_get(struct mlx5_core_dev *mdev,
- struct mlx5e_ipsec *ipsec, u32 family)
+static int rx_get(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
+ struct mlx5e_ipsec_rx *rx, u32 family)
{
- struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(ipsec->fs, false);
- struct mlx5_flow_destination dest = {};
- struct mlx5e_ipsec_rx *rx;
- int err = 0;
-
- if (family == AF_INET)
- rx = ipsec->rx_ipv4;
- else
- rx = ipsec->rx_ipv6;
+ int err;
- mutex_lock(&rx->ft.mutex);
if (rx->ft.refcnt)
goto skip;
- /* create FT */
err = rx_create(mdev, ipsec, rx, family);
if (err)
- goto out;
-
- /* connect */
- dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest.ft = rx->ft.pol;
- mlx5_ttc_fwd_dest(ttc, family2tt(family), &dest);
+ return err;
skip:
rx->ft.refcnt++;
-out:
+ return 0;
+}
+
+static void rx_put(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx,
+ u32 family)
+{
+ if (--rx->ft.refcnt)
+ return;
+
+ rx_destroy(ipsec->mdev, ipsec, rx, family);
+}
+
+static struct mlx5e_ipsec_rx *rx_ft_get(struct mlx5_core_dev *mdev,
+ struct mlx5e_ipsec *ipsec, u32 family)
+{
+ struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family);
+ int err;
+
+ mutex_lock(&rx->ft.mutex);
+ err = rx_get(mdev, ipsec, rx, family);
mutex_unlock(&rx->ft.mutex);
if (err)
return ERR_PTR(err);
+
return rx;
}
-static void rx_ft_put(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
- u32 family)
+static struct mlx5_flow_table *rx_ft_get_policy(struct mlx5_core_dev *mdev,
+ struct mlx5e_ipsec *ipsec,
+ u32 family, u32 prio)
{
- struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(ipsec->fs, false);
- struct mlx5e_ipsec_rx *rx;
+ struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family);
+ struct mlx5_flow_table *ft;
+ int err;
- if (family == AF_INET)
- rx = ipsec->rx_ipv4;
- else
- rx = ipsec->rx_ipv6;
+ mutex_lock(&rx->ft.mutex);
+ err = rx_get(mdev, ipsec, rx, family);
+ if (err)
+ goto err_get;
+
+ ft = rx->chains ? ipsec_chains_get_table(rx->chains, prio) : rx->ft.pol;
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ goto err_get_ft;
+ }
+
+ mutex_unlock(&rx->ft.mutex);
+ return ft;
+
+err_get_ft:
+ rx_put(ipsec, rx, family);
+err_get:
+ mutex_unlock(&rx->ft.mutex);
+ return ERR_PTR(err);
+}
+
+static void rx_ft_put(struct mlx5e_ipsec *ipsec, u32 family)
+{
+ struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family);
mutex_lock(&rx->ft.mutex);
- rx->ft.refcnt--;
- if (rx->ft.refcnt)
- goto out;
+ rx_put(ipsec, rx, family);
+ mutex_unlock(&rx->ft.mutex);
+}
- /* disconnect */
- mlx5_ttc_fwd_default_dest(ttc, family2tt(family));
+static void rx_ft_put_policy(struct mlx5e_ipsec *ipsec, u32 family, u32 prio)
+{
+ struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family);
- /* remove FT */
- rx_destroy(mdev, ipsec, rx, family);
+ mutex_lock(&rx->ft.mutex);
+ if (rx->chains)
+ ipsec_chains_put_table(rx->chains, prio);
-out:
+ rx_put(ipsec, rx, family);
mutex_unlock(&rx->ft.mutex);
}
+static int ipsec_counter_rule_tx(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *fte;
+ struct mlx5_flow_spec *spec;
+ int err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ /* create fte */
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest.counter_id = mlx5_fc_id(tx->fc->cnt);
+ fte = mlx5_add_flow_rules(tx->ft.status, spec, &flow_act, &dest, 1);
+ if (IS_ERR(fte)) {
+ err = PTR_ERR(fte);
+		mlx5_core_err(mdev, "Failed to add ipsec tx counter rule err=%d\n", err);
+ goto err_rule;
+ }
+
+ kvfree(spec);
+ tx->status.rule = fte;
+ return 0;
+
+err_rule:
+ kvfree(spec);
+ return err;
+}
+
/* IPsec TX flow steering */
+static void tx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx,
+ struct mlx5_ipsec_fs *roce)
+{
+ mlx5_ipsec_fs_roce_tx_destroy(roce);
+ if (tx->chains) {
+ ipsec_chains_destroy(tx->chains);
+ } else {
+ mlx5_del_flow_rules(tx->pol.rule);
+ mlx5_destroy_flow_group(tx->pol.group);
+ mlx5_destroy_flow_table(tx->ft.pol);
+ }
+
+ mlx5_destroy_flow_table(tx->ft.sa);
+ if (tx->allow_tunnel_mode)
+ mlx5_eswitch_unblock_encap(mdev);
+ mlx5_del_flow_rules(tx->status.rule);
+ mlx5_destroy_flow_table(tx->ft.status);
+}
+
static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx,
struct mlx5_ipsec_fs *roce)
{
struct mlx5_flow_destination dest = {};
struct mlx5_flow_table *ft;
+ u32 flags = 0;
int err;
- ft = ipsec_ft_create(tx->ns, 1, 0, 4);
+ ft = ipsec_ft_create(tx->ns, 2, 0, 1, 0);
if (IS_ERR(ft))
return PTR_ERR(ft);
+ tx->ft.status = ft;
+
+ err = ipsec_counter_rule_tx(mdev, tx);
+ if (err)
+ goto err_status_rule;
+ if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)
+ tx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev);
+ if (tx->allow_tunnel_mode)
+ flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ ft = ipsec_ft_create(tx->ns, 1, 0, 4, flags);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ goto err_sa_ft;
+ }
tx->ft.sa = ft;
- ft = ipsec_ft_create(tx->ns, 0, 0, 2);
+ if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO) {
+ tx->chains = ipsec_chains_create(
+ mdev, tx->ft.sa, MLX5_FLOW_NAMESPACE_EGRESS_IPSEC, 0, 0,
+ &tx->ft.pol);
+ if (IS_ERR(tx->chains)) {
+ err = PTR_ERR(tx->chains);
+ goto err_pol_ft;
+ }
+
+ goto connect_roce;
+ }
+
+ ft = ipsec_ft_create(tx->ns, 0, 0, 2, 0);
if (IS_ERR(ft)) {
err = PTR_ERR(ft);
goto err_pol_ft;
@@ -356,44 +572,102 @@ static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx,
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest.ft = tx->ft.sa;
err = ipsec_miss_create(mdev, tx->ft.pol, &tx->pol, &dest);
- if (err)
- goto err_pol_miss;
+ if (err) {
+ mlx5_destroy_flow_table(tx->ft.pol);
+ goto err_pol_ft;
+ }
+connect_roce:
err = mlx5_ipsec_fs_roce_tx_create(mdev, roce, tx->ft.pol);
if (err)
goto err_roce;
return 0;
err_roce:
- mlx5_del_flow_rules(tx->pol.rule);
- mlx5_destroy_flow_group(tx->pol.group);
-err_pol_miss:
- mlx5_destroy_flow_table(tx->ft.pol);
+ if (tx->chains) {
+ ipsec_chains_destroy(tx->chains);
+ } else {
+ mlx5_del_flow_rules(tx->pol.rule);
+ mlx5_destroy_flow_group(tx->pol.group);
+ mlx5_destroy_flow_table(tx->ft.pol);
+ }
err_pol_ft:
mlx5_destroy_flow_table(tx->ft.sa);
+err_sa_ft:
+ if (tx->allow_tunnel_mode)
+ mlx5_eswitch_unblock_encap(mdev);
+ mlx5_del_flow_rules(tx->status.rule);
+err_status_rule:
+ mlx5_destroy_flow_table(tx->ft.status);
return err;
}
-static struct mlx5e_ipsec_tx *tx_ft_get(struct mlx5_core_dev *mdev,
- struct mlx5e_ipsec *ipsec)
+static int tx_get(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
+ struct mlx5e_ipsec_tx *tx)
{
- struct mlx5e_ipsec_tx *tx = ipsec->tx;
- int err = 0;
+ int err;
- mutex_lock(&tx->ft.mutex);
if (tx->ft.refcnt)
goto skip;
err = tx_create(mdev, tx, ipsec->roce);
if (err)
- goto out;
+ return err;
skip:
tx->ft.refcnt++;
-out:
+ return 0;
+}
+
+static void tx_put(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx)
+{
+ if (--tx->ft.refcnt)
+ return;
+
+ tx_destroy(ipsec->mdev, tx, ipsec->roce);
+}
+
+static struct mlx5_flow_table *tx_ft_get_policy(struct mlx5_core_dev *mdev,
+ struct mlx5e_ipsec *ipsec,
+ u32 prio)
+{
+ struct mlx5e_ipsec_tx *tx = ipsec->tx;
+ struct mlx5_flow_table *ft;
+ int err;
+
+ mutex_lock(&tx->ft.mutex);
+ err = tx_get(mdev, ipsec, tx);
+ if (err)
+ goto err_get;
+
+ ft = tx->chains ? ipsec_chains_get_table(tx->chains, prio) : tx->ft.pol;
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ goto err_get_ft;
+ }
+
+ mutex_unlock(&tx->ft.mutex);
+ return ft;
+
+err_get_ft:
+ tx_put(ipsec, tx);
+err_get:
+ mutex_unlock(&tx->ft.mutex);
+ return ERR_PTR(err);
+}
+
+static struct mlx5e_ipsec_tx *tx_ft_get(struct mlx5_core_dev *mdev,
+ struct mlx5e_ipsec *ipsec)
+{
+ struct mlx5e_ipsec_tx *tx = ipsec->tx;
+ int err;
+
+ mutex_lock(&tx->ft.mutex);
+ err = tx_get(mdev, ipsec, tx);
mutex_unlock(&tx->ft.mutex);
if (err)
return ERR_PTR(err);
+
return tx;
}
@@ -402,53 +676,72 @@ static void tx_ft_put(struct mlx5e_ipsec *ipsec)
struct mlx5e_ipsec_tx *tx = ipsec->tx;
mutex_lock(&tx->ft.mutex);
- tx->ft.refcnt--;
- if (tx->ft.refcnt)
- goto out;
+ tx_put(ipsec, tx);
+ mutex_unlock(&tx->ft.mutex);
+}
- mlx5_ipsec_fs_roce_tx_destroy(ipsec->roce);
- mlx5_del_flow_rules(tx->pol.rule);
- mlx5_destroy_flow_group(tx->pol.group);
- mlx5_destroy_flow_table(tx->ft.pol);
- mlx5_destroy_flow_table(tx->ft.sa);
-out:
+static void tx_ft_put_policy(struct mlx5e_ipsec *ipsec, u32 prio)
+{
+ struct mlx5e_ipsec_tx *tx = ipsec->tx;
+
+ mutex_lock(&tx->ft.mutex);
+ if (tx->chains)
+ ipsec_chains_put_table(tx->chains, prio);
+
+ tx_put(ipsec, tx);
mutex_unlock(&tx->ft.mutex);
}
static void setup_fte_addr4(struct mlx5_flow_spec *spec, __be32 *saddr,
__be32 *daddr)
{
+ if (!*saddr && !*daddr)
+ return;
+
spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, 4);
- memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
- outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4), saddr, 4);
- memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
- outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4), daddr, 4);
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
- outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
- outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ if (*saddr) {
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4), saddr, 4);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ }
+
+ if (*daddr) {
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4), daddr, 4);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ }
}
static void setup_fte_addr6(struct mlx5_flow_spec *spec, __be32 *saddr,
__be32 *daddr)
{
+ if (addr6_all_zero(saddr) && addr6_all_zero(daddr))
+ return;
+
spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, 6);
- memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
- outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), saddr, 16);
- memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
- outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), daddr, 16);
- memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), 0xff, 16);
- memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 0xff, 16);
+ if (!addr6_all_zero(saddr)) {
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), saddr, 16);
+ memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), 0xff, 16);
+ }
+
+ if (!addr6_all_zero(daddr)) {
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), daddr, 16);
+ memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 0xff, 16);
+ }
}
static void setup_fte_esp(struct mlx5_flow_spec *spec)
@@ -560,40 +853,181 @@ static int setup_modify_header(struct mlx5_core_dev *mdev, u32 val, u8 dir,
return 0;
}
+static int
+setup_pkt_tunnel_reformat(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm_attrs *attrs,
+ struct mlx5_pkt_reformat_params *reformat_params)
+{
+ struct ip_esp_hdr *esp_hdr;
+ struct ipv6hdr *ipv6hdr;
+ struct ethhdr *eth_hdr;
+ struct iphdr *iphdr;
+ char *reformatbf;
+ size_t bfflen;
+ void *hdr;
+
+ bfflen = sizeof(*eth_hdr);
+
+ if (attrs->dir == XFRM_DEV_OFFLOAD_OUT) {
+ bfflen += sizeof(*esp_hdr) + 8;
+
+ switch (attrs->family) {
+ case AF_INET:
+ bfflen += sizeof(*iphdr);
+ break;
+ case AF_INET6:
+ bfflen += sizeof(*ipv6hdr);
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ reformatbf = kzalloc(bfflen, GFP_KERNEL);
+ if (!reformatbf)
+ return -ENOMEM;
+
+ eth_hdr = (struct ethhdr *)reformatbf;
+ switch (attrs->family) {
+ case AF_INET:
+ eth_hdr->h_proto = htons(ETH_P_IP);
+ break;
+ case AF_INET6:
+ eth_hdr->h_proto = htons(ETH_P_IPV6);
+ break;
+ default:
+ goto free_reformatbf;
+ }
+
+ ether_addr_copy(eth_hdr->h_dest, attrs->dmac);
+ ether_addr_copy(eth_hdr->h_source, attrs->smac);
+
+ switch (attrs->dir) {
+ case XFRM_DEV_OFFLOAD_IN:
+ reformat_params->type = MLX5_REFORMAT_TYPE_L3_ESP_TUNNEL_TO_L2;
+ break;
+ case XFRM_DEV_OFFLOAD_OUT:
+ reformat_params->type = MLX5_REFORMAT_TYPE_L2_TO_L3_ESP_TUNNEL;
+ reformat_params->param_0 = attrs->authsize;
+
+ hdr = reformatbf + sizeof(*eth_hdr);
+ switch (attrs->family) {
+ case AF_INET:
+ iphdr = (struct iphdr *)hdr;
+ memcpy(&iphdr->saddr, &attrs->saddr.a4, 4);
+ memcpy(&iphdr->daddr, &attrs->daddr.a4, 4);
+ iphdr->version = 4;
+ iphdr->ihl = 5;
+ iphdr->ttl = IPSEC_TUNNEL_DEFAULT_TTL;
+ iphdr->protocol = IPPROTO_ESP;
+ hdr += sizeof(*iphdr);
+ break;
+ case AF_INET6:
+ ipv6hdr = (struct ipv6hdr *)hdr;
+ memcpy(&ipv6hdr->saddr, &attrs->saddr.a6, 16);
+ memcpy(&ipv6hdr->daddr, &attrs->daddr.a6, 16);
+ ipv6hdr->nexthdr = IPPROTO_ESP;
+ ipv6hdr->version = 6;
+ ipv6hdr->hop_limit = IPSEC_TUNNEL_DEFAULT_TTL;
+ hdr += sizeof(*ipv6hdr);
+ break;
+ default:
+ goto free_reformatbf;
+ }
+
+ esp_hdr = (struct ip_esp_hdr *)hdr;
+ esp_hdr->spi = htonl(attrs->spi);
+ break;
+ default:
+ goto free_reformatbf;
+ }
+
+ reformat_params->size = bfflen;
+ reformat_params->data = reformatbf;
+ return 0;
+
+free_reformatbf:
+ kfree(reformatbf);
+ return -EINVAL;
+}
+
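
setup_pkt_tunnel_reformat() serializes the outer headers that the device prepends on egress (or strips on ingress) in tunnel mode: an Ethernet header, an outer IPv4/IPv6 header, the ESP header, and 8 trailing bytes of room. The standalone userspace sketch below lays out the same IPv4 egress blob with the UAPI structs; the addresses, SPI, and TTL value are made-up examples, not values taken from the driver.

#include <arpa/inet.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	size_t bfflen = sizeof(struct ethhdr) + sizeof(struct iphdr) +
			sizeof(struct ip_esp_hdr) + 8;
	unsigned char *buf = calloc(1, bfflen);
	struct ethhdr *eth;
	struct iphdr *ip;
	struct ip_esp_hdr *esp;

	if (!buf)
		return 1;

	eth = (struct ethhdr *)buf;
	ip = (struct iphdr *)(eth + 1);
	esp = (struct ip_esp_hdr *)(ip + 1);

	/* Outer Ethernet header: the ethertype selects IPv4; the MACs come
	 * from the xfrm attributes in the real code.
	 */
	eth->h_proto = htons(ETH_P_IP);

	/* Outer IPv4 skeleton, mirroring the driver: no length or checksum,
	 * those are filled in per packet by the hardware.
	 */
	ip->version = 4;
	ip->ihl = 5;
	ip->ttl = 0x40;			/* stand-in for IPSEC_TUNNEL_DEFAULT_TTL */
	ip->protocol = IPPROTO_ESP;
	inet_pton(AF_INET, "192.0.2.1", &ip->saddr);
	inet_pton(AF_INET, "198.51.100.1", &ip->daddr);

	/* ESP header: only the SPI is known up front. */
	esp->spi = htonl(0xdeadbeef);

	printf("reformat blob: %zu bytes (eth %zu + ip %zu + esp %zu + 8)\n",
	       bfflen, sizeof(*eth), sizeof(*ip), sizeof(*esp));
	free(buf);
	return 0;
}
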
+static int
+setup_pkt_transport_reformat(struct mlx5_accel_esp_xfrm_attrs *attrs,
+ struct mlx5_pkt_reformat_params *reformat_params)
+{
+ u8 *reformatbf;
+ __be32 spi;
+
+ switch (attrs->dir) {
+ case XFRM_DEV_OFFLOAD_IN:
+ reformat_params->type = MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT;
+ break;
+ case XFRM_DEV_OFFLOAD_OUT:
+ if (attrs->family == AF_INET)
+ reformat_params->type =
+ MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4;
+ else
+ reformat_params->type =
+ MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6;
+
+ reformatbf = kzalloc(MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE,
+ GFP_KERNEL);
+ if (!reformatbf)
+ return -ENOMEM;
+
+ /* convert to network format */
+ spi = htonl(attrs->spi);
+ memcpy(reformatbf, &spi, sizeof(spi));
+
+ reformat_params->param_0 = attrs->authsize;
+ reformat_params->size =
+ MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE;
+ reformat_params->data = reformatbf;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int setup_pkt_reformat(struct mlx5_core_dev *mdev,
struct mlx5_accel_esp_xfrm_attrs *attrs,
struct mlx5_flow_act *flow_act)
{
- enum mlx5_flow_namespace_type ns_type = MLX5_FLOW_NAMESPACE_EGRESS;
struct mlx5_pkt_reformat_params reformat_params = {};
struct mlx5_pkt_reformat *pkt_reformat;
- u8 reformatbf[16] = {};
- __be32 spi;
+ enum mlx5_flow_namespace_type ns_type;
+ int ret;
- if (attrs->dir == XFRM_DEV_OFFLOAD_IN) {
- reformat_params.type = MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT;
+ switch (attrs->dir) {
+ case XFRM_DEV_OFFLOAD_IN:
ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
- goto cmd;
+ break;
+ case XFRM_DEV_OFFLOAD_OUT:
+ ns_type = MLX5_FLOW_NAMESPACE_EGRESS;
+ break;
+ default:
+ return -EINVAL;
}
- if (attrs->family == AF_INET)
- reformat_params.type =
- MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4;
- else
- reformat_params.type =
- MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6;
-
- /* convert to network format */
- spi = htonl(attrs->spi);
- memcpy(reformatbf, &spi, 4);
+ switch (attrs->mode) {
+ case XFRM_MODE_TRANSPORT:
+ ret = setup_pkt_transport_reformat(attrs, &reformat_params);
+ break;
+ case XFRM_MODE_TUNNEL:
+ ret = setup_pkt_tunnel_reformat(mdev, attrs, &reformat_params);
+ break;
+ default:
+ ret = -EINVAL;
+ }
- reformat_params.param_0 = attrs->authsize;
- reformat_params.size = sizeof(reformatbf);
- reformat_params.data = &reformatbf;
+ if (ret)
+ return ret;
-cmd:
pkt_reformat =
mlx5_packet_reformat_alloc(mdev, &reformat_params, ns_type);
+ kfree(reformat_params.data);
if (IS_ERR(pkt_reformat))
return PTR_ERR(pkt_reformat);
@@ -607,11 +1041,12 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
- struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_destination dest[2];
struct mlx5_flow_act flow_act = {};
struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
struct mlx5e_ipsec_rx *rx;
+ struct mlx5_fc *counter;
int err;
rx = rx_ft_get(mdev, ipsec, attrs->family);
@@ -648,14 +1083,25 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
break;
}
+ counter = mlx5_fc_create(mdev, true);
+ if (IS_ERR(counter)) {
+ err = PTR_ERR(counter);
+ goto err_add_cnt;
+ }
flow_act.crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_IPSEC;
flow_act.crypto.obj_id = sa_entry->ipsec_obj_id;
flow_act.flags |= FLOW_ACT_NO_APPEND;
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT;
- dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest.ft = rx->ft.status;
- rule = mlx5_add_flow_rules(rx->ft.sa, spec, &flow_act, &dest, 1);
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ if (attrs->drop)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+ else
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[0].ft = rx->ft.status;
+ dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest[1].counter_id = mlx5_fc_id(counter);
+ rule = mlx5_add_flow_rules(rx->ft.sa, spec, &flow_act, dest, 2);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
mlx5_core_err(mdev, "fail to add RX ipsec rule err=%d\n", err);
@@ -665,10 +1111,13 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
sa_entry->ipsec_rule.rule = rule;
sa_entry->ipsec_rule.modify_hdr = flow_act.modify_hdr;
+ sa_entry->ipsec_rule.fc = counter;
sa_entry->ipsec_rule.pkt_reformat = flow_act.pkt_reformat;
return 0;
err_add_flow:
+ mlx5_fc_destroy(mdev, counter);
+err_add_cnt:
if (flow_act.pkt_reformat)
mlx5_packet_reformat_dealloc(mdev, flow_act.pkt_reformat);
err_pkt_reformat:
@@ -676,7 +1125,7 @@ err_pkt_reformat:
err_mod_header:
kvfree(spec);
err_alloc:
- rx_ft_put(mdev, ipsec, attrs->family);
+ rx_ft_put(ipsec, attrs->family);
return err;
}
@@ -685,12 +1134,13 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
- struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_destination dest[2];
struct mlx5_flow_act flow_act = {};
struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
struct mlx5e_ipsec_tx *tx;
- int err = 0;
+ struct mlx5_fc *counter;
+ int err;
tx = tx_ft_get(mdev, ipsec);
if (IS_ERR(tx))
@@ -717,7 +1167,8 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
setup_fte_reg_a(spec);
break;
case XFRM_DEV_OFFLOAD_PACKET:
- setup_fte_reg_c0(spec, attrs->reqid);
+ if (attrs->reqid)
+ setup_fte_reg_c0(spec, attrs->reqid);
err = setup_pkt_reformat(mdev, attrs, &flow_act);
if (err)
goto err_pkt_reformat;
@@ -726,15 +1177,27 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
break;
}
+ counter = mlx5_fc_create(mdev, true);
+ if (IS_ERR(counter)) {
+ err = PTR_ERR(counter);
+ goto err_add_cnt;
+ }
+
flow_act.crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_IPSEC;
flow_act.crypto.obj_id = sa_entry->ipsec_obj_id;
flow_act.flags |= FLOW_ACT_NO_APPEND;
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW |
- MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT |
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
- dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dest.counter_id = mlx5_fc_id(tx->fc->cnt);
- rule = mlx5_add_flow_rules(tx->ft.sa, spec, &flow_act, &dest, 1);
+ if (attrs->drop)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+ else
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ dest[0].ft = tx->ft.status;
+ dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest[1].counter_id = mlx5_fc_id(counter);
+ rule = mlx5_add_flow_rules(tx->ft.sa, spec, &flow_act, dest, 2);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
mlx5_core_err(mdev, "fail to add TX ipsec rule err=%d\n", err);
@@ -743,10 +1206,13 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
kvfree(spec);
sa_entry->ipsec_rule.rule = rule;
+ sa_entry->ipsec_rule.fc = counter;
sa_entry->ipsec_rule.pkt_reformat = flow_act.pkt_reformat;
return 0;
err_add_flow:
+ mlx5_fc_destroy(mdev, counter);
+err_add_cnt:
if (flow_act.pkt_reformat)
mlx5_packet_reformat_dealloc(mdev, flow_act.pkt_reformat);
err_pkt_reformat:
@@ -760,16 +1226,17 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
{
struct mlx5_accel_pol_xfrm_attrs *attrs = &pol_entry->attrs;
struct mlx5_core_dev *mdev = mlx5e_ipsec_pol2dev(pol_entry);
+ struct mlx5e_ipsec_tx *tx = pol_entry->ipsec->tx;
struct mlx5_flow_destination dest[2] = {};
struct mlx5_flow_act flow_act = {};
struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
- struct mlx5e_ipsec_tx *tx;
+ struct mlx5_flow_table *ft;
int err, dstn = 0;
- tx = tx_ft_get(mdev, pol_entry->ipsec);
- if (IS_ERR(tx))
- return PTR_ERR(tx);
+ ft = tx_ft_get_policy(mdev, pol_entry->ipsec, attrs->prio);
+ if (IS_ERR(ft))
+ return PTR_ERR(ft);
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec) {
@@ -785,14 +1252,16 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
setup_fte_no_frags(spec);
setup_fte_upper_proto_match(spec, &attrs->upspec);
- err = setup_modify_header(mdev, attrs->reqid, XFRM_DEV_OFFLOAD_OUT,
- &flow_act);
- if (err)
- goto err_mod_header;
-
switch (attrs->action) {
case XFRM_POLICY_ALLOW:
flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ if (!attrs->reqid)
+ break;
+
+ err = setup_modify_header(mdev, attrs->reqid,
+ XFRM_DEV_OFFLOAD_OUT, &flow_act);
+ if (err)
+ goto err_mod_header;
break;
case XFRM_POLICY_BLOCK:
flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
@@ -804,14 +1273,14 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
default:
WARN_ON(true);
err = -EINVAL;
- goto err_action;
+ goto err_mod_header;
}
flow_act.flags |= FLOW_ACT_NO_APPEND;
dest[dstn].ft = tx->ft.sa;
dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dstn++;
- rule = mlx5_add_flow_rules(tx->ft.pol, spec, &flow_act, dest, dstn);
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, dstn);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
mlx5_core_err(mdev, "fail to add TX ipsec rule err=%d\n", err);
@@ -824,11 +1293,12 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
return 0;
err_action:
- mlx5_modify_header_dealloc(mdev, flow_act.modify_hdr);
+ if (flow_act.modify_hdr)
+ mlx5_modify_header_dealloc(mdev, flow_act.modify_hdr);
err_mod_header:
kvfree(spec);
err_alloc:
- tx_ft_put(pol_entry->ipsec);
+ tx_ft_put_policy(pol_entry->ipsec, attrs->prio);
return err;
}
@@ -840,12 +1310,15 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
struct mlx5_flow_act flow_act = {};
struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
+ struct mlx5_flow_table *ft;
struct mlx5e_ipsec_rx *rx;
int err, dstn = 0;
- rx = rx_ft_get(mdev, pol_entry->ipsec, attrs->family);
- if (IS_ERR(rx))
- return PTR_ERR(rx);
+ ft = rx_ft_get_policy(mdev, pol_entry->ipsec, attrs->family, attrs->prio);
+ if (IS_ERR(ft))
+ return PTR_ERR(ft);
+
+ rx = ipsec_rx(pol_entry->ipsec, attrs->family);
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec) {
@@ -880,7 +1353,7 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest[dstn].ft = rx->ft.sa;
dstn++;
- rule = mlx5_add_flow_rules(rx->ft.pol, spec, &flow_act, dest, dstn);
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, dstn);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
mlx5_core_err(mdev, "Fail to add RX IPsec policy rule err=%d\n", err);
@@ -894,7 +1367,7 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
err_action:
kvfree(spec);
err_alloc:
- rx_ft_put(mdev, pol_entry->ipsec, attrs->family);
+ rx_ft_put_policy(pol_entry->ipsec, attrs->family, attrs->prio);
return err;
}
@@ -1022,7 +1495,7 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
mlx5_del_flow_rules(ipsec_rule->rule);
-
+ mlx5_fc_destroy(mdev, ipsec_rule->fc);
if (ipsec_rule->pkt_reformat)
mlx5_packet_reformat_dealloc(mdev, ipsec_rule->pkt_reformat);
@@ -1032,7 +1505,7 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
}
mlx5_modify_header_dealloc(mdev, ipsec_rule->modify_hdr);
- rx_ft_put(mdev, sa_entry->ipsec, sa_entry->attrs.family);
+ rx_ft_put(sa_entry->ipsec, sa_entry->attrs.family);
}
int mlx5e_accel_ipsec_fs_add_pol(struct mlx5e_ipsec_pol_entry *pol_entry)
@@ -1051,12 +1524,15 @@ void mlx5e_accel_ipsec_fs_del_pol(struct mlx5e_ipsec_pol_entry *pol_entry)
mlx5_del_flow_rules(ipsec_rule->rule);
if (pol_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) {
- rx_ft_put(mdev, pol_entry->ipsec, pol_entry->attrs.family);
+ rx_ft_put_policy(pol_entry->ipsec, pol_entry->attrs.family,
+ pol_entry->attrs.prio);
return;
}
- mlx5_modify_header_dealloc(mdev, ipsec_rule->modify_hdr);
- tx_ft_put(pol_entry->ipsec);
+ if (ipsec_rule->modify_hdr)
+ mlx5_modify_header_dealloc(mdev, ipsec_rule->modify_hdr);
+
+ tx_ft_put_policy(pol_entry->ipsec, pol_entry->attrs.prio);
}
void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec)
@@ -1126,3 +1602,31 @@ err_rx_ipv4:
kfree(ipsec->tx);
return err;
}
+
+void mlx5e_accel_ipsec_fs_modify(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5e_ipsec_sa_entry sa_entry_shadow = {};
+ int err;
+
+ memcpy(&sa_entry_shadow, sa_entry, sizeof(*sa_entry));
+ memset(&sa_entry_shadow.ipsec_rule, 0x00, sizeof(sa_entry->ipsec_rule));
+
+ err = mlx5e_accel_ipsec_fs_add_rule(&sa_entry_shadow);
+ if (err)
+ return;
+
+ mlx5e_accel_ipsec_fs_del_rule(sa_entry);
+ memcpy(sa_entry, &sa_entry_shadow, sizeof(*sa_entry));
+}
+
+bool mlx5e_ipsec_fs_tunnel_enabled(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5e_ipsec_rx *rx =
+ ipsec_rx(sa_entry->ipsec, sa_entry->attrs.family);
+ struct mlx5e_ipsec_tx *tx = sa_entry->ipsec->tx;
+
+ if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_OUT)
+ return tx->allow_tunnel_mode;
+
+ return rx->allow_tunnel_mode;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
index 5fa7a4c40429..df90e19066bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
@@ -8,6 +8,7 @@
enum {
MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET,
+ MLX5_IPSEC_ASO_REMOVE_FLOW_SOFT_LFT_OFFSET,
};
u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev)
@@ -36,11 +37,24 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev)
MLX5_CAP_ETH(mdev, insert_trailer) && MLX5_CAP_ETH(mdev, swp))
caps |= MLX5_IPSEC_CAP_CRYPTO;
- if (MLX5_CAP_IPSEC(mdev, ipsec_full_offload) &&
- MLX5_CAP_FLOWTABLE_NIC_TX(mdev, reformat_add_esp_trasport) &&
- MLX5_CAP_FLOWTABLE_NIC_RX(mdev, reformat_del_esp_trasport) &&
- MLX5_CAP_FLOWTABLE_NIC_RX(mdev, decap))
- caps |= MLX5_IPSEC_CAP_PACKET_OFFLOAD;
+ if (MLX5_CAP_IPSEC(mdev, ipsec_full_offload)) {
+ if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev,
+ reformat_add_esp_trasport) &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ reformat_del_esp_trasport) &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev, decap))
+ caps |= MLX5_IPSEC_CAP_PACKET_OFFLOAD;
+
+ if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ignore_flow_level) &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ignore_flow_level))
+ caps |= MLX5_IPSEC_CAP_PRIO;
+
+ if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev,
+ reformat_l2_to_l3_esp_tunnel) &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ reformat_l3_esp_tunnel_to_l2))
+ caps |= MLX5_IPSEC_CAP_TUNNEL;
+ }
if (mlx5_get_roce_state(mdev) &&
MLX5_CAP_GEN_2(mdev, flow_table_type_2_type) & MLX5_FT_NIC_RX_2_NIC_RX_RDMA &&
@@ -68,15 +82,17 @@ static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn,
void *aso_ctx;
aso_ctx = MLX5_ADDR_OF(ipsec_obj, obj, ipsec_aso);
- if (attrs->esn_trigger) {
+ if (attrs->replay_esn.trigger) {
MLX5_SET(ipsec_aso, aso_ctx, esn_event_arm, 1);
if (attrs->dir == XFRM_DEV_OFFLOAD_IN) {
MLX5_SET(ipsec_aso, aso_ctx, window_sz,
- attrs->replay_window / 64);
+ attrs->replay_esn.replay_window / 64);
MLX5_SET(ipsec_aso, aso_ctx, mode,
MLX5_IPSEC_ASO_REPLAY_PROTECTION);
- }
+ }
+ MLX5_SET(ipsec_aso, aso_ctx, mode_parameter,
+ attrs->replay_esn.esn);
}
/* ASO context */
@@ -93,15 +109,15 @@ static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn,
if (attrs->dir == XFRM_DEV_OFFLOAD_OUT)
MLX5_SET(ipsec_aso, aso_ctx, mode, MLX5_IPSEC_ASO_INC_SN);
- if (attrs->hard_packet_limit != XFRM_INF) {
+ if (attrs->lft.hard_packet_limit != XFRM_INF) {
MLX5_SET(ipsec_aso, aso_ctx, remove_flow_pkt_cnt,
- lower_32_bits(attrs->hard_packet_limit));
+ attrs->lft.hard_packet_limit);
MLX5_SET(ipsec_aso, aso_ctx, hard_lft_arm, 1);
}
- if (attrs->soft_packet_limit != XFRM_INF) {
+ if (attrs->lft.soft_packet_limit != XFRM_INF) {
MLX5_SET(ipsec_aso, aso_ctx, remove_flow_soft_lft,
- lower_32_bits(attrs->soft_packet_limit));
+ attrs->lft.soft_packet_limit);
MLX5_SET(ipsec_aso, aso_ctx, soft_lft_arm, 1);
}
@@ -128,10 +144,10 @@ static int mlx5_create_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry)
salt_iv_p = MLX5_ADDR_OF(ipsec_obj, obj, implicit_iv);
memcpy(salt_iv_p, &aes_gcm->seq_iv, sizeof(aes_gcm->seq_iv));
/* esn */
- if (attrs->esn_trigger) {
+ if (attrs->replay_esn.trigger) {
MLX5_SET(ipsec_obj, obj, esn_en, 1);
- MLX5_SET(ipsec_obj, obj, esn_msb, attrs->esn);
- MLX5_SET(ipsec_obj, obj, esn_overlap, attrs->esn_overlap);
+ MLX5_SET(ipsec_obj, obj, esn_msb, attrs->replay_esn.esn_msb);
+ MLX5_SET(ipsec_obj, obj, esn_overlap, attrs->replay_esn.overlap);
}
MLX5_SET(ipsec_obj, obj, dekn, sa_entry->enc_key_id);
@@ -217,9 +233,6 @@ static int mlx5_modify_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry,
void *obj;
int err;
- if (!attrs->esn_trigger)
- return 0;
-
general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types);
if (!(general_obj_types & MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC))
return -EINVAL;
@@ -247,8 +260,8 @@ static int mlx5_modify_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry,
MLX5_SET64(ipsec_obj, obj, modify_field_select,
MLX5_MODIFY_IPSEC_BITMASK_ESN_OVERLAP |
MLX5_MODIFY_IPSEC_BITMASK_ESN_MSB);
- MLX5_SET(ipsec_obj, obj, esn_msb, attrs->esn);
- MLX5_SET(ipsec_obj, obj, esn_overlap, attrs->esn_overlap);
+ MLX5_SET(ipsec_obj, obj, esn_msb, attrs->replay_esn.esn_msb);
+ MLX5_SET(ipsec_obj, obj, esn_overlap, attrs->replay_esn.overlap);
/* general object fields set */
MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
@@ -268,29 +281,24 @@ void mlx5_accel_esp_modify_xfrm(struct mlx5e_ipsec_sa_entry *sa_entry,
memcpy(&sa_entry->attrs, attrs, sizeof(sa_entry->attrs));
}
-static void
-mlx5e_ipsec_aso_update_esn(struct mlx5e_ipsec_sa_entry *sa_entry,
- const struct mlx5_accel_esp_xfrm_attrs *attrs)
+static void mlx5e_ipsec_aso_update(struct mlx5e_ipsec_sa_entry *sa_entry,
+ struct mlx5_wqe_aso_ctrl_seg *data)
{
- struct mlx5_wqe_aso_ctrl_seg data = {};
+ data->data_mask_mode = MLX5_ASO_DATA_MASK_MODE_BITWISE_64BIT << 6;
+ data->condition_1_0_operand = MLX5_ASO_ALWAYS_TRUE |
+ MLX5_ASO_ALWAYS_TRUE << 4;
- data.data_mask_mode = MLX5_ASO_DATA_MASK_MODE_BITWISE_64BIT << 6;
- data.condition_1_0_operand = MLX5_ASO_ALWAYS_TRUE | MLX5_ASO_ALWAYS_TRUE
- << 4;
- data.data_offset_condition_operand = MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET;
- data.bitwise_data = cpu_to_be64(BIT_ULL(54));
- data.data_mask = data.bitwise_data;
-
- mlx5e_ipsec_aso_query(sa_entry, &data);
+ mlx5e_ipsec_aso_query(sa_entry, data);
}
static void mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry,
u32 mode_param)
{
struct mlx5_accel_esp_xfrm_attrs attrs = {};
+ struct mlx5_wqe_aso_ctrl_seg data = {};
if (mode_param < MLX5E_IPSEC_ESN_SCOPE_MID) {
- sa_entry->esn_state.esn++;
+ sa_entry->esn_state.esn_msb++;
sa_entry->esn_state.overlap = 0;
} else {
sa_entry->esn_state.overlap = 1;
@@ -298,25 +306,129 @@ static void mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry,
mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs);
mlx5_accel_esp_modify_xfrm(sa_entry, &attrs);
- mlx5e_ipsec_aso_update_esn(sa_entry, &attrs);
+
+ data.data_offset_condition_operand =
+ MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET;
+ data.bitwise_data = cpu_to_be64(BIT_ULL(54));
+ data.data_mask = data.bitwise_data;
+
+ mlx5e_ipsec_aso_update(sa_entry, &data);
+}
+
+static void mlx5e_ipsec_aso_update_hard(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5_wqe_aso_ctrl_seg data = {};
+
+ data.data_offset_condition_operand =
+ MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET;
+ data.bitwise_data = cpu_to_be64(BIT_ULL(57) + BIT_ULL(31));
+ data.data_mask = data.bitwise_data;
+ mlx5e_ipsec_aso_update(sa_entry, &data);
+}
+
+static void mlx5e_ipsec_aso_update_soft(struct mlx5e_ipsec_sa_entry *sa_entry,
+ u32 val)
+{
+ struct mlx5_wqe_aso_ctrl_seg data = {};
+
+ data.data_offset_condition_operand =
+ MLX5_IPSEC_ASO_REMOVE_FLOW_SOFT_LFT_OFFSET;
+ data.bitwise_data = cpu_to_be64(val);
+ data.data_mask = cpu_to_be64(U32_MAX);
+ mlx5e_ipsec_aso_update(sa_entry, &data);
+}
+
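
The new mlx5e_ipsec_aso_update_{hard,soft}() helpers reuse the shared bitwise-64 update path: bitwise_data carries the value and data_mask selects which bits of the ASO field the device overwrites (a 32-bit value for the soft counter, a single flag bit for the hard/ESN cases). Below is a small model of that masked-write semantic, under the assumption that masked bits are replaced and unmasked bits are preserved; this is an interpretation, not text copied from the device spec.

#include <stdint.h>
#include <stdio.h>

/* Assumed semantics of a bitwise-64 ASO update: bits selected by 'mask'
 * are replaced with the corresponding bits of 'data'; the rest of the
 * 64-bit field is preserved.
 */
static uint64_t aso_bitwise_update(uint64_t field, uint64_t data, uint64_t mask)
{
	return (field & ~mask) | (data & mask);
}

int main(void)
{
	uint64_t field = 0x1122334455667788ULL;

	/* Like mlx5e_ipsec_aso_update_soft(): write a full 32-bit value. */
	field = aso_bitwise_update(field, 42, UINT32_MAX);
	printf("soft update: %#018llx\n", (unsigned long long)field);

	/* Like the ESN/hard helpers: flip one well-known bit (here bit 54). */
	field = aso_bitwise_update(field, 1ULL << 54, 1ULL << 54);
	printf("bit update:  %#018llx\n", (unsigned long long)field);
	return 0;
}
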
+static void mlx5e_ipsec_handle_limits(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
+ struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
+ struct mlx5e_ipsec_aso *aso = ipsec->aso;
+ bool soft_arm, hard_arm;
+ u64 hard_cnt;
+
+ lockdep_assert_held(&sa_entry->x->lock);
+
+ soft_arm = !MLX5_GET(ipsec_aso, aso->ctx, soft_lft_arm);
+ hard_arm = !MLX5_GET(ipsec_aso, aso->ctx, hard_lft_arm);
+ if (!soft_arm && !hard_arm)
+ /* This is not a lifetime event */
+ return;
+
+ hard_cnt = MLX5_GET(ipsec_aso, aso->ctx, remove_flow_pkt_cnt);
+ if (!hard_cnt || hard_arm) {
+ /* It is possible to see the packet counter equal to zero without
+ * the hard limit event armed. This can happen if the counter
+ * decreased while we were handling a soft limit event.
+ *
+ * However, it would be a HW/FW bug if the hard limit event were
+ * raised while the packet counter is not zero.
+ */
+ WARN_ON_ONCE(hard_arm && hard_cnt);
+
+ /* Notify about hard limit */
+ xfrm_state_check_expire(sa_entry->x);
+ return;
+ }
+
+ /* We are handling a soft limit event. */
+ if (!sa_entry->limits.soft_limit_hit &&
+ sa_entry->limits.round == attrs->lft.numb_rounds_soft) {
+ sa_entry->limits.soft_limit_hit = true;
+ /* Notify about soft limit */
+ xfrm_state_check_expire(sa_entry->x);
+
+ if (sa_entry->limits.round == attrs->lft.numb_rounds_hard)
+ goto hard;
+
+ if (attrs->lft.soft_packet_limit > BIT_ULL(31)) {
+ /* We cannot avoid a soft_value with the high bit set.
+ * For instance, soft_value=2^31+1 cannot be adjusted to the
+ * low-bit-clear equivalent soft_value=1 because it is too
+ * close to 0.
+ *
+ * So we have a corner case where we can hit the soft limit
+ * with the high bit set but cannot adjust the counter.
+ * Instead, set a temporary interrupt_value at least 2^30
+ * away from here and do the real adjustment once that
+ * interrupt fires.
+ */
+ mlx5e_ipsec_aso_update_soft(sa_entry,
+ BIT_ULL(31) - BIT_ULL(30));
+ sa_entry->limits.fix_limit = true;
+ return;
+ }
+
+ sa_entry->limits.fix_limit = true;
+ }
+
+hard:
+ if (sa_entry->limits.round == attrs->lft.numb_rounds_hard) {
+ mlx5e_ipsec_aso_update_soft(sa_entry, 0);
+ attrs->lft.soft_packet_limit = XFRM_INF;
+ return;
+ }
+
+ mlx5e_ipsec_aso_update_hard(sa_entry);
+ sa_entry->limits.round++;
+ if (sa_entry->limits.round == attrs->lft.numb_rounds_soft)
+ mlx5e_ipsec_aso_update_soft(sa_entry,
+ attrs->lft.soft_packet_limit);
+ if (sa_entry->limits.fix_limit) {
+ sa_entry->limits.fix_limit = false;
+ mlx5e_ipsec_aso_update_soft(sa_entry, BIT_ULL(31) - 1);
+ }
}
static void mlx5e_ipsec_handle_event(struct work_struct *_work)
{
struct mlx5e_ipsec_work *work =
container_of(_work, struct mlx5e_ipsec_work, work);
+ struct mlx5e_ipsec_sa_entry *sa_entry = work->data;
struct mlx5_accel_esp_xfrm_attrs *attrs;
- struct mlx5e_ipsec_sa_entry *sa_entry;
struct mlx5e_ipsec_aso *aso;
- struct mlx5e_ipsec *ipsec;
int ret;
- sa_entry = xa_load(&work->ipsec->sadb, work->id);
- if (!sa_entry)
- goto out;
-
- ipsec = sa_entry->ipsec;
- aso = ipsec->aso;
+ aso = sa_entry->ipsec->aso;
attrs = &sa_entry->attrs;
spin_lock(&sa_entry->x->lock);
@@ -324,21 +436,18 @@ static void mlx5e_ipsec_handle_event(struct work_struct *_work)
if (ret)
goto unlock;
- if (attrs->esn_trigger &&
+ if (attrs->replay_esn.trigger &&
!MLX5_GET(ipsec_aso, aso->ctx, esn_event_arm)) {
u32 mode_param = MLX5_GET(ipsec_aso, aso->ctx, mode_parameter);
mlx5e_ipsec_update_esn_state(sa_entry, mode_param);
}
- if (attrs->soft_packet_limit != XFRM_INF)
- if (!MLX5_GET(ipsec_aso, aso->ctx, soft_lft_arm) ||
- !MLX5_GET(ipsec_aso, aso->ctx, hard_lft_arm))
- xfrm_state_check_expire(sa_entry->x);
+ if (attrs->lft.soft_packet_limit != XFRM_INF)
+ mlx5e_ipsec_handle_limits(sa_entry);
unlock:
spin_unlock(&sa_entry->x->lock);
-out:
kfree(work);
}
@@ -346,6 +455,7 @@ static int mlx5e_ipsec_event(struct notifier_block *nb, unsigned long event,
void *data)
{
struct mlx5e_ipsec *ipsec = container_of(nb, struct mlx5e_ipsec, nb);
+ struct mlx5e_ipsec_sa_entry *sa_entry;
struct mlx5_eqe_obj_change *object;
struct mlx5e_ipsec_work *work;
struct mlx5_eqe *eqe = data;
@@ -360,13 +470,16 @@ static int mlx5e_ipsec_event(struct notifier_block *nb, unsigned long event,
if (type != MLX5_GENERAL_OBJECT_TYPES_IPSEC)
return NOTIFY_DONE;
+ sa_entry = xa_load(&ipsec->sadb, be32_to_cpu(object->obj_id));
+ if (!sa_entry)
+ return NOTIFY_DONE;
+
work = kmalloc(sizeof(*work), GFP_ATOMIC);
if (!work)
return NOTIFY_DONE;
INIT_WORK(&work->work, mlx5e_ipsec_handle_event);
- work->ipsec = ipsec;
- work->id = be32_to_cpu(object->obj_id);
+ work->data = sa_entry;
queue_work(ipsec->wq, &work->work);
return NOTIFY_OK;
@@ -457,6 +570,7 @@ int mlx5e_ipsec_aso_query(struct mlx5e_ipsec_sa_entry *sa_entry,
struct mlx5_wqe_aso_ctrl_seg *ctrl;
struct mlx5e_hw_objs *res;
struct mlx5_aso_wqe *wqe;
+ unsigned long expires;
u8 ds_cnt;
int ret;
@@ -478,22 +592,12 @@ int mlx5e_ipsec_aso_query(struct mlx5e_ipsec_sa_entry *sa_entry,
mlx5e_ipsec_aso_copy(ctrl, data);
mlx5_aso_post_wqe(aso->aso, false, &wqe->ctrl);
- ret = mlx5_aso_poll_cq(aso->aso, false);
+ expires = jiffies + msecs_to_jiffies(10);
+ do {
+ ret = mlx5_aso_poll_cq(aso->aso, false);
+ if (ret)
+ usleep_range(2, 10);
+ } while (ret && time_is_after_jiffies(expires));
spin_unlock_bh(&aso->lock);
return ret;
}
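
mlx5e_ipsec_aso_query() now retries mlx5_aso_poll_cq() for roughly 10ms instead of giving up after a single poll. The same bounded-poll idiom in plain userspace C; poll_once() is a stand-in for the real CQ poll and succeeds after a few attempts purely for demonstration.

#include <errno.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

/* Stand-in for mlx5_aso_poll_cq(): returns 0 when a completion arrived,
 * -EAGAIN ("not yet") otherwise. Here it succeeds on the fifth call.
 */
static int poll_once(void)
{
	static int calls;

	return (++calls >= 5) ? 0 : -EAGAIN;
}

static long long now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000LL + ts.tv_nsec / 1000000;
}

int main(void)
{
	long long expires = now_ms() + 10;	/* ~10ms budget, as in the patch */
	int ret;

	do {
		ret = poll_once();
		if (ret)
			usleep(5);		/* the kernel uses usleep_range(2, 10) */
	} while (ret && now_ms() < expires);

	printf("poll result: %d\n", ret);
	return 0;
}
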
-
-void mlx5e_ipsec_aso_update_curlft(struct mlx5e_ipsec_sa_entry *sa_entry,
- u64 *packets)
-{
- struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
- struct mlx5e_ipsec_aso *aso = ipsec->aso;
- u64 hard_cnt;
-
- hard_cnt = MLX5_GET(ipsec_aso, aso->ctx, remove_flow_pkt_cnt);
- /* HW decresases the limit till it reaches zero to fire an avent.
- * We need to fix the calculations, so the returned count is a total
- * number of passed packets and not how much left.
- */
- *packets = sa_entry->attrs.hard_packet_limit - hard_cnt;
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
index 51f1cd8364c2..6b7b563f844a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
@@ -4,6 +4,7 @@
#include <linux/mlx5/device.h>
#include <linux/mlx5/mlx5_ifc.h>
#include <linux/xarray.h>
+#include <linux/if_vlan.h>
#include "en.h"
#include "lib/aso.h"
@@ -348,12 +349,21 @@ static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec,
sa->macsec_rule = NULL;
}
+static struct mlx5e_priv *macsec_netdev_priv(const struct net_device *dev)
+{
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+ if (is_vlan_dev(dev))
+ return netdev_priv(vlan_dev_priv(dev)->real_dev);
+#endif
+ return netdev_priv(dev);
+}
+
static int mlx5e_macsec_init_sa(struct macsec_context *ctx,
struct mlx5e_macsec_sa *sa,
bool encrypt,
bool is_tx)
{
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
struct mlx5e_macsec *macsec = priv->macsec;
struct mlx5_macsec_rule_attrs rule_attrs;
struct mlx5_core_dev *mdev = priv->mdev;
@@ -427,7 +437,7 @@ static int macsec_rx_sa_active_update(struct macsec_context *ctx,
struct mlx5e_macsec_sa *rx_sa,
bool active)
{
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
struct mlx5e_macsec *macsec = priv->macsec;
int err = 0;
@@ -508,9 +518,9 @@ static void update_macsec_epn(struct mlx5e_macsec_sa *sa, const struct macsec_ke
static int mlx5e_macsec_add_txsa(struct macsec_context *ctx)
{
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc;
const struct macsec_tx_sa *ctx_tx_sa = ctx->sa.tx_sa;
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
const struct macsec_secy *secy = ctx->secy;
struct mlx5e_macsec_device *macsec_device;
struct mlx5_core_dev *mdev = priv->mdev;
@@ -583,9 +593,9 @@ out:
static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx)
{
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc;
const struct macsec_tx_sa *ctx_tx_sa = ctx->sa.tx_sa;
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
struct mlx5e_macsec_device *macsec_device;
u8 assoc_num = ctx->sa.assoc_num;
struct mlx5e_macsec_sa *tx_sa;
@@ -645,7 +655,7 @@ out:
static int mlx5e_macsec_del_txsa(struct macsec_context *ctx)
{
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
struct mlx5e_macsec_device *macsec_device;
u8 assoc_num = ctx->sa.assoc_num;
struct mlx5e_macsec_sa *tx_sa;
@@ -696,7 +706,7 @@ static u32 mlx5e_macsec_get_sa_from_hashtable(struct rhashtable *sci_hash, sci_t
static int mlx5e_macsec_add_rxsc(struct macsec_context *ctx)
{
struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element;
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
const struct macsec_rx_sc *ctx_rx_sc = ctx->rx_sc;
struct mlx5e_macsec_device *macsec_device;
struct mlx5e_macsec_rx_sc *rx_sc;
@@ -776,7 +786,7 @@ out:
static int mlx5e_macsec_upd_rxsc(struct macsec_context *ctx)
{
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
const struct macsec_rx_sc *ctx_rx_sc = ctx->rx_sc;
struct mlx5e_macsec_device *macsec_device;
struct mlx5e_macsec_rx_sc *rx_sc;
@@ -854,7 +864,7 @@ static void macsec_del_rxsc_ctx(struct mlx5e_macsec *macsec, struct mlx5e_macsec
static int mlx5e_macsec_del_rxsc(struct macsec_context *ctx)
{
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
struct mlx5e_macsec_device *macsec_device;
struct mlx5e_macsec_rx_sc *rx_sc;
struct mlx5e_macsec *macsec;
@@ -890,8 +900,8 @@ out:
static int mlx5e_macsec_add_rxsa(struct macsec_context *ctx)
{
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
const struct macsec_rx_sa *ctx_rx_sa = ctx->sa.rx_sa;
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
struct mlx5e_macsec_device *macsec_device;
struct mlx5_core_dev *mdev = priv->mdev;
u8 assoc_num = ctx->sa.assoc_num;
@@ -976,8 +986,8 @@ out:
static int mlx5e_macsec_upd_rxsa(struct macsec_context *ctx)
{
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
const struct macsec_rx_sa *ctx_rx_sa = ctx->sa.rx_sa;
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
struct mlx5e_macsec_device *macsec_device;
u8 assoc_num = ctx->sa.assoc_num;
struct mlx5e_macsec_rx_sc *rx_sc;
@@ -1033,7 +1043,7 @@ out:
static int mlx5e_macsec_del_rxsa(struct macsec_context *ctx)
{
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
struct mlx5e_macsec_device *macsec_device;
sci_t sci = ctx->sa.rx_sa->sc->sci;
struct mlx5e_macsec_rx_sc *rx_sc;
@@ -1085,7 +1095,7 @@ out:
static int mlx5e_macsec_add_secy(struct macsec_context *ctx)
{
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
const struct net_device *dev = ctx->secy->netdev;
const struct net_device *netdev = ctx->netdev;
struct mlx5e_macsec_device *macsec_device;
@@ -1137,7 +1147,7 @@ out:
static int macsec_upd_secy_hw_address(struct macsec_context *ctx,
struct mlx5e_macsec_device *macsec_device)
{
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
const struct net_device *dev = ctx->secy->netdev;
struct mlx5e_macsec *macsec = priv->macsec;
struct mlx5e_macsec_rx_sc *rx_sc, *tmp;
@@ -1184,8 +1194,8 @@ out:
*/
static int mlx5e_macsec_upd_secy(struct macsec_context *ctx)
{
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc;
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
const struct net_device *dev = ctx->secy->netdev;
struct mlx5e_macsec_device *macsec_device;
struct mlx5e_macsec_sa *tx_sa;
@@ -1240,7 +1250,7 @@ out:
static int mlx5e_macsec_del_secy(struct macsec_context *ctx)
{
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
struct mlx5e_macsec_device *macsec_device;
struct mlx5e_macsec_rx_sc *rx_sc, *tmp;
struct mlx5e_macsec_sa *tx_sa;
@@ -1741,7 +1751,7 @@ void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev,
{
struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element;
u32 macsec_meta_data = be32_to_cpu(cqe->ft_metadata);
- struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_priv *priv = macsec_netdev_priv(netdev);
struct mlx5e_macsec_rx_sc *rx_sc;
struct mlx5e_macsec *macsec;
u32 fs_id;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
index 5b658a5588c6..7fc901a6ec5f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
@@ -4,6 +4,7 @@
#include <net/macsec.h>
#include <linux/netdevice.h>
#include <linux/mlx5/qp.h>
+#include <linux/if_vlan.h>
#include "fs_core.h"
#include "en/fs.h"
#include "en_accel/macsec_fs.h"
@@ -292,8 +293,6 @@ static int macsec_fs_tx_create(struct mlx5e_macsec_fs *macsec_fs)
}
/* Tx crypto table MKE rule - MKE packets shouldn't be offloaded */
- memset(&flow_act, 0, sizeof(flow_act));
- memset(spec, 0, sizeof(*spec));
spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
@@ -510,6 +509,8 @@ static void macsec_fs_tx_del_rule(struct mlx5e_macsec_fs *macsec_fs,
macsec_fs_tx_ft_put(macsec_fs);
}
+#define MLX5_REFORMAT_PARAM_ADD_MACSEC_OFFSET_4_BYTES 1
+
static union mlx5e_macsec_rule *
macsec_fs_tx_add_rule(struct mlx5e_macsec_fs *macsec_fs,
const struct macsec_context *macsec_ctx,
@@ -555,6 +556,10 @@ macsec_fs_tx_add_rule(struct mlx5e_macsec_fs *macsec_fs,
reformat_params.type = MLX5_REFORMAT_TYPE_ADD_MACSEC;
reformat_params.size = reformat_size;
reformat_params.data = reformatbf;
+
+ if (is_vlan_dev(macsec_ctx->netdev))
+ reformat_params.param_0 = MLX5_REFORMAT_PARAM_ADD_MACSEC_OFFSET_4_BYTES;
+
flow_act.pkt_reformat = mlx5_packet_reformat_alloc(macsec_fs->mdev,
&reformat_params,
MLX5_FLOW_NAMESPACE_EGRESS_MACSEC);
@@ -1109,7 +1114,6 @@ static void macsec_fs_rx_setup_fte(struct mlx5_flow_spec *spec,
static union mlx5e_macsec_rule *
macsec_fs_rx_add_rule(struct mlx5e_macsec_fs *macsec_fs,
- const struct macsec_context *macsec_ctx,
struct mlx5_macsec_rule_attrs *attrs,
u32 fs_id)
{
@@ -1334,7 +1338,7 @@ mlx5e_macsec_fs_add_rule(struct mlx5e_macsec_fs *macsec_fs,
{
return (attrs->action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) ?
macsec_fs_tx_add_rule(macsec_fs, macsec_ctx, attrs, sa_fs_id) :
- macsec_fs_rx_add_rule(macsec_fs, macsec_ctx, attrs, *sa_fs_id);
+ macsec_fs_rx_add_rule(macsec_fs, attrs, *sa_fs_id);
}
void mlx5e_macsec_fs_del_rule(struct mlx5e_macsec_fs *macsec_fs,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 79fd21ecb9cb..1f5a2110d31f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -220,7 +220,7 @@ static void mlx5e_ethtool_get_speed_arr(struct mlx5_core_dev *mdev,
struct ptys2ethtool_config **arr,
u32 *size)
{
- bool ext = mlx5e_ptys_ext_supported(mdev);
+ bool ext = mlx5_ptys_ext_supported(mdev);
*arr = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table;
*size = ext ? ARRAY_SIZE(ptys2ext_ethtool_table) :
@@ -895,7 +895,7 @@ static void get_speed_duplex(struct net_device *netdev,
if (!netif_carrier_ok(netdev))
goto out;
- speed = mlx5e_port_ptys2speed(priv->mdev, eth_proto_oper, force_legacy);
+ speed = mlx5_port_ptys2speed(priv->mdev, eth_proto_oper, force_legacy);
if (!speed) {
if (data_rate_oper)
speed = 100 * data_rate_oper;
@@ -980,7 +980,7 @@ static void get_lp_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_lp,
struct ethtool_link_ksettings *link_ksettings)
{
unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising;
- bool ext = mlx5e_ptys_ext_supported(mdev);
+ bool ext = mlx5_ptys_ext_supported(mdev);
ptys2ethtool_adver_link(lp_advertising, eth_proto_lp, ext);
}
@@ -1160,7 +1160,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
const struct ethtool_link_ksettings *link_ksettings)
{
struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5e_port_eth_proto eproto;
+ struct mlx5_port_eth_proto eproto;
const unsigned long *adver;
bool an_changes = false;
u8 an_disable_admin;
@@ -1180,7 +1180,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
autoneg = link_ksettings->base.autoneg;
speed = link_ksettings->base.speed;
- ext_supported = mlx5e_ptys_ext_supported(mdev);
+ ext_supported = mlx5_ptys_ext_supported(mdev);
ext = ext_requested(autoneg, adver, ext_supported);
if (!ext_supported && ext)
return -EOPNOTSUPP;
@@ -1194,7 +1194,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
goto out;
}
link_modes = autoneg == AUTONEG_ENABLE ? ethtool2ptys_adver_func(adver) :
- mlx5e_port_speed2linkmodes(mdev, speed, !ext);
+ mlx5_port_speed2linkmodes(mdev, speed, !ext);
err = mlx5e_speed_validate(priv->netdev, ext, link_modes, autoneg);
if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 05796f8b1d7c..33bfe4d7338b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -783,6 +783,7 @@ static int mlx5e_create_promisc_table(struct mlx5e_flow_steering *fs)
ft->t = mlx5_create_auto_grouped_flow_table(fs->ns, &ft_attr);
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
+ ft->t = NULL;
fs_err(fs, "fail to create promisc table err=%d\n", err);
return err;
}
@@ -810,7 +811,7 @@ static void mlx5e_del_promisc_rule(struct mlx5e_flow_steering *fs)
static void mlx5e_destroy_promisc_table(struct mlx5e_flow_steering *fs)
{
- if (WARN(!fs->promisc.ft.t, "Trying to remove non-existing promiscuous table"))
+ if (!fs->promisc.ft.t)
return;
mlx5e_del_promisc_rule(fs);
mlx5_destroy_flow_table(fs->promisc.ft.t);
@@ -1490,6 +1491,8 @@ err:
void mlx5e_fs_cleanup(struct mlx5e_flow_steering *fs)
{
+ if (!fs)
+ return;
debugfs_remove_recursive(fs->dfs_root);
mlx5e_fs_ethtool_free(fs);
mlx5e_fs_tc_free(fs);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 7ca7e9b57607..2944691f06ad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -262,23 +262,30 @@ static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, int node)
shampo->bitmap = bitmap_zalloc_node(shampo->hd_per_wq, GFP_KERNEL,
node);
- if (!shampo->bitmap)
- return -ENOMEM;
-
shampo->info = kvzalloc_node(array_size(shampo->hd_per_wq,
sizeof(*shampo->info)),
GFP_KERNEL, node);
- if (!shampo->info) {
- kvfree(shampo->bitmap);
- return -ENOMEM;
- }
+ shampo->pages = kvzalloc_node(array_size(shampo->hd_per_wq,
+ sizeof(*shampo->pages)),
+ GFP_KERNEL, node);
+ if (!shampo->bitmap || !shampo->info || !shampo->pages)
+ goto err_nomem;
+
return 0;
+
+err_nomem:
+ kvfree(shampo->info);
+ kvfree(shampo->bitmap);
+ kvfree(shampo->pages);
+
+ return -ENOMEM;
}
static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq)
{
kvfree(rq->mpwqe.shampo->bitmap);
kvfree(rq->mpwqe.shampo->info);
+ kvfree(rq->mpwqe.shampo->pages);
}
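
mlx5e_rq_shampo_hd_info_alloc() now allocates all three arrays first and funnels every failure through one err_nomem label; this works because kvfree(NULL) is a no-op, so whichever allocations did not happen are freed harmlessly. The sketch below shows the same idiom with plain malloc()/free(), which share that property; the struct layout and element sizes are placeholders, not the driver's.

#include <stdio.h>
#include <stdlib.h>

struct shampo_info {
	unsigned char *bitmap;
	void *info;
	void *pages;
};

/* Allocate everything, then check once: free(NULL) is a no-op, so a single
 * error path can release whatever subset actually succeeded.
 */
static int shampo_alloc(struct shampo_info *s, size_t n)
{
	s->bitmap = calloc(n / 8 + 1, 1);
	s->info = calloc(n, 64);		/* 64 stands in for sizeof(*info) */
	s->pages = calloc(n, sizeof(void *));
	if (!s->bitmap || !s->info || !s->pages)
		goto err_nomem;

	return 0;

err_nomem:
	free(s->pages);
	free(s->info);
	free(s->bitmap);
	return -1;
}

int main(void)
{
	struct shampo_info s = { 0 };

	if (shampo_alloc(&s, 1024))
		return 1;
	printf("allocated\n");
	free(s.pages);
	free(s.info);
	free(s.bitmap);
	return 0;
}
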
static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node)
@@ -286,13 +293,23 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node)
int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
size_t alloc_size;
- alloc_size = array_size(wq_sz, struct_size(rq->mpwqe.info, alloc_units,
+ alloc_size = array_size(wq_sz, struct_size(rq->mpwqe.info,
+ alloc_units.frag_pages,
rq->mpwqe.pages_per_wqe));
rq->mpwqe.info = kvzalloc_node(alloc_size, GFP_KERNEL, node);
if (!rq->mpwqe.info)
return -ENOMEM;
+ /* For deferred page release (release right before alloc), make sure
+ * that release is not called on the first round.
+ */
+ for (int i = 0; i < wq_sz; i++) {
+ struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, i);
+
+ bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe);
+ }
+
mlx5e_build_umr_wqe(rq, rq->icosq, &rq->mpwqe.umr_wqe);
return 0;
@@ -499,14 +516,12 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
struct mlx5e_wqe_frag_info *prev = NULL;
int i;
- if (rq->xsk_pool) {
- /* Assumptions used by XSK batched allocator. */
- WARN_ON(rq->wqe.info.num_frags != 1);
- WARN_ON(rq->wqe.info.log_num_frags != 0);
- WARN_ON(rq->wqe.info.arr[0].frag_stride != PAGE_SIZE);
- }
+ WARN_ON(rq->xsk_pool);
+
+ next_frag.frag_page = &rq->wqe.alloc_units->frag_pages[0];
- next_frag.au = &rq->wqe.alloc_units[0];
+ /* Skip the first release due to deferred release. */
+ next_frag.flags = BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) {
struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
@@ -516,10 +531,11 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
for (f = 0; f < rq->wqe.info.num_frags; f++, frag++) {
if (next_frag.offset + frag_info[f].frag_stride > PAGE_SIZE) {
- next_frag.au++;
+ /* Pages are assigned at runtime. */
+ next_frag.frag_page++;
next_frag.offset = 0;
if (prev)
- prev->last_in_page = true;
+ prev->flags |= BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE);
}
*frag = next_frag;
@@ -530,25 +546,68 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
}
if (prev)
- prev->last_in_page = true;
+ prev->flags |= BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE);
+}
+
+static void mlx5e_init_xsk_buffs(struct mlx5e_rq *rq)
+{
+ int i;
+
+ /* Assumptions used by XSK batched allocator. */
+ WARN_ON(rq->wqe.info.num_frags != 1);
+ WARN_ON(rq->wqe.info.log_num_frags != 0);
+ WARN_ON(rq->wqe.info.arr[0].frag_stride != PAGE_SIZE);
+
+ /* Given the above assumptions, a fragment maps to a single
+ * xsk_buff.
+ */
+ for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) {
+ rq->wqe.frags[i].xskp = &rq->wqe.alloc_units->xsk_buffs[i];
+
+ /* Skip the first release due to deferred release, as WQEs are
+ * not allocated yet.
+ */
+ rq->wqe.frags[i].flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
+ }
}
-static int mlx5e_init_au_list(struct mlx5e_rq *rq, int wq_sz, int node)
+static int mlx5e_init_wqe_alloc_info(struct mlx5e_rq *rq, int node)
{
+ int wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq);
int len = wq_sz << rq->wqe.info.log_num_frags;
+ struct mlx5e_wqe_frag_info *frags;
+ union mlx5e_alloc_units *aus;
+ int aus_sz;
- rq->wqe.alloc_units = kvzalloc_node(array_size(len, sizeof(*rq->wqe.alloc_units)),
- GFP_KERNEL, node);
- if (!rq->wqe.alloc_units)
+ if (rq->xsk_pool)
+ aus_sz = sizeof(*aus->xsk_buffs);
+ else
+ aus_sz = sizeof(*aus->frag_pages);
+
+ aus = kvzalloc_node(array_size(len, aus_sz), GFP_KERNEL, node);
+ if (!aus)
return -ENOMEM;
- mlx5e_init_frags_partition(rq);
+ frags = kvzalloc_node(array_size(len, sizeof(*frags)), GFP_KERNEL, node);
+ if (!frags) {
+ kvfree(aus);
+ return -ENOMEM;
+ }
+
+ rq->wqe.alloc_units = aus;
+ rq->wqe.frags = frags;
+
+ if (rq->xsk_pool)
+ mlx5e_init_xsk_buffs(rq);
+ else
+ mlx5e_init_frags_partition(rq);
return 0;
}
-static void mlx5e_free_au_list(struct mlx5e_rq *rq)
+static void mlx5e_free_wqe_alloc_info(struct mlx5e_rq *rq)
{
+ kvfree(rq->wqe.frags);
kvfree(rq->wqe.alloc_units);
}
@@ -693,7 +752,6 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
struct mlx5e_rq_param *rqp,
int node, struct mlx5e_rq *rq)
{
- struct page_pool_params pp_params = { 0 };
struct mlx5_core_dev *mdev = rq->mdev;
void *rqc = rqp->rqc;
void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
@@ -745,6 +803,9 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
pool_size = rq->mpwqe.pages_per_wqe <<
mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk);
+ if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk) && params->xdp_prog)
+ pool_size *= 2; /* additional page per packet for the linear part */
+
rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
rq->mpwqe.num_strides =
BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
@@ -778,18 +839,9 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
rq->wqe.info = rqp->frags_info;
rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride;
- rq->wqe.frags =
- kvzalloc_node(array_size(sizeof(*rq->wqe.frags),
- (wq_sz << rq->wqe.info.log_num_frags)),
- GFP_KERNEL, node);
- if (!rq->wqe.frags) {
- err = -ENOMEM;
- goto err_rq_wq_destroy;
- }
-
- err = mlx5e_init_au_list(rq, wq_sz, node);
+ err = mlx5e_init_wqe_alloc_info(rq, node);
if (err)
- goto err_rq_frags;
+ goto err_rq_wq_destroy;
}
if (xsk) {
@@ -798,12 +850,16 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
xsk_pool_set_rxq_info(rq->xsk_pool, &rq->xdp_rxq);
} else {
/* Create a page_pool and register it with rxq */
+ struct page_pool_params pp_params = { 0 };
+
pp_params.order = 0;
- pp_params.flags = 0; /* No-internal DMA mapping in page_pool */
+ pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | PP_FLAG_PAGE_FRAG;
pp_params.pool_size = pool_size;
pp_params.nid = node;
pp_params.dev = rq->pdev;
+ pp_params.napi = rq->cq.napi;
pp_params.dma_dir = rq->buff.map_dir;
+ pp_params.max_len = PAGE_SIZE;
/* page_pool can be used even when there is no rq->xdp_prog,
* given page_pool does not handle DMA mapping there is no
@@ -869,9 +925,6 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
}
- rq->page_cache.head = 0;
- rq->page_cache.tail = 0;
-
return 0;
err_destroy_page_pool:
@@ -888,9 +941,7 @@ err_rq_drop_page:
mlx5e_free_mpwqe_rq_drop_page(rq);
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
- mlx5e_free_au_list(rq);
-err_rq_frags:
- kvfree(rq->wqe.frags);
+ mlx5e_free_wqe_alloc_info(rq);
}
err_rq_wq_destroy:
mlx5_wq_destroy(&rq->wq_ctrl);
@@ -904,7 +955,6 @@ err_rq_xdp_prog:
static void mlx5e_free_rq(struct mlx5e_rq *rq)
{
struct bpf_prog *old_prog;
- int i;
if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) {
old_prog = rcu_dereference_protected(rq->xdp_prog,
@@ -921,17 +971,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
mlx5e_rq_free_shampo(rq);
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
- kvfree(rq->wqe.frags);
- mlx5e_free_au_list(rq);
- }
-
- for (i = rq->page_cache.head; i != rq->page_cache.tail;
- i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) {
- /* With AF_XDP, page_cache is not used, so this loop is not
- * entered, and it's safe to call mlx5e_page_release_dynamic
- * directly.
- */
- mlx5e_page_release_dynamic(rq, rq->page_cache.page_cache[i], false);
+ mlx5e_free_wqe_alloc_info(rq);
}
xdp_rxq_info_unreg(&rq->xdp_rxq);
@@ -1094,7 +1134,7 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
return -ETIMEDOUT;
}
-void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq)
+void mlx5e_free_rx_missing_descs(struct mlx5e_rq *rq)
{
struct mlx5_wq_ll *wq;
u16 head;
@@ -1106,8 +1146,12 @@ void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq)
wq = &rq->mpwqe.wq;
head = wq->head;
- /* Outstanding UMR WQEs (in progress) start at wq->head */
- for (i = 0; i < rq->mpwqe.umr_in_progress; i++) {
+ /* Release WQEs that are in missing state: they have been
+ * popped from the list after completion but were not freed
+ * due to deferred release.
+ * Also free the linked-list reserved entry, hence the "+ 1".
+ */
+ for (i = 0; i < mlx5_wq_ll_missing(wq) + 1; i++) {
rq->dealloc_wqe(rq, head);
head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
}
@@ -1134,7 +1178,7 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
- mlx5e_free_rx_in_progress_descs(rq);
+ mlx5e_free_rx_missing_descs(rq);
while (!mlx5_wq_ll_is_empty(wq)) {
struct mlx5e_rx_wqe_ll *wqe;
@@ -1152,12 +1196,21 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
0, true);
} else {
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ u16 missing = mlx5_wq_cyc_missing(wq);
+ u16 head = mlx5_wq_cyc_get_head(wq);
while (!mlx5_wq_cyc_is_empty(wq)) {
wqe_ix = mlx5_wq_cyc_get_tail(wq);
rq->dealloc_wqe(rq, wqe_ix);
mlx5_wq_cyc_pop(wq);
}
+ /* Missing slots might also contain unreleased pages due to
+ * deferred release.
+ */
+ while (missing--) {
+ wqe_ix = mlx5_wq_cyc_ctr2ix(wq, head++);
+ rq->dealloc_wqe(rq, wqe_ix);
+ }
}
}
@@ -1188,7 +1241,7 @@ int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
__set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state);
if (params->rx_dim_enabled)
- __set_bit(MLX5E_RQ_STATE_AM, &rq->state);
+ __set_bit(MLX5E_RQ_STATE_DIM, &rq->state);
/* We disable csum_complete when XDP is enabled since
* XDP programs might manipulate packets which will render
@@ -1251,17 +1304,19 @@ static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa)
{
struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
- int dsegs_per_wq = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
+ int entries = wq_sz * MLX5_SEND_WQEBB_NUM_DS * 2; /* upper bound on the number of
+ * entries across all xmit_modes.
+ */
size_t size;
- size = array_size(sizeof(*xdpi_fifo->xi), dsegs_per_wq);
+ size = array_size(sizeof(*xdpi_fifo->xi), entries);
xdpi_fifo->xi = kvzalloc_node(size, GFP_KERNEL, numa);
if (!xdpi_fifo->xi)
return -ENOMEM;
xdpi_fifo->pc = &sq->xdpi_fifo_pc;
xdpi_fifo->cc = &sq->xdpi_fifo_cc;
- xdpi_fifo->mask = dsegs_per_wq - 1;
+ xdpi_fifo->mask = entries - 1;
return 0;
}
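
The xdpi FIFO is sized to a power of two and indexed with free-running producer/consumer counters masked by (entries - 1), so the counters never need to wrap explicitly. A minimal sketch of that ring discipline; the element type, sizes, and helper names are illustrative.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct fifo {
	uint32_t *xi;
	uint32_t mask;		/* entries - 1, entries must be a power of two */
	uint32_t pc;		/* producer counter, free-running */
	uint32_t cc;		/* consumer counter, free-running */
};

static int fifo_init(struct fifo *f, uint32_t entries)
{
	if (entries == 0 || (entries & (entries - 1)))
		return -1;	/* not a power of two */
	f->xi = calloc(entries, sizeof(*f->xi));
	if (!f->xi)
		return -1;
	f->mask = entries - 1;
	f->pc = f->cc = 0;
	return 0;
}

static void fifo_push(struct fifo *f, uint32_t v)
{
	f->xi[f->pc++ & f->mask] = v;	/* caller guarantees there is room */
}

static uint32_t fifo_pop(struct fifo *f)
{
	return f->xi[f->cc++ & f->mask];
}

int main(void)
{
	struct fifo f;
	uint32_t i;

	if (fifo_init(&f, 8))
		return 1;
	for (i = 0; i < 5; i++)
		fifo_push(&f, i * i);
	while (f.cc != f.pc)
		printf("%u\n", fifo_pop(&f));
	free(f.xi);
	return 0;
}
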
@@ -1664,7 +1719,7 @@ int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix,
mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate);
if (params->tx_dim_enabled)
- sq->state |= BIT(MLX5E_SQ_STATE_AM);
+ sq->state |= BIT(MLX5E_SQ_STATE_DIM);
return 0;
@@ -1811,11 +1866,7 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
csp.min_inline_mode = sq->min_inline_mode;
set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
- /* Don't enable multi buffer on XDP_REDIRECT SQ, as it's not yet
- * supported by upstream, and there is no defined trigger to allow
- * transmitting redirected multi-buffer frames.
- */
- if (param->is_xdp_mb && !is_redirect)
+ if (param->is_xdp_mb)
set_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state);
err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn);
@@ -1839,7 +1890,6 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i);
struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
- struct mlx5_wqe_data_seg *dseg;
sq->db.wqe_info[i] = (struct mlx5e_xdp_wqe_info) {
.num_wqebbs = 1,
@@ -1848,9 +1898,6 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
-
- dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
- dseg->lkey = sq->mkey_be;
}
}
@@ -4017,9 +4064,9 @@ void mlx5e_set_xdp_feature(struct net_device *netdev)
val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
NETDEV_XDP_ACT_XSK_ZEROCOPY |
- NETDEV_XDP_ACT_NDO_XMIT;
- if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC)
- val |= NETDEV_XDP_ACT_RX_SG;
+ NETDEV_XDP_ACT_RX_SG |
+ NETDEV_XDP_ACT_NDO_XMIT |
+ NETDEV_XDP_ACT_NDO_XMIT_SG;
xdp_set_features_flag(netdev, val);
}
@@ -4213,19 +4260,24 @@ static bool mlx5e_params_validate_xdp(struct net_device *netdev,
/* No XSK params: AF_XDP can't be enabled yet at the point of setting
* the XDP program.
*/
- is_linear = mlx5e_rx_is_linear_skb(mdev, params, NULL);
-
- if (!is_linear && params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) {
- netdev_warn(netdev, "XDP is not allowed with striding RQ and MTU(%d) > %d\n",
- params->sw_mtu,
- mlx5e_xdp_max_mtu(params, NULL));
- return false;
- }
- if (!is_linear && !params->xdp_prog->aux->xdp_has_frags) {
- netdev_warn(netdev, "MTU(%d) > %d, too big for an XDP program not aware of multi buffer\n",
- params->sw_mtu,
- mlx5e_xdp_max_mtu(params, NULL));
- return false;
+ is_linear = params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC ?
+ mlx5e_rx_is_linear_skb(mdev, params, NULL) :
+ mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL);
+
+ if (!is_linear) {
+ if (!params->xdp_prog->aux->xdp_has_frags) {
+ netdev_warn(netdev, "MTU(%d) > %d, too big for an XDP program not aware of multi buffer\n",
+ params->sw_mtu,
+ mlx5e_xdp_max_mtu(params, NULL));
+ return false;
+ }
+ if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
+ !mlx5e_verify_params_rx_mpwqe_strides(mdev, params, NULL)) {
+ netdev_warn(netdev, "XDP is not allowed with striding RQ and MTU(%d) > %d\n",
+ params->sw_mtu,
+ mlx5e_xdp_max_mtu(params, NULL));
+ return false;
+ }
}
return true;
@@ -4717,20 +4769,15 @@ static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue)
queue_work(priv->wq, &priv->tx_timeout_work);
}
-static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
+static int mlx5e_xdp_allowed(struct net_device *netdev, struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
{
- struct net_device *netdev = priv->netdev;
- struct mlx5e_params new_params;
-
- if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
+ if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
netdev_warn(netdev, "can't set XDP while HW-GRO/LRO is on, disable them first\n");
return -EINVAL;
}
- new_params = priv->channels.params;
- new_params.xdp_prog = prog;
-
- if (!mlx5e_params_validate_xdp(netdev, priv->mdev, &new_params))
+ if (!mlx5e_params_validate_xdp(netdev, mdev, params))
return -EINVAL;
return 0;
@@ -4757,8 +4804,11 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
mutex_lock(&priv->state_lock);
+ new_params = priv->channels.params;
+ new_params.xdp_prog = prog;
+
if (prog) {
- err = mlx5e_xdp_allowed(priv, prog);
+ err = mlx5e_xdp_allowed(netdev, priv->mdev, &new_params);
if (err)
goto unlock;
}
@@ -4766,22 +4816,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
/* no need for full reset when exchanging programs */
reset = (!priv->channels.params.xdp_prog || !prog);
- new_params = priv->channels.params;
- new_params.xdp_prog = prog;
-
- /* XDP affects striding RQ parameters. Block XDP if striding RQ won't be
- * supported with the new parameters: if PAGE_SIZE is bigger than
- * MLX5_MPWQE_LOG_STRIDE_SZ_MAX, striding RQ can't be used, even though
- * the MTU is small enough for the linear mode, because XDP uses strides
- * of PAGE_SIZE on regular RQs.
- */
- if (reset && MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
- /* Checking for regular RQs here; XSK RQs were checked on XSK bind. */
- err = mlx5e_mpwrq_validate_regular(priv->mdev, &new_params);
- if (err)
- goto unlock;
- }
-
old_prog = priv->channels.params.xdp_prog;
err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset);
@@ -5076,6 +5110,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->vlan_features |= NETIF_F_SG;
netdev->vlan_features |= NETIF_F_HW_CSUM;
+ netdev->vlan_features |= NETIF_F_HW_MACSEC;
netdev->vlan_features |= NETIF_F_GRO;
netdev->vlan_features |= NETIF_F_TSO;
netdev->vlan_features |= NETIF_F_TSO6;
@@ -5270,6 +5305,7 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
mlx5e_health_destroy_reporters(priv);
mlx5e_ktls_cleanup(priv);
mlx5e_fs_cleanup(priv->fs);
+ priv->fs = NULL;
}
static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
@@ -5725,8 +5761,8 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv)
/* Validate the max_wqe_size_sq capability. */
if (WARN_ON_ONCE(mlx5e_get_max_sq_wqebbs(priv->mdev) < MLX5E_MAX_TX_WQEBBS)) {
- mlx5_core_warn(priv->mdev, "MLX5E: Max SQ WQEBBs firmware capability: %u, needed %lu\n",
- mlx5e_get_max_sq_wqebbs(priv->mdev), MLX5E_MAX_TX_WQEBBS);
+ mlx5_core_warn(priv->mdev, "MLX5E: Max SQ WQEBBs firmware capability: %u, needed %u\n",
+ mlx5e_get_max_sq_wqebbs(priv->mdev), (unsigned int)MLX5E_MAX_TX_WQEBBS);
return -EIO;
}
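
The reworked mlx5e_params_validate_xdp() above reduces to a small predicate: with multi-buffer XDP now permitted on both RQ types, a non-linear RX path only needs an xdp_has_frags-aware program, and striding RQ additionally needs stride sizes the device can express. A stand-alone sketch of that decision (hypothetical names, not the driver function):

/* Sketch only: mirrors the branching of the hunk above with fake inputs. */
#include <stdbool.h>
#include <stdio.h>

enum rq_type { RQ_CYCLIC, RQ_STRIDING };

static bool xdp_mtu_ok(enum rq_type type, bool rx_is_linear,
		       bool prog_has_frags, bool strides_ok)
{
	if (rx_is_linear)
		return true;            /* single-fragment RX, nothing to check */
	if (!prog_has_frags)
		return false;           /* program cannot handle multi-buffer */
	if (type == RQ_STRIDING && !strides_ok)
		return false;           /* MTU too big for the striding layout */
	return true;
}

int main(void)
{
	printf("%d\n", xdp_mtu_ok(RQ_STRIDING, false, true, true));  /* 1 */
	printf("%d\n", xdp_mtu_ok(RQ_CYCLIC, false, false, true));   /* 0 */
	return 0;
}
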
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 8ff654b4e9e1..1fc386eccaf8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -53,6 +53,7 @@
#include "lib/vxlan.h"
#define CREATE_TRACE_POINTS
#include "diag/en_rep_tracepoint.h"
+#include "diag/reporter_vnic.h"
#include "en_accel/ipsec.h"
#include "en/tc/int_port.h"
#include "en/ptp.h"
@@ -828,6 +829,7 @@ static int mlx5e_init_ul_rep(struct mlx5_core_dev *mdev,
static void mlx5e_cleanup_rep(struct mlx5e_priv *priv)
{
mlx5e_fs_cleanup(priv->fs);
+ priv->fs = NULL;
}
static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv)
@@ -994,6 +996,7 @@ err_close_drop_rq:
priv->rx_res = NULL;
err_free_fs:
mlx5e_fs_cleanup(priv->fs);
+ priv->fs = NULL;
return err;
}
@@ -1294,6 +1297,50 @@ static unsigned int mlx5e_ul_rep_stats_grps_num(struct mlx5e_priv *priv)
return ARRAY_SIZE(mlx5e_ul_rep_stats_grps);
}
+static int
+mlx5e_rep_vnic_reporter_diagnose(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_rep_priv *rpriv = devlink_health_reporter_priv(reporter);
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+
+ return mlx5_reporter_vnic_diagnose_counters(rep->esw->dev, fmsg,
+ rep->vport, true);
+}
+
+static const struct devlink_health_reporter_ops mlx5_rep_vnic_reporter_ops = {
+ .name = "vnic",
+ .diagnose = mlx5e_rep_vnic_reporter_diagnose,
+};
+
+static void mlx5e_rep_vnic_reporter_create(struct mlx5e_priv *priv,
+ struct devlink_port *dl_port)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct devlink_health_reporter *reporter;
+
+ reporter = devl_port_health_reporter_create(dl_port,
+ &mlx5_rep_vnic_reporter_ops,
+ 0, rpriv);
+ if (IS_ERR(reporter)) {
+ mlx5_core_err(priv->mdev,
+ "Failed to create representor vnic reporter, err = %ld\n",
+ PTR_ERR(reporter));
+ return;
+ }
+
+ rpriv->rep_vnic_reporter = reporter;
+}
+
+static void mlx5e_rep_vnic_reporter_destroy(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ if (!IS_ERR_OR_NULL(rpriv->rep_vnic_reporter))
+ devl_health_reporter_destroy(rpriv->rep_vnic_reporter);
+}
+
static const struct mlx5e_profile mlx5e_rep_profile = {
.init = mlx5e_init_rep,
.cleanup = mlx5e_cleanup_rep,
@@ -1394,8 +1441,10 @@ mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch,
rpriv->rep->vport);
- if (dl_port)
+ if (dl_port) {
SET_NETDEV_DEVLINK_PORT(netdev, dl_port);
+ mlx5e_rep_vnic_reporter_create(priv, dl_port);
+ }
err = register_netdev(netdev);
if (err) {
@@ -1408,8 +1457,8 @@ mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
return 0;
err_detach_netdev:
+ mlx5e_rep_vnic_reporter_destroy(priv);
mlx5e_detach_netdev(netdev_priv(netdev));
-
err_cleanup_profile:
priv->profile->cleanup(priv);
@@ -1458,6 +1507,7 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
}
unregister_netdev(netdev);
+ mlx5e_rep_vnic_reporter_destroy(priv);
mlx5e_detach_netdev(priv);
priv->profile->cleanup(priv);
mlx5e_destroy_netdev(priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index dcfad0bf0f45..80b7f5079a5a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -118,6 +118,7 @@ struct mlx5e_rep_priv {
struct rtnl_link_stats64 prev_vf_vport_stats;
struct mlx5_flow_handle *send_to_vport_meta_rule;
struct rhashtable tc_ht;
+ struct devlink_health_reporter *rep_vnic_reporter;
};
static inline
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 3f7b63d6616b..69634829558e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -271,98 +271,35 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem);
}
-static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, struct page *page)
-{
- struct mlx5e_page_cache *cache = &rq->page_cache;
- u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1);
- struct mlx5e_rq_stats *stats = rq->stats;
-
- if (tail_next == cache->head) {
- stats->cache_full++;
- return false;
- }
-
- if (!dev_page_is_reusable(page)) {
- stats->cache_waive++;
- return false;
- }
-
- cache->page_cache[cache->tail] = page;
- cache->tail = tail_next;
- return true;
-}
-
-static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, union mlx5e_alloc_unit *au)
-{
- struct mlx5e_page_cache *cache = &rq->page_cache;
- struct mlx5e_rq_stats *stats = rq->stats;
- dma_addr_t addr;
-
- if (unlikely(cache->head == cache->tail)) {
- stats->cache_empty++;
- return false;
- }
-
- if (page_ref_count(cache->page_cache[cache->head]) != 1) {
- stats->cache_busy++;
- return false;
- }
-
- au->page = cache->page_cache[cache->head];
- cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1);
- stats->cache_reuse++;
+#define MLX5E_PAGECNT_BIAS_MAX (PAGE_SIZE / 64)
- addr = page_pool_get_dma_addr(au->page);
- /* Non-XSK always uses PAGE_SIZE. */
- dma_sync_single_for_device(rq->pdev, addr, PAGE_SIZE, rq->buff.map_dir);
- return true;
-}
-
-static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, union mlx5e_alloc_unit *au)
+static int mlx5e_page_alloc_fragmented(struct mlx5e_rq *rq,
+ struct mlx5e_frag_page *frag_page)
{
- dma_addr_t addr;
+ struct page *page;
- if (mlx5e_rx_cache_get(rq, au))
- return 0;
-
- au->page = page_pool_dev_alloc_pages(rq->page_pool);
- if (unlikely(!au->page))
+ page = page_pool_dev_alloc_pages(rq->page_pool);
+ if (unlikely(!page))
return -ENOMEM;
- /* Non-XSK always uses PAGE_SIZE. */
- addr = dma_map_page(rq->pdev, au->page, 0, PAGE_SIZE, rq->buff.map_dir);
- if (unlikely(dma_mapping_error(rq->pdev, addr))) {
- page_pool_recycle_direct(rq->page_pool, au->page);
- au->page = NULL;
- return -ENOMEM;
- }
- page_pool_set_dma_addr(au->page, addr);
+ page_pool_fragment_page(page, MLX5E_PAGECNT_BIAS_MAX);
+
+ *frag_page = (struct mlx5e_frag_page) {
+ .page = page,
+ .frags = 0,
+ };
return 0;
}
-void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page)
+static void mlx5e_page_release_fragmented(struct mlx5e_rq *rq,
+ struct mlx5e_frag_page *frag_page)
{
- dma_addr_t dma_addr = page_pool_get_dma_addr(page);
+ u16 drain_count = MLX5E_PAGECNT_BIAS_MAX - frag_page->frags;
+ struct page *page = frag_page->page;
- dma_unmap_page_attrs(rq->pdev, dma_addr, PAGE_SIZE, rq->buff.map_dir,
- DMA_ATTR_SKIP_CPU_SYNC);
- page_pool_set_dma_addr(page, 0);
-}
-
-void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle)
-{
- if (likely(recycle)) {
- if (mlx5e_rx_cache_put(rq, page))
- return;
-
- mlx5e_page_dma_unmap(rq, page);
- page_pool_recycle_direct(rq->page_pool, page);
- } else {
- mlx5e_page_dma_unmap(rq, page);
- page_pool_release_page(rq->page_pool, page);
- put_page(page);
- }
+ if (page_pool_defrag_page(page, drain_count) == 0)
+ page_pool_put_defragged_page(rq->page_pool, page, -1, true);
}
static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq,
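
The two helpers above replace the old per-RQ page cache with page_pool fragment accounting: each page is pre-charged with MLX5E_PAGECNT_BIAS_MAX, frag_page->frags counts the references the driver actually hands out, and the release path drains only the unused remainder, so the page returns to the pool once every handed-out fragment is dropped as well. A minimal stand-alone sketch of that bookkeeping (hypothetical names, not the driver or page_pool code):

/* Sketch only: illustrates the bias/drain invariant of the hunk above. */
#include <assert.h>
#include <stdio.h>

#define BIAS_MAX 64 /* stands in for MLX5E_PAGECNT_BIAS_MAX */

struct fake_page {
	long pp_frag_count; /* what page_pool_fragment_page() would set */
};

struct fake_frag_page {
	struct fake_page *page;
	unsigned int frags; /* references handed out by the driver */
};

static void alloc_fragmented(struct fake_frag_page *fp, struct fake_page *p)
{
	p->pp_frag_count = BIAS_MAX; /* pre-charge the page with a large bias */
	fp->page = p;
	fp->frags = 0;
}

/* Returns 1 when the page would go back to the pool. */
static int release_fragmented(struct fake_frag_page *fp)
{
	long drain = BIAS_MAX - fp->frags; /* undo the unused part of the bias */

	fp->page->pp_frag_count -= drain;
	return fp->page->pp_frag_count == 0;
}

int main(void)
{
	struct fake_page page;
	struct fake_frag_page fp;

	alloc_fragmented(&fp, &page);
	fp.frags += 2;                     /* e.g. two skb fragments built on it */
	assert(!release_fragmented(&fp));  /* still held by those two fragments */
	page.pp_frag_count -= 2;           /* both fragments dropped later */
	assert(page.pp_frag_count == 0);   /* only now would it be recycled */
	printf("page drained\n");
	return 0;
}
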
@@ -371,22 +308,31 @@ static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq,
int err = 0;
if (!frag->offset)
- /* On first frag (offset == 0), replenish page (alloc_unit actually).
- * Other frags that point to the same alloc_unit (with a different
+ /* On first frag (offset == 0), replenish page.
+ * Other frags that point to the same page (with a different
* offset) should just use the new one without replenishing again
* by themselves.
*/
- err = mlx5e_page_alloc_pool(rq, frag->au);
+ err = mlx5e_page_alloc_fragmented(rq, frag->frag_page);
return err;
}
+static bool mlx5e_frag_can_release(struct mlx5e_wqe_frag_info *frag)
+{
+#define CAN_RELEASE_MASK \
+ (BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE) | BIT(MLX5E_WQE_FRAG_SKIP_RELEASE))
+
+#define CAN_RELEASE_VALUE BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE)
+
+ return (frag->flags & CAN_RELEASE_MASK) == CAN_RELEASE_VALUE;
+}
+
static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq,
- struct mlx5e_wqe_frag_info *frag,
- bool recycle)
+ struct mlx5e_wqe_frag_info *frag)
{
- if (frag->last_in_page)
- mlx5e_page_release_dynamic(rq, frag->au->page, recycle);
+ if (mlx5e_frag_can_release(frag))
+ mlx5e_page_release_fragmented(rq, frag->frag_page);
}
static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix)
@@ -409,8 +355,10 @@ static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe,
if (unlikely(err))
goto free_frags;
+ frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
+
headroom = i == 0 ? rq->buff.headroom : 0;
- addr = page_pool_get_dma_addr(frag->au->page);
+ addr = page_pool_get_dma_addr(frag->frag_page->page);
wqe->data[i].addr = cpu_to_be64(addr + frag->offset + headroom);
}
@@ -418,35 +366,66 @@ static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe,
free_frags:
while (--i >= 0)
- mlx5e_put_rx_frag(rq, --frag, true);
+ mlx5e_put_rx_frag(rq, --frag);
return err;
}
static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq,
- struct mlx5e_wqe_frag_info *wi,
- bool recycle)
+ struct mlx5e_wqe_frag_info *wi)
{
int i;
- if (rq->xsk_pool) {
- /* The `recycle` parameter is ignored, and the page is always
- * put into the Reuse Ring, because there is no way to return
- * the page to the userspace when the interface goes down.
- */
- xsk_buff_free(wi->au->xsk);
- return;
- }
-
for (i = 0; i < rq->wqe.info.num_frags; i++, wi++)
- mlx5e_put_rx_frag(rq, wi, recycle);
+ mlx5e_put_rx_frag(rq, wi);
+}
+
+static void mlx5e_xsk_free_rx_wqe(struct mlx5e_wqe_frag_info *wi)
+{
+ if (!(wi->flags & BIT(MLX5E_WQE_FRAG_SKIP_RELEASE)))
+ xsk_buff_free(*wi->xskp);
}
static void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
{
struct mlx5e_wqe_frag_info *wi = get_frag(rq, ix);
- mlx5e_free_rx_wqe(rq, wi, false);
+ if (rq->xsk_pool)
+ mlx5e_xsk_free_rx_wqe(wi);
+ else
+ mlx5e_free_rx_wqe(rq, wi);
+}
+
+static void mlx5e_xsk_free_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
+{
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ int i;
+
+ for (i = 0; i < wqe_bulk; i++) {
+ int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
+ struct mlx5e_wqe_frag_info *wi;
+
+ wi = get_frag(rq, j);
+ /* The page is always put into the Reuse Ring, because there
+ * is no way to return the page to the userspace when the
+ * interface goes down.
+ */
+ mlx5e_xsk_free_rx_wqe(wi);
+ }
+}
+
+static void mlx5e_free_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
+{
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ int i;
+
+ for (i = 0; i < wqe_bulk; i++) {
+ int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
+ struct mlx5e_wqe_frag_info *wi;
+
+ wi = get_frag(rq, j);
+ mlx5e_free_rx_wqe(rq, wi);
+ }
}
static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
@@ -467,18 +446,71 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
return i;
}
+static int mlx5e_refill_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
+{
+ int remaining = wqe_bulk;
+ int i = 0;
+
+ /* The WQE bulk is split into smaller bulks that are sized
+ * according to the page pool cache refill size to avoid overflowing
+ * the page pool cache due to too many page releases at once.
+ */
+ do {
+ int refill = min_t(u16, rq->wqe.info.refill_unit, remaining);
+ int alloc_count;
+
+ mlx5e_free_rx_wqes(rq, ix + i, refill);
+ alloc_count = mlx5e_alloc_rx_wqes(rq, ix + i, refill);
+ i += alloc_count;
+ if (unlikely(alloc_count != refill))
+ break;
+
+ remaining -= refill;
+ } while (remaining);
+
+ return i;
+}
+
+static void
+mlx5e_add_skb_shared_info_frag(struct mlx5e_rq *rq, struct skb_shared_info *sinfo,
+ struct xdp_buff *xdp, struct mlx5e_frag_page *frag_page,
+ u32 frag_offset, u32 len)
+{
+ skb_frag_t *frag;
+
+ dma_addr_t addr = page_pool_get_dma_addr(frag_page->page);
+
+ dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, rq->buff.map_dir);
+ if (!xdp_buff_has_frags(xdp)) {
+ /* Init on the first fragment to avoid cold cache access
+ * when possible.
+ */
+ sinfo->nr_frags = 0;
+ sinfo->xdp_frags_size = 0;
+ xdp_buff_set_frags_flag(xdp);
+ }
+
+ frag = &sinfo->frags[sinfo->nr_frags++];
+ __skb_frag_set_page(frag, frag_page->page);
+ skb_frag_off_set(frag, frag_offset);
+ skb_frag_size_set(frag, len);
+
+ if (page_is_pfmemalloc(frag_page->page))
+ xdp_buff_set_frag_pfmemalloc(xdp);
+ sinfo->xdp_frags_size += len;
+}
+
static inline void
mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb,
- union mlx5e_alloc_unit *au, u32 frag_offset, u32 len,
+ struct page *page, u32 frag_offset, u32 len,
unsigned int truesize)
{
- dma_addr_t addr = page_pool_get_dma_addr(au->page);
+ dma_addr_t addr = page_pool_get_dma_addr(page);
dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len,
rq->buff.map_dir);
- page_ref_inc(au->page);
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
- au->page, frag_offset, len, truesize);
+ page, frag_offset, len, truesize);
}
static inline void
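
mlx5e_refill_rx_wqes() above interleaves frees and allocations in refill_unit-sized chunks, so the page pool cache never has to absorb one large batch of releases at once. A stand-alone sketch of the loop shape (hypothetical names and sizes):

/* Sketch only: chunked free-then-alloc loop with an early stop on shortfall. */
#include <stdio.h>

#define REFILL_UNIT 8

/* Pretend allocator: returns how many of 'n' entries it managed to fill. */
static int alloc_entries(int n)
{
	return n; /* always succeeds in this sketch */
}

static int refill(int wqe_bulk)
{
	int remaining = wqe_bulk;
	int done = 0;

	do {
		int chunk = remaining < REFILL_UNIT ? remaining : REFILL_UNIT;
		int got;

		/* the matching free of 'chunk' old entries would run here,
		 * right before the allocation, keeping the pool cache balanced
		 */
		got = alloc_entries(chunk);
		done += got;
		if (got != chunk)
			break;          /* partial allocation: stop early */
		remaining -= chunk;
	} while (remaining);

	return done;
}

int main(void)
{
	printf("refilled %d of 20\n", refill(20)); /* 20 */
	return 0;
}
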
@@ -496,30 +528,36 @@ mlx5e_copy_skb_header(struct mlx5e_rq *rq, struct sk_buff *skb,
}
static void
-mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle)
+mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi)
{
- union mlx5e_alloc_unit *alloc_units = wi->alloc_units;
bool no_xdp_xmit;
int i;
/* A common case for AF_XDP. */
- if (bitmap_full(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe))
+ if (bitmap_full(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe))
return;
- no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
+ no_xdp_xmit = bitmap_empty(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe);
if (rq->xsk_pool) {
- /* The `recycle` parameter is ignored, and the page is always
- * put into the Reuse Ring, because there is no way to return
- * the page to the userspace when the interface goes down.
+ struct xdp_buff **xsk_buffs = wi->alloc_units.xsk_buffs;
+
+ /* The page is always put into the Reuse Ring, because there
+ * is no way to return the page to userspace when the interface
+ * goes down.
*/
for (i = 0; i < rq->mpwqe.pages_per_wqe; i++)
- if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
- xsk_buff_free(alloc_units[i].xsk);
+ if (no_xdp_xmit || !test_bit(i, wi->skip_release_bitmap))
+ xsk_buff_free(xsk_buffs[i]);
} else {
- for (i = 0; i < rq->mpwqe.pages_per_wqe; i++)
- if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
- mlx5e_page_release_dynamic(rq, alloc_units[i].page, recycle);
+ for (i = 0; i < rq->mpwqe.pages_per_wqe; i++) {
+ if (no_xdp_xmit || !test_bit(i, wi->skip_release_bitmap)) {
+ struct mlx5e_frag_page *frag_page;
+
+ frag_page = &wi->alloc_units.frag_pages[i];
+ mlx5e_page_release_fragmented(rq, frag_page);
+ }
+ }
}
}
@@ -583,7 +621,8 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
u16 entries, pi, header_offset, err, wqe_bbs, new_entries;
u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey;
- struct page *page = shampo->last_page;
+ u16 page_index = shampo->curr_page_index;
+ struct mlx5e_frag_page *frag_page;
u64 addr = shampo->last_addr;
struct mlx5e_dma_info *dma_info;
struct mlx5e_umr_wqe *umr_wqe;
@@ -597,6 +636,8 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
build_klm_umr(sq, umr_wqe, shampo->key, index, entries, wqe_bbs);
+ frag_page = &shampo->pages[page_index];
+
for (i = 0; i < entries; i++, index++) {
dma_info = &shampo->info[index];
if (i >= klm_entries || (index < shampo->pi && shampo->pi - index <
@@ -605,16 +646,20 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) <<
MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE;
if (!(header_offset & (PAGE_SIZE - 1))) {
- union mlx5e_alloc_unit au;
+ page_index = (page_index + 1) & (shampo->hd_per_wq - 1);
+ frag_page = &shampo->pages[page_index];
- err = mlx5e_page_alloc_pool(rq, &au);
+ err = mlx5e_page_alloc_fragmented(rq, frag_page);
if (unlikely(err))
goto err_unmap;
- page = dma_info->page = au.page;
- addr = dma_info->addr = page_pool_get_dma_addr(au.page);
+
+ addr = page_pool_get_dma_addr(frag_page->page);
+
+ dma_info->addr = addr;
+ dma_info->frag_page = frag_page;
} else {
dma_info->addr = addr + header_offset;
- dma_info->page = page;
+ dma_info->frag_page = frag_page;
}
update_klm:
@@ -632,7 +677,7 @@ update_klm:
};
shampo->pi = (shampo->pi + new_entries) & (shampo->hd_per_wq - 1);
- shampo->last_page = page;
+ shampo->curr_page_index = page_index;
shampo->last_addr = addr;
sq->pc += wqe_bbs;
sq->doorbell_cseg = &umr_wqe->ctrl;
@@ -644,7 +689,7 @@ err_unmap:
dma_info = &shampo->info[--index];
if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) {
dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE);
- mlx5e_page_release_dynamic(rq, dma_info->page, true);
+ mlx5e_page_release_fragmented(rq, dma_info->frag_page);
}
}
rq->stats->buff_alloc_err++;
@@ -693,8 +738,8 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq)
static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
- union mlx5e_alloc_unit *au = &wi->alloc_units[0];
struct mlx5e_icosq *sq = rq->icosq;
+ struct mlx5e_frag_page *frag_page;
struct mlx5_wq_cyc *wq = &sq->wq;
struct mlx5e_umr_wqe *umr_wqe;
u32 offset; /* 17-bit value with MTT. */
@@ -712,13 +757,15 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));
- for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, au++) {
+ frag_page = &wi->alloc_units.frag_pages[0];
+
+ for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, frag_page++) {
dma_addr_t addr;
- err = mlx5e_page_alloc_pool(rq, au);
+ err = mlx5e_page_alloc_fragmented(rq, frag_page);
if (unlikely(err))
goto err_unmap;
- addr = page_pool_get_dma_addr(au->page);
+ addr = page_pool_get_dma_addr(frag_page->page);
umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
.ptag = cpu_to_be64(addr | MLX5_EN_WR),
};
@@ -735,7 +782,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
sizeof(*umr_wqe->inline_mtts) * pad);
}
- bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
+ bitmap_zero(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe);
wi->consumed_strides = 0;
umr_wqe->ctrl.opmod_idx_opcode =
@@ -759,8 +806,8 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
err_unmap:
while (--i >= 0) {
- au--;
- mlx5e_page_release_dynamic(rq, au->page, true);
+ frag_page--;
+ mlx5e_page_release_fragmented(rq, frag_page);
}
err:
@@ -778,8 +825,8 @@ err:
void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close)
{
struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+ struct mlx5e_frag_page *deleted_page = NULL;
int hd_per_wq = shampo->hd_per_wq;
- struct page *deleted_page = NULL;
struct mlx5e_dma_info *hd_info;
int i, index = start;
@@ -792,10 +839,12 @@ void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close
hd_info = &shampo->info[index];
hd_info->addr = ALIGN_DOWN(hd_info->addr, PAGE_SIZE);
- if (hd_info->page != deleted_page) {
- deleted_page = hd_info->page;
- mlx5e_page_release_dynamic(rq, hd_info->page, false);
+ if (hd_info->frag_page && hd_info->frag_page != deleted_page) {
+ deleted_page = hd_info->frag_page;
+ mlx5e_page_release_fragmented(rq, hd_info->frag_page);
}
+
+ hd_info->frag_page = NULL;
}
if (start + len > hd_per_wq) {
@@ -810,8 +859,13 @@ void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close
static void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
- /* Don't recycle, this function is called on rq/netdev close */
- mlx5e_free_rx_mpwqe(rq, wi, false);
+ /* This function is called on rq/netdev close. */
+ mlx5e_free_rx_mpwqe(rq, wi);
+
+ /* Avoid a second release of the wqe pages: dealloc is also called
+ * for missing wqes on an already flushed RQ.
+ */
+ bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe);
}
INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
@@ -838,17 +892,20 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
*/
wqe_bulk -= (head + wqe_bulk) & rq->wqe.info.wqe_index_mask;
- if (!rq->xsk_pool)
- count = mlx5e_alloc_rx_wqes(rq, head, wqe_bulk);
- else if (likely(!rq->xsk_pool->dma_need_sync))
+ if (!rq->xsk_pool) {
+ count = mlx5e_refill_rx_wqes(rq, head, wqe_bulk);
+ } else if (likely(!rq->xsk_pool->dma_need_sync)) {
+ mlx5e_xsk_free_rx_wqes(rq, head, wqe_bulk);
count = mlx5e_xsk_alloc_rx_wqes_batched(rq, head, wqe_bulk);
- else
+ } else {
+ mlx5e_xsk_free_rx_wqes(rq, head, wqe_bulk);
/* If dma_need_sync is true, it's more efficient to call
* xsk_buff_alloc in a loop, rather than xsk_buff_alloc_batch,
* because the latter does the same check and returns only one
* frame.
*/
count = mlx5e_xsk_alloc_rx_wqes(rq, head, wqe_bulk);
+ }
mlx5_wq_cyc_push_n(wq, count);
if (unlikely(count != wqe_bulk)) {
@@ -1029,6 +1086,11 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
head = rq->mpwqe.actual_wq_head;
i = missing;
do {
+ struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, head);
+
+ /* Deferred free for better page pool cache usage. */
+ mlx5e_free_rx_mpwqe(rq, wi);
+
alloc_err = rq->xsk_pool ? mlx5e_xsk_alloc_rx_mpwqe(rq, head) :
mlx5e_alloc_rx_mpwqe(rq, head);
@@ -1133,7 +1195,7 @@ static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index)
struct mlx5e_dma_info *last_head = &rq->mpwqe.shampo->info[header_index];
u16 head_offset = (last_head->addr & (PAGE_SIZE - 1)) + rq->buff.headroom;
- return page_address(last_head->page) + head_offset;
+ return page_address(last_head->frag_page->page) + head_offset;
}
static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4)
@@ -1573,10 +1635,10 @@ struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va,
}
static void mlx5e_fill_mxbuf(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
- void *va, u16 headroom, u32 len,
+ void *va, u16 headroom, u32 frame_sz, u32 len,
struct mlx5e_xdp_buff *mxbuf)
{
- xdp_init_buff(&mxbuf->xdp, rq->buff.frame0_sz, &rq->xdp_rxq);
+ xdp_init_buff(&mxbuf->xdp, frame_sz, &rq->xdp_rxq);
xdp_prepare_buff(&mxbuf->xdp, va, headroom, len, true);
mxbuf->cqe = cqe;
mxbuf->rq = rq;
@@ -1586,7 +1648,7 @@ static struct sk_buff *
mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
struct mlx5_cqe64 *cqe, u32 cqe_bcnt)
{
- union mlx5e_alloc_unit *au = wi->au;
+ struct mlx5e_frag_page *frag_page = wi->frag_page;
u16 rx_headroom = rq->buff.headroom;
struct bpf_prog *prog;
struct sk_buff *skb;
@@ -1595,11 +1657,11 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
dma_addr_t addr;
u32 frag_size;
- va = page_address(au->page) + wi->offset;
+ va = page_address(frag_page->page) + wi->offset;
data = va + rx_headroom;
frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
- addr = page_pool_get_dma_addr(au->page);
+ addr = page_pool_get_dma_addr(frag_page->page);
dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset,
frag_size, rq->buff.map_dir);
net_prefetch(data);
@@ -1609,7 +1671,8 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
struct mlx5e_xdp_buff mxbuf;
net_prefetchw(va); /* xdp_frame data area */
- mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, cqe_bcnt, &mxbuf);
+ mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz,
+ cqe_bcnt, &mxbuf);
if (mlx5e_xdp_handle(rq, prog, &mxbuf))
return NULL; /* page/packet was consumed by XDP */
@@ -1623,7 +1686,8 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
return NULL;
/* queue up for recycling/reuse */
- page_ref_inc(au->page);
+ skb_mark_for_recycle(skb);
+ frag_page->frags++;
return skb;
}
@@ -1634,8 +1698,8 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
{
struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
struct mlx5e_wqe_frag_info *head_wi = wi;
- union mlx5e_alloc_unit *au = wi->au;
u16 rx_headroom = rq->buff.headroom;
+ struct mlx5e_frag_page *frag_page;
struct skb_shared_info *sinfo;
struct mlx5e_xdp_buff mxbuf;
u32 frag_consumed_bytes;
@@ -1645,16 +1709,19 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
u32 truesize;
void *va;
- va = page_address(au->page) + wi->offset;
+ frag_page = wi->frag_page;
+
+ va = page_address(frag_page->page) + wi->offset;
frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);
- addr = page_pool_get_dma_addr(au->page);
+ addr = page_pool_get_dma_addr(frag_page->page);
dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset,
rq->buff.frame0_sz, rq->buff.map_dir);
net_prefetchw(va); /* xdp_frame data area */
net_prefetch(va + rx_headroom);
- mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, frag_consumed_bytes, &mxbuf);
+ mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz,
+ frag_consumed_bytes, &mxbuf);
sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp);
truesize = 0;
@@ -1663,34 +1730,12 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
wi++;
while (cqe_bcnt) {
- skb_frag_t *frag;
-
- au = wi->au;
+ frag_page = wi->frag_page;
frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);
- addr = page_pool_get_dma_addr(au->page);
- dma_sync_single_for_cpu(rq->pdev, addr + wi->offset,
- frag_consumed_bytes, rq->buff.map_dir);
-
- if (!xdp_buff_has_frags(&mxbuf.xdp)) {
- /* Init on the first fragment to avoid cold cache access
- * when possible.
- */
- sinfo->nr_frags = 0;
- sinfo->xdp_frags_size = 0;
- xdp_buff_set_frags_flag(&mxbuf.xdp);
- }
-
- frag = &sinfo->frags[sinfo->nr_frags++];
- __skb_frag_set_page(frag, au->page);
- skb_frag_off_set(frag, wi->offset);
- skb_frag_size_set(frag, frag_consumed_bytes);
-
- if (page_is_pfmemalloc(au->page))
- xdp_buff_set_frag_pfmemalloc(&mxbuf.xdp);
-
- sinfo->xdp_frags_size += frag_consumed_bytes;
+ mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf.xdp, frag_page,
+ wi->offset, frag_consumed_bytes);
truesize += frag_info->frag_stride;
cqe_bcnt -= frag_consumed_bytes;
@@ -1701,10 +1746,10 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
prog = rcu_dereference(rq->xdp_prog);
if (prog && mlx5e_xdp_handle(rq, prog, &mxbuf)) {
if (test_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
- int i;
+ struct mlx5e_wqe_frag_info *pwi;
- for (i = wi - head_wi; i < rq->wqe.info.num_frags; i++)
- mlx5e_put_rx_frag(rq, &head_wi[i], true);
+ for (pwi = head_wi; pwi < wi; pwi++)
+ pwi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
}
return NULL; /* page/packet was consumed by XDP */
}
@@ -1716,21 +1761,17 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
if (unlikely(!skb))
return NULL;
- page_ref_inc(head_wi->au->page);
+ skb_mark_for_recycle(skb);
+ head_wi->frag_page->frags++;
if (xdp_buff_has_frags(&mxbuf.xdp)) {
- int i;
-
/* sinfo->nr_frags is reset by build_skb, calculate again. */
xdp_update_skb_shared_info(skb, wi - head_wi - 1,
sinfo->xdp_frags_size, truesize,
xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp));
- for (i = 0; i < sinfo->nr_frags; i++) {
- skb_frag_t *frag = &sinfo->frags[i];
-
- page_ref_inc(skb_frag_page(frag));
- }
+ for (struct mlx5e_wqe_frag_info *pwi = head_wi + 1; pwi < wi; pwi++)
+ pwi->frag_page->frags++;
}
return skb;
@@ -1768,7 +1809,7 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
mlx5e_handle_rx_err_cqe(rq, cqe);
- goto free_wqe;
+ goto wq_cyc_pop;
}
skb = INDIRECT_CALL_3(rq->wqe.skb_from_cqe,
@@ -1782,9 +1823,9 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
/* do not return page to cache,
* it will be returned on XDP_TX completion.
*/
- goto wq_cyc_pop;
+ wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
}
- goto free_wqe;
+ goto wq_cyc_pop;
}
mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
@@ -1792,13 +1833,11 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
if (mlx5e_cqe_regb_chain(cqe))
if (!mlx5e_tc_update_skb_nic(cqe, skb)) {
dev_kfree_skb_any(skb);
- goto free_wqe;
+ goto wq_cyc_pop;
}
napi_gro_receive(rq->cq.napi, skb);
-free_wqe:
- mlx5e_free_rx_wqe(rq, wi, true);
wq_cyc_pop:
mlx5_wq_cyc_pop(wq);
}
@@ -1822,7 +1861,7 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
mlx5e_handle_rx_err_cqe(rq, cqe);
- goto free_wqe;
+ goto wq_cyc_pop;
}
skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
@@ -1835,9 +1874,9 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
/* do not return page to cache,
* it will be returned on XDP_TX completion.
*/
- goto wq_cyc_pop;
+ wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
}
- goto free_wqe;
+ goto wq_cyc_pop;
}
mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
@@ -1847,8 +1886,6 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
mlx5e_rep_tc_receive(cqe, rq, skb);
-free_wqe:
- mlx5e_free_rx_wqe(rq, wi, true);
wq_cyc_pop:
mlx5_wq_cyc_pop(wq);
}
@@ -1901,7 +1938,6 @@ mpwrq_cqe_out:
wq = &rq->mpwqe.wq;
wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
- mlx5e_free_rx_mpwqe(rq, wi, true);
mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
}
@@ -1913,7 +1949,8 @@ const struct mlx5e_rx_handlers mlx5e_rx_handlers_rep = {
static void
mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq,
- union mlx5e_alloc_unit *au, u32 data_bcnt, u32 data_offset)
+ struct mlx5e_frag_page *frag_page,
+ u32 data_bcnt, u32 data_offset)
{
net_prefetchw(skb->data);
@@ -1927,12 +1964,13 @@ mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq,
else
truesize = ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz));
- mlx5e_add_skb_frag(rq, skb, au, data_offset,
+ frag_page->frags++;
+ mlx5e_add_skb_frag(rq, skb, frag_page->page, data_offset,
pg_consumed_bytes, truesize);
data_bcnt -= pg_consumed_bytes;
data_offset = 0;
- au++;
+ frag_page++;
}
}
@@ -1941,37 +1979,142 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset,
u32 page_idx)
{
- union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx];
+ struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx];
u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt);
- u32 frag_offset = head_offset + headlen;
- u32 byte_cnt = cqe_bcnt - headlen;
- union mlx5e_alloc_unit *head_au = au;
+ struct mlx5e_frag_page *head_page = frag_page;
+ u32 frag_offset = head_offset;
+ u32 byte_cnt = cqe_bcnt;
+ struct skb_shared_info *sinfo;
+ struct mlx5e_xdp_buff mxbuf;
+ unsigned int truesize = 0;
+ struct bpf_prog *prog;
struct sk_buff *skb;
- dma_addr_t addr;
+ u32 linear_frame_sz;
+ u16 linear_data_len;
+ u16 linear_hr;
+ void *va;
- skb = napi_alloc_skb(rq->cq.napi,
- ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long)));
- if (unlikely(!skb)) {
- rq->stats->buff_alloc_err++;
- return NULL;
+ prog = rcu_dereference(rq->xdp_prog);
+
+ if (prog) {
+ /* area for bpf_xdp_[store|load]_bytes */
+ net_prefetchw(page_address(frag_page->page) + frag_offset);
+ if (unlikely(mlx5e_page_alloc_fragmented(rq, &wi->linear_page))) {
+ rq->stats->buff_alloc_err++;
+ return NULL;
+ }
+ va = page_address(wi->linear_page.page);
+ net_prefetchw(va); /* xdp_frame data area */
+ linear_hr = XDP_PACKET_HEADROOM;
+ linear_data_len = 0;
+ linear_frame_sz = MLX5_SKB_FRAG_SZ(linear_hr + MLX5E_RX_MAX_HEAD);
+ } else {
+ skb = napi_alloc_skb(rq->cq.napi,
+ ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long)));
+ if (unlikely(!skb)) {
+ rq->stats->buff_alloc_err++;
+ return NULL;
+ }
+ skb_mark_for_recycle(skb);
+ va = skb->head;
+ net_prefetchw(va); /* xdp_frame data area */
+ net_prefetchw(skb->data);
+
+ frag_offset += headlen;
+ byte_cnt -= headlen;
+ linear_hr = skb_headroom(skb);
+ linear_data_len = headlen;
+ linear_frame_sz = MLX5_SKB_FRAG_SZ(skb_end_offset(skb));
+ if (unlikely(frag_offset >= PAGE_SIZE)) {
+ frag_page++;
+ frag_offset -= PAGE_SIZE;
+ }
}
- net_prefetchw(skb->data);
+ mlx5e_fill_mxbuf(rq, cqe, va, linear_hr, linear_frame_sz, linear_data_len, &mxbuf);
+
+ sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp);
+
+ while (byte_cnt) {
+ /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
+ u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);
- /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
- if (unlikely(frag_offset >= PAGE_SIZE)) {
- au++;
- frag_offset -= PAGE_SIZE;
+ if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
+ truesize += pg_consumed_bytes;
+ else
+ truesize += ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz));
+
+ mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf.xdp, frag_page, frag_offset,
+ pg_consumed_bytes);
+ byte_cnt -= pg_consumed_bytes;
+ frag_offset = 0;
+ frag_page++;
}
- mlx5e_fill_skb_data(skb, rq, au, byte_cnt, frag_offset);
- /* copy header */
- addr = page_pool_get_dma_addr(head_au->page);
- mlx5e_copy_skb_header(rq, skb, head_au->page, addr,
- head_offset, head_offset, headlen);
- /* skb linear part was allocated with headlen and aligned to long */
- skb->tail += headlen;
- skb->len += headlen;
+ if (prog) {
+ if (mlx5e_xdp_handle(rq, prog, &mxbuf)) {
+ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
+ int i;
+
+ for (i = 0; i < sinfo->nr_frags; i++)
+ /* non-atomic */
+ __set_bit(page_idx + i, wi->skip_release_bitmap);
+ return NULL;
+ }
+ mlx5e_page_release_fragmented(rq, &wi->linear_page);
+ return NULL; /* page/packet was consumed by XDP */
+ }
+
+ skb = mlx5e_build_linear_skb(rq, mxbuf.xdp.data_hard_start,
+ linear_frame_sz,
+ mxbuf.xdp.data - mxbuf.xdp.data_hard_start, 0,
+ mxbuf.xdp.data - mxbuf.xdp.data_meta);
+ if (unlikely(!skb)) {
+ mlx5e_page_release_fragmented(rq, &wi->linear_page);
+ return NULL;
+ }
+
+ skb_mark_for_recycle(skb);
+ wi->linear_page.frags++;
+ mlx5e_page_release_fragmented(rq, &wi->linear_page);
+
+ if (xdp_buff_has_frags(&mxbuf.xdp)) {
+ struct mlx5e_frag_page *pagep;
+
+ /* sinfo->nr_frags is reset by build_skb, calculate again. */
+ xdp_update_skb_shared_info(skb, frag_page - head_page,
+ sinfo->xdp_frags_size, truesize,
+ xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp));
+
+ pagep = head_page;
+ do
+ pagep->frags++;
+ while (++pagep < frag_page);
+ }
+ __pskb_pull_tail(skb, headlen);
+ } else {
+ dma_addr_t addr;
+
+ if (xdp_buff_has_frags(&mxbuf.xdp)) {
+ struct mlx5e_frag_page *pagep;
+
+ xdp_update_skb_shared_info(skb, sinfo->nr_frags,
+ sinfo->xdp_frags_size, truesize,
+ xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp));
+
+ pagep = frag_page - sinfo->nr_frags;
+ do
+ pagep->frags++;
+ while (++pagep < frag_page);
+ }
+ /* copy header */
+ addr = page_pool_get_dma_addr(head_page->page);
+ mlx5e_copy_skb_header(rq, skb, head_page->page, addr,
+ head_offset, head_offset, headlen);
+ /* skb linear part was allocated with headlen and aligned to long */
+ skb->tail += headlen;
+ skb->len += headlen;
+ }
return skb;
}
@@ -1981,7 +2124,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset,
u32 page_idx)
{
- union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx];
+ struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx];
u16 rx_headroom = rq->buff.headroom;
struct bpf_prog *prog;
struct sk_buff *skb;
@@ -1996,11 +2139,11 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
return NULL;
}
- va = page_address(au->page) + head_offset;
+ va = page_address(frag_page->page) + head_offset;
data = va + rx_headroom;
frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
- addr = page_pool_get_dma_addr(au->page);
+ addr = page_pool_get_dma_addr(frag_page->page);
dma_sync_single_range_for_cpu(rq->pdev, addr, head_offset,
frag_size, rq->buff.map_dir);
net_prefetch(data);
@@ -2010,10 +2153,11 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
struct mlx5e_xdp_buff mxbuf;
net_prefetchw(va); /* xdp_frame data area */
- mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, cqe_bcnt, &mxbuf);
+ mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz,
+ cqe_bcnt, &mxbuf);
if (mlx5e_xdp_handle(rq, prog, &mxbuf)) {
if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
- __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
+ __set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */
return NULL; /* page/packet was consumed by XDP */
}
@@ -2027,7 +2171,8 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
return NULL;
/* queue up for recycling/reuse */
- page_ref_inc(au->page);
+ skb_mark_for_recycle(skb);
+ frag_page->frags++;
return skb;
}
@@ -2044,7 +2189,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
void *hdr, *data;
u32 frag_size;
- hdr = page_address(head->page) + head_offset;
+ hdr = page_address(head->frag_page->page) + head_offset;
data = hdr + rx_headroom;
frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + head_size);
@@ -2058,9 +2203,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
if (unlikely(!skb))
return NULL;
- /* queue up for recycling/reuse */
- page_ref_inc(head->page);
-
+ head->frag_page->frags++;
} else {
/* allocate SKB and copy header for large header */
rq->stats->gro_large_hds++;
@@ -2072,13 +2215,17 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
}
prefetchw(skb->data);
- mlx5e_copy_skb_header(rq, skb, head->page, head->addr,
+ mlx5e_copy_skb_header(rq, skb, head->frag_page->page, head->addr,
head_offset + rx_headroom,
rx_headroom, head_size);
/* skb linear part was allocated with headlen and aligned to long */
skb->tail += head_size;
skb->len += head_size;
}
+
+ /* queue up for recycling/reuse */
+ skb_mark_for_recycle(skb);
+
return skb;
}
@@ -2123,8 +2270,10 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index)
u64 addr = shampo->info[header_index].addr;
if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) {
- shampo->info[header_index].addr = ALIGN_DOWN(addr, PAGE_SIZE);
- mlx5e_page_release_dynamic(rq, shampo->info[header_index].page, true);
+ struct mlx5e_dma_info *dma_info = &shampo->info[header_index];
+
+ dma_info->addr = ALIGN_DOWN(addr, PAGE_SIZE);
+ mlx5e_page_release_fragmented(rq, dma_info->frag_page);
}
bitmap_clear(shampo->bitmap, header_index, 1);
}
@@ -2145,7 +2294,6 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq
bool match = cqe->shampo.match;
struct mlx5e_rq_stats *stats = rq->stats;
struct mlx5e_rx_wqe_ll *wqe;
- union mlx5e_alloc_unit *au;
struct mlx5e_mpw_info *wi;
struct mlx5_wq_ll *wq;
@@ -2195,8 +2343,10 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq
}
if (likely(head_size)) {
- au = &wi->alloc_units[page_idx];
- mlx5e_fill_skb_data(*skb, rq, au, data_bcnt, data_offset);
+ struct mlx5e_frag_page *frag_page;
+
+ frag_page = &wi->alloc_units.frag_pages[page_idx];
+ mlx5e_fill_skb_data(*skb, rq, frag_page, data_bcnt, data_offset);
}
mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb);
@@ -2210,7 +2360,6 @@ mpwrq_cqe_out:
wq = &rq->mpwqe.wq;
wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
- mlx5e_free_rx_mpwqe(rq, wi, true);
mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
}
@@ -2270,7 +2419,6 @@ mpwrq_cqe_out:
wq = &rq->mpwqe.wq;
wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
- mlx5e_free_rx_mpwqe(rq, wi, true);
mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
}
@@ -2489,7 +2637,7 @@ static void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
rq->stats->wqe_err++;
- goto wq_free_wqe;
+ goto wq_cyc_pop;
}
skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
@@ -2497,17 +2645,16 @@ static void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
mlx5e_skb_from_cqe_nonlinear,
rq, wi, cqe, cqe_bcnt);
if (!skb)
- goto wq_free_wqe;
+ goto wq_cyc_pop;
mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
if (unlikely(!skb->dev)) {
dev_kfree_skb_any(skb);
- goto wq_free_wqe;
+ goto wq_cyc_pop;
}
napi_gro_receive(rq->cq.napi, skb);
-wq_free_wqe:
- mlx5e_free_rx_wqe(rq, wi, true);
+wq_cyc_pop:
mlx5_wq_cyc_pop(wq);
}
@@ -2582,12 +2729,12 @@ static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
rq->stats->wqe_err++;
- goto free_wqe;
+ goto wq_cyc_pop;
}
skb = mlx5e_skb_from_cqe_nonlinear(rq, wi, cqe, cqe_bcnt);
if (!skb)
- goto free_wqe;
+ goto wq_cyc_pop;
mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
skb_push(skb, ETH_HLEN);
@@ -2596,8 +2743,7 @@ static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe
rq->netdev->devlink_port);
dev_kfree_skb_any(skb);
-free_wqe:
- mlx5e_free_rx_wqe(rq, wi, false);
+wq_cyc_pop:
mlx5_wq_cyc_pop(wq);
}
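
Across the RX completion handlers above, the per-CQE frees are gone: fragments kept by XDP_TX/REDIRECT are only flagged with MLX5E_WQE_FRAG_SKIP_RELEASE, and the actual page release is deferred to the bulk refill path for better page pool cache usage. A stand-alone sketch of that pattern (hypothetical names, not the driver code):

/* Sketch only: mark-on-completion, release-in-bulk-on-refill. */
#include <stdio.h>

#define SKIP_RELEASE 0x1

struct frag {
	unsigned int flags;
	int live; /* 1 while the driver still owns the page reference */
};

static void rx_complete(struct frag *f, int consumed_by_xdp_tx)
{
	if (consumed_by_xdp_tx)
		f->flags |= SKIP_RELEASE; /* page travels with the XDP descriptor */
	/* no release here any more: it is deferred to the refill path */
}

static void refill_bulk(struct frag *frags, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		if (!(frags[i].flags & SKIP_RELEASE))
			frags[i].live = 0; /* released back to the page pool */
		frags[i].flags = 0;        /* rearm for the next round */
		/* ...allocation of the replacement page would follow here */
	}
}

int main(void)
{
	struct frag frags[3] = { {0, 1}, {0, 1}, {0, 1} };

	rx_complete(&frags[0], 0);
	rx_complete(&frags[1], 1); /* kept by XDP_TX */
	rx_complete(&frags[2], 0);
	refill_bulk(frags, 3);
	printf("%d %d %d\n", frags[0].live, frags[1].live, frags[2].live); /* 0 1 0 */
	return 0;
}
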
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 4478223c1720..f1d9596905c6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -179,11 +179,6 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) },
- { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) },
- { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) },
- { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) },
- { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) },
- { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_recover) },
@@ -358,11 +353,6 @@ static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s,
s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
- s->rx_cache_reuse += rq_stats->cache_reuse;
- s->rx_cache_full += rq_stats->cache_full;
- s->rx_cache_empty += rq_stats->cache_empty;
- s->rx_cache_busy += rq_stats->cache_busy;
- s->rx_cache_waive += rq_stats->cache_waive;
s->rx_congst_umr += rq_stats->congst_umr;
s->rx_arfs_err += rq_stats->arfs_err;
s->rx_recover += rq_stats->recover;
@@ -1978,11 +1968,6 @@ static const struct counter_desc rq_stats_desc[] = {
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
- { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) },
- { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) },
- { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) },
- { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) },
- { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, recover) },
@@ -2163,11 +2148,6 @@ static const struct counter_desc ptp_rq_stats_desc[] = {
{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
- { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_reuse) },
- { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_full) },
- { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_empty) },
- { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_busy) },
- { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_waive) },
{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, congst_umr) },
{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, arfs_err) },
{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, recover) },
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index b77100b60b50..1ff8a06027dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -193,11 +193,6 @@ struct mlx5e_sw_stats {
u64 rx_buff_alloc_err;
u64 rx_cqe_compress_blks;
u64 rx_cqe_compress_pkts;
- u64 rx_cache_reuse;
- u64 rx_cache_full;
- u64 rx_cache_empty;
- u64 rx_cache_busy;
- u64 rx_cache_waive;
u64 rx_congst_umr;
u64 rx_arfs_err;
u64 rx_recover;
@@ -362,11 +357,6 @@ struct mlx5e_rq_stats {
u64 buff_alloc_err;
u64 cqe_compress_blks;
u64 cqe_compress_pkts;
- u64 cache_reuse;
- u64 cache_full;
- u64 cache_empty;
- u64 cache_busy;
- u64 cache_waive;
u64 congst_umr;
u64 arfs_err;
u64 recover;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 87a2850b32d0..728b82ce4031 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -44,6 +44,7 @@
#include <net/bareudp.h>
#include <net/bonding.h>
#include <net/dst_metadata.h>
+#include "devlink.h"
#include "en.h"
#include "en/tc/post_act.h"
#include "en/tc/act_stats.h"
@@ -73,12 +74,6 @@
#define MLX5E_TC_TABLE_NUM_GROUPS 4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)
-struct mlx5e_hairpin_params {
- struct mlx5_core_dev *mdev;
- u32 num_queues;
- u32 queue_size;
-};
-
struct mlx5e_tc_table {
/* Protects the dynamic assignment of the t parameter
* which is the nic tc root table.
@@ -101,7 +96,6 @@ struct mlx5e_tc_table {
struct mlx5_tc_ct_priv *ct;
struct mapping_ctx *mapping;
- struct mlx5e_hairpin_params hairpin_params;
struct dentry *dfs_root;
/* tc action stats */
@@ -183,7 +177,8 @@ static struct lock_class_key tc_ht_wq_key;
static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
static void free_flow_post_acts(struct mlx5e_tc_flow *flow);
-static void mlx5_free_flow_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr);
+static void mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr);
void
mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
@@ -493,15 +488,6 @@ mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
int err;
- if (attr->flags & MLX5_ATTR_FLAG_CT) {
- struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts =
- &attr->parse_attr->mod_hdr_acts;
-
- return mlx5_tc_ct_flow_offload(get_ct_priv(priv),
- spec, attr,
- mod_hdr_acts);
- }
-
if (!is_mdev_switchdev_mode(priv->mdev))
return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
@@ -524,11 +510,6 @@ mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- if (attr->flags & MLX5_ATTR_FLAG_CT) {
- mlx5_tc_ct_delete_flow(get_ct_priv(priv), attr);
- return;
- }
-
if (!is_mdev_switchdev_mode(priv->mdev)) {
mlx5e_del_offloaded_nic_rule(priv, rule, attr);
return;
@@ -589,6 +570,7 @@ struct mlx5e_hairpin {
struct mlx5e_tir direct_tir;
int num_channels;
+ u8 log_num_packets;
struct mlx5e_rqt indir_rqt;
struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
struct mlx5_ttc_table *ttc;
@@ -935,6 +917,7 @@ mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params
hp->func_mdev = func_mdev;
hp->func_priv = priv;
hp->num_channels = params->num_channels;
+ hp->log_num_packets = params->log_num_packets;
err = mlx5e_hairpin_create_transport(hp);
if (err)
@@ -1076,9 +1059,11 @@ static int debugfs_hairpin_table_dump_show(struct seq_file *file, void *priv)
mutex_lock(&tc->hairpin_tbl_lock);
hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
- seq_printf(file, "Hairpin peer_vhca_id %u prio %u refcnt %u\n",
+ seq_printf(file,
+ "Hairpin peer_vhca_id %u prio %u refcnt %u num_channels %u num_packets %lu\n",
hpe->peer_vhca_id, hpe->prio,
- refcount_read(&hpe->refcnt));
+ refcount_read(&hpe->refcnt), hpe->hp->num_channels,
+ BIT(hpe->hp->log_num_packets));
mutex_unlock(&tc->hairpin_tbl_lock);
return 0;
@@ -1099,33 +1084,15 @@ static void mlx5e_tc_debugfs_init(struct mlx5e_tc_table *tc,
&debugfs_hairpin_table_dump_fops);
}
-static void
-mlx5e_hairpin_params_init(struct mlx5e_hairpin_params *hairpin_params,
- struct mlx5_core_dev *mdev)
-{
- u32 link_speed = 0;
- u64 link_speed64;
-
- hairpin_params->mdev = mdev;
- /* set hairpin pair per each 50Gbs share of the link */
- mlx5e_port_max_linkspeed(mdev, &link_speed);
- link_speed = max_t(u32, link_speed, 50000);
- link_speed64 = link_speed;
- do_div(link_speed64, 50000);
- hairpin_params->num_queues = link_speed64;
-
- hairpin_params->queue_size =
- BIT(min_t(u32, 16 - MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev),
- MLX5_CAP_GEN(mdev, log_max_hairpin_num_packets)));
-}
-
static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5e_tc_flow_parse_attr *parse_attr,
struct netlink_ext_ack *extack)
{
struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct devlink *devlink = priv_to_devlink(priv->mdev);
int peer_ifindex = parse_attr->mirred_ifindex[0];
+ union devlink_param_value val = {};
struct mlx5_hairpin_params params;
struct mlx5_core_dev *peer_mdev;
struct mlx5e_hairpin_entry *hpe;
@@ -1182,7 +1149,14 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
hash_hairpin_info(peer_id, match_prio));
mutex_unlock(&tc->hairpin_tbl_lock);
- params.log_num_packets = ilog2(tc->hairpin_params.queue_size);
+ err = devl_param_driverinit_value_get(
+ devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, &val);
+ if (err) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ params.log_num_packets = ilog2(val.vu32);
params.log_data_size =
clamp_t(u32,
params.log_num_packets +
@@ -1191,7 +1165,14 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
params.q_counter = priv->q_counter;
- params.num_channels = tc->hairpin_params.num_queues;
+ err = devl_param_driverinit_value_get(
+ devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, &val);
+ if (err) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ params.num_channels = val.vu32;
hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
hpe->hp = hp;
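
With the hairpin defaults now coming from devlink parameters, the queue size is turned into log_num_packets via ilog2() and the derived data-size exponent is clamped against device limits. A stand-alone sketch of that arithmetic (the stride and bounds below are made-up stand-ins for the capability values elided in the hunk):

/* Sketch only: log2 of the devlink queue size plus a clamp. */
#include <stdio.h>

static unsigned int ilog2_u32(unsigned int v)
{
	unsigned int l = 0;

	while (v >>= 1)
		l++;
	return l; /* floor(log2(v)) for v >= 1 */
}

static unsigned int clamp_u32(unsigned int v, unsigned int lo, unsigned int hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

int main(void)
{
	unsigned int queue_size = 1024;           /* hairpin_queue_size from devlink */
	unsigned int log_num_packets = ilog2_u32(queue_size);
	/* Assume a 6-bit stride and a device limit of 2^15 bytes of WQ data. */
	unsigned int log_data_size = clamp_u32(log_num_packets + 6, 12, 15);

	printf("log_num_packets=%u log_data_size=%u\n",
	       log_num_packets, log_data_size);
	return 0;
}
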
@@ -1401,13 +1382,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
return err;
}
- if (attr->flags & MLX5_ATTR_FLAG_CT)
- flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), &parse_attr->spec,
- attr, &parse_attr->mod_hdr_acts);
- else
- flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec,
- attr);
-
+ flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec, attr);
return PTR_ERR_OR_ZERO(flow->rule[0]);
}
@@ -1438,9 +1413,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
flow_flag_clear(flow, OFFLOADED);
- if (attr->flags & MLX5_ATTR_FLAG_CT)
- mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
- else if (!IS_ERR_OR_NULL(flow->rule[0]))
+ if (!IS_ERR_OR_NULL(flow->rule[0]))
mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);
/* Remove root table if no rules are left to avoid
@@ -1791,8 +1764,7 @@ out:
static void
clean_encap_dests(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
- struct mlx5_flow_attr *attr,
- bool *vf_tun)
+ struct mlx5_flow_attr *attr)
{
struct mlx5_esw_flow_attr *esw_attr;
int out_index;
@@ -1801,17 +1773,11 @@ clean_encap_dests(struct mlx5e_priv *priv,
return;
esw_attr = attr->esw_attr;
- *vf_tun = false;
for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
continue;
- if (esw_attr->dests[out_index].flags &
- MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
- !esw_attr->dest_int_port)
- *vf_tun = true;
-
mlx5e_detach_encap(priv, flow, attr, out_index);
kfree(attr->parse_attr->tun_info[out_index]);
}
@@ -2034,7 +2000,7 @@ static void free_branch_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *
if (!attr)
return;
- mlx5_free_flow_attr(flow, attr);
+ mlx5_free_flow_attr_actions(flow, attr);
kvfree(attr->parse_attr);
kfree(attr);
}
@@ -2045,7 +2011,6 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_flow_attr *attr = flow->attr;
struct mlx5_esw_flow_attr *esw_attr;
- bool vf_tun;
esw_attr = attr->esw_attr;
mlx5e_put_flow_tunnel_id(flow);
@@ -2067,18 +2032,8 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
if (flow->decap_route)
mlx5e_detach_decap_route(priv, flow);
- clean_encap_dests(priv, flow, attr, &vf_tun);
-
mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
- mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
- mlx5e_tc_detach_mod_hdr(priv, flow, attr);
- }
-
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
- mlx5_fc_destroy(esw_attr->counter_dev, attr->counter);
-
if (esw_attr->int_port)
mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port);
@@ -2091,8 +2046,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
mlx5e_tc_act_stats_del_flow(get_act_stats_handle(priv), flow);
free_flow_post_acts(flow);
- free_branch_attr(flow, attr->branch_true);
- free_branch_attr(flow, attr->branch_false);
+ mlx5_free_flow_attr_actions(flow, attr);
kvfree(attr->esw_attr->rx_tun_attr);
kvfree(attr->parse_attr);
@@ -3469,114 +3423,59 @@ struct ipv6_hoplimit_word {
};
static bool
-is_action_keys_supported(const struct flow_action_entry *act, bool ct_flow,
- bool *modify_ip_header, bool *modify_tuple,
- struct netlink_ext_ack *extack)
+is_flow_action_modify_ip_header(struct flow_action *flow_action)
{
+ const struct flow_action_entry *act;
u32 mask, offset;
u8 htype;
+ int i;
- htype = act->mangle.htype;
- offset = act->mangle.offset;
- mask = ~act->mangle.mask;
/* For IPv4 & IPv6 header check 4 byte word,
* to determine that modified fields
* are NOT ttl & hop_limit only.
*/
- if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
- struct ip_ttl_word *ttl_word =
- (struct ip_ttl_word *)&mask;
-
- if (offset != offsetof(struct iphdr, ttl) ||
- ttl_word->protocol ||
- ttl_word->check) {
- *modify_ip_header = true;
- }
-
- if (offset >= offsetof(struct iphdr, saddr))
- *modify_tuple = true;
-
- if (ct_flow && *modify_tuple) {
- NL_SET_ERR_MSG_MOD(extack,
- "can't offload re-write of ipv4 address with action ct");
- return false;
- }
- } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
- struct ipv6_hoplimit_word *hoplimit_word =
- (struct ipv6_hoplimit_word *)&mask;
-
- if (offset != offsetof(struct ipv6hdr, payload_len) ||
- hoplimit_word->payload_len ||
- hoplimit_word->nexthdr) {
- *modify_ip_header = true;
- }
-
- if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr))
- *modify_tuple = true;
+ flow_action_for_each(i, act, flow_action) {
+ if (act->id != FLOW_ACTION_MANGLE &&
+ act->id != FLOW_ACTION_ADD)
+ continue;
- if (ct_flow && *modify_tuple) {
- NL_SET_ERR_MSG_MOD(extack,
- "can't offload re-write of ipv6 address with action ct");
- return false;
- }
- } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP ||
- htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP) {
- *modify_tuple = true;
- if (ct_flow) {
- NL_SET_ERR_MSG_MOD(extack,
- "can't offload re-write of transport header ports with action ct");
- return false;
+ htype = act->mangle.htype;
+ offset = act->mangle.offset;
+ mask = ~act->mangle.mask;
+
+ if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
+ struct ip_ttl_word *ttl_word =
+ (struct ip_ttl_word *)&mask;
+
+ if (offset != offsetof(struct iphdr, ttl) ||
+ ttl_word->protocol ||
+ ttl_word->check)
+ return true;
+ } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
+ struct ipv6_hoplimit_word *hoplimit_word =
+ (struct ipv6_hoplimit_word *)&mask;
+
+ if (offset != offsetof(struct ipv6hdr, payload_len) ||
+ hoplimit_word->payload_len ||
+ hoplimit_word->nexthdr)
+ return true;
}
}
- return true;
-}
-
-static bool modify_tuple_supported(bool modify_tuple, bool ct_clear,
- bool ct_flow, struct netlink_ext_ack *extack,
- struct mlx5e_priv *priv,
- struct mlx5_flow_spec *spec)
-{
- if (!modify_tuple || ct_clear)
- return true;
-
- if (ct_flow) {
- NL_SET_ERR_MSG_MOD(extack,
- "can't offload tuple modification with non-clear ct()");
- netdev_info(priv->netdev,
- "can't offload tuple modification with non-clear ct()");
- return false;
- }
-
- /* Add ct_state=-trk match so it will be offloaded for non ct flows
- * (or after clear action), as otherwise, since the tuple is changed,
- * we can't restore ct state
- */
- if (mlx5_tc_ct_add_no_trk_match(spec)) {
- NL_SET_ERR_MSG_MOD(extack,
- "can't offload tuple modification with ct matches and no ct(clear) action");
- netdev_info(priv->netdev,
- "can't offload tuple modification with ct matches and no ct(clear) action");
- return false;
- }
-
- return true;
+ return false;
}
static bool modify_header_match_supported(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
struct flow_action *flow_action,
- u32 actions, bool ct_flow,
- bool ct_clear,
+ u32 actions,
struct netlink_ext_ack *extack)
{
- const struct flow_action_entry *act;
- bool modify_ip_header, modify_tuple;
+ bool modify_ip_header;
void *headers_c;
void *headers_v;
u16 ethertype;
u8 ip_proto;
- int i;
headers_c = mlx5e_get_match_headers_criteria(actions, spec);
headers_v = mlx5e_get_match_headers_value(actions, spec);
@@ -3587,23 +3486,7 @@ static bool modify_header_match_supported(struct mlx5e_priv *priv,
ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
goto out_ok;
- modify_ip_header = false;
- modify_tuple = false;
- flow_action_for_each(i, act, flow_action) {
- if (act->id != FLOW_ACTION_MANGLE &&
- act->id != FLOW_ACTION_ADD)
- continue;
-
- if (!is_action_keys_supported(act, ct_flow,
- &modify_ip_header,
- &modify_tuple, extack))
- return false;
- }
-
- if (!modify_tuple_supported(modify_tuple, ct_clear, ct_flow, extack,
- priv, spec))
- return false;
-
+ modify_ip_header = is_flow_action_modify_ip_header(flow_action);
ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
if (modify_ip_header && ip_proto != IPPROTO_TCP &&
ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
@@ -3624,19 +3507,6 @@ actions_match_supported_fdb(struct mlx5e_priv *priv,
struct netlink_ext_ack *extack)
{
struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
- bool ct_flow, ct_clear;
-
- ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
- ct_flow = flow_flag_test(flow, CT) && !ct_clear;
-
- if (esw_attr->split_count && ct_flow &&
- !MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve)) {
- /* All registers used by ct are cleared when using
- * split rules.
- */
- NL_SET_ERR_MSG_MOD(extack, "Can't offload mirroring with action ct");
- return false;
- }
if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
NL_SET_ERR_MSG_MOD(extack,
@@ -3657,14 +3527,9 @@ actions_match_supported(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
{
- bool ct_flow, ct_clear;
-
- ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
- ct_flow = flow_flag_test(flow, CT) && !ct_clear;
-
if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
- !modify_header_match_supported(priv, &parse_attr->spec, flow_action,
- actions, ct_flow, ct_clear, extack))
+ !modify_header_match_supported(priv, &parse_attr->spec, flow_action, actions,
+ extack))
return false;
if (mlx5e_is_eswitch_flow(flow) &&
@@ -3758,6 +3623,7 @@ mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr,
attr2->dest_chain = 0;
attr2->dest_ft = NULL;
attr2->act_id_restore_rule = NULL;
+ memset(&attr2->ct_attr, 0, sizeof(attr2->ct_attr));
if (ns_type == MLX5_FLOW_NAMESPACE_FDB) {
attr2->esw_attr->out_count = 0;
@@ -3811,9 +3677,7 @@ free_flow_post_acts(struct mlx5e_tc_flow *flow)
if (list_is_last(&attr->list, &flow->attrs))
break;
- mlx5_free_flow_attr(flow, attr);
- free_branch_attr(flow, attr->branch_true);
- free_branch_attr(flow, attr->branch_false);
+ mlx5_free_flow_attr_actions(flow, attr);
list_del(&attr->list);
kvfree(attr->parse_attr);
@@ -4071,76 +3935,79 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
struct flow_action *flow_action)
{
struct netlink_ext_ack *extack = parse_state->extack;
- struct mlx5e_tc_flow_action flow_action_reorder;
struct mlx5e_tc_flow *flow = parse_state->flow;
struct mlx5e_tc_jump_state jump_state = {};
struct mlx5_flow_attr *attr = flow->attr;
enum mlx5_flow_namespace_type ns_type;
struct mlx5e_priv *priv = flow->priv;
- struct flow_action_entry *act, **_act;
+ struct mlx5_flow_attr *prev_attr;
+ struct flow_action_entry *act;
struct mlx5e_tc_act *tc_act;
+ bool is_missable;
int err, i;
- flow_action_reorder.num_entries = flow_action->num_entries;
- flow_action_reorder.entries = kcalloc(flow_action->num_entries,
- sizeof(flow_action), GFP_KERNEL);
- if (!flow_action_reorder.entries)
- return -ENOMEM;
-
- mlx5e_tc_act_reorder_flow_actions(flow_action, &flow_action_reorder);
-
ns_type = mlx5e_get_flow_namespace(flow);
list_add(&attr->list, &flow->attrs);
- flow_action_for_each(i, _act, &flow_action_reorder) {
+ flow_action_for_each(i, act, flow_action) {
jump_state.jump_target = false;
- act = *_act;
+ is_missable = false;
+ prev_attr = attr;
+
tc_act = mlx5e_tc_act_get(act->id, ns_type);
if (!tc_act) {
NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action");
err = -EOPNOTSUPP;
- goto out_free;
+ goto out_free_post_acts;
}
- if (!tc_act->can_offload(parse_state, act, i, attr)) {
+ if (tc_act->can_offload && !tc_act->can_offload(parse_state, act, i, attr)) {
err = -EOPNOTSUPP;
- goto out_free;
+ goto out_free_post_acts;
}
err = tc_act->parse_action(parse_state, act, priv, attr);
if (err)
- goto out_free;
+ goto out_free_post_acts;
dec_jump_count(act, tc_act, attr, priv, &jump_state);
err = parse_branch_ctrl(act, tc_act, flow, attr, &jump_state, extack);
if (err)
- goto out_free;
+ goto out_free_post_acts;
parse_state->actions |= attr->action;
- if (!tc_act->stats_action)
- attr->tc_act_cookies[attr->tc_act_cookies_count++] = act->cookie;
/* Split attr for multi table act if not the last act. */
if (jump_state.jump_target ||
(tc_act->is_multi_table_act &&
tc_act->is_multi_table_act(priv, act, attr) &&
- i < flow_action_reorder.num_entries - 1)) {
+ i < flow_action->num_entries - 1)) {
+ is_missable = tc_act->is_missable ? tc_act->is_missable(act) : false;
+
err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
if (err)
- goto out_free;
+ goto out_free_post_acts;
attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, ns_type);
if (!attr) {
err = -ENOMEM;
- goto out_free;
+ goto out_free_post_acts;
}
list_add(&attr->list, &flow->attrs);
}
- }
- kfree(flow_action_reorder.entries);
+ if (is_missable) {
+ /* Add counter to prev, and assign act to new (next) attr */
+ prev_attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ flow_flag_set(flow, USE_ACT_STATS);
+
+ attr->tc_act_cookies[attr->tc_act_cookies_count++] = act->cookie;
+ } else if (!tc_act->stats_action) {
+ prev_attr->tc_act_cookies[prev_attr->tc_act_cookies_count++] = act->cookie;
+ }
+ }
err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
if (err)
@@ -4152,8 +4019,6 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
return 0;
-out_free:
- kfree(flow_action_reorder.entries);
out_free_post_acts:
free_flow_post_acts(flow);
@@ -4448,10 +4313,9 @@ mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type)
}
static void
-mlx5_free_flow_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
+mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
{
struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow);
- bool vf_tun;
if (!attr)
return;
@@ -4459,7 +4323,7 @@ mlx5_free_flow_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
if (attr->post_act_handle)
mlx5e_tc_post_act_del(get_post_action(flow->priv), attr->post_act_handle);
- clean_encap_dests(flow->priv, flow, attr, &vf_tun);
+ clean_encap_dests(flow->priv, flow, attr);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
mlx5_fc_destroy(counter_dev, attr->counter);
@@ -4468,6 +4332,11 @@ mlx5_free_flow_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
mlx5e_tc_detach_mod_hdr(flow->priv, flow, attr);
}
+
+ mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
+
+ free_branch_attr(flow, attr->branch_true);
+ free_branch_attr(flow, attr->branch_false);
}
static int
@@ -4929,7 +4798,7 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
goto errout;
}
- if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) {
+ if (mlx5e_is_offloaded_flow(flow)) {
if (flow_flag_test(flow, USE_ACT_STATS)) {
f->use_act_stats = true;
} else {
@@ -5187,22 +5056,6 @@ static int mlx5e_tc_netdev_event(struct notifier_block *this,
return NOTIFY_DONE;
}
-static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev)
-{
- int tc_grp_size, tc_tbl_size;
- u32 max_flow_counter;
-
- max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
- MLX5_CAP_GEN(dev, max_flow_counter_15_0);
-
- tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);
-
- tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
- BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
-
- return tc_tbl_size;
-}
-
static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv)
{
struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
@@ -5275,10 +5128,10 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
- attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev);
attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
attr.default_ft = tc->miss_t;
attr.mapping = chains_mapping;
+ attr.fs_base_prio = MLX5E_TC_PRIO;
tc->chains = mlx5_chains_create(dev, &attr);
if (IS_ERR(tc->chains)) {
@@ -5286,12 +5139,12 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
goto err_miss;
}
+ mlx5_chains_print_info(tc->chains);
+
tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL);
tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr,
MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act);
- mlx5e_hairpin_params_init(&tc->hairpin_params, dev);
-
tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
err = register_netdevice_notifier_dev_net(priv->netdev,
&tc->netdevice_nb,
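
The en_tc.c hunks above collapse the old per-action ct checks into a single is_flow_action_modify_ip_header() scan that only asks whether a pedit rewrites more than ttl/hop_limit. A minimal userspace sketch of the IPv4 half of that mask test, assuming the same ip_ttl_word overlay as the driver (redefined locally here purely for illustration):

	/* Sketch only, not part of the patch. */
	#include <stdbool.h>
	#include <stddef.h>
	#include <stdint.h>
	#include <string.h>
	#include <netinet/ip.h>		/* struct iphdr */

	struct ip_ttl_word {		/* local stand-in for the driver's struct */
		uint8_t  ttl;
		uint8_t  protocol;
		uint16_t check;
	};

	/* True when a 32-bit mangle at @offset rewrites anything besides the
	 * IPv4 ttl byte; the inverted mask marks the bits actually written.
	 */
	static bool ipv4_mangle_beyond_ttl(uint32_t offset, uint32_t mangle_mask)
	{
		uint32_t written = ~mangle_mask;
		struct ip_ttl_word w;

		memcpy(&w, &written, sizeof(w));
		return offset != offsetof(struct iphdr, ttl) || w.protocol || w.check;
	}
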
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 9a458a5d9853..a50bfda18e96 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -51,7 +51,7 @@ static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq)
struct mlx5e_sq_stats *stats = sq->stats;
struct dim_sample dim_sample = {};
- if (unlikely(!test_bit(MLX5E_SQ_STATE_AM, &sq->state)))
+ if (unlikely(!test_bit(MLX5E_SQ_STATE_DIM, &sq->state)))
return;
dim_update_sample(sq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample);
@@ -63,7 +63,7 @@ static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq)
struct mlx5e_rq_stats *stats = rq->stats;
struct dim_sample dim_sample = {};
- if (unlikely(!test_bit(MLX5E_RQ_STATE_AM, &rq->state)))
+ if (unlikely(!test_bit(MLX5E_RQ_STATE_DIM, &rq->state)))
return;
dim_update_sample(rq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 38b32e98f3bd..1c35d721a31d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -18,6 +18,7 @@
#include "lib/clock.h"
#include "diag/fw_tracer.h"
#include "mlx5_irq.h"
+#include "pci_irq.h"
#include "devlink.h"
#include "en_accel/ipsec.h"
@@ -61,9 +62,7 @@ struct mlx5_eq_table {
struct mlx5_irq_table *irq_table;
struct mlx5_irq **comp_irqs;
struct mlx5_irq *ctrl_irq;
-#ifdef CONFIG_RFS_ACCEL
struct cpu_rmap *rmap;
-#endif
};
#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \
@@ -637,6 +636,7 @@ static u16 async_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err);
return MLX5_NUM_ASYNC_EQE;
}
+
static int create_async_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
@@ -803,44 +803,28 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
}
EXPORT_SYMBOL(mlx5_eq_update_ci);
-static void comp_irqs_release(struct mlx5_core_dev *dev)
+static void comp_irqs_release_pci(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
- if (mlx5_core_is_sf(dev))
- mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->num_comp_eqs);
- else
- mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs);
- kfree(table->comp_irqs);
+ mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs);
}
-static int comp_irqs_request(struct mlx5_core_dev *dev)
+static int comp_irqs_request_pci(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
const struct cpumask *prev = cpu_none_mask;
const struct cpumask *mask;
- int ncomp_eqs = table->num_comp_eqs;
+ int ncomp_eqs;
u16 *cpus;
int ret;
int cpu;
int i;
ncomp_eqs = table->num_comp_eqs;
- table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL);
- if (!table->comp_irqs)
- return -ENOMEM;
- if (mlx5_core_is_sf(dev)) {
- ret = mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs);
- if (ret < 0)
- goto free_irqs;
- return ret;
- }
-
cpus = kcalloc(ncomp_eqs, sizeof(*cpus), GFP_KERNEL);
- if (!cpus) {
+ if (!cpus)
ret = -ENOMEM;
- goto free_irqs;
- }
i = 0;
rcu_read_lock();
@@ -854,17 +838,89 @@ static int comp_irqs_request(struct mlx5_core_dev *dev)
}
spread_done:
rcu_read_unlock();
- ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs);
+ ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs, &table->rmap);
kfree(cpus);
- if (ret < 0)
- goto free_irqs;
return ret;
+}
+
+static void comp_irqs_release_sf(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+
+ mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->num_comp_eqs);
+}
+
+static int comp_irqs_request_sf(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ int ncomp_eqs = table->num_comp_eqs;
+
+ return mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs);
+}
+
+static void comp_irqs_release(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+
+ mlx5_core_is_sf(dev) ? comp_irqs_release_sf(dev) :
+ comp_irqs_release_pci(dev);
-free_irqs:
kfree(table->comp_irqs);
+}
+
+static int comp_irqs_request(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ int ncomp_eqs;
+ int ret;
+
+ ncomp_eqs = table->num_comp_eqs;
+ table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL);
+ if (!table->comp_irqs)
+ return -ENOMEM;
+
+ ret = mlx5_core_is_sf(dev) ? comp_irqs_request_sf(dev) :
+ comp_irqs_request_pci(dev);
+ if (ret < 0)
+ kfree(table->comp_irqs);
+
return ret;
}
+#ifdef CONFIG_RFS_ACCEL
+static int alloc_rmap(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_eq_table *eq_table = mdev->priv.eq_table;
+
+ /* rmap is a mapping between irq number and queue number.
+ * Each irq can be assigned only to a single rmap.
+ * Since SFs share IRQs, rmap mapping cannot function correctly
+ * for irqs that are shared between different core/netdev RX rings.
+ * Hence we don't allow netdev rmap for SFs.
+ */
+ if (mlx5_core_is_sf(mdev))
+ return 0;
+
+ eq_table->rmap = alloc_irq_cpu_rmap(eq_table->num_comp_eqs);
+ if (!eq_table->rmap)
+ return -ENOMEM;
+ return 0;
+}
+
+static void free_rmap(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_eq_table *eq_table = mdev->priv.eq_table;
+
+ if (eq_table->rmap) {
+ free_irq_cpu_rmap(eq_table->rmap);
+ eq_table->rmap = NULL;
+ }
+}
+#else
+static int alloc_rmap(struct mlx5_core_dev *mdev) { return 0; }
+static void free_rmap(struct mlx5_core_dev *mdev) {}
+#endif
+
static void destroy_comp_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
@@ -880,6 +936,7 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev)
kfree(eq);
}
comp_irqs_release(dev);
+ free_rmap(dev);
}
static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
@@ -906,9 +963,16 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
int err;
int i;
+ err = alloc_rmap(dev);
+ if (err)
+ return err;
+
ncomp_eqs = comp_irqs_request(dev);
- if (ncomp_eqs < 0)
- return ncomp_eqs;
+ if (ncomp_eqs < 0) {
+ err = ncomp_eqs;
+ goto err_irqs_req;
+ }
+
INIT_LIST_HEAD(&table->comp_eqs_list);
nent = comp_eq_depth_devlink_param_get(dev);
@@ -953,6 +1017,8 @@ clean_eq:
kfree(eq);
clean:
destroy_comp_eqs(dev);
+err_irqs_req:
+ free_rmap(dev);
return err;
}
@@ -1004,10 +1070,11 @@ mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector)
list_for_each_entry(eq, &table->comp_eqs_list, list) {
if (i++ == vector)
- break;
+ return mlx5_irq_get_affinity_mask(eq->core.irq);
}
- return mlx5_irq_get_affinity_mask(eq->core.irq);
+ WARN_ON_ONCE(1);
+ return NULL;
}
EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);
@@ -1031,55 +1098,12 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
return ERR_PTR(-ENOENT);
}
-static void clear_rmap(struct mlx5_core_dev *dev)
-{
-#ifdef CONFIG_RFS_ACCEL
- struct mlx5_eq_table *eq_table = dev->priv.eq_table;
-
- free_irq_cpu_rmap(eq_table->rmap);
-#endif
-}
-
-static int set_rmap(struct mlx5_core_dev *mdev)
-{
- int err = 0;
-#ifdef CONFIG_RFS_ACCEL
- struct mlx5_eq_table *eq_table = mdev->priv.eq_table;
- int vecidx;
-
- eq_table->rmap = alloc_irq_cpu_rmap(eq_table->num_comp_eqs);
- if (!eq_table->rmap) {
- err = -ENOMEM;
- mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err);
- goto err_out;
- }
-
- for (vecidx = 0; vecidx < eq_table->num_comp_eqs; vecidx++) {
- err = irq_cpu_rmap_add(eq_table->rmap,
- pci_irq_vector(mdev->pdev, vecidx));
- if (err) {
- mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d",
- err);
- goto err_irq_cpu_rmap_add;
- }
- }
- return 0;
-
-err_irq_cpu_rmap_add:
- clear_rmap(mdev);
-err_out:
-#endif
- return err;
-}
-
/* This function should only be called after mlx5_cmd_force_teardown_hca */
void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
- if (!mlx5_core_is_sf(dev))
- clear_rmap(dev);
mlx5_irq_table_destroy(dev);
mutex_unlock(&table->lock);
}
@@ -1090,44 +1114,47 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
#define MLX5_MAX_ASYNC_EQS 3
#endif
-int mlx5_eq_table_create(struct mlx5_core_dev *dev)
+static int get_num_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *eq_table = dev->priv.eq_table;
- int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
+ int max_dev_eqs;
+ int max_eqs_sf;
+ int num_eqs;
+
+ /* If ethernet is disabled we use just a single completion vector to
+ * have the other vectors available for other drivers using mlx5_core. For
+ * example, mlx5_vdpa
+ */
+ if (!mlx5_core_is_eth_enabled(dev) && mlx5_eth_supported(dev))
+ return 1;
+
+ max_dev_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
MLX5_CAP_GEN(dev, max_num_eqs) :
1 << MLX5_CAP_GEN(dev, log_max_eq);
- int max_eqs_sf;
- int err;
- eq_table->num_comp_eqs =
- min_t(int,
- mlx5_irq_table_get_num_comp(eq_table->irq_table),
- num_eqs - MLX5_MAX_ASYNC_EQS);
+ num_eqs = min_t(int, mlx5_irq_table_get_num_comp(eq_table->irq_table),
+ max_dev_eqs - MLX5_MAX_ASYNC_EQS);
if (mlx5_core_is_sf(dev)) {
max_eqs_sf = min_t(int, MLX5_COMP_EQS_PER_SF,
mlx5_irq_table_get_sfs_vec(eq_table->irq_table));
- eq_table->num_comp_eqs = min_t(int, eq_table->num_comp_eqs,
- max_eqs_sf);
+ num_eqs = min_t(int, num_eqs, max_eqs_sf);
}
+ return num_eqs;
+}
+
+int mlx5_eq_table_create(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+ int err;
+
+ eq_table->num_comp_eqs = get_num_eqs(dev);
err = create_async_eqs(dev);
if (err) {
mlx5_core_err(dev, "Failed to create async EQs\n");
goto err_async_eqs;
}
- if (!mlx5_core_is_sf(dev)) {
- /* rmap is a mapping between irq number and queue number.
- * each irq can be assign only to a single rmap.
- * since SFs share IRQs, rmap mapping cannot function correctly
- * for irqs that are shared for different core/netdev RX rings.
- * Hence we don't allow netdev rmap for SFs
- */
- err = set_rmap(dev);
- if (err)
- goto err_rmap;
- }
-
err = create_comp_eqs(dev);
if (err) {
mlx5_core_err(dev, "Failed to create completion EQs\n");
@@ -1135,10 +1162,8 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev)
}
return 0;
+
err_comp_eqs:
- if (!mlx5_core_is_sf(dev))
- clear_rmap(dev);
-err_rmap:
destroy_async_eqs(dev);
err_async_eqs:
return err;
@@ -1146,8 +1171,6 @@ err_async_eqs:
void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
{
- if (!mlx5_core_is_sf(dev))
- clear_rmap(dev);
destroy_comp_eqs(dev);
destroy_async_eqs(dev);
}
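
The eq.c rework above folds the old set_rmap()/clear_rmap() pair into alloc_rmap()/free_rmap() and passes &table->rmap down through mlx5_irqs_request_vectors(), presumably so the IRQ layer (pci_irq.c) adds each vector to the map as it is requested. A minimal sketch of the underlying cpu_rmap lifetime, with a placeholder helper name; the cpu_rmap calls are the kernel API from <linux/cpu_rmap.h>:

	/* Sketch only, not part of the patch. */
	#include <linux/cpu_rmap.h>
	#include <linux/errno.h>

	static int example_rmap_setup(struct cpu_rmap **rmap, unsigned int nvec,
				      const int *irqs)
	{
		unsigned int i;
		int err;

		*rmap = alloc_irq_cpu_rmap(nvec);	/* one slot per completion vector */
		if (!*rmap)
			return -ENOMEM;

		for (i = 0; i < nvec; i++) {
			err = irq_cpu_rmap_add(*rmap, irqs[i]);
			if (err) {
				free_irq_cpu_rmap(*rmap);
				*rmap = NULL;
				return err;
			}
		}
		return 0;
	}
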
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
index 3cdcb0e0b20f..1ba03e219111 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
@@ -13,66 +13,6 @@
#define CREATE_TRACE_POINTS
#include "diag/bridge_tracepoint.h"
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE 12000
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_UNTAGGED_GRP_SIZE 16000
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM 0
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO \
- (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM \
- (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO + 1)
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO \
- (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM + \
- MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM \
- (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO + 1)
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO \
- (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM + \
- MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM \
- (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO + 1)
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO \
- (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM + \
- MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM \
- (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO + 1)
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO \
- (MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM + \
- MLX5_ESW_BRIDGE_INGRESS_TABLE_UNTAGGED_GRP_SIZE - 1)
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE \
- (MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO + 1)
-static_assert(MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE == 64000);
-
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE 16000
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_SIZE (32000 - 1)
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_FROM 0
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO \
- (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE - 1)
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM \
- (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO + 1)
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO \
- (MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM + \
- MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE - 1)
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM \
- (MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO + 1)
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO \
- (MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM + \
- MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_SIZE - 1)
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM \
- (MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO + 1)
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO \
- MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE \
- (MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO + 1)
-static_assert(MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE == 64000);
-
-#define MLX5_ESW_BRIDGE_SKIP_TABLE_SIZE 0
-
-enum {
- MLX5_ESW_BRIDGE_LEVEL_INGRESS_TABLE,
- MLX5_ESW_BRIDGE_LEVEL_EGRESS_TABLE,
- MLX5_ESW_BRIDGE_LEVEL_SKIP_TABLE,
-};
-
static const struct rhashtable_params fdb_ht_params = {
.key_offset = offsetof(struct mlx5_esw_bridge_fdb_entry, key),
.key_len = sizeof(struct mlx5_esw_bridge_fdb_key),
@@ -80,31 +20,6 @@ static const struct rhashtable_params fdb_ht_params = {
.automatic_shrinking = true,
};
-enum {
- MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG = BIT(0),
-};
-
-struct mlx5_esw_bridge {
- int ifindex;
- int refcnt;
- struct list_head list;
- struct mlx5_esw_bridge_offloads *br_offloads;
-
- struct list_head fdb_list;
- struct rhashtable fdb_ht;
-
- struct mlx5_flow_table *egress_ft;
- struct mlx5_flow_group *egress_vlan_fg;
- struct mlx5_flow_group *egress_qinq_fg;
- struct mlx5_flow_group *egress_mac_fg;
- struct mlx5_flow_group *egress_miss_fg;
- struct mlx5_pkt_reformat *egress_miss_pkt_reformat;
- struct mlx5_flow_handle *egress_miss_handle;
- unsigned long ageing_time;
- u32 flags;
- u16 vlan_proto;
-};
-
static void
mlx5_esw_bridge_fdb_offload_notify(struct net_device *dev, const unsigned char *addr, u16 vid,
unsigned long val)
@@ -146,7 +61,7 @@ mlx5_esw_bridge_pkt_reformat_vlan_pop_create(struct mlx5_eswitch *esw)
return mlx5_packet_reformat_alloc(esw->dev, &reformat_params, MLX5_FLOW_NAMESPACE_FDB);
}
-static struct mlx5_flow_table *
+struct mlx5_flow_table *
mlx5_esw_bridge_table_create(int max_fte, u32 level, struct mlx5_eswitch *esw)
{
struct mlx5_flow_table_attr ft_attr = {};
@@ -925,6 +840,10 @@ static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex,
if (err)
goto err_fdb_ht;
+ err = mlx5_esw_bridge_mdb_init(bridge);
+ if (err)
+ goto err_mdb_ht;
+
INIT_LIST_HEAD(&bridge->fdb_list);
bridge->ifindex = ifindex;
bridge->refcnt = 1;
@@ -934,6 +853,8 @@ static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex,
return bridge;
+err_mdb_ht:
+ rhashtable_destroy(&bridge->fdb_ht);
err_fdb_ht:
mlx5_esw_bridge_egress_table_cleanup(bridge);
err_egress_tbl:
@@ -953,7 +874,9 @@ static void mlx5_esw_bridge_put(struct mlx5_esw_bridge_offloads *br_offloads,
return;
mlx5_esw_bridge_egress_table_cleanup(bridge);
+ mlx5_esw_bridge_mcast_disable(bridge);
list_del(&bridge->list);
+ mlx5_esw_bridge_mdb_cleanup(bridge);
rhashtable_destroy(&bridge->fdb_ht);
kvfree(bridge);
@@ -993,7 +916,7 @@ static unsigned long mlx5_esw_bridge_port_key_from_data(u16 vport_num, u16 esw_o
return vport_num | (unsigned long)esw_owner_vhca_id << sizeof(vport_num) * BITS_PER_BYTE;
}
-static unsigned long mlx5_esw_bridge_port_key(struct mlx5_esw_bridge_port *port)
+unsigned long mlx5_esw_bridge_port_key(struct mlx5_esw_bridge_port *port)
{
return mlx5_esw_bridge_port_key_from_data(port->vport_num, port->esw_owner_vhca_id);
}
@@ -1018,6 +941,19 @@ static void mlx5_esw_bridge_port_erase(struct mlx5_esw_bridge_port *port,
xa_erase(&br_offloads->ports, mlx5_esw_bridge_port_key(port));
}
+static struct mlx5_esw_bridge *
+mlx5_esw_bridge_from_port_lookup(u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_esw_bridge_port *port;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port)
+ return NULL;
+
+ return port->bridge;
+}
+
static void mlx5_esw_bridge_fdb_entry_refresh(struct mlx5_esw_bridge_fdb_entry *entry)
{
trace_mlx5_esw_bridge_fdb_entry_refresh(entry);
@@ -1166,8 +1102,21 @@ mlx5_esw_bridge_vlan_push_mark_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct
}
static int
-mlx5_esw_bridge_vlan_push_pop_create(u16 vlan_proto, u16 flags, struct mlx5_esw_bridge_vlan *vlan,
- struct mlx5_eswitch *esw)
+mlx5_esw_bridge_vlan_push_pop_fhs_create(u16 vlan_proto, struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_vlan *vlan)
+{
+ return mlx5_esw_bridge_vlan_mcast_init(vlan_proto, port, vlan);
+}
+
+static void
+mlx5_esw_bridge_vlan_push_pop_fhs_cleanup(struct mlx5_esw_bridge_vlan *vlan)
+{
+ mlx5_esw_bridge_vlan_mcast_cleanup(vlan);
+}
+
+static int
+mlx5_esw_bridge_vlan_push_pop_create(u16 vlan_proto, u16 flags, struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw)
{
int err;
@@ -1185,10 +1134,16 @@ mlx5_esw_bridge_vlan_push_pop_create(u16 vlan_proto, u16 flags, struct mlx5_esw_
err = mlx5_esw_bridge_vlan_pop_create(vlan, esw);
if (err)
goto err_vlan_pop;
+
+ err = mlx5_esw_bridge_vlan_push_pop_fhs_create(vlan_proto, port, vlan);
+ if (err)
+ goto err_vlan_pop_fhs;
}
return 0;
+err_vlan_pop_fhs:
+ mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw);
err_vlan_pop:
if (vlan->pkt_mod_hdr_push_mark)
mlx5_esw_bridge_vlan_push_mark_cleanup(vlan, esw);
@@ -1213,7 +1168,7 @@ mlx5_esw_bridge_vlan_create(u16 vlan_proto, u16 vid, u16 flags, struct mlx5_esw_
vlan->flags = flags;
INIT_LIST_HEAD(&vlan->fdb_list);
- err = mlx5_esw_bridge_vlan_push_pop_create(vlan_proto, flags, vlan, esw);
+ err = mlx5_esw_bridge_vlan_push_pop_create(vlan_proto, flags, port, vlan, esw);
if (err)
goto err_vlan_push_pop;
@@ -1225,6 +1180,8 @@ mlx5_esw_bridge_vlan_create(u16 vlan_proto, u16 vid, u16 flags, struct mlx5_esw_
return vlan;
err_xa_insert:
+ if (vlan->mcast_handle)
+ mlx5_esw_bridge_vlan_push_pop_fhs_cleanup(vlan);
if (vlan->pkt_reformat_pop)
mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw);
if (vlan->pkt_mod_hdr_push_mark)
@@ -1242,7 +1199,8 @@ static void mlx5_esw_bridge_vlan_erase(struct mlx5_esw_bridge_port *port,
xa_erase(&port->vlans, vlan->vid);
}
-static void mlx5_esw_bridge_vlan_flush(struct mlx5_esw_bridge_vlan *vlan,
+static void mlx5_esw_bridge_vlan_flush(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_vlan *vlan,
struct mlx5_esw_bridge *bridge)
{
struct mlx5_eswitch *esw = bridge->br_offloads->esw;
@@ -1250,7 +1208,10 @@ static void mlx5_esw_bridge_vlan_flush(struct mlx5_esw_bridge_vlan *vlan,
list_for_each_entry_safe(entry, tmp, &vlan->fdb_list, vlan_list)
mlx5_esw_bridge_fdb_entry_notify_and_cleanup(entry, bridge);
+ mlx5_esw_bridge_port_mdb_vlan_flush(port, vlan);
+ if (vlan->mcast_handle)
+ mlx5_esw_bridge_vlan_push_pop_fhs_cleanup(vlan);
if (vlan->pkt_reformat_pop)
mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw);
if (vlan->pkt_mod_hdr_push_mark)
@@ -1264,7 +1225,7 @@ static void mlx5_esw_bridge_vlan_cleanup(struct mlx5_esw_bridge_port *port,
struct mlx5_esw_bridge *bridge)
{
trace_mlx5_esw_bridge_vlan_cleanup(vlan);
- mlx5_esw_bridge_vlan_flush(vlan, bridge);
+ mlx5_esw_bridge_vlan_flush(port, vlan, bridge);
mlx5_esw_bridge_vlan_erase(port, vlan);
kvfree(vlan);
}
@@ -1288,9 +1249,9 @@ static int mlx5_esw_bridge_port_vlans_recreate(struct mlx5_esw_bridge_port *port
int err;
xa_for_each(&port->vlans, i, vlan) {
- mlx5_esw_bridge_vlan_flush(vlan, bridge);
- err = mlx5_esw_bridge_vlan_push_pop_create(bridge->vlan_proto, vlan->flags, vlan,
- br_offloads->esw);
+ mlx5_esw_bridge_vlan_flush(port, vlan, bridge);
+ err = mlx5_esw_bridge_vlan_push_pop_create(bridge->vlan_proto, vlan->flags, port,
+ vlan, br_offloads->esw);
if (err) {
esw_warn(br_offloads->esw->dev,
"Failed to create VLAN=%u(proto=%x) push/pop actions (vport=%u,err=%d)\n",
@@ -1473,33 +1434,32 @@ err_ingress_fc_create:
int mlx5_esw_bridge_ageing_time_set(u16 vport_num, u16 esw_owner_vhca_id, unsigned long ageing_time,
struct mlx5_esw_bridge_offloads *br_offloads)
{
- struct mlx5_esw_bridge_port *port;
+ struct mlx5_esw_bridge *bridge;
- port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
- if (!port)
+ bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!bridge)
return -EINVAL;
- port->bridge->ageing_time = clock_t_to_jiffies(ageing_time);
+ bridge->ageing_time = clock_t_to_jiffies(ageing_time);
return 0;
}
int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable,
struct mlx5_esw_bridge_offloads *br_offloads)
{
- struct mlx5_esw_bridge_port *port;
struct mlx5_esw_bridge *bridge;
bool filtering;
- port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
- if (!port)
+ bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!bridge)
return -EINVAL;
- bridge = port->bridge;
filtering = bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG;
if (filtering == enable)
return 0;
mlx5_esw_bridge_fdb_flush(bridge);
+ mlx5_esw_bridge_mdb_flush(bridge);
if (enable)
bridge->flags |= MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG;
else
@@ -1511,15 +1471,13 @@ int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, boo
int mlx5_esw_bridge_vlan_proto_set(u16 vport_num, u16 esw_owner_vhca_id, u16 proto,
struct mlx5_esw_bridge_offloads *br_offloads)
{
- struct mlx5_esw_bridge_port *port;
struct mlx5_esw_bridge *bridge;
- port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id,
- br_offloads);
- if (!port)
+ bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id,
+ br_offloads);
+ if (!bridge)
return -EINVAL;
- bridge = port->bridge;
if (bridge->vlan_proto == proto)
return 0;
if (proto != ETH_P_8021Q && proto != ETH_P_8021AD) {
@@ -1528,12 +1486,43 @@ int mlx5_esw_bridge_vlan_proto_set(u16 vport_num, u16 esw_owner_vhca_id, u16 pro
}
mlx5_esw_bridge_fdb_flush(bridge);
+ mlx5_esw_bridge_mdb_flush(bridge);
bridge->vlan_proto = proto;
mlx5_esw_bridge_vlans_recreate(bridge);
return 0;
}
+int mlx5_esw_bridge_mcast_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_eswitch *esw = br_offloads->esw;
+ struct mlx5_esw_bridge *bridge;
+ int err = 0;
+ bool mcast;
+
+ if (!(MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_multi_path_any_table) ||
+ MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_multi_path_any_table_limit_regc)) ||
+ !MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_uplink_hairpin) ||
+ !MLX5_CAP_ESW_FLOWTABLE_FDB((esw)->dev, ignore_flow_level))
+ return -EOPNOTSUPP;
+
+ bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!bridge)
+ return -EINVAL;
+
+ mcast = bridge->flags & MLX5_ESW_BRIDGE_MCAST_FLAG;
+ if (mcast == enable)
+ return 0;
+
+ if (enable)
+ err = mlx5_esw_bridge_mcast_enable(bridge);
+ else
+ mlx5_esw_bridge_mcast_disable(bridge);
+
+ return err;
+}
+
static int mlx5_esw_bridge_vport_init(u16 vport_num, u16 esw_owner_vhca_id, u16 flags,
struct mlx5_esw_bridge_offloads *br_offloads,
struct mlx5_esw_bridge *bridge)
@@ -1551,6 +1540,15 @@ static int mlx5_esw_bridge_vport_init(u16 vport_num, u16 esw_owner_vhca_id, u16
port->bridge = bridge;
port->flags |= flags;
xa_init(&port->vlans);
+
+ err = mlx5_esw_bridge_port_mcast_init(port);
+ if (err) {
+ esw_warn(esw->dev,
+ "Failed to initialize port multicast (vport=%u,esw_owner_vhca_id=%u,err=%d)\n",
+ port->vport_num, port->esw_owner_vhca_id, err);
+ goto err_port_mcast;
+ }
+
err = mlx5_esw_bridge_port_insert(port, br_offloads);
if (err) {
esw_warn(esw->dev,
@@ -1563,6 +1561,8 @@ static int mlx5_esw_bridge_vport_init(u16 vport_num, u16 esw_owner_vhca_id, u16
return 0;
err_port_insert:
+ mlx5_esw_bridge_port_mcast_cleanup(port);
+err_port_mcast:
kvfree(port);
return err;
}
@@ -1580,6 +1580,7 @@ static int mlx5_esw_bridge_vport_cleanup(struct mlx5_esw_bridge_offloads *br_off
trace_mlx5_esw_bridge_vport_cleanup(port);
mlx5_esw_bridge_port_vlans_flush(port, bridge);
+ mlx5_esw_bridge_port_mcast_cleanup(port);
mlx5_esw_bridge_port_erase(port, br_offloads);
kvfree(port);
mlx5_esw_bridge_put(br_offloads, bridge);
@@ -1711,14 +1712,12 @@ void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16
struct switchdev_notifier_fdb_info *fdb_info)
{
struct mlx5_esw_bridge_fdb_entry *entry;
- struct mlx5_esw_bridge_port *port;
struct mlx5_esw_bridge *bridge;
- port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
- if (!port)
+ bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!bridge)
return;
- bridge = port->bridge;
entry = mlx5_esw_bridge_fdb_lookup(bridge, fdb_info->addr, fdb_info->vid);
if (!entry) {
esw_debug(br_offloads->esw->dev,
@@ -1765,14 +1764,12 @@ void mlx5_esw_bridge_fdb_remove(struct net_device *dev, u16 vport_num, u16 esw_o
{
struct mlx5_eswitch *esw = br_offloads->esw;
struct mlx5_esw_bridge_fdb_entry *entry;
- struct mlx5_esw_bridge_port *port;
struct mlx5_esw_bridge *bridge;
- port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
- if (!port)
+ bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!bridge)
return;
- bridge = port->bridge;
entry = mlx5_esw_bridge_fdb_lookup(bridge, fdb_info->addr, fdb_info->vid);
if (!entry) {
esw_debug(esw->dev,
@@ -1806,6 +1803,64 @@ void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads)
}
}
+int mlx5_esw_bridge_port_mdb_add(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ const unsigned char *addr, u16 vid,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_bridge_vlan *vlan;
+ struct mlx5_esw_bridge_port *port;
+ struct mlx5_esw_bridge *bridge;
+ int err;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port) {
+ esw_warn(br_offloads->esw->dev,
+ "Failed to lookup bridge port to add MDB (MAC=%pM,vport=%u)\n",
+ addr, vport_num);
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Failed to lookup bridge port to add MDB (MAC=%pM,vport=%u)\n",
+ addr, vport_num);
+ return -EINVAL;
+ }
+
+ bridge = port->bridge;
+ if (bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG && vid) {
+ vlan = mlx5_esw_bridge_vlan_lookup(vid, port);
+ if (!vlan) {
+ esw_warn(br_offloads->esw->dev,
+ "Failed to lookup bridge port vlan metadata to create MDB (MAC=%pM,vid=%u,vport=%u)\n",
+ addr, vid, vport_num);
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Failed to lookup bridge port vlan metadata to create MDB (MAC=%pM,vid=%u,vport=%u)\n",
+ addr, vid, vport_num);
+ return -EINVAL;
+ }
+ }
+
+ err = mlx5_esw_bridge_port_mdb_attach(dev, port, addr, vid);
+ if (err) {
+ NL_SET_ERR_MSG_FMT_MOD(extack, "Failed to add MDB (MAC=%pM,vid=%u,vport=%u)\n",
+ addr, vid, vport_num);
+ return err;
+ }
+
+ return 0;
+}
+
+void mlx5_esw_bridge_port_mdb_del(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ const unsigned char *addr, u16 vid,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_esw_bridge_port *port;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port)
+ return;
+
+ mlx5_esw_bridge_port_mdb_detach(dev, port, addr, vid);
+}
+
static void mlx5_esw_bridge_flush(struct mlx5_esw_bridge_offloads *br_offloads)
{
struct mlx5_esw_bridge_port *port;
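
The bridge.c changes above also un-static mlx5_esw_bridge_port_key(), which the new MDB code uses to key each entry's ports xarray with the same (vport_num, esw_owner_vhca_id) packing as br_offloads->ports. A small standalone sketch of that packing, with made-up example values:

	/* Sketch only, not part of the patch. */
	#include <stdint.h>
	#include <stdio.h>

	/* Low 16 bits carry the vport number, the next 16 the owning
	 * eswitch's vhca id, mirroring mlx5_esw_bridge_port_key_from_data().
	 */
	static unsigned long bridge_port_key(uint16_t vport_num, uint16_t esw_owner_vhca_id)
	{
		return vport_num | (unsigned long)esw_owner_vhca_id << 16;
	}

	int main(void)
	{
		unsigned long key = bridge_port_key(3, 7);

		printf("vport=%lu owner=%lu\n", key & 0xffff, key >> 16);
		return 0;
	}
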
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
index 10851a515bca..a9dd18c73d6a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
@@ -25,12 +25,19 @@ struct mlx5_esw_bridge_offloads {
struct delayed_work update_work;
struct mlx5_flow_table *ingress_ft;
+ struct mlx5_flow_group *ingress_igmp_fg;
+ struct mlx5_flow_group *ingress_mld_fg;
struct mlx5_flow_group *ingress_vlan_fg;
struct mlx5_flow_group *ingress_vlan_filter_fg;
struct mlx5_flow_group *ingress_qinq_fg;
struct mlx5_flow_group *ingress_qinq_filter_fg;
struct mlx5_flow_group *ingress_mac_fg;
+ struct mlx5_flow_handle *igmp_handle;
+ struct mlx5_flow_handle *mld_query_handle;
+ struct mlx5_flow_handle *mld_report_handle;
+ struct mlx5_flow_handle *mld_done_handle;
+
struct mlx5_flow_table *skip_ft;
};
@@ -64,10 +71,20 @@ int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, boo
struct mlx5_esw_bridge_offloads *br_offloads);
int mlx5_esw_bridge_vlan_proto_set(u16 vport_num, u16 esw_owner_vhca_id, u16 proto,
struct mlx5_esw_bridge_offloads *br_offloads);
+int mlx5_esw_bridge_mcast_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable,
+ struct mlx5_esw_bridge_offloads *br_offloads);
int mlx5_esw_bridge_port_vlan_add(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, u16 flags,
struct mlx5_esw_bridge_offloads *br_offloads,
struct netlink_ext_ack *extack);
void mlx5_esw_bridge_port_vlan_del(u16 vport_num, u16 esw_owner_vhca_id, u16 vid,
struct mlx5_esw_bridge_offloads *br_offloads);
+int mlx5_esw_bridge_port_mdb_add(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ const unsigned char *addr, u16 vid,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack);
+void mlx5_esw_bridge_port_mdb_del(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ const unsigned char *addr, u16 vid,
+ struct mlx5_esw_bridge_offloads *br_offloads);
+
#endif /* __MLX5_ESW_BRIDGE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c
new file mode 100644
index 000000000000..2eae594a5e80
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c
@@ -0,0 +1,1126 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "lib/devcom.h"
+#include "bridge.h"
+#include "eswitch.h"
+#include "bridge_priv.h"
+#include "diag/bridge_tracepoint.h"
+
+static const struct rhashtable_params mdb_ht_params = {
+ .key_offset = offsetof(struct mlx5_esw_bridge_mdb_entry, key),
+ .key_len = sizeof(struct mlx5_esw_bridge_mdb_key),
+ .head_offset = offsetof(struct mlx5_esw_bridge_mdb_entry, ht_node),
+ .automatic_shrinking = true,
+};
+
+int mlx5_esw_bridge_mdb_init(struct mlx5_esw_bridge *bridge)
+{
+ INIT_LIST_HEAD(&bridge->mdb_list);
+ return rhashtable_init(&bridge->mdb_ht, &mdb_ht_params);
+}
+
+void mlx5_esw_bridge_mdb_cleanup(struct mlx5_esw_bridge *bridge)
+{
+ rhashtable_destroy(&bridge->mdb_ht);
+}
+
+static struct mlx5_esw_bridge_port *
+mlx5_esw_bridge_mdb_port_lookup(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_mdb_entry *entry)
+{
+ return xa_load(&entry->ports, mlx5_esw_bridge_port_key(port));
+}
+
+static int mlx5_esw_bridge_mdb_port_insert(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_mdb_entry *entry)
+{
+ int err = xa_insert(&entry->ports, mlx5_esw_bridge_port_key(port), port, GFP_KERNEL);
+
+ if (!err)
+ entry->num_ports++;
+ return err;
+}
+
+static void mlx5_esw_bridge_mdb_port_remove(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_mdb_entry *entry)
+{
+ xa_erase(&entry->ports, mlx5_esw_bridge_port_key(port));
+ entry->num_ports--;
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_mdb_flow_create(u16 esw_owner_vhca_id, struct mlx5_esw_bridge_mdb_entry *entry,
+ struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_flow_act flow_act = {
+ .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ .flags = FLOW_ACT_NO_APPEND | FLOW_ACT_IGNORE_FLOW_LEVEL,
+ };
+ int num_dests = entry->num_ports, i = 0;
+ struct mlx5_flow_destination *dests;
+ struct mlx5_esw_bridge_port *port;
+ struct mlx5_flow_spec *rule_spec;
+ struct mlx5_flow_handle *handle;
+ u8 *dmac_v, *dmac_c;
+ unsigned long idx;
+
+ rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+ if (!rule_spec)
+ return ERR_PTR(-ENOMEM);
+
+ dests = kvcalloc(num_dests, sizeof(*dests), GFP_KERNEL);
+ if (!dests) {
+ kvfree(rule_spec);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ xa_for_each(&entry->ports, idx, port) {
+ dests[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dests[i].ft = port->mcast.ft;
+ i++;
+ }
+
+ rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ dmac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value, outer_headers.dmac_47_16);
+ ether_addr_copy(dmac_v, entry->key.addr);
+ dmac_c = MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria, outer_headers.dmac_47_16);
+ eth_broadcast_addr(dmac_c);
+
+ if (entry->key.vid) {
+ if (bridge->vlan_proto == ETH_P_8021Q) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.cvlan_tag);
+ } else if (bridge->vlan_proto == ETH_P_8021AD) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.svlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.svlan_tag);
+ }
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.first_vid);
+ MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.first_vid,
+ entry->key.vid);
+ }
+
+ handle = mlx5_add_flow_rules(bridge->egress_ft, rule_spec, &flow_act, dests, num_dests);
+
+ kvfree(dests);
+ kvfree(rule_spec);
+ return handle;
+}
+
+static int
+mlx5_esw_bridge_port_mdb_offload(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_mdb_entry *entry)
+{
+ struct mlx5_flow_handle *handle;
+
+ handle = mlx5_esw_bridge_mdb_flow_create(port->esw_owner_vhca_id, entry, port->bridge);
+ if (entry->egress_handle) {
+ mlx5_del_flow_rules(entry->egress_handle);
+ entry->egress_handle = NULL;
+ }
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ entry->egress_handle = handle;
+ return 0;
+}
+
+static struct mlx5_esw_bridge_mdb_entry *
+mlx5_esw_bridge_mdb_lookup(struct mlx5_esw_bridge *bridge,
+ const unsigned char *addr, u16 vid)
+{
+ struct mlx5_esw_bridge_mdb_key key = {};
+
+ ether_addr_copy(key.addr, addr);
+ key.vid = vid;
+ return rhashtable_lookup_fast(&bridge->mdb_ht, &key, mdb_ht_params);
+}
+
+static struct mlx5_esw_bridge_mdb_entry *
+mlx5_esw_bridge_port_mdb_entry_init(struct mlx5_esw_bridge_port *port,
+ const unsigned char *addr, u16 vid)
+{
+ struct mlx5_esw_bridge *bridge = port->bridge;
+ struct mlx5_esw_bridge_mdb_entry *entry;
+ int err;
+
+ entry = kvzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return ERR_PTR(-ENOMEM);
+
+ ether_addr_copy(entry->key.addr, addr);
+ entry->key.vid = vid;
+ xa_init(&entry->ports);
+ err = rhashtable_insert_fast(&bridge->mdb_ht, &entry->ht_node, mdb_ht_params);
+ if (err)
+ goto err_ht_insert;
+
+ list_add(&entry->list, &bridge->mdb_list);
+
+ return entry;
+
+err_ht_insert:
+ xa_destroy(&entry->ports);
+ kvfree(entry);
+ return ERR_PTR(err);
+}
+
+static void mlx5_esw_bridge_port_mdb_entry_cleanup(struct mlx5_esw_bridge *bridge,
+ struct mlx5_esw_bridge_mdb_entry *entry)
+{
+ if (entry->egress_handle)
+ mlx5_del_flow_rules(entry->egress_handle);
+ list_del(&entry->list);
+ rhashtable_remove_fast(&bridge->mdb_ht, &entry->ht_node, mdb_ht_params);
+ xa_destroy(&entry->ports);
+ kvfree(entry);
+}
+
+int mlx5_esw_bridge_port_mdb_attach(struct net_device *dev, struct mlx5_esw_bridge_port *port,
+ const unsigned char *addr, u16 vid)
+{
+ struct mlx5_esw_bridge *bridge = port->bridge;
+ struct mlx5_esw_bridge_mdb_entry *entry;
+ int err;
+
+ if (!(bridge->flags & MLX5_ESW_BRIDGE_MCAST_FLAG))
+ return -EOPNOTSUPP;
+
+ entry = mlx5_esw_bridge_mdb_lookup(bridge, addr, vid);
+ if (entry) {
+ if (mlx5_esw_bridge_mdb_port_lookup(port, entry)) {
+ esw_warn(bridge->br_offloads->esw->dev, "MDB attach entry is already attached to port (MAC=%pM,vid=%u,vport=%u)\n",
+ addr, vid, port->vport_num);
+ return 0;
+ }
+ } else {
+ entry = mlx5_esw_bridge_port_mdb_entry_init(port, addr, vid);
+ if (IS_ERR(entry)) {
+ err = PTR_ERR(entry);
+ esw_warn(bridge->br_offloads->esw->dev, "MDB attach failed to init entry (MAC=%pM,vid=%u,vport=%u,err=%d)\n",
+ addr, vid, port->vport_num, err);
+ return err;
+ }
+ }
+
+ err = mlx5_esw_bridge_mdb_port_insert(port, entry);
+ if (err) {
+ if (!entry->num_ports)
+ mlx5_esw_bridge_port_mdb_entry_cleanup(bridge, entry); /* new mdb entry */
+ esw_warn(bridge->br_offloads->esw->dev,
+ "MDB attach failed to insert port (MAC=%pM,vid=%u,vport=%u,err=%d)\n",
+ addr, vid, port->vport_num, err);
+ return err;
+ }
+
+ err = mlx5_esw_bridge_port_mdb_offload(port, entry);
+ if (err)
+ /* Single mdb can be used by multiple ports, so just log the
+ * error and continue.
+ */
+ esw_warn(bridge->br_offloads->esw->dev, "MDB attach failed to offload (MAC=%pM,vid=%u,vport=%u,err=%d)\n",
+ addr, vid, port->vport_num, err);
+
+ trace_mlx5_esw_bridge_port_mdb_attach(dev, entry);
+ return 0;
+}
+
+static void mlx5_esw_bridge_port_mdb_entry_detach(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_mdb_entry *entry)
+{
+ struct mlx5_esw_bridge *bridge = port->bridge;
+ int err;
+
+ mlx5_esw_bridge_mdb_port_remove(port, entry);
+ if (!entry->num_ports) {
+ mlx5_esw_bridge_port_mdb_entry_cleanup(bridge, entry);
+ return;
+ }
+
+ err = mlx5_esw_bridge_port_mdb_offload(port, entry);
+ if (err)
+ /* Single mdb can be used by multiple ports, so just log the
+ * error and continue.
+ */
+ esw_warn(bridge->br_offloads->esw->dev, "MDB detach failed to offload (MAC=%pM,vid=%u,vport=%u)\n",
+ entry->key.addr, entry->key.vid, port->vport_num);
+}
+
+void mlx5_esw_bridge_port_mdb_detach(struct net_device *dev, struct mlx5_esw_bridge_port *port,
+ const unsigned char *addr, u16 vid)
+{
+ struct mlx5_esw_bridge *bridge = port->bridge;
+ struct mlx5_esw_bridge_mdb_entry *entry;
+
+ entry = mlx5_esw_bridge_mdb_lookup(bridge, addr, vid);
+ if (!entry) {
+ esw_debug(bridge->br_offloads->esw->dev,
+ "MDB detach entry not found (MAC=%pM,vid=%u,vport=%u)\n",
+ addr, vid, port->vport_num);
+ return;
+ }
+
+ if (!mlx5_esw_bridge_mdb_port_lookup(port, entry)) {
+ esw_debug(bridge->br_offloads->esw->dev,
+ "MDB detach entry not attached to the port (MAC=%pM,vid=%u,vport=%u)\n",
+ addr, vid, port->vport_num);
+ return;
+ }
+
+ trace_mlx5_esw_bridge_port_mdb_detach(dev, entry);
+ mlx5_esw_bridge_port_mdb_entry_detach(port, entry);
+}
+
+void mlx5_esw_bridge_port_mdb_vlan_flush(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_vlan *vlan)
+{
+ struct mlx5_esw_bridge *bridge = port->bridge;
+ struct mlx5_esw_bridge_mdb_entry *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &bridge->mdb_list, list)
+ if (entry->key.vid == vlan->vid && mlx5_esw_bridge_mdb_port_lookup(port, entry))
+ mlx5_esw_bridge_port_mdb_entry_detach(port, entry);
+}
+
+static void mlx5_esw_bridge_port_mdb_flush(struct mlx5_esw_bridge_port *port)
+{
+ struct mlx5_esw_bridge *bridge = port->bridge;
+ struct mlx5_esw_bridge_mdb_entry *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &bridge->mdb_list, list)
+ if (mlx5_esw_bridge_mdb_port_lookup(port, entry))
+ mlx5_esw_bridge_port_mdb_entry_detach(port, entry);
+}
+
+void mlx5_esw_bridge_mdb_flush(struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_esw_bridge_mdb_entry *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &bridge->mdb_list, list)
+ mlx5_esw_bridge_port_mdb_entry_cleanup(bridge, entry);
+}
+static int mlx5_esw_bridge_port_mcast_fts_init(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_eswitch *esw = bridge->br_offloads->esw;
+ struct mlx5_flow_table *mcast_ft;
+
+ mcast_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_MCAST_TABLE_SIZE,
+ MLX5_ESW_BRIDGE_LEVEL_MCAST_TABLE,
+ esw);
+ if (IS_ERR(mcast_ft))
+ return PTR_ERR(mcast_ft);
+
+ port->mcast.ft = mcast_ft;
+ return 0;
+}
+
+static void mlx5_esw_bridge_port_mcast_fts_cleanup(struct mlx5_esw_bridge_port *port)
+{
+ if (port->mcast.ft)
+ mlx5_destroy_flow_table(port->mcast.ft);
+ port->mcast.ft = NULL;
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_mcast_filter_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *mcast_ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *in, *match;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2);
+ match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+
+ MLX5_SET(create_flow_group_in, in, start_flow_index,
+ MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_FROM);
+ MLX5_SET(create_flow_group_in, in, end_flow_index,
+ MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_TO);
+
+ fg = mlx5_create_flow_group(mcast_ft, in);
+ kvfree(in);
+ if (IS_ERR(fg))
+ esw_warn(esw->dev,
+ "Failed to create filter flow group for bridge mcast table (err=%pe)\n",
+ fg);
+
+ return fg;
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_mcast_vlan_proto_fg_create(unsigned int from, unsigned int to, u16 vlan_proto,
+ struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *mcast_ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *in, *match;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ if (vlan_proto == ETH_P_8021Q)
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag);
+ else if (vlan_proto == ETH_P_8021AD)
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.svlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.first_vid);
+
+ MLX5_SET(create_flow_group_in, in, start_flow_index, from);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, to);
+
+ fg = mlx5_create_flow_group(mcast_ft, in);
+ kvfree(in);
+ if (IS_ERR(fg))
+ esw_warn(esw->dev,
+ "Failed to create VLAN(proto=%x) flow group for bridge mcast table (err=%pe)\n",
+ vlan_proto, fg);
+
+ return fg;
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_mcast_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *mcast_ft)
+{
+ unsigned int from = MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_IDX_FROM;
+ unsigned int to = MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_IDX_TO;
+
+ return mlx5_esw_bridge_mcast_vlan_proto_fg_create(from, to, ETH_P_8021Q, esw, mcast_ft);
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_mcast_qinq_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *mcast_ft)
+{
+ unsigned int from = MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_IDX_FROM;
+ unsigned int to = MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_IDX_TO;
+
+ return mlx5_esw_bridge_mcast_vlan_proto_fg_create(from, to, ETH_P_8021AD, esw, mcast_ft);
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_mcast_fwd_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *mcast_ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *in;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(create_flow_group_in, in, start_flow_index,
+ MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_FROM);
+ MLX5_SET(create_flow_group_in, in, end_flow_index,
+ MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_TO);
+
+ fg = mlx5_create_flow_group(mcast_ft, in);
+ kvfree(in);
+ if (IS_ERR(fg))
+ esw_warn(esw->dev,
+ "Failed to create forward flow group for bridge mcast table (err=%pe)\n",
+ fg);
+
+ return fg;
+}
+
+static int mlx5_esw_bridge_port_mcast_fgs_init(struct mlx5_esw_bridge_port *port)
+{
+ struct mlx5_flow_group *fwd_fg, *qinq_fg, *vlan_fg, *filter_fg;
+ struct mlx5_eswitch *esw = port->bridge->br_offloads->esw;
+ struct mlx5_flow_table *mcast_ft = port->mcast.ft;
+ int err;
+
+ filter_fg = mlx5_esw_bridge_mcast_filter_fg_create(esw, mcast_ft);
+ if (IS_ERR(filter_fg))
+ return PTR_ERR(filter_fg);
+
+ vlan_fg = mlx5_esw_bridge_mcast_vlan_fg_create(esw, mcast_ft);
+ if (IS_ERR(vlan_fg)) {
+ err = PTR_ERR(vlan_fg);
+ goto err_vlan_fg;
+ }
+
+ qinq_fg = mlx5_esw_bridge_mcast_qinq_fg_create(esw, mcast_ft);
+ if (IS_ERR(qinq_fg)) {
+ err = PTR_ERR(qinq_fg);
+ goto err_qinq_fg;
+ }
+
+ fwd_fg = mlx5_esw_bridge_mcast_fwd_fg_create(esw, mcast_ft);
+ if (IS_ERR(fwd_fg)) {
+ err = PTR_ERR(fwd_fg);
+ goto err_fwd_fg;
+ }
+
+ port->mcast.filter_fg = filter_fg;
+ port->mcast.vlan_fg = vlan_fg;
+ port->mcast.qinq_fg = qinq_fg;
+ port->mcast.fwd_fg = fwd_fg;
+
+ return 0;
+
+err_fwd_fg:
+ mlx5_destroy_flow_group(qinq_fg);
+err_qinq_fg:
+ mlx5_destroy_flow_group(vlan_fg);
+err_vlan_fg:
+ mlx5_destroy_flow_group(filter_fg);
+ return err;
+}
+
+static void mlx5_esw_bridge_port_mcast_fgs_cleanup(struct mlx5_esw_bridge_port *port)
+{
+ if (port->mcast.fwd_fg)
+ mlx5_destroy_flow_group(port->mcast.fwd_fg);
+ port->mcast.fwd_fg = NULL;
+ if (port->mcast.qinq_fg)
+ mlx5_destroy_flow_group(port->mcast.qinq_fg);
+ port->mcast.qinq_fg = NULL;
+ if (port->mcast.vlan_fg)
+ mlx5_destroy_flow_group(port->mcast.vlan_fg);
+ port->mcast.vlan_fg = NULL;
+ if (port->mcast.filter_fg)
+ mlx5_destroy_flow_group(port->mcast.filter_fg);
+ port->mcast.filter_fg = NULL;
+}
+
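+/* Drop rule matching the source vport metadata (reg_c_0) of this port on the
+ * given eswitch, which prevents multicast traffic from being replicated back
+ * to its ingress port.
+ */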
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_mcast_flow_with_esw_create(struct mlx5_esw_bridge_port *port,
+ struct mlx5_eswitch *esw)
+{
+ struct mlx5_flow_act flow_act = {
+ .action = MLX5_FLOW_CONTEXT_ACTION_DROP,
+ .flags = FLOW_ACT_NO_APPEND,
+ };
+ struct mlx5_flow_spec *rule_spec;
+ struct mlx5_flow_handle *handle;
+
+ rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+ if (!rule_spec)
+ return ERR_PTR(-ENOMEM);
+
+ rule_spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+
+ MLX5_SET(fte_match_param, rule_spec->match_criteria,
+ misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
+ MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw, port->vport_num));
+
+ handle = mlx5_add_flow_rules(port->mcast.ft, rule_spec, &flow_act, NULL, 0);
+
+ kvfree(rule_spec);
+ return handle;
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_mcast_filter_flow_create(struct mlx5_esw_bridge_port *port)
+{
+ return mlx5_esw_bridge_mcast_flow_with_esw_create(port, port->bridge->br_offloads->esw);
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_mcast_filter_flow_peer_create(struct mlx5_esw_bridge_port *port)
+{
+ struct mlx5_devcom *devcom = port->bridge->br_offloads->esw->dev->priv.devcom;
+ struct mlx5_flow_handle *handle;
+ struct mlx5_eswitch *peer_esw;
+
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ if (!peer_esw)
+ return ERR_PTR(-ENODEV);
+
+ handle = mlx5_esw_bridge_mcast_flow_with_esw_create(port, peer_esw);
+
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ return handle;
+}
+
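+/* Per-VLAN rule: match the VLAN tag type and VID, pop the VLAN header and
+ * forward the packet to the port vport.
+ */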
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_mcast_vlan_flow_create(u16 vlan_proto, struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_vlan *vlan)
+{
+ struct mlx5_flow_act flow_act = {
+ .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ .flags = FLOW_ACT_NO_APPEND,
+ };
+ struct mlx5_flow_destination dest = {
+ .type = MLX5_FLOW_DESTINATION_TYPE_VPORT,
+ .vport.num = port->vport_num,
+ };
+ struct mlx5_esw_bridge *bridge = port->bridge;
+ struct mlx5_flow_spec *rule_spec;
+ struct mlx5_flow_handle *handle;
+
+ rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+ if (!rule_spec)
+ return ERR_PTR(-ENOMEM);
+
+ if (MLX5_CAP_ESW_FLOWTABLE(bridge->br_offloads->esw->dev, flow_source) &&
+ port->vport_num == MLX5_VPORT_UPLINK)
+ rule_spec->flow_context.flow_source =
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+ rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ flow_act.pkt_reformat = vlan->pkt_reformat_pop;
+
+ if (vlan_proto == ETH_P_8021Q) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.cvlan_tag);
+ } else if (vlan_proto == ETH_P_8021AD) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.svlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.svlan_tag);
+ }
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, outer_headers.first_vid);
+ MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.first_vid, vlan->vid);
+
+ if (MLX5_CAP_ESW(bridge->br_offloads->esw->dev, merged_eswitch)) {
+ dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ dest.vport.vhca_id = port->esw_owner_vhca_id;
+ }
+ handle = mlx5_add_flow_rules(port->mcast.ft, rule_spec, &flow_act, &dest, 1);
+
+ kvfree(rule_spec);
+ return handle;
+}
+
+int mlx5_esw_bridge_vlan_mcast_init(u16 vlan_proto, struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_vlan *vlan)
+{
+ struct mlx5_flow_handle *handle;
+
+ if (!(port->bridge->flags & MLX5_ESW_BRIDGE_MCAST_FLAG))
+ return 0;
+
+ handle = mlx5_esw_bridge_mcast_vlan_flow_create(vlan_proto, port, vlan);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ vlan->mcast_handle = handle;
+ return 0;
+}
+
+void mlx5_esw_bridge_vlan_mcast_cleanup(struct mlx5_esw_bridge_vlan *vlan)
+{
+ if (vlan->mcast_handle)
+ mlx5_del_flow_rules(vlan->mcast_handle);
+ vlan->mcast_handle = NULL;
+}
+
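+/* Catch-all rule forwarding the remaining multicast traffic to the port vport. */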
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_mcast_fwd_flow_create(struct mlx5_esw_bridge_port *port)
+{
+ struct mlx5_flow_act flow_act = {
+ .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ .flags = FLOW_ACT_NO_APPEND,
+ };
+ struct mlx5_flow_destination dest = {
+ .type = MLX5_FLOW_DESTINATION_TYPE_VPORT,
+ .vport.num = port->vport_num,
+ };
+ struct mlx5_esw_bridge *bridge = port->bridge;
+ struct mlx5_flow_spec *rule_spec;
+ struct mlx5_flow_handle *handle;
+
+ rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+ if (!rule_spec)
+ return ERR_PTR(-ENOMEM);
+
+ if (MLX5_CAP_ESW_FLOWTABLE(bridge->br_offloads->esw->dev, flow_source) &&
+ port->vport_num == MLX5_VPORT_UPLINK)
+ rule_spec->flow_context.flow_source =
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+
+ if (MLX5_CAP_ESW(bridge->br_offloads->esw->dev, merged_eswitch)) {
+ dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ dest.vport.vhca_id = port->esw_owner_vhca_id;
+ }
+ handle = mlx5_add_flow_rules(port->mcast.ft, rule_spec, &flow_act, &dest, 1);
+
+ kvfree(rule_spec);
+ return handle;
+}
+
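+/* Install the filter, forward and per-VLAN rules of the port multicast table. */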
+static int mlx5_esw_bridge_port_mcast_fhs_init(struct mlx5_esw_bridge_port *port)
+{
+ struct mlx5_flow_handle *filter_handle, *fwd_handle;
+ struct mlx5_esw_bridge_vlan *vlan, *failed;
+ unsigned long index;
+ int err;
+
+ filter_handle = (port->flags & MLX5_ESW_BRIDGE_PORT_FLAG_PEER) ?
+ mlx5_esw_bridge_mcast_filter_flow_peer_create(port) :
+ mlx5_esw_bridge_mcast_filter_flow_create(port);
+ if (IS_ERR(filter_handle))
+ return PTR_ERR(filter_handle);
+
+ fwd_handle = mlx5_esw_bridge_mcast_fwd_flow_create(port);
+ if (IS_ERR(fwd_handle)) {
+ err = PTR_ERR(fwd_handle);
+ goto err_fwd;
+ }
+
+ xa_for_each(&port->vlans, index, vlan) {
+ err = mlx5_esw_bridge_vlan_mcast_init(port->bridge->vlan_proto, port, vlan);
+ if (err) {
+ failed = vlan;
+ goto err_vlan;
+ }
+ }
+
+ port->mcast.filter_handle = filter_handle;
+ port->mcast.fwd_handle = fwd_handle;
+
+ return 0;
+
+err_vlan:
+ xa_for_each(&port->vlans, index, vlan) {
+ if (vlan == failed)
+ break;
+
+ mlx5_esw_bridge_vlan_mcast_cleanup(vlan);
+ }
+ mlx5_del_flow_rules(fwd_handle);
+err_fwd:
+ mlx5_del_flow_rules(filter_handle);
+ return err;
+}
+
+static void mlx5_esw_bridge_port_mcast_fhs_cleanup(struct mlx5_esw_bridge_port *port)
+{
+ struct mlx5_esw_bridge_vlan *vlan;
+ unsigned long index;
+
+ xa_for_each(&port->vlans, index, vlan)
+ mlx5_esw_bridge_vlan_mcast_cleanup(vlan);
+
+ if (port->mcast.fwd_handle)
+ mlx5_del_flow_rules(port->mcast.fwd_handle);
+ port->mcast.fwd_handle = NULL;
+ if (port->mcast.filter_handle)
+ mlx5_del_flow_rules(port->mcast.filter_handle);
+ port->mcast.filter_handle = NULL;
+}
+
+int mlx5_esw_bridge_port_mcast_init(struct mlx5_esw_bridge_port *port)
+{
+ struct mlx5_esw_bridge *bridge = port->bridge;
+ int err;
+
+ if (!(bridge->flags & MLX5_ESW_BRIDGE_MCAST_FLAG))
+ return 0;
+
+ err = mlx5_esw_bridge_port_mcast_fts_init(port, bridge);
+ if (err)
+ return err;
+
+ err = mlx5_esw_bridge_port_mcast_fgs_init(port);
+ if (err)
+ goto err_fgs;
+
+ err = mlx5_esw_bridge_port_mcast_fhs_init(port);
+ if (err)
+ goto err_fhs;
+ return err;
+
+err_fhs:
+ mlx5_esw_bridge_port_mcast_fgs_cleanup(port);
+err_fgs:
+ mlx5_esw_bridge_port_mcast_fts_cleanup(port);
+ return err;
+}
+
+void mlx5_esw_bridge_port_mcast_cleanup(struct mlx5_esw_bridge_port *port)
+{
+ mlx5_esw_bridge_port_mdb_flush(port);
+ mlx5_esw_bridge_port_mcast_fhs_cleanup(port);
+ mlx5_esw_bridge_port_mcast_fgs_cleanup(port);
+ mlx5_esw_bridge_port_mcast_fts_cleanup(port);
+}
+
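+/* Flow group in the shared bridge ingress table for IGMP packets, matching on
+ * IP version and protocol.
+ */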
+static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_igmp_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ingress_ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *in, *match;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ip_version);
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ip_protocol);
+
+ MLX5_SET(create_flow_group_in, in, start_flow_index,
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_IDX_FROM);
+ MLX5_SET(create_flow_group_in, in, end_flow_index,
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_IDX_TO);
+
+ fg = mlx5_create_flow_group(ingress_ft, in);
+ kvfree(in);
+ if (IS_ERR(fg))
+ esw_warn(esw->dev,
+ "Failed to create IGMP flow group for bridge ingress table (err=%pe)\n",
+ fg);
+
+ return fg;
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_mld_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ingress_ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *in, *match;
+
+ if (!(MLX5_CAP_GEN(esw->dev, flex_parser_protocols) & MLX5_FLEX_PROTO_ICMPV6)) {
+ esw_warn(esw->dev,
+ "Can't create MLD flow group due to missing hardware ICMPv6 parsing support\n");
+ return NULL;
+ }
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_3);
+ match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ip_version);
+ MLX5_SET_TO_ONES(fte_match_param, match, misc_parameters_3.icmpv6_type);
+
+ MLX5_SET(create_flow_group_in, in, start_flow_index,
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_IDX_FROM);
+ MLX5_SET(create_flow_group_in, in, end_flow_index,
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_IDX_TO);
+
+ fg = mlx5_create_flow_group(ingress_ft, in);
+ kvfree(in);
+ if (IS_ERR(fg))
+ esw_warn(esw->dev,
+ "Failed to create MLD flow group for bridge ingress table (err=%pe)\n",
+ fg);
+
+ return fg;
+}
+
+static int
+mlx5_esw_bridge_ingress_mcast_fgs_init(struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_flow_table *ingress_ft = br_offloads->ingress_ft;
+ struct mlx5_eswitch *esw = br_offloads->esw;
+ struct mlx5_flow_group *igmp_fg, *mld_fg;
+
+ igmp_fg = mlx5_esw_bridge_ingress_igmp_fg_create(esw, ingress_ft);
+ if (IS_ERR(igmp_fg))
+ return PTR_ERR(igmp_fg);
+
+ mld_fg = mlx5_esw_bridge_ingress_mld_fg_create(esw, ingress_ft);
+ if (IS_ERR(mld_fg)) {
+ mlx5_destroy_flow_group(igmp_fg);
+ return PTR_ERR(mld_fg);
+ }
+
+ br_offloads->ingress_igmp_fg = igmp_fg;
+ br_offloads->ingress_mld_fg = mld_fg;
+ return 0;
+}
+
+static void
+mlx5_esw_bridge_ingress_mcast_fgs_cleanup(struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ if (br_offloads->ingress_mld_fg)
+ mlx5_destroy_flow_group(br_offloads->ingress_mld_fg);
+ br_offloads->ingress_mld_fg = NULL;
+ if (br_offloads->ingress_igmp_fg)
+ mlx5_destroy_flow_group(br_offloads->ingress_igmp_fg);
+ br_offloads->ingress_igmp_fg = NULL;
+}
+
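+/* Redirect IPv4 IGMP packets to the skip table, bypassing the rest of the
+ * bridge ingress table.
+ */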
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_ingress_igmp_fh_create(struct mlx5_flow_table *ingress_ft,
+ struct mlx5_flow_table *skip_ft)
+{
+ struct mlx5_flow_destination dest = {
+ .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE,
+ .ft = skip_ft,
+ };
+ struct mlx5_flow_act flow_act = {
+ .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ .flags = FLOW_ACT_NO_APPEND,
+ };
+ struct mlx5_flow_spec *rule_spec;
+ struct mlx5_flow_handle *handle;
+
+ rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+ if (!rule_spec)
+ return ERR_PTR(-ENOMEM);
+
+ rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, outer_headers.ip_version);
+ MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_version, 4);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, outer_headers.ip_protocol);
+ MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_protocol, IPPROTO_IGMP);
+
+ handle = mlx5_add_flow_rules(ingress_ft, rule_spec, &flow_act, &dest, 1);
+
+ kvfree(rule_spec);
+ return handle;
+}
+
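+/* Redirect IPv6 MLD packets of the given ICMPv6 type to the skip table. */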
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_ingress_mld_fh_create(u8 type, struct mlx5_flow_table *ingress_ft,
+ struct mlx5_flow_table *skip_ft)
+{
+ struct mlx5_flow_destination dest = {
+ .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE,
+ .ft = skip_ft,
+ };
+ struct mlx5_flow_act flow_act = {
+ .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ .flags = FLOW_ACT_NO_APPEND,
+ };
+ struct mlx5_flow_spec *rule_spec;
+ struct mlx5_flow_handle *handle;
+
+ rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+ if (!rule_spec)
+ return ERR_PTR(-ENOMEM);
+
+ rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_3;
+
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, outer_headers.ip_version);
+ MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_version, 6);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, misc_parameters_3.icmpv6_type);
+ MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_3.icmpv6_type, type);
+
+ handle = mlx5_add_flow_rules(ingress_ft, rule_spec, &flow_act, &dest, 1);
+
+ kvfree(rule_spec);
+ return handle;
+}
+
+static int
+mlx5_esw_bridge_ingress_mcast_fhs_create(struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_flow_handle *igmp_handle, *mld_query_handle, *mld_report_handle,
+ *mld_done_handle;
+ struct mlx5_flow_table *ingress_ft = br_offloads->ingress_ft,
+ *skip_ft = br_offloads->skip_ft;
+ int err;
+
+ igmp_handle = mlx5_esw_bridge_ingress_igmp_fh_create(ingress_ft, skip_ft);
+ if (IS_ERR(igmp_handle))
+ return PTR_ERR(igmp_handle);
+
+ if (br_offloads->ingress_mld_fg) {
+ mld_query_handle = mlx5_esw_bridge_ingress_mld_fh_create(ICMPV6_MGM_QUERY,
+ ingress_ft,
+ skip_ft);
+ if (IS_ERR(mld_query_handle)) {
+ err = PTR_ERR(mld_query_handle);
+ goto err_mld_query;
+ }
+
+ mld_report_handle = mlx5_esw_bridge_ingress_mld_fh_create(ICMPV6_MGM_REPORT,
+ ingress_ft,
+ skip_ft);
+ if (IS_ERR(mld_report_handle)) {
+ err = PTR_ERR(mld_report_handle);
+ goto err_mld_report;
+ }
+
+ mld_done_handle = mlx5_esw_bridge_ingress_mld_fh_create(ICMPV6_MGM_REDUCTION,
+ ingress_ft,
+ skip_ft);
+ if (IS_ERR(mld_done_handle)) {
+ err = PTR_ERR(mld_done_handle);
+ goto err_mld_done;
+ }
+ } else {
+ mld_query_handle = NULL;
+ mld_report_handle = NULL;
+ mld_done_handle = NULL;
+ }
+
+ br_offloads->igmp_handle = igmp_handle;
+ br_offloads->mld_query_handle = mld_query_handle;
+ br_offloads->mld_report_handle = mld_report_handle;
+ br_offloads->mld_done_handle = mld_done_handle;
+
+ return 0;
+
+err_mld_done:
+ mlx5_del_flow_rules(mld_report_handle);
+err_mld_report:
+ mlx5_del_flow_rules(mld_query_handle);
+err_mld_query:
+ mlx5_del_flow_rules(igmp_handle);
+ return err;
+}
+
+static void
+mlx5_esw_bridge_ingress_mcast_fhs_cleanup(struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ if (br_offloads->mld_done_handle)
+ mlx5_del_flow_rules(br_offloads->mld_done_handle);
+ br_offloads->mld_done_handle = NULL;
+ if (br_offloads->mld_report_handle)
+ mlx5_del_flow_rules(br_offloads->mld_report_handle);
+ br_offloads->mld_report_handle = NULL;
+ if (br_offloads->mld_query_handle)
+ mlx5_del_flow_rules(br_offloads->mld_query_handle);
+ br_offloads->mld_query_handle = NULL;
+ if (br_offloads->igmp_handle)
+ mlx5_del_flow_rules(br_offloads->igmp_handle);
+ br_offloads->igmp_handle = NULL;
+}
+
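+/* Enable multicast offload on every port of the bridge; roll back already
+ * initialized ports on failure.
+ */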
+static int mlx5_esw_brige_mcast_init(struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads;
+ struct mlx5_esw_bridge_port *port, *failed;
+ unsigned long i;
+ int err;
+
+ xa_for_each(&br_offloads->ports, i, port) {
+ if (port->bridge != bridge)
+ continue;
+
+ err = mlx5_esw_bridge_port_mcast_init(port);
+ if (err) {
+ failed = port;
+ goto err_port;
+ }
+ }
+ return 0;
+
+err_port:
+ xa_for_each(&br_offloads->ports, i, port) {
+ if (port == failed)
+ break;
+ if (port->bridge != bridge)
+ continue;
+
+ mlx5_esw_bridge_port_mcast_cleanup(port);
+ }
+ return err;
+}
+
+static void mlx5_esw_brige_mcast_cleanup(struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads;
+ struct mlx5_esw_bridge_port *port;
+ unsigned long i;
+
+ xa_for_each(&br_offloads->ports, i, port) {
+ if (port->bridge != bridge)
+ continue;
+
+ mlx5_esw_bridge_port_mcast_cleanup(port);
+ }
+}
+
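+/* IGMP/MLD ingress flow groups and rules are shared by all bridges on the
+ * eswitch and are only created when the first bridge enables multicast.
+ */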
+static int mlx5_esw_brige_mcast_global_enable(struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ int err;
+
+ if (br_offloads->ingress_igmp_fg)
+ return 0; /* already enabled by another bridge */
+
+ err = mlx5_esw_bridge_ingress_mcast_fgs_init(br_offloads);
+ if (err) {
+ esw_warn(br_offloads->esw->dev,
+ "Failed to create global multicast flow groups (err=%d)\n",
+ err);
+ return err;
+ }
+
+ err = mlx5_esw_bridge_ingress_mcast_fhs_create(br_offloads);
+ if (err) {
+ esw_warn(br_offloads->esw->dev,
+ "Failed to create global multicast flows (err=%d)\n",
+ err);
+ goto err_fhs;
+ }
+
+ return 0;
+
+err_fhs:
+ mlx5_esw_bridge_ingress_mcast_fgs_cleanup(br_offloads);
+ return err;
+}
+
+static void mlx5_esw_brige_mcast_global_disable(struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_esw_bridge *br;
+
+ list_for_each_entry(br, &br_offloads->bridges, list) {
+ /* Ingress table is global, so only disable snooping when all
+ * bridges on esw have multicast disabled.
+ */
+ if (br->flags & MLX5_ESW_BRIDGE_MCAST_FLAG)
+ return;
+ }
+
+ mlx5_esw_bridge_ingress_mcast_fhs_cleanup(br_offloads);
+ mlx5_esw_bridge_ingress_mcast_fgs_cleanup(br_offloads);
+}
+
+int mlx5_esw_bridge_mcast_enable(struct mlx5_esw_bridge *bridge)
+{
+ int err;
+
+ err = mlx5_esw_brige_mcast_global_enable(bridge->br_offloads);
+ if (err)
+ return err;
+
+ bridge->flags |= MLX5_ESW_BRIDGE_MCAST_FLAG;
+
+ err = mlx5_esw_brige_mcast_init(bridge);
+ if (err) {
+ esw_warn(bridge->br_offloads->esw->dev, "Failed to enable multicast (err=%d)\n",
+ err);
+ bridge->flags &= ~MLX5_ESW_BRIDGE_MCAST_FLAG;
+ mlx5_esw_brige_mcast_global_disable(bridge->br_offloads);
+ }
+ return err;
+}
+
+void mlx5_esw_bridge_mcast_disable(struct mlx5_esw_bridge *bridge)
+{
+ mlx5_esw_brige_mcast_cleanup(bridge);
+ bridge->flags &= ~MLX5_ESW_BRIDGE_MCAST_FLAG;
+ mlx5_esw_brige_mcast_global_disable(bridge->br_offloads);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h
index 878311fe950a..c9595801bdb4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h
@@ -12,11 +12,124 @@
#include <linux/xarray.h>
#include "fs_core.h"
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_SIZE 1
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_SIZE 3
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE 131072
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_UNTAGGED_GRP_SIZE \
+ (524288 - MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_SIZE - \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_SIZE)
+
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_IDX_FROM 0
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_UNTAGGED_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO + 1)
+static_assert(MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE == 1048576);
+
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE 131072
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_SIZE (262144 - 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_FROM 0
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO \
+ MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO + 1)
+static_assert(MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE == 524288);
+
+#define MLX5_ESW_BRIDGE_SKIP_TABLE_SIZE 0
+
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_SIZE 1
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_SIZE 1
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_SIZE 4095
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_SIZE MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_SIZE
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_FROM 0
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_SIZE - 1)
+
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_SIZE \
+ (MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_TO + 1)
+static_assert(MLX5_ESW_BRIDGE_MCAST_TABLE_SIZE == 8192);
+
+enum {
+ MLX5_ESW_BRIDGE_LEVEL_INGRESS_TABLE,
+ MLX5_ESW_BRIDGE_LEVEL_EGRESS_TABLE,
+ MLX5_ESW_BRIDGE_LEVEL_MCAST_TABLE,
+ MLX5_ESW_BRIDGE_LEVEL_SKIP_TABLE,
+};
+
+enum {
+ MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG = BIT(0),
+ MLX5_ESW_BRIDGE_MCAST_FLAG = BIT(1),
+};
+
struct mlx5_esw_bridge_fdb_key {
unsigned char addr[ETH_ALEN];
u16 vid;
};
+struct mlx5_esw_bridge_mdb_key {
+ unsigned char addr[ETH_ALEN];
+ u16 vid;
+};
+
enum {
MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER = BIT(0),
MLX5_ESW_BRIDGE_FLAG_PEER = BIT(1),
@@ -43,6 +156,16 @@ struct mlx5_esw_bridge_fdb_entry {
struct mlx5_flow_handle *filter_handle;
};
+struct mlx5_esw_bridge_mdb_entry {
+ struct mlx5_esw_bridge_mdb_key key;
+ struct rhash_head ht_node;
+ struct list_head list;
+ struct xarray ports;
+ int num_ports;
+
+ struct mlx5_flow_handle *egress_handle;
+};
+
struct mlx5_esw_bridge_vlan {
u16 vid;
u16 flags;
@@ -50,6 +173,7 @@ struct mlx5_esw_bridge_vlan {
struct mlx5_pkt_reformat *pkt_reformat_push;
struct mlx5_pkt_reformat *pkt_reformat_pop;
struct mlx5_modify_hdr *pkt_mod_hdr_push_mark;
+ struct mlx5_flow_handle *mcast_handle;
};
struct mlx5_esw_bridge_port {
@@ -58,6 +182,63 @@ struct mlx5_esw_bridge_port {
u16 flags;
struct mlx5_esw_bridge *bridge;
struct xarray vlans;
+ struct {
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *filter_fg;
+ struct mlx5_flow_group *vlan_fg;
+ struct mlx5_flow_group *qinq_fg;
+ struct mlx5_flow_group *fwd_fg;
+
+ struct mlx5_flow_handle *filter_handle;
+ struct mlx5_flow_handle *fwd_handle;
+ } mcast;
};
+struct mlx5_esw_bridge {
+ int ifindex;
+ int refcnt;
+ struct list_head list;
+ struct mlx5_esw_bridge_offloads *br_offloads;
+
+ struct list_head fdb_list;
+ struct rhashtable fdb_ht;
+
+ struct list_head mdb_list;
+ struct rhashtable mdb_ht;
+
+ struct mlx5_flow_table *egress_ft;
+ struct mlx5_flow_group *egress_vlan_fg;
+ struct mlx5_flow_group *egress_qinq_fg;
+ struct mlx5_flow_group *egress_mac_fg;
+ struct mlx5_flow_group *egress_miss_fg;
+ struct mlx5_pkt_reformat *egress_miss_pkt_reformat;
+ struct mlx5_flow_handle *egress_miss_handle;
+ unsigned long ageing_time;
+ u32 flags;
+ u16 vlan_proto;
+};
+
+struct mlx5_flow_table *mlx5_esw_bridge_table_create(int max_fte, u32 level,
+ struct mlx5_eswitch *esw);
+unsigned long mlx5_esw_bridge_port_key(struct mlx5_esw_bridge_port *port);
+
+int mlx5_esw_bridge_port_mcast_init(struct mlx5_esw_bridge_port *port);
+void mlx5_esw_bridge_port_mcast_cleanup(struct mlx5_esw_bridge_port *port);
+int mlx5_esw_bridge_vlan_mcast_init(u16 vlan_proto, struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_vlan *vlan);
+void mlx5_esw_bridge_vlan_mcast_cleanup(struct mlx5_esw_bridge_vlan *vlan);
+
+int mlx5_esw_bridge_mcast_enable(struct mlx5_esw_bridge *bridge);
+void mlx5_esw_bridge_mcast_disable(struct mlx5_esw_bridge *bridge);
+
+int mlx5_esw_bridge_mdb_init(struct mlx5_esw_bridge *bridge);
+void mlx5_esw_bridge_mdb_cleanup(struct mlx5_esw_bridge *bridge);
+int mlx5_esw_bridge_port_mdb_attach(struct net_device *dev, struct mlx5_esw_bridge_port *port,
+ const unsigned char *addr, u16 vid);
+void mlx5_esw_bridge_port_mdb_detach(struct net_device *dev, struct mlx5_esw_bridge_port *port,
+ const unsigned char *addr, u16 vid);
+void mlx5_esw_bridge_port_mdb_vlan_flush(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_vlan *vlan);
+void mlx5_esw_bridge_mdb_flush(struct mlx5_esw_bridge *bridge);
+
#endif /* _MLX5_ESW_BRIDGE_PRIVATE_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c
deleted file mode 100644
index 3d0bbcca1cb9..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c
+++ /dev/null
@@ -1,198 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
-/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
-
-#include <linux/debugfs.h>
-#include "eswitch.h"
-
-enum vnic_diag_counter {
- MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE,
- MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW,
- MLX5_VNIC_DIAG_COMP_EQ_OVERRUN,
- MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN,
- MLX5_VNIC_DIAG_CQ_OVERRUN,
- MLX5_VNIC_DIAG_INVALID_COMMAND,
- MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND,
- MLX5_VNIC_DIAG_RX_STEERING_DISCARD,
-};
-
-static int mlx5_esw_query_vnic_diag(struct mlx5_vport *vport, enum vnic_diag_counter counter,
- u64 *val)
-{
- u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {};
- u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
- struct mlx5_core_dev *dev = vport->dev;
- u16 vport_num = vport->vport;
- void *vnic_diag_out;
- int err;
-
- MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV);
- MLX5_SET(query_vnic_env_in, in, vport_number, vport_num);
- if (!mlx5_esw_is_manager_vport(dev->priv.eswitch, vport_num))
- MLX5_SET(query_vnic_env_in, in, other_vport, 1);
-
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
- if (err)
- return err;
-
- vnic_diag_out = MLX5_ADDR_OF(query_vnic_env_out, out, vport_env);
- switch (counter) {
- case MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE:
- *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, total_error_queues);
- break;
- case MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW:
- *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out,
- send_queue_priority_update_flow);
- break;
- case MLX5_VNIC_DIAG_COMP_EQ_OVERRUN:
- *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, comp_eq_overrun);
- break;
- case MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN:
- *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, async_eq_overrun);
- break;
- case MLX5_VNIC_DIAG_CQ_OVERRUN:
- *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, cq_overrun);
- break;
- case MLX5_VNIC_DIAG_INVALID_COMMAND:
- *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, invalid_command);
- break;
- case MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND:
- *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, quota_exceeded_command);
- break;
- case MLX5_VNIC_DIAG_RX_STEERING_DISCARD:
- *val = MLX5_GET64(vnic_diagnostic_statistics, vnic_diag_out,
- nic_receive_steering_discard);
- break;
- }
-
- return 0;
-}
-
-static int __show_vnic_diag(struct seq_file *file, struct mlx5_vport *vport,
- enum vnic_diag_counter type)
-{
- u64 val = 0;
- int ret;
-
- ret = mlx5_esw_query_vnic_diag(vport, type, &val);
- if (ret)
- return ret;
-
- seq_printf(file, "%llu\n", val);
- return 0;
-}
-
-static int total_q_under_processor_handle_show(struct seq_file *file, void *priv)
-{
- return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE);
-}
-
-static int send_queue_priority_update_flow_show(struct seq_file *file, void *priv)
-{
- return __show_vnic_diag(file, file->private,
- MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW);
-}
-
-static int comp_eq_overrun_show(struct seq_file *file, void *priv)
-{
- return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_COMP_EQ_OVERRUN);
-}
-
-static int async_eq_overrun_show(struct seq_file *file, void *priv)
-{
- return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN);
-}
-
-static int cq_overrun_show(struct seq_file *file, void *priv)
-{
- return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_CQ_OVERRUN);
-}
-
-static int invalid_command_show(struct seq_file *file, void *priv)
-{
- return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_INVALID_COMMAND);
-}
-
-static int quota_exceeded_command_show(struct seq_file *file, void *priv)
-{
- return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND);
-}
-
-static int rx_steering_discard_show(struct seq_file *file, void *priv)
-{
- return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_RX_STEERING_DISCARD);
-}
-
-DEFINE_SHOW_ATTRIBUTE(total_q_under_processor_handle);
-DEFINE_SHOW_ATTRIBUTE(send_queue_priority_update_flow);
-DEFINE_SHOW_ATTRIBUTE(comp_eq_overrun);
-DEFINE_SHOW_ATTRIBUTE(async_eq_overrun);
-DEFINE_SHOW_ATTRIBUTE(cq_overrun);
-DEFINE_SHOW_ATTRIBUTE(invalid_command);
-DEFINE_SHOW_ATTRIBUTE(quota_exceeded_command);
-DEFINE_SHOW_ATTRIBUTE(rx_steering_discard);
-
-void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num)
-{
- struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
-
- debugfs_remove_recursive(vport->dbgfs);
- vport->dbgfs = NULL;
-}
-
-/* vnic diag dir name is "pf", "ecpf" or "{vf/sf}_xxxx" */
-#define VNIC_DIAG_DIR_NAME_MAX_LEN 8
-
-void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool is_sf, u16 sf_num)
-{
- struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
- struct dentry *vnic_diag;
- char dir_name[VNIC_DIAG_DIR_NAME_MAX_LEN];
- int err;
-
- if (!MLX5_CAP_GEN(esw->dev, vport_group_manager))
- return;
-
- if (vport_num == MLX5_VPORT_PF) {
- strcpy(dir_name, "pf");
- } else if (vport_num == MLX5_VPORT_ECPF) {
- strcpy(dir_name, "ecpf");
- } else {
- err = snprintf(dir_name, VNIC_DIAG_DIR_NAME_MAX_LEN, "%s_%d", is_sf ? "sf" : "vf",
- is_sf ? sf_num : vport_num - MLX5_VPORT_FIRST_VF);
- if (WARN_ON(err < 0))
- return;
- }
-
- vport->dbgfs = debugfs_create_dir(dir_name, esw->dbgfs);
- vnic_diag = debugfs_create_dir("vnic_diag", vport->dbgfs);
-
- if (MLX5_CAP_GEN(esw->dev, vnic_env_queue_counters)) {
- debugfs_create_file("total_q_under_processor_handle", 0444, vnic_diag, vport,
- &total_q_under_processor_handle_fops);
- debugfs_create_file("send_queue_priority_update_flow", 0444, vnic_diag, vport,
- &send_queue_priority_update_flow_fops);
- }
-
- if (MLX5_CAP_GEN(esw->dev, eq_overrun_count)) {
- debugfs_create_file("comp_eq_overrun", 0444, vnic_diag, vport,
- &comp_eq_overrun_fops);
- debugfs_create_file("async_eq_overrun", 0444, vnic_diag, vport,
- &async_eq_overrun_fops);
- }
-
- if (MLX5_CAP_GEN(esw->dev, vnic_env_cq_overrun))
- debugfs_create_file("cq_overrun", 0444, vnic_diag, vport, &cq_overrun_fops);
-
- if (MLX5_CAP_GEN(esw->dev, invalid_command_count))
- debugfs_create_file("invalid_command", 0444, vnic_diag, vport,
- &invalid_command_fops);
-
- if (MLX5_CAP_GEN(esw->dev, quota_exceeded_count))
- debugfs_create_file("quota_exceeded_command", 0444, vnic_diag, vport,
- &quota_exceeded_command_fops);
-
- if (MLX5_CAP_GEN(esw->dev, nic_receive_steering_discard))
- debugfs_create_file("rx_steering_discard", 0444, vnic_diag, vport,
- &rx_steering_discard_fops);
-
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
index 51ac24e6ec3c..1808da214094 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
@@ -110,6 +110,41 @@ DEFINE_EVENT(mlx5_esw_bridge_port_template,
TP_ARGS(port)
);
+DECLARE_EVENT_CLASS(mlx5_esw_bridge_mdb_port_change_template,
+ TP_PROTO(const struct net_device *dev,
+ const struct mlx5_esw_bridge_mdb_entry *mdb),
+ TP_ARGS(dev, mdb),
+ TP_STRUCT__entry(
+ __array(char, dev_name, IFNAMSIZ)
+ __array(unsigned char, addr, ETH_ALEN)
+ __field(u16, vid)
+ __field(int, num_ports)
+ __field(bool, offloaded)),
+ TP_fast_assign(
+ strscpy(__entry->dev_name, netdev_name(dev), IFNAMSIZ);
+ memcpy(__entry->addr, mdb->key.addr, ETH_ALEN);
+ __entry->vid = mdb->key.vid;
+ __entry->num_ports = mdb->num_ports;
+ __entry->offloaded = mdb->egress_handle;),
+ TP_printk("net_device=%s addr=%pM vid=%u num_ports=%d offloaded=%d",
+ __entry->dev_name,
+ __entry->addr,
+ __entry->vid,
+ __entry->num_ports,
+ __entry->offloaded));
+
+DEFINE_EVENT(mlx5_esw_bridge_mdb_port_change_template,
+ mlx5_esw_bridge_port_mdb_attach,
+ TP_PROTO(const struct net_device *dev,
+ const struct mlx5_esw_bridge_mdb_entry *mdb),
+ TP_ARGS(dev, mdb));
+
+DEFINE_EVENT(mlx5_esw_bridge_mdb_port_change_template,
+ mlx5_esw_bridge_port_mdb_detach,
+ TP_PROTO(const struct net_device *dev,
+ const struct mlx5_esw_bridge_mdb_entry *mdb),
+ TP_ARGS(dev, mdb));
+
#endif
/* This part must be outside protection */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
index 75015d370922..7c79476cc5f9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -744,7 +744,7 @@ static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *
u64 value;
int err;
- err = mlx5e_port_max_linkspeed(mdev, &link_speed_max);
+ err = mlx5_port_max_linkspeed(mdev, &link_speed_max);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c
index 9e72118f2e4c..749c3957a128 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c
@@ -11,7 +11,7 @@ struct mlx5_vport_key {
u16 prio;
u16 vport;
u16 vhca_id;
- const struct esw_vport_tbl_namespace *vport_ns;
+ struct esw_vport_tbl_namespace *vport_ns;
} __packed;
struct mlx5_vport_table {
@@ -21,6 +21,14 @@ struct mlx5_vport_table {
struct mlx5_vport_key key;
};
+static void
+esw_vport_tbl_init(struct mlx5_eswitch *esw, struct esw_vport_tbl_namespace *ns)
+{
+ if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
+ ns->flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
+ MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
+}
+
static struct mlx5_flow_table *
esw_vport_tbl_create(struct mlx5_eswitch *esw, struct mlx5_flow_namespace *ns,
const struct esw_vport_tbl_namespace *vport_ns)
@@ -80,6 +88,7 @@ mlx5_esw_vporttbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr
u32 hkey;
mutex_lock(&esw->fdb_table.offloads.vports.lock);
+ esw_vport_tbl_init(esw, attr->vport_ns);
hkey = flow_attr_to_vport_key(esw, attr, &skey);
e = esw_vport_tbl_lookup(esw, &skey, hkey);
if (e) {
@@ -127,6 +136,7 @@ mlx5_esw_vporttbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr
u32 hkey;
mutex_lock(&esw->fdb_table.offloads.vports.lock);
+ esw_vport_tbl_init(esw, attr->vport_ns);
hkey = flow_attr_to_vport_key(esw, attr, &key);
e = esw_vport_tbl_lookup(esw, &key, hkey);
if (!e || --e->num_rules)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 19fed514fc17..901c53751b0a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -36,7 +36,6 @@
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/mpfs.h>
-#include <linux/debugfs.h>
#include "esw/acl/lgcy.h"
#include "esw/legacy.h"
#include "esw/qos.h"
@@ -1056,7 +1055,6 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num,
if (err)
return err;
- mlx5_esw_vport_debugfs_create(esw, vport_num, false, 0);
err = esw_offloads_load_rep(esw, vport_num);
if (err)
goto err_rep;
@@ -1064,7 +1062,6 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num,
return err;
err_rep:
- mlx5_esw_vport_debugfs_destroy(esw, vport_num);
mlx5_esw_vport_disable(esw, vport_num);
return err;
}
@@ -1072,7 +1069,6 @@ err_rep:
void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num)
{
esw_offloads_unload_rep(esw, vport_num);
- mlx5_esw_vport_debugfs_destroy(esw, vport_num);
mlx5_esw_vport_disable(esw, vport_num);
}
@@ -1510,7 +1506,7 @@ out_free:
return err;
}
-static int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw, struct mlx5_core_dev *dev,
+static int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw,
int index, u16 vport_num)
{
struct mlx5_vport *vport;
@@ -1564,7 +1560,7 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw)
xa_init(&esw->vports);
- err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_PF);
+ err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_PF);
if (err)
goto err;
if (esw->first_host_vport == MLX5_VPORT_PF)
@@ -1572,7 +1568,7 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw)
idx++;
for (i = 0; i < mlx5_core_max_vfs(dev); i++) {
- err = mlx5_esw_vport_alloc(esw, dev, idx, idx);
+ err = mlx5_esw_vport_alloc(esw, idx, idx);
if (err)
goto err;
xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_VF);
@@ -1581,7 +1577,7 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw)
}
base_sf_num = mlx5_sf_start_function_id(dev);
for (i = 0; i < mlx5_sf_max_functions(dev); i++) {
- err = mlx5_esw_vport_alloc(esw, dev, idx, base_sf_num + i);
+ err = mlx5_esw_vport_alloc(esw, idx, base_sf_num + i);
if (err)
goto err;
xa_set_mark(&esw->vports, base_sf_num + i, MLX5_ESW_VPT_SF);
@@ -1592,7 +1588,7 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw)
if (err)
goto err;
for (i = 0; i < max_host_pf_sfs; i++) {
- err = mlx5_esw_vport_alloc(esw, dev, idx, base_sf_num + i);
+ err = mlx5_esw_vport_alloc(esw, idx, base_sf_num + i);
if (err)
goto err;
xa_set_mark(&esw->vports, base_sf_num + i, MLX5_ESW_VPT_SF);
@@ -1600,12 +1596,12 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw)
}
if (mlx5_ecpf_vport_exists(dev)) {
- err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_ECPF);
+ err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_ECPF);
if (err)
goto err;
idx++;
}
- err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_UPLINK);
+ err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_UPLINK);
if (err)
goto err;
return 0;
@@ -1672,7 +1668,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
dev->priv.eswitch = esw;
BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head);
- esw->dbgfs = debugfs_create_dir("esw", mlx5_debugfs_get_dev_root(esw->dev));
esw_info(dev,
"Total vports %d, per vport: max uc(%d) max mc(%d)\n",
esw->total_vports,
@@ -1696,7 +1691,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
esw_info(esw->dev, "cleanup\n");
- debugfs_remove_recursive(esw->dbgfs);
esw->dev->priv.eswitch = NULL;
destroy_workqueue(esw->work_queue);
WARN_ON(refcount_read(&esw->qos.refcnt));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 19e9a77c4633..1a042c981713 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -195,7 +195,6 @@ struct mlx5_vport {
enum mlx5_eswitch_vport_event enabled_events;
int index;
struct devlink_port *dl_port;
- struct dentry *dbgfs;
};
struct mlx5_esw_indir_table;
@@ -263,6 +262,7 @@ struct mlx5_esw_offload {
const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES];
u8 inline_mode;
atomic64_t num_flows;
+ u64 num_block_encap;
enum devlink_eswitch_encap_mode encap;
struct ida vport_metadata_ida;
unsigned int host_number; /* ECPF supports one external host */
@@ -342,7 +342,6 @@ struct mlx5_eswitch {
u32 large_group_num;
} params;
struct blocking_notifier_head n_head;
- struct dentry *dbgfs;
};
void esw_offloads_disable(struct mlx5_eswitch *esw);
@@ -355,7 +354,6 @@ mlx5_eswitch_add_send_to_vport_meta_rule(struct mlx5_eswitch *esw, u16 vport_num
void mlx5_eswitch_del_send_to_vport_meta_rule(struct mlx5_flow_handle *rule);
bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw);
-int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable);
u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw);
void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata);
@@ -674,7 +672,7 @@ struct mlx5_vport_tbl_attr {
u32 chain;
u16 prio;
u16 vport;
- const struct esw_vport_tbl_namespace *vport_ns;
+ struct esw_vport_tbl_namespace *vport_ns;
};
struct mlx5_flow_table *
@@ -703,9 +701,6 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_
void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num);
struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num);
-void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool is_sf, u16 sf_num);
-void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num);
-
int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
u16 vport_num, u32 controller, u32 sfnum);
void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num);
@@ -748,6 +743,9 @@ void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
struct mlx5_eswitch *slave_esw);
int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw);
+bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev);
+void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev);
+
static inline int mlx5_eswitch_num_vfs(struct mlx5_eswitch *esw)
{
if (mlx5_esw_allowed(esw))
@@ -761,6 +759,7 @@ mlx5_eswitch_get_slow_fdb(struct mlx5_eswitch *esw)
{
return esw->fdb_table.offloads.slow_fdb;
}
+
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
@@ -805,6 +804,15 @@ mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
{
return 0;
}
+
+static inline bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev)
+{
+ return true;
+}
+
+static inline void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev)
+{
+}
#endif /* CONFIG_MLX5_ESWITCH */
#endif /* __MLX5_ESWITCH_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 25a8076a77bf..69215ffb9999 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -73,7 +73,7 @@
#define MLX5_ESW_FT_OFFLOADS_DROP_RULE (1)
-static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = {
+static struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = {
.max_fte = MLX5_ESW_VPORT_TBL_SIZE,
.max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS,
.flags = 0,
@@ -760,7 +760,6 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
kfree(dest);
return rule;
err_chain_src_rewrite:
- esw_put_dest_tables_loop(esw, attr, 0, i);
mlx5_esw_vporttbl_put(esw, &fwd_attr);
err_get_fwd:
mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
@@ -803,7 +802,6 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
if (fwd_rule) {
mlx5_esw_vporttbl_put(esw, &fwd_attr);
mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
- esw_put_dest_tables_loop(esw, attr, 0, esw_attr->split_count);
} else {
if (split)
mlx5_esw_vporttbl_put(esw, &fwd_attr);
@@ -1374,14 +1372,11 @@ esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb)
struct mlx5_flow_table *nf_ft, *ft;
struct mlx5_chains_attr attr = {};
struct mlx5_fs_chains *chains;
- u32 fdb_max;
int err;
- fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size);
-
esw_init_chains_offload_flags(esw, &attr.flags);
attr.ns = MLX5_FLOW_NAMESPACE_FDB;
- attr.max_ft_sz = fdb_max;
+ attr.fs_base_prio = FDB_TC_OFFLOAD;
attr.max_grp_num = esw->params.large_group_num;
attr.default_ft = miss_fdb;
attr.mapping = esw->offloads.reg_c0_obj_pool;
@@ -1392,6 +1387,7 @@ esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb)
esw_warn(dev, "Failed to create fdb chains err(%d)\n", err);
return err;
}
+ mlx5_chains_print_info(chains);
esw->fdb_table.offloads.esw_chains_priv = chains;
@@ -2941,28 +2937,6 @@ metadata_err:
return err;
}
-int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable)
-{
- int err = 0;
-
- down_write(&esw->mode_lock);
- if (mlx5_esw_is_fdb_created(esw)) {
- err = -EBUSY;
- goto done;
- }
- if (!mlx5_esw_vport_match_metadata_supported(esw)) {
- err = -EOPNOTSUPP;
- goto done;
- }
- if (enable)
- esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
- else
- esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
-done:
- up_write(&esw->mode_lock);
- return err;
-}
-
int
esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
@@ -3588,6 +3562,47 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
return err;
}
+bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ struct mlx5_eswitch *esw;
+
+ devl_lock(devlink);
+ esw = mlx5_devlink_eswitch_get(devlink);
+ if (IS_ERR(esw)) {
+ devl_unlock(devlink);
+ /* Failure means no eswitch => not possible to change encap */
+ return true;
+ }
+
+ down_write(&esw->mode_lock);
+ if (esw->mode != MLX5_ESWITCH_LEGACY &&
+ esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) {
+ up_write(&esw->mode_lock);
+ devl_unlock(devlink);
+ return false;
+ }
+
+ esw->offloads.num_block_encap++;
+ up_write(&esw->mode_lock);
+ devl_unlock(devlink);
+ return true;
+}
+
+void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ struct mlx5_eswitch *esw;
+
+ esw = mlx5_devlink_eswitch_get(devlink);
+ if (IS_ERR(esw))
+ return;
+
+ down_write(&esw->mode_lock);
+ esw->offloads.num_block_encap--;
+ up_write(&esw->mode_lock);
+}
+
int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
enum devlink_eswitch_encap_mode encap,
struct netlink_ext_ack *extack)
@@ -3629,6 +3644,13 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
goto unlock;
}
+ if (esw->offloads.num_block_encap) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can't set encapsulation when IPsec SA and/or policies are configured");
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+
esw_destroy_offloads_fdb_tables(esw);
esw->offloads.encap = encap;
@@ -3782,14 +3804,12 @@ int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_p
if (err)
goto devlink_err;
- mlx5_esw_vport_debugfs_create(esw, vport_num, true, sfnum);
err = mlx5_esw_offloads_rep_load(esw, vport_num);
if (err)
goto rep_err;
return 0;
rep_err:
- mlx5_esw_vport_debugfs_destroy(esw, vport_num);
mlx5_esw_devlink_sf_port_unregister(esw, vport_num);
devlink_err:
mlx5_esw_vport_disable(esw, vport_num);
@@ -3799,7 +3819,6 @@ devlink_err:
void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num)
{
mlx5_esw_offloads_rep_unload(esw, vport_num);
- mlx5_esw_vport_debugfs_destroy(esw, vport_num);
mlx5_esw_devlink_sf_port_unregister(esw, vport_num);
mlx5_esw_vport_disable(esw, vport_num);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
index 3a9a6bb9158d..edd910258314 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
@@ -210,18 +210,6 @@ static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw,
return (port_mask & port_value) == MLX5_VPORT_UPLINK;
}
-static bool
-mlx5_eswitch_is_push_vlan_no_cap(struct mlx5_eswitch *esw,
- struct mlx5_flow_act *flow_act)
-{
- if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH &&
- !(mlx5_fs_get_capabilities(esw->dev, MLX5_FLOW_NAMESPACE_FDB) &
- MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX))
- return true;
-
- return false;
-}
-
bool
mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw,
struct mlx5_flow_attr *attr,
@@ -237,7 +225,10 @@ mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw,
(!mlx5_eswitch_offload_is_uplink_port(esw, spec) && !esw_attr->int_port))
return false;
- if (mlx5_eswitch_is_push_vlan_no_cap(esw, flow_act))
+ /* push vlan on RX */
+ if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH &&
+ !(mlx5_fs_get_capabilities(esw->dev, MLX5_FLOW_NAMESPACE_FDB) &
+ MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX))
return true;
/* hairpin */
@@ -261,31 +252,19 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_act term_tbl_act = {};
struct mlx5_flow_handle *rule = NULL;
bool term_table_created = false;
- bool is_push_vlan_on_rx;
int num_vport_dests = 0;
int i, curr_dest;
- is_push_vlan_on_rx = mlx5_eswitch_is_push_vlan_no_cap(esw, flow_act);
mlx5_eswitch_termtbl_actions_move(flow_act, &term_tbl_act);
term_tbl_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
for (i = 0; i < num_dest; i++) {
struct mlx5_termtbl_handle *tt;
- bool hairpin = false;
/* only vport destinations can be terminated */
if (dest[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT)
continue;
- if (attr->dests[num_vport_dests].rep &&
- attr->dests[num_vport_dests].rep->vport == MLX5_VPORT_UPLINK)
- hairpin = true;
-
- if (!is_push_vlan_on_rx && !hairpin) {
- num_vport_dests++;
- continue;
- }
-
if (attr->dests[num_vport_dests].flags & MLX5_ESW_DEST_ENCAP) {
term_tbl_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
term_tbl_act.pkt_reformat = attr->dests[num_vport_dests].pkt_reformat;
@@ -333,9 +312,6 @@ revert_changes:
for (curr_dest = 0; curr_dest < num_vport_dests; curr_dest++) {
struct mlx5_termtbl_handle *tt = attr->dests[curr_dest].termtbl;
- if (!tt)
- continue;
-
attr->dests[curr_dest].termtbl = NULL;
/* search for the destination associated with the
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 731acbe22dc7..19da02c41616 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -137,7 +137,7 @@
#define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + KERNEL_RX_MACSEC_MIN_LEVEL + 1)
#define KERNEL_TX_IPSEC_NUM_PRIOS 1
-#define KERNEL_TX_IPSEC_NUM_LEVELS 2
+#define KERNEL_TX_IPSEC_NUM_LEVELS 3
#define KERNEL_TX_IPSEC_MIN_LEVEL (KERNEL_TX_IPSEC_NUM_LEVELS)
#define KERNEL_TX_MACSEC_NUM_PRIOS 1
@@ -1762,7 +1762,8 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest,
if (ignore_level) {
if (ft->type != FS_FT_FDB &&
- ft->type != FS_FT_NIC_RX)
+ ft->type != FS_FT_NIC_RX &&
+ ft->type != FS_FT_NIC_TX)
return false;
if (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
@@ -2996,7 +2997,7 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
goto out_err;
}
- maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BR_OFFLOAD, 3);
+ maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BR_OFFLOAD, 4);
if (IS_ERR(maj_prio)) {
err = PTR_ERR(maj_prio);
goto out_err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index 4c2dad9d7cfb..50022e7565f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -167,7 +167,7 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev)
if (mlx5_health_wait_pci_up(dev))
mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n");
else
- mlx5_load_one(dev);
+ mlx5_load_one(dev, true);
devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0,
BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) |
BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE));
@@ -499,7 +499,7 @@ int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev)
err = fw_reset->ret;
if (test_and_clear_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags)) {
mlx5_unload_one_devl_locked(dev, false);
- mlx5_load_one_devl_locked(dev, false);
+ mlx5_load_one_devl_locked(dev, true);
}
out:
clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index f9438d4e43ca..871c32dda66e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -42,6 +42,7 @@
#include "lib/pci_vsc.h"
#include "lib/tout.h"
#include "diag/fw_tracer.h"
+#include "diag/reporter_vnic.h"
enum {
MAX_MISSES = 3,
@@ -325,6 +326,10 @@ int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev)
while (sensor_pci_not_working(dev)) {
if (time_after(jiffies, end))
return -ETIMEDOUT;
+ if (test_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) {
+ mlx5_core_warn(dev, "device is being removed, stop waiting for PCI\n");
+ return -ENODEV;
+ }
msleep(100);
}
return 0;
@@ -894,6 +899,7 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev)
cancel_delayed_work_sync(&health->update_fw_log_ts_work);
destroy_workqueue(health->wq);
+ mlx5_reporter_vnic_destroy(dev);
mlx5_fw_reporters_destroy(dev);
}
@@ -903,6 +909,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev)
char *name;
mlx5_fw_reporters_create(dev);
+ mlx5_reporter_vnic_create(dev);
health = &dev->priv.health;
name = kmalloc(64, GFP_KERNEL);
@@ -922,6 +929,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev)
return 0;
out_err:
+ mlx5_reporter_vnic_destroy(dev);
mlx5_fw_reporters_destroy(dev);
return -ENOMEM;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
index 380a208ab137..fa467335526e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
@@ -45,30 +45,28 @@ static int cpu_get_least_loaded(struct mlx5_irq_pool *pool,
/* Creating an IRQ from irq_pool */
static struct mlx5_irq *
-irq_pool_request_irq(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+irq_pool_request_irq(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc)
{
- cpumask_var_t auto_mask;
- struct mlx5_irq *irq;
+ struct irq_affinity_desc auto_desc = {};
u32 irq_index;
int err;
- if (!zalloc_cpumask_var(&auto_mask, GFP_KERNEL))
- return ERR_PTR(-ENOMEM);
err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs, GFP_KERNEL);
if (err)
return ERR_PTR(err);
if (pool->irqs_per_cpu) {
- if (cpumask_weight(req_mask) > 1)
+ if (cpumask_weight(&af_desc->mask) > 1)
/* if req_mask contains more than one CPU, set the least loaded CPU
* of req_mask
*/
- cpumask_set_cpu(cpu_get_least_loaded(pool, req_mask), auto_mask);
+ cpumask_set_cpu(cpu_get_least_loaded(pool, &af_desc->mask),
+ &auto_desc.mask);
else
- cpu_get(pool, cpumask_first(req_mask));
+ cpu_get(pool, cpumask_first(&af_desc->mask));
}
- irq = mlx5_irq_alloc(pool, irq_index, cpumask_empty(auto_mask) ? req_mask : auto_mask);
- free_cpumask_var(auto_mask);
- return irq;
+ return mlx5_irq_alloc(pool, irq_index,
+ cpumask_empty(&auto_desc.mask) ? af_desc : &auto_desc,
+ NULL);
}
/* Looking for the IRQ with the smallest refcount that fits req_mask.
@@ -115,22 +113,22 @@ irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, const struct cpumask *req
/**
* mlx5_irq_affinity_request - request an IRQ according to the given mask.
* @pool: IRQ pool to request from.
- * @req_mask: cpumask requested for this IRQ.
+ * @af_desc: affinity descriptor for this IRQ.
*
* This function returns a pointer to IRQ, or ERR_PTR in case of error.
*/
struct mlx5_irq *
-mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc)
{
struct mlx5_irq *least_loaded_irq, *new_irq;
mutex_lock(&pool->lock);
- least_loaded_irq = irq_pool_find_least_loaded(pool, req_mask);
+ least_loaded_irq = irq_pool_find_least_loaded(pool, &af_desc->mask);
if (least_loaded_irq &&
mlx5_irq_read_locked(least_loaded_irq) < pool->min_threshold)
goto out;
/* We didn't find an IRQ with less than min_thres, try to allocate a new IRQ */
- new_irq = irq_pool_request_irq(pool, req_mask);
+ new_irq = irq_pool_request_irq(pool, af_desc);
if (IS_ERR(new_irq)) {
if (!least_loaded_irq) {
/* We failed to create an IRQ and we didn't find an IRQ */
@@ -194,32 +192,30 @@ int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
struct mlx5_irq **irqs)
{
struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
- cpumask_var_t req_mask;
+ struct irq_affinity_desc af_desc = {};
struct mlx5_irq *irq;
int i = 0;
- if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
- return -ENOMEM;
- cpumask_copy(req_mask, cpu_online_mask);
+ af_desc.is_managed = 1;
+ cpumask_copy(&af_desc.mask, cpu_online_mask);
for (i = 0; i < nirqs; i++) {
if (mlx5_irq_pool_is_sf_pool(pool))
- irq = mlx5_irq_affinity_request(pool, req_mask);
+ irq = mlx5_irq_affinity_request(pool, &af_desc);
else
/* In case the SF pool doesn't exist, fall back to the PF IRQs.
* The PF IRQs are already allocated and bound to a CPU
* at this point. Hence, only an index is needed.
*/
- irq = mlx5_irq_request(dev, i, NULL);
+ irq = mlx5_irq_request(dev, i, NULL, NULL);
if (IS_ERR(irq))
break;
irqs[i] = irq;
- cpumask_clear_cpu(cpumask_first(mlx5_irq_get_affinity_mask(irq)), req_mask);
+ cpumask_clear_cpu(cpumask_first(mlx5_irq_get_affinity_mask(irq)), &af_desc.mask);
mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n",
pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)),
cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)),
mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ);
}
- free_cpumask_var(req_mask);
if (!i)
return PTR_ERR(irq);
return i;
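
The hunks above convert the SF IRQ affinity path from bare cpumasks to struct irq_affinity_desc. Below is a minimal sketch of a caller driving the new interface; the helper name demo_request_irq_on_cpu is hypothetical, while mlx5_irq_pool_get() and mlx5_irq_affinity_request() are the functions used in the diff.

#include <linux/cpumask.h>
#include <linux/interrupt.h>	/* struct irq_affinity_desc */
#include "mlx5_irq.h"
#include "pci_irq.h"

/* Hypothetical helper: request one SF IRQ pinned to @cpu. It mirrors the
 * pattern of mlx5_irq_affinity_irqs_request_auto() above - build an
 * irq_affinity_desc on the stack and pass it instead of a cpumask.
 */
static struct mlx5_irq *demo_request_irq_on_cpu(struct mlx5_core_dev *dev, int cpu)
{
	struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
	struct irq_affinity_desc af_desc = {};

	af_desc.is_managed = 1;
	cpumask_set_cpu(cpu, &af_desc.mask);

	/* the pool picks the least loaded CPU when the mask has more than one bit */
	return mlx5_irq_affinity_request(pool, &af_desc);
}
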
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 4c9a40211059..932fbc843c69 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -39,7 +39,7 @@
#include "clock.h"
enum {
- MLX5_CYCLES_SHIFT = 23
+ MLX5_CYCLES_SHIFT = 31
};
enum {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
index 81ed91fee59b..db9df9798ffa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
@@ -14,10 +14,8 @@
#define chains_lock(chains) ((chains)->lock)
#define chains_ht(chains) ((chains)->chains_ht)
#define prios_ht(chains) ((chains)->prios_ht)
-#define tc_default_ft(chains) ((chains)->tc_default_ft)
-#define tc_end_ft(chains) ((chains)->tc_end_ft)
-#define ns_to_chains_fs_prio(ns) ((ns) == MLX5_FLOW_NAMESPACE_FDB ? \
- FDB_TC_OFFLOAD : MLX5E_TC_PRIO)
+#define chains_default_ft(chains) ((chains)->chains_default_ft)
+#define chains_end_ft(chains) ((chains)->chains_end_ft)
#define FT_TBL_SZ (64 * 1024)
struct mlx5_fs_chains {
@@ -28,13 +26,15 @@ struct mlx5_fs_chains {
/* Protects above chains_ht and prios_ht */
struct mutex lock;
- struct mlx5_flow_table *tc_default_ft;
- struct mlx5_flow_table *tc_end_ft;
+ struct mlx5_flow_table *chains_default_ft;
+ struct mlx5_flow_table *chains_end_ft;
struct mapping_ctx *chains_mapping;
enum mlx5_flow_namespace_type ns;
u32 group_num;
u32 flags;
+ int fs_base_prio;
+ int fs_base_level;
};
struct fs_chain {
@@ -145,7 +145,7 @@ void
mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains,
struct mlx5_flow_table *ft)
{
- tc_end_ft(chains) = ft;
+ chains_end_ft(chains) = ft;
}
static struct mlx5_flow_table *
@@ -164,11 +164,11 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains,
sz = (chain == mlx5_chains_get_nf_ft_chain(chains)) ? FT_TBL_SZ : POOL_NEXT_SIZE;
ft_attr.max_fte = sz;
- /* We use tc_default_ft(chains) as the table's next_ft till
+ /* We use chains_default_ft(chains) as the table's next_ft till
* ignore_flow_level is allowed on FT creation and not just for FTEs.
* Instead caller should add an explicit miss rule if needed.
*/
- ft_attr.next_ft = tc_default_ft(chains);
+ ft_attr.next_ft = chains_default_ft(chains);
/* The root table(chain 0, prio 1, level 0) is required to be
* connected to the previous fs_core managed prio.
@@ -177,22 +177,22 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains,
*/
if (!mlx5_chains_ignore_flow_level_supported(chains) ||
(chain == 0 && prio == 1 && level == 0)) {
- ft_attr.level = level;
- ft_attr.prio = prio - 1;
+ ft_attr.level = chains->fs_base_level;
+ ft_attr.prio = chains->fs_base_prio;
ns = (chains->ns == MLX5_FLOW_NAMESPACE_FDB) ?
mlx5_get_fdb_sub_ns(chains->dev, chain) :
mlx5_get_flow_namespace(chains->dev, chains->ns);
} else {
ft_attr.flags |= MLX5_FLOW_TABLE_UNMANAGED;
- ft_attr.prio = ns_to_chains_fs_prio(chains->ns);
+ ft_attr.prio = chains->fs_base_prio;
/* Firmware doesn't allow us to create another level 0 table,
- * so we create all unmanaged tables as level 1.
+ * so we create all unmanaged tables as level 1 (base + 1).
*
* To connect them, we use explicit miss rules with
* ignore_flow_level. Caller is responsible to create
* these rules (if needed).
*/
- ft_attr.level = 1;
+ ft_attr.level = chains->fs_base_level + 1;
ns = mlx5_get_flow_namespace(chains->dev, chains->ns);
}
@@ -220,7 +220,8 @@ create_chain_restore(struct fs_chain *chain)
int err;
if (chain->chain == mlx5_chains_get_nf_ft_chain(chains) ||
- !mlx5_chains_prios_supported(chains))
+ !mlx5_chains_prios_supported(chains) ||
+ !chains->chains_mapping)
return 0;
err = mlx5_chains_get_chain_mapping(chains, chain->chain, &index);
@@ -380,7 +381,7 @@ mlx5_chains_add_miss_rule(struct fs_chain *chain,
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest.ft = next_ft;
- if (next_ft == tc_end_ft(chains) &&
+ if (chains->chains_mapping && next_ft == chains_end_ft(chains) &&
chain->chain != mlx5_chains_get_nf_ft_chain(chains) &&
mlx5_chains_prios_supported(chains)) {
act.modify_hdr = chain->miss_modify_hdr;
@@ -494,8 +495,8 @@ mlx5_chains_create_prio(struct mlx5_fs_chains *chains,
/* Default miss for each chain: */
next_ft = (chain == mlx5_chains_get_nf_ft_chain(chains)) ?
- tc_default_ft(chains) :
- tc_end_ft(chains);
+ chains_default_ft(chains) :
+ chains_end_ft(chains);
list_for_each(pos, &chain_s->prios_list) {
struct prio *p = list_entry(pos, struct prio, list);
@@ -681,7 +682,7 @@ err_get_prio:
struct mlx5_flow_table *
mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains)
{
- return tc_end_ft(chains);
+ return chains_end_ft(chains);
}
struct mlx5_flow_table *
@@ -718,48 +719,38 @@ mlx5_chains_destroy_global_table(struct mlx5_fs_chains *chains,
static struct mlx5_fs_chains *
mlx5_chains_init(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr)
{
- struct mlx5_fs_chains *chains_priv;
- u32 max_flow_counter;
+ struct mlx5_fs_chains *chains;
int err;
- chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL);
- if (!chains_priv)
+ chains = kzalloc(sizeof(*chains), GFP_KERNEL);
+ if (!chains)
return ERR_PTR(-ENOMEM);
- max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
- MLX5_CAP_GEN(dev, max_flow_counter_15_0);
-
- mlx5_core_dbg(dev,
- "Init flow table chains, max counters(%d), groups(%d), max flow table size(%d)\n",
- max_flow_counter, attr->max_grp_num, attr->max_ft_sz);
-
- chains_priv->dev = dev;
- chains_priv->flags = attr->flags;
- chains_priv->ns = attr->ns;
- chains_priv->group_num = attr->max_grp_num;
- chains_priv->chains_mapping = attr->mapping;
- tc_default_ft(chains_priv) = tc_end_ft(chains_priv) = attr->default_ft;
+ chains->dev = dev;
+ chains->flags = attr->flags;
+ chains->ns = attr->ns;
+ chains->group_num = attr->max_grp_num;
+ chains->chains_mapping = attr->mapping;
+ chains->fs_base_prio = attr->fs_base_prio;
+ chains->fs_base_level = attr->fs_base_level;
+ chains_default_ft(chains) = chains_end_ft(chains) = attr->default_ft;
- mlx5_core_info(dev, "Supported tc offload range - chains: %u, prios: %u\n",
- mlx5_chains_get_chain_range(chains_priv),
- mlx5_chains_get_prio_range(chains_priv));
-
- err = rhashtable_init(&chains_ht(chains_priv), &chain_params);
+ err = rhashtable_init(&chains_ht(chains), &chain_params);
if (err)
goto init_chains_ht_err;
- err = rhashtable_init(&prios_ht(chains_priv), &prio_params);
+ err = rhashtable_init(&prios_ht(chains), &prio_params);
if (err)
goto init_prios_ht_err;
- mutex_init(&chains_lock(chains_priv));
+ mutex_init(&chains_lock(chains));
- return chains_priv;
+ return chains;
init_prios_ht_err:
- rhashtable_destroy(&chains_ht(chains_priv));
+ rhashtable_destroy(&chains_ht(chains));
init_chains_ht_err:
- kfree(chains_priv);
+ kfree(chains);
return ERR_PTR(err);
}
@@ -808,3 +799,9 @@ mlx5_chains_put_chain_mapping(struct mlx5_fs_chains *chains, u32 chain_mapping)
return mapping_remove(ctx, chain_mapping);
}
+
+void
+mlx5_chains_print_info(struct mlx5_fs_chains *chains)
+{
+ mlx5_core_dbg(chains->dev, "Flow table chains groups(%d)\n", chains->group_num);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h
index d50bdb226cef..8972fe05723a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h
@@ -17,8 +17,9 @@ enum mlx5_chains_flags {
struct mlx5_chains_attr {
enum mlx5_flow_namespace_type ns;
+ int fs_base_prio;
+ int fs_base_level;
u32 flags;
- u32 max_ft_sz;
u32 max_grp_num;
struct mlx5_flow_table *default_ft;
struct mapping_ctx *mapping;
@@ -68,6 +69,8 @@ void mlx5_chains_destroy(struct mlx5_fs_chains *chains);
void
mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains,
struct mlx5_flow_table *ft);
+void
+mlx5_chains_print_info(struct mlx5_fs_chains *chains);
#else /* CONFIG_MLX5_CLS_ACT */
@@ -89,7 +92,9 @@ static inline struct mlx5_fs_chains *
mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr)
{ return NULL; }
static inline void
-mlx5_chains_destroy(struct mlx5_fs_chains *chains) {};
+mlx5_chains_destroy(struct mlx5_fs_chains *chains) {}
+static inline void
+mlx5_chains_print_info(struct mlx5_fs_chains *chains) {}
#endif /* CONFIG_MLX5_CLS_ACT */
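
The two new attr fields let fs_chains be instantiated at an arbitrary base priority/level instead of the hard-coded TC priorities. A hedged sketch of how a caller might fill the extended mlx5_chains_attr follows; the function name and the concrete prio/level values are illustrative, not taken from the patch.

#include "lib/fs_chains.h"

/* Hypothetical caller of mlx5_chains_create(). Only the attr fields come
 * from fs_chains.h above; the values are placeholders.
 */
static struct mlx5_fs_chains *demo_create_chains(struct mlx5_core_dev *dev,
						 struct mlx5_flow_table *default_ft)
{
	struct mlx5_chains_attr attr = {};

	attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
	attr.fs_base_prio = 1;		/* prio of the fs_core-managed root table */
	attr.fs_base_level = 0;		/* unmanaged tables land at fs_base_level + 1 */
	attr.max_grp_num = 2;
	attr.default_ft = default_ft;

	return mlx5_chains_create(dev, &attr);
}
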
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index f1de152a6113..edc738e86cac 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -46,12 +46,10 @@
#include <linux/kmod.h>
#include <linux/mlx5/mlx5_ifc.h>
#include <linux/mlx5/vport.h>
-#ifdef CONFIG_RFS_ACCEL
-#include <linux/cpu_rmap.h>
-#endif
#include <linux/version.h>
#include <net/devlink.h>
#include "mlx5_core.h"
+#include "thermal.h"
#include "lib/eq.h"
#include "fs_core.h"
#include "lib/mpfs.h"
@@ -102,15 +100,19 @@ enum {
static struct mlx5_profile profile[] = {
[0] = {
.mask = 0,
+ .num_cmd_caches = MLX5_NUM_COMMAND_CACHES,
},
[1] = {
.mask = MLX5_PROF_MASK_QP_SIZE,
.log_max_qp = 12,
+ .num_cmd_caches = MLX5_NUM_COMMAND_CACHES,
+
},
[2] = {
.mask = MLX5_PROF_MASK_QP_SIZE |
MLX5_PROF_MASK_MR_CACHE,
.log_max_qp = LOG_MAX_SUPPORTED_QPS,
+ .num_cmd_caches = MLX5_NUM_COMMAND_CACHES,
.mr_cache[0] = {
.size = 500,
.limit = 250
@@ -176,6 +178,11 @@ static struct mlx5_profile profile[] = {
.limit = 4
},
},
+ [3] = {
+ .mask = MLX5_PROF_MASK_QP_SIZE,
+ .log_max_qp = LOG_MAX_SUPPORTED_QPS,
+ .num_cmd_caches = 0,
+ },
};
static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
@@ -191,7 +198,7 @@ static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
if (!(fw_initializing >> 31))
break;
if (time_after(jiffies, end) ||
- test_and_clear_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) {
+ test_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) {
err = -EBUSY;
break;
}
@@ -710,7 +717,7 @@ static int handle_hca_cap_port_selection(struct mlx5_core_dev *dev,
MLX5_ST_SZ_BYTES(port_selection_cap));
MLX5_SET(port_selection_cap, set_hca_cap, port_select_flow_table_bypass, 1);
- err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MODE_PORT_SELECTION);
+ err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_PORT_SELECTION);
return err;
}
@@ -917,7 +924,6 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev,
return 0;
err_clr_master:
- pci_clear_master(dev->pdev);
release_bar(dev->pdev);
err_disable:
mlx5_pci_disable_device(dev);
@@ -932,7 +938,6 @@ static void mlx5_pci_close(struct mlx5_core_dev *dev)
*/
mlx5_drain_health_wq(dev);
iounmap(dev->iseg);
- pci_clear_master(dev->pdev);
release_bar(dev->pdev);
mlx5_pci_disable_device(dev);
}
@@ -1402,16 +1407,16 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
goto function_teardown;
}
+ err = mlx5_devlink_params_register(priv_to_devlink(dev));
+ if (err)
+ goto err_devlink_params_reg;
+
err = mlx5_load(dev);
if (err)
goto err_load;
set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
- err = mlx5_devlink_params_register(priv_to_devlink(dev));
- if (err)
- goto err_devlink_params_reg;
-
err = mlx5_register_device(dev);
if (err)
goto err_register;
@@ -1421,11 +1426,11 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
return 0;
err_register:
- mlx5_devlink_params_unregister(priv_to_devlink(dev));
-err_devlink_params_reg:
clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
mlx5_unload(dev);
err_load:
+ mlx5_devlink_params_unregister(priv_to_devlink(dev));
+err_devlink_params_reg:
mlx5_cleanup_once(dev);
function_teardown:
mlx5_function_teardown(dev, true);
@@ -1444,7 +1449,6 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
mutex_lock(&dev->intf_state_mutex);
mlx5_unregister_device(dev);
- mlx5_devlink_params_unregister(priv_to_devlink(dev));
if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
mlx5_core_warn(dev, "%s: interface is down, NOP\n",
@@ -1455,6 +1459,7 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
mlx5_unload(dev);
+ mlx5_devlink_params_unregister(priv_to_devlink(dev));
mlx5_cleanup_once(dev);
mlx5_function_teardown(dev, true);
out:
@@ -1509,13 +1514,13 @@ out:
return err;
}
-int mlx5_load_one(struct mlx5_core_dev *dev)
+int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery)
{
struct devlink *devlink = priv_to_devlink(dev);
int ret;
devl_lock(devlink);
- ret = mlx5_load_one_devl_locked(dev, false);
+ ret = mlx5_load_one_devl_locked(dev, recovery);
devl_unlock(devlink);
return ret;
}
@@ -1768,6 +1773,10 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
if (err)
dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
+ err = mlx5_thermal_init(dev);
+ if (err)
+ dev_err(&pdev->dev, "mlx5_thermal_init failed with error code %d\n", err);
+
pci_save_state(pdev);
devlink_register(devlink);
return 0;
@@ -1796,6 +1805,7 @@ static void remove_one(struct pci_dev *pdev)
mlx5_drain_fw_reset(dev);
devlink_unregister(devlink);
mlx5_sriov_disable(pdev);
+ mlx5_thermal_uninit(dev);
mlx5_crdump_disable(dev);
mlx5_drain_health_wq(dev);
mlx5_uninit_one(dev);
@@ -1912,7 +1922,8 @@ static void mlx5_pci_resume(struct pci_dev *pdev)
mlx5_pci_trace(dev, "Enter, loading driver..\n");
- err = mlx5_load_one(dev);
+ err = mlx5_load_one(dev, false);
+
if (!err)
devlink_health_reporter_state_update(dev->priv.health.fw_fatal_reporter,
DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
@@ -2003,7 +2014,7 @@ static int mlx5_resume(struct pci_dev *pdev)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
- return mlx5_load_one(dev);
+ return mlx5_load_one(dev, false);
}
static const struct pci_device_id mlx5_core_pci_table[] = {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index be0785f83083..1d879374acaa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -142,6 +142,7 @@ enum mlx5_semaphore_space_address {
};
#define MLX5_DEFAULT_PROF 2
+#define MLX5_SF_PROF 3
static inline int mlx5_flexible_inlen(struct mlx5_core_dev *dev, size_t fixed,
size_t item_size, size_t num_items,
@@ -321,7 +322,7 @@ int mlx5_init_one(struct mlx5_core_dev *dev);
void mlx5_uninit_one(struct mlx5_core_dev *dev);
void mlx5_unload_one(struct mlx5_core_dev *dev, bool suspend);
void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev, bool suspend);
-int mlx5_load_one(struct mlx5_core_dev *dev);
+int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery);
int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery);
int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 function_id,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
index 23cb63fa4588..efd0c299c5c7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
@@ -9,6 +9,7 @@
#define MLX5_COMP_EQS_PER_SF 8
struct mlx5_irq;
+struct cpu_rmap;
int mlx5_irq_table_init(struct mlx5_core_dev *dev);
void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
@@ -25,9 +26,10 @@ int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs);
struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev);
void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq);
struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
- struct cpumask *affinity);
+ struct irq_affinity_desc *af_desc,
+ struct cpu_rmap **rmap);
int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
- struct mlx5_irq **irqs);
+ struct mlx5_irq **irqs, struct cpu_rmap **rmap);
void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs);
int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
@@ -39,7 +41,7 @@ struct mlx5_irq_pool;
int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
struct mlx5_irq **irqs);
struct mlx5_irq *mlx5_irq_affinity_request(struct mlx5_irq_pool *pool,
- const struct cpumask *req_mask);
+ struct irq_affinity_desc *af_desc);
void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs,
int num_irqs);
#else
@@ -50,7 +52,7 @@ static inline int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev,
}
static inline struct mlx5_irq *
-mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc)
{
return ERR_PTR(-EOPNOTSUPP);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 6bde18bcd42f..2245d3b2f393 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */
+#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/mlx5/driver.h>
@@ -9,6 +10,7 @@
#include "mlx5_irq.h"
#include "pci_irq.h"
#include "lib/sf.h"
+#include "lib/eq.h"
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
@@ -29,12 +31,11 @@ struct mlx5_irq {
char name[MLX5_MAX_IRQ_NAME];
struct mlx5_irq_pool *pool;
int refcount;
- u32 index;
- int irqn;
+ struct msi_map map;
};
struct mlx5_irq_table {
- struct mlx5_irq_pool *pf_pool;
+ struct mlx5_irq_pool *pcif_pool;
struct mlx5_irq_pool *sf_ctrl_pool;
struct mlx5_irq_pool *sf_comp_pool;
};
@@ -127,15 +128,26 @@ out:
static void irq_release(struct mlx5_irq *irq)
{
struct mlx5_irq_pool *pool = irq->pool;
+#ifdef CONFIG_RFS_ACCEL
+ struct cpu_rmap *rmap;
+#endif
- xa_erase(&pool->irqs, irq->index);
- /* free_irq requires that affinity_hint and rmap will be cleared
- * before calling it. This is why there is asymmetry with set_rmap
- * which should be called after alloc_irq but before request_irq.
+ xa_erase(&pool->irqs, irq->map.index);
+ /* free_irq requires that affinity_hint and rmap be cleared before
+ * calling it. To satisfy this requirement, we call
+ * irq_cpu_rmap_remove() to remove the notifier.
*/
- irq_update_affinity_hint(irq->irqn, NULL);
+ irq_update_affinity_hint(irq->map.virq, NULL);
+#ifdef CONFIG_RFS_ACCEL
+ rmap = mlx5_eq_table_get_rmap(pool->dev);
+ if (rmap && irq->map.index)
+ irq_cpu_rmap_remove(rmap, irq->map.virq);
+#endif
+
free_cpumask_var(irq->mask);
- free_irq(irq->irqn, &irq->nh);
+ free_irq(irq->map.virq, &irq->nh);
+ if (irq->map.index && pci_msix_can_alloc_dyn(pool->dev->pdev))
+ pci_msix_free_irq(pool->dev->pdev, irq->map);
kfree(irq);
}
@@ -198,7 +210,7 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
return;
}
- if (vecidx == pool->xa_num_irqs.max) {
+ if (!vecidx) {
snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
return;
}
@@ -207,7 +219,8 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
}
struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
- const struct cpumask *affinity)
+ struct irq_affinity_desc *af_desc,
+ struct cpu_rmap **rmap)
{
struct mlx5_core_dev *dev = pool->dev;
char name[MLX5_MAX_IRQ_NAME];
@@ -217,7 +230,28 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
irq = kzalloc(sizeof(*irq), GFP_KERNEL);
if (!irq)
return ERR_PTR(-ENOMEM);
- irq->irqn = pci_irq_vector(dev->pdev, i);
+ if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) {
+ /* The vector at index 0 was already allocated.
+ * Just get the irq number. If dynamic irq is not supported,
+ * the vectors have also been allocated.
+ */
+ irq->map.virq = pci_irq_vector(dev->pdev, i);
+ irq->map.index = 0;
+ } else {
+ irq->map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, af_desc);
+ if (!irq->map.virq) {
+ err = irq->map.index;
+ goto err_alloc_irq;
+ }
+ }
+
+ if (i && rmap && *rmap) {
+#ifdef CONFIG_RFS_ACCEL
+ err = irq_cpu_rmap_add(*rmap, irq->map.virq);
+ if (err)
+ goto err_irq_rmap;
+#endif
+ }
if (!mlx5_irq_pool_is_sf_pool(pool))
irq_set_name(pool, name, i);
else
@@ -225,7 +259,7 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
snprintf(irq->name, MLX5_MAX_IRQ_NAME,
"%s@pci:%s", name, pci_name(dev->pdev));
- err = request_irq(irq->irqn, irq_int_handler, 0, irq->name,
+ err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name,
&irq->nh);
if (err) {
mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
@@ -236,26 +270,37 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
err = -ENOMEM;
goto err_cpumask;
}
- if (affinity) {
- cpumask_copy(irq->mask, affinity);
- irq_set_affinity_and_hint(irq->irqn, irq->mask);
+ if (af_desc) {
+ cpumask_copy(irq->mask, &af_desc->mask);
+ irq_set_affinity_and_hint(irq->map.virq, irq->mask);
}
irq->pool = pool;
irq->refcount = 1;
- irq->index = i;
- err = xa_err(xa_store(&pool->irqs, irq->index, irq, GFP_KERNEL));
+ irq->map.index = i;
+ err = xa_err(xa_store(&pool->irqs, irq->map.index, irq, GFP_KERNEL));
if (err) {
mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
- irq->index, err);
+ irq->map.index, err);
goto err_xa;
}
return irq;
err_xa:
- irq_update_affinity_hint(irq->irqn, NULL);
+ if (af_desc)
+ irq_update_affinity_hint(irq->map.virq, NULL);
free_cpumask_var(irq->mask);
err_cpumask:
- free_irq(irq->irqn, &irq->nh);
+ free_irq(irq->map.virq, &irq->nh);
err_req_irq:
+#ifdef CONFIG_RFS_ACCEL
+ if (i && rmap && *rmap) {
+ free_irq_cpu_rmap(*rmap);
+ *rmap = NULL;
+ }
+err_irq_rmap:
+#endif
+ if (i && pci_msix_can_alloc_dyn(dev->pdev))
+ pci_msix_free_irq(dev->pdev, irq->map);
+err_alloc_irq:
kfree(irq);
return ERR_PTR(err);
}
@@ -292,7 +337,7 @@ struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
int mlx5_irq_get_index(struct mlx5_irq *irq)
{
- return irq->index;
+ return irq->map.index;
}
/* irq_pool API */
@@ -300,7 +345,8 @@ int mlx5_irq_get_index(struct mlx5_irq *irq)
/* requesting an irq from a given pool according to given index */
static struct mlx5_irq *
irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
- struct cpumask *affinity)
+ struct irq_affinity_desc *af_desc,
+ struct cpu_rmap **rmap)
{
struct mlx5_irq *irq;
@@ -310,7 +356,7 @@ irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
mlx5_irq_get_locked(irq);
goto unlock;
}
- irq = mlx5_irq_alloc(pool, vecidx, affinity);
+ irq = mlx5_irq_alloc(pool, vecidx, af_desc, rmap);
unlock:
mutex_unlock(&pool->lock);
return irq;
@@ -337,7 +383,7 @@ struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
/* In some configs, there won't be a pool of SFs IRQs. Hence, returning
* the PF IRQs pool in case the SF pool doesn't exist.
*/
- return pool ? pool : irq_table->pf_pool;
+ return pool ? pool : irq_table->pcif_pool;
}
static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
@@ -351,7 +397,7 @@ static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
/* In some configs, there won't be a pool of SFs IRQs. Hence, returning
* the PF IRQs pool in case the SF pool doesn't exist.
*/
- return pool ? pool : irq_table->pf_pool;
+ return pool ? pool : irq_table->pcif_pool;
}
/**
@@ -364,7 +410,7 @@ static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs)
int i;
for (i = 0; i < nirqs; i++) {
- synchronize_irq(irqs[i]->irqn);
+ synchronize_irq(irqs[i]->map.virq);
mlx5_irq_put(irqs[i]);
}
}
@@ -387,26 +433,26 @@ void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq)
struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
{
struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
- cpumask_var_t req_mask;
+ struct irq_affinity_desc af_desc;
struct mlx5_irq *irq;
- if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
- return ERR_PTR(-ENOMEM);
- cpumask_copy(req_mask, cpu_online_mask);
+ cpumask_copy(&af_desc.mask, cpu_online_mask);
+ af_desc.is_managed = false;
if (!mlx5_irq_pool_is_sf_pool(pool)) {
- /* In case we are allocating a control IRQ for PF/VF */
+ /* In case we are allocating a control IRQ from a PCI device's pool.
+ * This can also happen for an SF if the SFs pool is empty.
+ */
if (!pool->xa_num_irqs.max) {
- cpumask_clear(req_mask);
+ cpumask_clear(&af_desc.mask);
/* In case we only have a single IRQ for PF/VF */
- cpumask_set_cpu(cpumask_first(cpu_online_mask), req_mask);
+ cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc.mask);
}
- /* Allocate the IRQ in the last index of the pool */
- irq = irq_pool_request_vector(pool, pool->xa_num_irqs.max, req_mask);
+ /* Allocate the IRQ in index 0. The vector was already allocated */
+ irq = irq_pool_request_vector(pool, 0, &af_desc, NULL);
} else {
- irq = mlx5_irq_affinity_request(pool, req_mask);
+ irq = mlx5_irq_affinity_request(pool, &af_desc);
}
- free_cpumask_var(req_mask);
return irq;
}
@@ -415,28 +461,82 @@ struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
* @dev: mlx5 device that requesting the IRQ.
* @vecidx: vector index of the IRQ. This argument is ignored if affinity is
* provided.
- * @affinity: cpumask requested for this IRQ.
+ * @af_desc: affinity descriptor for this IRQ.
+ * @rmap: pointer to reverse map pointer for completion interrupts
*
* This function returns a pointer to IRQ, or ERR_PTR in case of error.
*/
struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
- struct cpumask *affinity)
+ struct irq_affinity_desc *af_desc,
+ struct cpu_rmap **rmap)
{
struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
struct mlx5_irq_pool *pool;
struct mlx5_irq *irq;
- pool = irq_table->pf_pool;
- irq = irq_pool_request_vector(pool, vecidx, affinity);
+ pool = irq_table->pcif_pool;
+ irq = irq_pool_request_vector(pool, vecidx, af_desc, rmap);
if (IS_ERR(irq))
return irq;
mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
- irq->irqn, cpumask_pr_args(affinity),
+ irq->map.virq, cpumask_pr_args(&af_desc->mask),
irq->refcount / MLX5_EQ_REFS_PER_IRQ);
return irq;
}
/**
+ * mlx5_msix_alloc - allocate msix interrupt
+ * @dev: mlx5 device from which to request
+ * @handler: interrupt handler
+ * @affdesc: affinity descriptor
+ * @name: interrupt name
+ *
+ * Returns: struct msi_map with result encoded.
+ * Note: the caller must make sure to release the irq by calling
+ * mlx5_msix_free() if shutdown was initiated.
+ */
+struct msi_map mlx5_msix_alloc(struct mlx5_core_dev *dev,
+ irqreturn_t (*handler)(int, void *),
+ const struct irq_affinity_desc *affdesc,
+ const char *name)
+{
+ struct msi_map map;
+ int err;
+
+ if (!dev->pdev) {
+ map.virq = 0;
+ map.index = -EINVAL;
+ return map;
+ }
+
+ map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, affdesc);
+ if (!map.virq)
+ return map;
+
+ err = request_irq(map.virq, handler, 0, name, NULL);
+ if (err) {
+ mlx5_core_warn(dev, "err %d\n", err);
+ pci_msix_free_irq(dev->pdev, map);
+ map.virq = 0;
+ map.index = -ENOMEM;
+ }
+ return map;
+}
+EXPORT_SYMBOL(mlx5_msix_alloc);
+
+/**
+ * mlx5_msix_free - free a previously allocated msix interrupt
+ * @dev: mlx5 device associated with interrupt
+ * @map: map previously returned by mlx5_msix_alloc()
+ */
+void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map)
+{
+ free_irq(map.virq, NULL);
+ pci_msix_free_irq(dev->pdev, map);
+}
+EXPORT_SYMBOL(mlx5_msix_free);
+
+/**
* mlx5_irqs_release_vectors - release one or more IRQs back to the system.
* @irqs: IRQs to be released.
* @nirqs: number of IRQs to be released.
@@ -452,6 +552,7 @@ void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs)
* @cpus: CPUs array for binding the IRQs
* @nirqs: number of IRQs to request.
* @irqs: an output array of IRQs pointers.
+ * @rmap: pointer to reverse map pointer for completion interrupts
*
* Each IRQ is bound to at most 1 CPU.
* This function requests nirqs IRQs, starting from @vecidx.
@@ -460,24 +561,22 @@ void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs)
* @nirqs), if successful, or a negative error code in case of an error.
*/
int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
- struct mlx5_irq **irqs)
+ struct mlx5_irq **irqs, struct cpu_rmap **rmap)
{
- cpumask_var_t req_mask;
+ struct irq_affinity_desc af_desc;
struct mlx5_irq *irq;
int i;
- if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
- return -ENOMEM;
+ af_desc.is_managed = 1;
for (i = 0; i < nirqs; i++) {
- cpumask_set_cpu(cpus[i], req_mask);
- irq = mlx5_irq_request(dev, i, req_mask);
+ cpumask_set_cpu(cpus[i], &af_desc.mask);
+ irq = mlx5_irq_request(dev, i + 1, &af_desc, rmap);
if (IS_ERR(irq))
break;
- cpumask_clear(req_mask);
+ cpumask_clear(&af_desc.mask);
irqs[i] = irq;
}
- free_cpumask_var(req_mask);
return i ? i : PTR_ERR(irq);
}
@@ -521,7 +620,7 @@ static void irq_pool_free(struct mlx5_irq_pool *pool)
kvfree(pool);
}
-static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
+static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec)
{
struct mlx5_irq_table *table = dev->priv.irq_table;
int num_sf_ctrl_by_msix;
@@ -529,12 +628,12 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
int num_sf_ctrl;
int err;
- /* init pf_pool */
- table->pf_pool = irq_pool_alloc(dev, 0, pf_vec, NULL,
- MLX5_EQ_SHARE_IRQ_MIN_COMP,
- MLX5_EQ_SHARE_IRQ_MAX_COMP);
- if (IS_ERR(table->pf_pool))
- return PTR_ERR(table->pf_pool);
+ /* init pcif_pool */
+ table->pcif_pool = irq_pool_alloc(dev, 0, pcif_vec, NULL,
+ MLX5_EQ_SHARE_IRQ_MIN_COMP,
+ MLX5_EQ_SHARE_IRQ_MAX_COMP);
+ if (IS_ERR(table->pcif_pool))
+ return PTR_ERR(table->pcif_pool);
if (!mlx5_sf_max_functions(dev))
return 0;
if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
@@ -548,7 +647,7 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
MLX5_SFS_PER_CTRL_IRQ);
num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs);
num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
- table->sf_ctrl_pool = irq_pool_alloc(dev, pf_vec, num_sf_ctrl,
+ table->sf_ctrl_pool = irq_pool_alloc(dev, pcif_vec, num_sf_ctrl,
"mlx5_sf_ctrl",
MLX5_EQ_SHARE_IRQ_MIN_CTRL,
MLX5_EQ_SHARE_IRQ_MAX_CTRL);
@@ -557,7 +656,7 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
goto err_pf;
}
/* init sf_comp_pool */
- table->sf_comp_pool = irq_pool_alloc(dev, pf_vec + num_sf_ctrl,
+ table->sf_comp_pool = irq_pool_alloc(dev, pcif_vec + num_sf_ctrl,
sf_vec - num_sf_ctrl, "mlx5_sf_comp",
MLX5_EQ_SHARE_IRQ_MIN_COMP,
MLX5_EQ_SHARE_IRQ_MAX_COMP);
@@ -579,7 +678,7 @@ err_irqs_per_cpu:
err_sf_ctrl:
irq_pool_free(table->sf_ctrl_pool);
err_pf:
- irq_pool_free(table->pf_pool);
+ irq_pool_free(table->pcif_pool);
return err;
}
@@ -589,7 +688,7 @@ static void irq_pools_destroy(struct mlx5_irq_table *table)
irq_pool_free(table->sf_comp_pool);
irq_pool_free(table->sf_ctrl_pool);
}
- irq_pool_free(table->pf_pool);
+ irq_pool_free(table->pcif_pool);
}
/* irq_table API */
@@ -620,9 +719,9 @@ void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
{
- if (!table->pf_pool->xa_num_irqs.max)
+ if (!table->pcif_pool->xa_num_irqs.max)
return 1;
- return table->pf_pool->xa_num_irqs.max - table->pf_pool->xa_num_irqs.min;
+ return table->pcif_pool->xa_num_irqs.max - table->pcif_pool->xa_num_irqs.min;
}
int mlx5_irq_table_create(struct mlx5_core_dev *dev)
@@ -631,26 +730,30 @@ int mlx5_irq_table_create(struct mlx5_core_dev *dev)
MLX5_CAP_GEN(dev, max_num_eqs) :
1 << MLX5_CAP_GEN(dev, log_max_eq);
int total_vec;
- int pf_vec;
+ int pcif_vec;
+ int req_vec;
int err;
+ int n;
if (mlx5_core_is_sf(dev))
return 0;
- pf_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
- pf_vec = min_t(int, pf_vec, num_eqs);
+ pcif_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
+ pcif_vec = min_t(int, pcif_vec, num_eqs);
- total_vec = pf_vec;
+ total_vec = pcif_vec;
if (mlx5_sf_max_functions(dev))
total_vec += MLX5_IRQ_CTRL_SF_MAX +
MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev);
+ total_vec = min_t(int, total_vec, pci_msix_vec_count(dev->pdev));
+ pcif_vec = min_t(int, pcif_vec, pci_msix_vec_count(dev->pdev));
- total_vec = pci_alloc_irq_vectors(dev->pdev, 1, total_vec, PCI_IRQ_MSIX);
- if (total_vec < 0)
- return total_vec;
- pf_vec = min(pf_vec, total_vec);
+ req_vec = pci_msix_can_alloc_dyn(dev->pdev) ? 1 : total_vec;
+ n = pci_alloc_irq_vectors(dev->pdev, 1, req_vec, PCI_IRQ_MSIX);
+ if (n < 0)
+ return n;
- err = irq_pools_init(dev, total_vec - pf_vec, pf_vec);
+ err = irq_pools_init(dev, total_vec - pcif_vec, pcif_vec);
if (err)
pci_free_irq_vectors(dev->pdev);
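
mlx5_msix_alloc()/mlx5_msix_free() are exported for users that need a dynamically allocated MSI-X vector outside the IRQ pools. A hedged usage sketch follows; the handler and the demo_* names are hypothetical, only the two exported helpers come from the patch.

#include <linux/interrupt.h>
#include <linux/msi.h>
#include <linux/mlx5/driver.h>

static irqreturn_t demo_msix_handler(int irq, void *data)
{
	return IRQ_HANDLED;
}

static int demo_msix_setup(struct mlx5_core_dev *dev, struct msi_map *map)
{
	struct irq_affinity_desc affdesc = {};

	cpumask_copy(&affdesc.mask, cpu_online_mask);

	*map = mlx5_msix_alloc(dev, demo_msix_handler, &affdesc, "demo_msix");
	if (!map->virq)
		return map->index;	/* negative errno encoded in index */
	return 0;
}

static void demo_msix_teardown(struct mlx5_core_dev *dev, struct msi_map map)
{
	mlx5_msix_free(dev, map);	/* frees the irq and the MSI-X vector */
}
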
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
index 5c7e68bee43a..d3a77a0ab848 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
@@ -12,6 +12,7 @@
#define MLX5_EQ_REFS_PER_IRQ (2)
struct mlx5_irq;
+struct cpu_rmap;
struct mlx5_irq_pool {
char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS];
@@ -31,7 +32,8 @@ static inline bool mlx5_irq_pool_is_sf_pool(struct mlx5_irq_pool *pool)
}
struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
- const struct cpumask *affinity);
+ struct irq_affinity_desc *af_desc,
+ struct cpu_rmap **rmap);
int mlx5_irq_get_locked(struct mlx5_irq *irq);
int mlx5_irq_read_locked(struct mlx5_irq *irq);
int mlx5_irq_put(struct mlx5_irq *irq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index a1548e6bfb35..0daeb4b72cca 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -1054,3 +1054,154 @@ out:
kfree(out);
return err;
}
+
+/* speed in units of 1Mb */
+static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = {
+ [MLX5E_1000BASE_CX_SGMII] = 1000,
+ [MLX5E_1000BASE_KX] = 1000,
+ [MLX5E_10GBASE_CX4] = 10000,
+ [MLX5E_10GBASE_KX4] = 10000,
+ [MLX5E_10GBASE_KR] = 10000,
+ [MLX5E_20GBASE_KR2] = 20000,
+ [MLX5E_40GBASE_CR4] = 40000,
+ [MLX5E_40GBASE_KR4] = 40000,
+ [MLX5E_56GBASE_R4] = 56000,
+ [MLX5E_10GBASE_CR] = 10000,
+ [MLX5E_10GBASE_SR] = 10000,
+ [MLX5E_10GBASE_ER] = 10000,
+ [MLX5E_40GBASE_SR4] = 40000,
+ [MLX5E_40GBASE_LR4] = 40000,
+ [MLX5E_50GBASE_SR2] = 50000,
+ [MLX5E_100GBASE_CR4] = 100000,
+ [MLX5E_100GBASE_SR4] = 100000,
+ [MLX5E_100GBASE_KR4] = 100000,
+ [MLX5E_100GBASE_LR4] = 100000,
+ [MLX5E_100BASE_TX] = 100,
+ [MLX5E_1000BASE_T] = 1000,
+ [MLX5E_10GBASE_T] = 10000,
+ [MLX5E_25GBASE_CR] = 25000,
+ [MLX5E_25GBASE_KR] = 25000,
+ [MLX5E_25GBASE_SR] = 25000,
+ [MLX5E_50GBASE_CR2] = 50000,
+ [MLX5E_50GBASE_KR2] = 50000,
+};
+
+static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = {
+ [MLX5E_SGMII_100M] = 100,
+ [MLX5E_1000BASE_X_SGMII] = 1000,
+ [MLX5E_5GBASE_R] = 5000,
+ [MLX5E_10GBASE_XFI_XAUI_1] = 10000,
+ [MLX5E_40GBASE_XLAUI_4_XLPPI_4] = 40000,
+ [MLX5E_25GAUI_1_25GBASE_CR_KR] = 25000,
+ [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = 50000,
+ [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR] = 50000,
+ [MLX5E_CAUI_4_100GBASE_CR4_KR4] = 100000,
+ [MLX5E_100GAUI_2_100GBASE_CR2_KR2] = 100000,
+ [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000,
+ [MLX5E_400GAUI_8] = 400000,
+ [MLX5E_100GAUI_1_100GBASE_CR_KR] = 100000,
+ [MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 200000,
+ [MLX5E_400GAUI_4_400GBASE_CR4_KR4] = 400000,
+};
+
+int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext,
+ struct mlx5_port_eth_proto *eproto)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+ int err;
+
+ if (!eproto)
+ return -EINVAL;
+
+ err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port);
+ if (err)
+ return err;
+
+ eproto->cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
+ eth_proto_capability);
+ eproto->admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin);
+ eproto->oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper);
+ return 0;
+}
+
+bool mlx5_ptys_ext_supported(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_port_eth_proto eproto;
+ int err;
+
+ if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet))
+ return true;
+
+ err = mlx5_port_query_eth_proto(mdev, 1, true, &eproto);
+ if (err)
+ return false;
+
+ return !!eproto.cap;
+}
+
+static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev,
+ const u32 **arr, u32 *size,
+ bool force_legacy)
+{
+ bool ext = force_legacy ? false : mlx5_ptys_ext_supported(mdev);
+
+ *size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) :
+ ARRAY_SIZE(mlx5e_link_speed);
+ *arr = ext ? mlx5e_ext_link_speed : mlx5e_link_speed;
+}
+
+u32 mlx5_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper,
+ bool force_legacy)
+{
+ unsigned long temp = eth_proto_oper;
+ const u32 *table;
+ u32 speed = 0;
+ u32 max_size;
+ int i;
+
+ mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy);
+ i = find_first_bit(&temp, max_size);
+ if (i < max_size)
+ speed = table[i];
+ return speed;
+}
+
+u32 mlx5_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed,
+ bool force_legacy)
+{
+ u32 link_modes = 0;
+ const u32 *table;
+ u32 max_size;
+ int i;
+
+ mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy);
+ for (i = 0; i < max_size; ++i) {
+ if (table[i] == speed)
+ link_modes |= MLX5E_PROT_MASK(i);
+ }
+ return link_modes;
+}
+
+int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
+{
+ struct mlx5_port_eth_proto eproto;
+ u32 max_speed = 0;
+ const u32 *table;
+ u32 max_size;
+ bool ext;
+ int err;
+ int i;
+
+ ext = mlx5_ptys_ext_supported(mdev);
+ err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
+ if (err)
+ return err;
+
+ mlx5e_port_get_speed_arr(mdev, &table, &max_size, false);
+ for (i = 0; i < max_size; ++i)
+ if (eproto.cap & MLX5E_PROT_MASK(i))
+ max_speed = max(max_speed, table[i]);
+
+ *speed = max_speed;
+ return 0;
+}
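
The new helpers in port.c translate PTYS link modes to speeds in Mb/s for both the legacy and extended protocol tables. A hedged sketch of resolving the current operational speed; demo_get_oper_speed is hypothetical, the three helpers it calls are defined above.

#include <linux/mlx5/driver.h>
#include <linux/mlx5/port.h>

static int demo_get_oper_speed(struct mlx5_core_dev *mdev, u32 *speed_mbps)
{
	struct mlx5_port_eth_proto eproto;
	bool ext = mlx5_ptys_ext_supported(mdev);
	int err;

	err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
	if (err)
		return err;

	/* eth_proto_oper normally has a single bit set; map it to Mb/s */
	*speed_mbps = mlx5_port_ptys2speed(mdev, eproto.oper, false);
	return *speed_mbps ? 0 : -EINVAL;
}
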
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
index a7377619ba6f..e2f26d0bc615 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
@@ -28,7 +28,7 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia
mdev->priv.adev_idx = adev->id;
sf_dev->mdev = mdev;
- err = mlx5_mdev_init(mdev, MLX5_DEFAULT_PROF);
+ err = mlx5_mdev_init(mdev, MLX5_SF_PROF);
if (err) {
mlx5_core_warn(mdev, "mlx5_mdev_init on err=%d\n", err);
goto mdev_err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
index ee104cf04392..0eb9a8d7f282 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -819,14 +819,34 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
case DR_ACTION_TYP_TNL_L2_TO_L2:
break;
case DR_ACTION_TYP_TNL_L3_TO_L2:
- attr.decap_index = action->rewrite->index;
- attr.decap_actions = action->rewrite->num_of_actions;
- attr.decap_with_vlan =
- attr.decap_actions == WITH_VLAN_NUM_HW_ACTIONS;
+ if (action->rewrite->ptrn && action->rewrite->arg) {
+ attr.decap_index = mlx5dr_arg_get_obj_id(action->rewrite->arg);
+ attr.decap_actions = action->rewrite->ptrn->num_of_actions;
+ attr.decap_pat_idx = action->rewrite->ptrn->index;
+ } else {
+ attr.decap_index = action->rewrite->index;
+ attr.decap_actions = action->rewrite->num_of_actions;
+ attr.decap_with_vlan =
+ attr.decap_actions == WITH_VLAN_NUM_HW_ACTIONS;
+ attr.decap_pat_idx = MLX5DR_INVALID_PATTERN_INDEX;
+ }
break;
case DR_ACTION_TYP_MODIFY_HDR:
- attr.modify_index = action->rewrite->index;
- attr.modify_actions = action->rewrite->num_of_actions;
+ if (action->rewrite->single_action_opt) {
+ attr.modify_actions = action->rewrite->num_of_actions;
+ attr.single_modify_action = action->rewrite->data;
+ } else {
+ if (action->rewrite->ptrn && action->rewrite->arg) {
+ attr.modify_index =
+ mlx5dr_arg_get_obj_id(action->rewrite->arg);
+ attr.modify_actions = action->rewrite->ptrn->num_of_actions;
+ attr.modify_pat_idx = action->rewrite->ptrn->index;
+ } else {
+ attr.modify_index = action->rewrite->index;
+ attr.modify_actions = action->rewrite->num_of_actions;
+ attr.modify_pat_idx = MLX5DR_INVALID_PATTERN_INDEX;
+ }
+ }
if (action->rewrite->modify_ttl)
dr_action_modify_ttl_adjust(dmn, &attr, rx_rule,
&recalc_cs_required);
@@ -1365,8 +1385,6 @@ out_err:
return -EINVAL;
}
-#define ACTION_CACHE_LINE_SIZE 64
-
static int
dr_action_create_reformat_action(struct mlx5dr_domain *dmn,
u8 reformat_param_0, u8 reformat_param_1,
@@ -1403,36 +1421,25 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn,
}
case DR_ACTION_TYP_TNL_L3_TO_L2:
{
- u8 hw_actions[ACTION_CACHE_LINE_SIZE] = {};
+ u8 hw_actions[DR_ACTION_CACHE_LINE_SIZE] = {};
int ret;
ret = mlx5dr_ste_set_action_decap_l3_list(dmn->ste_ctx,
data, data_sz,
hw_actions,
- ACTION_CACHE_LINE_SIZE,
+ DR_ACTION_CACHE_LINE_SIZE,
&action->rewrite->num_of_actions);
if (ret) {
mlx5dr_dbg(dmn, "Failed creating decap l3 action list\n");
return ret;
}
- action->rewrite->chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool,
- DR_CHUNK_SIZE_8);
- if (!action->rewrite->chunk) {
- mlx5dr_dbg(dmn, "Failed allocating modify header chunk\n");
- return -ENOMEM;
- }
-
- action->rewrite->data = (void *)hw_actions;
- action->rewrite->index = (mlx5dr_icm_pool_get_chunk_icm_addr
- (action->rewrite->chunk) -
- dmn->info.caps.hdr_modify_icm_addr) /
- ACTION_CACHE_LINE_SIZE;
+ action->rewrite->data = hw_actions;
+ action->rewrite->dmn = dmn;
- ret = mlx5dr_send_postsend_action(dmn, action);
+ ret = mlx5dr_ste_alloc_modify_hdr(action);
if (ret) {
- mlx5dr_dbg(dmn, "Writing decap l3 actions to ICM failed\n");
- mlx5dr_icm_free_chunk(action->rewrite->chunk);
+ mlx5dr_dbg(dmn, "Failed preparing reformat data\n");
return ret;
}
return 0;
@@ -1963,7 +1970,6 @@ static int dr_action_create_modify_action(struct mlx5dr_domain *dmn,
__be64 actions[],
struct mlx5dr_action *action)
{
- struct mlx5dr_icm_chunk *chunk;
u32 max_hw_actions;
u32 num_hw_actions;
u32 num_sw_actions;
@@ -1980,15 +1986,9 @@ static int dr_action_create_modify_action(struct mlx5dr_domain *dmn,
return -EINVAL;
}
- chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool, DR_CHUNK_SIZE_16);
- if (!chunk)
- return -ENOMEM;
-
hw_actions = kcalloc(1, max_hw_actions * DR_MODIFY_ACTION_SIZE, GFP_KERNEL);
- if (!hw_actions) {
- ret = -ENOMEM;
- goto free_chunk;
- }
+ if (!hw_actions)
+ return -ENOMEM;
ret = dr_actions_convert_modify_header(action,
max_hw_actions,
@@ -2000,24 +2000,24 @@ static int dr_action_create_modify_action(struct mlx5dr_domain *dmn,
if (ret)
goto free_hw_actions;
- action->rewrite->chunk = chunk;
action->rewrite->modify_ttl = modify_ttl;
action->rewrite->data = (u8 *)hw_actions;
action->rewrite->num_of_actions = num_hw_actions;
- action->rewrite->index = (mlx5dr_icm_pool_get_chunk_icm_addr(chunk) -
- dmn->info.caps.hdr_modify_icm_addr) /
- ACTION_CACHE_LINE_SIZE;
- ret = mlx5dr_send_postsend_action(dmn, action);
- if (ret)
- goto free_hw_actions;
+ if (num_hw_actions == 1 &&
+ dmn->info.caps.sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) {
+ action->rewrite->single_action_opt = true;
+ } else {
+ action->rewrite->single_action_opt = false;
+ ret = mlx5dr_ste_alloc_modify_hdr(action);
+ if (ret)
+ goto free_hw_actions;
+ }
return 0;
free_hw_actions:
kfree(hw_actions);
-free_chunk:
- mlx5dr_icm_free_chunk(chunk);
return ret;
}
@@ -2162,7 +2162,8 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action)
refcount_dec(&action->reformat->dmn->refcount);
break;
case DR_ACTION_TYP_TNL_L3_TO_L2:
- mlx5dr_icm_free_chunk(action->rewrite->chunk);
+ mlx5dr_ste_free_modify_hdr(action);
+ kfree(action->rewrite->data);
refcount_dec(&action->rewrite->dmn->refcount);
break;
case DR_ACTION_TYP_L2_TO_TNL_L2:
@@ -2173,7 +2174,8 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action)
refcount_dec(&action->reformat->dmn->refcount);
break;
case DR_ACTION_TYP_MODIFY_HDR:
- mlx5dr_icm_free_chunk(action->rewrite->chunk);
+ if (!action->rewrite->single_action_opt)
+ mlx5dr_ste_free_modify_hdr(action);
kfree(action->rewrite->data);
refcount_dec(&action->rewrite->dmn->refcount);
break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_arg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_arg.c
new file mode 100644
index 000000000000..01ed6442095d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_arg.c
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "dr_types.h"
+
+#define DR_ICM_MODIFY_HDR_GRANULARITY_4K 12
+
+/* modify-header arg pool */
+enum dr_arg_chunk_size {
+ DR_ARG_CHUNK_SIZE_1,
+ DR_ARG_CHUNK_SIZE_MIN = DR_ARG_CHUNK_SIZE_1, /* keep updated when changing */
+ DR_ARG_CHUNK_SIZE_2,
+ DR_ARG_CHUNK_SIZE_3,
+ DR_ARG_CHUNK_SIZE_4,
+ DR_ARG_CHUNK_SIZE_MAX,
+};
+
+/* argument pool area */
+struct dr_arg_pool {
+ enum dr_arg_chunk_size log_chunk_size;
+ struct mlx5dr_domain *dmn;
+ struct list_head free_list;
+ struct mutex mutex; /* protect arg pool */
+};
+
+struct mlx5dr_arg_mgr {
+ struct mlx5dr_domain *dmn;
+ struct dr_arg_pool *pools[DR_ARG_CHUNK_SIZE_MAX];
+};
+
+static int dr_arg_pool_alloc_objs(struct dr_arg_pool *pool)
+{
+ struct mlx5dr_arg_obj *arg_obj, *tmp_arg;
+ struct list_head cur_list;
+ u16 object_range;
+ int num_of_objects;
+ u32 obj_id = 0;
+ int i, ret;
+
+ INIT_LIST_HEAD(&cur_list);
+
+ object_range =
+ pool->dmn->info.caps.log_header_modify_argument_granularity;
+
+ object_range =
+ max_t(u32, pool->dmn->info.caps.log_header_modify_argument_granularity,
+ DR_ICM_MODIFY_HDR_GRANULARITY_4K);
+ object_range =
+ min_t(u32, pool->dmn->info.caps.log_header_modify_argument_max_alloc,
+ object_range);
+
+ if (pool->log_chunk_size > object_range) {
+ mlx5dr_err(pool->dmn, "Required chunk size (%d) is not supported\n",
+ pool->log_chunk_size);
+ return -ENOMEM;
+ }
+
+ num_of_objects = (1 << (object_range - pool->log_chunk_size));
+ /* Only one devx object per range */
+ ret = mlx5dr_cmd_create_modify_header_arg(pool->dmn->mdev,
+ object_range,
+ pool->dmn->pdn,
+ &obj_id);
+ if (ret) {
+ mlx5dr_err(pool->dmn, "failed allocating object with range: %d:\n",
+ object_range);
+ return -EAGAIN;
+ }
+
+ for (i = 0; i < num_of_objects; i++) {
+ arg_obj = kzalloc(sizeof(*arg_obj), GFP_KERNEL);
+ if (!arg_obj) {
+ ret = -ENOMEM;
+ goto clean_arg_obj;
+ }
+
+ arg_obj->log_chunk_size = pool->log_chunk_size;
+
+ list_add_tail(&arg_obj->list_node, &cur_list);
+
+ arg_obj->obj_id = obj_id;
+ arg_obj->obj_offset = i * (1 << pool->log_chunk_size);
+ }
+ list_splice_tail_init(&cur_list, &pool->free_list);
+
+ return 0;
+
+clean_arg_obj:
+ mlx5dr_cmd_destroy_modify_header_arg(pool->dmn->mdev, obj_id);
+ list_for_each_entry_safe(arg_obj, tmp_arg, &cur_list, list_node) {
+ list_del(&arg_obj->list_node);
+ kfree(arg_obj);
+ }
+ return ret;
+}
+
+static struct mlx5dr_arg_obj *dr_arg_pool_get_arg_obj(struct dr_arg_pool *pool)
+{
+ struct mlx5dr_arg_obj *arg_obj = NULL;
+ int ret;
+
+ mutex_lock(&pool->mutex);
+ if (list_empty(&pool->free_list)) {
+ ret = dr_arg_pool_alloc_objs(pool);
+ if (ret)
+ goto out;
+ }
+
+ arg_obj = list_first_entry_or_null(&pool->free_list,
+ struct mlx5dr_arg_obj,
+ list_node);
+ WARN(!arg_obj, "couldn't get dr arg obj from pool");
+
+ if (arg_obj)
+ list_del_init(&arg_obj->list_node);
+
+out:
+ mutex_unlock(&pool->mutex);
+ return arg_obj;
+}
+
+static void dr_arg_pool_put_arg_obj(struct dr_arg_pool *pool,
+ struct mlx5dr_arg_obj *arg_obj)
+{
+ mutex_lock(&pool->mutex);
+ list_add(&arg_obj->list_node, &pool->free_list);
+ mutex_unlock(&pool->mutex);
+}
+
+static struct dr_arg_pool *dr_arg_pool_create(struct mlx5dr_domain *dmn,
+ enum dr_arg_chunk_size chunk_size)
+{
+ struct dr_arg_pool *pool;
+
+ pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+ if (!pool)
+ return NULL;
+
+ pool->dmn = dmn;
+
+ INIT_LIST_HEAD(&pool->free_list);
+ mutex_init(&pool->mutex);
+
+ pool->log_chunk_size = chunk_size;
+ if (dr_arg_pool_alloc_objs(pool))
+ goto free_pool;
+
+ return pool;
+
+free_pool:
+ kfree(pool);
+
+ return NULL;
+}
+
+static void dr_arg_pool_destroy(struct dr_arg_pool *pool)
+{
+ struct mlx5dr_arg_obj *arg_obj, *tmp_arg;
+
+ list_for_each_entry_safe(arg_obj, tmp_arg, &pool->free_list, list_node) {
+ list_del(&arg_obj->list_node);
+ if (!arg_obj->obj_offset) /* the first in range */
+ mlx5dr_cmd_destroy_modify_header_arg(pool->dmn->mdev, arg_obj->obj_id);
+ kfree(arg_obj);
+ }
+
+ mutex_destroy(&pool->mutex);
+ kfree(pool);
+}
+
+static enum dr_arg_chunk_size dr_arg_get_chunk_size(u16 num_of_actions)
+{
+ if (num_of_actions <= 8)
+ return DR_ARG_CHUNK_SIZE_1;
+ if (num_of_actions <= 16)
+ return DR_ARG_CHUNK_SIZE_2;
+ if (num_of_actions <= 32)
+ return DR_ARG_CHUNK_SIZE_3;
+ if (num_of_actions <= 64)
+ return DR_ARG_CHUNK_SIZE_4;
+
+ return DR_ARG_CHUNK_SIZE_MAX;
+}
+
+u32 mlx5dr_arg_get_obj_id(struct mlx5dr_arg_obj *arg_obj)
+{
+ return (arg_obj->obj_id + arg_obj->obj_offset);
+}
+
+struct mlx5dr_arg_obj *mlx5dr_arg_get_obj(struct mlx5dr_arg_mgr *mgr,
+ u16 num_of_actions,
+ u8 *data)
+{
+ u32 size = dr_arg_get_chunk_size(num_of_actions);
+ struct mlx5dr_arg_obj *arg_obj;
+ int ret;
+
+ if (size >= DR_ARG_CHUNK_SIZE_MAX)
+ return NULL;
+
+ arg_obj = dr_arg_pool_get_arg_obj(mgr->pools[size]);
+ if (!arg_obj) {
+ mlx5dr_err(mgr->dmn, "Failed allocating args object for modify header\n");
+ return NULL;
+ }
+
+ /* write it into the hw */
+ ret = mlx5dr_send_postsend_args(mgr->dmn,
+ mlx5dr_arg_get_obj_id(arg_obj),
+ num_of_actions, data);
+ if (ret) {
+ mlx5dr_err(mgr->dmn, "Failed writing args object\n");
+ goto put_obj;
+ }
+
+ return arg_obj;
+
+put_obj:
+ mlx5dr_arg_put_obj(mgr, arg_obj);
+ return NULL;
+}
+
+void mlx5dr_arg_put_obj(struct mlx5dr_arg_mgr *mgr,
+ struct mlx5dr_arg_obj *arg_obj)
+{
+ dr_arg_pool_put_arg_obj(mgr->pools[arg_obj->log_chunk_size], arg_obj);
+}
+
+struct mlx5dr_arg_mgr*
+mlx5dr_arg_mgr_create(struct mlx5dr_domain *dmn)
+{
+ struct mlx5dr_arg_mgr *pool_mgr;
+ int i;
+
+ if (!mlx5dr_domain_is_support_ptrn_arg(dmn))
+ return NULL;
+
+ pool_mgr = kzalloc(sizeof(*pool_mgr), GFP_KERNEL);
+ if (!pool_mgr)
+ return NULL;
+
+ pool_mgr->dmn = dmn;
+
+ for (i = 0; i < DR_ARG_CHUNK_SIZE_MAX; i++) {
+ pool_mgr->pools[i] = dr_arg_pool_create(dmn, i);
+ if (!pool_mgr->pools[i])
+ goto clean_pools;
+ }
+
+ return pool_mgr;
+
+clean_pools:
+ for (i--; i >= 0; i--)
+ dr_arg_pool_destroy(pool_mgr->pools[i]);
+
+ kfree(pool_mgr);
+ return NULL;
+}
+
+void mlx5dr_arg_mgr_destroy(struct mlx5dr_arg_mgr *mgr)
+{
+ struct dr_arg_pool **pools;
+ int i;
+
+ if (!mgr)
+ return;
+
+ pools = mgr->pools;
+ for (i = 0; i < DR_ARG_CHUNK_SIZE_MAX; i++)
+ dr_arg_pool_destroy(pools[i]);
+
+ kfree(mgr);
+}
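
The argument manager hands out modify-header argument objects in power-of-two chunks (up to 8/16/32/64 actions for chunk sizes 1..4). A hedged sketch of the alloc/write/release flow; demo_write_rewrite_args, demo_release_rewrite_args and the dmn->arg_mgr field are assumptions for illustration, while the mlx5dr_arg_* calls are the ones added above.

#include "dr_types.h"

static int demo_write_rewrite_args(struct mlx5dr_domain *dmn,
				   u16 num_of_actions, u8 *actions_data,
				   struct mlx5dr_arg_obj **arg_out)
{
	struct mlx5dr_arg_obj *arg_obj;

	/* e.g. 20 actions land in the DR_ARG_CHUNK_SIZE_3 pool (<= 32 actions);
	 * mlx5dr_arg_get_obj() also posts the action data to the device.
	 * dmn->arg_mgr is an assumed field holding the manager created by
	 * mlx5dr_arg_mgr_create().
	 */
	arg_obj = mlx5dr_arg_get_obj(dmn->arg_mgr, num_of_actions, actions_data);
	if (!arg_obj)
		return -ENOMEM;

	*arg_out = arg_obj;
	return 0;
}

/* release path */
static void demo_release_rewrite_args(struct mlx5dr_domain *dmn,
				      struct mlx5dr_arg_obj *arg_obj)
{
	mlx5dr_arg_put_obj(dmn->arg_mgr, arg_obj);
}
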
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
index 07b6a6dcb92f..3835ba3f4dda 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
@@ -132,6 +132,17 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
caps->isolate_vl_tc = MLX5_CAP_GEN(mdev, isolate_vl_tc_new);
+ caps->support_modify_argument =
+ MLX5_CAP_GEN_64(mdev, general_obj_types) &
+ MLX5_GENERAL_OBJ_TYPES_CAP_HEADER_MODIFY_ARGUMENT;
+
+ if (caps->support_modify_argument) {
+ caps->log_header_modify_argument_granularity =
+ MLX5_CAP_GEN(mdev, log_header_modify_argument_granularity);
+ caps->log_header_modify_argument_max_alloc =
+ MLX5_CAP_GEN(mdev, log_header_modify_argument_max_alloc);
+ }
+
/* geneve_tlv_option_0_exist is the indication of
* STE support for lookup type flex_parser_ok
*/
@@ -200,6 +211,12 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
caps->hdr_modify_icm_addr =
MLX5_CAP64_DEV_MEM(mdev, header_modify_sw_icm_start_address);
+ caps->log_modify_pattern_icm_size =
+ MLX5_CAP_DEV_MEM(mdev, log_header_modify_pattern_sw_icm_size);
+
+ caps->hdr_modify_pattern_icm_addr =
+ MLX5_CAP64_DEV_MEM(mdev, header_modify_pattern_sw_icm_start_address);
+
caps->roce_min_src_udp = MLX5_CAP_ROCE(mdev, r_roce_min_src_udp_port);
caps->is_ecpf = mlx5_core_is_ecpf_esw_manager(mdev);
@@ -676,6 +693,49 @@ int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num,
return 0;
}
+int mlx5dr_cmd_create_modify_header_arg(struct mlx5_core_dev *dev,
+ u16 log_obj_range, u32 pd,
+ u32 *obj_id)
+{
+ u32 in[MLX5_ST_SZ_DW(create_modify_header_arg_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+ void *attr;
+ int ret;
+
+ attr = MLX5_ADDR_OF(create_modify_header_arg_in, in, hdr);
+ MLX5_SET(general_obj_in_cmd_hdr, attr, opcode,
+ MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, attr, obj_type,
+ MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT);
+ MLX5_SET(general_obj_in_cmd_hdr, attr,
+ op_param.create.log_obj_range, log_obj_range);
+
+ attr = MLX5_ADDR_OF(create_modify_header_arg_in, in, arg);
+ MLX5_SET(modify_header_arg, attr, access_pd, pd);
+
+ ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (ret)
+ return ret;
+
+ *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ return 0;
+}
+
+void mlx5dr_cmd_destroy_modify_header_arg(struct mlx5_core_dev *dev,
+ u32 obj_id)
+{
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+ MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
+
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
static int mlx5dr_cmd_set_extended_dest(struct mlx5_core_dev *dev,
struct mlx5dr_cmd_fte_info *fte,
bool *extended_dest)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
index db81d881d38e..7e36e1062139 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
@@ -4,6 +4,7 @@
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/seq_file.h>
+#include <linux/version.h>
#include "dr_types.h"
#define DR_DBG_PTR_TO_ID(p) ((u64)(uintptr_t)(p) & 0xFFFFFFFFULL)
@@ -140,10 +141,33 @@ dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id,
action->flow_tag->flow_tag);
break;
case DR_ACTION_TYP_MODIFY_HDR:
- seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ {
+ struct mlx5dr_ptrn_obj *ptrn = action->rewrite->ptrn;
+ struct mlx5dr_arg_obj *arg = action->rewrite->arg;
+ u8 *rewrite_data = action->rewrite->data;
+ bool ptrn_arg;
+ int i;
+
+ ptrn_arg = !action->rewrite->single_action_opt && ptrn && arg;
+
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x,%d,0x%x,0x%x,0x%x",
DR_DUMP_REC_TYPE_ACTION_MODIFY_HDR, action_id,
- rule_id, action->rewrite->index);
+ rule_id, action->rewrite->index,
+ action->rewrite->single_action_opt,
+ ptrn_arg ? action->rewrite->num_of_actions : 0,
+ ptrn_arg ? ptrn->index : 0,
+ ptrn_arg ? mlx5dr_arg_get_obj_id(arg) : 0);
+
+ if (ptrn_arg) {
+ for (i = 0; i < action->rewrite->num_of_actions; i++) {
+ seq_printf(file, ",0x%016llx",
+ be64_to_cpu(((__be64 *)rewrite_data)[i]));
+ }
+ }
+
+ seq_puts(file, "\n");
break;
+ }
case DR_ACTION_TYP_VPORT:
seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
DR_DUMP_REC_TYPE_ACTION_VPORT, action_id, rule_id,
@@ -157,7 +181,10 @@ dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id,
case DR_ACTION_TYP_TNL_L3_TO_L2:
seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
DR_DUMP_REC_TYPE_ACTION_DECAP_L3, action_id,
- rule_id, action->rewrite->index);
+ rule_id,
+ (action->rewrite->ptrn && action->rewrite->arg) ?
+ mlx5dr_arg_get_obj_id(action->rewrite->arg) :
+ action->rewrite->index);
break;
case DR_ACTION_TYP_L2_TO_TNL_L2:
seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
@@ -606,9 +633,18 @@ dr_dump_domain(struct seq_file *file, struct mlx5dr_domain *dmn)
u64 domain_id = DR_DBG_PTR_TO_ID(dmn);
int ret;
- seq_printf(file, "%d,0x%llx,%d,0%x,%d,%s\n", DR_DUMP_REC_TYPE_DOMAIN,
+ seq_printf(file, "%d,0x%llx,%d,0%x,%d,%u.%u.%u,%s,%d,%u,%u,%u\n",
+ DR_DUMP_REC_TYPE_DOMAIN,
domain_id, dmn->type, dmn->info.caps.gvmi,
- dmn->info.supp_sw_steering, pci_name(dmn->mdev->pdev));
+ dmn->info.supp_sw_steering,
+ /* package version */
+ LINUX_VERSION_MAJOR, LINUX_VERSION_PATCHLEVEL,
+ LINUX_VERSION_SUBLEVEL,
+ pci_name(dmn->mdev->pdev),
+ 0, /* domain flags */
+ dmn->num_buddies[DR_ICM_TYPE_STE],
+ dmn->num_buddies[DR_ICM_TYPE_MODIFY_ACTION],
+ dmn->num_buddies[DR_ICM_TYPE_MODIFY_HDR_PTRN]);
ret = dr_dump_domain_info(file, &dmn->info, domain_id);
if (ret < 0)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
index 5b8bb2ca31e6..9a2dfe6ebe31 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
@@ -10,6 +10,46 @@
((dmn)->info.caps.dmn_type##_sw_owner_v2 && \
(dmn)->info.caps.sw_format_ver <= MLX5_STEERING_FORMAT_CONNECTX_7))
+bool mlx5dr_domain_is_support_ptrn_arg(struct mlx5dr_domain *dmn)
+{
+ return dmn->info.caps.sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX &&
+ dmn->info.caps.support_modify_argument;
+}
+
+static int dr_domain_init_modify_header_resources(struct mlx5dr_domain *dmn)
+{
+ if (!mlx5dr_domain_is_support_ptrn_arg(dmn))
+ return 0;
+
+ dmn->ptrn_mgr = mlx5dr_ptrn_mgr_create(dmn);
+ if (!dmn->ptrn_mgr) {
+ mlx5dr_err(dmn, "Couldn't create ptrn_mgr\n");
+ return -ENOMEM;
+ }
+
+ /* create argument pool */
+ dmn->arg_mgr = mlx5dr_arg_mgr_create(dmn);
+ if (!dmn->arg_mgr) {
+ mlx5dr_err(dmn, "Couldn't create arg_mgr\n");
+ goto free_modify_header_pattern;
+ }
+
+ return 0;
+
+free_modify_header_pattern:
+ mlx5dr_ptrn_mgr_destroy(dmn->ptrn_mgr);
+ return -ENOMEM;
+}
+
+static void dr_domain_destroy_modify_header_resources(struct mlx5dr_domain *dmn)
+{
+ if (!mlx5dr_domain_is_support_ptrn_arg(dmn))
+ return;
+
+ mlx5dr_arg_mgr_destroy(dmn->arg_mgr);
+ mlx5dr_ptrn_mgr_destroy(dmn->ptrn_mgr);
+}
+
static void dr_domain_init_csum_recalc_fts(struct mlx5dr_domain *dmn)
{
/* Per vport cached FW FT for checksum recalculation, this
@@ -149,14 +189,22 @@ static int dr_domain_init_resources(struct mlx5dr_domain *dmn)
goto clean_uar;
}
+ ret = dr_domain_init_modify_header_resources(dmn);
+ if (ret) {
+ mlx5dr_err(dmn, "Couldn't create modify-header-resources\n");
+ goto clean_mem_resources;
+ }
+
ret = mlx5dr_send_ring_alloc(dmn);
if (ret) {
mlx5dr_err(dmn, "Couldn't create send-ring\n");
- goto clean_mem_resources;
+ goto clean_modify_hdr;
}
return 0;
+clean_modify_hdr:
+ dr_domain_destroy_modify_header_resources(dmn);
clean_mem_resources:
dr_domain_uninit_mem_resources(dmn);
clean_uar:
@@ -170,6 +218,7 @@ clean_pd:
static void dr_domain_uninit_resources(struct mlx5dr_domain *dmn)
{
mlx5dr_send_ring_free(dmn, dmn->send_ring);
+ dr_domain_destroy_modify_header_resources(dmn);
dr_domain_uninit_mem_resources(dmn);
mlx5_put_uars_page(dmn->mdev, dmn->uar);
mlx5_core_dealloc_pd(dmn->mdev, dmn->pdn);
@@ -215,7 +264,7 @@ static int dr_domain_query_vport(struct mlx5dr_domain *dmn,
return 0;
}
-static int dr_domain_query_esw_mngr(struct mlx5dr_domain *dmn)
+static int dr_domain_query_esw_mgr(struct mlx5dr_domain *dmn)
{
return dr_domain_query_vport(dmn, 0, false,
&dmn->info.caps.vports.esw_manager_caps);
@@ -321,7 +370,7 @@ static int dr_domain_query_fdb_caps(struct mlx5_core_dev *mdev,
* vports (vport 0, VFs and SFs) will be queried dynamically.
*/
- ret = dr_domain_query_esw_mngr(dmn);
+ ret = dr_domain_query_esw_mgr(dmn);
if (ret) {
mlx5dr_err(dmn, "Failed to query eswitch manager vport caps (err: %d)", ret);
goto free_vports_caps_xa;
@@ -435,6 +484,9 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type)
dmn->info.max_log_action_icm_sz = DR_CHUNK_SIZE_4K;
dmn->info.max_log_sw_icm_sz = min_t(u32, DR_CHUNK_SIZE_1024K,
dmn->info.caps.log_icm_size);
+ dmn->info.max_log_modify_hdr_pattern_icm_sz =
+ min_t(u32, DR_CHUNK_SIZE_4K,
+ dmn->info.caps.log_modify_pattern_icm_size);
if (!dmn->info.supp_sw_steering) {
mlx5dr_err(dmn, "SW steering is not supported\n");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
index 3eb6719bc8eb..0b5af9f3f605 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
@@ -4,7 +4,9 @@
#include "dr_types.h"
#define DR_ICM_MODIFY_HDR_ALIGN_BASE 64
-#define DR_ICM_POOL_HOT_MEMORY_FRACTION 4
+#define DR_ICM_POOL_STE_HOT_MEM_PERCENT 25
+#define DR_ICM_POOL_MODIFY_HDR_PTRN_HOT_MEM_PERCENT 50
+#define DR_ICM_POOL_MODIFY_ACTION_HOT_MEM_PERCENT 90
struct mlx5dr_icm_hot_chunk {
struct mlx5dr_icm_buddy_mem *buddy_mem;
@@ -29,6 +31,8 @@ struct mlx5dr_icm_pool {
struct mlx5dr_icm_hot_chunk *hot_chunks_arr;
u32 hot_chunks_num;
u64 hot_memory_size;
+ /* hot memory size threshold for triggering sync */
+ u64 th;
};
struct mlx5dr_icm_dm {
@@ -107,9 +111,9 @@ static struct mlx5dr_icm_mr *
dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool)
{
struct mlx5_core_dev *mdev = pool->dmn->mdev;
- enum mlx5_sw_icm_type dm_type;
+ enum mlx5_sw_icm_type dm_type = 0;
struct mlx5dr_icm_mr *icm_mr;
- size_t log_align_base;
+ size_t log_align_base = 0;
int err;
icm_mr = kvzalloc(sizeof(*icm_mr), GFP_KERNEL);
@@ -121,14 +125,25 @@ dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool)
icm_mr->dm.length = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
pool->icm_type);
- if (pool->icm_type == DR_ICM_TYPE_STE) {
+ switch (pool->icm_type) {
+ case DR_ICM_TYPE_STE:
dm_type = MLX5_SW_ICM_TYPE_STEERING;
log_align_base = ilog2(icm_mr->dm.length);
- } else {
+ break;
+ case DR_ICM_TYPE_MODIFY_ACTION:
dm_type = MLX5_SW_ICM_TYPE_HEADER_MODIFY;
/* Align base is 64B */
log_align_base = ilog2(DR_ICM_MODIFY_HDR_ALIGN_BASE);
+ break;
+ case DR_ICM_TYPE_MODIFY_HDR_PTRN:
+ dm_type = MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN;
+ /* Align base is 64B */
+ log_align_base = ilog2(DR_ICM_MODIFY_HDR_ALIGN_BASE);
+ break;
+ default:
+ WARN_ON(pool->icm_type);
}
+
icm_mr->dm.type = dm_type;
err = mlx5_dm_sw_icm_alloc(mdev, icm_mr->dm.type, icm_mr->dm.length,
@@ -273,6 +288,8 @@ static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool)
/* add it to the -start- of the list in order to search in it first */
list_add(&buddy->list_node, &pool->buddy_mem_list);
+ pool->dmn->num_buddies[pool->icm_type]++;
+
return 0;
err_cleanup_buddy:
@@ -286,13 +303,17 @@ free_mr:
static void dr_icm_buddy_destroy(struct mlx5dr_icm_buddy_mem *buddy)
{
+ enum mlx5dr_icm_type icm_type = buddy->pool->icm_type;
+
dr_icm_pool_mr_destroy(buddy->icm_mr);
mlx5dr_buddy_cleanup(buddy);
- if (buddy->pool->icm_type == DR_ICM_TYPE_STE)
+ if (icm_type == DR_ICM_TYPE_STE)
dr_icm_buddy_cleanup_ste_cache(buddy);
+ buddy->pool->dmn->num_buddies[icm_type]--;
+
kvfree(buddy);
}
@@ -319,15 +340,7 @@ dr_icm_chunk_init(struct mlx5dr_icm_chunk *chunk,
static bool dr_icm_pool_is_sync_required(struct mlx5dr_icm_pool *pool)
{
- int allow_hot_size;
-
- /* sync when hot memory reaches a certain fraction of the pool size */
- allow_hot_size =
- mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
- pool->icm_type) /
- DR_ICM_POOL_HOT_MEMORY_FRACTION;
-
- return pool->hot_memory_size > allow_hot_size;
+ return pool->hot_memory_size > pool->th;
}
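The sync trigger above compares the accumulated hot memory against pool->th, which the pool-create path below derives from the pool's maximum chunk size and a per-type percentage (25/50/90). A small sketch of that arithmetic with an assumed pool size:

#include <stdint.h>
#include <stdio.h>

/* Percentages mirror the DR_ICM_POOL_*_HOT_MEM_PERCENT defines above. */
static uint64_t hot_threshold(uint64_t max_chunk_bytes, unsigned int percent)
{
	return max_chunk_bytes * percent / 100;
}

int main(void)
{
	uint64_t pool_bytes = 1ULL << 20;	/* assume a 1 MB max chunk */

	printf("STE pool syncs above %llu bytes\n",
	       (unsigned long long)hot_threshold(pool_bytes, 25));
	printf("modify-hdr-pattern pool syncs above %llu bytes\n",
	       (unsigned long long)hot_threshold(pool_bytes, 50));
	printf("modify-action pool syncs above %llu bytes\n",
	       (unsigned long long)hot_threshold(pool_bytes, 90));
	return 0;
}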
static void dr_icm_pool_clear_hot_chunks_arr(struct mlx5dr_icm_pool *pool)
@@ -492,14 +505,9 @@ void mlx5dr_icm_pool_free_htbl(struct mlx5dr_icm_pool *pool, struct mlx5dr_ste_h
struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn,
enum mlx5dr_icm_type icm_type)
{
- u32 num_of_chunks, entry_size, max_hot_size;
- enum mlx5dr_icm_chunk_size max_log_chunk_sz;
+ u32 num_of_chunks, entry_size;
struct mlx5dr_icm_pool *pool;
-
- if (icm_type == DR_ICM_TYPE_STE)
- max_log_chunk_sz = dmn->info.max_log_sw_icm_sz;
- else
- max_log_chunk_sz = dmn->info.max_log_action_icm_sz;
+ u32 max_hot_size = 0;
pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
if (!pool)
@@ -507,20 +515,38 @@ struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn,
pool->dmn = dmn;
pool->icm_type = icm_type;
- pool->max_log_chunk_sz = max_log_chunk_sz;
pool->chunks_kmem_cache = dmn->chunks_kmem_cache;
INIT_LIST_HEAD(&pool->buddy_mem_list);
-
mutex_init(&pool->mutex);
- entry_size = mlx5dr_icm_pool_dm_type_to_entry_size(pool->icm_type);
+ switch (icm_type) {
+ case DR_ICM_TYPE_STE:
+ pool->max_log_chunk_sz = dmn->info.max_log_sw_icm_sz;
+ max_hot_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
+ pool->icm_type) *
+ DR_ICM_POOL_STE_HOT_MEM_PERCENT / 100;
+ break;
+ case DR_ICM_TYPE_MODIFY_ACTION:
+ pool->max_log_chunk_sz = dmn->info.max_log_action_icm_sz;
+ max_hot_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
+ pool->icm_type) *
+ DR_ICM_POOL_MODIFY_ACTION_HOT_MEM_PERCENT / 100;
+ break;
+ case DR_ICM_TYPE_MODIFY_HDR_PTRN:
+ pool->max_log_chunk_sz = dmn->info.max_log_modify_hdr_pattern_icm_sz;
+ max_hot_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
+ pool->icm_type) *
+ DR_ICM_POOL_MODIFY_HDR_PTRN_HOT_MEM_PERCENT / 100;
+ break;
+ default:
+ WARN_ON(icm_type);
+ }
- max_hot_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
- pool->icm_type) /
- DR_ICM_POOL_HOT_MEMORY_FRACTION;
+ entry_size = mlx5dr_icm_pool_dm_type_to_entry_size(pool->icm_type);
num_of_chunks = DIV_ROUND_UP(max_hot_size, entry_size) + 1;
+ pool->th = max_hot_size;
pool->hot_chunks_arr = kvcalloc(num_of_chunks,
sizeof(struct mlx5dr_icm_hot_chunk),
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c
new file mode 100644
index 000000000000..13e06a6a6b22
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "dr_types.h"
+#include "mlx5_ifc_dr_ste_v1.h"
+
+enum dr_ptrn_modify_hdr_action_id {
+ DR_PTRN_MODIFY_HDR_ACTION_ID_NOP = 0x00,
+ DR_PTRN_MODIFY_HDR_ACTION_ID_COPY = 0x05,
+ DR_PTRN_MODIFY_HDR_ACTION_ID_SET = 0x06,
+ DR_PTRN_MODIFY_HDR_ACTION_ID_ADD = 0x07,
+ DR_PTRN_MODIFY_HDR_ACTION_ID_INSERT_INLINE = 0x0a,
+};
+
+struct mlx5dr_ptrn_mgr {
+ struct mlx5dr_domain *dmn;
+ struct mlx5dr_icm_pool *ptrn_icm_pool;
+ /* cache for modify_header ptrn */
+ struct list_head ptrn_list;
+ struct mutex modify_hdr_mutex; /* protect the pattern cache */
+};
+
+/* Cache structure and functions */
+static bool dr_ptrn_compare_modify_hdr(size_t cur_num_of_actions,
+ __be64 cur_hw_actions[],
+ size_t num_of_actions,
+ __be64 hw_actions[])
+{
+ int i;
+
+ if (cur_num_of_actions != num_of_actions)
+ return false;
+
+ for (i = 0; i < num_of_actions; i++) {
+ u8 action_id =
+ MLX5_GET(ste_double_action_set_v1, &hw_actions[i], action_id);
+
+ if (action_id == DR_PTRN_MODIFY_HDR_ACTION_ID_COPY) {
+ if (hw_actions[i] != cur_hw_actions[i])
+ return false;
+ } else {
+ if ((__force __be32)hw_actions[i] !=
+ (__force __be32)cur_hw_actions[i])
+ return false;
+ }
+ }
+
+ return true;
+}
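The comparison above matches COPY actions on the full action, while other action types are matched on their control part only, since the per-rule data is supplied through the argument object. A conceptual model of that rule, assuming a simplified ctrl/data split (the real layout is the STEv1 double-action format):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ACTION_ID_COPY 0x05	/* DR_PTRN_MODIFY_HDR_ACTION_ID_COPY */

struct action {
	uint32_t ctrl;	/* action id, field, length, ... */
	uint32_t data;	/* inline data, provided per rule via the argument */
};

static bool actions_match(struct action a, struct action b, uint8_t action_id)
{
	if (action_id == ACTION_ID_COPY)
		return a.ctrl == b.ctrl && a.data == b.data;
	return a.ctrl == b.ctrl;	/* ignore data for SET/ADD/INSERT */
}

int main(void)
{
	/* Two SET actions that differ only in their data still share a pattern. */
	struct action a = { .ctrl = 0x06000011, .data = 0x22334455 };
	struct action b = { .ctrl = 0x06000011, .data = 0xaabbccdd };

	printf("match: %d\n", actions_match(a, b, 0x06));
	return 0;
}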
+
+static struct mlx5dr_ptrn_obj *
+dr_ptrn_find_cached_pattern(struct mlx5dr_ptrn_mgr *mgr,
+ size_t num_of_actions,
+ __be64 hw_actions[])
+{
+ struct mlx5dr_ptrn_obj *cached_pattern;
+ struct mlx5dr_ptrn_obj *tmp;
+
+ list_for_each_entry_safe(cached_pattern, tmp, &mgr->ptrn_list, list) {
+ if (dr_ptrn_compare_modify_hdr(cached_pattern->num_of_actions,
+ (__be64 *)cached_pattern->data,
+ num_of_actions,
+ hw_actions)) {
+ /* Move this pattern to the head of the list,
+ * since it is likely to be used again soon.
+ */
+ list_del_init(&cached_pattern->list);
+ list_add(&cached_pattern->list, &mgr->ptrn_list);
+ return cached_pattern;
+ }
+ }
+
+ return NULL;
+}
+
+static struct mlx5dr_ptrn_obj *
+dr_ptrn_alloc_pattern(struct mlx5dr_ptrn_mgr *mgr,
+ u16 num_of_actions, u8 *data)
+{
+ struct mlx5dr_ptrn_obj *pattern;
+ struct mlx5dr_icm_chunk *chunk;
+ u32 chunk_size;
+ u32 index;
+
+ chunk_size = ilog2(num_of_actions);
+ /* HW modify action index granularity is at least 64B */
+ chunk_size = max_t(u32, chunk_size, DR_CHUNK_SIZE_8);
+
+ chunk = mlx5dr_icm_alloc_chunk(mgr->ptrn_icm_pool, chunk_size);
+ if (!chunk)
+ return NULL;
+
+ index = (mlx5dr_icm_pool_get_chunk_icm_addr(chunk) -
+ mgr->dmn->info.caps.hdr_modify_pattern_icm_addr) /
+ DR_ACTION_CACHE_LINE_SIZE;
+
+ pattern = kzalloc(sizeof(*pattern), GFP_KERNEL);
+ if (!pattern)
+ goto free_chunk;
+
+ pattern->data = kzalloc(num_of_actions * DR_MODIFY_ACTION_SIZE *
+ sizeof(*pattern->data), GFP_KERNEL);
+ if (!pattern->data)
+ goto free_pattern;
+
+ memcpy(pattern->data, data, num_of_actions * DR_MODIFY_ACTION_SIZE);
+ pattern->chunk = chunk;
+ pattern->index = index;
+ pattern->num_of_actions = num_of_actions;
+
+ list_add(&pattern->list, &mgr->ptrn_list);
+ refcount_set(&pattern->refcount, 1);
+
+ return pattern;
+
+free_pattern:
+ kfree(pattern);
+free_chunk:
+ mlx5dr_icm_free_chunk(chunk);
+ return NULL;
+}
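The pattern index computed above is simply the chunk's offset from the modify-header-pattern ICM base, measured in 64B action cache lines. A quick arithmetic check with assumed addresses:

#include <stdint.h>
#include <stdio.h>

#define CACHE_LINE 64	/* DR_ACTION_CACHE_LINE_SIZE */

int main(void)
{
	uint64_t ptrn_icm_base = 0x200000;	/* assumed ICM base address */
	uint64_t chunk_addr = 0x200180;		/* assumed chunk ICM address */

	/* 0x180 bytes / 64 -> pattern index 6 */
	printf("pattern index = %llu\n",
	       (unsigned long long)((chunk_addr - ptrn_icm_base) / CACHE_LINE));
	return 0;
}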
+
+static void
+dr_ptrn_free_pattern(struct mlx5dr_ptrn_obj *pattern)
+{
+ list_del(&pattern->list);
+ mlx5dr_icm_free_chunk(pattern->chunk);
+ kfree(pattern->data);
+ kfree(pattern);
+}
+
+struct mlx5dr_ptrn_obj *
+mlx5dr_ptrn_cache_get_pattern(struct mlx5dr_ptrn_mgr *mgr,
+ u16 num_of_actions,
+ u8 *data)
+{
+ struct mlx5dr_ptrn_obj *pattern;
+ u64 *hw_actions;
+ u8 action_id;
+ int i;
+
+ mutex_lock(&mgr->modify_hdr_mutex);
+ pattern = dr_ptrn_find_cached_pattern(mgr,
+ num_of_actions,
+ (__be64 *)data);
+ if (!pattern) {
+ /* Alloc and add new pattern to cache */
+ pattern = dr_ptrn_alloc_pattern(mgr, num_of_actions, data);
+ if (!pattern)
+ goto out_unlock;
+
+ hw_actions = (u64 *)pattern->data;
+ /* Mask the data fields of the pattern to create a valid pattern,
+ * since HW ORs the argument data with the pattern
+ */
+ for (i = 0; i < num_of_actions; i++) {
+ action_id = MLX5_GET(ste_double_action_set_v1, &hw_actions[i], action_id);
+
+ if (action_id == DR_PTRN_MODIFY_HDR_ACTION_ID_SET ||
+ action_id == DR_PTRN_MODIFY_HDR_ACTION_ID_ADD ||
+ action_id == DR_PTRN_MODIFY_HDR_ACTION_ID_INSERT_INLINE)
+ MLX5_SET(ste_double_action_set_v1, &hw_actions[i], inline_data, 0);
+ }
+
+ if (mlx5dr_send_postsend_pattern(mgr->dmn, pattern->chunk,
+ num_of_actions, pattern->data)) {
+ refcount_dec(&pattern->refcount);
+ goto free_pattern;
+ }
+ } else {
+ refcount_inc(&pattern->refcount);
+ }
+
+ mutex_unlock(&mgr->modify_hdr_mutex);
+
+ return pattern;
+
+free_pattern:
+ dr_ptrn_free_pattern(pattern);
+out_unlock:
+ mutex_unlock(&mgr->modify_hdr_mutex);
+ return NULL;
+}
+
+void
+mlx5dr_ptrn_cache_put_pattern(struct mlx5dr_ptrn_mgr *mgr,
+ struct mlx5dr_ptrn_obj *pattern)
+{
+ mutex_lock(&mgr->modify_hdr_mutex);
+
+ if (refcount_dec_and_test(&pattern->refcount))
+ dr_ptrn_free_pattern(pattern);
+
+ mutex_unlock(&mgr->modify_hdr_mutex);
+}
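The get/put pair above implements a refcounted cache under a mutex: a hit takes a reference, a put drops it, and the last user frees the entry. A minimal userspace model of that lifecycle (no locking, illustrative names only, not the kernel code):

#include <stdio.h>
#include <stdlib.h>

struct pattern {
	int refcount;
};

static struct pattern *pattern_get(struct pattern **cached)
{
	if (!*cached) {
		*cached = calloc(1, sizeof(**cached));
		if (!*cached)
			return NULL;
		(*cached)->refcount = 1;	/* first user creates the entry */
	} else {
		(*cached)->refcount++;		/* cache hit */
	}
	return *cached;
}

static void pattern_put(struct pattern **cached)
{
	if (--(*cached)->refcount == 0) {
		free(*cached);			/* last user frees the entry */
		*cached = NULL;
	}
}

int main(void)
{
	struct pattern *cache = NULL;
	struct pattern *a = pattern_get(&cache);
	struct pattern *b = pattern_get(&cache);

	printf("shared: %d, refs: %d\n", a == b, cache->refcount);
	pattern_put(&cache);
	pattern_put(&cache);
	printf("cache empty: %d\n", cache == NULL);
	return 0;
}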
+
+struct mlx5dr_ptrn_mgr *mlx5dr_ptrn_mgr_create(struct mlx5dr_domain *dmn)
+{
+ struct mlx5dr_ptrn_mgr *mgr;
+
+ if (!mlx5dr_domain_is_support_ptrn_arg(dmn))
+ return NULL;
+
+ mgr = kzalloc(sizeof(*mgr), GFP_KERNEL);
+ if (!mgr)
+ return NULL;
+
+ mgr->dmn = dmn;
+ mgr->ptrn_icm_pool = mlx5dr_icm_pool_create(dmn, DR_ICM_TYPE_MODIFY_HDR_PTRN);
+ if (!mgr->ptrn_icm_pool) {
+ mlx5dr_err(dmn, "Couldn't get modify-header-pattern memory\n");
+ goto free_mgr;
+ }
+
+ INIT_LIST_HEAD(&mgr->ptrn_list);
+ return mgr;
+
+free_mgr:
+ kfree(mgr);
+ return NULL;
+}
+
+void mlx5dr_ptrn_mgr_destroy(struct mlx5dr_ptrn_mgr *mgr)
+{
+ struct mlx5dr_ptrn_obj *pattern;
+ struct mlx5dr_ptrn_obj *tmp;
+
+ if (!mgr)
+ return;
+
+ WARN_ON(!list_empty(&mgr->ptrn_list));
+
+ list_for_each_entry_safe(pattern, tmp, &mgr->ptrn_list, list) {
+ list_del(&pattern->list);
+ kfree(pattern->data);
+ kfree(pattern);
+ }
+
+ mlx5dr_icm_pool_destroy(mgr->ptrn_icm_pool);
+ kfree(mgr);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index fd2d31cdbcf9..4a5ae86e2b62 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -18,7 +18,13 @@ struct dr_data_seg {
unsigned int send_flags;
};
+enum send_info_type {
+ WRITE_ICM = 0,
+ GTA_ARG = 1,
+};
+
struct postsend_info {
+ enum send_info_type type;
struct dr_data_seg write;
struct dr_data_seg read;
u64 remote_addr;
@@ -261,9 +267,10 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
dr_qp->rq.pc = 0;
dr_qp->rq.cc = 0;
- dr_qp->rq.wqe_cnt = 4;
+ dr_qp->rq.wqe_cnt = 256;
dr_qp->sq.pc = 0;
dr_qp->sq.cc = 0;
+ dr_qp->sq.head = 0;
dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);
MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
@@ -362,39 +369,113 @@ static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
}
-static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
- u32 rkey, struct dr_data_seg *data_seg,
- u32 opcode, bool notify_hw)
+static void
+dr_rdma_handle_flow_access_arg_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
+ u32 remote_addr,
+ struct dr_data_seg *data_seg,
+ int *size)
{
- struct mlx5_wqe_raddr_seg *wq_raddr;
- struct mlx5_wqe_ctrl_seg *wq_ctrl;
- struct mlx5_wqe_data_seg *wq_dseg;
- unsigned int size;
- unsigned int idx;
+ struct mlx5_wqe_header_modify_argument_update_seg *wq_arg_seg;
+ struct mlx5_wqe_flow_update_ctrl_seg *wq_flow_seg;
- size = sizeof(*wq_ctrl) / 16 + sizeof(*wq_dseg) / 16 +
- sizeof(*wq_raddr) / 16;
+ wq_ctrl->general_id = cpu_to_be32(remote_addr);
+ wq_flow_seg = (void *)(wq_ctrl + 1);
- idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);
+ /* mlx5_wqe_flow_update_ctrl_seg - all reserved */
+ memset(wq_flow_seg, 0, sizeof(*wq_flow_seg));
+ wq_arg_seg = (void *)(wq_flow_seg + 1);
+
+ memcpy(wq_arg_seg->argument_list,
+ (void *)(uintptr_t)data_seg->addr,
+ data_seg->length);
+
+ *size = (sizeof(*wq_ctrl) + /* WQE ctrl segment */
+ sizeof(*wq_flow_seg) + /* WQE flow update ctrl seg - reserved */
+ sizeof(*wq_arg_seg)) / /* WQE hdr modify arg seg - data */
+ MLX5_SEND_WQE_DS;
+}
+
+static void
+dr_rdma_handle_icm_write_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
+ u64 remote_addr,
+ u32 rkey,
+ struct dr_data_seg *data_seg,
+ unsigned int *size)
+{
+ struct mlx5_wqe_raddr_seg *wq_raddr;
+ struct mlx5_wqe_data_seg *wq_dseg;
- wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
- wq_ctrl->imm = 0;
- wq_ctrl->fm_ce_se = (data_seg->send_flags) ?
- MLX5_WQE_CTRL_CQ_UPDATE : 0;
- wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) |
- opcode);
- wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8);
wq_raddr = (void *)(wq_ctrl + 1);
+
wq_raddr->raddr = cpu_to_be64(remote_addr);
wq_raddr->rkey = cpu_to_be32(rkey);
wq_raddr->reserved = 0;
wq_dseg = (void *)(wq_raddr + 1);
+
wq_dseg->byte_count = cpu_to_be32(data_seg->length);
wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
wq_dseg->addr = cpu_to_be64(data_seg->addr);
- dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++;
+ *size = (sizeof(*wq_ctrl) + /* WQE ctrl segment */
+ sizeof(*wq_dseg) + /* WQE data segment */
+ sizeof(*wq_raddr)) / /* WQE remote addr segment */
+ MLX5_SEND_WQE_DS;
+}
+
+static void dr_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *wq_ctrl,
+ struct dr_data_seg *data_seg)
+{
+ wq_ctrl->signature = 0;
+ wq_ctrl->rsvd[0] = 0;
+ wq_ctrl->rsvd[1] = 0;
+ wq_ctrl->fm_ce_se = data_seg->send_flags & IB_SEND_SIGNALED ?
+ MLX5_WQE_CTRL_CQ_UPDATE : 0;
+ wq_ctrl->imm = 0;
+}
+
+static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
+ u32 rkey, struct dr_data_seg *data_seg,
+ u32 opcode, bool notify_hw)
+{
+ struct mlx5_wqe_ctrl_seg *wq_ctrl;
+ int opcode_mod = 0;
+ unsigned int size;
+ unsigned int idx;
+
+ idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);
+
+ wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
+ dr_set_ctrl_seg(wq_ctrl, data_seg);
+
+ switch (opcode) {
+ case MLX5_OPCODE_RDMA_READ:
+ case MLX5_OPCODE_RDMA_WRITE:
+ dr_rdma_handle_icm_write_segments(wq_ctrl, remote_addr,
+ rkey, data_seg, &size);
+ break;
+ case MLX5_OPCODE_FLOW_TBL_ACCESS:
+ opcode_mod = MLX5_CMD_OP_MOD_UPDATE_HEADER_MODIFY_ARGUMENT;
+ dr_rdma_handle_flow_access_arg_segments(wq_ctrl, remote_addr,
+ data_seg, &size);
+ break;
+ default:
+ WARN(true, "illegal opcode %d", opcode);
+ return;
+ }
+
+ /* -----------------------------------------------------------
+ * | opcode_mod (8 bits) | wqe_index (16 bits) | opcode (8 bits) |
+ * -----------------------------------------------------------
+ */
+ wq_ctrl->opmod_idx_opcode =
+ cpu_to_be32((opcode_mod << 24) |
+ ((dr_qp->sq.pc & 0xffff) << 8) |
+ opcode);
+ wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8);
+
+ dr_qp->sq.pc += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
+ dr_qp->sq.wqe_head[idx] = dr_qp->sq.head++;
if (notify_hw)
dr_cmd_notify_hw(dr_qp, wq_ctrl);
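The control-segment dword packs opcode_mod, the WQE index, and the opcode as sketched in the comment above, and the producer counter now advances by the number of 64B basic blocks the WQE occupies. A standalone check of both computations, using example values (0x08 is the standard RDMA_WRITE opcode; the other numbers are arbitrary assumptions):

#include <stdint.h>
#include <stdio.h>

#define SEND_WQE_DS 16	/* one data segment is 16 bytes */
#define SEND_WQE_BB 64	/* one basic block is 64 bytes */

static uint32_t opmod_idx_opcode(uint8_t opmod, uint16_t wqe_idx, uint8_t opcode)
{
	return ((uint32_t)opmod << 24) | ((uint32_t)wqe_idx << 8) | opcode;
}

int main(void)
{
	unsigned int size_ds = 6;	/* assumed WQE size in DS units */
	unsigned int pc = 0x1234;	/* assumed producer counter */

	printf("ctrl dword = 0x%08x\n",
	       opmod_idx_opcode(0, pc & 0xffff, 0x08 /* RDMA_WRITE */));
	printf("pc advances by %u basic blocks\n",
	       (size_ds * SEND_WQE_DS + SEND_WQE_BB - 1) / SEND_WQE_BB);
	return 0;
}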
@@ -402,10 +483,16 @@ static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
{
- dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
- &send_info->write, MLX5_OPCODE_RDMA_WRITE, false);
- dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
- &send_info->read, MLX5_OPCODE_RDMA_READ, true);
+ if (send_info->type == WRITE_ICM) {
+ dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
+ &send_info->write, MLX5_OPCODE_RDMA_WRITE, false);
+ dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
+ &send_info->read, MLX5_OPCODE_RDMA_READ, true);
+ } else { /* GTA_ARG */
+ dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
+ &send_info->write, MLX5_OPCODE_FLOW_TBL_ACCESS, true);
+ }
}
/**
@@ -471,24 +558,54 @@ static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
} else if (ne == 1) {
send_ring->pending_wqe -= send_ring->signal_th;
}
- } while (is_drain && send_ring->pending_wqe);
+ } while (ne == 1 ||
+ (is_drain && send_ring->pending_wqe >= send_ring->signal_th));
return 0;
}
-static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring,
- struct postsend_info *send_info)
+static void dr_fill_write_args_segs(struct mlx5dr_send_ring *send_ring,
+ struct postsend_info *send_info)
{
send_ring->pending_wqe++;
if (send_ring->pending_wqe % send_ring->signal_th == 0)
send_info->write.send_flags |= IB_SEND_SIGNALED;
+ else
+ send_info->write.send_flags = 0;
+}
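Argument WQEs request a completion only once every signal_th posts, matching the ICM write path. A tiny model of that cadence with an assumed threshold:

#include <stdio.h>

int main(void)
{
	unsigned int signal_th = 16;	/* assumed signalling threshold */
	unsigned int pending = 0, i;

	for (i = 0; i < 40; i++) {
		pending++;
		if (pending % signal_th == 0)
			printf("WQE %u is posted signaled (requests a CQE)\n", i);
	}
	return 0;
}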
+
+static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn,
+ struct mlx5dr_send_ring *send_ring,
+ struct postsend_info *send_info)
+{
+ u32 buff_offset;
+
+ if (send_info->write.length > dmn->info.max_inline_size) {
+ buff_offset = (send_ring->tx_head &
+ (dmn->send_ring->signal_th - 1)) *
+ send_ring->max_post_send_size;
+ /* Copy to ring mr */
+ memcpy(send_ring->buf + buff_offset,
+ (void *)(uintptr_t)send_info->write.addr,
+ send_info->write.length);
+ send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
+ send_info->write.lkey = send_ring->mr->mkey;
+
+ send_ring->tx_head++;
+ }
+
+ send_ring->pending_wqe++;
+
+ if (send_ring->pending_wqe % send_ring->signal_th == 0)
+ send_info->write.send_flags |= IB_SEND_SIGNALED;
send_ring->pending_wqe++;
send_info->read.length = send_info->write.length;
- /* Read into the same write area */
- send_info->read.addr = (uintptr_t)send_info->write.addr;
- send_info->read.lkey = send_ring->mr->mkey;
+
+ /* Read into dedicated sync buffer */
+ send_info->read.addr = (uintptr_t)send_ring->sync_mr->dma_addr;
+ send_info->read.lkey = send_ring->sync_mr->mkey;
if (send_ring->pending_wqe % send_ring->signal_th == 0)
send_info->read.send_flags = IB_SEND_SIGNALED;
@@ -496,11 +613,20 @@ static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring,
send_info->read.send_flags = 0;
}
+static void dr_fill_data_segs(struct mlx5dr_domain *dmn,
+ struct mlx5dr_send_ring *send_ring,
+ struct postsend_info *send_info)
+{
+ if (send_info->type == WRITE_ICM)
+ dr_fill_write_icm_segs(dmn, send_ring, send_info);
+ else /* args */
+ dr_fill_write_args_segs(send_ring, send_info);
+}
+
static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
struct postsend_info *send_info)
{
struct mlx5dr_send_ring *send_ring = dmn->send_ring;
- u32 buff_offset;
int ret;
if (unlikely(dmn->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
@@ -517,20 +643,7 @@ static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
if (ret)
goto out_unlock;
- if (send_info->write.length > dmn->info.max_inline_size) {
- buff_offset = (send_ring->tx_head &
- (dmn->send_ring->signal_th - 1)) *
- send_ring->max_post_send_size;
- /* Copy to ring mr */
- memcpy(send_ring->buf + buff_offset,
- (void *)(uintptr_t)send_info->write.addr,
- send_info->write.length);
- send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
- send_info->write.lkey = send_ring->mr->mkey;
- }
-
- send_ring->tx_head++;
- dr_fill_data_segs(send_ring, send_info);
+ dr_fill_data_segs(dmn, send_ring, send_info);
dr_post_send(send_ring->qp, send_info);
out_unlock:
@@ -736,6 +849,59 @@ int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
return dr_postsend_icm_data(dmn, &send_info);
}
+int mlx5dr_send_postsend_pattern(struct mlx5dr_domain *dmn,
+ struct mlx5dr_icm_chunk *chunk,
+ u16 num_of_actions,
+ u8 *data)
+{
+ struct postsend_info send_info = {};
+ int ret;
+
+ send_info.write.addr = (uintptr_t)data;
+ send_info.write.length = num_of_actions * DR_MODIFY_ACTION_SIZE;
+ send_info.remote_addr = mlx5dr_icm_pool_get_chunk_mr_addr(chunk);
+ send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(chunk);
+
+ ret = dr_postsend_icm_data(dmn, &send_info);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+int mlx5dr_send_postsend_args(struct mlx5dr_domain *dmn, u64 arg_id,
+ u16 num_of_actions, u8 *actions_data)
+{
+ int data_len, iter = 0, cur_sent;
+ u64 addr;
+ int ret;
+
+ addr = (uintptr_t)actions_data;
+ data_len = num_of_actions * DR_MODIFY_ACTION_SIZE;
+
+ do {
+ struct postsend_info send_info = {};
+
+ send_info.type = GTA_ARG;
+ send_info.write.addr = addr;
+ cur_sent = min_t(u32, data_len, DR_ACTION_CACHE_LINE_SIZE);
+ send_info.write.length = cur_sent;
+ send_info.write.lkey = 0;
+ send_info.remote_addr = arg_id + iter;
+
+ ret = dr_postsend_icm_data(dmn, &send_info);
+ if (ret)
+ goto out;
+
+ iter++;
+ addr += cur_sent;
+ data_len -= cur_sent;
+ } while (data_len > 0);
+
+out:
+ return ret;
+}
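mlx5dr_send_postsend_args() splits the action data into 64B writes, one per consecutive argument id, since a single GTA WQE carries one action cache line. A standalone sketch of that chunking loop (the sizes mirror DR_MODIFY_ACTION_SIZE and DR_ACTION_CACHE_LINE_SIZE; the arg id is an arbitrary example):

#include <stdio.h>

#define ACTION_SIZE 8	/* DR_MODIFY_ACTION_SIZE */
#define CACHE_LINE 64	/* DR_ACTION_CACHE_LINE_SIZE */

int main(void)
{
	unsigned int num_of_actions = 12;	/* 96 bytes of actions */
	int data_len = num_of_actions * ACTION_SIZE;
	unsigned long long arg_id = 0x1000;	/* assumed argument object id */
	int iter = 0;

	while (data_len > 0) {
		int cur = data_len < CACHE_LINE ? data_len : CACHE_LINE;

		printf("write %d bytes to arg id 0x%llx\n", cur, arg_id + iter);
		iter++;
		data_len -= cur;
	}
	return 0;
}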
+
static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
struct mlx5dr_qp *dr_qp,
int port)
@@ -1123,16 +1289,25 @@ int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
goto free_mem;
}
+ dmn->send_ring->sync_buff = kzalloc(dmn->send_ring->max_post_send_size,
+ GFP_KERNEL);
+ if (!dmn->send_ring->sync_buff) {
+ ret = -ENOMEM;
+ goto clean_mr;
+ }
+
dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev,
dmn->pdn, dmn->send_ring->sync_buff,
- MIN_READ_SYNC);
+ dmn->send_ring->max_post_send_size);
if (!dmn->send_ring->sync_mr) {
ret = -ENOMEM;
- goto clean_mr;
+ goto free_sync_mem;
}
return 0;
+free_sync_mem:
+ kfree(dmn->send_ring->sync_buff);
clean_mr:
dr_dereg_mr(dmn->mdev, dmn->send_ring->mr);
free_mem:
@@ -1155,6 +1330,7 @@ void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
dr_dereg_mr(dmn->mdev, send_ring->sync_mr);
dr_dereg_mr(dmn->mdev, send_ring->mr);
kfree(send_ring->buf);
+ kfree(send_ring->sync_buff);
kfree(send_ring);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
index 1e15f605df6e..9413aaf51251 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -633,6 +633,63 @@ int mlx5dr_ste_set_action_decap_l3_list(struct mlx5dr_ste_ctx *ste_ctx,
used_hw_action_num);
}
+static int
+dr_ste_alloc_modify_hdr_chunk(struct mlx5dr_action *action)
+{
+ struct mlx5dr_domain *dmn = action->rewrite->dmn;
+ u32 chunk_size;
+ int ret;
+
+ chunk_size = ilog2(roundup_pow_of_two(action->rewrite->num_of_actions));
+
+ /* HW modify action index granularity is at least 64B */
+ chunk_size = max_t(u32, chunk_size, DR_CHUNK_SIZE_8);
+
+ action->rewrite->chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool,
+ chunk_size);
+ if (!action->rewrite->chunk)
+ return -ENOMEM;
+
+ action->rewrite->index = (mlx5dr_icm_pool_get_chunk_icm_addr(action->rewrite->chunk) -
+ dmn->info.caps.hdr_modify_icm_addr) /
+ DR_ACTION_CACHE_LINE_SIZE;
+
+ ret = mlx5dr_send_postsend_action(action->rewrite->dmn, action);
+ if (ret)
+ goto free_chunk;
+
+ return 0;
+
+free_chunk:
+ mlx5dr_icm_free_chunk(action->rewrite->chunk);
+ return -ENOMEM;
+}
+
+static void dr_ste_free_modify_hdr_chunk(struct mlx5dr_action *action)
+{
+ mlx5dr_icm_free_chunk(action->rewrite->chunk);
+}
+
+int mlx5dr_ste_alloc_modify_hdr(struct mlx5dr_action *action)
+{
+ struct mlx5dr_domain *dmn = action->rewrite->dmn;
+
+ if (mlx5dr_domain_is_support_ptrn_arg(dmn))
+ return dmn->ste_ctx->alloc_modify_hdr_chunk(action);
+
+ return dr_ste_alloc_modify_hdr_chunk(action);
+}
+
+void mlx5dr_ste_free_modify_hdr(struct mlx5dr_action *action)
+{
+ struct mlx5dr_domain *dmn = action->rewrite->dmn;
+
+ if (mlx5dr_domain_is_support_ptrn_arg(dmn))
+ return dmn->ste_ctx->dealloc_modify_hdr_chunk(action);
+
+ return dr_ste_free_modify_hdr_chunk(action);
+}
+
static int dr_ste_build_pre_check_spec(struct mlx5dr_domain *dmn,
struct mlx5dr_match_spec *spec)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
index 7075142bcfb6..54a6619c3ecb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
@@ -195,6 +195,8 @@ struct mlx5dr_ste_ctx {
u8 *hw_action,
u32 hw_action_sz,
u16 *used_hw_action_num);
+ int (*alloc_modify_hdr_chunk)(struct mlx5dr_action *action);
+ void (*dealloc_modify_hdr_chunk)(struct mlx5dr_action *action);
/* Send */
void (*prepare_for_postsend)(u8 *hw_ste_p, u32 ste_size);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
index 084145f18084..4c0704ad166b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
@@ -495,21 +495,66 @@ static void dr_ste_v1_set_rx_decap(u8 *hw_ste_p, u8 *s_action)
dr_ste_v1_set_reparse(hw_ste_p);
}
-static void dr_ste_v1_set_rewrite_actions(u8 *hw_ste_p,
- u8 *s_action,
- u16 num_of_actions,
- u32 re_write_index)
+static void dr_ste_v1_set_accelerated_rewrite_actions(u8 *hw_ste_p,
+ u8 *d_action,
+ u16 num_of_actions,
+ u32 rewrite_pattern,
+ u32 rewrite_args,
+ u8 *action_data)
+{
+ if (action_data) {
+ memcpy(d_action, action_data, DR_MODIFY_ACTION_SIZE);
+ } else {
+ MLX5_SET(ste_double_action_accelerated_modify_action_list_v1, d_action,
+ action_id, DR_STE_V1_ACTION_ID_ACCELERATED_LIST);
+ MLX5_SET(ste_double_action_accelerated_modify_action_list_v1, d_action,
+ modify_actions_pattern_pointer, rewrite_pattern);
+ MLX5_SET(ste_double_action_accelerated_modify_action_list_v1, d_action,
+ number_of_modify_actions, num_of_actions);
+ MLX5_SET(ste_double_action_accelerated_modify_action_list_v1, d_action,
+ modify_actions_argument_pointer, rewrite_args);
+ }
+
+ dr_ste_v1_set_reparse(hw_ste_p);
+}
+
+static void dr_ste_v1_set_basic_rewrite_actions(u8 *hw_ste_p,
+ u8 *s_action,
+ u16 num_of_actions,
+ u32 rewrite_index)
{
MLX5_SET(ste_single_action_modify_list_v1, s_action, action_id,
DR_STE_V1_ACTION_ID_MODIFY_LIST);
MLX5_SET(ste_single_action_modify_list_v1, s_action, num_of_modify_actions,
num_of_actions);
MLX5_SET(ste_single_action_modify_list_v1, s_action, modify_actions_ptr,
- re_write_index);
+ rewrite_index);
dr_ste_v1_set_reparse(hw_ste_p);
}
+static void dr_ste_v1_set_rewrite_actions(u8 *hw_ste_p,
+ u8 *action,
+ u16 num_of_actions,
+ u32 rewrite_pattern,
+ u32 rewrite_args,
+ u8 *action_data)
+{
+ if (rewrite_pattern != MLX5DR_INVALID_PATTERN_INDEX)
+ return dr_ste_v1_set_accelerated_rewrite_actions(hw_ste_p,
+ action,
+ num_of_actions,
+ rewrite_pattern,
+ rewrite_args,
+ action_data);
+
+ /* Fall back to the non-accelerated modify header action list */
+ return dr_ste_v1_set_basic_rewrite_actions(hw_ste_p,
+ action,
+ num_of_actions,
+ rewrite_args);
+}
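The dispatcher above selects the accelerated pattern/argument encoding whenever a valid pattern index is supplied and otherwise keeps the old inline action-list encoding. A trivial sketch of that selection rule (the sentinel mirrors MLX5DR_INVALID_PATTERN_INDEX from dr_types.h):

#include <stdint.h>
#include <stdio.h>

#define INVALID_PATTERN_INDEX 0xffffffffu	/* MLX5DR_INVALID_PATTERN_INDEX */

static const char *rewrite_mode(uint32_t pattern_idx)
{
	return pattern_idx != INVALID_PATTERN_INDEX ?
	       "accelerated (pattern + argument)" :
	       "basic (inline modify action list)";
}

int main(void)
{
	printf("%s\n", rewrite_mode(0x30));
	printf("%s\n", rewrite_mode(INVALID_PATTERN_INDEX));
	return 0;
}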
+
static void dr_ste_v1_set_aso_flow_meter(u8 *d_action,
u32 object_id,
u32 offset,
@@ -604,9 +649,6 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn,
allow_modify_hdr = false;
}
- if (action_type_set[DR_ACTION_TYP_CTR])
- dr_ste_v1_set_counter_id(last_ste, attr->ctr_id);
-
if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
if (!allow_modify_hdr || action_sz < DR_STE_ACTION_DOUBLE_SZ) {
dr_ste_v1_arr_init_next_match(&last_ste, added_stes,
@@ -617,7 +659,9 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn,
}
dr_ste_v1_set_rewrite_actions(last_ste, action,
attr->modify_actions,
- attr->modify_index);
+ attr->modify_pat_idx,
+ attr->modify_index,
+ attr->single_modify_action);
action_sz -= DR_STE_ACTION_DOUBLE_SZ;
action += DR_STE_ACTION_DOUBLE_SZ;
allow_encap = false;
@@ -724,6 +768,10 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn,
attr->range.max);
}
+ /* set counter ID on the last STE to adhere to DMFS behavior */
+ if (action_type_set[DR_ACTION_TYP_CTR])
+ dr_ste_v1_set_counter_id(last_ste, attr->ctr_id);
+
dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi);
dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1);
}
@@ -743,7 +791,9 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn,
if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) {
dr_ste_v1_set_rewrite_actions(last_ste, action,
attr->decap_actions,
- attr->decap_index);
+ attr->decap_pat_idx,
+ attr->decap_index,
+ NULL);
action_sz -= DR_STE_ACTION_DOUBLE_SZ;
action += DR_STE_ACTION_DOUBLE_SZ;
allow_modify_hdr = false;
@@ -798,7 +848,9 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn,
}
dr_ste_v1_set_rewrite_actions(last_ste, action,
attr->modify_actions,
- attr->modify_index);
+ attr->modify_pat_idx,
+ attr->modify_index,
+ attr->single_modify_action);
action_sz -= DR_STE_ACTION_DOUBLE_SZ;
action += DR_STE_ACTION_DOUBLE_SZ;
}
@@ -2175,6 +2227,49 @@ dr_ste_v1_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb,
sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag;
}
+int dr_ste_v1_alloc_modify_hdr_ptrn_arg(struct mlx5dr_action *action)
+{
+ struct mlx5dr_ptrn_mgr *ptrn_mgr;
+ int ret;
+
+ ptrn_mgr = action->rewrite->dmn->ptrn_mgr;
+ if (!ptrn_mgr)
+ return -EOPNOTSUPP;
+
+ action->rewrite->arg = mlx5dr_arg_get_obj(action->rewrite->dmn->arg_mgr,
+ action->rewrite->num_of_actions,
+ action->rewrite->data);
+ if (!action->rewrite->arg) {
+ mlx5dr_err(action->rewrite->dmn, "Failed allocating args for modify header\n");
+ return -EAGAIN;
+ }
+
+ action->rewrite->ptrn =
+ mlx5dr_ptrn_cache_get_pattern(ptrn_mgr,
+ action->rewrite->num_of_actions,
+ action->rewrite->data);
+ if (!action->rewrite->ptrn) {
+ mlx5dr_err(action->rewrite->dmn, "Failed to get pattern\n");
+ ret = -EAGAIN;
+ goto put_arg;
+ }
+
+ return 0;
+
+put_arg:
+ mlx5dr_arg_put_obj(action->rewrite->dmn->arg_mgr,
+ action->rewrite->arg);
+ return ret;
+}
+
+void dr_ste_v1_free_modify_hdr_ptrn_arg(struct mlx5dr_action *action)
+{
+ mlx5dr_ptrn_cache_put_pattern(action->rewrite->dmn->ptrn_mgr,
+ action->rewrite->ptrn);
+ mlx5dr_arg_put_obj(action->rewrite->dmn->arg_mgr,
+ action->rewrite->arg);
+}
+
static struct mlx5dr_ste_ctx ste_ctx_v1 = {
/* Builders */
.build_eth_l2_src_dst_init = &dr_ste_v1_build_eth_l2_src_dst_init,
@@ -2231,6 +2326,9 @@ static struct mlx5dr_ste_ctx ste_ctx_v1 = {
.set_action_add = &dr_ste_v1_set_action_add,
.set_action_copy = &dr_ste_v1_set_action_copy,
.set_action_decap_l3_list = &dr_ste_v1_set_action_decap_l3_list,
+ .alloc_modify_hdr_chunk = &dr_ste_v1_alloc_modify_hdr_ptrn_arg,
+ .dealloc_modify_hdr_chunk = &dr_ste_v1_free_modify_hdr_ptrn_arg,
+
/* Send */
.prepare_for_postsend = &dr_ste_v1_prepare_for_postsend,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h
index b5c0f0f8392f..e2fc69867088 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h
@@ -31,6 +31,8 @@ void dr_ste_v1_set_action_copy(u8 *d_action, u8 dst_hw_field, u8 dst_shifter,
u8 dst_len, u8 src_hw_field, u8 src_shifter);
int dr_ste_v1_set_action_decap_l3_list(void *data, u32 data_sz, u8 *hw_action,
u32 hw_action_sz, u16 *used_hw_action_num);
+int dr_ste_v1_alloc_modify_hdr_ptrn_arg(struct mlx5dr_action *action);
+void dr_ste_v1_free_modify_hdr_ptrn_arg(struct mlx5dr_action *action);
void dr_ste_v1_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask);
void dr_ste_v1_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c
index cf1a3c9a1cf4..808b013cf48c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c
@@ -221,6 +221,8 @@ static struct mlx5dr_ste_ctx ste_ctx_v2 = {
.set_action_add = &dr_ste_v1_set_action_add,
.set_action_copy = &dr_ste_v1_set_action_copy,
.set_action_decap_l3_list = &dr_ste_v1_set_action_decap_l3_list,
+ .alloc_modify_hdr_chunk = &dr_ste_v1_alloc_modify_hdr_ptrn_arg,
+ .dealloc_modify_hdr_chunk = &dr_ste_v1_free_modify_hdr_ptrn_arg,
/* Send */
.prepare_for_postsend = &dr_ste_v1_prepare_for_postsend,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 2b769dcbd453..678a993ab053 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -21,11 +21,16 @@
#define DR_NUM_OF_FLEX_PARSERS 8
#define DR_STE_MAX_FLEX_0_ID 3
#define DR_STE_MAX_FLEX_1_ID 7
+#define DR_ACTION_CACHE_LINE_SIZE 64
#define mlx5dr_err(dmn, arg...) mlx5_core_err((dmn)->mdev, ##arg)
#define mlx5dr_info(dmn, arg...) mlx5_core_info((dmn)->mdev, ##arg)
#define mlx5dr_dbg(dmn, arg...) mlx5_core_dbg((dmn)->mdev, ##arg)
+struct mlx5dr_ptrn_mgr;
+struct mlx5dr_arg_mgr;
+struct mlx5dr_arg_obj;
+
static inline bool dr_is_flex_parser_0_id(u8 parser_id)
{
return parser_id <= DR_STE_MAX_FLEX_0_ID;
@@ -66,6 +71,8 @@ enum mlx5dr_icm_chunk_size {
enum mlx5dr_icm_type {
DR_ICM_TYPE_STE,
DR_ICM_TYPE_MODIFY_ACTION,
+ DR_ICM_TYPE_MODIFY_HDR_PTRN,
+ DR_ICM_TYPE_MAX,
};
static inline enum mlx5dr_icm_chunk_size
@@ -255,11 +262,15 @@ u64 mlx5dr_ste_get_mr_addr(struct mlx5dr_ste *ste);
struct list_head *mlx5dr_ste_get_miss_list(struct mlx5dr_ste *ste);
#define MLX5DR_MAX_VLANS 2
+#define MLX5DR_INVALID_PATTERN_INDEX 0xffffffff
struct mlx5dr_ste_actions_attr {
u32 modify_index;
+ u32 modify_pat_idx;
u16 modify_actions;
+ u8 *single_modify_action;
u32 decap_index;
+ u32 decap_pat_idx;
u16 decap_actions;
u8 decap_with_vlan:1;
u64 final_icm_addr;
@@ -331,6 +342,8 @@ int mlx5dr_ste_set_action_decap_l3_list(struct mlx5dr_ste_ctx *ste_ctx,
u8 *hw_action,
u32 hw_action_sz,
u16 *used_hw_action_num);
+int mlx5dr_ste_alloc_modify_hdr(struct mlx5dr_action *action);
+void mlx5dr_ste_free_modify_hdr(struct mlx5dr_action *action);
const struct mlx5dr_ste_action_modify_field *
mlx5dr_ste_conv_modify_hdr_sw_field(struct mlx5dr_ste_ctx *ste_ctx, u16 sw_field);
@@ -861,6 +874,8 @@ struct mlx5dr_cmd_caps {
u64 esw_tx_drop_address;
u32 log_icm_size;
u64 hdr_modify_icm_addr;
+ u32 log_modify_pattern_icm_size;
+ u64 hdr_modify_pattern_icm_addr;
u32 flex_protocols;
u8 flex_parser_id_icmp_dw0;
u8 flex_parser_id_icmp_dw1;
@@ -888,6 +903,9 @@ struct mlx5dr_cmd_caps {
struct mlx5dr_vports vports;
bool prio_tag_required;
struct mlx5dr_roce_cap roce_caps;
+ u16 log_header_modify_argument_granularity;
+ u16 log_header_modify_argument_max_alloc;
+ bool support_modify_argument;
u8 is_ecpf:1;
u8 isolate_vl_tc:1;
};
@@ -910,6 +928,7 @@ struct mlx5dr_domain_info {
u32 max_send_wr;
u32 max_log_sw_icm_sz;
u32 max_log_action_icm_sz;
+ u32 max_log_modify_hdr_pattern_icm_sz;
struct mlx5dr_domain_rx_tx rx;
struct mlx5dr_domain_rx_tx tx;
struct mlx5dr_cmd_caps caps;
@@ -928,6 +947,8 @@ struct mlx5dr_domain {
struct mlx5dr_send_info_pool *send_info_pool_tx;
struct kmem_cache *chunks_kmem_cache;
struct kmem_cache *htbls_kmem_cache;
+ struct mlx5dr_ptrn_mgr *ptrn_mgr;
+ struct mlx5dr_arg_mgr *arg_mgr;
struct mlx5dr_send_ring *send_ring;
struct mlx5dr_domain_info info;
struct xarray csum_fts_xa;
@@ -935,6 +956,8 @@ struct mlx5dr_domain {
struct list_head dbg_tbl_list;
struct mlx5dr_dbg_dump_info dump_info;
struct xarray definers_xa;
+ /* memory management statistics */
+ u32 num_buddies[DR_ICM_TYPE_MAX];
};
struct mlx5dr_table_rx_tx {
@@ -994,15 +1017,34 @@ struct mlx5dr_ste_action_modify_field {
u8 l4_type;
};
+struct mlx5dr_ptrn_obj {
+ struct mlx5dr_icm_chunk *chunk;
+ u8 *data;
+ u16 num_of_actions;
+ u32 index;
+ refcount_t refcount;
+ struct list_head list;
+};
+
+struct mlx5dr_arg_obj {
+ u32 obj_id;
+ u32 obj_offset;
+ struct list_head list_node;
+ u32 log_chunk_size;
+};
+
struct mlx5dr_action_rewrite {
struct mlx5dr_domain *dmn;
struct mlx5dr_icm_chunk *chunk;
u8 *data;
u16 num_of_actions;
u32 index;
+ u8 single_action_opt:1;
u8 allow_rx:1;
u8 allow_tx:1;
u8 modify_ttl:1;
+ struct mlx5dr_ptrn_obj *ptrn;
+ struct mlx5dr_arg_obj *arg;
};
struct mlx5dr_action_reformat {
@@ -1334,6 +1376,12 @@ struct mlx5dr_cmd_gid_attr {
int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num,
u16 index, struct mlx5dr_cmd_gid_attr *attr);
+int mlx5dr_cmd_create_modify_header_arg(struct mlx5_core_dev *dev,
+ u16 log_obj_range, u32 pd,
+ u32 *obj_id);
+void mlx5dr_cmd_destroy_modify_header_arg(struct mlx5_core_dev *dev,
+ u32 obj_id);
+
struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn,
enum mlx5dr_icm_type icm_type);
void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool);
@@ -1368,6 +1416,7 @@ struct mlx5dr_qp {
struct mlx5_wq_ctrl wq_ctrl;
u32 qpn;
struct {
+ unsigned int head;
unsigned int pc;
unsigned int cc;
unsigned int size;
@@ -1399,9 +1448,6 @@ struct mlx5dr_mr {
size_t size;
};
-#define MAX_SEND_CQE 64
-#define MIN_READ_SYNC 64
-
struct mlx5dr_send_ring {
struct mlx5dr_cq *cq;
struct mlx5dr_qp *qp;
@@ -1416,7 +1462,7 @@ struct mlx5dr_send_ring {
u32 tx_head;
void *buf;
u32 buf_size;
- u8 sync_buff[MIN_READ_SYNC];
+ u8 *sync_buff;
struct mlx5dr_mr *sync_mr;
spinlock_t lock; /* Protect the data path of the send ring */
bool err_state; /* send_ring is not usable in err state */
@@ -1440,6 +1486,12 @@ int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
bool update_hw_ste);
int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
struct mlx5dr_action *action);
+int mlx5dr_send_postsend_pattern(struct mlx5dr_domain *dmn,
+ struct mlx5dr_icm_chunk *chunk,
+ u16 num_of_actions,
+ u8 *data);
+int mlx5dr_send_postsend_args(struct mlx5dr_domain *dmn, u64 arg_id,
+ u16 num_of_actions, u8 *actions_data);
int mlx5dr_send_info_pool_create(struct mlx5dr_domain *dmn);
void mlx5dr_send_info_pool_destroy(struct mlx5dr_domain *dmn);
@@ -1526,4 +1578,20 @@ static inline bool mlx5dr_supp_match_ranges(struct mlx5_core_dev *dev)
(1ULL << MLX5_IFC_DEFINER_FORMAT_ID_SELECT));
}
+bool mlx5dr_domain_is_support_ptrn_arg(struct mlx5dr_domain *dmn);
+struct mlx5dr_ptrn_mgr *mlx5dr_ptrn_mgr_create(struct mlx5dr_domain *dmn);
+void mlx5dr_ptrn_mgr_destroy(struct mlx5dr_ptrn_mgr *mgr);
+struct mlx5dr_ptrn_obj *mlx5dr_ptrn_cache_get_pattern(struct mlx5dr_ptrn_mgr *mgr,
+ u16 num_of_actions, u8 *data);
+void mlx5dr_ptrn_cache_put_pattern(struct mlx5dr_ptrn_mgr *mgr,
+ struct mlx5dr_ptrn_obj *pattern);
+struct mlx5dr_arg_mgr *mlx5dr_arg_mgr_create(struct mlx5dr_domain *dmn);
+void mlx5dr_arg_mgr_destroy(struct mlx5dr_arg_mgr *mgr);
+struct mlx5dr_arg_obj *mlx5dr_arg_get_obj(struct mlx5dr_arg_mgr *mgr,
+ u16 num_of_actions,
+ u8 *data);
+void mlx5dr_arg_put_obj(struct mlx5dr_arg_mgr *mgr,
+ struct mlx5dr_arg_obj *arg_obj);
+u32 mlx5dr_arg_get_obj_id(struct mlx5dr_arg_obj *arg_obj);
+
#endif /* _DR_TYPES_H_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h
index 790a17d6207f..ca3b0f1453a7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h
@@ -100,7 +100,7 @@ struct mlx5_ifc_ste_double_action_insert_with_ptr_v1_bits {
u8 pointer[0x20];
};
-struct mlx5_ifc_ste_double_action_modify_action_list_v1_bits {
+struct mlx5_ifc_ste_double_action_accelerated_modify_action_list_v1_bits {
u8 action_id[0x8];
u8 modify_actions_pattern_pointer[0x18];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/thermal.c b/drivers/net/ethernet/mellanox/mlx5/core/thermal.c
new file mode 100644
index 000000000000..e47fa6fb836f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/thermal.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES.
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/thermal.h>
+#include <linux/err.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include "thermal.h"
+
+#define MLX5_THERMAL_POLL_INT_MSEC 1000
+#define MLX5_THERMAL_NUM_TRIPS 0
+#define MLX5_THERMAL_ASIC_SENSOR_INDEX 0
+
+/* Bit string indicating the writability of trip points, if any */
+#define MLX5_THERMAL_TRIP_MASK (BIT(MLX5_THERMAL_NUM_TRIPS) - 1)
+
+struct mlx5_thermal {
+ struct mlx5_core_dev *mdev;
+ struct thermal_zone_device *tzdev;
+};
+
+static int mlx5_thermal_get_mtmp_temp(struct mlx5_core_dev *mdev, u32 id, int *p_temp)
+{
+ u32 mtmp_out[MLX5_ST_SZ_DW(mtmp_reg)] = {};
+ u32 mtmp_in[MLX5_ST_SZ_DW(mtmp_reg)] = {};
+ int err;
+
+ MLX5_SET(mtmp_reg, mtmp_in, sensor_index, id);
+
+ err = mlx5_core_access_reg(mdev, mtmp_in, sizeof(mtmp_in),
+ mtmp_out, sizeof(mtmp_out),
+ MLX5_REG_MTMP, 0, 0);
+
+ if (err)
+ return err;
+
+ *p_temp = MLX5_GET(mtmp_reg, mtmp_out, temperature);
+
+ return 0;
+}
+
+static int mlx5_thermal_get_temp(struct thermal_zone_device *tzdev,
+ int *p_temp)
+{
+ struct mlx5_thermal *thermal = tzdev->devdata;
+ struct mlx5_core_dev *mdev = thermal->mdev;
+ int err;
+
+ err = mlx5_thermal_get_mtmp_temp(mdev, MLX5_THERMAL_ASIC_SENSOR_INDEX, p_temp);
+
+ if (err)
+ return err;
+
+ /* The temperature is reported in units of 0.125 C. The thermal
+ * framework expects the value in millidegrees C (0.001 C).
+ */
+ *p_temp *= 125;
+
+ return 0;
+}
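MTMP reports the temperature in 0.125 C units while the thermal core expects millidegrees, hence the multiplication by 125. A quick check of the conversion with an assumed sensor reading:

#include <stdio.h>

int main(void)
{
	int raw = 8 * 45;	/* assume the sensor reports 45.0 C (0.125 C units) */

	printf("%d mC\n", raw * 125);	/* -> 45000 millidegrees */
	return 0;
}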
+
+static struct thermal_zone_device_ops mlx5_thermal_ops = {
+ .get_temp = mlx5_thermal_get_temp,
+};
+
+int mlx5_thermal_init(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_thermal *thermal;
+ struct thermal_zone_device *tzd;
+ const char *data = "mlx5";
+
+ tzd = thermal_zone_get_zone_by_name(data);
+ if (!IS_ERR(tzd))
+ return 0;
+
+ thermal = kzalloc(sizeof(*thermal), GFP_KERNEL);
+ if (!thermal)
+ return -ENOMEM;
+
+ thermal->mdev = mdev;
+ thermal->tzdev = thermal_zone_device_register(data,
+ MLX5_THERMAL_NUM_TRIPS,
+ MLX5_THERMAL_TRIP_MASK,
+ thermal,
+ &mlx5_thermal_ops,
+ NULL, 0, MLX5_THERMAL_POLL_INT_MSEC);
+ if (IS_ERR(thermal->tzdev)) {
+ dev_err(mdev->device, "Failed to register thermal zone device (%s) %ld\n",
+ data, PTR_ERR(thermal->tzdev));
+ kfree(thermal);
+ return -EINVAL;
+ }
+
+ mdev->thermal = thermal;
+ return 0;
+}
+
+void mlx5_thermal_uninit(struct mlx5_core_dev *mdev)
+{
+ if (!mdev->thermal)
+ return;
+
+ thermal_zone_device_unregister(mdev->thermal->tzdev);
+ kfree(mdev->thermal);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/thermal.h b/drivers/net/ethernet/mellanox/mlx5/core/thermal.h
new file mode 100644
index 000000000000..7d752c122192
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/thermal.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+ * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES.
+ */
+#ifndef __MLX5_THERMAL_DRIVER_H
+#define __MLX5_THERMAL_DRIVER_H
+
+#if IS_ENABLED(CONFIG_THERMAL)
+int mlx5_thermal_init(struct mlx5_core_dev *mdev);
+void mlx5_thermal_uninit(struct mlx5_core_dev *mdev);
+#else
+static inline int mlx5_thermal_init(struct mlx5_core_dev *mdev)
+{
+ mdev->thermal = NULL;
+ return 0;
+}
+
+static inline void mlx5_thermal_uninit(struct mlx5_core_dev *mdev) { }
+#endif
+
+#endif /* __MLX5_THERMAL_DRIVER_H */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
index 66dd42a8e72f..70d7fff24fa2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
@@ -19,6 +19,9 @@
#define MLXSW_THERMAL_ASIC_TEMP_NORM 75000 /* 75C */
#define MLXSW_THERMAL_ASIC_TEMP_HIGH 85000 /* 85C */
#define MLXSW_THERMAL_ASIC_TEMP_HOT 105000 /* 105C */
+#define MLXSW_THERMAL_MODULE_TEMP_NORM 55000 /* 55C */
+#define MLXSW_THERMAL_MODULE_TEMP_HIGH 65000 /* 65C */
+#define MLXSW_THERMAL_MODULE_TEMP_HOT 80000 /* 80C */
#define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */
#define MLXSW_THERMAL_MODULE_TEMP_SHIFT (MLXSW_THERMAL_HYSTERESIS_TEMP * 2)
#define MLXSW_THERMAL_MAX_STATE 10
@@ -30,12 +33,6 @@ static char * const mlxsw_thermal_external_allowed_cdev[] = {
"mlxreg_fan",
};
-enum mlxsw_thermal_trips {
- MLXSW_THERMAL_TEMP_TRIP_NORM,
- MLXSW_THERMAL_TEMP_TRIP_HIGH,
- MLXSW_THERMAL_TEMP_TRIP_HOT,
-};
-
struct mlxsw_cooling_states {
int min_state;
int max_state;
@@ -59,6 +56,24 @@ static const struct thermal_trip default_thermal_trips[] = {
},
};
+static const struct thermal_trip default_thermal_module_trips[] = {
+ { /* In range - 0-40% PWM */
+ .type = THERMAL_TRIP_ACTIVE,
+ .temperature = MLXSW_THERMAL_MODULE_TEMP_NORM,
+ .hysteresis = MLXSW_THERMAL_HYSTERESIS_TEMP,
+ },
+ {
+ /* In range - 40-100% PWM */
+ .type = THERMAL_TRIP_ACTIVE,
+ .temperature = MLXSW_THERMAL_MODULE_TEMP_HIGH,
+ .hysteresis = MLXSW_THERMAL_HYSTERESIS_TEMP,
+ },
+ { /* Warning */
+ .type = THERMAL_TRIP_HOT,
+ .temperature = MLXSW_THERMAL_MODULE_TEMP_HOT,
+ },
+};
+
static const struct mlxsw_cooling_states default_cooling_states[] = {
{
.min_state = 0,
@@ -140,63 +155,6 @@ static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal,
return -ENODEV;
}
-static void
-mlxsw_thermal_module_trips_reset(struct mlxsw_thermal_module *tz)
-{
- tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temperature = 0;
- tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temperature = 0;
- tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temperature = 0;
-}
-
-static int
-mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core,
- struct mlxsw_thermal_module *tz,
- int crit_temp, int emerg_temp)
-{
- int err;
-
- /* Do not try to query temperature thresholds directly from the module's
- * EEPROM if we got valid thresholds from MTMP.
- */
- if (!emerg_temp || !crit_temp) {
- err = mlxsw_env_module_temp_thresholds_get(core, tz->slot_index,
- tz->module,
- SFP_TEMP_HIGH_WARN,
- &crit_temp);
- if (err)
- return err;
-
- err = mlxsw_env_module_temp_thresholds_get(core, tz->slot_index,
- tz->module,
- SFP_TEMP_HIGH_ALARM,
- &emerg_temp);
- if (err)
- return err;
- }
-
- if (crit_temp > emerg_temp) {
- dev_warn(dev, "%s : Critical threshold %d is above emergency threshold %d\n",
- thermal_zone_device_type(tz->tzdev), crit_temp, emerg_temp);
- return 0;
- }
-
- /* According to the system thermal requirements, the thermal zones are
- * defined with three trip points. The critical and emergency
- * temperature thresholds, provided by QSFP module are set as "active"
- * and "hot" trip points, "normal" trip point is derived from "active"
- * by subtracting double hysteresis value.
- */
- if (crit_temp >= MLXSW_THERMAL_MODULE_TEMP_SHIFT)
- tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temperature = crit_temp -
- MLXSW_THERMAL_MODULE_TEMP_SHIFT;
- else
- tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temperature = crit_temp;
- tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temperature = crit_temp;
- tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temperature = emerg_temp;
-
- return 0;
-}
-
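/*
 * Worked comparison (threshold values assumed, not from the patch):
 * with the removed mlxsw_thermal_module_trips_update() above, a module
 * reporting a 70C high-warning and an 80C high-alarm threshold
 * (whether via MTMP or its EEPROM) would have produced:
 *
 *	norm = 70000 - MLXSW_THERMAL_MODULE_TEMP_SHIFT;	// 70000 - 10000 = 60000
 *	high = 70000;					// high-warning threshold
 *	hot  = 80000;					// high-alarm threshold
 *
 * The patch replaces this per-module derivation with the fixed
 * 55000 / 65000 / 80000 trips defined above.
 */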
static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev,
struct thermal_cooling_device *cdev)
{
@@ -325,59 +283,22 @@ static int mlxsw_thermal_module_unbind(struct thermal_zone_device *tzdev,
return err;
}
-static void
-mlxsw_thermal_module_temp_and_thresholds_get(struct mlxsw_core *core,
- u8 slot_index, u16 sensor_index,
- int *p_temp, int *p_crit_temp,
- int *p_emerg_temp)
-{
- char mtmp_pl[MLXSW_REG_MTMP_LEN];
- int err;
-
- /* Read module temperature and thresholds. */
- mlxsw_reg_mtmp_pack(mtmp_pl, slot_index, sensor_index,
- false, false);
- err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl);
- if (err) {
- /* Set temperature and thresholds to zero to avoid passing
- * uninitialized data back to the caller.
- */
- *p_temp = 0;
- *p_crit_temp = 0;
- *p_emerg_temp = 0;
-
- return;
- }
- mlxsw_reg_mtmp_unpack(mtmp_pl, p_temp, NULL, p_crit_temp, p_emerg_temp,
- NULL);
-}
-
static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev,
int *p_temp)
{
struct mlxsw_thermal_module *tz = thermal_zone_device_priv(tzdev);
struct mlxsw_thermal *thermal = tz->parent;
- int temp, crit_temp, emerg_temp;
- struct device *dev;
+ char mtmp_pl[MLXSW_REG_MTMP_LEN];
u16 sensor_index;
+ int err;
- dev = thermal->bus_info->dev;
sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + tz->module;
-
- /* Read module temperature and thresholds. */
- mlxsw_thermal_module_temp_and_thresholds_get(thermal->core,
- tz->slot_index,
- sensor_index, &temp,
- &crit_temp, &emerg_temp);
- *p_temp = temp;
-
- if (!temp)
- return 0;
-
- /* Update trip points. */
- mlxsw_thermal_module_trips_update(dev, thermal->core, tz,
- crit_temp, emerg_temp);
-
+ mlxsw_reg_mtmp_pack(mtmp_pl, tz->slot_index, sensor_index,
+ false, false);
+ err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
+ if (err)
+ return err;
+ mlxsw_reg_mtmp_unpack(mtmp_pl, p_temp, NULL, NULL, NULL, NULL);
return 0;
}
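/*
 * Annotation, not part of the patch: in the rewritten
 * mlxsw_thermal_module_temp_get() above, the NULL arguments passed to
 * mlxsw_reg_mtmp_unpack() occupy the same output slots that the removed
 * helper used for the high-warning and high-alarm thresholds. With the
 * static trip table, only the current temperature is needed per poll,
 * so those fields are simply discarded.
 */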
@@ -521,36 +442,26 @@ static void mlxsw_thermal_module_tz_fini(struct thermal_zone_device *tzdev)
thermal_zone_device_unregister(tzdev);
}
-static int
+static void
mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core,
struct mlxsw_thermal *thermal,
struct mlxsw_thermal_area *area, u8 module)
{
struct mlxsw_thermal_module *module_tz;
- int dummy_temp, crit_temp, emerg_temp;
- u16 sensor_index;
- sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + module;
module_tz = &area->tz_module_arr[module];
/* Skip if parent is already set (case of port split). */
if (module_tz->parent)
- return 0;
+ return;
module_tz->module = module;
module_tz->slot_index = area->slot_index;
module_tz->parent = thermal;
- memcpy(module_tz->trips, default_thermal_trips,
+ BUILD_BUG_ON(ARRAY_SIZE(default_thermal_module_trips) !=
+ MLXSW_THERMAL_NUM_TRIPS);
+ memcpy(module_tz->trips, default_thermal_module_trips,
sizeof(thermal->trips));
memcpy(module_tz->cooling_states, default_cooling_states,
sizeof(thermal->cooling_states));
- /* Initialize all trip point. */
- mlxsw_thermal_module_trips_reset(module_tz);
- /* Read module temperature and thresholds. */
- mlxsw_thermal_module_temp_and_thresholds_get(core, area->slot_index,
- sensor_index, &dummy_temp,
- &crit_temp, &emerg_temp);
- /* Update trip point according to the module data. */
- return mlxsw_thermal_module_trips_update(dev, core, module_tz,
- crit_temp, emerg_temp);
}
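/*
 * Hedged sketch, not part of the patch: the trips copied into
 * module_tz->trips above are ultimately handed to the thermal core by
 * mlxsw_thermal_module_tz_init() (not shown in this hunk). The general
 * shape of such a registration is sketched below; the exact signature
 * of thermal_zone_device_register_with_trips() has changed across
 * kernel releases, so the argument list is an assumption (roughly the
 * v6.4 form), and the "example_*" names are invented for illustration.
 *
 *	static int example_get_temp(struct thermal_zone_device *tzdev, int *p_temp)
 *	{
 *		*p_temp = 40000;	// a real driver reads hardware here
 *		return 0;
 *	}
 *
 *	static struct thermal_zone_device_ops example_ops = {
 *		.get_temp = example_get_temp,
 *	};
 *
 *	tzdev = thermal_zone_device_register_with_trips("example-module",
 *							example_trips,
 *							ARRAY_SIZE(example_trips),
 *							0, priv, &example_ops,
 *							NULL, 0, 1000);
 */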
static void mlxsw_thermal_module_fini(struct mlxsw_thermal_module *module_tz)
@@ -589,11 +500,8 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core,
if (!area->tz_module_arr)
return -ENOMEM;
- for (i = 0; i < area->tz_module_num; i++) {
- err = mlxsw_thermal_module_init(dev, core, thermal, area, i);
- if (err)
- goto err_thermal_module_init;
- }
+ for (i = 0; i < area->tz_module_num; i++)
+ mlxsw_thermal_module_init(dev, core, thermal, area, i);
for (i = 0; i < area->tz_module_num; i++) {
module_tz = &area->tz_module_arr[i];
@@ -607,7 +515,6 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core,
return 0;
err_thermal_module_tz_init:
-err_thermal_module_init:
for (i = area->tz_module_num - 1; i >= 0; i--)
mlxsw_thermal_module_fini(&area->tz_module_arr[i]);
kfree(area->tz_module_arr);