summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-08-29 11:33:01 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-08-29 11:33:01 -0700
commitbd6c11bc43c496cddfc6cf603b5d45365606dbd5 (patch)
tree36318fa68f784d397111991177d65bd6325189c4 /drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
parent68cf01760bc0891074e813b9bb06d2696cac1c01 (diff)
parentc873512ef3a39cc1a605b7a5ff2ad0a33d619aa8 (diff)
Merge tag 'net-next-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Paolo Abeni: "Core: - Increase size limits for to-be-sent skb frag allocations. This allows tun, tap devices and packet sockets to better cope with large write operations - Store netdevs in an xarray, to simplify iterating over netdevs - Refactor nexthop selection for multipath routes - Improve sched class lifetime handling - Add backup nexthop ID support for bridge - Implement drop reasons support in openvswitch - Several data race annotations and fixes - Constify the sk parameter of routing functions - Prepend kernel version to netconsole message Protocols: - Implement support for TCP probing the peer being under memory pressure - Remove hard coded limitation on IPv6 specific info placement inside the socket struct - Get rid of sysctl_tcp_adv_win_scale and use an auto-estimated per socket scaling factor - Scaling-up the IPv6 expired route GC via a separated list of expiring routes - In-kernel support for the TLS alert protocol - Better support for UDP reuseport with connected sockets - Add NEXT-C-SID support for SRv6 End.X behavior, reducing the SR header size - Get rid of additional ancillary per MPTCP connection struct socket - Implement support for BPF-based MPTCP packet schedulers - Format MPTCP subtests selftests results in TAP - Several new SMC 2.1 features including unique experimental options, max connections per lgr negotiation, max links per lgr negotiation BPF: - Multi-buffer support in AF_XDP - Add multi uprobe BPF links for attaching multiple uprobes and usdt probes, which is significantly faster and saves extra fds - Implement an fd-based tc BPF attach API (TCX) and BPF link support on top of it - Add SO_REUSEPORT support for TC bpf_sk_assign - Support new instructions from cpu v4 to simplify the generated code and feature completeness, for x86, arm64, riscv64 - Support defragmenting IPv(4|6) packets in BPF - Teach verifier actual bounds of bpf_get_smp_processor_id() and fix perf+libbpf issue related to custom section 
handling - Introduce bpf map element count and enable it for all program types - Add a BPF hook in sys_socket() to change the protocol ID from IPPROTO_TCP to IPPROTO_MPTCP to cover migration for legacy - Introduce bpf_mem_cache_free_rcu() and fix OOM under stress - Add uprobe support for the bpf_get_func_ip helper - Check skb ownership against full socket - Support for up to 12 arguments in BPF trampoline - Extend link_info for kprobe_multi and perf_event links Netfilter: - Speed-up process exit by aborting ruleset validation if a fatal signal is pending - Allow NLA_POLICY_MASK to be used with BE16/BE32 types Driver API: - Page pool optimizations, to improve data locality and cache usage - Introduce ndo_hwtstamp_get() and ndo_hwtstamp_set() to avoid the need for raw ioctl() handling in drivers - Simplify genetlink dump operations (doit/dumpit) providing them the common information already populated in struct genl_info - Extend and use the yaml devlink specs to [re]generate the split ops - Introduce devlink selective dumps, to allow SF filtering based on handle and other attributes - Add yaml netlink spec for netlink-raw families, allow route, link and address related queries via the ynl tool - Remove phylink legacy mode support - Support offload LED blinking to phy - Add devlink port function attributes for IPsec New hardware / drivers: - Ethernet: - Broadcom ASP 2.0 (72165) ethernet controller - MediaTek MT7988 SoC - Texas Instruments AM654 SoC - Texas Instruments IEP driver - Atheros qca8081 phy - Marvell 88Q2110 phy - NXP TJA1120 phy - WiFi: - MediaTek mt7981 support - Can: - Kvaser SmartFusion2 PCI Express devices - Allwinner T113 controllers - Texas Instruments tcan4552/4553 chips - Bluetooth: - Intel Gale Peak - Qualcomm WCN3988 and WCN7850 - NXP AW693 and IW624 - Mediatek MT2925 Drivers: - Ethernet NICs: - nVidia/Mellanox: - mlx5: - support UDP encapsulation in packet offload mode - IPsec packet offload support in eswitch mode - improve aRFS observability 
by adding new set of counters - extends MACsec offload support to cover RoCE traffic - dynamic completion EQs - mlx4: - convert to use auxiliary bus instead of custom interface logic - Intel - ice: - implement switchdev bridge offload, even for LAG interfaces - implement SRIOV support for LAG interfaces - igc: - add support for multiple in-flight TX timestamps - Broadcom: - bnxt: - use the unified RX page pool buffers for XDP and non-XDP - use the NAPI skb allocation cache - OcteonTX2: - support Round Robin scheduling HTB offload - TC flower offload support for SPI field - Freescale: - add XDP_TX feature support - AMD: - ionic: add support for PCI FLR event - sfc: - basic conntrack offload - introduce eth, ipv4 and ipv6 pedit offloads - ST Microelectronics: - stmmac: maximize PTP timestamping resolution - Virtual NICs: - Microsoft vNIC: - batch ringing RX queue doorbell on receiving packets - add page pool for RX buffers - Virtio vNIC: - add per queue interrupt coalescing support - Google vNIC: - add queue-page-list mode support - Ethernet high-speed switches: - nVidia/Mellanox (mlxsw): - add port range matching tc-flower offload - permit enslavement to netdevices with uppers - Ethernet embedded switches: - Marvell (mv88e6xxx): - convert to phylink_pcs - Renesas: - r8A779fx: add speed change support - rzn1: enables vlan support - Ethernet PHYs: - convert mv88e6xxx to phylink_pcs - WiFi: - Qualcomm Wi-Fi 7 (ath12k): - extremely High Throughput (EHT) PHY support - RealTek (rtl8xxxu): - enable AP mode for: RTL8192FU, RTL8710BU (RTL8188GU), RTL8192EU and RTL8723BU - RealTek (rtw89): - Introduce Time Averaged SAR (TAS) support - Connector: - support for event filtering" * tag 'net-next-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1806 commits) net: ethernet: mtk_wed: minor change in wed_{tx,rx}info_show net: ethernet: mtk_wed: add some more info in wed_txinfo_show handler net: stmmac: clarify difference between "interface" and "phy_interface" 
r8152: add vendor/device ID pair for D-Link DUB-E250 devlink: move devlink_notify_register/unregister() to dev.c devlink: move small_ops definition into netlink.c devlink: move tracepoint definitions into core.c devlink: push linecard related code into separate file devlink: push rate related code into separate file devlink: push trap related code into separate file devlink: use tracepoint_enabled() helper devlink: push region related code into separate file devlink: push param related code into separate file devlink: push resource related code into separate file devlink: push dpipe related code into separate file devlink: move and rename devlink_dpipe_send_and_alloc_skb() helper devlink: push shared buffer related code into separate file devlink: push port related code into separate file devlink: push object register/unregister notifications into separate helpers inet: fix IP_TRANSPARENT error handling ...
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/eswitch.c')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c202
1 files changed, 164 insertions, 38 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 243c455f1029..6cd7d6497e10 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -48,6 +48,7 @@
#include "devlink.h"
#include "ecpf.h"
#include "en/mod_hdr.h"
+#include "en_accel/ipsec.h"
enum {
MLX5_ACTION_NONE = 0,
@@ -77,18 +78,31 @@ static int mlx5_eswitch_check(const struct mlx5_core_dev *dev)
return 0;
}
-struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink)
+static struct mlx5_eswitch *__mlx5_devlink_eswitch_get(struct devlink *devlink, bool check)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
int err;
- err = mlx5_eswitch_check(dev);
- if (err)
- return ERR_PTR(err);
+ if (check) {
+ err = mlx5_eswitch_check(dev);
+ if (err)
+ return ERR_PTR(err);
+ }
return dev->priv.eswitch;
}
+struct mlx5_eswitch *__must_check
+mlx5_devlink_eswitch_get(struct devlink *devlink)
+{
+ return __mlx5_devlink_eswitch_get(devlink, true);
+}
+
+struct mlx5_eswitch *mlx5_devlink_eswitch_nocheck_get(struct devlink *devlink)
+{
+ return __mlx5_devlink_eswitch_get(devlink, false);
+}
+
struct mlx5_vport *__must_check
mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num)
{
@@ -818,6 +832,8 @@ static int mlx5_esw_vport_caps_get(struct mlx5_eswitch *esw, struct mlx5_vport *
hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
vport->info.mig_enabled = MLX5_GET(cmd_hca_cap_2, hca_caps, migratable);
+
+ err = mlx5_esw_ipsec_vf_offload_get(esw->dev, vport);
out_free:
kfree(query_ctx);
return err;
@@ -882,16 +898,12 @@ static void esw_vport_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport
esw_vport_cleanup_acl(esw, vport);
}
-int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num,
+int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
enum mlx5_eswitch_vport_event enabled_events)
{
- struct mlx5_vport *vport;
+ u16 vport_num = vport->vport;
int ret;
- vport = mlx5_eswitch_get_vport(esw, vport_num);
- if (IS_ERR(vport))
- return PTR_ERR(vport);
-
mutex_lock(&esw->state_lock);
WARN_ON(vport->enabled);
@@ -904,6 +916,9 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num,
/* Sync with current vport context */
vport->enabled_events = enabled_events;
vport->enabled = true;
+ if (vport->vport != MLX5_VPORT_PF &&
+ (vport->info.ipsec_crypto_enabled || vport->info.ipsec_packet_enabled))
+ esw->enabled_ipsec_vf_count++;
/* Esw manager is trusted by default. Host PF (vport 0) is trusted as well
* in smartNIC as it's a vport group manager.
@@ -912,7 +927,7 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num,
(!vport_num && mlx5_core_is_ecpf(esw->dev)))
vport->info.trusted = true;
- if (!mlx5_esw_is_manager_vport(esw, vport->vport) &&
+ if (!mlx5_esw_is_manager_vport(esw, vport_num) &&
MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) {
ret = mlx5_esw_vport_vhca_id_set(esw, vport_num);
if (ret)
@@ -939,15 +954,12 @@ err_vhca_mapping:
return ret;
}
-void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num)
+void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
{
- struct mlx5_vport *vport;
-
- vport = mlx5_eswitch_get_vport(esw, vport_num);
- if (IS_ERR(vport))
- return;
+ u16 vport_num = vport->vport;
mutex_lock(&esw->state_lock);
+
if (!vport->enabled)
goto done;
@@ -957,12 +969,16 @@ void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num)
/* Disable events from this vport */
if (MLX5_CAP_GEN(esw->dev, log_max_l2_table))
- arm_vport_context_events_cmd(esw->dev, vport->vport, 0);
+ arm_vport_context_events_cmd(esw->dev, vport_num, 0);
- if (!mlx5_esw_is_manager_vport(esw, vport->vport) &&
+ if (!mlx5_esw_is_manager_vport(esw, vport_num) &&
MLX5_CAP_GEN(esw->dev, vhca_resource_manager))
mlx5_esw_vport_vhca_id_clear(esw, vport_num);
+ if (vport->vport != MLX5_VPORT_PF &&
+ (vport->info.ipsec_crypto_enabled || vport->info.ipsec_packet_enabled))
+ esw->enabled_ipsec_vf_count--;
+
/* We don't assume VFs will cleanup after themselves.
* Calling vport change handler while vport is disabled will cleanup
* the vport resources.
@@ -1068,31 +1084,104 @@ static void mlx5_eswitch_clear_ec_vf_vports_info(struct mlx5_eswitch *esw)
}
}
-/* Public E-Switch API */
-int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num,
- enum mlx5_eswitch_vport_event enabled_events)
+static int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+ enum mlx5_eswitch_vport_event enabled_events)
{
int err;
- err = mlx5_esw_vport_enable(esw, vport_num, enabled_events);
+ err = mlx5_esw_vport_enable(esw, vport, enabled_events);
if (err)
return err;
- err = esw_offloads_load_rep(esw, vport_num);
+ err = mlx5_esw_offloads_load_rep(esw, vport);
if (err)
goto err_rep;
return err;
err_rep:
- mlx5_esw_vport_disable(esw, vport_num);
+ mlx5_esw_vport_disable(esw, vport);
+ return err;
+}
+
+static void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ mlx5_esw_offloads_unload_rep(esw, vport);
+ mlx5_esw_vport_disable(esw, vport);
+}
+
+static int mlx5_eswitch_load_pf_vf_vport(struct mlx5_eswitch *esw, u16 vport_num,
+ enum mlx5_eswitch_vport_event enabled_events)
+{
+ struct mlx5_vport *vport;
+ int err;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+
+ err = mlx5_esw_offloads_init_pf_vf_rep(esw, vport);
+ if (err)
+ return err;
+
+ err = mlx5_eswitch_load_vport(esw, vport, enabled_events);
+ if (err)
+ goto err_load;
+ return 0;
+
+err_load:
+ mlx5_esw_offloads_cleanup_pf_vf_rep(esw, vport);
+ return err;
+}
+
+static void mlx5_eswitch_unload_pf_vf_vport(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_vport *vport;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return;
+
+ mlx5_eswitch_unload_vport(esw, vport);
+ mlx5_esw_offloads_cleanup_pf_vf_rep(esw, vport);
+}
+
+int mlx5_eswitch_load_sf_vport(struct mlx5_eswitch *esw, u16 vport_num,
+ enum mlx5_eswitch_vport_event enabled_events,
+ struct mlx5_devlink_port *dl_port, u32 controller, u32 sfnum)
+{
+ struct mlx5_vport *vport;
+ int err;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+
+ err = mlx5_esw_offloads_init_sf_rep(esw, vport, dl_port, controller, sfnum);
+ if (err)
+ return err;
+
+ err = mlx5_eswitch_load_vport(esw, vport, enabled_events);
+ if (err)
+ goto err_load;
+
+ return 0;
+
+err_load:
+ mlx5_esw_offloads_cleanup_sf_rep(esw, vport);
return err;
}
-void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num)
+void mlx5_eswitch_unload_sf_vport(struct mlx5_eswitch *esw, u16 vport_num)
{
- esw_offloads_unload_rep(esw, vport_num);
- mlx5_esw_vport_disable(esw, vport_num);
+ struct mlx5_vport *vport;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return;
+
+ mlx5_eswitch_unload_vport(esw, vport);
+ mlx5_esw_offloads_cleanup_sf_rep(esw, vport);
}
void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs)
@@ -1103,7 +1192,7 @@ void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs)
mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) {
if (!vport->enabled)
continue;
- mlx5_eswitch_unload_vport(esw, vport->vport);
+ mlx5_eswitch_unload_pf_vf_vport(esw, vport->vport);
}
}
@@ -1116,7 +1205,7 @@ static void mlx5_eswitch_unload_ec_vf_vports(struct mlx5_eswitch *esw,
mlx5_esw_for_each_ec_vf_vport(esw, i, vport, num_ec_vfs) {
if (!vport->enabled)
continue;
- mlx5_eswitch_unload_vport(esw, vport->vport);
+ mlx5_eswitch_unload_pf_vf_vport(esw, vport->vport);
}
}
@@ -1128,7 +1217,7 @@ int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs,
int err;
mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) {
- err = mlx5_eswitch_load_vport(esw, vport->vport, enabled_events);
+ err = mlx5_eswitch_load_pf_vf_vport(esw, vport->vport, enabled_events);
if (err)
goto vf_err;
}
@@ -1148,7 +1237,7 @@ static int mlx5_eswitch_load_ec_vf_vports(struct mlx5_eswitch *esw, u16 num_ec_v
int err;
mlx5_esw_for_each_ec_vf_vport(esw, i, vport, num_ec_vfs) {
- err = mlx5_eswitch_load_vport(esw, vport->vport, enabled_events);
+ err = mlx5_eswitch_load_pf_vf_vport(esw, vport->vport, enabled_events);
if (err)
goto vf_err;
}
@@ -1190,7 +1279,7 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
int ret;
/* Enable PF vport */
- ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_PF, enabled_events);
+ ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_PF, enabled_events);
if (ret)
return ret;
@@ -1201,7 +1290,7 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
/* Enable ECPF vport */
if (mlx5_ecpf_vport_exists(esw->dev)) {
- ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_ECPF, enabled_events);
+ ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_ECPF, enabled_events);
if (ret)
goto ecpf_err;
if (mlx5_core_ec_sriov_enabled(esw->dev)) {
@@ -1224,11 +1313,11 @@ vf_err:
mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs);
ec_vf_err:
if (mlx5_ecpf_vport_exists(esw->dev))
- mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF);
+ mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF);
ecpf_err:
host_pf_disable_hca(esw->dev);
pf_hca_err:
- mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF);
+ mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
return ret;
}
@@ -1242,11 +1331,11 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw)
if (mlx5_ecpf_vport_exists(esw->dev)) {
if (mlx5_core_ec_sriov_enabled(esw->dev))
mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_vfs);
- mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF);
+ mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF);
}
host_pf_disable_hca(esw->dev);
- mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF);
+ mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
}
static void mlx5_eswitch_get_devlink_param(struct mlx5_eswitch *esw)
@@ -1919,6 +2008,12 @@ bool mlx5_eswitch_is_vf_vport(struct mlx5_eswitch *esw, u16 vport_num)
return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_VF);
}
+bool mlx5_eswitch_is_pf_vf_vport(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ return vport_num == MLX5_VPORT_PF ||
+ mlx5_eswitch_is_vf_vport(esw, vport_num);
+}
+
bool mlx5_esw_is_sf_vport(struct mlx5_eswitch *esw, u16 vport_num)
{
return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_SF);
@@ -2251,3 +2346,34 @@ struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw)
return mlx5_esw_allowed(esw) ? esw->dev : NULL;
}
EXPORT_SYMBOL(mlx5_eswitch_get_core_dev);
+
+bool mlx5_eswitch_block_ipsec(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+
+ if (!mlx5_esw_allowed(esw))
+ return true;
+
+ mutex_lock(&esw->state_lock);
+ if (esw->enabled_ipsec_vf_count) {
+ mutex_unlock(&esw->state_lock);
+ return false;
+ }
+
+ dev->num_ipsec_offloads++;
+ mutex_unlock(&esw->state_lock);
+ return true;
+}
+
+void mlx5_eswitch_unblock_ipsec(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+
+ if (!mlx5_esw_allowed(esw))
+ /* Failure means no eswitch => core dev is not a PF */
+ return;
+
+ mutex_lock(&esw->state_lock);
+ dev->num_ipsec_offloads--;
+ mutex_unlock(&esw->state_lock);
+}