diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-08-29 11:33:01 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-08-29 11:33:01 -0700 |
commit | bd6c11bc43c496cddfc6cf603b5d45365606dbd5 (patch) | |
tree | 36318fa68f784d397111991177d65bd6325189c4 /drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | |
parent | 68cf01760bc0891074e813b9bb06d2696cac1c01 (diff) | |
parent | c873512ef3a39cc1a605b7a5ff2ad0a33d619aa8 (diff) |
Merge tag 'net-next-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Paolo Abeni:
"Core:
- Increase size limits for to-be-sent skb frag allocations. This
allows tun, tap devices and packet sockets to better cope with
large write operations
- Store netdevs in an xarray, to simplify iterating over netdevs
- Refactor nexthop selection for multipath routes
- Improve sched class lifetime handling
- Add backup nexthop ID support for bridge
- Implement drop reasons support in openvswitch
- Several data-race annotations and fixes
- Constify the sk parameter of routing functions
- Prepend kernel version to netconsole message
Protocols:
- Implement support for TCP probing the peer being under memory
pressure
- Remove hard coded limitation on IPv6 specific info placement inside
the socket struct
- Get rid of sysctl_tcp_adv_win_scale and use an auto-estimated per
socket scaling factor
- Scale up the IPv6 expired route GC via a separate list of
expiring routes
- In-kernel support for the TLS alert protocol
- Better support for UDP reuseport with connected sockets
- Add NEXT-C-SID support for SRv6 End.X behavior, reducing the SR
header size
- Get rid of additional ancillary per MPTCP connection struct socket
- Implement support for BPF-based MPTCP packet schedulers
- Format MPTCP subtests selftests results in TAP
- Several new SMC 2.1 features including unique experimental options,
max connections per lgr negotiation, max links per lgr negotiation
BPF:
- Multi-buffer support in AF_XDP
- Add multi uprobe BPF links for attaching multiple uprobes and usdt
probes, which is significantly faster and saves extra fds
- Implement an fd-based tc BPF attach API (TCX) and BPF link support
on top of it
- Add SO_REUSEPORT support for TC bpf_sk_assign
- Support new instructions from cpu v4 to simplify the generated code
and feature completeness, for x86, arm64, riscv64
- Support defragmenting IPv(4|6) packets in BPF
- Teach verifier actual bounds of bpf_get_smp_processor_id() and fix
perf+libbpf issue related to custom section handling
- Introduce bpf map element count and enable it for all program types
- Add a BPF hook in sys_socket() to change the protocol ID from
IPPROTO_TCP to IPPROTO_MPTCP to cover migration for legacy applications
- Introduce bpf_mem_cache_free_rcu() and fix OOM under stress
- Add uprobe support for the bpf_get_func_ip helper
- Check skb ownership against full socket
- Support for up to 12 arguments in BPF trampoline
- Extend link_info for kprobe_multi and perf_event links
Netfilter:
- Speed-up process exit by aborting ruleset validation if a fatal
signal is pending
- Allow NLA_POLICY_MASK to be used with BE16/BE32 types
Driver API:
- Page pool optimizations, to improve data locality and cache usage
- Introduce ndo_hwtstamp_get() and ndo_hwtstamp_set() to avoid the
need for raw ioctl() handling in drivers
- Simplify genetlink dump operations (doit/dumpit) providing them the
common information already populated in struct genl_info
- Extend and use the yaml devlink specs to [re]generate the split ops
- Introduce devlink selective dumps, to allow filtering SFs based
on handle and other attributes
- Add yaml netlink spec for netlink-raw families, allow route, link
and address related queries via the ynl tool
- Remove phylink legacy mode support
- Support offload LED blinking to phy
- Add devlink port function attributes for IPsec
New hardware / drivers:
- Ethernet:
- Broadcom ASP 2.0 (72165) ethernet controller
- MediaTek MT7988 SoC
- Texas Instruments AM654 SoC
- Texas Instruments IEP driver
- Atheros qca8081 phy
- Marvell 88Q2110 phy
- NXP TJA1120 phy
- WiFi:
- MediaTek mt7981 support
- Can:
- Kvaser SmartFusion2 PCI Express devices
- Allwinner T113 controllers
- Texas Instruments tcan4552/4553 chips
- Bluetooth:
- Intel Gale Peak
- Qualcomm WCN3988 and WCN7850
- NXP AW693 and IW624
- Mediatek MT2925
Drivers:
- Ethernet NICs:
- nVidia/Mellanox:
- mlx5:
- support UDP encapsulation in packet offload mode
- IPsec packet offload support in eswitch mode
- improve aRFS observability by adding new set of counters
- extend MACsec offload support to cover RoCE traffic
- dynamic completion EQs
- mlx4:
- convert to use auxiliary bus instead of custom interface
logic
- Intel
- ice:
- implement switchdev bridge offload, even for LAG
interfaces
- implement SRIOV support for LAG interfaces
- igc:
- add support for multiple in-flight TX timestamps
- Broadcom:
- bnxt:
- use the unified RX page pool buffers for XDP and non-XDP
- use the NAPI skb allocation cache
- OcteonTX2:
- support Round Robin scheduling HTB offload
- TC flower offload support for SPI field
- Freescale:
- add XDP_TX feature support
- AMD:
- ionic: add support for PCI FLR event
- sfc:
- basic conntrack offload
- introduce eth, ipv4 and ipv6 pedit offloads
- ST Microelectronics:
- stmmac: maximize PTP timestamping resolution
- Virtual NICs:
- Microsoft vNIC:
- batch ringing RX queue doorbell on receiving packets
- add page pool for RX buffers
- Virtio vNIC:
- add per queue interrupt coalescing support
- Google vNIC:
- add queue-page-list mode support
- Ethernet high-speed switches:
- nVidia/Mellanox (mlxsw):
- add port range matching tc-flower offload
- permit enslavement to netdevices with uppers
- Ethernet embedded switches:
- Marvell (mv88e6xxx):
- convert to phylink_pcs
- Renesas:
- r8A779fx: add speed change support
- rzn1: enable VLAN support
- Ethernet PHYs:
- convert mv88e6xxx to phylink_pcs
- WiFi:
- Qualcomm Wi-Fi 7 (ath12k):
- Extremely High Throughput (EHT) PHY support
- RealTek (rtl8xxxu):
- enable AP mode for: RTL8192FU, RTL8710BU (RTL8188GU),
RTL8192EU and RTL8723BU
- RealTek (rtw89):
- Introduce Time Averaged SAR (TAS) support
- Connector:
- support for event filtering"
* tag 'net-next-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1806 commits)
net: ethernet: mtk_wed: minor change in wed_{tx,rx}info_show
net: ethernet: mtk_wed: add some more info in wed_txinfo_show handler
net: stmmac: clarify difference between "interface" and "phy_interface"
r8152: add vendor/device ID pair for D-Link DUB-E250
devlink: move devlink_notify_register/unregister() to dev.c
devlink: move small_ops definition into netlink.c
devlink: move tracepoint definitions into core.c
devlink: push linecard related code into separate file
devlink: push rate related code into separate file
devlink: push trap related code into separate file
devlink: use tracepoint_enabled() helper
devlink: push region related code into separate file
devlink: push param related code into separate file
devlink: push resource related code into separate file
devlink: push dpipe related code into separate file
devlink: move and rename devlink_dpipe_send_and_alloc_skb() helper
devlink: push shared buffer related code into separate file
devlink: push port related code into separate file
devlink: push object register/unregister notifications into separate helpers
inet: fix IP_TRANSPARENT error handling
...
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/eswitch.c')
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 202 |
1 files changed, 164 insertions, 38 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 243c455f1029..6cd7d6497e10 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -48,6 +48,7 @@ #include "devlink.h" #include "ecpf.h" #include "en/mod_hdr.h" +#include "en_accel/ipsec.h" enum { MLX5_ACTION_NONE = 0, @@ -77,18 +78,31 @@ static int mlx5_eswitch_check(const struct mlx5_core_dev *dev) return 0; } -struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink) +static struct mlx5_eswitch *__mlx5_devlink_eswitch_get(struct devlink *devlink, bool check) { struct mlx5_core_dev *dev = devlink_priv(devlink); int err; - err = mlx5_eswitch_check(dev); - if (err) - return ERR_PTR(err); + if (check) { + err = mlx5_eswitch_check(dev); + if (err) + return ERR_PTR(err); + } return dev->priv.eswitch; } +struct mlx5_eswitch *__must_check +mlx5_devlink_eswitch_get(struct devlink *devlink) +{ + return __mlx5_devlink_eswitch_get(devlink, true); +} + +struct mlx5_eswitch *mlx5_devlink_eswitch_nocheck_get(struct devlink *devlink) +{ + return __mlx5_devlink_eswitch_get(devlink, false); +} + struct mlx5_vport *__must_check mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num) { @@ -818,6 +832,8 @@ static int mlx5_esw_vport_caps_get(struct mlx5_eswitch *esw, struct mlx5_vport * hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); vport->info.mig_enabled = MLX5_GET(cmd_hca_cap_2, hca_caps, migratable); + + err = mlx5_esw_ipsec_vf_offload_get(esw->dev, vport); out_free: kfree(query_ctx); return err; @@ -882,16 +898,12 @@ static void esw_vport_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport esw_vport_cleanup_acl(esw, vport); } -int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num, +int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, enum mlx5_eswitch_vport_event enabled_events) { - struct mlx5_vport *vport; + 
u16 vport_num = vport->vport; int ret; - vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) - return PTR_ERR(vport); - mutex_lock(&esw->state_lock); WARN_ON(vport->enabled); @@ -904,6 +916,9 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num, /* Sync with current vport context */ vport->enabled_events = enabled_events; vport->enabled = true; + if (vport->vport != MLX5_VPORT_PF && + (vport->info.ipsec_crypto_enabled || vport->info.ipsec_packet_enabled)) + esw->enabled_ipsec_vf_count++; /* Esw manager is trusted by default. Host PF (vport 0) is trusted as well * in smartNIC as it's a vport group manager. @@ -912,7 +927,7 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num, (!vport_num && mlx5_core_is_ecpf(esw->dev))) vport->info.trusted = true; - if (!mlx5_esw_is_manager_vport(esw, vport->vport) && + if (!mlx5_esw_is_manager_vport(esw, vport_num) && MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) { ret = mlx5_esw_vport_vhca_id_set(esw, vport_num); if (ret) @@ -939,15 +954,12 @@ err_vhca_mapping: return ret; } -void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) +void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - struct mlx5_vport *vport; - - vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) - return; + u16 vport_num = vport->vport; mutex_lock(&esw->state_lock); + if (!vport->enabled) goto done; @@ -957,12 +969,16 @@ void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) /* Disable events from this vport */ if (MLX5_CAP_GEN(esw->dev, log_max_l2_table)) - arm_vport_context_events_cmd(esw->dev, vport->vport, 0); + arm_vport_context_events_cmd(esw->dev, vport_num, 0); - if (!mlx5_esw_is_manager_vport(esw, vport->vport) && + if (!mlx5_esw_is_manager_vport(esw, vport_num) && MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) mlx5_esw_vport_vhca_id_clear(esw, vport_num); + if (vport->vport != MLX5_VPORT_PF && + (vport->info.ipsec_crypto_enabled 
|| vport->info.ipsec_packet_enabled)) + esw->enabled_ipsec_vf_count--; + /* We don't assume VFs will cleanup after themselves. * Calling vport change handler while vport is disabled will cleanup * the vport resources. @@ -1068,31 +1084,104 @@ static void mlx5_eswitch_clear_ec_vf_vports_info(struct mlx5_eswitch *esw) } } -/* Public E-Switch API */ -int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, - enum mlx5_eswitch_vport_event enabled_events) +static int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + enum mlx5_eswitch_vport_event enabled_events) { int err; - err = mlx5_esw_vport_enable(esw, vport_num, enabled_events); + err = mlx5_esw_vport_enable(esw, vport, enabled_events); if (err) return err; - err = esw_offloads_load_rep(esw, vport_num); + err = mlx5_esw_offloads_load_rep(esw, vport); if (err) goto err_rep; return err; err_rep: - mlx5_esw_vport_disable(esw, vport_num); + mlx5_esw_vport_disable(esw, vport); + return err; +} + +static void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + mlx5_esw_offloads_unload_rep(esw, vport); + mlx5_esw_vport_disable(esw, vport); +} + +static int mlx5_eswitch_load_pf_vf_vport(struct mlx5_eswitch *esw, u16 vport_num, + enum mlx5_eswitch_vport_event enabled_events) +{ + struct mlx5_vport *vport; + int err; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + err = mlx5_esw_offloads_init_pf_vf_rep(esw, vport); + if (err) + return err; + + err = mlx5_eswitch_load_vport(esw, vport, enabled_events); + if (err) + goto err_load; + return 0; + +err_load: + mlx5_esw_offloads_cleanup_pf_vf_rep(esw, vport); + return err; +} + +static void mlx5_eswitch_unload_pf_vf_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return; + + mlx5_eswitch_unload_vport(esw, vport); + mlx5_esw_offloads_cleanup_pf_vf_rep(esw, 
vport); +} + +int mlx5_eswitch_load_sf_vport(struct mlx5_eswitch *esw, u16 vport_num, + enum mlx5_eswitch_vport_event enabled_events, + struct mlx5_devlink_port *dl_port, u32 controller, u32 sfnum) +{ + struct mlx5_vport *vport; + int err; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + err = mlx5_esw_offloads_init_sf_rep(esw, vport, dl_port, controller, sfnum); + if (err) + return err; + + err = mlx5_eswitch_load_vport(esw, vport, enabled_events); + if (err) + goto err_load; + + return 0; + +err_load: + mlx5_esw_offloads_cleanup_sf_rep(esw, vport); return err; } -void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num) +void mlx5_eswitch_unload_sf_vport(struct mlx5_eswitch *esw, u16 vport_num) { - esw_offloads_unload_rep(esw, vport_num); - mlx5_esw_vport_disable(esw, vport_num); + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return; + + mlx5_eswitch_unload_vport(esw, vport); + mlx5_esw_offloads_cleanup_sf_rep(esw, vport); } void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs) @@ -1103,7 +1192,7 @@ void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs) mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) { if (!vport->enabled) continue; - mlx5_eswitch_unload_vport(esw, vport->vport); + mlx5_eswitch_unload_pf_vf_vport(esw, vport->vport); } } @@ -1116,7 +1205,7 @@ static void mlx5_eswitch_unload_ec_vf_vports(struct mlx5_eswitch *esw, mlx5_esw_for_each_ec_vf_vport(esw, i, vport, num_ec_vfs) { if (!vport->enabled) continue; - mlx5_eswitch_unload_vport(esw, vport->vport); + mlx5_eswitch_unload_pf_vf_vport(esw, vport->vport); } } @@ -1128,7 +1217,7 @@ int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs, int err; mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) { - err = mlx5_eswitch_load_vport(esw, vport->vport, enabled_events); + err = mlx5_eswitch_load_pf_vf_vport(esw, vport->vport, 
enabled_events); if (err) goto vf_err; } @@ -1148,7 +1237,7 @@ static int mlx5_eswitch_load_ec_vf_vports(struct mlx5_eswitch *esw, u16 num_ec_v int err; mlx5_esw_for_each_ec_vf_vport(esw, i, vport, num_ec_vfs) { - err = mlx5_eswitch_load_vport(esw, vport->vport, enabled_events); + err = mlx5_eswitch_load_pf_vf_vport(esw, vport->vport, enabled_events); if (err) goto vf_err; } @@ -1190,7 +1279,7 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, int ret; /* Enable PF vport */ - ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_PF, enabled_events); + ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_PF, enabled_events); if (ret) return ret; @@ -1201,7 +1290,7 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, /* Enable ECPF vport */ if (mlx5_ecpf_vport_exists(esw->dev)) { - ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_ECPF, enabled_events); + ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_ECPF, enabled_events); if (ret) goto ecpf_err; if (mlx5_core_ec_sriov_enabled(esw->dev)) { @@ -1224,11 +1313,11 @@ vf_err: mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs); ec_vf_err: if (mlx5_ecpf_vport_exists(esw->dev)) - mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF); + mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF); ecpf_err: host_pf_disable_hca(esw->dev); pf_hca_err: - mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF); + mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF); return ret; } @@ -1242,11 +1331,11 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) if (mlx5_ecpf_vport_exists(esw->dev)) { if (mlx5_core_ec_sriov_enabled(esw->dev)) mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_vfs); - mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF); + mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF); } host_pf_disable_hca(esw->dev); - mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF); + mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF); } static void mlx5_eswitch_get_devlink_param(struct mlx5_eswitch *esw) @@ 
-1919,6 +2008,12 @@ bool mlx5_eswitch_is_vf_vport(struct mlx5_eswitch *esw, u16 vport_num) return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_VF); } +bool mlx5_eswitch_is_pf_vf_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + return vport_num == MLX5_VPORT_PF || + mlx5_eswitch_is_vf_vport(esw, vport_num); +} + bool mlx5_esw_is_sf_vport(struct mlx5_eswitch *esw, u16 vport_num) { return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_SF); @@ -2251,3 +2346,34 @@ struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw) return mlx5_esw_allowed(esw) ? esw->dev : NULL; } EXPORT_SYMBOL(mlx5_eswitch_get_core_dev); + +bool mlx5_eswitch_block_ipsec(struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw = dev->priv.eswitch; + + if (!mlx5_esw_allowed(esw)) + return true; + + mutex_lock(&esw->state_lock); + if (esw->enabled_ipsec_vf_count) { + mutex_unlock(&esw->state_lock); + return false; + } + + dev->num_ipsec_offloads++; + mutex_unlock(&esw->state_lock); + return true; +} + +void mlx5_eswitch_unblock_ipsec(struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw = dev->priv.eswitch; + + if (!mlx5_esw_allowed(esw)) + /* Failure means no eswitch => core dev is not a PF */ + return; + + mutex_lock(&esw->state_lock); + dev->num_ipsec_offloads--; + mutex_unlock(&esw->state_lock); +} |