summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-11-12 14:02:04 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-11-12 14:02:04 -0800
commitdb7c953555388571a96ed8783ff6c5745ba18ab9 (patch)
tree6eab6c28b0340841f51d8b4fbae685a2d7e84a91
parent200f9d21aa92ae55390030b6c84757c2aa75bce0 (diff)
parentedbc21113bde13ca3d06eec24b621b1f628583dd (diff)
Merge tag 'net-5.10-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Jakub Kicinski: "Current release - regressions: - arm64: dts: fsl-ls1028a-kontron-sl28: specify in-band mode for ENETC Current release - bugs in new features: - mptcp: provide rmem[0] limit offset to fix oops Previous release - regressions: - IPv6: Set SIT tunnel hard_header_len to zero to fix path MTU calculations - lan743x: correctly handle chips with internal PHY - bpf: Don't rely on GCC __attribute__((optimize)) to disable GCSE - mlx5e: Fix VXLAN port table synchronization after function reload Previous release - always broken: - bpf: Zero-fill re-used per-cpu map element - fix out-of-order UDP packets when forwarding with UDP GSO fraglists turned on: - fix UDP header access on Fast/frag0 UDP GRO - fix IP header access and skb lookup on Fast/frag0 UDP GRO - ethtool: netlink: add missing netdev_features_change() call - net: Update window_clamp if SOCK_RCVBUF is set - igc: Fix returning wrong statistics - ch_ktls: fix multiple leaks and corner cases in Chelsio TLS offload - tunnels: Fix off-by-one in lower MTU bounds for ICMP/ICMPv6 replies - r8169: disable hw csum for short packets on all chip versions - vrf: Fix fast path output packet handling with async Netfilter rules" * tag 'net-5.10-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (65 commits) lan743x: fix use of uninitialized variable net: udp: fix IP header access and skb lookup on Fast/frag0 UDP GRO net: udp: fix UDP header access on Fast/frag0 UDP GRO devlink: Avoid overwriting port attributes of registered port vrf: Fix fast path output packet handling with async Netfilter rules cosa: Add missing kfree in error path of cosa_write net: switch to the kernel.org patchwork instance ch_ktls: stop the txq if reaches threshold ch_ktls: tcb update fails sometimes ch_ktls/cxgb4: handle partial tag alone SKBs ch_ktls: don't free skb before sending FIN ch_ktls: packet handling prior to start marker ch_ktls: Correction in middle record handling ch_ktls: missing handling of header alone ch_ktls: Correction in trimmed_len calculation cxgb4/ch_ktls: creating skbs causes panic ch_ktls: Update cheksum information ch_ktls: Correction in finding correct length cxgb4/ch_ktls: decrypted bit is not enough net/x25: Fix null-ptr-deref in x25_connect ...
-rw-r--r--Documentation/networking/netdev-FAQ.rst4
-rw-r--r--Documentation/networking/phy.rst4
-rw-r--r--Documentation/process/stable-kernel-rules.rst2
-rw-r--r--Documentation/translations/it_IT/process/stable-kernel-rules.rst2
-rw-r--r--MAINTAINERS27
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts1
-rw-r--r--drivers/net/dsa/mv88e6xxx/devlink.c4
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4.h3
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c1
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h6
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/sge.c111
-rw-r--r--drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c582
-rw-r--r--drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c26
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_xsk.c2
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c14
-rw-r--r--drivers/net/ethernet/marvell/prestera/Kconfig1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c72
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c23
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h2
-rw-r--r--drivers/net/ethernet/microchip/lan743x_main.c24
-rw-r--r--drivers/net/ethernet/microchip/lan743x_main.h3
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c18
-rw-r--r--drivers/net/phy/realtek.c2
-rw-r--r--drivers/net/vrf.c92
-rw-r--r--drivers/net/wan/cosa.c1
-rw-r--r--include/linux/compiler-gcc.h2
-rw-r--r--include/linux/compiler_types.h4
-rw-r--r--include/linux/filter.h22
-rw-r--r--include/net/xsk_buff_pool.h2
-rw-r--r--kernel/bpf/Makefile6
-rw-r--r--kernel/bpf/bpf_lsm.c10
-rw-r--r--kernel/bpf/core.c2
-rw-r--r--kernel/bpf/hashtab.c30
-rw-r--r--kernel/bpf/preload/Kconfig1
-rw-r--r--net/core/devlink.c8
-rw-r--r--net/ethtool/features.c2
-rw-r--r--net/ipv4/ip_tunnel_core.c4
-rw-r--r--net/ipv4/syncookies.c9
-rw-r--r--net/ipv4/udp_offload.c19
-rw-r--r--net/ipv6/sit.c2
-rw-r--r--net/ipv6/syncookies.c10
-rw-r--r--net/ipv6/udp_offload.c17
-rw-r--r--net/iucv/af_iucv.c3
-rw-r--r--net/mptcp/protocol.c1
-rw-r--r--net/netlabel/netlabel_unlabeled.c17
-rw-r--r--net/tipc/topsrv.c10
-rw-r--r--net/x25/af_x25.c2
-rw-r--r--net/xdp/xsk.c3
-rw-r--r--net/xdp/xsk_buff_pool.c7
-rw-r--r--samples/bpf/task_fd_query_user.c2
-rw-r--r--samples/bpf/tracex2_user.c2
-rw-r--r--samples/bpf/tracex3_user.c2
-rw-r--r--samples/bpf/xdp_redirect_cpu_user.c2
-rw-r--r--samples/bpf/xdp_rxq_info_user.c2
-rwxr-xr-xscripts/bpf_helpers_doc.py1
-rw-r--r--tools/bpf/bpftool/feature.c7
-rw-r--r--tools/bpf/bpftool/prog.c2
-rw-r--r--tools/bpf/bpftool/skeleton/profiler.bpf.c4
-rw-r--r--tools/lib/bpf/hashmap.h15
-rw-r--r--tools/lib/bpf/xsk.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_init.c214
-rw-r--r--tools/testing/selftests/bpf/progs/profiler.inc.h11
-rw-r--r--tools/testing/selftests/bpf/progs/test_map_init.c33
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/tests.json4
76 files changed, 1138 insertions, 439 deletions
diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst
index d5c9320901c3..21537766be4d 100644
--- a/Documentation/networking/netdev-FAQ.rst
+++ b/Documentation/networking/netdev-FAQ.rst
@@ -110,7 +110,7 @@ Q: I sent a patch and I'm wondering what happened to it?
Q: How can I tell whether it got merged?
A: Start by looking at the main patchworks queue for netdev:
- http://patchwork.ozlabs.org/project/netdev/list/
+ https://patchwork.kernel.org/project/netdevbpf/list/
The "State" field will tell you exactly where things are at with your
patch.
@@ -152,7 +152,7 @@ networking subsystem, and then hands them off to Greg.
There is a patchworks queue that you can see here:
- http://patchwork.ozlabs.org/bundle/davem/stable/?state=*
+ https://patchwork.kernel.org/bundle/netdev/stable/?state=*
It contains the patches which Dave has selected, but not yet handed off
to Greg. If Greg already has the patch, then it will be here:
diff --git a/Documentation/networking/phy.rst b/Documentation/networking/phy.rst
index 256106054c8c..b2f7ec794bc8 100644
--- a/Documentation/networking/phy.rst
+++ b/Documentation/networking/phy.rst
@@ -247,8 +247,8 @@ Some of the interface modes are described below:
speeds (see below.)
``PHY_INTERFACE_MODE_2500BASEX``
- This defines a variant of 1000BASE-X which is clocked 2.5 times faster,
- than the 802.3 standard giving a fixed bit rate of 3.125Gbaud.
+ This defines a variant of 1000BASE-X which is clocked 2.5 times as fast
+ as the 802.3 standard, giving a fixed bit rate of 3.125Gbaud.
``PHY_INTERFACE_MODE_SGMII``
This is used for Cisco SGMII, which is a modification of 1000BASE-X
diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst
index 06f743b612c4..3973556250e1 100644
--- a/Documentation/process/stable-kernel-rules.rst
+++ b/Documentation/process/stable-kernel-rules.rst
@@ -39,7 +39,7 @@ Procedure for submitting patches to the -stable tree
submission guidelines as described in
:ref:`Documentation/networking/netdev-FAQ.rst <netdev-FAQ>`
after first checking the stable networking queue at
- https://patchwork.ozlabs.org/bundle/davem/stable/?series=&submitter=&state=*&q=&archive=
+ https://patchwork.kernel.org/bundle/netdev/stable/?state=*
to ensure the requested patch is not already queued up.
- Security patches should not be handled (solely) by the -stable review
process but should follow the procedures in
diff --git a/Documentation/translations/it_IT/process/stable-kernel-rules.rst b/Documentation/translations/it_IT/process/stable-kernel-rules.rst
index 4f206cee31a7..283d62541c4f 100644
--- a/Documentation/translations/it_IT/process/stable-kernel-rules.rst
+++ b/Documentation/translations/it_IT/process/stable-kernel-rules.rst
@@ -46,7 +46,7 @@ Procedura per sottomettere patch per i sorgenti -stable
:ref:`Documentation/translations/it_IT/networking/netdev-FAQ.rst <it_netdev-FAQ>`;
ma solo dopo aver verificato al seguente indirizzo che la patch non sia
giĆ  in coda:
- https://patchwork.ozlabs.org/bundle/davem/stable/?series=&submitter=&state=*&q=&archive=
+ https://patchwork.kernel.org/bundle/netdev/stable/?state=*
- Una patch di sicurezza non dovrebbero essere gestite (solamente) dal processo
di revisione -stable, ma dovrebbe seguire le procedure descritte in
:ref:`Documentation/translations/it_IT/admin-guide/security-bugs.rst <it_securitybugs>`.
diff --git a/MAINTAINERS b/MAINTAINERS
index 94ac10a153c7..4a34b25ecc1f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1279,7 +1279,7 @@ M: Igor Russkikh <irusskikh@marvell.com>
L: netdev@vger.kernel.org
S: Supported
W: https://www.marvell.com/
-Q: http://patchwork.ozlabs.org/project/netdev/list/
+Q: https://patchwork.kernel.org/project/netdevbpf/list/
F: Documentation/networking/device_drivers/ethernet/aquantia/atlantic.rst
F: drivers/net/ethernet/aquantia/atlantic/
@@ -8830,8 +8830,8 @@ S: Supported
W: http://www.intel.com/support/feedback.htm
W: http://e1000.sourceforge.net/
Q: http://patchwork.ozlabs.org/project/intel-wired-lan/list/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-queue.git
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue.git
F: Documentation/networking/device_drivers/ethernet/intel/
F: drivers/net/ethernet/intel/
F: drivers/net/ethernet/intel/*/
@@ -11174,7 +11174,7 @@ M: Tariq Toukan <tariqt@nvidia.com>
L: netdev@vger.kernel.org
S: Supported
W: http://www.mellanox.com
-Q: http://patchwork.ozlabs.org/project/netdev/list/
+Q: https://patchwork.kernel.org/project/netdevbpf/list/
F: drivers/net/ethernet/mellanox/mlx4/en_*
MELLANOX ETHERNET DRIVER (mlx5e)
@@ -11182,7 +11182,7 @@ M: Saeed Mahameed <saeedm@nvidia.com>
L: netdev@vger.kernel.org
S: Supported
W: http://www.mellanox.com
-Q: http://patchwork.ozlabs.org/project/netdev/list/
+Q: https://patchwork.kernel.org/project/netdevbpf/list/
F: drivers/net/ethernet/mellanox/mlx5/core/en_*
MELLANOX ETHERNET INNOVA DRIVERS
@@ -11190,7 +11190,7 @@ R: Boris Pismenny <borisp@nvidia.com>
L: netdev@vger.kernel.org
S: Supported
W: http://www.mellanox.com
-Q: http://patchwork.ozlabs.org/project/netdev/list/
+Q: https://patchwork.kernel.org/project/netdevbpf/list/
F: drivers/net/ethernet/mellanox/mlx5/core/accel/*
F: drivers/net/ethernet/mellanox/mlx5/core/en_accel/*
F: drivers/net/ethernet/mellanox/mlx5/core/fpga/*
@@ -11202,7 +11202,7 @@ M: Ido Schimmel <idosch@nvidia.com>
L: netdev@vger.kernel.org
S: Supported
W: http://www.mellanox.com
-Q: http://patchwork.ozlabs.org/project/netdev/list/
+Q: https://patchwork.kernel.org/project/netdevbpf/list/
F: drivers/net/ethernet/mellanox/mlxsw/
F: tools/testing/selftests/drivers/net/mlxsw/
@@ -11211,7 +11211,7 @@ M: mlxsw@nvidia.com
L: netdev@vger.kernel.org
S: Supported
W: http://www.mellanox.com
-Q: http://patchwork.ozlabs.org/project/netdev/list/
+Q: https://patchwork.kernel.org/project/netdevbpf/list/
F: drivers/net/ethernet/mellanox/mlxfw/
MELLANOX HARDWARE PLATFORM SUPPORT
@@ -11230,7 +11230,7 @@ L: netdev@vger.kernel.org
L: linux-rdma@vger.kernel.org
S: Supported
W: http://www.mellanox.com
-Q: http://patchwork.ozlabs.org/project/netdev/list/
+Q: https://patchwork.kernel.org/project/netdevbpf/list/
F: drivers/net/ethernet/mellanox/mlx4/
F: include/linux/mlx4/
@@ -11251,7 +11251,7 @@ L: netdev@vger.kernel.org
L: linux-rdma@vger.kernel.org
S: Supported
W: http://www.mellanox.com
-Q: http://patchwork.ozlabs.org/project/netdev/list/
+Q: https://patchwork.kernel.org/project/netdevbpf/list/
F: Documentation/networking/device_drivers/ethernet/mellanox/
F: drivers/net/ethernet/mellanox/mlx5/core/
F: include/linux/mlx5/
@@ -12131,7 +12131,7 @@ M: Jakub Kicinski <kuba@kernel.org>
L: netdev@vger.kernel.org
S: Maintained
W: http://www.linuxfoundation.org/en/Net
-Q: http://patchwork.ozlabs.org/project/netdev/list/
+Q: https://patchwork.kernel.org/project/netdevbpf/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
F: Documentation/devicetree/bindings/net/
@@ -12176,7 +12176,7 @@ M: Jakub Kicinski <kuba@kernel.org>
L: netdev@vger.kernel.org
S: Maintained
W: http://www.linuxfoundation.org/en/Net
-Q: http://patchwork.ozlabs.org/project/netdev/list/
+Q: https://patchwork.kernel.org/project/netdevbpf/list/
B: mailto:netdev@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
@@ -15247,7 +15247,6 @@ F: drivers/iommu/s390-iommu.c
S390 IUCV NETWORK LAYER
M: Julian Wiedmann <jwi@linux.ibm.com>
M: Karsten Graul <kgraul@linux.ibm.com>
-M: Ursula Braun <ubraun@linux.ibm.com>
L: linux-s390@vger.kernel.org
S: Supported
W: http://www.ibm.com/developerworks/linux/linux390/
@@ -15258,7 +15257,6 @@ F: net/iucv/
S390 NETWORK DRIVERS
M: Julian Wiedmann <jwi@linux.ibm.com>
M: Karsten Graul <kgraul@linux.ibm.com>
-M: Ursula Braun <ubraun@linux.ibm.com>
L: linux-s390@vger.kernel.org
S: Supported
W: http://www.ibm.com/developerworks/linux/linux390/
@@ -15829,7 +15827,6 @@ S: Maintained
F: drivers/misc/sgi-xp/
SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS
-M: Ursula Braun <ubraun@linux.ibm.com>
M: Karsten Graul <kgraul@linux.ibm.com>
L: linux-s390@vger.kernel.org
S: Supported
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts
index f46eb47cfa4d..8161dd237971 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts
@@ -75,6 +75,7 @@
&enetc_port0 {
phy-handle = <&phy0>;
phy-connection-type = "sgmii";
+ managed = "in-band-status";
status = "okay";
mdio {
diff --git a/drivers/net/dsa/mv88e6xxx/devlink.c b/drivers/net/dsa/mv88e6xxx/devlink.c
index 10cd1bfd81a0..ade04c036fd9 100644
--- a/drivers/net/dsa/mv88e6xxx/devlink.c
+++ b/drivers/net/dsa/mv88e6xxx/devlink.c
@@ -393,8 +393,10 @@ static int mv88e6xxx_region_atu_snapshot(struct devlink *dl,
mv88e6xxx_reg_lock(chip);
err = mv88e6xxx_fid_map(chip, fid_bitmap);
- if (err)
+ if (err) {
+ kfree(table);
goto out;
+ }
while (1) {
fid = find_next_bit(fid_bitmap, MV88E6XXX_N_FID, fid + 1);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 3352dad6ca99..27308600da15 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -2124,6 +2124,9 @@ void cxgb4_inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q,
void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q,
struct ulptx_sgl *sgl, u64 *end, unsigned int start,
const dma_addr_t *addr);
+void cxgb4_write_partial_sgl(const struct sk_buff *skb, struct sge_txq *q,
+ struct ulptx_sgl *sgl, u64 *end,
+ const dma_addr_t *addr, u32 start, u32 send_len);
void cxgb4_ring_tx_db(struct adapter *adap, struct sge_txq *q, int n);
int t4_set_vlan_acl(struct adapter *adap, unsigned int mbox, unsigned int vf,
u16 vlan);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 0273f40b85f7..17410fe86626 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -3573,6 +3573,8 @@ static int chcr_stats_show(struct seq_file *seq, void *v)
atomic64_read(&adap->ch_ktls_stats.ktls_tx_complete_pkts));
seq_printf(seq, "TX trim pkts : %20llu\n",
atomic64_read(&adap->ch_ktls_stats.ktls_tx_trimmed_pkts));
+ seq_printf(seq, "TX sw fallback : %20llu\n",
+ atomic64_read(&adap->ch_ktls_stats.ktls_tx_fallback));
while (i < MAX_NPORTS) {
ktls_port = &adap->ch_ktls_stats.ktls_port[i];
seq_printf(seq, "Port %d\n", i);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index a952fe198eb9..7fd264a6d085 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -1176,6 +1176,7 @@ static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
txq = netdev_pick_tx(dev, skb, sb_dev);
if (xfrm_offload(skb) || is_ptp_enabled(skb, dev) ||
skb->encapsulation ||
+ cxgb4_is_ktls_skb(skb) ||
(proto != IPPROTO_TCP && proto != IPPROTO_UDP))
txq = txq % pi->nqsets;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index b169776ab484..1b49f2fa9b18 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -388,6 +388,7 @@ struct ch_ktls_stats_debug {
atomic64_t ktls_tx_retransmit_pkts;
atomic64_t ktls_tx_complete_pkts;
atomic64_t ktls_tx_trimmed_pkts;
+ atomic64_t ktls_tx_fallback;
};
#endif
@@ -493,6 +494,11 @@ struct cxgb4_uld_info {
#endif
};
+static inline bool cxgb4_is_ktls_skb(struct sk_buff *skb)
+{
+ return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk);
+}
+
void cxgb4_uld_enable(struct adapter *adap);
void cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p);
int cxgb4_unregister_uld(enum cxgb4_uld type);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index a9e9c7ae565d..196652a114c5 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -890,6 +890,114 @@ void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q,
}
EXPORT_SYMBOL(cxgb4_write_sgl);
+/* cxgb4_write_partial_sgl - populate SGL for partial packet
+ * @skb: the packet
+ * @q: the Tx queue we are writing into
+ * @sgl: starting location for writing the SGL
+ * @end: points right after the end of the SGL
+ * @addr: the list of bus addresses for the SGL elements
+ * @start: start offset in the SKB where partial data starts
+ * @len: length of data from @start to send out
+ *
+ * This API will handle sending out partial data of a skb if required.
+ * Unlike cxgb4_write_sgl, @start can be any offset into the skb data,
+ * and @len will decide how much data after @start offset to send out.
+ */
+void cxgb4_write_partial_sgl(const struct sk_buff *skb, struct sge_txq *q,
+ struct ulptx_sgl *sgl, u64 *end,
+ const dma_addr_t *addr, u32 start, u32 len)
+{
+ struct ulptx_sge_pair buf[MAX_SKB_FRAGS / 2 + 1] = {0}, *to;
+ u32 frag_size, skb_linear_data_len = skb_headlen(skb);
+ struct skb_shared_info *si = skb_shinfo(skb);
+ u8 i = 0, frag_idx = 0, nfrags = 0;
+ skb_frag_t *frag;
+
+ /* Fill the first SGL either from linear data or from partial
+ * frag based on @start.
+ */
+ if (unlikely(start < skb_linear_data_len)) {
+ frag_size = min(len, skb_linear_data_len - start);
+ sgl->len0 = htonl(frag_size);
+ sgl->addr0 = cpu_to_be64(addr[0] + start);
+ len -= frag_size;
+ nfrags++;
+ } else {
+ start -= skb_linear_data_len;
+ frag = &si->frags[frag_idx];
+ frag_size = skb_frag_size(frag);
+ /* find the first frag */
+ while (start >= frag_size) {
+ start -= frag_size;
+ frag_idx++;
+ frag = &si->frags[frag_idx];
+ frag_size = skb_frag_size(frag);
+ }
+
+ frag_size = min(len, skb_frag_size(frag) - start);
+ sgl->len0 = cpu_to_be32(frag_size);
+ sgl->addr0 = cpu_to_be64(addr[frag_idx + 1] + start);
+ len -= frag_size;
+ nfrags++;
+ frag_idx++;
+ }
+
+ /* If the entire partial data fit in one SGL, then send it out
+ * now.
+ */
+ if (!len)
+ goto done;
+
+ /* Most of the complexity below deals with the possibility we hit the
+ * end of the queue in the middle of writing the SGL. For this case
+ * only we create the SGL in a temporary buffer and then copy it.
+ */
+ to = (u8 *)end > (u8 *)q->stat ? buf : sgl->sge;
+
+ /* If the skb couldn't fit in first SGL completely, fill the
+ * rest of the frags in subsequent SGLs. Note that each SGL
+ * pair can store 2 frags.
+ */
+ while (len) {
+ frag_size = min(len, skb_frag_size(&si->frags[frag_idx]));
+ to->len[i & 1] = cpu_to_be32(frag_size);
+ to->addr[i & 1] = cpu_to_be64(addr[frag_idx + 1]);
+ if (i && (i & 1))
+ to++;
+ nfrags++;
+ frag_idx++;
+ i++;
+ len -= frag_size;
+ }
+
+ /* If we ended in an odd boundary, then set the second SGL's
+ * length in the pair to 0.
+ */
+ if (i & 1)
+ to->len[1] = cpu_to_be32(0);
+
+ /* Copy from temporary buffer to Tx ring, in case we hit the
+ * end of the queue in the middle of writing the SGL.
+ */
+ if (unlikely((u8 *)end > (u8 *)q->stat)) {
+ u32 part0 = (u8 *)q->stat - (u8 *)sgl->sge, part1;
+
+ if (likely(part0))
+ memcpy(sgl->sge, buf, part0);
+ part1 = (u8 *)end - (u8 *)q->stat;
+ memcpy(q->desc, (u8 *)buf + part0, part1);
+ end = (void *)q->desc + part1;
+ }
+
+ /* 0-pad to multiple of 16 */
+ if ((uintptr_t)end & 8)
+ *end = 0;
+done:
+ sgl->cmd_nsge = htonl(ULPTX_CMD_V(ULP_TX_SC_DSGL) |
+ ULPTX_NSGE_V(nfrags));
+}
+EXPORT_SYMBOL(cxgb4_write_partial_sgl);
+
/* This function copies 64 byte coalesced work request to
* memory mapped BAR2 space. For coalesced WR SGE fetches
* data from the FIFO instead of from Host.
@@ -1422,7 +1530,8 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
#endif /* CHELSIO_IPSEC_INLINE */
#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE)
- if (skb->decrypted)
+ if (cxgb4_is_ktls_skb(skb) &&
+ (skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb))))
return adap->uld[CXGB4_ULD_KTLS].tx_handler(skb, dev);
#endif /* CHELSIO_TLS_DEVICE */
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
index 5195f692f14d..c24485c0d512 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
@@ -14,6 +14,50 @@
static LIST_HEAD(uld_ctx_list);
static DEFINE_MUTEX(dev_mutex);
+/* chcr_get_nfrags_to_send: get the remaining nfrags after start offset
+ * @skb: skb
+ * @start: start offset.
+ * @len: how much data to send after @start
+ */
+static int chcr_get_nfrags_to_send(struct sk_buff *skb, u32 start, u32 len)
+{
+ struct skb_shared_info *si = skb_shinfo(skb);
+ u32 frag_size, skb_linear_data_len = skb_headlen(skb);
+ u8 nfrags = 0, frag_idx = 0;
+ skb_frag_t *frag;
+
+ /* if its a linear skb then return 1 */
+ if (!skb_is_nonlinear(skb))
+ return 1;
+
+ if (unlikely(start < skb_linear_data_len)) {
+ frag_size = min(len, skb_linear_data_len - start);
+ start = 0;
+ } else {
+ start -= skb_linear_data_len;
+
+ frag = &si->frags[frag_idx];
+ frag_size = skb_frag_size(frag);
+ while (start >= frag_size) {
+ start -= frag_size;
+ frag_idx++;
+ frag = &si->frags[frag_idx];
+ frag_size = skb_frag_size(frag);
+ }
+ frag_size = min(len, skb_frag_size(frag) - start);
+ }
+ len -= frag_size;
+ nfrags++;
+
+ while (len) {
+ frag_size = min(len, skb_frag_size(&si->frags[frag_idx]));
+ len -= frag_size;
+ nfrags++;
+ frag_idx++;
+ }
+ return nfrags;
+}
+
static int chcr_init_tcb_fields(struct chcr_ktls_info *tx_info);
/*
* chcr_ktls_save_keys: calculate and save crypto keys.
@@ -689,7 +733,8 @@ static int chcr_ktls_cpl_set_tcb_rpl(struct adapter *adap, unsigned char *input)
}
static void *__chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info,
- u32 tid, void *pos, u16 word, u64 mask,
+ u32 tid, void *pos, u16 word,
+ struct sge_eth_txq *q, u64 mask,
u64 val, u32 reply)
{
struct cpl_set_tcb_field_core *cpl;
@@ -698,7 +743,10 @@ static void *__chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info,
/* ULP_TXPKT */
txpkt = pos;
- txpkt->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | ULP_TXPKT_DEST_V(0));
+ txpkt->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) |
+ ULP_TXPKT_CHANNELID_V(tx_info->port_id) |
+ ULP_TXPKT_FID_V(q->q.cntxt_id) |
+ ULP_TXPKT_RO_F);
txpkt->len = htonl(DIV_ROUND_UP(CHCR_SET_TCB_FIELD_LEN, 16));
/* ULPTX_IDATA sub-command */
@@ -753,7 +801,7 @@ static void *chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info,
} else {
u8 buf[48] = {0};
- __chcr_write_cpl_set_tcb_ulp(tx_info, tid, buf, word,
+ __chcr_write_cpl_set_tcb_ulp(tx_info, tid, buf, word, q,
mask, val, reply);
return chcr_copy_to_txd(buf, &q->q, pos,
@@ -761,7 +809,7 @@ static void *chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info,
}
}
- pos = __chcr_write_cpl_set_tcb_ulp(tx_info, tid, pos, word,
+ pos = __chcr_write_cpl_set_tcb_ulp(tx_info, tid, pos, word, q,
mask, val, reply);
/* check again if we are at the end of the queue */
@@ -783,11 +831,11 @@ static void *chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info,
*/
static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info,
struct sge_eth_txq *q, u64 tcp_seq,
- u64 tcp_ack, u64 tcp_win)
+ u64 tcp_ack, u64 tcp_win, bool offset)
{
bool first_wr = ((tx_info->prev_ack == 0) && (tx_info->prev_win == 0));
struct ch_ktls_port_stats_debug *port_stats;
- u32 len, cpl = 0, ndesc, wr_len;
+ u32 len, cpl = 0, ndesc, wr_len, wr_mid = 0;
struct fw_ulptx_wr *wr;
int credits;
void *pos;
@@ -803,6 +851,11 @@ static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info,
return NETDEV_TX_BUSY;
}
+ if (unlikely(credits < ETHTXQ_STOP_THRES)) {
+ chcr_eth_txq_stop(q);
+ wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
+ }
+
pos = &q->q.desc[q->q.pidx];
/* make space for WR, we'll fill it later when we know all the cpls
* being sent out and have complete length.
@@ -818,7 +871,7 @@ static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info,
cpl++;
}
/* reset snd una if it's a re-transmit pkt */
- if (tcp_seq != tx_info->prev_seq) {
+ if (tcp_seq != tx_info->prev_seq || offset) {
/* reset snd_una */
port_stats =
&tx_info->adap->ch_ktls_stats.ktls_port[tx_info->port_id];
@@ -827,7 +880,8 @@ static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info,
TCB_SND_UNA_RAW_V
(TCB_SND_UNA_RAW_M),
TCB_SND_UNA_RAW_V(0), 0);
- atomic64_inc(&port_stats->ktls_tx_ooo);
+ if (tcp_seq != tx_info->prev_seq)
+ atomic64_inc(&port_stats->ktls_tx_ooo);
cpl++;
}
/* update ack */
@@ -856,7 +910,8 @@ static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info,
wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR));
wr->cookie = 0;
/* fill len in wr field */
- wr->flowid_len16 = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(len, 16)));
+ wr->flowid_len16 = htonl(wr_mid |
+ FW_WR_LEN16_V(DIV_ROUND_UP(len, 16)));
ndesc = DIV_ROUND_UP(len, 64);
chcr_txq_advance(&q->q, ndesc);
@@ -866,34 +921,14 @@ static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info,
}
/*
- * chcr_ktls_skb_copy
- * @nskb - new skb where the frags to be added.
- * @skb - old skb from which frags will be copied.
- */
-static void chcr_ktls_skb_copy(struct sk_buff *skb, struct sk_buff *nskb)
-{
- int i;
-
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
- skb_shinfo(nskb)->frags[i] = skb_shinfo(skb)->frags[i];
- __skb_frag_ref(&skb_shinfo(nskb)->frags[i]);
- }
-
- skb_shinfo(nskb)->nr_frags = skb_shinfo(skb)->nr_frags;
- nskb->len += skb->data_len;
- nskb->data_len = skb->data_len;
- nskb->truesize += skb->data_len;
-}
-
-/*
* chcr_ktls_get_tx_flits
* returns number of flits to be sent out, it includes key context length, WR
* size and skb fragments.
*/
static unsigned int
-chcr_ktls_get_tx_flits(const struct sk_buff *skb, unsigned int key_ctx_len)
+chcr_ktls_get_tx_flits(u32 nr_frags, unsigned int key_ctx_len)
{
- return chcr_sgl_len(skb_shinfo(skb)->nr_frags) +
+ return chcr_sgl_len(nr_frags) +
DIV_ROUND_UP(key_ctx_len + CHCR_KTLS_WR_SIZE, 8);
}
@@ -957,8 +992,10 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb,
struct tcphdr *tcp;
int len16, pktlen;
struct iphdr *ip;
+ u32 wr_mid = 0;
int credits;
u8 buf[150];
+ u64 cntrl1;
void *pos;
iplen = skb_network_header_len(skb);
@@ -967,7 +1004,7 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb,
/* packet length = eth hdr len + ip hdr len + tcp hdr len
* (including options).
*/
- pktlen = skb->len - skb->data_len;
+ pktlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
ctrl = sizeof(*cpl) + pktlen;
len16 = DIV_ROUND_UP(sizeof(*wr) + ctrl, 16);
@@ -980,6 +1017,11 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb,
return NETDEV_TX_BUSY;
}
+ if (unlikely(credits < ETHTXQ_STOP_THRES)) {
+ chcr_eth_txq_stop(q);
+ wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
+ }
+
pos = &q->q.desc[q->q.pidx];
wr = pos;
@@ -987,7 +1029,7 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb,
wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) |
FW_WR_IMMDLEN_V(ctrl));
- wr->equiq_to_len16 = htonl(FW_WR_LEN16_V(len16));
+ wr->equiq_to_len16 = htonl(wr_mid | FW_WR_LEN16_V(len16));
wr->r3 = 0;
cpl = (void *)(wr + 1);
@@ -997,22 +1039,28 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb,
TXPKT_PF_V(tx_info->adap->pf));
cpl->pack = 0;
cpl->len = htons(pktlen);
- /* checksum offload */
- cpl->ctrl1 = 0;
-
- pos = cpl + 1;
memcpy(buf, skb->data, pktlen);
if (tx_info->ip_family == AF_INET) {
/* we need to correct ip header len */
ip = (struct iphdr *)(buf + maclen);
ip->tot_len = htons(pktlen - maclen);
+ cntrl1 = TXPKT_CSUM_TYPE_V(TX_CSUM_TCPIP);
#if IS_ENABLED(CONFIG_IPV6)
} else {
ip6 = (struct ipv6hdr *)(buf + maclen);
ip6->payload_len = htons(pktlen - maclen - iplen);
+ cntrl1 = TXPKT_CSUM_TYPE_V(TX_CSUM_TCPIP6);
#endif
}
+
+ cntrl1 |= T6_TXPKT_ETHHDR_LEN_V(maclen - ETH_HLEN) |
+ TXPKT_IPHDR_LEN_V(iplen);
+ /* checksum offload */
+ cpl->ctrl1 = cpu_to_be64(cntrl1);
+
+ pos = cpl + 1;
+
/* now take care of the tcp header, if fin is not set then clear push
* bit as well, and if fin is set, it will be sent at the last so we
* need to update the tcp sequence number as per the last packet.
@@ -1031,71 +1079,6 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb,
return 0;
}
-/* chcr_ktls_skb_shift - Shifts request length paged data from skb to another.
- * @tgt- buffer into which tail data gets added
- * @skb- buffer from which the paged data comes from
- * @shiftlen- shift up to this many bytes
- */
-static int chcr_ktls_skb_shift(struct sk_buff *tgt, struct sk_buff *skb,
- int shiftlen)
-{
- skb_frag_t *fragfrom, *fragto;
- int from, to, todo;
-
- WARN_ON(shiftlen > skb->data_len);
-
- todo = shiftlen;
- from = 0;
- to = 0;
- fragfrom = &skb_shinfo(skb)->frags[from];
-
- while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) {
- fragfrom = &skb_shinfo(skb)->frags[from];
- fragto = &skb_shinfo(tgt)->frags[to];
-
- if (todo >= skb_frag_size(fragfrom)) {
- *fragto = *fragfrom;
- todo -= skb_frag_size(fragfrom);
- from++;
- to++;
-
- } else {
- __skb_frag_ref(fragfrom);
- skb_frag_page_copy(fragto, fragfrom);
- skb_frag_off_copy(fragto, fragfrom);
- skb_frag_size_set(fragto, todo);
-
- skb_frag_off_add(fragfrom, todo);
- skb_frag_size_sub(fragfrom, todo);
- todo = 0;
-
- to++;
- break;
- }
- }
-
- /* Ready to "commit" this state change to tgt */
- skb_shinfo(tgt)->nr_frags = to;
-
- /* Reposition in the original skb */
- to = 0;
- while (from < skb_shinfo(skb)->nr_frags)
- skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++];
-
- skb_shinfo(skb)->nr_frags = to;
-
- WARN_ON(todo > 0 && !skb_shinfo(skb)->nr_frags);
-
- skb->len -= shiftlen;
- skb->data_len -= shiftlen;
- skb->truesize -= shiftlen;
- tgt->len += shiftlen;
- tgt->data_len += shiftlen;
- tgt->truesize += shiftlen;
-
- return shiftlen;
-}
-
/*
* chcr_ktls_xmit_wr_complete: This sends out the complete record. If an skb
* received has partial end part of the record, send out the complete record, so
@@ -1111,6 +1094,8 @@ static int chcr_ktls_skb_shift(struct sk_buff *tgt, struct sk_buff *skb,
static int chcr_ktls_xmit_wr_complete(struct sk_buff *skb,
struct chcr_ktls_info *tx_info,
struct sge_eth_txq *q, u32 tcp_seq,
+ bool is_last_wr, u32 data_len,
+ u32 skb_offset, u32 nfrags,
bool tcp_push, u32 mss)
{
u32 len16, wr_mid = 0, flits = 0, ndesc, cipher_start;
@@ -1126,7 +1111,7 @@ static int chcr_ktls_xmit_wr_complete(struct sk_buff *skb,
u64 *end;
/* get the number of flits required */
- flits = chcr_ktls_get_tx_flits(skb, tx_info->key_ctx_len);
+ flits = chcr_ktls_get_tx_flits(nfrags, tx_info->key_ctx_len);
/* number of descriptors */
ndesc = chcr_flits_to_desc(flits);
/* check if enough credits available */
@@ -1155,6 +1140,9 @@ static int chcr_ktls_xmit_wr_complete(struct sk_buff *skb,
return NETDEV_TX_BUSY;
}
+ if (!is_last_wr)
+ skb_get(skb);
+
pos = &q->q.desc[q->q.pidx];
end = (u64 *)pos + flits;
/* FW_ULPTX_WR */
@@ -1187,7 +1175,7 @@ static int chcr_ktls_xmit_wr_complete(struct sk_buff *skb,
CPL_TX_SEC_PDU_CPLLEN_V(CHCR_CPL_TX_SEC_PDU_LEN_64BIT) |
CPL_TX_SEC_PDU_PLACEHOLDER_V(1) |
CPL_TX_SEC_PDU_IVINSRTOFST_V(TLS_HEADER_SIZE + 1));
- cpl->pldlen = htonl(skb->data_len);
+ cpl->pldlen = htonl(data_len);
/* encryption should start after tls header size + iv size */
cipher_start = TLS_HEADER_SIZE + tx_info->iv_size + 1;
@@ -1229,7 +1217,7 @@ static int chcr_ktls_xmit_wr_complete(struct sk_buff *skb,
/* CPL_TX_DATA */
tx_data = (void *)pos;
OPCODE_TID(tx_data) = htonl(MK_OPCODE_TID(CPL_TX_DATA, tx_info->tid));
- tx_data->len = htonl(TX_DATA_MSS_V(mss) | TX_LENGTH_V(skb->data_len));
+ tx_data->len = htonl(TX_DATA_MSS_V(mss) | TX_LENGTH_V(data_len));
tx_data->rsvd = htonl(tcp_seq);
@@ -1249,8 +1237,8 @@ static int chcr_ktls_xmit_wr_complete(struct sk_buff *skb,
}
/* send the complete packet except the header */
- cxgb4_write_sgl(skb, &q->q, pos, end, skb->len - skb->data_len,
- sgl_sdesc->addr);
+ cxgb4_write_partial_sgl(skb, &q->q, pos, end, sgl_sdesc->addr,
+ skb_offset, data_len);
sgl_sdesc->skb = skb;
chcr_txq_advance(&q->q, ndesc);
@@ -1282,10 +1270,11 @@ static int chcr_ktls_xmit_wr_short(struct sk_buff *skb,
struct sge_eth_txq *q,
u32 tcp_seq, bool tcp_push, u32 mss,
u32 tls_rec_offset, u8 *prior_data,
- u32 prior_data_len)
+ u32 prior_data_len, u32 data_len,
+ u32 skb_offset)
{
+ u32 len16, wr_mid = 0, cipher_start, nfrags;
struct adapter *adap = tx_info->adap;
- u32 len16, wr_mid = 0, cipher_start;
unsigned int flits = 0, ndesc;
int credits, left, last_desc;
struct tx_sw_desc *sgl_sdesc;
@@ -1298,10 +1287,11 @@ static int chcr_ktls_xmit_wr_short(struct sk_buff *skb,
void *pos;
u64 *end;
+ nfrags = chcr_get_nfrags_to_send(skb, skb_offset, data_len);
/* get the number of flits required, it's a partial record so 2 flits
* (AES_BLOCK_SIZE) will be added.
*/
- flits = chcr_ktls_get_tx_flits(skb, tx_info->key_ctx_len) + 2;
+ flits = chcr_ktls_get_tx_flits(nfrags, tx_info->key_ctx_len) + 2;
/* get the correct 8 byte IV of this record */
iv_record = cpu_to_be64(tx_info->iv + tx_info->record_no);
/* If it's a middle record and not 16 byte aligned to run AES CTR, need
@@ -1373,7 +1363,7 @@ static int chcr_ktls_xmit_wr_short(struct sk_buff *skb,
htonl(CPL_TX_SEC_PDU_OPCODE_V(CPL_TX_SEC_PDU) |
CPL_TX_SEC_PDU_CPLLEN_V(CHCR_CPL_TX_SEC_PDU_LEN_64BIT) |
CPL_TX_SEC_PDU_IVINSRTOFST_V(1));
- cpl->pldlen = htonl(skb->data_len + AES_BLOCK_LEN + prior_data_len);
+ cpl->pldlen = htonl(data_len + AES_BLOCK_LEN + prior_data_len);
cpl->aadstart_cipherstop_hi =
htonl(CPL_TX_SEC_PDU_CIPHERSTART_V(cipher_start));
cpl->cipherstop_lo_authinsert = 0;
@@ -1404,7 +1394,7 @@ static int chcr_ktls_xmit_wr_short(struct sk_buff *skb,
tx_data = (void *)pos;
OPCODE_TID(tx_data) = htonl(MK_OPCODE_TID(CPL_TX_DATA, tx_info->tid));
tx_data->len = htonl(TX_DATA_MSS_V(mss) |
- TX_LENGTH_V(skb->data_len + prior_data_len));
+ TX_LENGTH_V(data_len + prior_data_len));
tx_data->rsvd = htonl(tcp_seq);
tx_data->flags = htonl(TX_BYPASS_F);
if (tcp_push)
@@ -1437,8 +1427,8 @@ static int chcr_ktls_xmit_wr_short(struct sk_buff *skb,
if (prior_data_len)
pos = chcr_copy_to_txd(prior_data, &q->q, pos, 16);
/* send the complete packet except the header */
- cxgb4_write_sgl(skb, &q->q, pos, end, skb->len - skb->data_len,
- sgl_sdesc->addr);
+ cxgb4_write_partial_sgl(skb, &q->q, pos, end, sgl_sdesc->addr,
+ skb_offset, data_len);
sgl_sdesc->skb = skb;
chcr_txq_advance(&q->q, ndesc);
@@ -1466,6 +1456,7 @@ static int chcr_ktls_tx_plaintxt(struct chcr_ktls_info *tx_info,
struct sk_buff *skb, u32 tcp_seq, u32 mss,
bool tcp_push, struct sge_eth_txq *q,
u32 port_id, u8 *prior_data,
+ u32 data_len, u32 skb_offset,
u32 prior_data_len)
{
int credits, left, len16, last_desc;
@@ -1475,14 +1466,16 @@ static int chcr_ktls_tx_plaintxt(struct chcr_ktls_info *tx_info,
struct ulptx_idata *idata;
struct ulp_txpkt *ulptx;
struct fw_ulptx_wr *wr;
- u32 wr_mid = 0;
+ u32 wr_mid = 0, nfrags;
void *pos;
u64 *end;
flits = DIV_ROUND_UP(CHCR_PLAIN_TX_DATA_LEN, 8);
- flits += chcr_sgl_len(skb_shinfo(skb)->nr_frags);
+ nfrags = chcr_get_nfrags_to_send(skb, skb_offset, data_len);
+ flits += chcr_sgl_len(nfrags);
if (prior_data_len)
flits += 2;
+
/* WR will need len16 */
len16 = DIV_ROUND_UP(flits, 2);
/* check how many descriptors needed */
@@ -1535,7 +1528,7 @@ static int chcr_ktls_tx_plaintxt(struct chcr_ktls_info *tx_info,
tx_data = (struct cpl_tx_data *)(idata + 1);
OPCODE_TID(tx_data) = htonl(MK_OPCODE_TID(CPL_TX_DATA, tx_info->tid));
tx_data->len = htonl(TX_DATA_MSS_V(mss) |
- TX_LENGTH_V(skb->data_len + prior_data_len));
+ TX_LENGTH_V(data_len + prior_data_len));
/* set tcp seq number */
tx_data->rsvd = htonl(tcp_seq);
tx_data->flags = htonl(TX_BYPASS_F);
@@ -1559,8 +1552,8 @@ static int chcr_ktls_tx_plaintxt(struct chcr_ktls_info *tx_info,
end = pos + left;
}
/* send the complete packet including the header */
- cxgb4_write_sgl(skb, &q->q, pos, end, skb->len - skb->data_len,
- sgl_sdesc->addr);
+ cxgb4_write_partial_sgl(skb, &q->q, pos, end, sgl_sdesc->addr,
+ skb_offset, data_len);
sgl_sdesc->skb = skb;
chcr_txq_advance(&q->q, ndesc);
@@ -1568,12 +1561,96 @@ static int chcr_ktls_tx_plaintxt(struct chcr_ktls_info *tx_info,
return 0;
}
+static int chcr_ktls_tunnel_pkt(struct chcr_ktls_info *tx_info,
+ struct sk_buff *skb,
+ struct sge_eth_txq *q)
+{
+ u32 ctrl, iplen, maclen, wr_mid = 0, len16;
+ struct tx_sw_desc *sgl_sdesc;
+ struct fw_eth_tx_pkt_wr *wr;
+ struct cpl_tx_pkt_core *cpl;
+ unsigned int flits, ndesc;
+ int credits, last_desc;
+ u64 cntrl1, *end;
+ void *pos;
+
+ ctrl = sizeof(*cpl);
+ flits = DIV_ROUND_UP(sizeof(*wr) + ctrl, 8);
+
+ flits += chcr_sgl_len(skb_shinfo(skb)->nr_frags + 1);
+ len16 = DIV_ROUND_UP(flits, 2);
+ /* check how many descriptors needed */
+ ndesc = DIV_ROUND_UP(flits, 8);
+
+ credits = chcr_txq_avail(&q->q) - ndesc;
+ if (unlikely(credits < 0)) {
+ chcr_eth_txq_stop(q);
+ return -ENOMEM;
+ }
+
+ if (unlikely(credits < ETHTXQ_STOP_THRES)) {
+ chcr_eth_txq_stop(q);
+ wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
+ }
+
+ last_desc = q->q.pidx + ndesc - 1;
+ if (last_desc >= q->q.size)
+ last_desc -= q->q.size;
+ sgl_sdesc = &q->q.sdesc[last_desc];
+
+ if (unlikely(cxgb4_map_skb(tx_info->adap->pdev_dev, skb,
+ sgl_sdesc->addr) < 0)) {
+ memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr));
+ q->mapping_err++;
+ return -ENOMEM;
+ }
+
+ iplen = skb_network_header_len(skb);
+ maclen = skb_mac_header_len(skb);
+
+ pos = &q->q.desc[q->q.pidx];
+ end = (u64 *)pos + flits;
+ wr = pos;
+
+ /* Firmware work request header */
+ wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) |
+ FW_WR_IMMDLEN_V(ctrl));
+
+ wr->equiq_to_len16 = htonl(wr_mid | FW_WR_LEN16_V(len16));
+ wr->r3 = 0;
+
+ cpl = (void *)(wr + 1);
+
+ /* CPL header */
+ cpl->ctrl0 = htonl(TXPKT_OPCODE_V(CPL_TX_PKT) |
+ TXPKT_INTF_V(tx_info->tx_chan) |
+ TXPKT_PF_V(tx_info->adap->pf));
+ cpl->pack = 0;
+ cntrl1 = TXPKT_CSUM_TYPE_V(tx_info->ip_family == AF_INET ?
+ TX_CSUM_TCPIP : TX_CSUM_TCPIP6);
+ cntrl1 |= T6_TXPKT_ETHHDR_LEN_V(maclen - ETH_HLEN) |
+ TXPKT_IPHDR_LEN_V(iplen);
+ /* checksum offload */
+ cpl->ctrl1 = cpu_to_be64(cntrl1);
+ cpl->len = htons(skb->len);
+
+ pos = cpl + 1;
+
+ cxgb4_write_sgl(skb, &q->q, pos, end, 0, sgl_sdesc->addr);
+ sgl_sdesc->skb = skb;
+ chcr_txq_advance(&q->q, ndesc);
+ cxgb4_ring_tx_db(tx_info->adap, &q->q, ndesc);
+ return 0;
+}
+
/*
* chcr_ktls_copy_record_in_skb
* @nskb - new skb where the frags to be added.
+ * @skb - old skb, to copy socket and destructor details.
* @record - specific record which has complete 16k record in frags.
*/
static void chcr_ktls_copy_record_in_skb(struct sk_buff *nskb,
+ struct sk_buff *skb,
struct tls_record_info *record)
{
int i = 0;
@@ -1588,6 +1665,9 @@ static void chcr_ktls_copy_record_in_skb(struct sk_buff *nskb,
nskb->data_len = record->len;
nskb->len += record->len;
nskb->truesize += record->len;
+ nskb->sk = skb->sk;
+ nskb->destructor = skb->destructor;
+ refcount_add(nskb->truesize, &nskb->sk->sk_wmem_alloc);
}
/*
@@ -1659,7 +1739,7 @@ static int chcr_end_part_handler(struct chcr_ktls_info *tx_info,
struct sk_buff *skb,
struct tls_record_info *record,
u32 tcp_seq, int mss, bool tcp_push_no_fin,
- struct sge_eth_txq *q,
+ struct sge_eth_txq *q, u32 skb_offset,
u32 tls_end_offset, bool last_wr)
{
struct sk_buff *nskb = NULL;
@@ -1668,30 +1748,37 @@ static int chcr_end_part_handler(struct chcr_ktls_info *tx_info,
nskb = skb;
atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_complete_pkts);
} else {
- dev_kfree_skb_any(skb);
-
- nskb = alloc_skb(0, GFP_KERNEL);
- if (!nskb)
+ nskb = alloc_skb(0, GFP_ATOMIC);
+ if (!nskb) {
+ dev_kfree_skb_any(skb);
return NETDEV_TX_BUSY;
+ }
+
/* copy complete record in skb */
- chcr_ktls_copy_record_in_skb(nskb, record);
+ chcr_ktls_copy_record_in_skb(nskb, skb, record);
/* packet is being sent from the beginning, update the tcp_seq
* accordingly.
*/
tcp_seq = tls_record_start_seq(record);
- /* reset snd una, so the middle record won't send the already
- * sent part.
- */
- if (chcr_ktls_update_snd_una(tx_info, q))
- goto out;
+ /* reset skb offset */
+ skb_offset = 0;
+
+ if (last_wr)
+ dev_kfree_skb_any(skb);
+
+ last_wr = true;
+
atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_end_pkts);
}
if (chcr_ktls_xmit_wr_complete(nskb, tx_info, q, tcp_seq,
+ last_wr, record->len, skb_offset,
+ record->num_frags,
(last_wr && tcp_push_no_fin),
mss)) {
goto out;
}
+ tx_info->prev_seq = record->end_seq;
return 0;
out:
dev_kfree_skb_any(nskb);
@@ -1723,41 +1810,47 @@ static int chcr_short_record_handler(struct chcr_ktls_info *tx_info,
struct sk_buff *skb,
struct tls_record_info *record,
u32 tcp_seq, int mss, bool tcp_push_no_fin,
+ u32 data_len, u32 skb_offset,
struct sge_eth_txq *q, u32 tls_end_offset)
{
u32 tls_rec_offset = tcp_seq - tls_record_start_seq(record);
u8 prior_data[16] = {0};
u32 prior_data_len = 0;
- u32 data_len;
/* check if the skb is ending in middle of tag/HASH, its a big
* trouble, send the packet before the HASH.
*/
- int remaining_record = tls_end_offset - skb->data_len;
+ int remaining_record = tls_end_offset - data_len;
if (remaining_record > 0 &&
remaining_record < TLS_CIPHER_AES_GCM_128_TAG_SIZE) {
- int trimmed_len = skb->data_len -
- (TLS_CIPHER_AES_GCM_128_TAG_SIZE - remaining_record);
- struct sk_buff *tmp_skb = NULL;
- /* don't process the pkt if it is only a partial tag */
- if (skb->data_len < TLS_CIPHER_AES_GCM_128_TAG_SIZE)
- goto out;
+ int trimmed_len = 0;
- WARN_ON(trimmed_len > skb->data_len);
+ if (tls_end_offset > TLS_CIPHER_AES_GCM_128_TAG_SIZE)
+ trimmed_len = data_len -
+ (TLS_CIPHER_AES_GCM_128_TAG_SIZE -
+ remaining_record);
+ if (!trimmed_len)
+ return FALLBACK;
- /* shift to those many bytes */
- tmp_skb = alloc_skb(0, GFP_KERNEL);
- if (unlikely(!tmp_skb))
- goto out;
+ WARN_ON(trimmed_len > data_len);
- chcr_ktls_skb_shift(tmp_skb, skb, trimmed_len);
- /* free the last trimmed portion */
- dev_kfree_skb_any(skb);
- skb = tmp_skb;
+ data_len = trimmed_len;
atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_trimmed_pkts);
}
- data_len = skb->data_len;
+
+ /* check if it is only the header part. */
+ if (tls_rec_offset + data_len <= (TLS_HEADER_SIZE + tx_info->iv_size)) {
+ if (chcr_ktls_tx_plaintxt(tx_info, skb, tcp_seq, mss,
+ tcp_push_no_fin, q,
+ tx_info->port_id, prior_data,
+ data_len, skb_offset, prior_data_len))
+ goto out;
+
+ tx_info->prev_seq = tcp_seq + data_len;
+ return 0;
+ }
+
/* check if the middle record's start point is 16 byte aligned. CTR
* needs 16 byte aligned start point to start encryption.
*/
@@ -1818,9 +1911,6 @@ static int chcr_short_record_handler(struct chcr_ktls_info *tx_info,
}
/* reset tcp_seq as per the prior_data_required len */
tcp_seq -= prior_data_len;
- /* include prio_data_len for further calculation.
- */
- data_len += prior_data_len;
}
/* reset snd una, so the middle record won't send the already
* sent part.
@@ -1829,37 +1919,54 @@ static int chcr_short_record_handler(struct chcr_ktls_info *tx_info,
goto out;
atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_middle_pkts);
} else {
- /* Else means, its a partial first part of the record. Check if
- * its only the header, don't need to send for encryption then.
- */
- if (data_len <= TLS_HEADER_SIZE + tx_info->iv_size) {
- if (chcr_ktls_tx_plaintxt(tx_info, skb, tcp_seq, mss,
- tcp_push_no_fin, q,
- tx_info->port_id,
- prior_data,
- prior_data_len)) {
- goto out;
- }
- return 0;
- }
atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_start_pkts);
}
if (chcr_ktls_xmit_wr_short(skb, tx_info, q, tcp_seq, tcp_push_no_fin,
mss, tls_rec_offset, prior_data,
- prior_data_len)) {
+ prior_data_len, data_len, skb_offset)) {
goto out;
}
+ tx_info->prev_seq = tcp_seq + data_len + prior_data_len;
return 0;
out:
dev_kfree_skb_any(skb);
return NETDEV_TX_BUSY;
}
+static int chcr_ktls_sw_fallback(struct sk_buff *skb,
+ struct chcr_ktls_info *tx_info,
+ struct sge_eth_txq *q)
+{
+ u32 data_len, skb_offset;
+ struct sk_buff *nskb;
+ struct tcphdr *th;
+
+ nskb = tls_encrypt_skb(skb);
+
+ if (!nskb)
+ return 0;
+
+ th = tcp_hdr(nskb);
+ skb_offset = skb_transport_offset(nskb) + tcp_hdrlen(nskb);
+ data_len = nskb->len - skb_offset;
+ skb_tx_timestamp(nskb);
+
+ if (chcr_ktls_tunnel_pkt(tx_info, nskb, q))
+ goto out;
+
+ tx_info->prev_seq = ntohl(th->seq) + data_len;
+ atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_fallback);
+ return 0;
+out:
+ dev_kfree_skb_any(nskb);
+ return 0;
+}
/* nic tls TX handler */
static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev)
{
+ u32 tls_end_offset, tcp_seq, skb_data_len, skb_offset;
struct ch_ktls_port_stats_debug *port_stats;
struct chcr_ktls_ofld_ctx_tx *tx_ctx;
struct ch_ktls_stats_debug *stats;
@@ -1867,20 +1974,17 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev)
int data_len, qidx, ret = 0, mss;
struct tls_record_info *record;
struct chcr_ktls_info *tx_info;
- u32 tls_end_offset, tcp_seq;
struct tls_context *tls_ctx;
- struct sk_buff *local_skb;
struct sge_eth_txq *q;
struct adapter *adap;
unsigned long flags;
tcp_seq = ntohl(th->seq);
+ skb_offset = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ skb_data_len = skb->len - skb_offset;
+ data_len = skb_data_len;
- mss = skb_is_gso(skb) ? skb_shinfo(skb)->gso_size : skb->data_len;
-
- /* check if we haven't set it for ktls offload */
- if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))
- goto out;
+ mss = skb_is_gso(skb) ? skb_shinfo(skb)->gso_size : data_len;
tls_ctx = tls_get_ctx(skb->sk);
if (unlikely(tls_ctx->netdev != dev))
@@ -1892,14 +1996,6 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev)
if (unlikely(!tx_info))
goto out;
- /* don't touch the original skb, make a new skb to extract each records
- * and send them separately.
- */
- local_skb = alloc_skb(0, GFP_KERNEL);
-
- if (unlikely(!local_skb))
- return NETDEV_TX_BUSY;
-
adap = tx_info->adap;
stats = &adap->ch_ktls_stats;
port_stats = &stats->ktls_port[tx_info->port_id];
@@ -1914,20 +2010,7 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev)
if (ret)
return NETDEV_TX_BUSY;
}
- /* update tcb */
- ret = chcr_ktls_xmit_tcb_cpls(tx_info, q, ntohl(th->seq),
- ntohl(th->ack_seq),
- ntohs(th->window));
- if (ret) {
- dev_kfree_skb_any(local_skb);
- return NETDEV_TX_BUSY;
- }
- /* copy skb contents into local skb */
- chcr_ktls_skb_copy(skb, local_skb);
-
- /* go through the skb and send only one record at a time. */
- data_len = skb->data_len;
/* TCP segments can be in received either complete or partial.
* chcr_end_part_handler will handle cases if complete record or end
* part of the record is received. Incase of partial end part of record,
@@ -1952,10 +2035,64 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev)
goto out;
}
+ tls_end_offset = record->end_seq - tcp_seq;
+
+ pr_debug("seq 0x%x, end_seq 0x%x prev_seq 0x%x, datalen 0x%x\n",
+ tcp_seq, record->end_seq, tx_info->prev_seq, data_len);
+ /* update tcb for the skb */
+ if (skb_data_len == data_len) {
+ u32 tx_max = tcp_seq;
+
+ if (!tls_record_is_start_marker(record) &&
+ tls_end_offset < TLS_CIPHER_AES_GCM_128_TAG_SIZE)
+ tx_max = record->end_seq -
+ TLS_CIPHER_AES_GCM_128_TAG_SIZE;
+
+ ret = chcr_ktls_xmit_tcb_cpls(tx_info, q, tx_max,
+ ntohl(th->ack_seq),
+ ntohs(th->window),
+ tls_end_offset !=
+ record->len);
+ if (ret) {
+ spin_unlock_irqrestore(&tx_ctx->base.lock,
+ flags);
+ goto out;
+ }
+
+ if (th->fin)
+ skb_get(skb);
+ }
+
if (unlikely(tls_record_is_start_marker(record))) {
- spin_unlock_irqrestore(&tx_ctx->base.lock, flags);
atomic64_inc(&port_stats->ktls_tx_skip_no_sync_data);
- goto out;
+ /* If tls_end_offset < data_len, means there is some
+ * data after start marker, which needs encryption, send
+ * plaintext first and take skb refcount. else send out
+ * complete pkt as plaintext.
+ */
+ if (tls_end_offset < data_len)
+ skb_get(skb);
+ else
+ tls_end_offset = data_len;
+
+ ret = chcr_ktls_tx_plaintxt(tx_info, skb, tcp_seq, mss,
+ (!th->fin && th->psh), q,
+ tx_info->port_id, NULL,
+ tls_end_offset, skb_offset,
+ 0);
+
+ spin_unlock_irqrestore(&tx_ctx->base.lock, flags);
+ if (ret) {
+ /* free the refcount taken earlier */
+ if (tls_end_offset < data_len)
+ dev_kfree_skb_any(skb);
+ goto out;
+ }
+
+ data_len -= tls_end_offset;
+ tcp_seq = record->end_seq;
+ skb_offset += tls_end_offset;
+ continue;
}
/* increase page reference count of the record, so that there
@@ -1967,73 +2104,64 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev)
/* lock cleared */
spin_unlock_irqrestore(&tx_ctx->base.lock, flags);
- tls_end_offset = record->end_seq - tcp_seq;
- pr_debug("seq 0x%x, end_seq 0x%x prev_seq 0x%x, datalen 0x%x\n",
- tcp_seq, record->end_seq, tx_info->prev_seq, data_len);
/* if a tls record is finishing in this SKB */
if (tls_end_offset <= data_len) {
- struct sk_buff *nskb = NULL;
-
- if (tls_end_offset < data_len) {
- nskb = alloc_skb(0, GFP_KERNEL);
- if (unlikely(!nskb)) {
- ret = -ENOMEM;
- goto clear_ref;
- }
-
- chcr_ktls_skb_shift(nskb, local_skb,
- tls_end_offset);
- } else {
- /* its the only record in this skb, directly
- * point it.
- */
- nskb = local_skb;
- }
- ret = chcr_end_part_handler(tx_info, nskb, record,
+ ret = chcr_end_part_handler(tx_info, skb, record,
tcp_seq, mss,
(!th->fin && th->psh), q,
+ skb_offset,
tls_end_offset,
- (nskb == local_skb));
-
- if (ret && nskb != local_skb)
- dev_kfree_skb_any(local_skb);
+ skb_offset +
+ tls_end_offset == skb->len);
data_len -= tls_end_offset;
/* tcp_seq increment is required to handle next record.
*/
tcp_seq += tls_end_offset;
+ skb_offset += tls_end_offset;
} else {
- ret = chcr_short_record_handler(tx_info, local_skb,
+ ret = chcr_short_record_handler(tx_info, skb,
record, tcp_seq, mss,
(!th->fin && th->psh),
+ data_len, skb_offset,
q, tls_end_offset);
data_len = 0;
}
-clear_ref:
+
/* clear the frag ref count which increased locally before */
for (i = 0; i < record->num_frags; i++) {
/* clear the frag ref count */
__skb_frag_unref(&record->frags[i]);
}
/* if any failure, come out from the loop. */
- if (ret)
- goto out;
+ if (ret) {
+ if (th->fin)
+ dev_kfree_skb_any(skb);
+
+ if (ret == FALLBACK)
+ return chcr_ktls_sw_fallback(skb, tx_info, q);
+
+ return NETDEV_TX_OK;
+ }
+
/* length should never be less than 0 */
WARN_ON(data_len < 0);
} while (data_len > 0);
- tx_info->prev_seq = ntohl(th->seq) + skb->data_len;
atomic64_inc(&port_stats->ktls_tx_encrypted_packets);
- atomic64_add(skb->data_len, &port_stats->ktls_tx_encrypted_bytes);
+ atomic64_add(skb_data_len, &port_stats->ktls_tx_encrypted_bytes);
/* tcp finish is set, send a separate tcp msg including all the options
* as well.
*/
- if (th->fin)
+ if (th->fin) {
chcr_ktls_write_tcp_options(tx_info, skb, q, tx_info->tx_chan);
+ dev_kfree_skb_any(skb);
+ }
+ return NETDEV_TX_OK;
out:
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h
index c1651b1431a0..18b3b1f02415 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h
+++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h
@@ -26,6 +26,7 @@
#define CHCR_KTLS_WR_SIZE (CHCR_PLAIN_TX_DATA_LEN +\
sizeof(struct cpl_tx_sec_pdu))
+#define FALLBACK 35
enum ch_ktls_open_state {
CH_KTLS_OPEN_SUCCESS = 0,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index c96e2f2d4cba..4919d22d7b6b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -2713,6 +2713,10 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
spin_unlock_bh(&vsi->mac_filter_hash_lock);
goto error_param;
}
+ if (is_valid_ether_addr(al->list[i].addr) &&
+ is_zero_ether_addr(vf->default_lan_addr.addr))
+ ether_addr_copy(vf->default_lan_addr.addr,
+ al->list[i].addr);
}
}
spin_unlock_bh(&vsi->mac_filter_hash_lock);
@@ -2740,6 +2744,7 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
{
struct virtchnl_ether_addr_list *al =
(struct virtchnl_ether_addr_list *)msg;
+ bool was_unimac_deleted = false;
struct i40e_pf *pf = vf->pf;
struct i40e_vsi *vsi = NULL;
i40e_status ret = 0;
@@ -2759,6 +2764,8 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
ret = I40E_ERR_INVALID_MAC_ADDR;
goto error_param;
}
+ if (ether_addr_equal(al->list[i].addr, vf->default_lan_addr.addr))
+ was_unimac_deleted = true;
}
vsi = pf->vsi[vf->lan_vsi_idx];
@@ -2779,10 +2786,25 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
dev_err(&pf->pdev->dev, "Unable to program VF %d MAC filters, error %d\n",
vf->vf_id, ret);
+ if (vf->trusted && was_unimac_deleted) {
+ struct i40e_mac_filter *f;
+ struct hlist_node *h;
+ u8 *macaddr = NULL;
+ int bkt;
+
+ /* set last unicast mac address as default */
+ spin_lock_bh(&vsi->mac_filter_hash_lock);
+ hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+ if (is_valid_ether_addr(f->macaddr))
+ macaddr = f->macaddr;
+ }
+ if (macaddr)
+ ether_addr_copy(vf->default_lan_addr.addr, macaddr);
+ spin_unlock_bh(&vsi->mac_filter_hash_lock);
+ }
error_param:
/* send the response to the VF */
- return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DEL_ETH_ADDR,
- ret);
+ return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DEL_ETH_ADDR, ret);
}
/**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 6acede0acdca..567fd67e900e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -281,8 +281,8 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
unsigned int xdp_res, xdp_xmit = 0;
+ bool failure = false;
struct sk_buff *skb;
- bool failure;
while (likely(total_rx_packets < (unsigned int)budget)) {
union i40e_rx_desc *rx_desc;
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 9112dff075cf..b673ac1199bb 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -3891,21 +3891,23 @@ static int igc_change_mtu(struct net_device *netdev, int new_mtu)
}
/**
- * igc_get_stats - Get System Network Statistics
+ * igc_get_stats64 - Get System Network Statistics
* @netdev: network interface device structure
+ * @stats: rtnl_link_stats64 pointer
*
* Returns the address of the device statistics structure.
* The statistics are updated here and also from the timer callback.
*/
-static struct net_device_stats *igc_get_stats(struct net_device *netdev)
+static void igc_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
{
struct igc_adapter *adapter = netdev_priv(netdev);
+ spin_lock(&adapter->stats64_lock);
if (!test_bit(__IGC_RESETTING, &adapter->state))
igc_update_stats(adapter);
-
- /* only return the current stats */
- return &netdev->stats;
+ memcpy(stats, &adapter->stats64, sizeof(*stats));
+ spin_unlock(&adapter->stats64_lock);
}
static netdev_features_t igc_fix_features(struct net_device *netdev,
@@ -4855,7 +4857,7 @@ static const struct net_device_ops igc_netdev_ops = {
.ndo_set_rx_mode = igc_set_rx_mode,
.ndo_set_mac_address = igc_set_mac,
.ndo_change_mtu = igc_change_mtu,
- .ndo_get_stats = igc_get_stats,
+ .ndo_get_stats64 = igc_get_stats64,
.ndo_fix_features = igc_fix_features,
.ndo_set_features = igc_set_features,
.ndo_features_check = igc_features_check,
diff --git a/drivers/net/ethernet/marvell/prestera/Kconfig b/drivers/net/ethernet/marvell/prestera/Kconfig
index b1fcc44f566a..b6f20e2034c6 100644
--- a/drivers/net/ethernet/marvell/prestera/Kconfig
+++ b/drivers/net/ethernet/marvell/prestera/Kconfig
@@ -6,6 +6,7 @@
config PRESTERA
tristate "Marvell Prestera Switch ASICs support"
depends on NET_SWITCHDEV && VLAN_8021Q
+ depends on BRIDGE || BRIDGE=n
select NET_DEVLINK
help
This driver supports Marvell Prestera Switch ASICs family.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index e36e505d38ad..d29af7b9c695 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -107,12 +107,16 @@ void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
mlx5e_tc_encap_flows_del(priv, e, &flow_list);
if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
+ struct net_device *route_dev;
+
ether_addr_copy(e->h_dest, ha);
ether_addr_copy(eth->h_dest, ha);
/* Update the encap source mac, in case that we delete
* the flows when encap source mac changed.
*/
- ether_addr_copy(eth->h_source, e->route_dev->dev_addr);
+ route_dev = __dev_get_by_index(dev_net(priv->netdev), e->route_dev_ifindex);
+ if (route_dev)
+ ether_addr_copy(eth->h_source, route_dev->dev_addr);
mlx5e_tc_encap_flows_add(priv, e, &flow_list);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index 7cce85faa16f..90930e54b6f2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -77,13 +77,13 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv,
return 0;
}
-static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
- struct net_device *mirred_dev,
- struct net_device **out_dev,
- struct net_device **route_dev,
- struct flowi4 *fl4,
- struct neighbour **out_n,
- u8 *out_ttl)
+static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct net_device **out_dev,
+ struct net_device **route_dev,
+ struct flowi4 *fl4,
+ struct neighbour **out_n,
+ u8 *out_ttl)
{
struct neighbour *n;
struct rtable *rt;
@@ -117,18 +117,28 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
ip_rt_put(rt);
return ret;
}
+ dev_hold(*route_dev);
if (!(*out_ttl))
*out_ttl = ip4_dst_hoplimit(&rt->dst);
n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
ip_rt_put(rt);
- if (!n)
+ if (!n) {
+ dev_put(*route_dev);
return -ENOMEM;
+ }
*out_n = n;
return 0;
}
+static void mlx5e_route_lookup_ipv4_put(struct net_device *route_dev,
+ struct neighbour *n)
+{
+ neigh_release(n);
+ dev_put(route_dev);
+}
+
static const char *mlx5e_netdev_kind(struct net_device *dev)
{
if (dev->rtnl_link_ops)
@@ -193,8 +203,8 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
fl4.saddr = tun_key->u.ipv4.src;
ttl = tun_key->ttl;
- err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, &route_dev,
- &fl4, &n, &ttl);
+ err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &out_dev, &route_dev,
+ &fl4, &n, &ttl);
if (err)
return err;
@@ -223,7 +233,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
e->m_neigh.family = n->ops->family;
memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
e->out_dev = out_dev;
- e->route_dev = route_dev;
+ e->route_dev_ifindex = route_dev->ifindex;
/* It's important to add the neigh to the hash table before checking
* the neigh validity state. So if we'll get a notification, in case the
@@ -278,7 +288,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
e->flags |= MLX5_ENCAP_ENTRY_VALID;
mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
- neigh_release(n);
+ mlx5e_route_lookup_ipv4_put(route_dev, n);
return err;
destroy_neigh_entry:
@@ -286,18 +296,18 @@ destroy_neigh_entry:
free_encap:
kfree(encap_header);
release_neigh:
- neigh_release(n);
+ mlx5e_route_lookup_ipv4_put(route_dev, n);
return err;
}
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
-static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
- struct net_device *mirred_dev,
- struct net_device **out_dev,
- struct net_device **route_dev,
- struct flowi6 *fl6,
- struct neighbour **out_n,
- u8 *out_ttl)
+static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct net_device **out_dev,
+ struct net_device **route_dev,
+ struct flowi6 *fl6,
+ struct neighbour **out_n,
+ u8 *out_ttl)
{
struct dst_entry *dst;
struct neighbour *n;
@@ -318,15 +328,25 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
return ret;
}
+ dev_hold(*route_dev);
n = dst_neigh_lookup(dst, &fl6->daddr);
dst_release(dst);
- if (!n)
+ if (!n) {
+ dev_put(*route_dev);
return -ENOMEM;
+ }
*out_n = n;
return 0;
}
+static void mlx5e_route_lookup_ipv6_put(struct net_device *route_dev,
+ struct neighbour *n)
+{
+ neigh_release(n);
+ dev_put(route_dev);
+}
+
int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
struct mlx5e_encap_entry *e)
@@ -348,8 +368,8 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
fl6.daddr = tun_key->u.ipv6.dst;
fl6.saddr = tun_key->u.ipv6.src;
- err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, &route_dev,
- &fl6, &n, &ttl);
+ err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &out_dev, &route_dev,
+ &fl6, &n, &ttl);
if (err)
return err;
@@ -378,7 +398,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
e->m_neigh.family = n->ops->family;
memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
e->out_dev = out_dev;
- e->route_dev = route_dev;
+ e->route_dev_ifindex = route_dev->ifindex;
/* It's importent to add the neigh to the hash table before checking
* the neigh validity state. So if we'll get a notification, in case the
@@ -433,7 +453,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
e->flags |= MLX5_ENCAP_ENTRY_VALID;
mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
- neigh_release(n);
+ mlx5e_route_lookup_ipv6_put(route_dev, n);
return err;
destroy_neigh_entry:
@@ -441,7 +461,7 @@ destroy_neigh_entry:
free_encap:
kfree(encap_header);
release_neigh:
- neigh_release(n);
+ mlx5e_route_lookup_ipv6_put(route_dev, n);
return err;
}
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index 4e574ac73019..be3465ba38ca 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -122,9 +122,9 @@ void mlx5e_activate_xsk(struct mlx5e_channel *c)
set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
/* TX queue is created active. */
- spin_lock(&c->async_icosq_lock);
+ spin_lock_bh(&c->async_icosq_lock);
mlx5e_trigger_irq(&c->async_icosq);
- spin_unlock(&c->async_icosq_lock);
+ spin_unlock_bh(&c->async_icosq_lock);
}
void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
index fb671a457129..8e96260fce1d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -36,9 +36,9 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
if (test_and_set_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->async_icosq.state))
return 0;
- spin_lock(&c->async_icosq_lock);
+ spin_lock_bh(&c->async_icosq_lock);
mlx5e_trigger_irq(&c->async_icosq);
- spin_unlock(&c->async_icosq_lock);
+ spin_unlock_bh(&c->async_icosq_lock);
}
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
index ccaccb9fc2f7..7f6221b8b1f7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
@@ -188,7 +188,7 @@ static int post_rx_param_wqes(struct mlx5e_channel *c,
err = 0;
sq = &c->async_icosq;
- spin_lock(&c->async_icosq_lock);
+ spin_lock_bh(&c->async_icosq_lock);
cseg = post_static_params(sq, priv_rx);
if (IS_ERR(cseg))
@@ -199,7 +199,7 @@ static int post_rx_param_wqes(struct mlx5e_channel *c,
mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
unlock:
- spin_unlock(&c->async_icosq_lock);
+ spin_unlock_bh(&c->async_icosq_lock);
return err;
@@ -265,10 +265,10 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq,
BUILD_BUG_ON(MLX5E_KTLS_GET_PROGRESS_WQEBBS != 1);
- spin_lock(&sq->channel->async_icosq_lock);
+ spin_lock_bh(&sq->channel->async_icosq_lock);
if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) {
- spin_unlock(&sq->channel->async_icosq_lock);
+ spin_unlock_bh(&sq->channel->async_icosq_lock);
err = -ENOSPC;
goto err_dma_unmap;
}
@@ -299,7 +299,7 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq,
icosq_fill_wi(sq, pi, &wi);
sq->pc++;
mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
- spin_unlock(&sq->channel->async_icosq_lock);
+ spin_unlock_bh(&sq->channel->async_icosq_lock);
return 0;
@@ -360,7 +360,7 @@ static int resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_rx
err = 0;
sq = &c->async_icosq;
- spin_lock(&c->async_icosq_lock);
+ spin_lock_bh(&c->async_icosq_lock);
cseg = post_static_params(sq, priv_rx);
if (IS_ERR(cseg)) {
@@ -372,7 +372,7 @@ static int resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_rx
mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
priv_rx->stats->tls_resync_res_ok++;
unlock:
- spin_unlock(&c->async_icosq_lock);
+ spin_unlock_bh(&c->async_icosq_lock);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b3f02aac7f26..ebce97921e03 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -5253,6 +5253,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
mlx5e_disable_async_events(priv);
mlx5_lag_remove(mdev);
+ mlx5_vxlan_reset_to_default(mdev->vxlan);
}
int mlx5e_update_nic_rx(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 9020d1419bcf..8932c387d46a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -186,7 +186,7 @@ struct mlx5e_encap_entry {
unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
struct net_device *out_dev;
- struct net_device *route_dev;
+ int route_dev_ifindex;
struct mlx5e_tc_tunnel *tunnel;
int reformat_type;
u8 flags;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 599f5b5ebc97..6628a0197b4e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1584,7 +1584,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
} while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq)));
out:
- if (rq->xdp_prog)
+ if (rcu_access_pointer(rq->xdp_prog))
mlx5e_xdp_rx_poll_complete(rq);
mlx5_cqwq_update_db_record(cqwq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index e3a968e9e2a0..2e2fa0440032 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -4658,6 +4658,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
return flow;
err_free:
+ dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
mlx5e_flow_put(priv, flow);
out:
return ERR_PTR(err);
@@ -4802,6 +4803,7 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv,
return 0;
err_free:
+ dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
mlx5e_flow_put(priv, flow);
out:
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 6e6a9a563992..e8e6294c7cca 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1902,8 +1902,6 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink,
ether_addr_copy(hw_addr, vport->info.mac);
*hw_addr_len = ETH_ALEN;
err = 0;
- } else {
- NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled");
}
mutex_unlock(&esw->state_lock);
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 16091838bfcf..325a5b0d6829 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -2010,10 +2010,11 @@ void mlx5_del_flow_rules(struct mlx5_flow_handle *handle)
down_write_ref_node(&fte->node, false);
for (i = handle->num_rules - 1; i >= 0; i--)
tree_remove_node(&handle->rule[i]->node, true);
- if (fte->modify_mask && fte->dests_size) {
- modify_fte(fte);
+ if (fte->dests_size) {
+ if (fte->modify_mask)
+ modify_fte(fte);
up_write_ref_node(&fte->node, false);
- } else {
+ } else if (list_empty(&fte->node.children)) {
del_hw_fte(&fte->node);
/* Avoid double call to del_hw_fte */
fte->node.del_hw_func = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
index 3315afe2f8dc..38084400ee8f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
@@ -168,6 +168,17 @@ struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev)
void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan)
{
+ if (!mlx5_vxlan_allowed(vxlan))
+ return;
+
+ mlx5_vxlan_del_port(vxlan, IANA_VXLAN_UDP_PORT);
+ WARN_ON(!hash_empty(vxlan->htable));
+
+ kfree(vxlan);
+}
+
+void mlx5_vxlan_reset_to_default(struct mlx5_vxlan *vxlan)
+{
struct mlx5_vxlan_port *vxlanp;
struct hlist_node *tmp;
int bkt;
@@ -175,12 +186,12 @@ void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan)
if (!mlx5_vxlan_allowed(vxlan))
return;
- /* Lockless since we are the only hash table consumers*/
hash_for_each_safe(vxlan->htable, bkt, tmp, vxlanp, hlist) {
- hash_del(&vxlanp->hlist);
- mlx5_vxlan_core_del_port_cmd(vxlan->mdev, vxlanp->udp_port);
- kfree(vxlanp);
+ /* Don't delete default UDP port added by the HW.
+ * Remove only user configured ports
+ */
+ if (vxlanp->udp_port == IANA_VXLAN_UDP_PORT)
+ continue;
+ mlx5_vxlan_del_port(vxlan, vxlanp->udp_port);
}
-
- kfree(vxlan);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h
index ec766529f49b..34ef662da35e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h
@@ -56,6 +56,7 @@ void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan);
int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port);
int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port);
bool mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port);
+void mlx5_vxlan_reset_to_default(struct mlx5_vxlan *vxlan);
#else
static inline struct mlx5_vxlan*
mlx5_vxlan_create(struct mlx5_core_dev *mdev) { return ERR_PTR(-EOPNOTSUPP); }
@@ -63,6 +64,7 @@ static inline void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan) { return; }
static inline int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port) { return -EOPNOTSUPP; }
static inline int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port) { return -EOPNOTSUPP; }
static inline bool mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port) { return false; }
+static inline void mlx5_vxlan_reset_to_default(struct mlx5_vxlan *vxlan) { return; }
#endif
#endif /* __MLX5_VXLAN_H__ */
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index a1938842f828..e2c99d909247 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -674,14 +674,12 @@ clean_up:
static int lan743x_dp_write(struct lan743x_adapter *adapter,
u32 select, u32 addr, u32 length, u32 *buf)
{
- int ret = -EIO;
u32 dp_sel;
int i;
- mutex_lock(&adapter->dp_lock);
if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_,
1, 40, 100, 100))
- goto unlock;
+ return -EIO;
dp_sel = lan743x_csr_read(adapter, DP_SEL);
dp_sel &= ~DP_SEL_MASK_;
dp_sel |= select;
@@ -693,13 +691,10 @@ static int lan743x_dp_write(struct lan743x_adapter *adapter,
lan743x_csr_write(adapter, DP_CMD, DP_CMD_WRITE_);
if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_,
1, 40, 100, 100))
- goto unlock;
+ return -EIO;
}
- ret = 0;
-unlock:
- mutex_unlock(&adapter->dp_lock);
- return ret;
+ return 0;
}
static u32 lan743x_mac_mii_access(u16 id, u16 index, int read)
@@ -1019,16 +1014,16 @@ static void lan743x_phy_close(struct lan743x_adapter *adapter)
static int lan743x_phy_open(struct lan743x_adapter *adapter)
{
struct lan743x_phy *phy = &adapter->phy;
+ struct phy_device *phydev = NULL;
struct device_node *phynode;
- struct phy_device *phydev;
struct net_device *netdev;
int ret = -EIO;
netdev = adapter->netdev;
phynode = of_node_get(adapter->pdev->dev.of_node);
- adapter->phy_mode = PHY_INTERFACE_MODE_GMII;
if (phynode) {
+ /* try devicetree phy, or fixed link */
of_get_phy_mode(phynode, &adapter->phy_mode);
if (of_phy_is_fixed_link(phynode)) {
@@ -1044,13 +1039,15 @@ static int lan743x_phy_open(struct lan743x_adapter *adapter)
lan743x_phy_link_status_change, 0,
adapter->phy_mode);
of_node_put(phynode);
- if (!phydev)
- goto return_error;
- } else {
+ }
+
+ if (!phydev) {
+ /* try internal phy */
phydev = phy_find_first(adapter->mdiobus);
if (!phydev)
goto return_error;
+ adapter->phy_mode = PHY_INTERFACE_MODE_GMII;
ret = phy_connect_direct(netdev, phydev,
lan743x_phy_link_status_change,
adapter->phy_mode);
@@ -2733,7 +2730,6 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter,
adapter->intr.irq = adapter->pdev->irq;
lan743x_csr_write(adapter, INT_EN_CLR, 0xFFFFFFFF);
- mutex_init(&adapter->dp_lock);
ret = lan743x_gpio_init(adapter);
if (ret)
diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h
index c61a40411317..a536f4a4994d 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.h
+++ b/drivers/net/ethernet/microchip/lan743x_main.h
@@ -712,9 +712,6 @@ struct lan743x_adapter {
struct lan743x_csr csr;
struct lan743x_intr intr;
- /* lock, used to prevent concurrent access to data port */
- struct mutex dp_lock;
-
struct lan743x_gpio gpio;
struct lan743x_ptp ptp;
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 7766d73823eb..85d9c3e30c69 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -4163,7 +4163,8 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp,
opts[1] |= transport_offset << TCPHO_SHIFT;
} else {
if (unlikely(skb->len < ETH_ZLEN && rtl_test_hw_pad_bug(tp)))
- return !eth_skb_pad(skb);
+ /* eth_skb_pad would free the skb on error */
+ return !__skb_put_padto(skb, ETH_ZLEN, false);
}
return true;
@@ -4342,18 +4343,9 @@ static netdev_features_t rtl8169_features_check(struct sk_buff *skb,
rtl_chip_supports_csum_v2(tp))
features &= ~NETIF_F_ALL_TSO;
} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
- if (skb->len < ETH_ZLEN) {
- switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_11:
- case RTL_GIGA_MAC_VER_12:
- case RTL_GIGA_MAC_VER_17:
- case RTL_GIGA_MAC_VER_34:
- features &= ~NETIF_F_CSUM_MASK;
- break;
- default:
- break;
- }
- }
+ /* work around hw bug on some chip versions */
+ if (skb->len < ETH_ZLEN)
+ features &= ~NETIF_F_CSUM_MASK;
if (transport_offset > TCPHO_MAX &&
rtl_chip_supports_csum_v2(tp))
diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index fb1db713b7fb..575580d3ffe0 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -551,6 +551,8 @@ static struct phy_driver realtek_drvs[] = {
{
PHY_ID_MATCH_EXACT(0x00008201),
.name = "RTL8201CP Ethernet",
+ .read_page = rtl821x_read_page,
+ .write_page = rtl821x_write_page,
}, {
PHY_ID_MATCH_EXACT(0x001cc816),
.name = "RTL8201F Fast Ethernet",
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 60c1aadece89..f2793ffde191 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -608,8 +608,7 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
return ret;
}
-static int vrf_finish_direct(struct net *net, struct sock *sk,
- struct sk_buff *skb)
+static void vrf_finish_direct(struct sk_buff *skb)
{
struct net_device *vrf_dev = skb->dev;
@@ -628,7 +627,8 @@ static int vrf_finish_direct(struct net *net, struct sock *sk,
skb_pull(skb, ETH_HLEN);
}
- return 1;
+ /* reset skb device */
+ nf_reset_ct(skb);
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -707,15 +707,41 @@ static struct sk_buff *vrf_ip6_out_redirect(struct net_device *vrf_dev,
return skb;
}
+static int vrf_output6_direct_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ vrf_finish_direct(skb);
+
+ return vrf_ip6_local_out(net, sk, skb);
+}
+
static int vrf_output6_direct(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
+ int err = 1;
+
skb->protocol = htons(ETH_P_IPV6);
- return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
- net, sk, skb, NULL, skb->dev,
- vrf_finish_direct,
- !(IPCB(skb)->flags & IPSKB_REROUTED));
+ if (!(IPCB(skb)->flags & IPSKB_REROUTED))
+ err = nf_hook(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb,
+ NULL, skb->dev, vrf_output6_direct_finish);
+
+ if (likely(err == 1))
+ vrf_finish_direct(skb);
+
+ return err;
+}
+
+static int vrf_ip6_out_direct_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ int err;
+
+ err = vrf_output6_direct(net, sk, skb);
+ if (likely(err == 1))
+ err = vrf_ip6_local_out(net, sk, skb);
+
+ return err;
}
static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev,
@@ -728,18 +754,15 @@ static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev,
skb->dev = vrf_dev;
err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk,
- skb, NULL, vrf_dev, vrf_output6_direct);
+ skb, NULL, vrf_dev, vrf_ip6_out_direct_finish);
if (likely(err == 1))
err = vrf_output6_direct(net, sk, skb);
- /* reset skb device */
if (likely(err == 1))
- nf_reset_ct(skb);
- else
- skb = NULL;
+ return skb;
- return skb;
+ return NULL;
}
static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
@@ -919,15 +942,41 @@ static struct sk_buff *vrf_ip_out_redirect(struct net_device *vrf_dev,
return skb;
}
+static int vrf_output_direct_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ vrf_finish_direct(skb);
+
+ return vrf_ip_local_out(net, sk, skb);
+}
+
static int vrf_output_direct(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
+ int err = 1;
+
skb->protocol = htons(ETH_P_IP);
- return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
- net, sk, skb, NULL, skb->dev,
- vrf_finish_direct,
- !(IPCB(skb)->flags & IPSKB_REROUTED));
+ if (!(IPCB(skb)->flags & IPSKB_REROUTED))
+ err = nf_hook(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb,
+ NULL, skb->dev, vrf_output_direct_finish);
+
+ if (likely(err == 1))
+ vrf_finish_direct(skb);
+
+ return err;
+}
+
+static int vrf_ip_out_direct_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ int err;
+
+ err = vrf_output_direct(net, sk, skb);
+ if (likely(err == 1))
+ err = vrf_ip_local_out(net, sk, skb);
+
+ return err;
}
static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev,
@@ -940,18 +989,15 @@ static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev,
skb->dev = vrf_dev;
err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
- skb, NULL, vrf_dev, vrf_output_direct);
+ skb, NULL, vrf_dev, vrf_ip_out_direct_finish);
if (likely(err == 1))
err = vrf_output_direct(net, sk, skb);
- /* reset skb device */
if (likely(err == 1))
- nf_reset_ct(skb);
- else
- skb = NULL;
+ return skb;
- return skb;
+ return NULL;
}
static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index f8aed0696d77..2369ca250cd6 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -889,6 +889,7 @@ static ssize_t cosa_write(struct file *file,
chan->tx_status = 1;
spin_unlock_irqrestore(&cosa->lock, flags);
up(&chan->wsem);
+ kfree(kbuf);
return -ERESTARTSYS;
}
}
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index d1e3c6896b71..5deb37024574 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -175,5 +175,3 @@
#else
#define __diag_GCC_8(s)
#endif
-
-#define __no_fgcse __attribute__((optimize("-fno-gcse")))
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 6e390d58a9f8..ac3fa37a84f9 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -247,10 +247,6 @@ struct ftrace_likely_data {
#define asm_inline asm
#endif
-#ifndef __no_fgcse
-# define __no_fgcse
-#endif
-
/* Are two types/vars the same type (ignoring qualifiers)? */
#define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 72d62cbc1578..1b62397bd124 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -558,21 +558,21 @@ struct sk_filter {
DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
#define __BPF_PROG_RUN(prog, ctx, dfunc) ({ \
- u32 ret; \
+ u32 __ret; \
cant_migrate(); \
if (static_branch_unlikely(&bpf_stats_enabled_key)) { \
- struct bpf_prog_stats *stats; \
- u64 start = sched_clock(); \
- ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \
- stats = this_cpu_ptr(prog->aux->stats); \
- u64_stats_update_begin(&stats->syncp); \
- stats->cnt++; \
- stats->nsecs += sched_clock() - start; \
- u64_stats_update_end(&stats->syncp); \
+ struct bpf_prog_stats *__stats; \
+ u64 __start = sched_clock(); \
+ __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \
+ __stats = this_cpu_ptr(prog->aux->stats); \
+ u64_stats_update_begin(&__stats->syncp); \
+ __stats->cnt++; \
+ __stats->nsecs += sched_clock() - __start; \
+ u64_stats_update_end(&__stats->syncp); \
} else { \
- ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \
+ __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \
} \
- ret; })
+ __ret; })
#define BPF_PROG_RUN(prog, ctx) \
__BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func)
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 0140d086dc84..01755b838c74 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -86,7 +86,7 @@ int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
void xp_destroy(struct xsk_buff_pool *pool);
void xp_release(struct xdp_buff_xsk *xskb);
void xp_get_pool(struct xsk_buff_pool *pool);
-void xp_put_pool(struct xsk_buff_pool *pool);
+bool xp_put_pool(struct xsk_buff_pool *pool);
void xp_clear_dev(struct xsk_buff_pool *pool);
void xp_add_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs);
void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs);
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index bdc8cd1b6767..c1b9f71ee6aa 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1,6 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
obj-y := core.o
-CFLAGS_core.o += $(call cc-disable-warning, override-init)
+ifneq ($(CONFIG_BPF_JIT_ALWAYS_ON),y)
+# ___bpf_prog_run() needs GCSE disabled on x86; see 3193c0836f203 for details
+cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse
+endif
+CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 78ea8a7bd27f..56cc5a915f67 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -13,6 +13,7 @@
#include <linux/bpf_verifier.h>
#include <net/bpf_sk_storage.h>
#include <linux/bpf_local_storage.h>
+#include <linux/btf_ids.h>
/* For every LSM hook that allows attachment of BPF programs, declare a nop
* function where a BPF program can be attached.
@@ -26,7 +27,11 @@ noinline RET bpf_lsm_##NAME(__VA_ARGS__) \
#include <linux/lsm_hook_defs.h>
#undef LSM_HOOK
-#define BPF_LSM_SYM_PREFX "bpf_lsm_"
+#define LSM_HOOK(RET, DEFAULT, NAME, ...) BTF_ID(func, bpf_lsm_##NAME)
+BTF_SET_START(bpf_lsm_hooks)
+#include <linux/lsm_hook_defs.h>
+#undef LSM_HOOK
+BTF_SET_END(bpf_lsm_hooks)
int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
const struct bpf_prog *prog)
@@ -37,8 +42,7 @@ int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
return -EINVAL;
}
- if (strncmp(BPF_LSM_SYM_PREFX, prog->aux->attach_func_name,
- sizeof(BPF_LSM_SYM_PREFX) - 1)) {
+ if (!btf_id_set_contains(&bpf_lsm_hooks, prog->aux->attach_btf_id)) {
bpf_log(vlog, "attach_btf_id %u points to wrong type name %s\n",
prog->aux->attach_btf_id, prog->aux->attach_func_name);
return -EINVAL;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 9268d77898b7..55454d2278b1 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1369,7 +1369,7 @@ u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
*
* Decode and execute eBPF instructions.
*/
-static u64 __no_fgcse ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
+static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
{
#define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y
#define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 1815e97d4c9c..1fccba6e88c4 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -821,6 +821,32 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
}
}
+static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
+ void *value, bool onallcpus)
+{
+ /* When using prealloc and not setting the initial value on all cpus,
+ * zero-fill element values for other cpus (just as what happens when
+ * not using prealloc). Otherwise, bpf program has no way to ensure
+ * known initial values for cpus other than current one
+ * (onallcpus=false always when coming from bpf prog).
+ */
+ if (htab_is_prealloc(htab) && !onallcpus) {
+ u32 size = round_up(htab->map.value_size, 8);
+ int current_cpu = raw_smp_processor_id();
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ if (cpu == current_cpu)
+ bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value,
+ size);
+ else
+ memset(per_cpu_ptr(pptr, cpu), 0, size);
+ }
+ } else {
+ pcpu_copy_value(htab, pptr, value, onallcpus);
+ }
+}
+
static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
{
return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS &&
@@ -891,7 +917,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
}
}
- pcpu_copy_value(htab, pptr, value, onallcpus);
+ pcpu_init_value(htab, pptr, value, onallcpus);
if (!prealloc)
htab_elem_set_ptr(l_new, key_size, pptr);
@@ -1183,7 +1209,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
value, onallcpus);
} else {
- pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
+ pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
value, onallcpus);
hlist_nulls_add_head_rcu(&l_new->hash_node, head);
l_new = NULL;
diff --git a/kernel/bpf/preload/Kconfig b/kernel/bpf/preload/Kconfig
index ace49111d3a3..26bced262473 100644
--- a/kernel/bpf/preload/Kconfig
+++ b/kernel/bpf/preload/Kconfig
@@ -6,6 +6,7 @@ config USERMODE_DRIVER
menuconfig BPF_PRELOAD
bool "Preload BPF file system with kernel specific program and map iterators"
depends on BPF
+ depends on BPF_SYSCALL
# The dependency on !COMPILE_TEST prevents it from being enabled
# in allmodconfig or allyesconfig configurations
depends on !COMPILE_TEST
diff --git a/net/core/devlink.c b/net/core/devlink.c
index a932d95be798..ab4b1368904f 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -8254,8 +8254,6 @@ static int __devlink_port_attrs_set(struct devlink_port *devlink_port,
{
struct devlink_port_attrs *attrs = &devlink_port->attrs;
- if (WARN_ON(devlink_port->registered))
- return -EEXIST;
devlink_port->attrs_set = true;
attrs->flavour = flavour;
if (attrs->switch_id.id_len) {
@@ -8279,6 +8277,8 @@ void devlink_port_attrs_set(struct devlink_port *devlink_port,
{
int ret;
+ if (WARN_ON(devlink_port->registered))
+ return;
devlink_port->attrs = *attrs;
ret = __devlink_port_attrs_set(devlink_port, attrs->flavour);
if (ret)
@@ -8301,6 +8301,8 @@ void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 contro
struct devlink_port_attrs *attrs = &devlink_port->attrs;
int ret;
+ if (WARN_ON(devlink_port->registered))
+ return;
ret = __devlink_port_attrs_set(devlink_port,
DEVLINK_PORT_FLAVOUR_PCI_PF);
if (ret)
@@ -8326,6 +8328,8 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro
struct devlink_port_attrs *attrs = &devlink_port->attrs;
int ret;
+ if (WARN_ON(devlink_port->registered))
+ return;
ret = __devlink_port_attrs_set(devlink_port,
DEVLINK_PORT_FLAVOUR_PCI_VF);
if (ret)
diff --git a/net/ethtool/features.c b/net/ethtool/features.c
index 8ee4cdbd6b82..1c9f4df273bd 100644
--- a/net/ethtool/features.c
+++ b/net/ethtool/features.c
@@ -280,7 +280,7 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info)
active_diff_mask, compact);
}
if (mod)
- ethtool_notify(dev, ETHTOOL_MSG_FEATURES_NTF, NULL);
+ netdev_features_change(dev);
out_rtnl:
rtnl_unlock();
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 25f1caf5abf9..e25be2d01a7a 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -263,7 +263,7 @@ static int iptunnel_pmtud_check_icmp(struct sk_buff *skb, int mtu)
const struct icmphdr *icmph = icmp_hdr(skb);
const struct iphdr *iph = ip_hdr(skb);
- if (mtu <= 576 || iph->frag_off != htons(IP_DF))
+ if (mtu < 576 || iph->frag_off != htons(IP_DF))
return 0;
if (ipv4_is_lbcast(iph->daddr) || ipv4_is_multicast(iph->daddr) ||
@@ -359,7 +359,7 @@ static int iptunnel_pmtud_check_icmpv6(struct sk_buff *skb, int mtu)
__be16 frag_off;
int offset;
- if (mtu <= IPV6_MIN_MTU)
+ if (mtu < IPV6_MIN_MTU)
return 0;
if (stype == IPV6_ADDR_ANY || stype == IPV6_ADDR_MULTICAST ||
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 6ac473b47f30..00dc3f943c80 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -331,7 +331,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
__u32 cookie = ntohl(th->ack_seq) - 1;
struct sock *ret = sk;
struct request_sock *req;
- int mss;
+ int full_space, mss;
struct rtable *rt;
__u8 rcv_wscale;
struct flowi4 fl4;
@@ -427,8 +427,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
/* Try to redo what tcp_v4_send_synack did. */
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
+ /* limit the window selection if the user enforce a smaller rx buffer */
+ full_space = tcp_full_space(sk);
+ if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+ (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
+ req->rsk_window_clamp = full_space;
- tcp_select_initial_window(sk, tcp_full_space(sk), req->mss,
+ tcp_select_initial_window(sk, full_space, req->mss,
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(&rt->dst, RTAX_INITRWND));
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index e67a66fbf27b..c62805cd3131 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -366,7 +366,7 @@ out:
static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
struct sk_buff *skb)
{
- struct udphdr *uh = udp_hdr(skb);
+ struct udphdr *uh = udp_gro_udphdr(skb);
struct sk_buff *pp = NULL;
struct udphdr *uh2;
struct sk_buff *p;
@@ -500,12 +500,22 @@ out:
}
EXPORT_SYMBOL(udp_gro_receive);
+static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
+ __be16 dport)
+{
+ const struct iphdr *iph = skb_gro_network_header(skb);
+
+ return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
+ iph->daddr, dport, inet_iif(skb),
+ inet_sdif(skb), &udp_table, NULL);
+}
+
INDIRECT_CALLABLE_SCOPE
struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
{
struct udphdr *uh = udp_gro_udphdr(skb);
+ struct sock *sk = NULL;
struct sk_buff *pp;
- struct sock *sk;
if (unlikely(!uh))
goto flush;
@@ -523,7 +533,10 @@ struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
skip:
NAPI_GRO_CB(skb)->is_ipv6 = 0;
rcu_read_lock();
- sk = static_branch_unlikely(&udp_encap_needed_key) ? udp4_lib_lookup_skb(skb, uh->source, uh->dest) : NULL;
+
+ if (static_branch_unlikely(&udp_encap_needed_key))
+ sk = udp4_gro_lookup_skb(skb, uh->source, uh->dest);
+
pp = udp_gro_receive(head, skb, uh, sk);
rcu_read_unlock();
return pp;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 5e2c34c0ac97..5e7983cb6154 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1128,7 +1128,6 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
if (tdev && !netif_is_l3_master(tdev)) {
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
- dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
dev->mtu = tdev->mtu - t_hlen;
if (dev->mtu < IPV6_MIN_MTU)
dev->mtu = IPV6_MIN_MTU;
@@ -1426,7 +1425,6 @@ static void ipip6_tunnel_setup(struct net_device *dev)
dev->priv_destructor = ipip6_dev_free;
dev->type = ARPHRD_SIT;
- dev->hard_header_len = LL_MAX_HEADER + t_hlen;
dev->mtu = ETH_DATA_LEN - t_hlen;
dev->min_mtu = IPV6_MIN_MTU;
dev->max_mtu = IP6_MAX_MTU - t_hlen;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index e796a64be308..9b6cae1e49d9 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -136,7 +136,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
__u32 cookie = ntohl(th->ack_seq) - 1;
struct sock *ret = sk;
struct request_sock *req;
- int mss;
+ int full_space, mss;
struct dst_entry *dst;
__u8 rcv_wscale;
u32 tsoff = 0;
@@ -241,7 +241,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
}
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
- tcp_select_initial_window(sk, tcp_full_space(sk), req->mss,
+ /* limit the window selection if the user enforce a smaller rx buffer */
+ full_space = tcp_full_space(sk);
+ if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+ (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
+ req->rsk_window_clamp = full_space;
+
+ tcp_select_initial_window(sk, full_space, req->mss,
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(dst, RTAX_INITRWND));
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 584157a07759..f9e888d1b9af 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -111,12 +111,22 @@ out:
return segs;
}
+static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
+ __be16 dport)
+{
+ const struct ipv6hdr *iph = skb_gro_network_header(skb);
+
+ return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
+ &iph->daddr, dport, inet6_iif(skb),
+ inet6_sdif(skb), &udp_table, NULL);
+}
+
INDIRECT_CALLABLE_SCOPE
struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
{
struct udphdr *uh = udp_gro_udphdr(skb);
+ struct sock *sk = NULL;
struct sk_buff *pp;
- struct sock *sk;
if (unlikely(!uh))
goto flush;
@@ -135,7 +145,10 @@ struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
skip:
NAPI_GRO_CB(skb)->is_ipv6 = 1;
rcu_read_lock();
- sk = static_branch_unlikely(&udpv6_encap_needed_key) ? udp6_lib_lookup_skb(skb, uh->source, uh->dest) : NULL;
+
+ if (static_branch_unlikely(&udpv6_encap_needed_key))
+ sk = udp6_gro_lookup_skb(skb, uh->source, uh->dest);
+
pp = udp_gro_receive(head, skb, uh, sk);
rcu_read_unlock();
return pp;
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index d80572074667..047238f01ba6 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1434,7 +1434,8 @@ static int iucv_sock_shutdown(struct socket *sock, int how)
break;
}
- if (how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) {
+ if ((how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) &&
+ sk->sk_state == IUCV_CONNECTED) {
if (iucv->transport == AF_IUCV_TRANS_IUCV) {
txmsg.class = 0;
txmsg.tag = 0;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index e7419fd15d84..88f2a7a0ccb8 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2467,6 +2467,7 @@ static struct proto mptcp_prot = {
.memory_pressure = &tcp_memory_pressure,
.stream_memory_free = mptcp_memory_free,
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
+ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
.sysctl_mem = sysctl_tcp_mem,
.obj_size = sizeof(struct mptcp_sock),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 2e8e3f7b2111..fc55c9116da0 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -1166,12 +1166,13 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
struct netlbl_unlhsh_walk_arg cb_arg;
u32 skip_bkt = cb->args[0];
u32 skip_chain = cb->args[1];
- u32 iter_bkt;
- u32 iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0;
+ u32 skip_addr4 = cb->args[2];
+ u32 iter_bkt, iter_chain, iter_addr4 = 0, iter_addr6 = 0;
struct netlbl_unlhsh_iface *iface;
struct list_head *iter_list;
struct netlbl_af4list *addr4;
#if IS_ENABLED(CONFIG_IPV6)
+ u32 skip_addr6 = cb->args[3];
struct netlbl_af6list *addr6;
#endif
@@ -1182,7 +1183,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
rcu_read_lock();
for (iter_bkt = skip_bkt;
iter_bkt < rcu_dereference(netlbl_unlhsh)->size;
- iter_bkt++, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0) {
+ iter_bkt++) {
iter_list = &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt];
list_for_each_entry_rcu(iface, iter_list, list) {
if (!iface->valid ||
@@ -1190,7 +1191,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
continue;
netlbl_af4list_foreach_rcu(addr4,
&iface->addr4_list) {
- if (iter_addr4++ < cb->args[2])
+ if (iter_addr4++ < skip_addr4)
continue;
if (netlbl_unlabel_staticlist_gen(
NLBL_UNLABEL_C_STATICLIST,
@@ -1203,10 +1204,12 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
goto unlabel_staticlist_return;
}
}
+ iter_addr4 = 0;
+ skip_addr4 = 0;
#if IS_ENABLED(CONFIG_IPV6)
netlbl_af6list_foreach_rcu(addr6,
&iface->addr6_list) {
- if (iter_addr6++ < cb->args[3])
+ if (iter_addr6++ < skip_addr6)
continue;
if (netlbl_unlabel_staticlist_gen(
NLBL_UNLABEL_C_STATICLIST,
@@ -1219,8 +1222,12 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
goto unlabel_staticlist_return;
}
}
+ iter_addr6 = 0;
+ skip_addr6 = 0;
#endif /* IPv6 */
}
+ iter_chain = 0;
+ skip_chain = 0;
}
unlabel_staticlist_return:
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 5f6f86051c83..13f3143609f9 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -664,12 +664,18 @@ static int tipc_topsrv_start(struct net *net)
ret = tipc_topsrv_work_start(srv);
if (ret < 0)
- return ret;
+ goto err_start;
ret = tipc_topsrv_create_listener(srv);
if (ret < 0)
- tipc_topsrv_work_stop(srv);
+ goto err_create;
+ return 0;
+
+err_create:
+ tipc_topsrv_work_stop(srv);
+err_start:
+ kfree(srv);
return ret;
}
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 0bbb283f23c9..046d3fee66a9 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -825,7 +825,7 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr,
sock->state = SS_CONNECTED;
rc = 0;
out_put_neigh:
- if (rc) {
+ if (rc && x25->neighbour) {
read_lock_bh(&x25_list_lock);
x25_neigh_put(x25->neighbour);
x25->neighbour = NULL;
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index b71a32eeae65..cfbec3989a76 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1146,7 +1146,8 @@ static void xsk_destruct(struct sock *sk)
if (!sock_flag(sk, SOCK_DEAD))
return;
- xp_put_pool(xs->pool);
+ if (!xp_put_pool(xs->pool))
+ xdp_put_umem(xs->umem);
sk_refcnt_debug_dec(sk);
}
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 64c9e55d4d4e..8a3bf4e1318e 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -251,15 +251,18 @@ void xp_get_pool(struct xsk_buff_pool *pool)
refcount_inc(&pool->users);
}
-void xp_put_pool(struct xsk_buff_pool *pool)
+bool xp_put_pool(struct xsk_buff_pool *pool)
{
if (!pool)
- return;
+ return false;
if (refcount_dec_and_test(&pool->users)) {
INIT_WORK(&pool->work, xp_release_deferred);
schedule_work(&pool->work);
+ return true;
}
+
+ return false;
}
static struct xsk_dma_map *xp_find_dma_map(struct xsk_buff_pool *pool)
diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c
index 4a74531dc403..b68bd2f8fdc9 100644
--- a/samples/bpf/task_fd_query_user.c
+++ b/samples/bpf/task_fd_query_user.c
@@ -290,7 +290,7 @@ static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return)
int main(int argc, char **argv)
{
- struct rlimit r = {1024*1024, RLIM_INFINITY};
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
extern char __executable_start;
char filename[256], buf[256];
__u64 uprobe_file_offset;
diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c
index 3e36b3e4e3ef..3d6eab711d23 100644
--- a/samples/bpf/tracex2_user.c
+++ b/samples/bpf/tracex2_user.c
@@ -116,7 +116,7 @@ static void int_exit(int sig)
int main(int ac, char **argv)
{
- struct rlimit r = {1024*1024, RLIM_INFINITY};
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
long key, next_key, value;
struct bpf_link *links[2];
struct bpf_program *prog;
diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c
index 70e987775c15..83e0fecbb01a 100644
--- a/samples/bpf/tracex3_user.c
+++ b/samples/bpf/tracex3_user.c
@@ -107,7 +107,7 @@ static void print_hist(int fd)
int main(int ac, char **argv)
{
- struct rlimit r = {1024*1024, RLIM_INFINITY};
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_link *links[2];
struct bpf_program *prog;
struct bpf_object *obj;
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index 6fb8dbde62c5..f78cb18319aa 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -765,7 +765,7 @@ static int load_cpumap_prog(char *file_name, char *prog_name,
int main(int argc, char **argv)
{
- struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs";
char *mprog_filename = "xdp_redirect_kern.o";
char *redir_interface = NULL, *redir_map = NULL;
diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c
index caa4e7ffcfc7..93fa1bc54f13 100644
--- a/samples/bpf/xdp_rxq_info_user.c
+++ b/samples/bpf/xdp_rxq_info_user.c
@@ -450,7 +450,7 @@ static void stats_poll(int interval, int action, __u32 cfg_opt)
int main(int argc, char **argv)
{
__u32 cfg_options= NO_TOUCH ; /* Default: Don't touch packet memory */
- struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index 6769caae142f..31484377b8b1 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -408,6 +408,7 @@ class PrinterHelpers(Printer):
'struct bpf_perf_event_data',
'struct bpf_perf_event_value',
'struct bpf_pidns_info',
+ 'struct bpf_redir_neigh',
'struct bpf_sock',
'struct bpf_sock_addr',
'struct bpf_sock_ops',
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index a43a6f10b564..359960a8f1de 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -843,9 +843,14 @@ static int handle_perms(void)
else
p_err("missing %s%s%s%s%s%s%s%srequired for full feature probing; run as root or use 'unprivileged'",
capability_msg(bpf_caps, 0),
+#ifdef CAP_BPF
capability_msg(bpf_caps, 1),
capability_msg(bpf_caps, 2),
- capability_msg(bpf_caps, 3));
+ capability_msg(bpf_caps, 3)
+#else
+ "", "", "", "", "", ""
+#endif /* CAP_BPF */
+ );
goto exit_free;
}
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index d942c1e3372c..acdb2c245f0a 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -940,7 +940,7 @@ static int parse_attach_detach_args(int argc, char **argv, int *progfd,
}
if (*attach_type == BPF_FLOW_DISSECTOR) {
- *mapfd = -1;
+ *mapfd = 0;
return 0;
}
diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c
index 4e3512f700c0..ce5b65e07ab1 100644
--- a/tools/bpf/bpftool/skeleton/profiler.bpf.c
+++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c
@@ -70,7 +70,7 @@ int BPF_PROG(fentry_XXX)
static inline void
fexit_update_maps(u32 id, struct bpf_perf_event_value *after)
{
- struct bpf_perf_event_value *before, diff, *accum;
+ struct bpf_perf_event_value *before, diff;
before = bpf_map_lookup_elem(&fentry_readings, &id);
/* only account samples with a valid fentry_reading */
@@ -95,7 +95,7 @@ int BPF_PROG(fexit_XXX)
{
struct bpf_perf_event_value readings[MAX_NUM_MATRICS];
u32 cpu = bpf_get_smp_processor_id();
- u32 i, one = 1, zero = 0;
+ u32 i, zero = 0;
int err;
u64 *count;
diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h
index d9b385fe808c..10a4c4cd13cf 100644
--- a/tools/lib/bpf/hashmap.h
+++ b/tools/lib/bpf/hashmap.h
@@ -15,6 +15,9 @@
static inline size_t hash_bits(size_t h, int bits)
{
/* shuffle bits and return requested number of upper bits */
+ if (bits == 0)
+ return 0;
+
#if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__)
/* LP64 case */
return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits);
@@ -174,17 +177,17 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value);
* @key: key to iterate entries for
*/
#define hashmap__for_each_key_entry(map, cur, _key) \
- for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
- map->cap_bits); \
- map->buckets ? map->buckets[bkt] : NULL; }); \
+ for (cur = map->buckets \
+ ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \
+ : NULL; \
cur; \
cur = cur->next) \
if (map->equal_fn(cur->key, (_key), map->ctx))
#define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \
- for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
- map->cap_bits); \
- cur = map->buckets ? map->buckets[bkt] : NULL; }); \
+ for (cur = map->buckets \
+ ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \
+ : NULL; \
cur && ({ tmp = cur->next; true; }); \
cur = tmp) \
if (map->equal_fn(cur->key, (_key), map->ctx))
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index e3c98c007825..9bc537d0b92d 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -891,13 +891,16 @@ int xsk_umem__delete(struct xsk_umem *umem)
void xsk_socket__delete(struct xsk_socket *xsk)
{
size_t desc_sz = sizeof(struct xdp_desc);
- struct xsk_ctx *ctx = xsk->ctx;
struct xdp_mmap_offsets off;
+ struct xsk_umem *umem;
+ struct xsk_ctx *ctx;
int err;
if (!xsk)
return;
+ ctx = xsk->ctx;
+ umem = ctx->umem;
if (ctx->prog_fd != -1) {
xsk_delete_bpf_maps(xsk);
close(ctx->prog_fd);
@@ -917,11 +920,11 @@ void xsk_socket__delete(struct xsk_socket *xsk)
xsk_put_ctx(ctx);
- ctx->umem->refcount--;
+ umem->refcount--;
/* Do not close an fd that also has an associated umem connected
* to it.
*/
- if (xsk->fd != ctx->umem->fd)
+ if (xsk->fd != umem->fd)
close(xsk->fd);
free(xsk);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_init.c b/tools/testing/selftests/bpf/prog_tests/map_init.c
new file mode 100644
index 000000000000..14a31109dd0e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_init.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Tessares SA <http://www.tessares.net> */
+
+#include <test_progs.h>
+#include "test_map_init.skel.h"
+
+#define TEST_VALUE 0x1234
+#define FILL_VALUE 0xdeadbeef
+
+static int nr_cpus;
+static int duration;
+
+typedef unsigned long long map_key_t;
+typedef unsigned long long map_value_t;
+typedef struct {
+ map_value_t v; /* padding */
+} __bpf_percpu_val_align pcpu_map_value_t;
+
+
+static int map_populate(int map_fd, int num)
+{
+ pcpu_map_value_t value[nr_cpus];
+ int i, err;
+ map_key_t key;
+
+ for (i = 0; i < nr_cpus; i++)
+ bpf_percpu(value, i) = FILL_VALUE;
+
+ for (key = 1; key <= num; key++) {
+ err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static struct test_map_init *setup(enum bpf_map_type map_type, int map_sz,
+ int *map_fd, int populate)
+{
+ struct test_map_init *skel;
+ int err;
+
+ skel = test_map_init__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return NULL;
+
+ err = bpf_map__set_type(skel->maps.hashmap1, map_type);
+ if (!ASSERT_OK(err, "bpf_map__set_type"))
+ goto error;
+
+ err = bpf_map__set_max_entries(skel->maps.hashmap1, map_sz);
+ if (!ASSERT_OK(err, "bpf_map__set_max_entries"))
+ goto error;
+
+ err = test_map_init__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto error;
+
+ *map_fd = bpf_map__fd(skel->maps.hashmap1);
+ if (CHECK(*map_fd < 0, "bpf_map__fd", "failed\n"))
+ goto error;
+
+ err = map_populate(*map_fd, populate);
+ if (!ASSERT_OK(err, "map_populate"))
+ goto error_map;
+
+ return skel;
+
+error_map:
+ close(*map_fd);
+error:
+ test_map_init__destroy(skel);
+ return NULL;
+}
+
+/* executes bpf program that updates map with key, value */
+static int prog_run_insert_elem(struct test_map_init *skel, map_key_t key,
+ map_value_t value)
+{
+ struct test_map_init__bss *bss;
+
+ bss = skel->bss;
+
+ bss->inKey = key;
+ bss->inValue = value;
+ bss->inPid = getpid();
+
+ if (!ASSERT_OK(test_map_init__attach(skel), "skel_attach"))
+ return -1;
+
+ /* Let tracepoint trigger */
+ syscall(__NR_getpgid);
+
+ test_map_init__detach(skel);
+
+ return 0;
+}
+
+static int check_values_one_cpu(pcpu_map_value_t *value, map_value_t expected)
+{
+ int i, nzCnt = 0;
+ map_value_t val;
+
+ for (i = 0; i < nr_cpus; i++) {
+ val = bpf_percpu(value, i);
+ if (val) {
+ if (CHECK(val != expected, "map value",
+ "unexpected for cpu %d: 0x%llx\n", i, val))
+ return -1;
+ nzCnt++;
+ }
+ }
+
+ if (CHECK(nzCnt != 1, "map value", "set for %d CPUs instead of 1!\n",
+ nzCnt))
+ return -1;
+
+ return 0;
+}
+
+/* Add key=1 elem with values set for all CPUs
+ * Delete elem key=1
+ * Run bpf prog that inserts new key=1 elem with value=0x1234
+ * (bpf prog can only set value for current CPU)
+ * Lookup Key=1 and check value is as expected for all CPUs:
+ * value set by bpf prog for one CPU, 0 for all others
+ */
+static void test_pcpu_map_init(void)
+{
+ pcpu_map_value_t value[nr_cpus];
+ struct test_map_init *skel;
+ int map_fd, err;
+ map_key_t key;
+
+ /* max 1 elem in map so insertion is forced to reuse freed entry */
+ skel = setup(BPF_MAP_TYPE_PERCPU_HASH, 1, &map_fd, 1);
+ if (!ASSERT_OK_PTR(skel, "prog_setup"))
+ return;
+
+ /* delete element so the entry can be re-used*/
+ key = 1;
+ err = bpf_map_delete_elem(map_fd, &key);
+ if (!ASSERT_OK(err, "bpf_map_delete_elem"))
+ goto cleanup;
+
+ /* run bpf prog that inserts new elem, re-using the slot just freed */
+ err = prog_run_insert_elem(skel, key, TEST_VALUE);
+ if (!ASSERT_OK(err, "prog_run_insert_elem"))
+ goto cleanup;
+
+ /* check that key=1 was re-created by bpf prog */
+ err = bpf_map_lookup_elem(map_fd, &key, value);
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ /* and has expected values */
+ check_values_one_cpu(value, TEST_VALUE);
+
+cleanup:
+ test_map_init__destroy(skel);
+}
+
+/* Add key=1 and key=2 elems with values set for all CPUs
+ * Run bpf prog that inserts new key=3 elem
+ * (only for current cpu; other cpus should have initial value = 0)
+ * Lookup Key=1 and check value is as expected for all CPUs
+ */
+static void test_pcpu_lru_map_init(void)
+{
+ pcpu_map_value_t value[nr_cpus];
+ struct test_map_init *skel;
+ int map_fd, err;
+ map_key_t key;
+
+ /* Set up LRU map with 2 elements, values filled for all CPUs.
+ * With these 2 elements, the LRU map is full
+ */
+ skel = setup(BPF_MAP_TYPE_LRU_PERCPU_HASH, 2, &map_fd, 2);
+ if (!ASSERT_OK_PTR(skel, "prog_setup"))
+ return;
+
+ /* run bpf prog that inserts new key=3 element, re-using LRU slot */
+ key = 3;
+ err = prog_run_insert_elem(skel, key, TEST_VALUE);
+ if (!ASSERT_OK(err, "prog_run_insert_elem"))
+ goto cleanup;
+
+ /* check that key=3 replaced one of earlier elements */
+ err = bpf_map_lookup_elem(map_fd, &key, value);
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ /* and has expected values */
+ check_values_one_cpu(value, TEST_VALUE);
+
+cleanup:
+ test_map_init__destroy(skel);
+}
+
+void test_map_init(void)
+{
+ nr_cpus = bpf_num_possible_cpus();
+ if (nr_cpus <= 1) {
+ printf("%s:SKIP: >1 cpu needed for this test\n", __func__);
+ test__skip();
+ return;
+ }
+
+ if (test__start_subtest("pcpu_map_init"))
+ test_pcpu_map_init();
+ if (test__start_subtest("pcpu_lru_map_init"))
+ test_pcpu_lru_map_init();
+}
diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
index 00578311a423..30982a7e4d0f 100644
--- a/tools/testing/selftests/bpf/progs/profiler.inc.h
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -243,7 +243,10 @@ static ino_t get_inode_from_kernfs(struct kernfs_node* node)
}
}
-int pids_cgrp_id = 1;
+extern bool CONFIG_CGROUP_PIDS __kconfig __weak;
+enum cgroup_subsys_id___local {
+ pids_cgrp_id___local = 123, /* value doesn't matter */
+};
static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
struct task_struct* task,
@@ -253,7 +256,9 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
- if (ENABLE_CGROUP_V1_RESOLVER) {
+ if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) {
+ int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
+ pids_cgrp_id___local);
#ifdef UNROLL
#pragma unroll
#endif
@@ -262,7 +267,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
BPF_CORE_READ(task, cgroups, subsys[i]);
if (subsys != NULL) {
int subsys_id = BPF_CORE_READ(subsys, ss, id);
- if (subsys_id == pids_cgrp_id) {
+ if (subsys_id == cgrp_id) {
proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
break;
diff --git a/tools/testing/selftests/bpf/progs/test_map_init.c b/tools/testing/selftests/bpf/progs/test_map_init.c
new file mode 100644
index 000000000000..c89d28ead673
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_map_init.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Tessares SA <http://www.tessares.net> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+__u64 inKey = 0;
+__u64 inValue = 0;
+__u32 inPid = 0;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, 2);
+ __type(key, __u64);
+ __type(value, __u64);
+} hashmap1 SEC(".maps");
+
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int sysenter_getpgid(const void *ctx)
+{
+ /* Just do it for once, when called from our own test prog. This
+ * ensures the map value is only updated for a single CPU.
+ */
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (cur_pid == inPid)
+ bpf_map_update_elem(&hashmap1, &inKey, &inValue, BPF_NOEXIST);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
index bb543bf69d69..361235ad574b 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
@@ -100,7 +100,7 @@
],
"cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop",
"expExitCode": "0",
- "verifyCmd": "$TC filter show terse dev $DEV2 ingress",
+ "verifyCmd": "$TC -br filter show dev $DEV2 ingress",
"matchPattern": "filter protocol ip pref 1 flower.*handle",
"matchCount": "1",
"teardown": [
@@ -119,7 +119,7 @@
],
"cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop",
"expExitCode": "0",
- "verifyCmd": "$TC filter show terse dev $DEV2 ingress",
+ "verifyCmd": "$TC -br filter show dev $DEV2 ingress",
"matchPattern": " dst_mac e4:11:22:11:4a:51",
"matchCount": "0",
"teardown": [