From ee5675ecdf7a4e713ed21d98a70c2871d6ebed01 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:07 +0000 Subject: net/packet: convert po->origdev to an atomic flag syzbot/KCAN reported that po->origdev can be read while another thread is changing its value. We can avoid this splat by converting this field to an actual bit. Following patches will convert remaining 1bit fields. Fixes: 80feaacb8a64 ("[AF_PACKET]: Add option to return orig_dev to userspace.") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/packet/internal.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'net/packet/internal.h') diff --git a/net/packet/internal.h b/net/packet/internal.h index 48af35b1aed2..178cd1852238 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -116,9 +116,9 @@ struct packet_sock { int copy_thresh; spinlock_t bind_lock; struct mutex pg_vec_lock; + unsigned long flags; unsigned int running; /* bind_lock must be held */ unsigned int auxdata:1, /* writer must hold sock lock */ - origdev:1, has_vnet_hdr:1, tp_loss:1, tp_tx_has_off:1; @@ -144,4 +144,24 @@ static inline struct packet_sock *pkt_sk(struct sock *sk) return (struct packet_sock *)sk; } +enum packet_sock_flags { + PACKET_SOCK_ORIGDEV, +}; + +static inline void packet_sock_flag_set(struct packet_sock *po, + enum packet_sock_flags flag, + bool val) +{ + if (val) + set_bit(flag, &po->flags); + else + clear_bit(flag, &po->flags); +} + +static inline bool packet_sock_flag(const struct packet_sock *po, + enum packet_sock_flags flag) +{ + return test_bit(flag, &po->flags); +} + #endif -- cgit From fd53c297aa7b077ae98a3d3d2d3aa278a1686ba6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:08 +0000 Subject: net/packet: convert po->auxdata to an atomic flag po->auxdata can be read while another thread is changing its value, potentially raising KCSAN splat. Convert it to PACKET_SOCK_AUXDATA flag. Fixes: 8dc419447415 ("[PACKET]: Add optional checksum computation for recvmsg") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/internal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/packet/internal.h') diff --git a/net/packet/internal.h b/net/packet/internal.h index 178cd1852238..3bae8ea7a36f 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -118,8 +118,7 @@ struct packet_sock { struct mutex pg_vec_lock; unsigned long flags; unsigned int running; /* bind_lock must be held */ - unsigned int auxdata:1, /* writer must hold sock lock */ - has_vnet_hdr:1, + unsigned int has_vnet_hdr:1, /* writer must hold sock lock */ tp_loss:1, tp_tx_has_off:1; int pressure; @@ -146,6 +145,7 @@ static inline struct packet_sock *pkt_sk(struct sock *sk) enum packet_sock_flags { PACKET_SOCK_ORIGDEV, + PACKET_SOCK_AUXDATA, }; static inline void packet_sock_flag_set(struct packet_sock *po, -- cgit From 7438344660fa55b33b8234c1797c886eb73667a7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:10 +0000 Subject: net/packet: convert po->tp_tx_has_off to an atomic flag This is to use existing space in po->flags, and reclaim the storage used by the non atomic bit fields. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/internal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/packet/internal.h') diff --git a/net/packet/internal.h b/net/packet/internal.h index 3bae8ea7a36f..0d16a581e271 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -119,8 +119,7 @@ struct packet_sock { unsigned long flags; unsigned int running; /* bind_lock must be held */ unsigned int has_vnet_hdr:1, /* writer must hold sock lock */ - tp_loss:1, - tp_tx_has_off:1; + tp_loss:1; int pressure; int ifindex; /* bound device */ __be16 num; @@ -146,6 +145,7 @@ static inline struct packet_sock *pkt_sk(struct sock *sk) enum packet_sock_flags { PACKET_SOCK_ORIGDEV, PACKET_SOCK_AUXDATA, + PACKET_SOCK_TX_HAS_OFF, }; static inline void packet_sock_flag_set(struct packet_sock *po, -- cgit From 164bddace2e03f6005e650cb88f101a66ebdc05a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:11 +0000 Subject: net/packet: convert po->tp_loss to an atomic flag tp_loss can be read locklessly. Convert it to an atomic flag to avoid races. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/internal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/packet/internal.h') diff --git a/net/packet/internal.h b/net/packet/internal.h index 0d16a581e271..9d406a92ede8 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -118,8 +118,7 @@ struct packet_sock { struct mutex pg_vec_lock; unsigned long flags; unsigned int running; /* bind_lock must be held */ - unsigned int has_vnet_hdr:1, /* writer must hold sock lock */ - tp_loss:1; + unsigned int has_vnet_hdr:1; /* writer must hold sock lock */ int pressure; int ifindex; /* bound device */ __be16 num; @@ -146,6 +145,7 @@ enum packet_sock_flags { PACKET_SOCK_ORIGDEV, PACKET_SOCK_AUXDATA, PACKET_SOCK_TX_HAS_OFF, + PACKET_SOCK_TP_LOSS, }; static inline void packet_sock_flag_set(struct packet_sock *po, -- cgit From 50d935eafee292fc432d5ac8c8715a6492961abc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:12 +0000 Subject: net/packet: convert po->has_vnet_hdr to an atomic flag po->has_vnet_hdr can be read locklessly. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/packet/internal.h') diff --git a/net/packet/internal.h b/net/packet/internal.h index 9d406a92ede8..2521176807f4 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -118,7 +118,6 @@ struct packet_sock { struct mutex pg_vec_lock; unsigned long flags; unsigned int running; /* bind_lock must be held */ - unsigned int has_vnet_hdr:1; /* writer must hold sock lock */ int pressure; int ifindex; /* bound device */ __be16 num; @@ -146,6 +145,7 @@ enum packet_sock_flags { PACKET_SOCK_AUXDATA, PACKET_SOCK_TX_HAS_OFF, PACKET_SOCK_TP_LOSS, + PACKET_SOCK_HAS_VNET_HDR, }; static inline void packet_sock_flag_set(struct packet_sock *po, -- cgit From 61edf479818e63978cabd243b82ca80f8948a313 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:13 +0000 Subject: net/packet: convert po->running to an atomic flag Instead of consuming 32 bits for po->running, use one available bit in po->flags. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/packet/internal.h') diff --git a/net/packet/internal.h b/net/packet/internal.h index 2521176807f4..58f042c63172 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -117,7 +117,6 @@ struct packet_sock { spinlock_t bind_lock; struct mutex pg_vec_lock; unsigned long flags; - unsigned int running; /* bind_lock must be held */ int pressure; int ifindex; /* bound device */ __be16 num; @@ -146,6 +145,7 @@ enum packet_sock_flags { PACKET_SOCK_TX_HAS_OFF, PACKET_SOCK_TP_LOSS, PACKET_SOCK_HAS_VNET_HDR, + PACKET_SOCK_RUNNING, }; static inline void packet_sock_flag_set(struct packet_sock *po, -- cgit From 791a3e9f1a86fe8eb09173c9788493b8b5c957f4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:14 +0000 Subject: net/packet: convert po->pressure to an atomic flag Not only this removes some READ_ONCE()/WRITE_ONCE(), this also removes one integer. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/packet/internal.h') diff --git a/net/packet/internal.h b/net/packet/internal.h index 58f042c63172..680703dbce5e 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -117,7 +117,6 @@ struct packet_sock { spinlock_t bind_lock; struct mutex pg_vec_lock; unsigned long flags; - int pressure; int ifindex; /* bound device */ __be16 num; struct packet_rollover *rollover; @@ -146,6 +145,7 @@ enum packet_sock_flags { PACKET_SOCK_TP_LOSS, PACKET_SOCK_HAS_VNET_HDR, PACKET_SOCK_RUNNING, + PACKET_SOCK_PRESSURE, }; static inline void packet_sock_flag_set(struct packet_sock *po, -- cgit From 68ac9a8b6e65c7cbbe96541353dab1b3f8de2043 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Mar 2023 15:55:31 +0000 Subject: af_packet: preserve const qualifier in pkt_sk() We can change pkt_sk() to propagate const qualifier of its argument, thanks to container_of_const() This should avoid some potential errors caused by accidental (const -> not_const) promotion. Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/packet/internal.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net/packet/internal.h') diff --git a/net/packet/internal.h b/net/packet/internal.h index 680703dbce5e..e793e99646f1 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -133,10 +133,7 @@ struct packet_sock { atomic_t tp_drops ____cacheline_aligned_in_smp; }; -static inline struct packet_sock *pkt_sk(struct sock *sk) -{ - return (struct packet_sock *)sk; -} +#define pkt_sk(ptr) container_of_const(ptr, struct packet_sock, sk) enum packet_sock_flags { PACKET_SOCK_ORIGDEV, -- cgit From 105a201ebf3312990b96c4fbaade22e31402f8cc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Mar 2023 16:20:02 +0000 Subject: net/packet: remove po->xmit Use PACKET_SOCK_QDISC_BYPASS atomic bit instead of a pointer. This removes one indirect call in fast path, and READ_ONCE()/WRITE_ONCE() annotations as well. Signed-off-by: Eric Dumazet Suggested-by: Willem de Bruijn Cc: Daniel Borkmann Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/packet/internal.h') diff --git a/net/packet/internal.h b/net/packet/internal.h index e793e99646f1..27930f69f368 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -128,7 +128,6 @@ struct packet_sock { unsigned int tp_tstamp; struct completion skb_completion; struct net_device __rcu *cached_dev; - int (*xmit)(struct sk_buff *skb); struct packet_type prot_hook ____cacheline_aligned_in_smp; atomic_t tp_drops ____cacheline_aligned_in_smp; }; @@ -143,6 +142,7 @@ enum packet_sock_flags { PACKET_SOCK_HAS_VNET_HDR, PACKET_SOCK_RUNNING, PACKET_SOCK_PRESSURE, + PACKET_SOCK_QDISC_BYPASS, }; static inline void packet_sock_flag_set(struct packet_sock *po, -- cgit From dfc39d4026fb2432363c0f77543c4cf3adca4c7b Mon Sep 17 00:00:00 2001 From: Jianfeng Tan Date: Wed, 19 Apr 2023 15:24:16 +0800 Subject: net/packet: support mergeable feature of virtio Packet sockets, like tap, can be used as the backend for kernel vhost. In packet sockets, virtio net header size is currently hardcoded to be the size of struct virtio_net_hdr, which is 10 bytes; however, it is not always the case: some virtio features, such as mrg_rxbuf, need virtio net header to be 12-byte long. Mergeable buffers, as a virtio feature, is worthy of supporting: packets that are larger than one-mbuf size will be dropped in vhost worker's handle_rx if mrg_rxbuf feature is not used, but large packets cannot be avoided and increasing mbuf's size is not economical. With this virtio feature enabled by virtio-user, packet sockets with hardcoded 10-byte virtio net header will parse mac head incorrectly in packet_snd by taking the last two bytes of virtio net header as part of mac header. This incorrect mac header parsing will cause packet to be dropped due to invalid ether head checking in later under-layer device packet receiving. By adding extra field vnet_hdr_sz with utilizing holes in struct packet_sock to record currently used virtio net header size and supporting extra sockopt PACKET_VNET_HDR_SZ to set specified vnet_hdr_sz, packet sockets can know the exact length of virtio net header that virtio user gives. In packet_snd, tpacket_snd and packet_recvmsg, instead of using hardcoded virtio net header size, it can get the exact vnet_hdr_sz from corresponding packet_sock, and parse mac header correctly based on this information to avoid the packets being mistakenly dropped. Signed-off-by: Jianfeng Tan Co-developed-by: Anqi Shen Signed-off-by: Anqi Shen Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/packet/internal.h') diff --git a/net/packet/internal.h b/net/packet/internal.h index 27930f69f368..63f4865202c1 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -118,6 +118,7 @@ struct packet_sock { struct mutex pg_vec_lock; unsigned long flags; int ifindex; /* bound device */ + u8 vnet_hdr_sz; __be16 num; struct packet_rollover *rollover; struct packet_mclist *mclist; @@ -139,7 +140,6 @@ enum packet_sock_flags { PACKET_SOCK_AUXDATA, PACKET_SOCK_TX_HAS_OFF, PACKET_SOCK_TP_LOSS, - PACKET_SOCK_HAS_VNET_HDR, PACKET_SOCK_RUNNING, PACKET_SOCK_PRESSURE, PACKET_SOCK_QDISC_BYPASS, -- cgit