summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-03-21 14:50:39 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-03-21 14:50:39 -0700
commitcba9ffdb9913dfe6be29f049ce920ce451ce7cc4 (patch)
tree7b4a85ce028c0911bd7d2a69a8801af537dca41f /include
parent1d35aae78ffe739bf46c2bf9dea7b51a4eebfbe0 (diff)
parentf99c5f563c174a49ea1cbf4754539b05cfde40c4 (diff)
Merge tag 'net-6.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Jakub Kicinski: "Including fixes from CAN, netfilter, wireguard and IPsec. I'd like to highlight [ lowlight? - Linus ] Florian W stepping down as a netfilter maintainer due to constant stream of bug reports. Not sure what we can do but IIUC this is not the first such case. Current release - regressions: - rxrpc: fix use of page_frag_alloc_align(), it changed semantics and we added a new caller in a different subtree - xfrm: allow UDP encapsulation only in offload modes Current release - new code bugs: - tcp: fix refcnt handling in __inet_hash_connect() - Revert "net: Re-use and set mono_delivery_time bit for userspace tstamp packets", conflicted with some expectations in BPF uAPI Previous releases - regressions: - ipv4: raw: fix sending packets from raw sockets via IPsec tunnels - devlink: fix devlink's parallel command processing - veth: do not manipulate GRO when using XDP - esp: fix bad handling of pages from page_pool Previous releases - always broken: - report RCU QS for busy network kthreads (with Paul McK's blessing) - tcp/rds: fix use-after-free on netns with kernel TCP reqsk - virt: vmxnet3: fix missing reserved tailroom with XDP Misc: - couple of build fixes for Documentation" * tag 'net-6.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (59 commits) selftests: forwarding: Fix ping failure due to short timeout MAINTAINERS: step down as netfilter maintainer netfilter: nf_tables: Fix a memory leak in nf_tables_updchain net: dsa: mt7530: fix handling of all link-local frames net: dsa: mt7530: fix link-local frames that ingress vlan filtering ports bpf: report RCU QS in cpumap kthread net: report RCU QS on threaded NAPI repolling rcu: add a helper to report consolidated flavor QS ionic: update documentation for XDP support lib/bitmap: Fix bitmap_scatter() and bitmap_gather() kernel doc netfilter: nf_tables: do not compare internal table flags on updates netfilter: nft_set_pipapo: release elements in clone only from destroy path octeontx2-af: Use separate handlers for interrupts octeontx2-pf: Send UP messages to VF only when VF is up. octeontx2-pf: Use default max_active works instead of one octeontx2-pf: Wait till detach_resources msg is complete octeontx2: Detect the mbox up or down message via register devlink: fix port new reply cmd type tcp: Clear req->syncookie in reqsk_alloc(). net/bnx2x: Prevent access to a freed page in page_pool ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/bitmap.h44
-rw-r--r--include/linux/netdevice.h2
-rw-r--r--include/linux/rcupdate.h31
-rw-r--r--include/linux/skbuff.h16
-rw-r--r--include/linux/socket.h7
-rw-r--r--include/net/request_sock.h7
6 files changed, 74 insertions, 33 deletions
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index fb3a9c93ac86..aa4096126553 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -522,17 +522,18 @@ static inline void bitmap_replace(unsigned long *dst,
*
* (Bits 0, 1, 2, 3, 4, 5 are copied to the bits 0, 1, 4, 8, 9, 12)
*
- * A more 'visual' description of the operation:
- * src: 0000000001011010
- * ||||||
- * +------+|||||
- * | +----+||||
- * | |+----+|||
- * | || +-+||
- * | || | ||
- * mask: ...v..vv...v..vv
- * ...0..11...0..10
- * dst: 0000001100000010
+ * A more 'visual' description of the operation::
+ *
+ * src: 0000000001011010
+ * ||||||
+ * +------+|||||
+ * | +----+||||
+ * | |+----+|||
+ * | || +-+||
+ * | || | ||
+ * mask: ...v..vv...v..vv
+ * ...0..11...0..10
+ * dst: 0000001100000010
*
* A relationship exists between bitmap_scatter() and bitmap_gather().
* bitmap_gather() can be seen as the 'reverse' bitmap_scatter() operation.
@@ -568,16 +569,17 @@ static inline void bitmap_scatter(unsigned long *dst, const unsigned long *src,
*
* (Bits 0, 1, 4, 8, 9, 12 are copied to the bits 0, 1, 2, 3, 4, 5)
*
- * A more 'visual' description of the operation:
- * mask: ...v..vv...v..vv
- * src: 0000001100000010
- * ^ ^^ ^ 0
- * | || | 10
- * | || > 010
- * | |+--> 1010
- * | +--> 11010
- * +----> 011010
- * dst: 0000000000011010
+ * A more 'visual' description of the operation::
+ *
+ * mask: ...v..vv...v..vv
+ * src: 0000001100000010
+ * ^ ^^ ^ 0
+ * | || | 10
+ * | || > 010
+ * | |+--> 1010
+ * | +--> 11010
+ * +----> 011010
+ * dst: 0000000000011010
*
* A relationship exists between bitmap_gather() and bitmap_scatter(). See
* bitmap_scatter() for the bitmap scatter detailed operations.
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c6f6ac779b34..cb37817d6382 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2072,6 +2072,7 @@ struct net_device {
struct pcpu_sw_netstats __percpu *tstats;
struct pcpu_dstats __percpu *dstats;
};
+ unsigned long state;
unsigned int flags;
unsigned short hard_header_len;
netdev_features_t features;
@@ -2117,7 +2118,6 @@ struct net_device {
* part of the usual set specified in Space.c.
*/
- unsigned long state;
struct list_head dev_list;
struct list_head napi_list;
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 16f519914415..17d7ed5f3ae6 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -247,6 +247,37 @@ do { \
cond_resched(); \
} while (0)
+/**
+ * rcu_softirq_qs_periodic - Report RCU and RCU-Tasks quiescent states
+ * @old_ts: jiffies at start of processing.
+ *
+ * This helper is for long-running softirq handlers, such as NAPI threads in
+ * networking. The caller should initialize the variable passed in as @old_ts
+ * at the beginning of the softirq handler. When invoked frequently, this macro
+ * will invoke rcu_softirq_qs() every 100 milliseconds thereafter, which will
+ * provide both RCU and RCU-Tasks quiescent states. Note that this macro
+ * modifies its old_ts argument.
+ *
+ * Because regions of code that have disabled softirq act as RCU read-side
+ * critical sections, this macro should be invoked with softirq (and
+ * preemption) enabled.
+ *
+ * The macro is not needed when CONFIG_PREEMPT_RT is defined. RT kernels would
+ * have more chance to invoke schedule() calls and provide necessary quiescent
+ * states. As a contrast, calling cond_resched() only won't achieve the same
+ * effect because cond_resched() does not provide RCU-Tasks quiescent states.
+ */
+#define rcu_softirq_qs_periodic(old_ts) \
+do { \
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && \
+ time_after(jiffies, (old_ts) + HZ / 10)) { \
+ preempt_disable(); \
+ rcu_softirq_qs(); \
+ preempt_enable(); \
+ (old_ts) = jiffies; \
+ } \
+} while (0)
+
/*
* Infrastructure to implement the synchronize_() primitives in
* TREE_RCU and rcu_barrier_() primitives in TINY_RCU.
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 3023bc2be6a1..0c7c67b3a87b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -822,9 +822,9 @@ typedef unsigned char *sk_buff_data_t;
* @decrypted: Decrypted SKB
* @slow_gro: state present at GRO time, slower prepare step required
* @mono_delivery_time: When set, skb->tstamp has the
- * delivery_time in mono clock base (i.e., EDT) or a clock base chosen
- * by SO_TXTIME. If zero, skb->tstamp has the (rcv) timestamp at
- * ingress.
+ * delivery_time in mono clock base (i.e. EDT). Otherwise, the
+ * skb->tstamp has the (rcv) timestamp at ingress and
+ * delivery_time at egress.
* @napi_id: id of the NAPI struct this skb came from
* @sender_cpu: (aka @napi_id) source CPU in XPS
* @alloc_cpu: CPU which did the skb allocation.
@@ -3524,6 +3524,16 @@ int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
bool napi_pp_put_page(struct page *page, bool napi_safe);
static inline void
+skb_page_unref(const struct sk_buff *skb, struct page *page, bool napi_safe)
+{
+#ifdef CONFIG_PAGE_POOL
+ if (skb->pp_recycle && napi_pp_put_page(page, napi_safe))
+ return;
+#endif
+ put_page(page);
+}
+
+static inline void
napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe)
{
struct page *page = skb_frag_page(frag);
diff --git a/include/linux/socket.h b/include/linux/socket.h
index cfcb7e2c3813..139c330ccf2c 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -422,13 +422,6 @@ extern long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
struct user_msghdr __user *umsg,
struct sockaddr __user *uaddr,
unsigned int flags);
-extern int sendmsg_copy_msghdr(struct msghdr *msg,
- struct user_msghdr __user *umsg, unsigned flags,
- struct iovec **iov);
-extern int recvmsg_copy_msghdr(struct msghdr *msg,
- struct user_msghdr __user *umsg, unsigned flags,
- struct sockaddr __user **uaddr,
- struct iovec **iov);
extern int __copy_msghdr(struct msghdr *kmsg,
struct user_msghdr *umsg,
struct sockaddr __user **save_addr);
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 8839133d6f6b..004e651e6067 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -61,7 +61,11 @@ struct request_sock {
struct request_sock *dl_next;
u16 mss;
u8 num_retrans; /* number of retransmits */
- u8 syncookie:1; /* syncookie: encode tcpopts in timestamp */
+ u8 syncookie:1; /* True if
+ * 1) tcpopts needs to be encoded in
+ * TS of SYN+ACK
+ * 2) ACK is validated by BPF kfunc.
+ */
u8 num_timeout:7; /* number of timeouts */
u32 ts_recent;
struct timer_list rsk_timer;
@@ -144,6 +148,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
sk_node_init(&req_to_sk(req)->sk_node);
sk_tx_queue_clear(req_to_sk(req));
req->saved_syn = NULL;
+ req->syncookie = 0;
req->timeout = 0;
req->num_timeout = 0;
req->num_retrans = 0;