From 5db7c8b9f9fc2aeec671ae3ca6375752c162e0e7 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 18 Jun 2019 23:07:36 +0300 Subject: ipvs: fix tinfo memory leak in start_sync_thread syzkaller reports for memory leak in start_sync_thread [1] As Eric points out, kthread may start and stop before the threadfn function is called, so there is no chance the data (tinfo in our case) to be released in thread. Fix this by releasing tinfo in the controlling code instead. [1] BUG: memory leak unreferenced object 0xffff8881206bf700 (size 32): comm "syz-executor761", pid 7268, jiffies 4294943441 (age 20.470s) hex dump (first 32 bytes): 00 40 7c 09 81 88 ff ff 80 45 b8 21 81 88 ff ff .@|......E.!.... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<0000000057619e23>] kmemleak_alloc_recursive include/linux/kmemleak.h:55 [inline] [<0000000057619e23>] slab_post_alloc_hook mm/slab.h:439 [inline] [<0000000057619e23>] slab_alloc mm/slab.c:3326 [inline] [<0000000057619e23>] kmem_cache_alloc_trace+0x13d/0x280 mm/slab.c:3553 [<0000000086ce5479>] kmalloc include/linux/slab.h:547 [inline] [<0000000086ce5479>] start_sync_thread+0x5d2/0xe10 net/netfilter/ipvs/ip_vs_sync.c:1862 [<000000001a9229cc>] do_ip_vs_set_ctl+0x4c5/0x780 net/netfilter/ipvs/ip_vs_ctl.c:2402 [<00000000ece457c8>] nf_sockopt net/netfilter/nf_sockopt.c:106 [inline] [<00000000ece457c8>] nf_setsockopt+0x4c/0x80 net/netfilter/nf_sockopt.c:115 [<00000000942f62d4>] ip_setsockopt net/ipv4/ip_sockglue.c:1258 [inline] [<00000000942f62d4>] ip_setsockopt+0x9b/0xb0 net/ipv4/ip_sockglue.c:1238 [<00000000a56a8ffd>] udp_setsockopt+0x4e/0x90 net/ipv4/udp.c:2616 [<00000000fa895401>] sock_common_setsockopt+0x38/0x50 net/core/sock.c:3130 [<0000000095eef4cf>] __sys_setsockopt+0x98/0x120 net/socket.c:2078 [<000000009747cf88>] __do_sys_setsockopt net/socket.c:2089 [inline] [<000000009747cf88>] __se_sys_setsockopt net/socket.c:2086 [inline] [<000000009747cf88>] __x64_sys_setsockopt+0x26/0x30 net/socket.c:2086 [<00000000ded8ba80>] do_syscall_64+0x76/0x1a0 arch/x86/entry/common.c:301 [<00000000893b4ac8>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Reported-by: syzbot+7e2e50c8adfccd2e5041@syzkaller.appspotmail.com Suggested-by: Eric Biggers Fixes: 998e7a76804b ("ipvs: Use kthread_run() instead of doing a double-fork via kernel_thread()") Signed-off-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 2ac40135b576..b36a1df93e7c 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -808,11 +808,12 @@ struct ipvs_master_sync_state { struct ip_vs_sync_buff *sync_buff; unsigned long sync_queue_len; unsigned int sync_queue_delay; - struct task_struct *master_thread; struct delayed_work master_wakeup_work; struct netns_ipvs *ipvs; }; +struct ip_vs_sync_thread_data; + /* How much time to keep dests in trash */ #define IP_VS_DEST_TRASH_PERIOD (120 * HZ) @@ -943,7 +944,8 @@ struct netns_ipvs { spinlock_t sync_lock; struct ipvs_master_sync_state *ms; spinlock_t sync_buff_lock; - struct task_struct **backup_threads; + struct ip_vs_sync_thread_data *master_tinfo; + struct ip_vs_sync_thread_data *backup_tinfo; int threads_mask; volatile int sync_state; struct mutex sync_mutex; -- cgit From d2ce8d6bfcfed014fd281e06c9b1d4638ddf3f1e Mon Sep 17 00:00:00 2001 From: Jiunn Chang Date: Thu, 27 Jun 2019 00:04:26 -0500 Subject: nl80211: Fix undefined behavior in bit shift Shifting signed 32-bit value by 31 bits is undefined. Changing most significant bit to unsigned. Signed-off-by: Jiunn Chang Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6f09d1500960..fa7ebbc6ff27 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5314,7 +5314,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_TDLS_CHANNEL_SWITCH = 1 << 28, NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR = 1 << 29, NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR = 1 << 30, - NL80211_FEATURE_ND_RANDOM_MAC_ADDR = 1 << 31, + NL80211_FEATURE_ND_RANDOM_MAC_ADDR = 1U << 31, }; /** -- cgit From 32e454efbb2279b0fa5874abb0944a9d42080ad1 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 25 Jun 2019 10:44:33 +0100 Subject: net: phylink: further documentation clarifications Clarify the validate() behaviour in a few cases which weren't mentioned in the documentation, but which are necessary for users to get the correct behaviour. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phylink.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 2d2e55dfea94..5b130140fb8f 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -93,12 +93,19 @@ struct phylink_mac_ops { * Note that the PHY may be able to transform from one connection * technology to another, so, eg, don't clear 1000BaseX just * because the MAC is unable to BaseX mode. This is more about - * clearing unsupported speeds and duplex settings. + * clearing unsupported speeds and duplex settings. The port modes + * should not be cleared; phylink_set_port_modes() will help with this. * * If the @state->interface mode is %PHY_INTERFACE_MODE_1000BASEX * or %PHY_INTERFACE_MODE_2500BASEX, select the appropriate mode * based on @state->advertising and/or @state->speed and update - * @state->interface accordingly. + * @state->interface accordingly. See phylink_helper_basex_speed(). + * + * When @state->interface is %PHY_INTERFACE_MODE_NA, phylink expects the + * MAC driver to return all supported link modes. + * + * If the @state->interface mode is not supported, then the @supported + * mask must be cleared. */ void validate(struct net_device *ndev, unsigned long *supported, struct phylink_link_state *state); -- cgit From 0472301a28f6cf53a6bc5783e48a2d0bbff4682f Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Fri, 28 Jun 2019 07:08:45 +0300 Subject: bpf: fix uapi bpf_prog_info fields alignment Merge commit 1c8c5a9d38f60 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next") undid the fix from commit 36f9814a494 ("bpf: fix uapi hole for 32 bit compat applications") by taking the gpl_compatible 1-bit field definition from commit b85fab0e67b162 ("bpf: Add gpl_compatible flag to struct bpf_prog_info") as is. That breaks architectures with 16-bit alignment like m68k. Add 31-bit pad after gpl_compatible to restore alignment of following fields. Thanks to Dmitry V. Levin his analysis of this bug history. Signed-off-by: Baruch Siach Acked-by: Song Liu Cc: Jiri Olsa Cc: Daniel Borkmann Cc: Geert Uytterhoeven Cc: Linus Torvalds Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a8b823c30b43..29a5bc3d5c66 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3143,6 +3143,7 @@ struct bpf_prog_info { char name[BPF_OBJ_NAME_LEN]; __u32 ifindex; __u32 gpl_compatible:1; + __u32 :31; /* alignment pad */ __u64 netns_dev; __u64 netns_ino; __u32 nr_jited_ksyms; -- cgit From b60a77386b1d4868f72f6353d35dabe5fbe981f2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Jun 2019 20:40:45 +0200 Subject: net: make skb_dst_force return true when dst is refcounted netfilter did not expect that skb_dst_force() can cause skb to lose its dst entry. I got a bug report with a skb->dst NULL dereference in netfilter output path. The backtrace contains nf_reinject(), so the dst might have been cleared when skb got queued to userspace. Other users were fixed via if (skb_dst(skb)) { skb_dst_force(skb); if (!skb_dst(skb)) goto handle_err; } But I think its preferable to make the 'dst might be cleared' part of the function explicit. In netfilter case, skb with a null dst is expected when queueing in prerouting hook, so drop skb for the other hooks. v2: v1 of this patch returned true in case skb had no dst entry. Eric said: Say if we have two skb_dst_force() calls for some reason on the same skb, only the first one will return false. This now returns false even when skb had no dst, as per Erics suggestion, so callers might need to check skb_dst() first before skb_dst_force(). Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/dst.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 12b31c602cb0..f8206d3fed2f 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -302,8 +302,9 @@ static inline bool dst_hold_safe(struct dst_entry *dst) * @skb: buffer * * If dst is not yet refcounted and not destroyed, grab a ref on it. + * Returns true if dst is refcounted. */ -static inline void skb_dst_force(struct sk_buff *skb) +static inline bool skb_dst_force(struct sk_buff *skb) { if (skb_dst_is_noref(skb)) { struct dst_entry *dst = skb_dst(skb); @@ -314,6 +315,8 @@ static inline void skb_dst_force(struct sk_buff *skb) skb->_skb_refdst = (unsigned long)dst; } + + return skb->_skb_refdst != 0UL; } -- cgit From 79293f49677e2e703ef0d0efc9919319adacb3fb Mon Sep 17 00:00:00 2001 From: Jiunn Chang Date: Wed, 26 Jun 2019 22:25:30 -0500 Subject: packet: Fix undefined behavior in bit shift Shifting signed 32-bit value by 31 bits is undefined. Changing most significant bit to unsigned. Changes included in v2: - use subsystem specific subject lines - CC required mailing lists Signed-off-by: Jiunn Chang Signed-off-by: David S. Miller --- include/uapi/linux/if_packet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index 467b654bd4c7..3d884d68eb30 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -123,7 +123,7 @@ struct tpacket_auxdata { /* Rx and Tx ring - header status */ #define TP_STATUS_TS_SOFTWARE (1 << 29) #define TP_STATUS_TS_SYS_HARDWARE (1 << 30) /* deprecated, never set */ -#define TP_STATUS_TS_RAW_HARDWARE (1 << 31) +#define TP_STATUS_TS_RAW_HARDWARE (1U << 31) /* Rx ring - feature request bits */ #define TP_FT_REQ_FILL_RXHASH 0x1 -- cgit From 40f6a2cb9cfc5da713f745b23bcc2c6761e5eb5e Mon Sep 17 00:00:00 2001 From: Vandana BN Date: Mon, 1 Jul 2019 17:25:39 +0530 Subject: net: dst.h: Fix shifting signed 32-bit value by 31 bits problem Fix DST_FEATURE_ECN_CA to use "U" cast to avoid shifting signed 32-bit value by 31 bits problem. Signed-off-by: Vandana BN Signed-off-by: David S. Miller --- include/net/dst.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index f8206d3fed2f..fe62fe2eb781 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -183,7 +183,7 @@ static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val) } /* Kernel-internal feature bits that are unallocated in user space. */ -#define DST_FEATURE_ECN_CA (1 << 31) +#define DST_FEATURE_ECN_CA (1U << 31) #define DST_FEATURE_MASK (DST_FEATURE_ECN_CA) #define DST_FEATURE_ECN_MASK (DST_FEATURE_ECN_CA | RTAX_FEATURE_ECN) -- cgit From 88405680ec57c35f5886dbb81b3f6f638f74f40d Mon Sep 17 00:00:00 2001 From: Vandana BN Date: Mon, 1 Jul 2019 19:46:10 +0530 Subject: net:gue.h:Fix shifting signed 32-bit value by 31 bits problem Fix GUE_PFLAG_REMCSUM to use "U" cast to avoid shifting signed 32-bit value by 31 bits problem. Signed-off-by: Vandana BN Signed-off-by: David S. Miller --- include/net/gue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/gue.h b/include/net/gue.h index fdad41469b65..3a6595bfa641 100644 --- a/include/net/gue.h +++ b/include/net/gue.h @@ -60,7 +60,7 @@ struct guehdr { /* Private flags in the private option extension */ -#define GUE_PFLAG_REMCSUM htonl(1 << 31) +#define GUE_PFLAG_REMCSUM htonl(1U << 31) #define GUE_PLEN_REMCSUM 4 #define GUE_PFLAGS_ALL (GUE_PFLAG_REMCSUM) -- cgit From e33d2b74d805af0e4c8060f41040595ba105a520 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 28 Jun 2019 11:03:41 -0700 Subject: idr: fix overflow case for idr_for_each_entry_ul() idr_for_each_entry_ul() is buggy as it can't handle overflow case correctly. When we have an ID == UINT_MAX, it becomes an infinite loop. This happens when running on 32-bit CPU where unsigned long has the same size with unsigned int. There is no better way to fix this than casting it to a larger integer, but we can't just 64 bit integer on 32 bit CPU. Instead we could just use an additional integer to help us to detect this overflow case, that is, adding a new parameter to this macro. Fortunately tc action is its only user right now. Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR") Reported-by: Li Shuang Tested-by: Davide Caratti Cc: Matthew Wilcox Cc: Chris Mi Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/idr.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/idr.h b/include/linux/idr.h index ee7abae143d3..68528a72d10d 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -191,14 +191,17 @@ static inline void idr_preload_end(void) * idr_for_each_entry_ul() - Iterate over an IDR's elements of a given type. * @idr: IDR handle. * @entry: The type * to use as cursor. + * @tmp: A temporary placeholder for ID. * @id: Entry ID. * * @entry and @id do not need to be initialized before the loop, and * after normal termination @entry is left with the value NULL. This * is convenient for a "not found" value. */ -#define idr_for_each_entry_ul(idr, entry, id) \ - for (id = 0; ((entry) = idr_get_next_ul(idr, &(id))) != NULL; ++id) +#define idr_for_each_entry_ul(idr, entry, tmp, id) \ + for (tmp = 0, id = 0; \ + tmp <= id && ((entry) = idr_get_next_ul(idr, &(id))) != NULL; \ + tmp = id, ++id) /** * idr_for_each_entry_continue() - Continue iteration over an IDR's elements of a given type -- cgit From d39d714969cda5cbda291402c8c6b1fb1047f42e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 28 Jun 2019 11:03:42 -0700 Subject: idr: introduce idr_for_each_entry_continue_ul() Similarly, other callers of idr_get_next_ul() suffer the same overflow bug as they don't handle it properly either. Introduce idr_for_each_entry_continue_ul() to help these callers iterate from a given ID. cls_flower needs more care here because it still has overflow when does arg->cookie++, we have to fold its nested loops into one and remove the arg->cookie++. Fixes: 01683a146999 ("net: sched: refactor flower walk to iterate over idr") Fixes: 12d6066c3b29 ("net/mlx5: Add flow counters idr") Reported-by: Li Shuang Cc: Davide Caratti Cc: Vlad Buslov Cc: Chris Mi Cc: Matthew Wilcox Signed-off-by: Cong Wang Tested-by: Davide Caratti Signed-off-by: David S. Miller --- include/linux/idr.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/idr.h b/include/linux/idr.h index 68528a72d10d..4ec8986e5dfb 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -216,6 +216,20 @@ static inline void idr_preload_end(void) entry; \ ++id, (entry) = idr_get_next((idr), &(id))) +/** + * idr_for_each_entry_continue_ul() - Continue iteration over an IDR's elements of a given type + * @idr: IDR handle. + * @entry: The type * to use as a cursor. + * @tmp: A temporary placeholder for ID. + * @id: Entry ID. + * + * Continue to iterate over entries, continuing after the current position. + */ +#define idr_for_each_entry_continue_ul(idr, entry, tmp, id) \ + for (tmp = id; \ + tmp <= id && ((entry) = idr_get_next_ul(idr, &(id))) != NULL; \ + tmp = id, ++id) + /* * IDA - ID Allocator, use when translation from id to pointer isn't necessary. */ -- cgit From acd3e96d53a24d219f720ed4012b62723ae05da1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 28 Jun 2019 16:11:39 -0700 Subject: net/tls: make sure offload also gets the keys wiped Commit 86029d10af18 ("tls: zero the crypto information from tls_context before freeing") added memzero_explicit() calls to clear the key material before freeing struct tls_context, but it missed tls_device.c has its own way of freeing this structure. Replace the missing free. Fixes: 86029d10af18 ("tls: zero the crypto information from tls_context before freeing") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S. Miller --- include/net/tls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index 53d96bca220d..889df0312cd1 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -313,6 +313,7 @@ struct tls_offload_context_rx { (ALIGN(sizeof(struct tls_offload_context_rx), sizeof(void *)) + \ TLS_DRIVER_STATE_SIZE) +void tls_ctx_free(struct tls_context *ctx); int wait_on_pending_writer(struct sock *sk, long *timeo); int tls_sk_query(struct sock *sk, int optname, char __user *optval, int __user *optlen); -- cgit From 99f0eae653b2db64917d0b58099eb51e300b311d Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Jul 2019 16:04:19 +0100 Subject: rxrpc: Fix oops in tracepoint If the rxrpc_eproto tracepoint is enabled, an oops will be cause by the trace line that rxrpc_extract_header() tries to emit when a protocol error occurs (typically because the packet is short) because the call argument is NULL. Fix this by using ?: to assume 0 as the debug_id if call is NULL. This can then be induced by: echo -e '\0\0\0\0\0\0\0\0' | ncat -4u --send-only 20001 where addr has the following program running on it: #include #include #include #include #include #include #include int main(void) { struct sockaddr_rxrpc srx; int fd; memset(&srx, 0, sizeof(srx)); srx.srx_family = AF_RXRPC; srx.srx_service = 0; srx.transport_type = AF_INET; srx.transport_len = sizeof(srx.transport.sin); srx.transport.sin.sin_family = AF_INET; srx.transport.sin.sin_port = htons(0x4e21); fd = socket(AF_RXRPC, SOCK_DGRAM, AF_INET6); bind(fd, (struct sockaddr *)&srx, sizeof(srx)); sleep(20); return 0; } It results in the following oops. BUG: kernel NULL pointer dereference, address: 0000000000000340 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page ... RIP: 0010:trace_event_raw_event_rxrpc_rx_eproto+0x47/0xac ... Call Trace: rxrpc_extract_header+0x86/0x171 ? rcu_read_lock_sched_held+0x5d/0x63 ? rxrpc_new_skb+0xd4/0x109 rxrpc_input_packet+0xef/0x14fc ? rxrpc_input_data+0x986/0x986 udp_queue_rcv_one_skb+0xbf/0x3d0 udp_unicast_rcv_skb.isra.8+0x64/0x71 ip_protocol_deliver_rcu+0xe4/0x1b4 ip_local_deliver+0xf0/0x154 __netif_receive_skb_one_core+0x50/0x6c netif_receive_skb_internal+0x26b/0x2e9 napi_gro_receive+0xf8/0x1da rtl8169_poll+0x303/0x4c4 net_rx_action+0x10e/0x333 __do_softirq+0x1a5/0x38f irq_exit+0x54/0xc4 do_IRQ+0xda/0xf8 common_interrupt+0xf/0xf ... ? cpuidle_enter_state+0x23c/0x34d cpuidle_enter+0x2a/0x36 do_idle+0x163/0x1ea cpu_startup_entry+0x1d/0x1f start_secondary+0x157/0x172 secondary_startup_64+0xa4/0xb0 Fixes: a25e21f0bcd2 ("rxrpc, afs: Use debug_ids rather than pointers in traces") Signed-off-by: David Howells Reviewed-by: Marc Dionne Signed-off-by: David S. Miller --- include/trace/events/rxrpc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index d85816878a52..cc1d060cbf13 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -1379,7 +1379,7 @@ TRACE_EVENT(rxrpc_rx_eproto, ), TP_fast_assign( - __entry->call = call->debug_id; + __entry->call = call ? call->debug_id : 0; __entry->serial = serial; __entry->why = why; ), -- cgit From 455302d1c9ae9318660aaeb9748a01ff414c9741 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Fri, 28 Jun 2019 11:04:07 +0300 Subject: xdp: fix hang while unregistering device bound to xdp socket Device that bound to XDP socket will not have zero refcount until the userspace application will not close it. This leads to hang inside 'netdev_wait_allrefs()' if device unregistering requested: # ip link del p1 < hang on recvmsg on netlink socket > # ps -x | grep ip 5126 pts/0 D+ 0:00 ip link del p1 # journalctl -b Jun 05 07:19:16 kernel: unregister_netdevice: waiting for p1 to become free. Usage count = 1 Jun 05 07:19:27 kernel: unregister_netdevice: waiting for p1 to become free. Usage count = 1 ... Fix that by implementing NETDEV_UNREGISTER event notification handler to properly clean up all the resources and unref device. This should also allow socket killing via ss(8) utility. Fixes: 965a99098443 ("xsk: add support for bind for Rx") Signed-off-by: Ilya Maximets Acked-by: Jonathan Lemon Signed-off-by: Daniel Borkmann --- include/net/xdp_sock.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index d074b6d60f8a..7da155164947 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -61,6 +61,11 @@ struct xdp_sock { struct xsk_queue *tx ____cacheline_aligned_in_smp; struct list_head list; bool zc; + enum { + XSK_READY = 0, + XSK_BOUND, + XSK_UNBOUND, + } state; /* Protects multiple processes in the control path */ struct mutex mutex; /* Mutual exclusion of NAPI TX thread and sendmsg error paths -- cgit