From 4408d55a64677febdcb50d1b44d0dc714ce4187e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 13 Jan 2022 09:28:45 +0900 Subject: af_unix: Refactor unix_next_socket(). Currently, unix_next_socket() is overloaded depending on the 2nd argument. If it is NULL, unix_next_socket() returns the first socket in the hash. If not NULL, it returns the next socket in the same hash list or the first socket in the next non-empty hash list. This patch refactors unix_next_socket() into two functions unix_get_first() and unix_get_next(). unix_get_first() newly acquires a lock and returns the first socket in the list. unix_get_next() returns the next socket in a list or releases a lock and falls back to unix_get_first(). In the following patch, bpf iter holds entire sockets in a list and always releases the lock before .show(). It always calls unix_get_first() to acquire a lock in each iteration. So, this patch makes the change easier to follow. Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20220113002849.4384-2-kuniyu@amazon.co.jp Signed-off-by: Alexei Starovoitov --- net/unix/af_unix.c | 51 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 21 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c19569819866..e1c4082accdb 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -3240,49 +3240,58 @@ static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) return sk; } -static struct sock *unix_next_socket(struct seq_file *seq, - struct sock *sk, - loff_t *pos) +static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos) { unsigned long bucket = get_bucket(*pos); + struct sock *sk; - while (sk > (struct sock *)SEQ_START_TOKEN) { - sk = sk_next(sk); - if (!sk) - goto next_bucket; - if (sock_net(sk) == seq_file_net(seq)) - return sk; - } - - do { + while (bucket < ARRAY_SIZE(unix_socket_table)) { spin_lock(&unix_table_locks[bucket]); + sk = unix_from_bucket(seq, pos); if (sk) return sk; -next_bucket: - spin_unlock(&unix_table_locks[bucket++]); - *pos = set_bucket_offset(bucket, 1); - } while (bucket < ARRAY_SIZE(unix_socket_table)); + spin_unlock(&unix_table_locks[bucket]); + + *pos = set_bucket_offset(++bucket, 1); + } return NULL; } +static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk, + loff_t *pos) +{ + unsigned long bucket = get_bucket(*pos); + + for (sk = sk_next(sk); sk; sk = sk_next(sk)) + if (sock_net(sk) == seq_file_net(seq)) + return sk; + + spin_unlock(&unix_table_locks[bucket]); + + *pos = set_bucket_offset(++bucket, 1); + + return unix_get_first(seq, pos); +} + static void *unix_seq_start(struct seq_file *seq, loff_t *pos) { if (!*pos) return SEQ_START_TOKEN; - if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table)) - return NULL; - - return unix_next_socket(seq, NULL, pos); + return unix_get_first(seq, pos); } static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; - return unix_next_socket(seq, v, pos); + + if (v == SEQ_START_TOKEN) + return unix_get_first(seq, pos); + + return unix_get_next(seq, v, pos); } static void unix_seq_stop(struct seq_file *seq, void *v) -- cgit From 855d8e77ffb05be6e54c34dababccb20318aec00 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 13 Jan 2022 09:28:46 +0900 Subject: bpf: af_unix: Use batching algorithm in bpf unix iter. The commit 04c7820b776f ("bpf: tcp: Bpf iter batching and lock_sock") introduces the batching algorithm to iterate TCP sockets with more consistency. This patch uses the same algorithm to iterate AF_UNIX sockets. Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20220113002849.4384-3-kuniyu@amazon.co.jp Signed-off-by: Alexei Starovoitov --- net/unix/af_unix.c | 184 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 177 insertions(+), 7 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e1c4082accdb..d383d5f63b6b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -3356,6 +3356,15 @@ static const struct seq_operations unix_seq_ops = { }; #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) +struct bpf_unix_iter_state { + struct seq_net_private p; + unsigned int cur_sk; + unsigned int end_sk; + unsigned int max_sk; + struct sock **batch; + bool st_bucket_done; +}; + struct bpf_iter__unix { __bpf_md_ptr(struct bpf_iter_meta *, meta); __bpf_md_ptr(struct unix_sock *, unix_sk); @@ -3374,24 +3383,156 @@ static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, return bpf_iter_run_prog(prog, &ctx); } +static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) + +{ + struct bpf_unix_iter_state *iter = seq->private; + unsigned int expected = 1; + struct sock *sk; + + sock_hold(start_sk); + iter->batch[iter->end_sk++] = start_sk; + + for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) { + if (sock_net(sk) != seq_file_net(seq)) + continue; + + if (iter->end_sk < iter->max_sk) { + sock_hold(sk); + iter->batch[iter->end_sk++] = sk; + } + + expected++; + } + + spin_unlock(&unix_table_locks[start_sk->sk_hash]); + + return expected; +} + +static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter) +{ + while (iter->cur_sk < iter->end_sk) + sock_put(iter->batch[iter->cur_sk++]); +} + +static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter, + unsigned int new_batch_sz) +{ + struct sock **new_batch; + + new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz, + GFP_USER | __GFP_NOWARN); + if (!new_batch) + return -ENOMEM; + + bpf_iter_unix_put_batch(iter); + kvfree(iter->batch); + iter->batch = new_batch; + iter->max_sk = new_batch_sz; + + return 0; +} + +static struct sock *bpf_iter_unix_batch(struct seq_file *seq, + loff_t *pos) +{ + struct bpf_unix_iter_state *iter = seq->private; + unsigned int expected; + bool resized = false; + struct sock *sk; + + if (iter->st_bucket_done) + *pos = set_bucket_offset(get_bucket(*pos) + 1, 1); + +again: + /* Get a new batch */ + iter->cur_sk = 0; + iter->end_sk = 0; + + sk = unix_get_first(seq, pos); + if (!sk) + return NULL; /* Done */ + + expected = bpf_iter_unix_hold_batch(seq, sk); + + if (iter->end_sk == expected) { + iter->st_bucket_done = true; + return sk; + } + + if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) { + resized = true; + goto again; + } + + return sk; +} + +static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos) +{ + if (!*pos) + return SEQ_START_TOKEN; + + /* bpf iter does not support lseek, so it always + * continue from where it was stop()-ped. + */ + return bpf_iter_unix_batch(seq, pos); +} + +static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct bpf_unix_iter_state *iter = seq->private; + struct sock *sk; + + /* Whenever seq_next() is called, the iter->cur_sk is + * done with seq_show(), so advance to the next sk in + * the batch. + */ + if (iter->cur_sk < iter->end_sk) + sock_put(iter->batch[iter->cur_sk++]); + + ++*pos; + + if (iter->cur_sk < iter->end_sk) + sk = iter->batch[iter->cur_sk]; + else + sk = bpf_iter_unix_batch(seq, pos); + + return sk; +} + static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v) { struct bpf_iter_meta meta; struct bpf_prog *prog; struct sock *sk = v; uid_t uid; + bool slow; + int ret; if (v == SEQ_START_TOKEN) return 0; + slow = lock_sock_fast(sk); + + if (unlikely(sk_unhashed(sk))) { + ret = SEQ_SKIP; + goto unlock; + } + uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); meta.seq = seq; prog = bpf_iter_get_info(&meta, false); - return unix_prog_seq_show(prog, &meta, v, uid); + ret = unix_prog_seq_show(prog, &meta, v, uid); +unlock: + unlock_sock_fast(sk, slow); + return ret; } static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v) { + struct bpf_unix_iter_state *iter = seq->private; struct bpf_iter_meta meta; struct bpf_prog *prog; @@ -3402,12 +3543,13 @@ static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v) (void)unix_prog_seq_show(prog, &meta, v, 0); } - unix_seq_stop(seq, v); + if (iter->cur_sk < iter->end_sk) + bpf_iter_unix_put_batch(iter); } static const struct seq_operations bpf_iter_unix_seq_ops = { - .start = unix_seq_start, - .next = unix_seq_next, + .start = bpf_iter_unix_seq_start, + .next = bpf_iter_unix_seq_next, .stop = bpf_iter_unix_seq_stop, .show = bpf_iter_unix_seq_show, }; @@ -3456,11 +3598,39 @@ static struct pernet_operations unix_net_ops = { DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta, struct unix_sock *unix_sk, uid_t uid) +#define INIT_BATCH_SZ 16 + +static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux) +{ + struct bpf_unix_iter_state *iter = priv_data; + int err; + + err = bpf_iter_init_seq_net(priv_data, aux); + if (err) + return err; + + err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ); + if (err) { + bpf_iter_fini_seq_net(priv_data); + return err; + } + + return 0; +} + +static void bpf_iter_fini_unix(void *priv_data) +{ + struct bpf_unix_iter_state *iter = priv_data; + + bpf_iter_fini_seq_net(priv_data); + kvfree(iter->batch); +} + static const struct bpf_iter_seq_info unix_seq_info = { .seq_ops = &bpf_iter_unix_seq_ops, - .init_seq_private = bpf_iter_init_seq_net, - .fini_seq_private = bpf_iter_fini_seq_net, - .seq_priv_size = sizeof(struct seq_net_private), + .init_seq_private = bpf_iter_init_unix, + .fini_seq_private = bpf_iter_fini_unix, + .seq_priv_size = sizeof(struct bpf_unix_iter_state), }; static struct bpf_iter_reg unix_reg_info = { -- cgit From eb7d8f1d9ebc7379f09a51bf4faa35e0bfa7437d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 13 Jan 2022 09:28:47 +0900 Subject: bpf: Support bpf_(get|set)sockopt() in bpf unix iter. This patch makes bpf_(get|set)sockopt() available when iterating AF_UNIX sockets. Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20220113002849.4384-4-kuniyu@amazon.co.jp Signed-off-by: Alexei Starovoitov --- net/unix/af_unix.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d383d5f63b6b..3e0d6281fd1e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -3633,6 +3633,20 @@ static const struct bpf_iter_seq_info unix_seq_info = { .seq_priv_size = sizeof(struct bpf_unix_iter_state), }; +static const struct bpf_func_proto * +bpf_iter_unix_get_func_proto(enum bpf_func_id func_id, + const struct bpf_prog *prog) +{ + switch (func_id) { + case BPF_FUNC_setsockopt: + return &bpf_sk_setsockopt_proto; + case BPF_FUNC_getsockopt: + return &bpf_sk_getsockopt_proto; + default: + return NULL; + } +} + static struct bpf_iter_reg unix_reg_info = { .target = "unix", .ctx_arg_info_size = 1, @@ -3640,6 +3654,7 @@ static struct bpf_iter_reg unix_reg_info = { { offsetof(struct bpf_iter__unix, unix_sk), PTR_TO_BTF_ID_OR_NULL }, }, + .get_func_proto = bpf_iter_unix_get_func_proto, .seq_info = &unix_seq_info, }; -- cgit From e82025c623e2bf04d162bafceb66a59115814479 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Mar 2022 12:08:08 +0900 Subject: af_unix: Fix some data-races around unix_sk(sk)->oob_skb. Out-of-band data automatically places a "mark" showing wherein the sequence the out-of-band data would have been. If the out-of-band data implies cancelling everything sent so far, the "mark" is helpful to flush them. When the socket's read pointer reaches the "mark", the ioctl() below sets a non zero value to the arg `atmark`: The out-of-band data is queued in sk->sk_receive_queue as well as ordinary data and also saved in unix_sk(sk)->oob_skb. It can be used to test if the head of the receive queue is the out-of-band data meaning the socket is at the "mark". While testing that, unix_ioctl() reads unix_sk(sk)->oob_skb locklessly. Thus, all accesses to oob_skb need some basic protection to avoid load/store tearing which KCSAN detects when these are called concurrently: - ioctl(fd_a, SIOCATMARK, &atmark, sizeof(atmark)) - send(fd_b_connected_to_a, buf, sizeof(buf), MSG_OOB) BUG: KCSAN: data-race in unix_ioctl / unix_stream_sendmsg write to 0xffff888003d9cff0 of 8 bytes by task 175 on cpu 1: unix_stream_sendmsg (net/unix/af_unix.c:2087 net/unix/af_unix.c:2191) sock_sendmsg (net/socket.c:705 net/socket.c:725) __sys_sendto (net/socket.c:2040) __x64_sys_sendto (net/socket.c:2048) do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:113) read to 0xffff888003d9cff0 of 8 bytes by task 176 on cpu 0: unix_ioctl (net/unix/af_unix.c:3101 (discriminator 1)) sock_do_ioctl (net/socket.c:1128) sock_ioctl (net/socket.c:1242) __x64_sys_ioctl (fs/ioctl.c:52 fs/ioctl.c:874 fs/ioctl.c:860 fs/ioctl.c:860) do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:113) value changed: 0xffff888003da0c00 -> 0xffff888003da0d00 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 176 Comm: unix_race_oob_i Not tainted 5.17.0-rc5-59529-g83dc4c2af682 #12 Hardware name: Red Hat KVM, BIOS 1.11.0-2.amzn2 04/01/2014 Fixes: 314001f0bf92 ("af_unix: Add OOB support") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/unix/af_unix.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c19569819866..0c37e5595aae 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2084,7 +2084,7 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other if (ousk->oob_skb) consume_skb(ousk->oob_skb); - ousk->oob_skb = skb; + WRITE_ONCE(ousk->oob_skb, skb); scm_stat_add(other, skb); skb_queue_tail(&other->sk_receive_queue, skb); @@ -2602,9 +2602,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) oob_skb = u->oob_skb; - if (!(state->flags & MSG_PEEK)) { - u->oob_skb = NULL; - } + if (!(state->flags & MSG_PEEK)) + WRITE_ONCE(u->oob_skb, NULL); unix_state_unlock(sk); @@ -2639,7 +2638,7 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, skb = NULL; } else if (sock_flag(sk, SOCK_URGINLINE)) { if (!(flags & MSG_PEEK)) { - u->oob_skb = NULL; + WRITE_ONCE(u->oob_skb, NULL); consume_skb(skb); } } else if (!(flags & MSG_PEEK)) { @@ -3094,11 +3093,10 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCATMARK: { struct sk_buff *skb; - struct unix_sock *u = unix_sk(sk); int answ = 0; skb = skb_peek(&sk->sk_receive_queue); - if (skb && skb == u->oob_skb) + if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb)) answ = 1; err = put_user(answ, (int __user *)arg); } -- cgit From d9a232d435dcc966738b0f414a86f7edf4f4c8c4 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Mar 2022 12:08:09 +0900 Subject: af_unix: Support POLLPRI for OOB. The commit 314001f0bf92 ("af_unix: Add OOB support") introduced OOB for AF_UNIX, but it lacks some changes for POLLPRI. Let's add the missing piece. In the selftest, normal datagrams are sent followed by OOB data, so this commit replaces `POLLIN | POLLPRI` with just `POLLPRI` in the first test case. Fixes: 314001f0bf92 ("af_unix: Add OOB support") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/unix/af_unix.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 0c37e5595aae..1e7ed5829ed5 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -3137,6 +3137,10 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa mask |= EPOLLIN | EPOLLRDNORM; if (sk_is_readable(sk)) mask |= EPOLLIN | EPOLLRDNORM; +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (READ_ONCE(unix_sk(sk)->oob_skb)) + mask |= EPOLLPRI; +#endif /* Connection-based need to check for termination and startup */ if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && -- cgit From 4edf21aa94ee33c75f819f2b6eb6dd52ef8a1628 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Mar 2022 12:23:08 +0900 Subject: af_unix: Remove unnecessary brackets around CONFIG_AF_UNIX_OOB. Let's remove unnecessary brackets around CONFIG_AF_UNIX_OOB. Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20220317032308.65372-1-kuniyu@amazon.co.jp Signed-off-by: Jakub Kicinski --- net/unix/af_unix.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3e0d6281fd1e..4247c4134f31 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2049,7 +2049,7 @@ out: */ #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) -#if (IS_ENABLED(CONFIG_AF_UNIX_OOB)) +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other) { struct unix_sock *ousk = unix_sk(other); @@ -2115,7 +2115,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, err = -EOPNOTSUPP; if (msg->msg_flags & MSG_OOB) { -#if (IS_ENABLED(CONFIG_AF_UNIX_OOB)) +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) if (len) len--; else @@ -2186,7 +2186,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, sent += size; } -#if (IS_ENABLED(CONFIG_AF_UNIX_OOB)) +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) if (msg->msg_flags & MSG_OOB) { err = queue_oob(sock, msg, other); if (err) -- cgit From f4b41f062c424209e3939a81e6da022e049a45f2 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 4 Apr 2022 18:30:22 +0200 Subject: net: remove noblock parameter from skb_recv_datagram() skb_recv_datagram() has two parameters 'flags' and 'noblock' that are merged inside skb_recv_datagram() by 'flags | (noblock ? MSG_DONTWAIT : 0)' As 'flags' may contain MSG_DONTWAIT as value most callers split the 'flags' into 'flags' and 'noblock' with finally obsolete bit operations like this: skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &rc); And this is not even done consistently with the 'flags' parameter. This patch removes the obsolete and costly splitting into two parameters and only performs bit operations when really needed on the caller side. One missing conversion thankfully reported by kernel test robot. I missed to enable kunit tests to build the mctp code. Reported-by: kernel test robot Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/unix/af_unix.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e71a312faa1e..fecbd95da918 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1643,7 +1643,8 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags, * so that no locks are necessary. */ - skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err); + skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, + &err); if (!skb) { /* This means receive shutdown. */ if (err == 0) @@ -2500,7 +2501,7 @@ static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, int used, err; mutex_lock(&u->iolock); - skb = skb_recv_datagram(sk, 0, 1, &err); + skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); mutex_unlock(&u->iolock); if (!skb) return err; -- cgit From ec095263a965720e1ca39db1d9c5cd47846c789b Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 11 Apr 2022 14:49:55 +0200 Subject: net: remove noblock parameter from recvmsg() entities The internal recvmsg() functions have two parameters 'flags' and 'noblock' that were merged inside skb_recv_datagram(). As a follow up patch to commit f4b41f062c42 ("net: remove noblock parameter from skb_recv_datagram()") this patch removes the separate 'noblock' parameter for recvmsg(). Analogue to the referenced patch for skb_recv_datagram() the 'flags' and 'noblock' parameters are unnecessarily split up with e.g. err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); or in err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udp_recvmsg, sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); instead of simply using only flags all the time and check for MSG_DONTWAIT where needed (to preserve for the formerly separated no(n)block condition). Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/r/20220411124955.154876-1-socketcan@hartkopp.net Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index fecbd95da918..e1dd9e9c8452 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2484,8 +2484,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t si const struct proto *prot = READ_ONCE(sk->sk_prot); if (prot != &unix_dgram_proto) - return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, - flags & ~MSG_DONTWAIT, NULL); + return prot->recvmsg(sk, msg, size, flags, NULL); #endif return __unix_dgram_recvmsg(sk, msg, size, flags); } @@ -2917,8 +2916,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, const struct proto *prot = READ_ONCE(sk->sk_prot); if (prot != &unix_stream_proto) - return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, - flags & ~MSG_DONTWAIT, NULL); + return prot->recvmsg(sk, msg, size, flags, NULL); #endif return unix_stream_read_generic(&state, true); } -- cgit From b146cbf2e32f01f56244d670aef2f43d44fcf120 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 10 May 2022 15:46:26 -0700 Subject: af_unix: Silence randstruct GCC plugin warning While preparing for Clang randstruct support (which duplicated many of the warnings the randstruct GCC plugin warned about), one strange one remained only for the randstruct GCC plugin. Eliminating this rids the plugin of the last exception. It seems the plugin is happy to dereference individual members of a cross-struct cast, but it is upset about casting to a whole object pointer. This only manifests in one place in the kernel, so just replace the variable with individual member accesses. There is no change in executable instruction output. Drop the last exception from the randstruct GCC plugin. Cc: "David S. Miller" Cc: Christoph Hellwig Cc: Paolo Abeni Cc: Alexei Starovoitov Cc: Cong Wang Cc: Al Viro Cc: netdev@vger.kernel.org Cc: linux-hardening@vger.kernel.org Acked-by: Kuniyuki Iwashima Link: https://lore.kernel.org/lkml/20220511022217.58586-1-kuniyu@amazon.co.jp Acked-by: Jakub Kicinski Link: https://lore.kernel.org/lkml/20220511151542.4cb3ff17@kernel.org Signed-off-by: Kees Cook --- net/unix/af_unix.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e71a312faa1e..36367e7e3e0a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1808,11 +1808,9 @@ static int maybe_init_creds(struct scm_cookie *scm, static bool unix_skb_scm_eq(struct sk_buff *skb, struct scm_cookie *scm) { - const struct unix_skb_parms *u = &UNIXCB(skb); - - return u->pid == scm->pid && - uid_eq(u->uid, scm->creds.uid) && - gid_eq(u->gid, scm->creds.gid) && + return UNIXCB(skb).pid == scm->pid && + uid_eq(UNIXCB(skb).uid, scm->creds.uid) && + gid_eq(UNIXCB(skb).gid, scm->creds.gid) && unix_secdata_eq(scm, skb); } -- cgit From 662a80946ce13633ae90a55379f1346c10f0c432 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sun, 5 Jun 2022 16:23:25 -0700 Subject: af_unix: Fix a data-race in unix_dgram_peer_wake_me(). unix_dgram_poll() calls unix_dgram_peer_wake_me() without `other`'s lock held and check if its receive queue is full. Here we need to use unix_recvq_full_lockless() instead of unix_recvq_full(), otherwise KCSAN will report a data-race. Fixes: 7d267278a9ec ("unix: avoid use-after-free in ep_remove_wait_queue") Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20220605232325.11804-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 654dcef7cfb3..2206e6f8902d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -490,7 +490,7 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) * -ECONNREFUSED. Otherwise, if we haven't queued any skbs * to other and its full, we will hang waiting for POLLOUT. */ - if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD)) + if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD)) return 1; if (connected) -- cgit