diff options
Diffstat (limited to 'net/netlink/af_netlink.c')
-rw-r--r-- | net/netlink/af_netlink.c | 280 |
1 files changed, 133 insertions, 147 deletions
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index ff315351269f..a53ea60d0a78 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -59,7 +59,6 @@ #include <linux/rhashtable.h> #include <asm/cacheflush.h> #include <linux/hash.h> -#include <linux/genetlink.h> #include <linux/net_namespace.h> #include <linux/nospec.h> #include <linux/btf_ids.h> @@ -73,6 +72,7 @@ #include <trace/events/netlink.h> #include "af_netlink.h" +#include "genetlink.h" struct listeners { struct rcu_head rcu; @@ -130,7 +130,7 @@ static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = { "nlk_cb_mutex-MAX_LINKS" }; -static int netlink_dump(struct sock *sk); +static int netlink_dump(struct sock *sk, bool lock_taken); /* nl_table locking explained: * Lookup and traversal are protected with an RCU read-side lock. Insertion @@ -393,15 +393,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) static void netlink_sock_destruct(struct sock *sk) { - struct netlink_sock *nlk = nlk_sk(sk); - - if (nlk->cb_running) { - if (nlk->cb.done) - nlk->cb.done(&nlk->cb); - module_put(nlk->cb.module); - kfree_skb(nlk->cb.skb); - } - skb_queue_purge(&sk->sk_receive_queue); if (!sock_flag(sk, SOCK_DEAD)) { @@ -414,14 +405,6 @@ static void netlink_sock_destruct(struct sock *sk) WARN_ON(nlk_sk(sk)->groups); } -static void netlink_sock_destruct_work(struct work_struct *work) -{ - struct netlink_sock *nlk = container_of(work, struct netlink_sock, - work); - - sk_free(&nlk->sk); -} - /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on * SMP. Look, when several writers sleep and reader wakes them up, all but one * immediately hit write lock and grab all the cpus. Exclusive sleep solves @@ -636,8 +619,7 @@ static struct proto netlink_proto = { }; static int __netlink_create(struct net *net, struct socket *sock, - struct mutex *cb_mutex, int protocol, - int kern) + int protocol, int kern) { struct sock *sk; struct netlink_sock *nlk; @@ -651,15 +633,10 @@ static int __netlink_create(struct net *net, struct socket *sock, sock_init_data(sock, sk); nlk = nlk_sk(sk); - if (cb_mutex) { - nlk->cb_mutex = cb_mutex; - } else { - nlk->cb_mutex = &nlk->cb_def_mutex; - mutex_init(nlk->cb_mutex); - lockdep_set_class_and_name(nlk->cb_mutex, + mutex_init(&nlk->nl_cb_mutex); + lockdep_set_class_and_name(&nlk->nl_cb_mutex, nlk_cb_mutex_keys + protocol, nlk_cb_mutex_key_strings[protocol]); - } init_waitqueue_head(&nlk->wait); sk->sk_destruct = netlink_sock_destruct; @@ -671,7 +648,6 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, int kern) { struct module *module = NULL; - struct mutex *cb_mutex; struct netlink_sock *nlk; int (*bind)(struct net *net, int group); void (*unbind)(struct net *net, int group); @@ -700,7 +676,6 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, module = nl_table[protocol].module; else err = -EPROTONOSUPPORT; - cb_mutex = nl_table[protocol].cb_mutex; bind = nl_table[protocol].bind; unbind = nl_table[protocol].unbind; release = nl_table[protocol].release; @@ -709,7 +684,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, if (err < 0) goto out; - err = __netlink_create(net, sock, cb_mutex, protocol, kern); + err = __netlink_create(net, sock, protocol, kern); if (err < 0) goto out_module; @@ -739,12 +714,6 @@ static void deferred_put_nlk_sk(struct rcu_head *head) if (!refcount_dec_and_test(&sk->sk_refcnt)) return; - if (nlk->cb_running && nlk->cb.done) { - INIT_WORK(&nlk->work, netlink_sock_destruct_work); - schedule_work(&nlk->work); - return; - } - sk_free(sk); } @@ -796,6 +765,14 @@ static int netlink_release(struct socket *sock) NETLINK_URELEASE, &n); } + /* Terminate any outstanding dump */ + if (nlk->cb_running) { + if (nlk->cb.done) + nlk->cb.done(&nlk->cb); + module_put(nlk->cb.module); + kfree_skb(nlk->cb.skb); + } + module_put(nlk->module); if (netlink_is_kernel(sk)) { @@ -818,16 +795,6 @@ static int netlink_release(struct socket *sock) sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); - /* Because struct net might disappear soon, do not keep a pointer. */ - if (!sk->sk_net_refcnt && sock_net(sk) != &init_net) { - __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false); - /* Because of deferred_put_nlk_sk and use of work queue, - * it is possible netns will be freed before this socket. - */ - sock_net_set(sk, &init_net); - __netns_tracker_alloc(&init_net, &sk->ns_tracker, - false, GFP_KERNEL); - } call_rcu(&nlk->rcu, deferred_put_nlk_sk); return 0; } @@ -1188,11 +1155,16 @@ static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid) return sock; } -struct sock *netlink_getsockbyfilp(struct file *filp) +struct sock *netlink_getsockbyfd(int fd) { - struct inode *inode = file_inode(filp); + CLASS(fd, f)(fd); + struct inode *inode; struct sock *sock; + if (fd_empty(f)) + return ERR_PTR(-EBADF); + + inode = file_inode(fd_file(f)); if (!S_ISSOCK(inode->i_mode)) return ERR_PTR(-ENOTSOCK); @@ -1206,23 +1178,21 @@ struct sock *netlink_getsockbyfilp(struct file *filp) struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast) { + size_t head_size = SKB_HEAD_ALIGN(size); struct sk_buff *skb; void *data; - if (size <= NLMSG_GOODSIZE || broadcast) + if (head_size <= PAGE_SIZE || broadcast) return alloc_skb(size, GFP_KERNEL); - size = SKB_DATA_ALIGN(size) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - - data = vmalloc(size); - if (data == NULL) + data = kvmalloc(head_size, GFP_KERNEL); + if (!data) return NULL; - skb = __build_skb(data, size); - if (skb == NULL) - vfree(data); - else + skb = __build_skb(data, head_size); + if (!skb) + kvfree(data); + else if (is_vmalloc_addr(data)) skb->destructor = netlink_skb_destructor; return skb; @@ -1307,6 +1277,7 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) { int delta; + skb_assert_len(skb); WARN_ON(skb->sk != NULL); delta = skb->end - skb->tail; if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize) @@ -1779,6 +1750,9 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, netlink_unlock_table(); return err; } + case NETLINK_LISTEN_ALL_NSID: + flag = NETLINK_F_LISTEN_ALL_NSID; + break; case NETLINK_CAP_ACK: flag = NETLINK_F_CAP_ACK; break; @@ -1987,7 +1961,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (READ_ONCE(nlk->cb_running) && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { - ret = netlink_dump(sk); + ret = netlink_dump(sk, false); if (ret) { WRITE_ONCE(sk->sk_err, -ret); sk_error_report(sk); @@ -2019,7 +1993,6 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, struct sock *sk; struct netlink_sock *nlk; struct listeners *listeners = NULL; - struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL; unsigned int groups; BUG_ON(!nl_table); @@ -2030,7 +2003,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; - if (__netlink_create(net, sock, cb_mutex, unit, 1) < 0) + if (__netlink_create(net, sock, unit, 1) < 0) goto out_sock_release_nosk; sk = sock->sk; @@ -2058,7 +2031,6 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, if (!nl_table[unit].registered) { nl_table[unit].groups = groups; rcu_assign_pointer(nl_table[unit].listeners, listeners); - nl_table[unit].cb_mutex = cb_mutex; nl_table[unit].module = module; if (cfg) { nl_table[unit].bind = cfg->bind; @@ -2145,8 +2117,9 @@ void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group) { struct sock *sk; struct netlink_table *tbl = &nl_table[ksk->sk_protocol]; + struct hlist_node *tmp; - sk_for_each_bound(sk, &tbl->mc_list) + sk_for_each_bound_safe(sk, tmp, &tbl->mc_list) netlink_update_socket_mc(nlk_sk(sk), group, 0); } @@ -2168,6 +2141,70 @@ __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int fla } EXPORT_SYMBOL(__nlmsg_put); +static size_t +netlink_ack_tlv_len(struct netlink_sock *nlk, int err, + const struct netlink_ext_ack *extack) +{ + size_t tlvlen; + + if (!extack || !test_bit(NETLINK_F_EXT_ACK, &nlk->flags)) + return 0; + + tlvlen = 0; + if (extack->_msg) + tlvlen += nla_total_size(strlen(extack->_msg) + 1); + if (extack->cookie_len) + tlvlen += nla_total_size(extack->cookie_len); + + /* Following attributes are only reported as error (not warning) */ + if (!err) + return tlvlen; + + if (extack->bad_attr) + tlvlen += nla_total_size(sizeof(u32)); + if (extack->policy) + tlvlen += netlink_policy_dump_attr_size_estimate(extack->policy); + if (extack->miss_type) + tlvlen += nla_total_size(sizeof(u32)); + if (extack->miss_nest) + tlvlen += nla_total_size(sizeof(u32)); + + return tlvlen; +} + +static bool nlmsg_check_in_payload(const struct nlmsghdr *nlh, const void *addr) +{ + return !WARN_ON(addr < nlmsg_data(nlh) || + addr - (const void *) nlh >= nlh->nlmsg_len); +} + +static void +netlink_ack_tlv_fill(struct sk_buff *skb, const struct nlmsghdr *nlh, int err, + const struct netlink_ext_ack *extack) +{ + if (extack->_msg) + WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg)); + if (extack->cookie_len) + WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE, + extack->cookie_len, extack->cookie)); + + if (!err) + return; + + if (extack->bad_attr && nlmsg_check_in_payload(nlh, extack->bad_attr)) + WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS, + (u8 *)extack->bad_attr - (const u8 *)nlh)); + if (extack->policy) + netlink_policy_dump_write_attr(skb, extack->policy, + NLMSGERR_ATTR_POLICY); + if (extack->miss_type) + WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_TYPE, + extack->miss_type)); + if (extack->miss_nest && nlmsg_check_in_payload(nlh, extack->miss_nest)) + WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_NEST, + (u8 *)extack->miss_nest - (const u8 *)nlh)); +} + /* * It looks a bit ugly. * It would be better to create kernel thread. @@ -2178,6 +2215,7 @@ static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb, struct netlink_ext_ack *extack) { struct nlmsghdr *nlh; + size_t extack_len; nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(nlk->dump_done_errno), NLM_F_MULTI | cb->answer_flags); @@ -2187,16 +2225,20 @@ static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb, nl_dump_check_consistent(cb, nlh); memcpy(nlmsg_data(nlh), &nlk->dump_done_errno, sizeof(nlk->dump_done_errno)); - if (extack->_msg && test_bit(NETLINK_F_EXT_ACK, &nlk->flags)) { + extack_len = netlink_ack_tlv_len(nlk, nlk->dump_done_errno, extack); + if (extack_len) { nlh->nlmsg_flags |= NLM_F_ACK_TLVS; - if (!nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg)) + if (skb_tailroom(skb) >= extack_len) { + netlink_ack_tlv_fill(skb, cb->nlh, + nlk->dump_done_errno, extack); nlmsg_end(skb, nlh); + } } return 0; } -static int netlink_dump(struct sock *sk) +static int netlink_dump(struct sock *sk, bool lock_taken) { struct netlink_sock *nlk = nlk_sk(sk); struct netlink_ext_ack extack = {}; @@ -2208,7 +2250,8 @@ static int netlink_dump(struct sock *sk) int alloc_min_size; int alloc_size; - mutex_lock(nlk->cb_mutex); + if (!lock_taken) + mutex_lock(&nlk->nl_cb_mutex); if (!nlk->cb_running) { err = -EINVAL; goto errout_skb; @@ -2218,7 +2261,7 @@ static int netlink_dump(struct sock *sk) goto errout_skb; /* NLMSG_GOODSIZE is small to avoid high order allocations being - * required, but it makes sense to _attempt_ a 16K bytes allocation + * required, but it makes sense to _attempt_ a 32KiB allocation * to reduce number of system calls on dump operations, if user * ever provided a big enough buffer. */ @@ -2240,7 +2283,7 @@ static int netlink_dump(struct sock *sk) goto errout_skb; /* Trim skb to allocated size. User is expected to provide buffer as - * large as max(min_dump_alloc, 16KiB (mac_recvmsg_len capped at + * large as max(min_dump_alloc, 32KiB (max_recvmsg_len capped at * netlink_recvmsg())). dump will pack as many smaller messages as * could fit within the allocated skb. skb is typically allocated * with larger space than required (could be as much as near 2x the @@ -2261,13 +2304,24 @@ static int netlink_dump(struct sock *sk) if (nlk->dump_done_errno > 0) { cb->extack = &extack; + nlk->dump_done_errno = cb->dump(skb, cb); + + /* EMSGSIZE plus something already in the skb means + * that there's more to dump but current skb has filled up. + * If the callback really wants to return EMSGSIZE to user space + * it needs to do so again, on the next cb->dump() call, + * without putting data in the skb. + */ + if (nlk->dump_done_errno == -EMSGSIZE && skb->len) + nlk->dump_done_errno = skb->len; + cb->extack = NULL; } if (nlk->dump_done_errno > 0 || skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) { - mutex_unlock(nlk->cb_mutex); + mutex_unlock(&nlk->nl_cb_mutex); if (sk_filter(sk, skb)) kfree_skb(skb); @@ -2301,13 +2355,13 @@ static int netlink_dump(struct sock *sk) WRITE_ONCE(nlk->cb_running, false); module = cb->module; skb = cb->skb; - mutex_unlock(nlk->cb_mutex); + mutex_unlock(&nlk->nl_cb_mutex); module_put(module); consume_skb(skb); return 0; errout_skb: - mutex_unlock(nlk->cb_mutex); + mutex_unlock(&nlk->nl_cb_mutex); kfree_skb(skb); return err; } @@ -2330,7 +2384,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, } nlk = nlk_sk(sk); - mutex_lock(nlk->cb_mutex); + mutex_lock(&nlk->nl_cb_mutex); /* A dump is in progress... */ if (nlk->cb_running) { ret = -EBUSY; @@ -2350,6 +2404,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, cb->data = control->data; cb->module = control->module; cb->min_dump_alloc = control->min_dump_alloc; + cb->flags = control->flags; cb->skb = skb; cb->strict_check = nlk_test_bit(STRICT_CHK, NETLINK_CB(skb).sk); @@ -2365,9 +2420,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, WRITE_ONCE(nlk->cb_running, true); nlk->dump_done_errno = INT_MAX; - mutex_unlock(nlk->cb_mutex); - - ret = netlink_dump(sk); + ret = netlink_dump(sk, true); sock_put(sk); @@ -2383,76 +2436,13 @@ error_put: module_put(control->module); error_unlock: sock_put(sk); - mutex_unlock(nlk->cb_mutex); + mutex_unlock(&nlk->nl_cb_mutex); error_free: kfree_skb(skb); return ret; } EXPORT_SYMBOL(__netlink_dump_start); -static size_t -netlink_ack_tlv_len(struct netlink_sock *nlk, int err, - const struct netlink_ext_ack *extack) -{ - size_t tlvlen; - - if (!extack || !test_bit(NETLINK_F_EXT_ACK, &nlk->flags)) - return 0; - - tlvlen = 0; - if (extack->_msg) - tlvlen += nla_total_size(strlen(extack->_msg) + 1); - if (extack->cookie_len) - tlvlen += nla_total_size(extack->cookie_len); - - /* Following attributes are only reported as error (not warning) */ - if (!err) - return tlvlen; - - if (extack->bad_attr) - tlvlen += nla_total_size(sizeof(u32)); - if (extack->policy) - tlvlen += netlink_policy_dump_attr_size_estimate(extack->policy); - if (extack->miss_type) - tlvlen += nla_total_size(sizeof(u32)); - if (extack->miss_nest) - tlvlen += nla_total_size(sizeof(u32)); - - return tlvlen; -} - -static void -netlink_ack_tlv_fill(struct sk_buff *in_skb, struct sk_buff *skb, - struct nlmsghdr *nlh, int err, - const struct netlink_ext_ack *extack) -{ - if (extack->_msg) - WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg)); - if (extack->cookie_len) - WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE, - extack->cookie_len, extack->cookie)); - - if (!err) - return; - - if (extack->bad_attr && - !WARN_ON((u8 *)extack->bad_attr < in_skb->data || - (u8 *)extack->bad_attr >= in_skb->data + in_skb->len)) - WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS, - (u8 *)extack->bad_attr - (u8 *)nlh)); - if (extack->policy) - netlink_policy_dump_write_attr(skb, extack->policy, - NLMSGERR_ATTR_POLICY); - if (extack->miss_type) - WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_TYPE, - extack->miss_type)); - if (extack->miss_nest && - !WARN_ON((u8 *)extack->miss_nest < in_skb->data || - (u8 *)extack->miss_nest > in_skb->data + in_skb->len)) - WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_NEST, - (u8 *)extack->miss_nest - (u8 *)nlh)); -} - void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, const struct netlink_ext_ack *extack) { @@ -2498,7 +2488,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, } if (tlvlen) - netlink_ack_tlv_fill(in_skb, skb, nlh, err, extack); + netlink_ack_tlv_fill(skb, nlh, err, extack); nlmsg_end(skb, rep); @@ -2927,12 +2917,8 @@ static int __init netlink_proto_init(void) for (i = 0; i < MAX_LINKS; i++) { if (rhashtable_init(&nl_table[i].hash, - &netlink_rhashtable_params) < 0) { - while (--i > 0) - rhashtable_destroy(&nl_table[i].hash); - kfree(nl_table); + &netlink_rhashtable_params) < 0) goto panic; - } } netlink_add_usersock_entry(); |