summaryrefslogtreecommitdiff
path: root/net/netlink/af_netlink.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/netlink/af_netlink.c')
-rw-r--r--net/netlink/af_netlink.c280
1 files changed, 133 insertions, 147 deletions
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index ff315351269f..a53ea60d0a78 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -59,7 +59,6 @@
#include <linux/rhashtable.h>
#include <asm/cacheflush.h>
#include <linux/hash.h>
-#include <linux/genetlink.h>
#include <linux/net_namespace.h>
#include <linux/nospec.h>
#include <linux/btf_ids.h>
@@ -73,6 +72,7 @@
#include <trace/events/netlink.h>
#include "af_netlink.h"
+#include "genetlink.h"
struct listeners {
struct rcu_head rcu;
@@ -130,7 +130,7 @@ static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
"nlk_cb_mutex-MAX_LINKS"
};
-static int netlink_dump(struct sock *sk);
+static int netlink_dump(struct sock *sk, bool lock_taken);
/* nl_table locking explained:
* Lookup and traversal are protected with an RCU read-side lock. Insertion
@@ -393,15 +393,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
static void netlink_sock_destruct(struct sock *sk)
{
- struct netlink_sock *nlk = nlk_sk(sk);
-
- if (nlk->cb_running) {
- if (nlk->cb.done)
- nlk->cb.done(&nlk->cb);
- module_put(nlk->cb.module);
- kfree_skb(nlk->cb.skb);
- }
-
skb_queue_purge(&sk->sk_receive_queue);
if (!sock_flag(sk, SOCK_DEAD)) {
@@ -414,14 +405,6 @@ static void netlink_sock_destruct(struct sock *sk)
WARN_ON(nlk_sk(sk)->groups);
}
-static void netlink_sock_destruct_work(struct work_struct *work)
-{
- struct netlink_sock *nlk = container_of(work, struct netlink_sock,
- work);
-
- sk_free(&nlk->sk);
-}
-
/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
* SMP. Look, when several writers sleep and reader wakes them up, all but one
* immediately hit write lock and grab all the cpus. Exclusive sleep solves
@@ -636,8 +619,7 @@ static struct proto netlink_proto = {
};
static int __netlink_create(struct net *net, struct socket *sock,
- struct mutex *cb_mutex, int protocol,
- int kern)
+ int protocol, int kern)
{
struct sock *sk;
struct netlink_sock *nlk;
@@ -651,15 +633,10 @@ static int __netlink_create(struct net *net, struct socket *sock,
sock_init_data(sock, sk);
nlk = nlk_sk(sk);
- if (cb_mutex) {
- nlk->cb_mutex = cb_mutex;
- } else {
- nlk->cb_mutex = &nlk->cb_def_mutex;
- mutex_init(nlk->cb_mutex);
- lockdep_set_class_and_name(nlk->cb_mutex,
+ mutex_init(&nlk->nl_cb_mutex);
+ lockdep_set_class_and_name(&nlk->nl_cb_mutex,
nlk_cb_mutex_keys + protocol,
nlk_cb_mutex_key_strings[protocol]);
- }
init_waitqueue_head(&nlk->wait);
sk->sk_destruct = netlink_sock_destruct;
@@ -671,7 +648,6 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
struct module *module = NULL;
- struct mutex *cb_mutex;
struct netlink_sock *nlk;
int (*bind)(struct net *net, int group);
void (*unbind)(struct net *net, int group);
@@ -700,7 +676,6 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
module = nl_table[protocol].module;
else
err = -EPROTONOSUPPORT;
- cb_mutex = nl_table[protocol].cb_mutex;
bind = nl_table[protocol].bind;
unbind = nl_table[protocol].unbind;
release = nl_table[protocol].release;
@@ -709,7 +684,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
if (err < 0)
goto out;
- err = __netlink_create(net, sock, cb_mutex, protocol, kern);
+ err = __netlink_create(net, sock, protocol, kern);
if (err < 0)
goto out_module;
@@ -739,12 +714,6 @@ static void deferred_put_nlk_sk(struct rcu_head *head)
if (!refcount_dec_and_test(&sk->sk_refcnt))
return;
- if (nlk->cb_running && nlk->cb.done) {
- INIT_WORK(&nlk->work, netlink_sock_destruct_work);
- schedule_work(&nlk->work);
- return;
- }
-
sk_free(sk);
}
@@ -796,6 +765,14 @@ static int netlink_release(struct socket *sock)
NETLINK_URELEASE, &n);
}
+ /* Terminate any outstanding dump */
+ if (nlk->cb_running) {
+ if (nlk->cb.done)
+ nlk->cb.done(&nlk->cb);
+ module_put(nlk->cb.module);
+ kfree_skb(nlk->cb.skb);
+ }
+
module_put(nlk->module);
if (netlink_is_kernel(sk)) {
@@ -818,16 +795,6 @@ static int netlink_release(struct socket *sock)
sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
- /* Because struct net might disappear soon, do not keep a pointer. */
- if (!sk->sk_net_refcnt && sock_net(sk) != &init_net) {
- __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false);
- /* Because of deferred_put_nlk_sk and use of work queue,
- * it is possible netns will be freed before this socket.
- */
- sock_net_set(sk, &init_net);
- __netns_tracker_alloc(&init_net, &sk->ns_tracker,
- false, GFP_KERNEL);
- }
call_rcu(&nlk->rcu, deferred_put_nlk_sk);
return 0;
}
@@ -1188,11 +1155,16 @@ static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
return sock;
}
-struct sock *netlink_getsockbyfilp(struct file *filp)
+struct sock *netlink_getsockbyfd(int fd)
{
- struct inode *inode = file_inode(filp);
+ CLASS(fd, f)(fd);
+ struct inode *inode;
struct sock *sock;
+ if (fd_empty(f))
+ return ERR_PTR(-EBADF);
+
+ inode = file_inode(fd_file(f));
if (!S_ISSOCK(inode->i_mode))
return ERR_PTR(-ENOTSOCK);
@@ -1206,23 +1178,21 @@ struct sock *netlink_getsockbyfilp(struct file *filp)
struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast)
{
+ size_t head_size = SKB_HEAD_ALIGN(size);
struct sk_buff *skb;
void *data;
- if (size <= NLMSG_GOODSIZE || broadcast)
+ if (head_size <= PAGE_SIZE || broadcast)
return alloc_skb(size, GFP_KERNEL);
- size = SKB_DATA_ALIGN(size) +
- SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-
- data = vmalloc(size);
- if (data == NULL)
+ data = kvmalloc(head_size, GFP_KERNEL);
+ if (!data)
return NULL;
- skb = __build_skb(data, size);
- if (skb == NULL)
- vfree(data);
- else
+ skb = __build_skb(data, head_size);
+ if (!skb)
+ kvfree(data);
+ else if (is_vmalloc_addr(data))
skb->destructor = netlink_skb_destructor;
return skb;
@@ -1307,6 +1277,7 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
{
int delta;
+ skb_assert_len(skb);
WARN_ON(skb->sk != NULL);
delta = skb->end - skb->tail;
if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize)
@@ -1779,6 +1750,9 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
netlink_unlock_table();
return err;
}
+ case NETLINK_LISTEN_ALL_NSID:
+ flag = NETLINK_F_LISTEN_ALL_NSID;
+ break;
case NETLINK_CAP_ACK:
flag = NETLINK_F_CAP_ACK;
break;
@@ -1987,7 +1961,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (READ_ONCE(nlk->cb_running) &&
atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
- ret = netlink_dump(sk);
+ ret = netlink_dump(sk, false);
if (ret) {
WRITE_ONCE(sk->sk_err, -ret);
sk_error_report(sk);
@@ -2019,7 +1993,6 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
struct sock *sk;
struct netlink_sock *nlk;
struct listeners *listeners = NULL;
- struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL;
unsigned int groups;
BUG_ON(!nl_table);
@@ -2030,7 +2003,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
return NULL;
- if (__netlink_create(net, sock, cb_mutex, unit, 1) < 0)
+ if (__netlink_create(net, sock, unit, 1) < 0)
goto out_sock_release_nosk;
sk = sock->sk;
@@ -2058,7 +2031,6 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
if (!nl_table[unit].registered) {
nl_table[unit].groups = groups;
rcu_assign_pointer(nl_table[unit].listeners, listeners);
- nl_table[unit].cb_mutex = cb_mutex;
nl_table[unit].module = module;
if (cfg) {
nl_table[unit].bind = cfg->bind;
@@ -2145,8 +2117,9 @@ void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
{
struct sock *sk;
struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
+ struct hlist_node *tmp;
- sk_for_each_bound(sk, &tbl->mc_list)
+ sk_for_each_bound_safe(sk, tmp, &tbl->mc_list)
netlink_update_socket_mc(nlk_sk(sk), group, 0);
}
@@ -2168,6 +2141,70 @@ __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int fla
}
EXPORT_SYMBOL(__nlmsg_put);
+static size_t
+netlink_ack_tlv_len(struct netlink_sock *nlk, int err,
+ const struct netlink_ext_ack *extack)
+{
+ size_t tlvlen;
+
+ if (!extack || !test_bit(NETLINK_F_EXT_ACK, &nlk->flags))
+ return 0;
+
+ tlvlen = 0;
+ if (extack->_msg)
+ tlvlen += nla_total_size(strlen(extack->_msg) + 1);
+ if (extack->cookie_len)
+ tlvlen += nla_total_size(extack->cookie_len);
+
+ /* Following attributes are only reported as error (not warning) */
+ if (!err)
+ return tlvlen;
+
+ if (extack->bad_attr)
+ tlvlen += nla_total_size(sizeof(u32));
+ if (extack->policy)
+ tlvlen += netlink_policy_dump_attr_size_estimate(extack->policy);
+ if (extack->miss_type)
+ tlvlen += nla_total_size(sizeof(u32));
+ if (extack->miss_nest)
+ tlvlen += nla_total_size(sizeof(u32));
+
+ return tlvlen;
+}
+
+static bool nlmsg_check_in_payload(const struct nlmsghdr *nlh, const void *addr)
+{
+ return !WARN_ON(addr < nlmsg_data(nlh) ||
+ addr - (const void *) nlh >= nlh->nlmsg_len);
+}
+
+static void
+netlink_ack_tlv_fill(struct sk_buff *skb, const struct nlmsghdr *nlh, int err,
+ const struct netlink_ext_ack *extack)
+{
+ if (extack->_msg)
+ WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg));
+ if (extack->cookie_len)
+ WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE,
+ extack->cookie_len, extack->cookie));
+
+ if (!err)
+ return;
+
+ if (extack->bad_attr && nlmsg_check_in_payload(nlh, extack->bad_attr))
+ WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
+ (u8 *)extack->bad_attr - (const u8 *)nlh));
+ if (extack->policy)
+ netlink_policy_dump_write_attr(skb, extack->policy,
+ NLMSGERR_ATTR_POLICY);
+ if (extack->miss_type)
+ WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_TYPE,
+ extack->miss_type));
+ if (extack->miss_nest && nlmsg_check_in_payload(nlh, extack->miss_nest))
+ WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_NEST,
+ (u8 *)extack->miss_nest - (const u8 *)nlh));
+}
+
/*
* It looks a bit ugly.
* It would be better to create kernel thread.
@@ -2178,6 +2215,7 @@ static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb,
struct netlink_ext_ack *extack)
{
struct nlmsghdr *nlh;
+ size_t extack_len;
nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(nlk->dump_done_errno),
NLM_F_MULTI | cb->answer_flags);
@@ -2187,16 +2225,20 @@ static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb,
nl_dump_check_consistent(cb, nlh);
memcpy(nlmsg_data(nlh), &nlk->dump_done_errno, sizeof(nlk->dump_done_errno));
- if (extack->_msg && test_bit(NETLINK_F_EXT_ACK, &nlk->flags)) {
+ extack_len = netlink_ack_tlv_len(nlk, nlk->dump_done_errno, extack);
+ if (extack_len) {
nlh->nlmsg_flags |= NLM_F_ACK_TLVS;
- if (!nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg))
+ if (skb_tailroom(skb) >= extack_len) {
+ netlink_ack_tlv_fill(skb, cb->nlh,
+ nlk->dump_done_errno, extack);
nlmsg_end(skb, nlh);
+ }
}
return 0;
}
-static int netlink_dump(struct sock *sk)
+static int netlink_dump(struct sock *sk, bool lock_taken)
{
struct netlink_sock *nlk = nlk_sk(sk);
struct netlink_ext_ack extack = {};
@@ -2208,7 +2250,8 @@ static int netlink_dump(struct sock *sk)
int alloc_min_size;
int alloc_size;
- mutex_lock(nlk->cb_mutex);
+ if (!lock_taken)
+ mutex_lock(&nlk->nl_cb_mutex);
if (!nlk->cb_running) {
err = -EINVAL;
goto errout_skb;
@@ -2218,7 +2261,7 @@ static int netlink_dump(struct sock *sk)
goto errout_skb;
/* NLMSG_GOODSIZE is small to avoid high order allocations being
- * required, but it makes sense to _attempt_ a 16K bytes allocation
+ * required, but it makes sense to _attempt_ a 32KiB allocation
* to reduce number of system calls on dump operations, if user
* ever provided a big enough buffer.
*/
@@ -2240,7 +2283,7 @@ static int netlink_dump(struct sock *sk)
goto errout_skb;
/* Trim skb to allocated size. User is expected to provide buffer as
- * large as max(min_dump_alloc, 16KiB (mac_recvmsg_len capped at
+ * large as max(min_dump_alloc, 32KiB (max_recvmsg_len capped at
* netlink_recvmsg())). dump will pack as many smaller messages as
* could fit within the allocated skb. skb is typically allocated
* with larger space than required (could be as much as near 2x the
@@ -2261,13 +2304,24 @@ static int netlink_dump(struct sock *sk)
if (nlk->dump_done_errno > 0) {
cb->extack = &extack;
+
nlk->dump_done_errno = cb->dump(skb, cb);
+
+ /* EMSGSIZE plus something already in the skb means
+ * that there's more to dump but current skb has filled up.
+ * If the callback really wants to return EMSGSIZE to user space
+ * it needs to do so again, on the next cb->dump() call,
+ * without putting data in the skb.
+ */
+ if (nlk->dump_done_errno == -EMSGSIZE && skb->len)
+ nlk->dump_done_errno = skb->len;
+
cb->extack = NULL;
}
if (nlk->dump_done_errno > 0 ||
skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) {
- mutex_unlock(nlk->cb_mutex);
+ mutex_unlock(&nlk->nl_cb_mutex);
if (sk_filter(sk, skb))
kfree_skb(skb);
@@ -2301,13 +2355,13 @@ static int netlink_dump(struct sock *sk)
WRITE_ONCE(nlk->cb_running, false);
module = cb->module;
skb = cb->skb;
- mutex_unlock(nlk->cb_mutex);
+ mutex_unlock(&nlk->nl_cb_mutex);
module_put(module);
consume_skb(skb);
return 0;
errout_skb:
- mutex_unlock(nlk->cb_mutex);
+ mutex_unlock(&nlk->nl_cb_mutex);
kfree_skb(skb);
return err;
}
@@ -2330,7 +2384,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
}
nlk = nlk_sk(sk);
- mutex_lock(nlk->cb_mutex);
+ mutex_lock(&nlk->nl_cb_mutex);
/* A dump is in progress... */
if (nlk->cb_running) {
ret = -EBUSY;
@@ -2350,6 +2404,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
cb->data = control->data;
cb->module = control->module;
cb->min_dump_alloc = control->min_dump_alloc;
+ cb->flags = control->flags;
cb->skb = skb;
cb->strict_check = nlk_test_bit(STRICT_CHK, NETLINK_CB(skb).sk);
@@ -2365,9 +2420,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
WRITE_ONCE(nlk->cb_running, true);
nlk->dump_done_errno = INT_MAX;
- mutex_unlock(nlk->cb_mutex);
-
- ret = netlink_dump(sk);
+ ret = netlink_dump(sk, true);
sock_put(sk);
@@ -2383,76 +2436,13 @@ error_put:
module_put(control->module);
error_unlock:
sock_put(sk);
- mutex_unlock(nlk->cb_mutex);
+ mutex_unlock(&nlk->nl_cb_mutex);
error_free:
kfree_skb(skb);
return ret;
}
EXPORT_SYMBOL(__netlink_dump_start);
-static size_t
-netlink_ack_tlv_len(struct netlink_sock *nlk, int err,
- const struct netlink_ext_ack *extack)
-{
- size_t tlvlen;
-
- if (!extack || !test_bit(NETLINK_F_EXT_ACK, &nlk->flags))
- return 0;
-
- tlvlen = 0;
- if (extack->_msg)
- tlvlen += nla_total_size(strlen(extack->_msg) + 1);
- if (extack->cookie_len)
- tlvlen += nla_total_size(extack->cookie_len);
-
- /* Following attributes are only reported as error (not warning) */
- if (!err)
- return tlvlen;
-
- if (extack->bad_attr)
- tlvlen += nla_total_size(sizeof(u32));
- if (extack->policy)
- tlvlen += netlink_policy_dump_attr_size_estimate(extack->policy);
- if (extack->miss_type)
- tlvlen += nla_total_size(sizeof(u32));
- if (extack->miss_nest)
- tlvlen += nla_total_size(sizeof(u32));
-
- return tlvlen;
-}
-
-static void
-netlink_ack_tlv_fill(struct sk_buff *in_skb, struct sk_buff *skb,
- struct nlmsghdr *nlh, int err,
- const struct netlink_ext_ack *extack)
-{
- if (extack->_msg)
- WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg));
- if (extack->cookie_len)
- WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE,
- extack->cookie_len, extack->cookie));
-
- if (!err)
- return;
-
- if (extack->bad_attr &&
- !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
- (u8 *)extack->bad_attr >= in_skb->data + in_skb->len))
- WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
- (u8 *)extack->bad_attr - (u8 *)nlh));
- if (extack->policy)
- netlink_policy_dump_write_attr(skb, extack->policy,
- NLMSGERR_ATTR_POLICY);
- if (extack->miss_type)
- WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_TYPE,
- extack->miss_type));
- if (extack->miss_nest &&
- !WARN_ON((u8 *)extack->miss_nest < in_skb->data ||
- (u8 *)extack->miss_nest > in_skb->data + in_skb->len))
- WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_NEST,
- (u8 *)extack->miss_nest - (u8 *)nlh));
-}
-
void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
const struct netlink_ext_ack *extack)
{
@@ -2498,7 +2488,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
}
if (tlvlen)
- netlink_ack_tlv_fill(in_skb, skb, nlh, err, extack);
+ netlink_ack_tlv_fill(skb, nlh, err, extack);
nlmsg_end(skb, rep);
@@ -2927,12 +2917,8 @@ static int __init netlink_proto_init(void)
for (i = 0; i < MAX_LINKS; i++) {
if (rhashtable_init(&nl_table[i].hash,
- &netlink_rhashtable_params) < 0) {
- while (--i > 0)
- rhashtable_destroy(&nl_table[i].hash);
- kfree(nl_table);
+ &netlink_rhashtable_params) < 0)
goto panic;
- }
}
netlink_add_usersock_entry();