Diffstat (limited to 'net/netlink/af_netlink.c')
-rw-r--r--  net/netlink/af_netlink.c | 307
1 file changed, 148 insertions(+), 159 deletions(-)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 7554803218a2..6332a0e06596 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -59,7 +59,6 @@
#include <linux/rhashtable.h>
#include <asm/cacheflush.h>
#include <linux/hash.h>
-#include <linux/genetlink.h>
#include <linux/net_namespace.h>
#include <linux/nospec.h>
#include <linux/btf_ids.h>
@@ -73,6 +72,7 @@
#include <trace/events/netlink.h>
#include "af_netlink.h"
+#include "genetlink.h"
struct listeners {
struct rcu_head rcu;
@@ -387,21 +387,11 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
WARN_ON(skb->sk != NULL);
skb->sk = sk;
skb->destructor = netlink_skb_destructor;
- atomic_add(skb->truesize, &sk->sk_rmem_alloc);
sk_mem_charge(sk, skb->truesize);
}
static void netlink_sock_destruct(struct sock *sk)
{
- struct netlink_sock *nlk = nlk_sk(sk);
-
- if (nlk->cb_running) {
- if (nlk->cb.done)
- nlk->cb.done(&nlk->cb);
- module_put(nlk->cb.module);
- kfree_skb(nlk->cb.skb);
- }
-
skb_queue_purge(&sk->sk_receive_queue);
if (!sock_flag(sk, SOCK_DEAD)) {
@@ -414,14 +404,6 @@ static void netlink_sock_destruct(struct sock *sk)
WARN_ON(nlk_sk(sk)->groups);
}
-static void netlink_sock_destruct_work(struct work_struct *work)
-{
- struct netlink_sock *nlk = container_of(work, struct netlink_sock,
- work);
-
- sk_free(&nlk->sk);
-}
-
/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
* SMP. Look, when several writers sleep and reader wakes them up, all but one
* immediately hit write lock and grab all the cpus. Exclusive sleep solves
@@ -636,8 +618,7 @@ static struct proto netlink_proto = {
};
static int __netlink_create(struct net *net, struct socket *sock,
- struct mutex *dump_cb_mutex, int protocol,
- int kern)
+ int protocol, int kern)
{
struct sock *sk;
struct netlink_sock *nlk;
@@ -655,7 +636,6 @@ static int __netlink_create(struct net *net, struct socket *sock,
lockdep_set_class_and_name(&nlk->nl_cb_mutex,
nlk_cb_mutex_keys + protocol,
nlk_cb_mutex_key_strings[protocol]);
- nlk->dump_cb_mutex = dump_cb_mutex;
init_waitqueue_head(&nlk->wait);
sk->sk_destruct = netlink_sock_destruct;
@@ -667,7 +647,6 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
struct module *module = NULL;
- struct mutex *cb_mutex;
struct netlink_sock *nlk;
int (*bind)(struct net *net, int group);
void (*unbind)(struct net *net, int group);
@@ -696,7 +675,6 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
module = nl_table[protocol].module;
else
err = -EPROTONOSUPPORT;
- cb_mutex = nl_table[protocol].cb_mutex;
bind = nl_table[protocol].bind;
unbind = nl_table[protocol].unbind;
release = nl_table[protocol].release;
@@ -705,7 +683,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
if (err < 0)
goto out;
- err = __netlink_create(net, sock, cb_mutex, protocol, kern);
+ err = __netlink_create(net, sock, protocol, kern);
if (err < 0)
goto out_module;
@@ -735,12 +713,6 @@ static void deferred_put_nlk_sk(struct rcu_head *head)
if (!refcount_dec_and_test(&sk->sk_refcnt))
return;
- if (nlk->cb_running && nlk->cb.done) {
- INIT_WORK(&nlk->work, netlink_sock_destruct_work);
- schedule_work(&nlk->work);
- return;
- }
-
sk_free(sk);
}
@@ -792,6 +764,15 @@ static int netlink_release(struct socket *sock)
NETLINK_URELEASE, &n);
}
+ /* Terminate any outstanding dump */
+ if (nlk->cb_running) {
+ if (nlk->cb.done)
+ nlk->cb.done(&nlk->cb);
+ module_put(nlk->cb.module);
+ kfree_skb(nlk->cb.skb);
+ WRITE_ONCE(nlk->cb_running, false);
+ }
+
module_put(nlk->module);
if (netlink_is_kernel(sk)) {
@@ -814,16 +795,6 @@ static int netlink_release(struct socket *sock)
sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
- /* Because struct net might disappear soon, do not keep a pointer. */
- if (!sk->sk_net_refcnt && sock_net(sk) != &init_net) {
- __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false);
- /* Because of deferred_put_nlk_sk and use of work queue,
- * it is possible netns will be freed before this socket.
- */
- sock_net_set(sk, &init_net);
- __netns_tracker_alloc(&init_net, &sk->ns_tracker,
- false, GFP_KERNEL);
- }
call_rcu(&nlk->rcu, deferred_put_nlk_sk);
return 0;
}
@@ -1184,11 +1155,16 @@ static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
return sock;
}
-struct sock *netlink_getsockbyfilp(struct file *filp)
+struct sock *netlink_getsockbyfd(int fd)
{
- struct inode *inode = file_inode(filp);
+ CLASS(fd, f)(fd);
+ struct inode *inode;
struct sock *sock;
+ if (fd_empty(f))
+ return ERR_PTR(-EBADF);
+
+ inode = file_inode(fd_file(f));
if (!S_ISSOCK(inode->i_mode))
return ERR_PTR(-ENOTSOCK);
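The lookup helper now takes a file descriptor instead of a struct file, and CLASS(fd, f) gives it a scope-bound reference that is released automatically on every return path. A minimal caller sketch, using hypothetical wrapper names that are not part of this patch, of what the change means in practice:

/* Illustrative caller sketch, not from this patch: the old API made the
 * caller resolve and release the struct file itself, while the new one
 * keeps the descriptor lookup and cleanup inside netlink_getsockbyfd().
 */
static struct sock *resolve_dst_old(int fd)
{
	struct file *filp = fget(fd);
	struct sock *sk;

	if (!filp)
		return ERR_PTR(-EBADF);
	sk = netlink_getsockbyfilp(filp);
	fput(filp);		/* caller owned the file reference */
	return sk;
}

static struct sock *resolve_dst_new(int fd)
{
	/* CLASS(fd, f) inside the helper drops the fd reference on return */
	return netlink_getsockbyfd(fd);
}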
@@ -1235,41 +1211,48 @@ struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast)
int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
long *timeo, struct sock *ssk)
{
+ DECLARE_WAITQUEUE(wait, current);
struct netlink_sock *nlk;
+ unsigned int rmem;
nlk = nlk_sk(sk);
+ rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
- if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
- DECLARE_WAITQUEUE(wait, current);
- if (!*timeo) {
- if (!ssk || netlink_is_kernel(ssk))
- netlink_overrun(sk);
- sock_put(sk);
- kfree_skb(skb);
- return -EAGAIN;
- }
-
- __set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&nlk->wait, &wait);
+ if ((rmem == skb->truesize || rmem < READ_ONCE(sk->sk_rcvbuf)) &&
+ !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
+ netlink_skb_set_owner_r(skb, sk);
+ return 0;
+ }
- if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
- !sock_flag(sk, SOCK_DEAD))
- *timeo = schedule_timeout(*timeo);
+ atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&nlk->wait, &wait);
+ if (!*timeo) {
+ if (!ssk || netlink_is_kernel(ssk))
+ netlink_overrun(sk);
sock_put(sk);
+ kfree_skb(skb);
+ return -EAGAIN;
+ }
- if (signal_pending(current)) {
- kfree_skb(skb);
- return sock_intr_errno(*timeo);
- }
- return 1;
+ __set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue(&nlk->wait, &wait);
+ rmem = atomic_read(&sk->sk_rmem_alloc);
+
+ if (((rmem && rmem + skb->truesize > READ_ONCE(sk->sk_rcvbuf)) ||
+ test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
+ !sock_flag(sk, SOCK_DEAD))
+ *timeo = schedule_timeout(*timeo);
+
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&nlk->wait, &wait);
+ sock_put(sk);
+
+ if (signal_pending(current)) {
+ kfree_skb(skb);
+ return sock_intr_errno(*timeo);
}
- netlink_skb_set_owner_r(skb, sk);
- return 0;
+
+ return 1;
}
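netlink_attachskb() now charges sk_rmem_alloc first with atomic_add_return() and only then tests the limit, undoing the charge if the socket is over budget; accepting the skb whenever rmem == skb->truesize guarantees that at least one message can be queued even with a very small sk_rcvbuf. A simplified sketch of the pattern, with an illustrative helper name:

/* Simplified sketch of the charge-then-check pattern used above; the
 * helper name is illustrative. Reserving the memory before testing the
 * limit prevents concurrent senders from all passing a stale read of
 * sk_rmem_alloc and overshooting the budget together.
 */
static bool netlink_rmem_try_charge(struct sock *sk, struct sk_buff *skb)
{
	unsigned int rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);

	/* the first skb is always accepted so a tiny rcvbuf cannot wedge the socket */
	if (rmem == skb->truesize || rmem < READ_ONCE(sk->sk_rcvbuf))
		return true;

	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);	/* undo the reservation */
	return false;
}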
static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
@@ -1301,6 +1284,7 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
{
int delta;
+ skb_assert_len(skb);
WARN_ON(skb->sk != NULL);
delta = skb->end - skb->tail;
if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize)
@@ -1329,6 +1313,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
ret = -ECONNREFUSED;
if (nlk->netlink_rcv != NULL) {
ret = skb->len;
+ atomic_add(skb->truesize, &sk->sk_rmem_alloc);
netlink_skb_set_owner_r(skb, sk);
NETLINK_CB(skb).sk = ssk;
netlink_deliver_tap_kernel(sk, ssk, skb);
@@ -1405,13 +1390,19 @@ EXPORT_SYMBOL_GPL(netlink_strict_get_check);
static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
{
struct netlink_sock *nlk = nlk_sk(sk);
+ unsigned int rmem, rcvbuf;
+
+ rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
+ rcvbuf = READ_ONCE(sk->sk_rcvbuf);
- if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+ if ((rmem == skb->truesize || rmem <= rcvbuf) &&
!test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
netlink_skb_set_owner_r(skb, sk);
__netlink_sendskb(sk, skb);
- return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
+ return rmem > (rcvbuf >> 1);
}
+
+ atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
return -1;
}
@@ -2016,7 +2007,6 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
struct sock *sk;
struct netlink_sock *nlk;
struct listeners *listeners = NULL;
- struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL;
unsigned int groups;
BUG_ON(!nl_table);
@@ -2027,7 +2017,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
return NULL;
- if (__netlink_create(net, sock, cb_mutex, unit, 1) < 0)
+ if (__netlink_create(net, sock, unit, 1) < 0)
goto out_sock_release_nosk;
sk = sock->sk;
@@ -2055,7 +2045,6 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
if (!nl_table[unit].registered) {
nl_table[unit].groups = groups;
rcu_assign_pointer(nl_table[unit].listeners, listeners);
- nl_table[unit].cb_mutex = cb_mutex;
nl_table[unit].module = module;
if (cfg) {
nl_table[unit].bind = cfg->bind;
@@ -2142,8 +2131,9 @@ void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
{
struct sock *sk;
struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
+ struct hlist_node *tmp;
- sk_for_each_bound(sk, &tbl->mc_list)
+ sk_for_each_bound_safe(sk, tmp, &tbl->mc_list)
netlink_update_socket_mc(nlk_sk(sk), group, 0);
}
@@ -2165,6 +2155,70 @@ __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int fla
}
EXPORT_SYMBOL(__nlmsg_put);
+static size_t
+netlink_ack_tlv_len(struct netlink_sock *nlk, int err,
+ const struct netlink_ext_ack *extack)
+{
+ size_t tlvlen;
+
+ if (!extack || !test_bit(NETLINK_F_EXT_ACK, &nlk->flags))
+ return 0;
+
+ tlvlen = 0;
+ if (extack->_msg)
+ tlvlen += nla_total_size(strlen(extack->_msg) + 1);
+ if (extack->cookie_len)
+ tlvlen += nla_total_size(extack->cookie_len);
+
+ /* Following attributes are only reported as error (not warning) */
+ if (!err)
+ return tlvlen;
+
+ if (extack->bad_attr)
+ tlvlen += nla_total_size(sizeof(u32));
+ if (extack->policy)
+ tlvlen += netlink_policy_dump_attr_size_estimate(extack->policy);
+ if (extack->miss_type)
+ tlvlen += nla_total_size(sizeof(u32));
+ if (extack->miss_nest)
+ tlvlen += nla_total_size(sizeof(u32));
+
+ return tlvlen;
+}
+
+static bool nlmsg_check_in_payload(const struct nlmsghdr *nlh, const void *addr)
+{
+ return !WARN_ON(addr < nlmsg_data(nlh) ||
+ addr - (const void *) nlh >= nlh->nlmsg_len);
+}
+
+static void
+netlink_ack_tlv_fill(struct sk_buff *skb, const struct nlmsghdr *nlh, int err,
+ const struct netlink_ext_ack *extack)
+{
+ if (extack->_msg)
+ WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg));
+ if (extack->cookie_len)
+ WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE,
+ extack->cookie_len, extack->cookie));
+
+ if (!err)
+ return;
+
+ if (extack->bad_attr && nlmsg_check_in_payload(nlh, extack->bad_attr))
+ WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
+ (u8 *)extack->bad_attr - (const u8 *)nlh));
+ if (extack->policy)
+ netlink_policy_dump_write_attr(skb, extack->policy,
+ NLMSGERR_ATTR_POLICY);
+ if (extack->miss_type)
+ WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_TYPE,
+ extack->miss_type));
+ if (extack->miss_nest && nlmsg_check_in_payload(nlh, extack->miss_nest))
+ WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_NEST,
+ (u8 *)extack->miss_nest - (const u8 *)nlh));
+}
+
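The helpers above centralize sizing and emission of the extended-ACK TLVs, and nlmsg_check_in_payload() verifies that bad_attr and miss_nest really point into the request before their offset from the nlmsghdr is reported. A worked example of that offset, with illustrative layout values:

/* Worked example, values illustrative: for a request laid out as
 *
 *   struct nlmsghdr (16 bytes) | family header | attributes ...
 *
 * an offending attribute starting 24 bytes into the payload is reported
 * in NLMSGERR_ATTR_OFFS as 16 + 24 = 40, i.e. its distance in bytes
 * from the start of the nlmsghdr.
 */
static u32 demo_bad_attr_offset(const struct nlmsghdr *nlh,
				const struct nlattr *bad_attr)
{
	/* assumes bad_attr points into nlh's payload, as checked above */
	return (const u8 *)bad_attr - (const u8 *)nlh;
}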
/*
* It looks a bit ugly.
* It would be better to create kernel thread.
@@ -2175,6 +2229,7 @@ static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb,
struct netlink_ext_ack *extack)
{
struct nlmsghdr *nlh;
+ size_t extack_len;
nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(nlk->dump_done_errno),
NLM_F_MULTI | cb->answer_flags);
@@ -2184,10 +2239,14 @@ static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb,
nl_dump_check_consistent(cb, nlh);
memcpy(nlmsg_data(nlh), &nlk->dump_done_errno, sizeof(nlk->dump_done_errno));
- if (extack->_msg && test_bit(NETLINK_F_EXT_ACK, &nlk->flags)) {
+ extack_len = netlink_ack_tlv_len(nlk, nlk->dump_done_errno, extack);
+ if (extack_len) {
nlh->nlmsg_flags |= NLM_F_ACK_TLVS;
- if (!nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg))
+ if (skb_tailroom(skb) >= extack_len) {
+ netlink_ack_tlv_fill(skb, cb->nlh,
+ nlk->dump_done_errno, extack);
nlmsg_end(skb, nlh);
+ }
}
return 0;
@@ -2199,6 +2258,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
struct netlink_ext_ack extack = {};
struct netlink_callback *cb;
struct sk_buff *skb = NULL;
+ unsigned int rmem, rcvbuf;
size_t max_recvmsg_len;
struct module *module;
int err = -ENOBUFS;
@@ -2212,11 +2272,8 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
goto errout_skb;
}
- if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
- goto errout_skb;
-
/* NLMSG_GOODSIZE is small to avoid high order allocations being
- * required, but it makes sense to _attempt_ a 16K bytes allocation
+ * required, but it makes sense to _attempt_ a 32KiB allocation
* to reduce number of system calls on dump operations, if user
* ever provided a big enough buffer.
*/
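On the userspace side, the comment above means a dump consumer benefits from handing recvmsg() a buffer of at least 32KiB, since that is the chunk size the kernel will try to fill (a larger min_dump_alloc aside). A minimal sketch, assuming nl_fd is an already-bound netlink socket:

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <linux/netlink.h>

/* Userspace sketch, not part of the patch: read one dump chunk into a
 * buffer large enough (>= 32KiB) for the kernel to pack it completely.
 */
static ssize_t read_dump_chunk(int nl_fd, void *buf, size_t len)
{
	struct sockaddr_nl sa;
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct msghdr msg = {
		.msg_name	= &sa,
		.msg_namelen	= sizeof(sa),
		.msg_iov	= &iov,
		.msg_iovlen	= 1,
	};

	return recvmsg(nl_fd, &msg, 0);
}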
@@ -2237,8 +2294,15 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
if (!skb)
goto errout_skb;
+ rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+ rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
+ if (rmem != skb->truesize && rmem >= rcvbuf) {
+ atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+ goto errout_skb;
+ }
+
/* Trim skb to allocated size. User is expected to provide buffer as
- * large as max(min_dump_alloc, 16KiB (mac_recvmsg_len capped at
+ * large as max(min_dump_alloc, 32KiB (max_recvmsg_len capped at
* netlink_recvmsg())). dump will pack as many smaller messages as
* could fit within the allocated skb. skb is typically allocated
* with larger space than required (could be as much as near 2x the
@@ -2258,17 +2322,9 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
netlink_skb_set_owner_r(skb, sk);
if (nlk->dump_done_errno > 0) {
- struct mutex *extra_mutex = nlk->dump_cb_mutex;
-
cb->extack = &extack;
- if (cb->flags & RTNL_FLAG_DUMP_UNLOCKED)
- extra_mutex = NULL;
- if (extra_mutex)
- mutex_lock(extra_mutex);
nlk->dump_done_errno = cb->dump(skb, cb);
- if (extra_mutex)
- mutex_unlock(extra_mutex);
/* EMSGSIZE plus something already in the skb means
* that there's more to dump but current skb has filled up.
@@ -2406,69 +2462,6 @@ error_free:
}
EXPORT_SYMBOL(__netlink_dump_start);
-static size_t
-netlink_ack_tlv_len(struct netlink_sock *nlk, int err,
- const struct netlink_ext_ack *extack)
-{
- size_t tlvlen;
-
- if (!extack || !test_bit(NETLINK_F_EXT_ACK, &nlk->flags))
- return 0;
-
- tlvlen = 0;
- if (extack->_msg)
- tlvlen += nla_total_size(strlen(extack->_msg) + 1);
- if (extack->cookie_len)
- tlvlen += nla_total_size(extack->cookie_len);
-
- /* Following attributes are only reported as error (not warning) */
- if (!err)
- return tlvlen;
-
- if (extack->bad_attr)
- tlvlen += nla_total_size(sizeof(u32));
- if (extack->policy)
- tlvlen += netlink_policy_dump_attr_size_estimate(extack->policy);
- if (extack->miss_type)
- tlvlen += nla_total_size(sizeof(u32));
- if (extack->miss_nest)
- tlvlen += nla_total_size(sizeof(u32));
-
- return tlvlen;
-}
-
-static void
-netlink_ack_tlv_fill(struct sk_buff *in_skb, struct sk_buff *skb,
- struct nlmsghdr *nlh, int err,
- const struct netlink_ext_ack *extack)
-{
- if (extack->_msg)
- WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg));
- if (extack->cookie_len)
- WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE,
- extack->cookie_len, extack->cookie));
-
- if (!err)
- return;
-
- if (extack->bad_attr &&
- !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
- (u8 *)extack->bad_attr >= in_skb->data + in_skb->len))
- WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
- (u8 *)extack->bad_attr - (u8 *)nlh));
- if (extack->policy)
- netlink_policy_dump_write_attr(skb, extack->policy,
- NLMSGERR_ATTR_POLICY);
- if (extack->miss_type)
- WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_TYPE,
- extack->miss_type));
- if (extack->miss_nest &&
- !WARN_ON((u8 *)extack->miss_nest < in_skb->data ||
- (u8 *)extack->miss_nest > in_skb->data + in_skb->len))
- WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_NEST,
- (u8 *)extack->miss_nest - (u8 *)nlh));
-}
-
void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
const struct netlink_ext_ack *extack)
{
@@ -2514,7 +2507,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
}
if (tlvlen)
- netlink_ack_tlv_fill(in_skb, skb, nlh, err, extack);
+ netlink_ack_tlv_fill(skb, nlh, err, extack);
nlmsg_end(skb, rep);
@@ -2943,12 +2936,8 @@ static int __init netlink_proto_init(void)
for (i = 0; i < MAX_LINKS; i++) {
if (rhashtable_init(&nl_table[i].hash,
- &netlink_rhashtable_params) < 0) {
- while (--i > 0)
- rhashtable_destroy(&nl_table[i].hash);
- kfree(nl_table);
+ &netlink_rhashtable_params) < 0)
goto panic;
- }
}
netlink_add_usersock_entry();