diff options
Diffstat (limited to 'net/ipv4/inet_diag.c')
| -rw-r--r-- | net/ipv4/inet_diag.c | 1304 |
1 files changed, 610 insertions, 694 deletions
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 5f648751fce2..3f5b1418a610 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * inet_diag.c Module for monitoring INET transport protocols sockets. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/kernel.h> @@ -24,9 +20,7 @@ #include <net/ipv6.h> #include <net/inet_common.h> #include <net/inet_connection_sock.h> -#include <net/inet_hashtables.h> -#include <net/inet_timewait_sock.h> -#include <net/inet6_hashtables.h> +#include <net/bpf_sk_storage.h> #include <net/netlink.h> #include <linux/inet.h> @@ -35,79 +29,78 @@ #include <linux/inet_diag.h> #include <linux/sock_diag.h> -static const struct inet_diag_handler **inet_diag_table; +static const struct inet_diag_handler __rcu **inet_diag_table; struct inet_diag_entry { - __be32 *saddr; - __be32 *daddr; + const __be32 *saddr; + const __be32 *daddr; u16 sport; u16 dport; u16 family; u16 userlocks; -#if IS_ENABLED(CONFIG_IPV6) - struct in6_addr saddr_storage; /* for IPv4-mapped-IPv6 addresses */ - struct in6_addr daddr_storage; /* for IPv4-mapped-IPv6 addresses */ + u32 ifindex; + u32 mark; +#ifdef CONFIG_SOCK_CGROUP_DATA + u64 cgroup_id; #endif }; -static DEFINE_MUTEX(inet_diag_table_mutex); - static const struct inet_diag_handler *inet_diag_lock_handler(int proto) { - if (!inet_diag_table[proto]) - request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK, - NETLINK_SOCK_DIAG, AF_INET, proto); + const struct inet_diag_handler *handler; + + if (proto < 0 || proto >= IPPROTO_MAX) + return NULL; + + if (!READ_ONCE(inet_diag_table[proto])) + sock_load_diag_module(AF_INET, proto); - mutex_lock(&inet_diag_table_mutex); - if (!inet_diag_table[proto]) - return ERR_PTR(-ENOENT); + rcu_read_lock(); + handler = rcu_dereference(inet_diag_table[proto]); + if (handler && !try_module_get(handler->owner)) + handler = NULL; + rcu_read_unlock(); - return inet_diag_table[proto]; + return handler; } -static inline void inet_diag_unlock_handler( - const struct inet_diag_handler *handler) +static void inet_diag_unlock_handler(const struct inet_diag_handler *handler) { - mutex_unlock(&inet_diag_table_mutex); + module_put(handler->owner); } -int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, - struct sk_buff *skb, struct inet_diag_req_v2 *req, - struct user_namespace *user_ns, - u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) +void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk) { - const struct inet_sock *inet = inet_sk(sk); - struct inet_diag_msg *r; - struct nlmsghdr *nlh; - struct nlattr *attr; - void *info = NULL; - const struct inet_diag_handler *handler; - int ext = req->idiag_ext; - - handler = inet_diag_table[req->sdiag_protocol]; - BUG_ON(handler == NULL); - - nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), - nlmsg_flags); - if (!nlh) - return -EMSGSIZE; + r->idiag_family = READ_ONCE(sk->sk_family); - r = nlmsg_data(nlh); - BUG_ON(sk->sk_state == TCP_TIME_WAIT); + r->id.idiag_sport = htons(READ_ONCE(sk->sk_num)); + r->id.idiag_dport = READ_ONCE(sk->sk_dport); + r->id.idiag_if = READ_ONCE(sk->sk_bound_dev_if); + sock_diag_save_cookie(sk, r->id.idiag_cookie); - r->idiag_family = sk->sk_family; - r->idiag_state = sk->sk_state; - r->idiag_timer = 0; - r->idiag_retrans = 0; +#if IS_ENABLED(CONFIG_IPV6) + if (r->idiag_family == AF_INET6) { + data_race(*(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr); + data_race(*(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr); + } else +#endif + { + memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); + memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); - r->id.idiag_if = sk->sk_bound_dev_if; - sock_diag_save_cookie(sk, r->id.idiag_cookie); + r->id.idiag_src[0] = READ_ONCE(sk->sk_rcv_saddr); + r->id.idiag_dst[0] = READ_ONCE(sk->sk_daddr); + } +} +EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill); - r->id.idiag_sport = inet->inet_sport; - r->id.idiag_dport = inet->inet_dport; - r->id.idiag_src[0] = inet->inet_rcv_saddr; - r->id.idiag_dst[0] = inet->inet_daddr; +int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, + struct inet_diag_msg *r, int ext, + struct user_namespace *user_ns, + bool net_admin) +{ + const struct inet_sock *inet = inet_sk(sk); + struct inet_diag_sockopt inet_sockopt; if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown)) goto errout; @@ -116,30 +109,152 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, * hence this needs to be included regardless of socket family. */ if (ext & (1 << (INET_DIAG_TOS - 1))) - if (nla_put_u8(skb, INET_DIAG_TOS, inet->tos) < 0) + if (nla_put_u8(skb, INET_DIAG_TOS, READ_ONCE(inet->tos)) < 0) goto errout; #if IS_ENABLED(CONFIG_IPV6) if (r->idiag_family == AF_INET6) { - const struct ipv6_pinfo *np = inet6_sk(sk); - - *(struct in6_addr *)r->id.idiag_src = np->rcv_saddr; - *(struct in6_addr *)r->id.idiag_dst = np->daddr; - if (ext & (1 << (INET_DIAG_TCLASS - 1))) - if (nla_put_u8(skb, INET_DIAG_TCLASS, np->tclass) < 0) + if (nla_put_u8(skb, INET_DIAG_TCLASS, + inet6_sk(sk)->tclass) < 0) goto errout; + + if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && + nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk))) + goto errout; + } +#endif + + if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, READ_ONCE(sk->sk_mark))) + goto errout; + + if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || + ext & (1 << (INET_DIAG_TCLASS - 1))) { + u32 classid = 0; + +#ifdef CONFIG_CGROUP_NET_CLASSID + classid = sock_cgroup_classid(&sk->sk_cgrp_data); +#endif + /* Fallback to socket priority if class id isn't set. + * Classful qdiscs use it as direct reference to class. + * For cgroup2 classid is always zero. + */ + if (!classid) + classid = READ_ONCE(sk->sk_priority); + + if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) + goto errout; } + +#ifdef CONFIG_SOCK_CGROUP_DATA + if (nla_put_u64_64bit(skb, INET_DIAG_CGROUP_ID, + cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)), + INET_DIAG_PAD)) + goto errout; #endif - r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); + r->idiag_uid = from_kuid_munged(user_ns, sk_uid(sk)); r->idiag_inode = sock_i_ino(sk); + memset(&inet_sockopt, 0, sizeof(inet_sockopt)); + inet_sockopt.recverr = inet_test_bit(RECVERR, sk); + inet_sockopt.is_icsk = inet_test_bit(IS_ICSK, sk); + inet_sockopt.freebind = inet_test_bit(FREEBIND, sk); + inet_sockopt.hdrincl = inet_test_bit(HDRINCL, sk); + inet_sockopt.mc_loop = inet_test_bit(MC_LOOP, sk); + inet_sockopt.transparent = inet_test_bit(TRANSPARENT, sk); + inet_sockopt.mc_all = inet_test_bit(MC_ALL, sk); + inet_sockopt.nodefrag = inet_test_bit(NODEFRAG, sk); + inet_sockopt.bind_address_no_port = inet_test_bit(BIND_ADDRESS_NO_PORT, sk); + inet_sockopt.recverr_rfc4884 = inet_test_bit(RECVERR_RFC4884, sk); + inet_sockopt.defer_connect = inet_test_bit(DEFER_CONNECT, sk); + if (nla_put(skb, INET_DIAG_SOCKOPT, sizeof(inet_sockopt), + &inet_sockopt)) + goto errout; + + return 0; +errout: + return 1; +} +EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill); + +static int inet_diag_parse_attrs(const struct nlmsghdr *nlh, int hdrlen, + struct nlattr **req_nlas) +{ + struct nlattr *nla; + int remaining; + + nlmsg_for_each_attr(nla, nlh, hdrlen, remaining) { + int type = nla_type(nla); + + if (type == INET_DIAG_REQ_PROTOCOL && nla_len(nla) != sizeof(u32)) + return -EINVAL; + + if (type < __INET_DIAG_REQ_MAX) + req_nlas[type] = nla; + } + return 0; +} + +static int inet_diag_get_protocol(const struct inet_diag_req_v2 *req, + const struct inet_diag_dump_data *data) +{ + if (data->req_nlas[INET_DIAG_REQ_PROTOCOL]) + return nla_get_u32(data->req_nlas[INET_DIAG_REQ_PROTOCOL]); + return req->sdiag_protocol; +} + +#define MAX_DUMP_ALLOC_SIZE (KMALLOC_MAX_SIZE - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) + +int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, + struct sk_buff *skb, struct netlink_callback *cb, + const struct inet_diag_req_v2 *req, + u16 nlmsg_flags, bool net_admin) +{ + const struct tcp_congestion_ops *ca_ops; + const struct inet_diag_handler *handler; + struct inet_diag_dump_data *cb_data; + int ext = req->idiag_ext; + struct inet_diag_msg *r; + struct nlmsghdr *nlh; + struct nlattr *attr; + void *info = NULL; + u8 icsk_pending; + int protocol; + + cb_data = cb->data; + protocol = inet_diag_get_protocol(req, cb_data); + + /* inet_diag_lock_handler() made sure inet_diag_table[] is stable. */ + handler = rcu_dereference_protected(inet_diag_table[protocol], 1); + DEBUG_NET_WARN_ON_ONCE(!handler); + if (!handler) + return -ENXIO; + + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags); + if (!nlh) + return -EMSGSIZE; + + r = nlmsg_data(nlh); + BUG_ON(!sk_fullsock(sk)); + + inet_diag_msg_common_fill(r, sk); + r->idiag_state = sk->sk_state; + r->idiag_timer = 0; + r->idiag_retrans = 0; + r->idiag_expires = 0; + + if (inet_diag_msg_attrs_fill(sk, skb, r, ext, + sk_user_ns(NETLINK_CB(cb->skb).sk), + net_admin)) + goto errout; + if (ext & (1 << (INET_DIAG_MEMINFO - 1))) { struct inet_diag_meminfo minfo = { .idiag_rmem = sk_rmem_alloc_get(sk), - .idiag_wmem = sk->sk_wmem_queued, - .idiag_fmem = sk->sk_forward_alloc, + .idiag_wmem = READ_ONCE(sk->sk_wmem_queued), + .idiag_fmem = READ_ONCE(sk->sk_forward_alloc), .idiag_tmem = sk_wmem_alloc_get(sk), }; @@ -151,216 +266,166 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO)) goto errout; - if (icsk == NULL) { + /* + * RAW sockets might have user-defined protocols assigned, + * so report the one supplied on socket creation. + */ + if (sk->sk_type == SOCK_RAW) { + if (nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol)) + goto errout; + } + + if (!icsk) { handler->idiag_get_info(sk, r, NULL); goto out; } -#define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ) - - if (icsk->icsk_pending == ICSK_TIME_RETRANS || - icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || - icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { + icsk_pending = smp_load_acquire(&icsk->icsk_pending); + if (icsk_pending == ICSK_TIME_RETRANS || + icsk_pending == ICSK_TIME_REO_TIMEOUT || + icsk_pending == ICSK_TIME_LOSS_PROBE) { r->idiag_timer = 1; - r->idiag_retrans = icsk->icsk_retransmits; - r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); - } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { + r->idiag_retrans = READ_ONCE(icsk->icsk_retransmits); + r->idiag_expires = + jiffies_delta_to_msecs(tcp_timeout_expires(sk) - jiffies); + } else if (icsk_pending == ICSK_TIME_PROBE0) { r->idiag_timer = 4; - r->idiag_retrans = icsk->icsk_probes_out; - r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); - } else if (timer_pending(&sk->sk_timer)) { + r->idiag_retrans = READ_ONCE(icsk->icsk_probes_out); + r->idiag_expires = + jiffies_delta_to_msecs(tcp_timeout_expires(sk) - jiffies); + } else if (timer_pending(&icsk->icsk_keepalive_timer)) { r->idiag_timer = 2; - r->idiag_retrans = icsk->icsk_probes_out; - r->idiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires); - } else { - r->idiag_timer = 0; - r->idiag_expires = 0; + r->idiag_retrans = READ_ONCE(icsk->icsk_probes_out); + r->idiag_expires = + jiffies_delta_to_msecs(icsk->icsk_keepalive_timer.expires - jiffies); } -#undef EXPIRES_IN_MS - if (ext & (1 << (INET_DIAG_INFO - 1))) { - attr = nla_reserve(skb, INET_DIAG_INFO, - sizeof(struct tcp_info)); + if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) { + attr = nla_reserve_64bit(skb, INET_DIAG_INFO, + handler->idiag_info_size, + INET_DIAG_PAD); if (!attr) goto errout; info = nla_data(attr); } - if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) - if (nla_put_string(skb, INET_DIAG_CONG, - icsk->icsk_ca_ops->name) < 0) + if (ext & (1 << (INET_DIAG_CONG - 1))) { + int err = 0; + + rcu_read_lock(); + ca_ops = READ_ONCE(icsk->icsk_ca_ops); + if (ca_ops) + err = nla_put_string(skb, INET_DIAG_CONG, ca_ops->name); + rcu_read_unlock(); + if (err < 0) goto errout; + } handler->idiag_get_info(sk, r, info); - if (sk->sk_state < TCP_TIME_WAIT && - icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) - icsk->icsk_ca_ops->get_info(sk, ext, skb); - -out: - return nlmsg_end(skb, nlh); - -errout: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; -} -EXPORT_SYMBOL_GPL(inet_sk_diag_fill); + if (ext & (1 << (INET_DIAG_INFO - 1)) && handler->idiag_get_aux) + if (handler->idiag_get_aux(sk, net_admin, skb) < 0) + goto errout; -static int inet_csk_diag_fill(struct sock *sk, - struct sk_buff *skb, struct inet_diag_req_v2 *req, - struct user_namespace *user_ns, - u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) -{ - return inet_sk_diag_fill(sk, inet_csk(sk), - skb, req, user_ns, portid, seq, nlmsg_flags, unlh); -} + if (sk->sk_state < TCP_TIME_WAIT) { + union tcp_cc_info info; + size_t sz = 0; + int attr; + + rcu_read_lock(); + ca_ops = READ_ONCE(icsk->icsk_ca_ops); + if (ca_ops && ca_ops->get_info) + sz = ca_ops->get_info(sk, ext, &attr, &info); + rcu_read_unlock(); + if (sz && nla_put(skb, attr, sz, &info) < 0) + goto errout; + } -static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, - struct sk_buff *skb, struct inet_diag_req_v2 *req, - u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) -{ - long tmo; - struct inet_diag_msg *r; - struct nlmsghdr *nlh; + /* Keep it at the end for potential retry with a larger skb, + * or else do best-effort fitting, which is only done for the + * first_nlmsg. + */ + if (cb_data->bpf_stg_diag) { + bool first_nlmsg = ((unsigned char *)nlh == skb->data); + unsigned int prev_min_dump_alloc; + unsigned int total_nla_size = 0; + unsigned int msg_len; + int err; + + msg_len = skb_tail_pointer(skb) - (unsigned char *)nlh; + err = bpf_sk_storage_diag_put(cb_data->bpf_stg_diag, sk, skb, + INET_DIAG_SK_BPF_STORAGES, + &total_nla_size); + + if (!err) + goto out; + + total_nla_size += msg_len; + prev_min_dump_alloc = cb->min_dump_alloc; + if (total_nla_size > prev_min_dump_alloc) + cb->min_dump_alloc = min_t(u32, total_nla_size, + MAX_DUMP_ALLOC_SIZE); + + if (!first_nlmsg) + goto errout; - nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), - nlmsg_flags); - if (!nlh) - return -EMSGSIZE; + if (cb->min_dump_alloc > prev_min_dump_alloc) + /* Retry with pskb_expand_head() with + * __GFP_DIRECT_RECLAIM + */ + goto errout; - r = nlmsg_data(nlh); - BUG_ON(tw->tw_state != TCP_TIME_WAIT); - - tmo = tw->tw_ttd - jiffies; - if (tmo < 0) - tmo = 0; - - r->idiag_family = tw->tw_family; - r->idiag_retrans = 0; - r->id.idiag_if = tw->tw_bound_dev_if; - sock_diag_save_cookie(tw, r->id.idiag_cookie); - r->id.idiag_sport = tw->tw_sport; - r->id.idiag_dport = tw->tw_dport; - r->id.idiag_src[0] = tw->tw_rcv_saddr; - r->id.idiag_dst[0] = tw->tw_daddr; - r->idiag_state = tw->tw_substate; - r->idiag_timer = 3; - r->idiag_expires = DIV_ROUND_UP(tmo * 1000, HZ); - r->idiag_rqueue = 0; - r->idiag_wqueue = 0; - r->idiag_uid = 0; - r->idiag_inode = 0; -#if IS_ENABLED(CONFIG_IPV6) - if (tw->tw_family == AF_INET6) { - const struct inet6_timewait_sock *tw6 = - inet6_twsk((struct sock *)tw); + WARN_ON_ONCE(total_nla_size <= prev_min_dump_alloc); - *(struct in6_addr *)r->id.idiag_src = tw6->tw_v6_rcv_saddr; - *(struct in6_addr *)r->id.idiag_dst = tw6->tw_v6_daddr; + /* Send what we have for this sk + * and move on to the next sk in the following + * dump() + */ } -#endif - return nlmsg_end(skb, nlh); -} +out: + nlmsg_end(skb, nlh); + return 0; -static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, - struct inet_diag_req_v2 *r, - struct user_namespace *user_ns, - u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) -{ - if (sk->sk_state == TCP_TIME_WAIT) - return inet_twsk_diag_fill((struct inet_timewait_sock *)sk, - skb, r, portid, seq, nlmsg_flags, - unlh); - return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, nlmsg_flags, unlh); +errout: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } +EXPORT_SYMBOL_GPL(inet_sk_diag_fill); -int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, - const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req) +static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + int hdrlen, + const struct inet_diag_req_v2 *req) { - int err; - struct sock *sk; - struct sk_buff *rep; - struct net *net = sock_net(in_skb->sk); - - err = -EINVAL; - if (req->sdiag_family == AF_INET) { - sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0], - req->id.idiag_dport, req->id.idiag_src[0], - req->id.idiag_sport, req->id.idiag_if); - } -#if IS_ENABLED(CONFIG_IPV6) - else if (req->sdiag_family == AF_INET6) { - sk = inet6_lookup(net, hashinfo, - (struct in6_addr *)req->id.idiag_dst, - req->id.idiag_dport, - (struct in6_addr *)req->id.idiag_src, - req->id.idiag_sport, - req->id.idiag_if); - } -#endif - else { - goto out_nosk; - } - - err = -ENOENT; - if (sk == NULL) - goto out_nosk; + const struct inet_diag_handler *handler; + struct inet_diag_dump_data dump_data; + int err, protocol; - err = sock_diag_check_cookie(sk, req->id.idiag_cookie); + memset(&dump_data, 0, sizeof(dump_data)); + err = inet_diag_parse_attrs(nlh, hdrlen, dump_data.req_nlas); if (err) - goto out; + return err; - rep = nlmsg_new(sizeof(struct inet_diag_msg) + - sizeof(struct inet_diag_meminfo) + - sizeof(struct tcp_info) + 64, GFP_KERNEL); - if (!rep) { - err = -ENOMEM; - goto out; - } + protocol = inet_diag_get_protocol(req, &dump_data); - err = sk_diag_fill(sk, rep, req, - sk_user_ns(NETLINK_CB(in_skb).sk), - NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0, nlh); - if (err < 0) { - WARN_ON(err == -EMSGSIZE); - nlmsg_free(rep); - goto out; - } - err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, - MSG_DONTWAIT); - if (err > 0) - err = 0; + handler = inet_diag_lock_handler(protocol); + if (!handler) + return -ENOENT; -out: - if (sk) { - if (sk->sk_state == TCP_TIME_WAIT) - inet_twsk_put((struct inet_timewait_sock *)sk); - else - sock_put(sk); + if (cmd == SOCK_DIAG_BY_FAMILY) { + struct netlink_callback cb = { + .nlh = nlh, + .skb = in_skb, + .data = &dump_data, + }; + err = handler->dump_one(&cb, req); + } else if (cmd == SOCK_DESTROY && handler->destroy) { + err = handler->destroy(in_skb, req); + } else { + err = -EOPNOTSUPP; } -out_nosk: - return err; -} -EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); - -static int inet_diag_get_exact(struct sk_buff *in_skb, - const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req) -{ - const struct inet_diag_handler *handler; - int err; - - handler = inet_diag_lock_handler(req->sdiag_protocol); - if (IS_ERR(handler)) - err = PTR_ERR(handler); - else - err = handler->dump_one(in_skb, nlh, req); inet_diag_unlock_handler(handler); return err; @@ -392,9 +457,8 @@ static int bitstring_match(const __be32 *a1, const __be32 *a2, int bits) return 1; } - static int inet_diag_bc_run(const struct nlattr *_bc, - const struct inet_diag_entry *entry) + const struct inet_diag_entry *entry) { const void *bc = nla_data(_bc); int len = nla_len(_bc); @@ -409,12 +473,18 @@ static int inet_diag_bc_run(const struct nlattr *_bc, case INET_DIAG_BC_JMP: yes = 0; break; + case INET_DIAG_BC_S_EQ: + yes = entry->sport == op[1].no; + break; case INET_DIAG_BC_S_GE: yes = entry->sport >= op[1].no; break; case INET_DIAG_BC_S_LE: yes = entry->sport <= op[1].no; break; + case INET_DIAG_BC_D_EQ: + yes = entry->dport == op[1].no; + break; case INET_DIAG_BC_D_GE: yes = entry->dport >= op[1].no; break; @@ -426,10 +496,10 @@ static int inet_diag_bc_run(const struct nlattr *_bc, break; case INET_DIAG_BC_S_COND: case INET_DIAG_BC_D_COND: { - struct inet_diag_hostcond *cond; - __be32 *addr; + const struct inet_diag_hostcond *cond; + const __be32 *addr; - cond = (struct inet_diag_hostcond *)(op + 1); + cond = (const struct inet_diag_hostcond *)(op + 1); if (cond->port != -1 && cond->port != (op->code == INET_DIAG_BC_S_COND ? entry->sport : entry->dport)) { @@ -465,6 +535,32 @@ static int inet_diag_bc_run(const struct nlattr *_bc, yes = 0; break; } + case INET_DIAG_BC_DEV_COND: { + u32 ifindex; + + ifindex = *((const u32 *)(op + 1)); + if (ifindex != entry->ifindex) + yes = 0; + break; + } + case INET_DIAG_BC_MARK_COND: { + struct inet_diag_markcond *cond; + + cond = (struct inet_diag_markcond *)(op + 1); + if ((entry->mark & cond->mask) != cond->mark) + yes = 0; + break; + } +#ifdef CONFIG_SOCK_CGROUP_DATA + case INET_DIAG_BC_CGROUP_COND: { + u64 cgroup_id; + + cgroup_id = get_unaligned((const u64 *)(op + 1)); + if (cgroup_id != entry->cgroup_id) + yes = 0; + break; + } +#endif } if (yes) { @@ -478,30 +574,54 @@ static int inet_diag_bc_run(const struct nlattr *_bc, return len == 0; } -int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk) +/* This helper is available for all sockets (ESTABLISH, TIMEWAIT, SYN_RECV) + */ +static void entry_fill_addrs(struct inet_diag_entry *entry, + const struct sock *sk) { - struct inet_diag_entry entry; - struct inet_sock *inet = inet_sk(sk); - - if (bc == NULL) - return 1; - - entry.family = sk->sk_family; #if IS_ENABLED(CONFIG_IPV6) - if (entry.family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); - - entry.saddr = np->rcv_saddr.s6_addr32; - entry.daddr = np->daddr.s6_addr32; + if (entry->family == AF_INET6) { + entry->saddr = sk->sk_v6_rcv_saddr.s6_addr32; + entry->daddr = sk->sk_v6_daddr.s6_addr32; } else #endif { - entry.saddr = &inet->inet_rcv_saddr; - entry.daddr = &inet->inet_daddr; + entry->saddr = &sk->sk_rcv_saddr; + entry->daddr = &sk->sk_daddr; + } +} + +int inet_diag_bc_sk(const struct inet_diag_dump_data *cb_data, struct sock *sk) +{ + const struct nlattr *bc = cb_data->inet_diag_nla_bc; + const struct inet_sock *inet = inet_sk(sk); + struct inet_diag_entry entry; + + if (!bc) + return 1; + + entry.family = READ_ONCE(sk->sk_family); + entry_fill_addrs(&entry, sk); + entry.sport = READ_ONCE(inet->inet_num); + entry.dport = ntohs(READ_ONCE(inet->inet_dport)); + entry.ifindex = READ_ONCE(sk->sk_bound_dev_if); + if (cb_data->userlocks_needed) + entry.userlocks = sk_fullsock(sk) ? READ_ONCE(sk->sk_userlocks) : 0; + if (cb_data->mark_needed) { + if (sk_fullsock(sk)) + entry.mark = READ_ONCE(sk->sk_mark); + else if (sk->sk_state == TCP_NEW_SYN_RECV) + entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark; + else if (sk->sk_state == TCP_TIME_WAIT) + entry.mark = inet_twsk(sk)->tw_mark; + else + entry.mark = 0; } - entry.sport = inet->inet_num; - entry.dport = ntohs(inet->inet_dport); - entry.userlocks = sk->sk_userlocks; +#ifdef CONFIG_SOCK_CGROUP_DATA + if (cb_data->cgroup_needed) + entry.cgroup_id = sk_fullsock(sk) ? + cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0; +#endif return inet_diag_bc_run(bc, &entry); } @@ -524,12 +644,23 @@ static int valid_cc(const void *bc, int len, int cc) return 0; } +/* data is u32 ifindex */ +static bool valid_devcond(const struct inet_diag_bc_op *op, int len, + int *min_len) +{ + /* Check ifindex space. */ + *min_len += sizeof(u32); + if (len < *min_len) + return false; + + return true; +} /* Validate an inet_diag_hostcond. */ static bool valid_hostcond(const struct inet_diag_bc_op *op, int len, int *min_len) { - int addr_len; struct inet_diag_hostcond *cond; + int addr_len; /* Check hostcond space. */ *min_len += sizeof(struct inet_diag_hostcond); @@ -563,8 +694,8 @@ static bool valid_hostcond(const struct inet_diag_bc_op *op, int len, } /* Validate a port comparison operator. */ -static inline bool valid_port_comparison(const struct inet_diag_bc_op *op, - int len, int *min_len) +static bool valid_port_comparison(const struct inet_diag_bc_op *op, + int len, int *min_len) { /* Port comparisons put the port in a follow-on inet_diag_bc_op. */ *min_len += sizeof(struct inet_diag_bc_op); @@ -573,30 +704,80 @@ static inline bool valid_port_comparison(const struct inet_diag_bc_op *op, return true; } -static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) +static bool valid_markcond(const struct inet_diag_bc_op *op, int len, + int *min_len) { - const void *bc = bytecode; - int len = bytecode_len; + *min_len += sizeof(struct inet_diag_markcond); + return len >= *min_len; +} + +#ifdef CONFIG_SOCK_CGROUP_DATA +static bool valid_cgroupcond(const struct inet_diag_bc_op *op, int len, + int *min_len) +{ + *min_len += sizeof(u64); + return len >= *min_len; +} +#endif + +static int inet_diag_bc_audit(struct inet_diag_dump_data *cb_data, + const struct sk_buff *skb) +{ + const struct nlattr *attr = cb_data->inet_diag_nla_bc; + const void *bytecode, *bc; + int bytecode_len, len; + bool net_admin; + + if (!attr) + return 0; + + if (nla_len(attr) < sizeof(struct inet_diag_bc_op)) + return -EINVAL; + + net_admin = netlink_net_capable(skb, CAP_NET_ADMIN); + bytecode = bc = nla_data(attr); + len = bytecode_len = nla_len(attr); while (len > 0) { - const struct inet_diag_bc_op *op = bc; int min_len = sizeof(struct inet_diag_bc_op); + const struct inet_diag_bc_op *op = bc; -//printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len); switch (op->code) { case INET_DIAG_BC_S_COND: case INET_DIAG_BC_D_COND: if (!valid_hostcond(bc, len, &min_len)) return -EINVAL; break; + case INET_DIAG_BC_DEV_COND: + if (!valid_devcond(bc, len, &min_len)) + return -EINVAL; + break; + case INET_DIAG_BC_S_EQ: case INET_DIAG_BC_S_GE: case INET_DIAG_BC_S_LE: + case INET_DIAG_BC_D_EQ: case INET_DIAG_BC_D_GE: case INET_DIAG_BC_D_LE: if (!valid_port_comparison(bc, len, &min_len)) return -EINVAL; break; + case INET_DIAG_BC_MARK_COND: + if (!net_admin) + return -EPERM; + if (!valid_markcond(bc, len, &min_len)) + return -EINVAL; + cb_data->mark_needed = true; + break; +#ifdef CONFIG_SOCK_CGROUP_DATA + case INET_DIAG_BC_CGROUP_COND: + if (!valid_cgroupcond(bc, len, &min_len)) + return -EINVAL; + cb_data->cgroup_needed = true; + break; +#endif case INET_DIAG_BC_AUTO: + cb_data->userlocks_needed = true; + fallthrough; case INET_DIAG_BC_JMP: case INET_DIAG_BC_NOP: break; @@ -620,429 +801,129 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) return len == 0 ? 0 : -EINVAL; } -static int inet_csk_diag_dump(struct sock *sk, - struct sk_buff *skb, - struct netlink_callback *cb, - struct inet_diag_req_v2 *r, - const struct nlattr *bc) -{ - if (!inet_diag_bc_sk(bc, sk)) - return 0; - - return inet_csk_diag_fill(sk, skb, r, - sk_user_ns(NETLINK_CB(cb->skb).sk), - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); -} - -static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, - struct sk_buff *skb, - struct netlink_callback *cb, - struct inet_diag_req_v2 *r, - const struct nlattr *bc) +static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + const struct inet_diag_req_v2 *r) { - if (bc != NULL) { - struct inet_diag_entry entry; + struct inet_diag_dump_data *cb_data = cb->data; + const struct inet_diag_handler *handler; + u32 prev_min_dump_alloc; + int protocol, err = 0; - entry.family = tw->tw_family; -#if IS_ENABLED(CONFIG_IPV6) - if (tw->tw_family == AF_INET6) { - struct inet6_timewait_sock *tw6 = - inet6_twsk((struct sock *)tw); - entry.saddr = tw6->tw_v6_rcv_saddr.s6_addr32; - entry.daddr = tw6->tw_v6_daddr.s6_addr32; - } else -#endif - { - entry.saddr = &tw->tw_rcv_saddr; - entry.daddr = &tw->tw_daddr; - } - entry.sport = tw->tw_num; - entry.dport = ntohs(tw->tw_dport); - entry.userlocks = 0; + protocol = inet_diag_get_protocol(r, cb_data); - if (!inet_diag_bc_run(bc, &entry)) - return 0; +again: + prev_min_dump_alloc = cb->min_dump_alloc; + handler = inet_diag_lock_handler(protocol); + if (handler) { + handler->dump(skb, cb, r); + inet_diag_unlock_handler(handler); + } else { + err = -ENOENT; + } + /* The skb is not large enough to fit one sk info and + * inet_sk_diag_fill() has requested for a larger skb. + */ + if (!skb->len && cb->min_dump_alloc > prev_min_dump_alloc) { + err = pskb_expand_head(skb, 0, cb->min_dump_alloc, GFP_KERNEL); + if (!err) + goto again; } - return inet_twsk_diag_fill(tw, skb, r, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); + return err ? : skb->len; } -/* Get the IPv4, IPv6, or IPv4-mapped-IPv6 local and remote addresses - * from a request_sock. For IPv4-mapped-IPv6 we must map IPv4 to IPv6. - */ -static inline void inet_diag_req_addrs(const struct sock *sk, - const struct request_sock *req, - struct inet_diag_entry *entry) +static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { - struct inet_request_sock *ireq = inet_rsk(req); - -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) { - if (req->rsk_ops->family == AF_INET6) { - entry->saddr = inet6_rsk(req)->loc_addr.s6_addr32; - entry->daddr = inet6_rsk(req)->rmt_addr.s6_addr32; - } else if (req->rsk_ops->family == AF_INET) { - ipv6_addr_set_v4mapped(ireq->loc_addr, - &entry->saddr_storage); - ipv6_addr_set_v4mapped(ireq->rmt_addr, - &entry->daddr_storage); - entry->saddr = entry->saddr_storage.s6_addr32; - entry->daddr = entry->daddr_storage.s6_addr32; - } - } else -#endif - { - entry->saddr = &ireq->loc_addr; - entry->daddr = &ireq->rmt_addr; - } + return __inet_diag_dump(skb, cb, nlmsg_data(cb->nlh)); } -static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, - struct request_sock *req, - struct user_namespace *user_ns, - u32 portid, u32 seq, - const struct nlmsghdr *unlh) +static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen) { - const struct inet_request_sock *ireq = inet_rsk(req); - struct inet_sock *inet = inet_sk(sk); - struct inet_diag_msg *r; - struct nlmsghdr *nlh; - long tmo; + const struct nlmsghdr *nlh = cb->nlh; + struct inet_diag_dump_data *cb_data; + struct sk_buff *skb = cb->skb; + struct nlattr *nla; + int err; - nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), - NLM_F_MULTI); - if (!nlh) - return -EMSGSIZE; + cb_data = kzalloc(sizeof(*cb_data), GFP_KERNEL); + if (!cb_data) + return -ENOMEM; - r = nlmsg_data(nlh); - r->idiag_family = sk->sk_family; - r->idiag_state = TCP_SYN_RECV; - r->idiag_timer = 1; - r->idiag_retrans = req->num_retrans; - - r->id.idiag_if = sk->sk_bound_dev_if; - sock_diag_save_cookie(req, r->id.idiag_cookie); - - tmo = req->expires - jiffies; - if (tmo < 0) - tmo = 0; - - r->id.idiag_sport = inet->inet_sport; - r->id.idiag_dport = ireq->rmt_port; - r->id.idiag_src[0] = ireq->loc_addr; - r->id.idiag_dst[0] = ireq->rmt_addr; - r->idiag_expires = jiffies_to_msecs(tmo); - r->idiag_rqueue = 0; - r->idiag_wqueue = 0; - r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); - r->idiag_inode = 0; -#if IS_ENABLED(CONFIG_IPV6) - if (r->idiag_family == AF_INET6) { - struct inet_diag_entry entry; - inet_diag_req_addrs(sk, req, &entry); - memcpy(r->id.idiag_src, entry.saddr, sizeof(struct in6_addr)); - memcpy(r->id.idiag_dst, entry.daddr, sizeof(struct in6_addr)); + err = inet_diag_parse_attrs(nlh, hdrlen, cb_data->req_nlas); + if (err) { + kfree(cb_data); + return err; } -#endif - - return nlmsg_end(skb, nlh); -} - -static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, - struct netlink_callback *cb, - struct inet_diag_req_v2 *r, - const struct nlattr *bc) -{ - struct inet_diag_entry entry; - struct inet_connection_sock *icsk = inet_csk(sk); - struct listen_sock *lopt; - struct inet_sock *inet = inet_sk(sk); - int j, s_j; - int reqnum, s_reqnum; - int err = 0; - - s_j = cb->args[3]; - s_reqnum = cb->args[4]; - - if (s_j > 0) - s_j--; - - entry.family = sk->sk_family; - - read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); - - lopt = icsk->icsk_accept_queue.listen_opt; - if (!lopt || !lopt->qlen) - goto out; - - if (bc != NULL) { - entry.sport = inet->inet_num; - entry.userlocks = sk->sk_userlocks; + err = inet_diag_bc_audit(cb_data, skb); + if (err) { + kfree(cb_data); + return err; } - for (j = s_j; j < lopt->nr_table_entries; j++) { - struct request_sock *req, *head = lopt->syn_table[j]; - - reqnum = 0; - for (req = head; req; reqnum++, req = req->dl_next) { - struct inet_request_sock *ireq = inet_rsk(req); + nla = cb_data->inet_diag_nla_bpf_stgs; + if (nla) { + struct bpf_sk_storage_diag *bpf_stg_diag; - if (reqnum < s_reqnum) - continue; - if (r->id.idiag_dport != ireq->rmt_port && - r->id.idiag_dport) - continue; - - if (bc) { - inet_diag_req_addrs(sk, req, &entry); - entry.dport = ntohs(ireq->rmt_port); - - if (!inet_diag_bc_run(bc, &entry)) - continue; - } - - err = inet_diag_fill_req(skb, sk, req, - sk_user_ns(NETLINK_CB(cb->skb).sk), - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, cb->nlh); - if (err < 0) { - cb->args[3] = j + 1; - cb->args[4] = reqnum; - goto out; - } + bpf_stg_diag = bpf_sk_storage_diag_alloc(nla); + if (IS_ERR(bpf_stg_diag)) { + kfree(cb_data); + return PTR_ERR(bpf_stg_diag); } - - s_reqnum = 0; + cb_data->bpf_stg_diag = bpf_stg_diag; } -out: - read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); - - return err; + cb->data = cb_data; + return 0; } -void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, - struct netlink_callback *cb, struct inet_diag_req_v2 *r, struct nlattr *bc) +static int inet_diag_dump_start(struct netlink_callback *cb) { - int i, num; - int s_i, s_num; - struct net *net = sock_net(skb->sk); - - s_i = cb->args[1]; - s_num = num = cb->args[2]; - - if (cb->args[0] == 0) { - if (!(r->idiag_states & (TCPF_LISTEN | TCPF_SYN_RECV))) - goto skip_listen_ht; - - for (i = s_i; i < INET_LHTABLE_SIZE; i++) { - struct sock *sk; - struct hlist_nulls_node *node; - struct inet_listen_hashbucket *ilb; - - num = 0; - ilb = &hashinfo->listening_hash[i]; - spin_lock_bh(&ilb->lock); - sk_nulls_for_each(sk, node, &ilb->head) { - struct inet_sock *inet = inet_sk(sk); - - if (!net_eq(sock_net(sk), net)) - continue; - - if (num < s_num) { - num++; - continue; - } - - if (r->sdiag_family != AF_UNSPEC && - sk->sk_family != r->sdiag_family) - goto next_listen; - - if (r->id.idiag_sport != inet->inet_sport && - r->id.idiag_sport) - goto next_listen; - - if (!(r->idiag_states & TCPF_LISTEN) || - r->id.idiag_dport || - cb->args[3] > 0) - goto syn_recv; - - if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) { - spin_unlock_bh(&ilb->lock); - goto done; - } - -syn_recv: - if (!(r->idiag_states & TCPF_SYN_RECV)) - goto next_listen; - - if (inet_diag_dump_reqs(skb, sk, cb, r, bc) < 0) { - spin_unlock_bh(&ilb->lock); - goto done; - } - -next_listen: - cb->args[3] = 0; - cb->args[4] = 0; - ++num; - } - spin_unlock_bh(&ilb->lock); - - s_num = 0; - cb->args[3] = 0; - cb->args[4] = 0; - } -skip_listen_ht: - cb->args[0] = 1; - s_i = num = s_num = 0; - } - - if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV))) - goto out; - - for (i = s_i; i <= hashinfo->ehash_mask; i++) { - struct inet_ehash_bucket *head = &hashinfo->ehash[i]; - spinlock_t *lock = inet_ehash_lockp(hashinfo, i); - struct sock *sk; - struct hlist_nulls_node *node; - - num = 0; - - if (hlist_nulls_empty(&head->chain) && - hlist_nulls_empty(&head->twchain)) - continue; - - if (i > s_i) - s_num = 0; - - spin_lock_bh(lock); - sk_nulls_for_each(sk, node, &head->chain) { - struct inet_sock *inet = inet_sk(sk); - - if (!net_eq(sock_net(sk), net)) - continue; - if (num < s_num) - goto next_normal; - if (!(r->idiag_states & (1 << sk->sk_state))) - goto next_normal; - if (r->sdiag_family != AF_UNSPEC && - sk->sk_family != r->sdiag_family) - goto next_normal; - if (r->id.idiag_sport != inet->inet_sport && - r->id.idiag_sport) - goto next_normal; - if (r->id.idiag_dport != inet->inet_dport && - r->id.idiag_dport) - goto next_normal; - if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) { - spin_unlock_bh(lock); - goto done; - } -next_normal: - ++num; - } - - if (r->idiag_states & TCPF_TIME_WAIT) { - struct inet_timewait_sock *tw; - - inet_twsk_for_each(tw, node, - &head->twchain) { - if (!net_eq(twsk_net(tw), net)) - continue; - - if (num < s_num) - goto next_dying; - if (r->sdiag_family != AF_UNSPEC && - tw->tw_family != r->sdiag_family) - goto next_dying; - if (r->id.idiag_sport != tw->tw_sport && - r->id.idiag_sport) - goto next_dying; - if (r->id.idiag_dport != tw->tw_dport && - r->id.idiag_dport) - goto next_dying; - if (inet_twsk_diag_dump(tw, skb, cb, r, bc) < 0) { - spin_unlock_bh(lock); - goto done; - } -next_dying: - ++num; - } - } - spin_unlock_bh(lock); - } - -done: - cb->args[1] = i; - cb->args[2] = num; -out: - ; + return __inet_diag_dump_start(cb, sizeof(struct inet_diag_req_v2)); } -EXPORT_SYMBOL_GPL(inet_diag_dump_icsk); -static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, struct nlattr *bc) +static int inet_diag_dump_start_compat(struct netlink_callback *cb) { - const struct inet_diag_handler *handler; - int err = 0; - - handler = inet_diag_lock_handler(r->sdiag_protocol); - if (!IS_ERR(handler)) - handler->dump(skb, cb, r, bc); - else - err = PTR_ERR(handler); - inet_diag_unlock_handler(handler); - - return err ? : skb->len; + return __inet_diag_dump_start(cb, sizeof(struct inet_diag_req)); } -static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +static int inet_diag_dump_done(struct netlink_callback *cb) { - struct nlattr *bc = NULL; - int hdrlen = sizeof(struct inet_diag_req_v2); + struct inet_diag_dump_data *cb_data = cb->data; - if (nlmsg_attrlen(cb->nlh, hdrlen)) - bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE); + bpf_sk_storage_diag_free(cb_data->bpf_stg_diag); + kfree(cb->data); - return __inet_diag_dump(skb, cb, nlmsg_data(cb->nlh), bc); + return 0; } -static inline int inet_diag_type2proto(int type) +static int inet_diag_type2proto(int type) { switch (type) { case TCPDIAG_GETSOCK: return IPPROTO_TCP; - case DCCPDIAG_GETSOCK: - return IPPROTO_DCCP; default: return 0; } } -static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb) +static int inet_diag_dump_compat(struct sk_buff *skb, + struct netlink_callback *cb) { struct inet_diag_req *rc = nlmsg_data(cb->nlh); struct inet_diag_req_v2 req; - struct nlattr *bc = NULL; - int hdrlen = sizeof(struct inet_diag_req); req.sdiag_family = AF_UNSPEC; /* compatibility */ req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type); req.idiag_ext = rc->idiag_ext; + req.pad = 0; req.idiag_states = rc->idiag_states; req.id = rc->id; - if (nlmsg_attrlen(cb->nlh, hdrlen)) - bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE); - - return __inet_diag_dump(skb, cb, &req, bc); + return __inet_diag_dump(skb, cb, &req); } static int inet_diag_get_exact_compat(struct sk_buff *in_skb, - const struct nlmsghdr *nlh) + const struct nlmsghdr *nlh) { struct inet_diag_req *rc = nlmsg_data(nlh); struct inet_diag_req_v2 req; @@ -1050,10 +931,12 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb, req.sdiag_family = rc->idiag_family; req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type); req.idiag_ext = rc->idiag_ext; + req.pad = 0; req.idiag_states = rc->idiag_states; req.id = rc->id; - return inet_diag_get_exact(in_skb, nlh, &req); + return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, + sizeof(struct inet_diag_req), &req); } static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) @@ -1066,28 +949,18 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; if (nlh->nlmsg_flags & NLM_F_DUMP) { - if (nlmsg_attrlen(nlh, hdrlen)) { - struct nlattr *attr; - - attr = nlmsg_find_attr(nlh, hdrlen, - INET_DIAG_REQ_BYTECODE); - if (attr == NULL || - nla_len(attr) < sizeof(struct inet_diag_bc_op) || - inet_diag_bc_audit(nla_data(attr), nla_len(attr))) - return -EINVAL; - } - { - struct netlink_dump_control c = { - .dump = inet_diag_dump_compat, - }; - return netlink_dump_start(net->diag_nlsk, skb, nlh, &c); - } + struct netlink_dump_control c = { + .start = inet_diag_dump_start_compat, + .done = inet_diag_dump_done, + .dump = inet_diag_dump_compat, + }; + return netlink_dump_start(net->diag_nlsk, skb, nlh, &c); } return inet_diag_get_exact_compat(skb, nlh); } -static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) +static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h) { int hdrlen = sizeof(struct inet_diag_req_v2); struct net *net = sock_net(skb->sk); @@ -1095,54 +968,92 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) if (nlmsg_len(h) < hdrlen) return -EINVAL; - if (h->nlmsg_flags & NLM_F_DUMP) { - if (nlmsg_attrlen(h, hdrlen)) { - struct nlattr *attr; - attr = nlmsg_find_attr(h, hdrlen, - INET_DIAG_REQ_BYTECODE); - if (attr == NULL || - nla_len(attr) < sizeof(struct inet_diag_bc_op) || - inet_diag_bc_audit(nla_data(attr), nla_len(attr))) - return -EINVAL; - } - { - struct netlink_dump_control c = { - .dump = inet_diag_dump, - }; - return netlink_dump_start(net->diag_nlsk, skb, h, &c); - } + if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY && + h->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .start = inet_diag_dump_start, + .done = inet_diag_dump_done, + .dump = inet_diag_dump, + }; + return netlink_dump_start(net->diag_nlsk, skb, h, &c); } - return inet_diag_get_exact(skb, h, nlmsg_data(h)); + return inet_diag_cmd_exact(h->nlmsg_type, skb, h, hdrlen, + nlmsg_data(h)); +} + +static +int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk) +{ + const struct inet_diag_handler *handler; + struct nlmsghdr *nlh; + struct nlattr *attr; + struct inet_diag_msg *r; + void *info = NULL; + int err = 0; + + nlh = nlmsg_put(skb, 0, 0, SOCK_DIAG_BY_FAMILY, sizeof(*r), 0); + if (!nlh) + return -ENOMEM; + + r = nlmsg_data(nlh); + memset(r, 0, sizeof(*r)); + inet_diag_msg_common_fill(r, sk); + if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_STREAM) + r->id.idiag_sport = inet_sk(sk)->inet_sport; + r->idiag_state = sk->sk_state; + + if ((err = nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))) { + nlmsg_cancel(skb, nlh); + return err; + } + + handler = inet_diag_lock_handler(sk->sk_protocol); + if (!handler) { + nlmsg_cancel(skb, nlh); + return -ENOENT; + } + + attr = handler->idiag_info_size + ? nla_reserve_64bit(skb, INET_DIAG_INFO, + handler->idiag_info_size, + INET_DIAG_PAD) + : NULL; + if (attr) + info = nla_data(attr); + + handler->idiag_get_info(sk, r, info); + inet_diag_unlock_handler(handler); + + nlmsg_end(skb, nlh); + return 0; } static const struct sock_diag_handler inet_diag_handler = { + .owner = THIS_MODULE, .family = AF_INET, - .dump = inet_diag_handler_dump, + .dump = inet_diag_handler_cmd, + .get_info = inet_diag_handler_get_info, + .destroy = inet_diag_handler_cmd, }; static const struct sock_diag_handler inet6_diag_handler = { + .owner = THIS_MODULE, .family = AF_INET6, - .dump = inet_diag_handler_dump, + .dump = inet_diag_handler_cmd, + .get_info = inet_diag_handler_get_info, + .destroy = inet_diag_handler_cmd, }; int inet_diag_register(const struct inet_diag_handler *h) { const __u16 type = h->idiag_type; - int err = -EINVAL; if (type >= IPPROTO_MAX) - goto out; + return -EINVAL; - mutex_lock(&inet_diag_table_mutex); - err = -EEXIST; - if (inet_diag_table[type] == NULL) { - inet_diag_table[type] = h; - err = 0; - } - mutex_unlock(&inet_diag_table_mutex); -out: - return err; + return !cmpxchg((const struct inet_diag_handler **)&inet_diag_table[type], + NULL, h) ? 0 : -EEXIST; } EXPORT_SYMBOL_GPL(inet_diag_register); @@ -1153,12 +1064,16 @@ void inet_diag_unregister(const struct inet_diag_handler *h) if (type >= IPPROTO_MAX) return; - mutex_lock(&inet_diag_table_mutex); - inet_diag_table[type] = NULL; - mutex_unlock(&inet_diag_table_mutex); + xchg((const struct inet_diag_handler **)&inet_diag_table[type], + NULL); } EXPORT_SYMBOL_GPL(inet_diag_unregister); +static const struct sock_diag_inet_compat inet_diag_compat = { + .owner = THIS_MODULE, + .fn = inet_diag_rcv_msg_compat, +}; + static int __init inet_diag_init(void) { const int inet_diag_table_size = (IPPROTO_MAX * @@ -1177,7 +1092,7 @@ static int __init inet_diag_init(void) if (err) goto out_free_inet; - sock_diag_register_inet_compat(inet_diag_rcv_msg_compat); + sock_diag_register_inet_compat(&inet_diag_compat); out: return err; @@ -1192,12 +1107,13 @@ static void __exit inet_diag_exit(void) { sock_diag_unregister(&inet6_diag_handler); sock_diag_unregister(&inet_diag_handler); - sock_diag_unregister_inet_compat(inet_diag_rcv_msg_compat); + sock_diag_unregister_inet_compat(&inet_diag_compat); kfree(inet_diag_table); } module_init(inet_diag_init); module_exit(inet_diag_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("INET/INET6: socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2 /* AF_INET */); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10 /* AF_INET6 */); |
