diff options
Diffstat (limited to 'net/core/rtnetlink.c')
| -rw-r--r-- | net/core/rtnetlink.c | 3928 |
1 files changed, 2856 insertions, 1072 deletions
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f5a98082ac7a..b1ed55141d8a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket @@ -7,13 +8,8 @@ * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Fixes: - * Vitaly E. Lavrov RTA_OK arithmetics was wrong. + * Vitaly E. Lavrov RTA_OK arithmetic was wrong. */ #include <linux/bitops.h> @@ -46,7 +42,6 @@ #include <linux/inet.h> #include <linux/netdevice.h> -#include <net/switchdev.h> #include <net/ip.h> #include <net/protocol.h> #include <net/arp.h> @@ -58,9 +53,17 @@ #include <net/fib_rules.h> #include <net/rtnetlink.h> #include <net/net_namespace.h> +#include <net/netdev_lock.h> +#include <net/devlink.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/addrconf.h> +#endif +#include <linux/dpll.h> + +#include "dev.h" #define RTNL_MAX_TYPE 50 -#define RTNL_SLAVE_MAX_TYPE 36 +#define RTNL_SLAVE_MAX_TYPE 44 struct rtnl_link { rtnl_doit_func doit; @@ -78,11 +81,15 @@ void rtnl_lock(void) } EXPORT_SYMBOL(rtnl_lock); +int rtnl_lock_interruptible(void) +{ + return mutex_lock_interruptible(&rtnl_mutex); +} + int rtnl_lock_killable(void) { return mutex_lock_killable(&rtnl_mutex); } -EXPORT_SYMBOL(rtnl_lock_killable); static struct sk_buff *defer_kfree_skb_list; void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail) @@ -100,6 +107,39 @@ void __rtnl_unlock(void) defer_kfree_skb_list = NULL; + /* Ensure that we didn't actually add any TODO item when __rtnl_unlock() + * is used. In some places, e.g. in cfg80211, we have code that will do + * something like + * rtnl_lock() + * wiphy_lock() + * ... + * rtnl_unlock() + * + * and because netdev_run_todo() acquires the RTNL for items on the list + * we could cause a situation such as this: + * Thread 1 Thread 2 + * rtnl_lock() + * unregister_netdevice() + * __rtnl_unlock() + * rtnl_lock() + * wiphy_lock() + * rtnl_unlock() + * netdev_run_todo() + * __rtnl_unlock() + * + * // list not empty now + * // because of thread 2 + * rtnl_lock() + * while (!list_empty(...)) + * rtnl_lock() + * wiphy_lock() + * **** DEADLOCK **** + * + * However, usage of __rtnl_unlock() is rare, and so we can ensure that + * it's not used in cases where something is added to do the list. + */ + WARN_ON(!list_empty(&net_todo_list)); + mutex_unlock(&rtnl_mutex); while (head) { @@ -144,7 +184,177 @@ bool lockdep_rtnl_is_held(void) EXPORT_SYMBOL(lockdep_rtnl_is_held); #endif /* #ifdef CONFIG_PROVE_LOCKING */ -static struct rtnl_link *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; +#ifdef CONFIG_DEBUG_NET_SMALL_RTNL +void __rtnl_net_lock(struct net *net) +{ + ASSERT_RTNL(); + + mutex_lock(&net->rtnl_mutex); +} +EXPORT_SYMBOL(__rtnl_net_lock); + +void __rtnl_net_unlock(struct net *net) +{ + ASSERT_RTNL(); + + mutex_unlock(&net->rtnl_mutex); +} +EXPORT_SYMBOL(__rtnl_net_unlock); + +void rtnl_net_lock(struct net *net) +{ + rtnl_lock(); + __rtnl_net_lock(net); +} +EXPORT_SYMBOL(rtnl_net_lock); + +void rtnl_net_unlock(struct net *net) +{ + __rtnl_net_unlock(net); + rtnl_unlock(); +} +EXPORT_SYMBOL(rtnl_net_unlock); + +int rtnl_net_trylock(struct net *net) +{ + int ret = rtnl_trylock(); + + if (ret) + __rtnl_net_lock(net); + + return ret; +} +EXPORT_SYMBOL(rtnl_net_trylock); + +int rtnl_net_lock_killable(struct net *net) +{ + int ret = rtnl_lock_killable(); + + if (!ret) + __rtnl_net_lock(net); + + return ret; +} + +static int rtnl_net_cmp_locks(const struct net *net_a, const struct net *net_b) +{ + if (net_eq(net_a, net_b)) + return 0; + + /* always init_net first */ + if (net_eq(net_a, &init_net)) + return -1; + + if (net_eq(net_b, &init_net)) + return 1; + + /* otherwise lock in ascending order */ + return net_a < net_b ? -1 : 1; +} + +int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b) +{ + const struct net *net_a, *net_b; + + net_a = container_of(a, struct net, rtnl_mutex.dep_map); + net_b = container_of(b, struct net, rtnl_mutex.dep_map); + + return rtnl_net_cmp_locks(net_a, net_b); +} + +bool rtnl_net_is_locked(struct net *net) +{ + return rtnl_is_locked() && mutex_is_locked(&net->rtnl_mutex); +} +EXPORT_SYMBOL(rtnl_net_is_locked); + +bool lockdep_rtnl_net_is_held(struct net *net) +{ + return lockdep_rtnl_is_held() && lockdep_is_held(&net->rtnl_mutex); +} +EXPORT_SYMBOL(lockdep_rtnl_net_is_held); +#else +static int rtnl_net_cmp_locks(const struct net *net_a, const struct net *net_b) +{ + /* No need to swap */ + return -1; +} +#endif + +struct rtnl_nets { + /* ->newlink() needs to freeze 3 netns at most; + * 2 for the new device, 1 for its peer. + */ + struct net *net[3]; + unsigned char len; +}; + +static void rtnl_nets_init(struct rtnl_nets *rtnl_nets) +{ + memset(rtnl_nets, 0, sizeof(*rtnl_nets)); +} + +static void rtnl_nets_destroy(struct rtnl_nets *rtnl_nets) +{ + int i; + + for (i = 0; i < rtnl_nets->len; i++) { + put_net(rtnl_nets->net[i]); + rtnl_nets->net[i] = NULL; + } + + rtnl_nets->len = 0; +} + +/** + * rtnl_nets_add - Add netns to be locked before ->newlink(). + * + * @rtnl_nets: rtnl_nets pointer passed to ->get_peer_net(). + * @net: netns pointer with an extra refcnt held. + * + * The extra refcnt is released in rtnl_nets_destroy(). + */ +static void rtnl_nets_add(struct rtnl_nets *rtnl_nets, struct net *net) +{ + int i; + + DEBUG_NET_WARN_ON_ONCE(rtnl_nets->len == ARRAY_SIZE(rtnl_nets->net)); + + for (i = 0; i < rtnl_nets->len; i++) { + switch (rtnl_net_cmp_locks(rtnl_nets->net[i], net)) { + case 0: + put_net(net); + return; + case 1: + swap(rtnl_nets->net[i], net); + } + } + + rtnl_nets->net[i] = net; + rtnl_nets->len++; +} + +static void rtnl_nets_lock(struct rtnl_nets *rtnl_nets) +{ + int i; + + rtnl_lock(); + + for (i = 0; i < rtnl_nets->len; i++) + __rtnl_net_lock(rtnl_nets->net[i]); +} + +static void rtnl_nets_unlock(struct rtnl_nets *rtnl_nets) +{ + int i; + + for (i = 0; i < rtnl_nets->len; i++) + __rtnl_net_unlock(rtnl_nets->net[i]); + + rtnl_unlock(); +} + +static struct rtnl_link __rcu *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; static inline int rtm_msgindex(int msgtype) { @@ -162,7 +372,7 @@ static inline int rtm_msgindex(int msgtype) static struct rtnl_link *rtnl_get_link(int protocol, int msgtype) { - struct rtnl_link **tab; + struct rtnl_link __rcu **tab; if (protocol >= ARRAY_SIZE(rtnl_msg_handlers)) protocol = PF_UNSPEC; @@ -171,7 +381,7 @@ static struct rtnl_link *rtnl_get_link(int protocol, int msgtype) if (!tab) tab = rcu_dereference_rtnl(rtnl_msg_handlers[PF_UNSPEC]); - return tab[msgtype]; + return rcu_dereference_rtnl(tab[msgtype]); } static int rtnl_register_internal(struct module *owner, @@ -188,7 +398,7 @@ static int rtnl_register_internal(struct module *owner, msgindex = rtm_msgindex(msgtype); rtnl_lock(); - tab = rtnl_msg_handlers[protocol]; + tab = rtnl_dereference(rtnl_msg_handlers[protocol]); if (tab == NULL) { tab = kcalloc(RTM_NR_MSGTYPES, sizeof(void *), GFP_KERNEL); if (!tab) @@ -219,6 +429,8 @@ static int rtnl_register_internal(struct module *owner, if (dumpit) link->dumpit = dumpit; + WARN_ON(rtnl_msgtype_kind(msgtype) != RTNL_KIND_DEL && + (flags & RTNL_FLAG_BULK_DEL_SUPPORTED)); link->flags |= flags; /* publish protocol:msgtype */ @@ -232,66 +444,16 @@ unlock: } /** - * rtnl_register_module - Register a rtnetlink message type - * - * @owner: module registering the hook (THIS_MODULE) - * @protocol: Protocol family or PF_UNSPEC - * @msgtype: rtnetlink message type - * @doit: Function pointer called for each request message - * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message - * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions - * - * Like rtnl_register, but for use by removable modules. - */ -int rtnl_register_module(struct module *owner, - int protocol, int msgtype, - rtnl_doit_func doit, rtnl_dumpit_func dumpit, - unsigned int flags) -{ - return rtnl_register_internal(owner, protocol, msgtype, - doit, dumpit, flags); -} -EXPORT_SYMBOL_GPL(rtnl_register_module); - -/** - * rtnl_register - Register a rtnetlink message type - * @protocol: Protocol family or PF_UNSPEC - * @msgtype: rtnetlink message type - * @doit: Function pointer called for each request message - * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message - * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions - * - * Registers the specified function pointers (at least one of them has - * to be non-NULL) to be called whenever a request message for the - * specified protocol family and message type is received. - * - * The special protocol family PF_UNSPEC may be used to define fallback - * function pointers for the case when no entry for the specific protocol - * family exists. - */ -void rtnl_register(int protocol, int msgtype, - rtnl_doit_func doit, rtnl_dumpit_func dumpit, - unsigned int flags) -{ - int err; - - err = rtnl_register_internal(NULL, protocol, msgtype, doit, dumpit, - flags); - if (err) - pr_err("Unable to register rtnetlink message handler, " - "protocol = %d, message type = %d\n", protocol, msgtype); -} - -/** * rtnl_unregister - Unregister a rtnetlink message type * @protocol: Protocol family or PF_UNSPEC * @msgtype: rtnetlink message type * * Returns 0 on success or a negative error code. */ -int rtnl_unregister(int protocol, int msgtype) +static int rtnl_unregister(int protocol, int msgtype) { - struct rtnl_link **tab, *link; + struct rtnl_link __rcu **tab; + struct rtnl_link *link; int msgindex; BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); @@ -304,43 +466,37 @@ int rtnl_unregister(int protocol, int msgtype) return -ENOENT; } - link = tab[msgindex]; - rcu_assign_pointer(tab[msgindex], NULL); + link = rcu_replace_pointer_rtnl(tab[msgindex], NULL); rtnl_unlock(); kfree_rcu(link, rcu); return 0; } -EXPORT_SYMBOL_GPL(rtnl_unregister); /** * rtnl_unregister_all - Unregister all rtnetlink message type of a protocol * @protocol : Protocol family or PF_UNSPEC * - * Identical to calling rtnl_unregster() for all registered message types + * Identical to calling rtnl_unregister() for all registered message types * of a certain protocol family. */ void rtnl_unregister_all(int protocol) { - struct rtnl_link **tab, *link; + struct rtnl_link __rcu **tab; + struct rtnl_link *link; int msgindex; BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); rtnl_lock(); - tab = rtnl_msg_handlers[protocol]; + tab = rcu_replace_pointer_rtnl(rtnl_msg_handlers[protocol], NULL); if (!tab) { rtnl_unlock(); return; } - RCU_INIT_POINTER(rtnl_msg_handlers[protocol], NULL); for (msgindex = 0; msgindex < RTM_NR_MSGTYPES; msgindex++) { - link = tab[msgindex]; - if (!link) - continue; - - rcu_assign_pointer(tab[msgindex], NULL); + link = rcu_replace_pointer_rtnl(tab[msgindex], NULL); kfree_rcu(link, rcu); } rtnl_unlock(); @@ -351,46 +507,86 @@ void rtnl_unregister_all(int protocol) } EXPORT_SYMBOL_GPL(rtnl_unregister_all); -static LIST_HEAD(link_ops); - -static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) +/** + * __rtnl_register_many - Register rtnetlink message types + * @handlers: Array of struct rtnl_msg_handlers + * @n: The length of @handlers + * + * Registers the specified function pointers (at least one of them has + * to be non-NULL) to be called whenever a request message for the + * specified protocol family and message type is received. + * + * The special protocol family PF_UNSPEC may be used to define fallback + * function pointers for the case when no entry for the specific protocol + * family exists. + * + * When one element of @handlers fails to register, + * 1) built-in: panics. + * 2) modules : the previous successful registrations are unwinded + * and an error is returned. + * + * Use rtnl_register_many(). + */ +int __rtnl_register_many(const struct rtnl_msg_handler *handlers, int n) { - const struct rtnl_link_ops *ops; + const struct rtnl_msg_handler *handler; + int i, err; - list_for_each_entry(ops, &link_ops, list) { - if (!strcmp(ops->kind, kind)) - return ops; + for (i = 0, handler = handlers; i < n; i++, handler++) { + err = rtnl_register_internal(handler->owner, handler->protocol, + handler->msgtype, handler->doit, + handler->dumpit, handler->flags); + if (err) { + if (!handler->owner) + panic("Unable to register rtnetlink message " + "handlers, %pS\n", handlers); + + __rtnl_unregister_many(handlers, i); + break; + } } - return NULL; + + return err; } +EXPORT_SYMBOL_GPL(__rtnl_register_many); -/** - * __rtnl_link_register - Register rtnl_link_ops with rtnetlink. - * @ops: struct rtnl_link_ops * to register - * - * The caller must hold the rtnl_mutex. This function should be used - * by drivers that create devices during module initialization. It - * must be called before registering the devices. - * - * Returns 0 on success or a negative error code. - */ -int __rtnl_link_register(struct rtnl_link_ops *ops) +void __rtnl_unregister_many(const struct rtnl_msg_handler *handlers, int n) { - if (rtnl_link_ops_get(ops->kind)) - return -EEXIST; + const struct rtnl_msg_handler *handler; + int i; - /* The check for setup is here because if ops - * does not have that filled up, it is not possible - * to use the ops for creating device. So do not - * fill up dellink as well. That disables rtnl_dellink. - */ - if (ops->setup && !ops->dellink) - ops->dellink = unregister_netdevice_queue; + for (i = n - 1, handler = handlers + n - 1; i >= 0; i--, handler--) + rtnl_unregister(handler->protocol, handler->msgtype); +} +EXPORT_SYMBOL_GPL(__rtnl_unregister_many); - list_add_tail(&ops->list, &link_ops); - return 0; +static DEFINE_MUTEX(link_ops_mutex); +static LIST_HEAD(link_ops); + +static struct rtnl_link_ops *rtnl_link_ops_get(const char *kind, int *srcu_index) +{ + struct rtnl_link_ops *ops; + + rcu_read_lock(); + + list_for_each_entry_rcu(ops, &link_ops, list) { + if (!strcmp(ops->kind, kind)) { + *srcu_index = srcu_read_lock(&ops->srcu); + goto unlock; + } + } + + ops = NULL; +unlock: + rcu_read_unlock(); + + return ops; +} + +static void rtnl_link_ops_put(struct rtnl_link_ops *ops, int srcu_index) +{ + srcu_read_unlock(&ops->srcu, srcu_index); } -EXPORT_SYMBOL_GPL(__rtnl_link_register); /** * rtnl_link_register - Register rtnl_link_ops with rtnetlink. @@ -400,6 +596,7 @@ EXPORT_SYMBOL_GPL(__rtnl_link_register); */ int rtnl_link_register(struct rtnl_link_ops *ops) { + struct rtnl_link_ops *tmp; int err; /* Sanity-check max sizes to avoid stack buffer overflow. */ @@ -407,9 +604,31 @@ int rtnl_link_register(struct rtnl_link_ops *ops) ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE)) return -EINVAL; - rtnl_lock(); - err = __rtnl_link_register(ops); - rtnl_unlock(); + /* The check for alloc/setup is here because if ops + * does not have that filled up, it is not possible + * to use the ops for creating device. So do not + * fill up dellink as well. That disables rtnl_dellink. + */ + if ((ops->alloc || ops->setup) && !ops->dellink) + ops->dellink = unregister_netdevice_queue; + + err = init_srcu_struct(&ops->srcu); + if (err) + return err; + + mutex_lock(&link_ops_mutex); + + list_for_each_entry(tmp, &link_ops, list) { + if (!strcmp(ops->kind, tmp->kind)) { + err = -EEXIST; + goto unlock; + } + } + + list_add_tail_rcu(&ops->list, &link_ops); +unlock: + mutex_unlock(&link_ops_mutex); + return err; } EXPORT_SYMBOL_GPL(rtnl_link_register); @@ -426,48 +645,20 @@ static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) unregister_netdevice_many(&list_kill); } -/** - * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. - * @ops: struct rtnl_link_ops * to unregister - * - * The caller must hold the rtnl_mutex and guarantee net_namespace_list - * integrity (hold pernet_ops_rwsem for writing to close the race - * with setup_net() and cleanup_net()). - */ -void __rtnl_link_unregister(struct rtnl_link_ops *ops) -{ - struct net *net; - - for_each_net(net) { - __rtnl_kill_links(net, ops); - } - list_del(&ops->list); -} -EXPORT_SYMBOL_GPL(__rtnl_link_unregister); - /* Return with the rtnl_lock held when there are no network * devices unregistering in any network namespace. */ static void rtnl_lock_unregistering_all(void) { - struct net *net; - bool unregistering; DEFINE_WAIT_FUNC(wait, woken_wake_function); add_wait_queue(&netdev_unregistering_wq, &wait); for (;;) { - unregistering = false; rtnl_lock(); /* We held write locked pernet_ops_rwsem, and parallel * setup_net() and cleanup_net() are not possible. */ - for_each_net(net) { - if (net->dev_unreg_count > 0) { - unregistering = true; - break; - } - } - if (!unregistering) + if (!atomic_read(&dev_unreg_count)) break; __rtnl_unlock(); @@ -482,10 +673,22 @@ static void rtnl_lock_unregistering_all(void) */ void rtnl_link_unregister(struct rtnl_link_ops *ops) { + struct net *net; + + mutex_lock(&link_ops_mutex); + list_del_rcu(&ops->list); + mutex_unlock(&link_ops_mutex); + + synchronize_srcu(&ops->srcu); + cleanup_srcu_struct(&ops->srcu); + /* Close the race with setup_net() and cleanup_net() */ down_write(&pernet_ops_rwsem); rtnl_lock_unregistering_all(); - __rtnl_link_unregister(ops); + + for_each_net(net) + __rtnl_kill_links(net, ops); + rtnl_unlock(); up_write(&pernet_ops_rwsem); } @@ -542,29 +745,51 @@ static size_t rtnl_link_get_size(const struct net_device *dev) static LIST_HEAD(rtnl_af_ops); -static const struct rtnl_af_ops *rtnl_af_lookup(const int family) +static struct rtnl_af_ops *rtnl_af_lookup(const int family, int *srcu_index) { - const struct rtnl_af_ops *ops; + struct rtnl_af_ops *ops; + + ASSERT_RTNL(); + + rcu_read_lock(); list_for_each_entry_rcu(ops, &rtnl_af_ops, list) { - if (ops->family == family) - return ops; + if (ops->family == family) { + *srcu_index = srcu_read_lock(&ops->srcu); + goto unlock; + } } - return NULL; + ops = NULL; +unlock: + rcu_read_unlock(); + + return ops; +} + +static void rtnl_af_put(struct rtnl_af_ops *ops, int srcu_index) +{ + srcu_read_unlock(&ops->srcu, srcu_index); } /** * rtnl_af_register - Register rtnl_af_ops with rtnetlink. * @ops: struct rtnl_af_ops * to register * - * Returns 0 on success or a negative error code. + * Return: 0 on success or a negative error code. */ -void rtnl_af_register(struct rtnl_af_ops *ops) +int rtnl_af_register(struct rtnl_af_ops *ops) { + int err = init_srcu_struct(&ops->srcu); + + if (err) + return err; + rtnl_lock(); list_add_tail_rcu(&ops->list, &rtnl_af_ops); rtnl_unlock(); + + return 0; } EXPORT_SYMBOL_GPL(rtnl_af_register); @@ -579,6 +804,8 @@ void rtnl_af_unregister(struct rtnl_af_ops *ops) rtnl_unlock(); synchronize_rcu(); + synchronize_srcu(&ops->srcu); + cleanup_srcu_struct(&ops->srcu); } EXPORT_SYMBOL_GPL(rtnl_af_unregister); @@ -635,7 +862,7 @@ static int rtnl_link_slave_info_fill(struct sk_buff *skb, if (nla_put_string(skb, IFLA_INFO_SLAVE_KIND, ops->kind) < 0) return -EMSGSIZE; if (ops->fill_slave_info) { - slave_data = nla_nest_start(skb, IFLA_INFO_SLAVE_DATA); + slave_data = nla_nest_start_noflag(skb, IFLA_INFO_SLAVE_DATA); if (!slave_data) return -EMSGSIZE; err = ops->fill_slave_info(skb, master_dev, dev); @@ -667,7 +894,7 @@ static int rtnl_link_info_fill(struct sk_buff *skb, return err; } if (ops->fill_info) { - data = nla_nest_start(skb, IFLA_INFO_DATA); + data = nla_nest_start_noflag(skb, IFLA_INFO_DATA); if (data == NULL) return -EMSGSIZE; err = ops->fill_info(skb, dev); @@ -687,7 +914,7 @@ static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) struct nlattr *linkinfo; int err = -EMSGSIZE; - linkinfo = nla_nest_start(skb, IFLA_LINKINFO); + linkinfo = nla_nest_start_noflag(skb, IFLA_LINKINFO); if (linkinfo == NULL) goto out; @@ -711,15 +938,8 @@ out: int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, int echo) { struct sock *rtnl = net->rtnl; - int err = 0; - NETLINK_CB(skb).dst_group = group; - if (echo) - refcount_inc(&skb->users); - netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL); - if (echo) - err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); - return err; + return nlmsg_notify(rtnl, skb, pid, group, echo, GFP_KERNEL); } int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid) @@ -731,15 +951,11 @@ int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid) EXPORT_SYMBOL(rtnl_unicast); void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, - struct nlmsghdr *nlh, gfp_t flags) + const struct nlmsghdr *nlh, gfp_t flags) { struct sock *rtnl = net->rtnl; - int report = 0; - - if (nlh) - report = nlmsg_report(nlh); - nlmsg_notify(rtnl, skb, pid, group, report, flags); + nlmsg_notify(rtnl, skb, pid, group, nlmsg_report(nlh), flags); } EXPORT_SYMBOL(rtnl_notify); @@ -756,7 +972,11 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) struct nlattr *mx; int i, valid = 0; - mx = nla_nest_start(skb, RTA_METRICS); + /* nothing is dumped for dst_default_metrics, so just skip the loop */ + if (metrics == dst_default_metrics.metrics) + return 0; + + mx = nla_nest_start_noflag(skb, RTA_METRICS); if (mx == NULL) return -ENOBUFS; @@ -806,11 +1026,13 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, .rta_error = error, .rta_id = id, }; + unsigned long delta; if (dst) { - ci.rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse); + delta = jiffies - READ_ONCE(dst->lastuse); + ci.rta_lastuse = jiffies_delta_to_clock_t(delta); ci.rta_used = dst->__use; - ci.rta_clntref = atomic_read(&dst->__refcnt); + ci.rta_clntref = rcuref_read(&dst->__rcuref); } if (expires) { unsigned long clock; @@ -823,31 +1045,44 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, } EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); +void netif_set_operstate(struct net_device *dev, int newstate) +{ + unsigned int old = READ_ONCE(dev->operstate); + + do { + if (old == newstate) + return; + } while (!try_cmpxchg(&dev->operstate, &old, newstate)); + + netif_state_change(dev); +} +EXPORT_SYMBOL(netif_set_operstate); + static void set_operstate(struct net_device *dev, unsigned char transition) { - unsigned char operstate = dev->operstate; + unsigned char operstate = READ_ONCE(dev->operstate); switch (transition) { case IF_OPER_UP: if ((operstate == IF_OPER_DORMANT || + operstate == IF_OPER_TESTING || operstate == IF_OPER_UNKNOWN) && - !netif_dormant(dev)) + !netif_dormant(dev) && !netif_testing(dev)) operstate = IF_OPER_UP; break; + case IF_OPER_TESTING: + if (netif_oper_up(dev)) + operstate = IF_OPER_TESTING; + break; + case IF_OPER_DORMANT: - if (operstate == IF_OPER_UP || - operstate == IF_OPER_UNKNOWN) + if (netif_oper_up(dev)) operstate = IF_OPER_DORMANT; break; } - if (dev->operstate != operstate) { - write_lock_bh(&dev_base_lock); - dev->operstate = operstate; - write_unlock_bh(&dev_base_lock); - netdev_state_change(dev); - } + netif_set_operstate(dev, operstate); } static unsigned int rtnl_dev_get_flags(const struct net_device *dev) @@ -913,6 +1148,7 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, size += num_vfs * (nla_total_size(0) + nla_total_size(sizeof(struct ifla_vf_mac)) + + nla_total_size(sizeof(struct ifla_vf_broadcast)) + nla_total_size(sizeof(struct ifla_vf_vlan)) + nla_total_size(0) + /* nest IFLA_VF_VLAN_LIST */ nla_total_size(MAX_VLAN_LIST_LEN * @@ -922,24 +1158,30 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, nla_total_size(sizeof(struct ifla_vf_rate)) + nla_total_size(sizeof(struct ifla_vf_link_state)) + nla_total_size(sizeof(struct ifla_vf_rss_query_en)) + - nla_total_size(0) + /* nest IFLA_VF_STATS */ - /* IFLA_VF_STATS_RX_PACKETS */ - nla_total_size_64bit(sizeof(__u64)) + - /* IFLA_VF_STATS_TX_PACKETS */ - nla_total_size_64bit(sizeof(__u64)) + - /* IFLA_VF_STATS_RX_BYTES */ - nla_total_size_64bit(sizeof(__u64)) + - /* IFLA_VF_STATS_TX_BYTES */ - nla_total_size_64bit(sizeof(__u64)) + - /* IFLA_VF_STATS_BROADCAST */ - nla_total_size_64bit(sizeof(__u64)) + - /* IFLA_VF_STATS_MULTICAST */ - nla_total_size_64bit(sizeof(__u64)) + - /* IFLA_VF_STATS_RX_DROPPED */ - nla_total_size_64bit(sizeof(__u64)) + - /* IFLA_VF_STATS_TX_DROPPED */ - nla_total_size_64bit(sizeof(__u64)) + nla_total_size(sizeof(struct ifla_vf_trust))); + if (~ext_filter_mask & RTEXT_FILTER_SKIP_STATS) { + size += num_vfs * + (nla_total_size(0) + /* nest IFLA_VF_STATS */ + /* IFLA_VF_STATS_RX_PACKETS */ + nla_total_size_64bit(sizeof(__u64)) + + /* IFLA_VF_STATS_TX_PACKETS */ + nla_total_size_64bit(sizeof(__u64)) + + /* IFLA_VF_STATS_RX_BYTES */ + nla_total_size_64bit(sizeof(__u64)) + + /* IFLA_VF_STATS_TX_BYTES */ + nla_total_size_64bit(sizeof(__u64)) + + /* IFLA_VF_STATS_BROADCAST */ + nla_total_size_64bit(sizeof(__u64)) + + /* IFLA_VF_STATS_MULTICAST */ + nla_total_size_64bit(sizeof(__u64)) + + /* IFLA_VF_STATS_RX_DROPPED */ + nla_total_size_64bit(sizeof(__u64)) + + /* IFLA_VF_STATS_TX_DROPPED */ + nla_total_size_64bit(sizeof(__u64))); + } + if (dev->netdev_ops->ndo_get_vf_guid) + size += num_vfs * 2 * + nla_total_size(sizeof(struct ifla_vf_guid)); return size; } else return 0; @@ -980,16 +1222,61 @@ static size_t rtnl_xdp_size(void) return xdp_size; } +static size_t rtnl_prop_list_size(const struct net_device *dev) +{ + struct netdev_name_node *name_node; + unsigned int cnt = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(name_node, &dev->name_node->list, list) + cnt++; + rcu_read_unlock(); + + if (!cnt) + return 0; + + return nla_total_size(0) + cnt * nla_total_size(ALTIFNAMSIZ); +} + +static size_t rtnl_proto_down_size(const struct net_device *dev) +{ + size_t size = nla_total_size(1); + + /* Assume dev->proto_down_reason is not zero. */ + size += nla_total_size(0) + nla_total_size(4); + + return size; +} + +static size_t rtnl_devlink_port_size(const struct net_device *dev) +{ + size_t size = nla_total_size(0); /* nest IFLA_DEVLINK_PORT */ + + if (dev->devlink_port) + size += devlink_nl_port_handle_size(dev->devlink_port); + + return size; +} + +static size_t rtnl_dpll_pin_size(const struct net_device *dev) +{ + size_t size = nla_total_size(0); /* nest IFLA_DPLL_PIN */ + + size += dpll_netdev_pin_handle_size(dev); + + return size; +} + static noinline size_t if_nlmsg_size(const struct net_device *dev, u32 ext_filter_mask) { - return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + size_t size; + + size = NLMSG_ALIGN(sizeof(struct ifinfomsg)) + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + nla_total_size(IFALIASZ) /* IFLA_IFALIAS */ + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ + nla_total_size_64bit(sizeof(struct rtnl_link_ifmap)) - + nla_total_size(sizeof(struct rtnl_link_stats)) - + nla_total_size_64bit(sizeof(struct rtnl_link_stats64)) + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */ + nla_total_size(4) /* IFLA_TXQLEN */ @@ -999,12 +1286,19 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_MASTER */ + nla_total_size(1) /* IFLA_CARRIER */ + nla_total_size(4) /* IFLA_PROMISCUITY */ + + nla_total_size(4) /* IFLA_ALLMULTI */ + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */ + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ + nla_total_size(4) /* IFLA_GSO_MAX_SEGS */ + nla_total_size(4) /* IFLA_GSO_MAX_SIZE */ + + nla_total_size(4) /* IFLA_GRO_MAX_SIZE */ + + nla_total_size(4) /* IFLA_GSO_IPV4_MAX_SIZE */ + + nla_total_size(4) /* IFLA_GRO_IPV4_MAX_SIZE */ + + nla_total_size(4) /* IFLA_TSO_MAX_SIZE */ + + nla_total_size(4) /* IFLA_TSO_MAX_SEGS */ + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + + nla_total_size(1) /* IFLA_NETNS_IMMUTABLE */ + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ + nla_total_size(4) /* IFLA_LINK_NETNSID */ + nla_total_size(4) /* IFLA_GROUP */ @@ -1021,13 +1315,26 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_EVENT */ + nla_total_size(4) /* IFLA_NEW_NETNSID */ + nla_total_size(4) /* IFLA_NEW_IFINDEX */ - + nla_total_size(1) /* IFLA_PROTO_DOWN */ + + rtnl_proto_down_size(dev) /* proto down */ + nla_total_size(4) /* IFLA_TARGET_NETNSID */ + nla_total_size(4) /* IFLA_CARRIER_UP_COUNT */ + nla_total_size(4) /* IFLA_CARRIER_DOWN_COUNT */ + nla_total_size(4) /* IFLA_MIN_MTU */ + nla_total_size(4) /* IFLA_MAX_MTU */ + + rtnl_prop_list_size(dev) + + nla_total_size(MAX_ADDR_LEN) /* IFLA_PERM_ADDRESS */ + + rtnl_devlink_port_size(dev) + + rtnl_dpll_pin_size(dev) + + nla_total_size(8) /* IFLA_MAX_PACING_OFFLOAD_HORIZON */ + + nla_total_size(2) /* IFLA_HEADROOM */ + + nla_total_size(2) /* IFLA_TAILROOM */ + 0; + + if (!(ext_filter_mask & RTEXT_FILTER_SKIP_STATS)) + size += nla_total_size(sizeof(struct rtnl_link_stats)) + + nla_total_size_64bit(sizeof(struct rtnl_link_stats64)); + + return size; } static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) @@ -1037,12 +1344,12 @@ static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) int vf; int err; - vf_ports = nla_nest_start(skb, IFLA_VF_PORTS); + vf_ports = nla_nest_start_noflag(skb, IFLA_VF_PORTS); if (!vf_ports) return -EMSGSIZE; for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) { - vf_port = nla_nest_start(skb, IFLA_VF_PORT); + vf_port = nla_nest_start_noflag(skb, IFLA_VF_PORT); if (!vf_port) goto nla_put_failure; if (nla_put_u32(skb, IFLA_PORT_VF, vf)) @@ -1071,7 +1378,7 @@ static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev) struct nlattr *port_self; int err; - port_self = nla_nest_start(skb, IFLA_PORT_SELF); + port_self = nla_nest_start_noflag(skb, IFLA_PORT_SELF); if (!port_self) return -EMSGSIZE; @@ -1146,22 +1453,17 @@ static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev) static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) { + struct netdev_phys_item_id ppid = { }; int err; - struct switchdev_attr attr = { - .orig_dev = dev, - .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - .flags = SWITCHDEV_F_NO_RECURSE, - }; - err = switchdev_port_attr_get(dev, &attr); + err = netif_get_port_parent_id(dev, &ppid, false); if (err) { if (err == -EOPNOTSUPP) return 0; return err; } - if (nla_put(skb, IFLA_PHYS_SWITCH_ID, attr.u.ppid.id_len, - attr.u.ppid.id)) + if (nla_put(skb, IFLA_PHYS_SWITCH_ID, ppid.id_len, ppid.id)) return -EMSGSIZE; return 0; @@ -1194,7 +1496,7 @@ static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb, static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, struct net_device *dev, int vfs_num, - struct nlattr *vfinfo) + u32 ext_filter_mask) { struct ifla_vf_rss_query_en vf_rss_query_en; struct nlattr *vf, *vfstats, *vfvlanlist; @@ -1207,7 +1509,10 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, struct ifla_vf_vlan vf_vlan; struct ifla_vf_rate vf_rate; struct ifla_vf_mac vf_mac; + struct ifla_vf_broadcast vf_broadcast; struct ifla_vf_info ivi; + struct ifla_vf_guid node_guid; + struct ifla_vf_guid port_guid; memset(&ivi, 0, sizeof(ivi)); @@ -1229,6 +1534,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, return 0; memset(&vf_vlan_info, 0, sizeof(vf_vlan_info)); + memset(&node_guid, 0, sizeof(node_guid)); + memset(&port_guid, 0, sizeof(port_guid)); vf_mac.vf = vf_vlan.vf = @@ -1238,9 +1545,12 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, vf_spoofchk.vf = vf_linkstate.vf = vf_rss_query_en.vf = - vf_trust.vf = ivi.vf; + vf_trust.vf = + node_guid.vf = + port_guid.vf = ivi.vf; memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); + memcpy(vf_broadcast.broadcast, dev->broadcast, dev->addr_len); vf_vlan.vlan = ivi.vlan; vf_vlan.qos = ivi.qos; vf_vlan_info.vlan = ivi.vlan; @@ -1253,10 +1563,11 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, vf_linkstate.link_state = ivi.linkstate; vf_rss_query_en.setting = ivi.rss_query_en; vf_trust.setting = ivi.trusted; - vf = nla_nest_start(skb, IFLA_VF_INFO); + vf = nla_nest_start_noflag(skb, IFLA_VF_INFO); if (!vf) - goto nla_put_vfinfo_failure; + return -EMSGSIZE; if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) || + nla_put(skb, IFLA_VF_BROADCAST, sizeof(vf_broadcast), &vf_broadcast) || nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) || nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate), &vf_rate) || @@ -1272,7 +1583,17 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, nla_put(skb, IFLA_VF_TRUST, sizeof(vf_trust), &vf_trust)) goto nla_put_vf_failure; - vfvlanlist = nla_nest_start(skb, IFLA_VF_VLAN_LIST); + + if (dev->netdev_ops->ndo_get_vf_guid && + !dev->netdev_ops->ndo_get_vf_guid(dev, vfs_num, &node_guid, + &port_guid)) { + if (nla_put(skb, IFLA_VF_IB_NODE_GUID, sizeof(node_guid), + &node_guid) || + nla_put(skb, IFLA_VF_IB_PORT_GUID, sizeof(port_guid), + &port_guid)) + goto nla_put_vf_failure; + } + vfvlanlist = nla_nest_start_noflag(skb, IFLA_VF_VLAN_LIST); if (!vfvlanlist) goto nla_put_vf_failure; if (nla_put(skb, IFLA_VF_VLAN_INFO, sizeof(vf_vlan_info), @@ -1281,40 +1602,40 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, goto nla_put_vf_failure; } nla_nest_end(skb, vfvlanlist); - memset(&vf_stats, 0, sizeof(vf_stats)); - if (dev->netdev_ops->ndo_get_vf_stats) - dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num, - &vf_stats); - vfstats = nla_nest_start(skb, IFLA_VF_STATS); - if (!vfstats) - goto nla_put_vf_failure; - if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS, - vf_stats.rx_packets, IFLA_VF_STATS_PAD) || - nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_PACKETS, - vf_stats.tx_packets, IFLA_VF_STATS_PAD) || - nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_BYTES, - vf_stats.rx_bytes, IFLA_VF_STATS_PAD) || - nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_BYTES, - vf_stats.tx_bytes, IFLA_VF_STATS_PAD) || - nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST, - vf_stats.broadcast, IFLA_VF_STATS_PAD) || - nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST, - vf_stats.multicast, IFLA_VF_STATS_PAD) || - nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_DROPPED, - vf_stats.rx_dropped, IFLA_VF_STATS_PAD) || - nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_DROPPED, - vf_stats.tx_dropped, IFLA_VF_STATS_PAD)) { - nla_nest_cancel(skb, vfstats); - goto nla_put_vf_failure; + if (~ext_filter_mask & RTEXT_FILTER_SKIP_STATS) { + memset(&vf_stats, 0, sizeof(vf_stats)); + if (dev->netdev_ops->ndo_get_vf_stats) + dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num, + &vf_stats); + vfstats = nla_nest_start_noflag(skb, IFLA_VF_STATS); + if (!vfstats) + goto nla_put_vf_failure; + if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS, + vf_stats.rx_packets, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_PACKETS, + vf_stats.tx_packets, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_BYTES, + vf_stats.rx_bytes, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_BYTES, + vf_stats.tx_bytes, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST, + vf_stats.broadcast, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST, + vf_stats.multicast, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_DROPPED, + vf_stats.rx_dropped, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_DROPPED, + vf_stats.tx_dropped, IFLA_VF_STATS_PAD)) { + nla_nest_cancel(skb, vfstats); + goto nla_put_vf_failure; + } + nla_nest_end(skb, vfstats); } - nla_nest_end(skb, vfstats); nla_nest_end(skb, vf); return 0; nla_put_vf_failure: nla_nest_cancel(skb, vf); -nla_put_vfinfo_failure: - nla_nest_cancel(skb, vfinfo); return -EMSGSIZE; } @@ -1335,30 +1656,33 @@ static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb, if (!dev->netdev_ops->ndo_get_vf_config) return 0; - vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); + vfinfo = nla_nest_start_noflag(skb, IFLA_VFINFO_LIST); if (!vfinfo) return -EMSGSIZE; for (i = 0; i < num_vfs; i++) { - if (rtnl_fill_vfinfo(skb, dev, i, vfinfo)) + if (rtnl_fill_vfinfo(skb, dev, i, ext_filter_mask)) { + nla_nest_cancel(skb, vfinfo); return -EMSGSIZE; + } } nla_nest_end(skb, vfinfo); return 0; } -static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) +static int rtnl_fill_link_ifmap(struct sk_buff *skb, + const struct net_device *dev) { struct rtnl_link_ifmap map; memset(&map, 0, sizeof(map)); - map.mem_start = dev->mem_start; - map.mem_end = dev->mem_end; - map.base_addr = dev->base_addr; - map.irq = dev->irq; - map.dma = dev->dma; - map.port = dev->if_port; + map.mem_start = READ_ONCE(dev->mem_start); + map.mem_end = READ_ONCE(dev->mem_end); + map.base_addr = READ_ONCE(dev->base_addr); + map.irq = READ_ONCE(dev->irq); + map.dma = READ_ONCE(dev->dma); + map.port = READ_ONCE(dev->if_port); if (nla_put_64bit(skb, IFLA_MAP, sizeof(map), &map, IFLA_PAD)) return -EMSGSIZE; @@ -1369,24 +1693,25 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) static u32 rtnl_xdp_prog_skb(struct net_device *dev) { const struct bpf_prog *generic_xdp_prog; + u32 res = 0; - ASSERT_RTNL(); + rcu_read_lock(); + generic_xdp_prog = rcu_dereference(dev->xdp_prog); + if (generic_xdp_prog) + res = generic_xdp_prog->aux->id; + rcu_read_unlock(); - generic_xdp_prog = rtnl_dereference(dev->xdp_prog); - if (!generic_xdp_prog) - return 0; - return generic_xdp_prog->aux->id; + return res; } static u32 rtnl_xdp_prog_drv(struct net_device *dev) { - return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, XDP_QUERY_PROG); + return dev_xdp_prog_id(dev, XDP_MODE_DRV); } static u32 rtnl_xdp_prog_hw(struct net_device *dev) { - return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, - XDP_QUERY_PROG_HW); + return dev_xdp_prog_id(dev, XDP_MODE_HW); } static int rtnl_xdp_report_one(struct sk_buff *skb, struct net_device *dev, @@ -1420,7 +1745,7 @@ static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) int err; u8 mode; - xdp = nla_nest_start(skb, IFLA_XDP); + xdp = nla_nest_start_noflag(skb, IFLA_XDP); if (!xdp) return -EMSGSIZE; @@ -1496,20 +1821,22 @@ static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev) upper_dev = netdev_master_upper_dev_get_rcu(dev); if (upper_dev) - ret = nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex); + ret = nla_put_u32(skb, IFLA_MASTER, + READ_ONCE(upper_dev->ifindex)); rcu_read_unlock(); return ret; } -static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev) +static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev, + bool force) { - int ifindex = dev_get_iflink(dev); + int iflink = dev_get_iflink(dev); - if (dev->ifindex == ifindex) - return 0; + if (force || READ_ONCE(dev->ifindex) != iflink) + return nla_put_u32(skb, IFLA_LINK, iflink); - return nla_put_u32(skb, IFLA_LINK, ifindex); + return 0; } static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb, @@ -1524,20 +1851,24 @@ static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb, static int rtnl_fill_link_netnsid(struct sk_buff *skb, const struct net_device *dev, - struct net *src_net) + struct net *src_net, gfp_t gfp) { + bool put_iflink = false; + if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) { struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); if (!net_eq(dev_net(dev), link_net)) { - int id = peernet2id_alloc(src_net, link_net); + int id = peernet2id_alloc(src_net, link_net, gfp); if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) return -EMSGSIZE; + + put_iflink = true; } } - return 0; + return nla_put_iflink(skb, dev, put_iflink); } static int rtnl_fill_link_af(struct sk_buff *skb, @@ -1547,7 +1878,7 @@ static int rtnl_fill_link_af(struct sk_buff *skb, const struct rtnl_af_ops *af_ops; struct nlattr *af_spec; - af_spec = nla_nest_start(skb, IFLA_AF_SPEC); + af_spec = nla_nest_start_noflag(skb, IFLA_AF_SPEC); if (!af_spec) return -EMSGSIZE; @@ -1558,7 +1889,7 @@ static int rtnl_fill_link_af(struct sk_buff *skb, if (!af_ops->fill_link_af) continue; - af = nla_nest_start(skb, af_ops->family); + af = nla_nest_start_noflag(skb, af_ops->family); if (!af) return -EMSGSIZE; @@ -1581,15 +1912,129 @@ static int rtnl_fill_link_af(struct sk_buff *skb, return 0; } +static int rtnl_fill_alt_ifnames(struct sk_buff *skb, + const struct net_device *dev) +{ + struct netdev_name_node *name_node; + int count = 0; + + list_for_each_entry_rcu(name_node, &dev->name_node->list, list) { + if (nla_put_string(skb, IFLA_ALT_IFNAME, name_node->name)) + return -EMSGSIZE; + count++; + } + return count; +} + +/* RCU protected. */ +static int rtnl_fill_prop_list(struct sk_buff *skb, + const struct net_device *dev) +{ + struct nlattr *prop_list; + int ret; + + prop_list = nla_nest_start(skb, IFLA_PROP_LIST); + if (!prop_list) + return -EMSGSIZE; + + ret = rtnl_fill_alt_ifnames(skb, dev); + if (ret <= 0) + goto nest_cancel; + + nla_nest_end(skb, prop_list); + return 0; + +nest_cancel: + nla_nest_cancel(skb, prop_list); + return ret; +} + +static int rtnl_fill_proto_down(struct sk_buff *skb, + const struct net_device *dev) +{ + struct nlattr *pr; + u32 preason; + + if (nla_put_u8(skb, IFLA_PROTO_DOWN, READ_ONCE(dev->proto_down))) + goto nla_put_failure; + + preason = READ_ONCE(dev->proto_down_reason); + if (!preason) + return 0; + + pr = nla_nest_start(skb, IFLA_PROTO_DOWN_REASON); + if (!pr) + return -EMSGSIZE; + + if (nla_put_u32(skb, IFLA_PROTO_DOWN_REASON_VALUE, preason)) { + nla_nest_cancel(skb, pr); + goto nla_put_failure; + } + + nla_nest_end(skb, pr); + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int rtnl_fill_devlink_port(struct sk_buff *skb, + const struct net_device *dev) +{ + struct nlattr *devlink_port_nest; + int ret; + + devlink_port_nest = nla_nest_start(skb, IFLA_DEVLINK_PORT); + if (!devlink_port_nest) + return -EMSGSIZE; + + if (dev->devlink_port) { + ret = devlink_nl_port_handle_fill(skb, dev->devlink_port); + if (ret < 0) + goto nest_cancel; + } + + nla_nest_end(skb, devlink_port_nest); + return 0; + +nest_cancel: + nla_nest_cancel(skb, devlink_port_nest); + return ret; +} + +static int rtnl_fill_dpll_pin(struct sk_buff *skb, + const struct net_device *dev) +{ + struct nlattr *dpll_pin_nest; + int ret; + + dpll_pin_nest = nla_nest_start(skb, IFLA_DPLL_PIN); + if (!dpll_pin_nest) + return -EMSGSIZE; + + ret = dpll_netdev_add_pin_handle(skb, dev); + if (ret < 0) + goto nest_cancel; + + nla_nest_end(skb, dpll_pin_nest); + return 0; + +nest_cancel: + nla_nest_cancel(skb, dpll_pin_nest); + return ret; +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct net *src_net, int type, u32 pid, u32 seq, u32 change, unsigned int flags, u32 ext_filter_mask, u32 event, int *new_nsid, int new_ifindex, - int tgt_netnsid) + int tgt_netnsid, gfp_t gfp) { + char devname[IFNAMSIZ]; struct ifinfomsg *ifm; struct nlmsghdr *nlh; + struct Qdisc *qdisc; ASSERT_RTNL(); nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); @@ -1599,44 +2044,69 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, ifm = nlmsg_data(nlh); ifm->ifi_family = AF_UNSPEC; ifm->__ifi_pad = 0; - ifm->ifi_type = dev->type; - ifm->ifi_index = dev->ifindex; - ifm->ifi_flags = dev_get_flags(dev); + ifm->ifi_type = READ_ONCE(dev->type); + ifm->ifi_index = READ_ONCE(dev->ifindex); + ifm->ifi_flags = netif_get_flags(dev); ifm->ifi_change = change; if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid)) goto nla_put_failure; - if (nla_put_string(skb, IFLA_IFNAME, dev->name) || - nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) || + netdev_copy_name(dev, devname); + if (nla_put_string(skb, IFLA_IFNAME, devname)) + goto nla_put_failure; + + if (nla_put_u32(skb, IFLA_TXQLEN, READ_ONCE(dev->tx_queue_len)) || nla_put_u8(skb, IFLA_OPERSTATE, - netif_running(dev) ? dev->operstate : IF_OPER_DOWN) || - nla_put_u8(skb, IFLA_LINKMODE, dev->link_mode) || - nla_put_u32(skb, IFLA_MTU, dev->mtu) || - nla_put_u32(skb, IFLA_MIN_MTU, dev->min_mtu) || - nla_put_u32(skb, IFLA_MAX_MTU, dev->max_mtu) || - nla_put_u32(skb, IFLA_GROUP, dev->group) || - nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) || - nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) || - nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) || - nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) || + netif_running(dev) ? READ_ONCE(dev->operstate) : + IF_OPER_DOWN) || + nla_put_u8(skb, IFLA_LINKMODE, READ_ONCE(dev->link_mode)) || + nla_put_u8(skb, IFLA_NETNS_IMMUTABLE, dev->netns_immutable) || + nla_put_u32(skb, IFLA_MTU, READ_ONCE(dev->mtu)) || + nla_put_u32(skb, IFLA_MIN_MTU, READ_ONCE(dev->min_mtu)) || + nla_put_u32(skb, IFLA_MAX_MTU, READ_ONCE(dev->max_mtu)) || + nla_put_u32(skb, IFLA_GROUP, READ_ONCE(dev->group)) || + nla_put_u32(skb, IFLA_PROMISCUITY, READ_ONCE(dev->promiscuity)) || + nla_put_u32(skb, IFLA_ALLMULTI, READ_ONCE(dev->allmulti)) || + nla_put_u32(skb, IFLA_NUM_TX_QUEUES, + READ_ONCE(dev->num_tx_queues)) || + nla_put_u32(skb, IFLA_GSO_MAX_SEGS, + READ_ONCE(dev->gso_max_segs)) || + nla_put_u32(skb, IFLA_GSO_MAX_SIZE, + READ_ONCE(dev->gso_max_size)) || + nla_put_u32(skb, IFLA_GRO_MAX_SIZE, + READ_ONCE(dev->gro_max_size)) || + nla_put_u32(skb, IFLA_GSO_IPV4_MAX_SIZE, + READ_ONCE(dev->gso_ipv4_max_size)) || + nla_put_u32(skb, IFLA_GRO_IPV4_MAX_SIZE, + READ_ONCE(dev->gro_ipv4_max_size)) || + nla_put_u32(skb, IFLA_TSO_MAX_SIZE, + READ_ONCE(dev->tso_max_size)) || + nla_put_u32(skb, IFLA_TSO_MAX_SEGS, + READ_ONCE(dev->tso_max_segs)) || + nla_put_uint(skb, IFLA_MAX_PACING_OFFLOAD_HORIZON, + READ_ONCE(dev->max_pacing_offload_horizon)) || #ifdef CONFIG_RPS - nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || + nla_put_u32(skb, IFLA_NUM_RX_QUEUES, + READ_ONCE(dev->num_rx_queues)) || #endif - nla_put_iflink(skb, dev) || put_master_ifindex(skb, dev) || nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) || - (dev->qdisc && - nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) || nla_put_ifalias(skb, dev) || nla_put_u32(skb, IFLA_CARRIER_CHANGES, atomic_read(&dev->carrier_up_count) + atomic_read(&dev->carrier_down_count)) || - nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down) || nla_put_u32(skb, IFLA_CARRIER_UP_COUNT, atomic_read(&dev->carrier_up_count)) || nla_put_u32(skb, IFLA_CARRIER_DOWN_COUNT, - atomic_read(&dev->carrier_down_count))) + atomic_read(&dev->carrier_down_count)) || + nla_put_u16(skb, IFLA_HEADROOM, + READ_ONCE(dev->needed_headroom)) || + nla_put_u16(skb, IFLA_TAILROOM, + READ_ONCE(dev->needed_tailroom))) + goto nla_put_failure; + + if (rtnl_fill_proto_down(skb, dev)) goto nla_put_failure; if (event != IFLA_EVENT_NONE) { @@ -1644,9 +2114,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, goto nla_put_failure; } - if (rtnl_fill_link_ifmap(skb, dev)) - goto nla_put_failure; - if (dev->addr_len) { if (nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr) || nla_put(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast)) @@ -1662,7 +2129,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, if (rtnl_phys_switch_id_fill(skb, dev)) goto nla_put_failure; - if (rtnl_fill_stats(skb, dev)) + if (!(ext_filter_mask & RTEXT_FILTER_SKIP_STATS) && + rtnl_fill_stats(skb, dev)) goto nla_put_failure; if (rtnl_fill_vf(skb, dev, ext_filter_mask)) @@ -1679,9 +2147,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, goto nla_put_failure; } - if (rtnl_fill_link_netnsid(skb, dev, src_net)) - goto nla_put_failure; - if (new_nsid && nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0) goto nla_put_failure; @@ -1689,12 +2154,40 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, nla_put_s32(skb, IFLA_NEW_IFINDEX, new_ifindex) < 0) goto nla_put_failure; + if (memchr_inv(dev->perm_addr, '\0', dev->addr_len) && + nla_put(skb, IFLA_PERM_ADDRESS, dev->addr_len, dev->perm_addr)) + goto nla_put_failure; rcu_read_lock(); + if (rtnl_fill_link_netnsid(skb, dev, src_net, GFP_ATOMIC)) + goto nla_put_failure_rcu; + qdisc = rcu_dereference(dev->qdisc); + if (qdisc && nla_put_string(skb, IFLA_QDISC, qdisc->ops->id)) + goto nla_put_failure_rcu; if (rtnl_fill_link_af(skb, dev, ext_filter_mask)) goto nla_put_failure_rcu; + if (rtnl_fill_link_ifmap(skb, dev)) + goto nla_put_failure_rcu; + if (rtnl_fill_prop_list(skb, dev)) + goto nla_put_failure_rcu; rcu_read_unlock(); + if (dev->dev.parent && + nla_put_string(skb, IFLA_PARENT_DEV_NAME, + dev_name(dev->dev.parent))) + goto nla_put_failure; + + if (dev->dev.parent && dev->dev.parent->bus && + nla_put_string(skb, IFLA_PARENT_DEV_BUS_NAME, + dev->dev.parent->bus->name)) + goto nla_put_failure; + + if (rtnl_fill_devlink_port(skb, dev)) + goto nla_put_failure; + + if (rtnl_fill_dpll_pin(skb, dev)) + goto nla_put_failure; + nlmsg_end(skb, nlh); return 0; @@ -1706,6 +2199,7 @@ nla_put_failure: } static const struct nla_policy ifla_policy[IFLA_MAX+1] = { + [IFLA_UNSPEC] = { .strict_start_type = IFLA_DPLL_PIN }, [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 }, [IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, @@ -1734,7 +2228,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 }, [IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 }, [IFLA_GSO_MAX_SEGS] = { .type = NLA_U32 }, - [IFLA_GSO_MAX_SIZE] = { .type = NLA_U32 }, + [IFLA_GSO_MAX_SIZE] = NLA_POLICY_MIN(NLA_U32, MAX_TCP_HEADER + 1), [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, [IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */ [IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, @@ -1748,6 +2242,22 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_CARRIER_DOWN_COUNT] = { .type = NLA_U32 }, [IFLA_MIN_MTU] = { .type = NLA_U32 }, [IFLA_MAX_MTU] = { .type = NLA_U32 }, + [IFLA_PROP_LIST] = { .type = NLA_NESTED }, + [IFLA_ALT_IFNAME] = { .type = NLA_STRING, + .len = ALTIFNAMSIZ - 1 }, + [IFLA_PERM_ADDRESS] = { .type = NLA_REJECT }, + [IFLA_PROTO_DOWN_REASON] = { .type = NLA_NESTED }, + [IFLA_NEW_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1), + [IFLA_PARENT_DEV_NAME] = { .type = NLA_NUL_STRING }, + [IFLA_GRO_MAX_SIZE] = { .type = NLA_U32 }, + [IFLA_TSO_MAX_SIZE] = { .type = NLA_REJECT }, + [IFLA_TSO_MAX_SEGS] = { .type = NLA_REJECT }, + [IFLA_ALLMULTI] = { .type = NLA_REJECT }, + [IFLA_GSO_IPV4_MAX_SIZE] = NLA_POLICY_MIN(NLA_U32, MAX_TCP_HEADER + 1), + [IFLA_GRO_IPV4_MAX_SIZE] = { .type = NLA_U32 }, + [IFLA_NETNS_IMMUTABLE] = { .type = NLA_REJECT }, + [IFLA_HEADROOM] = { .type = NLA_REJECT }, + [IFLA_TAILROOM] = { .type = NLA_REJECT }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -1759,6 +2269,7 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) }, + [IFLA_VF_BROADCAST] = { .type = NLA_REJECT }, [IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) }, [IFLA_VF_VLAN_LIST] = { .type = NLA_NESTED }, [IFLA_VF_TX_RATE] = { .len = sizeof(struct ifla_vf_tx_rate) }, @@ -1792,26 +2303,28 @@ static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { }; static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = { + [IFLA_XDP_UNSPEC] = { .strict_start_type = IFLA_XDP_EXPECTED_FD }, [IFLA_XDP_FD] = { .type = NLA_S32 }, + [IFLA_XDP_EXPECTED_FD] = { .type = NLA_S32 }, [IFLA_XDP_ATTACHED] = { .type = NLA_U8 }, [IFLA_XDP_FLAGS] = { .type = NLA_U32 }, [IFLA_XDP_PROG_ID] = { .type = NLA_U32 }, }; -static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla) +static struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla, + int *ops_srcu_index) { - const struct rtnl_link_ops *ops = NULL; struct nlattr *linfo[IFLA_INFO_MAX + 1]; + struct rtnl_link_ops *ops = NULL; - if (nla_parse_nested(linfo, IFLA_INFO_MAX, nla, - ifla_info_policy, NULL) < 0) + if (nla_parse_nested_deprecated(linfo, IFLA_INFO_MAX, nla, ifla_info_policy, NULL) < 0) return NULL; if (linfo[IFLA_INFO_KIND]) { char kind[MODULE_NAME_LEN]; - nla_strlcpy(kind, linfo[IFLA_INFO_KIND], sizeof(kind)); - ops = rtnl_link_ops_get(kind); + nla_strscpy(kind, linfo[IFLA_INFO_KIND], sizeof(kind)); + ops = rtnl_link_ops_get(kind, ops_srcu_index); } return ops; @@ -1825,6 +2338,13 @@ static bool link_master_filtered(struct net_device *dev, int master_idx) return false; master = netdev_master_upper_dev_get(dev); + + /* 0 is already used to denote IFLA_MASTER wasn't passed, therefore need + * another invalid value for ifindex to denote "no master". + */ + if (master_idx == -1) + return !!master; + if (!master || master->ifindex != master_idx) return true; @@ -1887,12 +2407,12 @@ static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh, if (strict_check) { struct ifinfomsg *ifm; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { + ifm = nlmsg_payload(nlh, sizeof(*ifm)); + if (!ifm) { NL_SET_ERR_MSG(extack, "Invalid header for link dump"); return -EINVAL; } - ifm = nlmsg_data(nlh); if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || ifm->ifi_change) { NL_SET_ERR_MSG(extack, "Invalid values in header for link dump request"); @@ -1903,8 +2423,9 @@ static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh, return -EINVAL; } - return nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFLA_MAX, - ifla_policy, extack); + return nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, + IFLA_MAX, ifla_policy, + extack); } /* A hack to preserve kernel<->userspace interface. @@ -1917,30 +2438,29 @@ static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh, hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ? sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg); - return nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, extack); + return nlmsg_parse_deprecated(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, + extack); } static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { struct netlink_ext_ack *extack = cb->extack; + struct rtnl_link_ops *kind_ops = NULL; const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); - struct net *tgt_net = net; - int h, s_h; - int idx = 0, s_idx; - struct net_device *dev; - struct hlist_head *head; + unsigned int flags = NLM_F_MULTI; struct nlattr *tb[IFLA_MAX+1]; + struct { + unsigned long ifindex; + } *ctx = (void *)cb->ctx; + struct net *tgt_net = net; u32 ext_filter_mask = 0; - const struct rtnl_link_ops *kind_ops = NULL; - unsigned int flags = NLM_F_MULTI; + struct net_device *dev; + int ops_srcu_index; int master_idx = 0; int netnsid = -1; int err, i; - s_h = cb->args[0]; - s_idx = cb->args[1]; - err = rtnl_valid_dump_ifinfo_req(nlh, cb->strict_check, tb, extack); if (err < 0) { if (cb->strict_check) @@ -1960,7 +2480,9 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) tgt_net = rtnl_get_net_ns_capable(skb->sk, netnsid); if (IS_ERR(tgt_net)) { NL_SET_ERR_MSG(extack, "Invalid target network namespace id"); - return PTR_ERR(tgt_net); + err = PTR_ERR(tgt_net); + netnsid = -1; + goto out; } break; case IFLA_EXT_MASK: @@ -1970,12 +2492,13 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) master_idx = nla_get_u32(tb[i]); break; case IFLA_LINKINFO: - kind_ops = linkinfo_to_kind_ops(tb[i]); + kind_ops = linkinfo_to_kind_ops(tb[i], &ops_srcu_index); break; default: if (cb->strict_check) { NL_SET_ERR_MSG(extack, "Unsupported attribute in link dump request"); - return -EINVAL; + err = -EINVAL; + goto out; } } } @@ -1984,54 +2507,59 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) flags |= NLM_F_DUMP_FILTERED; walk_entries: - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - idx = 0; - head = &tgt_net->dev_index_head[h]; - hlist_for_each_entry(dev, head, index_hlist) { - if (link_dump_filtered(dev, master_idx, kind_ops)) - goto cont; - if (idx < s_idx) - goto cont; - err = rtnl_fill_ifinfo(skb, dev, net, - RTM_NEWLINK, - NETLINK_CB(cb->skb).portid, - nlh->nlmsg_seq, 0, flags, - ext_filter_mask, 0, NULL, 0, - netnsid); - - if (err < 0) { - if (likely(skb->len)) - goto out; - - goto out_err; - } -cont: - idx++; - } + err = 0; + for_each_netdev_dump(tgt_net, dev, ctx->ifindex) { + if (link_dump_filtered(dev, master_idx, kind_ops)) + continue; + err = rtnl_fill_ifinfo(skb, dev, net, RTM_NEWLINK, + NETLINK_CB(cb->skb).portid, + nlh->nlmsg_seq, 0, flags, + ext_filter_mask, 0, NULL, 0, + netnsid, GFP_KERNEL); + if (err < 0) + break; } -out: - err = skb->len; -out_err: - cb->args[1] = idx; - cb->args[0] = h; - cb->seq = net->dev_base_seq; + + + cb->seq = tgt_net->dev_base_seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + +out: + + if (kind_ops) + rtnl_link_ops_put(kind_ops, ops_srcu_index); if (netnsid >= 0) put_net(tgt_net); return err; } -int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len, - struct netlink_ext_ack *exterr) +int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer, + struct netlink_ext_ack *exterr) { - return nla_parse(tb, IFLA_MAX, head, len, ifla_policy, exterr); + const struct ifinfomsg *ifmp; + const struct nlattr *attrs; + size_t len; + + ifmp = nla_data(nla_peer); + attrs = nla_data(nla_peer) + sizeof(struct ifinfomsg); + len = nla_len(nla_peer) - sizeof(struct ifinfomsg); + + if (ifmp->ifi_index < 0) { + NL_SET_ERR_MSG_ATTR(exterr, nla_peer, + "ifindex can't be negative"); + return -EINVAL; + } + + return nla_parse_deprecated(tb, IFLA_MAX, attrs, len, ifla_policy, + exterr); } -EXPORT_SYMBOL(rtnl_nla_parse_ifla); +EXPORT_SYMBOL(rtnl_nla_parse_ifinfomsg); -struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) +static struct net *rtnl_link_get_net_ifla(struct nlattr *tb[]) { - struct net *net; + struct net *net = NULL; + /* Examine the link attributes and figure out which * network namespace we are talking about. */ @@ -2039,8 +2567,17 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID])); else if (tb[IFLA_NET_NS_FD]) net = get_net_ns_by_fd(nla_get_u32(tb[IFLA_NET_NS_FD])); - else + + return net; +} + +struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) +{ + struct net *net = rtnl_link_get_net_ifla(tb); + + if (!net) net = get_net(src_net); + return net; } EXPORT_SYMBOL(rtnl_link_get_net); @@ -2120,16 +2657,59 @@ invalid_attr: return -EINVAL; } -static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) +static int rtnl_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, + int max_tx_rate) { - if (dev) { - if (tb[IFLA_ADDRESS] && - nla_len(tb[IFLA_ADDRESS]) < dev->addr_len) - return -EINVAL; + const struct net_device_ops *ops = dev->netdev_ops; - if (tb[IFLA_BROADCAST] && - nla_len(tb[IFLA_BROADCAST]) < dev->addr_len) - return -EINVAL; + if (!ops->ndo_set_vf_rate) + return -EOPNOTSUPP; + if (max_tx_rate && max_tx_rate < min_tx_rate) + return -EINVAL; + + return ops->ndo_set_vf_rate(dev, vf, min_tx_rate, max_tx_rate); +} + +static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack) +{ + if (tb[IFLA_ADDRESS] && + nla_len(tb[IFLA_ADDRESS]) < dev->addr_len) + return -EINVAL; + + if (tb[IFLA_BROADCAST] && + nla_len(tb[IFLA_BROADCAST]) < dev->addr_len) + return -EINVAL; + + if (tb[IFLA_GSO_MAX_SIZE] && + nla_get_u32(tb[IFLA_GSO_MAX_SIZE]) > dev->tso_max_size) { + NL_SET_ERR_MSG(extack, "too big gso_max_size"); + return -EINVAL; + } + + if (tb[IFLA_GSO_MAX_SEGS] && + (nla_get_u32(tb[IFLA_GSO_MAX_SEGS]) > GSO_MAX_SEGS || + nla_get_u32(tb[IFLA_GSO_MAX_SEGS]) > dev->tso_max_segs)) { + NL_SET_ERR_MSG(extack, "too big gso_max_segs"); + return -EINVAL; + } + + if (tb[IFLA_GRO_MAX_SIZE] && + nla_get_u32(tb[IFLA_GRO_MAX_SIZE]) > GRO_MAX_SIZE) { + NL_SET_ERR_MSG(extack, "too big gro_max_size"); + return -EINVAL; + } + + if (tb[IFLA_GSO_IPV4_MAX_SIZE] && + nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE]) > dev->tso_max_size) { + NL_SET_ERR_MSG(extack, "too big gso_ipv4_max_size"); + return -EINVAL; + } + + if (tb[IFLA_GRO_IPV4_MAX_SIZE] && + nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE]) > GRO_MAX_SIZE) { + NL_SET_ERR_MSG(extack, "too big gro_ipv4_max_size"); + return -EINVAL; } if (tb[IFLA_AF_SPEC]) { @@ -2137,29 +2717,24 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) int rem, err; nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { - const struct rtnl_af_ops *af_ops; + struct rtnl_af_ops *af_ops; + int af_ops_srcu_index; - rcu_read_lock(); - af_ops = rtnl_af_lookup(nla_type(af)); - if (!af_ops) { - rcu_read_unlock(); + af_ops = rtnl_af_lookup(nla_type(af), &af_ops_srcu_index); + if (!af_ops) return -EAFNOSUPPORT; - } - if (!af_ops->set_link_af) { - rcu_read_unlock(); - return -EOPNOTSUPP; - } + if (!af_ops->set_link_af) + err = -EOPNOTSUPP; + else if (af_ops->validate_link_af) + err = af_ops->validate_link_af(dev, af, extack); + else + err = 0; - if (af_ops->validate_link_af) { - err = af_ops->validate_link_af(dev, af); - if (err < 0) { - rcu_read_unlock(); - return err; - } - } + rtnl_af_put(af_ops, af_ops_srcu_index); - rcu_read_unlock(); + if (err < 0) + return err; } } @@ -2190,6 +2765,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_MAC]) { struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]); + if (ivm->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_mac) err = ops->ndo_set_vf_mac(dev, ivm->vf, @@ -2201,6 +2778,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_VLAN]) { struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]); + if (ivv->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_vlan) err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan, @@ -2221,7 +2800,7 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) nla_for_each_nested(attr, tb[IFLA_VF_VLAN_LIST], rem) { if (nla_type(attr) != IFLA_VF_VLAN_INFO || - nla_len(attr) < NLA_HDRLEN) { + nla_len(attr) < sizeof(struct ifla_vf_vlan_info)) { return -EINVAL; } if (len >= MAX_VLAN_LIST_LEN) @@ -2233,6 +2812,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (len == 0) return -EINVAL; + if (ivvl[0]->vf >= INT_MAX) + return -EINVAL; err = ops->ndo_set_vf_vlan(dev, ivvl[0]->vf, ivvl[0]->vlan, ivvl[0]->qos, ivvl[0]->vlan_proto); if (err < 0) @@ -2243,17 +2824,16 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]); struct ifla_vf_info ivf; + if (ivt->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_get_vf_config) err = ops->ndo_get_vf_config(dev, ivt->vf, &ivf); if (err < 0) return err; - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rate) - err = ops->ndo_set_vf_rate(dev, ivt->vf, - ivf.min_tx_rate, - ivt->rate); + err = rtnl_set_vf_rate(dev, ivt->vf, + ivf.min_tx_rate, ivt->rate); if (err < 0) return err; } @@ -2261,11 +2841,11 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_RATE]) { struct ifla_vf_rate *ivt = nla_data(tb[IFLA_VF_RATE]); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rate) - err = ops->ndo_set_vf_rate(dev, ivt->vf, - ivt->min_tx_rate, - ivt->max_tx_rate); + if (ivt->vf >= INT_MAX) + return -EINVAL; + + err = rtnl_set_vf_rate(dev, ivt->vf, + ivt->min_tx_rate, ivt->max_tx_rate); if (err < 0) return err; } @@ -2273,6 +2853,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_SPOOFCHK]) { struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]); + if (ivs->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_spoofchk) err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, @@ -2284,6 +2866,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_LINK_STATE]) { struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]); + if (ivl->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_link_state) err = ops->ndo_set_vf_link_state(dev, ivl->vf, @@ -2297,6 +2881,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) err = -EOPNOTSUPP; ivrssq_en = nla_data(tb[IFLA_VF_RSS_QUERY_EN]); + if (ivrssq_en->vf >= INT_MAX) + return -EINVAL; if (ops->ndo_set_vf_rss_query_en) err = ops->ndo_set_vf_rss_query_en(dev, ivrssq_en->vf, ivrssq_en->setting); @@ -2307,6 +2893,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_TRUST]) { struct ifla_vf_trust *ivt = nla_data(tb[IFLA_VF_TRUST]); + if (ivt->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_trust) err = ops->ndo_set_vf_trust(dev, ivt->vf, ivt->setting); @@ -2317,15 +2905,18 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_IB_NODE_GUID]) { struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_NODE_GUID]); + if (ivt->vf >= INT_MAX) + return -EINVAL; if (!ops->ndo_set_vf_guid) return -EOPNOTSUPP; - return handle_vf_guid(dev, ivt, IFLA_VF_IB_NODE_GUID); } if (tb[IFLA_VF_IB_PORT_GUID]) { struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_PORT_GUID]); + if (ivt->vf >= INT_MAX) + return -EINVAL; if (!ops->ndo_set_vf_guid) return -EOPNOTSUPP; @@ -2342,12 +2933,19 @@ static int do_set_master(struct net_device *dev, int ifindex, const struct net_device_ops *ops; int err; + /* Release the lower lock, the upper is responsible for locking + * the lower if needed. None of the existing upper devices + * use netdev instance lock, so don't grab it. + */ + if (upper_dev) { if (upper_dev->ifindex == ifindex) return 0; ops = upper_dev->netdev_ops; if (ops->ndo_del_slave) { + netdev_unlock_ops(dev); err = ops->ndo_del_slave(upper_dev, dev); + netdev_lock_ops(dev); if (err) return err; } else { @@ -2361,7 +2959,9 @@ static int do_set_master(struct net_device *dev, int ifindex, return -EINVAL; ops = upper_dev->netdev_ops; if (ops->ndo_add_slave) { + netdev_unlock_ops(dev); err = ops->ndo_add_slave(upper_dev, dev, extack); + netdev_lock_ops(dev); if (err) return err; } else { @@ -2371,36 +2971,102 @@ static int do_set_master(struct net_device *dev, int ifindex, return 0; } +static const struct nla_policy ifla_proto_down_reason_policy[IFLA_PROTO_DOWN_REASON_VALUE + 1] = { + [IFLA_PROTO_DOWN_REASON_MASK] = { .type = NLA_U32 }, + [IFLA_PROTO_DOWN_REASON_VALUE] = { .type = NLA_U32 }, +}; + +static int do_set_proto_down(struct net_device *dev, + struct nlattr *nl_proto_down, + struct nlattr *nl_proto_down_reason, + struct netlink_ext_ack *extack) +{ + struct nlattr *pdreason[IFLA_PROTO_DOWN_REASON_MAX + 1]; + unsigned long mask = 0; + u32 value; + bool proto_down; + int err; + + if (!dev->change_proto_down) { + NL_SET_ERR_MSG(extack, "Protodown not supported by device"); + return -EOPNOTSUPP; + } + + if (nl_proto_down_reason) { + err = nla_parse_nested_deprecated(pdreason, + IFLA_PROTO_DOWN_REASON_MAX, + nl_proto_down_reason, + ifla_proto_down_reason_policy, + NULL); + if (err < 0) + return err; + + if (!pdreason[IFLA_PROTO_DOWN_REASON_VALUE]) { + NL_SET_ERR_MSG(extack, "Invalid protodown reason value"); + return -EINVAL; + } + + value = nla_get_u32(pdreason[IFLA_PROTO_DOWN_REASON_VALUE]); + + if (pdreason[IFLA_PROTO_DOWN_REASON_MASK]) + mask = nla_get_u32(pdreason[IFLA_PROTO_DOWN_REASON_MASK]); + + netdev_change_proto_down_reason_locked(dev, mask, value); + } + + if (nl_proto_down) { + proto_down = nla_get_u8(nl_proto_down); + + /* Don't turn off protodown if there are active reasons */ + if (!proto_down && dev->proto_down_reason) { + NL_SET_ERR_MSG(extack, "Cannot clear protodown, active reasons"); + return -EBUSY; + } + err = netif_change_proto_down(dev, proto_down); + if (err) + return err; + } + + return 0; +} + #define DO_SETLINK_MODIFIED 0x01 /* notify flag means notify + modified. */ #define DO_SETLINK_NOTIFY 0x03 -static int do_setlink(const struct sk_buff *skb, - struct net_device *dev, struct ifinfomsg *ifm, +static int do_setlink(const struct sk_buff *skb, struct net_device *dev, + struct net *tgt_net, struct ifinfomsg *ifm, struct netlink_ext_ack *extack, - struct nlattr **tb, char *ifname, int status) + struct nlattr **tb, int status) { const struct net_device_ops *ops = dev->netdev_ops; + char ifname[IFNAMSIZ]; int err; - err = validate_linkmsg(dev, tb); + err = validate_linkmsg(dev, tb, extack); if (err < 0) return err; - if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_TARGET_NETNSID]) { - struct net *net = rtnl_link_get_net_capable(skb, dev_net(dev), - tb, CAP_NET_ADMIN); - if (IS_ERR(net)) { - err = PTR_ERR(net); - goto errout; - } + if (tb[IFLA_IFNAME]) + nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + else + ifname[0] = '\0'; - err = dev_change_net_namespace(dev, net, ifname); - put_net(net); + if (!net_eq(tgt_net, dev_net(dev))) { + const char *pat = ifname[0] ? ifname : NULL; + int new_ifindex; + + new_ifindex = nla_get_s32_default(tb[IFLA_NEW_IFINDEX], 0); + + err = __dev_change_net_namespace(dev, tgt_net, pat, + new_ifindex, extack); if (err) - goto errout; + return err; + status |= DO_SETLINK_MODIFIED; } + netdev_lock_ops(dev); + if (tb[IFLA_MAP]) { struct rtnl_link_ifmap *u_map; struct ifmap k_map; @@ -2431,35 +3097,35 @@ static int do_setlink(const struct sk_buff *skb, } if (tb[IFLA_ADDRESS]) { - struct sockaddr *sa; - int len; - - len = sizeof(sa_family_t) + max_t(size_t, dev->addr_len, - sizeof(*sa)); - sa = kmalloc(len, GFP_KERNEL); - if (!sa) { - err = -ENOMEM; + struct sockaddr_storage ss = { }; + + netdev_unlock_ops(dev); + + /* dev_addr_sem is an outer lock, enforce proper ordering */ + down_write(&dev_addr_sem); + netdev_lock_ops(dev); + + ss.ss_family = dev->type; + memcpy(ss.__data, nla_data(tb[IFLA_ADDRESS]), dev->addr_len); + err = netif_set_mac_address(dev, &ss, extack); + if (err) { + up_write(&dev_addr_sem); goto errout; } - sa->sa_family = dev->type; - memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), - dev->addr_len); - err = dev_set_mac_address(dev, sa, extack); - kfree(sa); - if (err) - goto errout; status |= DO_SETLINK_MODIFIED; + + up_write(&dev_addr_sem); } if (tb[IFLA_MTU]) { - err = dev_set_mtu_ext(dev, nla_get_u32(tb[IFLA_MTU]), extack); + err = netif_set_mtu_ext(dev, nla_get_u32(tb[IFLA_MTU]), extack); if (err < 0) goto errout; status |= DO_SETLINK_MODIFIED; } if (tb[IFLA_GROUP]) { - dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); + netif_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); status |= DO_SETLINK_NOTIFY; } @@ -2469,15 +3135,15 @@ static int do_setlink(const struct sk_buff *skb, * requested. */ if (ifm->ifi_index > 0 && ifname[0]) { - err = dev_change_name(dev, ifname); + err = netif_change_name(dev, ifname); if (err < 0) goto errout; status |= DO_SETLINK_MODIFIED; } if (tb[IFLA_IFALIAS]) { - err = dev_set_alias(dev, nla_data(tb[IFLA_IFALIAS]), - nla_len(tb[IFLA_IFALIAS])); + err = netif_set_alias(dev, nla_data(tb[IFLA_IFALIAS]), + nla_len(tb[IFLA_IFALIAS])); if (err < 0) goto errout; status |= DO_SETLINK_NOTIFY; @@ -2489,8 +3155,8 @@ static int do_setlink(const struct sk_buff *skb, } if (ifm->ifi_flags || ifm->ifi_change) { - err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm), - extack); + err = netif_change_flags(dev, rtnl_dev_combine_flags(dev, ifm), + extack); if (err < 0) goto errout; } @@ -2503,7 +3169,7 @@ static int do_setlink(const struct sk_buff *skb, } if (tb[IFLA_CARRIER]) { - err = dev_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER])); + err = netif_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER])); if (err) goto errout; status |= DO_SETLINK_MODIFIED; @@ -2512,7 +3178,7 @@ static int do_setlink(const struct sk_buff *skb, if (tb[IFLA_TXQLEN]) { unsigned int value = nla_get_u32(tb[IFLA_TXQLEN]); - err = dev_change_tx_queue_len(dev, value); + err = netif_change_tx_queue_len(dev, value); if (err) goto errout; status |= DO_SETLINK_MODIFIED; @@ -2521,11 +3187,6 @@ static int do_setlink(const struct sk_buff *skb, if (tb[IFLA_GSO_MAX_SIZE]) { u32 max_size = nla_get_u32(tb[IFLA_GSO_MAX_SIZE]); - if (max_size > GSO_MAX_SIZE) { - err = -EINVAL; - goto errout; - } - if (dev->gso_max_size ^ max_size) { netif_set_gso_max_size(dev, max_size); status |= DO_SETLINK_MODIFIED; @@ -2535,13 +3196,35 @@ static int do_setlink(const struct sk_buff *skb, if (tb[IFLA_GSO_MAX_SEGS]) { u32 max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]); - if (max_segs > GSO_MAX_SEGS) { - err = -EINVAL; - goto errout; + if (dev->gso_max_segs ^ max_segs) { + netif_set_gso_max_segs(dev, max_segs); + status |= DO_SETLINK_MODIFIED; } + } - if (dev->gso_max_segs ^ max_segs) { - dev->gso_max_segs = max_segs; + if (tb[IFLA_GRO_MAX_SIZE]) { + u32 gro_max_size = nla_get_u32(tb[IFLA_GRO_MAX_SIZE]); + + if (dev->gro_max_size ^ gro_max_size) { + netif_set_gro_max_size(dev, gro_max_size); + status |= DO_SETLINK_MODIFIED; + } + } + + if (tb[IFLA_GSO_IPV4_MAX_SIZE]) { + u32 max_size = nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE]); + + if (dev->gso_ipv4_max_size ^ max_size) { + netif_set_gso_ipv4_max_size(dev, max_size); + status |= DO_SETLINK_MODIFIED; + } + } + + if (tb[IFLA_GRO_IPV4_MAX_SIZE]) { + u32 gro_max_size = nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE]); + + if (dev->gro_ipv4_max_size ^ gro_max_size) { + netif_set_gro_ipv4_max_size(dev, gro_max_size); status |= DO_SETLINK_MODIFIED; } } @@ -2552,11 +3235,9 @@ static int do_setlink(const struct sk_buff *skb, if (tb[IFLA_LINKMODE]) { unsigned char value = nla_get_u8(tb[IFLA_LINKMODE]); - write_lock_bh(&dev_base_lock); if (dev->link_mode ^ value) status |= DO_SETLINK_NOTIFY; - dev->link_mode = value; - write_unlock_bh(&dev_base_lock); + WRITE_ONCE(dev->link_mode, value); } if (tb[IFLA_VFINFO_LIST]) { @@ -2570,8 +3251,10 @@ static int do_setlink(const struct sk_buff *skb, err = -EINVAL; goto errout; } - err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr, - ifla_vf_policy, NULL); + err = nla_parse_nested_deprecated(vfinfo, IFLA_VF_MAX, + attr, + ifla_vf_policy, + NULL); if (err < 0) goto errout; err = do_setvfinfo(dev, vfinfo); @@ -2598,8 +3281,10 @@ static int do_setlink(const struct sk_buff *skb, err = -EINVAL; goto errout; } - err = nla_parse_nested(port, IFLA_PORT_MAX, attr, - ifla_port_policy, NULL); + err = nla_parse_nested_deprecated(port, IFLA_PORT_MAX, + attr, + ifla_port_policy, + NULL); if (err < 0) goto errout; if (!port[IFLA_PORT_VF]) { @@ -2618,9 +3303,9 @@ static int do_setlink(const struct sk_buff *skb, if (tb[IFLA_PORT_SELF]) { struct nlattr *port[IFLA_PORT_MAX+1]; - err = nla_parse_nested(port, IFLA_PORT_MAX, - tb[IFLA_PORT_SELF], ifla_port_policy, - NULL); + err = nla_parse_nested_deprecated(port, IFLA_PORT_MAX, + tb[IFLA_PORT_SELF], + ifla_port_policy, NULL); if (err < 0) goto errout; @@ -2637,27 +3322,29 @@ static int do_setlink(const struct sk_buff *skb, int rem; nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { - const struct rtnl_af_ops *af_ops; + struct rtnl_af_ops *af_ops; + int af_ops_srcu_index; - rcu_read_lock(); + af_ops = rtnl_af_lookup(nla_type(af), &af_ops_srcu_index); + if (!af_ops) { + err = -EAFNOSUPPORT; + goto errout; + } - BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af)))); + err = af_ops->set_link_af(dev, af, extack); + rtnl_af_put(af_ops, af_ops_srcu_index); - err = af_ops->set_link_af(dev, af); - if (err < 0) { - rcu_read_unlock(); + if (err < 0) goto errout; - } - rcu_read_unlock(); status |= DO_SETLINK_NOTIFY; } } err = 0; - if (tb[IFLA_PROTO_DOWN]) { - err = dev_change_proto_down(dev, - nla_get_u8(tb[IFLA_PROTO_DOWN])); + if (tb[IFLA_PROTO_DOWN] || tb[IFLA_PROTO_DOWN_REASON]) { + err = do_set_proto_down(dev, tb[IFLA_PROTO_DOWN], + tb[IFLA_PROTO_DOWN_REASON], extack); if (err) goto errout; status |= DO_SETLINK_NOTIFY; @@ -2667,8 +3354,9 @@ static int do_setlink(const struct sk_buff *skb, struct nlattr *xdp[IFLA_XDP_MAX + 1]; u32 xdp_flags = 0; - err = nla_parse_nested(xdp, IFLA_XDP_MAX, tb[IFLA_XDP], - ifla_xdp_policy, NULL); + err = nla_parse_nested_deprecated(xdp, IFLA_XDP_MAX, + tb[IFLA_XDP], + ifla_xdp_policy, NULL); if (err < 0) goto errout; @@ -2690,8 +3378,20 @@ static int do_setlink(const struct sk_buff *skb, } if (xdp[IFLA_XDP_FD]) { + int expected_fd = -1; + + if (xdp_flags & XDP_FLAGS_REPLACE) { + if (!xdp[IFLA_XDP_EXPECTED_FD]) { + err = -EINVAL; + goto errout; + } + expected_fd = + nla_get_s32(xdp[IFLA_XDP_EXPECTED_FD]); + } + err = dev_change_xdp_fd(dev, extack, nla_get_s32(xdp[IFLA_XDP_FD]), + expected_fd, xdp_flags); if (err) goto errout; @@ -2702,28 +3402,46 @@ static int do_setlink(const struct sk_buff *skb, errout: if (status & DO_SETLINK_MODIFIED) { if ((status & DO_SETLINK_NOTIFY) == DO_SETLINK_NOTIFY) - netdev_state_change(dev); + netif_state_change(dev); if (err < 0) net_warn_ratelimited("A link change request failed with some changes committed already. Interface %s may have been left with an inconsistent configuration, please check.\n", dev->name); } + netdev_unlock_ops(dev); + return err; } +static struct net_device *rtnl_dev_get(struct net *net, + struct nlattr *tb[]) +{ + char ifname[ALTIFNAMSIZ]; + + if (tb[IFLA_IFNAME]) + nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + else if (tb[IFLA_ALT_IFNAME]) + nla_strscpy(ifname, tb[IFLA_ALT_IFNAME], ALTIFNAMSIZ); + else + return NULL; + + return __dev_get_by_name(net, ifname); +} + static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + struct ifinfomsg *ifm = nlmsg_data(nlh); struct net *net = sock_net(skb->sk); - struct ifinfomsg *ifm; - struct net_device *dev; - int err; struct nlattr *tb[IFLA_MAX+1]; - char ifname[IFNAMSIZ]; + struct net_device *dev = NULL; + struct rtnl_nets rtnl_nets; + struct net *tgt_net; + int err; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, + ifla_policy, extack); if (err < 0) goto errout; @@ -2731,26 +3449,32 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) goto errout; - if (tb[IFLA_IFNAME]) - nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); - else - ifname[0] = '\0'; + tgt_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN); + if (IS_ERR(tgt_net)) { + err = PTR_ERR(tgt_net); + goto errout; + } + + rtnl_nets_init(&rtnl_nets); + rtnl_nets_add(&rtnl_nets, get_net(net)); + rtnl_nets_add(&rtnl_nets, tgt_net); + + rtnl_nets_lock(&rtnl_nets); - err = -EINVAL; - ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) dev = __dev_get_by_index(net, ifm->ifi_index); - else if (tb[IFLA_IFNAME]) - dev = __dev_get_by_name(net, ifname); + else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) + dev = rtnl_dev_get(net, tb); else - goto errout; + err = -EINVAL; - if (dev == NULL) { + if (dev) + err = do_setlink(skb, dev, tgt_net, ifm, extack, tb, 0); + else if (!err) err = -ENODEV; - goto errout; - } - err = do_setlink(skb, dev, ifm, extack, tb, ifname, 0); + rtnl_nets_unlock(&rtnl_nets); + rtnl_nets_destroy(&rtnl_nets); errout: return err; } @@ -2791,7 +3515,7 @@ static int rtnl_group_dellink(const struct net *net, int group) return 0; } -int rtnl_delete_link(struct net_device *dev) +int rtnl_delete_link(struct net_device *dev, u32 portid, const struct nlmsghdr *nlh) { const struct rtnl_link_ops *ops; LIST_HEAD(list_kill); @@ -2801,7 +3525,7 @@ int rtnl_delete_link(struct net_device *dev) return -EOPNOTSUPP; ops->dellink(dev, &list_kill); - unregister_netdevice_many(&list_kill); + unregister_netdevice_many_notify(&list_kill, portid, nlh); return 0; } @@ -2810,16 +3534,17 @@ EXPORT_SYMBOL_GPL(rtnl_delete_link); static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + struct ifinfomsg *ifm = nlmsg_data(nlh); struct net *net = sock_net(skb->sk); - struct net *tgt_net = net; - struct net_device *dev = NULL; - struct ifinfomsg *ifm; - char ifname[IFNAMSIZ]; + u32 portid = NETLINK_CB(skb).portid; struct nlattr *tb[IFLA_MAX+1]; - int err; + struct net_device *dev = NULL; + struct net *tgt_net = net; int netnsid = -1; + int err; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, + ifla_policy, extack); if (err < 0) return err; @@ -2827,9 +3552,6 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) return err; - if (tb[IFLA_IFNAME]) - nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); - if (tb[IFLA_TARGET_NETNSID]) { netnsid = nla_get_s32(tb[IFLA_TARGET_NETNSID]); tgt_net = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, netnsid); @@ -2837,36 +3559,34 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, return PTR_ERR(tgt_net); } - err = -EINVAL; - ifm = nlmsg_data(nlh); + rtnl_net_lock(tgt_net); + if (ifm->ifi_index > 0) dev = __dev_get_by_index(tgt_net, ifm->ifi_index); - else if (tb[IFLA_IFNAME]) - dev = __dev_get_by_name(tgt_net, ifname); + else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) + dev = rtnl_dev_get(tgt_net, tb); + + if (dev) + err = rtnl_delete_link(dev, portid, nlh); + else if (ifm->ifi_index > 0 || tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) + err = -ENODEV; else if (tb[IFLA_GROUP]) err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP])); else - goto out; - - if (!dev) { - if (tb[IFLA_IFNAME] || ifm->ifi_index > 0) - err = -ENODEV; - - goto out; - } + err = -EINVAL; - err = rtnl_delete_link(dev); + rtnl_net_unlock(tgt_net); -out: if (netnsid >= 0) put_net(tgt_net); return err; } -int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) +int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm, + u32 portid, const struct nlmsghdr *nlh) { - unsigned int old_flags; + unsigned int old_flags, changed; int err; old_flags = dev->flags; @@ -2877,12 +3597,13 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) return err; } - if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) { - __dev_notify_flags(dev, old_flags, (old_flags ^ dev->flags)); - } else { - dev->rtnl_link_state = RTNL_LINK_INITIALIZED; - __dev_notify_flags(dev, old_flags, ~0U); + changed = old_flags ^ dev->flags; + if (dev->rtnl_link_initializing) { + dev->rtnl_link_initializing = false; + changed = ~0U; } + + __dev_notify_flags(dev, old_flags, changed, portid, nlh); return 0; } EXPORT_SYMBOL(rtnl_configure_link); @@ -2896,6 +3617,7 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, struct net_device *dev; unsigned int num_tx_queues = 1; unsigned int num_rx_queues = 1; + int err; if (tb[IFLA_NUM_TX_QUEUES]) num_tx_queues = nla_get_u32(tb[IFLA_NUM_TX_QUEUES]); @@ -2917,20 +3639,43 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, return ERR_PTR(-EINVAL); } - dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type, - ops->setup, num_tx_queues, num_rx_queues); + if (ops->alloc) { + dev = ops->alloc(tb, ifname, name_assign_type, + num_tx_queues, num_rx_queues); + if (IS_ERR(dev)) + return dev; + } else { + dev = alloc_netdev_mqs(ops->priv_size, ifname, + name_assign_type, ops->setup, + num_tx_queues, num_rx_queues); + } + if (!dev) return ERR_PTR(-ENOMEM); + err = validate_linkmsg(dev, tb, extack); + if (err < 0) { + free_netdev(dev); + return ERR_PTR(err); + } + dev_net_set(dev, net); dev->rtnl_link_ops = ops; - dev->rtnl_link_state = RTNL_LINK_INITIALIZING; + dev->rtnl_link_initializing = true; + + if (tb[IFLA_MTU]) { + u32 mtu = nla_get_u32(tb[IFLA_MTU]); - if (tb[IFLA_MTU]) - dev->mtu = nla_get_u32(tb[IFLA_MTU]); + err = dev_validate_mtu(dev, mtu, extack); + if (err) { + free_netdev(dev); + return ERR_PTR(err); + } + dev->mtu = mtu; + } if (tb[IFLA_ADDRESS]) { - memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), - nla_len(tb[IFLA_ADDRESS])); + __dev_addr_set(dev, nla_data(tb[IFLA_ADDRESS]), + nla_len(tb[IFLA_ADDRESS])); dev->addr_assign_type = NET_ADDR_SET; } if (tb[IFLA_BROADCAST]) @@ -2943,234 +3688,147 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, if (tb[IFLA_LINKMODE]) dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); if (tb[IFLA_GROUP]) - dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); + netif_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); if (tb[IFLA_GSO_MAX_SIZE]) netif_set_gso_max_size(dev, nla_get_u32(tb[IFLA_GSO_MAX_SIZE])); if (tb[IFLA_GSO_MAX_SEGS]) - dev->gso_max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]); + netif_set_gso_max_segs(dev, nla_get_u32(tb[IFLA_GSO_MAX_SEGS])); + if (tb[IFLA_GRO_MAX_SIZE]) + netif_set_gro_max_size(dev, nla_get_u32(tb[IFLA_GRO_MAX_SIZE])); + if (tb[IFLA_GSO_IPV4_MAX_SIZE]) + netif_set_gso_ipv4_max_size(dev, nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE])); + if (tb[IFLA_GRO_IPV4_MAX_SIZE]) + netif_set_gro_ipv4_max_size(dev, nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE])); return dev; } EXPORT_SYMBOL(rtnl_create_link); -static int rtnl_group_changelink(const struct sk_buff *skb, - struct net *net, int group, - struct ifinfomsg *ifm, - struct netlink_ext_ack *extack, - struct nlattr **tb) -{ - struct net_device *dev, *aux; - int err; - - for_each_netdev_safe(net, dev, aux) { - if (dev->group == group) { - err = do_setlink(skb, dev, ifm, extack, tb, NULL, 0); - if (err < 0) - return err; - } - } - - return 0; -} +struct rtnl_newlink_tbs { + struct nlattr *tb[IFLA_MAX + 1]; + struct nlattr *linkinfo[IFLA_INFO_MAX + 1]; + struct nlattr *attr[RTNL_MAX_TYPE + 1]; + struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1]; +}; -static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, - struct nlattr **attr, struct netlink_ext_ack *extack) +static int rtnl_changelink(const struct sk_buff *skb, struct nlmsghdr *nlh, + const struct rtnl_link_ops *ops, + struct net_device *dev, struct net *tgt_net, + struct rtnl_newlink_tbs *tbs, + struct nlattr **data, + struct netlink_ext_ack *extack) { - struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1]; - unsigned char name_assign_type = NET_NAME_USER; - struct nlattr *linkinfo[IFLA_INFO_MAX + 1]; - const struct rtnl_link_ops *m_ops = NULL; - struct net_device *master_dev = NULL; - struct net *net = sock_net(skb->sk); - const struct rtnl_link_ops *ops; - struct nlattr *tb[IFLA_MAX + 1]; - struct net *dest_net, *link_net; - struct nlattr **slave_data; - char kind[MODULE_NAME_LEN]; - struct net_device *dev; - struct ifinfomsg *ifm; - char ifname[IFNAMSIZ]; - struct nlattr **data; + struct nlattr ** const linkinfo = tbs->linkinfo; + struct nlattr ** const tb = tbs->tb; + int status = 0; int err; -#ifdef CONFIG_MODULES -replay: -#endif - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); - if (err < 0) - return err; - - err = rtnl_ensure_unique_netns(tb, extack, false); - if (err < 0) - return err; - - if (tb[IFLA_IFNAME]) - nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); - else - ifname[0] = '\0'; - - ifm = nlmsg_data(nlh); - if (ifm->ifi_index > 0) - dev = __dev_get_by_index(net, ifm->ifi_index); - else { - if (ifname[0]) - dev = __dev_get_by_name(net, ifname); - else - dev = NULL; - } + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; - if (dev) { - master_dev = netdev_master_upper_dev_get(dev); - if (master_dev) - m_ops = master_dev->rtnl_link_ops; - } + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; - err = validate_linkmsg(dev, tb); - if (err < 0) - return err; + if (linkinfo[IFLA_INFO_DATA]) { + if (!ops || ops != dev->rtnl_link_ops || !ops->changelink) + return -EOPNOTSUPP; - if (tb[IFLA_LINKINFO]) { - err = nla_parse_nested(linkinfo, IFLA_INFO_MAX, - tb[IFLA_LINKINFO], ifla_info_policy, - NULL); + err = ops->changelink(dev, tb, data, extack); if (err < 0) return err; - } else - memset(linkinfo, 0, sizeof(linkinfo)); - if (linkinfo[IFLA_INFO_KIND]) { - nla_strlcpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind)); - ops = rtnl_link_ops_get(kind); - } else { - kind[0] = '\0'; - ops = NULL; + status |= DO_SETLINK_NOTIFY; } - data = NULL; - if (ops) { - if (ops->maxtype > RTNL_MAX_TYPE) - return -EINVAL; + if (linkinfo[IFLA_INFO_SLAVE_DATA]) { + const struct rtnl_link_ops *m_ops = NULL; + struct nlattr **slave_data = NULL; + struct net_device *master_dev; - if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { - err = nla_parse_nested(attr, ops->maxtype, - linkinfo[IFLA_INFO_DATA], - ops->policy, extack); - if (err < 0) - return err; - data = attr; - } - if (ops->validate) { - err = ops->validate(tb, data, extack); - if (err < 0) - return err; - } - } + master_dev = netdev_master_upper_dev_get(dev); + if (master_dev) + m_ops = master_dev->rtnl_link_ops; + + if (!m_ops || !m_ops->slave_changelink) + return -EOPNOTSUPP; - slave_data = NULL; - if (m_ops) { if (m_ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE) return -EINVAL; - if (m_ops->slave_maxtype && - linkinfo[IFLA_INFO_SLAVE_DATA]) { - err = nla_parse_nested(slave_attr, m_ops->slave_maxtype, - linkinfo[IFLA_INFO_SLAVE_DATA], - m_ops->slave_policy, extack); + if (m_ops->slave_maxtype) { + err = nla_parse_nested_deprecated(tbs->slave_attr, + m_ops->slave_maxtype, + linkinfo[IFLA_INFO_SLAVE_DATA], + m_ops->slave_policy, extack); if (err < 0) return err; - slave_data = slave_attr; - } - } - if (dev) { - int status = 0; + slave_data = tbs->slave_attr; + } - if (nlh->nlmsg_flags & NLM_F_EXCL) - return -EEXIST; - if (nlh->nlmsg_flags & NLM_F_REPLACE) - return -EOPNOTSUPP; + err = m_ops->slave_changelink(master_dev, dev, tb, slave_data, extack); + if (err < 0) + return err; - if (linkinfo[IFLA_INFO_DATA]) { - if (!ops || ops != dev->rtnl_link_ops || - !ops->changelink) - return -EOPNOTSUPP; + status |= DO_SETLINK_NOTIFY; + } - err = ops->changelink(dev, tb, data, extack); - if (err < 0) - return err; - status |= DO_SETLINK_NOTIFY; - } + return do_setlink(skb, dev, tgt_net, nlmsg_data(nlh), extack, tb, status); +} - if (linkinfo[IFLA_INFO_SLAVE_DATA]) { - if (!m_ops || !m_ops->slave_changelink) - return -EOPNOTSUPP; +static int rtnl_group_changelink(const struct sk_buff *skb, + struct net *net, struct net *tgt_net, + int group, struct ifinfomsg *ifm, + struct netlink_ext_ack *extack, + struct nlattr **tb) +{ + struct net_device *dev, *aux; + int err; - err = m_ops->slave_changelink(master_dev, dev, tb, - slave_data, extack); + for_each_netdev_safe(net, dev, aux) { + if (dev->group == group) { + err = do_setlink(skb, dev, tgt_net, ifm, extack, tb, 0); if (err < 0) return err; - status |= DO_SETLINK_NOTIFY; } - - return do_setlink(skb, dev, ifm, extack, tb, ifname, status); - } - - if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { - if (ifm->ifi_index == 0 && tb[IFLA_GROUP]) - return rtnl_group_changelink(skb, net, - nla_get_u32(tb[IFLA_GROUP]), - ifm, extack, tb); - return -ENODEV; } - if (tb[IFLA_MAP] || tb[IFLA_PROTINFO]) - return -EOPNOTSUPP; + return 0; +} - if (!ops) { -#ifdef CONFIG_MODULES - if (kind[0]) { - __rtnl_unlock(); - request_module("rtnl-link-%s", kind); - rtnl_lock(); - ops = rtnl_link_ops_get(kind); - if (ops) - goto replay; - } -#endif - NL_SET_ERR_MSG(extack, "Unknown device type"); - return -EOPNOTSUPP; - } +static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, + const struct rtnl_link_ops *ops, + struct net *tgt_net, struct net *link_net, + struct net *peer_net, + const struct nlmsghdr *nlh, + struct nlattr **tb, struct nlattr **data, + struct netlink_ext_ack *extack) +{ + unsigned char name_assign_type = NET_NAME_USER; + struct rtnl_newlink_params params = { + .src_net = sock_net(skb->sk), + .link_net = link_net, + .peer_net = peer_net, + .tb = tb, + .data = data, + }; + u32 portid = NETLINK_CB(skb).portid; + struct net_device *dev; + char ifname[IFNAMSIZ]; + int err; - if (!ops->setup) + if (!ops->alloc && !ops->setup) return -EOPNOTSUPP; - if (!ifname[0]) { + if (tb[IFLA_IFNAME]) { + nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + } else { snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); name_assign_type = NET_NAME_ENUM; } - dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN); - if (IS_ERR(dest_net)) - return PTR_ERR(dest_net); - - if (tb[IFLA_LINK_NETNSID]) { - int id = nla_get_s32(tb[IFLA_LINK_NETNSID]); - - link_net = get_net_ns_by_id(dest_net, id); - if (!link_net) { - NL_SET_ERR_MSG(extack, "Unknown network namespace id"); - err = -EINVAL; - goto out; - } - err = -EPERM; - if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN)) - goto out; - } else { - link_net = NULL; - } - - dev = rtnl_create_link(link_net ? : dest_net, ifname, - name_assign_type, ops, tb, extack); + dev = rtnl_create_link(tgt_net, ifname, name_assign_type, ops, tb, + extack); if (IS_ERR(dev)) { err = PTR_ERR(dev); goto out; @@ -3178,44 +3836,31 @@ replay: dev->ifindex = ifm->ifi_index; - if (ops->newlink) { - err = ops->newlink(link_net ? : net, dev, tb, data, extack); - /* Drivers should call free_netdev() in ->destructor - * and unregister it on failure after registration - * so that device could be finally freed in rtnl_unlock. - */ - if (err < 0) { - /* If device is not registered at all, free it now */ - if (dev->reg_state == NETREG_UNINITIALIZED) - free_netdev(dev); - goto out; - } - } else { + if (ops->newlink) + err = ops->newlink(dev, ¶ms, extack); + else err = register_netdevice(dev); - if (err < 0) { - free_netdev(dev); - goto out; - } + if (err < 0) { + free_netdev(dev); + goto out; } - err = rtnl_configure_link(dev, ifm); + + netdev_lock_ops(dev); + + err = rtnl_configure_link(dev, ifm, portid, nlh); if (err < 0) goto out_unregister; - if (link_net) { - err = dev_change_net_namespace(dev, dest_net, ifname); - if (err < 0) - goto out_unregister; - } if (tb[IFLA_MASTER]) { err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack); if (err) goto out_unregister; } + + netdev_unlock_ops(dev); out: - if (link_net) - put_net(link_net); - put_net(dest_net); return err; out_unregister: + netdev_unlock_ops(dev); if (ops->newlink) { LIST_HEAD(list_kill); @@ -3227,18 +3872,213 @@ out_unregister: goto out; } +static struct net *rtnl_get_peer_net(const struct rtnl_link_ops *ops, + struct nlattr *tbp[], + struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_MAX + 1]; + int err; + + if (!data || !data[ops->peer_type]) + return rtnl_link_get_net_ifla(tbp); + + err = rtnl_nla_parse_ifinfomsg(tb, data[ops->peer_type], extack); + if (err < 0) + return ERR_PTR(err); + + if (ops->validate) { + err = ops->validate(tb, NULL, extack); + if (err < 0) + return ERR_PTR(err); + } + + return rtnl_link_get_net_ifla(tb); +} + +static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, + const struct rtnl_link_ops *ops, + struct net *tgt_net, struct net *link_net, + struct net *peer_net, + struct rtnl_newlink_tbs *tbs, + struct nlattr **data, + struct netlink_ext_ack *extack) +{ + struct nlattr ** const tb = tbs->tb; + struct net *net = sock_net(skb->sk); + struct net *device_net; + struct net_device *dev; + struct ifinfomsg *ifm; + bool link_specified; + + /* When creating, lookup for existing device in target net namespace */ + device_net = (nlh->nlmsg_flags & NLM_F_CREATE) && + (nlh->nlmsg_flags & NLM_F_EXCL) ? + tgt_net : net; + + ifm = nlmsg_data(nlh); + if (ifm->ifi_index > 0) { + link_specified = true; + dev = __dev_get_by_index(device_net, ifm->ifi_index); + } else if (ifm->ifi_index < 0) { + NL_SET_ERR_MSG(extack, "ifindex can't be negative"); + return -EINVAL; + } else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) { + link_specified = true; + dev = rtnl_dev_get(device_net, tb); + } else { + link_specified = false; + dev = NULL; + } + + if (dev) + return rtnl_changelink(skb, nlh, ops, dev, tgt_net, tbs, data, extack); + + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { + /* No dev found and NLM_F_CREATE not set. Requested dev does not exist, + * or it's for a group + */ + if (link_specified || !tb[IFLA_GROUP]) + return -ENODEV; + + return rtnl_group_changelink(skb, net, tgt_net, + nla_get_u32(tb[IFLA_GROUP]), + ifm, extack, tb); + } + + if (tb[IFLA_MAP] || tb[IFLA_PROTINFO]) + return -EOPNOTSUPP; + + if (!ops) { + NL_SET_ERR_MSG(extack, "Unknown device type"); + return -EOPNOTSUPP; + } + + return rtnl_newlink_create(skb, ifm, ops, tgt_net, link_net, peer_net, nlh, + tb, data, extack); +} + static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { - struct nlattr **attr; + struct net *tgt_net, *link_net = NULL, *peer_net = NULL; + struct nlattr **tb, **linkinfo, **data = NULL; + struct rtnl_link_ops *ops = NULL; + struct rtnl_newlink_tbs *tbs; + struct rtnl_nets rtnl_nets; + int ops_srcu_index; int ret; - attr = kmalloc_array(RTNL_MAX_TYPE + 1, sizeof(*attr), GFP_KERNEL); - if (!attr) + tbs = kmalloc(sizeof(*tbs), GFP_KERNEL); + if (!tbs) return -ENOMEM; - ret = __rtnl_newlink(skb, nlh, attr, extack); - kfree(attr); + tb = tbs->tb; + ret = nlmsg_parse_deprecated(nlh, sizeof(struct ifinfomsg), tb, + IFLA_MAX, ifla_policy, extack); + if (ret < 0) + goto free; + + ret = rtnl_ensure_unique_netns(tb, extack, false); + if (ret < 0) + goto free; + + linkinfo = tbs->linkinfo; + if (tb[IFLA_LINKINFO]) { + ret = nla_parse_nested_deprecated(linkinfo, IFLA_INFO_MAX, + tb[IFLA_LINKINFO], + ifla_info_policy, NULL); + if (ret < 0) + goto free; + } else { + memset(linkinfo, 0, sizeof(tbs->linkinfo)); + } + + if (linkinfo[IFLA_INFO_KIND]) { + char kind[MODULE_NAME_LEN]; + + nla_strscpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind)); + ops = rtnl_link_ops_get(kind, &ops_srcu_index); +#ifdef CONFIG_MODULES + if (!ops) { + request_module("rtnl-link-%s", kind); + ops = rtnl_link_ops_get(kind, &ops_srcu_index); + } +#endif + } + + rtnl_nets_init(&rtnl_nets); + + if (ops) { + if (ops->maxtype > RTNL_MAX_TYPE) { + ret = -EINVAL; + goto put_ops; + } + + if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { + ret = nla_parse_nested_deprecated(tbs->attr, ops->maxtype, + linkinfo[IFLA_INFO_DATA], + ops->policy, extack); + if (ret < 0) + goto put_ops; + + data = tbs->attr; + } + + if (ops->validate) { + ret = ops->validate(tb, data, extack); + if (ret < 0) + goto put_ops; + } + + if (ops->peer_type) { + peer_net = rtnl_get_peer_net(ops, tb, data, extack); + if (IS_ERR(peer_net)) { + ret = PTR_ERR(peer_net); + goto put_ops; + } + if (peer_net) + rtnl_nets_add(&rtnl_nets, peer_net); + } + } + + tgt_net = rtnl_link_get_net_capable(skb, sock_net(skb->sk), tb, CAP_NET_ADMIN); + if (IS_ERR(tgt_net)) { + ret = PTR_ERR(tgt_net); + goto put_net; + } + + rtnl_nets_add(&rtnl_nets, tgt_net); + + if (tb[IFLA_LINK_NETNSID]) { + int id = nla_get_s32(tb[IFLA_LINK_NETNSID]); + + link_net = get_net_ns_by_id(tgt_net, id); + if (!link_net) { + NL_SET_ERR_MSG(extack, "Unknown network namespace id"); + ret = -EINVAL; + goto put_net; + } + + rtnl_nets_add(&rtnl_nets, link_net); + + if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN)) { + ret = -EPERM; + goto put_net; + } + } + + rtnl_nets_lock(&rtnl_nets); + ret = __rtnl_newlink(skb, nlh, ops, tgt_net, link_net, peer_net, tbs, data, extack); + rtnl_nets_unlock(&rtnl_nets); + +put_net: + rtnl_nets_destroy(&rtnl_nets); +put_ops: + if (ops) + rtnl_link_ops_put(ops, ops_srcu_index); +free: + kfree(tbs); return ret; } @@ -3250,24 +4090,24 @@ static int rtnl_valid_getlink_req(struct sk_buff *skb, struct ifinfomsg *ifm; int i, err; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { + ifm = nlmsg_payload(nlh, sizeof(*ifm)); + if (!ifm) { NL_SET_ERR_MSG(extack, "Invalid header for get link"); return -EINVAL; } if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, - extack); + return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, + ifla_policy, extack); - ifm = nlmsg_data(nlh); if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || ifm->ifi_change) { NL_SET_ERR_MSG(extack, "Invalid values in header for get link request"); return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, - extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFLA_MAX, + ifla_policy, extack); if (err) return err; @@ -3277,6 +4117,7 @@ static int rtnl_valid_getlink_req(struct sk_buff *skb, switch (i) { case IFLA_IFNAME: + case IFLA_ALT_IFNAME: case IFLA_EXT_MASK: case IFLA_TARGET_NETNSID: break; @@ -3295,7 +4136,6 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct net *tgt_net = net; struct ifinfomsg *ifm; - char ifname[IFNAMSIZ]; struct nlattr *tb[IFLA_MAX+1]; struct net_device *dev = NULL; struct sk_buff *nskb; @@ -3318,9 +4158,6 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, return PTR_ERR(tgt_net); } - if (tb[IFLA_IFNAME]) - nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); - if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); @@ -3328,8 +4165,8 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) dev = __dev_get_by_index(tgt_net, ifm->ifi_index); - else if (tb[IFLA_IFNAME]) - dev = __dev_get_by_name(tgt_net, ifname); + else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) + dev = rtnl_dev_get(tgt_net, tb); else goto out; @@ -3338,14 +4175,22 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; err = -ENOBUFS; - nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL); + nskb = nlmsg_new_large(if_nlmsg_size(dev, ext_filter_mask)); if (nskb == NULL) goto out; + /* Synchronize the carrier state so we don't report a state + * that we're not actually going to honour immediately; if + * the driver just did a carrier off->on transition, we can + * only TX if link watch work has run, but without this we'd + * already report carrier on, even if it doesn't work yet. + */ + linkwatch_sync_dev(dev); + err = rtnl_fill_ifinfo(nskb, dev, net, RTM_NEWLINK, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, 0, ext_filter_mask, - 0, NULL, 0, netnsid); + 0, NULL, 0, netnsid, GFP_KERNEL); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); @@ -3359,22 +4204,130 @@ out: return err; } -static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) +static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr, + bool *changed, struct netlink_ext_ack *extack) +{ + char *alt_ifname; + size_t size; + int err; + + err = nla_validate(attr, attr->nla_len, IFLA_MAX, ifla_policy, extack); + if (err) + return err; + + if (cmd == RTM_NEWLINKPROP) { + size = rtnl_prop_list_size(dev); + size += nla_total_size(ALTIFNAMSIZ); + if (size >= U16_MAX) { + NL_SET_ERR_MSG(extack, + "effective property list too long"); + return -EINVAL; + } + } + + alt_ifname = nla_strdup(attr, GFP_KERNEL_ACCOUNT); + if (!alt_ifname) + return -ENOMEM; + + if (cmd == RTM_NEWLINKPROP) { + err = netdev_name_node_alt_create(dev, alt_ifname); + if (!err) + alt_ifname = NULL; + } else if (cmd == RTM_DELLINKPROP) { + err = netdev_name_node_alt_destroy(dev, alt_ifname); + } else { + WARN_ON_ONCE(1); + err = -EINVAL; + } + + kfree(alt_ifname); + if (!err) + *changed = true; + return err; +} + +static int rtnl_linkprop(int cmd, struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); + struct nlattr *tb[IFLA_MAX + 1]; struct net_device *dev; - struct nlattr *tb[IFLA_MAX+1]; + struct ifinfomsg *ifm; + bool changed = false; + struct nlattr *attr; + int err, rem; + + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); + if (err) + return err; + + err = rtnl_ensure_unique_netns(tb, extack, true); + if (err) + return err; + + ifm = nlmsg_data(nlh); + if (ifm->ifi_index > 0) + dev = __dev_get_by_index(net, ifm->ifi_index); + else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) + dev = rtnl_dev_get(net, tb); + else + return -EINVAL; + + if (!dev) + return -ENODEV; + + if (!tb[IFLA_PROP_LIST]) + return 0; + + nla_for_each_nested(attr, tb[IFLA_PROP_LIST], rem) { + switch (nla_type(attr)) { + case IFLA_ALT_IFNAME: + err = rtnl_alt_ifname(cmd, dev, attr, &changed, extack); + if (err) + return err; + break; + } + } + + if (changed) + netdev_state_change(dev); + return 0; +} + +static int rtnl_newlinkprop(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + return rtnl_linkprop(RTM_NEWLINKPROP, skb, nlh, extack); +} + +static int rtnl_dellinkprop(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + return rtnl_linkprop(RTM_DELLINKPROP, skb, nlh, extack); +} + +static noinline_for_stack u32 rtnl_calcit(struct sk_buff *skb, + struct nlmsghdr *nlh) +{ + struct net *net = sock_net(skb->sk); + size_t min_ifinfo_dump_size = 0; u32 ext_filter_mask = 0; - u16 min_ifinfo_dump_size = 0; - int hdrlen; + struct net_device *dev; + struct nlattr *nla; + int hdrlen, rem; /* Same kernel<->userspace interface hack as in rtnl_dump_ifinfo. */ hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ? sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg); - if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, NULL) >= 0) { - if (tb[IFLA_EXT_MASK]) - ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); + if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) + return NLMSG_GOODSIZE; + + nla_for_each_attr_type(nla, IFLA_EXT_MASK, + nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), rem) { + if (nla_len(nla) == sizeof(u32)) + ext_filter_mask = nla_get_u32(nla); } if (!ext_filter_mask) @@ -3385,9 +4338,8 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) */ rcu_read_lock(); for_each_netdev_rcu(net, dev) { - min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size, - if_nlmsg_size(dev, - ext_filter_mask)); + min_ifinfo_dump_size = max(min_ifinfo_dump_size, + if_nlmsg_size(dev, ext_filter_mask)); } rcu_read_unlock(); @@ -3405,7 +4357,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) s_idx = 1; for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) { - struct rtnl_link **tab; + struct rtnl_link __rcu **tab; struct rtnl_link *link; rtnl_dumpit_func dumpit; @@ -3419,7 +4371,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) if (!tab) continue; - link = tab[type]; + link = rcu_dereference_rtnl(tab[type]); if (!link) continue; @@ -3444,20 +4396,26 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, unsigned int change, u32 event, gfp_t flags, int *new_nsid, - int new_ifindex) + int new_ifindex, u32 portid, + const struct nlmsghdr *nlh) { struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOBUFS; - size_t if_info_size; + u32 seq = 0; - skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), flags); + skb = nlmsg_new(if_nlmsg_size(dev, 0), flags); if (skb == NULL) goto errout; + if (nlmsg_report(nlh)) + seq = nlmsg_seq(nlh); + else + portid = 0; + err = rtnl_fill_ifinfo(skb, dev, dev_net(dev), - type, 0, 0, change, 0, 0, event, - new_nsid, new_ifindex, -1); + type, portid, seq, change, 0, 0, event, + new_nsid, new_ifindex, -1, flags); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -3466,21 +4424,22 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, } return skb; errout: - if (err < 0) - rtnl_set_sk_err(net, RTNLGRP_LINK, err); + rtnl_set_sk_err(net, RTNLGRP_LINK, err); return NULL; } -void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags) +void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags, + u32 portid, const struct nlmsghdr *nlh) { struct net *net = dev_net(dev); - rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, flags); + rtnl_notify(skb, net, portid, RTNLGRP_LINK, nlh, flags); } static void rtmsg_ifinfo_event(int type, struct net_device *dev, unsigned int change, u32 event, - gfp_t flags, int *new_nsid, int new_ifindex) + gfp_t flags, int *new_nsid, int new_ifindex, + u32 portid, const struct nlmsghdr *nlh) { struct sk_buff *skb; @@ -3488,23 +4447,23 @@ static void rtmsg_ifinfo_event(int type, struct net_device *dev, return; skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid, - new_ifindex); + new_ifindex, portid, nlh); if (skb) - rtmsg_ifinfo_send(skb, dev, flags); + rtmsg_ifinfo_send(skb, dev, flags, portid, nlh); } void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, - gfp_t flags) + gfp_t flags, u32 portid, const struct nlmsghdr *nlh) { rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, - NULL, 0); + NULL, 0, portid, nlh); } void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change, gfp_t flags, int *new_nsid, int new_ifindex) { rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, - new_nsid, new_ifindex); + new_nsid, new_ifindex, 0, NULL); } static int nlmsg_populate_fdb_fill(struct sk_buff *skb, @@ -3529,7 +4488,7 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb, ndm->ndm_ifindex = dev->ifindex; ndm->ndm_state = ndm_state; - if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr)) + if (nla_put(skb, NDA_LLADDR, dev->addr_len, addr)) goto nla_put_failure; if (vid) if (nla_put(skb, NDA_VLAN, sizeof(u16), &vid)) @@ -3543,10 +4502,10 @@ nla_put_failure: return -EMSGSIZE; } -static inline size_t rtnl_fdb_nlmsg_size(void) +static inline size_t rtnl_fdb_nlmsg_size(const struct net_device *dev) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + - nla_total_size(ETH_ALEN) + /* NDA_LLADDR */ + nla_total_size(dev->addr_len) + /* NDA_LLADDR */ nla_total_size(sizeof(u16)) + /* NDA_VLAN */ 0; } @@ -3558,7 +4517,7 @@ static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type, struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(rtnl_fdb_nlmsg_size(), GFP_ATOMIC); + skb = nlmsg_new(rtnl_fdb_nlmsg_size(dev), GFP_ATOMIC); if (!skb) goto errout; @@ -3575,7 +4534,7 @@ errout: rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } -/** +/* * ndo_dflt_fdb_add - default netdevice operation to add an FDB entry */ int ndo_dflt_fdb_add(struct ndmsg *ndm, @@ -3590,12 +4549,17 @@ int ndo_dflt_fdb_add(struct ndmsg *ndm, * implement its own handler for this. */ if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) { - pr_info("%s: FDB only supports static addresses\n", dev->name); + netdev_info(dev, "default FDB implementation only supports local addresses\n"); + return err; + } + + if (tb[NDA_FLAGS_EXT]) { + netdev_info(dev, "invalid flags given to default FDB implementation\n"); return err; } if (vid) { - pr_info("%s: vlans aren't supported yet for dev_uc|mc_add()\n", dev->name); + netdev_info(dev, "vlans aren't supported yet for dev_uc|mc_add()\n"); return err; } @@ -3645,7 +4609,8 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, u16 vid; int err; - err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, + extack); if (err < 0) return err; @@ -3681,12 +4646,13 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, /* Support fdb on master device the net/bridge default case */ if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && - (dev->priv_flags & IFF_BRIDGE_PORT)) { + netif_is_bridge_port(dev)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); const struct net_device_ops *ops = br_dev->netdev_ops; + bool notified = false; err = ops->ndo_fdb_add(ndm, tb, dev, addr, vid, - nlh->nlmsg_flags, extack); + nlh->nlmsg_flags, ¬ified, extack); if (err) goto out; else @@ -3695,16 +4661,18 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, /* Embedded bridge, macvlan, and any other device support */ if ((ndm->ndm_flags & NTF_SELF)) { + bool notified = false; + if (dev->netdev_ops->ndo_fdb_add) err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr, vid, nlh->nlmsg_flags, - extack); + ¬ified, extack); else err = ndo_dflt_fdb_add(ndm, tb, dev, addr, vid, nlh->nlmsg_flags); - if (!err) { + if (!err && !notified) { rtnl_fdb_notify(dev, addr, vid, RTM_NEWNEIGH, ndm->ndm_state); ndm->ndm_flags &= ~NTF_SELF; @@ -3714,7 +4682,7 @@ out: return err; } -/** +/* * ndo_dflt_fdb_del - default netdevice operation to delete an FDB entry */ int ndo_dflt_fdb_del(struct ndmsg *ndm, @@ -3728,7 +4696,7 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm, * implement its own handler for this. */ if (!(ndm->ndm_state & NUD_PERMANENT)) { - pr_info("%s: FDB only supports static addresses\n", dev->name); + netdev_info(dev, "default FDB implementation only supports local addresses\n"); return err; } @@ -3744,18 +4712,25 @@ EXPORT_SYMBOL(ndo_dflt_fdb_del); static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + bool del_bulk = !!(nlh->nlmsg_flags & NLM_F_BULK); struct net *net = sock_net(skb->sk); + const struct net_device_ops *ops; struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; struct net_device *dev; - int err = -EINVAL; - __u8 *addr; + __u8 *addr = NULL; + int err; u16 vid; - if (!netlink_capable(skb, CAP_NET_ADMIN)) - return -EPERM; - - err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); + if (!del_bulk) { + err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, + NULL, extack); + } else { + /* For bulk delete, the drivers will parse the message with + * policy. + */ + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); + } if (err < 0) return err; @@ -3771,9 +4746,16 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, return -ENODEV; } - if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { - NL_SET_ERR_MSG(extack, "invalid address"); - return -EINVAL; + if (!del_bulk) { + if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { + NL_SET_ERR_MSG(extack, "invalid address"); + return -EINVAL; + } + addr = nla_data(tb[NDA_LLADDR]); + + err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); + if (err) + return err; } if (dev->type != ARPHRD_ETHER) { @@ -3781,22 +4763,23 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } - addr = nla_data(tb[NDA_LLADDR]); - - err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); - if (err) - return err; - err = -EOPNOTSUPP; /* Support fdb on master device the net/bridge default case */ if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && - (dev->priv_flags & IFF_BRIDGE_PORT)) { + netif_is_bridge_port(dev)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); - const struct net_device_ops *ops = br_dev->netdev_ops; + bool notified = false; - if (ops->ndo_fdb_del) - err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid); + ops = br_dev->netdev_ops; + if (!del_bulk) { + if (ops->ndo_fdb_del) + err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, + ¬ified, extack); + } else { + if (ops->ndo_fdb_del_bulk) + err = ops->ndo_fdb_del_bulk(nlh, dev, extack); + } if (err) goto out; @@ -3806,15 +4789,26 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, /* Embedded bridge, macvlan, and any other device support */ if (ndm->ndm_flags & NTF_SELF) { - if (dev->netdev_ops->ndo_fdb_del) - err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr, - vid); - else - err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid); + bool notified = false; + + ops = dev->netdev_ops; + if (!del_bulk) { + if (ops->ndo_fdb_del) + err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, + ¬ified, extack); + else + err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid); + } else { + /* in case err was cleared by NTF_MASTER call */ + err = -EOPNOTSUPP; + if (ops->ndo_fdb_del_bulk) + err = ops->ndo_fdb_del_bulk(nlh, dev, extack); + } if (!err) { - rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH, - ndm->ndm_state); + if (!del_bulk && !notified) + rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH, + ndm->ndm_state); ndm->ndm_flags &= ~NTF_SELF; } } @@ -3828,15 +4822,16 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, int *idx, struct netdev_hw_addr_list *list) { + struct ndo_fdb_dump_context *ctx = (void *)cb->ctx; struct netdev_hw_addr *ha; - int err; u32 portid, seq; + int err; portid = NETLINK_CB(cb->skb).portid; seq = cb->nlh->nlmsg_seq; list_for_each_entry(ha, &list->list, list) { - if (*idx < cb->args[2]) + if (*idx < ctx->fdb_idx) goto skip; err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, 0, @@ -3853,8 +4848,11 @@ skip: /** * ndo_dflt_fdb_dump - default netdevice operation to dump an FDB table. - * @nlh: netlink message header + * @skb: socket buffer to store message in + * @cb: netlink callback * @dev: netdevice + * @filter_dev: ignored + * @idx: the number of FDB table entries dumped is added to *@idx * * Default netdevice operation to dump the existing unicast address list. * Returns number of addresses from list put in skb. @@ -3889,20 +4887,20 @@ static int valid_fdb_dump_strict(const struct nlmsghdr *nlh, struct ndmsg *ndm; int err, i; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) { + ndm = nlmsg_payload(nlh, sizeof(*ndm)); + if (!ndm) { NL_SET_ERR_MSG(extack, "Invalid header for fdb dump request"); return -EINVAL; } - ndm = nlmsg_data(nlh); if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state || ndm->ndm_flags || ndm->ndm_type) { - NL_SET_ERR_MSG(extack, "Invalid values in header for fbd dump request"); + NL_SET_ERR_MSG(extack, "Invalid values in header for fdb dump request"); return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX, - NULL, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb, + NDA_MAX, NULL, extack); if (err < 0) return err; @@ -3954,8 +4952,9 @@ static int valid_fdb_dump_legacy(const struct nlmsghdr *nlh, nla_attr_size(sizeof(u32)))) { struct ifinfomsg *ifm; - err = nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, - ifla_policy, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(struct ifinfomsg), + tb, IFLA_MAX, ifla_policy, + extack); if (err < 0) { return -EINVAL; } else if (err == 0) { @@ -3971,18 +4970,16 @@ static int valid_fdb_dump_legacy(const struct nlmsghdr *nlh, static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) { - struct net_device *dev; - struct net_device *br_dev = NULL; - const struct net_device_ops *ops = NULL; - const struct net_device_ops *cops = NULL; + const struct net_device_ops *ops = NULL, *cops = NULL; + struct ndo_fdb_dump_context *ctx = (void *)cb->ctx; + struct net_device *dev, *br_dev = NULL; struct net *net = sock_net(skb->sk); - struct hlist_head *head; int brport_idx = 0; int br_idx = 0; - int h, s_h; - int idx = 0, s_idx; - int err = 0; int fidx = 0; + int err; + + NL_ASSERT_CTX_FITS(struct ndo_fdb_dump_context); if (cb->strict_check) err = valid_fdb_dump_strict(cb->nlh, &br_idx, &brport_idx, @@ -4001,70 +4998,51 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) ops = br_dev->netdev_ops; } - s_h = cb->args[0]; - s_idx = cb->args[1]; - - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - idx = 0; - head = &net->dev_index_head[h]; - hlist_for_each_entry(dev, head, index_hlist) { - - if (brport_idx && (dev->ifindex != brport_idx)) - continue; - - if (!br_idx) { /* user did not specify a specific bridge */ - if (dev->priv_flags & IFF_BRIDGE_PORT) { - br_dev = netdev_master_upper_dev_get(dev); - cops = br_dev->netdev_ops; - } - } else { - if (dev != br_dev && - !(dev->priv_flags & IFF_BRIDGE_PORT)) - continue; + for_each_netdev_dump(net, dev, ctx->ifindex) { + if (brport_idx && (dev->ifindex != brport_idx)) + continue; - if (br_dev != netdev_master_upper_dev_get(dev) && - !(dev->priv_flags & IFF_EBRIDGE)) - continue; - cops = ops; + if (!br_idx) { /* user did not specify a specific bridge */ + if (netif_is_bridge_port(dev)) { + br_dev = netdev_master_upper_dev_get(dev); + cops = br_dev->netdev_ops; } + } else { + if (dev != br_dev && + !netif_is_bridge_port(dev)) + continue; - if (idx < s_idx) - goto cont; + if (br_dev != netdev_master_upper_dev_get(dev) && + !netif_is_bridge_master(dev)) + continue; + cops = ops; + } - if (dev->priv_flags & IFF_BRIDGE_PORT) { - if (cops && cops->ndo_fdb_dump) { - err = cops->ndo_fdb_dump(skb, cb, - br_dev, dev, - &fidx); - if (err == -EMSGSIZE) - goto out; - } + if (netif_is_bridge_port(dev)) { + if (cops && cops->ndo_fdb_dump) { + err = cops->ndo_fdb_dump(skb, cb, br_dev, dev, + &fidx); + if (err == -EMSGSIZE) + break; } + } - if (dev->netdev_ops->ndo_fdb_dump) - err = dev->netdev_ops->ndo_fdb_dump(skb, cb, - dev, NULL, - &fidx); - else - err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, - &fidx); - if (err == -EMSGSIZE) - goto out; + if (dev->netdev_ops->ndo_fdb_dump) + err = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL, + &fidx); + else + err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, &fidx); + if (err == -EMSGSIZE) + break; - cops = NULL; + cops = NULL; - /* reset fdb offset to 0 for rest of the interfaces */ - cb->args[2] = 0; - fidx = 0; -cont: - idx++; - } + /* reset fdb offset to 0 for rest of the interfaces */ + ctx->fdb_idx = 0; + fidx = 0; } -out: - cb->args[0] = h; - cb->args[1] = idx; - cb->args[2] = fidx; + ctx->fdb_idx = fidx; return skb->len; } @@ -4077,12 +5055,12 @@ static int valid_fdb_get_strict(const struct nlmsghdr *nlh, struct ndmsg *ndm; int err, i; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) { + ndm = nlmsg_payload(nlh, sizeof(*ndm)); + if (!ndm) { NL_SET_ERR_MSG(extack, "Invalid header for fdb get request"); return -EINVAL; } - ndm = nlmsg_data(nlh); if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state || ndm->ndm_type) { NL_SET_ERR_MSG(extack, "Invalid values in header for fdb get request"); @@ -4094,8 +5072,8 @@ static int valid_fdb_get_strict(const struct nlmsghdr *nlh, return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX, - nda_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb, + NDA_MAX, nda_policy, extack); if (err < 0) return err; @@ -4181,7 +5159,7 @@ static int rtnl_fdb_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (dev) { if (!ndm_flags || (ndm_flags & NTF_MASTER)) { - if (!(dev->priv_flags & IFF_BRIDGE_PORT)) { + if (!netif_is_bridge_port(dev)) { NL_SET_ERR_MSG(extack, "Device is not a bridge port"); return -EINVAL; } @@ -4261,7 +5239,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, ifm->__ifi_pad = 0; ifm->ifi_type = dev->type; ifm->ifi_index = dev->ifindex; - ifm->ifi_flags = dev_get_flags(dev); + ifm->ifi_flags = netif_get_flags(dev); ifm->ifi_change = 0; @@ -4276,7 +5254,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev)))) goto nla_put_failure; - br_afspec = nla_nest_start(skb, IFLA_AF_SPEC); + br_afspec = nla_nest_start_noflag(skb, IFLA_AF_SPEC); if (!br_afspec) goto nla_put_failure; @@ -4300,7 +5278,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, } nla_nest_end(skb, br_afspec); - protinfo = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED); + protinfo = nla_nest_start(skb, IFLA_PROTINFO); if (!protinfo) goto nla_put_failure; @@ -4320,7 +5298,11 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, brport_nla_put_flag(skb, flags, mask, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD) || brport_nla_put_flag(skb, flags, mask, - IFLA_BRPORT_PROXYARP, BR_PROXYARP)) { + IFLA_BRPORT_PROXYARP, BR_PROXYARP) || + brport_nla_put_flag(skb, flags, mask, + IFLA_BRPORT_MCAST_FLOOD, BR_MCAST_FLOOD) || + brport_nla_put_flag(skb, flags, mask, + IFLA_BRPORT_BCAST_FLOOD, BR_BCAST_FLOOD)) { nla_nest_cancel(skb, protinfo); goto nla_put_failure; } @@ -4345,23 +5327,26 @@ static int valid_bridge_getlink_req(const struct nlmsghdr *nlh, if (strict_check) { struct ifinfomsg *ifm; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { + ifm = nlmsg_payload(nlh, sizeof(*ifm)); + if (!ifm) { NL_SET_ERR_MSG(extack, "Invalid header for bridge link dump"); return -EINVAL; } - ifm = nlmsg_data(nlh); if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || ifm->ifi_change || ifm->ifi_index) { NL_SET_ERR_MSG(extack, "Invalid values in header for bridge link dump request"); return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(struct ifinfomsg), tb, - IFLA_MAX, ifla_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, + sizeof(struct ifinfomsg), + tb, IFLA_MAX, ifla_policy, + extack); } else { - err = nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, - IFLA_MAX, ifla_policy, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(struct ifinfomsg), + tb, IFLA_MAX, ifla_policy, + extack); } if (err < 0) return err; @@ -4480,6 +5465,10 @@ static int rtnl_bridge_notify(struct net_device *dev) if (err < 0) goto errout; + /* Notification info is only filled for bridge ports, not the bridge + * device itself. Therefore, a zero notification length is valid and + * should not result in an error. + */ if (!skb->len) goto errout; @@ -4499,10 +5488,9 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; struct net_device *dev; - struct nlattr *br_spec, *attr = NULL; + struct nlattr *br_spec, *attr, *br_flags_attr = NULL; int rem, err = -EOPNOTSUPP; u16 flags = 0; - bool have_flags = false; if (nlmsg_len(nlh) < sizeof(*ifm)) return -EINVAL; @@ -4520,13 +5508,17 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (br_spec) { nla_for_each_nested(attr, br_spec, rem) { - if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !br_flags_attr) { if (nla_len(attr) < sizeof(flags)) return -EINVAL; - have_flags = true; + br_flags_attr = attr; flags = nla_get_u16(attr); - break; + } + + if (nla_type(attr) == IFLA_BRIDGE_MODE) { + if (nla_len(attr) < sizeof(u16)) + return -EINVAL; } } } @@ -4564,8 +5556,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, } } - if (have_flags) - memcpy(nla_data(attr), &flags, sizeof(flags)); + if (br_flags_attr) + memcpy(nla_data(br_flags_attr), &flags, sizeof(flags)); out: return err; } @@ -4596,15 +5588,14 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (br_spec) { - nla_for_each_nested(attr, br_spec, rem) { - if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { - if (nla_len(attr) < sizeof(flags)) - return -EINVAL; + nla_for_each_nested_type(attr, IFLA_BRIDGE_FLAGS, br_spec, + rem) { + if (nla_len(attr) < sizeof(flags)) + return -EINVAL; - have_flags = true; - flags = nla_get_u16(attr); - break; - } + have_flags = true; + flags = nla_get_u16(attr); + break; } } @@ -4652,82 +5643,257 @@ static bool stats_attr_valid(unsigned int mask, int attrid, int idxattr) (!idxattr || idxattr == attrid); } -#define IFLA_OFFLOAD_XSTATS_FIRST (IFLA_OFFLOAD_XSTATS_UNSPEC + 1) -static int rtnl_get_offload_stats_attr_size(int attr_id) +static bool +rtnl_offload_xstats_have_ndo(const struct net_device *dev, int attr_id) { - switch (attr_id) { - case IFLA_OFFLOAD_XSTATS_CPU_HIT: - return sizeof(struct rtnl_link_stats64); - } + return dev->netdev_ops && + dev->netdev_ops->ndo_has_offload_stats && + dev->netdev_ops->ndo_get_offload_stats && + dev->netdev_ops->ndo_has_offload_stats(dev, attr_id); +} - return 0; +static unsigned int +rtnl_offload_xstats_get_size_ndo(const struct net_device *dev, int attr_id) +{ + return rtnl_offload_xstats_have_ndo(dev, attr_id) ? + sizeof(struct rtnl_link_stats64) : 0; } -static int rtnl_get_offload_stats(struct sk_buff *skb, struct net_device *dev, - int *prividx) +static int +rtnl_offload_xstats_fill_ndo(struct net_device *dev, int attr_id, + struct sk_buff *skb) { + unsigned int size = rtnl_offload_xstats_get_size_ndo(dev, attr_id); struct nlattr *attr = NULL; - int attr_id, size; void *attr_data; int err; - if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats && - dev->netdev_ops->ndo_get_offload_stats)) + if (!size) return -ENODATA; - for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST; - attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) { - if (attr_id < *prividx) - continue; + attr = nla_reserve_64bit(skb, attr_id, size, + IFLA_OFFLOAD_XSTATS_UNSPEC); + if (!attr) + return -EMSGSIZE; - size = rtnl_get_offload_stats_attr_size(attr_id); - if (!size) - continue; + attr_data = nla_data(attr); + memset(attr_data, 0, size); - if (!dev->netdev_ops->ndo_has_offload_stats(dev, attr_id)) - continue; + err = dev->netdev_ops->ndo_get_offload_stats(attr_id, dev, attr_data); + if (err) + return err; + + return 0; +} + +static unsigned int +rtnl_offload_xstats_get_size_stats(const struct net_device *dev, + enum netdev_offload_xstats_type type) +{ + bool enabled = netdev_offload_xstats_enabled(dev, type); + + return enabled ? sizeof(struct rtnl_hw_stats64) : 0; +} + +struct rtnl_offload_xstats_request_used { + bool request; + bool used; +}; + +static int +rtnl_offload_xstats_get_stats(struct net_device *dev, + enum netdev_offload_xstats_type type, + struct rtnl_offload_xstats_request_used *ru, + struct rtnl_hw_stats64 *stats, + struct netlink_ext_ack *extack) +{ + bool request; + bool used; + int err; + + request = netdev_offload_xstats_enabled(dev, type); + if (!request) { + used = false; + goto out; + } + + err = netdev_offload_xstats_get(dev, type, stats, &used, extack); + if (err) + return err; + +out: + if (ru) { + ru->request = request; + ru->used = used; + } + return 0; +} - attr = nla_reserve_64bit(skb, attr_id, size, +static int +rtnl_offload_xstats_fill_hw_s_info_one(struct sk_buff *skb, int attr_id, + struct rtnl_offload_xstats_request_used *ru) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, attr_id); + if (!nest) + return -EMSGSIZE; + + if (nla_put_u8(skb, IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST, ru->request)) + goto nla_put_failure; + + if (nla_put_u8(skb, IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED, ru->used)) + goto nla_put_failure; + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int +rtnl_offload_xstats_fill_hw_s_info(struct sk_buff *skb, struct net_device *dev, + struct netlink_ext_ack *extack) +{ + enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3; + struct rtnl_offload_xstats_request_used ru_l3; + struct nlattr *nest; + int err; + + err = rtnl_offload_xstats_get_stats(dev, t_l3, &ru_l3, NULL, extack); + if (err) + return err; + + nest = nla_nest_start(skb, IFLA_OFFLOAD_XSTATS_HW_S_INFO); + if (!nest) + return -EMSGSIZE; + + if (rtnl_offload_xstats_fill_hw_s_info_one(skb, + IFLA_OFFLOAD_XSTATS_L3_STATS, + &ru_l3)) + goto nla_put_failure; + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int rtnl_offload_xstats_fill(struct sk_buff *skb, struct net_device *dev, + int *prividx, u32 off_filter_mask, + struct netlink_ext_ack *extack) +{ + enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3; + int attr_id_hw_s_info = IFLA_OFFLOAD_XSTATS_HW_S_INFO; + int attr_id_l3_stats = IFLA_OFFLOAD_XSTATS_L3_STATS; + int attr_id_cpu_hit = IFLA_OFFLOAD_XSTATS_CPU_HIT; + bool have_data = false; + int err; + + if (*prividx <= attr_id_cpu_hit && + (off_filter_mask & + IFLA_STATS_FILTER_BIT(attr_id_cpu_hit))) { + err = rtnl_offload_xstats_fill_ndo(dev, attr_id_cpu_hit, skb); + if (!err) { + have_data = true; + } else if (err != -ENODATA) { + *prividx = attr_id_cpu_hit; + return err; + } + } + + if (*prividx <= attr_id_hw_s_info && + (off_filter_mask & IFLA_STATS_FILTER_BIT(attr_id_hw_s_info))) { + *prividx = attr_id_hw_s_info; + + err = rtnl_offload_xstats_fill_hw_s_info(skb, dev, extack); + if (err) + return err; + + have_data = true; + *prividx = 0; + } + + if (*prividx <= attr_id_l3_stats && + (off_filter_mask & IFLA_STATS_FILTER_BIT(attr_id_l3_stats))) { + unsigned int size_l3; + struct nlattr *attr; + + *prividx = attr_id_l3_stats; + + size_l3 = rtnl_offload_xstats_get_size_stats(dev, t_l3); + if (!size_l3) + goto skip_l3_stats; + attr = nla_reserve_64bit(skb, attr_id_l3_stats, size_l3, IFLA_OFFLOAD_XSTATS_UNSPEC); if (!attr) - goto nla_put_failure; + return -EMSGSIZE; - attr_data = nla_data(attr); - memset(attr_data, 0, size); - err = dev->netdev_ops->ndo_get_offload_stats(attr_id, dev, - attr_data); + err = rtnl_offload_xstats_get_stats(dev, t_l3, NULL, + nla_data(attr), extack); if (err) - goto get_offload_stats_failure; + return err; + + have_data = true; +skip_l3_stats: + *prividx = 0; } - if (!attr) + if (!have_data) return -ENODATA; *prividx = 0; return 0; +} -nla_put_failure: - err = -EMSGSIZE; -get_offload_stats_failure: - *prividx = attr_id; - return err; +static unsigned int +rtnl_offload_xstats_get_size_hw_s_info_one(const struct net_device *dev, + enum netdev_offload_xstats_type type) +{ + return nla_total_size(0) + + /* IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST */ + nla_total_size(sizeof(u8)) + + /* IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED */ + nla_total_size(sizeof(u8)) + + 0; +} + +static unsigned int +rtnl_offload_xstats_get_size_hw_s_info(const struct net_device *dev) +{ + enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3; + + return nla_total_size(0) + + /* IFLA_OFFLOAD_XSTATS_L3_STATS */ + rtnl_offload_xstats_get_size_hw_s_info_one(dev, t_l3) + + 0; } -static int rtnl_get_offload_stats_size(const struct net_device *dev) +static int rtnl_offload_xstats_get_size(const struct net_device *dev, + u32 off_filter_mask) { + enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3; + int attr_id_cpu_hit = IFLA_OFFLOAD_XSTATS_CPU_HIT; int nla_size = 0; - int attr_id; int size; - if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats && - dev->netdev_ops->ndo_get_offload_stats)) - return 0; + if (off_filter_mask & + IFLA_STATS_FILTER_BIT(attr_id_cpu_hit)) { + size = rtnl_offload_xstats_get_size_ndo(dev, attr_id_cpu_hit); + nla_size += nla_total_size_64bit(size); + } - for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST; - attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) { - if (!dev->netdev_ops->ndo_has_offload_stats(dev, attr_id)) - continue; - size = rtnl_get_offload_stats_attr_size(attr_id); + if (off_filter_mask & + IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_HW_S_INFO)) + nla_size += rtnl_offload_xstats_get_size_hw_s_info(dev); + + if (off_filter_mask & + IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_L3_STATS)) { + size = rtnl_offload_xstats_get_size_stats(dev, t_l3); nla_size += nla_total_size_64bit(size); } @@ -4737,11 +5903,21 @@ static int rtnl_get_offload_stats_size(const struct net_device *dev) return nla_size; } +struct rtnl_stats_dump_filters { + /* mask[0] filters outer attributes. Then individual nests have their + * filtering mask at the index of the nested attribute. + */ + u32 mask[IFLA_STATS_MAX + 1]; +}; + static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, - unsigned int flags, unsigned int filter_mask, - int *idxattr, int *prividx) + unsigned int flags, + const struct rtnl_stats_dump_filters *filters, + int *idxattr, int *prividx, + struct netlink_ext_ack *extack) { + unsigned int filter_mask = filters->mask[0]; struct if_stats_msg *ifsm; struct nlmsghdr *nlh; struct nlattr *attr; @@ -4767,8 +5943,10 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, attr = nla_reserve_64bit(skb, IFLA_STATS_LINK_64, sizeof(struct rtnl_link_stats64), IFLA_STATS_UNSPEC); - if (!attr) + if (!attr) { + err = -EMSGSIZE; goto nla_put_failure; + } sp = nla_data(attr); dev_get_stats(dev, sp); @@ -4779,10 +5957,12 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, if (ops && ops->fill_linkxstats) { *idxattr = IFLA_STATS_LINK_XSTATS; - attr = nla_nest_start(skb, - IFLA_STATS_LINK_XSTATS); - if (!attr) + attr = nla_nest_start_noflag(skb, + IFLA_STATS_LINK_XSTATS); + if (!attr) { + err = -EMSGSIZE; goto nla_put_failure; + } err = ops->fill_linkxstats(skb, dev, prividx, *idxattr); nla_nest_end(skb, attr); @@ -4802,10 +5982,12 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, ops = master->rtnl_link_ops; if (ops && ops->fill_linkxstats) { *idxattr = IFLA_STATS_LINK_XSTATS_SLAVE; - attr = nla_nest_start(skb, - IFLA_STATS_LINK_XSTATS_SLAVE); - if (!attr) + attr = nla_nest_start_noflag(skb, + IFLA_STATS_LINK_XSTATS_SLAVE); + if (!attr) { + err = -EMSGSIZE; goto nla_put_failure; + } err = ops->fill_linkxstats(skb, dev, prividx, *idxattr); nla_nest_end(skb, attr); @@ -4817,12 +5999,19 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, *idxattr)) { + u32 off_filter_mask; + + off_filter_mask = filters->mask[IFLA_STATS_LINK_OFFLOAD_XSTATS]; *idxattr = IFLA_STATS_LINK_OFFLOAD_XSTATS; - attr = nla_nest_start(skb, IFLA_STATS_LINK_OFFLOAD_XSTATS); - if (!attr) + attr = nla_nest_start_noflag(skb, + IFLA_STATS_LINK_OFFLOAD_XSTATS); + if (!attr) { + err = -EMSGSIZE; goto nla_put_failure; + } - err = rtnl_get_offload_stats(skb, dev, prividx); + err = rtnl_offload_xstats_fill(skb, dev, prividx, + off_filter_mask, extack); if (err == -ENODATA) nla_nest_cancel(skb, attr); else @@ -4837,19 +6026,22 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, struct rtnl_af_ops *af_ops; *idxattr = IFLA_STATS_AF_SPEC; - attr = nla_nest_start(skb, IFLA_STATS_AF_SPEC); - if (!attr) + attr = nla_nest_start_noflag(skb, IFLA_STATS_AF_SPEC); + if (!attr) { + err = -EMSGSIZE; goto nla_put_failure; + } rcu_read_lock(); list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) { if (af_ops->fill_stats_af) { struct nlattr *af; - int err; - af = nla_nest_start(skb, af_ops->family); + af = nla_nest_start_noflag(skb, + af_ops->family); if (!af) { rcu_read_unlock(); + err = -EMSGSIZE; goto nla_put_failure; } err = af_ops->fill_stats_af(skb, dev); @@ -4882,13 +6074,14 @@ nla_put_failure: else nlmsg_end(skb, nlh); - return -EMSGSIZE; + return err; } static size_t if_nlmsg_stats_size(const struct net_device *dev, - u32 filter_mask) + const struct rtnl_stats_dump_filters *filters) { - size_t size = 0; + size_t size = NLMSG_ALIGN(sizeof(struct if_stats_msg)); + unsigned int filter_mask = filters->mask[0]; if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, 0)) size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64)); @@ -4924,8 +6117,12 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev, } } - if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, 0)) - size += rtnl_get_offload_stats_size(dev); + if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, 0)) { + u32 off_filter_mask; + + off_filter_mask = filters->mask[IFLA_STATS_LINK_OFFLOAD_XSTATS]; + size += rtnl_offload_xstats_get_size(dev, off_filter_mask); + } if (stats_attr_valid(filter_mask, IFLA_STATS_AF_SPEC, 0)) { struct rtnl_af_ops *af_ops; @@ -4949,12 +6146,86 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev, return size; } +#define RTNL_STATS_OFFLOAD_XSTATS_VALID ((1 << __IFLA_OFFLOAD_XSTATS_MAX) - 1) + +static const struct nla_policy +rtnl_stats_get_policy_filters[IFLA_STATS_MAX + 1] = { + [IFLA_STATS_LINK_OFFLOAD_XSTATS] = + NLA_POLICY_MASK(NLA_U32, RTNL_STATS_OFFLOAD_XSTATS_VALID), +}; + +static const struct nla_policy +rtnl_stats_get_policy[IFLA_STATS_GETSET_MAX + 1] = { + [IFLA_STATS_GET_FILTERS] = + NLA_POLICY_NESTED(rtnl_stats_get_policy_filters), +}; + +static const struct nla_policy +ifla_stats_set_policy[IFLA_STATS_GETSET_MAX + 1] = { + [IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS] = NLA_POLICY_MAX(NLA_U8, 1), +}; + +static int rtnl_stats_get_parse_filters(struct nlattr *ifla_filters, + struct rtnl_stats_dump_filters *filters, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_STATS_MAX + 1]; + int err; + int at; + + err = nla_parse_nested(tb, IFLA_STATS_MAX, ifla_filters, + rtnl_stats_get_policy_filters, extack); + if (err < 0) + return err; + + for (at = 1; at <= IFLA_STATS_MAX; at++) { + if (tb[at]) { + if (!(filters->mask[0] & IFLA_STATS_FILTER_BIT(at))) { + NL_SET_ERR_MSG(extack, "Filtered attribute not enabled in filter_mask"); + return -EINVAL; + } + filters->mask[at] = nla_get_u32(tb[at]); + } + } + + return 0; +} + +static int rtnl_stats_get_parse(const struct nlmsghdr *nlh, + u32 filter_mask, + struct rtnl_stats_dump_filters *filters, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_STATS_GETSET_MAX + 1]; + int err; + int i; + + filters->mask[0] = filter_mask; + for (i = 1; i < ARRAY_SIZE(filters->mask); i++) + filters->mask[i] = -1U; + + err = nlmsg_parse(nlh, sizeof(struct if_stats_msg), tb, + IFLA_STATS_GETSET_MAX, rtnl_stats_get_policy, extack); + if (err < 0) + return err; + + if (tb[IFLA_STATS_GET_FILTERS]) { + err = rtnl_stats_get_parse_filters(tb[IFLA_STATS_GET_FILTERS], + filters, extack); + if (err) + return err; + } + + return 0; +} + static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check, bool is_dump, struct netlink_ext_ack *extack) { struct if_stats_msg *ifsm; - if (nlh->nlmsg_len < sizeof(*ifsm)) { + ifsm = nlmsg_payload(nlh, sizeof(*ifsm)); + if (!ifsm) { NL_SET_ERR_MSG(extack, "Invalid header for stats dump"); return -EINVAL; } @@ -4962,8 +6233,6 @@ static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check, if (!strict_check) return 0; - ifsm = nlmsg_data(nlh); - /* only requests using strict checks can pass data to influence * the dump. The legacy exception is filter_mask. */ @@ -4971,10 +6240,6 @@ static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check, NL_SET_ERR_MSG(extack, "Invalid values in header for stats dump request"); return -EINVAL; } - if (nlmsg_attrlen(nlh, sizeof(*ifsm))) { - NL_SET_ERR_MSG(extack, "Invalid attributes after stats header"); - return -EINVAL; - } if (ifsm->filter_mask >= IFLA_STATS_FILTER_BIT(IFLA_STATS_MAX + 1)) { NL_SET_ERR_MSG(extack, "Invalid stats requested through filter mask"); return -EINVAL; @@ -4986,12 +6251,12 @@ static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check, static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + struct rtnl_stats_dump_filters filters; struct net *net = sock_net(skb->sk); struct net_device *dev = NULL; int idxattr = 0, prividx = 0; struct if_stats_msg *ifsm; struct sk_buff *nskb; - u32 filter_mask; int err; err = rtnl_valid_stats_req(nlh, netlink_strict_get_check(skb), @@ -5008,17 +6273,22 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh, if (!dev) return -ENODEV; - filter_mask = ifsm->filter_mask; - if (!filter_mask) + if (!ifsm->filter_mask) { + NL_SET_ERR_MSG(extack, "Filter mask must be set for stats get"); return -EINVAL; + } + + err = rtnl_stats_get_parse(nlh, ifsm->filter_mask, &filters, extack); + if (err) + return err; - nskb = nlmsg_new(if_nlmsg_stats_size(dev, filter_mask), GFP_KERNEL); + nskb = nlmsg_new(if_nlmsg_stats_size(dev, &filters), GFP_KERNEL); if (!nskb) return -ENOBUFS; err = rtnl_fill_statsinfo(nskb, dev, RTM_NEWSTATS, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, - 0, filter_mask, &idxattr, &prividx); + 0, &filters, &idxattr, &prividx, extack); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_stats_size */ WARN_ON(err == -EMSGSIZE); @@ -5033,19 +6303,17 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh, static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct netlink_ext_ack *extack = cb->extack; - int h, s_h, err, s_idx, s_idxattr, s_prividx; + struct rtnl_stats_dump_filters filters; struct net *net = sock_net(skb->sk); unsigned int flags = NLM_F_MULTI; struct if_stats_msg *ifsm; - struct hlist_head *head; + struct { + unsigned long ifindex; + int idxattr; + int prividx; + } *ctx = (void *)cb->ctx; struct net_device *dev; - u32 filter_mask = 0; - int idx = 0; - - s_h = cb->args[0]; - s_idx = cb->args[1]; - s_idxattr = cb->args[2]; - s_prividx = cb->args[3]; + int err; cb->seq = net->dev_base_seq; @@ -5054,58 +6322,550 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb) return err; ifsm = nlmsg_data(cb->nlh); - filter_mask = ifsm->filter_mask; - if (!filter_mask) { + if (!ifsm->filter_mask) { NL_SET_ERR_MSG(extack, "Filter mask must be set for stats dump"); return -EINVAL; } - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - idx = 0; - head = &net->dev_index_head[h]; - hlist_for_each_entry(dev, head, index_hlist) { - if (idx < s_idx) - goto cont; - err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, 0, - flags, filter_mask, - &s_idxattr, &s_prividx); - /* If we ran out of room on the first message, - * we're in trouble - */ - WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); + err = rtnl_stats_get_parse(cb->nlh, ifsm->filter_mask, &filters, + extack); + if (err) + return err; - if (err < 0) - goto out; - s_prividx = 0; - s_idxattr = 0; - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); -cont: - idx++; - } + for_each_netdev_dump(net, dev, ctx->ifindex) { + err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, 0, + flags, &filters, + &ctx->idxattr, &ctx->prividx, + extack); + /* If we ran out of room on the first message, + * we're in trouble. + */ + WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); + + if (err < 0) + break; + ctx->prividx = 0; + ctx->idxattr = 0; + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + } + + return err; +} + +void rtnl_offload_xstats_notify(struct net_device *dev) +{ + struct rtnl_stats_dump_filters response_filters = {}; + struct net *net = dev_net(dev); + int idxattr = 0, prividx = 0; + struct sk_buff *skb; + int err = -ENOBUFS; + + ASSERT_RTNL(); + + response_filters.mask[0] |= + IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_OFFLOAD_XSTATS); + response_filters.mask[IFLA_STATS_LINK_OFFLOAD_XSTATS] |= + IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_HW_S_INFO); + + skb = nlmsg_new(if_nlmsg_stats_size(dev, &response_filters), + GFP_KERNEL); + if (!skb) + goto errout; + + err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS, 0, 0, 0, 0, + &response_filters, &idxattr, &prividx, NULL); + if (err < 0) { + kfree_skb(skb); + goto errout; + } + + rtnl_notify(skb, net, 0, RTNLGRP_STATS, NULL, GFP_KERNEL); + return; + +errout: + rtnl_set_sk_err(net, RTNLGRP_STATS, err); +} +EXPORT_SYMBOL(rtnl_offload_xstats_notify); + +static int rtnl_stats_set(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3; + struct rtnl_stats_dump_filters response_filters = {}; + struct nlattr *tb[IFLA_STATS_GETSET_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct net_device *dev = NULL; + struct if_stats_msg *ifsm; + bool notify = false; + int err; + + err = rtnl_valid_stats_req(nlh, netlink_strict_get_check(skb), + false, extack); + if (err) + return err; + + ifsm = nlmsg_data(nlh); + if (ifsm->family != AF_UNSPEC) { + NL_SET_ERR_MSG(extack, "Address family should be AF_UNSPEC"); + return -EINVAL; + } + + if (ifsm->ifindex > 0) + dev = __dev_get_by_index(net, ifsm->ifindex); + else + return -EINVAL; + + if (!dev) + return -ENODEV; + + if (ifsm->filter_mask) { + NL_SET_ERR_MSG(extack, "Filter mask must be 0 for stats set"); + return -EINVAL; + } + + err = nlmsg_parse(nlh, sizeof(*ifsm), tb, IFLA_STATS_GETSET_MAX, + ifla_stats_set_policy, extack); + if (err < 0) + return err; + + if (tb[IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS]) { + u8 req = nla_get_u8(tb[IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS]); + + if (req) + err = netdev_offload_xstats_enable(dev, t_l3, extack); + else + err = netdev_offload_xstats_disable(dev, t_l3); + + if (!err) + notify = true; + else if (err != -EALREADY) + return err; + + response_filters.mask[0] |= + IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_OFFLOAD_XSTATS); + response_filters.mask[IFLA_STATS_LINK_OFFLOAD_XSTATS] |= + IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_HW_S_INFO); + } + + if (notify) + rtnl_offload_xstats_notify(dev); + + return 0; +} + +static int rtnl_mdb_valid_dump_req(const struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct br_port_msg *bpm; + + bpm = nlmsg_payload(nlh, sizeof(*bpm)); + if (!bpm) { + NL_SET_ERR_MSG(extack, "Invalid header for mdb dump request"); + return -EINVAL; + } + + if (bpm->ifindex) { + NL_SET_ERR_MSG(extack, "Filtering by device index is not supported for mdb dump request"); + return -EINVAL; + } + if (nlmsg_attrlen(nlh, sizeof(*bpm))) { + NL_SET_ERR_MSG(extack, "Invalid data after header in mdb dump request"); + return -EINVAL; + } + + return 0; +} + +struct rtnl_mdb_dump_ctx { + long idx; +}; + +static int rtnl_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct rtnl_mdb_dump_ctx *ctx = (void *)cb->ctx; + struct net *net = sock_net(skb->sk); + struct net_device *dev; + int idx, s_idx; + int err; + + NL_ASSERT_CTX_FITS(struct rtnl_mdb_dump_ctx); + + if (cb->strict_check) { + err = rtnl_mdb_valid_dump_req(cb->nlh, cb->extack); + if (err) + return err; + } + + s_idx = ctx->idx; + idx = 0; + + for_each_netdev(net, dev) { + if (idx < s_idx) + goto skip; + if (!dev->netdev_ops->ndo_mdb_dump) + goto skip; + + err = dev->netdev_ops->ndo_mdb_dump(dev, skb, cb); + if (err == -EMSGSIZE) + goto out; + /* Moving on to next device, reset markers and sequence + * counters since they are all maintained per-device. + */ + memset(cb->ctx, 0, sizeof(cb->ctx)); + cb->prev_seq = 0; + cb->seq = 0; +skip: + idx++; } -out: - cb->args[3] = s_prividx; - cb->args[2] = s_idxattr; - cb->args[1] = idx; - cb->args[0] = h; +out: + ctx->idx = idx; return skb->len; } +static int rtnl_validate_mdb_entry_get(const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct br_mdb_entry *entry = nla_data(attr); + + if (nla_len(attr) != sizeof(struct br_mdb_entry)) { + NL_SET_ERR_MSG_ATTR(extack, attr, "Invalid attribute length"); + return -EINVAL; + } + + if (entry->ifindex) { + NL_SET_ERR_MSG(extack, "Entry ifindex cannot be specified"); + return -EINVAL; + } + + if (entry->state) { + NL_SET_ERR_MSG(extack, "Entry state cannot be specified"); + return -EINVAL; + } + + if (entry->flags) { + NL_SET_ERR_MSG(extack, "Entry flags cannot be specified"); + return -EINVAL; + } + + if (entry->vid >= VLAN_VID_MASK) { + NL_SET_ERR_MSG(extack, "Invalid entry VLAN id"); + return -EINVAL; + } + + if (entry->addr.proto != htons(ETH_P_IP) && + entry->addr.proto != htons(ETH_P_IPV6) && + entry->addr.proto != 0) { + NL_SET_ERR_MSG(extack, "Unknown entry protocol"); + return -EINVAL; + } + + return 0; +} + +static const struct nla_policy mdba_get_policy[MDBA_GET_ENTRY_MAX + 1] = { + [MDBA_GET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, + rtnl_validate_mdb_entry_get, + sizeof(struct br_mdb_entry)), + [MDBA_GET_ENTRY_ATTRS] = { .type = NLA_NESTED }, +}; + +static int rtnl_mdb_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[MDBA_GET_ENTRY_MAX + 1]; + struct net *net = sock_net(in_skb->sk); + struct br_port_msg *bpm; + struct net_device *dev; + int err; + + err = nlmsg_parse(nlh, sizeof(struct br_port_msg), tb, + MDBA_GET_ENTRY_MAX, mdba_get_policy, extack); + if (err) + return err; + + bpm = nlmsg_data(nlh); + if (!bpm->ifindex) { + NL_SET_ERR_MSG(extack, "Invalid ifindex"); + return -EINVAL; + } + + dev = __dev_get_by_index(net, bpm->ifindex); + if (!dev) { + NL_SET_ERR_MSG(extack, "Device doesn't exist"); + return -ENODEV; + } + + if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_GET_ENTRY)) { + NL_SET_ERR_MSG(extack, "Missing MDBA_GET_ENTRY attribute"); + return -EINVAL; + } + + if (!dev->netdev_ops->ndo_mdb_get) { + NL_SET_ERR_MSG(extack, "Device does not support MDB operations"); + return -EOPNOTSUPP; + } + + return dev->netdev_ops->ndo_mdb_get(dev, tb, NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, extack); +} + +static int rtnl_validate_mdb_entry(const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct br_mdb_entry *entry = nla_data(attr); + + if (nla_len(attr) != sizeof(struct br_mdb_entry)) { + NL_SET_ERR_MSG_ATTR(extack, attr, "Invalid attribute length"); + return -EINVAL; + } + + if (entry->ifindex == 0) { + NL_SET_ERR_MSG(extack, "Zero entry ifindex is not allowed"); + return -EINVAL; + } + + if (entry->addr.proto == htons(ETH_P_IP)) { + if (!ipv4_is_multicast(entry->addr.u.ip4) && + !ipv4_is_zeronet(entry->addr.u.ip4)) { + NL_SET_ERR_MSG(extack, "IPv4 entry group address is not multicast or 0.0.0.0"); + return -EINVAL; + } + if (ipv4_is_local_multicast(entry->addr.u.ip4)) { + NL_SET_ERR_MSG(extack, "IPv4 entry group address is local multicast"); + return -EINVAL; + } +#if IS_ENABLED(CONFIG_IPV6) + } else if (entry->addr.proto == htons(ETH_P_IPV6)) { + if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6)) { + NL_SET_ERR_MSG(extack, "IPv6 entry group address is link-local all nodes"); + return -EINVAL; + } +#endif + } else if (entry->addr.proto == 0) { + /* L2 mdb */ + if (!is_multicast_ether_addr(entry->addr.u.mac_addr)) { + NL_SET_ERR_MSG(extack, "L2 entry group is not multicast"); + return -EINVAL; + } + } else { + NL_SET_ERR_MSG(extack, "Unknown entry protocol"); + return -EINVAL; + } + + if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) { + NL_SET_ERR_MSG(extack, "Unknown entry state"); + return -EINVAL; + } + if (entry->vid >= VLAN_VID_MASK) { + NL_SET_ERR_MSG(extack, "Invalid entry VLAN id"); + return -EINVAL; + } + + return 0; +} + +static const struct nla_policy mdba_policy[MDBA_SET_ENTRY_MAX + 1] = { + [MDBA_SET_ENTRY_UNSPEC] = { .strict_start_type = MDBA_SET_ENTRY_ATTRS + 1 }, + [MDBA_SET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, + rtnl_validate_mdb_entry, + sizeof(struct br_mdb_entry)), + [MDBA_SET_ENTRY_ATTRS] = { .type = NLA_NESTED }, +}; + +static int rtnl_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct br_port_msg *bpm; + struct net_device *dev; + int err; + + err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb, + MDBA_SET_ENTRY_MAX, mdba_policy, extack); + if (err) + return err; + + bpm = nlmsg_data(nlh); + if (!bpm->ifindex) { + NL_SET_ERR_MSG(extack, "Invalid ifindex"); + return -EINVAL; + } + + dev = __dev_get_by_index(net, bpm->ifindex); + if (!dev) { + NL_SET_ERR_MSG(extack, "Device doesn't exist"); + return -ENODEV; + } + + if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY)) { + NL_SET_ERR_MSG(extack, "Missing MDBA_SET_ENTRY attribute"); + return -EINVAL; + } + + if (!dev->netdev_ops->ndo_mdb_add) { + NL_SET_ERR_MSG(extack, "Device does not support MDB operations"); + return -EOPNOTSUPP; + } + + return dev->netdev_ops->ndo_mdb_add(dev, tb, nlh->nlmsg_flags, extack); +} + +static int rtnl_validate_mdb_entry_del_bulk(const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct br_mdb_entry *entry = nla_data(attr); + struct br_mdb_entry zero_entry = {}; + + if (nla_len(attr) != sizeof(struct br_mdb_entry)) { + NL_SET_ERR_MSG_ATTR(extack, attr, "Invalid attribute length"); + return -EINVAL; + } + + if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) { + NL_SET_ERR_MSG(extack, "Unknown entry state"); + return -EINVAL; + } + + if (entry->flags) { + NL_SET_ERR_MSG(extack, "Entry flags cannot be set"); + return -EINVAL; + } + + if (entry->vid >= VLAN_N_VID - 1) { + NL_SET_ERR_MSG(extack, "Invalid entry VLAN id"); + return -EINVAL; + } + + if (memcmp(&entry->addr, &zero_entry.addr, sizeof(entry->addr))) { + NL_SET_ERR_MSG(extack, "Entry address cannot be set"); + return -EINVAL; + } + + return 0; +} + +static const struct nla_policy mdba_del_bulk_policy[MDBA_SET_ENTRY_MAX + 1] = { + [MDBA_SET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, + rtnl_validate_mdb_entry_del_bulk, + sizeof(struct br_mdb_entry)), + [MDBA_SET_ENTRY_ATTRS] = { .type = NLA_NESTED }, +}; + +static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + bool del_bulk = !!(nlh->nlmsg_flags & NLM_F_BULK); + struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct br_port_msg *bpm; + struct net_device *dev; + int err; + + if (!del_bulk) + err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb, + MDBA_SET_ENTRY_MAX, mdba_policy, + extack); + else + err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, + mdba_del_bulk_policy, extack); + if (err) + return err; + + bpm = nlmsg_data(nlh); + if (!bpm->ifindex) { + NL_SET_ERR_MSG(extack, "Invalid ifindex"); + return -EINVAL; + } + + dev = __dev_get_by_index(net, bpm->ifindex); + if (!dev) { + NL_SET_ERR_MSG(extack, "Device doesn't exist"); + return -ENODEV; + } + + if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY)) { + NL_SET_ERR_MSG(extack, "Missing MDBA_SET_ENTRY attribute"); + return -EINVAL; + } + + if (del_bulk) { + if (!dev->netdev_ops->ndo_mdb_del_bulk) { + NL_SET_ERR_MSG(extack, "Device does not support MDB bulk deletion"); + return -EOPNOTSUPP; + } + return dev->netdev_ops->ndo_mdb_del_bulk(dev, tb, extack); + } + + if (!dev->netdev_ops->ndo_mdb_del) { + NL_SET_ERR_MSG(extack, "Device does not support MDB operations"); + return -EOPNOTSUPP; + } + + return dev->netdev_ops->ndo_mdb_del(dev, tb, extack); +} + /* Process one rtnetlink message. */ +static int rtnl_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + const bool needs_lock = !(cb->flags & RTNL_FLAG_DUMP_UNLOCKED); + rtnl_dumpit_func dumpit = cb->data; + int err; + + /* Previous iteration have already finished, avoid calling->dumpit() + * again, it may not expect to be called after it reached the end. + */ + if (!dumpit) + return 0; + + if (needs_lock) + rtnl_lock(); + err = dumpit(skb, cb); + if (needs_lock) + rtnl_unlock(); + + /* Old dump handlers used to send NLM_DONE as in a separate recvmsg(). + * Some applications which parse netlink manually depend on this. + */ + if (cb->flags & RTNL_FLAG_DUMP_SPLIT_NLM_DONE) { + if (err < 0 && err != -EMSGSIZE) + return err; + if (!err) + cb->data = NULL; + + return skb->len; + } + return err; +} + +static int rtnetlink_dump_start(struct sock *ssk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct netlink_dump_control *control) +{ + if (control->flags & RTNL_FLAG_DUMP_SPLIT_NLM_DONE || + !(control->flags & RTNL_FLAG_DUMP_UNLOCKED)) { + WARN_ON(control->data); + control->data = control->dump; + control->dump = rtnl_dumpit; + } + + return netlink_dump_start(ssk, skb, nlh, control); +} + static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct rtnl_link *link; + enum rtnl_kinds kind; struct module *owner; int err = -EOPNOTSUPP; rtnl_doit_func doit; unsigned int flags; - int kind; int family; int type; @@ -5120,16 +6880,16 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family; - kind = type&3; + kind = rtnl_msgtype_kind(type); - if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN)) + if (kind != RTNL_KIND_GET && !netlink_net_capable(skb, CAP_NET_ADMIN)) return -EPERM; rcu_read_lock(); - if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { + if (kind == RTNL_KIND_GET && (nlh->nlmsg_flags & NLM_F_DUMP)) { struct sock *rtnl; rtnl_dumpit_func dumpit; - u16 min_dump_alloc = 0; + u32 min_dump_alloc = 0; link = rtnl_get_link(family, type); if (!link || !link->dumpit) { @@ -5140,6 +6900,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, } owner = link->owner; dumpit = link->dumpit; + flags = link->flags; if (type == RTM_GETLINK - RTM_BASE) min_dump_alloc = rtnl_calcit(skb, nlh); @@ -5157,8 +6918,9 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, .dump = dumpit, .min_dump_alloc = min_dump_alloc, .module = owner, + .flags = flags, }; - err = netlink_dump_start(rtnl, skb, nlh, &c); + err = rtnetlink_dump_start(rtnl, skb, nlh, &c); /* netlink_dump_start() will keep a reference on * module if dump is still in progress. */ @@ -5182,6 +6944,13 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, } flags = link->flags; + if (kind == RTNL_KIND_DEL && (nlh->nlmsg_flags & NLM_F_BULK) && + !(flags & RTNL_FLAG_BULK_DEL_SUPPORTED)) { + NL_SET_ERR_MSG(extack, "Bulk delete is not supported"); + module_put(owner); + goto err_unlock; + } + if (flags & RTNL_FLAG_DOIT_UNLOCKED) { doit = link->doit; rcu_read_unlock(); @@ -5247,7 +7016,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_CHANGELOWERSTATE: case NETDEV_CHANGE_TX_QUEUE_LEN: rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event), - GFP_KERNEL, NULL, 0); + GFP_KERNEL, NULL, 0, 0, NULL); break; default: break; @@ -5266,7 +7035,6 @@ static int __net_init rtnetlink_net_init(struct net *net) struct netlink_kernel_cfg cfg = { .groups = RTNLGRP_MAX, .input = rtnetlink_rcv, - .cb_mutex = &rtnl_mutex, .flags = NL_CFG_F_NONROOT_RECV, .bind = rtnetlink_bind, }; @@ -5289,6 +7057,41 @@ static struct pernet_operations rtnetlink_net_ops = { .exit = rtnetlink_net_exit, }; +static const struct rtnl_msg_handler rtnetlink_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_NEWLINK, .doit = rtnl_newlink, + .flags = RTNL_FLAG_DOIT_PERNET}, + {.msgtype = RTM_DELLINK, .doit = rtnl_dellink, + .flags = RTNL_FLAG_DOIT_PERNET_WIP}, + {.msgtype = RTM_GETLINK, .doit = rtnl_getlink, + .dumpit = rtnl_dump_ifinfo, .flags = RTNL_FLAG_DUMP_SPLIT_NLM_DONE}, + {.msgtype = RTM_SETLINK, .doit = rtnl_setlink, + .flags = RTNL_FLAG_DOIT_PERNET_WIP}, + {.msgtype = RTM_GETADDR, .dumpit = rtnl_dump_all}, + {.msgtype = RTM_GETROUTE, .dumpit = rtnl_dump_all}, + {.msgtype = RTM_GETNETCONF, .dumpit = rtnl_dump_all}, + {.msgtype = RTM_GETSTATS, .doit = rtnl_stats_get, + .dumpit = rtnl_stats_dump}, + {.msgtype = RTM_SETSTATS, .doit = rtnl_stats_set}, + {.msgtype = RTM_NEWLINKPROP, .doit = rtnl_newlinkprop}, + {.msgtype = RTM_DELLINKPROP, .doit = rtnl_dellinkprop}, + {.protocol = PF_BRIDGE, .msgtype = RTM_GETLINK, + .dumpit = rtnl_bridge_getlink}, + {.protocol = PF_BRIDGE, .msgtype = RTM_DELLINK, + .doit = rtnl_bridge_dellink}, + {.protocol = PF_BRIDGE, .msgtype = RTM_SETLINK, + .doit = rtnl_bridge_setlink}, + {.protocol = PF_BRIDGE, .msgtype = RTM_NEWNEIGH, .doit = rtnl_fdb_add}, + {.protocol = PF_BRIDGE, .msgtype = RTM_DELNEIGH, .doit = rtnl_fdb_del, + .flags = RTNL_FLAG_BULK_DEL_SUPPORTED}, + {.protocol = PF_BRIDGE, .msgtype = RTM_GETNEIGH, .doit = rtnl_fdb_get, + .dumpit = rtnl_fdb_dump}, + {.protocol = PF_BRIDGE, .msgtype = RTM_NEWMDB, .doit = rtnl_mdb_add}, + {.protocol = PF_BRIDGE, .msgtype = RTM_DELMDB, .doit = rtnl_mdb_del, + .flags = RTNL_FLAG_BULK_DEL_SUPPORTED}, + {.protocol = PF_BRIDGE, .msgtype = RTM_GETMDB, .doit = rtnl_mdb_get, + .dumpit = rtnl_mdb_dump}, +}; + void __init rtnetlink_init(void) { if (register_pernet_subsys(&rtnetlink_net_ops)) @@ -5296,24 +7099,5 @@ void __init rtnetlink_init(void) register_netdevice_notifier(&rtnetlink_dev_notifier); - rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, - rtnl_dump_ifinfo, 0); - rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, 0); - - rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, 0); - rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, 0); - rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, 0); - - rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0); - rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, 0); - rtnl_register(PF_BRIDGE, RTM_GETNEIGH, rtnl_fdb_get, rtnl_fdb_dump, 0); - - rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0); - rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, 0); - rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, 0); - - rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump, - 0); + rtnl_register_many(rtnetlink_rtnl_msg_handlers); } |
