summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/Makefile1
-rw-r--r--net/core/datagram.c3
-rw-r--r--net/core/dev.c351
-rw-r--r--net/core/dev_ioctl.c14
-rw-r--r--net/core/dst.c16
-rw-r--r--net/core/ethtool.c21
-rw-r--r--net/core/fib_notifier.c10
-rw-r--r--net/core/fib_rules.c15
-rw-r--r--net/core/filter.c399
-rw-r--r--net/core/flow_dissector.c130
-rw-r--r--net/core/gro_cells.c1
-rw-r--r--net/core/lwt_bpf.c2
-rw-r--r--net/core/neighbour.c18
-rw-r--r--net/core/net-procfs.c1
-rw-r--r--net/core/net-sysfs.c28
-rw-r--r--net/core/net-sysfs.h1
-rw-r--r--net/core/net-traces.c4
-rw-r--r--net/core/net_namespace.c1
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/pktgen.c18
-rw-r--r--net/core/rtnetlink.c461
-rw-r--r--net/core/skbuff.c73
-rw-r--r--net/core/sock.c45
-rw-r--r--net/core/sock_reuseport.c13
-rw-r--r--net/core/stream.c1
-rw-r--r--net/core/sysctl_net_core.c1
-rw-r--r--net/core/tso.c1
27 files changed, 1161 insertions, 470 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 56d771a887b6..1fd0a9c88b1b 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux networking core.
#
diff --git a/net/core/datagram.c b/net/core/datagram.c
index f7fb7e3f2acf..522873ed120b 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* SUCS NET3:
*
@@ -188,7 +189,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
}
if (!skb->len) {
skb = skb_set_peeked(skb);
- if (unlikely(IS_ERR(skb))) {
+ if (IS_ERR(skb)) {
*err = PTR_ERR(skb);
return NULL;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 588b473194a8..8ee29f4f5fa9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -145,6 +145,7 @@
#include <linux/crash_dump.h>
#include <linux/sctp.h>
#include <net/udp_tunnel.h>
+#include <linux/net_namespace.h>
#include "net-sysfs.h"
@@ -162,7 +163,6 @@ static struct list_head offload_base __read_mostly;
static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_info(unsigned long val,
- struct net_device *dev,
struct netdev_notifier_info *info);
static struct napi_struct *napi_by_id(unsigned int napi_id);
@@ -188,6 +188,8 @@ static struct napi_struct *napi_by_id(unsigned int napi_id);
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);
+static DEFINE_MUTEX(ifalias_mutex);
+
/* protects napi_hash addition/deletion and napi_gen_id */
static DEFINE_SPINLOCK(napi_hash_lock);
@@ -1062,7 +1064,10 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
unsigned long *inuse;
struct net_device *d;
- p = strnchr(name, IFNAMSIZ-1, '%');
+ if (!dev_valid_name(name))
+ return -EINVAL;
+
+ p = strchr(name, '%');
if (p) {
/*
* Verify the string as this thing may have come from
@@ -1093,8 +1098,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
free_page((unsigned long) inuse);
}
- if (buf != name)
- snprintf(buf, IFNAMSIZ, name, i);
+ snprintf(buf, IFNAMSIZ, name, i);
if (!__dev_get_by_name(net, buf))
return i;
@@ -1102,7 +1106,21 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
* when the name is long and there isn't enough space left
* for the digits, or if all bits are used.
*/
- return -ENFILE;
+ return p ? -ENFILE : -EEXIST;
+}
+
+static int dev_alloc_name_ns(struct net *net,
+ struct net_device *dev,
+ const char *name)
+{
+ char buf[IFNAMSIZ];
+ int ret;
+
+ BUG_ON(!net);
+ ret = __dev_alloc_name(net, name, buf);
+ if (ret >= 0)
+ strlcpy(dev->name, buf, IFNAMSIZ);
+ return ret;
}
/**
@@ -1121,50 +1139,16 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
int dev_alloc_name(struct net_device *dev, const char *name)
{
- char buf[IFNAMSIZ];
- struct net *net;
- int ret;
-
- BUG_ON(!dev_net(dev));
- net = dev_net(dev);
- ret = __dev_alloc_name(net, name, buf);
- if (ret >= 0)
- strlcpy(dev->name, buf, IFNAMSIZ);
- return ret;
+ return dev_alloc_name_ns(dev_net(dev), dev, name);
}
EXPORT_SYMBOL(dev_alloc_name);
-static int dev_alloc_name_ns(struct net *net,
- struct net_device *dev,
- const char *name)
-{
- char buf[IFNAMSIZ];
- int ret;
-
- ret = __dev_alloc_name(net, name, buf);
- if (ret >= 0)
- strlcpy(dev->name, buf, IFNAMSIZ);
- return ret;
-}
-
-static int dev_get_valid_name(struct net *net,
- struct net_device *dev,
- const char *name)
+int dev_get_valid_name(struct net *net, struct net_device *dev,
+ const char *name)
{
- BUG_ON(!net);
-
- if (!dev_valid_name(name))
- return -EINVAL;
-
- if (strchr(name, '%'))
- return dev_alloc_name_ns(net, dev, name);
- else if (__dev_get_by_name(net, name))
- return -EEXIST;
- else if (dev->name != name)
- strlcpy(dev->name, name, IFNAMSIZ);
-
- return 0;
+ return dev_alloc_name_ns(net, dev, name);
}
+EXPORT_SYMBOL(dev_get_valid_name);
/**
* dev_change_name - change name of a device
@@ -1265,29 +1249,53 @@ rollback:
*/
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
- char *new_ifalias;
-
- ASSERT_RTNL();
+ struct dev_ifalias *new_alias = NULL;
if (len >= IFALIASZ)
return -EINVAL;
- if (!len) {
- kfree(dev->ifalias);
- dev->ifalias = NULL;
- return 0;
+ if (len) {
+ new_alias = kmalloc(sizeof(*new_alias) + len + 1, GFP_KERNEL);
+ if (!new_alias)
+ return -ENOMEM;
+
+ memcpy(new_alias->ifalias, alias, len);
+ new_alias->ifalias[len] = 0;
}
- new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
- if (!new_ifalias)
- return -ENOMEM;
- dev->ifalias = new_ifalias;
- memcpy(dev->ifalias, alias, len);
- dev->ifalias[len] = 0;
+ mutex_lock(&ifalias_mutex);
+ rcu_swap_protected(dev->ifalias, new_alias,
+ mutex_is_locked(&ifalias_mutex));
+ mutex_unlock(&ifalias_mutex);
+
+ if (new_alias)
+ kfree_rcu(new_alias, rcuhead);
return len;
}
+/**
+ * dev_get_alias - get ifalias of a device
+ * @dev: device
+ * @name: buffer to store name of ifalias
+ * @len: size of buffer
+ *
+ * get ifalias for a device. Caller must make sure dev cannot go
+ * away, e.g. rcu read lock or own a reference count to device.
+ */
+int dev_get_alias(const struct net_device *dev, char *name, size_t len)
+{
+ const struct dev_ifalias *alias;
+ int ret = 0;
+
+ rcu_read_lock();
+ alias = rcu_dereference(dev->ifalias);
+ if (alias)
+ ret = snprintf(name, len, "%s", alias->ifalias);
+ rcu_read_unlock();
+
+ return ret;
+}
/**
* netdev_features_change - device changes features
@@ -1312,10 +1320,11 @@ EXPORT_SYMBOL(netdev_features_change);
void netdev_state_change(struct net_device *dev)
{
if (dev->flags & IFF_UP) {
- struct netdev_notifier_change_info change_info;
+ struct netdev_notifier_change_info change_info = {
+ .info.dev = dev,
+ };
- change_info.flags_changed = 0;
- call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
+ call_netdevice_notifiers_info(NETDEV_CHANGE,
&change_info.info);
rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
}
@@ -1536,9 +1545,10 @@ EXPORT_SYMBOL(dev_disable_lro);
static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
struct net_device *dev)
{
- struct netdev_notifier_info info;
+ struct netdev_notifier_info info = {
+ .dev = dev,
+ };
- netdev_notifier_info_init(&info, dev);
return nb->notifier_call(nb, val, &info);
}
@@ -1663,11 +1673,9 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
*/
static int call_netdevice_notifiers_info(unsigned long val,
- struct net_device *dev,
struct netdev_notifier_info *info)
{
ASSERT_RTNL();
- netdev_notifier_info_init(info, dev);
return raw_notifier_call_chain(&netdev_chain, val, info);
}
@@ -1682,9 +1690,11 @@ static int call_netdevice_notifiers_info(unsigned long val,
int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
- struct netdev_notifier_info info;
+ struct netdev_notifier_info info = {
+ .dev = dev,
+ };
- return call_netdevice_notifiers_info(val, dev, &info);
+ return call_netdevice_notifiers_info(val, &info);
}
EXPORT_SYMBOL(call_netdevice_notifiers);
@@ -2012,6 +2022,7 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
return 0;
}
+EXPORT_SYMBOL(netdev_txq_to_tc);
#ifdef CONFIG_XPS
static DEFINE_MUTEX(xps_map_mutex);
@@ -3245,22 +3256,22 @@ EXPORT_SYMBOL(dev_loopback_xmit);
static struct sk_buff *
sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
{
- struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list);
+ struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
struct tcf_result cl_res;
- if (!cl)
+ if (!miniq)
return skb;
/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
- qdisc_bstats_cpu_update(cl->q, skb);
+ mini_qdisc_bstats_cpu_update(miniq, skb);
- switch (tcf_classify(skb, cl, &cl_res, false)) {
+ switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
break;
case TC_ACT_SHOT:
- qdisc_qstats_cpu_drop(cl->q);
+ mini_qdisc_qstats_cpu_drop(miniq);
*ret = NET_XMIT_DROP;
kfree_skb(skb);
return NULL;
@@ -3725,7 +3736,7 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
flow_table = rcu_dereference(rxqueue->rps_flow_table);
if (flow_table && flow_id <= flow_table->mask) {
rflow = &flow_table->flows[flow_id];
- cpu = ACCESS_ONCE(rflow->cpu);
+ cpu = READ_ONCE(rflow->cpu);
if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
((int)(per_cpu(softnet_data, cpu).input_queue_head -
rflow->last_qtail) <
@@ -3864,8 +3875,8 @@ drop:
static u32 netif_receive_generic_xdp(struct sk_buff *skb,
struct bpf_prog *xdp_prog)
{
+ u32 metalen, act = XDP_DROP;
struct xdp_buff xdp;
- u32 act = XDP_DROP;
void *orig_data;
int hlen, off;
u32 mac_len;
@@ -3876,8 +3887,25 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
if (skb_cloned(skb))
return XDP_PASS;
- if (skb_linearize(skb))
- goto do_drop;
+ /* XDP packets must be linear and must have sufficient headroom
+ * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
+ * native XDP provides, thus we need to do it here as well.
+ */
+ if (skb_is_nonlinear(skb) ||
+ skb_headroom(skb) < XDP_PACKET_HEADROOM) {
+ int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
+ int troom = skb->tail + skb->data_len - skb->end;
+
+ /* In case we have to go down the path and also linearize,
+ * then lets do the pskb_expand_head() work just once here.
+ */
+ if (pskb_expand_head(skb,
+ hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
+ troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
+ goto do_drop;
+ if (troom > 0 && __skb_linearize(skb))
+ goto do_drop;
+ }
/* The XDP program wants to see the packet starting at the MAC
* header.
@@ -3885,6 +3913,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
mac_len = skb->data - skb_mac_header(skb);
hlen = skb_headlen(skb) + mac_len;
xdp.data = skb->data - mac_len;
+ xdp.data_meta = xdp.data;
xdp.data_end = xdp.data + hlen;
xdp.data_hard_start = skb->data - skb_headroom(skb);
orig_data = xdp.data;
@@ -3902,10 +3931,12 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
case XDP_REDIRECT:
case XDP_TX:
__skb_push(skb, mac_len);
- /* fall through */
+ break;
case XDP_PASS:
+ metalen = xdp.data - xdp.data_meta;
+ if (metalen)
+ skb_metadata_set(skb, metalen);
break;
-
default:
bpf_warn_invalid_xdp_action(act);
/* fall through */
@@ -4140,7 +4171,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
struct net_device *orig_dev)
{
#ifdef CONFIG_NET_CLS_ACT
- struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
+ struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
struct tcf_result cl_res;
/* If there's at least one ingress present somewhere (so
@@ -4148,8 +4179,9 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
* that are not configured with an ingress qdisc will bail
* out here.
*/
- if (!cl)
+ if (!miniq)
return skb;
+
if (*pt_prev) {
*ret = deliver_skb(skb, *pt_prev, orig_dev);
*pt_prev = NULL;
@@ -4157,15 +4189,15 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
qdisc_skb_cb(skb)->pkt_len = skb->len;
skb->tc_at_ingress = 1;
- qdisc_bstats_cpu_update(cl->q, skb);
+ mini_qdisc_bstats_cpu_update(miniq, skb);
- switch (tcf_classify(skb, cl, &cl_res, false)) {
+ switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
break;
case TC_ACT_SHOT:
- qdisc_qstats_cpu_drop(cl->q);
+ mini_qdisc_qstats_cpu_drop(miniq);
kfree_skb(skb);
return NULL;
case TC_ACT_STOLEN:
@@ -4443,6 +4475,33 @@ out:
return ret;
}
+/**
+ * netif_receive_skb_core - special purpose version of netif_receive_skb
+ * @skb: buffer to process
+ *
+ * More direct receive version of netif_receive_skb(). It should
+ * only be used by callers that have a need to skip RPS and Generic XDP.
+ * Caller must also take care of handling if (page_is_)pfmemalloc.
+ *
+ * This function may only be called from softirq context and interrupts
+ * should be enabled.
+ *
+ * Return values (usually ignored):
+ * NET_RX_SUCCESS: no congestion
+ * NET_RX_DROP: packet was dropped
+ */
+int netif_receive_skb_core(struct sk_buff *skb)
+{
+ int ret;
+
+ rcu_read_lock();
+ ret = __netif_receive_skb_core(skb, false);
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(netif_receive_skb_core);
+
static int __netif_receive_skb(struct sk_buff *skb)
{
int ret;
@@ -4468,7 +4527,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
return ret;
}
-static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
+static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
{
struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
struct bpf_prog *new = xdp->prog;
@@ -4695,6 +4754,7 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
diffs |= p->vlan_tci ^ skb->vlan_tci;
diffs |= skb_metadata_dst_cmp(p, skb);
+ diffs |= skb_metadata_differs(p, skb);
if (maclen == ETH_HLEN)
diffs |= compare_ether_header(skb_mac_header(p),
skb_mac_header(skb));
@@ -6228,9 +6288,19 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
static int __netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev, bool master,
- void *upper_priv, void *upper_info)
-{
- struct netdev_notifier_changeupper_info changeupper_info;
+ void *upper_priv, void *upper_info,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_changeupper_info changeupper_info = {
+ .info = {
+ .dev = dev,
+ .extack = extack,
+ },
+ .upper_dev = upper_dev,
+ .master = master,
+ .linking = true,
+ .upper_info = upper_info,
+ };
int ret = 0;
ASSERT_RTNL();
@@ -6248,12 +6318,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (master && netdev_master_upper_dev_get(dev))
return -EBUSY;
- changeupper_info.upper_dev = upper_dev;
- changeupper_info.master = master;
- changeupper_info.linking = true;
- changeupper_info.upper_info = upper_info;
-
- ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+ ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
&changeupper_info.info);
ret = notifier_to_errno(ret);
if (ret)
@@ -6264,7 +6329,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (ret)
return ret;
- ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+ ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
&changeupper_info.info);
ret = notifier_to_errno(ret);
if (ret)
@@ -6289,9 +6354,11 @@ rollback:
* returns zero.
*/
int netdev_upper_dev_link(struct net_device *dev,
- struct net_device *upper_dev)
+ struct net_device *upper_dev,
+ struct netlink_ext_ack *extack)
{
- return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL);
+ return __netdev_upper_dev_link(dev, upper_dev, false,
+ NULL, NULL, extack);
}
EXPORT_SYMBOL(netdev_upper_dev_link);
@@ -6310,10 +6377,11 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
*/
int netdev_master_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev,
- void *upper_priv, void *upper_info)
+ void *upper_priv, void *upper_info,
+ struct netlink_ext_ack *extack)
{
return __netdev_upper_dev_link(dev, upper_dev, true,
- upper_priv, upper_info);
+ upper_priv, upper_info, extack);
}
EXPORT_SYMBOL(netdev_master_upper_dev_link);
@@ -6328,20 +6396,24 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link);
void netdev_upper_dev_unlink(struct net_device *dev,
struct net_device *upper_dev)
{
- struct netdev_notifier_changeupper_info changeupper_info;
+ struct netdev_notifier_changeupper_info changeupper_info = {
+ .info = {
+ .dev = dev,
+ },
+ .upper_dev = upper_dev,
+ .linking = false,
+ };
ASSERT_RTNL();
- changeupper_info.upper_dev = upper_dev;
changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
- changeupper_info.linking = false;
- call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+ call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
&changeupper_info.info);
__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
- call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+ call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
&changeupper_info.info);
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
@@ -6357,11 +6429,13 @@ EXPORT_SYMBOL(netdev_upper_dev_unlink);
void netdev_bonding_info_change(struct net_device *dev,
struct netdev_bonding_info *bonding_info)
{
- struct netdev_notifier_bonding_info info;
+ struct netdev_notifier_bonding_info info = {
+ .info.dev = dev,
+ };
memcpy(&info.bonding_info, bonding_info,
sizeof(struct netdev_bonding_info));
- call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev,
+ call_netdevice_notifiers_info(NETDEV_BONDING_INFO,
&info.info);
}
EXPORT_SYMBOL(netdev_bonding_info_change);
@@ -6487,11 +6561,13 @@ EXPORT_SYMBOL(dev_get_nest_level);
void netdev_lower_state_changed(struct net_device *lower_dev,
void *lower_state_info)
{
- struct netdev_notifier_changelowerstate_info changelowerstate_info;
+ struct netdev_notifier_changelowerstate_info changelowerstate_info = {
+ .info.dev = lower_dev,
+ };
ASSERT_RTNL();
changelowerstate_info.lower_state_info = lower_state_info;
- call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev,
+ call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE,
&changelowerstate_info.info);
}
EXPORT_SYMBOL(netdev_lower_state_changed);
@@ -6782,11 +6858,14 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
if (dev->flags & IFF_UP &&
(changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
- struct netdev_notifier_change_info change_info;
+ struct netdev_notifier_change_info change_info = {
+ .info = {
+ .dev = dev,
+ },
+ .flags_changed = changes,
+ };
- change_info.flags_changed = changes;
- call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
- &change_info.info);
+ call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info);
}
}
@@ -6993,26 +7072,26 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
}
EXPORT_SYMBOL(dev_change_proto_down);
-u8 __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id)
+u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op, u32 *prog_id)
{
- struct netdev_xdp xdp;
+ struct netdev_bpf xdp;
memset(&xdp, 0, sizeof(xdp));
xdp.command = XDP_QUERY_PROG;
/* Query must always succeed. */
- WARN_ON(xdp_op(dev, &xdp) < 0);
+ WARN_ON(bpf_op(dev, &xdp) < 0);
if (prog_id)
*prog_id = xdp.prog_id;
return xdp.prog_attached;
}
-static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op,
+static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
struct netlink_ext_ack *extack, u32 flags,
struct bpf_prog *prog)
{
- struct netdev_xdp xdp;
+ struct netdev_bpf xdp;
memset(&xdp, 0, sizeof(xdp));
if (flags & XDP_FLAGS_HW_MODE)
@@ -7023,7 +7102,7 @@ static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op,
xdp.flags = flags;
xdp.prog = prog;
- return xdp_op(dev, &xdp);
+ return bpf_op(dev, &xdp);
}
/**
@@ -7040,32 +7119,36 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
{
const struct net_device_ops *ops = dev->netdev_ops;
struct bpf_prog *prog = NULL;
- xdp_op_t xdp_op, xdp_chk;
+ bpf_op_t bpf_op, bpf_chk;
int err;
ASSERT_RTNL();
- xdp_op = xdp_chk = ops->ndo_xdp;
- if (!xdp_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
+ bpf_op = bpf_chk = ops->ndo_bpf;
+ if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
return -EOPNOTSUPP;
- if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE))
- xdp_op = generic_xdp_install;
- if (xdp_op == xdp_chk)
- xdp_chk = generic_xdp_install;
+ if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE))
+ bpf_op = generic_xdp_install;
+ if (bpf_op == bpf_chk)
+ bpf_chk = generic_xdp_install;
if (fd >= 0) {
- if (xdp_chk && __dev_xdp_attached(dev, xdp_chk, NULL))
+ if (bpf_chk && __dev_xdp_attached(dev, bpf_chk, NULL))
return -EEXIST;
if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
- __dev_xdp_attached(dev, xdp_op, NULL))
+ __dev_xdp_attached(dev, bpf_op, NULL))
return -EBUSY;
- prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
+ if (bpf_op == ops->ndo_bpf)
+ prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
+ dev);
+ else
+ prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
if (IS_ERR(prog))
return PTR_ERR(prog);
}
- err = dev_xdp_install(dev, xdp_op, extack, flags, prog);
+ err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
if (err < 0 && prog)
bpf_prog_put(prog);
@@ -7157,7 +7240,7 @@ static void rollback_registered_many(struct list_head *head)
if (!dev->rtnl_link_ops ||
dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
- GFP_KERNEL);
+ GFP_KERNEL, NULL);
/*
* Flush the unicast and multicast chains
@@ -7994,7 +8077,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
unsigned int txqs, unsigned int rxqs)
{
struct net_device *dev;
- size_t alloc_size;
+ unsigned int alloc_size;
struct net_device *p;
BUG_ON(strlen(name) >= sizeof(dev->name));
@@ -8244,7 +8327,7 @@ EXPORT_SYMBOL(unregister_netdev);
int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
{
- int err;
+ int err, new_nsid;
ASSERT_RTNL();
@@ -8300,7 +8383,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
rcu_barrier();
call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
- rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
+ if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net)
+ new_nsid = peernet2id_alloc(dev_net(dev), net);
+ else
+ new_nsid = peernet2id(dev_net(dev), net);
+ rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid);
/*
* Flush the unicast and multicast chains
@@ -8562,6 +8649,8 @@ static void __net_exit netdev_exit(struct net *net)
{
kfree(net->dev_name_head);
kfree(net->dev_index_head);
+ if (net != &init_net)
+ WARN_ON_ONCE(!list_empty(&net->dev_base_head));
}
static struct pernet_operations __net_initdata netdev_net_ops = {
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 709a4e6fb447..7e690d0ccd05 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/kmod.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
@@ -303,7 +304,18 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
case SIOCSIFTXQLEN:
if (ifr->ifr_qlen < 0)
return -EINVAL;
- dev->tx_queue_len = ifr->ifr_qlen;
+ if (dev->tx_queue_len ^ ifr->ifr_qlen) {
+ unsigned int orig_len = dev->tx_queue_len;
+
+ dev->tx_queue_len = ifr->ifr_qlen;
+ err = call_netdevice_notifiers(
+ NETDEV_CHANGE_TX_QUEUE_LEN, dev);
+ err = notifier_to_errno(err);
+ if (err) {
+ dev->tx_queue_len = orig_len;
+ return err;
+ }
+ }
return 0;
case SIOCSIFNAME:
diff --git a/net/core/dst.c b/net/core/dst.c
index a6c47da7d0f8..662a2d4a3d19 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -322,3 +322,19 @@ metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags)
return md_dst;
}
EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu);
+
+void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst)
+{
+#ifdef CONFIG_DST_CACHE
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct metadata_dst *one_md_dst = per_cpu_ptr(md_dst, cpu);
+
+ if (one_md_dst->type == METADATA_IP_TUNNEL)
+ dst_cache_destroy(&one_md_dst->u.tun_info.dst_cache);
+ }
+#endif
+ free_percpu(md_dst);
+}
+EXPORT_SYMBOL_GPL(metadata_dst_free_percpu);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 3228411ada0f..f8fcf450a36e 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -403,6 +403,22 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data)
return 0;
}
+/* Given two link masks, AND them together and save the result in dst. */
+void ethtool_intersect_link_masks(struct ethtool_link_ksettings *dst,
+ struct ethtool_link_ksettings *src)
+{
+ unsigned int size = BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS);
+ unsigned int idx = 0;
+
+ for (; idx < size; idx++) {
+ dst->link_modes.supported[idx] &=
+ src->link_modes.supported[idx];
+ dst->link_modes.advertising[idx] &=
+ src->link_modes.advertising[idx];
+ }
+}
+EXPORT_SYMBOL(ethtool_intersect_link_masks);
+
void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst,
u32 legacy_u32)
{
@@ -436,7 +452,7 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
EXPORT_SYMBOL(ethtool_convert_link_mode_to_legacy_u32);
/* return false if legacy contained non-0 deprecated fields
- * transceiver/maxtxpkt/maxrxpkt. rest of ksettings always updated
+ * maxtxpkt/maxrxpkt. rest of ksettings always updated
*/
static bool
convert_legacy_settings_to_link_ksettings(
@@ -451,8 +467,7 @@ convert_legacy_settings_to_link_ksettings(
* deprecated legacy fields, and they should not use
* %ETHTOOL_GLINKSETTINGS/%ETHTOOL_SLINKSETTINGS
*/
- if (legacy_settings->transceiver ||
- legacy_settings->maxtxpkt ||
+ if (legacy_settings->maxtxpkt ||
legacy_settings->maxrxpkt)
retval = false;
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
index 4fc202dbdfb6..0c048bdeb016 100644
--- a/net/core/fib_notifier.c
+++ b/net/core/fib_notifier.c
@@ -34,12 +34,14 @@ static unsigned int fib_seq_sum(void)
rtnl_lock();
for_each_net(net) {
- list_for_each_entry(ops, &net->fib_notifier_ops, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
if (!try_module_get(ops->owner))
continue;
fib_seq += ops->fib_seq_read(net);
module_put(ops->owner);
}
+ rcu_read_unlock();
}
rtnl_unlock();
@@ -161,8 +163,14 @@ static int __net_init fib_notifier_net_init(struct net *net)
return 0;
}
+static void __net_exit fib_notifier_net_exit(struct net *net)
+{
+ WARN_ON_ONCE(!list_empty(&net->fib_notifier_ops));
+}
+
static struct pernet_operations fib_notifier_net_ops = {
.init = fib_notifier_net_init,
+ .exit = fib_notifier_net_exit,
};
static int __init fib_notifier_init(void)
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 9a6d97c1d810..98e1066c3d55 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -314,10 +314,12 @@ static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
static int call_fib_rule_notifiers(struct net *net,
enum fib_event_type event_type,
struct fib_rule *rule,
- struct fib_rules_ops *ops)
+ struct fib_rules_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct fib_rule_notifier_info info = {
.info.family = ops->family,
+ .info.extack = extack,
.rule = rule,
};
@@ -609,7 +611,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
if (rule->tun_id)
ip_tunnel_need_metadata();
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops);
+ call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops, extack);
notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
flush_route_cache(ops);
rules_ops_put(ops);
@@ -749,7 +751,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
}
}
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops);
+ call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops,
+ NULL);
notify_rule_change(RTM_DELRULE, rule, ops, nlh,
NETLINK_CB(skb).portid);
fib_rule_put(rule);
@@ -1019,8 +1022,14 @@ static int __net_init fib_rules_net_init(struct net *net)
return 0;
}
+static void __net_exit fib_rules_net_exit(struct net *net)
+{
+ WARN_ON_ONCE(!list_empty(&net->rules_ops));
+}
+
static struct pernet_operations fib_rules_net_ops = {
.init = fib_rules_net_init,
+ .exit = fib_rules_net_exit,
};
static int __init fib_rules_init(void)
diff --git a/net/core/filter.c b/net/core/filter.c
index 74b8c91fb5f4..1afa17935954 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -43,6 +43,7 @@
#include <linux/timer.h>
#include <linux/uaccess.h>
#include <asm/unaligned.h>
+#include <asm/cmpxchg.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
@@ -1406,7 +1407,7 @@ static inline int bpf_try_make_writable(struct sk_buff *skb,
{
int err = __bpf_try_make_writable(skb, write_len);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return err;
}
@@ -1839,31 +1840,32 @@ static const struct bpf_func_proto bpf_redirect_proto = {
.arg2_type = ARG_ANYTHING,
};
-BPF_CALL_3(bpf_sk_redirect_map, struct bpf_map *, map, u32, key, u64, flags)
+BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
+ struct bpf_map *, map, u32, key, u64, flags)
{
- struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+ /* If user passes invalid input drop the packet. */
if (unlikely(flags))
- return SK_ABORTED;
+ return SK_DROP;
- ri->ifindex = key;
- ri->flags = flags;
- ri->map = map;
+ tcb->bpf.key = key;
+ tcb->bpf.flags = flags;
+ tcb->bpf.map = map;
- return SK_REDIRECT;
+ return SK_PASS;
}
-struct sock *do_sk_redirect_map(void)
+struct sock *do_sk_redirect_map(struct sk_buff *skb)
{
- struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
struct sock *sk = NULL;
- if (ri->map) {
- sk = __sock_map_lookup_elem(ri->map, ri->ifindex);
+ if (tcb->bpf.map) {
+ sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key);
- ri->ifindex = 0;
- ri->map = NULL;
- /* we do not clear flags for future lookup */
+ tcb->bpf.key = 0;
+ tcb->bpf.map = NULL;
}
return sk;
@@ -1873,9 +1875,10 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
.func = bpf_sk_redirect_map,
.gpl_only = false,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_ANYTHING,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
};
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
@@ -1966,7 +1969,7 @@ BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
bpf_pull_mac_rcsum(skb);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return ret;
}
@@ -1988,7 +1991,7 @@ BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
ret = skb_vlan_pop(skb);
bpf_pull_mac_rcsum(skb);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return ret;
}
@@ -2182,7 +2185,7 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
* need to be verified first.
*/
ret = bpf_skb_proto_xlat(skb, proto);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return ret;
}
@@ -2307,7 +2310,7 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) :
bpf_skb_net_grow(skb, len_diff_abs);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return ret;
}
@@ -2398,7 +2401,7 @@ BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
skb_gso_reset(skb);
}
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return ret;
}
@@ -2438,7 +2441,7 @@ BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
skb_reset_mac_header(skb);
}
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return 0;
}
@@ -2451,14 +2454,26 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
.arg3_type = ARG_ANYTHING,
};
+static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
+{
+ return xdp_data_meta_unsupported(xdp) ? 0 :
+ xdp->data - xdp->data_meta;
+}
+
BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
{
+ unsigned long metalen = xdp_get_metalen(xdp);
+ void *data_start = xdp->data_hard_start + metalen;
void *data = xdp->data + offset;
- if (unlikely(data < xdp->data_hard_start ||
+ if (unlikely(data < data_start ||
data > xdp->data_end - ETH_HLEN))
return -EINVAL;
+ if (metalen)
+ memmove(xdp->data_meta + offset,
+ xdp->data_meta, metalen);
+ xdp->data_meta += offset;
xdp->data = data;
return 0;
@@ -2472,6 +2487,33 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
.arg2_type = ARG_ANYTHING,
};
+BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
+{
+ void *meta = xdp->data_meta + offset;
+ unsigned long metalen = xdp->data - meta;
+
+ if (xdp_data_meta_unsupported(xdp))
+ return -ENOTSUPP;
+ if (unlikely(meta < xdp->data_hard_start ||
+ meta > xdp->data))
+ return -EINVAL;
+ if (unlikely((metalen & (sizeof(__u32) - 1)) ||
+ (metalen > 32)))
+ return -EACCES;
+
+ xdp->data_meta = meta;
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
+ .func = bpf_xdp_adjust_meta,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
static int __bpf_tx_xdp(struct net_device *dev,
struct bpf_map *map,
struct xdp_buff *xdp,
@@ -2486,10 +2528,36 @@ static int __bpf_tx_xdp(struct net_device *dev,
err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
if (err)
return err;
- if (map)
+ dev->netdev_ops->ndo_xdp_flush(dev);
+ return 0;
+}
+
+static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
+ struct bpf_map *map,
+ struct xdp_buff *xdp,
+ u32 index)
+{
+ int err;
+
+ if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
+ struct net_device *dev = fwd;
+
+ if (!dev->netdev_ops->ndo_xdp_xmit)
+ return -EOPNOTSUPP;
+
+ err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
+ if (err)
+ return err;
__dev_map_insert_ctx(map, index);
- else
- dev->netdev_ops->ndo_xdp_flush(dev);
+
+ } else if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
+ struct bpf_cpu_map_entry *rcpu = fwd;
+
+ err = cpu_map_enqueue(rcpu, xdp, dev_rx);
+ if (err)
+ return err;
+ __cpu_map_insert_ctx(map, index);
+ }
return 0;
}
@@ -2499,11 +2567,33 @@ void xdp_do_flush_map(void)
struct bpf_map *map = ri->map_to_flush;
ri->map_to_flush = NULL;
- if (map)
- __dev_map_flush(map);
+ if (map) {
+ switch (map->map_type) {
+ case BPF_MAP_TYPE_DEVMAP:
+ __dev_map_flush(map);
+ break;
+ case BPF_MAP_TYPE_CPUMAP:
+ __cpu_map_flush(map);
+ break;
+ default:
+ break;
+ }
+ }
}
EXPORT_SYMBOL_GPL(xdp_do_flush_map);
+static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
+{
+ switch (map->map_type) {
+ case BPF_MAP_TYPE_DEVMAP:
+ return __dev_map_lookup_elem(map, index);
+ case BPF_MAP_TYPE_CPUMAP:
+ return __cpu_map_lookup_elem(map, index);
+ default:
+ return NULL;
+ }
+}
+
static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
unsigned long aux)
{
@@ -2516,8 +2606,8 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
unsigned long map_owner = ri->map_owner;
struct bpf_map *map = ri->map;
- struct net_device *fwd = NULL;
u32 index = ri->ifindex;
+ void *fwd = NULL;
int err;
ri->ifindex = 0;
@@ -2530,7 +2620,7 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
goto err;
}
- fwd = __dev_map_lookup_elem(map, index);
+ fwd = __xdp_map_lookup_elem(map, index);
if (!fwd) {
err = -EINVAL;
goto err;
@@ -2538,7 +2628,7 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
if (ri->map_to_flush && ri->map_to_flush != map)
xdp_do_flush_map();
- err = __bpf_tx_xdp(fwd, map, xdp, index);
+ err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index);
if (unlikely(err))
goto err;
@@ -2580,54 +2670,88 @@ err:
}
EXPORT_SYMBOL_GPL(xdp_do_redirect);
-int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
- struct bpf_prog *xdp_prog)
+static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd)
+{
+ unsigned int len;
+
+ if (unlikely(!(fwd->flags & IFF_UP)))
+ return -ENETDOWN;
+
+ len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
+ if (skb->len > len)
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb,
+ struct bpf_prog *xdp_prog)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
unsigned long map_owner = ri->map_owner;
struct bpf_map *map = ri->map;
struct net_device *fwd = NULL;
u32 index = ri->ifindex;
- unsigned int len;
int err = 0;
ri->ifindex = 0;
ri->map = NULL;
ri->map_owner = 0;
- if (map) {
- if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
- err = -EFAULT;
- map = NULL;
- goto err;
- }
- fwd = __dev_map_lookup_elem(map, index);
- } else {
- fwd = dev_get_by_index_rcu(dev_net(dev), index);
+ if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
+ err = -EFAULT;
+ map = NULL;
+ goto err;
}
+ fwd = __xdp_map_lookup_elem(map, index);
if (unlikely(!fwd)) {
err = -EINVAL;
goto err;
}
- if (unlikely(!(fwd->flags & IFF_UP))) {
- err = -ENETDOWN;
+ if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
+ if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
+ goto err;
+ skb->dev = fwd;
+ } else {
+ /* TODO: Handle BPF_MAP_TYPE_CPUMAP */
+ err = -EBADRQC;
goto err;
}
- len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
- if (skb->len > len) {
- err = -EMSGSIZE;
+ _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
+ return 0;
+err:
+ _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
+ return err;
+}
+
+int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
+ struct bpf_prog *xdp_prog)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ u32 index = ri->ifindex;
+ struct net_device *fwd;
+ int err = 0;
+
+ if (ri->map)
+ return xdp_do_generic_redirect_map(dev, skb, xdp_prog);
+
+ ri->ifindex = 0;
+ fwd = dev_get_by_index_rcu(dev_net(dev), index);
+ if (unlikely(!fwd)) {
+ err = -EINVAL;
goto err;
}
+ if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
+ goto err;
+
skb->dev = fwd;
- map ? _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index)
- : _trace_xdp_redirect(dev, xdp_prog, index);
+ _trace_xdp_redirect(dev, xdp_prog, index);
return 0;
err:
- map ? _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err)
- : _trace_xdp_redirect_err(dev, xdp_prog, index, err);
+ _trace_xdp_redirect_err(dev, xdp_prog, index, err);
return err;
}
EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
@@ -2696,7 +2820,8 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_clone_redirect ||
func == bpf_l3_csum_replace ||
func == bpf_l4_csum_replace ||
- func == bpf_xdp_adjust_head)
+ func == bpf_xdp_adjust_head ||
+ func == bpf_xdp_adjust_meta)
return true;
return false;
@@ -2947,14 +3072,15 @@ static const struct bpf_func_proto *
bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
{
if (!md_dst) {
- /* Race is not possible, since it's called from verifier
- * that is holding verifier mutex.
- */
- md_dst = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
- METADATA_IP_TUNNEL,
- GFP_KERNEL);
- if (!md_dst)
+ struct metadata_dst __percpu *tmp;
+
+ tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
+ METADATA_IP_TUNNEL,
+ GFP_KERNEL);
+ if (!tmp)
return NULL;
+ if (cmpxchg(&md_dst, NULL, tmp))
+ metadata_dst_free_percpu(tmp);
}
switch (which) {
@@ -3149,7 +3275,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
static const struct bpf_func_proto bpf_setsockopt_proto = {
.func = bpf_setsockopt,
- .gpl_only = true,
+ .gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
@@ -3158,6 +3284,47 @@ static const struct bpf_func_proto bpf_setsockopt_proto = {
.arg5_type = ARG_CONST_SIZE,
};
+BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
+ int, level, int, optname, char *, optval, int, optlen)
+{
+ struct sock *sk = bpf_sock->sk;
+
+ if (!sk_fullsock(sk))
+ goto err_clear;
+
+#ifdef CONFIG_INET
+ if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
+ if (optname == TCP_CONGESTION) {
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ if (!icsk->icsk_ca_ops || optlen <= 1)
+ goto err_clear;
+ strncpy(optval, icsk->icsk_ca_ops->name, optlen);
+ optval[optlen - 1] = 0;
+ } else {
+ goto err_clear;
+ }
+ } else {
+ goto err_clear;
+ }
+ return 0;
+#endif
+err_clear:
+ memset(optval, 0, optlen);
+ return -EINVAL;
+}
+
+static const struct bpf_func_proto bpf_getsockopt_proto = {
+ .func = bpf_getsockopt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg5_type = ARG_CONST_SIZE,
+};
+
static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
@@ -3292,6 +3459,8 @@ xdp_func_proto(enum bpf_func_id func_id)
return &bpf_get_smp_processor_id_proto;
case BPF_FUNC_xdp_adjust_head:
return &bpf_xdp_adjust_head_proto;
+ case BPF_FUNC_xdp_adjust_meta:
+ return &bpf_xdp_adjust_meta_proto;
case BPF_FUNC_redirect:
return &bpf_xdp_redirect_proto;
case BPF_FUNC_redirect_map:
@@ -3334,6 +3503,8 @@ static const struct bpf_func_proto *
switch (func_id) {
case BPF_FUNC_setsockopt:
return &bpf_setsockopt_proto;
+ case BPF_FUNC_getsockopt:
+ return &bpf_getsockopt_proto;
case BPF_FUNC_sock_map_update:
return &bpf_sock_map_update_proto;
default:
@@ -3422,6 +3593,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4):
case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4):
case bpf_ctx_range(struct __sk_buff, data):
+ case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, data_end):
if (size != size_default)
return false;
@@ -3448,6 +3620,7 @@ static bool sk_filter_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data):
+ case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, data_end):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
@@ -3472,6 +3645,7 @@ static bool lwt_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+ case bpf_ctx_range(struct __sk_buff, data_meta):
return false;
}
@@ -3590,6 +3764,9 @@ static bool tc_cls_act_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data):
info->reg_type = PTR_TO_PACKET;
break;
+ case bpf_ctx_range(struct __sk_buff, data_meta):
+ info->reg_type = PTR_TO_PACKET_META;
+ break;
case bpf_ctx_range(struct __sk_buff, data_end):
info->reg_type = PTR_TO_PACKET_END;
break;
@@ -3623,6 +3800,9 @@ static bool xdp_is_valid_access(int off, int size,
case offsetof(struct xdp_md, data):
info->reg_type = PTR_TO_PACKET;
break;
+ case offsetof(struct xdp_md, data_meta):
+ info->reg_type = PTR_TO_PACKET_META;
+ break;
case offsetof(struct xdp_md, data_end):
info->reg_type = PTR_TO_PACKET_END;
break;
@@ -3681,9 +3861,14 @@ static bool sk_skb_is_valid_access(int off, int size,
enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
+ switch (off) {
+ case bpf_ctx_range(struct __sk_buff, tc_classid):
+ case bpf_ctx_range(struct __sk_buff, data_meta):
+ return false;
+ }
+
if (type == BPF_WRITE) {
switch (off) {
- case bpf_ctx_range(struct __sk_buff, mark):
case bpf_ctx_range(struct __sk_buff, tc_index):
case bpf_ctx_range(struct __sk_buff, priority):
break;
@@ -3693,7 +3878,7 @@ static bool sk_skb_is_valid_access(int off, int size,
}
switch (off) {
- case bpf_ctx_range(struct __sk_buff, tc_classid):
+ case bpf_ctx_range(struct __sk_buff, mark):
return false;
case bpf_ctx_range(struct __sk_buff, data):
info->reg_type = PTR_TO_PACKET;
@@ -3851,6 +4036,15 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
offsetof(struct sk_buff, data));
break;
+ case offsetof(struct __sk_buff, data_meta):
+ off = si->off;
+ off -= offsetof(struct __sk_buff, data_meta);
+ off += offsetof(struct sk_buff, cb);
+ off += offsetof(struct bpf_skb_data_end, data_meta);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
+ si->src_reg, off);
+ break;
+
case offsetof(struct __sk_buff, data_end):
off = si->off;
off -= offsetof(struct __sk_buff, data_end);
@@ -4099,6 +4293,11 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
si->dst_reg, si->src_reg,
offsetof(struct xdp_buff, data));
break;
+ case offsetof(struct xdp_md, data_meta):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta),
+ si->dst_reg, si->src_reg,
+ offsetof(struct xdp_buff, data_meta));
+ break;
case offsetof(struct xdp_md, data_end):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
si->dst_reg, si->src_reg,
@@ -4242,68 +4441,120 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
-const struct bpf_verifier_ops sk_filter_prog_ops = {
+static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog, u32 *target_size)
+{
+ struct bpf_insn *insn = insn_buf;
+ int off;
+
+ switch (si->off) {
+ case offsetof(struct __sk_buff, data_end):
+ off = si->off;
+ off -= offsetof(struct __sk_buff, data_end);
+ off += offsetof(struct sk_buff, cb);
+ off += offsetof(struct tcp_skb_cb, bpf.data_end);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
+ si->src_reg, off);
+ break;
+ default:
+ return bpf_convert_ctx_access(type, si, insn_buf, prog,
+ target_size);
+ }
+
+ return insn - insn_buf;
+}
+
+const struct bpf_verifier_ops sk_filter_verifier_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
};
-const struct bpf_verifier_ops tc_cls_act_prog_ops = {
+const struct bpf_prog_ops sk_filter_prog_ops = {
+};
+
+const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
.get_func_proto = tc_cls_act_func_proto,
.is_valid_access = tc_cls_act_is_valid_access,
.convert_ctx_access = tc_cls_act_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
+};
+
+const struct bpf_prog_ops tc_cls_act_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
-const struct bpf_verifier_ops xdp_prog_ops = {
+const struct bpf_verifier_ops xdp_verifier_ops = {
.get_func_proto = xdp_func_proto,
.is_valid_access = xdp_is_valid_access,
.convert_ctx_access = xdp_convert_ctx_access,
+};
+
+const struct bpf_prog_ops xdp_prog_ops = {
.test_run = bpf_prog_test_run_xdp,
};
-const struct bpf_verifier_ops cg_skb_prog_ops = {
+const struct bpf_verifier_ops cg_skb_verifier_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
+};
+
+const struct bpf_prog_ops cg_skb_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
-const struct bpf_verifier_ops lwt_inout_prog_ops = {
+const struct bpf_verifier_ops lwt_inout_verifier_ops = {
.get_func_proto = lwt_inout_func_proto,
.is_valid_access = lwt_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
+};
+
+const struct bpf_prog_ops lwt_inout_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
-const struct bpf_verifier_ops lwt_xmit_prog_ops = {
+const struct bpf_verifier_ops lwt_xmit_verifier_ops = {
.get_func_proto = lwt_xmit_func_proto,
.is_valid_access = lwt_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
+};
+
+const struct bpf_prog_ops lwt_xmit_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
-const struct bpf_verifier_ops cg_sock_prog_ops = {
+const struct bpf_verifier_ops cg_sock_verifier_ops = {
.get_func_proto = sock_filter_func_proto,
.is_valid_access = sock_filter_is_valid_access,
.convert_ctx_access = sock_filter_convert_ctx_access,
};
-const struct bpf_verifier_ops sock_ops_prog_ops = {
+const struct bpf_prog_ops cg_sock_prog_ops = {
+};
+
+const struct bpf_verifier_ops sock_ops_verifier_ops = {
.get_func_proto = sock_ops_func_proto,
.is_valid_access = sock_ops_is_valid_access,
.convert_ctx_access = sock_ops_convert_ctx_access,
};
-const struct bpf_verifier_ops sk_skb_prog_ops = {
+const struct bpf_prog_ops sock_ops_prog_ops = {
+};
+
+const struct bpf_verifier_ops sk_skb_verifier_ops = {
.get_func_proto = sk_skb_func_proto,
.is_valid_access = sk_skb_is_valid_access,
- .convert_ctx_access = bpf_convert_ctx_access,
+ .convert_ctx_access = sk_skb_convert_ctx_access,
.gen_prologue = sk_skb_prologue,
};
+const struct bpf_prog_ops sk_skb_prog_ops = {
+};
+
int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 0a977373d003..15ce30063765 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -5,10 +5,12 @@
#include <linux/ipv6.h>
#include <linux/if_vlan.h>
#include <net/dsa.h>
+#include <net/dst_metadata.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/gre.h>
#include <net/pptp.h>
+#include <net/tipc.h>
#include <linux/igmp.h>
#include <linux/icmp.h>
#include <linux/sctp.h>
@@ -115,6 +117,102 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
}
EXPORT_SYMBOL(__skb_flow_get_ports);
+static void
+skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type,
+ struct flow_dissector *flow_dissector,
+ void *target_container)
+{
+ struct flow_dissector_key_control *ctrl;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL))
+ return;
+
+ ctrl = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_CONTROL,
+ target_container);
+ ctrl->addr_type = type;
+}
+
+static void
+__skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container)
+{
+ struct ip_tunnel_info *info;
+ struct ip_tunnel_key *key;
+
+ /* A quick check to see if there might be something to do. */
+ if (!dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID) &&
+ !dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) &&
+ !dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) &&
+ !dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_CONTROL) &&
+ !dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_PORTS))
+ return;
+
+ info = skb_tunnel_info(skb);
+ if (!info)
+ return;
+
+ key = &info->key;
+
+ switch (ip_tunnel_info_af(info)) {
+ case AF_INET:
+ skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ flow_dissector,
+ target_container);
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
+ struct flow_dissector_key_ipv4_addrs *ipv4;
+
+ ipv4 = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+ target_container);
+ ipv4->src = key->u.ipv4.src;
+ ipv4->dst = key->u.ipv4.dst;
+ }
+ break;
+ case AF_INET6:
+ skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ flow_dissector,
+ target_container);
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
+ struct flow_dissector_key_ipv6_addrs *ipv6;
+
+ ipv6 = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
+ target_container);
+ ipv6->src = key->u.ipv6.src;
+ ipv6->dst = key->u.ipv6.dst;
+ }
+ break;
+ }
+
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+ struct flow_dissector_key_keyid *keyid;
+
+ keyid = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID,
+ target_container);
+ keyid->keyid = tunnel_id_to_key32(key->tun_id);
+ }
+
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
+ struct flow_dissector_key_ports *tp;
+
+ tp = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_PORTS,
+ target_container);
+ tp->src = key->tp_src;
+ tp->dst = key->tp_dst;
+ }
+}
+
static enum flow_dissect_ret
__skb_flow_dissect_mpls(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
@@ -478,6 +576,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
FLOW_DISSECTOR_KEY_BASIC,
target_container);
+ __skb_flow_dissect_tunnel_info(skb, flow_dissector,
+ target_container);
+
if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
struct ethhdr *eth = eth_hdr(skb);
@@ -672,23 +773,22 @@ proto_again:
break;
}
case htons(ETH_P_TIPC): {
- struct {
- __be32 pre[3];
- __be32 srcnode;
- } *hdr, _hdr;
- hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
+ struct tipc_basic_hdr *hdr, _hdr;
+
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr),
+ data, hlen, &_hdr);
if (!hdr) {
fdret = FLOW_DISSECT_RET_OUT_BAD;
break;
}
if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_TIPC_ADDRS)) {
+ FLOW_DISSECTOR_KEY_TIPC)) {
key_addrs = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_TIPC_ADDRS,
+ FLOW_DISSECTOR_KEY_TIPC,
target_container);
- key_addrs->tipcaddrs.srcnode = hdr->srcnode;
- key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC_ADDRS;
+ key_addrs->tipckey.key = tipc_hdr_rps_key(hdr);
+ key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC;
}
fdret = FLOW_DISSECT_RET_OUT_GOOD;
break;
@@ -924,8 +1024,8 @@ static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
diff -= sizeof(flow->addrs.v6addrs);
break;
- case FLOW_DISSECTOR_KEY_TIPC_ADDRS:
- diff -= sizeof(flow->addrs.tipcaddrs);
+ case FLOW_DISSECTOR_KEY_TIPC:
+ diff -= sizeof(flow->addrs.tipckey);
break;
}
return (sizeof(*flow) - diff) / sizeof(u32);
@@ -939,8 +1039,8 @@ __be32 flow_get_u32_src(const struct flow_keys *flow)
case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
return (__force __be32)ipv6_addr_hash(
&flow->addrs.v6addrs.src);
- case FLOW_DISSECTOR_KEY_TIPC_ADDRS:
- return flow->addrs.tipcaddrs.srcnode;
+ case FLOW_DISSECTOR_KEY_TIPC:
+ return flow->addrs.tipckey.key;
default:
return 0;
}
@@ -1221,8 +1321,8 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = {
.offset = offsetof(struct flow_keys, addrs.v6addrs),
},
{
- .key_id = FLOW_DISSECTOR_KEY_TIPC_ADDRS,
- .offset = offsetof(struct flow_keys, addrs.tipcaddrs),
+ .key_id = FLOW_DISSECTOR_KEY_TIPC,
+ .offset = offsetof(struct flow_keys, addrs.tipckey),
},
{
.key_id = FLOW_DISSECTOR_KEY_PORTS,
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index 814e58a3ce8b..4b54e5f107c6 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 1307731ddfe4..e7e626fb87bb 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -51,7 +51,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
*/
preempt_disable();
rcu_read_lock();
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
ret = bpf_prog_run_save_cb(lwt->prog, skb);
rcu_read_unlock();
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 16a1a4c4eb57..6ea3a1a7f36a 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -457,7 +457,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
const void *pkey)
{
struct neighbour *n;
- int key_len = tbl->key_len;
+ unsigned int key_len = tbl->key_len;
u32 hash_val;
struct neigh_hash_table *nht;
@@ -488,7 +488,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
struct net_device *dev, bool want_ref)
{
u32 hash_val;
- int key_len = tbl->key_len;
+ unsigned int key_len = tbl->key_len;
int error;
struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
struct neigh_hash_table *nht;
@@ -572,7 +572,7 @@ out_neigh_release:
}
EXPORT_SYMBOL(__neigh_create);
-static u32 pneigh_hash(const void *pkey, int key_len)
+static u32 pneigh_hash(const void *pkey, unsigned int key_len)
{
u32 hash_val = *(u32 *)(pkey + key_len - 4);
hash_val ^= (hash_val >> 16);
@@ -585,7 +585,7 @@ static u32 pneigh_hash(const void *pkey, int key_len)
static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
struct net *net,
const void *pkey,
- int key_len,
+ unsigned int key_len,
struct net_device *dev)
{
while (n) {
@@ -601,7 +601,7 @@ static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
struct net *net, const void *pkey, struct net_device *dev)
{
- int key_len = tbl->key_len;
+ unsigned int key_len = tbl->key_len;
u32 hash_val = pneigh_hash(pkey, key_len);
return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
@@ -614,7 +614,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
struct net_device *dev, int creat)
{
struct pneigh_entry *n;
- int key_len = tbl->key_len;
+ unsigned int key_len = tbl->key_len;
u32 hash_val = pneigh_hash(pkey, key_len);
read_lock_bh(&tbl->lock);
@@ -659,7 +659,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
struct net_device *dev)
{
struct pneigh_entry *n, **np;
- int key_len = tbl->key_len;
+ unsigned int key_len = tbl->key_len;
u32 hash_val = pneigh_hash(pkey, key_len);
write_lock_bh(&tbl->lock);
@@ -1662,7 +1662,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tbl == NULL)
return -EAFNOSUPPORT;
- if (nla_len(dst_attr) < tbl->key_len)
+ if (nla_len(dst_attr) < (int)tbl->key_len)
goto out;
if (ndm->ndm_flags & NTF_PROXY) {
@@ -1730,7 +1730,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tbl == NULL)
return -EAFNOSUPPORT;
- if (nla_len(tb[NDA_DST]) < tbl->key_len)
+ if (nla_len(tb[NDA_DST]) < (int)tbl->key_len)
goto out;
dst = nla_data(tb[NDA_DST]);
lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 4847964931df..615ccab55f38 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 927a6dcbad96..799b75268291 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -382,7 +382,7 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
struct net_device *netdev = to_net_dev(dev);
struct net *net = dev_net(netdev);
size_t count = len;
- ssize_t ret;
+ ssize_t ret = 0;
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
@@ -393,23 +393,30 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
if (!rtnl_trylock())
return restart_syscall();
- ret = dev_set_alias(netdev, buf, count);
+
+ if (dev_isalive(netdev)) {
+ ret = dev_set_alias(netdev, buf, count);
+ if (ret < 0)
+ goto err;
+ ret = len;
+ netdev_state_change(netdev);
+ }
+err:
rtnl_unlock();
- return ret < 0 ? ret : len;
+ return ret;
}
static ssize_t ifalias_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
const struct net_device *netdev = to_net_dev(dev);
+ char tmp[IFALIASZ];
ssize_t ret = 0;
- if (!rtnl_trylock())
- return restart_syscall();
- if (netdev->ifalias)
- ret = sprintf(buf, "%s\n", netdev->ifalias);
- rtnl_unlock();
+ ret = dev_get_alias(netdev, tmp, sizeof(tmp));
+ if (ret > 0)
+ ret = sprintf(buf, "%s\n", tmp);
return ret;
}
static DEVICE_ATTR_RW(ifalias);
@@ -1488,7 +1495,10 @@ static void netdev_release(struct device *d)
BUG_ON(dev->reg_state != NETREG_RELEASED);
- kfree(dev->ifalias);
+ /* no need to wait for rcu grace period:
+ * device is dead and about to be freed.
+ */
+ kfree(rcu_access_pointer(dev->ifalias));
netdev_freemem(dev);
}
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 2745a1b51e03..006876c7b78d 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __NET_SYSFS_H__
#define __NET_SYSFS_H__
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 1132820c8e62..380934580fa1 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* consolidates trace point definitions
*
@@ -31,6 +32,7 @@
#include <trace/events/napi.h>
#include <trace/events/sock.h>
#include <trace/events/udp.h>
+#include <trace/events/tcp.h>
#include <trace/events/fib.h>
#include <trace/events/qdisc.h>
#if IS_ENABLED(CONFIG_IPV6)
@@ -48,3 +50,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update);
EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 6cfdc7c84c48..b797832565d3 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -234,6 +234,7 @@ int peernet2id_alloc(struct net *net, struct net *peer)
rtnl_net_notifyid(net, RTM_NEWNSID, id);
return id;
}
+EXPORT_SYMBOL_GPL(peernet2id_alloc);
/* This function returns, if assigned, the id of a peer netns. */
int peernet2id(struct net *net, struct net *peer)
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 912731bed7b7..57557a6a950c 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -334,7 +334,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
/* It is up to the caller to keep npinfo alive. */
struct netpoll_info *npinfo;
- WARN_ON_ONCE(!irqs_disabled());
+ lockdep_assert_irqs_disabled();
npinfo = rcu_dereference_bh(np->dev->npinfo);
if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 6e1e10ff433a..f95a15086225 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2165,7 +2165,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
+ pkt_dev->pkt_overhead;
}
- for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ for (i = 0; i < sizeof(struct in6_addr); i++)
if (pkt_dev->cur_in6_saddr.s6_addr[i]) {
set = 1;
break;
@@ -2711,7 +2711,7 @@ static inline __be16 build_tci(unsigned int id, unsigned int cfi,
static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
int datalen)
{
- struct timeval timestamp;
+ struct timespec64 timestamp;
struct pktgen_hdr *pgh;
pgh = skb_put(skb, sizeof(*pgh));
@@ -2773,9 +2773,17 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
pgh->tv_sec = 0;
pgh->tv_usec = 0;
} else {
- do_gettimeofday(&timestamp);
+ /*
+ * pgh->tv_sec wraps in y2106 when interpreted as unsigned
+ * as done by wireshark, or y2038 when interpreted as signed.
+ * This is probably harmless, but if anyone wants to improve
+ * it, we could introduce a variant that puts 64-bit nanoseconds
+ * into the respective header bytes.
+ * This would also be slightly faster to read.
+ */
+ ktime_get_real_ts64(&timestamp);
pgh->tv_sec = htonl(timestamp.tv_sec);
- pgh->tv_usec = htonl(timestamp.tv_usec);
+ pgh->tv_usec = htonl(timestamp.tv_nsec / NSEC_PER_USEC);
}
}
@@ -3377,7 +3385,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
static void pktgen_xmit(struct pktgen_dev *pkt_dev)
{
- unsigned int burst = ACCESS_ONCE(pkt_dev->burst);
+ unsigned int burst = READ_ONCE(pkt_dev->burst);
struct net_device *odev = pkt_dev->odev;
struct netdev_queue *txq;
struct sk_buff *skb;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d4bcdcc68e92..dabba2a91fc8 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -453,7 +453,7 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
{
const struct rtnl_af_ops *ops;
- list_for_each_entry(ops, &rtnl_af_ops, list) {
+ list_for_each_entry_rcu(ops, &rtnl_af_ops, list) {
if (ops->family == family)
return ops;
}
@@ -470,32 +470,22 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
void rtnl_af_register(struct rtnl_af_ops *ops)
{
rtnl_lock();
- list_add_tail(&ops->list, &rtnl_af_ops);
+ list_add_tail_rcu(&ops->list, &rtnl_af_ops);
rtnl_unlock();
}
EXPORT_SYMBOL_GPL(rtnl_af_register);
/**
- * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
- * @ops: struct rtnl_af_ops * to unregister
- *
- * The caller must hold the rtnl_mutex.
- */
-void __rtnl_af_unregister(struct rtnl_af_ops *ops)
-{
- list_del(&ops->list);
-}
-EXPORT_SYMBOL_GPL(__rtnl_af_unregister);
-
-/**
* rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
* @ops: struct rtnl_af_ops * to unregister
*/
void rtnl_af_unregister(struct rtnl_af_ops *ops)
{
rtnl_lock();
- __rtnl_af_unregister(ops);
+ list_del_rcu(&ops->list);
rtnl_unlock();
+
+ synchronize_rcu();
}
EXPORT_SYMBOL_GPL(rtnl_af_unregister);
@@ -508,13 +498,15 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev,
/* IFLA_AF_SPEC */
size = nla_total_size(sizeof(struct nlattr));
- list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
if (af_ops->get_link_af_size) {
/* AF_* + nested data */
size += nla_total_size(sizeof(struct nlattr)) +
af_ops->get_link_af_size(dev, ext_filter_mask);
}
}
+ rcu_read_unlock();
return size;
}
@@ -522,11 +514,15 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev,
static bool rtnl_have_link_slave_info(const struct net_device *dev)
{
struct net_device *master_dev;
+ bool ret = false;
- master_dev = netdev_master_upper_dev_get((struct net_device *) dev);
+ rcu_read_lock();
+
+ master_dev = netdev_master_upper_dev_get_rcu((struct net_device *)dev);
if (master_dev && master_dev->rtnl_link_ops)
- return true;
- return false;
+ ret = true;
+ rcu_read_unlock();
+ return ret;
}
static int rtnl_link_slave_info_fill(struct sk_buff *skb,
@@ -923,8 +919,10 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
+ rtnl_xdp_size() /* IFLA_XDP */
+ nla_total_size(4) /* IFLA_EVENT */
- + nla_total_size(1); /* IFLA_PROTO_DOWN */
-
+ + nla_total_size(4) /* IFLA_NEW_NETNSID */
+ + nla_total_size(1) /* IFLA_PROTO_DOWN */
+ + nla_total_size(4) /* IFLA_IF_NETNSID */
+ + 0;
}
static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -1211,6 +1209,36 @@ nla_put_vfinfo_failure:
return -EMSGSIZE;
}
+static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb,
+ struct net_device *dev,
+ u32 ext_filter_mask)
+{
+ struct nlattr *vfinfo;
+ int i, num_vfs;
+
+ if (!dev->dev.parent || ((ext_filter_mask & RTEXT_FILTER_VF) == 0))
+ return 0;
+
+ num_vfs = dev_num_vf(dev->dev.parent);
+ if (nla_put_u32(skb, IFLA_NUM_VF, num_vfs))
+ return -EMSGSIZE;
+
+ if (!dev->netdev_ops->ndo_get_vf_config)
+ return 0;
+
+ vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
+ if (!vfinfo)
+ return -EMSGSIZE;
+
+ for (i = 0; i < num_vfs; i++) {
+ if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
+ return -EMSGSIZE;
+ }
+
+ nla_nest_end(skb, vfinfo);
+ return 0;
+}
+
static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
{
struct rtnl_link_ifmap map;
@@ -1242,10 +1270,10 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
*prog_id = generic_xdp_prog->aux->id;
return XDP_ATTACHED_SKB;
}
- if (!ops->ndo_xdp)
+ if (!ops->ndo_bpf)
return XDP_ATTACHED_NONE;
- return __dev_xdp_attached(dev, ops->ndo_xdp, prog_id);
+ return __dev_xdp_attached(dev, ops->ndo_bpf, prog_id);
}
static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
@@ -1307,16 +1335,108 @@ static u32 rtnl_get_event(unsigned long event)
return rtnl_event_type;
}
-static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
+static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev)
+{
+ const struct net_device *upper_dev;
+ int ret = 0;
+
+ rcu_read_lock();
+
+ upper_dev = netdev_master_upper_dev_get_rcu(dev);
+ if (upper_dev)
+ ret = nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex);
+
+ rcu_read_unlock();
+ return ret;
+}
+
+static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev)
+{
+ int ifindex = dev_get_iflink(dev);
+
+ if (dev->ifindex == ifindex)
+ return 0;
+
+ return nla_put_u32(skb, IFLA_LINK, ifindex);
+}
+
+static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ char buf[IFALIASZ];
+ int ret;
+
+ ret = dev_get_alias(dev, buf, sizeof(buf));
+ return ret > 0 ? nla_put_string(skb, IFLA_IFALIAS, buf) : 0;
+}
+
+static int rtnl_fill_link_netnsid(struct sk_buff *skb,
+ const struct net_device *dev,
+ struct net *src_net)
+{
+ if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) {
+ struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
+
+ if (!net_eq(dev_net(dev), link_net)) {
+ int id = peernet2id_alloc(src_net, link_net);
+
+ if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
+ return -EMSGSIZE;
+ }
+ }
+
+ return 0;
+}
+
+static int rtnl_fill_link_af(struct sk_buff *skb,
+ const struct net_device *dev,
+ u32 ext_filter_mask)
+{
+ const struct rtnl_af_ops *af_ops;
+ struct nlattr *af_spec;
+
+ af_spec = nla_nest_start(skb, IFLA_AF_SPEC);
+ if (!af_spec)
+ return -EMSGSIZE;
+
+ list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
+ struct nlattr *af;
+ int err;
+
+ if (!af_ops->fill_link_af)
+ continue;
+
+ af = nla_nest_start(skb, af_ops->family);
+ if (!af)
+ return -EMSGSIZE;
+
+ err = af_ops->fill_link_af(skb, dev, ext_filter_mask);
+ /*
+ * Caller may return ENODATA to indicate that there
+ * was no data to be dumped. This is not an error, it
+ * means we should trim the attribute header and
+ * continue.
+ */
+ if (err == -ENODATA)
+ nla_nest_cancel(skb, af);
+ else if (err < 0)
+ return -EMSGSIZE;
+
+ nla_nest_end(skb, af);
+ }
+
+ nla_nest_end(skb, af_spec);
+ return 0;
+}
+
+static int rtnl_fill_ifinfo(struct sk_buff *skb,
+ struct net_device *dev, struct net *src_net,
int type, u32 pid, u32 seq, u32 change,
unsigned int flags, u32 ext_filter_mask,
- u32 event)
+ u32 event, int *new_nsid, int tgt_netnsid)
{
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
- struct nlattr *af_spec;
- struct rtnl_af_ops *af_ops;
- struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
ASSERT_RTNL();
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
@@ -1331,6 +1451,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
ifm->ifi_flags = dev_get_flags(dev);
ifm->ifi_change = change;
+ if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_IF_NETNSID, tgt_netnsid))
+ goto nla_put_failure;
+
if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) ||
nla_put_u8(skb, IFLA_OPERSTATE,
@@ -1345,15 +1468,12 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
#ifdef CONFIG_RPS
nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
#endif
- (dev->ifindex != dev_get_iflink(dev) &&
- nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) ||
- (upper_dev &&
- nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) ||
+ nla_put_iflink(skb, dev) ||
+ put_master_ifindex(skb, dev) ||
nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
(dev->qdisc &&
nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) ||
- (dev->ifalias &&
- nla_put_string(skb, IFLA_IFALIAS, dev->ifalias)) ||
+ nla_put_ifalias(skb, dev) ||
nla_put_u32(skb, IFLA_CARRIER_CHANGES,
atomic_read(&dev->carrier_changes)) ||
nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
@@ -1385,27 +1505,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
if (rtnl_fill_stats(skb, dev))
goto nla_put_failure;
- if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) &&
- nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)))
+ if (rtnl_fill_vf(skb, dev, ext_filter_mask))
goto nla_put_failure;
- if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent &&
- ext_filter_mask & RTEXT_FILTER_VF) {
- int i;
- struct nlattr *vfinfo;
- int num_vfs = dev_num_vf(dev->dev.parent);
-
- vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
- if (!vfinfo)
- goto nla_put_failure;
- for (i = 0; i < num_vfs; i++) {
- if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
- goto nla_put_failure;
- }
-
- nla_nest_end(skb, vfinfo);
- }
-
if (rtnl_port_fill(skb, dev, ext_filter_mask))
goto nla_put_failure;
@@ -1417,51 +1519,23 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
goto nla_put_failure;
}
- if (dev->rtnl_link_ops &&
- dev->rtnl_link_ops->get_link_net) {
- struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
-
- if (!net_eq(dev_net(dev), link_net)) {
- int id = peernet2id_alloc(dev_net(dev), link_net);
-
- if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
- goto nla_put_failure;
- }
- }
-
- if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC)))
+ if (rtnl_fill_link_netnsid(skb, dev, src_net))
goto nla_put_failure;
- list_for_each_entry(af_ops, &rtnl_af_ops, list) {
- if (af_ops->fill_link_af) {
- struct nlattr *af;
- int err;
-
- if (!(af = nla_nest_start(skb, af_ops->family)))
- goto nla_put_failure;
-
- err = af_ops->fill_link_af(skb, dev, ext_filter_mask);
-
- /*
- * Caller may return ENODATA to indicate that there
- * was no data to be dumped. This is not an error, it
- * means we should trim the attribute header and
- * continue.
- */
- if (err == -ENODATA)
- nla_nest_cancel(skb, af);
- else if (err < 0)
- goto nla_put_failure;
-
- nla_nest_end(skb, af);
- }
- }
+ if (new_nsid &&
+ nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0)
+ goto nla_put_failure;
- nla_nest_end(skb, af_spec);
+ rcu_read_lock();
+ if (rtnl_fill_link_af(skb, dev, ext_filter_mask))
+ goto nla_put_failure_rcu;
+ rcu_read_unlock();
nlmsg_end(skb, nlh);
return 0;
+nla_put_failure_rcu:
+ rcu_read_unlock();
nla_put_failure:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
@@ -1483,7 +1557,10 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_LINKINFO] = { .type = NLA_NESTED },
[IFLA_NET_NS_PID] = { .type = NLA_U32 },
[IFLA_NET_NS_FD] = { .type = NLA_U32 },
- [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 },
+ /* IFLA_IFALIAS is a string, but policy is set to NLA_BINARY to
+ * allow 0-length string (needed to remove an alias).
+ */
+ [IFLA_IFALIAS] = { .type = NLA_BINARY, .len = IFALIASZ - 1 },
[IFLA_VFINFO_LIST] = {. type = NLA_NESTED },
[IFLA_VF_PORTS] = { .type = NLA_NESTED },
[IFLA_PORT_SELF] = { .type = NLA_NESTED },
@@ -1500,6 +1577,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_XDP] = { .type = NLA_NESTED },
[IFLA_EVENT] = { .type = NLA_U32 },
[IFLA_GROUP] = { .type = NLA_U32 },
+ [IFLA_IF_NETNSID] = { .type = NLA_S32 },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1603,9 +1681,28 @@ static bool link_dump_filtered(struct net_device *dev,
return false;
}
+static struct net *get_target_net(struct sk_buff *skb, int netnsid)
+{
+ struct net *net;
+
+ net = get_net_ns_by_id(sock_net(skb->sk), netnsid);
+ if (!net)
+ return ERR_PTR(-EINVAL);
+
+ /* For now, the caller is required to have CAP_NET_ADMIN in
+ * the user namespace owning the target net ns.
+ */
+ if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+ put_net(net);
+ return ERR_PTR(-EACCES);
+ }
+ return net;
+}
+
static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
+ struct net *tgt_net = net;
int h, s_h;
int idx = 0, s_idx;
struct net_device *dev;
@@ -1615,6 +1712,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
const struct rtnl_link_ops *kind_ops = NULL;
unsigned int flags = NLM_F_MULTI;
int master_idx = 0;
+ int netnsid = -1;
int err;
int hdrlen;
@@ -1633,6 +1731,15 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX,
ifla_policy, NULL) >= 0) {
+ if (tb[IFLA_IF_NETNSID]) {
+ netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
+ tgt_net = get_target_net(skb, netnsid);
+ if (IS_ERR(tgt_net)) {
+ tgt_net = net;
+ netnsid = -1;
+ }
+ }
+
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
@@ -1648,17 +1755,19 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
- head = &net->dev_index_head[h];
+ head = &tgt_net->dev_index_head[h];
hlist_for_each_entry(dev, head, index_hlist) {
if (link_dump_filtered(dev, master_idx, kind_ops))
goto cont;
if (idx < s_idx)
goto cont;
- err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
+ err = rtnl_fill_ifinfo(skb, dev, net,
+ RTM_NEWLINK,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, 0,
flags,
- ext_filter_mask, 0);
+ ext_filter_mask, 0, NULL,
+ netnsid);
if (err < 0) {
if (likely(skb->len))
@@ -1677,6 +1786,8 @@ out_err:
cb->args[0] = h;
cb->seq = net->dev_base_seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ if (netnsid >= 0)
+ put_net(tgt_net);
return err;
}
@@ -1723,17 +1834,27 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
const struct rtnl_af_ops *af_ops;
- if (!(af_ops = rtnl_af_lookup(nla_type(af))))
+ rcu_read_lock();
+ af_ops = rtnl_af_lookup(nla_type(af));
+ if (!af_ops) {
+ rcu_read_unlock();
return -EAFNOSUPPORT;
+ }
- if (!af_ops->set_link_af)
+ if (!af_ops->set_link_af) {
+ rcu_read_unlock();
return -EOPNOTSUPP;
+ }
if (af_ops->validate_link_af) {
err = af_ops->validate_link_af(dev, af);
- if (err < 0)
+ if (err < 0) {
+ rcu_read_unlock();
return err;
+ }
}
+
+ rcu_read_unlock();
}
}
@@ -1909,7 +2030,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
return err;
}
-static int do_set_master(struct net_device *dev, int ifindex)
+static int do_set_master(struct net_device *dev, int ifindex,
+ struct netlink_ext_ack *extack)
{
struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
const struct net_device_ops *ops;
@@ -1934,7 +2056,7 @@ static int do_set_master(struct net_device *dev, int ifindex)
return -EINVAL;
ops = upper_dev->netdev_ops;
if (ops->ndo_add_slave) {
- err = ops->ndo_add_slave(upper_dev, dev);
+ err = ops->ndo_add_slave(upper_dev, dev, extack);
if (err)
return err;
} else {
@@ -2067,7 +2189,7 @@ static int do_setlink(const struct sk_buff *skb,
}
if (tb[IFLA_MASTER]) {
- err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
+ err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
if (err)
goto errout;
status |= DO_SETLINK_MODIFIED;
@@ -2093,7 +2215,7 @@ static int do_setlink(const struct sk_buff *skb,
dev->tx_queue_len = orig_len;
goto errout;
}
- status |= DO_SETLINK_NOTIFY;
+ status |= DO_SETLINK_MODIFIED;
}
}
@@ -2190,13 +2312,17 @@ static int do_setlink(const struct sk_buff *skb,
nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
const struct rtnl_af_ops *af_ops;
- if (!(af_ops = rtnl_af_lookup(nla_type(af))))
- BUG();
+ rcu_read_lock();
+
+ BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af))));
err = af_ops->set_link_af(dev, af);
- if (err < 0)
+ if (err < 0) {
+ rcu_read_unlock();
goto errout;
+ }
+ rcu_read_unlock();
status |= DO_SETLINK_NOTIFY;
}
}
@@ -2248,7 +2374,7 @@ static int do_setlink(const struct sk_buff *skb,
errout:
if (status & DO_SETLINK_MODIFIED) {
- if (status & DO_SETLINK_NOTIFY)
+ if ((status & DO_SETLINK_NOTIFY) == DO_SETLINK_NOTIFY)
netdev_state_change(dev);
if (err < 0)
@@ -2274,6 +2400,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
goto errout;
+ if (tb[IFLA_IF_NETNSID])
+ return -EOPNOTSUPP;
+
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
else
@@ -2368,6 +2497,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
+ if (tb[IFLA_IF_NETNSID])
+ return -EOPNOTSUPP;
+
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
@@ -2499,6 +2631,9 @@ replay:
if (err < 0)
return err;
+ if (tb[IFLA_IF_NETNSID])
+ return -EOPNOTSUPP;
+
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
else
@@ -2576,12 +2711,6 @@ replay:
return err;
slave_data = slave_attr;
}
- if (m_ops->slave_validate) {
- err = m_ops->slave_validate(tb, slave_data,
- extack);
- if (err < 0)
- return err;
- }
}
if (dev) {
@@ -2711,7 +2840,8 @@ replay:
goto out_unregister;
}
if (tb[IFLA_MASTER]) {
- err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
+ err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]),
+ extack);
if (err)
goto out_unregister;
}
@@ -2737,11 +2867,13 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
+ struct net *tgt_net = net;
struct ifinfomsg *ifm;
char ifname[IFNAMSIZ];
struct nlattr *tb[IFLA_MAX+1];
struct net_device *dev = NULL;
struct sk_buff *nskb;
+ int netnsid = -1;
int err;
u32 ext_filter_mask = 0;
@@ -2749,35 +2881,50 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
+ if (tb[IFLA_IF_NETNSID]) {
+ netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
+ tgt_net = get_target_net(skb, netnsid);
+ if (IS_ERR(tgt_net))
+ return PTR_ERR(tgt_net);
+ }
+
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+ err = -EINVAL;
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
- dev = __dev_get_by_index(net, ifm->ifi_index);
+ dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
else if (tb[IFLA_IFNAME])
- dev = __dev_get_by_name(net, ifname);
+ dev = __dev_get_by_name(tgt_net, ifname);
else
- return -EINVAL;
+ goto out;
+ err = -ENODEV;
if (dev == NULL)
- return -ENODEV;
+ goto out;
+ err = -ENOBUFS;
nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL);
if (nskb == NULL)
- return -ENOBUFS;
+ goto out;
- err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid,
- nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0);
+ err = rtnl_fill_ifinfo(nskb, dev, net,
+ RTM_NEWLINK, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq, 0, 0, ext_filter_mask,
+ 0, NULL, netnsid);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size */
WARN_ON(err == -EMSGSIZE);
kfree_skb(nskb);
} else
err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid);
+out:
+ if (netnsid >= 0)
+ put_net(tgt_net);
return err;
}
@@ -2856,7 +3003,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
unsigned int change,
- u32 event, gfp_t flags)
+ u32 event, gfp_t flags, int *new_nsid)
{
struct net *net = dev_net(dev);
struct sk_buff *skb;
@@ -2867,7 +3014,9 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
if (skb == NULL)
goto errout;
- err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event);
+ err = rtnl_fill_ifinfo(skb, dev, dev_net(dev),
+ type, 0, 0, change, 0, 0, event,
+ new_nsid, -1);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -2890,14 +3039,14 @@ void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags)
static void rtmsg_ifinfo_event(int type, struct net_device *dev,
unsigned int change, u32 event,
- gfp_t flags)
+ gfp_t flags, int *new_nsid)
{
struct sk_buff *skb;
if (dev->reg_state != NETREG_REGISTERED)
return;
- skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags);
+ skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid);
if (skb)
rtmsg_ifinfo_send(skb, dev, flags);
}
@@ -2905,9 +3054,15 @@ static void rtmsg_ifinfo_event(int type, struct net_device *dev,
void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
gfp_t flags)
{
- rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags);
+ rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, NULL);
+}
+
+void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change,
+ gfp_t flags, int *new_nsid)
+{
+ rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags,
+ new_nsid);
}
-EXPORT_SYMBOL(rtmsg_ifinfo);
static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
struct net_device *dev,
@@ -3014,21 +3169,21 @@ int ndo_dflt_fdb_add(struct ndmsg *ndm,
}
EXPORT_SYMBOL(ndo_dflt_fdb_add);
-static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid)
+static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid,
+ struct netlink_ext_ack *extack)
{
u16 vid = 0;
if (vlan_attr) {
if (nla_len(vlan_attr) != sizeof(u16)) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan\n");
+ NL_SET_ERR_MSG(extack, "invalid vlan attribute size");
return -EINVAL;
}
vid = nla_get_u16(vlan_attr);
if (!vid || vid >= VLAN_VID_MASK) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan id %d\n",
- vid);
+ NL_SET_ERR_MSG(extack, "invalid vlan id");
return -EINVAL;
}
}
@@ -3053,24 +3208,24 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex == 0) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ifindex\n");
+ NL_SET_ERR_MSG(extack, "invalid ifindex");
return -EINVAL;
}
dev = __dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with unknown ifindex\n");
+ NL_SET_ERR_MSG(extack, "unknown ifindex");
return -ENODEV;
}
if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid address\n");
+ NL_SET_ERR_MSG(extack, "invalid address");
return -EINVAL;
}
addr = nla_data(tb[NDA_LLADDR]);
- err = fdb_vid_parse(tb[NDA_VLAN], &vid);
+ err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
if (err)
return err;
@@ -3157,24 +3312,24 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex == 0) {
- pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ifindex\n");
+ NL_SET_ERR_MSG(extack, "invalid ifindex");
return -EINVAL;
}
dev = __dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
- pr_info("PF_BRIDGE: RTM_DELNEIGH with unknown ifindex\n");
+ NL_SET_ERR_MSG(extack, "unknown ifindex");
return -ENODEV;
}
if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
- pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n");
+ NL_SET_ERR_MSG(extack, "invalid address");
return -EINVAL;
}
addr = nla_data(tb[NDA_LLADDR]);
- err = fdb_vid_parse(tb[NDA_VLAN], &vid);
+ err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
if (err)
return err;
@@ -3614,7 +3769,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
dev = __dev_get_by_index(net, ifm->ifi_index);
if (!dev) {
- pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
+ NL_SET_ERR_MSG(extack, "unknown ifindex");
return -ENODEV;
}
@@ -3689,7 +3844,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
dev = __dev_get_by_index(net, ifm->ifi_index);
if (!dev) {
- pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
+ NL_SET_ERR_MSG(extack, "unknown ifindex");
return -ENODEV;
}
@@ -3940,25 +4095,30 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
if (!attr)
goto nla_put_failure;
- list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
if (af_ops->fill_stats_af) {
struct nlattr *af;
int err;
af = nla_nest_start(skb, af_ops->family);
- if (!af)
+ if (!af) {
+ rcu_read_unlock();
goto nla_put_failure;
-
+ }
err = af_ops->fill_stats_af(skb, dev);
- if (err == -ENODATA)
+ if (err == -ENODATA) {
nla_nest_cancel(skb, af);
- else if (err < 0)
+ } else if (err < 0) {
+ rcu_read_unlock();
goto nla_put_failure;
+ }
nla_nest_end(skb, af);
}
}
+ rcu_read_unlock();
nla_nest_end(skb, attr);
@@ -4027,7 +4187,8 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
/* for IFLA_STATS_AF_SPEC */
size += nla_total_size(0);
- list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
if (af_ops->get_stats_af_size) {
size += nla_total_size(
af_ops->get_stats_af_size(dev));
@@ -4036,6 +4197,7 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
size += nla_total_size(0);
}
}
+ rcu_read_unlock();
}
return size;
@@ -4279,15 +4441,20 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
switch (event) {
case NETDEV_REBOOT:
+ case NETDEV_CHANGEMTU:
case NETDEV_CHANGEADDR:
case NETDEV_CHANGENAME:
case NETDEV_FEAT_CHANGE:
case NETDEV_BONDING_FAILOVER:
+ case NETDEV_POST_TYPE_CHANGE:
case NETDEV_NOTIFY_PEERS:
+ case NETDEV_CHANGEUPPER:
case NETDEV_RESEND_IGMP:
case NETDEV_CHANGEINFODATA:
+ case NETDEV_CHANGELOWERSTATE:
+ case NETDEV_CHANGE_TX_QUEUE_LEN:
rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event),
- GFP_KERNEL);
+ GFP_KERNEL, NULL);
break;
default:
break;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 16982de649b9..6b0ff396fa9d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -41,7 +41,6 @@
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
-#include <linux/kmemcheck.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/in.h>
@@ -234,14 +233,12 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
shinfo = skb_shinfo(skb);
memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
atomic_set(&shinfo->dataref, 1);
- kmemcheck_annotate_variable(shinfo->destructor_arg);
if (flags & SKB_ALLOC_FCLONE) {
struct sk_buff_fclones *fclones;
fclones = container_of(skb, struct sk_buff_fclones, skb1);
- kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
skb->fclone = SKB_FCLONE_ORIG;
refcount_set(&fclones->fclone_ref, 1);
@@ -301,7 +298,6 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
shinfo = skb_shinfo(skb);
memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
atomic_set(&shinfo->dataref, 1);
- kmemcheck_annotate_variable(shinfo->destructor_arg);
return skb;
}
@@ -357,7 +353,7 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
*/
void *netdev_alloc_frag(unsigned int fragsz)
{
- return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+ return __netdev_alloc_frag(fragsz, GFP_ATOMIC);
}
EXPORT_SYMBOL(netdev_alloc_frag);
@@ -370,7 +366,7 @@ static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
void *napi_alloc_frag(unsigned int fragsz)
{
- return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+ return __napi_alloc_frag(fragsz, GFP_ATOMIC);
}
EXPORT_SYMBOL(napi_alloc_frag);
@@ -1124,9 +1120,13 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len);
if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) {
+ struct sock *save_sk = skb->sk;
+
/* Streams do not free skb on error. Reset to prev state. */
msg->msg_iter = orig_iter;
+ skb->sk = sk;
___pskb_trim(skb, orig_len);
+ skb->sk = save_sk;
return err;
}
@@ -1279,7 +1279,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
if (!n)
return NULL;
- kmemcheck_annotate_bitfield(n, flags1);
n->fclone = SKB_FCLONE_UNAVAILABLE;
}
@@ -1350,8 +1349,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
/* Set the tail pointer and length */
skb_put(n, skb->len);
- if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
- BUG();
+ BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
copy_skb_header(n, skb);
return n;
@@ -1449,8 +1447,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
BUG_ON(nhead < 0);
- if (skb_shared(skb))
- BUG();
+ BUG_ON(skb_shared(skb));
size = SKB_DATA_ALIGN(size);
@@ -1509,6 +1506,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
skb->nohdr = 0;
atomic_set(&skb_shinfo(skb)->dataref, 1);
+ skb_metadata_clear(skb);
+
/* It is not generally safe to change skb->truesize.
* For the moment, we really care of rx path, or
* when skb is orphaned (not attached to a socket).
@@ -1593,9 +1592,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
head_copy_off = newheadroom - head_copy_len;
/* Copy the linear header and data. */
- if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
- skb->len + head_copy_len))
- BUG();
+ BUG_ON(skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
+ skb->len + head_copy_len));
copy_skb_header(n, skb);
@@ -1876,8 +1874,8 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
return NULL;
}
- if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta))
- BUG();
+ BUG_ON(skb_copy_bits(skb, skb_headlen(skb),
+ skb_tail_pointer(skb), delta));
/* Optimization: no fragments, no reasons to preestimate
* size of pulled pages. Superb.
@@ -1896,7 +1894,7 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
}
/* If we need update frag list, we are in troubles.
- * Certainly, it possible to add an offset to skb data,
+ * Certainly, it is possible to add an offset to skb data,
* but taking into account that pulling is expected to
* be very rare operation, it is worth to fight against
* further bloating skb head and crucify ourselves here instead.
@@ -2848,12 +2846,15 @@ EXPORT_SYMBOL(skb_queue_purge);
*/
void skb_rbtree_purge(struct rb_root *root)
{
- struct sk_buff *skb, *next;
+ struct rb_node *p = rb_first(root);
- rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode)
- kfree_skb(skb);
+ while (p) {
+ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
- *root = RB_ROOT;
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, root);
+ kfree_skb(skb);
+ }
}
/**
@@ -4762,6 +4763,7 @@ EXPORT_SYMBOL(kfree_skb_partial);
bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
bool *fragstolen, int *delta_truesize)
{
+ struct skb_shared_info *to_shinfo, *from_shinfo;
int i, delta, len = from->len;
*fragstolen = false;
@@ -4776,7 +4778,9 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
return true;
}
- if (skb_has_frag_list(to) || skb_has_frag_list(from))
+ to_shinfo = skb_shinfo(to);
+ from_shinfo = skb_shinfo(from);
+ if (to_shinfo->frag_list || from_shinfo->frag_list)
return false;
if (skb_zcopy(to) || skb_zcopy(from))
return false;
@@ -4785,8 +4789,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
struct page *page;
unsigned int offset;
- if (skb_shinfo(to)->nr_frags +
- skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
+ if (to_shinfo->nr_frags +
+ from_shinfo->nr_frags >= MAX_SKB_FRAGS)
return false;
if (skb_head_is_locked(from))
@@ -4797,12 +4801,12 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
page = virt_to_head_page(from->head);
offset = from->data - (unsigned char *)page_address(page);
- skb_fill_page_desc(to, skb_shinfo(to)->nr_frags,
+ skb_fill_page_desc(to, to_shinfo->nr_frags,
page, offset, skb_headlen(from));
*fragstolen = true;
} else {
- if (skb_shinfo(to)->nr_frags +
- skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS)
+ if (to_shinfo->nr_frags +
+ from_shinfo->nr_frags > MAX_SKB_FRAGS)
return false;
delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from));
@@ -4810,19 +4814,19 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
WARN_ON_ONCE(delta < len);
- memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags,
- skb_shinfo(from)->frags,
- skb_shinfo(from)->nr_frags * sizeof(skb_frag_t));
- skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags;
+ memcpy(to_shinfo->frags + to_shinfo->nr_frags,
+ from_shinfo->frags,
+ from_shinfo->nr_frags * sizeof(skb_frag_t));
+ to_shinfo->nr_frags += from_shinfo->nr_frags;
if (!skb_cloned(from))
- skb_shinfo(from)->nr_frags = 0;
+ from_shinfo->nr_frags = 0;
/* if the skb is not cloned this does nothing
* since we set nr_frags to 0.
*/
- for (i = 0; i < skb_shinfo(from)->nr_frags; i++)
- skb_frag_ref(from, i);
+ for (i = 0; i < from_shinfo->nr_frags; i++)
+ __skb_frag_ref(&from_shinfo->frags[i]);
to->truesize += delta;
to->len += len;
@@ -4860,6 +4864,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
if (!xnet)
return;
+ ipvs_reset(skb);
skb_orphan(skb);
skb->mark = 0;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 23953b741a41..c0b5b2f17412 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1469,8 +1469,6 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
sk = kmalloc(prot->obj_size, priority);
if (sk != NULL) {
- kmemcheck_annotate_bitfield(sk, flags);
-
if (security_sk_alloc(sk, family, priority))
goto out_free;
@@ -1677,12 +1675,17 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_dst_pending_confirm = 0;
newsk->sk_wmem_queued = 0;
newsk->sk_forward_alloc = 0;
+
+ /* sk->sk_memcg will be populated at accept() time */
+ newsk->sk_memcg = NULL;
+
atomic_set(&newsk->sk_drops, 0);
newsk->sk_send_head = NULL;
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
atomic_set(&newsk->sk_zckey, 0);
sock_reset_flag(newsk, SOCK_DONE);
+ cgroup_sk_alloc(&newsk->sk_cgrp_data);
rcu_read_lock();
filter = rcu_dereference(sk->sk_filter);
@@ -1714,9 +1717,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_incoming_cpu = raw_smp_processor_id();
atomic64_set(&newsk->sk_cookie, 0);
- mem_cgroup_sk_alloc(newsk);
- cgroup_sk_alloc(&newsk->sk_cgrp_data);
-
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
@@ -2344,16 +2344,18 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
/* guarantee minimum buffer size under pressure */
if (kind == SK_MEM_RECV) {
- if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
+ if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot))
return 1;
} else { /* SK_MEM_SEND */
+ int wmem0 = sk_get_wmem0(sk, prot);
+
if (sk->sk_type == SOCK_STREAM) {
- if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
+ if (sk->sk_wmem_queued < wmem0)
return 1;
- } else if (refcount_read(&sk->sk_wmem_alloc) <
- prot->sysctl_wmem[0])
+ } else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) {
return 1;
+ }
}
if (sk_has_memory_pressure(sk)) {
@@ -2683,7 +2685,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk_init_common(sk);
sk->sk_send_head = NULL;
- init_timer(&sk->sk_timer);
+ timer_setup(&sk->sk_timer, NULL, 0);
sk->sk_allocation = GFP_KERNEL;
sk->sk_rcvbuf = sysctl_rmem_default;
@@ -2742,6 +2744,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_max_pacing_rate = ~0U;
sk->sk_pacing_rate = ~0U;
+ sk->sk_pacing_shift = 10;
sk->sk_incoming_cpu = -1;
/*
* Before updating sk_refcnt, we must commit prior changes to memory
@@ -3040,7 +3043,6 @@ struct prot_inuse {
static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
-#ifdef CONFIG_NET_NS
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
@@ -3084,27 +3086,6 @@ static __init int net_inuse_init(void)
}
core_initcall(net_inuse_init);
-#else
-static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
-
-void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
-{
- __this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
-}
-EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
-
-int sock_prot_inuse_get(struct net *net, struct proto *prot)
-{
- int cpu, idx = prot->inuse_idx;
- int res = 0;
-
- for_each_possible_cpu(cpu)
- res += per_cpu(prot_inuse, cpu).val[idx];
-
- return res >= 0 ? res : 0;
-}
-EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
-#endif
static void assign_proto_idx(struct proto *prot)
{
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index eed1ebf7f29d..5eeb1d20cc38 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* To speed up listener socket lookup, create an array to store all sockets
* listening on the same port. This allows a decision to be made after finding
@@ -36,9 +37,14 @@ int reuseport_alloc(struct sock *sk)
* soft irq of receive path or setsockopt from process context
*/
spin_lock_bh(&reuseport_lock);
- WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb,
- lockdep_is_held(&reuseport_lock)),
- "multiple allocations for the same socket");
+
+ /* Allocation attempts can occur concurrently via the setsockopt path
+ * and the bind/hash path. Nothing to do when we lose the race.
+ */
+ if (rcu_dereference_protected(sk->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock)))
+ goto out;
+
reuse = __reuseport_alloc(INIT_SOCKS);
if (!reuse) {
spin_unlock_bh(&reuseport_lock);
@@ -49,6 +55,7 @@ int reuseport_alloc(struct sock *sk)
reuse->num_socks = 1;
rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
+out:
spin_unlock_bh(&reuseport_lock);
return 0;
diff --git a/net/core/stream.c b/net/core/stream.c
index 20231dbb1da0..1cff9c6270c6 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* SUCS NET3:
*
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index b7cd9aafe99e..cbc3dde4cfcc 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* -*- linux-c -*-
* sysctl_net_core.c: sysctl interface to net core subsystem.
*
diff --git a/net/core/tso.c b/net/core/tso.c
index 5dca7ce8ee9f..43f4eba61933 100644
--- a/net/core/tso.c
+++ b/net/core/tso.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/export.h>
#include <linux/if_vlan.h>
#include <net/ip.h>