summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/datagram.c2
-rw-r--r--net/core/dev.c131
-rw-r--r--net/core/dev_ioctl.c19
-rw-r--r--net/core/devlink.c8
-rw-r--r--net/core/dst.c37
-rw-r--r--net/core/fib_rules.c21
-rw-r--r--net/core/filter.c1
-rw-r--r--net/core/neighbour.c14
-rw-r--r--net/core/net_namespace.c19
-rw-r--r--net/core/rtnetlink.c93
-rw-r--r--net/core/skbuff.c5
-rw-r--r--net/core/sock.c23
-rw-r--r--net/core/sysctl_net_core.c2
13 files changed, 251 insertions, 124 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 4dd594741b6d..f9653987c0f9 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -68,7 +68,7 @@ static inline int connection_based(struct sock *sk)
return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
}
-static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int sync,
+static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
void *key)
{
unsigned long bits = (unsigned long)key;
diff --git a/net/core/dev.c b/net/core/dev.c
index 96cf83da0d66..416137c64bf8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1253,8 +1253,9 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
if (!new_ifalias)
return -ENOMEM;
dev->ifalias = new_ifalias;
+ memcpy(dev->ifalias, alias, len);
+ dev->ifalias[len] = 0;
- strlcpy(dev->ifalias, alias, len+1);
return len;
}
@@ -4766,6 +4767,13 @@ struct packet_offload *gro_find_complete_by_type(__be16 type)
}
EXPORT_SYMBOL(gro_find_complete_by_type);
+static void napi_skb_free_stolen_head(struct sk_buff *skb)
+{
+ skb_dst_drop(skb);
+ secpath_reset(skb);
+ kmem_cache_free(skbuff_head_cache, skb);
+}
+
static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
{
switch (ret) {
@@ -4779,13 +4787,10 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
break;
case GRO_MERGED_FREE:
- if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
- skb_dst_drop(skb);
- secpath_reset(skb);
- kmem_cache_free(skbuff_head_cache, skb);
- } else {
+ if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+ napi_skb_free_stolen_head(skb);
+ else
__kfree_skb(skb);
- }
break;
case GRO_HELD:
@@ -4857,10 +4862,16 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi,
break;
case GRO_DROP:
- case GRO_MERGED_FREE:
napi_reuse_skb(napi, skb);
break;
+ case GRO_MERGED_FREE:
+ if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+ napi_skb_free_stolen_head(skb);
+ else
+ napi_reuse_skb(napi, skb);
+ break;
+
case GRO_MERGED:
case GRO_CONSUMED:
break;
@@ -4948,6 +4959,19 @@ __sum16 __skb_gro_checksum_complete(struct sk_buff *skb)
}
EXPORT_SYMBOL(__skb_gro_checksum_complete);
+static void net_rps_send_ipi(struct softnet_data *remsd)
+{
+#ifdef CONFIG_RPS
+ while (remsd) {
+ struct softnet_data *next = remsd->rps_ipi_next;
+
+ if (cpu_online(remsd->cpu))
+ smp_call_function_single_async(remsd->cpu, &remsd->csd);
+ remsd = next;
+ }
+#endif
+}
+
/*
* net_rps_action_and_irq_enable sends any pending IPI's for rps.
* Note: called with local irq disabled, but exits with local irq enabled.
@@ -4963,14 +4987,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
local_irq_enable();
/* Send pending IPI's to kick RPS processing on remote cpus. */
- while (remsd) {
- struct softnet_data *next = remsd->rps_ipi_next;
-
- if (cpu_online(remsd->cpu))
- smp_call_function_single_async(remsd->cpu,
- &remsd->csd);
- remsd = next;
- }
+ net_rps_send_ipi(remsd);
} else
#endif
local_irq_enable();
@@ -5199,8 +5216,6 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
if (rc == BUSY_POLL_BUDGET)
__napi_schedule(napi);
local_bh_enable();
- if (local_softirq_pending())
- do_softirq();
}
void napi_busy_loop(unsigned int napi_id,
@@ -6852,6 +6867,32 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
}
EXPORT_SYMBOL(dev_change_proto_down);
+bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op)
+{
+ struct netdev_xdp xdp;
+
+ memset(&xdp, 0, sizeof(xdp));
+ xdp.command = XDP_QUERY_PROG;
+
+ /* Query must always succeed. */
+ WARN_ON(xdp_op(dev, &xdp) < 0);
+ return xdp.prog_attached;
+}
+
+static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op,
+ struct netlink_ext_ack *extack,
+ struct bpf_prog *prog)
+{
+ struct netdev_xdp xdp;
+
+ memset(&xdp, 0, sizeof(xdp));
+ xdp.command = XDP_SETUP_PROG;
+ xdp.extack = extack;
+ xdp.prog = prog;
+
+ return xdp_op(dev, &xdp);
+}
+
/**
* dev_change_xdp_fd - set or clear a bpf program for a device rx path
* @dev: device
@@ -6864,41 +6905,34 @@ EXPORT_SYMBOL(dev_change_proto_down);
int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
int fd, u32 flags)
{
- int (*xdp_op)(struct net_device *dev, struct netdev_xdp *xdp);
const struct net_device_ops *ops = dev->netdev_ops;
struct bpf_prog *prog = NULL;
- struct netdev_xdp xdp;
+ xdp_op_t xdp_op, xdp_chk;
int err;
ASSERT_RTNL();
- xdp_op = ops->ndo_xdp;
+ xdp_op = xdp_chk = ops->ndo_xdp;
+ if (!xdp_op && (flags & XDP_FLAGS_DRV_MODE))
+ return -EOPNOTSUPP;
if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE))
xdp_op = generic_xdp_install;
+ if (xdp_op == xdp_chk)
+ xdp_chk = generic_xdp_install;
if (fd >= 0) {
- if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) {
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = XDP_QUERY_PROG;
-
- err = xdp_op(dev, &xdp);
- if (err < 0)
- return err;
- if (xdp.prog_attached)
- return -EBUSY;
- }
+ if (xdp_chk && __dev_xdp_attached(dev, xdp_chk))
+ return -EEXIST;
+ if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
+ __dev_xdp_attached(dev, xdp_op))
+ return -EBUSY;
prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
if (IS_ERR(prog))
return PTR_ERR(prog);
}
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = XDP_SETUP_PROG;
- xdp.extack = extack;
- xdp.prog = prog;
-
- err = xdp_op(dev, &xdp);
+ err = dev_xdp_install(dev, xdp_op, extack, prog);
if (err < 0 && prog)
bpf_prog_put(prog);
@@ -7482,6 +7516,8 @@ out:
err_uninit:
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
+ if (dev->priv_destructor)
+ dev->priv_destructor(dev);
goto out;
}
EXPORT_SYMBOL(register_netdevice);
@@ -7689,8 +7725,10 @@ void netdev_run_todo(void)
WARN_ON(rcu_access_pointer(dev->ip6_ptr));
WARN_ON(dev->dn_ptr);
- if (dev->destructor)
- dev->destructor(dev);
+ if (dev->priv_destructor)
+ dev->priv_destructor(dev);
+ if (dev->needs_free_netdev)
+ free_netdev(dev);
/* Report a network device has been unregistered */
rtnl_lock();
@@ -7755,9 +7793,9 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
} else {
netdev_stats_to_stats64(storage, &dev->stats);
}
- storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
- storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
- storage->rx_nohandler += atomic_long_read(&dev->rx_nohandler);
+ storage->rx_dropped += (unsigned long)atomic_long_read(&dev->rx_dropped);
+ storage->tx_dropped += (unsigned long)atomic_long_read(&dev->tx_dropped);
+ storage->rx_nohandler += (unsigned long)atomic_long_read(&dev->rx_nohandler);
return storage;
}
EXPORT_SYMBOL(dev_get_stats);
@@ -8173,7 +8211,7 @@ static int dev_cpu_dead(unsigned int oldcpu)
struct sk_buff **list_skb;
struct sk_buff *skb;
unsigned int cpu;
- struct softnet_data *sd, *oldsd;
+ struct softnet_data *sd, *oldsd, *remsd = NULL;
local_irq_disable();
cpu = smp_processor_id();
@@ -8214,6 +8252,13 @@ static int dev_cpu_dead(unsigned int oldcpu)
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_enable();
+#ifdef CONFIG_RPS
+ remsd = oldsd->rps_ipi_list;
+ oldsd->rps_ipi_list = NULL;
+#endif
+ /* send out pending IPI's on offline CPU */
+ net_rps_send_ipi(remsd);
+
/* Process offline CPU's input_pkt_queue */
while ((skb = __skb_dequeue(&oldsd->process_queue))) {
netif_rx_ni(skb);
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index b94b1d293506..27fad31784a8 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -410,6 +410,22 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
if (cmd == SIOCGIFNAME)
return dev_ifname(net, (struct ifreq __user *)arg);
+ /*
+ * Take care of Wireless Extensions. Unfortunately struct iwreq
+ * isn't a proper subset of struct ifreq (it's 8 byte shorter)
+ * so we need to treat it specially, otherwise applications may
+ * fault if the struct they're passing happens to land at the
+ * end of a mapped page.
+ */
+ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
+ struct iwreq iwr;
+
+ if (copy_from_user(&iwr, arg, sizeof(iwr)))
+ return -EFAULT;
+
+ return wext_handle_ioctl(net, &iwr, cmd, arg);
+ }
+
if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
return -EFAULT;
@@ -559,9 +575,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
ret = -EFAULT;
return ret;
}
- /* Take care of Wireless Extensions */
- if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
- return wext_handle_ioctl(net, &ifr, cmd, arg);
return -ENOTTY;
}
}
diff --git a/net/core/devlink.c b/net/core/devlink.c
index b0b87a292e7c..a0adfc31a3fe 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1680,8 +1680,10 @@ start_again:
hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
&devlink_nl_family, NLM_F_MULTI, cmd);
- if (!hdr)
+ if (!hdr) {
+ nlmsg_free(skb);
return -EMSGSIZE;
+ }
if (devlink_nl_put_handle(skb, devlink))
goto nla_put_failure;
@@ -2098,8 +2100,10 @@ start_again:
hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
&devlink_nl_family, NLM_F_MULTI, cmd);
- if (!hdr)
+ if (!hdr) {
+ nlmsg_free(skb);
return -EMSGSIZE;
+ }
if (devlink_nl_put_handle(skb, devlink))
goto nla_put_failure;
diff --git a/net/core/dst.c b/net/core/dst.c
index 960e503b5a52..13ba4a090c41 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -151,13 +151,13 @@ int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(dst_discard_out);
-const u32 dst_default_metrics[RTAX_MAX + 1] = {
+const struct dst_metrics dst_default_metrics = {
/* This initializer is needed to force linker to place this variable
* into const section. Otherwise it might end into bss section.
* We really want to avoid false sharing on this variable, and catch
* any writes on it.
*/
- [RTAX_MAX] = 0xdeadbeef,
+ .refcnt = ATOMIC_INIT(1),
};
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
@@ -169,7 +169,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
if (dev)
dev_hold(dev);
dst->ops = ops;
- dst_init_metrics(dst, dst_default_metrics, true);
+ dst_init_metrics(dst, dst_default_metrics.metrics, true);
dst->expires = 0UL;
dst->path = dst;
dst->from = NULL;
@@ -314,25 +314,30 @@ EXPORT_SYMBOL(dst_release);
u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
{
- u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC);
+ struct dst_metrics *p = kmalloc(sizeof(*p), GFP_ATOMIC);
if (p) {
- u32 *old_p = __DST_METRICS_PTR(old);
+ struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old);
unsigned long prev, new;
- memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+ atomic_set(&p->refcnt, 1);
+ memcpy(p->metrics, old_p->metrics, sizeof(p->metrics));
new = (unsigned long) p;
prev = cmpxchg(&dst->_metrics, old, new);
if (prev != old) {
kfree(p);
- p = __DST_METRICS_PTR(prev);
+ p = (struct dst_metrics *)__DST_METRICS_PTR(prev);
if (prev & DST_METRICS_READ_ONLY)
p = NULL;
+ } else if (prev & DST_METRICS_REFCOUNTED) {
+ if (atomic_dec_and_test(&old_p->refcnt))
+ kfree(old_p);
}
}
- return p;
+ BUILD_BUG_ON(offsetof(struct dst_metrics, metrics) != 0);
+ return (u32 *)p;
}
EXPORT_SYMBOL(dst_cow_metrics_generic);
@@ -341,7 +346,7 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
{
unsigned long prev, new;
- new = ((unsigned long) dst_default_metrics) | DST_METRICS_READ_ONLY;
+ new = ((unsigned long) &dst_default_metrics) | DST_METRICS_READ_ONLY;
prev = cmpxchg(&dst->_metrics, old, new);
if (prev == old)
kfree(__DST_METRICS_PTR(old));
@@ -464,6 +469,20 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event,
spin_lock_bh(&dst_garbage.lock);
dst = dst_garbage.list;
dst_garbage.list = NULL;
+ /* The code in dst_ifdown places a hold on the loopback device.
+ * If the gc entry processing is set to expire after a lengthy
+ * interval, this hold can cause netdev_wait_allrefs() to hang
+ * out and wait for a long time -- until the the loopback
+ * interface is released. If we're really unlucky, it'll emit
+ * pr_emerg messages to console too. Reset the interval here,
+ * so dst cleanups occur in a more timely fashion.
+ */
+ if (dst_garbage.timer_inc > DST_GC_INC) {
+ dst_garbage.timer_inc = DST_GC_INC;
+ dst_garbage.timer_expires = DST_GC_MIN;
+ mod_delayed_work(system_wq, &dst_gc_work,
+ dst_garbage.timer_expires);
+ }
spin_unlock_bh(&dst_garbage.lock);
if (last)
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index f21c4d3aeae0..3bba291c6c32 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -568,7 +568,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
struct fib_rules_ops *ops = NULL;
- struct fib_rule *rule, *tmp;
+ struct fib_rule *rule, *r;
struct nlattr *tb[FRA_MAX+1];
struct fib_kuid_range range;
int err = -EINVAL;
@@ -668,16 +668,23 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
/*
* Check if this rule is a target to any of them. If so,
+ * adjust to the next one with the same preference or
* disable them. As this operation is eventually very
- * expensive, it is only performed if goto rules have
- * actually been added.
+ * expensive, it is only performed if goto rules, except
+ * current if it is goto rule, have actually been added.
*/
if (ops->nr_goto_rules > 0) {
- list_for_each_entry(tmp, &ops->rules_list, list) {
- if (rtnl_dereference(tmp->ctarget) == rule) {
- RCU_INIT_POINTER(tmp->ctarget, NULL);
+ struct fib_rule *n;
+
+ n = list_next_entry(rule, list);
+ if (&n->list == &ops->rules_list || n->pref != rule->pref)
+ n = NULL;
+ list_for_each_entry(r, &ops->rules_list, list) {
+ if (rtnl_dereference(r->ctarget) != rule)
+ continue;
+ rcu_assign_pointer(r->ctarget, n);
+ if (!n)
ops->unresolved_rules++;
- }
}
}
diff --git a/net/core/filter.c b/net/core/filter.c
index a253a6197e6b..a6bb95fa87b2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2281,6 +2281,7 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_skb_change_head ||
func == bpf_skb_change_tail ||
func == bpf_skb_pull_data ||
+ func == bpf_clone_redirect ||
func == bpf_l3_csum_replace ||
func == bpf_l4_csum_replace ||
func == bpf_xdp_adjust_head)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 58b0bcc125b5..d274f81fcc2c 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1132,10 +1132,6 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
lladdr = neigh->ha;
}
- if (new & NUD_CONNECTED)
- neigh->confirmed = jiffies;
- neigh->updated = jiffies;
-
/* If entry was valid and address is not changed,
do not change entry state, if new one is STALE.
*/
@@ -1157,6 +1153,16 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
}
}
+ /* Update timestamps only once we know we will make a change to the
+ * neighbour entry. Otherwise we risk to move the locktime window with
+ * noop updates and ignore relevant ARP updates.
+ */
+ if (new != old || lladdr != neigh->ha) {
+ if (new & NUD_CONNECTED)
+ neigh->confirmed = jiffies;
+ neigh->updated = jiffies;
+ }
+
if (new != old) {
neigh_del_timer(neigh);
if (new & NUD_PROBE)
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 1934efd4a9d4..26bbfababff2 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -315,6 +315,25 @@ out_undo:
goto out;
}
+static int __net_init net_defaults_init_net(struct net *net)
+{
+ net->core.sysctl_somaxconn = SOMAXCONN;
+ return 0;
+}
+
+static struct pernet_operations net_defaults_ops = {
+ .init = net_defaults_init_net,
+};
+
+static __init int net_defaults_init(void)
+{
+ if (register_pernet_subsys(&net_defaults_ops))
+ panic("Cannot initialize net default settings");
+
+ return 0;
+}
+
+core_initcall(net_defaults_init);
#ifdef CONFIG_NET_NS
static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index bcb0f610ee42..467a2f4510a7 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -899,8 +899,7 @@ static size_t rtnl_port_size(const struct net_device *dev,
static size_t rtnl_xdp_size(void)
{
size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */
- nla_total_size(1) + /* XDP_ATTACHED */
- nla_total_size(4); /* XDP_FLAGS */
+ nla_total_size(1); /* XDP_ATTACHED */
return xdp_size;
}
@@ -932,6 +931,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(1) /* IFLA_LINKMODE */
+ nla_total_size(4) /* IFLA_CARRIER_CHANGES */
+ nla_total_size(4) /* IFLA_LINK_NETNSID */
+ + nla_total_size(4) /* IFLA_GROUP */
+ nla_total_size(ext_filter_mask
& RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
+ rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
@@ -1125,6 +1125,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
struct ifla_vf_mac vf_mac;
struct ifla_vf_info ivi;
+ memset(&ivi, 0, sizeof(ivi));
+
/* Not all SR-IOV capable drivers support the
* spoofcheck and "RSS query enable" query. Preset to
* -1 so the user space tool can detect that the driver
@@ -1133,7 +1135,6 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
ivi.spoofchk = -1;
ivi.rss_query_en = -1;
ivi.trusted = -1;
- memset(ivi.mac, 0, sizeof(ivi.mac));
/* The default value for VF link state is "auto"
* IFLA_VF_LINK_STATE_AUTO which equals zero
*/
@@ -1247,37 +1248,34 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
return 0;
}
+static u8 rtnl_xdp_attached_mode(struct net_device *dev)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ ASSERT_RTNL();
+
+ if (rcu_access_pointer(dev->xdp_prog))
+ return XDP_ATTACHED_SKB;
+ if (ops->ndo_xdp && __dev_xdp_attached(dev, ops->ndo_xdp))
+ return XDP_ATTACHED_DRV;
+
+ return XDP_ATTACHED_NONE;
+}
+
static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
{
struct nlattr *xdp;
- u32 xdp_flags = 0;
- u8 val = 0;
int err;
xdp = nla_nest_start(skb, IFLA_XDP);
if (!xdp)
return -EMSGSIZE;
- if (rcu_access_pointer(dev->xdp_prog)) {
- xdp_flags = XDP_FLAGS_SKB_MODE;
- val = 1;
- } else if (dev->netdev_ops->ndo_xdp) {
- struct netdev_xdp xdp_op = {};
-
- xdp_op.command = XDP_QUERY_PROG;
- err = dev->netdev_ops->ndo_xdp(dev, &xdp_op);
- if (err)
- goto err_cancel;
- val = xdp_op.prog_attached;
- }
- err = nla_put_u8(skb, IFLA_XDP_ATTACHED, val);
+
+ err = nla_put_u8(skb, IFLA_XDP_ATTACHED,
+ rtnl_xdp_attached_mode(dev));
if (err)
goto err_cancel;
- if (xdp_flags) {
- err = nla_put_u32(skb, IFLA_XDP_FLAGS, xdp_flags);
- if (err)
- goto err_cancel;
- }
nla_nest_end(skb, xdp);
return 0;
@@ -1471,6 +1469,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_LINK_NETNSID] = { .type = NLA_S32 },
[IFLA_PROTO_DOWN] = { .type = NLA_U8 },
[IFLA_XDP] = { .type = NLA_NESTED },
+ [IFLA_GROUP] = { .type = NLA_U32 },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1631,13 +1630,13 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
cb->nlh->nlmsg_seq, 0,
flags,
ext_filter_mask);
- /* If we ran out of room on the first message,
- * we're in trouble
- */
- WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
- if (err < 0)
- goto out;
+ if (err < 0) {
+ if (likely(skb->len))
+ goto out;
+
+ goto out_err;
+ }
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
@@ -1645,10 +1644,12 @@ cont:
}
}
out:
+ err = skb->len;
+out_err:
cb->args[1] = idx;
cb->args[0] = h;
- return skb->len;
+ return err;
}
int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len,
@@ -2199,6 +2200,11 @@ static int do_setlink(const struct sk_buff *skb,
err = -EINVAL;
goto errout;
}
+ if ((xdp_flags & XDP_FLAGS_SKB_MODE) &&
+ (xdp_flags & XDP_FLAGS_DRV_MODE)) {
+ err = -EINVAL;
+ goto errout;
+ }
}
if (xdp[IFLA_XDP_FD]) {
@@ -3228,8 +3234,11 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
int err = 0;
int fidx = 0;
- if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,
- IFLA_MAX, ifla_policy, NULL) == 0) {
+ err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,
+ IFLA_MAX, ifla_policy, NULL);
+ if (err < 0) {
+ return -EINVAL;
+ } else if (err == 0) {
if (tb[IFLA_MASTER])
br_idx = nla_get_u32(tb[IFLA_MASTER]);
}
@@ -3452,8 +3461,12 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
err = br_dev->netdev_ops->ndo_bridge_getlink(
skb, portid, seq, dev,
filter_mask, NLM_F_MULTI);
- if (err < 0 && err != -EOPNOTSUPP)
- break;
+ if (err < 0 && err != -EOPNOTSUPP) {
+ if (likely(skb->len))
+ break;
+
+ goto out_err;
+ }
}
idx++;
}
@@ -3464,16 +3477,22 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
seq, dev,
filter_mask,
NLM_F_MULTI);
- if (err < 0 && err != -EOPNOTSUPP)
- break;
+ if (err < 0 && err != -EOPNOTSUPP) {
+ if (likely(skb->len))
+ break;
+
+ goto out_err;
+ }
}
idx++;
}
}
+ err = skb->len;
+out_err:
rcu_read_unlock();
cb->args[0] = idx;
- return skb->len;
+ return err;
}
static inline size_t bridge_nlmsg_size(void)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 346d3e85dfbc..b1be7c01efe2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3754,8 +3754,11 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
spin_lock_irqsave(&q->lock, flags);
skb = __skb_dequeue(q);
- if (skb && (skb_next = skb_peek(q)))
+ if (skb && (skb_next = skb_peek(q))) {
icmp_next = is_icmp_err_skb(skb_next);
+ if (icmp_next)
+ sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_origin;
+ }
spin_unlock_irqrestore(&q->lock, flags);
if (is_icmp_err_skb(skb) && !icmp_next)
diff --git a/net/core/sock.c b/net/core/sock.c
index 6adc69edfdd6..0c3fc16223f9 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -139,10 +139,7 @@
#include <trace/events/sock.h>
-#ifdef CONFIG_INET
#include <net/tcp.h>
-#endif
-
#include <net/busy_poll.h>
static DEFINE_MUTEX(proto_list_mutex);
@@ -1803,28 +1800,24 @@ EXPORT_SYMBOL(skb_set_owner_w);
* delay queue. We want to allow the owner socket to send more
* packets, as if they were already TX completed by a typical driver.
* But we also want to keep skb->sk set because some packet schedulers
- * rely on it (sch_fq for example). So we set skb->truesize to a small
- * amount (1) and decrease sk_wmem_alloc accordingly.
+ * rely on it (sch_fq for example).
*/
void skb_orphan_partial(struct sk_buff *skb)
{
- /* If this skb is a TCP pure ACK or already went here,
- * we have nothing to do. 2 is already a very small truesize.
- */
- if (skb->truesize <= 2)
+ if (skb_is_tcp_pure_ack(skb))
return;
- /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
- * so we do not completely orphan skb, but transfert all
- * accounted bytes but one, to avoid unexpected reorders.
- */
if (skb->destructor == sock_wfree
#ifdef CONFIG_INET
|| skb->destructor == tcp_wfree
#endif
) {
- atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc);
- skb->truesize = 1;
+ struct sock *sk = skb->sk;
+
+ if (atomic_inc_not_zero(&sk->sk_refcnt)) {
+ atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
+ skb->destructor = sock_efree;
+ }
} else {
skb_orphan(skb);
}
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index ea23254b2457..b7cd9aafe99e 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -479,8 +479,6 @@ static __net_init int sysctl_core_net_init(struct net *net)
{
struct ctl_table *tbl;
- net->core.sysctl_somaxconn = SOMAXCONN;
-
tbl = netns_core_table;
if (!net_eq(net, &init_net)) {
tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);