Diffstat (limited to 'net/core')
-rw-r--r--  net/core/dev.c                 279
-rw-r--r--  net/core/dev.h                  14
-rw-r--r--  net/core/dev_addr_lists.c        2
-rw-r--r--  net/core/dev_api.c              13
-rw-r--r--  net/core/dev_ioctl.c             5
-rw-r--r--  net/core/dst.c                  10
-rw-r--r--  net/core/dst_cache.c             2
-rw-r--r--  net/core/filter.c               35
-rw-r--r--  net/core/hotdata.c               5
-rw-r--r--  net/core/ieee8021q_helpers.c    44
-rw-r--r--  net/core/neighbour.c           558
-rw-r--r--  net/core/net-sysfs.c            80
-rw-r--r--  net/core/net-sysfs.h             2
-rw-r--r--  net/core/net_namespace.c        70
-rw-r--r--  net/core/netclassid_cgroup.c     4
-rw-r--r--  net/core/netdev-genl-gen.c       5
-rw-r--r--  net/core/netdev-genl.c          14
-rw-r--r--  net/core/netdev_rx_queue.c       6
-rw-r--r--  net/core/netpoll.c             480
-rw-r--r--  net/core/page_pool.c            36
-rw-r--r--  net/core/rtnetlink.c            10
-rw-r--r--  net/core/scm.c                  32
-rw-r--r--  net/core/selftests.c            67
-rw-r--r--  net/core/skbuff.c               38
-rw-r--r--  net/core/skmsg.c                 7
-rw-r--r--  net/core/sock.c                 73
-rw-r--r--  net/core/sock_map.c             13
-rw-r--r--  net/core/stream.c                8
-rw-r--r--  net/core/sysctl_net_core.c      37
29 files changed, 1109 insertions, 840 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index be97c440ecd5..b28ce68830b2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1267,33 +1267,32 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
EXPORT_SYMBOL(dev_getfirstbyhwtype);
/**
- * __dev_get_by_flags - find any device with given flags
- * @net: the applicable net namespace
- * @if_flags: IFF_* values
- * @mask: bitmask of bits in if_flags to check
+ * netdev_get_by_flags_rcu - find any device with given flags
+ * @net: the applicable net namespace
+ * @tracker: tracking object for the acquired reference
+ * @if_flags: IFF_* values
+ * @mask: bitmask of bits in if_flags to check
+ *
+ * Search for any interface with the given flags.
*
- * Search for any interface with the given flags. Returns NULL if a device
- * is not found or a pointer to the device. Must be called inside
- * rtnl_lock(), and result refcount is unchanged.
+ * Context: rcu_read_lock() must be held.
+ * Returns: NULL if a device is not found or a pointer to the device.
*/
-
-struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags,
- unsigned short mask)
+struct net_device *netdev_get_by_flags_rcu(struct net *net, netdevice_tracker *tracker,
+ unsigned short if_flags, unsigned short mask)
{
- struct net_device *dev, *ret;
-
- ASSERT_RTNL();
+ struct net_device *dev;
- ret = NULL;
- for_each_netdev(net, dev) {
- if (((dev->flags ^ if_flags) & mask) == 0) {
- ret = dev;
- break;
+ for_each_netdev_rcu(net, dev) {
+ if (((READ_ONCE(dev->flags) ^ if_flags) & mask) == 0) {
+ netdev_hold(dev, tracker, GFP_ATOMIC);
+ return dev;
}
}
- return ret;
+
+ return NULL;
}
-EXPORT_SYMBOL(__dev_get_by_flags);
+EXPORT_IPV6_MOD(netdev_get_by_flags_rcu);
/**
* dev_valid_name - check if name is okay for network device
@@ -1769,7 +1768,7 @@ static void __dev_close(struct net_device *dev)
list_del(&single);
}
-void dev_close_many(struct list_head *head, bool unlink)
+void netif_close_many(struct list_head *head, bool unlink)
{
struct net_device *dev, *tmp;
@@ -1787,7 +1786,7 @@ void dev_close_many(struct list_head *head, bool unlink)
list_del_init(&dev->close_list);
}
}
-EXPORT_SYMBOL(dev_close_many);
+EXPORT_SYMBOL_NS_GPL(netif_close_many, "NETDEV_INTERNAL");
void netif_close(struct net_device *dev)
{
@@ -1795,7 +1794,7 @@ void netif_close(struct net_device *dev)
LIST_HEAD(single);
list_add(&dev->close_list, &single);
- dev_close_many(&single, true);
+ netif_close_many(&single, true);
list_del(&single);
}
}
@@ -3179,7 +3178,6 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
if (dev->reg_state == NETREG_REGISTERED ||
dev->reg_state == NETREG_UNREGISTERING) {
- ASSERT_RTNL();
netdev_ops_assert_locked(dev);
rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
@@ -3229,7 +3227,6 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
return -EINVAL;
if (dev->reg_state == NETREG_REGISTERED) {
- ASSERT_RTNL();
netdev_ops_assert_locked(dev);
rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
@@ -4028,7 +4025,10 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
unsigned int hdr_len;
/* mac layer + network layer */
- hdr_len = skb_transport_offset(skb);
+ if (!skb->encapsulation)
+ hdr_len = skb_transport_offset(skb);
+ else
+ hdr_len = skb_inner_transport_offset(skb);
/* + transport layer */
if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
@@ -4798,7 +4798,7 @@ static inline void ____napi_schedule(struct softnet_data *sd,
if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
/* Paired with smp_mb__before_atomic() in
- * napi_enable()/dev_set_threaded().
+ * napi_enable()/netif_set_threaded().
* Use READ_ONCE() to guarantee a complete
* read on napi->thread. Only call
* wake_up_process() when it's not NULL.
@@ -5749,6 +5749,7 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
struct packet_type **ppt_prev)
{
+ enum skb_drop_reason drop_reason = SKB_DROP_REASON_UNHANDLED_PROTO;
struct packet_type *ptype, *pt_prev;
rx_handler_func_t *rx_handler;
struct sk_buff *skb = *pskb;
@@ -5840,8 +5841,10 @@ skip_taps:
#endif
skb_reset_redirect(skb);
skip_classify:
- if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
+ if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) {
+ drop_reason = SKB_DROP_REASON_PFMEMALLOC;
goto drop;
+ }
if (skb_vlan_tag_present(skb)) {
if (pt_prev) {
@@ -5939,8 +5942,6 @@ check_vlan_id:
}
if (pt_prev) {
- if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
- goto drop;
*ppt_prev = pt_prev;
} else {
drop:
@@ -5948,7 +5949,8 @@ drop:
dev_core_stats_rx_dropped_inc(skb->dev);
else
dev_core_stats_rx_nohandler_inc(skb->dev);
- kfree_skb_reason(skb, SKB_DROP_REASON_UNHANDLED_PROTO);
+
+ kfree_skb_reason(skb, drop_reason);
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
*/
@@ -6576,8 +6578,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
* it, we need to bound somehow the time packets are kept in
* the GRO layer.
*/
- gro_flush(&n->gro, !!timeout);
- gro_normal_list(&n->gro);
+ gro_flush_normal(&n->gro, !!timeout);
if (unlikely(!list_empty(&n->poll_list))) {
/* If n->poll_list is not empty, we need to mask irqs */
@@ -6647,8 +6648,7 @@ static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
}
/* Flush too old packets. If HZ < 1000, flush all packets */
- gro_flush(&napi->gro, HZ >= 1000);
- gro_normal_list(&napi->gro);
+ gro_flush_normal(&napi->gro, HZ >= 1000);
clear_bit(NAPI_STATE_SCHED, &napi->state);
}
@@ -6926,22 +6926,83 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
-int dev_set_threaded(struct net_device *dev, bool threaded)
+static void napi_stop_kthread(struct napi_struct *napi)
+{
+ unsigned long val, new;
+
+ /* Wait until the napi STATE_THREADED is unset. */
+ while (true) {
+ val = READ_ONCE(napi->state);
+
+ /* If napi kthread own this napi or the napi is idle,
+ * STATE_THREADED can be unset here.
+ */
+ if ((val & NAPIF_STATE_SCHED_THREADED) ||
+ !(val & NAPIF_STATE_SCHED)) {
+ new = val & (~NAPIF_STATE_THREADED);
+ } else {
+ msleep(20);
+ continue;
+ }
+
+ if (try_cmpxchg(&napi->state, &val, new))
+ break;
+ }
+
+ /* Once STATE_THREADED is unset, wait for SCHED_THREADED to be unset by
+ * the kthread.
+ */
+ while (true) {
+ if (!test_bit(NAPIF_STATE_SCHED_THREADED, &napi->state))
+ break;
+
+ msleep(20);
+ }
+
+ kthread_stop(napi->thread);
+ napi->thread = NULL;
+}
+
+int napi_set_threaded(struct napi_struct *napi,
+ enum netdev_napi_threaded threaded)
+{
+ if (threaded) {
+ if (!napi->thread) {
+ int err = napi_kthread_create(napi);
+
+ if (err)
+ return err;
+ }
+ }
+
+ if (napi->config)
+ napi->config->threaded = threaded;
+
+ if (!threaded && napi->thread) {
+ napi_stop_kthread(napi);
+ } else {
+ /* Make sure kthread is created before THREADED bit is set. */
+ smp_mb__before_atomic();
+ assign_bit(NAPI_STATE_THREADED, &napi->state, threaded);
+ }
+
+ return 0;
+}
+
+int netif_set_threaded(struct net_device *dev,
+ enum netdev_napi_threaded threaded)
{
struct napi_struct *napi;
int err = 0;
netdev_assert_locked_or_invisible(dev);
- if (dev->threaded == threaded)
- return 0;
-
if (threaded) {
list_for_each_entry(napi, &dev->napi_list, dev_list) {
if (!napi->thread) {
err = napi_kthread_create(napi);
if (err) {
- threaded = false;
+ threaded = NETDEV_NAPI_THREADED_DISABLED;
break;
}
}
@@ -6961,12 +7022,32 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
* softirq mode will happen in the next round of napi_schedule().
* This should not cause hiccups/stalls to the live traffic.
*/
- list_for_each_entry(napi, &dev->napi_list, dev_list)
- assign_bit(NAPI_STATE_THREADED, &napi->state, threaded);
+ list_for_each_entry(napi, &dev->napi_list, dev_list) {
+ if (!threaded && napi->thread)
+ napi_stop_kthread(napi);
+ else
+ assign_bit(NAPI_STATE_THREADED, &napi->state, threaded);
+ }
return err;
}
-EXPORT_SYMBOL(dev_set_threaded);
+
+/**
+ * netif_threaded_enable() - enable threaded NAPIs
+ * @dev: net_device instance
+ *
+ * Enable threaded mode for the NAPI instances of the device. This may be useful
+ * for devices where multiple NAPI instances get scheduled by a single
+ * interrupt. Threaded NAPI allows moving the NAPI processing to cores other
+ * than the core where IRQ is mapped.
+ *
+ * This function should be called before @dev is registered.
+ */
+void netif_threaded_enable(struct net_device *dev)
+{
+ WARN_ON_ONCE(netif_set_threaded(dev, NETDEV_NAPI_THREADED_ENABLED));
+}
+EXPORT_SYMBOL(netif_threaded_enable);
/**
* netif_queue_set_napi - Associate queue with the napi
@@ -7182,6 +7263,8 @@ static void napi_restore_config(struct napi_struct *n)
napi_hash_add(n);
n->config->napi_id = n->napi_id;
}
+
+ WARN_ON_ONCE(napi_set_threaded(n, n->config->threaded));
}
static void napi_save_config(struct napi_struct *n)
@@ -7279,7 +7362,7 @@ void netif_napi_add_weight_locked(struct net_device *dev,
* threaded mode will not be enabled in napi_enable().
*/
if (dev->threaded && napi_kthread_create(napi))
- dev->threaded = false;
+ dev->threaded = NETDEV_NAPI_THREADED_DISABLED;
netif_napi_set_irq_locked(napi, -1);
}
EXPORT_SYMBOL(netif_napi_add_weight_locked);
@@ -7448,8 +7531,7 @@ static int __napi_poll(struct napi_struct *n, bool *repoll)
}
/* Flush too old packets. If HZ < 1000, flush all packets */
- gro_flush(&n->gro, HZ >= 1000);
- gro_normal_list(&n->gro);
+ gro_flush_normal(&n->gro, HZ >= 1000);
/* Some drivers may have called napi_schedule
* prior to exhausting their budget.
@@ -9387,12 +9469,12 @@ void dev_set_rx_mode(struct net_device *dev)
}
/**
- * dev_get_flags - get flags reported to userspace
- * @dev: device
+ * netif_get_flags() - get flags reported to userspace
+ * @dev: device
*
- * Get the combination of flag bits exported through APIs to userspace.
+ * Get the combination of flag bits exported through APIs to userspace.
*/
-unsigned int dev_get_flags(const struct net_device *dev)
+unsigned int netif_get_flags(const struct net_device *dev)
{
unsigned int flags;
@@ -9415,7 +9497,7 @@ unsigned int dev_get_flags(const struct net_device *dev)
return flags;
}
-EXPORT_SYMBOL(dev_get_flags);
+EXPORT_SYMBOL(netif_get_flags);
int __dev_change_flags(struct net_device *dev, unsigned int flags,
struct netlink_ext_ack *extack)
@@ -9527,7 +9609,7 @@ int netif_change_flags(struct net_device *dev, unsigned int flags,
return ret;
}
-int __dev_set_mtu(struct net_device *dev, int new_mtu)
+int __netif_set_mtu(struct net_device *dev, int new_mtu)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -9538,7 +9620,7 @@ int __dev_set_mtu(struct net_device *dev, int new_mtu)
WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
-EXPORT_SYMBOL(__dev_set_mtu);
+EXPORT_SYMBOL_NS_GPL(__netif_set_mtu, "NETDEV_INTERNAL");
int dev_validate_mtu(struct net_device *dev, int new_mtu,
struct netlink_ext_ack *extack)
@@ -9557,18 +9639,22 @@ int dev_validate_mtu(struct net_device *dev, int new_mtu,
}
/**
- * netif_set_mtu_ext - Change maximum transfer unit
- * @dev: device
- * @new_mtu: new transfer unit
- * @extack: netlink extended ack
+ * netif_set_mtu_ext() - Change maximum transfer unit
+ * @dev: device
+ * @new_mtu: new transfer unit
+ * @extack: netlink extended ack
+ *
+ * Change the maximum transfer size of the network device.
*
- * Change the maximum transfer size of the network device.
+ * Return: 0 on success, -errno on failure.
*/
int netif_set_mtu_ext(struct net_device *dev, int new_mtu,
struct netlink_ext_ack *extack)
{
int err, orig_mtu;
+ netdev_ops_assert_locked(dev);
+
if (new_mtu == dev->mtu)
return 0;
@@ -9585,7 +9671,7 @@ int netif_set_mtu_ext(struct net_device *dev, int new_mtu,
return err;
orig_mtu = dev->mtu;
- err = __dev_set_mtu(dev, new_mtu);
+ err = __netif_set_mtu(dev, new_mtu);
if (!err) {
err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
@@ -9595,7 +9681,7 @@ int netif_set_mtu_ext(struct net_device *dev, int new_mtu,
/* setting mtu back and notifying everyone again,
* so that they have a chance to revert changes.
*/
- __dev_set_mtu(dev, orig_mtu);
+ __netif_set_mtu(dev, orig_mtu);
call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
new_mtu);
}
@@ -9649,13 +9735,15 @@ void netif_set_group(struct net_device *dev, int new_group)
}
/**
- * dev_pre_changeaddr_notify - Call NETDEV_PRE_CHANGEADDR.
- * @dev: device
- * @addr: new address
- * @extack: netlink extended ack
+ * netif_pre_changeaddr_notify() - Call NETDEV_PRE_CHANGEADDR.
+ * @dev: device
+ * @addr: new address
+ * @extack: netlink extended ack
+ *
+ * Return: 0 on success, -errno on failure.
*/
-int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
- struct netlink_ext_ack *extack)
+int netif_pre_changeaddr_notify(struct net_device *dev, const char *addr,
+ struct netlink_ext_ack *extack)
{
struct netdev_notifier_pre_changeaddr_info info = {
.info.dev = dev,
@@ -9667,7 +9755,7 @@ int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
rc = call_netdevice_notifiers_info(NETDEV_PRE_CHANGEADDR, &info.info);
return notifier_to_errno(rc);
}
-EXPORT_SYMBOL(dev_pre_changeaddr_notify);
+EXPORT_SYMBOL_NS_GPL(netif_pre_changeaddr_notify, "NETDEV_INTERNAL");
int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss,
struct netlink_ext_ack *extack)
@@ -9681,7 +9769,7 @@ int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss,
return -EINVAL;
if (!netif_device_present(dev))
return -ENODEV;
- err = dev_pre_changeaddr_notify(dev, ss->__data, extack);
+ err = netif_pre_changeaddr_notify(dev, ss->__data, extack);
if (err)
return err;
if (memcmp(dev->dev_addr, ss->__data, dev->addr_len)) {
@@ -9698,7 +9786,7 @@ int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss,
DECLARE_RWSEM(dev_addr_sem);
/* "sa" is a true struct sockaddr with limited "sa_data" member. */
-int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name)
+int netif_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name)
{
size_t size = sizeof(sa->sa_data_min);
struct net_device *dev;
@@ -9724,7 +9812,7 @@ unlock:
up_read(&dev_addr_sem);
return ret;
}
-EXPORT_SYMBOL(dev_get_mac_address);
+EXPORT_SYMBOL_NS_GPL(netif_get_mac_address, "NETDEV_INTERNAL");
int netif_change_carrier(struct net_device *dev, bool new_carrier)
{
@@ -9777,16 +9865,17 @@ int dev_get_phys_port_name(struct net_device *dev,
}
/**
- * dev_get_port_parent_id - Get the device's port parent identifier
- * @dev: network device
- * @ppid: pointer to a storage for the port's parent identifier
- * @recurse: allow/disallow recursion to lower devices
+ * netif_get_port_parent_id() - Get the device's port parent identifier
+ * @dev: network device
+ * @ppid: pointer to a storage for the port's parent identifier
+ * @recurse: allow/disallow recursion to lower devices
+ *
+ * Get the devices's port parent identifier.
*
- * Get the devices's port parent identifier
+ * Return: 0 on success, -errno on failure.
*/
-int dev_get_port_parent_id(struct net_device *dev,
- struct netdev_phys_item_id *ppid,
- bool recurse)
+int netif_get_port_parent_id(struct net_device *dev,
+ struct netdev_phys_item_id *ppid, bool recurse)
{
const struct net_device_ops *ops = dev->netdev_ops;
struct netdev_phys_item_id first = { };
@@ -9805,7 +9894,7 @@ int dev_get_port_parent_id(struct net_device *dev,
return err;
netdev_for_each_lower_dev(dev, lower_dev, iter) {
- err = dev_get_port_parent_id(lower_dev, ppid, true);
+ err = netif_get_port_parent_id(lower_dev, ppid, true);
if (err)
break;
if (!first.id_len)
@@ -9816,7 +9905,7 @@ int dev_get_port_parent_id(struct net_device *dev,
return err;
}
-EXPORT_SYMBOL(dev_get_port_parent_id);
+EXPORT_SYMBOL(netif_get_port_parent_id);
/**
* netdev_port_same_parent_id - Indicate if two network devices have
@@ -9829,8 +9918,8 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b)
struct netdev_phys_item_id a_id = { };
struct netdev_phys_item_id b_id = { };
- if (dev_get_port_parent_id(a, &a_id, true) ||
- dev_get_port_parent_id(b, &b_id, true))
+ if (netif_get_port_parent_id(a, &a_id, true) ||
+ netif_get_port_parent_id(b, &b_id, true))
return false;
return netdev_phys_item_id_same(&a_id, &b_id);
@@ -10364,7 +10453,8 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
goto unlock;
}
- bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog);
+ bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog,
+ attr->link_create.attach_type);
link->dev = dev;
link->flags = attr->link_create.flags;
@@ -10730,12 +10820,14 @@ sync_lower:
* *before* calling udp_tunnel_get_rx_info,
* but *after* calling udp_tunnel_drop_rx_info.
*/
+ udp_tunnel_nic_lock(dev);
if (features & NETIF_F_RX_UDP_TUNNEL_PORT) {
dev->features = features;
udp_tunnel_get_rx_info(dev);
} else {
udp_tunnel_drop_rx_info(dev);
}
+ udp_tunnel_nic_unlock(dev);
}
if (diff & NETIF_F_HW_VLAN_CTAG_FILTER) {
@@ -11715,7 +11807,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->priv_len = sizeof_priv;
- ref_tracker_dir_init(&dev->refcnt_tracker, 128, name);
+ ref_tracker_dir_init(&dev->refcnt_tracker, 128, "netdev");
#ifdef CONFIG_PCPU_DEV_REFCNT
dev->pcpu_refcnt = alloc_percpu(int);
if (!dev->pcpu_refcnt)
@@ -11937,21 +12029,8 @@ static void netdev_rss_contexts_free(struct net_device *dev)
mutex_lock(&dev->ethtool->rss_lock);
xa_for_each(&dev->ethtool->rss_ctx, context, ctx) {
- struct ethtool_rxfh_param rxfh;
-
- rxfh.indir = ethtool_rxfh_context_indir(ctx);
- rxfh.key = ethtool_rxfh_context_key(ctx);
- rxfh.hfunc = ctx->hfunc;
- rxfh.input_xfrm = ctx->input_xfrm;
- rxfh.rss_context = context;
- rxfh.rss_delete = true;
-
xa_erase(&dev->ethtool->rss_ctx, context);
- if (dev->ethtool_ops->create_rxfh_context)
- dev->ethtool_ops->remove_rxfh_context(dev, ctx,
- context, NULL);
- else
- dev->ethtool_ops->set_rxfh(dev, &rxfh, NULL);
+ dev->ethtool_ops->remove_rxfh_context(dev, ctx, context, NULL);
kfree(ctx);
}
xa_destroy(&dev->ethtool->rss_ctx);
@@ -12036,7 +12115,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
netdev_lock(dev);
}
}
- dev_close_many(&close_head, true);
+ netif_close_many(&close_head, true);
/* ... now unlock them and go over the rest. */
list_for_each_entry(dev, head, unreg_list) {
if (netdev_need_ops_lock(dev))
@@ -12044,7 +12123,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
else
list_add_tail(&dev->close_list, &close_head);
}
- dev_close_many(&close_head, true);
+ netif_close_many(&close_head, true);
list_for_each_entry(dev, head, unreg_list) {
/* And unlink it from device chain. */
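A minimal caller sketch (hypothetical, not part of this patch) for the new netdev_get_by_flags_rcu() helper above, which replaces the rtnl-locked __dev_get_by_flags(): per its kernel-doc, the lookup runs under rcu_read_lock() and hands back a reference taken via netdev_hold() against the supplied tracker.

#include <linux/netdevice.h>

/* Hypothetical caller: find any device with IFF_UP set and keep a tracked
 * reference to it; release later with netdev_put(dev, tracker).
 */
static struct net_device *example_find_up_dev(struct net *net,
					      netdevice_tracker *tracker)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = netdev_get_by_flags_rcu(net, tracker, IFF_UP, IFF_UP);
	rcu_read_unlock();

	return dev;
}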
diff --git a/net/core/dev.h b/net/core/dev.h
index e93f36b7ddf3..ab69edc0c3e3 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -315,6 +315,20 @@ static inline void napi_set_irq_suspend_timeout(struct napi_struct *n,
WRITE_ONCE(n->irq_suspend_timeout, timeout);
}
+static inline enum netdev_napi_threaded napi_get_threaded(struct napi_struct *n)
+{
+ if (test_bit(NAPI_STATE_THREADED, &n->state))
+ return NETDEV_NAPI_THREADED_ENABLED;
+
+ return NETDEV_NAPI_THREADED_DISABLED;
+}
+
+int napi_set_threaded(struct napi_struct *n,
+ enum netdev_napi_threaded threaded);
+
+int netif_set_threaded(struct net_device *dev,
+ enum netdev_napi_threaded threaded);
+
int rps_cpumask_housekeeping(struct cpumask *mask);
#if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL)
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 90716bd736f3..76c91f224886 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -603,7 +603,7 @@ int dev_addr_add(struct net_device *dev, const unsigned char *addr,
ASSERT_RTNL();
- err = dev_pre_changeaddr_notify(dev, addr, NULL);
+ err = netif_pre_changeaddr_notify(dev, addr, NULL);
if (err)
return err;
err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
diff --git a/net/core/dev_api.c b/net/core/dev_api.c
index 1bf0153195f2..f28852078aa6 100644
--- a/net/core/dev_api.c
+++ b/net/core/dev_api.c
@@ -367,3 +367,16 @@ void netdev_state_change(struct net_device *dev)
netdev_unlock_ops(dev);
}
EXPORT_SYMBOL(netdev_state_change);
+
+int dev_set_threaded(struct net_device *dev,
+ enum netdev_napi_threaded threaded)
+{
+ int ret;
+
+ netdev_lock(dev);
+ ret = netif_set_threaded(dev, threaded);
+ netdev_unlock(dev);
+
+ return ret;
+}
+EXPORT_SYMBOL(dev_set_threaded);
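dev_set_threaded() is now a thin wrapper that takes the instance lock around netif_set_threaded(); a driver that wants threaded NAPI from the start would instead call the new netif_threaded_enable() before registration, as its kernel-doc above requires. A hedged sketch with hypothetical driver types:

#include <linux/netdevice.h>

struct example_priv {
	struct net_device *netdev;
};

/* Hypothetical probe path: switch to threaded NAPI before register_netdev(). */
static int example_probe(struct example_priv *priv)
{
	netif_threaded_enable(priv->netdev);
	return register_netdev(priv->netdev);
}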
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 616479e71466..9c0ad7f4b5d8 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -147,7 +147,7 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm
switch (cmd) {
case SIOCGIFFLAGS: /* Get interface flags */
- ifr->ifr_flags = (short) dev_get_flags(dev);
+ ifr->ifr_flags = (short)netif_get_flags(dev);
return 0;
case SIOCGIFMETRIC: /* Get the metric on the interface
@@ -728,7 +728,8 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr,
switch (cmd) {
case SIOCGIFHWADDR:
dev_load(net, ifr->ifr_name);
- ret = dev_get_mac_address(&ifr->ifr_hwaddr, net, ifr->ifr_name);
+ ret = netif_get_mac_address(&ifr->ifr_hwaddr, net,
+ ifr->ifr_name);
if (colon)
*colon = ':';
return ret;
diff --git a/net/core/dst.c b/net/core/dst.c
index 795ca07e28a4..e2de8b68c41d 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -145,12 +145,12 @@ void dst_dev_put(struct dst_entry *dst)
{
struct net_device *dev = dst->dev;
- dst->obsolete = DST_OBSOLETE_DEAD;
+ WRITE_ONCE(dst->obsolete, DST_OBSOLETE_DEAD);
if (dst->ops->ifdown)
dst->ops->ifdown(dst, dev);
- dst->input = dst_discard;
- dst->output = dst_discard_out;
- dst->dev = blackhole_netdev;
+ WRITE_ONCE(dst->input, dst_discard);
+ WRITE_ONCE(dst->output, dst_discard_out);
+ WRITE_ONCE(dst->dev, blackhole_netdev);
netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker,
GFP_ATOMIC);
}
@@ -263,7 +263,7 @@ unsigned int dst_blackhole_mtu(const struct dst_entry *dst)
{
unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
- return mtu ? : dst->dev->mtu;
+ return mtu ? : dst_dev(dst)->mtu;
}
EXPORT_SYMBOL_GPL(dst_blackhole_mtu);
diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
index 93a04d18e505..9ab4902324e1 100644
--- a/net/core/dst_cache.c
+++ b/net/core/dst_cache.c
@@ -52,7 +52,7 @@ static struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache,
if (unlikely(!time_after(idst->refresh_ts,
READ_ONCE(dst_cache->reset_ts)) ||
- (dst->obsolete && !dst->ops->check(dst, idst->cookie)))) {
+ (READ_ONCE(dst->obsolete) && !dst->ops->check(dst, idst->cookie)))) {
dst_cache_per_cpu_dst_set(idst, NULL, 0);
dst_release(dst);
goto fail;
diff --git a/net/core/filter.c b/net/core/filter.c
index 7a72f766aacf..c09a85c17496 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -122,6 +122,7 @@ EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user);
* @sk: sock associated with &sk_buff
* @skb: buffer to filter
* @cap: limit on how short the eBPF program may trim the packet
+ * @reason: record drop reason on errors (negative return value)
*
* Run the eBPF program and then cut skb->data to correct size returned by
* the program. If pkt_len is 0 we toss packet. If skb->len is smaller
@@ -130,7 +131,8 @@ EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user);
* be accepted or -EPERM if the packet should be tossed.
*
*/
-int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
+int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb,
+ unsigned int cap, enum skb_drop_reason *reason)
{
int err;
struct sk_filter *filter;
@@ -142,15 +144,20 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
*/
if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
+ *reason = SKB_DROP_REASON_PFMEMALLOC;
return -ENOMEM;
}
err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
- if (err)
+ if (err) {
+ *reason = SKB_DROP_REASON_SOCKET_FILTER;
return err;
+ }
err = security_sock_rcv_skb(sk, skb);
- if (err)
+ if (err) {
+ *reason = SKB_DROP_REASON_SECURITY_HOOK;
return err;
+ }
rcu_read_lock();
filter = rcu_dereference(sk->sk_filter);
@@ -162,6 +169,8 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
skb->sk = save_sk;
err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
+ if (err)
+ *reason = SKB_DROP_REASON_SOCKET_FILTER;
}
rcu_read_unlock();
@@ -8690,7 +8699,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
if (size != sizeof(__u64))
return false;
break;
- case offsetof(struct __sk_buff, sk):
+ case bpf_ctx_range_ptr(struct __sk_buff, sk):
if (type == BPF_WRITE || size != sizeof(__u64))
return false;
info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
@@ -9268,7 +9277,7 @@ static bool sock_addr_is_valid_access(int off, int size,
return false;
}
break;
- case offsetof(struct bpf_sock_addr, sk):
+ case bpf_ctx_range_ptr(struct bpf_sock_addr, sk):
if (type != BPF_READ)
return false;
if (size != sizeof(__u64))
@@ -9318,17 +9327,17 @@ static bool sock_ops_is_valid_access(int off, int size,
if (size != sizeof(__u64))
return false;
break;
- case offsetof(struct bpf_sock_ops, sk):
+ case bpf_ctx_range_ptr(struct bpf_sock_ops, sk):
if (size != sizeof(__u64))
return false;
info->reg_type = PTR_TO_SOCKET_OR_NULL;
break;
- case offsetof(struct bpf_sock_ops, skb_data):
+ case bpf_ctx_range_ptr(struct bpf_sock_ops, skb_data):
if (size != sizeof(__u64))
return false;
info->reg_type = PTR_TO_PACKET;
break;
- case offsetof(struct bpf_sock_ops, skb_data_end):
+ case bpf_ctx_range_ptr(struct bpf_sock_ops, skb_data_end):
if (size != sizeof(__u64))
return false;
info->reg_type = PTR_TO_PACKET_END;
@@ -9337,7 +9346,7 @@ static bool sock_ops_is_valid_access(int off, int size,
bpf_ctx_record_field_size(info, size_default);
return bpf_ctx_narrow_access_ok(off, size,
size_default);
- case offsetof(struct bpf_sock_ops, skb_hwtstamp):
+ case bpf_ctx_range(struct bpf_sock_ops, skb_hwtstamp):
if (size != sizeof(__u64))
return false;
break;
@@ -9407,17 +9416,17 @@ static bool sk_msg_is_valid_access(int off, int size,
return false;
switch (off) {
- case offsetof(struct sk_msg_md, data):
+ case bpf_ctx_range_ptr(struct sk_msg_md, data):
info->reg_type = PTR_TO_PACKET;
if (size != sizeof(__u64))
return false;
break;
- case offsetof(struct sk_msg_md, data_end):
+ case bpf_ctx_range_ptr(struct sk_msg_md, data_end):
info->reg_type = PTR_TO_PACKET_END;
if (size != sizeof(__u64))
return false;
break;
- case offsetof(struct sk_msg_md, sk):
+ case bpf_ctx_range_ptr(struct sk_msg_md, sk):
if (size != sizeof(__u64))
return false;
info->reg_type = PTR_TO_SOCKET;
@@ -11623,7 +11632,7 @@ static bool sk_lookup_is_valid_access(int off, int size,
return false;
switch (off) {
- case offsetof(struct bpf_sk_lookup, sk):
+ case bpf_ctx_range_ptr(struct bpf_sk_lookup, sk):
info->reg_type = PTR_TO_SOCKET_OR_NULL;
return size == sizeof(__u64);
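sk_filter_trim_cap() now reports why a packet was rejected through the new drop-reason out-parameter; a minimal sketch of an updated caller (hypothetical receive path, not part of this patch):

#include <linux/filter.h>
#include <linux/skbuff.h>

static int example_sock_rcv(struct sock *sk, struct sk_buff *skb)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;

	/* cap == 1 mirrors the plain sk_filter() wrapper */
	if (sk_filter_trim_cap(sk, skb, 1, &reason) < 0) {
		kfree_skb_reason(skb, reason);
		return NET_RX_DROP;
	}

	return NET_RX_SUCCESS;
}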
diff --git a/net/core/hotdata.c b/net/core/hotdata.c
index 0bc893d5f07b..95d0a4df1006 100644
--- a/net/core/hotdata.c
+++ b/net/core/hotdata.c
@@ -2,7 +2,9 @@
#include <linux/cache.h>
#include <linux/jiffies.h>
#include <linux/list.h>
+#include <net/aligned_data.h>
#include <net/hotdata.h>
+#include <net/ip.h>
#include <net/proto_memory.h>
struct net_hotdata net_hotdata __cacheline_aligned = {
@@ -22,3 +24,6 @@ struct net_hotdata net_hotdata __cacheline_aligned = {
.sysctl_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE
};
EXPORT_SYMBOL(net_hotdata);
+
+struct net_aligned_data net_aligned_data;
+EXPORT_IPV6_MOD(net_aligned_data);
diff --git a/net/core/ieee8021q_helpers.c b/net/core/ieee8021q_helpers.c
index 759a9b9f3f89..669b357b73b2 100644
--- a/net/core/ieee8021q_helpers.c
+++ b/net/core/ieee8021q_helpers.c
@@ -7,6 +7,11 @@
#include <net/dscp.h>
#include <net/ieee8021q.h>
+/* verify that table covers all 8 traffic types */
+#define TT_MAP_SIZE_OK(tbl) \
+ compiletime_assert(ARRAY_SIZE(tbl) == IEEE8021Q_TT_MAX, \
+ #tbl " size mismatch")
+
/* The following arrays map Traffic Types (TT) to traffic classes (TC) for
* different number of queues as shown in the example provided by
* IEEE 802.1Q-2022 in Annex I "I.3 Traffic type to traffic class mapping" and
@@ -101,51 +106,28 @@ int ieee8021q_tt_to_tc(enum ieee8021q_traffic_type tt, unsigned int num_queues)
switch (num_queues) {
case 8:
- compiletime_assert(ARRAY_SIZE(ieee8021q_8queue_tt_tc_map) !=
- IEEE8021Q_TT_MAX - 1,
- "ieee8021q_8queue_tt_tc_map != max - 1");
+ TT_MAP_SIZE_OK(ieee8021q_8queue_tt_tc_map);
return ieee8021q_8queue_tt_tc_map[tt];
case 7:
- compiletime_assert(ARRAY_SIZE(ieee8021q_7queue_tt_tc_map) !=
- IEEE8021Q_TT_MAX - 1,
- "ieee8021q_7queue_tt_tc_map != max - 1");
-
+ TT_MAP_SIZE_OK(ieee8021q_7queue_tt_tc_map);
return ieee8021q_7queue_tt_tc_map[tt];
case 6:
- compiletime_assert(ARRAY_SIZE(ieee8021q_6queue_tt_tc_map) !=
- IEEE8021Q_TT_MAX - 1,
- "ieee8021q_6queue_tt_tc_map != max - 1");
-
+ TT_MAP_SIZE_OK(ieee8021q_6queue_tt_tc_map);
return ieee8021q_6queue_tt_tc_map[tt];
case 5:
- compiletime_assert(ARRAY_SIZE(ieee8021q_5queue_tt_tc_map) !=
- IEEE8021Q_TT_MAX - 1,
- "ieee8021q_5queue_tt_tc_map != max - 1");
-
+ TT_MAP_SIZE_OK(ieee8021q_5queue_tt_tc_map);
return ieee8021q_5queue_tt_tc_map[tt];
case 4:
- compiletime_assert(ARRAY_SIZE(ieee8021q_4queue_tt_tc_map) !=
- IEEE8021Q_TT_MAX - 1,
- "ieee8021q_4queue_tt_tc_map != max - 1");
-
+ TT_MAP_SIZE_OK(ieee8021q_4queue_tt_tc_map);
return ieee8021q_4queue_tt_tc_map[tt];
case 3:
- compiletime_assert(ARRAY_SIZE(ieee8021q_3queue_tt_tc_map) !=
- IEEE8021Q_TT_MAX - 1,
- "ieee8021q_3queue_tt_tc_map != max - 1");
-
+ TT_MAP_SIZE_OK(ieee8021q_3queue_tt_tc_map);
return ieee8021q_3queue_tt_tc_map[tt];
case 2:
- compiletime_assert(ARRAY_SIZE(ieee8021q_2queue_tt_tc_map) !=
- IEEE8021Q_TT_MAX - 1,
- "ieee8021q_2queue_tt_tc_map != max - 1");
-
+ TT_MAP_SIZE_OK(ieee8021q_2queue_tt_tc_map);
return ieee8021q_2queue_tt_tc_map[tt];
case 1:
- compiletime_assert(ARRAY_SIZE(ieee8021q_1queue_tt_tc_map) !=
- IEEE8021Q_TT_MAX - 1,
- "ieee8021q_1queue_tt_tc_map != max - 1");
-
+ TT_MAP_SIZE_OK(ieee8021q_1queue_tt_tc_map);
return ieee8021q_1queue_tt_tc_map[tt];
}
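For illustration only (not from this patch): the new TT_MAP_SIZE_OK() check compiles exactly when a map provides one traffic class per 802.1Q traffic type, i.e. IEEE8021Q_TT_MAX entries, as in this hypothetical table.

#include <net/ieee8021q.h>

/* Hypothetical 4-queue style mapping: eight traffic types folded into four
 * traffic classes; sized so TT_MAP_SIZE_OK(example_tt_tc_map) holds.
 */
static const u8 example_tt_tc_map[IEEE8021Q_TT_MAX] = {
	0, 0, 1, 1, 2, 2, 3, 3,
};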
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 49dce9a82295..bddfa389effa 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -28,6 +28,7 @@
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/dst.h>
+#include <net/ip.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
@@ -53,8 +54,8 @@ static void neigh_timer_handler(struct timer_list *t);
static void __neigh_notify(struct neighbour *n, int type, int flags,
u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
-static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
- struct net_device *dev);
+static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
+ bool skip_perm);
#ifdef CONFIG_PROC_FS
static const struct seq_operations neigh_stat_seq_ops;
@@ -153,11 +154,12 @@ static void neigh_update_gc_list(struct neighbour *n)
if (n->dead)
goto out;
- /* remove from the gc list if new state is permanent or if neighbor
- * is externally learned; otherwise entry should be on the gc list
+ /* remove from the gc list if new state is permanent or if neighbor is
+ * externally learned / validated; otherwise entry should be on the gc
+ * list
*/
exempt_from_gc = n->nud_state & NUD_PERMANENT ||
- n->flags & NTF_EXT_LEARNED;
+ n->flags & (NTF_EXT_LEARNED | NTF_EXT_VALIDATED);
on_gc_list = !list_empty(&n->gc_list);
if (exempt_from_gc && on_gc_list) {
@@ -204,6 +206,7 @@ static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;
+ ndm_flags |= (flags & NEIGH_UPDATE_F_EXT_VALIDATED) ? NTF_EXT_VALIDATED : 0;
if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
if (ndm_flags & NTF_EXT_LEARNED)
@@ -221,6 +224,14 @@ static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
*notify = 1;
*managed_update = true;
}
+ if ((old_flags ^ ndm_flags) & NTF_EXT_VALIDATED) {
+ if (ndm_flags & NTF_EXT_VALIDATED)
+ neigh->flags |= NTF_EXT_VALIDATED;
+ else
+ neigh->flags &= ~NTF_EXT_VALIDATED;
+ *notify = 1;
+ *gc_update = true;
+ }
}
bool neigh_remove_one(struct neighbour *n)
@@ -368,6 +379,43 @@ static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
}
}
+static void neigh_flush_one(struct neighbour *n)
+{
+ hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
+
+ write_lock(&n->lock);
+
+ neigh_del_timer(n);
+ neigh_mark_dead(n);
+
+ if (refcount_read(&n->refcnt) != 1) {
+ /* The most unpleasant situation.
+ * We must destroy neighbour entry,
+ * but someone still uses it.
+ *
+ * The destroy will be delayed until
+ * the last user releases us, but
+ * we must kill timers etc. and move
+ * it to safe state.
+ */
+ __skb_queue_purge(&n->arp_queue);
+ n->arp_queue_len_bytes = 0;
+ WRITE_ONCE(n->output, neigh_blackhole);
+
+ if (n->nud_state & NUD_VALID)
+ n->nud_state = NUD_NOARP;
+ else
+ n->nud_state = NUD_NONE;
+
+ neigh_dbg(2, "neigh %p is stray\n", n);
+ }
+
+ write_unlock(&n->lock);
+
+ neigh_cleanup_and_release(n);
+}
+
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
bool skip_perm)
{
@@ -378,35 +426,29 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
dev_head = neigh_get_dev_table(dev, tbl->family);
hlist_for_each_entry_safe(n, tmp, dev_head, dev_list) {
- if (skip_perm && n->nud_state & NUD_PERMANENT)
+ if (skip_perm &&
+ (n->nud_state & NUD_PERMANENT ||
+ n->flags & NTF_EXT_VALIDATED))
continue;
- hlist_del_rcu(&n->hash);
- hlist_del_rcu(&n->dev_list);
- write_lock(&n->lock);
- neigh_del_timer(n);
- neigh_mark_dead(n);
- if (refcount_read(&n->refcnt) != 1) {
- /* The most unpleasant situation.
- * We must destroy neighbour entry,
- * but someone still uses it.
- *
- * The destroy will be delayed until
- * the last user releases us, but
- * we must kill timers etc. and move
- * it to safe state.
- */
- __skb_queue_purge(&n->arp_queue);
- n->arp_queue_len_bytes = 0;
- WRITE_ONCE(n->output, neigh_blackhole);
- if (n->nud_state & NUD_VALID)
- n->nud_state = NUD_NOARP;
- else
- n->nud_state = NUD_NONE;
- neigh_dbg(2, "neigh %p is stray\n", n);
- }
- write_unlock(&n->lock);
- neigh_cleanup_and_release(n);
+ neigh_flush_one(n);
+ }
+}
+
+static void neigh_flush_table(struct neigh_table *tbl)
+{
+ struct neigh_hash_table *nht;
+ int i;
+
+ nht = rcu_dereference_protected(tbl->nht,
+ lockdep_is_held(&tbl->lock));
+
+ for (i = 0; i < (1 << nht->hash_shift); i++) {
+ struct hlist_node *tmp;
+ struct neighbour *n;
+
+ neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i])
+ neigh_flush_one(n);
}
}
@@ -422,8 +464,15 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
bool skip_perm)
{
write_lock_bh(&tbl->lock);
- neigh_flush_dev(tbl, dev, skip_perm);
- pneigh_ifdown_and_unlock(tbl, dev);
+ if (likely(dev)) {
+ neigh_flush_dev(tbl, dev, skip_perm);
+ } else {
+ DEBUG_NET_WARN_ON_ONCE(skip_perm);
+ neigh_flush_table(tbl);
+ }
+ write_unlock_bh(&tbl->lock);
+
+ pneigh_ifdown(tbl, dev, skip_perm);
pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL,
tbl->family);
if (skb_queue_empty_lockless(&tbl->proxy_queue))
@@ -706,54 +755,53 @@ static u32 pneigh_hash(const void *pkey, unsigned int key_len)
return hash_val;
}
-static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
- struct net *net,
- const void *pkey,
- unsigned int key_len,
- struct net_device *dev)
+struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
+ struct net *net, const void *pkey,
+ struct net_device *dev)
{
+ struct pneigh_entry *n;
+ unsigned int key_len;
+ u32 hash_val;
+
+ key_len = tbl->key_len;
+ hash_val = pneigh_hash(pkey, key_len);
+ n = rcu_dereference_check(tbl->phash_buckets[hash_val],
+ lockdep_is_held(&tbl->phash_lock));
+
while (n) {
if (!memcmp(n->key, pkey, key_len) &&
net_eq(pneigh_net(n), net) &&
(n->dev == dev || !n->dev))
return n;
- n = n->next;
- }
- return NULL;
-}
-struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
- struct net *net, const void *pkey, struct net_device *dev)
-{
- unsigned int key_len = tbl->key_len;
- u32 hash_val = pneigh_hash(pkey, key_len);
+ n = rcu_dereference_check(n->next, lockdep_is_held(&tbl->phash_lock));
+ }
- return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
- net, pkey, key_len, dev);
+ return NULL;
}
-EXPORT_SYMBOL_GPL(__pneigh_lookup);
+EXPORT_IPV6_MOD(pneigh_lookup);
-struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
- struct net *net, const void *pkey,
- struct net_device *dev, int creat)
+int pneigh_create(struct neigh_table *tbl, struct net *net,
+ const void *pkey, struct net_device *dev,
+ u32 flags, u8 protocol, bool permanent)
{
struct pneigh_entry *n;
- unsigned int key_len = tbl->key_len;
- u32 hash_val = pneigh_hash(pkey, key_len);
-
- read_lock_bh(&tbl->lock);
- n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
- net, pkey, key_len, dev);
- read_unlock_bh(&tbl->lock);
+ unsigned int key_len;
+ u32 hash_val;
+ int err = 0;
- if (n || !creat)
- goto out;
+ mutex_lock(&tbl->phash_lock);
- ASSERT_RTNL();
+ n = pneigh_lookup(tbl, net, pkey, dev);
+ if (n)
+ goto update;
+ key_len = tbl->key_len;
n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
- if (!n)
+ if (!n) {
+ err = -ENOBUFS;
goto out;
+ }
write_pnet(&n->net, net);
memcpy(n->key, pkey, key_len);
@@ -763,73 +811,98 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
if (tbl->pconstructor && tbl->pconstructor(n)) {
netdev_put(dev, &n->dev_tracker);
kfree(n);
- n = NULL;
+ err = -ENOBUFS;
goto out;
}
- write_lock_bh(&tbl->lock);
+ hash_val = pneigh_hash(pkey, key_len);
n->next = tbl->phash_buckets[hash_val];
- tbl->phash_buckets[hash_val] = n;
- write_unlock_bh(&tbl->lock);
+ rcu_assign_pointer(tbl->phash_buckets[hash_val], n);
+update:
+ WRITE_ONCE(n->flags, flags);
+ n->permanent = permanent;
+ WRITE_ONCE(n->protocol, protocol);
out:
- return n;
+ mutex_unlock(&tbl->phash_lock);
+ return err;
}
-EXPORT_SYMBOL(pneigh_lookup);
+static void pneigh_destroy(struct rcu_head *rcu)
+{
+ struct pneigh_entry *n = container_of(rcu, struct pneigh_entry, rcu);
+
+ netdev_put(n->dev, &n->dev_tracker);
+ kfree(n);
+}
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
struct net_device *dev)
{
- struct pneigh_entry *n, **np;
- unsigned int key_len = tbl->key_len;
- u32 hash_val = pneigh_hash(pkey, key_len);
+ struct pneigh_entry *n, __rcu **np;
+ unsigned int key_len;
+ u32 hash_val;
- write_lock_bh(&tbl->lock);
- for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
+ key_len = tbl->key_len;
+ hash_val = pneigh_hash(pkey, key_len);
+
+ mutex_lock(&tbl->phash_lock);
+
+ for (np = &tbl->phash_buckets[hash_val];
+ (n = rcu_dereference_protected(*np, 1)) != NULL;
np = &n->next) {
if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
net_eq(pneigh_net(n), net)) {
- *np = n->next;
- write_unlock_bh(&tbl->lock);
+ rcu_assign_pointer(*np, n->next);
+
+ mutex_unlock(&tbl->phash_lock);
+
if (tbl->pdestructor)
tbl->pdestructor(n);
- netdev_put(n->dev, &n->dev_tracker);
- kfree(n);
+
+ call_rcu(&n->rcu, pneigh_destroy);
return 0;
}
}
- write_unlock_bh(&tbl->lock);
+
+ mutex_unlock(&tbl->phash_lock);
return -ENOENT;
}
-static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
- struct net_device *dev)
+static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
+ bool skip_perm)
{
- struct pneigh_entry *n, **np, *freelist = NULL;
+ struct pneigh_entry *n, __rcu **np;
+ LIST_HEAD(head);
u32 h;
+ mutex_lock(&tbl->phash_lock);
+
for (h = 0; h <= PNEIGH_HASHMASK; h++) {
np = &tbl->phash_buckets[h];
- while ((n = *np) != NULL) {
+ while ((n = rcu_dereference_protected(*np, 1)) != NULL) {
+ if (skip_perm && n->permanent)
+ goto skip;
if (!dev || n->dev == dev) {
- *np = n->next;
- n->next = freelist;
- freelist = n;
+ rcu_assign_pointer(*np, n->next);
+ list_add(&n->free_node, &head);
continue;
}
+skip:
np = &n->next;
}
}
- write_unlock_bh(&tbl->lock);
- while ((n = freelist)) {
- freelist = n->next;
- n->next = NULL;
+
+ mutex_unlock(&tbl->phash_lock);
+
+ while (!list_empty(&head)) {
+ n = list_first_entry(&head, typeof(*n), free_node);
+ list_del(&n->free_node);
+
if (tbl->pdestructor)
tbl->pdestructor(n);
- netdev_put(n->dev, &n->dev_tracker);
- kfree(n);
+
+ call_rcu(&n->rcu, pneigh_destroy);
}
- return -ENOENT;
}
static inline void neigh_parms_put(struct neigh_parms *parms)
@@ -937,7 +1010,8 @@ static void neigh_periodic_work(struct work_struct *work)
state = n->nud_state;
if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
- (n->flags & NTF_EXT_LEARNED)) {
+ (n->flags &
+ (NTF_EXT_LEARNED | NTF_EXT_VALIDATED))) {
write_unlock(&n->lock);
continue;
}
@@ -1090,9 +1164,15 @@ static void neigh_timer_handler(struct timer_list *t)
if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
- WRITE_ONCE(neigh->nud_state, NUD_FAILED);
+ if (neigh->nud_state == NUD_PROBE &&
+ neigh->flags & NTF_EXT_VALIDATED) {
+ WRITE_ONCE(neigh->nud_state, NUD_STALE);
+ neigh->updated = jiffies;
+ } else {
+ WRITE_ONCE(neigh->nud_state, NUD_FAILED);
+ neigh_invalidate(neigh);
+ }
notify = 1;
- neigh_invalidate(neigh);
goto out;
}
@@ -1240,6 +1320,8 @@ static void neigh_update_hhs(struct neighbour *neigh)
NTF_ROUTER flag.
NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
a router.
+ NEIGH_UPDATE_F_EXT_VALIDATED means that the entry will not be removed
+ or invalidated.
Caller MUST hold reference count on the entry.
*/
@@ -1402,7 +1484,8 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
* we can reinject the packet there.
*/
n2 = NULL;
- if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
+ if (dst &&
+ READ_ONCE(dst->obsolete) != DST_OBSOLETE_DEAD) {
n2 = dst_neigh_lookup_skb(dst, skb);
if (n2)
n1 = n2;
@@ -1756,6 +1839,7 @@ void neigh_table_init(int index, struct neigh_table *tbl)
WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
rwlock_init(&tbl->lock);
+ mutex_init(&tbl->phash_lock);
INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
@@ -1972,21 +2056,13 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[NDA_PROTOCOL])
protocol = nla_get_u8(tb[NDA_PROTOCOL]);
if (ndm_flags & NTF_PROXY) {
- struct pneigh_entry *pn;
-
- if (ndm_flags & NTF_MANAGED) {
+ if (ndm_flags & (NTF_MANAGED | NTF_EXT_VALIDATED)) {
NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
goto out;
}
- err = -ENOBUFS;
- pn = pneigh_lookup(tbl, net, dst, dev, 1);
- if (pn) {
- pn->flags = ndm_flags;
- if (protocol)
- pn->protocol = protocol;
- err = 0;
- }
+ err = pneigh_create(tbl, net, dst, dev, ndm_flags, protocol,
+ !!(ndm->ndm_state & NUD_PERMANENT));
goto out;
}
@@ -2004,7 +2080,8 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
if (neigh == NULL) {
bool ndm_permanent = ndm->ndm_state & NUD_PERMANENT;
bool exempt_from_gc = ndm_permanent ||
- ndm_flags & NTF_EXT_LEARNED;
+ ndm_flags & (NTF_EXT_LEARNED |
+ NTF_EXT_VALIDATED);
if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
err = -ENOENT;
@@ -2015,10 +2092,27 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
err = -EINVAL;
goto out;
}
+ if (ndm_flags & NTF_EXT_VALIDATED) {
+ u8 state = ndm->ndm_state;
+
+ /* NTF_USE and NTF_MANAGED will result in the neighbor
+ * being created with an invalid state (NUD_NONE).
+ */
+ if (ndm_flags & (NTF_USE | NTF_MANAGED))
+ state = NUD_NONE;
+
+ if (!(state & NUD_VALID)) {
+ NL_SET_ERR_MSG(extack,
+ "Cannot create externally validated neighbor with an invalid state");
+ err = -EINVAL;
+ goto out;
+ }
+ }
neigh = ___neigh_create(tbl, dst, dev,
ndm_flags &
- (NTF_EXT_LEARNED | NTF_MANAGED),
+ (NTF_EXT_LEARNED | NTF_MANAGED |
+ NTF_EXT_VALIDATED),
exempt_from_gc, true);
if (IS_ERR(neigh)) {
err = PTR_ERR(neigh);
@@ -2030,6 +2124,24 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
neigh_release(neigh);
goto out;
}
+ if (ndm_flags & NTF_EXT_VALIDATED) {
+ u8 state = ndm->ndm_state;
+
+ /* NTF_USE and NTF_MANAGED do not update the existing
+ * state other than clearing it if it was
+ * NUD_PERMANENT.
+ */
+ if (ndm_flags & (NTF_USE | NTF_MANAGED))
+ state = READ_ONCE(neigh->nud_state) & ~NUD_PERMANENT;
+
+ if (!(state & NUD_VALID)) {
+ NL_SET_ERR_MSG(extack,
+ "Cannot mark neighbor as externally validated with an invalid state");
+ err = -EINVAL;
+ neigh_release(neigh);
+ goto out;
+ }
+ }
if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
@@ -2046,13 +2158,13 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
flags |= NEIGH_UPDATE_F_MANAGED;
if (ndm_flags & NTF_USE)
flags |= NEIGH_UPDATE_F_USE;
+ if (ndm_flags & NTF_EXT_VALIDATED)
+ flags |= NEIGH_UPDATE_F_EXT_VALIDATED;
err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
NETLINK_CB(skb).portid, extack);
- if (!err && ndm_flags & (NTF_USE | NTF_MANAGED)) {
+ if (!err && ndm_flags & (NTF_USE | NTF_MANAGED))
neigh_event_send(neigh, NULL);
- err = 0;
- }
neigh_release(neigh);
out:
return err;
@@ -2579,13 +2691,15 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
u32 neigh_flags, neigh_flags_ext;
struct nlmsghdr *nlh;
struct ndmsg *ndm;
+ u8 protocol;
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
if (nlh == NULL)
return -EMSGSIZE;
- neigh_flags_ext = pn->flags >> NTF_EXT_SHIFT;
- neigh_flags = pn->flags & NTF_OLD_MASK;
+ neigh_flags = READ_ONCE(pn->flags);
+ neigh_flags_ext = neigh_flags >> NTF_EXT_SHIFT;
+ neigh_flags &= NTF_OLD_MASK;
ndm = nlmsg_data(nlh);
ndm->ndm_family = tbl->family;
@@ -2599,7 +2713,8 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
goto nla_put_failure;
- if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
+ protocol = READ_ONCE(pn->protocol);
+ if (protocol && nla_put_u8(skb, NDA_PROTOCOL, protocol))
goto nla_put_failure;
if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
goto nla_put_failure;
@@ -2706,12 +2821,12 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
if (filter->dev_idx || filter->master_idx)
flags |= NLM_F_DUMP_FILTERED;
- read_lock_bh(&tbl->lock);
-
for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
if (h > s_h)
s_idx = 0;
- for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
+ for (n = rcu_dereference(tbl->phash_buckets[h]), idx = 0;
+ n;
+ n = rcu_dereference(n->next)) {
if (idx < s_idx || pneigh_net(n) != net)
goto next;
if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
@@ -2720,16 +2835,13 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
err = pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGH, flags, tbl);
- if (err < 0) {
- read_unlock_bh(&tbl->lock);
+ if (err < 0)
goto out;
- }
next:
idx++;
}
}
- read_unlock_bh(&tbl->lock);
out:
cb->args[3] = h;
cb->args[4] = idx;
@@ -2846,64 +2958,58 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
return err;
}
-static int neigh_valid_get_req(const struct nlmsghdr *nlh,
- struct neigh_table **tbl,
- void **dst, int *dev_idx, u8 *ndm_flags,
- struct netlink_ext_ack *extack)
+static struct ndmsg *neigh_valid_get_req(const struct nlmsghdr *nlh,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
{
- struct nlattr *tb[NDA_MAX + 1];
struct ndmsg *ndm;
int err, i;
ndm = nlmsg_payload(nlh, sizeof(*ndm));
if (!ndm) {
NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
ndm->ndm_type) {
NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
if (ndm->ndm_flags & ~NTF_PROXY) {
NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (!(ndm->ndm_flags & NTF_PROXY) && !ndm->ndm_ifindex) {
+ NL_SET_ERR_MSG(extack, "No device specified");
+ return ERR_PTR(-EINVAL);
}
err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
NDA_MAX, nda_policy, extack);
if (err < 0)
- return err;
-
- *ndm_flags = ndm->ndm_flags;
- *dev_idx = ndm->ndm_ifindex;
- *tbl = neigh_find_table(ndm->ndm_family);
- if (*tbl == NULL) {
- NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
- return -EAFNOSUPPORT;
- }
+ return ERR_PTR(err);
for (i = 0; i <= NDA_MAX; ++i) {
- if (!tb[i])
- continue;
-
switch (i) {
case NDA_DST:
- if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
- NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
- return -EINVAL;
+ if (!tb[i]) {
+ NL_SET_ERR_ATTR_MISS(extack, NULL, NDA_DST);
+ return ERR_PTR(-EINVAL);
}
- *dst = nla_data(tb[i]);
break;
default:
+ if (!tb[i])
+ continue;
+
NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
}
- return 0;
+ return ndm;
}
static inline size_t neigh_nlmsg_size(void)
@@ -2917,27 +3023,6 @@ static inline size_t neigh_nlmsg_size(void)
+ nla_total_size(1); /* NDA_PROTOCOL */
}
-static int neigh_get_reply(struct net *net, struct neighbour *neigh,
- u32 pid, u32 seq)
-{
- struct sk_buff *skb;
- int err = 0;
-
- skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
- if (!skb)
- return -ENOBUFS;
-
- err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
- if (err) {
- kfree_skb(skb);
- goto errout;
- }
-
- err = rtnl_unicast(skb, net, pid);
-errout:
- return err;
-}
-
static inline size_t pneigh_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct ndmsg))
@@ -2946,85 +3031,91 @@ static inline size_t pneigh_nlmsg_size(void)
+ nla_total_size(1); /* NDA_PROTOCOL */
}
-static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
- u32 pid, u32 seq, struct neigh_table *tbl)
+static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
+ struct net *net = sock_net(in_skb->sk);
+ u32 pid = NETLINK_CB(in_skb).portid;
+ struct nlattr *tb[NDA_MAX + 1];
+ struct net_device *dev = NULL;
+ u32 seq = nlh->nlmsg_seq;
+ struct neigh_table *tbl;
+ struct neighbour *neigh;
struct sk_buff *skb;
- int err = 0;
+ struct ndmsg *ndm;
+ void *dst;
+ int err;
- skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
+ ndm = neigh_valid_get_req(nlh, tb, extack);
+ if (IS_ERR(ndm))
+ return PTR_ERR(ndm);
+
+ if (ndm->ndm_flags & NTF_PROXY)
+ skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
+ else
+ skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
if (!skb)
return -ENOBUFS;
- err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
- if (err) {
- kfree_skb(skb);
- goto errout;
- }
+ rcu_read_lock();
- err = rtnl_unicast(skb, net, pid);
-errout:
- return err;
-}
+ tbl = neigh_find_table(ndm->ndm_family);
+ if (!tbl) {
+ NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
+ err = -EAFNOSUPPORT;
+ goto err_unlock;
+ }
-static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
-{
- struct net *net = sock_net(in_skb->sk);
- struct net_device *dev = NULL;
- struct neigh_table *tbl = NULL;
- struct neighbour *neigh;
- void *dst = NULL;
- u8 ndm_flags = 0;
- int dev_idx = 0;
- int err;
+ if (nla_len(tb[NDA_DST]) != (int)tbl->key_len) {
+ NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
+ err = -EINVAL;
+ goto err_unlock;
+ }
- err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
- extack);
- if (err < 0)
- return err;
+ dst = nla_data(tb[NDA_DST]);
- if (dev_idx) {
- dev = __dev_get_by_index(net, dev_idx);
+ if (ndm->ndm_ifindex) {
+ dev = dev_get_by_index_rcu(net, ndm->ndm_ifindex);
if (!dev) {
NL_SET_ERR_MSG(extack, "Unknown device ifindex");
- return -ENODEV;
+ err = -ENODEV;
+ goto err_unlock;
}
}
- if (!dst) {
- NL_SET_ERR_MSG(extack, "Network address not specified");
- return -EINVAL;
- }
-
- if (ndm_flags & NTF_PROXY) {
+ if (ndm->ndm_flags & NTF_PROXY) {
struct pneigh_entry *pn;
- pn = pneigh_lookup(tbl, net, dst, dev, 0);
+ pn = pneigh_lookup(tbl, net, dst, dev);
if (!pn) {
NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
- return -ENOENT;
+ err = -ENOENT;
+ goto err_unlock;
}
- return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq, tbl);
- }
- if (!dev) {
- NL_SET_ERR_MSG(extack, "No device specified");
- return -EINVAL;
- }
+ err = pneigh_fill_info(skb, pn, pid, seq, RTM_NEWNEIGH, 0, tbl);
+ if (err)
+ goto err_unlock;
+ } else {
+ neigh = neigh_lookup(tbl, dst, dev);
+ if (!neigh) {
+ NL_SET_ERR_MSG(extack, "Neighbour entry not found");
+ err = -ENOENT;
+ goto err_unlock;
+ }
- neigh = neigh_lookup(tbl, dst, dev);
- if (!neigh) {
- NL_SET_ERR_MSG(extack, "Neighbour entry not found");
- return -ENOENT;
+ err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
+ neigh_release(neigh);
+ if (err)
+ goto err_unlock;
}
- err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq);
-
- neigh_release(neigh);
+ rcu_read_unlock();
+ return rtnl_unicast(skb, net, pid);
+err_unlock:
+ rcu_read_unlock();
+ kfree_skb(skb);
return err;
}
@@ -3231,9 +3322,10 @@ static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
state->flags |= NEIGH_SEQ_IS_PNEIGH;
for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
- pn = tbl->phash_buckets[bucket];
+ pn = rcu_dereference(tbl->phash_buckets[bucket]);
+
while (pn && !net_eq(pneigh_net(pn), net))
- pn = pn->next;
+ pn = rcu_dereference(pn->next);
if (pn)
break;
}
@@ -3251,15 +3343,17 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
struct neigh_table *tbl = state->tbl;
do {
- pn = pn->next;
+ pn = rcu_dereference(pn->next);
} while (pn && !net_eq(pneigh_net(pn), net));
while (!pn) {
if (++state->bucket > PNEIGH_HASHMASK)
break;
- pn = tbl->phash_buckets[state->bucket];
+
+ pn = rcu_dereference(tbl->phash_buckets[state->bucket]);
+
while (pn && !net_eq(pneigh_net(pn), net))
- pn = pn->next;
+ pn = rcu_dereference(pn->next);
if (pn)
break;
}
@@ -3823,7 +3917,7 @@ static const struct rtnl_msg_handler neigh_rtnl_msg_handlers[] __initconst = {
{.msgtype = RTM_NEWNEIGH, .doit = neigh_add},
{.msgtype = RTM_DELNEIGH, .doit = neigh_delete},
{.msgtype = RTM_GETNEIGH, .doit = neigh_get, .dumpit = neigh_dump_info,
- .flags = RTNL_FLAG_DUMP_UNLOCKED},
+ .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
{.msgtype = RTM_GETNEIGHTBL, .dumpit = neightbl_dump_info},
{.msgtype = RTM_SETNEIGHTBL, .doit = neightbl_set},
};
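The pneigh hash is now RCU-protected: lookups run under rcu_read_lock(), while pneigh_create()/pneigh_delete()/pneigh_ifdown() serialize on the new tbl->phash_lock mutex and free entries via call_rcu(). A brief sketch of a reader under the new scheme (hypothetical caller, not part of this patch):

#include <net/neighbour.h>

static bool example_is_proxy_target(struct neigh_table *tbl, struct net *net,
				    const void *pkey, struct net_device *dev)
{
	bool found;

	rcu_read_lock();
	found = pneigh_lookup(tbl, net, pkey, dev) != NULL;
	rcu_read_unlock();

	return found;
}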
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 1ace0cd01adc..c28cd6665444 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -256,7 +256,7 @@ static ssize_t name_assign_type_show(struct device *dev,
}
static DEVICE_ATTR_RO(name_assign_type);
-/* use same locking rules as GIFHWADDR ioctl's (dev_get_mac_address()) */
+/* use same locking rules as GIFHWADDR ioctl's (netif_get_mac_address()) */
static ssize_t address_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
@@ -641,12 +641,6 @@ static ssize_t phys_port_id_show(struct device *dev,
struct netdev_phys_item_id ppid;
ssize_t ret;
- /* The check is also done in dev_get_phys_port_id; this helps returning
- * early without hitting the locking section below.
- */
- if (!netdev->netdev_ops->ndo_get_phys_port_id)
- return -EOPNOTSUPP;
-
ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
if (ret)
return ret;
@@ -668,13 +662,6 @@ static ssize_t phys_port_name_show(struct device *dev,
char name[IFNAMSIZ];
ssize_t ret;
- /* The checks are also done in dev_get_phys_port_name; this helps
- * returning early without hitting the locking section below.
- */
- if (!netdev->netdev_ops->ndo_get_phys_port_name &&
- !netdev->devlink_port)
- return -EOPNOTSUPP;
-
ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
if (ret)
return ret;
@@ -696,19 +683,11 @@ static ssize_t phys_switch_id_show(struct device *dev,
struct netdev_phys_item_id ppid = { };
ssize_t ret;
- /* The checks are also done in dev_get_phys_port_name; this helps
- * returning early without hitting the locking section below. This works
- * because recurse is false when calling dev_get_port_parent_id.
- */
- if (!netdev->netdev_ops->ndo_get_port_parent_id &&
- !netdev->devlink_port)
- return -EOPNOTSUPP;
-
ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
if (ret)
return ret;
- ret = dev_get_port_parent_id(netdev, &ppid, false);
+ ret = netif_get_port_parent_id(netdev, &ppid, false);
if (!ret)
ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
@@ -718,6 +697,40 @@ static ssize_t phys_switch_id_show(struct device *dev,
}
static DEVICE_ATTR_RO(phys_switch_id);
+static struct attribute *netdev_phys_attrs[] __ro_after_init = {
+ &dev_attr_phys_port_id.attr,
+ &dev_attr_phys_port_name.attr,
+ &dev_attr_phys_switch_id.attr,
+ NULL,
+};
+
+static umode_t netdev_phys_is_visible(struct kobject *kobj,
+ struct attribute *attr, int index)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct net_device *netdev = to_net_dev(dev);
+
+ if (attr == &dev_attr_phys_port_id.attr) {
+ if (!netdev->netdev_ops->ndo_get_phys_port_id)
+ return 0;
+ } else if (attr == &dev_attr_phys_port_name.attr) {
+ if (!netdev->netdev_ops->ndo_get_phys_port_name &&
+ !netdev->devlink_port)
+ return 0;
+ } else if (attr == &dev_attr_phys_switch_id.attr) {
+ if (!netdev->netdev_ops->ndo_get_port_parent_id &&
+ !netdev->devlink_port)
+ return 0;
+ }
+
+ return attr->mode;
+}
+
+static const struct attribute_group netdev_phys_group = {
+ .attrs = netdev_phys_attrs,
+ .is_visible = netdev_phys_is_visible,
+};
+
static ssize_t threaded_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -744,7 +757,7 @@ static int modify_napi_threaded(struct net_device *dev, unsigned long val)
if (val != 0 && val != 1)
return -EOPNOTSUPP;
- ret = dev_set_threaded(dev, val);
+ ret = netif_set_threaded(dev, val);
return ret;
}
@@ -783,9 +796,6 @@ static struct attribute *net_class_attrs[] __ro_after_init = {
&dev_attr_tx_queue_len.attr,
&dev_attr_gro_flush_timeout.attr,
&dev_attr_napi_defer_hard_irqs.attr,
- &dev_attr_phys_port_id.attr,
- &dev_attr_phys_port_name.attr,
- &dev_attr_phys_switch_id.attr,
&dev_attr_proto_down.attr,
&dev_attr_carrier_up_count.attr,
&dev_attr_carrier_down_count.attr,
@@ -1200,12 +1210,21 @@ static int rx_queue_default_mask(struct net_device *dev,
struct netdev_rx_queue *queue)
{
#if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL)
- struct cpumask *rps_default_mask = READ_ONCE(dev_net(dev)->core.rps_default_mask);
+ struct cpumask *rps_default_mask;
+ int res = 0;
+ mutex_lock(&rps_default_mask_mutex);
+
+ rps_default_mask = dev_net(dev)->core.rps_default_mask;
if (rps_default_mask && !cpumask_empty(rps_default_mask))
- return netdev_rx_queue_set_rps_mask(queue, rps_default_mask);
-#endif
+ res = netdev_rx_queue_set_rps_mask(queue, rps_default_mask);
+
+ mutex_unlock(&rps_default_mask_mutex);
+
+ return res;
+#else
return 0;
+#endif
}
static int rx_queue_add_kobject(struct net_device *dev, int index)
@@ -2328,6 +2347,7 @@ int netdev_register_kobject(struct net_device *ndev)
groups++;
*groups++ = &netstat_group;
+ *groups++ = &netdev_phys_group;
if (wireless_group_needed(ndev))
*groups++ = &wireless_group;
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 8a5b04c2699a..e938f25e8e86 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -11,4 +11,6 @@ int netdev_queue_update_kobjects(struct net_device *net,
int netdev_change_owner(struct net_device *, const struct net *net_old,
const struct net *net_new);
+extern struct mutex rps_default_mask_mutex;
+
#endif
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index ae54f26709ca..1b6f3826dd0e 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -19,9 +19,9 @@
#include <linux/net_namespace.h>
#include <linux/sched/task.h>
#include <linux/uidgid.h>
-#include <linux/cookie.h>
#include <linux/proc_fs.h>
+#include <net/aligned_data.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/net_namespace.h>
@@ -64,8 +64,6 @@ DECLARE_RWSEM(pernet_ops_rwsem);
static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
-DEFINE_COOKIE(net_cookie);
-
static struct net_generic *net_alloc_generic(void)
{
unsigned int gen_ptrs = READ_ONCE(max_gen_ptrs);
@@ -319,10 +317,10 @@ int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
if (refcount_read(&net->ns.count) == 0)
return NETNSA_NSID_NOT_ASSIGNED;
- spin_lock_bh(&net->nsid_lock);
+ spin_lock(&net->nsid_lock);
id = __peernet2id(net, peer);
if (id >= 0) {
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock(&net->nsid_lock);
return id;
}
@@ -332,12 +330,12 @@ int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
* just been idr_remove()'d from there in cleanup_net().
*/
if (!maybe_get_net(peer)) {
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock(&net->nsid_lock);
return NETNSA_NSID_NOT_ASSIGNED;
}
id = alloc_netid(net, peer, -1);
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock(&net->nsid_lock);
put_net(peer);
if (id < 0)
@@ -403,8 +401,8 @@ static __net_init void preinit_net(struct net *net, struct user_namespace *user_
{
refcount_set(&net->passive, 1);
refcount_set(&net->ns.count, 1);
- ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt");
- ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net notrefcnt");
+ ref_tracker_dir_init(&net->refcnt_tracker, 128, "net_refcnt");
+ ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net_notrefcnt");
get_random_bytes(&net->hash_mix, sizeof(u32));
net->dev_base_seq = 1;
@@ -434,9 +432,7 @@ static __net_init int setup_net(struct net *net)
LIST_HEAD(net_exit_list);
int error = 0;
- preempt_disable();
- net->net_cookie = gen_cookie_next(&net_cookie);
- preempt_enable();
+ net->net_cookie = atomic64_inc_return(&net_aligned_data.net_cookie);
list_for_each_entry(ops, &pernet_list, list) {
error = ops_init(ops, net);
@@ -628,20 +624,20 @@ static void unhash_nsid(struct net *net, struct net *last)
for_each_net(tmp) {
int id;
- spin_lock_bh(&tmp->nsid_lock);
+ spin_lock(&tmp->nsid_lock);
id = __peernet2id(tmp, net);
if (id >= 0)
idr_remove(&tmp->netns_ids, id);
- spin_unlock_bh(&tmp->nsid_lock);
+ spin_unlock(&tmp->nsid_lock);
if (id >= 0)
rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
GFP_KERNEL);
if (tmp == last)
break;
}
- spin_lock_bh(&net->nsid_lock);
+ spin_lock(&net->nsid_lock);
idr_destroy(&net->netns_ids);
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock(&net->nsid_lock);
}
static LLIST_HEAD(cleanup_list);
@@ -791,16 +787,50 @@ struct net *get_net_ns_by_pid(pid_t pid)
}
EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
+#ifdef CONFIG_NET_NS_REFCNT_TRACKER
+static void net_ns_net_debugfs(struct net *net)
+{
+ ref_tracker_dir_symlink(&net->refcnt_tracker, "netns-%llx-%u-refcnt",
+ net->net_cookie, net->ns.inum);
+ ref_tracker_dir_symlink(&net->notrefcnt_tracker, "netns-%llx-%u-notrefcnt",
+ net->net_cookie, net->ns.inum);
+}
+
+static int __init init_net_debugfs(void)
+{
+ ref_tracker_dir_debugfs(&init_net.refcnt_tracker);
+ ref_tracker_dir_debugfs(&init_net.notrefcnt_tracker);
+ net_ns_net_debugfs(&init_net);
+ return 0;
+}
+late_initcall(init_net_debugfs);
+#else
+static void net_ns_net_debugfs(struct net *net)
+{
+}
+#endif
+
static __net_init int net_ns_net_init(struct net *net)
{
#ifdef CONFIG_NET_NS
net->ns.ops = &netns_operations;
#endif
- return ns_alloc_inum(&net->ns);
+ net->ns.inum = PROC_NET_INIT_INO;
+ if (net != &init_net) {
+ int ret = ns_alloc_inum(&net->ns);
+ if (ret)
+ return ret;
+ }
+ net_ns_net_debugfs(net);
+ return 0;
}
static __net_exit void net_ns_net_exit(struct net *net)
{
+ /*
+	 * The initial network namespace never exits, so we don't need any
+	 * special checks here.
+ */
ns_free_inum(&net->ns);
}
@@ -852,9 +882,9 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
return PTR_ERR(peer);
}
- spin_lock_bh(&net->nsid_lock);
+ spin_lock(&net->nsid_lock);
if (__peernet2id(net, peer) >= 0) {
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock(&net->nsid_lock);
err = -EEXIST;
NL_SET_BAD_ATTR(extack, nla);
NL_SET_ERR_MSG(extack,
@@ -863,7 +893,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
}
err = alloc_netid(net, peer, nsid);
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock(&net->nsid_lock);
if (err >= 0) {
rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid,
nlh, GFP_KERNEL);
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index d22f0919821e..dff66d8fb325 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -21,7 +21,9 @@ static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state
struct cgroup_cls_state *task_cls_state(struct task_struct *p)
{
return css_cls_state(task_css_check(p, net_cls_cgrp_id,
- rcu_read_lock_bh_held()));
+ rcu_read_lock_held() ||
+ rcu_read_lock_bh_held() ||
+ rcu_read_lock_trace_held()));
}
EXPORT_SYMBOL_GPL(task_cls_state);
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
index 4fc44587f493..e9a2a6f26cb7 100644
--- a/net/core/netdev-genl-gen.c
+++ b/net/core/netdev-genl-gen.c
@@ -92,11 +92,12 @@ static const struct nla_policy netdev_bind_rx_nl_policy[NETDEV_A_DMABUF_FD + 1]
};
/* NETDEV_CMD_NAPI_SET - do */
-static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT + 1] = {
+static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_THREADED + 1] = {
[NETDEV_A_NAPI_ID] = { .type = NLA_U32, },
[NETDEV_A_NAPI_DEFER_HARD_IRQS] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_napi_defer_hard_irqs_range),
[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT] = { .type = NLA_UINT, },
[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT] = { .type = NLA_UINT, },
+ [NETDEV_A_NAPI_THREADED] = NLA_POLICY_MAX(NLA_U32, 1),
};
/* NETDEV_CMD_BIND_TX - do */
@@ -193,7 +194,7 @@ static const struct genl_split_ops netdev_nl_ops[] = {
.cmd = NETDEV_CMD_NAPI_SET,
.doit = netdev_nl_napi_set_doit,
.policy = netdev_napi_set_nl_policy,
- .maxattr = NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT,
+ .maxattr = NETDEV_A_NAPI_THREADED,
.flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
},
{
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index 2afa7b2141aa..6314eb7bdf69 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -184,6 +184,10 @@ netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi,
if (napi->irq >= 0 && nla_put_u32(rsp, NETDEV_A_NAPI_IRQ, napi->irq))
goto nla_put_failure;
+ if (nla_put_uint(rsp, NETDEV_A_NAPI_THREADED,
+ napi_get_threaded(napi)))
+ goto nla_put_failure;
+
if (napi->thread) {
pid = task_pid_nr(napi->thread);
if (nla_put_u32(rsp, NETDEV_A_NAPI_PID, pid))
@@ -322,8 +326,18 @@ netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info)
{
u64 irq_suspend_timeout = 0;
u64 gro_flush_timeout = 0;
+ u8 threaded = 0;
u32 defer = 0;
+ if (info->attrs[NETDEV_A_NAPI_THREADED]) {
+ int ret;
+
+ threaded = nla_get_uint(info->attrs[NETDEV_A_NAPI_THREADED]);
+ ret = napi_set_threaded(napi, threaded);
+ if (ret)
+ return ret;
+ }
+
if (info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]) {
defer = nla_get_u32(info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]);
napi_set_defer_hard_irqs(napi, defer);
diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
index d126f10197bf..3bf1151d8061 100644
--- a/net/core/netdev_rx_queue.c
+++ b/net/core/netdev_rx_queue.c
@@ -97,14 +97,12 @@ int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
if (!netdev_need_ops_lock(dev))
return -EOPNOTSUPP;
- if (rxq_idx >= dev->real_num_rx_queues)
- return -EINVAL;
- rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues);
-
if (rxq_idx >= dev->real_num_rx_queues) {
NL_SET_ERR_MSG(extack, "rx queue index out of range");
return -ERANGE;
}
+ rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues);
+
if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) {
NL_SET_ERR_MSG(extack, "tcp-data-split is disabled");
return -EINVAL;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 6ad84d4a2b46..a1da97b5b30b 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -58,13 +58,6 @@ static void zap_completion_queue(void);
static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);
-#define np_info(np, fmt, ...) \
- pr_info("%s: " fmt, np->name, ##__VA_ARGS__)
-#define np_err(np, fmt, ...) \
- pr_err("%s: " fmt, np->name, ##__VA_ARGS__)
-#define np_notice(np, fmt, ...) \
- pr_notice("%s: " fmt, np->name, ##__VA_ARGS__)
-
static netdev_tx_t netpoll_start_xmit(struct sk_buff *skb,
struct net_device *dev,
struct netdev_queue *txq)
@@ -379,6 +372,31 @@ out:
return ret;
}
+static void netpoll_udp_checksum(struct netpoll *np, struct sk_buff *skb,
+ int len)
+{
+ struct udphdr *udph;
+ int udp_len;
+
+ udp_len = len + sizeof(struct udphdr);
+ udph = udp_hdr(skb);
+
+	/* check must be zeroed here, since csum_partial() includes it in the sum */
+ udph->check = 0;
+ if (np->ipv6)
+ udph->check = csum_ipv6_magic(&np->local_ip.in6,
+ &np->remote_ip.in6,
+ udp_len, IPPROTO_UDP,
+ csum_partial(udph, udp_len, 0));
+ else
+ udph->check = csum_tcpudp_magic(np->local_ip.ip,
+ np->remote_ip.ip,
+ udp_len, IPPROTO_UDP,
+ csum_partial(udph, udp_len, 0));
+ if (udph->check == 0)
+ udph->check = CSUM_MANGLED_0;
+}
+
netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
{
unsigned long flags;
@@ -396,24 +414,101 @@ netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
}
EXPORT_SYMBOL(netpoll_send_skb);
+static void push_ipv6(struct netpoll *np, struct sk_buff *skb, int len)
+{
+ struct ipv6hdr *ip6h;
+
+ skb_push(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ ip6h = ipv6_hdr(skb);
+
+ /* ip6h->version = 6; ip6h->priority = 0; */
+ *(unsigned char *)ip6h = 0x60;
+ ip6h->flow_lbl[0] = 0;
+ ip6h->flow_lbl[1] = 0;
+ ip6h->flow_lbl[2] = 0;
+
+ ip6h->payload_len = htons(sizeof(struct udphdr) + len);
+ ip6h->nexthdr = IPPROTO_UDP;
+ ip6h->hop_limit = 32;
+ ip6h->saddr = np->local_ip.in6;
+ ip6h->daddr = np->remote_ip.in6;
+
+ skb->protocol = htons(ETH_P_IPV6);
+}
+
+static void push_ipv4(struct netpoll *np, struct sk_buff *skb, int len)
+{
+ static atomic_t ip_ident;
+ struct iphdr *iph;
+ int ip_len;
+
+ ip_len = len + sizeof(struct udphdr) + sizeof(struct iphdr);
+
+ skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
+
+ /* iph->version = 4; iph->ihl = 5; */
+ *(unsigned char *)iph = 0x45;
+ iph->tos = 0;
+ put_unaligned(htons(ip_len), &iph->tot_len);
+ iph->id = htons(atomic_inc_return(&ip_ident));
+ iph->frag_off = 0;
+ iph->ttl = 64;
+ iph->protocol = IPPROTO_UDP;
+ iph->check = 0;
+ put_unaligned(np->local_ip.ip, &iph->saddr);
+ put_unaligned(np->remote_ip.ip, &iph->daddr);
+ iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+ skb->protocol = htons(ETH_P_IP);
+}
+
+static void push_udp(struct netpoll *np, struct sk_buff *skb, int len)
+{
+ struct udphdr *udph;
+ int udp_len;
+
+ udp_len = len + sizeof(struct udphdr);
+
+ skb_push(skb, sizeof(struct udphdr));
+ skb_reset_transport_header(skb);
+
+ udph = udp_hdr(skb);
+ udph->source = htons(np->local_port);
+ udph->dest = htons(np->remote_port);
+ udph->len = htons(udp_len);
+
+ netpoll_udp_checksum(np, skb, len);
+}
+
+static void push_eth(struct netpoll *np, struct sk_buff *skb)
+{
+ struct ethhdr *eth;
+
+ eth = skb_push(skb, ETH_HLEN);
+ skb_reset_mac_header(skb);
+ ether_addr_copy(eth->h_source, np->dev->dev_addr);
+ ether_addr_copy(eth->h_dest, np->remote_mac);
+ if (np->ipv6)
+ eth->h_proto = htons(ETH_P_IPV6);
+ else
+ eth->h_proto = htons(ETH_P_IP);
+}
+
int netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
int total_len, ip_len, udp_len;
struct sk_buff *skb;
- struct udphdr *udph;
- struct iphdr *iph;
- struct ethhdr *eth;
- static atomic_t ip_ident;
- struct ipv6hdr *ip6h;
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
WARN_ON_ONCE(!irqs_disabled());
- udp_len = len + sizeof(*udph);
+ udp_len = len + sizeof(struct udphdr);
if (np->ipv6)
- ip_len = udp_len + sizeof(*ip6h);
+ ip_len = udp_len + sizeof(struct ipv6hdr);
else
- ip_len = udp_len + sizeof(*iph);
+ ip_len = udp_len + sizeof(struct iphdr);
total_len = ip_len + LL_RESERVED_SPACE(np->dev);
@@ -425,117 +520,18 @@ int netpoll_send_udp(struct netpoll *np, const char *msg, int len)
skb_copy_to_linear_data(skb, msg, len);
skb_put(skb, len);
- skb_push(skb, sizeof(*udph));
- skb_reset_transport_header(skb);
- udph = udp_hdr(skb);
- udph->source = htons(np->local_port);
- udph->dest = htons(np->remote_port);
- udph->len = htons(udp_len);
-
- udph->check = 0;
- if (np->ipv6) {
- udph->check = csum_ipv6_magic(&np->local_ip.in6,
- &np->remote_ip.in6,
- udp_len, IPPROTO_UDP,
- csum_partial(udph, udp_len, 0));
- if (udph->check == 0)
- udph->check = CSUM_MANGLED_0;
-
- skb_push(skb, sizeof(*ip6h));
- skb_reset_network_header(skb);
- ip6h = ipv6_hdr(skb);
-
- /* ip6h->version = 6; ip6h->priority = 0; */
- *(unsigned char *)ip6h = 0x60;
- ip6h->flow_lbl[0] = 0;
- ip6h->flow_lbl[1] = 0;
- ip6h->flow_lbl[2] = 0;
-
- ip6h->payload_len = htons(sizeof(struct udphdr) + len);
- ip6h->nexthdr = IPPROTO_UDP;
- ip6h->hop_limit = 32;
- ip6h->saddr = np->local_ip.in6;
- ip6h->daddr = np->remote_ip.in6;
-
- eth = skb_push(skb, ETH_HLEN);
- skb_reset_mac_header(skb);
- skb->protocol = eth->h_proto = htons(ETH_P_IPV6);
- } else {
- udph->check = csum_tcpudp_magic(np->local_ip.ip,
- np->remote_ip.ip,
- udp_len, IPPROTO_UDP,
- csum_partial(udph, udp_len, 0));
- if (udph->check == 0)
- udph->check = CSUM_MANGLED_0;
-
- skb_push(skb, sizeof(*iph));
- skb_reset_network_header(skb);
- iph = ip_hdr(skb);
-
- /* iph->version = 4; iph->ihl = 5; */
- *(unsigned char *)iph = 0x45;
- iph->tos = 0;
- put_unaligned(htons(ip_len), &(iph->tot_len));
- iph->id = htons(atomic_inc_return(&ip_ident));
- iph->frag_off = 0;
- iph->ttl = 64;
- iph->protocol = IPPROTO_UDP;
- iph->check = 0;
- put_unaligned(np->local_ip.ip, &(iph->saddr));
- put_unaligned(np->remote_ip.ip, &(iph->daddr));
- iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
-
- eth = skb_push(skb, ETH_HLEN);
- skb_reset_mac_header(skb);
- skb->protocol = eth->h_proto = htons(ETH_P_IP);
- }
-
- ether_addr_copy(eth->h_source, np->dev->dev_addr);
- ether_addr_copy(eth->h_dest, np->remote_mac);
-
+ push_udp(np, skb, len);
+ if (np->ipv6)
+ push_ipv6(np, skb, len);
+ else
+ push_ipv4(np, skb, len);
+ push_eth(np, skb);
skb->dev = np->dev;
return (int)netpoll_send_skb(np, skb);
}
EXPORT_SYMBOL(netpoll_send_udp);
-void netpoll_print_options(struct netpoll *np)
-{
- np_info(np, "local port %d\n", np->local_port);
- if (np->ipv6)
- np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6);
- else
- np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip);
- np_info(np, "interface name '%s'\n", np->dev_name);
- np_info(np, "local ethernet address '%pM'\n", np->dev_mac);
- np_info(np, "remote port %d\n", np->remote_port);
- if (np->ipv6)
- np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6);
- else
- np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip);
- np_info(np, "remote ethernet address %pM\n", np->remote_mac);
-}
-EXPORT_SYMBOL(netpoll_print_options);
-
-static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
-{
- const char *end;
-
- if (!strchr(str, ':') &&
- in4_pton(str, -1, (void *)addr, -1, &end) > 0) {
- if (!*end)
- return 0;
- }
- if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) {
-#if IS_ENABLED(CONFIG_IPV6)
- if (!*end)
- return 1;
-#else
- return -1;
-#endif
- }
- return -1;
-}
static void skb_pool_flush(struct netpoll *np)
{
@@ -546,95 +542,6 @@ static void skb_pool_flush(struct netpoll *np)
skb_queue_purge_reason(skb_pool, SKB_CONSUMED);
}
-int netpoll_parse_options(struct netpoll *np, char *opt)
-{
- char *cur=opt, *delim;
- int ipv6;
- bool ipversion_set = false;
-
- if (*cur != '@') {
- if ((delim = strchr(cur, '@')) == NULL)
- goto parse_failed;
- *delim = 0;
- if (kstrtou16(cur, 10, &np->local_port))
- goto parse_failed;
- cur = delim;
- }
- cur++;
-
- if (*cur != '/') {
- ipversion_set = true;
- if ((delim = strchr(cur, '/')) == NULL)
- goto parse_failed;
- *delim = 0;
- ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip);
- if (ipv6 < 0)
- goto parse_failed;
- else
- np->ipv6 = (bool)ipv6;
- cur = delim;
- }
- cur++;
-
- if (*cur != ',') {
- /* parse out dev_name or dev_mac */
- if ((delim = strchr(cur, ',')) == NULL)
- goto parse_failed;
- *delim = 0;
-
- np->dev_name[0] = '\0';
- eth_broadcast_addr(np->dev_mac);
- if (!strchr(cur, ':'))
- strscpy(np->dev_name, cur, sizeof(np->dev_name));
- else if (!mac_pton(cur, np->dev_mac))
- goto parse_failed;
-
- cur = delim;
- }
- cur++;
-
- if (*cur != '@') {
- /* dst port */
- if ((delim = strchr(cur, '@')) == NULL)
- goto parse_failed;
- *delim = 0;
- if (*cur == ' ' || *cur == '\t')
- np_info(np, "warning: whitespace is not allowed\n");
- if (kstrtou16(cur, 10, &np->remote_port))
- goto parse_failed;
- cur = delim;
- }
- cur++;
-
- /* dst ip */
- if ((delim = strchr(cur, '/')) == NULL)
- goto parse_failed;
- *delim = 0;
- ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
- if (ipv6 < 0)
- goto parse_failed;
- else if (ipversion_set && np->ipv6 != (bool)ipv6)
- goto parse_failed;
- else
- np->ipv6 = (bool)ipv6;
- cur = delim + 1;
-
- if (*cur != 0) {
- /* MAC address */
- if (!mac_pton(cur, np->remote_mac))
- goto parse_failed;
- }
-
- netpoll_print_options(np);
-
- return 0;
-
- parse_failed:
- np_info(np, "couldn't parse config at '%s'!\n", cur);
- return -1;
-}
-EXPORT_SYMBOL(netpoll_parse_options);
-
static void refill_skbs_work_handler(struct work_struct *work)
{
struct netpoll *np =
@@ -716,13 +623,97 @@ static char *egress_dev(struct netpoll *np, char *buf)
return buf;
}
+static void netpoll_wait_carrier(struct netpoll *np, struct net_device *ndev,
+ unsigned int timeout)
+{
+ unsigned long atmost;
+
+ atmost = jiffies + timeout * HZ;
+ while (!netif_carrier_ok(ndev)) {
+ if (time_after(jiffies, atmost)) {
+ np_notice(np, "timeout waiting for carrier\n");
+ break;
+ }
+ msleep(1);
+ }
+}
+
+/*
+ * Take an IPv6 address from ndev and populate netpoll's local_ip
+ */
+static int netpoll_take_ipv6(struct netpoll *np, struct net_device *ndev)
+{
+ char buf[MAC_ADDR_STR_LEN + 1];
+ int err = -EDESTADDRREQ;
+ struct inet6_dev *idev;
+
+ if (!IS_ENABLED(CONFIG_IPV6)) {
+ np_err(np, "IPv6 is not supported %s, aborting\n",
+ egress_dev(np, buf));
+ return -EINVAL;
+ }
+
+ idev = __in6_dev_get(ndev);
+ if (idev) {
+ struct inet6_ifaddr *ifp;
+
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ if (!!(ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) !=
+ !!(ipv6_addr_type(&np->remote_ip.in6) & IPV6_ADDR_LINKLOCAL))
+ continue;
+ /* Got the IP, let's return */
+ np->local_ip.in6 = ifp->addr;
+ err = 0;
+ break;
+ }
+ read_unlock_bh(&idev->lock);
+ }
+ if (err) {
+ np_err(np, "no IPv6 address for %s, aborting\n",
+ egress_dev(np, buf));
+ return err;
+ }
+
+ np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6);
+ return 0;
+}
+
+/*
+ * Take the IPv4 address from ndev and populate netpoll's local_ip
+ */
+static int netpoll_take_ipv4(struct netpoll *np, struct net_device *ndev)
+{
+ char buf[MAC_ADDR_STR_LEN + 1];
+ const struct in_ifaddr *ifa;
+ struct in_device *in_dev;
+
+ in_dev = __in_dev_get_rtnl(ndev);
+ if (!in_dev) {
+ np_err(np, "no IP address for %s, aborting\n",
+ egress_dev(np, buf));
+ return -EDESTADDRREQ;
+ }
+
+ ifa = rtnl_dereference(in_dev->ifa_list);
+ if (!ifa) {
+ np_err(np, "no IP address for %s, aborting\n",
+ egress_dev(np, buf));
+ return -EDESTADDRREQ;
+ }
+
+ np->local_ip.ip = ifa->ifa_local;
+ np_info(np, "local IP %pI4\n", &np->local_ip.ip);
+
+ return 0;
+}
+
int netpoll_setup(struct netpoll *np)
{
struct net *net = current->nsproxy->net_ns;
char buf[MAC_ADDR_STR_LEN + 1];
struct net_device *ndev = NULL;
bool ip_overwritten = false;
- struct in_device *in_dev;
int err;
rtnl_lock();
@@ -746,85 +737,31 @@ int netpoll_setup(struct netpoll *np)
}
if (!netif_running(ndev)) {
- unsigned long atmost;
-
np_info(np, "device %s not up yet, forcing it\n",
egress_dev(np, buf));
err = dev_open(ndev, NULL);
-
if (err) {
np_err(np, "failed to open %s\n", ndev->name);
goto put;
}
rtnl_unlock();
- atmost = jiffies + carrier_timeout * HZ;
- while (!netif_carrier_ok(ndev)) {
- if (time_after(jiffies, atmost)) {
- np_notice(np, "timeout waiting for carrier\n");
- break;
- }
- msleep(1);
- }
-
+ netpoll_wait_carrier(np, ndev, carrier_timeout);
rtnl_lock();
}
if (!np->local_ip.ip) {
if (!np->ipv6) {
- const struct in_ifaddr *ifa;
-
- in_dev = __in_dev_get_rtnl(ndev);
- if (!in_dev)
- goto put_noaddr;
-
- ifa = rtnl_dereference(in_dev->ifa_list);
- if (!ifa) {
-put_noaddr:
- np_err(np, "no IP address for %s, aborting\n",
- egress_dev(np, buf));
- err = -EDESTADDRREQ;
+ err = netpoll_take_ipv4(np, ndev);
+ if (err)
goto put;
- }
-
- np->local_ip.ip = ifa->ifa_local;
- ip_overwritten = true;
- np_info(np, "local IP %pI4\n", &np->local_ip.ip);
} else {
-#if IS_ENABLED(CONFIG_IPV6)
- struct inet6_dev *idev;
-
- err = -EDESTADDRREQ;
- idev = __in6_dev_get(ndev);
- if (idev) {
- struct inet6_ifaddr *ifp;
-
- read_lock_bh(&idev->lock);
- list_for_each_entry(ifp, &idev->addr_list, if_list) {
- if (!!(ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) !=
- !!(ipv6_addr_type(&np->remote_ip.in6) & IPV6_ADDR_LINKLOCAL))
- continue;
- np->local_ip.in6 = ifp->addr;
- ip_overwritten = true;
- err = 0;
- break;
- }
- read_unlock_bh(&idev->lock);
- }
- if (err) {
- np_err(np, "no IPv6 address for %s, aborting\n",
- egress_dev(np, buf));
+ err = netpoll_take_ipv6(np, ndev);
+ if (err)
goto put;
- } else
- np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6);
-#else
- np_err(np, "IPv6 is not supported %s, aborting\n",
- egress_dev(np, buf));
- err = -EINVAL;
- goto put;
-#endif
}
+ ip_overwritten = true;
}
err = __netpoll_setup(np, ndev);
@@ -863,7 +800,7 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
kfree(npinfo);
}
-void __netpoll_cleanup(struct netpoll *np)
+static void __netpoll_cleanup(struct netpoll *np)
{
struct netpoll_info *npinfo;
@@ -885,7 +822,6 @@ void __netpoll_cleanup(struct netpoll *np)
skb_pool_flush(np);
}
-EXPORT_SYMBOL_GPL(__netpoll_cleanup);
void __netpoll_free(struct netpoll *np)
{
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index ba7cf3e3c32f..05e2e22a8f7c 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -371,7 +371,7 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
}
EXPORT_SYMBOL(page_pool_create);
-static void page_pool_return_page(struct page_pool *pool, netmem_ref netmem);
+static void page_pool_return_netmem(struct page_pool *pool, netmem_ref netmem);
static noinline netmem_ref page_pool_refill_alloc_cache(struct page_pool *pool)
{
@@ -409,7 +409,7 @@ static noinline netmem_ref page_pool_refill_alloc_cache(struct page_pool *pool)
* (2) break out to fallthrough to alloc_pages_node.
 * This limits stress on the page buddy allocator.
*/
- page_pool_return_page(pool, netmem);
+ page_pool_return_netmem(pool, netmem);
alloc_stat_inc(pool, waive);
netmem = 0;
break;
@@ -544,8 +544,8 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
}
/* slow path */
-static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool,
- gfp_t gfp)
+static noinline netmem_ref __page_pool_alloc_netmems_slow(struct page_pool *pool,
+ gfp_t gfp)
{
const int bulk = PP_ALLOC_CACHE_REFILL;
unsigned int pp_order = pool->p.order;
@@ -615,7 +615,7 @@ netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp)
if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)
netmem = pool->mp_ops->alloc_netmems(pool, gfp);
else
- netmem = __page_pool_alloc_pages_slow(pool, gfp);
+ netmem = __page_pool_alloc_netmems_slow(pool, gfp);
return netmem;
}
EXPORT_SYMBOL(page_pool_alloc_netmems);
@@ -673,8 +673,8 @@ void page_pool_clear_pp_info(netmem_ref netmem)
netmem_set_pp(netmem, NULL);
}
-static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
- netmem_ref netmem)
+static __always_inline void __page_pool_release_netmem_dma(struct page_pool *pool,
+ netmem_ref netmem)
{
struct page *old, *page = netmem_to_page(netmem);
unsigned long id;
@@ -712,7 +712,7 @@ static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
* a regular page (that will eventually be returned to the normal
* page-allocator via put_page).
*/
-void page_pool_return_page(struct page_pool *pool, netmem_ref netmem)
+static void page_pool_return_netmem(struct page_pool *pool, netmem_ref netmem)
{
int count;
bool put;
@@ -721,7 +721,7 @@ void page_pool_return_page(struct page_pool *pool, netmem_ref netmem)
if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)
put = pool->mp_ops->release_netmem(pool, netmem);
else
- __page_pool_release_page_dma(pool, netmem);
+ __page_pool_release_netmem_dma(pool, netmem);
/* This may be the last page returned, releasing the pool, so
* it is not safe to reference pool afterwards.
@@ -826,7 +826,7 @@ __page_pool_put_page(struct page_pool *pool, netmem_ref netmem,
* will be invoking put_page.
*/
recycle_stat_inc(pool, released_refcnt);
- page_pool_return_page(pool, netmem);
+ page_pool_return_netmem(pool, netmem);
return 0;
}
@@ -869,7 +869,7 @@ void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem,
if (netmem && !page_pool_recycle_in_ring(pool, netmem)) {
/* Cache full, fallback to free pages */
recycle_stat_inc(pool, ring_full);
- page_pool_return_page(pool, netmem);
+ page_pool_return_netmem(pool, netmem);
}
}
EXPORT_SYMBOL(page_pool_put_unrefed_netmem);
@@ -912,7 +912,7 @@ static void page_pool_recycle_ring_bulk(struct page_pool *pool,
* since put_page() with refcnt == 1 can be an expensive operation.
*/
for (; i < bulk_len; i++)
- page_pool_return_page(pool, bulk[i]);
+ page_pool_return_netmem(pool, bulk[i]);
}
/**
@@ -995,7 +995,7 @@ static netmem_ref page_pool_drain_frag(struct page_pool *pool,
return netmem;
}
- page_pool_return_page(pool, netmem);
+ page_pool_return_netmem(pool, netmem);
return 0;
}
@@ -1009,7 +1009,7 @@ static void page_pool_free_frag(struct page_pool *pool)
if (!netmem || page_pool_unref_netmem(netmem, drain_count))
return;
- page_pool_return_page(pool, netmem);
+ page_pool_return_netmem(pool, netmem);
}
netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool,
@@ -1076,7 +1076,7 @@ static void page_pool_empty_ring(struct page_pool *pool)
pr_crit("%s() page_pool refcnt %d violation\n",
__func__, netmem_ref_count(netmem));
- page_pool_return_page(pool, netmem);
+ page_pool_return_netmem(pool, netmem);
}
}
@@ -1109,7 +1109,7 @@ static void page_pool_empty_alloc_cache_once(struct page_pool *pool)
*/
while (pool->alloc.count) {
netmem = pool->alloc.cache[--pool->alloc.count];
- page_pool_return_page(pool, netmem);
+ page_pool_return_netmem(pool, netmem);
}
}
@@ -1136,7 +1136,7 @@ static void page_pool_scrub(struct page_pool *pool)
}
xa_for_each(&pool->dma_mapped, id, ptr)
- __page_pool_release_page_dma(pool, page_to_netmem(ptr));
+ __page_pool_release_netmem_dma(pool, page_to_netmem((struct page *)ptr));
}
/* No more consumers should exist, but producers could still
@@ -1253,7 +1253,7 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid)
/* Flush pool alloc cache, as refill will check NUMA node */
while (pool->alloc.count) {
netmem = pool->alloc.cache[--pool->alloc.count];
- page_pool_return_page(pool, netmem);
+ page_pool_return_netmem(pool, netmem);
}
}
EXPORT_SYMBOL(page_pool_update_nid);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index c57692eb8da9..094b085cff20 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1026,9 +1026,11 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
.rta_error = error,
.rta_id = id,
};
+ unsigned long delta;
if (dst) {
- ci.rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse);
+ delta = jiffies - READ_ONCE(dst->lastuse);
+ ci.rta_lastuse = jiffies_delta_to_clock_t(delta);
ci.rta_used = dst->__use;
ci.rta_clntref = rcuref_read(&dst->__rcuref);
}
@@ -1446,7 +1448,7 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
struct netdev_phys_item_id ppid = { };
int err;
- err = dev_get_port_parent_id(dev, &ppid, false);
+ err = netif_get_port_parent_id(dev, &ppid, false);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
@@ -2036,7 +2038,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
ifm->__ifi_pad = 0;
ifm->ifi_type = READ_ONCE(dev->type);
ifm->ifi_index = READ_ONCE(dev->ifindex);
- ifm->ifi_flags = dev_get_flags(dev);
+ ifm->ifi_flags = netif_get_flags(dev);
ifm->ifi_change = change;
if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid))
@@ -5225,7 +5227,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
ifm->__ifi_pad = 0;
ifm->ifi_type = dev->type;
ifm->ifi_index = dev->ifindex;
- ifm->ifi_flags = dev_get_flags(dev);
+ ifm->ifi_flags = netif_get_flags(dev);
ifm->ifi_change = 0;
diff --git a/net/core/scm.c b/net/core/scm.c
index 0225bd94170f..072d5742440a 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -23,6 +23,8 @@
#include <linux/security.h>
#include <linux/pid_namespace.h>
#include <linux/pid.h>
+#include <uapi/linux/pidfd.h>
+#include <linux/pidfs.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/errqueue.h>
@@ -145,6 +147,22 @@ void __scm_destroy(struct scm_cookie *scm)
}
EXPORT_SYMBOL(__scm_destroy);
+static inline int scm_replace_pid(struct scm_cookie *scm, struct pid *pid)
+{
+ int err;
+
+ /* drop all previous references */
+ scm_destroy_cred(scm);
+
+ err = pidfs_register_pid(pid);
+ if (unlikely(err))
+ return err;
+
+ scm->pid = pid;
+ scm->creds.pid = pid_vnr(pid);
+ return 0;
+}
+
int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
{
const struct proto_ops *ops = READ_ONCE(sock->ops);
@@ -189,15 +207,21 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
if (err)
goto error;
- p->creds.pid = creds.pid;
if (!p->pid || pid_vnr(p->pid) != creds.pid) {
struct pid *pid;
err = -ESRCH;
pid = find_get_pid(creds.pid);
if (!pid)
goto error;
- put_pid(p->pid);
- p->pid = pid;
+
+ /* pass a struct pid reference from
+ * find_get_pid() to scm_replace_pid().
+ */
+ err = scm_replace_pid(p, pid);
+ if (err) {
+ put_pid(pid);
+ goto error;
+ }
}
err = -EINVAL;
@@ -459,7 +483,7 @@ static void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm)
if (!scm->pid)
return;
- pidfd = pidfd_prepare(scm->pid, 0, &pidfd_file);
+ pidfd = pidfd_prepare(scm->pid, PIDFD_STALE, &pidfd_file);
if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) {
if (pidfd_file) {
diff --git a/net/core/selftests.c b/net/core/selftests.c
index 406faf8e5f3f..3d79133a91a6 100644
--- a/net/core/selftests.c
+++ b/net/core/selftests.c
@@ -27,6 +27,7 @@ struct net_packet_attrs {
int max_size;
u8 id;
u16 queue_mapping;
+ bool bad_csum;
};
struct net_test_priv {
@@ -165,6 +166,20 @@ static struct sk_buff *net_test_get_skb(struct net_device *ndev,
thdr->check = ~tcp_v4_check(l4len, ihdr->saddr, ihdr->daddr, 0);
skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct tcphdr, check);
+
+ if (attr->bad_csum) {
+ /* Force mangled checksum */
+ if (skb_checksum_help(skb)) {
+ kfree_skb(skb);
+ return NULL;
+ }
+
+ if (thdr->check != CSUM_MANGLED_0)
+ thdr->check = CSUM_MANGLED_0;
+ else
+ thdr->check = csum16_sub(thdr->check,
+ cpu_to_be16(1));
+ }
} else {
udp4_hwcsum(skb, ihdr->saddr, ihdr->daddr);
}
@@ -239,7 +254,11 @@ static int net_test_loopback_validate(struct sk_buff *skb,
if (tpriv->packet->id != shdr->id)
goto out;
- tpriv->ok = true;
+ if (tpriv->packet->bad_csum && skb->ip_summed == CHECKSUM_UNNECESSARY)
+ tpriv->ok = -EIO;
+ else
+ tpriv->ok = true;
+
complete(&tpriv->comp);
out:
kfree_skb(skb);
@@ -285,7 +304,12 @@ static int __net_test_loopback(struct net_device *ndev,
attr->timeout = NET_LB_TIMEOUT;
wait_for_completion_timeout(&tpriv->comp, attr->timeout);
- ret = tpriv->ok ? 0 : -ETIMEDOUT;
+ if (tpriv->ok < 0)
+ ret = tpriv->ok;
+ else if (!tpriv->ok)
+ ret = -ETIMEDOUT;
+ else
+ ret = 0;
cleanup:
dev_remove_pack(&tpriv->pt);
@@ -345,6 +369,42 @@ static int net_test_phy_loopback_tcp(struct net_device *ndev)
return __net_test_loopback(ndev, &attr);
}
+/**
+ * net_test_phy_loopback_tcp_bad_csum - PHY loopback test with a deliberately
+ * corrupted TCP checksum
+ * @ndev: the network device to test
+ *
+ * Builds the same minimal Ethernet/IPv4/TCP frame as
+ * net_test_phy_loopback_tcp(), then deliberately corrupts the TCP checksum
+ * (forcing it to CSUM_MANGLED_0, or subtracting one if it already holds that
+ * value), so the frame carries a checksum that cannot be valid.
+ * The frame is transmitted through the device's internal PHY loopback path:
+ *
+ * test code -> MAC driver -> MAC HW -> xMII -> PHY ->
+ * internal PHY loopback -> xMII -> MAC HW -> MAC driver -> test code
+ *
+ * Result interpretation
+ * ---------------------
+ * 0 The frame is delivered to the stack and the driver reports
+ * ip_summed as CHECKSUM_NONE or CHECKSUM_COMPLETE - both are
+ * valid ways to indicate "bad checksum, let the stack verify".
+ * -ETIMEDOUT The MAC/PHY silently dropped the frame; hardware checksum
+ * verification filtered it out before the driver saw it.
+ * -EIO The driver returned the frame with ip_summed ==
+ * CHECKSUM_UNNECESSARY, falsely claiming a valid checksum and
+ * indicating a serious RX-path defect.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+static int net_test_phy_loopback_tcp_bad_csum(struct net_device *ndev)
+{
+ struct net_packet_attrs attr = { };
+
+ attr.dst = ndev->dev_addr;
+ attr.tcp = true;
+ attr.bad_csum = true;
+ return __net_test_loopback(ndev, &attr);
+}
+
static const struct net_test {
char name[ETH_GSTRING_LEN];
int (*fn)(struct net_device *ndev);
@@ -369,6 +429,9 @@ static const struct net_test {
.name = "PHY internal loopback, TCP ",
.fn = net_test_phy_loopback_tcp,
}, {
+ .name = "PHY loopback, bad TCP csum ",
+ .fn = net_test_phy_loopback_tcp_bad_csum,
+ }, {
/* This test should be done after all PHY loopback test */
.name = "PHY internal loopback, disable",
.fn = net_test_phy_loopback_disable,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d6420b74ea9c..ee0274417948 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -384,8 +384,7 @@ static inline void __finalize_skb_around(struct sk_buff *skb, void *data,
skb_set_kcov_handle(skb, kcov_common_handle());
}
-static inline void *__slab_build_skb(struct sk_buff *skb, void *data,
- unsigned int *size)
+static inline void *__slab_build_skb(void *data, unsigned int *size)
{
void *resized;
@@ -418,7 +417,7 @@ struct sk_buff *slab_build_skb(void *data)
return NULL;
memset(skb, 0, offsetof(struct sk_buff, tail));
- data = __slab_build_skb(skb, data, &size);
+ data = __slab_build_skb(data, &size);
__finalize_skb_around(skb, data, size);
return skb;
@@ -435,7 +434,7 @@ static void __build_skb_around(struct sk_buff *skb, void *data,
* using slab buffer should use slab_build_skb() instead.
*/
if (WARN_ONCE(size == 0, "Use slab_build_skb() instead"))
- data = __slab_build_skb(skb, data, &size);
+ data = __slab_build_skb(data, &size);
__finalize_skb_around(skb, data, size);
}
@@ -3060,10 +3059,8 @@ static bool spd_can_coalesce(const struct splice_pipe_desc *spd,
/*
* Fill page/offset/length into spd, if it can hold more pages.
*/
-static bool spd_fill_page(struct splice_pipe_desc *spd,
- struct pipe_inode_info *pipe, struct page *page,
- unsigned int *len, unsigned int offset,
- bool linear,
+static bool spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
+ unsigned int *len, unsigned int offset, bool linear,
struct sock *sk)
{
if (unlikely(spd->nr_pages == MAX_SKB_FRAGS))
@@ -3091,8 +3088,7 @@ static bool __splice_segment(struct page *page, unsigned int poff,
unsigned int plen, unsigned int *off,
unsigned int *len,
struct splice_pipe_desc *spd, bool linear,
- struct sock *sk,
- struct pipe_inode_info *pipe)
+ struct sock *sk)
{
if (!*len)
return true;
@@ -3111,8 +3107,7 @@ static bool __splice_segment(struct page *page, unsigned int poff,
do {
unsigned int flen = min(*len, plen);
- if (spd_fill_page(spd, pipe, page, &flen, poff,
- linear, sk))
+ if (spd_fill_page(spd, page, &flen, poff, linear, sk))
return true;
poff += flen;
plen -= flen;
@@ -3130,8 +3125,8 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
unsigned int *offset, unsigned int *len,
struct splice_pipe_desc *spd, struct sock *sk)
{
- int seg;
struct sk_buff *iter;
+ int seg;
/* map the linear part :
* If skb->head_frag is set, this 'linear' part is backed by a
@@ -3143,7 +3138,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
skb_headlen(skb),
offset, len, spd,
skb_head_is_locked(skb),
- sk, pipe))
+ sk))
return true;
/*
@@ -3160,7 +3155,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
if (__splice_segment(skb_frag_page(f),
skb_frag_off(f), skb_frag_size(f),
- offset, len, spd, false, sk, pipe))
+ offset, len, spd, false, sk))
return true;
}
@@ -3235,6 +3230,7 @@ typedef int (*sendmsg_func)(struct sock *sk, struct msghdr *msg);
static int __skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset,
int len, sendmsg_func sendmsg, int flags)
{
+ int more_hint = sk_is_tcp(sk) ? MSG_MORE : 0;
unsigned int orig_len = len;
struct sk_buff *head = skb;
unsigned short fragidx;
@@ -3252,6 +3248,8 @@ do_frag_list:
kv.iov_len = slen;
memset(&msg, 0, sizeof(msg));
msg.msg_flags = MSG_DONTWAIT | flags;
+ if (slen < len)
+ msg.msg_flags |= more_hint;
iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &kv, 1, slen);
ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked,
@@ -3292,6 +3290,8 @@ do_frag_list:
flags,
};
+ if (slen < len)
+ msg.msg_flags |= more_hint;
bvec_set_page(&bvec, skb_frag_page(frag), slen,
skb_frag_off(frag) + offset);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1,
@@ -6763,8 +6763,7 @@ static int pskb_carve(struct sk_buff *skb, const u32 off, gfp_t gfp);
/* carve out the first eat bytes from skb's frag_list. May recurse into
* pskb_carve()
*/
-static int pskb_carve_frag_list(struct sk_buff *skb,
- struct skb_shared_info *shinfo, int eat,
+static int pskb_carve_frag_list(struct skb_shared_info *shinfo, int eat,
gfp_t gfp_mask)
{
struct sk_buff *list = shinfo->frag_list;
@@ -6869,7 +6868,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
skb_clone_fraglist(skb);
/* split line is in frag list */
- if (k == 0 && pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask)) {
+ if (k == 0 && pskb_carve_frag_list(shinfo, off - pos, gfp_mask)) {
/* skb_frag_unref() is not needed here as shinfo->nr_frags = 0. */
if (skb_has_frag_list(skb))
kfree_skb_list(skb_shinfo(skb)->frag_list);
@@ -7234,7 +7233,6 @@ static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,
* @skb: The buffer to add pages to
* @iter: Iterator representing the pages to be added
* @maxsize: Maximum amount of pages to be added
- * @gfp: Allocation flags
*
* This is a common helper function for supporting MSG_SPLICE_PAGES. It
* extracts pages from an iterator and adds them to the socket buffer if
@@ -7245,7 +7243,7 @@ static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,
* insufficient space in the buffer to transfer anything.
*/
ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter,
- ssize_t maxsize, gfp_t gfp)
+ ssize_t maxsize)
{
size_t frag_limit = READ_ONCE(net_hotdata.sysctl_max_skb_frags);
struct page *pages[8], **ppages = pages;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 34c51eb1a14f..83c78379932e 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -656,6 +656,13 @@ static void sk_psock_backlog(struct work_struct *work)
bool ingress;
int ret;
+ /* If sk is quickly removed from the map and then added back, the old
+ * psock should not be scheduled, because there are now two psocks
+ * pointing to the same sk.
+ */
+ if (!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+ return;
+
/* Increment the psock refcnt to synchronize with close(fd) path in
* sock_map_close(), ensuring we wait for backlog thread completion
* before sk_socket freed. If refcnt increment fails, it indicates
diff --git a/net/core/sock.c b/net/core/sock.c
index 3b409bc8ef6d..7c26ec8dce63 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -526,11 +526,10 @@ int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb,
enum skb_drop_reason drop_reason;
int err;
- err = sk_filter(sk, skb);
- if (err) {
- drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
+ err = sk_filter_reason(sk, skb, &drop_reason);
+ if (err)
goto out;
- }
+
err = __sock_queue_rcv_skb(sk, skb);
switch (err) {
case -ENOMEM:
@@ -553,15 +552,18 @@ EXPORT_SYMBOL(sock_queue_rcv_skb_reason);
int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
const int nested, unsigned int trim_cap, bool refcounted)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
int rc = NET_RX_SUCCESS;
+ int err;
- if (sk_filter_trim_cap(sk, skb, trim_cap))
+ if (sk_filter_trim_cap(sk, skb, trim_cap, &reason))
goto discard_and_relse;
skb->dev = NULL;
if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) {
atomic_inc(&sk->sk_drops);
+ reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
goto discard_and_relse;
}
if (nested)
@@ -577,8 +579,12 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
rc = sk_backlog_rcv(sk, skb);
mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
- } else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
+ } else if ((err = sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))) {
bh_unlock_sock(sk);
+ if (err == -ENOMEM)
+ reason = SKB_DROP_REASON_PFMEMALLOC;
+ if (err == -ENOBUFS)
+ reason = SKB_DROP_REASON_SOCKET_BACKLOG;
atomic_inc(&sk->sk_drops);
goto discard_and_relse;
}
@@ -589,7 +595,7 @@ out:
sock_put(sk);
return rc;
discard_and_relse:
- kfree_skb(skb);
+ sk_skb_reason_drop(sk, skb, reason);
goto out;
}
EXPORT_SYMBOL(__sk_receive_skb);
@@ -602,7 +608,7 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
struct dst_entry *dst = __sk_dst_get(sk);
- if (dst && dst->obsolete &&
+ if (dst && READ_ONCE(dst->obsolete) &&
INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
dst, cookie) == NULL) {
sk_tx_queue_clear(sk);
@@ -620,7 +626,7 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
struct dst_entry *dst = sk_dst_get(sk);
- if (dst && dst->obsolete &&
+ if (dst && READ_ONCE(dst->obsolete) &&
INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
dst, cookie) == NULL) {
sk_dst_reset(sk);
@@ -818,12 +824,10 @@ EXPORT_SYMBOL(sock_set_priority);
void sock_set_sndtimeo(struct sock *sk, s64 secs)
{
- lock_sock(sk);
if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
WRITE_ONCE(sk->sk_sndtimeo, secs * HZ);
else
WRITE_ONCE(sk->sk_sndtimeo, MAX_SCHEDULE_TIMEOUT);
- release_sock(sk);
}
EXPORT_SYMBOL(sock_set_sndtimeo);
@@ -837,14 +841,6 @@ static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
}
}
-void sock_enable_timestamps(struct sock *sk)
-{
- lock_sock(sk);
- __sock_set_timestamps(sk, true, false, true);
- release_sock(sk);
-}
-EXPORT_SYMBOL(sock_enable_timestamps);
-
void sock_set_timestamp(struct sock *sk, int optname, bool valbool)
{
switch (optname) {
@@ -1295,6 +1291,14 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
case SO_DEVMEM_DONTNEED:
return sock_devmem_dontneed(sk, optval, optlen);
#endif
+ case SO_SNDTIMEO_OLD:
+ case SO_SNDTIMEO_NEW:
+ return sock_set_timeout(&sk->sk_sndtimeo, optval,
+ optlen, optname == SO_SNDTIMEO_OLD);
+ case SO_RCVTIMEO_OLD:
+ case SO_RCVTIMEO_NEW:
+ return sock_set_timeout(&sk->sk_rcvtimeo, optval,
+ optlen, optname == SO_RCVTIMEO_OLD);
}
sockopt_lock_sock(sk);
@@ -1450,18 +1454,6 @@ set_sndbuf:
WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
break;
}
- case SO_RCVTIMEO_OLD:
- case SO_RCVTIMEO_NEW:
- ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
- optlen, optname == SO_RCVTIMEO_OLD);
- break;
-
- case SO_SNDTIMEO_OLD:
- case SO_SNDTIMEO_NEW:
- ret = sock_set_timeout(&sk->sk_sndtimeo, optval,
- optlen, optname == SO_SNDTIMEO_OLD);
- break;
-
case SO_ATTACH_FILTER: {
struct sock_fprog fprog;
@@ -2602,8 +2594,8 @@ static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst)
!ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr));
#endif
/* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */
- max_size = is_ipv6 ? READ_ONCE(dst->dev->gso_max_size) :
- READ_ONCE(dst->dev->gso_ipv4_max_size);
+ max_size = is_ipv6 ? READ_ONCE(dst_dev(dst)->gso_max_size) :
+ READ_ONCE(dst_dev(dst)->gso_ipv4_max_size);
if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk))
max_size = GSO_LEGACY_MAX_SIZE;
@@ -2614,7 +2606,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
u32 max_segs = 1;
- sk->sk_route_caps = dst->dev->features;
+ sk->sk_route_caps = dst_dev(dst)->features;
if (sk_is_tcp(sk)) {
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2632,7 +2624,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst);
/* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
- max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
+ max_segs = max_t(u32, READ_ONCE(dst_dev(dst)->gso_max_segs), 1);
}
}
sk->sk_gso_max_segs = max_segs;
@@ -2788,17 +2780,6 @@ void sock_pfree(struct sk_buff *skb)
EXPORT_SYMBOL(sock_pfree);
#endif /* CONFIG_INET */
-kuid_t sock_i_uid(struct sock *sk)
-{
- kuid_t uid;
-
- read_lock_bh(&sk->sk_callback_lock);
- uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
- read_unlock_bh(&sk->sk_callback_lock);
- return uid;
-}
-EXPORT_SYMBOL(sock_i_uid);
-
unsigned long __sock_i_ino(struct sock *sk)
{
unsigned long ino;
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 82a14f131d00..5947b38e4f8b 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1709,7 +1709,6 @@ EXPORT_SYMBOL_GPL(sock_map_close);
struct sockmap_link {
struct bpf_link link;
struct bpf_map *map;
- enum bpf_attach_type attach_type;
};
static void sock_map_link_release(struct bpf_link *link)
@@ -1721,7 +1720,7 @@ static void sock_map_link_release(struct bpf_link *link)
goto out;
WARN_ON_ONCE(sock_map_prog_update(sockmap_link->map, NULL, link->prog, link,
- sockmap_link->attach_type));
+ link->attach_type));
bpf_map_put_with_uref(sockmap_link->map);
sockmap_link->map = NULL;
@@ -1772,7 +1771,7 @@ static int sock_map_link_update_prog(struct bpf_link *link,
}
ret = sock_map_prog_link_lookup(sockmap_link->map, &pprog, &plink,
- sockmap_link->attach_type);
+ link->attach_type);
if (ret)
goto out;
@@ -1817,7 +1816,7 @@ static int sock_map_link_fill_info(const struct bpf_link *link,
u32 map_id = sock_map_link_get_map_id(sockmap_link);
info->sockmap.map_id = map_id;
- info->sockmap.attach_type = sockmap_link->attach_type;
+ info->sockmap.attach_type = link->attach_type;
return 0;
}
@@ -1828,7 +1827,7 @@ static void sock_map_link_show_fdinfo(const struct bpf_link *link,
u32 map_id = sock_map_link_get_map_id(sockmap_link);
seq_printf(seq, "map_id:\t%u\n", map_id);
- seq_printf(seq, "attach_type:\t%u\n", sockmap_link->attach_type);
+ seq_printf(seq, "attach_type:\t%u\n", link->attach_type);
}
static const struct bpf_link_ops sock_map_link_ops = {
@@ -1866,9 +1865,9 @@ int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog)
}
attach_type = attr->link_create.attach_type;
- bpf_link_init(&sockmap_link->link, BPF_LINK_TYPE_SOCKMAP, &sock_map_link_ops, prog);
+ bpf_link_init(&sockmap_link->link, BPF_LINK_TYPE_SOCKMAP, &sock_map_link_ops, prog,
+ attach_type);
sockmap_link->map = map;
- sockmap_link->attach_type = attach_type;
ret = bpf_link_prime(&sockmap_link->link, &link_primer);
if (ret) {
diff --git a/net/core/stream.c b/net/core/stream.c
index b16dfa568a2d..7a37e7dd2c43 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -23,9 +23,13 @@
/**
* sk_stream_write_space - stream socket write_space callback.
- * @sk: socket
+ * @sk: pointer to the socket structure
*
- * FIXME: write proper description
+ * This function is invoked when there's space available in the socket's
+ * send buffer for writing. If the socket is writable, it clears the
+ * SOCK_NOSPACE flag to indicate that memory for writing is now available,
+ * wakes up any processes waiting for write operations, and sends
+ * asynchronous notifications if needed.
*/
void sk_stream_write_space(struct sock *sk)
{
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 5dbb2c6f371d..8cf04b57ade1 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -28,6 +28,7 @@
#include <net/rps.h>
#include "dev.h"
+#include "net-sysfs.h"
static int int_3600 = 3600;
static int min_sndbuf = SOCK_MIN_SNDBUF;
@@ -96,50 +97,40 @@ free_buf:
#ifdef CONFIG_RPS
-static struct cpumask *rps_default_mask_cow_alloc(struct net *net)
-{
- struct cpumask *rps_default_mask;
-
- if (net->core.rps_default_mask)
- return net->core.rps_default_mask;
-
- rps_default_mask = kzalloc(cpumask_size(), GFP_KERNEL);
- if (!rps_default_mask)
- return NULL;
-
- /* pairs with READ_ONCE in rx_queue_default_mask() */
- WRITE_ONCE(net->core.rps_default_mask, rps_default_mask);
- return rps_default_mask;
-}
+DEFINE_MUTEX(rps_default_mask_mutex);
static int rps_default_mask_sysctl(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = (struct net *)table->data;
+ struct cpumask *mask;
int err = 0;
- rtnl_lock();
+ mutex_lock(&rps_default_mask_mutex);
+ mask = net->core.rps_default_mask;
if (write) {
- struct cpumask *rps_default_mask = rps_default_mask_cow_alloc(net);
-
+ if (!mask) {
+ mask = kzalloc(cpumask_size(), GFP_KERNEL);
+ net->core.rps_default_mask = mask;
+ }
err = -ENOMEM;
- if (!rps_default_mask)
+ if (!mask)
goto done;
- err = cpumask_parse(buffer, rps_default_mask);
+ err = cpumask_parse(buffer, mask);
if (err)
goto done;
- err = rps_cpumask_housekeeping(rps_default_mask);
+ err = rps_cpumask_housekeeping(mask);
if (err)
goto done;
} else {
err = dump_cpumask(buffer, lenp, ppos,
- net->core.rps_default_mask ? : cpu_none_mask);
+ mask ?: cpu_none_mask);
}
done:
- rtnl_unlock();
+ mutex_unlock(&rps_default_mask_mutex);
return err;
}