summaryrefslogtreecommitdiff
path: root/net/core/dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--net/core/dev.c463
1 files changed, 328 insertions, 135 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 0d891634c692..b28ce68830b2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -462,7 +462,9 @@ EXPORT_PER_CPU_SYMBOL(softnet_data);
* PP consumers must pay attention to run APIs in the appropriate context
* (e.g. NAPI context).
*/
-DEFINE_PER_CPU(struct page_pool *, system_page_pool);
+DEFINE_PER_CPU(struct page_pool_bh, system_page_pool) = {
+ .bh_lock = INIT_LOCAL_LOCK(bh_lock),
+};
#ifdef CONFIG_LOCKDEP
/*
@@ -828,7 +830,7 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id)
dev_hold(dev);
rcu_read_unlock();
- dev = __netdev_put_lock(dev);
+ dev = __netdev_put_lock(dev, net);
if (!dev)
return NULL;
@@ -1039,10 +1041,11 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id)
* This helper is intended for locking net_device after it has been looked up
* using a lockless lookup helper. Lock prevents the instance from going away.
*/
-struct net_device *__netdev_put_lock(struct net_device *dev)
+struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net)
{
netdev_lock(dev);
- if (dev->reg_state > NETREG_REGISTERED) {
+ if (dev->reg_state > NETREG_REGISTERED ||
+ dev->moving_ns || !net_eq(dev_net(dev), net)) {
netdev_unlock(dev);
dev_put(dev);
return NULL;
@@ -1051,6 +1054,20 @@ struct net_device *__netdev_put_lock(struct net_device *dev)
return dev;
}
+static struct net_device *
+__netdev_put_lock_ops_compat(struct net_device *dev, struct net *net)
+{
+ netdev_lock_ops_compat(dev);
+ if (dev->reg_state > NETREG_REGISTERED ||
+ dev->moving_ns || !net_eq(dev_net(dev), net)) {
+ netdev_unlock_ops_compat(dev);
+ dev_put(dev);
+ return NULL;
+ }
+ dev_put(dev);
+ return dev;
+}
+
/**
* netdev_get_by_index_lock() - find a device by its ifindex
* @net: the applicable net namespace
@@ -1070,7 +1087,19 @@ struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex)
if (!dev)
return NULL;
- return __netdev_put_lock(dev);
+ return __netdev_put_lock(dev, net);
+}
+
+struct net_device *
+netdev_get_by_index_lock_ops_compat(struct net *net, int ifindex)
+{
+ struct net_device *dev;
+
+ dev = dev_get_by_index(net, ifindex);
+ if (!dev)
+ return NULL;
+
+ return __netdev_put_lock_ops_compat(dev, net);
}
struct net_device *
@@ -1090,7 +1119,32 @@ netdev_xa_find_lock(struct net *net, struct net_device *dev,
dev_hold(dev);
rcu_read_unlock();
- dev = __netdev_put_lock(dev);
+ dev = __netdev_put_lock(dev, net);
+ if (dev)
+ return dev;
+
+ (*index)++;
+ } while (true);
+}
+
+struct net_device *
+netdev_xa_find_lock_ops_compat(struct net *net, struct net_device *dev,
+ unsigned long *index)
+{
+ if (dev)
+ netdev_unlock_ops_compat(dev);
+
+ do {
+ rcu_read_lock();
+ dev = xa_find(&net->dev_by_index, index, ULONG_MAX, XA_PRESENT);
+ if (!dev) {
+ rcu_read_unlock();
+ return NULL;
+ }
+ dev_hold(dev);
+ rcu_read_unlock();
+
+ dev = __netdev_put_lock_ops_compat(dev, net);
if (dev)
return dev;
@@ -1213,33 +1267,32 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
EXPORT_SYMBOL(dev_getfirstbyhwtype);
/**
- * __dev_get_by_flags - find any device with given flags
- * @net: the applicable net namespace
- * @if_flags: IFF_* values
- * @mask: bitmask of bits in if_flags to check
+ * netdev_get_by_flags_rcu - find any device with given flags
+ * @net: the applicable net namespace
+ * @tracker: tracking object for the acquired reference
+ * @if_flags: IFF_* values
+ * @mask: bitmask of bits in if_flags to check
+ *
+ * Search for any interface with the given flags.
*
- * Search for any interface with the given flags. Returns NULL if a device
- * is not found or a pointer to the device. Must be called inside
- * rtnl_lock(), and result refcount is unchanged.
+ * Context: rcu_read_lock() must be held.
+ * Returns: NULL if a device is not found or a pointer to the device.
*/
-
-struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags,
- unsigned short mask)
+struct net_device *netdev_get_by_flags_rcu(struct net *net, netdevice_tracker *tracker,
+ unsigned short if_flags, unsigned short mask)
{
- struct net_device *dev, *ret;
-
- ASSERT_RTNL();
+ struct net_device *dev;
- ret = NULL;
- for_each_netdev(net, dev) {
- if (((dev->flags ^ if_flags) & mask) == 0) {
- ret = dev;
- break;
+ for_each_netdev_rcu(net, dev) {
+ if (((READ_ONCE(dev->flags) ^ if_flags) & mask) == 0) {
+ netdev_hold(dev, tracker, GFP_ATOMIC);
+ return dev;
}
}
- return ret;
+
+ return NULL;
}
-EXPORT_SYMBOL(__dev_get_by_flags);
+EXPORT_IPV6_MOD(netdev_get_by_flags_rcu);
/**
* dev_valid_name - check if name is okay for network device
@@ -1715,7 +1768,7 @@ static void __dev_close(struct net_device *dev)
list_del(&single);
}
-void dev_close_many(struct list_head *head, bool unlink)
+void netif_close_many(struct list_head *head, bool unlink)
{
struct net_device *dev, *tmp;
@@ -1733,7 +1786,7 @@ void dev_close_many(struct list_head *head, bool unlink)
list_del_init(&dev->close_list);
}
}
-EXPORT_SYMBOL(dev_close_many);
+EXPORT_SYMBOL_NS_GPL(netif_close_many, "NETDEV_INTERNAL");
void netif_close(struct net_device *dev)
{
@@ -1741,7 +1794,7 @@ void netif_close(struct net_device *dev)
LIST_HEAD(single);
list_add(&dev->close_list, &single);
- dev_close_many(&single, true);
+ netif_close_many(&single, true);
list_del(&single);
}
}
@@ -3125,7 +3178,6 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
if (dev->reg_state == NETREG_REGISTERED ||
dev->reg_state == NETREG_UNREGISTERING) {
- ASSERT_RTNL();
netdev_ops_assert_locked(dev);
rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
@@ -3175,7 +3227,6 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
return -EINVAL;
if (dev->reg_state == NETREG_REGISTERED) {
- ASSERT_RTNL();
netdev_ops_assert_locked(dev);
rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
@@ -3542,9 +3593,10 @@ out:
}
EXPORT_SYMBOL(skb_checksum_help);
+#ifdef CONFIG_NET_CRC32C
int skb_crc32c_csum_help(struct sk_buff *skb)
{
- __le32 crc32c_csum;
+ u32 crc;
int ret = 0, offset, start;
if (skb->ip_summed != CHECKSUM_PARTIAL)
@@ -3572,15 +3624,14 @@ int skb_crc32c_csum_help(struct sk_buff *skb)
if (ret)
goto out;
- crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start,
- skb->len - start, ~(__u32)0,
- crc32c_csum_stub));
- *(__le32 *)(skb->data + offset) = crc32c_csum;
+ crc = ~skb_crc32c(skb, start, skb->len - start, ~0);
+ *(__le32 *)(skb->data + offset) = cpu_to_le32(crc);
skb_reset_csum_not_inet(skb);
out:
return ret;
}
EXPORT_SYMBOL(skb_crc32c_csum_help);
+#endif /* CONFIG_NET_CRC32C */
__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
{
@@ -3844,12 +3895,42 @@ sw_checksum:
}
EXPORT_SYMBOL(skb_csum_hwoffload_help);
+static struct sk_buff *validate_xmit_unreadable_skb(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct skb_shared_info *shinfo;
+ struct net_iov *niov;
+
+ if (likely(skb_frags_readable(skb)))
+ goto out;
+
+ if (!dev->netmem_tx)
+ goto out_free;
+
+ shinfo = skb_shinfo(skb);
+
+ if (shinfo->nr_frags > 0) {
+ niov = netmem_to_net_iov(skb_frag_netmem(&shinfo->frags[0]));
+ if (net_is_devmem_iov(niov) &&
+ net_devmem_iov_binding(niov)->dev != dev)
+ goto out_free;
+ }
+
+out:
+ return skb;
+
+out_free:
+ kfree_skb(skb);
+ return NULL;
+}
+
static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev, bool *again)
{
netdev_features_t features;
- if (!skb_frags_readable(skb))
- goto out_kfree_skb;
+ skb = validate_xmit_unreadable_skb(skb, dev);
+ if (unlikely(!skb))
+ goto out_null;
features = netif_skb_features(skb);
skb = validate_xmit_vlan(skb, features);
@@ -3944,7 +4025,10 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
unsigned int hdr_len;
/* mac layer + network layer */
- hdr_len = skb_transport_offset(skb);
+ if (!skb->encapsulation)
+ hdr_len = skb_transport_offset(skb);
+ else
+ hdr_len = skb_inner_transport_offset(skb);
/* + transport layer */
if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
@@ -4714,7 +4798,7 @@ static inline void ____napi_schedule(struct softnet_data *sd,
if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
/* Paired with smp_mb__before_atomic() in
- * napi_enable()/dev_set_threaded().
+ * napi_enable()/netif_set_threaded().
* Use READ_ONCE() to guarantee a complete
* read on napi->thread. Only call
* wake_up_process() when it's not NULL.
@@ -4731,6 +4815,7 @@ static inline void ____napi_schedule(struct softnet_data *sd,
}
use_local_napi:
+ DEBUG_NET_WARN_ON_ONCE(!list_empty(&napi->poll_list));
list_add_tail(&napi->poll_list, &sd->poll_list);
WRITE_ONCE(napi->list_owner, smp_processor_id());
/* If not called from net_rx_action()
@@ -4946,7 +5031,8 @@ static void rps_trigger_softirq(void *data)
struct softnet_data *sd = data;
____napi_schedule(sd, &sd->backlog);
- sd->received_rps++;
+ /* Pairs with READ_ONCE() in softnet_seq_show() */
+ WRITE_ONCE(sd->received_rps, sd->received_rps + 1);
}
#endif /* CONFIG_RPS */
@@ -5031,7 +5117,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
rcu_read_lock();
fl = rcu_dereference(sd->flow_limit);
if (fl) {
- new_flow = skb_get_hash(skb) & (fl->num_buckets - 1);
+ new_flow = hash_32(skb_get_hash(skb), fl->log_buckets);
old_flow = fl->history[fl->history_head];
fl->history[fl->history_head] = new_flow;
@@ -5042,7 +5128,8 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
fl->buckets[old_flow]--;
if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
- fl->count++;
+ /* Pairs with READ_ONCE() in softnet_seq_show() */
+ WRITE_ONCE(fl->count, fl->count + 1);
rcu_read_unlock();
return true;
}
@@ -5238,7 +5325,10 @@ netif_skb_check_for_xdp(struct sk_buff **pskb, const struct bpf_prog *prog)
struct sk_buff *skb = *pskb;
int err, hroom, troom;
- if (!skb_cow_data_for_xdp(this_cpu_read(system_page_pool), pskb, prog))
+ local_lock_nested_bh(&system_page_pool.bh_lock);
+ err = skb_cow_data_for_xdp(this_cpu_read(system_page_pool.pool), pskb, prog);
+ local_unlock_nested_bh(&system_page_pool.bh_lock);
+ if (!err)
return 0;
/* In case we have to go down the path and also linearize,
@@ -5659,6 +5749,7 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
struct packet_type **ppt_prev)
{
+ enum skb_drop_reason drop_reason = SKB_DROP_REASON_UNHANDLED_PROTO;
struct packet_type *ptype, *pt_prev;
rx_handler_func_t *rx_handler;
struct sk_buff *skb = *pskb;
@@ -5750,8 +5841,10 @@ skip_taps:
#endif
skb_reset_redirect(skb);
skip_classify:
- if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
+ if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) {
+ drop_reason = SKB_DROP_REASON_PFMEMALLOC;
goto drop;
+ }
if (skb_vlan_tag_present(skb)) {
if (pt_prev) {
@@ -5849,8 +5942,6 @@ check_vlan_id:
}
if (pt_prev) {
- if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
- goto drop;
*ppt_prev = pt_prev;
} else {
drop:
@@ -5858,7 +5949,8 @@ drop:
dev_core_stats_rx_dropped_inc(skb->dev);
else
dev_core_stats_rx_nohandler_inc(skb->dev);
- kfree_skb_reason(skb, SKB_DROP_REASON_UNHANDLED_PROTO);
+
+ kfree_skb_reason(skb, drop_reason);
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
*/
@@ -6486,8 +6578,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
* it, we need to bound somehow the time packets are kept in
* the GRO layer.
*/
- gro_flush(&n->gro, !!timeout);
- gro_normal_list(&n->gro);
+ gro_flush_normal(&n->gro, !!timeout);
if (unlikely(!list_empty(&n->poll_list))) {
/* If n->poll_list is not empty, we need to mask irqs */
@@ -6557,8 +6648,7 @@ static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
}
/* Flush too old packets. If HZ < 1000, flush all packets */
- gro_flush(&napi->gro, HZ >= 1000);
- gro_normal_list(&napi->gro);
+ gro_flush_normal(&napi->gro, HZ >= 1000);
clear_bit(NAPI_STATE_SCHED, &napi->state);
}
@@ -6836,22 +6926,83 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
-int dev_set_threaded(struct net_device *dev, bool threaded)
+static void napi_stop_kthread(struct napi_struct *napi)
+{
+ unsigned long val, new;
+
+ /* Wait until the napi STATE_THREADED is unset. */
+ while (true) {
+ val = READ_ONCE(napi->state);
+
+ /* If napi kthread own this napi or the napi is idle,
+ * STATE_THREADED can be unset here.
+ */
+ if ((val & NAPIF_STATE_SCHED_THREADED) ||
+ !(val & NAPIF_STATE_SCHED)) {
+ new = val & (~NAPIF_STATE_THREADED);
+ } else {
+ msleep(20);
+ continue;
+ }
+
+ if (try_cmpxchg(&napi->state, &val, new))
+ break;
+ }
+
+ /* Once STATE_THREADED is unset, wait for SCHED_THREADED to be unset by
+ * the kthread.
+ */
+ while (true) {
+ if (!test_bit(NAPIF_STATE_SCHED_THREADED, &napi->state))
+ break;
+
+ msleep(20);
+ }
+
+ kthread_stop(napi->thread);
+ napi->thread = NULL;
+}
+
+int napi_set_threaded(struct napi_struct *napi,
+ enum netdev_napi_threaded threaded)
+{
+ if (threaded) {
+ if (!napi->thread) {
+ int err = napi_kthread_create(napi);
+
+ if (err)
+ return err;
+ }
+ }
+
+ if (napi->config)
+ napi->config->threaded = threaded;
+
+ if (!threaded && napi->thread) {
+ napi_stop_kthread(napi);
+ } else {
+ /* Make sure kthread is created before THREADED bit is set. */
+ smp_mb__before_atomic();
+ assign_bit(NAPI_STATE_THREADED, &napi->state, threaded);
+ }
+
+ return 0;
+}
+
+int netif_set_threaded(struct net_device *dev,
+ enum netdev_napi_threaded threaded)
{
struct napi_struct *napi;
int err = 0;
netdev_assert_locked_or_invisible(dev);
- if (dev->threaded == threaded)
- return 0;
-
if (threaded) {
list_for_each_entry(napi, &dev->napi_list, dev_list) {
if (!napi->thread) {
err = napi_kthread_create(napi);
if (err) {
- threaded = false;
+ threaded = NETDEV_NAPI_THREADED_DISABLED;
break;
}
}
@@ -6871,12 +7022,32 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
* softirq mode will happen in the next round of napi_schedule().
* This should not cause hiccups/stalls to the live traffic.
*/
- list_for_each_entry(napi, &dev->napi_list, dev_list)
- assign_bit(NAPI_STATE_THREADED, &napi->state, threaded);
+ list_for_each_entry(napi, &dev->napi_list, dev_list) {
+ if (!threaded && napi->thread)
+ napi_stop_kthread(napi);
+ else
+ assign_bit(NAPI_STATE_THREADED, &napi->state, threaded);
+ }
return err;
}
-EXPORT_SYMBOL(dev_set_threaded);
+
+/**
+ * netif_threaded_enable() - enable threaded NAPIs
+ * @dev: net_device instance
+ *
+ * Enable threaded mode for the NAPI instances of the device. This may be useful
+ * for devices where multiple NAPI instances get scheduled by a single
+ * interrupt. Threaded NAPI allows moving the NAPI processing to cores other
+ * than the core where IRQ is mapped.
+ *
+ * This function should be called before @dev is registered.
+ */
+void netif_threaded_enable(struct net_device *dev)
+{
+ WARN_ON_ONCE(netif_set_threaded(dev, NETDEV_NAPI_THREADED_ENABLED));
+}
+EXPORT_SYMBOL(netif_threaded_enable);
/**
* netif_queue_set_napi - Associate queue with the napi
@@ -7092,6 +7263,8 @@ static void napi_restore_config(struct napi_struct *n)
napi_hash_add(n);
n->config->napi_id = n->napi_id;
}
+
+ WARN_ON_ONCE(napi_set_threaded(n, n->config->threaded));
}
static void napi_save_config(struct napi_struct *n)
@@ -7189,7 +7362,7 @@ void netif_napi_add_weight_locked(struct net_device *dev,
* threaded mode will not be enabled in napi_enable().
*/
if (dev->threaded && napi_kthread_create(napi))
- dev->threaded = false;
+ dev->threaded = NETDEV_NAPI_THREADED_DISABLED;
netif_napi_set_irq_locked(napi, -1);
}
EXPORT_SYMBOL(netif_napi_add_weight_locked);
@@ -7358,8 +7531,7 @@ static int __napi_poll(struct napi_struct *n, bool *repoll)
}
/* Flush too old packets. If HZ < 1000, flush all packets */
- gro_flush(&n->gro, HZ >= 1000);
- gro_normal_list(&n->gro);
+ gro_flush_normal(&n->gro, HZ >= 1000);
/* Some drivers may have called napi_schedule
* prior to exhausting their budget.
@@ -7387,9 +7559,14 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
work = __napi_poll(n, &do_repoll);
- if (do_repoll)
+ if (do_repoll) {
+#if defined(CONFIG_DEBUG_NET)
+ if (unlikely(!napi_is_scheduled(n)))
+ pr_crit("repoll requested for device %s %ps but napi is not scheduled.\n",
+ n->dev->name, n->poll);
+#endif
list_add_tail(&n->poll_list, repoll);
-
+ }
netpoll_poll_unlock(have);
return work;
@@ -7515,7 +7692,8 @@ start:
*/
if (unlikely(budget <= 0 ||
time_after_eq(jiffies, time_limit))) {
- sd->time_squeeze++;
+ /* Pairs with READ_ONCE() in softnet_seq_show() */
+ WRITE_ONCE(sd->time_squeeze, sd->time_squeeze + 1);
break;
}
}
@@ -9188,8 +9366,16 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
dev_change_rx_flags(dev, IFF_PROMISC);
}
- if (notify)
+ if (notify) {
+ /* The ops lock is only required to ensure consistent locking
+ * for `NETDEV_CHANGE` notifiers. This function is sometimes
+ * called without the lock, even for devices that are ops
+ * locked, such as in `dev_uc_sync_multiple` when using
+ * bonding or teaming.
+ */
+ netdev_ops_assert_locked(dev);
__dev_notify_flags(dev, old_flags, IFF_PROMISC, 0, NULL);
+ }
return 0;
}
@@ -9283,12 +9469,12 @@ void dev_set_rx_mode(struct net_device *dev)
}
/**
- * dev_get_flags - get flags reported to userspace
- * @dev: device
+ * netif_get_flags() - get flags reported to userspace
+ * @dev: device
*
- * Get the combination of flag bits exported through APIs to userspace.
+ * Get the combination of flag bits exported through APIs to userspace.
*/
-unsigned int dev_get_flags(const struct net_device *dev)
+unsigned int netif_get_flags(const struct net_device *dev)
{
unsigned int flags;
@@ -9311,7 +9497,7 @@ unsigned int dev_get_flags(const struct net_device *dev)
return flags;
}
-EXPORT_SYMBOL(dev_get_flags);
+EXPORT_SYMBOL(netif_get_flags);
int __dev_change_flags(struct net_device *dev, unsigned int flags,
struct netlink_ext_ack *extack)
@@ -9423,7 +9609,7 @@ int netif_change_flags(struct net_device *dev, unsigned int flags,
return ret;
}
-int __dev_set_mtu(struct net_device *dev, int new_mtu)
+int __netif_set_mtu(struct net_device *dev, int new_mtu)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -9434,7 +9620,7 @@ int __dev_set_mtu(struct net_device *dev, int new_mtu)
WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
-EXPORT_SYMBOL(__dev_set_mtu);
+EXPORT_SYMBOL_NS_GPL(__netif_set_mtu, "NETDEV_INTERNAL");
int dev_validate_mtu(struct net_device *dev, int new_mtu,
struct netlink_ext_ack *extack)
@@ -9453,18 +9639,22 @@ int dev_validate_mtu(struct net_device *dev, int new_mtu,
}
/**
- * netif_set_mtu_ext - Change maximum transfer unit
- * @dev: device
- * @new_mtu: new transfer unit
- * @extack: netlink extended ack
+ * netif_set_mtu_ext() - Change maximum transfer unit
+ * @dev: device
+ * @new_mtu: new transfer unit
+ * @extack: netlink extended ack
*
- * Change the maximum transfer size of the network device.
+ * Change the maximum transfer size of the network device.
+ *
+ * Return: 0 on success, -errno on failure.
*/
int netif_set_mtu_ext(struct net_device *dev, int new_mtu,
struct netlink_ext_ack *extack)
{
int err, orig_mtu;
+ netdev_ops_assert_locked(dev);
+
if (new_mtu == dev->mtu)
return 0;
@@ -9481,7 +9671,7 @@ int netif_set_mtu_ext(struct net_device *dev, int new_mtu,
return err;
orig_mtu = dev->mtu;
- err = __dev_set_mtu(dev, new_mtu);
+ err = __netif_set_mtu(dev, new_mtu);
if (!err) {
err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
@@ -9491,7 +9681,7 @@ int netif_set_mtu_ext(struct net_device *dev, int new_mtu,
/* setting mtu back and notifying everyone again,
* so that they have a chance to revert changes.
*/
- __dev_set_mtu(dev, orig_mtu);
+ __netif_set_mtu(dev, orig_mtu);
call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
new_mtu);
}
@@ -9545,13 +9735,15 @@ void netif_set_group(struct net_device *dev, int new_group)
}
/**
- * dev_pre_changeaddr_notify - Call NETDEV_PRE_CHANGEADDR.
- * @dev: device
- * @addr: new address
- * @extack: netlink extended ack
+ * netif_pre_changeaddr_notify() - Call NETDEV_PRE_CHANGEADDR.
+ * @dev: device
+ * @addr: new address
+ * @extack: netlink extended ack
+ *
+ * Return: 0 on success, -errno on failure.
*/
-int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
- struct netlink_ext_ack *extack)
+int netif_pre_changeaddr_notify(struct net_device *dev, const char *addr,
+ struct netlink_ext_ack *extack)
{
struct netdev_notifier_pre_changeaddr_info info = {
.info.dev = dev,
@@ -9563,9 +9755,9 @@ int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
rc = call_netdevice_notifiers_info(NETDEV_PRE_CHANGEADDR, &info.info);
return notifier_to_errno(rc);
}
-EXPORT_SYMBOL(dev_pre_changeaddr_notify);
+EXPORT_SYMBOL_NS_GPL(netif_pre_changeaddr_notify, "NETDEV_INTERNAL");
-int netif_set_mac_address(struct net_device *dev, struct sockaddr *sa,
+int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss,
struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -9573,15 +9765,15 @@ int netif_set_mac_address(struct net_device *dev, struct sockaddr *sa,
if (!ops->ndo_set_mac_address)
return -EOPNOTSUPP;
- if (sa->sa_family != dev->type)
+ if (ss->ss_family != dev->type)
return -EINVAL;
if (!netif_device_present(dev))
return -ENODEV;
- err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack);
+ err = netif_pre_changeaddr_notify(dev, ss->__data, extack);
if (err)
return err;
- if (memcmp(dev->dev_addr, sa->sa_data, dev->addr_len)) {
- err = ops->ndo_set_mac_address(dev, sa);
+ if (memcmp(dev->dev_addr, ss->__data, dev->addr_len)) {
+ err = ops->ndo_set_mac_address(dev, ss);
if (err)
return err;
}
@@ -9593,7 +9785,8 @@ int netif_set_mac_address(struct net_device *dev, struct sockaddr *sa,
DECLARE_RWSEM(dev_addr_sem);
-int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name)
+/* "sa" is a true struct sockaddr with limited "sa_data" member. */
+int netif_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name)
{
size_t size = sizeof(sa->sa_data_min);
struct net_device *dev;
@@ -9619,7 +9812,7 @@ unlock:
up_read(&dev_addr_sem);
return ret;
}
-EXPORT_SYMBOL(dev_get_mac_address);
+EXPORT_SYMBOL_NS_GPL(netif_get_mac_address, "NETDEV_INTERNAL");
int netif_change_carrier(struct net_device *dev, bool new_carrier)
{
@@ -9672,16 +9865,17 @@ int dev_get_phys_port_name(struct net_device *dev,
}
/**
- * dev_get_port_parent_id - Get the device's port parent identifier
- * @dev: network device
- * @ppid: pointer to a storage for the port's parent identifier
- * @recurse: allow/disallow recursion to lower devices
+ * netif_get_port_parent_id() - Get the device's port parent identifier
+ * @dev: network device
+ * @ppid: pointer to a storage for the port's parent identifier
+ * @recurse: allow/disallow recursion to lower devices
+ *
+ * Get the devices's port parent identifier.
*
- * Get the devices's port parent identifier
+ * Return: 0 on success, -errno on failure.
*/
-int dev_get_port_parent_id(struct net_device *dev,
- struct netdev_phys_item_id *ppid,
- bool recurse)
+int netif_get_port_parent_id(struct net_device *dev,
+ struct netdev_phys_item_id *ppid, bool recurse)
{
const struct net_device_ops *ops = dev->netdev_ops;
struct netdev_phys_item_id first = { };
@@ -9700,7 +9894,7 @@ int dev_get_port_parent_id(struct net_device *dev,
return err;
netdev_for_each_lower_dev(dev, lower_dev, iter) {
- err = dev_get_port_parent_id(lower_dev, ppid, true);
+ err = netif_get_port_parent_id(lower_dev, ppid, true);
if (err)
break;
if (!first.id_len)
@@ -9711,7 +9905,7 @@ int dev_get_port_parent_id(struct net_device *dev,
return err;
}
-EXPORT_SYMBOL(dev_get_port_parent_id);
+EXPORT_SYMBOL(netif_get_port_parent_id);
/**
* netdev_port_same_parent_id - Indicate if two network devices have
@@ -9724,8 +9918,8 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b)
struct netdev_phys_item_id a_id = { };
struct netdev_phys_item_id b_id = { };
- if (dev_get_port_parent_id(a, &a_id, true) ||
- dev_get_port_parent_id(b, &b_id, true))
+ if (netif_get_port_parent_id(a, &a_id, true) ||
+ netif_get_port_parent_id(b, &b_id, true))
return false;
return netdev_phys_item_id_same(&a_id, &b_id);
@@ -9863,6 +10057,7 @@ int netif_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf)
return dev->netdev_ops->ndo_bpf(dev, bpf);
}
+EXPORT_SYMBOL_GPL(netif_xdp_propagate);
u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
{
@@ -10258,7 +10453,8 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
goto unlock;
}
- bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog);
+ bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog,
+ attr->link_create.attach_type);
link->dev = dev;
link->flags = attr->link_create.flags;
@@ -10393,7 +10589,7 @@ static void dev_index_release(struct net *net, int ifindex)
static bool from_cleanup_net(void)
{
#ifdef CONFIG_NET_NS
- return current == cleanup_net_task;
+ return current == READ_ONCE(cleanup_net_task);
#else
return false;
#endif
@@ -10624,12 +10820,14 @@ sync_lower:
* *before* calling udp_tunnel_get_rx_info,
* but *after* calling udp_tunnel_drop_rx_info.
*/
+ udp_tunnel_nic_lock(dev);
if (features & NETIF_F_RX_UDP_TUNNEL_PORT) {
dev->features = features;
udp_tunnel_get_rx_info(dev);
} else {
udp_tunnel_drop_rx_info(dev);
}
+ udp_tunnel_nic_unlock(dev);
}
if (diff & NETIF_F_HW_VLAN_CTAG_FILTER) {
@@ -11047,8 +11245,7 @@ int register_netdevice(struct net_device *dev)
* Prevent userspace races by waiting until the network
* device is fully setup before sending notifications.
*/
- if (!dev->rtnl_link_ops ||
- dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+ if (!(dev->rtnl_link_ops && dev->rtnl_link_initializing))
rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL, 0, NULL);
out:
@@ -11610,7 +11807,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->priv_len = sizeof_priv;
- ref_tracker_dir_init(&dev->refcnt_tracker, 128, name);
+ ref_tracker_dir_init(&dev->refcnt_tracker, 128, "netdev");
#ifdef CONFIG_PCPU_DEV_REFCNT
dev->pcpu_refcnt = alloc_percpu(int);
if (!dev->pcpu_refcnt)
@@ -11832,21 +12029,8 @@ static void netdev_rss_contexts_free(struct net_device *dev)
mutex_lock(&dev->ethtool->rss_lock);
xa_for_each(&dev->ethtool->rss_ctx, context, ctx) {
- struct ethtool_rxfh_param rxfh;
-
- rxfh.indir = ethtool_rxfh_context_indir(ctx);
- rxfh.key = ethtool_rxfh_context_key(ctx);
- rxfh.hfunc = ctx->hfunc;
- rxfh.input_xfrm = ctx->input_xfrm;
- rxfh.rss_context = context;
- rxfh.rss_delete = true;
-
xa_erase(&dev->ethtool->rss_ctx, context);
- if (dev->ethtool_ops->create_rxfh_context)
- dev->ethtool_ops->remove_rxfh_context(dev, ctx,
- context, NULL);
- else
- dev->ethtool_ops->set_rxfh(dev, &rxfh, NULL);
+ dev->ethtool_ops->remove_rxfh_context(dev, ctx, context, NULL);
kfree(ctx);
}
xa_destroy(&dev->ethtool->rss_ctx);
@@ -11931,7 +12115,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
netdev_lock(dev);
}
}
- dev_close_many(&close_head, true);
+ netif_close_many(&close_head, true);
/* ... now unlock them and go over the rest. */
list_for_each_entry(dev, head, unreg_list) {
if (netdev_need_ops_lock(dev))
@@ -11939,7 +12123,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
else
list_add_tail(&dev->close_list, &close_head);
}
- dev_close_many(&close_head, true);
+ netif_close_many(&close_head, true);
list_for_each_entry(dev, head, unreg_list) {
/* And unlink it from device chain. */
@@ -11971,8 +12155,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
*/
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
- if (!dev->rtnl_link_ops ||
- dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+ if (!(dev->rtnl_link_ops && dev->rtnl_link_initializing))
skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
GFP_KERNEL, NULL, 0,
portid, nlh);
@@ -12146,7 +12329,11 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
netif_close(dev);
/* And unlink it from device chain */
unlist_netdevice(dev);
- netdev_unlock_ops(dev);
+
+ if (!netdev_need_ops_lock(dev))
+ netdev_lock(dev);
+ dev->moving_ns = true;
+ netdev_unlock(dev);
synchronize_net();
@@ -12184,7 +12371,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
move_netdevice_notifiers_dev_net(dev, net);
/* Actually switch the network namespace */
+ netdev_lock(dev);
dev_net_set(dev, net);
+ netdev_unlock(dev);
dev->ifindex = new_ifindex;
if (new_name[0]) {
@@ -12210,7 +12399,11 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
err = netdev_change_owner(dev, net_old, net);
WARN_ON(err);
- netdev_lock_ops(dev);
+ netdev_lock(dev);
+ dev->moving_ns = false;
+ if (!netdev_need_ops_lock(dev))
+ netdev_unlock(dev);
+
/* Add the device back in the hashes */
list_netdevice(dev);
/* Notify protocols, that a new device appeared. */
@@ -12621,7 +12814,7 @@ static int net_page_pool_create(int cpuid)
return err;
}
- per_cpu(system_page_pool, cpuid) = pp_ptr;
+ per_cpu(system_page_pool.pool, cpuid) = pp_ptr;
#endif
return 0;
}
@@ -12751,13 +12944,13 @@ out:
for_each_possible_cpu(i) {
struct page_pool *pp_ptr;
- pp_ptr = per_cpu(system_page_pool, i);
+ pp_ptr = per_cpu(system_page_pool.pool, i);
if (!pp_ptr)
continue;
xdp_unreg_page_pool(pp_ptr);
page_pool_destroy(pp_ptr);
- per_cpu(system_page_pool, i) = NULL;
+ per_cpu(system_page_pool.pool, i) = NULL;
}
}