Diffstat (limited to 'net/core/dev.c')
-rw-r--r--	net/core/dev.c	404
1 file changed, 242 insertions(+), 162 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 1f79b9aa9a3f..222b1d322c96 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -848,6 +848,52 @@ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
+static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack)
+{
+ int k = stack->num_paths++;
+
+ if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
+ return NULL;
+
+ return &stack->path[k];
+}
+
+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
+ struct net_device_path_stack *stack)
+{
+ const struct net_device *last_dev;
+ struct net_device_path_ctx ctx = {
+ .dev = dev,
+ .daddr = daddr,
+ };
+ struct net_device_path *path;
+ int ret = 0;
+
+ stack->num_paths = 0;
+ while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
+ last_dev = ctx.dev;
+ path = dev_fwd_path(stack);
+ if (!path)
+ return -1;
+
+ memset(path, 0, sizeof(struct net_device_path));
+ ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path);
+ if (ret < 0)
+ return -1;
+
+ if (WARN_ON_ONCE(last_dev == ctx.dev))
+ return -1;
+ }
+ path = dev_fwd_path(stack);
+ if (!path)
+ return -1;
+ path->type = DEV_PATH_ETHERNET;
+ path->dev = ctx.dev;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(dev_fill_forward_path);
+
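The new dev_fill_forward_path() repeatedly calls each device's ndo_fill_forward_path() callback to descend the stacked-device chain, then terminates the stack with a DEV_PATH_ETHERNET entry for the real egress device. A minimal sketch of a hypothetical caller, assuming the struct net_device_path_stack / struct net_device_path layout from the companion netdevice.h change (not shown in this diff):

/* Hypothetical caller: resolve and print the forwarding path for daddr. */
static void example_dump_forward_path(const struct net_device *dev, const u8 *daddr)
{
	struct net_device_path_stack stack;
	int i;

	if (dev_fill_forward_path(dev, daddr, &stack) < 0)
		return;	/* path could not be resolved */

	/* The final entry is always DEV_PATH_ETHERNET: the low-level egress device. */
	for (i = 0; i < stack.num_paths; i++)
		pr_info("hop %d: %s\n", i,
			stack.path[i].dev ? stack.path[i].dev->name : "(none)");
}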
/**
* __dev_get_by_name - find a device by its name
* @net: the applicable net namespace
@@ -2463,16 +2509,14 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
EXPORT_SYMBOL(netdev_txq_to_tc);
#ifdef CONFIG_XPS
-struct static_key xps_needed __read_mostly;
-EXPORT_SYMBOL(xps_needed);
-struct static_key xps_rxqs_needed __read_mostly;
-EXPORT_SYMBOL(xps_rxqs_needed);
+static struct static_key xps_needed __read_mostly;
+static struct static_key xps_rxqs_needed __read_mostly;
static DEFINE_MUTEX(xps_map_mutex);
#define xmap_dereference(P) \
rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
- int tci, u16 index)
+ struct xps_dev_maps *old_maps, int tci, u16 index)
{
struct xps_map *map = NULL;
int pos;
@@ -2491,6 +2535,8 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
break;
}
+ if (old_maps)
+ RCU_INIT_POINTER(old_maps->attr_map[tci], NULL);
RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
kfree_rcu(map, rcu);
return false;
@@ -2503,7 +2549,7 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
struct xps_dev_maps *dev_maps,
int cpu, u16 offset, u16 count)
{
- int num_tc = dev->num_tc ? : 1;
+ int num_tc = dev_maps->num_tc;
bool active = false;
int tci;
@@ -2511,7 +2557,7 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
int i, j;
for (i = count, j = offset; i--; j++) {
- if (!remove_xps_queue(dev_maps, tci, j))
+ if (!remove_xps_queue(dev_maps, NULL, tci, j))
break;
}
@@ -2523,74 +2569,54 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
static void reset_xps_maps(struct net_device *dev,
struct xps_dev_maps *dev_maps,
- bool is_rxqs_map)
+ enum xps_map_type type)
{
- if (is_rxqs_map) {
- static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
- RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
- } else {
- RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
- }
static_key_slow_dec_cpuslocked(&xps_needed);
+ if (type == XPS_RXQS)
+ static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
+
+ RCU_INIT_POINTER(dev->xps_maps[type], NULL);
+
kfree_rcu(dev_maps, rcu);
}
-static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
- struct xps_dev_maps *dev_maps, unsigned int nr_ids,
- u16 offset, u16 count, bool is_rxqs_map)
+static void clean_xps_maps(struct net_device *dev, enum xps_map_type type,
+ u16 offset, u16 count)
{
+ struct xps_dev_maps *dev_maps;
bool active = false;
int i, j;
- for (j = -1; j = netif_attrmask_next(j, mask, nr_ids),
- j < nr_ids;)
- active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
- count);
+ dev_maps = xmap_dereference(dev->xps_maps[type]);
+ if (!dev_maps)
+ return;
+
+ for (j = 0; j < dev_maps->nr_ids; j++)
+ active |= remove_xps_queue_cpu(dev, dev_maps, j, offset, count);
if (!active)
- reset_xps_maps(dev, dev_maps, is_rxqs_map);
+ reset_xps_maps(dev, dev_maps, type);
- if (!is_rxqs_map) {
- for (i = offset + (count - 1); count--; i--) {
+ if (type == XPS_CPUS) {
+ for (i = offset + (count - 1); count--; i--)
netdev_queue_numa_node_write(
- netdev_get_tx_queue(dev, i),
- NUMA_NO_NODE);
- }
+ netdev_get_tx_queue(dev, i), NUMA_NO_NODE);
}
}
static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
u16 count)
{
- const unsigned long *possible_mask = NULL;
- struct xps_dev_maps *dev_maps;
- unsigned int nr_ids;
-
if (!static_key_false(&xps_needed))
return;
cpus_read_lock();
mutex_lock(&xps_map_mutex);
- if (static_key_false(&xps_rxqs_needed)) {
- dev_maps = xmap_dereference(dev->xps_rxqs_map);
- if (dev_maps) {
- nr_ids = dev->num_rx_queues;
- clean_xps_maps(dev, possible_mask, dev_maps, nr_ids,
- offset, count, true);
- }
- }
-
- dev_maps = xmap_dereference(dev->xps_cpus_map);
- if (!dev_maps)
- goto out_no_maps;
+ if (static_key_false(&xps_rxqs_needed))
+ clean_xps_maps(dev, XPS_RXQS, offset, count);
- if (num_possible_cpus() > 1)
- possible_mask = cpumask_bits(cpu_possible_mask);
- nr_ids = nr_cpu_ids;
- clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset, count,
- false);
+ clean_xps_maps(dev, XPS_CPUS, offset, count);
-out_no_maps:
mutex_unlock(&xps_map_mutex);
cpus_read_unlock();
}
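The two per-type pointers (xps_cpus_map, xps_rxqs_map) are collapsed into a single dev->xps_maps[] array indexed by a map type, which is what lets the CPUs and RXQS paths above share clean_xps_maps(). A sketch of the definitions this presumably relies on, from the companion netdevice.h change rather than this diff:

/* Assumed companion definitions in include/linux/netdevice.h */
enum xps_map_type {
	XPS_CPUS = 0,
	XPS_RXQS,
	XPS_MAPS_MAX,
};

/* struct net_device then carries:
 *	struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX];
 * and struct xps_dev_maps gains nr_ids/num_tc fields, which is why the
 * helpers above read dev_maps->nr_ids and dev_maps->num_tc instead of
 * recomputing them from dev.
 */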
@@ -2640,16 +2666,35 @@ static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
return new_map;
}
+/* Copy xps maps at a given index */
+static void xps_copy_dev_maps(struct xps_dev_maps *dev_maps,
+ struct xps_dev_maps *new_dev_maps, int index,
+ int tc, bool skip_tc)
+{
+ int i, tci = index * dev_maps->num_tc;
+ struct xps_map *map;
+
+ /* copy maps belonging to foreign traffic classes */
+ for (i = 0; i < dev_maps->num_tc; i++, tci++) {
+ if (i == tc && skip_tc)
+ continue;
+
+ /* fill in the new device map from the old device map */
+ map = xmap_dereference(dev_maps->attr_map[tci]);
+ RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
+ }
+}
+
/* Must be called under cpus_read_lock */
int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
- u16 index, bool is_rxqs_map)
+ u16 index, enum xps_map_type type)
{
- const unsigned long *online_mask = NULL, *possible_mask = NULL;
- struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
+ struct xps_dev_maps *dev_maps, *new_dev_maps = NULL, *old_dev_maps = NULL;
+ const unsigned long *online_mask = NULL;
+ bool active = false, copy = false;
int i, j, tci, numa_node_id = -2;
int maps_sz, num_tc = 1, tc = 0;
struct xps_map *map, *new_map;
- bool active = false;
unsigned int nr_ids;
if (dev->num_tc) {
@@ -2667,38 +2712,48 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
}
mutex_lock(&xps_map_mutex);
- if (is_rxqs_map) {
+
+ dev_maps = xmap_dereference(dev->xps_maps[type]);
+ if (type == XPS_RXQS) {
maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
- dev_maps = xmap_dereference(dev->xps_rxqs_map);
nr_ids = dev->num_rx_queues;
} else {
maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
- if (num_possible_cpus() > 1) {
+ if (num_possible_cpus() > 1)
online_mask = cpumask_bits(cpu_online_mask);
- possible_mask = cpumask_bits(cpu_possible_mask);
- }
- dev_maps = xmap_dereference(dev->xps_cpus_map);
nr_ids = nr_cpu_ids;
}
if (maps_sz < L1_CACHE_BYTES)
maps_sz = L1_CACHE_BYTES;
+ /* The old dev_maps could be larger or smaller than the one we're
+ * setting up now, as dev->num_tc or nr_ids could have been updated in
+ * between. We could try to be smart, but let's be safe instead and only
+ * copy foreign traffic classes if the two map sizes match.
+ */
+ if (dev_maps &&
+ dev_maps->num_tc == num_tc && dev_maps->nr_ids == nr_ids)
+ copy = true;
+
/* allocate memory for queue storage */
for (j = -1; j = netif_attrmask_next_and(j, online_mask, mask, nr_ids),
j < nr_ids;) {
- if (!new_dev_maps)
- new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
if (!new_dev_maps) {
- mutex_unlock(&xps_map_mutex);
- return -ENOMEM;
+ new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
+ if (!new_dev_maps) {
+ mutex_unlock(&xps_map_mutex);
+ return -ENOMEM;
+ }
+
+ new_dev_maps->nr_ids = nr_ids;
+ new_dev_maps->num_tc = num_tc;
}
tci = j * num_tc + tc;
- map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) :
- NULL;
+ map = copy ? xmap_dereference(dev_maps->attr_map[tci]) : NULL;
- map = expand_xps_map(map, j, index, is_rxqs_map);
+ map = expand_xps_map(map, j, index, type == XPS_RXQS);
if (!map)
goto error;
@@ -2711,29 +2766,21 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
if (!dev_maps) {
/* Increment static keys at most once per type */
static_key_slow_inc_cpuslocked(&xps_needed);
- if (is_rxqs_map)
+ if (type == XPS_RXQS)
static_key_slow_inc_cpuslocked(&xps_rxqs_needed);
}
- for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
- j < nr_ids;) {
- /* copy maps belonging to foreign traffic classes */
- for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) {
- /* fill in the new device map from the old device map */
- map = xmap_dereference(dev_maps->attr_map[tci]);
- RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
- }
+ for (j = 0; j < nr_ids; j++) {
+ bool skip_tc = false;
- /* We need to explicitly update tci as prevous loop
- * could break out early if dev_maps is NULL.
- */
tci = j * num_tc + tc;
-
if (netif_attr_test_mask(j, mask, nr_ids) &&
netif_attr_test_online(j, online_mask, nr_ids)) {
/* add tx-queue to CPU/rx-queue maps */
int pos = 0;
+ skip_tc = true;
+
map = xmap_dereference(new_dev_maps->attr_map[tci]);
while ((pos < map->len) && (map->queues[pos] != index))
pos++;
@@ -2741,78 +2788,81 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
if (pos == map->len)
map->queues[map->len++] = index;
#ifdef CONFIG_NUMA
- if (!is_rxqs_map) {
+ if (type == XPS_CPUS) {
if (numa_node_id == -2)
numa_node_id = cpu_to_node(j);
else if (numa_node_id != cpu_to_node(j))
numa_node_id = -1;
}
#endif
- } else if (dev_maps) {
- /* fill in the new device map from the old device map */
- map = xmap_dereference(dev_maps->attr_map[tci]);
- RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
}
- /* copy maps belonging to foreign traffic classes */
- for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
- /* fill in the new device map from the old device map */
- map = xmap_dereference(dev_maps->attr_map[tci]);
- RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
- }
+ if (copy)
+ xps_copy_dev_maps(dev_maps, new_dev_maps, j, tc,
+ skip_tc);
}
- if (is_rxqs_map)
- rcu_assign_pointer(dev->xps_rxqs_map, new_dev_maps);
- else
- rcu_assign_pointer(dev->xps_cpus_map, new_dev_maps);
+ rcu_assign_pointer(dev->xps_maps[type], new_dev_maps);
/* Cleanup old maps */
if (!dev_maps)
goto out_no_old_maps;
- for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
- j < nr_ids;) {
- for (i = num_tc, tci = j * num_tc; i--; tci++) {
- new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
+ for (j = 0; j < dev_maps->nr_ids; j++) {
+ for (i = num_tc, tci = j * dev_maps->num_tc; i--; tci++) {
map = xmap_dereference(dev_maps->attr_map[tci]);
- if (map && map != new_map)
- kfree_rcu(map, rcu);
+ if (!map)
+ continue;
+
+ if (copy) {
+ new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
+ if (map == new_map)
+ continue;
+ }
+
+ RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
+ kfree_rcu(map, rcu);
}
}
- kfree_rcu(dev_maps, rcu);
+ old_dev_maps = dev_maps;
out_no_old_maps:
dev_maps = new_dev_maps;
active = true;
out_no_new_maps:
- if (!is_rxqs_map) {
+ if (type == XPS_CPUS)
/* update Tx queue numa node */
netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
(numa_node_id >= 0) ?
numa_node_id : NUMA_NO_NODE);
- }
if (!dev_maps)
goto out_no_maps;
/* removes tx-queue from unused CPUs/rx-queues */
- for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
- j < nr_ids;) {
- for (i = tc, tci = j * num_tc; i--; tci++)
- active |= remove_xps_queue(dev_maps, tci, index);
- if (!netif_attr_test_mask(j, mask, nr_ids) ||
- !netif_attr_test_online(j, online_mask, nr_ids))
- active |= remove_xps_queue(dev_maps, tci, index);
- for (i = num_tc - tc, tci++; --i; tci++)
- active |= remove_xps_queue(dev_maps, tci, index);
+ for (j = 0; j < dev_maps->nr_ids; j++) {
+ tci = j * dev_maps->num_tc;
+
+ for (i = 0; i < dev_maps->num_tc; i++, tci++) {
+ if (i == tc &&
+ netif_attr_test_mask(j, mask, dev_maps->nr_ids) &&
+ netif_attr_test_online(j, online_mask, dev_maps->nr_ids))
+ continue;
+
+ active |= remove_xps_queue(dev_maps,
+ copy ? old_dev_maps : NULL,
+ tci, index);
+ }
}
+ if (old_dev_maps)
+ kfree_rcu(old_dev_maps, rcu);
+
/* free map if not active */
if (!active)
- reset_xps_maps(dev, dev_maps, is_rxqs_map);
+ reset_xps_maps(dev, dev_maps, type);
out_no_maps:
mutex_unlock(&xps_map_mutex);
@@ -2820,11 +2870,10 @@ out_no_maps:
return 0;
error:
/* remove any maps that we added */
- for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
- j < nr_ids;) {
+ for (j = 0; j < nr_ids; j++) {
for (i = num_tc, tci = j * num_tc; i--; tci++) {
new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
- map = dev_maps ?
+ map = copy ?
xmap_dereference(dev_maps->attr_map[tci]) :
NULL;
if (new_map && new_map != map)
@@ -2845,7 +2894,7 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
int ret;
cpus_read_lock();
- ret = __netif_set_xps_queue(dev, cpumask_bits(mask), index, false);
+ ret = __netif_set_xps_queue(dev, cpumask_bits(mask), index, XPS_CPUS);
cpus_read_unlock();
return ret;
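netif_set_xps_queue() keeps its signature and simply forwards XPS_CPUS, so existing callers are unaffected. A hedged driver-side example (hypothetical function name) of pinning TX queue 0 to CPU 0:

/* Hypothetical driver snippet: steer TX queue 0 to CPU 0 via XPS. */
static int example_setup_xps(struct net_device *dev)
{
	cpumask_var_t mask;
	int err;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	cpumask_set_cpu(0, mask);
	err = netif_set_xps_queue(dev, mask, 0);	/* TX queue index 0 */

	free_cpumask_var(mask);
	return err;
}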
@@ -3956,13 +4005,15 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
struct xps_dev_maps *dev_maps, unsigned int tci)
{
+ int tc = netdev_get_prio_tc_map(dev, skb->priority);
struct xps_map *map;
int queue_index = -1;
- if (dev->num_tc) {
- tci *= dev->num_tc;
- tci += netdev_get_prio_tc_map(dev, skb->priority);
- }
+ if (tc >= dev_maps->num_tc || tci >= dev_maps->nr_ids)
+ return queue_index;
+
+ tci *= dev_maps->num_tc;
+ tci += tc;
map = rcu_dereference(dev_maps->attr_map[tci]);
if (map) {
@@ -3993,18 +4044,18 @@ static int get_xps_queue(struct net_device *dev, struct net_device *sb_dev,
if (!static_key_false(&xps_rxqs_needed))
goto get_cpus_map;
- dev_maps = rcu_dereference(sb_dev->xps_rxqs_map);
+ dev_maps = rcu_dereference(sb_dev->xps_maps[XPS_RXQS]);
if (dev_maps) {
int tci = sk_rx_queue_get(sk);
- if (tci >= 0 && tci < dev->num_rx_queues)
+ if (tci >= 0)
queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
tci);
}
get_cpus_map:
if (queue_index < 0) {
- dev_maps = rcu_dereference(sb_dev->xps_cpus_map);
+ dev_maps = rcu_dereference(sb_dev->xps_maps[XPS_CPUS]);
if (dev_maps) {
unsigned int tci = skb->sender_cpu - 1;
@@ -4672,10 +4723,10 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
void *orig_data, *orig_data_end, *hard_start;
struct netdev_rx_queue *rxqueue;
u32 metalen, act = XDP_DROP;
+ bool orig_bcast, orig_host;
u32 mac_len, frame_sz;
__be16 orig_eth_type;
struct ethhdr *eth;
- bool orig_bcast;
int off;
/* Reinjected packets coming from act_mirred or similar should
@@ -4722,6 +4773,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
orig_data_end = xdp->data_end;
orig_data = xdp->data;
eth = (struct ethhdr *)xdp->data;
+ orig_host = ether_addr_equal_64bits(eth->h_dest, skb->dev->dev_addr);
orig_bcast = is_multicast_ether_addr_64bits(eth->h_dest);
orig_eth_type = eth->h_proto;
@@ -4749,8 +4801,11 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
/* check if XDP changed eth hdr such SKB needs update */
eth = (struct ethhdr *)xdp->data;
if ((orig_eth_type != eth->h_proto) ||
+ (orig_host != ether_addr_equal_64bits(eth->h_dest,
+ skb->dev->dev_addr)) ||
(orig_bcast != is_multicast_ether_addr_64bits(eth->h_dest))) {
__skb_push(skb, ETH_HLEN);
+ skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, skb->dev);
}
@@ -5284,6 +5339,7 @@ skip_classify:
goto another_round;
case RX_HANDLER_EXACT:
deliver_exact = true;
+ break;
case RX_HANDLER_PASS:
break;
default:
@@ -5876,15 +5932,13 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old)
}
EXPORT_SYMBOL(napi_gro_flush);
-static struct list_head *gro_list_prepare(struct napi_struct *napi,
- struct sk_buff *skb)
+static void gro_list_prepare(const struct list_head *head,
+ const struct sk_buff *skb)
{
unsigned int maclen = skb->dev->hard_header_len;
u32 hash = skb_get_hash_raw(skb);
- struct list_head *head;
struct sk_buff *p;
- head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)].list;
list_for_each_entry(p, head, list) {
unsigned long diffs;
@@ -5910,11 +5964,9 @@ static struct list_head *gro_list_prepare(struct napi_struct *napi,
maclen);
NAPI_GRO_CB(p)->same_flow = !diffs;
}
-
- return head;
}
-static void skb_gro_reset_offset(struct sk_buff *skb)
+static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff)
{
const struct skb_shared_info *pinfo = skb_shinfo(skb);
const skb_frag_t *frag0 = &pinfo->frags[0];
@@ -5925,7 +5977,7 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
if (!skb_headlen(skb) && pinfo->nr_frags &&
!PageHighMem(skb_frag_page(frag0)) &&
- (!NET_IP_ALIGN || !(skb_frag_off(frag0) & 3))) {
+ (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) {
NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
skb_frag_size(frag0),
@@ -5975,11 +6027,11 @@ static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head)
static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
- u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
+ u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
+ struct gro_list *gro_list = &napi->gro_hash[bucket];
struct list_head *head = &offload_base;
struct packet_offload *ptype;
__be16 type = skb->protocol;
- struct list_head *gro_head;
struct sk_buff *pp = NULL;
enum gro_result ret;
int same_flow;
@@ -5988,7 +6040,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
if (netif_elide_gro(skb->dev))
goto normal;
- gro_head = gro_list_prepare(napi, skb);
+ gro_list_prepare(&gro_list->list, skb);
rcu_read_lock();
list_for_each_entry_rcu(ptype, head, list) {
@@ -6024,7 +6076,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
ipv6_gro_receive, inet_gro_receive,
- gro_head, skb);
+ &gro_list->list, skb);
break;
}
rcu_read_unlock();
@@ -6043,7 +6095,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
if (pp) {
skb_list_del_init(pp);
napi_gro_complete(napi, pp);
- napi->gro_hash[hash].count--;
+ gro_list->count--;
}
if (same_flow)
@@ -6052,16 +6104,16 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
if (NAPI_GRO_CB(skb)->flush)
goto normal;
- if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) {
- gro_flush_oldest(napi, gro_head);
- } else {
- napi->gro_hash[hash].count++;
- }
+ if (unlikely(gro_list->count >= MAX_GRO_SKBS))
+ gro_flush_oldest(napi, &gro_list->list);
+ else
+ gro_list->count++;
+
NAPI_GRO_CB(skb)->count = 1;
NAPI_GRO_CB(skb)->age = jiffies;
NAPI_GRO_CB(skb)->last = skb;
skb_shinfo(skb)->gso_size = skb_gro_len(skb);
- list_add(&skb->list, gro_head);
+ list_add(&skb->list, &gro_list->list);
ret = GRO_HELD;
pull:
@@ -6069,11 +6121,11 @@ pull:
if (grow > 0)
gro_pull_from_frag0(skb, grow);
ok:
- if (napi->gro_hash[hash].count) {
- if (!test_bit(hash, &napi->gro_bitmask))
- __set_bit(hash, &napi->gro_bitmask);
- } else if (test_bit(hash, &napi->gro_bitmask)) {
- __clear_bit(hash, &napi->gro_bitmask);
+ if (gro_list->count) {
+ if (!test_bit(bucket, &napi->gro_bitmask))
+ __set_bit(bucket, &napi->gro_bitmask);
+ } else if (test_bit(bucket, &napi->gro_bitmask)) {
+ __clear_bit(bucket, &napi->gro_bitmask);
}
return ret;
@@ -6143,7 +6195,7 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
skb_mark_napi_id(skb, napi);
trace_napi_gro_receive_entry(skb);
- skb_gro_reset_offset(skb);
+ skb_gro_reset_offset(skb, 0);
ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb));
trace_napi_gro_receive_exit(ret);
@@ -6232,7 +6284,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
napi->skb = NULL;
skb_reset_mac_header(skb);
- skb_gro_reset_offset(skb);
+ skb_gro_reset_offset(skb, hlen);
if (unlikely(skb_gro_header_hard(skb, hlen))) {
eth = skb_gro_header_slow(skb, hlen, 0);
@@ -6790,6 +6842,7 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
return err;
}
+EXPORT_SYMBOL(dev_set_threaded);
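Exporting dev_set_threaded() lets drivers opt into threaded NAPI polling directly rather than via the sysfs knob. A minimal, hypothetical use in a driver's ndo_open path:

/* Hypothetical: request threaded NAPI when the device is brought up. */
static int example_ndo_open(struct net_device *dev)
{
	int err = dev_set_threaded(dev, true);

	if (err)
		netdev_warn(dev, "threaded NAPI not enabled: %d\n", err);

	return 0;
}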
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
@@ -10338,14 +10391,20 @@ EXPORT_SYMBOL(register_netdev);
int netdev_refcnt_read(const struct net_device *dev)
{
+#ifdef CONFIG_PCPU_DEV_REFCNT
int i, refcnt = 0;
for_each_possible_cpu(i)
refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
return refcnt;
+#else
+ return refcount_read(&dev->dev_refcnt);
+#endif
}
EXPORT_SYMBOL(netdev_refcnt_read);
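With CONFIG_PCPU_DEV_REFCNT the reference count stays per-CPU; without it a plain refcount_t (dev->dev_refcnt) is used, initialised to 1, which is why the hunks below wait for the count to drop to 1 instead of 0. The hold/put helpers presumably take roughly this shape in the companion netdevice.h change (a sketch, not part of this diff):

/* Assumed shape of the helpers in include/linux/netdevice.h */
static inline void dev_hold(struct net_device *dev)
{
	if (dev) {
#ifdef CONFIG_PCPU_DEV_REFCNT
		this_cpu_inc(*dev->pcpu_refcnt);
#else
		refcount_inc(&dev->dev_refcnt);
#endif
	}
}

static inline void dev_put(struct net_device *dev)
{
	if (dev) {
#ifdef CONFIG_PCPU_DEV_REFCNT
		this_cpu_dec(*dev->pcpu_refcnt);
#else
		refcount_dec(&dev->dev_refcnt);
#endif
	}
}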
+int netdev_unregister_timeout_secs __read_mostly = 10;
+
#define WAIT_REFS_MIN_MSECS 1
#define WAIT_REFS_MAX_MSECS 250
/**
@@ -10370,7 +10429,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
rebroadcast_time = warning_time = jiffies;
refcnt = netdev_refcnt_read(dev);
- while (refcnt != 0) {
+ while (refcnt != 1) {
if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
rtnl_lock();
@@ -10407,7 +10466,9 @@ static void netdev_wait_allrefs(struct net_device *dev)
refcnt = netdev_refcnt_read(dev);
- if (refcnt && time_after(jiffies, warning_time + 10 * HZ)) {
+ if (refcnt != 1 &&
+ time_after(jiffies, warning_time +
+ netdev_unregister_timeout_secs * HZ)) {
pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
dev->name, refcnt);
warning_time = jiffies;
@@ -10483,7 +10544,7 @@ void netdev_run_todo(void)
netdev_wait_allrefs(dev);
/* paranoia */
- BUG_ON(netdev_refcnt_read(dev));
+ BUG_ON(netdev_refcnt_read(dev) != 1);
BUG_ON(!list_empty(&dev->ptype_all));
BUG_ON(!list_empty(&dev->ptype_specific));
WARN_ON(rcu_access_pointer(dev->ip_ptr));
@@ -10700,9 +10761,14 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev = PTR_ALIGN(p, NETDEV_ALIGN);
dev->padded = (char *)dev - (char *)p;
+#ifdef CONFIG_PCPU_DEV_REFCNT
dev->pcpu_refcnt = alloc_percpu(int);
if (!dev->pcpu_refcnt)
goto free_dev;
+ dev_hold(dev);
+#else
+ refcount_set(&dev->dev_refcnt, 1);
+#endif
if (dev_addr_init(dev))
goto free_pcpu;
@@ -10766,8 +10832,10 @@ free_all:
return NULL;
free_pcpu:
+#ifdef CONFIG_PCPU_DEV_REFCNT
free_percpu(dev->pcpu_refcnt);
free_dev:
+#endif
netdev_freemem(dev);
return NULL;
}
@@ -10809,8 +10877,10 @@ void free_netdev(struct net_device *dev)
list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
netif_napi_del(p);
+#ifdef CONFIG_PCPU_DEV_REFCNT
free_percpu(dev->pcpu_refcnt);
dev->pcpu_refcnt = NULL;
+#endif
free_percpu(dev->xdp_bulkq);
dev->xdp_bulkq = NULL;
@@ -10998,11 +11068,13 @@ void unregister_netdev(struct net_device *dev)
EXPORT_SYMBOL(unregister_netdev);
/**
- * dev_change_net_namespace - move device to different nethost namespace
+ * __dev_change_net_namespace - move device to different nethost namespace
* @dev: device
* @net: network namespace
* @pat: If not NULL name pattern to try if the current device name
* is already taken in the destination network namespace.
+ * @new_ifindex: If not zero, specifies device index in the target
+ * namespace.
*
* This function shuts down a device interface and moves it
* to a new network namespace. On success 0 is returned, on
@@ -11011,10 +11083,11 @@ EXPORT_SYMBOL(unregister_netdev);
* Callers must hold the rtnl semaphore.
*/
-int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
+int __dev_change_net_namespace(struct net_device *dev, struct net *net,
+ const char *pat, int new_ifindex)
{
struct net *net_old = dev_net(dev);
- int err, new_nsid, new_ifindex;
+ int err, new_nsid;
ASSERT_RTNL();
@@ -11045,6 +11118,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
goto out;
}
+ /* Check that new_ifindex isn't used yet. */
+ err = -EBUSY;
+ if (new_ifindex && __dev_get_by_index(net, new_ifindex))
+ goto out;
+
/*
* And now a mini version of register_netdevice unregister_netdevice.
*/
@@ -11072,10 +11150,12 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL);
/* If there is an ifindex conflict assign a new one */
- if (__dev_get_by_index(net, dev->ifindex))
- new_ifindex = dev_new_index(net);
- else
- new_ifindex = dev->ifindex;
+ if (!new_ifindex) {
+ if (__dev_get_by_index(net, dev->ifindex))
+ new_ifindex = dev_new_index(net);
+ else
+ new_ifindex = dev->ifindex;
+ }
rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid,
new_ifindex);
@@ -11128,7 +11208,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
out:
return err;
}
-EXPORT_SYMBOL_GPL(dev_change_net_namespace);
+EXPORT_SYMBOL_GPL(__dev_change_net_namespace);
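Renaming the exported symbol implies the old entry point survives as a thin wrapper that passes new_ifindex == 0, meaning "allocate or keep an ifindex as before". A sketch of that wrapper, presumably part of the companion netdevice.h change:

/* Assumed wrapper kept for existing callers (companion header change). */
static inline int dev_change_net_namespace(struct net_device *dev,
					   struct net *net, const char *pat)
{
	return __dev_change_net_namespace(dev, net, pat, 0);
}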
static int dev_cpu_dead(unsigned int oldcpu)
{