From fe8222406c8277a21172479d3a8283d31c209028 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 9 Nov 2010 10:47:38 +0000 Subject: net: Simplify RX queue allocation This patch move RX queue allocation to alloc_netdev_mq and freeing of the queues to free_netdev (symmetric to TX queue allocation). Each kobject RX queue takes a reference to the queue's device so that the device can't be freed before all the kobjects have been released-- this obviates the need for reference counts specific to RX queues. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net/core/net-sysfs.c') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index a5ff5a89f376..3ba526b56fe3 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -706,7 +706,6 @@ static struct attribute *rx_queue_default_attrs[] = { static void rx_queue_release(struct kobject *kobj) { struct netdev_rx_queue *queue = to_rx_queue(kobj); - struct netdev_rx_queue *first = queue->first; struct rps_map *map; struct rps_dev_flow_table *flow_table; @@ -719,8 +718,7 @@ static void rx_queue_release(struct kobject *kobj) if (flow_table) call_rcu(&flow_table->rcu, rps_dev_flow_table_release); - if (atomic_dec_and_test(&first->count)) - kfree(first); + dev_put(queue->dev); } static struct kobj_type rx_queue_ktype = { @@ -732,7 +730,6 @@ static struct kobj_type rx_queue_ktype = { static int rx_queue_add_kobject(struct net_device *net, int index) { struct netdev_rx_queue *queue = net->_rx + index; - struct netdev_rx_queue *first = queue->first; struct kobject *kobj = &queue->kobj; int error = 0; @@ -745,7 +742,7 @@ static int rx_queue_add_kobject(struct net_device *net, int index) } kobject_uevent(kobj, KOBJ_ADD); - atomic_inc(&first->count); + dev_hold(queue->dev); return error; } -- cgit From 9ea19481db31d614f729f346bdcf28e4e60ff14a Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 16 Nov 2010 06:31:39 +0000 Subject: net: zero kobject in rx_queue_release netif_set_real_num_rx_queues() can decrement and increment the number of rx queues. For example ixgbe does this as features and offloads are toggled. Presumably this could also happen across down/up on most devices if the available resources changed (cpu offlined). The kobject needs to be zero'd in this case so that the state is not preserved across kobject_put()/kobject_init_and_add(). This resolves the following error report. ixgbe 0000:03:00.0: eth2: NIC Link is Up 10 Gbps, Flow Control: RX/TX kobject (ffff880324b83210): tried to init an initialized object, something is seriously wrong. Pid: 1972, comm: lldpad Not tainted 2.6.37-rc18021qaz+ #169 Call Trace: [] kobject_init+0x3a/0x83 [] kobject_init_and_add+0x23/0x57 [] ? mark_lock+0x21/0x267 [] net_rx_queue_update_kobjects+0x63/0xc6 [] netif_set_real_num_rx_queues+0x5f/0x78 [] ixgbe_set_num_queues+0x1c6/0x1ca [ixgbe] [] ixgbe_init_interrupt_scheme+0x1e/0x79c [ixgbe] [] ixgbe_dcbnl_set_state+0x167/0x189 [ixgbe] Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net/core/net-sysfs.c') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 3ba526b56fe3..7abeb7ceaa4c 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -711,13 +711,18 @@ static void rx_queue_release(struct kobject *kobj) map = rcu_dereference_raw(queue->rps_map); - if (map) + if (map) { + RCU_INIT_POINTER(queue->rps_map, NULL); call_rcu(&map->rcu, rps_map_release); + } flow_table = rcu_dereference_raw(queue->rps_flow_table); - if (flow_table) + if (flow_table) { + RCU_INIT_POINTER(queue->rps_flow_table, NULL); call_rcu(&flow_table->rcu, rps_dev_flow_table_release); + } + memset(kobj, 0, sizeof(*kobj)); dev_put(queue->dev); } -- cgit From 1d24eb4815d1e0e8b451ecc546645f8ef1176d4f Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 21 Nov 2010 13:17:27 +0000 Subject: xps: Transmit Packet Steering This patch implements transmit packet steering (XPS) for multiqueue devices. XPS selects a transmit queue during packet transmission based on configuration. This is done by mapping the CPU transmitting the packet to a queue. This is the transmit side analogue to RPS-- where RPS is selecting a CPU based on receive queue, XPS selects a queue based on the CPU (previously there was an XPS patch from Eric Dumazet, but that might more appropriately be called transmit completion steering). Each transmit queue can be associated with a number of CPUs which will use the queue to send packets. This is configured as a CPU mask on a per queue basis in: /sys/class/net/eth/queues/tx-/xps_cpus The mappings are stored per device in an inverted data structure that maps CPUs to queues. In the netdevice structure this is an array of num_possible_cpu structures where each structure holds and array of queue_indexes for queues which that CPU can use. The benefits of XPS are improved locality in the per queue data structures. Also, transmit completions are more likely to be done nearer to the sending thread, so this should promote locality back to the socket on free (e.g. UDP). The benefits of XPS are dependent on cache hierarchy, application load, and other factors. XPS would nominally be configured so that a queue would only be shared by CPUs which are sharing a cache, the degenerative configuration woud be that each CPU has it's own queue. Below are some benchmark results which show the potential benfit of this patch. The netperf test has 500 instances of netperf TCP_RR test with 1 byte req. and resp. bnx2x on 16 core AMD XPS (16 queues, 1 TX queue per CPU) 1234K at 100% CPU No XPS (16 queues) 996K at 100% CPU Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 369 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 364 insertions(+), 5 deletions(-) (limited to 'net/core/net-sysfs.c') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 7abeb7ceaa4c..68dbbfdee274 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -772,18 +772,377 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) return error; } -static int rx_queue_register_kobjects(struct net_device *net) +/* + * netdev_queue sysfs structures and functions. + */ +struct netdev_queue_attribute { + struct attribute attr; + ssize_t (*show)(struct netdev_queue *queue, + struct netdev_queue_attribute *attr, char *buf); + ssize_t (*store)(struct netdev_queue *queue, + struct netdev_queue_attribute *attr, const char *buf, size_t len); +}; +#define to_netdev_queue_attr(_attr) container_of(_attr, \ + struct netdev_queue_attribute, attr) + +#define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj) + +static ssize_t netdev_queue_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr); + struct netdev_queue *queue = to_netdev_queue(kobj); + + if (!attribute->show) + return -EIO; + + return attribute->show(queue, attribute, buf); +} + +static ssize_t netdev_queue_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t count) +{ + struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr); + struct netdev_queue *queue = to_netdev_queue(kobj); + + if (!attribute->store) + return -EIO; + + return attribute->store(queue, attribute, buf, count); +} + +static const struct sysfs_ops netdev_queue_sysfs_ops = { + .show = netdev_queue_attr_show, + .store = netdev_queue_attr_store, +}; + +static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) { + struct net_device *dev = queue->dev; + int i; + + for (i = 0; i < dev->num_tx_queues; i++) + if (queue == &dev->_tx[i]) + break; + + BUG_ON(i >= dev->num_tx_queues); + + return i; +} + + +static ssize_t show_xps_map(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, char *buf) +{ + struct net_device *dev = queue->dev; + struct xps_dev_maps *dev_maps; + cpumask_var_t mask; + unsigned long index; + size_t len = 0; + int i; + + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + index = get_netdev_queue_index(queue); + + rcu_read_lock(); + dev_maps = rcu_dereference(dev->xps_maps); + if (dev_maps) { + for_each_possible_cpu(i) { + struct xps_map *map = + rcu_dereference(dev_maps->cpu_map[i]); + if (map) { + int j; + for (j = 0; j < map->len; j++) { + if (map->queues[j] == index) { + cpumask_set_cpu(i, mask); + break; + } + } + } + } + } + rcu_read_unlock(); + + len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask); + if (PAGE_SIZE - len < 3) { + free_cpumask_var(mask); + return -EINVAL; + } + + free_cpumask_var(mask); + len += sprintf(buf + len, "\n"); + return len; +} + +static void xps_map_release(struct rcu_head *rcu) +{ + struct xps_map *map = container_of(rcu, struct xps_map, rcu); + + kfree(map); +} + +static void xps_dev_maps_release(struct rcu_head *rcu) +{ + struct xps_dev_maps *dev_maps = + container_of(rcu, struct xps_dev_maps, rcu); + + kfree(dev_maps); +} + +static DEFINE_MUTEX(xps_map_mutex); + +static ssize_t store_xps_map(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, + const char *buf, size_t len) +{ + struct net_device *dev = queue->dev; + cpumask_var_t mask; + int err, i, cpu, pos, map_len, alloc_len, need_set; + unsigned long index; + struct xps_map *map, *new_map; + struct xps_dev_maps *dev_maps, *new_dev_maps; + int nonempty = 0; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + index = get_netdev_queue_index(queue); + + err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits); + if (err) { + free_cpumask_var(mask); + return err; + } + + new_dev_maps = kzalloc(max_t(unsigned, + XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL); + if (!new_dev_maps) { + free_cpumask_var(mask); + return -ENOMEM; + } + + mutex_lock(&xps_map_mutex); + + dev_maps = dev->xps_maps; + + for_each_possible_cpu(cpu) { + new_map = map = dev_maps ? dev_maps->cpu_map[cpu] : NULL; + + if (map) { + for (pos = 0; pos < map->len; pos++) + if (map->queues[pos] == index) + break; + map_len = map->len; + alloc_len = map->alloc_len; + } else + pos = map_len = alloc_len = 0; + + need_set = cpu_isset(cpu, *mask) && cpu_online(cpu); + + if (need_set && pos >= map_len) { + /* Need to add queue to this CPU's map */ + if (map_len >= alloc_len) { + alloc_len = alloc_len ? + 2 * alloc_len : XPS_MIN_MAP_ALLOC; + new_map = kzalloc(XPS_MAP_SIZE(alloc_len), + GFP_KERNEL); + if (!new_map) + goto error; + new_map->alloc_len = alloc_len; + for (i = 0; i < map_len; i++) + new_map->queues[i] = map->queues[i]; + new_map->len = map_len; + } + new_map->queues[new_map->len++] = index; + } else if (!need_set && pos < map_len) { + /* Need to remove queue from this CPU's map */ + if (map_len > 1) + new_map->queues[pos] = + new_map->queues[--new_map->len]; + else + new_map = NULL; + } + new_dev_maps->cpu_map[cpu] = new_map; + } + + /* Cleanup old maps */ + for_each_possible_cpu(cpu) { + map = dev_maps ? dev_maps->cpu_map[cpu] : NULL; + if (map && new_dev_maps->cpu_map[cpu] != map) + call_rcu(&map->rcu, xps_map_release); + if (new_dev_maps->cpu_map[cpu]) + nonempty = 1; + } + + if (nonempty) + rcu_assign_pointer(dev->xps_maps, new_dev_maps); + else { + kfree(new_dev_maps); + rcu_assign_pointer(dev->xps_maps, NULL); + } + + if (dev_maps) + call_rcu(&dev_maps->rcu, xps_dev_maps_release); + + mutex_unlock(&xps_map_mutex); + + free_cpumask_var(mask); + return len; + +error: + mutex_unlock(&xps_map_mutex); + + if (new_dev_maps) + for_each_possible_cpu(i) + kfree(new_dev_maps->cpu_map[i]); + kfree(new_dev_maps); + free_cpumask_var(mask); + return -ENOMEM; +} + +static struct netdev_queue_attribute xps_cpus_attribute = + __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map); + +static struct attribute *netdev_queue_default_attrs[] = { + &xps_cpus_attribute.attr, + NULL +}; + +static void netdev_queue_release(struct kobject *kobj) +{ + struct netdev_queue *queue = to_netdev_queue(kobj); + struct net_device *dev = queue->dev; + struct xps_dev_maps *dev_maps; + struct xps_map *map; + unsigned long index; + int i, pos, nonempty = 0; + + index = get_netdev_queue_index(queue); + + mutex_lock(&xps_map_mutex); + dev_maps = dev->xps_maps; + + if (dev_maps) { + for_each_possible_cpu(i) { + map = dev_maps->cpu_map[i]; + if (!map) + continue; + + for (pos = 0; pos < map->len; pos++) + if (map->queues[pos] == index) + break; + + if (pos < map->len) { + if (map->len > 1) + map->queues[pos] = + map->queues[--map->len]; + else { + RCU_INIT_POINTER(dev_maps->cpu_map[i], + NULL); + call_rcu(&map->rcu, xps_map_release); + map = NULL; + } + } + if (map) + nonempty = 1; + } + + if (!nonempty) { + RCU_INIT_POINTER(dev->xps_maps, NULL); + call_rcu(&dev_maps->rcu, xps_dev_maps_release); + } + } + + mutex_unlock(&xps_map_mutex); + + memset(kobj, 0, sizeof(*kobj)); + dev_put(queue->dev); +} + +static struct kobj_type netdev_queue_ktype = { + .sysfs_ops = &netdev_queue_sysfs_ops, + .release = netdev_queue_release, + .default_attrs = netdev_queue_default_attrs, +}; + +static int netdev_queue_add_kobject(struct net_device *net, int index) +{ + struct netdev_queue *queue = net->_tx + index; + struct kobject *kobj = &queue->kobj; + int error = 0; + + kobj->kset = net->queues_kset; + error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, + "tx-%u", index); + if (error) { + kobject_put(kobj); + return error; + } + + kobject_uevent(kobj, KOBJ_ADD); + dev_hold(queue->dev); + + return error; +} + +int +netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) +{ + int i; + int error = 0; + + for (i = old_num; i < new_num; i++) { + error = netdev_queue_add_kobject(net, i); + if (error) { + new_num = old_num; + break; + } + } + + while (--i >= new_num) + kobject_put(&net->_tx[i].kobj); + + return error; +} + +static int register_queue_kobjects(struct net_device *net) +{ + int error = 0, txq = 0, rxq = 0; + net->queues_kset = kset_create_and_add("queues", NULL, &net->dev.kobj); if (!net->queues_kset) return -ENOMEM; - return net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues); + + error = net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues); + if (error) + goto error; + rxq = net->real_num_rx_queues; + + error = netdev_queue_update_kobjects(net, 0, + net->real_num_tx_queues); + if (error) + goto error; + txq = net->real_num_tx_queues; + + return 0; + +error: + netdev_queue_update_kobjects(net, txq, 0); + net_rx_queue_update_kobjects(net, rxq, 0); + return error; } -static void rx_queue_remove_kobjects(struct net_device *net) +static void remove_queue_kobjects(struct net_device *net) { net_rx_queue_update_kobjects(net, net->real_num_rx_queues, 0); + netdev_queue_update_kobjects(net, net->real_num_tx_queues, 0); kset_unregister(net->queues_kset); } #endif /* CONFIG_RPS */ @@ -886,7 +1245,7 @@ void netdev_unregister_kobject(struct net_device * net) kobject_get(&dev->kobj); #ifdef CONFIG_RPS - rx_queue_remove_kobjects(net); + remove_queue_kobjects(net); #endif device_del(dev); @@ -927,7 +1286,7 @@ int netdev_register_kobject(struct net_device *net) return error; #ifdef CONFIG_RPS - error = rx_queue_register_kobjects(net); + error = register_queue_kobjects(net); if (error) { device_del(dev); return error; -- cgit From bf26414510103448ad3dc069c7422462f03ea3d7 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 26 Nov 2010 08:36:09 +0000 Subject: xps: Add CONFIG_XPS This patch adds XPS_CONFIG option to enable and disable XPS. This is done in the same manner as RPS_CONFIG. This is also fixes build failure in XPS code when SMP is not enabled. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 47 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 13 deletions(-) (limited to 'net/core/net-sysfs.c') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 68dbbfdee274..99c11294623f 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -751,10 +751,12 @@ static int rx_queue_add_kobject(struct net_device *net, int index) return error; } +#endif /* CONFIG_RPS */ int net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) { +#ifdef CONFIG_RPS int i; int error = 0; @@ -770,8 +772,12 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) kobject_put(&net->_rx[i].kobj); return error; +#else + return 0; +#endif } +#ifdef CONFIG_XPS /* * netdev_queue sysfs structures and functions. */ @@ -1090,10 +1096,12 @@ static int netdev_queue_add_kobject(struct net_device *net, int index) return error; } +#endif /* CONFIG_XPS */ int netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) { +#ifdef CONFIG_XPS int i; int error = 0; @@ -1109,27 +1117,36 @@ netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) kobject_put(&net->_tx[i].kobj); return error; +#else + return 0; +#endif } static int register_queue_kobjects(struct net_device *net) { - int error = 0, txq = 0, rxq = 0; + int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0; +#if defined(CONFIG_RPS) || defined(CONFIG_XPS) net->queues_kset = kset_create_and_add("queues", NULL, &net->dev.kobj); if (!net->queues_kset) return -ENOMEM; +#endif + +#ifdef CONFIG_RPS + real_rx = net->real_num_rx_queues; +#endif + real_tx = net->real_num_tx_queues; - error = net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues); + error = net_rx_queue_update_kobjects(net, 0, real_rx); if (error) goto error; - rxq = net->real_num_rx_queues; + rxq = real_rx; - error = netdev_queue_update_kobjects(net, 0, - net->real_num_tx_queues); + error = netdev_queue_update_kobjects(net, 0, real_tx); if (error) goto error; - txq = net->real_num_tx_queues; + txq = real_tx; return 0; @@ -1141,11 +1158,19 @@ error: static void remove_queue_kobjects(struct net_device *net) { - net_rx_queue_update_kobjects(net, net->real_num_rx_queues, 0); - netdev_queue_update_kobjects(net, net->real_num_tx_queues, 0); + int real_rx = 0, real_tx = 0; + +#ifdef CONFIG_RPS + real_rx = net->real_num_rx_queues; +#endif + real_tx = net->real_num_tx_queues; + + net_rx_queue_update_kobjects(net, real_rx, 0); + netdev_queue_update_kobjects(net, real_tx, 0); +#if defined(CONFIG_RPS) || defined(CONFIG_XPS) kset_unregister(net->queues_kset); +#endif } -#endif /* CONFIG_RPS */ static const void *net_current_ns(void) { @@ -1244,9 +1269,7 @@ void netdev_unregister_kobject(struct net_device * net) kobject_get(&dev->kobj); -#ifdef CONFIG_RPS remove_queue_kobjects(net); -#endif device_del(dev); } @@ -1285,13 +1308,11 @@ int netdev_register_kobject(struct net_device *net) if (error) return error; -#ifdef CONFIG_RPS error = register_queue_kobjects(net); if (error) { device_del(dev); return error; } -#endif return error; } -- cgit From b02038a17b271e0f70616c54e4eccb5cc33d1b74 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 28 Nov 2010 05:43:24 +0000 Subject: xps: NUMA allocations for per cpu data store_xps_map() allocates maps that are used by single cpu, it makes sense to use NUMA allocations. Signed-off-by: Eric Dumazet Cc: Tom Herbert Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net/core/net-sysfs.c') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 99c11294623f..35ef42fa0cf3 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -956,8 +956,9 @@ static ssize_t store_xps_map(struct netdev_queue *queue, if (map_len >= alloc_len) { alloc_len = alloc_len ? 2 * alloc_len : XPS_MIN_MAP_ALLOC; - new_map = kzalloc(XPS_MAP_SIZE(alloc_len), - GFP_KERNEL); + new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), + GFP_KERNEL, + cpu_to_node(cpu)); if (!new_map) goto error; new_map->alloc_len = alloc_len; -- cgit From a41778694806ac1ccd4b1dafed1abef8d5ba98ac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 28 Nov 2010 21:43:02 +0000 Subject: xps: add __rcu annotations Avoid sparse warnings : add __rcu annotations and use rcu_dereference_protected() where necessary. Signed-off-by: Eric Dumazet Cc: Tom Herbert Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'net/core/net-sysfs.c') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 35ef42fa0cf3..f85cee3d869e 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -899,6 +899,8 @@ static void xps_dev_maps_release(struct rcu_head *rcu) } static DEFINE_MUTEX(xps_map_mutex); +#define xmap_dereference(P) \ + rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) static ssize_t store_xps_map(struct netdev_queue *queue, struct netdev_queue_attribute *attribute, @@ -935,11 +937,12 @@ static ssize_t store_xps_map(struct netdev_queue *queue, mutex_lock(&xps_map_mutex); - dev_maps = dev->xps_maps; + dev_maps = xmap_dereference(dev->xps_maps); for_each_possible_cpu(cpu) { - new_map = map = dev_maps ? dev_maps->cpu_map[cpu] : NULL; - + map = dev_maps ? + xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; + new_map = map; if (map) { for (pos = 0; pos < map->len; pos++) if (map->queues[pos] == index) @@ -975,13 +978,14 @@ static ssize_t store_xps_map(struct netdev_queue *queue, else new_map = NULL; } - new_dev_maps->cpu_map[cpu] = new_map; + RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map); } /* Cleanup old maps */ for_each_possible_cpu(cpu) { - map = dev_maps ? dev_maps->cpu_map[cpu] : NULL; - if (map && new_dev_maps->cpu_map[cpu] != map) + map = dev_maps ? + xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; + if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map) call_rcu(&map->rcu, xps_map_release); if (new_dev_maps->cpu_map[cpu]) nonempty = 1; @@ -1007,7 +1011,9 @@ error: if (new_dev_maps) for_each_possible_cpu(i) - kfree(new_dev_maps->cpu_map[i]); + kfree(rcu_dereference_protected( + new_dev_maps->cpu_map[i], + 1)); kfree(new_dev_maps); free_cpumask_var(mask); return -ENOMEM; @@ -1033,11 +1039,11 @@ static void netdev_queue_release(struct kobject *kobj) index = get_netdev_queue_index(queue); mutex_lock(&xps_map_mutex); - dev_maps = dev->xps_maps; + dev_maps = xmap_dereference(dev->xps_maps); if (dev_maps) { for_each_possible_cpu(i) { - map = dev_maps->cpu_map[i]; + map = xmap_dereference(dev_maps->cpu_map[i]); if (!map) continue; -- cgit From f2cd2d3e9b3ef960612e362f0ad129d735452df2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 29 Nov 2010 08:14:37 +0000 Subject: net sched: use xps information for qdisc NUMA affinity Allocate qdisc memory according to NUMA properties of cpus included in xps map. To be effective, qdisc should be (re)setup after changes of /sys/class/net/eth/queues/tx-/xps_cpus I added a numa_node field in struct netdev_queue, containing NUMA node if all cpus included in xps_cpus share same node, else -1. Signed-off-by: Eric Dumazet Cc: Ben Hutchings Cc: Tom Herbert Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'net/core/net-sysfs.c') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f85cee3d869e..85e8b5326dd6 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -913,6 +913,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue, struct xps_map *map, *new_map; struct xps_dev_maps *dev_maps, *new_dev_maps; int nonempty = 0; + int numa_node = -2; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -953,7 +954,14 @@ static ssize_t store_xps_map(struct netdev_queue *queue, pos = map_len = alloc_len = 0; need_set = cpu_isset(cpu, *mask) && cpu_online(cpu); - +#ifdef CONFIG_NUMA + if (need_set) { + if (numa_node == -2) + numa_node = cpu_to_node(cpu); + else if (numa_node != cpu_to_node(cpu)) + numa_node = -1; + } +#endif if (need_set && pos >= map_len) { /* Need to add queue to this CPU's map */ if (map_len >= alloc_len) { @@ -1001,6 +1009,8 @@ static ssize_t store_xps_map(struct netdev_queue *queue, if (dev_maps) call_rcu(&dev_maps->rcu, xps_dev_maps_release); + netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node : -1); + mutex_unlock(&xps_map_mutex); free_cpumask_var(mask); -- cgit From b236da6931e2482bfe44a7865dd4e7bb036f3496 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 14 Dec 2010 03:09:15 +0000 Subject: net: use NUMA_NO_NODE instead of the magic number -1 Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/core/net-sysfs.c') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 85e8b5326dd6..e23c01be5a5b 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1009,7 +1009,8 @@ static ssize_t store_xps_map(struct netdev_queue *queue, if (dev_maps) call_rcu(&dev_maps->rcu, xps_dev_maps_release); - netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node : -1); + netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node : + NUMA_NO_NODE); mutex_unlock(&xps_map_mutex); -- cgit