diff options
Diffstat (limited to 'net/core/flow.c')
-rw-r--r-- | net/core/flow.c | 137 |
1 files changed, 100 insertions, 37 deletions
diff --git a/net/core/flow.c b/net/core/flow.c index 127c8a7ffd61..dfa602ceb8cd 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -22,7 +22,7 @@ #include <linux/cpumask.h> #include <linux/mutex.h> #include <net/flow.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <linux/security.h> struct flow_cache_entry { @@ -30,6 +30,7 @@ struct flow_cache_entry { struct hlist_node hlist; struct list_head gc_list; } u; + struct net *net; u16 family; u8 dir; u32 genid; @@ -131,14 +132,14 @@ static void __flow_cache_shrink(struct flow_cache *fc, int shrink_to) { struct flow_cache_entry *fle; - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; LIST_HEAD(gc_list); int i, deleted = 0; for (i = 0; i < flow_cache_hash_size(fc); i++) { int saved = 0; - hlist_for_each_entry_safe(fle, entry, tmp, + hlist_for_each_entry_safe(fle, tmp, &fcp->hash_table[i], u.hlist) { if (saved < shrink_to && flow_entry_valid(fle)) { @@ -172,31 +173,28 @@ static void flow_new_hash_rnd(struct flow_cache *fc, static u32 flow_hash_code(struct flow_cache *fc, struct flow_cache_percpu *fcp, - struct flowi *key) + const struct flowi *key, + size_t keysize) { - u32 *k = (u32 *) key; + const u32 *k = (const u32 *) key; + const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32); - return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) + return jhash2(k, length, fcp->hash_rnd) & (flow_cache_hash_size(fc) - 1); } -typedef unsigned long flow_compare_t; - /* I hear what you're saying, use memcmp. But memcmp cannot make - * important assumptions that we can here, such as alignment and - * constant size. + * important assumptions that we can here, such as alignment. */ -static int flow_key_compare(struct flowi *key1, struct flowi *key2) +static int flow_key_compare(const struct flowi *key1, const struct flowi *key2, + size_t keysize) { - flow_compare_t *k1, *k1_lim, *k2; - const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t); - - BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t)); + const flow_compare_t *k1, *k1_lim, *k2; - k1 = (flow_compare_t *) key1; - k1_lim = k1 + n_elem; + k1 = (const flow_compare_t *) key1; + k1_lim = k1 + keysize; - k2 = (flow_compare_t *) key2; + k2 = (const flow_compare_t *) key2; do { if (*k1++ != *k2++) @@ -207,14 +205,14 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) } struct flow_cache_object * -flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, +flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver, void *ctx) { struct flow_cache *fc = &flow_cache_global; struct flow_cache_percpu *fcp; struct flow_cache_entry *fle, *tfle; - struct hlist_node *entry; struct flow_cache_object *flo; + size_t keysize; unsigned int hash; local_bh_disable(); @@ -222,6 +220,11 @@ flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, fle = NULL; flo = NULL; + + keysize = flow_key_size(family); + if (!keysize) + goto nocache; + /* Packet really early in init? Making flow_cache_init a * pre-smp initcall would solve this. --RR */ if (!fcp->hash_table) @@ -230,11 +233,12 @@ flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, if (fcp->hash_rnd_recalc) flow_new_hash_rnd(fc, fcp); - hash = flow_hash_code(fc, fcp, key); - hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) { - if (tfle->family == family && + hash = flow_hash_code(fc, fcp, key, keysize); + hlist_for_each_entry(tfle, &fcp->hash_table[hash], u.hlist) { + if (tfle->net == net && + tfle->family == family && tfle->dir == dir && - flow_key_compare(key, &tfle->key) == 0) { + flow_key_compare(key, &tfle->key, keysize) == 0) { fle = tfle; break; } @@ -246,9 +250,10 @@ flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); if (fle) { + fle->net = net; fle->family = family; fle->dir = dir; - memcpy(&fle->key, key, sizeof(*key)); + memcpy(&fle->key, key, keysize * sizeof(flow_compare_t)); fle->object = NULL; hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]); fcp->hash_count++; @@ -280,7 +285,7 @@ nocache: else fle->genid--; } else { - if (flo && !IS_ERR(flo)) + if (!IS_ERR_OR_NULL(flo)) flo->ops->delete(flo); } ret_object: @@ -295,13 +300,13 @@ static void flow_cache_flush_tasklet(unsigned long data) struct flow_cache *fc = info->cache; struct flow_cache_percpu *fcp; struct flow_cache_entry *fle; - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; LIST_HEAD(gc_list); int i, deleted = 0; fcp = this_cpu_ptr(fc->percpu); for (i = 0; i < flow_cache_hash_size(fc); i++) { - hlist_for_each_entry_safe(fle, entry, tmp, + hlist_for_each_entry_safe(fle, tmp, &fcp->hash_table[i], u.hlist) { if (flow_entry_valid(fle)) continue; @@ -318,14 +323,30 @@ static void flow_cache_flush_tasklet(unsigned long data) complete(&info->completion); } +/* + * Return whether a cpu needs flushing. Conservatively, we assume + * the presence of any entries means the core may require flushing, + * since the flow_cache_ops.check() function may assume it's running + * on the same core as the per-cpu cache component. + */ +static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu) +{ + struct flow_cache_percpu *fcp; + int i; + + fcp = per_cpu_ptr(fc->percpu, cpu); + for (i = 0; i < flow_cache_hash_size(fc); i++) + if (!hlist_empty(&fcp->hash_table[i])) + return 0; + return 1; +} + static void flow_cache_flush_per_cpu(void *data) { struct flow_flush_info *info = data; - int cpu; struct tasklet_struct *tasklet; - cpu = smp_processor_id(); - tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet; + tasklet = &this_cpu_ptr(info->cache->percpu)->flush_tasklet; tasklet->data = (unsigned long)info; tasklet_schedule(tasklet); } @@ -334,25 +355,55 @@ void flow_cache_flush(void) { struct flow_flush_info info; static DEFINE_MUTEX(flow_flush_sem); + cpumask_var_t mask; + int i, self; + + /* Track which cpus need flushing to avoid disturbing all cores. */ + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return; + cpumask_clear(mask); /* Don't want cpus going down or up during this. */ get_online_cpus(); mutex_lock(&flow_flush_sem); info.cache = &flow_cache_global; - atomic_set(&info.cpuleft, num_online_cpus()); + for_each_online_cpu(i) + if (!flow_cache_percpu_empty(info.cache, i)) + cpumask_set_cpu(i, mask); + atomic_set(&info.cpuleft, cpumask_weight(mask)); + if (atomic_read(&info.cpuleft) == 0) + goto done; + init_completion(&info.completion); local_bh_disable(); - smp_call_function(flow_cache_flush_per_cpu, &info, 0); - flow_cache_flush_tasklet((unsigned long)&info); + self = cpumask_test_and_clear_cpu(smp_processor_id(), mask); + on_each_cpu_mask(mask, flow_cache_flush_per_cpu, &info, 0); + if (self) + flow_cache_flush_tasklet((unsigned long)&info); local_bh_enable(); wait_for_completion(&info.completion); + +done: mutex_unlock(&flow_flush_sem); put_online_cpus(); + free_cpumask_var(mask); +} + +static void flow_cache_flush_task(struct work_struct *work) +{ + flow_cache_flush(); +} + +static DECLARE_WORK(flow_cache_flush_work, flow_cache_flush_task); + +void flow_cache_flush_deferred(void) +{ + schedule_work(&flow_cache_flush_work); } -static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) +static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) { struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc); @@ -370,7 +421,7 @@ static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) return 0; } -static int __cpuinit flow_cache_cpu(struct notifier_block *nfb, +static int flow_cache_cpu(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -407,7 +458,7 @@ static int __init flow_cache_init(struct flow_cache *fc) for_each_online_cpu(i) { if (flow_cache_cpu_prepare(fc, i)) - return -ENOMEM; + goto err; } fc->hotcpu_notifier = (struct notifier_block){ .notifier_call = flow_cache_cpu, @@ -420,6 +471,18 @@ static int __init flow_cache_init(struct flow_cache *fc) add_timer(&fc->rnd_timer); return 0; + +err: + for_each_possible_cpu(i) { + struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i); + kfree(fcp->hash_table); + fcp->hash_table = NULL; + } + + free_percpu(fc->percpu); + fc->percpu = NULL; + + return -ENOMEM; } static int __init flow_cache_init_global(void) |