diff options
Diffstat (limited to 'kernel/bpf/bpf_local_storage.c')
-rw-r--r-- | kernel/bpf/bpf_local_storage.c | 175 |
1 files changed, 94 insertions, 81 deletions
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 146824cc9689..fa56c30833ff 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -73,7 +73,7 @@ static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem) struct bpf_local_storage_elem * bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, - void *value, bool charge_mem, gfp_t gfp_flags) + void *value, bool charge_mem, bool swap_uptrs, gfp_t gfp_flags) { struct bpf_local_storage_elem *selem; @@ -81,9 +81,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, return NULL; if (smap->bpf_ma) { - migrate_disable(); selem = bpf_mem_cache_alloc_flags(&smap->selem_ma, gfp_flags); - migrate_enable(); if (selem) /* Keep the original bpf_map_kzalloc behavior * before started using the bpf_mem_cache_alloc. @@ -99,9 +97,12 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, } if (selem) { - if (value) + if (value) { + /* No need to call check_and_init_map_value as memory is zero init */ copy_map_value(&smap->map, SDATA(selem)->data, value); - /* No need to call check_and_init_map_value as memory is zero init */ + if (swap_uptrs) + bpf_obj_swap_uptrs(smap->map.record, SDATA(selem)->data, value); + } return selem; } @@ -171,17 +172,14 @@ static void bpf_local_storage_free(struct bpf_local_storage *local_storage, return; } - if (smap) { - migrate_disable(); + if (smap) bpf_mem_cache_free(&smap->storage_ma, local_storage); - migrate_enable(); - } else { + else /* smap could be NULL if the selem that triggered * this 'local_storage' creation had been long gone. * In this case, directly do call_rcu(). */ call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu); - } } /* rcu tasks trace callback for bpf_ma == false */ @@ -209,8 +207,15 @@ static void __bpf_selem_free(struct bpf_local_storage_elem *selem, static void bpf_selem_free_rcu(struct rcu_head *rcu) { struct bpf_local_storage_elem *selem; + struct bpf_local_storage_map *smap; selem = container_of(rcu, struct bpf_local_storage_elem, rcu); + /* The bpf_local_storage_map_free will wait for rcu_barrier */ + smap = rcu_dereference_check(SDATA(selem)->smap, 1); + + migrate_disable(); + bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); + migrate_enable(); bpf_mem_cache_raw_free(selem); } @@ -226,23 +231,50 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem, struct bpf_local_storage_map *smap, bool reuse_now) { - bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); - if (!smap->bpf_ma) { + /* Only task storage has uptrs and task storage + * has moved to bpf_mem_alloc. Meaning smap->bpf_ma == true + * for task storage, so this bpf_obj_free_fields() won't unpin + * any uptr. + */ + bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); __bpf_selem_free(selem, reuse_now); return; } - if (!reuse_now) { - call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu); - } else { + if (reuse_now) { + /* reuse_now == true only happens when the storage owner + * (e.g. task_struct) is being destructed or the map itself + * is being destructed (ie map_free). In both cases, + * no bpf prog can have a hold on the selem. It is + * safe to unpin the uptrs and free the selem now. + */ + bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); /* Instead of using the vanilla call_rcu(), * bpf_mem_cache_free will be able to reuse selem * immediately. */ - migrate_disable(); bpf_mem_cache_free(&smap->selem_ma, selem); - migrate_enable(); + return; + } + + call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu); +} + +static void bpf_selem_free_list(struct hlist_head *list, bool reuse_now) +{ + struct bpf_local_storage_elem *selem; + struct bpf_local_storage_map *smap; + struct hlist_node *n; + + /* The "_safe" iteration is needed. + * The loop is not removing the selem from the list + * but bpf_selem_free will use the selem->rcu_head + * which is union-ized with the selem->free_node. + */ + hlist_for_each_entry_safe(selem, n, list, free_node) { + smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); + bpf_selem_free(selem, smap, reuse_now); } } @@ -252,7 +284,7 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem, */ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem, - bool uncharge_mem, bool reuse_now) + bool uncharge_mem, struct hlist_head *free_selem_list) { struct bpf_local_storage_map *smap; bool free_local_storage; @@ -296,7 +328,7 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor SDATA(selem)) RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); - bpf_selem_free(selem, smap, reuse_now); + hlist_add_head(&selem->free_node, free_selem_list); if (rcu_access_pointer(local_storage->smap) == smap) RCU_INIT_POINTER(local_storage->smap, NULL); @@ -318,7 +350,7 @@ static bool check_storage_bpf_ma(struct bpf_local_storage *local_storage, * * If the local_storage->list is already empty, the caller will not * care about the bpf_ma value also because the caller is not - * responsibile to free the local_storage. + * responsible to free the local_storage. */ if (storage_smap) @@ -345,6 +377,7 @@ static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, struct bpf_local_storage_map *storage_smap; struct bpf_local_storage *local_storage; bool bpf_ma, free_local_storage = false; + HLIST_HEAD(selem_free_list); unsigned long flags; if (unlikely(!selem_linked_to_storage_lockless(selem))) @@ -360,9 +393,11 @@ static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, raw_spin_lock_irqsave(&local_storage->lock, flags); if (likely(selem_linked_to_storage(selem))) free_local_storage = bpf_selem_unlink_storage_nolock( - local_storage, selem, true, reuse_now); + local_storage, selem, true, &selem_free_list); raw_spin_unlock_irqrestore(&local_storage->lock, flags); + bpf_selem_free_list(&selem_free_list, reuse_now); + if (free_local_storage) bpf_local_storage_free(local_storage, storage_smap, bpf_ma, reuse_now); } @@ -414,47 +449,21 @@ void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now) bpf_selem_unlink_storage(selem, reuse_now); } -/* If cacheit_lockit is false, this lookup function is lockless */ -struct bpf_local_storage_data * -bpf_local_storage_lookup(struct bpf_local_storage *local_storage, - struct bpf_local_storage_map *smap, - bool cacheit_lockit) +void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage, + struct bpf_local_storage_map *smap, + struct bpf_local_storage_elem *selem) { - struct bpf_local_storage_data *sdata; - struct bpf_local_storage_elem *selem; - - /* Fast path (cache hit) */ - sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx], - bpf_rcu_lock_held()); - if (sdata && rcu_access_pointer(sdata->smap) == smap) - return sdata; - - /* Slow path (cache miss) */ - hlist_for_each_entry_rcu(selem, &local_storage->list, snode, - rcu_read_lock_trace_held()) - if (rcu_access_pointer(SDATA(selem)->smap) == smap) - break; - - if (!selem) - return NULL; - - sdata = SDATA(selem); - if (cacheit_lockit) { - unsigned long flags; - - /* spinlock is needed to avoid racing with the - * parallel delete. Otherwise, publishing an already - * deleted sdata to the cache will become a use-after-free - * problem in the next bpf_local_storage_lookup(). - */ - raw_spin_lock_irqsave(&local_storage->lock, flags); - if (selem_linked_to_storage(selem)) - rcu_assign_pointer(local_storage->cache[smap->cache_idx], - sdata); - raw_spin_unlock_irqrestore(&local_storage->lock, flags); - } + unsigned long flags; - return sdata; + /* spinlock is needed to avoid racing with the + * parallel delete. Otherwise, publishing an already + * deleted sdata to the cache will become a use-after-free + * problem in the next bpf_local_storage_lookup(). + */ + raw_spin_lock_irqsave(&local_storage->lock, flags); + if (selem_linked_to_storage(selem)) + rcu_assign_pointer(local_storage->cache[smap->cache_idx], SDATA(selem)); + raw_spin_unlock_irqrestore(&local_storage->lock, flags); } static int check_flags(const struct bpf_local_storage_data *old_sdata, @@ -484,15 +493,11 @@ int bpf_local_storage_alloc(void *owner, if (err) return err; - if (smap->bpf_ma) { - migrate_disable(); + if (smap->bpf_ma) storage = bpf_mem_cache_alloc_flags(&smap->storage_ma, gfp_flags); - migrate_enable(); - } else { + else storage = bpf_map_kzalloc(&smap->map, sizeof(*storage), gfp_flags | __GFP_NOWARN); - } - if (!storage) { err = -ENOMEM; goto uncharge; @@ -550,11 +555,12 @@ uncharge: */ struct bpf_local_storage_data * bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, - void *value, u64 map_flags, gfp_t gfp_flags) + void *value, u64 map_flags, bool swap_uptrs, gfp_t gfp_flags) { struct bpf_local_storage_data *old_sdata = NULL; struct bpf_local_storage_elem *alloc_selem, *selem = NULL; struct bpf_local_storage *local_storage; + HLIST_HEAD(old_selem_free_list); unsigned long flags; int err; @@ -576,7 +582,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, if (err) return ERR_PTR(err); - selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags); + selem = bpf_selem_alloc(smap, owner, value, true, swap_uptrs, gfp_flags); if (!selem) return ERR_PTR(-ENOMEM); @@ -610,7 +616,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, /* A lookup has just been done before and concluded a new selem is * needed. The chance of an unnecessary alloc is unlikely. */ - alloc_selem = selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags); + alloc_selem = selem = bpf_selem_alloc(smap, owner, value, true, swap_uptrs, gfp_flags); if (!alloc_selem) return ERR_PTR(-ENOMEM); @@ -650,11 +656,12 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, if (old_sdata) { bpf_selem_unlink_map(SELEM(old_sdata)); bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata), - true, false); + true, &old_selem_free_list); } unlock: raw_spin_unlock_irqrestore(&local_storage->lock, flags); + bpf_selem_free_list(&old_selem_free_list, false); if (alloc_selem) { mem_uncharge(smap, owner, smap->elem_size); bpf_selem_free(alloc_selem, smap, true); @@ -732,6 +739,7 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage) struct bpf_local_storage_map *storage_smap; struct bpf_local_storage_elem *selem; bool bpf_ma, free_storage = false; + HLIST_HEAD(free_selem_list); struct hlist_node *n; unsigned long flags; @@ -760,10 +768,12 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage) * of the loop will set the free_cgroup_storage to true. */ free_storage = bpf_selem_unlink_storage_nolock( - local_storage, selem, true, true); + local_storage, selem, true, &free_selem_list); } raw_spin_unlock_irqrestore(&local_storage->lock, flags); + bpf_selem_free_list(&free_selem_list, true); + if (free_storage) bpf_local_storage_free(local_storage, storage_smap, bpf_ma, true); } @@ -808,8 +818,8 @@ bpf_local_storage_map_alloc(union bpf_attr *attr, nbuckets = max_t(u32, 2, nbuckets); smap->bucket_log = ilog2(nbuckets); - smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets), - nbuckets, GFP_USER | __GFP_NOWARN); + smap->buckets = bpf_map_kvcalloc(&smap->map, nbuckets, + sizeof(*smap->buckets), GFP_USER | __GFP_NOWARN); if (!smap->buckets) { err = -ENOMEM; goto free_smap; @@ -823,8 +833,12 @@ bpf_local_storage_map_alloc(union bpf_attr *attr, smap->elem_size = offsetof(struct bpf_local_storage_elem, sdata.data[attr->value_size]); - smap->bpf_ma = bpf_ma; - if (bpf_ma) { + /* In PREEMPT_RT, kmalloc(GFP_ATOMIC) is still not safe in non + * preemptible context. Thus, enforce all storages to use + * bpf_mem_alloc when CONFIG_PREEMPT_RT is enabled. + */ + smap->bpf_ma = IS_ENABLED(CONFIG_PREEMPT_RT) ? true : bpf_ma; + if (smap->bpf_ma) { err = bpf_mem_alloc_init(&smap->selem_ma, smap->elem_size, false); if (err) goto free_smap; @@ -880,15 +894,11 @@ void bpf_local_storage_map_free(struct bpf_map *map, while ((selem = hlist_entry_safe( rcu_dereference_raw(hlist_first_rcu(&b->list)), struct bpf_local_storage_elem, map_node))) { - if (busy_counter) { - migrate_disable(); + if (busy_counter) this_cpu_inc(*busy_counter); - } bpf_selem_unlink(selem, true); - if (busy_counter) { + if (busy_counter) this_cpu_dec(*busy_counter); - migrate_enable(); - } cond_resched_rcu(); } rcu_read_unlock(); @@ -909,6 +919,9 @@ void bpf_local_storage_map_free(struct bpf_map *map, synchronize_rcu(); if (smap->bpf_ma) { + rcu_barrier_tasks_trace(); + if (!rcu_trace_implies_rcu_gp()) + rcu_barrier(); bpf_mem_alloc_destroy(&smap->selem_ma); bpf_mem_alloc_destroy(&smap->storage_ma); } |