9 files changed, 270 insertions, 36 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b4b40b45962b..b4825d3cdb29 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2058,6 +2058,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec);
 bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b);
 void bpf_obj_free_timer(const struct btf_record *rec, void *obj);
 void bpf_obj_free_fields(const struct btf_record *rec, void *obj);
+void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu);
 
 struct bpf_map *bpf_map_get(u32 ufd);
 struct bpf_map *bpf_map_get_with_uref(u32 ufd);
diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h
index d644bbb298af..bb1223b21308 100644
--- a/include/linux/bpf_mem_alloc.h
+++ b/include/linux/bpf_mem_alloc.h
@@ -11,6 +11,7 @@ struct bpf_mem_caches;
 struct bpf_mem_alloc {
 	struct bpf_mem_caches __percpu *caches;
 	struct bpf_mem_cache __percpu *cache;
+	bool percpu;
 	struct work_struct work;
 };
 
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 68fac2e7cbe6..8c677f185901 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -132,6 +132,7 @@ extern void __init setup_per_cpu_areas(void);
 extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __alloc_size(1);
 extern void __percpu *__alloc_percpu(size_t size, size_t align) __alloc_size(1);
 extern void free_percpu(void __percpu *__pdata);
+extern size_t pcpu_alloc_size(void __percpu *__pdata);
 
 DEFINE_FREE(free_percpu, void __percpu *, free_percpu(_T))
 
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index da058aead20c..e46ac288a108 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1811,8 +1811,6 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 	}
 }
 
-void __bpf_obj_drop_impl(void *p, const struct btf_record *rec);
-
 void bpf_list_head_free(const struct btf_field *field, void *list_head,
 			struct bpf_spin_lock *spin_lock)
 {
@@ -1844,7 +1842,7 @@ unlock:
 		 * bpf_list_head which needs to be freed.
 		 */
 		migrate_disable();
-		__bpf_obj_drop_impl(obj, field->graph_root.value_rec);
+		__bpf_obj_drop_impl(obj, field->graph_root.value_rec, false);
 		migrate_enable();
 	}
 }
@@ -1883,7 +1881,7 @@ void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
 
 
 		migrate_disable();
-		__bpf_obj_drop_impl(obj, field->graph_root.value_rec);
+		__bpf_obj_drop_impl(obj, field->graph_root.value_rec, false);
 		migrate_enable();
 	}
 }
@@ -1915,8 +1913,10 @@ __bpf_kfunc void *bpf_percpu_obj_new_impl(u64 local_type_id__k, void *meta__ign)
 }
 
 /* Must be called under migrate_disable(), as required by bpf_mem_free */
-void __bpf_obj_drop_impl(void *p, const struct btf_record *rec)
+void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu)
 {
+	struct bpf_mem_alloc *ma;
+
 	if (rec && rec->refcount_off >= 0 &&
 	    !refcount_dec_and_test((refcount_t *)(p + rec->refcount_off))) {
 		/* Object is refcounted and refcount_dec didn't result in 0
@@ -1928,10 +1928,14 @@ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec)
 	if (rec)
 		bpf_obj_free_fields(rec, p);
 
+	if (percpu)
+		ma = &bpf_global_percpu_ma;
+	else
+		ma = &bpf_global_ma;
 	if (rec && rec->refcount_off >= 0)
-		bpf_mem_free_rcu(&bpf_global_ma, p);
+		bpf_mem_free_rcu(ma, p);
 	else
-		bpf_mem_free(&bpf_global_ma, p);
+		bpf_mem_free(ma, p);
 }
 
 __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
@@ -1939,7 +1943,7 @@ __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
 	struct btf_struct_meta *meta = meta__ign;
 	void *p = p__alloc;
 
-	__bpf_obj_drop_impl(p, meta ? meta->record : NULL);
+	__bpf_obj_drop_impl(p, meta ? meta->record : NULL, false);
 }
 
 __bpf_kfunc void bpf_percpu_obj_drop_impl(void *p__alloc, void *meta__ign)
@@ -1983,7 +1987,7 @@ static int __bpf_list_add(struct bpf_list_node_kern *node,
 	 */
 	if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) {
 		/* Only called from BPF prog, no need to migrate_disable */
-		__bpf_obj_drop_impl((void *)n - off, rec);
+		__bpf_obj_drop_impl((void *)n - off, rec, false);
 		return -EINVAL;
 	}
 
@@ -2082,7 +2086,7 @@ static int __bpf_rbtree_add(struct bpf_rb_root *root,
 	 */
 	if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) {
 		/* Only called from BPF prog, no need to migrate_disable */
-		__bpf_obj_drop_impl((void *)n - off, rec);
+		__bpf_obj_drop_impl((void *)n - off, rec, false);
 		return -EINVAL;
 	}
 
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 39ea316c55e7..5308e386380a 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -491,21 +491,17 @@ static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx)
 	struct llist_node *first;
 	unsigned int obj_size;
 
-	/* For per-cpu allocator, the size of free objects in free list doesn't
-	 * match with unit_size and now there is no way to get the size of
-	 * per-cpu pointer saved in free object, so just skip the checking.
-	 */
-	if (c->percpu_size)
-		return 0;
-
 	first = c->free_llist.first;
 	if (!first)
 		return 0;
 
-	obj_size = ksize(first);
+	if (c->percpu_size)
+		obj_size = pcpu_alloc_size(((void **)first)[1]);
+	else
+		obj_size = ksize(first);
 	if (obj_size != c->unit_size) {
-		WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n",
-			  idx, obj_size, c->unit_size);
+		WARN_ONCE(1, "bpf_mem_cache[%u]: percpu %d, unexpected object size %u, expect %u\n",
+			  idx, c->percpu_size, obj_size, c->unit_size);
 		return -EINVAL;
 	}
 	return 0;
@@ -529,6 +525,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 	/* room for llist_node and per-cpu pointer */
 	if (percpu)
 		percpu_size = LLIST_NODE_SZ + sizeof(void *);
+	ma->percpu = percpu;
 
 	if (size) {
 		pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL);
@@ -878,6 +875,17 @@ void notrace *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size)
 	return !ret ? NULL : ret + LLIST_NODE_SZ;
 }
 
+static notrace int bpf_mem_free_idx(void *ptr, bool percpu)
+{
+	size_t size;
+
+	if (percpu)
+		size = pcpu_alloc_size(*((void **)ptr));
+	else
+		size = ksize(ptr - LLIST_NODE_SZ);
+	return bpf_mem_cache_idx(size);
+}
+
 void notrace bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr)
 {
 	int idx;
@@ -885,7 +893,7 @@ void notrace bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr)
 	if (!ptr)
 		return;
 
-	idx = bpf_mem_cache_idx(ksize(ptr - LLIST_NODE_SZ));
+	idx = bpf_mem_free_idx(ptr, ma->percpu);
 	if (idx < 0)
 		return;
 
@@ -899,7 +907,7 @@ void notrace bpf_mem_free_rcu(struct bpf_mem_alloc *ma, void *ptr)
 	if (!ptr)
 		return;
 
-	idx = bpf_mem_cache_idx(ksize(ptr - LLIST_NODE_SZ));
+	idx = bpf_mem_free_idx(ptr, ma->percpu);
 	if (idx < 0)
 		return;
 
@@ -973,6 +981,12 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)
 	return !ret ? NULL : ret + LLIST_NODE_SZ;
 }
 
+/* The alignment of dynamic per-cpu area is 8, so c->unit_size and the
+ * actual size of dynamic per-cpu area will always be matched and there is
+ * no need to adjust size_index for per-cpu allocation. However for the
+ * simplicity of the implementation, use an unified size_index for both
+ * kmalloc and per-cpu allocation.
+ */
 static __init int bpf_mem_cache_adjust_size(void)
 {
 	unsigned int size;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 341f8cb4405c..a9b2cb500bf7 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -626,8 +626,6 @@ void bpf_obj_free_timer(const struct btf_record *rec, void *obj)
 	bpf_timer_cancel_and_free(obj + rec->timer_off);
 }
 
-extern void __bpf_obj_drop_impl(void *p, const struct btf_record *rec);
-
 void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
 {
 	const struct btf_field *fields;
@@ -662,8 +660,8 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
 									   field->kptr.btf_id);
 				migrate_disable();
 				__bpf_obj_drop_impl(xchgd_field, pointee_struct_meta ?
-								 pointee_struct_meta->record :
-								 NULL);
+								 pointee_struct_meta->record : NULL,
+								 fields[i].type == BPF_KPTR_PERCPU);
 				migrate_enable();
 			} else {
 				field->kptr.dtor(xchgd_field);
diff --git a/mm/percpu.c b/mm/percpu.c
index a7665de8485f..60ed078e4cd0 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -2245,6 +2245,37 @@ static void pcpu_balance_workfn(struct work_struct *work)
 }
 
 /**
+ * pcpu_alloc_size - the size of the dynamic percpu area
+ * @ptr: pointer to the dynamic percpu area
+ *
+ * Returns the size of the @ptr allocation.  This is undefined for statically
+ * defined percpu variables as there is no corresponding chunk->bound_map.
+ *
+ * RETURNS:
+ * The size of the dynamic percpu area.
+ *
+ * CONTEXT:
+ * Can be called from atomic context.
+ */
+size_t pcpu_alloc_size(void __percpu *ptr)
+{
+	struct pcpu_chunk *chunk;
+	unsigned long bit_off, end;
+	void *addr;
+
+	if (!ptr)
+		return 0;
+
+	addr = __pcpu_ptr_to_addr(ptr);
+	/* No pcpu_lock here: ptr has not been freed, so chunk is still alive */
+	chunk = pcpu_chunk_addr_search(addr);
+	bit_off = (addr - chunk->base_addr) / PCPU_MIN_ALLOC_SIZE;
+	end = find_next_bit(chunk->bound_map, pcpu_chunk_map_bits(chunk),
+			    bit_off + 1);
+	return (end - bit_off) * PCPU_MIN_ALLOC_SIZE;
+}
+
+/**
  * free_percpu - free percpu area
  * @ptr: pointer to area to free
  *
@@ -2267,12 +2298,10 @@ void free_percpu(void __percpu *ptr)
 	kmemleak_free_percpu(ptr);
 
 	addr = __pcpu_ptr_to_addr(ptr);
-
-	spin_lock_irqsave(&pcpu_lock, flags);
-
 	chunk = pcpu_chunk_addr_search(addr);
 	off = addr - chunk->base_addr;
 
+	spin_lock_irqsave(&pcpu_lock, flags);
 	size = pcpu_free_area(chunk, off);
 
 	pcpu_memcg_free_hook(chunk, off, size);
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c
index 0cca4e8ae38e..d3491a84b3b9 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c
@@ -9,9 +9,10 @@
 
 #include "test_bpf_ma.skel.h"
 
-void test_test_bpf_ma(void)
+static void do_bpf_ma_test(const char *name)
 {
 	struct test_bpf_ma *skel;
+	struct bpf_program *prog;
 	struct btf *btf;
 	int i, err;
 
@@ -34,6 +35,11 @@ void test_test_bpf_ma(void)
 		skel->rodata->data_btf_ids[i] = id;
 	}
 
+	prog = bpf_object__find_program_by_name(skel->obj, name);
+	if (!ASSERT_OK_PTR(prog, "invalid prog name"))
+		goto out;
+	bpf_program__set_autoload(prog, true);
+
 	err = test_bpf_ma__load(skel);
 	if (!ASSERT_OK(err, "load"))
 		goto out;
@@ -48,3 +54,15 @@ void test_test_bpf_ma(void)
 out:
 	test_bpf_ma__destroy(skel);
 }
+
+void test_test_bpf_ma(void)
+{
+	if (test__start_subtest("batch_alloc_free"))
+		do_bpf_ma_test("test_batch_alloc_free");
+	if (test__start_subtest("free_through_map_free"))
+		do_bpf_ma_test("test_free_through_map_free");
+	if (test__start_subtest("batch_percpu_alloc_free"))
+		do_bpf_ma_test("test_batch_percpu_alloc_free");
+	if (test__start_subtest("percpu_free_through_map_free"))
+		do_bpf_ma_test("test_percpu_free_through_map_free");
+}
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_ma.c b/tools/testing/selftests/bpf/progs/test_bpf_ma.c
index ecde41ae0fc8..b685a4aba6bd 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_ma.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_ma.c
@@ -37,10 +37,20 @@ int pid = 0;
 		__type(key, int); \
 		__type(value, struct map_value_##_size); \
 		__uint(max_entries, 128); \
-	} array_##_size SEC(".maps");
+	} array_##_size SEC(".maps")
 
-static __always_inline void batch_alloc_free(struct bpf_map *map, unsigned int batch,
-					     unsigned int idx)
+#define DEFINE_ARRAY_WITH_PERCPU_KPTR(_size) \
+	struct map_value_percpu_##_size { \
+		struct bin_data_##_size __percpu_kptr * data; \
+	}; \
+	struct { \
+		__uint(type, BPF_MAP_TYPE_ARRAY); \
+		__type(key, int); \
+		__type(value, struct map_value_percpu_##_size); \
+		__uint(max_entries, 128); \
+	} array_percpu_##_size SEC(".maps")
+
+static __always_inline void batch_alloc(struct bpf_map *map, unsigned int batch, unsigned int idx)
 {
 	struct generic_map_value *value;
 	unsigned int i, key;
@@ -65,6 +75,14 @@ static __always_inline void batch_alloc_free(struct bpf_map *map, unsigned int b
 			return;
 		}
 	}
+}
+
+static __always_inline void batch_free(struct bpf_map *map, unsigned int batch, unsigned int idx)
+{
+	struct generic_map_value *value;
+	unsigned int i, key;
+	void *old;
+
 	for (i = 0; i < batch; i++) {
 		key = i;
 		value = bpf_map_lookup_elem(map, &key);
@@ -81,8 +99,72 @@ static __always_inline void batch_alloc_free(struct bpf_map *map, unsigned int b
 	}
 }
 
+static __always_inline void batch_percpu_alloc(struct bpf_map *map, unsigned int batch,
+					       unsigned int idx)
+{
+	struct generic_map_value *value;
+	unsigned int i, key;
+	void *old, *new;
+
+	for (i = 0; i < batch; i++) {
+		key = i;
+		value = bpf_map_lookup_elem(map, &key);
+		if (!value) {
+			err = 1;
+			return;
+		}
+		/* per-cpu allocator may not be able to refill in time */
+		new = bpf_percpu_obj_new_impl(data_btf_ids[idx], NULL);
+		if (!new)
+			continue;
+
+		old = bpf_kptr_xchg(&value->data, new);
+		if (old) {
+			bpf_percpu_obj_drop(old);
+			err = 2;
+			return;
+		}
+	}
+}
+
+static __always_inline void batch_percpu_free(struct bpf_map *map, unsigned int batch,
+					      unsigned int idx)
+{
+	struct generic_map_value *value;
+	unsigned int i, key;
+	void *old;
+
+	for (i = 0; i < batch; i++) {
+		key = i;
+		value = bpf_map_lookup_elem(map, &key);
+		if (!value) {
+			err = 3;
+			return;
+		}
+		old = bpf_kptr_xchg(&value->data, NULL);
+		if (!old)
+			continue;
+		bpf_percpu_obj_drop(old);
+	}
+}
+
+#define CALL_BATCH_ALLOC(size, batch, idx) \
+	batch_alloc((struct bpf_map *)(&array_##size), batch, idx)
+
 #define CALL_BATCH_ALLOC_FREE(size, batch, idx) \
-	batch_alloc_free((struct bpf_map *)(&array_##size), batch, idx)
+	do { \
+		batch_alloc((struct bpf_map *)(&array_##size), batch, idx); \
+		batch_free((struct bpf_map *)(&array_##size), batch, idx); \
+	} while (0)
+
+#define CALL_BATCH_PERCPU_ALLOC(size, batch, idx) \
+	batch_percpu_alloc((struct bpf_map *)(&array_percpu_##size), batch, idx)
+
+#define CALL_BATCH_PERCPU_ALLOC_FREE(size, batch, idx) \
+	do { \
+		batch_percpu_alloc((struct bpf_map *)(&array_percpu_##size), batch, idx); \
+		batch_percpu_free((struct bpf_map *)(&array_percpu_##size), batch, idx); \
+	} while (0)
 
 DEFINE_ARRAY_WITH_KPTR(8);
 DEFINE_ARRAY_WITH_KPTR(16);
@@ -97,8 +179,21 @@ DEFINE_ARRAY_WITH_KPTR(1024);
 DEFINE_ARRAY_WITH_KPTR(2048);
 DEFINE_ARRAY_WITH_KPTR(4096);
 
-SEC("fentry/" SYS_PREFIX "sys_nanosleep")
-int test_bpf_mem_alloc_free(void *ctx)
+/* per-cpu kptr doesn't support bin_data_8 which is a zero-sized array */
+DEFINE_ARRAY_WITH_PERCPU_KPTR(16);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(32);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(64);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(96);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(128);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(192);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(256);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(512);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(1024);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(2048);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(4096);
+
+SEC("?fentry/" SYS_PREFIX "sys_nanosleep")
+int test_batch_alloc_free(void *ctx)
 {
 	if ((u32)bpf_get_current_pid_tgid() != pid)
 		return 0;
@@ -121,3 +216,76 @@ int test_bpf_mem_alloc_free(void *ctx)
 
 	return 0;
 }
+
+SEC("?fentry/" SYS_PREFIX "sys_nanosleep")
+int test_free_through_map_free(void *ctx)
+{
+	if ((u32)bpf_get_current_pid_tgid() != pid)
+		return 0;
+
+	/* Alloc 128 8-bytes objects in batch to trigger refilling,
+	 * then free these objects through map free.
+	 */
+	CALL_BATCH_ALLOC(8, 128, 0);
+	CALL_BATCH_ALLOC(16, 128, 1);
+	CALL_BATCH_ALLOC(32, 128, 2);
+	CALL_BATCH_ALLOC(64, 128, 3);
+	CALL_BATCH_ALLOC(96, 128, 4);
+	CALL_BATCH_ALLOC(128, 128, 5);
+	CALL_BATCH_ALLOC(192, 128, 6);
+	CALL_BATCH_ALLOC(256, 128, 7);
+	CALL_BATCH_ALLOC(512, 64, 8);
+	CALL_BATCH_ALLOC(1024, 32, 9);
+	CALL_BATCH_ALLOC(2048, 16, 10);
+	CALL_BATCH_ALLOC(4096, 8, 11);
+
+	return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_nanosleep")
+int test_batch_percpu_alloc_free(void *ctx)
+{
+	if ((u32)bpf_get_current_pid_tgid() != pid)
+		return 0;
+
+	/* Alloc 128 16-bytes per-cpu objects in batch to trigger refilling,
+	 * then free 128 16-bytes per-cpu objects in batch to trigger freeing.
+	 */
+	CALL_BATCH_PERCPU_ALLOC_FREE(16, 128, 1);
+	CALL_BATCH_PERCPU_ALLOC_FREE(32, 128, 2);
+	CALL_BATCH_PERCPU_ALLOC_FREE(64, 128, 3);
+	CALL_BATCH_PERCPU_ALLOC_FREE(96, 128, 4);
+	CALL_BATCH_PERCPU_ALLOC_FREE(128, 128, 5);
+	CALL_BATCH_PERCPU_ALLOC_FREE(192, 128, 6);
+	CALL_BATCH_PERCPU_ALLOC_FREE(256, 128, 7);
+	CALL_BATCH_PERCPU_ALLOC_FREE(512, 64, 8);
+	CALL_BATCH_PERCPU_ALLOC_FREE(1024, 32, 9);
+	CALL_BATCH_PERCPU_ALLOC_FREE(2048, 16, 10);
+	CALL_BATCH_PERCPU_ALLOC_FREE(4096, 8, 11);
+
+	return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_nanosleep")
+int test_percpu_free_through_map_free(void *ctx)
+{
+	if ((u32)bpf_get_current_pid_tgid() != pid)
+		return 0;
+
+	/* Alloc 128 16-bytes per-cpu objects in batch to trigger refilling,
+	 * then free these object through map free.
+	 */
+	CALL_BATCH_PERCPU_ALLOC(16, 128, 1);
+	CALL_BATCH_PERCPU_ALLOC(32, 128, 2);
+	CALL_BATCH_PERCPU_ALLOC(64, 128, 3);
+	CALL_BATCH_PERCPU_ALLOC(96, 128, 4);
+	CALL_BATCH_PERCPU_ALLOC(128, 128, 5);
+	CALL_BATCH_PERCPU_ALLOC(192, 128, 6);
+	CALL_BATCH_PERCPU_ALLOC(256, 128, 7);
+	CALL_BATCH_PERCPU_ALLOC(512, 64, 8);
+	CALL_BATCH_PERCPU_ALLOC(1024, 32, 9);
+	CALL_BATCH_PERCPU_ALLOC(2048, 16, 10);
+	CALL_BATCH_PERCPU_ALLOC(4096, 8, 11);
+
+	return 0;
+}