Diffstat (limited to 'mm/percpu.c')
-rw-r--r--  mm/percpu.c  185
1 file changed, 164 insertions(+), 21 deletions(-)
diff --git a/mm/percpu.c b/mm/percpu.c
index 5e9eefdce21f..dc1a213293aa 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -37,9 +37,14 @@
* takes care of normal allocations.
*
* The allocator organizes chunks into lists according to free size and
- * tries to allocate from the fullest chunk first. Each chunk is managed
- * by a bitmap with metadata blocks. The allocation map is updated on
- * every allocation and free to reflect the current state while the boundary
+ * memcg-awareness. To make a percpu allocation memcg-aware, the
+ * __GFP_ACCOUNT flag should be passed. All memcg-aware allocations share
+ * one set of chunks, while unaccounted allocations and allocations made
+ * by processes belonging to the root memory cgroup use the second set.
+ *
+ * The allocator tries to allocate from the fullest chunk first. Each chunk
+ * is managed by a bitmap with metadata blocks. The allocation map is updated
+ * on every allocation and free to reflect the current state while the boundary
* map is only updated on allocation. Each metadata block contains
* information to help mitigate the need to iterate over large portions
* of the bitmap. The reverse mapping from page to chunk is stored in
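
As the updated comment notes, a caller opts into accounting simply by passing __GFP_ACCOUNT. A minimal usage sketch (not part of this patch; struct and variable names are illustrative):

	struct foo_stats __percpu *stats;

	/* memcg-aware: charged to the current task's memory cgroup */
	stats = alloc_percpu_gfp(struct foo_stats,
				 GFP_KERNEL | __GFP_ACCOUNT);
	if (!stats)
		return -ENOMEM;

	/* plain alloc_percpu(struct foo_stats) would use the root chunk set */

	free_percpu(stats);	/* the free path uncharges the cgroup */
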
@@ -81,6 +86,7 @@
#include <linux/kmemleak.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
+#include <linux/memcontrol.h>
#include <asm/cacheflush.h>
#include <asm/sections.h>
@@ -160,7 +166,7 @@ struct pcpu_chunk *pcpu_reserved_chunk __ro_after_init;
DEFINE_SPINLOCK(pcpu_lock); /* all internal data structures */
static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop, map ext */
-struct list_head *pcpu_slot __ro_after_init; /* chunk list slots */
+struct list_head *pcpu_chunk_lists __ro_after_init; /* chunk list slots */
/* chunks which need their map areas extended, protected by pcpu_lock */
static LIST_HEAD(pcpu_map_extend_chunks);
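
This diff is limited to mm/percpu.c, so the chunk-type machinery the code below relies on (enum pcpu_chunk_type, pcpu_chunk_type(), pcpu_is_memcg_chunk() and pcpu_chunk_list()) is not visible here; it comes from the companion change to mm/percpu-internal.h. A rough sketch of what this file assumes, for the CONFIG_MEMCG_KMEM=y case (the real header wraps parts of this in #ifdef and may differ in detail):

	enum pcpu_chunk_type {
		PCPU_CHUNK_ROOT,	/* unaccounted + root-cgroup allocations */
		PCPU_CHUNK_MEMCG,	/* __GFP_ACCOUNT allocations */
		PCPU_NR_CHUNK_TYPES,
		PCPU_FAIL_ALLOC = PCPU_NR_CHUNK_TYPES	/* charging failed */
	};

	static inline bool pcpu_is_memcg_chunk(enum pcpu_chunk_type chunk_type)
	{
		return chunk_type == PCPU_CHUNK_MEMCG;
	}

	/* a chunk is memcg-aware iff it carries an obj_cgroups vector */
	static inline enum pcpu_chunk_type pcpu_chunk_type(struct pcpu_chunk *chunk)
	{
		return chunk->obj_cgroups ? PCPU_CHUNK_MEMCG : PCPU_CHUNK_ROOT;
	}

	/* each chunk type owns its own block of pcpu_nr_slots list heads */
	static inline struct list_head *pcpu_chunk_list(enum pcpu_chunk_type type)
	{
		return &pcpu_chunk_lists[pcpu_nr_slots *
					 pcpu_is_memcg_chunk(type)];
	}
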
@@ -500,6 +506,9 @@ static void __pcpu_chunk_move(struct pcpu_chunk *chunk, int slot,
bool move_front)
{
if (chunk != pcpu_reserved_chunk) {
+ struct list_head *pcpu_slot;
+
+ pcpu_slot = pcpu_chunk_list(pcpu_chunk_type(chunk));
if (move_front)
list_move(&chunk->list, &pcpu_slot[slot]);
else
@@ -1341,6 +1350,10 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
panic("%s: Failed to allocate %zu bytes\n", __func__,
alloc_size);
+#ifdef CONFIG_MEMCG_KMEM
+ /* first chunk isn't memcg-aware */
+ chunk->obj_cgroups = NULL;
+#endif
pcpu_init_md_blocks(chunk);
/* manage populated page bitmap */
@@ -1380,7 +1393,7 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
return chunk;
}
-static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp)
+static struct pcpu_chunk *pcpu_alloc_chunk(enum pcpu_chunk_type type, gfp_t gfp)
{
struct pcpu_chunk *chunk;
int region_bits;
@@ -1408,6 +1421,16 @@ static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp)
if (!chunk->md_blocks)
goto md_blocks_fail;
+#ifdef CONFIG_MEMCG_KMEM
+ if (pcpu_is_memcg_chunk(type)) {
+ chunk->obj_cgroups =
+ pcpu_mem_zalloc(pcpu_chunk_map_bits(chunk) *
+ sizeof(struct obj_cgroup *), gfp);
+ if (!chunk->obj_cgroups)
+ goto objcg_fail;
+ }
+#endif
+
pcpu_init_md_blocks(chunk);
/* init metadata */
@@ -1415,6 +1438,10 @@ static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp)
return chunk;
+#ifdef CONFIG_MEMCG_KMEM
+objcg_fail:
+ pcpu_mem_free(chunk->md_blocks);
+#endif
md_blocks_fail:
pcpu_mem_free(chunk->bound_map);
bound_map_fail:
@@ -1429,6 +1456,9 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk)
{
if (!chunk)
return;
+#ifdef CONFIG_MEMCG_KMEM
+ pcpu_mem_free(chunk->obj_cgroups);
+#endif
pcpu_mem_free(chunk->md_blocks);
pcpu_mem_free(chunk->bound_map);
pcpu_mem_free(chunk->alloc_map);
@@ -1505,7 +1535,8 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
int page_start, int page_end, gfp_t gfp);
static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
int page_start, int page_end);
-static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp);
+static struct pcpu_chunk *pcpu_create_chunk(enum pcpu_chunk_type type,
+ gfp_t gfp);
static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
static struct page *pcpu_addr_to_page(void *addr);
static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);
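
pcpu_create_chunk() itself lives in mm/percpu-vm.c and mm/percpu-km.c, which are outside this diff; the matching change there presumably just threads the new type argument through to pcpu_alloc_chunk(). A simplified sketch in the style of the km variant (illustrative, not the actual implementation):

	static struct pcpu_chunk *pcpu_create_chunk(enum pcpu_chunk_type type,
						    gfp_t gfp)
	{
		struct pcpu_chunk *chunk;

		chunk = pcpu_alloc_chunk(type, gfp);
		if (!chunk)
			return NULL;

		/* ... back the chunk with pages and set chunk->base_addr ... */

		return chunk;
	}
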
@@ -1547,6 +1578,77 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
return pcpu_get_page_chunk(pcpu_addr_to_page(addr));
}
+#ifdef CONFIG_MEMCG_KMEM
+static enum pcpu_chunk_type pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
+ struct obj_cgroup **objcgp)
+{
+ struct obj_cgroup *objcg;
+
+ if (!memcg_kmem_enabled() || !(gfp & __GFP_ACCOUNT) ||
+ memcg_kmem_bypass())
+ return PCPU_CHUNK_ROOT;
+
+ objcg = get_obj_cgroup_from_current();
+ if (!objcg)
+ return PCPU_CHUNK_ROOT;
+
+ if (obj_cgroup_charge(objcg, gfp, size * num_possible_cpus())) {
+ obj_cgroup_put(objcg);
+ return PCPU_FAIL_ALLOC;
+ }
+
+ *objcgp = objcg;
+ return PCPU_CHUNK_MEMCG;
+}
+
+static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
+ struct pcpu_chunk *chunk, int off,
+ size_t size)
+{
+ if (!objcg)
+ return;
+
+ if (chunk) {
+ chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = objcg;
+ } else {
+ obj_cgroup_uncharge(objcg, size * num_possible_cpus());
+ obj_cgroup_put(objcg);
+ }
+}
+
+static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
+{
+ struct obj_cgroup *objcg;
+
+ if (!pcpu_is_memcg_chunk(pcpu_chunk_type(chunk)))
+ return;
+
+ objcg = chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT];
+ chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = NULL;
+
+ obj_cgroup_uncharge(objcg, size * num_possible_cpus());
+
+ obj_cgroup_put(objcg);
+}
+
+#else /* CONFIG_MEMCG_KMEM */
+static enum pcpu_chunk_type
+pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp, struct obj_cgroup **objcgp)
+{
+ return PCPU_CHUNK_ROOT;
+}
+
+static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
+ struct pcpu_chunk *chunk, int off,
+ size_t size)
+{
+}
+
+static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
+{
+}
+#endif /* CONFIG_MEMCG_KMEM */
+
/**
* pcpu_alloc - the percpu allocator
* @size: size of area to allocate in bytes
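
One detail worth spelling out in the hooks above: the charge covers every possible CPU's copy, so each allocation accounts size * num_possible_cpus() bytes to the objcg, and the same amount is uncharged on free or when the allocation fails after charging. A hypothetical helper, only to make the arithmetic explicit (not part of the patch):

	/* illustrative only: bytes charged to the objcg for one allocation */
	static inline size_t pcpu_charge_size(size_t size)
	{
		/* e.g. a 64-byte percpu variable on an 8-CPU machine charges 512 bytes */
		return size * num_possible_cpus();
	}
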
@@ -1568,6 +1670,9 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
gfp_t pcpu_gfp;
bool is_atomic;
bool do_warn;
+ enum pcpu_chunk_type type;
+ struct list_head *pcpu_slot;
+ struct obj_cgroup *objcg = NULL;
static int warn_limit = 10;
struct pcpu_chunk *chunk, *next;
const char *err;
@@ -1602,16 +1707,23 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
return NULL;
}
+ type = pcpu_memcg_pre_alloc_hook(size, gfp, &objcg);
+ if (unlikely(type == PCPU_FAIL_ALLOC))
+ return NULL;
+ pcpu_slot = pcpu_chunk_list(type);
+
if (!is_atomic) {
/*
* pcpu_balance_workfn() allocates memory under this mutex,
* and it may wait for memory reclaim. Allow current task
* to become OOM victim, in case of memory pressure.
*/
- if (gfp & __GFP_NOFAIL)
+ if (gfp & __GFP_NOFAIL) {
mutex_lock(&pcpu_alloc_mutex);
- else if (mutex_lock_killable(&pcpu_alloc_mutex))
+ } else if (mutex_lock_killable(&pcpu_alloc_mutex)) {
+ pcpu_memcg_post_alloc_hook(objcg, NULL, 0, size);
return NULL;
+ }
}
spin_lock_irqsave(&pcpu_lock, flags);
@@ -1666,7 +1778,7 @@ restart:
}
if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
- chunk = pcpu_create_chunk(pcpu_gfp);
+ chunk = pcpu_create_chunk(type, pcpu_gfp);
if (!chunk) {
err = "failed to allocate new chunk";
goto fail;
@@ -1723,6 +1835,8 @@ area_found:
trace_percpu_alloc_percpu(reserved, is_atomic, size, align,
chunk->base_addr, off, ptr);
+ pcpu_memcg_post_alloc_hook(objcg, chunk, off, size);
+
return ptr;
fail_unlock:
@@ -1744,6 +1858,9 @@ fail:
} else {
mutex_unlock(&pcpu_alloc_mutex);
}
+
+ pcpu_memcg_post_alloc_hook(objcg, NULL, 0, size);
+
return NULL;
}
@@ -1803,8 +1920,8 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
}
/**
- * pcpu_balance_workfn - manage the amount of free chunks and populated pages
- * @work: unused
+ * __pcpu_balance_workfn - manage the amount of free chunks and populated pages
+ * @type: chunk type
*
* Reclaim all fully free chunks except for the first one. This is also
* responsible for maintaining the pool of empty populated pages. However,
@@ -1813,11 +1930,12 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
* allocation causes the failure as it is possible that requests can be
* serviced from already backed regions.
*/
-static void pcpu_balance_workfn(struct work_struct *work)
+static void __pcpu_balance_workfn(enum pcpu_chunk_type type)
{
/* gfp flags passed to underlying allocators */
const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
LIST_HEAD(to_free);
+ struct list_head *pcpu_slot = pcpu_chunk_list(type);
struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1];
struct pcpu_chunk *chunk, *next;
int slot, nr_to_pop, ret;
@@ -1915,7 +2033,7 @@ retry_pop:
if (nr_to_pop) {
/* ran out of chunks to populate, create a new one and retry */
- chunk = pcpu_create_chunk(gfp);
+ chunk = pcpu_create_chunk(type, gfp);
if (chunk) {
spin_lock_irq(&pcpu_lock);
pcpu_chunk_relocate(chunk, -1);
@@ -1928,6 +2046,20 @@ retry_pop:
}
/**
+ * pcpu_balance_workfn - manage the amount of free chunks and populated pages
+ * @work: unused
+ *
+ * Call __pcpu_balance_workfn() for each chunk type.
+ */
+static void pcpu_balance_workfn(struct work_struct *work)
+{
+ enum pcpu_chunk_type type;
+
+ for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++)
+ __pcpu_balance_workfn(type);
+}
+
+/**
* free_percpu - free percpu area
* @ptr: pointer to area to free
*
@@ -1941,8 +2073,9 @@ void free_percpu(void __percpu *ptr)
void *addr;
struct pcpu_chunk *chunk;
unsigned long flags;
- int off;
+ int size, off;
bool need_balance = false;
+ struct list_head *pcpu_slot;
if (!ptr)
return;
@@ -1956,7 +2089,11 @@ void free_percpu(void __percpu *ptr)
chunk = pcpu_chunk_addr_search(addr);
off = addr - chunk->base_addr;
- pcpu_free_area(chunk, off);
+ size = pcpu_free_area(chunk, off);
+
+ pcpu_slot = pcpu_chunk_list(pcpu_chunk_type(chunk));
+
+ pcpu_memcg_free_hook(chunk, off, size);
/* if there are more than one fully free chunks, wake up grim reaper */
if (chunk->free_bytes == pcpu_unit_size) {
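
The assignment above relies on pcpu_free_area() reporting how large the freed area was; that hunk is not shown in this excerpt, but the assumption is that its return type changed from void to something like:

	/* assumed companion change, not visible above */
	static int pcpu_free_area(struct pcpu_chunk *chunk, int off);

so that pcpu_memcg_free_hook() can uncharge the matching size * num_possible_cpus() bytes.
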
@@ -2267,6 +2404,7 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
int map_size;
unsigned long tmp_addr;
size_t alloc_size;
+ enum pcpu_chunk_type type;
#define PCPU_SETUP_BUG_ON(cond) do { \
if (unlikely(cond)) { \
@@ -2384,13 +2522,18 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
* empty chunks.
*/
pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2;
- pcpu_slot = memblock_alloc(pcpu_nr_slots * sizeof(pcpu_slot[0]),
- SMP_CACHE_BYTES);
- if (!pcpu_slot)
+ pcpu_chunk_lists = memblock_alloc(pcpu_nr_slots *
+ sizeof(pcpu_chunk_lists[0]) *
+ PCPU_NR_CHUNK_TYPES,
+ SMP_CACHE_BYTES);
+ if (!pcpu_chunk_lists)
panic("%s: Failed to allocate %zu bytes\n", __func__,
- pcpu_nr_slots * sizeof(pcpu_slot[0]));
- for (i = 0; i < pcpu_nr_slots; i++)
- INIT_LIST_HEAD(&pcpu_slot[i]);
+ pcpu_nr_slots * sizeof(pcpu_chunk_lists[0]) *
+ PCPU_NR_CHUNK_TYPES);
+
+ for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++)
+ for (i = 0; i < pcpu_nr_slots; i++)
+ INIT_LIST_HEAD(&pcpu_chunk_list(type)[i]);
/*
* The end of the static region needs to be aligned with the