Diffstat (limited to 'mm/percpu-vm.c')
-rw-r--r--    mm/percpu-vm.c    256
1 file changed, 109 insertions, 147 deletions
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 3707c71ae4cd..4f5937090590 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -1,14 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * mm/percpu-vm.c - vmalloc area based chunk allocation
  *
  * Copyright (C) 2010 SUSE Linux Products GmbH
  * Copyright (C) 2010 Tejun Heo <tj@kernel.org>
  *
- * This file is released under the GPLv2.
- *
  * Chunks are mapped into vmalloc areas and populated page by page.
  * This is the default chunk allocator.
  */
+#include "internal.h"
 
 static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
                                     unsigned int cpu, int page_idx)
@@ -20,46 +20,24 @@ static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
 }
 
 /**
- * pcpu_get_pages_and_bitmap - get temp pages array and bitmap
- * @chunk: chunk of interest
- * @bitmapp: output parameter for bitmap
- * @may_alloc: may allocate the array
+ * pcpu_get_pages - get temp pages array
  *
- * Returns pointer to array of pointers to struct page and bitmap,
- * both of which can be indexed with pcpu_page_idx(). The returned
- * array is cleared to zero and *@bitmapp is copied from
- * @chunk->populated. Note that there is only one array and bitmap
- * and access exclusion is the caller's responsibility.
- *
- * CONTEXT:
- * pcpu_alloc_mutex and does GFP_KERNEL allocation if @may_alloc.
- * Otherwise, don't care.
+ * Returns pointer to array of pointers to struct page which can be indexed
+ * with pcpu_page_idx(). Note that there is only one array and accesses
+ * should be serialized by pcpu_alloc_mutex.
  *
  * RETURNS:
- * Pointer to temp pages array on success, NULL on failure.
+ * Pointer to temp pages array on success.
  */
-static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk,
-                                               unsigned long **bitmapp,
-                                               bool may_alloc)
+static struct page **pcpu_get_pages(void)
 {
         static struct page **pages;
-        static unsigned long *bitmap;
         size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]);
-        size_t bitmap_size = BITS_TO_LONGS(pcpu_unit_pages) *
-                             sizeof(unsigned long);
-
-        if (!pages || !bitmap) {
-                if (may_alloc && !pages)
-                        pages = pcpu_mem_zalloc(pages_size);
-                if (may_alloc && !bitmap)
-                        bitmap = pcpu_mem_zalloc(bitmap_size);
-                if (!pages || !bitmap)
-                        return NULL;
-        }
 
-        bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages);
+        lockdep_assert_held(&pcpu_alloc_mutex);
 
-        *bitmapp = bitmap;
+        if (!pages)
+                pages = pcpu_mem_zalloc(pages_size, GFP_KERNEL);
         return pages;
 }
@@ -67,7 +45,6 @@ static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk,
  * pcpu_free_pages - free pages which were allocated for @chunk
  * @chunk: chunk pages were allocated for
  * @pages: array of pages to be freed, indexed by pcpu_page_idx()
- * @populated: populated bitmap
  * @page_start: page index of the first page to be freed
  * @page_end: page index of the last page to be freed + 1
  *
@@ -75,8 +52,7 @@ static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk,
  * The pages were allocated for @chunk.
  */
 static void pcpu_free_pages(struct pcpu_chunk *chunk,
-                            struct page **pages, unsigned long *populated,
-                            int page_start, int page_end)
+                            struct page **pages, int page_start, int page_end)
 {
         unsigned int cpu;
         int i;
@@ -95,35 +71,45 @@ static void pcpu_free_pages(struct pcpu_chunk *chunk,
  * pcpu_alloc_pages - allocates pages for @chunk
  * @chunk: target chunk
  * @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
- * @populated: populated bitmap
  * @page_start: page index of the first page to be allocated
  * @page_end: page index of the last page to be allocated + 1
+ * @gfp: allocation flags passed to the underlying allocator
  *
  * Allocate pages [@page_start,@page_end) into @pages for all units.
  * The allocation is for @chunk. Percpu core doesn't care about the
  * content of @pages and will pass it verbatim to pcpu_map_pages().
  */
 static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
-                            struct page **pages, unsigned long *populated,
-                            int page_start, int page_end)
+                            struct page **pages, int page_start, int page_end,
+                            gfp_t gfp)
 {
-        const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD;
-        unsigned int cpu;
+        unsigned int cpu, tcpu;
         int i;
 
+        gfp |= __GFP_HIGHMEM;
+
         for_each_possible_cpu(cpu) {
                 for (i = page_start; i < page_end; i++) {
                         struct page **pagep = &pages[pcpu_page_idx(cpu, i)];
 
                         *pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0);
-                        if (!*pagep) {
-                                pcpu_free_pages(chunk, pages, populated,
-                                                page_start, page_end);
-                                return -ENOMEM;
-                        }
+                        if (!*pagep)
+                                goto err;
                 }
         }
         return 0;
+
+err:
+        while (--i >= page_start)
+                __free_page(pages[pcpu_page_idx(cpu, i)]);
+
+        for_each_possible_cpu(tcpu) {
+                if (tcpu == cpu)
+                        break;
+                for (i = page_start; i < page_end; i++)
+                        __free_page(pages[pcpu_page_idx(tcpu, i)]);
+        }
+        return -ENOMEM;
 }
 
 /**
@@ -148,14 +134,13 @@ static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk,
 
 static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
 {
-        unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT);
+        vunmap_range_noflush(addr, addr + (nr_pages << PAGE_SHIFT));
 }
 
 /**
  * pcpu_unmap_pages - unmap pages out of a pcpu_chunk
  * @chunk: chunk of interest
  * @pages: pages array which can be used to pass information to free
- * @populated: populated bitmap
  * @page_start: page index of the first page to unmap
  * @page_end: page index of the last page to unmap + 1
  *
@@ -166,8 +151,7 @@ static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
  * proper pre/post flush functions.
  */
 static void pcpu_unmap_pages(struct pcpu_chunk *chunk,
-                             struct page **pages, unsigned long *populated,
-                             int page_start, int page_end)
+                             struct page **pages, int page_start, int page_end)
 {
         unsigned int cpu;
         int i;
@@ -183,8 +167,6 @@ static void pcpu_unmap_pages(struct pcpu_chunk *chunk,
                 __pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start),
                                    page_end - page_start);
         }
-
-        bitmap_clear(populated, page_start, page_end - page_start);
 }
 
 /**
@@ -211,15 +193,14 @@ static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
 static int __pcpu_map_pages(unsigned long addr, struct page **pages,
                             int nr_pages)
 {
-        return map_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT,
-                                        PAGE_KERNEL, pages);
+        return vmap_pages_range_noflush(addr, addr + (nr_pages << PAGE_SHIFT),
+                                        PAGE_KERNEL, pages, PAGE_SHIFT);
 }
 
 /**
  * pcpu_map_pages - map pages into a pcpu_chunk
  * @chunk: chunk of interest
  * @pages: pages array containing pages to be mapped
- * @populated: populated bitmap
  * @page_start: page index of the first page to map
  * @page_end: page index of the last page to map + 1
  *
@@ -227,13 +208,11 @@ static int __pcpu_map_pages(unsigned long addr, struct page **pages,
  * caller is responsible for calling pcpu_post_map_flush() after all
  * mappings are complete.
  *
- * This function is responsible for setting corresponding bits in
- * @chunk->populated bitmap and whatever is necessary for reverse
- * lookup (addr -> chunk).
+ * This function is responsible for setting up whatever is necessary for
+ * reverse lookup (addr -> chunk).
  */
 static int pcpu_map_pages(struct pcpu_chunk *chunk,
-                          struct page **pages, unsigned long *populated,
-                          int page_start, int page_end)
+                          struct page **pages, int page_start, int page_end)
 {
         unsigned int cpu, tcpu;
         int i, err;
@@ -244,25 +223,20 @@ static int pcpu_map_pages(struct pcpu_chunk *chunk,
                                        page_end - page_start);
                 if (err < 0)
                         goto err;
-        }
 
-        /* mapping successful, link chunk and mark populated */
-        for (i = page_start; i < page_end; i++) {
-                for_each_possible_cpu(cpu)
+                for (i = page_start; i < page_end; i++)
                         pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)],
                                             chunk);
-                __set_bit(i, populated);
         }
-
         return 0;
-
 err:
         for_each_possible_cpu(tcpu) {
-                if (tcpu == cpu)
-                        break;
                 __pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start),
                                    page_end - page_start);
+                if (tcpu == cpu)
+                        break;
         }
+        pcpu_post_unmap_tlb_flush(chunk, page_start, page_end);
         return err;
 }
@@ -289,131 +263,79 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
 /**
  * pcpu_populate_chunk - populate and map an area of a pcpu_chunk
  * @chunk: chunk of interest
- * @off: offset to the area to populate
- * @size: size of the area to populate in bytes
+ * @page_start: the start page
+ * @page_end: the end page
+ * @gfp: allocation flags passed to the underlying memory allocator
  *
  * For each cpu, populate and map pages [@page_start,@page_end) into
- * @chunk. The area is cleared on return.
+ * @chunk.
  *
  * CONTEXT:
  * pcpu_alloc_mutex, does GFP_KERNEL allocation.
  */
-static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size)
+static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
+                               int page_start, int page_end, gfp_t gfp)
 {
-        int page_start = PFN_DOWN(off);
-        int page_end = PFN_UP(off + size);
-        int free_end = page_start, unmap_end = page_start;
         struct page **pages;
-        unsigned long *populated;
-        unsigned int cpu;
-        int rs, re, rc;
-
-        /* quick path, check whether all pages are already there */
-        rs = page_start;
-        pcpu_next_pop(chunk, &rs, &re, page_end);
-        if (rs == page_start && re == page_end)
-                goto clear;
-
-        /* need to allocate and map pages, this chunk can't be immutable */
-        WARN_ON(chunk->immutable);
 
-        pages = pcpu_get_pages_and_bitmap(chunk, &populated, true);
+        pages = pcpu_get_pages();
         if (!pages)
                 return -ENOMEM;
 
-        /* alloc and map */
-        pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) {
-                rc = pcpu_alloc_pages(chunk, pages, populated, rs, re);
-                if (rc)
-                        goto err_free;
-                free_end = re;
-        }
+        if (pcpu_alloc_pages(chunk, pages, page_start, page_end, gfp))
+                return -ENOMEM;
 
-        pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) {
-                rc = pcpu_map_pages(chunk, pages, populated, rs, re);
-                if (rc)
-                        goto err_unmap;
-                unmap_end = re;
+        if (pcpu_map_pages(chunk, pages, page_start, page_end)) {
+                pcpu_free_pages(chunk, pages, page_start, page_end);
+                return -ENOMEM;
         }
         pcpu_post_map_flush(chunk, page_start, page_end);
 
-        /* commit new bitmap */
-        bitmap_copy(chunk->populated, populated, pcpu_unit_pages);
-clear:
-        for_each_possible_cpu(cpu)
-                memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size);
         return 0;
-
-err_unmap:
-        pcpu_pre_unmap_flush(chunk, page_start, unmap_end);
-        pcpu_for_each_unpop_region(chunk, rs, re, page_start, unmap_end)
-                pcpu_unmap_pages(chunk, pages, populated, rs, re);
-        pcpu_post_unmap_tlb_flush(chunk, page_start, unmap_end);
-err_free:
-        pcpu_for_each_unpop_region(chunk, rs, re, page_start, free_end)
-                pcpu_free_pages(chunk, pages, populated, rs, re);
-        return rc;
 }
 
 /**
  * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk
  * @chunk: chunk to depopulate
- * @off: offset to the area to depopulate
- * @size: size of the area to depopulate in bytes
+ * @page_start: the start page
+ * @page_end: the end page
  *
  * For each cpu, depopulate and unmap pages [@page_start,@page_end)
- * from @chunk. If @flush is true, vcache is flushed before unmapping
- * and tlb after.
+ * from @chunk.
+ *
+ * Caller is required to call pcpu_post_unmap_tlb_flush() if not returning the
+ * region back to vmalloc() which will lazily flush the tlb.
  *
  * CONTEXT:
  * pcpu_alloc_mutex.
  */
-static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size)
+static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
+                                  int page_start, int page_end)
 {
-        int page_start = PFN_DOWN(off);
-        int page_end = PFN_UP(off + size);
         struct page **pages;
-        unsigned long *populated;
-        int rs, re;
-
-        /* quick path, check whether it's empty already */
-        rs = page_start;
-        pcpu_next_unpop(chunk, &rs, &re, page_end);
-        if (rs == page_start && re == page_end)
-                return;
-
-        /* immutable chunks can't be depopulated */
-        WARN_ON(chunk->immutable);
 
         /*
          * If control reaches here, there must have been at least one
         * successful population attempt so the temp pages array must
         * be available now.
         */
-        pages = pcpu_get_pages_and_bitmap(chunk, &populated, false);
+        pages = pcpu_get_pages();
         BUG_ON(!pages);
 
         /* unmap and free */
         pcpu_pre_unmap_flush(chunk, page_start, page_end);
 
-        pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end)
-                pcpu_unmap_pages(chunk, pages, populated, rs, re);
-
-        /* no need to flush tlb, vmalloc will handle it lazily */
-
-        pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end)
-                pcpu_free_pages(chunk, pages, populated, rs, re);
+        pcpu_unmap_pages(chunk, pages, page_start, page_end);
 
-        /* commit new bitmap */
-        bitmap_copy(chunk->populated, populated, pcpu_unit_pages);
+        pcpu_free_pages(chunk, pages, page_start, page_end);
 }
 
-static struct pcpu_chunk *pcpu_create_chunk(void)
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
 {
         struct pcpu_chunk *chunk;
         struct vm_struct **vms;
 
-        chunk = pcpu_alloc_chunk();
+        chunk = pcpu_alloc_chunk(gfp);
         if (!chunk)
                 return NULL;
@@ -426,12 +348,22 @@ static struct pcpu_chunk *pcpu_create_chunk(void)
 
         chunk->data = vms;
         chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0];
+
+        pcpu_stats_chunk_alloc();
+        trace_percpu_create_chunk(chunk->base_addr);
+
         return chunk;
 }
 
 static void pcpu_destroy_chunk(struct pcpu_chunk *chunk)
 {
-        if (chunk && chunk->data)
+        if (!chunk)
+                return;
+
+        pcpu_stats_chunk_dealloc();
+        trace_percpu_destroy_chunk(chunk->base_addr);
+
+        if (chunk->data)
                 pcpu_free_vm_areas(chunk->data, pcpu_nr_groups);
         pcpu_free_chunk(chunk);
 }
@@ -446,3 +378,33 @@ static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai)
         /* no extra restriction */
         return 0;
 }
+
+/**
+ * pcpu_should_reclaim_chunk - determine if a chunk should go into reclaim
+ * @chunk: chunk of interest
+ *
+ * This is the entry point for percpu reclaim. If a chunk qualifies, it is then
+ * isolated and managed in separate lists at the back of pcpu_slot: sidelined
+ * and to_depopulate respectively. The to_depopulate list holds chunks slated
+ * for depopulation. They no longer contribute to pcpu_nr_empty_pop_pages once
+ * they are on this list. Once depopulated, they are moved onto the sidelined
+ * list which enables them to be pulled back in for allocation if no other chunk
+ * can suffice the allocation.
+ */
+static bool pcpu_should_reclaim_chunk(struct pcpu_chunk *chunk)
+{
+        /* do not reclaim either the first chunk or reserved chunk */
+        if (chunk == pcpu_first_chunk || chunk == pcpu_reserved_chunk)
+                return false;
+
+        /*
+         * If it is isolated, it may be on the sidelined list so move it back to
+         * the to_depopulate list. If we hit at least 1/4 pages empty pages AND
+         * there is no system-wide shortage of empty pages aside from this
+         * chunk, move it to the to_depopulate list.
+         */
+        return ((chunk->isolated && chunk->nr_empty_pop_pages) ||
+                (pcpu_nr_empty_pop_pages >
+                 (PCPU_EMPTY_POP_PAGES_HIGH + chunk->nr_empty_pop_pages) &&
+                 chunk->nr_empty_pop_pages >= chunk->nr_pages / 4));
+}
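
Worked example: the reclaim test added at the end of the diff combines two conditions. Either the chunk is already isolated and still holds empty populated pages, or the chunk is at least a quarter empty and the rest of the system would still keep more than PCPU_EMPTY_POP_PAGES_HIGH empty pages without it. The sketch below is a minimal userspace illustration of that arithmetic, not kernel code: struct chunk, should_reclaim(), EMPTY_POP_PAGES_HIGH (assumed to be 4) and the sample numbers are hypothetical stand-ins for struct pcpu_chunk, pcpu_should_reclaim_chunk(), PCPU_EMPTY_POP_PAGES_HIGH and pcpu_nr_empty_pop_pages.

#include <stdbool.h>
#include <stdio.h>

#define EMPTY_POP_PAGES_HIGH    4       /* stand-in for PCPU_EMPTY_POP_PAGES_HIGH (assumed value) */

struct chunk {                          /* hypothetical mirror of the fields used by the check */
        bool isolated;
        int nr_empty_pop_pages;
        int nr_pages;
};

/* mirrors the shape of pcpu_should_reclaim_chunk(), minus the first/reserved chunk test */
static bool should_reclaim(const struct chunk *c, int global_empty_pop_pages)
{
        /* an isolated chunk that still has empty pages goes back to the to_depopulate list */
        if (c->isolated && c->nr_empty_pop_pages)
                return true;

        /*
         * Otherwise both conditions must hold: excluding this chunk the system
         * keeps more than EMPTY_POP_PAGES_HIGH empty pages, and the chunk
         * itself is at least a quarter empty.
         */
        return global_empty_pop_pages >
                       EMPTY_POP_PAGES_HIGH + c->nr_empty_pop_pages &&
               c->nr_empty_pop_pages >= c->nr_pages / 4;
}

int main(void)
{
        struct chunk c = { .isolated = false, .nr_empty_pop_pages = 8, .nr_pages = 16 };

        /* 20 - 8 = 12 empty pages elsewhere (> 4) and 8 >= 16 / 4, so: yes */
        printf("reclaim: %s\n", should_reclaim(&c, 20) ? "yes" : "no");
        return 0;
}

With 8 of 16 pages empty and 20 empty pages system-wide, both halves of the second condition hold, so such a chunk would be moved to the to_depopulate list; once depopulated it migrates to the sidelined list as described in the kerneldoc above.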
