summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-01-20 10:41:01 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2022-01-20 10:41:01 +0200
commitf4484d138b31e8fa1ba410363b5b9664f68974af (patch)
tree6d776b1212604d3b16df499ae6e455e4a20544db /mm
parent1d1df41c5a33359a00e919d54eaebfb789711fdc (diff)
parentb1e78ef3be2533973953a35a56739fda7325875c (diff)
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton: "55 patches. Subsystems affected by this patch series: percpu, procfs, sysctl, misc, core-kernel, get_maintainer, lib, checkpatch, binfmt, nilfs2, hfs, fat, adfs, panic, delayacct, kconfig, kcov, and ubsan" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (55 commits) lib: remove redundant assignment to variable ret ubsan: remove CONFIG_UBSAN_OBJECT_SIZE kcov: fix generic Kconfig dependencies if ARCH_WANTS_NO_INSTR lib/Kconfig.debug: make TEST_KMOD depend on PAGE_SIZE_LESS_THAN_256KB btrfs: use generic Kconfig option for 256kB page size limit arch/Kconfig: split PAGE_SIZE_LESS_THAN_256KB from PAGE_SIZE_LESS_THAN_64KB configs: introduce debug.config for CI-like setup delayacct: track delays from memory compact Documentation/accounting/delay-accounting.rst: add thrashing page cache and direct compact delayacct: cleanup flags in struct task_delay_info and functions use it delayacct: fix incomplete disable operation when switch enable to disable delayacct: support swapin delay accounting for swapping without blkio panic: remove oops_id panic: use error_report_end tracepoint on warnings fs/adfs: remove unneeded variable make code cleaner FAT: use io_schedule_timeout() instead of congestion_wait() hfsplus: use struct_group_attr() for memcpy() region nilfs2: remove redundant pointer sbufs fs/binfmt_elf: use PT_LOAD p_align values for static PIE const_structs.checkpatch: add frequently used ops structs ...
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig12
-rw-r--r--mm/memory.c4
-rw-r--r--mm/page_alloc.c3
-rw-r--r--mm/page_io.c3
-rw-r--r--mm/percpu.c154
5 files changed, 138 insertions, 38 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 53d7485fc38f..a99bd499ef51 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -432,6 +432,18 @@ config NEED_PER_CPU_KM
bool
default y
+config NEED_PER_CPU_EMBED_FIRST_CHUNK
+ bool
+
+config NEED_PER_CPU_PAGE_FIRST_CHUNK
+ bool
+
+config USE_PERCPU_NUMA_NODE_ID
+ bool
+
+config HAVE_SETUP_PER_CPU_AREA
+ bool
+
config CLEANCACHE
bool "Enable cleancache driver to cache clean pages if tmem is present"
help
diff --git a/mm/memory.c b/mm/memory.c
index f306e698a1e3..c125c4969913 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3529,7 +3529,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
if (unlikely(!si))
goto out;
- delayacct_set_flag(current, DELAYACCT_PF_SWAPIN);
page = lookup_swap_cache(entry, vma, vmf->address);
swapcache = page;
@@ -3577,7 +3576,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
vmf->address, &vmf->ptl);
if (likely(pte_same(*vmf->pte, vmf->orig_pte)))
ret = VM_FAULT_OOM;
- delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN);
goto unlock;
}
@@ -3591,13 +3589,11 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
* owner processes (which may be unknown at hwpoison time)
*/
ret = VM_FAULT_HWPOISON;
- delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN);
goto out_release;
}
locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags);
- delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN);
if (!locked) {
ret |= VM_FAULT_RETRY;
goto out_release;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d4205e5e41d1..3589febc6d31 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -74,6 +74,7 @@
#include <linux/padata.h>
#include <linux/khugepaged.h>
#include <linux/buffer_head.h>
+#include <linux/delayacct.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -4365,6 +4366,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
return NULL;
psi_memstall_enter(&pflags);
+ delayacct_compact_start();
noreclaim_flag = memalloc_noreclaim_save();
*compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
@@ -4372,6 +4374,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
memalloc_noreclaim_restore(noreclaim_flag);
psi_memstall_leave(&pflags);
+ delayacct_compact_end();
if (*compact_result == COMPACT_SKIPPED)
return NULL;
diff --git a/mm/page_io.c b/mm/page_io.c
index 9725c7e1eeea..0bf8e40f4e57 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -25,6 +25,7 @@
#include <linux/psi.h>
#include <linux/uio.h>
#include <linux/sched/task.h>
+#include <linux/delayacct.h>
void end_swap_bio_write(struct bio *bio)
{
@@ -370,6 +371,7 @@ int swap_readpage(struct page *page, bool synchronous)
* significant part of overall IO time.
*/
psi_memstall_enter(&pflags);
+ delayacct_swapin_start();
if (frontswap_load(page) == 0) {
SetPageUptodate(page);
@@ -432,6 +434,7 @@ int swap_readpage(struct page *page, bool synchronous)
out:
psi_memstall_leave(&pflags);
+ delayacct_swapin_end();
return ret;
}
diff --git a/mm/percpu.c b/mm/percpu.c
index 4199a0604c32..a188ac9a267d 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -2992,6 +2992,42 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
return ai;
}
+
+static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
+{
+ const unsigned long goal = __pa(MAX_DMA_ADDRESS);
+#ifdef CONFIG_NUMA
+ int node = NUMA_NO_NODE;
+ void *ptr;
+
+ if (cpu_to_nd_fn)
+ node = cpu_to_nd_fn(cpu);
+
+ if (node == NUMA_NO_NODE || !node_online(node) || !NODE_DATA(node)) {
+ ptr = memblock_alloc_from(size, align, goal);
+ pr_info("cpu %d has no node %d or node-local memory\n",
+ cpu, node);
+ pr_debug("per cpu data for cpu%d %zu bytes at 0x%llx\n",
+ cpu, size, (u64)__pa(ptr));
+ } else {
+ ptr = memblock_alloc_try_nid(size, align, goal,
+ MEMBLOCK_ALLOC_ACCESSIBLE,
+ node);
+
+ pr_debug("per cpu data for cpu%d %zu bytes on node%d at 0x%llx\n",
+ cpu, size, node, (u64)__pa(ptr));
+ }
+ return ptr;
+#else
+ return memblock_alloc_from(size, align, goal);
+#endif
+}
+
+static void __init pcpu_fc_free(void *ptr, size_t size)
+{
+ memblock_free(ptr, size);
+}
#endif /* BUILD_EMBED_FIRST_CHUNK || BUILD_PAGE_FIRST_CHUNK */
#if defined(BUILD_EMBED_FIRST_CHUNK)
@@ -3001,14 +3037,13 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
* @dyn_size: minimum free size for dynamic allocation in bytes
* @atom_size: allocation atom size
* @cpu_distance_fn: callback to determine distance between cpus, optional
- * @alloc_fn: function to allocate percpu page
- * @free_fn: function to free percpu page
+ * @cpu_to_nd_fn: callback to convert cpu to it's node, optional
*
* This is a helper to ease setting up embedded first percpu chunk and
* can be called where pcpu_setup_first_chunk() is expected.
*
* If this function is used to setup the first chunk, it is allocated
- * by calling @alloc_fn and used as-is without being mapped into
+ * by calling pcpu_fc_alloc and used as-is without being mapped into
* vmalloc area. Allocations are always whole multiples of @atom_size
* aligned to @atom_size.
*
@@ -3022,7 +3057,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
* @dyn_size specifies the minimum dynamic area size.
*
* If the needed size is smaller than the minimum or specified unit
- * size, the leftover is returned using @free_fn.
+ * size, the leftover is returned using pcpu_fc_free.
*
* RETURNS:
* 0 on success, -errno on failure.
@@ -3030,8 +3065,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
size_t atom_size,
pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
- pcpu_fc_alloc_fn_t alloc_fn,
- pcpu_fc_free_fn_t free_fn)
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
{
void *base = (void *)ULONG_MAX;
void **areas = NULL;
@@ -3066,7 +3100,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
BUG_ON(cpu == NR_CPUS);
/* allocate space for the whole group */
- ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size);
+ ptr = pcpu_fc_alloc(cpu, gi->nr_units * ai->unit_size, atom_size, cpu_to_nd_fn);
if (!ptr) {
rc = -ENOMEM;
goto out_free_areas;
@@ -3105,12 +3139,12 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) {
if (gi->cpu_map[i] == NR_CPUS) {
/* unused unit, free whole */
- free_fn(ptr, ai->unit_size);
+ pcpu_fc_free(ptr, ai->unit_size);
continue;
}
/* copy and return the unused part */
memcpy(ptr, __per_cpu_load, ai->static_size);
- free_fn(ptr + size_sum, ai->unit_size - size_sum);
+ pcpu_fc_free(ptr + size_sum, ai->unit_size - size_sum);
}
}
@@ -3129,7 +3163,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
out_free_areas:
for (group = 0; group < ai->nr_groups; group++)
if (areas[group])
- free_fn(areas[group],
+ pcpu_fc_free(areas[group],
ai->groups[group].nr_units * ai->unit_size);
out_free:
pcpu_free_alloc_info(ai);
@@ -3140,12 +3174,79 @@ out_free:
#endif /* BUILD_EMBED_FIRST_CHUNK */
#ifdef BUILD_PAGE_FIRST_CHUNK
+#include <asm/pgalloc.h>
+
+#ifndef P4D_TABLE_SIZE
+#define P4D_TABLE_SIZE PAGE_SIZE
+#endif
+
+#ifndef PUD_TABLE_SIZE
+#define PUD_TABLE_SIZE PAGE_SIZE
+#endif
+
+#ifndef PMD_TABLE_SIZE
+#define PMD_TABLE_SIZE PAGE_SIZE
+#endif
+
+#ifndef PTE_TABLE_SIZE
+#define PTE_TABLE_SIZE PAGE_SIZE
+#endif
+void __init __weak pcpu_populate_pte(unsigned long addr)
+{
+ pgd_t *pgd = pgd_offset_k(addr);
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ if (pgd_none(*pgd)) {
+ p4d_t *new;
+
+ new = memblock_alloc(P4D_TABLE_SIZE, P4D_TABLE_SIZE);
+ if (!new)
+ goto err_alloc;
+ pgd_populate(&init_mm, pgd, new);
+ }
+
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d)) {
+ pud_t *new;
+
+ new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
+ if (!new)
+ goto err_alloc;
+ p4d_populate(&init_mm, p4d, new);
+ }
+
+ pud = pud_offset(p4d, addr);
+ if (pud_none(*pud)) {
+ pmd_t *new;
+
+ new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
+ if (!new)
+ goto err_alloc;
+ pud_populate(&init_mm, pud, new);
+ }
+
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_present(*pmd)) {
+ pte_t *new;
+
+ new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
+ if (!new)
+ goto err_alloc;
+ pmd_populate_kernel(&init_mm, pmd, new);
+ }
+
+ return;
+
+err_alloc:
+ panic("%s: Failed to allocate memory\n", __func__);
+}
+
/**
* pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
* @reserved_size: the size of reserved percpu area in bytes
- * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE
- * @free_fn: function to free percpu page, always called with PAGE_SIZE
- * @populate_pte_fn: function to populate pte
+ * @cpu_to_nd_fn: callback to convert cpu to it's node, optional
*
* This is a helper to ease setting up page-remapped first percpu
* chunk and can be called where pcpu_setup_first_chunk() is expected.
@@ -3156,10 +3257,7 @@ out_free:
* RETURNS:
* 0 on success, -errno on failure.
*/
-int __init pcpu_page_first_chunk(size_t reserved_size,
- pcpu_fc_alloc_fn_t alloc_fn,
- pcpu_fc_free_fn_t free_fn,
- pcpu_fc_populate_pte_fn_t populate_pte_fn)
+int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
{
static struct vm_struct vm;
struct pcpu_alloc_info *ai;
@@ -3201,7 +3299,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
for (i = 0; i < unit_pages; i++) {
void *ptr;
- ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE);
+ ptr = pcpu_fc_alloc(cpu, PAGE_SIZE, PAGE_SIZE, cpu_to_nd_fn);
if (!ptr) {
pr_warn("failed to allocate %s page for cpu%u\n",
psize_str, cpu);
@@ -3223,7 +3321,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
(unsigned long)vm.addr + unit * ai->unit_size;
for (i = 0; i < unit_pages; i++)
- populate_pte_fn(unit_addr + (i << PAGE_SHIFT));
+ pcpu_populate_pte(unit_addr + (i << PAGE_SHIFT));
/* pte already populated, the following shouldn't fail */
rc = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages],
@@ -3253,7 +3351,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
enomem:
while (--j >= 0)
- free_fn(page_address(pages[j]), PAGE_SIZE);
+ pcpu_fc_free(page_address(pages[j]), PAGE_SIZE);
rc = -ENOMEM;
out_free_ar:
memblock_free(pages, pages_size);
@@ -3278,17 +3376,6 @@ out_free_ar:
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
-static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size,
- size_t align)
-{
- return memblock_alloc_from(size, align, __pa(MAX_DMA_ADDRESS));
-}
-
-static void __init pcpu_dfl_fc_free(void *ptr, size_t size)
-{
- memblock_free(ptr, size);
-}
-
void __init setup_per_cpu_areas(void)
{
unsigned long delta;
@@ -3299,9 +3386,8 @@ void __init setup_per_cpu_areas(void)
* Always reserve area for module percpu variables. That's
* what the legacy allocator did.
*/
- rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
- PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL,
- pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
+ rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, PERCPU_DYNAMIC_RESERVE,
+ PAGE_SIZE, NULL, NULL);
if (rc < 0)
panic("Failed to initialize percpu areas.");