From 25efb2ffdf991177e740b2f63e92b4ec7d310a92 Mon Sep 17 00:00:00 2001 From: Simon Gander Date: Fri, 10 Apr 2020 14:32:16 -0700 Subject: hfsplus: fix crash and filesystem corruption when deleting files When removing files containing extended attributes, the hfsplus driver may remove the wrong entries from the attributes b-tree, causing major filesystem damage and in some cases even kernel crashes. To remove a file, all its extended attributes have to be removed as well. The driver does this by looking up all keys in the attributes b-tree with the cnid of the file. Each of these entries then gets deleted using the key used for searching, which doesn't contain the attribute's name when it should. Since the key doesn't contain the name, the deletion routine will not find the correct entry and instead remove the one in front of it. If parent nodes have to be modified, these become corrupt as well. This causes invalid links and unsorted entries that not even macOS's fsck_hfs is able to fix. To fix this, modify the search key before an entry is deleted from the attributes b-tree by copying the found entry's key into the search key, therefore ensuring that the correct entry gets removed from the tree. Signed-off-by: Simon Gander Signed-off-by: Andrew Morton Reviewed-by: Anton Altaparmakov Cc: Link: http://lkml.kernel.org/r/20200327155541.1521-1-simon@tuxera.com Signed-off-by: Linus Torvalds --- fs/hfsplus/attributes.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/hfsplus/attributes.c b/fs/hfsplus/attributes.c index e6d554476db4..eeebe80c6be4 100644 --- a/fs/hfsplus/attributes.c +++ b/fs/hfsplus/attributes.c @@ -292,6 +292,10 @@ static int __hfsplus_delete_attr(struct inode *inode, u32 cnid, return -ENOENT; } + /* Avoid btree corruption */ + hfs_bnode_read(fd->bnode, fd->search_key, + fd->keyoffset, fd->keylength); + err = hfs_brec_remove(fd); if (err) return err; -- cgit From 9b8b17541f13809d06f6f873325305ddbb760e3e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 10 Apr 2020 14:32:19 -0700 Subject: mm, memcg: do not high throttle allocators based on wraparound If a cgroup violates its memory.high constraints, we may end up unduly penalising it. For example, for the following hierarchy: A: max high, 20 usage A/B: 9 high, 10 usage A/C: max high, 10 usage We would end up doing the following calculation below when calculating high delay for A/B: A/B: 10 - 9 = 1... A: 20 - PAGE_COUNTER_MAX = 21, so set max_overage to 21. This gets worse with higher disparities in usage in the parent. I have no idea how this disappeared from the final version of the patch, but it is certainly Not Good(tm). This wasn't obvious in testing because, for a simple cgroup hierarchy with only one child, the result is usually roughly the same. It's only in more complex hierarchies that things go really awry (although still, the effects are limited to a maximum of 2 seconds in schedule_timeout_killable at a maximum). [chris@chrisdown.name: changelog] Fixes: e26733e0d0ec ("mm, memcg: throttle allocators based on ancestral memory.high") Signed-off-by: Jakub Kicinski Signed-off-by: Chris Down Signed-off-by: Andrew Morton Acked-by: Michal Hocko Cc: Johannes Weiner Cc: [5.4.x] Link: http://lkml.kernel.org/r/20200331152424.GA1019937@chrisdown.name Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 05b4ec2c6499..5beea03dd58a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2336,6 +2336,9 @@ static unsigned long calculate_high_delay(struct mem_cgroup *memcg, usage = page_counter_read(&memcg->memory); high = READ_ONCE(memcg->high); + if (usage <= high) + continue; + /* * Prevent division by 0 in overage calculation by acting as if * it was a threshold of 1 page -- cgit From b991cee567bf045097d9426719d7f1477bd7dc59 Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Fri, 10 Apr 2020 14:32:22 -0700 Subject: mm, slab_common: fix a typo in comment "eariler"->"earlier" There is a typo in comment, fix it. s/eariler/earlier/ Signed-off-by: Qiujun Huang Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Acked-by: Christoph Lameter Link: http://lkml.kernel.org/r/20200405160544.1246-1-hqjagain@gmail.com Signed-off-by: Linus Torvalds --- mm/slab_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 93ec4a574d8d..23c7500eea7d 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -731,7 +731,7 @@ static void kmemcg_rcufn(struct rcu_head *head) /* * We need to grab blocking locks. Bounce to ->work. The * work item shares the space with the RCU head and can't be - * initialized eariler. + * initialized earlier. */ INIT_WORK(&s->memcg_params.work, kmemcg_workfn); queue_work(memcg_kmem_cache_wq, &s->memcg_params.work); -- cgit From 2370ae4b1d5aa7eb70bd7539a420e791d4b0123b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 10 Apr 2020 14:32:25 -0700 Subject: docs: mm: slab.h: fix a broken cross-reference There is a typo at the cross-reference link, causing this warning: include/linux/slab.h:11: WARNING: undefined label: memory-allocation (if the link has no caption the label must precede a section header) Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Andrew Morton Cc: Jonathan Corbet Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Link: http://lkml.kernel.org/r/0aeac24235d356ebd935d11e147dcc6edbb6465c.1586359676.git.mchehab+huawei@kernel.org Signed-off-by: Linus Torvalds --- include/linux/slab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index 03a389358562..6d454886bcaf 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -501,7 +501,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) * :ref:`Documentation/core-api/mm-api.rst ` * * The recommended usage of the @flags is described at - * :ref:`Documentation/core-api/memory-allocation.rst ` + * :ref:`Documentation/core-api/memory-allocation.rst ` * * Below is a brief outline of the most useful GFP flags * -- cgit From e6a0a7ad1c2b6608e294fbbce60c42ba5a1304ce Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 10 Apr 2020 14:32:29 -0700 Subject: mm/page_alloc.c: fix kernel-doc warning Add description of function parameter 'mt' to fix kernel-doc warning: mm/page_alloc.c:3246: warning: Function parameter or member 'mt' not described in '__putback_isolated_page' Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Acked-by: Pankaj Gupta Link: http://lkml.kernel.org/r/02998bd4-0b82-2f15-2570-f86130304d1e@infradead.org Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 114c56c3685d..d98441ab1036 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3224,6 +3224,7 @@ int __isolate_free_page(struct page *page, unsigned int order) * __putback_isolated_page - Return a now-isolated page back where we got it * @page: Page that was isolated * @order: Order of the isolated page + * @mt: The page's pageblock's migratetype * * This function is meant to return a page pulled from the free lists via * __isolate_free_page back to the free lists they were pulled from. -- cgit From 8b885f53b03e3b14003083daf64a6ed16bf561b3 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 10 Apr 2020 14:32:32 -0700 Subject: mm/page_alloc: make pcpu_drain_mutex and pcpu_drain static Fix the following sparse warning: mm/page_alloc.c:106:1: warning: symbol 'pcpu_drain_mutex' was not declared. Should it be static? mm/page_alloc.c:107:1: warning: symbol '__pcpu_scope_pcpu_drain' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Andrew Morton Link: http://lkml.kernel.org/r/20200407023925.46438-1-yanaijie@huawei.com Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d98441ab1036..69827d4fa052 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -103,8 +103,8 @@ struct pcpu_drain { struct zone *zone; struct work_struct work; }; -DEFINE_MUTEX(pcpu_drain_mutex); -DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain); +static DEFINE_MUTEX(pcpu_drain_mutex); +static DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain); #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY volatile unsigned long latent_entropy __latent_entropy; -- cgit From 783fda856e1034dee90a873f7654c418212d12d7 Mon Sep 17 00:00:00 2001 From: Changwei Ge Date: Fri, 10 Apr 2020 14:32:38 -0700 Subject: ocfs2: no need try to truncate file beyond i_size Linux fallocate(2) with FALLOC_FL_PUNCH_HOLE mode set, its offset can exceed the inode size. Ocfs2 now doesn't allow that offset beyond inode size. This restriction is not necessary and violates fallocate(2) semantics. If fallocate(2) offset is beyond inode size, just return success and do nothing further. Otherwise, ocfs2 will crash the kernel. kernel BUG at fs/ocfs2//alloc.c:7264! ocfs2_truncate_inline+0x20f/0x360 [ocfs2] ocfs2_remove_inode_range+0x23c/0xcb0 [ocfs2] __ocfs2_change_file_space+0x4a5/0x650 [ocfs2] ocfs2_fallocate+0x83/0xa0 [ocfs2] vfs_fallocate+0x148/0x230 SyS_fallocate+0x48/0x80 do_syscall_64+0x79/0x170 Signed-off-by: Changwei Ge Signed-off-by: Andrew Morton Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Link: http://lkml.kernel.org/r/20200407082754.17565-1-chge@linux.alibaba.com Signed-off-by: Linus Torvalds --- fs/ocfs2/alloc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 65b3abbcce4e..2f834add165b 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -7402,6 +7402,10 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct ocfs2_inline_data *idata = &di->id2.i_data; + /* No need to punch hole beyond i_size. */ + if (start >= i_size_read(inode)) + return 0; + if (end > i_size_read(inode)) end = i_size_read(inode); -- cgit From 8676af1ff2d28e64e5636147821bda7524cf007d Mon Sep 17 00:00:00 2001 From: Aslan Bakirov Date: Fri, 10 Apr 2020 14:32:42 -0700 Subject: mm: cma: NUMA node interface I've noticed that there is no interface exposed by CMA which would let me to declare contigous memory on particular NUMA node. This patchset adds the ability to try to allocate contiguous memory on a specific node. It will fallback to other nodes if the specified one doesn't work. Implement a new method for declaring contigous memory on particular node and keep cma_declare_contiguous() as a wrapper. [akpm@linux-foundation.org: build fix] Signed-off-by: Aslan Bakirov Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Acked-by: Michal Hocko Cc: Andreas Schaufler Cc: Mike Kravetz Cc: Rik van Riel Cc: Joonsoo Kim Link: http://lkml.kernel.org/r/20200407163840.92263-2-guro@fb.com Signed-off-by: Linus Torvalds --- include/linux/cma.h | 14 ++++++++++++-- include/linux/memblock.h | 3 +++ mm/cma.c | 16 +++++++++------- mm/memblock.c | 2 +- 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/include/linux/cma.h b/include/linux/cma.h index 190184b5ff32..6ff79fefd01f 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -4,6 +4,7 @@ #include #include +#include /* * There is always at least global CMA area and a few optional @@ -24,10 +25,19 @@ extern phys_addr_t cma_get_base(const struct cma *cma); extern unsigned long cma_get_size(const struct cma *cma); extern const char *cma_get_name(const struct cma *cma); -extern int __init cma_declare_contiguous(phys_addr_t base, +extern int __init cma_declare_contiguous_nid(phys_addr_t base, phys_addr_t size, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, - bool fixed, const char *name, struct cma **res_cma); + bool fixed, const char *name, struct cma **res_cma, + int nid); +static inline int __init cma_declare_contiguous(phys_addr_t base, + phys_addr_t size, phys_addr_t limit, + phys_addr_t alignment, unsigned int order_per_bit, + bool fixed, const char *name, struct cma **res_cma) +{ + return cma_declare_contiguous_nid(base, size, limit, alignment, + order_per_bit, fixed, name, res_cma, NUMA_NO_NODE); +} extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, unsigned int order_per_bit, const char *name, diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 079d17d96410..6bc37a731d27 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -348,6 +348,9 @@ static inline int memblock_get_region_node(const struct memblock_region *r) phys_addr_t memblock_phys_alloc_range(phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t end); +phys_addr_t memblock_alloc_range_nid(phys_addr_t size, + phys_addr_t align, phys_addr_t start, + phys_addr_t end, int nid, bool exact_nid); phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); static inline phys_addr_t memblock_phys_alloc(phys_addr_t size, diff --git a/mm/cma.c b/mm/cma.c index be55d1988c67..0463ad2ce06b 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -220,7 +220,7 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, } /** - * cma_declare_contiguous() - reserve custom contiguous area + * cma_declare_contiguous_nid() - reserve custom contiguous area * @base: Base address of the reserved area optional, use 0 for any * @size: Size of the reserved area (in bytes), * @limit: End address of the reserved memory (optional, 0 for any). @@ -229,6 +229,7 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, * @fixed: hint about where to place the reserved area * @name: The name of the area. See function cma_init_reserved_mem() * @res_cma: Pointer to store the created cma region. + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node * * This function reserves memory from early allocator. It should be * called by arch specific code once the early allocator (memblock or bootmem) @@ -238,10 +239,11 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, * If @fixed is true, reserve contiguous area at exactly @base. If false, * reserve in range from @base to @limit. */ -int __init cma_declare_contiguous(phys_addr_t base, +int __init cma_declare_contiguous_nid(phys_addr_t base, phys_addr_t size, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, - bool fixed, const char *name, struct cma **res_cma) + bool fixed, const char *name, struct cma **res_cma, + int nid) { phys_addr_t memblock_end = memblock_end_of_DRAM(); phys_addr_t highmem_start; @@ -336,14 +338,14 @@ int __init cma_declare_contiguous(phys_addr_t base, * memory in case of failure. */ if (base < highmem_start && limit > highmem_start) { - addr = memblock_phys_alloc_range(size, alignment, - highmem_start, limit); + addr = memblock_alloc_range_nid(size, alignment, + highmem_start, limit, nid, false); limit = highmem_start; } if (!addr) { - addr = memblock_phys_alloc_range(size, alignment, base, - limit); + addr = memblock_alloc_range_nid(size, alignment, base, + limit, nid, false); if (!addr) { ret = -ENOMEM; goto err; diff --git a/mm/memblock.c b/mm/memblock.c index 4d06bbaded0f..c79ba6f9920c 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1349,7 +1349,7 @@ __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, * Return: * Physical address of allocated memory block on success, %0 on failure. */ -static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, +phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t end, int nid, bool exact_nid) -- cgit From cf11e85fc08cc6a4fe3ac2ba2e610c962bf20bc3 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 10 Apr 2020 14:32:45 -0700 Subject: mm: hugetlb: optionally allocate gigantic hugepages using cma Commit 944d9fec8d7a ("hugetlb: add support for gigantic page allocation at runtime") has added the run-time allocation of gigantic pages. However it actually works only at early stages of the system loading, when the majority of memory is free. After some time the memory gets fragmented by non-movable pages, so the chances to find a contiguous 1GB block are getting close to zero. Even dropping caches manually doesn't help a lot. At large scale rebooting servers in order to allocate gigantic hugepages is quite expensive and complex. At the same time keeping some constant percentage of memory in reserved hugepages even if the workload isn't using it is a big waste: not all workloads can benefit from using 1 GB pages. The following solution can solve the problem: 1) On boot time a dedicated cma area* is reserved. The size is passed as a kernel argument. 2) Run-time allocations of gigantic hugepages are performed using the cma allocator and the dedicated cma area In this case gigantic hugepages can be allocated successfully with a high probability, however the memory isn't completely wasted if nobody is using 1GB hugepages: it can be used for pagecache, anon memory, THPs, etc. * On a multi-node machine a per-node cma area is allocated on each node. Following gigantic hugetlb allocation are using the first available numa node if the mask isn't specified by a user. Usage: 1) configure the kernel to allocate a cma area for hugetlb allocations: pass hugetlb_cma=10G as a kernel argument 2) allocate hugetlb pages as usual, e.g. echo 10 > /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages If the option isn't enabled or the allocation of the cma area failed, the current behavior of the system is preserved. x86 and arm-64 are covered by this patch, other architectures can be trivially added later. The patch contains clean-ups and fixes proposed and implemented by Aslan Bakirov and Randy Dunlap. It also contains ideas and suggestions proposed by Rik van Riel, Michal Hocko and Mike Kravetz. Thanks! Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Tested-by: Andreas Schaufler Acked-by: Mike Kravetz Acked-by: Michal Hocko Cc: Aslan Bakirov Cc: Randy Dunlap Cc: Rik van Riel Cc: Joonsoo Kim Link: http://lkml.kernel.org/r/20200407163840.92263-3-guro@fb.com Signed-off-by: Linus Torvalds --- Documentation/admin-guide/kernel-parameters.txt | 8 ++ arch/arm64/mm/init.c | 6 ++ arch/x86/kernel/setup.c | 4 + include/linux/hugetlb.h | 12 +++ mm/hugetlb.c | 109 ++++++++++++++++++++++++ 5 files changed, 139 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 86aae1fa099a..d7df9a8302c4 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1475,6 +1475,14 @@ hpet_mmap= [X86, HPET_MMAP] Allow userspace to mmap HPET registers. Default set by CONFIG_HPET_MMAP_DEFAULT. + hugetlb_cma= [HW] The size of a cma area used for allocation + of gigantic hugepages. + Format: nn[KMGTPE] + + Reserve a cma area of given size and allocate gigantic + hugepages using the cma allocator. If enabled, the + boot-time allocation of gigantic hugepages is skipped. + hugepages= [HW,X86-32,IA-64] HugeTLB pages to allocate at boot. hugepagesz= [HW,IA-64,PPC,X86-64] The size of the HugeTLB pages. On x86-64 and powerpc, this option can be specified diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index b65dffdfb201..e42727e3568e 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -457,6 +458,11 @@ void __init arm64_memblock_init(void) high_memory = __va(memblock_end_of_DRAM() - 1) + 1; dma_contiguous_reserve(arm64_dma32_phys_limit); + +#ifdef CONFIG_ARM64_4K_PAGES + hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); +#endif + } void __init bootmem_init(void) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e6b545047f38..4b3fa6cd3106 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -1157,6 +1158,9 @@ void __init setup_arch(char **cmdline_p) initmem_init(); dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT); + if (boot_cpu_has(X86_FEATURE_GBPAGES)) + hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); + /* * Reserve memory for crash kernel after SRAT is parsed so that it * won't consume hotpluggable memory. diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 5ea05879a0a9..43a1cef8f0f1 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -895,4 +895,16 @@ static inline spinlock_t *huge_pte_lock(struct hstate *h, return ptl; } +#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA) +extern void __init hugetlb_cma_reserve(int order); +extern void __init hugetlb_cma_check(void); +#else +static inline __init void hugetlb_cma_reserve(int order) +{ +} +static inline __init void hugetlb_cma_check(void) +{ +} +#endif + #endif /* _LINUX_HUGETLB_H */ diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f5fb53fdfa02..cd459155d28a 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -44,6 +45,9 @@ int hugetlb_max_hstate __read_mostly; unsigned int default_hstate_idx; struct hstate hstates[HUGE_MAX_HSTATE]; + +static struct cma *hugetlb_cma[MAX_NUMNODES]; + /* * Minimum page order among possible hugepage sizes, set to a proper value * at boot time. @@ -1228,6 +1232,14 @@ static void destroy_compound_gigantic_page(struct page *page, static void free_gigantic_page(struct page *page, unsigned int order) { + /* + * If the page isn't allocated using the cma allocator, + * cma_release() returns false. + */ + if (IS_ENABLED(CONFIG_CMA) && + cma_release(hugetlb_cma[page_to_nid(page)], page, 1 << order)) + return; + free_contig_range(page_to_pfn(page), 1 << order); } @@ -1237,6 +1249,21 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask, { unsigned long nr_pages = 1UL << huge_page_order(h); + if (IS_ENABLED(CONFIG_CMA)) { + struct page *page; + int node; + + for_each_node_mask(node, *nodemask) { + if (!hugetlb_cma[node]) + continue; + + page = cma_alloc(hugetlb_cma[node], nr_pages, + huge_page_order(h), true); + if (page) + return page; + } + } + return alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask); } @@ -1281,8 +1308,14 @@ static void update_and_free_page(struct hstate *h, struct page *page) set_compound_page_dtor(page, NULL_COMPOUND_DTOR); set_page_refcounted(page); if (hstate_is_gigantic(h)) { + /* + * Temporarily drop the hugetlb_lock, because + * we might block in free_gigantic_page(). + */ + spin_unlock(&hugetlb_lock); destroy_compound_gigantic_page(page, huge_page_order(h)); free_gigantic_page(page, huge_page_order(h)); + spin_lock(&hugetlb_lock); } else { __free_pages(page, huge_page_order(h)); } @@ -2539,6 +2572,10 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h) for (i = 0; i < h->max_huge_pages; ++i) { if (hstate_is_gigantic(h)) { + if (IS_ENABLED(CONFIG_CMA) && hugetlb_cma[0]) { + pr_warn_once("HugeTLB: hugetlb_cma is enabled, skip boot time allocation\n"); + break; + } if (!alloc_bootmem_huge_page(h)) break; } else if (!alloc_pool_huge_page(h, @@ -3194,6 +3231,7 @@ static int __init hugetlb_init(void) default_hstate.max_huge_pages = default_hstate_max_huge_pages; } + hugetlb_cma_check(); hugetlb_init_hstates(); gather_bootmem_prealloc(); report_hugepages(); @@ -5506,3 +5544,74 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason) spin_unlock(&hugetlb_lock); } } + +#ifdef CONFIG_CMA +static unsigned long hugetlb_cma_size __initdata; +static bool cma_reserve_called __initdata; + +static int __init cmdline_parse_hugetlb_cma(char *p) +{ + hugetlb_cma_size = memparse(p, &p); + return 0; +} + +early_param("hugetlb_cma", cmdline_parse_hugetlb_cma); + +void __init hugetlb_cma_reserve(int order) +{ + unsigned long size, reserved, per_node; + int nid; + + cma_reserve_called = true; + + if (!hugetlb_cma_size) + return; + + if (hugetlb_cma_size < (PAGE_SIZE << order)) { + pr_warn("hugetlb_cma: cma area should be at least %lu MiB\n", + (PAGE_SIZE << order) / SZ_1M); + return; + } + + /* + * If 3 GB area is requested on a machine with 4 numa nodes, + * let's allocate 1 GB on first three nodes and ignore the last one. + */ + per_node = DIV_ROUND_UP(hugetlb_cma_size, nr_online_nodes); + pr_info("hugetlb_cma: reserve %lu MiB, up to %lu MiB per node\n", + hugetlb_cma_size / SZ_1M, per_node / SZ_1M); + + reserved = 0; + for_each_node_state(nid, N_ONLINE) { + int res; + + size = min(per_node, hugetlb_cma_size - reserved); + size = round_up(size, PAGE_SIZE << order); + + res = cma_declare_contiguous_nid(0, size, 0, PAGE_SIZE << order, + 0, false, "hugetlb", + &hugetlb_cma[nid], nid); + if (res) { + pr_warn("hugetlb_cma: reservation failed: err %d, node %d", + res, nid); + continue; + } + + reserved += size; + pr_info("hugetlb_cma: reserved %lu MiB on node %d\n", + size / SZ_1M, nid); + + if (reserved >= hugetlb_cma_size) + break; + } +} + +void __init hugetlb_cma_check(void) +{ + if (!hugetlb_cma_size || cma_reserve_called) + return; + + pr_warn("hugetlb_cma: the option isn't supported by current arch\n"); +} + +#endif /* CONFIG_CMA */ -- cgit From 09ef5283fd96ac424ef0e569626f359bf9ab86c9 Mon Sep 17 00:00:00 2001 From: Jaewon Kim Date: Fri, 10 Apr 2020 14:32:48 -0700 Subject: mm/mmap.c: initialize align_offset explicitly for vm_unmapped_area On passing requirement to vm_unmapped_area, arch_get_unmapped_area and arch_get_unmapped_area_topdown did not set align_offset. Internally on both unmapped_area and unmapped_area_topdown, if info->align_mask is 0, then info->align_offset was meaningless. But commit df529cabb7a2 ("mm: mmap: add trace point of vm_unmapped_area") always prints info->align_offset even though it is uninitialized. Fix this uninitialized value issue by setting it to 0 explicitly. Before: vm_unmapped_area: addr=0x755b155000 err=0 total_vm=0x15aaf0 flags=0x1 len=0x109000 lo=0x8000 hi=0x75eed48000 mask=0x0 ofs=0x4022 After: vm_unmapped_area: addr=0x74a4ca1000 err=0 total_vm=0x168ab1 flags=0x1 len=0x9000 lo=0x8000 hi=0x753d94b000 mask=0x0 ofs=0x0 Signed-off-by: Jaewon Kim Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Cc: Matthew Wilcox (Oracle) Cc: Michel Lespinasse Cc: Borislav Petkov Link: http://lkml.kernel.org/r/20200409094035.19457-1-jaewon31.kim@samsung.com Signed-off-by: Linus Torvalds --- mm/mmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/mmap.c b/mm/mmap.c index 8d77dbbb80fe..de07bbc0e21f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2123,6 +2123,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, info.low_limit = mm->mmap_base; info.high_limit = mmap_end; info.align_mask = 0; + info.align_offset = 0; return vm_unmapped_area(&info); } #endif @@ -2164,6 +2165,7 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, info.low_limit = max(PAGE_SIZE, mmap_min_addr); info.high_limit = arch_get_mmap_base(addr, mm->mmap_base); info.align_mask = 0; + info.align_offset = 0; addr = vm_unmapped_area(&info); /* -- cgit From 8efd6f5b1732c4ac88b4bb6908d481d95804fa1c Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Fri, 10 Apr 2020 14:32:51 -0700 Subject: mm/memory.c: refactor insert_page to prepare for batched-lock insert Add helper methods for vm_insert_page()/insert_page() to prepare for vm_insert_pages(), which batch-inserts pages to reduce spinlock operations when inserting multiple consecutive pages into the user page table. The intention of this patch-set is to reduce atomic ops for tcp zerocopy receives, which normally hits the same spinlock multiple times consecutively. Signed-off-by: Arjun Roy Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Andrew Morton Cc: David Miller Cc: Matthew Wilcox Cc: Jason Gunthorpe Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/20200128025958.43490-1-arjunroy.kdev@gmail.com Signed-off-by: Linus Torvalds --- mm/memory.c | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 19874d133a66..52a3303458cb 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1442,6 +1442,27 @@ pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, return pte_alloc_map_lock(mm, pmd, addr, ptl); } +static int validate_page_before_insert(struct page *page) +{ + if (PageAnon(page) || PageSlab(page) || page_has_type(page)) + return -EINVAL; + flush_dcache_page(page); + return 0; +} + +static int insert_page_into_pte_locked(struct mm_struct *mm, pte_t *pte, + unsigned long addr, struct page *page, pgprot_t prot) +{ + if (!pte_none(*pte)) + return -EBUSY; + /* Ok, finally just insert the thing.. */ + get_page(page); + inc_mm_counter_fast(mm, mm_counter_file(page)); + page_add_file_rmap(page, false); + set_pte_at(mm, addr, pte, mk_pte(page, prot)); + return 0; +} + /* * This is the old fallback for page remapping. * @@ -1457,26 +1478,14 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr, pte_t *pte; spinlock_t *ptl; - retval = -EINVAL; - if (PageAnon(page) || PageSlab(page) || page_has_type(page)) + retval = validate_page_before_insert(page); + if (retval) goto out; retval = -ENOMEM; - flush_dcache_page(page); pte = get_locked_pte(mm, addr, &ptl); if (!pte) goto out; - retval = -EBUSY; - if (!pte_none(*pte)) - goto out_unlock; - - /* Ok, finally just insert the thing.. */ - get_page(page); - inc_mm_counter_fast(mm, mm_counter_file(page)); - page_add_file_rmap(page, false); - set_pte_at(mm, addr, pte, mk_pte(page, prot)); - - retval = 0; -out_unlock: + retval = insert_page_into_pte_locked(mm, pte, addr, page, prot); pte_unmap_unlock(pte, ptl); out: return retval; -- cgit From 251a0ffeaeee2a900765d98d44880943dce1047d Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Fri, 10 Apr 2020 14:32:54 -0700 Subject: mm: bring sparc pte_index() semantics inline with other platforms pte_index() on platforms other than sparc return a numerical index. On sparc, it returns a pte_t*. This presents an issue for vm_insert_pages(), which relies on pte_index() to find the offset for a pte within a pmd, for batched inserts. This patch: 1. Modifies pte_index() for sparc to return a numerical index, like other platforms, 2. Defines pte_entry() for sparc which returns a pte_t* (as pte_index() used to), 3. Converts existing sparc callers for pte_index() to use pte_entry(). [sfr@canb.auug.org.au: remove pte_entry and just directly modified pte_offset_kernel instead] Signed-off-by: Arjun Roy Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Reviewed-by: Mike Rapoport Cc: Eric Dumazet Cc: Soheil Hassas Yeganeh Cc: David Miller Cc: Matthew Wilcox Cc: Arjun Roy Cc: Jason Gunthorpe Link: http://lkml.kernel.org/r/20200227105045.6b421d9f@canb.auug.org.au Signed-off-by: Linus Torvalds --- arch/sparc/include/asm/pgtable_64.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 65494c3a420e..da527b27cf7d 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -907,11 +907,11 @@ static inline unsigned long pud_pfn(pud_t pud) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))) /* Find an entry in the third-level page table.. */ -#define pte_index(dir, address) \ - ((pte_t *) __pmd_page(*(dir)) + \ - ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) -#define pte_offset_kernel pte_index -#define pte_offset_map pte_index +#define pte_index(address) \ + ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define pte_offset_kernel(dir, address) \ + ((pte_t *) __pmd_page(*(dir)) + pte_index(address)) +#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) #define pte_unmap(pte) do { } while (0) /* We cannot include at this point yet: */ -- cgit From c97078bd219cbe1a878b24bb4e61d312f19ece1f Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Fri, 10 Apr 2020 14:32:58 -0700 Subject: mm: define pte_index as macro for x86 pte_index() is either defined as a macro (e.g. sparc64) or as an inlined function (e.g. x86). vm_insert_pages() depends on pte_index but it is not defined on all platforms (e.g. m68k). To fix compilation of vm_insert_pages() on architectures not providing pte_index(), we perform the following fix: 0. For platforms where it is meaningful, and defined as a macro, no change is needed. 1. For platforms where it is meaningful and defined as an inlined function, and we want to use it with vm_insert_pages(), we define a degenerate macro of the form: #define pte_index pte_index 2. vm_insert_pages() checks for the existence of a pte_index macro definition. If found, it implements a batched insert. If not found, it devolves to calling vm_insert_page() in a loop. This patch implements step 1 for x86. v3 of this patch fixes a compilation warning for an unused method. v2 of this patch moved a macro definition to a more readable location. Signed-off-by: Arjun Roy Signed-off-by: Andrew Morton Cc: David Miller Cc: Eric Dumazet Cc: Jason Gunthorpe Cc: Matthew Wilcox Cc: Soheil Hassas Yeganeh Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/20200228054714.204424-1-arjunroy.kdev@gmail.com Signed-off-by: Linus Torvalds --- arch/x86/include/asm/pgtable.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 28838d790191..abad0da0973a 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -860,7 +860,10 @@ static inline unsigned long pmd_index(unsigned long address) * * this function returns the index of the entry in the pte page which would * control the given virtual address + * + * Also define macro so we can test if pte_index is defined for arch. */ +#define pte_index pte_index static inline unsigned long pte_index(unsigned long address) { return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); -- cgit From 8cd3984d81d5fd5e18bccb12d7d228a114ec2508 Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Fri, 10 Apr 2020 14:33:01 -0700 Subject: mm/memory.c: add vm_insert_pages() Add the ability to insert multiple pages at once to a user VM with lower PTE spinlock operations. The intention of this patch-set is to reduce atomic ops for tcp zerocopy receives, which normally hits the same spinlock multiple times consecutively. [akpm@linux-foundation.org: pte_alloc() no longer takes the `addr' argument] [arjunroy@google.com: add missing page_count() check to vm_insert_pages()] Link: http://lkml.kernel.org/r/20200214005929.104481-1-arjunroy.kdev@gmail.com [arjunroy@google.com: vm_insert_pages() checks if pte_index defined] Link: http://lkml.kernel.org/r/20200228054714.204424-2-arjunroy.kdev@gmail.com Signed-off-by: Arjun Roy Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Andrew Morton Cc: David Miller Cc: Matthew Wilcox Cc: Jason Gunthorpe Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/20200128025958.43490-2-arjunroy.kdev@gmail.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 + mm/memory.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 129 insertions(+), 2 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index e2f938c5a9d8..ed896cedd4c4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2689,6 +2689,8 @@ struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); int remap_pfn_range(struct vm_area_struct *, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t); int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); +int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr, + struct page **pages, unsigned long *num); int vm_map_pages(struct vm_area_struct *vma, struct page **pages, unsigned long num); int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages, diff --git a/mm/memory.c b/mm/memory.c index 52a3303458cb..f703fe8c8346 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1419,8 +1419,7 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, } EXPORT_SYMBOL_GPL(zap_vma_ptes); -pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, - spinlock_t **ptl) +static pmd_t *walk_to_pmd(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; p4d_t *p4d; @@ -1439,6 +1438,16 @@ pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, return NULL; VM_BUG_ON(pmd_trans_huge(*pmd)); + return pmd; +} + +pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, + spinlock_t **ptl) +{ + pmd_t *pmd = walk_to_pmd(mm, addr); + + if (!pmd) + return NULL; return pte_alloc_map_lock(mm, pmd, addr, ptl); } @@ -1491,6 +1500,122 @@ out: return retval; } +#ifdef pte_index +static int insert_page_in_batch_locked(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr, struct page *page, pgprot_t prot) +{ + int err; + + if (!page_count(page)) + return -EINVAL; + err = validate_page_before_insert(page); + return err ? err : insert_page_into_pte_locked( + mm, pte_offset_map(pmd, addr), addr, page, prot); +} + +/* insert_pages() amortizes the cost of spinlock operations + * when inserting pages in a loop. Arch *must* define pte_index. + */ +static int insert_pages(struct vm_area_struct *vma, unsigned long addr, + struct page **pages, unsigned long *num, pgprot_t prot) +{ + pmd_t *pmd = NULL; + spinlock_t *pte_lock = NULL; + struct mm_struct *const mm = vma->vm_mm; + unsigned long curr_page_idx = 0; + unsigned long remaining_pages_total = *num; + unsigned long pages_to_write_in_pmd; + int ret; +more: + ret = -EFAULT; + pmd = walk_to_pmd(mm, addr); + if (!pmd) + goto out; + + pages_to_write_in_pmd = min_t(unsigned long, + remaining_pages_total, PTRS_PER_PTE - pte_index(addr)); + + /* Allocate the PTE if necessary; takes PMD lock once only. */ + ret = -ENOMEM; + if (pte_alloc(mm, pmd)) + goto out; + pte_lock = pte_lockptr(mm, pmd); + + while (pages_to_write_in_pmd) { + int pte_idx = 0; + const int batch_size = min_t(int, pages_to_write_in_pmd, 8); + + spin_lock(pte_lock); + for (; pte_idx < batch_size; ++pte_idx) { + int err = insert_page_in_batch_locked(mm, pmd, + addr, pages[curr_page_idx], prot); + if (unlikely(err)) { + spin_unlock(pte_lock); + ret = err; + remaining_pages_total -= pte_idx; + goto out; + } + addr += PAGE_SIZE; + ++curr_page_idx; + } + spin_unlock(pte_lock); + pages_to_write_in_pmd -= batch_size; + remaining_pages_total -= batch_size; + } + if (remaining_pages_total) + goto more; + ret = 0; +out: + *num = remaining_pages_total; + return ret; +} +#endif /* ifdef pte_index */ + +/** + * vm_insert_pages - insert multiple pages into user vma, batching the pmd lock. + * @vma: user vma to map to + * @addr: target start user address of these pages + * @pages: source kernel pages + * @num: in: number of pages to map. out: number of pages that were *not* + * mapped. (0 means all pages were successfully mapped). + * + * Preferred over vm_insert_page() when inserting multiple pages. + * + * In case of error, we may have mapped a subset of the provided + * pages. It is the caller's responsibility to account for this case. + * + * The same restrictions apply as in vm_insert_page(). + */ +int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr, + struct page **pages, unsigned long *num) +{ +#ifdef pte_index + const unsigned long end_addr = addr + (*num * PAGE_SIZE) - 1; + + if (addr < vma->vm_start || end_addr >= vma->vm_end) + return -EFAULT; + if (!(vma->vm_flags & VM_MIXEDMAP)) { + BUG_ON(down_read_trylock(&vma->vm_mm->mmap_sem)); + BUG_ON(vma->vm_flags & VM_PFNMAP); + vma->vm_flags |= VM_MIXEDMAP; + } + /* Defer page refcount checking till we're about to map that page. */ + return insert_pages(vma, addr, pages, num, vma->vm_page_prot); +#else + unsigned long idx = 0, pgcount = *num; + int err; + + for (; idx < pgcount; ++idx) { + err = vm_insert_page(vma, addr + (PAGE_SIZE * idx), pages[idx]); + if (err) + break; + } + *num = pgcount - idx; + return err; +#endif /* ifdef pte_index */ +} +EXPORT_SYMBOL(vm_insert_pages); + /** * vm_insert_page - insert single page into user vma * @vma: user vma to map to -- cgit From c62da0c35d58518ddb26ff641d2485596567fd96 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 10 Apr 2020 14:33:05 -0700 Subject: mm/vma: define a default value for VM_DATA_DEFAULT_FLAGS There are many platforms with exact same value for VM_DATA_DEFAULT_FLAGS This creates a default value for VM_DATA_DEFAULT_FLAGS in line with the existing VM_STACK_DEFAULT_FLAGS. While here, also define some more macros with standard VMA access flag combinations that are used frequently across many platforms. Apart from simplification, this reduces code duplication as well. Signed-off-by: Anshuman Khandual Signed-off-by: Andrew Morton Reviewed-by: Vlastimil Babka Acked-by: Geert Uytterhoeven Cc: Richard Henderson Cc: Vineet Gupta Cc: Russell King Cc: Catalin Marinas Cc: Mark Salter Cc: Guo Ren Cc: Yoshinori Sato Cc: Brian Cain Cc: Tony Luck Cc: Michal Simek Cc: Ralf Baechle Cc: Paul Burton Cc: Nick Hu Cc: Ley Foon Tan Cc: Jonas Bonn Cc: "James E.J. Bottomley" Cc: Michael Ellerman Cc: Paul Walmsley Cc: Heiko Carstens Cc: Rich Felker Cc: "David S. Miller" Cc: Guan Xuetao Cc: Thomas Gleixner Cc: Jeff Dike Cc: Chris Zankel Link: http://lkml.kernel.org/r/1583391014-8170-2-git-send-email-anshuman.khandual@arm.com Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/page.h | 3 --- arch/arc/include/asm/page.h | 2 +- arch/arm/include/asm/page.h | 4 +--- arch/arm64/include/asm/page.h | 4 +--- arch/c6x/include/asm/page.h | 5 +---- arch/csky/include/asm/page.h | 3 --- arch/h8300/include/asm/page.h | 2 -- arch/hexagon/include/asm/page.h | 3 +-- arch/ia64/include/asm/page.h | 5 +---- arch/m68k/include/asm/page.h | 3 --- arch/microblaze/include/asm/page.h | 2 -- arch/mips/include/asm/page.h | 5 +---- arch/nds32/include/asm/page.h | 3 --- arch/nios2/include/asm/page.h | 3 +-- arch/openrisc/include/asm/page.h | 5 ----- arch/parisc/include/asm/page.h | 3 --- arch/powerpc/include/asm/page.h | 9 ++------- arch/powerpc/include/asm/page_64.h | 7 ++----- arch/riscv/include/asm/page.h | 3 +-- arch/s390/include/asm/page.h | 3 +-- arch/sh/include/asm/page.h | 3 --- arch/sparc/include/asm/page_32.h | 3 --- arch/sparc/include/asm/page_64.h | 3 --- arch/unicore32/include/asm/page.h | 3 --- arch/x86/include/asm/page_types.h | 4 +--- arch/x86/um/asm/vm-flags.h | 10 ++-------- arch/xtensa/include/asm/page.h | 3 --- include/linux/mm.h | 14 ++++++++++++++ 28 files changed, 31 insertions(+), 89 deletions(-) diff --git a/arch/alpha/include/asm/page.h b/arch/alpha/include/asm/page.h index f3fb2848470a..e241bd88880f 100644 --- a/arch/alpha/include/asm/page.h +++ b/arch/alpha/include/asm/page.h @@ -90,9 +90,6 @@ typedef struct page *pgtable_t; #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) #endif /* CONFIG_DISCONTIGMEM */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include #include diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 0a32e8cfd074..b0dfed0f12be 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -102,7 +102,7 @@ typedef pte_t * pgtable_t; #define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) /* Default Permissions for stack/heaps pages (Non Executable) */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC #define WANT_PAGE_VIRTUAL 1 diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index c2b75cba26df..11b058a72a5b 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -161,9 +161,7 @@ extern int pfn_valid(unsigned long); #endif /* !__ASSEMBLY__ */ -#define VM_DATA_DEFAULT_FLAGS \ - (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #include diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 75d6cd23a679..c01b52add377 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -36,9 +36,7 @@ extern int pfn_valid(unsigned long); #endif /* !__ASSEMBLY__ */ -#define VM_DATA_DEFAULT_FLAGS \ - (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #include diff --git a/arch/c6x/include/asm/page.h b/arch/c6x/include/asm/page.h index 70db1e7632bc..40079899084d 100644 --- a/arch/c6x/include/asm/page.h +++ b/arch/c6x/include/asm/page.h @@ -2,10 +2,7 @@ #ifndef _ASM_C6X_PAGE_H #define _ASM_C6X_PAGE_H -#define VM_DATA_DEFAULT_FLAGS \ - (VM_READ | VM_WRITE | \ - ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #include diff --git a/arch/csky/include/asm/page.h b/arch/csky/include/asm/page.h index 9738eacefdc7..9b98bf31d57c 100644 --- a/arch/csky/include/asm/page.h +++ b/arch/csky/include/asm/page.h @@ -85,9 +85,6 @@ extern unsigned long va_pa_offset; PHYS_OFFSET_OFFSET) #define virt_to_page(x) (mem_map + MAP_NR(x)) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #define pfn_to_kaddr(x) __va(PFN_PHYS(x)) #include diff --git a/arch/h8300/include/asm/page.h b/arch/h8300/include/asm/page.h index 8da5124ad344..53e037544239 100644 --- a/arch/h8300/include/asm/page.h +++ b/arch/h8300/include/asm/page.h @@ -6,8 +6,6 @@ #include #define MAP_NR(addr) (((uintptr_t)(addr)-PAGE_OFFSET) >> PAGE_SHIFT) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #ifndef __ASSEMBLY__ extern unsigned long rom_length; diff --git a/arch/hexagon/include/asm/page.h b/arch/hexagon/include/asm/page.h index ee31f36f48f3..7cbf719c578e 100644 --- a/arch/hexagon/include/asm/page.h +++ b/arch/hexagon/include/asm/page.h @@ -93,8 +93,7 @@ struct page; #define virt_to_page(kaddr) pfn_to_page(PFN_DOWN(__pa(kaddr))) /* Default vm area behavior is non-executable. */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC #define pfn_valid(pfn) ((pfn) < max_mapnr) #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) diff --git a/arch/ia64/include/asm/page.h b/arch/ia64/include/asm/page.h index 5798bd2b462c..b69a5499d75b 100644 --- a/arch/ia64/include/asm/page.h +++ b/arch/ia64/include/asm/page.h @@ -218,10 +218,7 @@ get_order (unsigned long size) #define PAGE_OFFSET RGN_BASE(RGN_KERNEL) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | \ - (((current->personality & READ_IMPLIES_EXEC) != 0) \ - ? VM_EXEC : 0)) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #define GATE_ADDR RGN_BASE(RGN_GATE) diff --git a/arch/m68k/include/asm/page.h b/arch/m68k/include/asm/page.h index da546487e177..2614a1206f2f 100644 --- a/arch/m68k/include/asm/page.h +++ b/arch/m68k/include/asm/page.h @@ -65,9 +65,6 @@ extern unsigned long _ramend; #define __phys_to_pfn(paddr) ((unsigned long)((paddr) >> PAGE_SHIFT)) #define __pfn_to_phys(pfn) PFN_PHYS(pfn) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include #endif /* _M68K_PAGE_H */ diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index ae7215c94706..b13463d39b38 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -194,8 +194,6 @@ extern int page_is_ram(unsigned long pfn); #ifdef CONFIG_MMU -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #endif /* CONFIG_MMU */ #endif /* __KERNEL__ */ diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h index 0ba4ce6e2bf3..e2f503fc7a84 100644 --- a/arch/mips/include/asm/page.h +++ b/arch/mips/include/asm/page.h @@ -253,10 +253,7 @@ extern bool __virt_addr_valid(const volatile void *kaddr); #define virt_addr_valid(kaddr) \ __virt_addr_valid((const volatile void *) (kaddr)) -#define VM_DATA_DEFAULT_FLAGS \ - (VM_READ | VM_WRITE | \ - ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #include #include diff --git a/arch/nds32/include/asm/page.h b/arch/nds32/include/asm/page.h index 86b32014c5f9..add33a7f02c8 100644 --- a/arch/nds32/include/asm/page.h +++ b/arch/nds32/include/asm/page.h @@ -59,9 +59,6 @@ typedef struct page *pgtable_t; #endif /* !__ASSEMBLY__ */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #endif /* __KERNEL__ */ #endif diff --git a/arch/nios2/include/asm/page.h b/arch/nios2/include/asm/page.h index 79fcac61f6ef..6a989819a7c1 100644 --- a/arch/nios2/include/asm/page.h +++ b/arch/nios2/include/asm/page.h @@ -98,8 +98,7 @@ static inline bool pfn_valid(unsigned long pfn) # define virt_to_page(vaddr) pfn_to_page(PFN_DOWN(virt_to_phys(vaddr))) # define virt_addr_valid(vaddr) pfn_valid(PFN_DOWN(virt_to_phys(vaddr))) -# define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +# define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC #include diff --git a/arch/openrisc/include/asm/page.h b/arch/openrisc/include/asm/page.h index 01069db59454..aab6e64d6db4 100644 --- a/arch/openrisc/include/asm/page.h +++ b/arch/openrisc/include/asm/page.h @@ -86,11 +86,6 @@ typedef struct page *pgtable_t; #endif /* __ASSEMBLY__ */ - -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - - #include #include diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h index 796ae29e9b9a..6b3f6740a6a6 100644 --- a/arch/parisc/include/asm/page.h +++ b/arch/parisc/include/asm/page.h @@ -180,9 +180,6 @@ extern int npmem_ranges; #define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include #include #include diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 080a0bf8e54b..3ee8df0f66e0 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -240,13 +240,8 @@ static inline bool pfn_valid(unsigned long pfn) * and needs to be executable. This means the whole heap ends * up being executable. */ -#define VM_DATA_DEFAULT_FLAGS32 \ - (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ - VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - -#define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS32 VM_DATA_FLAGS_TSK_EXEC +#define VM_DATA_DEFAULT_FLAGS64 VM_DATA_FLAGS_NON_EXEC #ifdef __powerpc64__ #include diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index 5962797f784a..79a9b7c6a132 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -94,11 +94,8 @@ extern u64 ppc64_pft_size; * stack by default, so in the absence of a PT_GNU_STACK program header * we turn execute permission off. */ -#define VM_STACK_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - -#define VM_STACK_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_STACK_DEFAULT_FLAGS32 VM_DATA_FLAGS_EXEC +#define VM_STACK_DEFAULT_FLAGS64 VM_DATA_FLAGS_NON_EXEC #define VM_STACK_DEFAULT_FLAGS \ (is_32bit_task() ? \ diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 8ca1930caa44..2d50f76efe48 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -137,8 +137,7 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x); #define virt_addr_valid(vaddr) (pfn_valid(virt_to_pfn(vaddr))) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC #include #include diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index f2d4c1bd3429..cc98f9b78fd4 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -181,8 +181,7 @@ int arch_make_page_accessible(struct page *page); #define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC #include #include diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h index 5eef8be3e59f..ea8d68f58e39 100644 --- a/arch/sh/include/asm/page.h +++ b/arch/sh/include/asm/page.h @@ -182,9 +182,6 @@ typedef struct page *pgtable_t; #endif #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include #include diff --git a/arch/sparc/include/asm/page_32.h b/arch/sparc/include/asm/page_32.h index b76d59edec8c..478260002836 100644 --- a/arch/sparc/include/asm/page_32.h +++ b/arch/sparc/include/asm/page_32.h @@ -133,9 +133,6 @@ extern unsigned long pfn_base; #define pfn_valid(pfn) (((pfn) >= (pfn_base)) && (((pfn)-(pfn_base)) < max_mapnr)) #define virt_addr_valid(kaddr) ((((unsigned long)(kaddr)-PAGE_OFFSET)>>PAGE_SHIFT) < max_mapnr) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include #include diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index e80f2d5bf62f..254dffd85fb1 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -158,9 +158,6 @@ extern unsigned long PAGE_OFFSET; #endif /* !(__ASSEMBLY__) */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include #endif /* _SPARC64_PAGE_H */ diff --git a/arch/unicore32/include/asm/page.h b/arch/unicore32/include/asm/page.h index 8a89335673f9..96d6bdf180bd 100644 --- a/arch/unicore32/include/asm/page.h +++ b/arch/unicore32/include/asm/page.h @@ -69,9 +69,6 @@ extern int pfn_valid(unsigned long); #endif /* !__ASSEMBLY__ */ -#define VM_DATA_DEFAULT_FLAGS \ - (VM_READ | VM_WRITE | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include #endif diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index c85e15010f48..e27aa6be6320 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -35,9 +35,7 @@ #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) -#define VM_DATA_DEFAULT_FLAGS \ - (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #define __PHYSICAL_START ALIGN(CONFIG_PHYSICAL_START, \ CONFIG_PHYSICAL_ALIGN) diff --git a/arch/x86/um/asm/vm-flags.h b/arch/x86/um/asm/vm-flags.h index 7c297e9e2413..df7a3896f5dd 100644 --- a/arch/x86/um/asm/vm-flags.h +++ b/arch/x86/um/asm/vm-flags.h @@ -9,17 +9,11 @@ #ifdef CONFIG_X86_32 -#define VM_DATA_DEFAULT_FLAGS \ - (VM_READ | VM_WRITE | \ - ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #else -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -#define VM_STACK_DEFAULT_FLAGS (VM_GROWSDOWN | VM_READ | VM_WRITE | \ - VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_STACK_DEFAULT_FLAGS (VM_GROWSDOWN | VM_DATA_FLAGS_EXEC) #endif #endif diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h index f4771c29c7e9..37ce25ef92d6 100644 --- a/arch/xtensa/include/asm/page.h +++ b/arch/xtensa/include/asm/page.h @@ -203,8 +203,5 @@ static inline unsigned long ___pa(unsigned long va) #endif /* __ASSEMBLY__ */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include #endif /* _XTENSA_PAGE_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index ed896cedd4c4..33076fa149c8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -343,6 +343,20 @@ extern unsigned int kobjsize(const void *objp); /* Bits set in the VMA until the stack is in its final location */ #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ) +#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) + +/* Common data flag combinations */ +#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_FLAGS_NON_EXEC (VM_READ | VM_WRITE | VM_MAYREAD | \ + VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_FLAGS_EXEC (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#ifndef VM_DATA_DEFAULT_FLAGS /* arch can override this */ +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_EXEC +#endif + #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS #endif -- cgit From 6cb4d9a2870d2062e34c93bfef4d52fca3fe42d1 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 10 Apr 2020 14:33:09 -0700 Subject: mm/vma: introduce VM_ACCESS_FLAGS There are many places where all basic VMA access flags (read, write, exec) are initialized or checked against as a group. One such example is during page fault. Existing vma_is_accessible() wrapper already creates the notion of VMA accessibility as a group access permissions. Hence lets just create VM_ACCESS_FLAGS (VM_READ|VM_WRITE|VM_EXEC) which will not only reduce code duplication but also extend the VMA accessibility concept in general. Signed-off-by: Anshuman Khandual Signed-off-by: Andrew Morton Reviewed-by: Vlastimil Babka Cc: Russell King Cc: Catalin Marinas Cc: Mark Salter Cc: Nick Hu Cc: Ley Foon Tan Cc: Michael Ellerman Cc: Heiko Carstens Cc: Yoshinori Sato Cc: Guan Xuetao Cc: Dave Hansen Cc: Thomas Gleixner Cc: Rob Springer Cc: Greg Kroah-Hartman Cc: Geert Uytterhoeven Link: http://lkml.kernel.org/r/1583391014-8170-3-git-send-email-anshuman.khandual@arm.com Signed-off-by: Linus Torvalds --- arch/arm/mm/fault.c | 2 +- arch/arm64/mm/fault.c | 2 +- arch/nds32/mm/fault.c | 2 +- arch/powerpc/mm/book3s64/pkeys.c | 2 +- arch/s390/mm/fault.c | 2 +- arch/unicore32/mm/fault.c | 2 +- arch/x86/mm/pkeys.c | 2 +- drivers/staging/gasket/gasket_core.c | 2 +- include/linux/mm.h | 6 +++++- mm/mmap.c | 2 +- mm/mprotect.c | 4 ++-- 11 files changed, 16 insertions(+), 12 deletions(-) diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index b598e6978b29..2dd5c41cbb8d 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -189,7 +189,7 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) */ static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma) { - unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; + unsigned int mask = VM_ACCESS_FLAGS; if ((fsr & FSR_WRITE) && !(fsr & FSR_CM)) mask = VM_WRITE; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1027851d469a..c9cedc0432d2 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -445,7 +445,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, const struct fault_info *inf; struct mm_struct *mm = current->mm; vm_fault_t fault, major = 0; - unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; + unsigned long vm_flags = VM_ACCESS_FLAGS; unsigned int mm_flags = FAULT_FLAG_DEFAULT; if (kprobe_page_fault(regs, esr)) diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c index 0cf0c08c7da2..f331e533edc2 100644 --- a/arch/nds32/mm/fault.c +++ b/arch/nds32/mm/fault.c @@ -79,7 +79,7 @@ void do_page_fault(unsigned long entry, unsigned long addr, struct vm_area_struct *vma; int si_code; vm_fault_t fault; - unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; + unsigned int mask = VM_ACCESS_FLAGS; unsigned int flags = FAULT_FLAG_DEFAULT; error_code = error_code & (ITYPE_mskINST | ITYPE_mskETYPE); diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 07527f1ed108..1199fc2bfaec 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -315,7 +315,7 @@ int __execute_only_pkey(struct mm_struct *mm) static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma) { /* Do this check first since the vm_flags should be hot */ - if ((vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) != VM_EXEC) + if ((vma->vm_flags & VM_ACCESS_FLAGS) != VM_EXEC) return false; return (vma_pkey(vma) == vma->vm_mm->context.execute_only_pkey); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index d56f67745e3e..9822a1fd1c6b 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -580,7 +580,7 @@ void do_dat_exception(struct pt_regs *regs) int access; vm_fault_t fault; - access = VM_READ | VM_EXEC | VM_WRITE; + access = VM_ACCESS_FLAGS; fault = do_exception(regs, access); if (unlikely(fault)) do_fault_error(regs, access, fault); diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c index a9bd08fbe588..3022104aa613 100644 --- a/arch/unicore32/mm/fault.c +++ b/arch/unicore32/mm/fault.c @@ -149,7 +149,7 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) */ static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma) { - unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; + unsigned int mask = VM_ACCESS_FLAGS; if (!(fsr ^ 0x12)) /* write? */ mask = VM_WRITE; diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index c6f84c0b5d7a..8873ed1438a9 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c @@ -63,7 +63,7 @@ int __execute_only_pkey(struct mm_struct *mm) static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma) { /* Do this check first since the vm_flags should be hot */ - if ((vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) != VM_EXEC) + if ((vma->vm_flags & VM_ACCESS_FLAGS) != VM_EXEC) return false; if (vma_pkey(vma) != vma->vm_mm->context.execute_only_pkey) return false; diff --git a/drivers/staging/gasket/gasket_core.c b/drivers/staging/gasket/gasket_core.c index cd181a64f737..8e0575fcb4c8 100644 --- a/drivers/staging/gasket/gasket_core.c +++ b/drivers/staging/gasket/gasket_core.c @@ -689,7 +689,7 @@ static bool gasket_mmap_has_permissions(struct gasket_dev *gasket_dev, /* Make sure that no wrong flags are set. */ requested_permissions = - (vma->vm_flags & (VM_WRITE | VM_READ | VM_EXEC)); + (vma->vm_flags & VM_ACCESS_FLAGS); if (requested_permissions & ~(bar_permissions)) { dev_dbg(gasket_dev->dev, "Attempting to map a region with requested permissions 0x%x, but region has permissions 0x%x.\n", diff --git a/include/linux/mm.h b/include/linux/mm.h index 33076fa149c8..4db1522d7c48 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -369,6 +369,10 @@ extern unsigned int kobjsize(const void *objp); #define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) +/* VMA basic access permission flags */ +#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC) + + /* * Special vmas that are non-mergable, non-mlock()able. */ @@ -646,7 +650,7 @@ static inline bool vma_is_foreign(struct vm_area_struct *vma) static inline bool vma_is_accessible(struct vm_area_struct *vma) { - return vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC); + return vma->vm_flags & VM_ACCESS_FLAGS; } #ifdef CONFIG_SHMEM diff --git a/mm/mmap.c b/mm/mmap.c index de07bbc0e21f..f609e9ec4a25 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1224,7 +1224,7 @@ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct * return a->vm_end == b->vm_start && mpol_equal(vma_policy(a), vma_policy(b)) && a->vm_file == b->vm_file && - !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC|VM_SOFTDIRTY)) && + !((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_SOFTDIRTY)) && b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT); } diff --git a/mm/mprotect.c b/mm/mprotect.c index 1d823b050329..494192ca954b 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -419,7 +419,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, */ if (arch_has_pfn_modify_check() && (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) && - (newflags & (VM_READ|VM_WRITE|VM_EXEC)) == 0) { + (newflags & VM_ACCESS_FLAGS) == 0) { pgprot_t new_pgprot = vm_get_page_prot(newflags); error = walk_page_range(current->mm, start, end, @@ -598,7 +598,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len, newflags |= (vma->vm_flags & ~mask_off_old_flags); /* newflags >> 4 shift VM_MAY% in place of VM_% */ - if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) { + if ((newflags & ~(newflags >> 4)) & VM_ACCESS_FLAGS) { error = -EACCES; goto out; } -- cgit From 78e7c5af080b86e9f28afac5a8307ddab1d2c1a3 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 10 Apr 2020 14:33:13 -0700 Subject: mm/special: create generic fallbacks for pte_special() and pte_mkspecial() Currently there are many platforms that dont enable ARCH_HAS_PTE_SPECIAL but required to define quite similar fallback stubs for special page table entry helpers such as pte_special() and pte_mkspecial(), as they get build in generic MM without a config check. This creates two generic fallback stub definitions for these helpers, eliminating much code duplication. mips platform has a special case where pte_special() and pte_mkspecial() visibility is wider than what ARCH_HAS_PTE_SPECIAL enablement requires. This restricts those symbol visibility in order to avoid redefinitions which is now exposed through this new generic stubs and subsequent build failure. arm platform set_pte_at() definition needs to be moved into a C file just to prevent a build failure. [anshuman.khandual@arm.com: use defined(CONFIG_ARCH_HAS_PTE_SPECIAL) in mips per Thomas] Link: http://lkml.kernel.org/r/1583851924-21603-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Signed-off-by: Andrew Morton Acked-by: Guo Ren [csky] Acked-by: Geert Uytterhoeven [m68k] Acked-by: Stafford Horne [openrisc] Acked-by: Helge Deller [parisc] Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Russell King Cc: Brian Cain Cc: Tony Luck Cc: Fenghua Yu Cc: Sam Creasey Cc: Michal Simek Cc: Ralf Baechle Cc: Paul Burton Cc: Nick Hu Cc: Greentime Hu Cc: Vincent Chen Cc: Ley Foon Tan Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: "James E.J. Bottomley" Cc: "David S. Miller" Cc: Jeff Dike Cc: Richard Weinberger Cc: Anton Ivanov Cc: Guan Xuetao Cc: Chris Zankel Cc: Max Filippov Cc: Thomas Bogendoerfer Link: http://lkml.kernel.org/r/1583802551-15406-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/pgtable.h | 2 -- arch/arm/include/asm/pgtable-2level.h | 2 -- arch/arm/include/asm/pgtable.h | 15 ++--------- arch/arm/mm/mmu.c | 14 ++++++++++ arch/csky/include/asm/pgtable.h | 3 --- arch/hexagon/include/asm/pgtable.h | 2 -- arch/ia64/include/asm/pgtable.h | 2 -- arch/m68k/include/asm/mcf_pgtable.h | 10 -------- arch/m68k/include/asm/motorola_pgtable.h | 2 -- arch/m68k/include/asm/sun3_pgtable.h | 2 -- arch/microblaze/include/asm/pgtable.h | 4 --- arch/mips/include/asm/pgtable.h | 44 ++++++++++++++++++++++---------- arch/nds32/include/asm/pgtable.h | 9 ------- arch/nios2/include/asm/pgtable.h | 3 --- arch/openrisc/include/asm/pgtable.h | 2 -- arch/parisc/include/asm/pgtable.h | 2 -- arch/sparc/include/asm/pgtable_32.h | 7 ----- arch/um/include/asm/pgtable.h | 10 -------- arch/unicore32/include/asm/pgtable.h | 3 --- arch/xtensa/include/asm/pgtable.h | 3 --- include/linux/mm.h | 12 +++++++++ 21 files changed, 58 insertions(+), 95 deletions(-) diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 299791ce14b6..0267aa8a4f86 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -268,7 +268,6 @@ extern inline void pud_clear(pud_t * pudp) { pud_val(*pudp) = 0; } extern inline int pte_write(pte_t pte) { return !(pte_val(pte) & _PAGE_FOW); } extern inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } extern inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -extern inline int pte_special(pte_t pte) { return 0; } extern inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_FOW; return pte; } extern inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~(__DIRTY_BITS); return pte; } @@ -276,7 +275,6 @@ extern inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~(__ACCESS_BITS); ret extern inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) &= ~_PAGE_FOW; return pte; } extern inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= __DIRTY_BITS; return pte; } extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; return pte; } -extern inline pte_t pte_mkspecial(pte_t pte) { return pte; } #define PAGE_DIR_OFFSET(tsk,address) pgd_offset((tsk),(address)) diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index 0d3ea35c97fe..9e084a464a97 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -211,8 +211,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) #define pmd_addr_end(addr,end) (end) #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext) -#define pte_special(pte) (0) -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } /* * We don't have huge page support for short descriptors, for the moment diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 0483cf413315..befc8fcec98f 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -243,19 +243,8 @@ static inline void __sync_icache_dcache(pte_t pteval) extern void __sync_icache_dcache(pte_t pteval); #endif -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pteval) -{ - unsigned long ext = 0; - - if (addr < TASK_SIZE && pte_valid_user(pteval)) { - if (!pte_special(pteval)) - __sync_icache_dcache(pteval); - ext |= PTE_EXT_NG; - } - - set_pte_ext(ptep, pteval, ext); -} +void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval); static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot) { diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 69a337df619f..ec8d0008bfa1 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1646,3 +1646,17 @@ void __init early_mm_init(const struct machine_desc *mdesc) build_mem_type_table(); early_paging_init(mdesc); } + +void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) +{ + unsigned long ext = 0; + + if (addr < TASK_SIZE && pte_valid_user(pteval)) { + if (!pte_special(pteval)) + __sync_icache_dcache(pteval); + ext |= PTE_EXT_NG; + } + + set_pte_ext(ptep, pteval, ext); +} diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h index 9b7764cb7645..9ab4a445ad99 100644 --- a/arch/csky/include/asm/pgtable.h +++ b/arch/csky/include/asm/pgtable.h @@ -110,9 +110,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; extern void load_pgd(unsigned long pg_dir); extern pte_t invalid_pte_table[PTRS_PER_PTE]; -static inline int pte_special(pte_t pte) { return 0; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } - static inline void set_pte(pte_t *p, pte_t pte) { *p = pte; diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h index 2fec20ad939e..d383e8bea5b2 100644 --- a/arch/hexagon/include/asm/pgtable.h +++ b/arch/hexagon/include/asm/pgtable.h @@ -158,8 +158,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; /* located in head.S */ /* Seems to be zero even in architectures where the zero page is firewalled? */ #define FIRST_USER_ADDRESS 0UL -#define pte_special(pte) 0 -#define pte_mkspecial(pte) (pte) /* HUGETLB not working currently */ #ifdef CONFIG_HUGETLB_PAGE diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index d602e7c622db..0e7b645b76c6 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -298,7 +298,6 @@ extern unsigned long VMALLOC_END; #define pte_exec(pte) ((pte_val(pte) & _PAGE_AR_RX) != 0) #define pte_dirty(pte) ((pte_val(pte) & _PAGE_D) != 0) #define pte_young(pte) ((pte_val(pte) & _PAGE_A) != 0) -#define pte_special(pte) 0 /* * Note: we convert AR_RWX to AR_RX and AR_RW to AR_R by clearing the 2nd bit in the @@ -311,7 +310,6 @@ extern unsigned long VMALLOC_END; #define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D)) #define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D)) #define pte_mkhuge(pte) (__pte(pte_val(pte))) -#define pte_mkspecial(pte) (pte) /* * Because ia64's Icache and Dcache is not coherent (on a cpu), we need to diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h index b9f45aeded25..0031cd387b75 100644 --- a/arch/m68k/include/asm/mcf_pgtable.h +++ b/arch/m68k/include/asm/mcf_pgtable.h @@ -235,11 +235,6 @@ static inline int pte_young(pte_t pte) return pte_val(pte) & CF_PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) -{ - return 0; -} - static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~CF_PAGE_WRITABLE; @@ -312,11 +307,6 @@ static inline pte_t pte_mkcache(pte_t pte) return pte; } -static inline pte_t pte_mkspecial(pte_t pte) -{ - return pte; -} - #define swapper_pg_dir kernel_pg_dir extern pgd_t kernel_pg_dir[PTRS_PER_PGD]; diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h index 4b91a470ad58..48f19f0ab1e7 100644 --- a/arch/m68k/include/asm/motorola_pgtable.h +++ b/arch/m68k/include/asm/motorola_pgtable.h @@ -174,7 +174,6 @@ static inline void pud_set(pud_t *pudp, pmd_t *pmdp) static inline int pte_write(pte_t pte) { return !(pte_val(pte) & _PAGE_RONLY); } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_RONLY; return pte; } static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; } @@ -192,7 +191,6 @@ static inline pte_t pte_mkcache(pte_t pte) pte_val(pte) = (pte_val(pte) & _CACHEMASK040) | m68k_supervisor_cachemode; return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } #define PAGE_DIR_OFFSET(tsk,address) pgd_offset((tsk),(address)) diff --git a/arch/m68k/include/asm/sun3_pgtable.h b/arch/m68k/include/asm/sun3_pgtable.h index bc4155264810..0caa18a08437 100644 --- a/arch/m68k/include/asm/sun3_pgtable.h +++ b/arch/m68k/include/asm/sun3_pgtable.h @@ -155,7 +155,6 @@ static inline void pmd_clear (pmd_t *pmdp) { pmd_val (*pmdp) = 0; } static inline int pte_write(pte_t pte) { return pte_val(pte) & SUN3_PAGE_WRITEABLE; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & SUN3_PAGE_MODIFIED; } static inline int pte_young(pte_t pte) { return pte_val(pte) & SUN3_PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~SUN3_PAGE_WRITEABLE; return pte; } static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~SUN3_PAGE_MODIFIED; return pte; } @@ -168,7 +167,6 @@ static inline pte_t pte_mknocache(pte_t pte) { pte_val(pte) |= SUN3_PAGE_NOCACHE //static inline pte_t pte_mkcache(pte_t pte) { pte_val(pte) &= SUN3_PAGE_NOCACHE; return pte; } // until then, use: static inline pte_t pte_mkcache(pte_t pte) { return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t kernel_pg_dir[PTRS_PER_PGD]; diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index 45b30878fc17..6b056f6545d8 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -77,10 +77,6 @@ extern pte_t *va_to_pte(unsigned long address); * Undefined behaviour if not.. */ -static inline int pte_special(pte_t pte) { return 0; } - -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } - /* Start and end of the vmalloc area. */ /* Make sure to map the vmalloc area above the pinned kernel memory area of 32Mb. */ diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index aef5378f909c..f1801e7a4b15 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -269,6 +269,36 @@ cache_sync_done: */ extern pgd_t swapper_pg_dir[]; +/* + * Platform specific pte_special() and pte_mkspecial() definitions + * are required only when ARCH_HAS_PTE_SPECIAL is enabled. + */ +#if defined(CONFIG_ARCH_HAS_PTE_SPECIAL) +#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) +static inline int pte_special(pte_t pte) +{ + return pte.pte_low & _PAGE_SPECIAL; +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + pte.pte_low |= _PAGE_SPECIAL; + return pte; +} +#else +static inline int pte_special(pte_t pte) +{ + return pte_val(pte) & _PAGE_SPECIAL; +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + pte_val(pte) |= _PAGE_SPECIAL; + return pte; +} +#endif +#endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ + /* * The following only work if pte_present() is true. * Undefined behaviour if not.. @@ -277,7 +307,6 @@ extern pgd_t swapper_pg_dir[]; static inline int pte_write(pte_t pte) { return pte.pte_low & _PAGE_WRITE; } static inline int pte_dirty(pte_t pte) { return pte.pte_low & _PAGE_MODIFIED; } static inline int pte_young(pte_t pte) { return pte.pte_low & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return pte.pte_low & _PAGE_SPECIAL; } static inline pte_t pte_wrprotect(pte_t pte) { @@ -338,17 +367,10 @@ static inline pte_t pte_mkyoung(pte_t pte) } return pte; } - -static inline pte_t pte_mkspecial(pte_t pte) -{ - pte.pte_low |= _PAGE_SPECIAL; - return pte; -} #else static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_MODIFIED; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } static inline pte_t pte_wrprotect(pte_t pte) { @@ -392,12 +414,6 @@ static inline pte_t pte_mkyoung(pte_t pte) return pte; } -static inline pte_t pte_mkspecial(pte_t pte) -{ - pte_val(pte) |= _PAGE_SPECIAL; - return pte; -} - #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_HUGE; } diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h index 6abc58ac406d..476cc4dd1709 100644 --- a/arch/nds32/include/asm/pgtable.h +++ b/arch/nds32/include/asm/pgtable.h @@ -286,15 +286,6 @@ PTE_BIT_FUNC(mkclean, &=~_PAGE_D); PTE_BIT_FUNC(mkdirty, |=_PAGE_D); PTE_BIT_FUNC(mkold, &=~_PAGE_YOUNG); PTE_BIT_FUNC(mkyoung, |=_PAGE_YOUNG); -static inline int pte_special(pte_t pte) -{ - return 0; -} - -static inline pte_t pte_mkspecial(pte_t pte) -{ - return pte; -} /* * Mark the prot value as uncacheable and unbufferable. diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h index 99985d8b7166..f98b7f4519ba 100644 --- a/arch/nios2/include/asm/pgtable.h +++ b/arch/nios2/include/asm/pgtable.h @@ -113,7 +113,6 @@ static inline int pte_dirty(pte_t pte) \ { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) \ { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return 0; } #define pgprot_noncached pgprot_noncached @@ -168,8 +167,6 @@ static inline pte_t pte_mkdirty(pte_t pte) return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } - static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 248d22d8faa7..7f3fb9ceb083 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -236,8 +236,6 @@ static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } static inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return 0; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } static inline pte_t pte_wrprotect(pte_t pte) { diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index f0a365950536..9832c73a7021 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -377,7 +377,6 @@ static inline void pud_clear(pud_t *pud) { static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } -static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; } static inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~_PAGE_ACCESSED; return pte; } @@ -385,7 +384,6 @@ static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~_PAGE_WRITE; ret static inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; } static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_WRITE; return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } /* * Huge pte definitions. diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index 6d6f44c0cad9..0de659ae0ba4 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -223,11 +223,6 @@ static inline int pte_young(pte_t pte) return pte_val(pte) & SRMMU_REF; } -static inline int pte_special(pte_t pte) -{ - return 0; -} - static inline pte_t pte_wrprotect(pte_t pte) { return __pte(pte_val(pte) & ~SRMMU_WRITE); @@ -258,8 +253,6 @@ static inline pte_t pte_mkyoung(pte_t pte) return __pte(pte_val(pte) | SRMMU_REF); } -#define pte_mkspecial(pte) (pte) - #define pfn_pte(pfn, prot) mk_pte(pfn_to_page(pfn), prot) static inline unsigned long pte_pfn(pte_t pte) diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index 2daa58df2190..b5ddf5d98bd5 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -167,11 +167,6 @@ static inline int pte_newprot(pte_t pte) return(pte_present(pte) && (pte_get_bits(pte, _PAGE_NEWPROT))); } -static inline int pte_special(pte_t pte) -{ - return 0; -} - /* * ================================= * Flags setting section. @@ -247,11 +242,6 @@ static inline pte_t pte_mknewpage(pte_t pte) return(pte); } -static inline pte_t pte_mkspecial(pte_t pte) -{ - return(pte); -} - static inline void set_pte(pte_t *pteptr, pte_t pteval) { pte_copy(*pteptr, pteval); diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h index c8f7ba12f309..3b8731b3a937 100644 --- a/arch/unicore32/include/asm/pgtable.h +++ b/arch/unicore32/include/asm/pgtable.h @@ -177,7 +177,6 @@ extern struct page *empty_zero_page; #define pte_dirty(pte) (pte_val(pte) & PTE_DIRTY) #define pte_young(pte) (pte_val(pte) & PTE_YOUNG) #define pte_exec(pte) (pte_val(pte) & PTE_EXEC) -#define pte_special(pte) (0) #define PTE_BIT_FUNC(fn, op) \ static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } @@ -189,8 +188,6 @@ PTE_BIT_FUNC(mkdirty, |= PTE_DIRTY); PTE_BIT_FUNC(mkold, &= ~PTE_YOUNG); PTE_BIT_FUNC(mkyoung, |= PTE_YOUNG); -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } - /* * Mark the prot value as uncacheable. */ diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 27ac17c9da09..8be0c0568c50 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -266,7 +266,6 @@ static inline void paging_init(void) { } static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITABLE; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~(_PAGE_WRITABLE | _PAGE_HW_WRITE); return pte; } @@ -280,8 +279,6 @@ static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_WRITABLE; return pte; } -static inline pte_t pte_mkspecial(pte_t pte) - { return pte; } #define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) & ~_PAGE_CA_MASK)) diff --git a/include/linux/mm.h b/include/linux/mm.h index 4db1522d7c48..5a323422d783 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1927,6 +1927,18 @@ static inline void sync_mm_rss(struct mm_struct *mm) } #endif +#ifndef CONFIG_ARCH_HAS_PTE_SPECIAL +static inline int pte_special(pte_t pte) +{ + return 0; +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + return pte; +} +#endif + #ifndef CONFIG_ARCH_HAS_PTE_DEVMAP static inline int pte_devmap(pte_t pte) { -- cgit From 96c6b598135e7cec66161e8943823470c7c8954e Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:17 -0700 Subject: mm/memory_hotplug: drop the flags field from struct mhp_restrictions Patch series "Allow setting caching mode in arch_add_memory() for P2PDMA", v4. Currently, the page tables created using memremap_pages() are always created with the PAGE_KERNEL cacheing mode. However, the P2PDMA code is creating pages for PCI BAR memory which should never be accessed through the cache and instead use either WC or UC. This still works in most cases, on x86, because the MTRR registers typically override the caching settings in the page tables for all of the IO memory to be UC-. However, this tends not to work so well on other arches or some rare x86 machines that have firmware which does not setup the MTRR registers in this way. Instead of this, this series proposes a change to arch_add_memory() to take the pgprot required by the mapping which allows us to explicitly set pagetable entries for P2PDMA memory to UC. This changes is pretty routine for most of the arches: x86_64, arm64 and powerpc simply need to thread the pgprot through to where the page tables are setup. x86_32 unfortunately sets up the page tables at boot so must use _set_memory_prot() to change their caching mode. ia64, s390 and sh don't appear to have an easy way to change the page tables so, for now at least, we just return -EINVAL on such mappings and thus they will not support P2PDMA memory until the work for this is done. This should be fine as they don't yet support ZONE_DEVICE. This patch (of 7): This variable is not used anywhere and should therefore be removed from the structure. Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Reviewed-by: Dan Williams Acked-by: Michal Hocko Cc: Christoph Hellwig Cc: Catalin Marinas Cc: Will Deacon Cc: Benjamin Herrenschmidt Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Eric Badger Cc: "H. Peter Anvin" Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Paul Mackerras Link: http://lkml.kernel.org/r/20200306170846.9333-2-logang@deltatee.com Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index ef55115320fb..7c1bcff11672 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -59,11 +59,9 @@ enum { /* * Restrictions for the memory hotplug: - * flags: MHP_ flags * altmap: alternative allocator for memmap array */ struct mhp_restrictions { - unsigned long flags; struct vmem_altmap *altmap; }; -- cgit From f5637d3b42ab0465ef71d5fb8461bce97fba95e8 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:21 -0700 Subject: mm/memory_hotplug: rename mhp_restrictions to mhp_params The mhp_restrictions struct really doesn't specify anything resembling a restriction anymore so rename it to be mhp_params as it is a list of extended parameters. Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Reviewed-by: Dan Williams Acked-by: Michal Hocko Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Dave Hansen Cc: Eric Badger Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/20200306170846.9333-3-logang@deltatee.com Signed-off-by: Linus Torvalds --- arch/arm64/mm/mmu.c | 4 ++-- arch/ia64/mm/init.c | 4 ++-- arch/powerpc/mm/mem.c | 4 ++-- arch/s390/mm/init.c | 6 +++--- arch/sh/mm/init.c | 4 ++-- arch/x86/mm/init_32.c | 4 ++-- arch/x86/mm/init_64.c | 8 ++++---- include/linux/memory_hotplug.h | 16 ++++++++-------- mm/memory_hotplug.c | 8 ++++---- mm/memremap.c | 8 ++++---- 10 files changed, 33 insertions(+), 33 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 9b08f7c7e6f0..6d4e9c2b4ed0 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1374,7 +1374,7 @@ static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size) } int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { int ret, flags = 0; @@ -1387,7 +1387,7 @@ int arch_add_memory(int nid, u64 start, u64 size, memblock_clear_nomap(start, size); ret = __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, - restrictions); + params); if (ret) __remove_pgd_mapping(swapper_pg_dir, __phys_to_virt(start), size); diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index b01d68a2d5d9..97bbc23ea1e3 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -670,13 +670,13 @@ mem_init (void) #ifdef CONFIG_MEMORY_HOTPLUG int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - ret = __add_pages(nid, start_pfn, nr_pages, restrictions); + ret = __add_pages(nid, start_pfn, nr_pages, params); if (ret) printk("%s: Problem encountered in __add_pages() as ret=%d\n", __func__, ret); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 9b4f5fb719e0..e1cc58115816 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -122,7 +122,7 @@ static void flush_dcache_range_chunked(unsigned long start, unsigned long stop, } int __ref arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -138,7 +138,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, return -EFAULT; } - return __add_pages(nid, start_pfn, nr_pages, restrictions); + return __add_pages(nid, start_pfn, nr_pages, params); } void __ref arch_remove_memory(int nid, u64 start, u64 size, diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index ac44bd76db4b..e9e4a7abd0cc 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -268,20 +268,20 @@ device_initcall(s390_cma_mem_init); #endif /* CONFIG_CMA */ int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = PFN_DOWN(start); unsigned long size_pages = PFN_DOWN(size); int rc; - if (WARN_ON_ONCE(restrictions->altmap)) + if (WARN_ON_ONCE(params->altmap)) return -EINVAL; rc = vmem_add_mapping(start, size); if (rc) return rc; - rc = __add_pages(nid, start_pfn, size_pages, restrictions); + rc = __add_pages(nid, start_pfn, size_pages, params); if (rc) vmem_remove_mapping(start, size); return rc; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index d1b1ff2be17a..e5114c053364 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -406,14 +406,14 @@ void __init mem_init(void) #ifdef CONFIG_MEMORY_HOTPLUG int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; int ret; /* We only have ZONE_NORMAL, so this is easy.. */ - ret = __add_pages(nid, start_pfn, nr_pages, restrictions); + ret = __add_pages(nid, start_pfn, nr_pages, params); if (unlikely(ret)) printk("%s: Failed, __add_pages() == %d\n", __func__, ret); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index de73992b8432..d736c8625503 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -819,12 +819,12 @@ void __init mem_init(void) #ifdef CONFIG_MEMORY_HOTPLUG int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - return __add_pages(nid, start_pfn, nr_pages, restrictions); + return __add_pages(nid, start_pfn, nr_pages, params); } void arch_remove_memory(int nid, u64 start, u64 size, diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 0a14711d3a93..faa86a9a3b0d 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -843,11 +843,11 @@ static void update_end_of_memory_vars(u64 start, u64 size) } int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { int ret; - ret = __add_pages(nid, start_pfn, nr_pages, restrictions); + ret = __add_pages(nid, start_pfn, nr_pages, params); WARN_ON_ONCE(ret); /* update max_pfn, max_low_pfn and high_memory */ @@ -858,14 +858,14 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, } int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; init_memory_mapping(start, start + size); - return add_pages(nid, start_pfn, nr_pages, restrictions); + return add_pages(nid, start_pfn, nr_pages, params); } #define PAGE_INUSE 0xFD diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 7c1bcff11672..75f0f6304735 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -58,10 +58,10 @@ enum { }; /* - * Restrictions for the memory hotplug: - * altmap: alternative allocator for memmap array + * Extended parameters for memory hotplug: + * altmap: alternative allocator for memmap array (optional) */ -struct mhp_restrictions { +struct mhp_params { struct vmem_altmap *altmap; }; @@ -112,7 +112,7 @@ extern int restore_online_page_callback(online_page_callback_t callback); extern int try_online_node(int nid); extern int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions); + struct mhp_params *params); extern u64 max_mem_size; extern int memhp_online_type_from_str(const char *str); @@ -133,17 +133,17 @@ extern void __remove_pages(unsigned long start_pfn, unsigned long nr_pages, /* reasonably generic interface to expand the physical pages */ extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, - struct mhp_restrictions *restrictions); + struct mhp_params *params); #ifndef CONFIG_ARCH_HAS_ADD_PAGES static inline int add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, struct mhp_restrictions *restrictions) + unsigned long nr_pages, struct mhp_params *params) { - return __add_pages(nid, start_pfn, nr_pages, restrictions); + return __add_pages(nid, start_pfn, nr_pages, params); } #else /* ARCH_HAS_ADD_PAGES */ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, - struct mhp_restrictions *restrictions); + struct mhp_params *params); #endif /* ARCH_HAS_ADD_PAGES */ #ifdef CONFIG_NUMA diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 635e8e286598..fbfe7b40f552 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -304,12 +304,12 @@ static int check_hotplug_memory_addressable(unsigned long pfn, * add the new pages. */ int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { const unsigned long end_pfn = pfn + nr_pages; unsigned long cur_nr_pages; int err; - struct vmem_altmap *altmap = restrictions->altmap; + struct vmem_altmap *altmap = params->altmap; err = check_hotplug_memory_addressable(pfn, nr_pages); if (err) @@ -1002,7 +1002,7 @@ static int online_memory_block(struct memory_block *mem, void *arg) */ int __ref add_memory_resource(int nid, struct resource *res) { - struct mhp_restrictions restrictions = {}; + struct mhp_params params = {}; u64 start, size; bool new_node = false; int ret; @@ -1030,7 +1030,7 @@ int __ref add_memory_resource(int nid, struct resource *res) new_node = ret; /* call arch's memory hotadd */ - ret = arch_add_memory(nid, start, size, &restrictions); + ret = arch_add_memory(nid, start, size, ¶ms); if (ret < 0) goto error; diff --git a/mm/memremap.c b/mm/memremap.c index bbf457c4f166..b0b5170843ff 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -184,7 +184,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) { struct resource *res = &pgmap->res; struct dev_pagemap *conflict_pgmap; - struct mhp_restrictions restrictions = { + struct mhp_params params = { /* * We do not want any optional features only our own memmap */ @@ -302,7 +302,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) */ if (pgmap->type == MEMORY_DEVICE_PRIVATE) { error = add_pages(nid, PHYS_PFN(res->start), - PHYS_PFN(resource_size(res)), &restrictions); + PHYS_PFN(resource_size(res)), ¶ms); } else { error = kasan_add_zero_shadow(__va(res->start), resource_size(res)); if (error) { @@ -311,7 +311,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) } error = arch_add_memory(nid, res->start, resource_size(res), - &restrictions); + ¶ms); } if (!error) { @@ -319,7 +319,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE]; move_pfn_range_to_zone(zone, PHYS_PFN(res->start), - PHYS_PFN(resource_size(res)), restrictions.altmap); + PHYS_PFN(resource_size(res)), params.altmap); } mem_hotplug_done(); -- cgit From c164fbb40c43f8041f4d05ec9996d8ee343c92b1 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:24 -0700 Subject: x86/mm: thread pgprot_t through init_memory_mapping() In preparation to support a pgprot_t argument for arch_add_memory(). It's required to move the prototype of init_memory_mapping() seeing the original location came before the definition of pgprot_t. Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Reviewed-by: Dan Williams Acked-by: Michal Hocko Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Christoph Hellwig Cc: David Hildenbrand Cc: Eric Badger Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Paul Mackerras Cc: Will Deacon Link: http://lkml.kernel.org/r/20200306170846.9333-4-logang@deltatee.com Signed-off-by: Linus Torvalds --- arch/x86/include/asm/page_types.h | 3 --- arch/x86/include/asm/pgtable.h | 3 +++ arch/x86/kernel/amd_gart_64.c | 3 ++- arch/x86/mm/init.c | 9 +++++---- arch/x86/mm/init_32.c | 3 ++- arch/x86/mm/init_64.c | 32 ++++++++++++++++++-------------- arch/x86/mm/mm_internal.h | 3 ++- arch/x86/platform/uv/bios_uv.c | 3 ++- 8 files changed, 34 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index e27aa6be6320..a506a411474d 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -71,9 +71,6 @@ static inline phys_addr_t get_max_mapped(void) bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn); -extern unsigned long init_memory_mapping(unsigned long start, - unsigned long end); - extern void initmem_init(void); #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index abad0da0973a..4d02e64af1b3 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1081,6 +1081,9 @@ static inline void __meminit init_trampoline_default(void) void __init poking_init(void); +unsigned long init_memory_mapping(unsigned long start, + unsigned long end, pgprot_t prot); + # ifdef CONFIG_RANDOMIZE_MEMORY void __meminit init_trampoline(void); # else diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 4e5f50236048..16133819415c 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -744,7 +744,8 @@ int __init gart_iommu_init(void) start_pfn = PFN_DOWN(aper_base); if (!pfn_range_is_mapped(start_pfn, end_pfn)) - init_memory_mapping(start_pfn<> PAGE_SHIFT, ret >> PAGE_SHIFT); @@ -521,7 +522,7 @@ static unsigned long __init init_range_memory_mapping( */ can_use_brk_pgt = max(start, (u64)pgt_buf_end<= min(end, (u64)pgt_buf_top<> PAGE_SHIFT, - PAGE_KERNEL_LARGE), + prot), init); spin_unlock(&init_mm.page_table_lock); paddr_last = paddr_next; @@ -669,7 +672,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, static unsigned long __meminit phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, - unsigned long page_size_mask, bool init) + unsigned long page_size_mask, pgprot_t prot, bool init) { unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last; @@ -679,7 +682,7 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, if (!pgtable_l5_enabled()) return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, - page_size_mask, init); + page_size_mask, prot, init); for (; vaddr < vaddr_end; vaddr = vaddr_next) { p4d_t *p4d = p4d_page + p4d_index(vaddr); @@ -702,13 +705,13 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, if (!p4d_none(*p4d)) { pud = pud_offset(p4d, 0); paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end), - page_size_mask, init); + page_size_mask, prot, init); continue; } pud = alloc_low_page(); paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end), - page_size_mask, init); + page_size_mask, prot, init); spin_lock(&init_mm.page_table_lock); p4d_populate_init(&init_mm, p4d, pud, init); @@ -722,7 +725,7 @@ static unsigned long __meminit __kernel_physical_mapping_init(unsigned long paddr_start, unsigned long paddr_end, unsigned long page_size_mask, - bool init) + pgprot_t prot, bool init) { bool pgd_changed = false; unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last; @@ -743,13 +746,13 @@ __kernel_physical_mapping_init(unsigned long paddr_start, paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end), page_size_mask, - init); + prot, init); continue; } p4d = alloc_low_page(); paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end), - page_size_mask, init); + page_size_mask, prot, init); spin_lock(&init_mm.page_table_lock); if (pgtable_l5_enabled()) @@ -778,10 +781,10 @@ __kernel_physical_mapping_init(unsigned long paddr_start, unsigned long __meminit kernel_physical_mapping_init(unsigned long paddr_start, unsigned long paddr_end, - unsigned long page_size_mask) + unsigned long page_size_mask, pgprot_t prot) { return __kernel_physical_mapping_init(paddr_start, paddr_end, - page_size_mask, true); + page_size_mask, prot, true); } /* @@ -796,7 +799,8 @@ kernel_physical_mapping_change(unsigned long paddr_start, unsigned long page_size_mask) { return __kernel_physical_mapping_init(paddr_start, paddr_end, - page_size_mask, false); + page_size_mask, PAGE_KERNEL, + false); } #ifndef CONFIG_NUMA @@ -863,7 +867,7 @@ int arch_add_memory(int nid, u64 start, u64 size, unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - init_memory_mapping(start, start + size); + init_memory_mapping(start, start + size, PAGE_KERNEL); return add_pages(nid, start_pfn, nr_pages, params); } diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h index eeae142062ed..3f37b5c80bb3 100644 --- a/arch/x86/mm/mm_internal.h +++ b/arch/x86/mm/mm_internal.h @@ -12,7 +12,8 @@ void early_ioremap_page_table_range_init(void); unsigned long kernel_physical_mapping_init(unsigned long start, unsigned long end, - unsigned long page_size_mask); + unsigned long page_size_mask, + pgprot_t prot); unsigned long kernel_physical_mapping_change(unsigned long start, unsigned long end, unsigned long page_size_mask); diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index 607f58147311..c60255da5a6c 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -352,7 +352,8 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, if (type == EFI_MEMORY_MAPPED_IO) return ioremap(phys_addr, size); - last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); + last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size, + PAGE_KERNEL); if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) { unsigned long top = last_map_pfn << PAGE_SHIFT; efi_ioremap(top, size - (top - phys_addr), type, attribute); -- cgit From 30796e18c29942c4d64bf89c4135c975393ec1ad Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:28 -0700 Subject: x86/mm: introduce __set_memory_prot() For use in the 32bit arch_add_memory() to set the pgprot type of the memory to add. Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Reviewed-by: Dan Williams Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Christoph Hellwig Cc: David Hildenbrand Cc: Eric Badger Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Michal Hocko Cc: Paul Mackerras Cc: Will Deacon Link: http://lkml.kernel.org/r/20200306170846.9333-5-logang@deltatee.com Signed-off-by: Linus Torvalds --- arch/x86/include/asm/set_memory.h | 1 + arch/x86/mm/pat/set_memory.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 950532ccbc4a..ec2c0a094b5d 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -34,6 +34,7 @@ * The caller is required to take care of these. */ +int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot); int _set_memory_uc(unsigned long addr, int numpages); int _set_memory_wc(unsigned long addr, int numpages); int _set_memory_wt(unsigned long addr, int numpages); diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 6d5424069e2b..59eca6a94ce7 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1795,6 +1795,19 @@ static inline int cpa_clear_pages_array(struct page **pages, int numpages, CPA_PAGES_ARRAY, pages); } +/* + * _set_memory_prot is an internal helper for callers that have been passed + * a pgprot_t value from upper layers and a reservation has already been taken. + * If you want to set the pgprot to a specific page protocol, use the + * set_memory_xx() functions. + */ +int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot) +{ + return change_page_attr_set_clr(&addr, numpages, prot, + __pgprot(~pgprot_val(prot)), 0, 0, + NULL); +} + int _set_memory_uc(unsigned long addr, int numpages) { /* -- cgit From 4e00c5affdd4b04e6392001716333971932f3d0c Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:32 -0700 Subject: powerpc/mm: thread pgprot_t through create_section_mapping() In prepartion to support a pgprot_t argument for arch_add_memory(). Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Dan Williams Cc: Dave Hansen Cc: David Hildenbrand Cc: Eric Badger Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Michal Hocko Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/20200306170846.9333-6-logang@deltatee.com Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/book3s/64/hash.h | 3 ++- arch/powerpc/include/asm/book3s/64/radix.h | 3 ++- arch/powerpc/include/asm/sparsemem.h | 3 ++- arch/powerpc/mm/book3s64/hash_utils.c | 5 +++-- arch/powerpc/mm/book3s64/pgtable.c | 7 ++++--- arch/powerpc/mm/book3s64/radix_pgtable.c | 18 +++++++++++------- arch/powerpc/mm/mem.c | 5 +++-- 7 files changed, 27 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 2781ebf6add4..6fc4520092c7 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -251,7 +251,8 @@ extern int __meminit hash__vmemmap_create_mapping(unsigned long start, extern void hash__vmemmap_remove_mapping(unsigned long start, unsigned long page_size); -int hash__create_section_mapping(unsigned long start, unsigned long end, int nid); +int hash__create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot); int hash__remove_section_mapping(unsigned long start, unsigned long end); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index a1c60d5b50af..08c222d5b764 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -294,7 +294,8 @@ static inline unsigned long radix__get_tree_size(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int radix__create_section_mapping(unsigned long start, unsigned long end, int nid); +int radix__create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot); int radix__remove_section_mapping(unsigned long start, unsigned long end); #endif /* CONFIG_MEMORY_HOTPLUG */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h index 3192d454a733..c89b32443cff 100644 --- a/arch/powerpc/include/asm/sparsemem.h +++ b/arch/powerpc/include/asm/sparsemem.h @@ -13,7 +13,8 @@ #endif /* CONFIG_SPARSEMEM */ #ifdef CONFIG_MEMORY_HOTPLUG -extern int create_section_mapping(unsigned long start, unsigned long end, int nid); +extern int create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot); extern int remove_section_mapping(unsigned long start, unsigned long end); #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 7e5714a69a58..8ed2411c3f39 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -809,7 +809,8 @@ int resize_hpt_for_hotplug(unsigned long new_mem_size) return 0; } -int hash__create_section_mapping(unsigned long start, unsigned long end, int nid) +int hash__create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot) { int rc; @@ -819,7 +820,7 @@ int hash__create_section_mapping(unsigned long start, unsigned long end, int nid } rc = htab_bolt_mapping(start, end, __pa(start), - pgprot_val(PAGE_KERNEL), mmu_linear_psize, + pgprot_val(prot), mmu_linear_psize, mmu_kernel_ssize); if (rc < 0) { diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 2bf7e1b4fd82..e0bb69c616e4 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -171,12 +171,13 @@ void mmu_cleanup_all(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int __meminit create_section_mapping(unsigned long start, unsigned long end, int nid) +int __meminit create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot) { if (radix_enabled()) - return radix__create_section_mapping(start, end, nid); + return radix__create_section_mapping(start, end, nid, prot); - return hash__create_section_mapping(start, end, nid); + return hash__create_section_mapping(start, end, nid, prot); } int __meminit remove_section_mapping(unsigned long start, unsigned long end) diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 2a9a0cd79490..8f9edf07063a 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -254,7 +254,7 @@ static unsigned long next_boundary(unsigned long addr, unsigned long end) static int __meminit create_physical_mapping(unsigned long start, unsigned long end, - int nid) + int nid, pgprot_t _prot) { unsigned long vaddr, addr, mapping_size = 0; bool prev_exec, exec = false; @@ -290,7 +290,7 @@ static int __meminit create_physical_mapping(unsigned long start, prot = PAGE_KERNEL_X; exec = true; } else { - prot = PAGE_KERNEL; + prot = _prot; exec = false; } @@ -334,7 +334,7 @@ static void __init radix_init_pgtable(void) WARN_ON(create_physical_mapping(reg->base, reg->base + reg->size, - -1)); + -1, PAGE_KERNEL)); } /* Find out how many PID bits are supported */ @@ -713,8 +713,10 @@ static int __meminit stop_machine_change_mapping(void *data) spin_unlock(&init_mm.page_table_lock); pte_clear(&init_mm, params->aligned_start, params->pte); - create_physical_mapping(__pa(params->aligned_start), __pa(params->start), -1); - create_physical_mapping(__pa(params->end), __pa(params->aligned_end), -1); + create_physical_mapping(__pa(params->aligned_start), + __pa(params->start), -1, PAGE_KERNEL); + create_physical_mapping(__pa(params->end), __pa(params->aligned_end), + -1, PAGE_KERNEL); spin_lock(&init_mm.page_table_lock); return 0; } @@ -871,14 +873,16 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end) radix__flush_tlb_kernel_range(start, end); } -int __meminit radix__create_section_mapping(unsigned long start, unsigned long end, int nid) +int __meminit radix__create_section_mapping(unsigned long start, + unsigned long end, int nid, + pgprot_t prot) { if (end >= RADIX_VMALLOC_START) { pr_warn("Outside the supported range\n"); return -1; } - return create_physical_mapping(__pa(start), __pa(end), nid); + return create_physical_mapping(__pa(start), __pa(end), nid, prot); } int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index e1cc58115816..bf63ab04db63 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -90,7 +90,8 @@ int memory_add_physaddr_to_nid(u64 start) } #endif -int __weak create_section_mapping(unsigned long start, unsigned long end, int nid) +int __weak create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot) { return -ENODEV; } @@ -131,7 +132,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, resize_hpt_for_hotplug(memblock_phys_mem_size()); start = (unsigned long)__va(start); - rc = create_section_mapping(start, start + size, nid); + rc = create_section_mapping(start, start + size, nid, PAGE_KERNEL); if (rc) { pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n", start, start + size, rc); -- cgit From bfeb022f8fe4c5afdcfd7a3d868fac9765f9bcad Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:36 -0700 Subject: mm/memory_hotplug: add pgprot_t to mhp_params devm_memremap_pages() is currently used by the PCI P2PDMA code to create struct page mappings for IO memory. At present, these mappings are created with PAGE_KERNEL which implies setting the PAT bits to be WB. However, on x86, an mtrr register will typically override this and force the cache type to be UC-. In the case firmware doesn't set this register it is effectively WB and will typically result in a machine check exception when it's accessed. Other arches are not currently likely to function correctly seeing they don't have any MTRR registers to fall back on. To solve this, provide a way to specify the pgprot value explicitly to arch_add_memory(). Of the arches that support MEMORY_HOTPLUG: x86_64, and arm64 need a simple change to pass the pgprot_t down to their respective functions which set up the page tables. For x86_32, set the page tables explicitly using _set_memory_prot() (seeing they are already mapped). For ia64, s390 and sh, reject anything but PAGE_KERNEL settings -- this should be fine, for now, seeing these architectures don't support ZONE_DEVICE. A check in __add_pages() is also added to ensure the pgprot parameter was set for all arches. Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Acked-by: David Hildenbrand Acked-by: Michal Hocko Acked-by: Dan Williams Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Dave Hansen Cc: Eric Badger Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/20200306170846.9333-7-logang@deltatee.com Signed-off-by: Linus Torvalds --- arch/arm64/mm/mmu.c | 3 ++- arch/ia64/mm/init.c | 3 +++ arch/powerpc/mm/mem.c | 3 ++- arch/s390/mm/init.c | 3 +++ arch/sh/mm/init.c | 3 +++ arch/x86/mm/init_32.c | 12 ++++++++++++ arch/x86/mm/init_64.c | 2 +- include/linux/memory_hotplug.h | 3 +++ mm/memory_hotplug.c | 5 ++++- mm/memremap.c | 6 +++--- 10 files changed, 36 insertions(+), 7 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 6d4e9c2b4ed0..a374e4f51a62 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1382,7 +1382,8 @@ int arch_add_memory(int nid, u64 start, u64 size, flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), - size, PAGE_KERNEL, __pgd_pgtable_alloc, flags); + size, params->pgprot, __pgd_pgtable_alloc, + flags); memblock_clear_nomap(start, size); diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 97bbc23ea1e3..d637b4ea3147 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -676,6 +676,9 @@ int arch_add_memory(int nid, u64 start, u64 size, unsigned long nr_pages = size >> PAGE_SHIFT; int ret; + if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot)) + return -EINVAL; + ret = __add_pages(nid, start_pfn, nr_pages, params); if (ret) printk("%s: Problem encountered in __add_pages() as ret=%d\n", diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index bf63ab04db63..041ed7cfd341 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -132,7 +132,8 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, resize_hpt_for_hotplug(memblock_phys_mem_size()); start = (unsigned long)__va(start); - rc = create_section_mapping(start, start + size, nid, PAGE_KERNEL); + rc = create_section_mapping(start, start + size, nid, + params->pgprot); if (rc) { pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n", start, start + size, rc); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index e9e4a7abd0cc..87b2d024e75a 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -277,6 +277,9 @@ int arch_add_memory(int nid, u64 start, u64 size, if (WARN_ON_ONCE(params->altmap)) return -EINVAL; + if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot)) + return -EINVAL; + rc = vmem_add_mapping(start, size); if (rc) return rc; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index e5114c053364..b9de2d4fa57e 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -412,6 +412,9 @@ int arch_add_memory(int nid, u64 start, u64 size, unsigned long nr_pages = size >> PAGE_SHIFT; int ret; + if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot) + return -EINVAL; + /* We only have ZONE_NORMAL, so this is easy.. */ ret = __add_pages(nid, start_pfn, nr_pages, params); if (unlikely(ret)) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ac75a8397804..4222a010057a 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -824,6 +824,18 @@ int arch_add_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; + int ret; + + /* + * The page tables were already mapped at boot so if the caller + * requests a different mapping type then we must change all the + * pages with __set_memory_prot(). + */ + if (params->pgprot.pgprot != PAGE_KERNEL.pgprot) { + ret = __set_memory_prot(start, nr_pages, params->pgprot); + if (ret) + return ret; + } return __add_pages(nid, start_pfn, nr_pages, params); } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 7480de743105..3b289c2f75cd 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -867,7 +867,7 @@ int arch_add_memory(int nid, u64 start, u64 size, unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - init_memory_mapping(start, start + size, PAGE_KERNEL); + init_memory_mapping(start, start + size, params->pgprot); return add_pages(nid, start_pfn, nr_pages, params); } diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 75f0f6304735..93d9ada74ddd 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -60,9 +60,12 @@ enum { /* * Extended parameters for memory hotplug: * altmap: alternative allocator for memmap array (optional) + * pgprot: page protection flags to apply to newly created page tables + * (required) */ struct mhp_params { struct vmem_altmap *altmap; + pgprot_t pgprot; }; /* diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index fbfe7b40f552..fc0aad0bc1f5 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -311,6 +311,9 @@ int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages, int err; struct vmem_altmap *altmap = params->altmap; + if (WARN_ON_ONCE(!params->pgprot.pgprot)) + return -EINVAL; + err = check_hotplug_memory_addressable(pfn, nr_pages); if (err) return err; @@ -1002,7 +1005,7 @@ static int online_memory_block(struct memory_block *mem, void *arg) */ int __ref add_memory_resource(int nid, struct resource *res) { - struct mhp_params params = {}; + struct mhp_params params = { .pgprot = PAGE_KERNEL }; u64 start, size; bool new_node = false; int ret; diff --git a/mm/memremap.c b/mm/memremap.c index b0b5170843ff..bc167cde3237 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -189,8 +189,8 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) * We do not want any optional features only our own memmap */ .altmap = pgmap_altmap(pgmap), + .pgprot = PAGE_KERNEL, }; - pgprot_t pgprot = PAGE_KERNEL; int error, is_ram; bool need_devmap_managed = true; @@ -282,8 +282,8 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) if (nid < 0) nid = numa_mem_id(); - error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(res->start), 0, - resource_size(res)); + error = track_pfn_remap(NULL, ¶ms.pgprot, PHYS_PFN(res->start), + 0, resource_size(res)); if (error) goto err_pfn_remap; -- cgit From a50d8d98a87f33efa07adfa20747e13a93839a4b Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:39 -0700 Subject: mm/memremap: set caching mode for PCI P2PDMA memory to WC PCI BAR IO memory should never be mapped as WB, however prior to this the PAT bits were set WB and it was typically overridden by MTRR registers set by the firmware. Set PCI P2PDMA memory to be UC as this is what it currently, typically, ends up being mapped as on x86 after the MTRR registers override the cache setting. Future use-cases may need to generalize this by adding flags to select the caching type, as some P2PDMA cases may not want UC. However, those use-cases are not upstream yet and this can be changed when they arrive. Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Reviewed-by: Dan Williams Cc: Christoph Hellwig Cc: Jason Gunthorpe Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David Hildenbrand Cc: Eric Badger Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Michael Ellerman Cc: Michal Hocko Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/20200306170846.9333-8-logang@deltatee.com Signed-off-by: Linus Torvalds --- mm/memremap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/memremap.c b/mm/memremap.c index bc167cde3237..03e38b7a38f1 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -217,7 +217,10 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) } break; case MEMORY_DEVICE_DEVDAX: + need_devmap_managed = false; + break; case MEMORY_DEVICE_PCI_P2PDMA: + params.pgprot = pgprot_noncached(params.pgprot); need_devmap_managed = false; break; default: -- cgit From d7d27cfc5cf0766a26a8f56868c5ad5434735126 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Apr 2020 14:33:43 -0700 Subject: kmod: make request_module() return an error when autoloading is disabled Patch series "module autoloading fixes and cleanups", v5. This series fixes a bug where request_module() was reporting success to kernel code when module autoloading had been completely disabled via 'echo > /proc/sys/kernel/modprobe'. It also addresses the issues raised on the original thread (https://lkml.kernel.org/lkml/20200310223731.126894-1-ebiggers@kernel.org/T/#u) bydocumenting the modprobe sysctl, adding a self-test for the empty path case, and downgrading a user-reachable WARN_ONCE(). This patch (of 4): It's long been possible to disable kernel module autoloading completely (while still allowing manual module insertion) by setting /proc/sys/kernel/modprobe to the empty string. This can be preferable to setting it to a nonexistent file since it avoids the overhead of an attempted execve(), avoids potential deadlocks, and avoids the call to security_kernel_module_request() and thus on SELinux-based systems eliminates the need to write SELinux rules to dontaudit module_request. However, when module autoloading is disabled in this way, request_module() returns 0. This is broken because callers expect 0 to mean that the module was successfully loaded. Apparently this was never noticed because this method of disabling module autoloading isn't used much, and also most callers don't use the return value of request_module() since it's always necessary to check whether the module registered its functionality or not anyway. But improperly returning 0 can indeed confuse a few callers, for example get_fs_type() in fs/filesystems.c where it causes a WARNING to be hit: if (!fs && (request_module("fs-%.*s", len, name) == 0)) { fs = __get_fs_type(name, len); WARN_ONCE(!fs, "request_module fs-%.*s succeeded, but still no fs?\n", len, name); } This is easily reproduced with: echo > /proc/sys/kernel/modprobe mount -t NONEXISTENT none / It causes: request_module fs-NONEXISTENT succeeded, but still no fs? WARNING: CPU: 1 PID: 1106 at fs/filesystems.c:275 get_fs_type+0xd6/0xf0 [...] This should actually use pr_warn_once() rather than WARN_ONCE(), since it's also user-reachable if userspace immediately unloads the module. Regardless, request_module() should correctly return an error when it fails. So let's make it return -ENOENT, which matches the error when the modprobe binary doesn't exist. I've also sent patches to document and test this case. Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Reviewed-by: Kees Cook Reviewed-by: Jessica Yu Acked-by: Luis Chamberlain Cc: Alexei Starovoitov Cc: Greg Kroah-Hartman Cc: Jeff Vander Stoep Cc: Ben Hutchings Cc: Josh Triplett Cc: Link: http://lkml.kernel.org/r/20200310223731.126894-1-ebiggers@kernel.org Link: http://lkml.kernel.org/r/20200312202552.241885-1-ebiggers@kernel.org Signed-off-by: Linus Torvalds --- kernel/kmod.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/kmod.c b/kernel/kmod.c index 8b2b311afa95..37c3c4b97b8e 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -120,7 +120,7 @@ out: * invoke it. * * If module auto-loading support is disabled then this function - * becomes a no-operation. + * simply returns -ENOENT. */ int __request_module(bool wait, const char *fmt, ...) { @@ -137,7 +137,7 @@ int __request_module(bool wait, const char *fmt, ...) WARN_ON_ONCE(wait && current_is_async()); if (!modprobe_path[0]) - return 0; + return -ENOENT; va_start(args, fmt); ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); -- cgit From 26c5d78c976ca298e59a56f6101a97b618ba3539 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Apr 2020 14:33:47 -0700 Subject: fs/filesystems.c: downgrade user-reachable WARN_ONCE() to pr_warn_once() After request_module(), nothing is stopping the module from being unloaded until someone takes a reference to it via try_get_module(). The WARN_ONCE() in get_fs_type() is thus user-reachable, via userspace running 'rmmod' concurrently. Since WARN_ONCE() is for kernel bugs only, not for user-reachable situations, downgrade this warning to pr_warn_once(). Keep it printed once only, since the intent of this warning is to detect a bug in modprobe at boot time. Printing the warning more than once wouldn't really provide any useful extra information. Fixes: 41124db869b7 ("fs: warn in case userspace lied about modprobe return") Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Reviewed-by: Jessica Yu Cc: Alexei Starovoitov Cc: Greg Kroah-Hartman Cc: Jeff Vander Stoep Cc: Jessica Yu Cc: Kees Cook Cc: Luis Chamberlain Cc: NeilBrown Cc: [4.13+] Link: http://lkml.kernel.org/r/20200312202552.241885-3-ebiggers@kernel.org Signed-off-by: Linus Torvalds --- fs/filesystems.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/filesystems.c b/fs/filesystems.c index 77bf5f95362d..90b8d879fbaf 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -272,7 +272,9 @@ struct file_system_type *get_fs_type(const char *name) fs = __get_fs_type(name, len); if (!fs && (request_module("fs-%.*s", len, name) == 0)) { fs = __get_fs_type(name, len); - WARN_ONCE(!fs, "request_module fs-%.*s succeeded, but still no fs?\n", len, name); + if (!fs) + pr_warn_once("request_module fs-%.*s succeeded, but still no fs?\n", + len, name); } if (dot && fs && !(fs->fs_flags & FS_HAS_SUBTYPE)) { -- cgit From 6e71582506258c9b2efd8f164706f2af2256cf16 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Apr 2020 14:33:50 -0700 Subject: docs: admin-guide: document the kernel.modprobe sysctl Document the kernel.modprobe sysctl in the same place that all the other kernel.* sysctls are documented. Make sure to mention how to use this sysctl to completely disable module autoloading, and how this sysctl relates to CONFIG_STATIC_USERMODEHELPER. [ebiggers@google.com: v5] Link: http://lkml.kernel.org/r/20200318230515.171692-4-ebiggers@kernel.org Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Cc: Alexei Starovoitov Cc: Greg Kroah-Hartman Cc: Jeff Vander Stoep Cc: Jessica Yu Cc: Kees Cook Cc: Luis Chamberlain Cc: NeilBrown Link: http://lkml.kernel.org/r/20200312202552.241885-4-ebiggers@kernel.org Signed-off-by: Linus Torvalds --- Documentation/admin-guide/sysctl/kernel.rst | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 335696d3360d..39c95c0e13d3 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -446,6 +446,27 @@ Notes: successful IPC object allocation. If an IPC object allocation syscall fails, it is undefined if the value remains unmodified or is reset to -1. +modprobe: +========= + +The path to the usermode helper for autoloading kernel modules, by +default "/sbin/modprobe". This binary is executed when the kernel +requests a module. For example, if userspace passes an unknown +filesystem type to mount(), then the kernel will automatically request +the corresponding filesystem module by executing this usermode helper. +This usermode helper should insert the needed module into the kernel. + +This sysctl only affects module autoloading. It has no effect on the +ability to explicitly insert modules. + +If this sysctl is set to the empty string, then module autoloading is +completely disabled. The kernel will not try to execute a usermode +helper at all, nor will it call the kernel_module_request LSM hook. + +If CONFIG_STATIC_USERMODEHELPER=y is set in the kernel configuration, +then the configured static usermode helper overrides this sysctl, +except that the empty string is still accepted to completely disable +module autoloading as described above. nmi_watchdog ============ -- cgit From 6d573a07528308eb77ec072c010819c359bebf6e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Apr 2020 14:33:53 -0700 Subject: selftests: kmod: fix handling test numbers above 9 get_test_count() and get_test_enabled() were broken for test numbers above 9 due to awk interpreting a field specification like '$0010' as octal rather than decimal. Fix it by stripping the leading zeroes. Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Acked-by: Luis Chamberlain Cc: Alexei Starovoitov Cc: Greg Kroah-Hartman Cc: Jeff Vander Stoep Cc: Jessica Yu Cc: Kees Cook Cc: NeilBrown Link: http://lkml.kernel.org/r/20200318230515.171692-5-ebiggers@kernel.org Signed-off-by: Linus Torvalds --- tools/testing/selftests/kmod/kmod.sh | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh index 8b944cf042f6..315a43111e04 100755 --- a/tools/testing/selftests/kmod/kmod.sh +++ b/tools/testing/selftests/kmod/kmod.sh @@ -505,18 +505,23 @@ function test_num() fi } -function get_test_count() +function get_test_data() { test_num $1 - TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}') + local field_num=$(echo $1 | sed 's/^0*//') + echo $ALL_TESTS | awk '{print $'$field_num'}' +} + +function get_test_count() +{ + TEST_DATA=$(get_test_data $1) LAST_TWO=${TEST_DATA#*:*} echo ${LAST_TWO%:*} } function get_test_enabled() { - test_num $1 - TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}') + TEST_DATA=$(get_test_data $1) echo ${TEST_DATA#*:*:} } -- cgit From 23756e551f35aaa9400a7e8a2660494115221801 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Apr 2020 14:33:57 -0700 Subject: selftests: kmod: test disabling module autoloading Test that request_module() fails with -ENOENT when /proc/sys/kernel/modprobe contains (a) a nonexistent path, and (b) an empty path. Case (b) is a regression test for the patch "kmod: make request_module() return an error when autoloading is disabled". Tested with 'kmod.sh -t 0010 && kmod.sh -t 0011', and also simply with 'kmod.sh' to run all kmod tests. Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Acked-by: Luis Chamberlain Cc: Alexei Starovoitov Cc: Greg Kroah-Hartman Cc: Jeff Vander Stoep Cc: Jessica Yu Cc: Kees Cook Cc: NeilBrown Link: http://lkml.kernel.org/r/20200312202552.241885-5-ebiggers@kernel.org Signed-off-by: Linus Torvalds --- tools/testing/selftests/kmod/kmod.sh | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh index 315a43111e04..3702dbcc90a7 100755 --- a/tools/testing/selftests/kmod/kmod.sh +++ b/tools/testing/selftests/kmod/kmod.sh @@ -61,6 +61,8 @@ ALL_TESTS="$ALL_TESTS 0006:10:1" ALL_TESTS="$ALL_TESTS 0007:5:1" ALL_TESTS="$ALL_TESTS 0008:150:1" ALL_TESTS="$ALL_TESTS 0009:150:1" +ALL_TESTS="$ALL_TESTS 0010:1:1" +ALL_TESTS="$ALL_TESTS 0011:1:1" # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 @@ -149,6 +151,7 @@ function load_req_mod() test_finish() { + echo "$MODPROBE" > /proc/sys/kernel/modprobe echo "Test completed" } @@ -443,6 +446,30 @@ kmod_test_0009() config_expect_result ${FUNCNAME[0]} SUCCESS } +kmod_test_0010() +{ + kmod_defaults_driver + config_num_threads 1 + echo "/KMOD_TEST_NONEXISTENT" > /proc/sys/kernel/modprobe + config_trigger ${FUNCNAME[0]} + config_expect_result ${FUNCNAME[0]} -ENOENT + echo "$MODPROBE" > /proc/sys/kernel/modprobe +} + +kmod_test_0011() +{ + kmod_defaults_driver + config_num_threads 1 + # This causes the kernel to not even try executing modprobe. The error + # code is still -ENOENT like when modprobe doesn't exist, so we can't + # easily test for the exact difference. But this still is a useful test + # since there was a bug where request_module() returned 0 in this case. + echo > /proc/sys/kernel/modprobe + config_trigger ${FUNCNAME[0]} + config_expect_result ${FUNCNAME[0]} -ENOENT + echo "$MODPROBE" > /proc/sys/kernel/modprobe +} + list_tests() { echo "Test ID list:" @@ -460,6 +487,8 @@ list_tests() echo "0007 x $(get_test_count 0007) - multithreaded tests with default setup test request_module() and get_fs_type()" echo "0008 x $(get_test_count 0008) - multithreaded - push kmod_concurrent over max_modprobes for request_module()" echo "0009 x $(get_test_count 0009) - multithreaded - push kmod_concurrent over max_modprobes for get_fs_type()" + echo "0010 x $(get_test_count 0010) - test nonexistent modprobe path" + echo "0011 x $(get_test_count 0011) - test completely disabling module autoloading" } usage() @@ -616,6 +645,7 @@ test_reqs allow_user_defaults load_req_mod +MODPROBE=$( Date: Fri, 10 Apr 2020 14:34:00 -0700 Subject: change email address for Pali Rohár MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For security reasons I stopped using gmail account and kernel address is now up-to-date alias to my personal address. People periodically send me emails to address which they found in source code of drivers, so this change reflects state where people can contact me. [ Added .mailmap entry as per Joe Perches - Linus ] Signed-off-by: Pali Rohár Signed-off-by: Andrew Morton Cc: Greg Kroah-Hartman Cc: Joe Perches Link: http://lkml.kernel.org/r/20200307104237.8199-1-pali@kernel.org Signed-off-by: Linus Torvalds --- .mailmap | 1 + Documentation/ABI/testing/sysfs-platform-dell-laptop | 8 ++++---- MAINTAINERS | 16 ++++++++-------- arch/arm/mach-omap2/omap-secure.c | 2 +- arch/arm/mach-omap2/omap-secure.h | 2 +- arch/arm/mach-omap2/omap-smc.S | 2 +- drivers/char/hw_random/omap3-rom-rng.c | 4 ++-- drivers/hwmon/dell-smm-hwmon.c | 4 ++-- drivers/platform/x86/dell-laptop.c | 4 ++-- drivers/platform/x86/dell-rbtn.c | 4 ++-- drivers/platform/x86/dell-rbtn.h | 2 +- drivers/platform/x86/dell-smbios-base.c | 4 ++-- drivers/platform/x86/dell-smbios-smm.c | 2 +- drivers/platform/x86/dell-smbios.h | 2 +- drivers/platform/x86/dell-smo8800.c | 2 +- drivers/platform/x86/dell-wmi.c | 4 ++-- drivers/power/supply/bq2415x_charger.c | 4 ++-- drivers/power/supply/bq27xxx_battery.c | 2 +- drivers/power/supply/isp1704_charger.c | 2 +- drivers/power/supply/rx51_battery.c | 4 ++-- fs/udf/ecma_167.h | 2 +- fs/udf/osta_udf.h | 2 +- include/linux/power/bq2415x_charger.h | 2 +- tools/laptop/freefall/freefall.c | 2 +- 24 files changed, 42 insertions(+), 41 deletions(-) diff --git a/.mailmap b/.mailmap index 9198a93c2f5c..893266d1f7b0 100644 --- a/.mailmap +++ b/.mailmap @@ -210,6 +210,7 @@ Oleksij Rempel Oleksij Rempel Oleksij Rempel Oleksij Rempel +Pali Rohár Paolo 'Blaisorblade' Giarrusso Patrick Mochel Paul Burton diff --git a/Documentation/ABI/testing/sysfs-platform-dell-laptop b/Documentation/ABI/testing/sysfs-platform-dell-laptop index 8c6a0b8e1131..9b917c7453de 100644 --- a/Documentation/ABI/testing/sysfs-platform-dell-laptop +++ b/Documentation/ABI/testing/sysfs-platform-dell-laptop @@ -2,7 +2,7 @@ What: /sys/class/leds/dell::kbd_backlight/als_enabled Date: December 2014 KernelVersion: 3.19 Contact: Gabriele Mazzotta , - Pali Rohár + Pali Rohár Description: This file allows to control the automatic keyboard illumination mode on some systems that have an ambient @@ -13,7 +13,7 @@ What: /sys/class/leds/dell::kbd_backlight/als_setting Date: December 2014 KernelVersion: 3.19 Contact: Gabriele Mazzotta , - Pali Rohár + Pali Rohár Description: This file allows to specifiy the on/off threshold value, as reported by the ambient light sensor. @@ -22,7 +22,7 @@ What: /sys/class/leds/dell::kbd_backlight/start_triggers Date: December 2014 KernelVersion: 3.19 Contact: Gabriele Mazzotta , - Pali Rohár + Pali Rohár Description: This file allows to control the input triggers that turn on the keyboard backlight illumination that is @@ -45,7 +45,7 @@ What: /sys/class/leds/dell::kbd_backlight/stop_timeout Date: December 2014 KernelVersion: 3.19 Contact: Gabriele Mazzotta , - Pali Rohár + Pali Rohár Description: This file allows to specify the interval after which the keyboard illumination is disabled because of inactivity. diff --git a/MAINTAINERS b/MAINTAINERS index d5b1878f2815..ff043097ea0e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -727,7 +727,7 @@ L: linux-alpha@vger.kernel.org F: arch/alpha/ ALPS PS/2 TOUCHPAD DRIVER -R: Pali Rohár +R: Pali Rohár F: drivers/input/mouse/alps.* ALTERA I2C CONTROLLER DRIVER @@ -4774,23 +4774,23 @@ F: drivers/net/fddi/defza.* DELL LAPTOP DRIVER M: Matthew Garrett -M: Pali Rohár +M: Pali Rohár L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/dell-laptop.c DELL LAPTOP FREEFALL DRIVER -M: Pali Rohár +M: Pali Rohár S: Maintained F: drivers/platform/x86/dell-smo8800.c DELL LAPTOP RBTN DRIVER -M: Pali Rohár +M: Pali Rohár S: Maintained F: drivers/platform/x86/dell-rbtn.* DELL LAPTOP SMM DRIVER -M: Pali Rohár +M: Pali Rohár S: Maintained F: drivers/hwmon/dell-smm-hwmon.c F: include/uapi/linux/i8k.h @@ -4802,7 +4802,7 @@ S: Maintained F: drivers/platform/x86/dell_rbu.c DELL SMBIOS DRIVER -M: Pali Rohár +M: Pali Rohár M: Mario Limonciello L: platform-driver-x86@vger.kernel.org S: Maintained @@ -4835,7 +4835,7 @@ F: drivers/platform/x86/dell-wmi-descriptor.c DELL WMI NOTIFICATIONS DRIVER M: Matthew Garrett -M: Pali Rohár +M: Pali Rohár S: Maintained F: drivers/platform/x86/dell-wmi.c @@ -11950,7 +11950,7 @@ F: drivers/media/i2c/et8ek8 F: drivers/media/i2c/ad5820.c NOKIA N900 POWER SUPPLY DRIVERS -R: Pali Rohár +R: Pali Rohár F: include/linux/power/bq2415x_charger.h F: include/linux/power/bq27xxx_battery.h F: drivers/power/supply/bq2415x_charger.c diff --git a/arch/arm/mach-omap2/omap-secure.c b/arch/arm/mach-omap2/omap-secure.c index d00e3c72e37d..f70d561f37f7 100644 --- a/arch/arm/mach-omap2/omap-secure.c +++ b/arch/arm/mach-omap2/omap-secure.c @@ -5,7 +5,7 @@ * Copyright (C) 2011 Texas Instruments, Inc. * Santosh Shilimkar * Copyright (C) 2012 Ivaylo Dimitrov - * Copyright (C) 2013 Pali Rohár + * Copyright (C) 2013 Pali Rohár */ #include diff --git a/arch/arm/mach-omap2/omap-secure.h b/arch/arm/mach-omap2/omap-secure.h index ba8c486c0454..4aaa95706d39 100644 --- a/arch/arm/mach-omap2/omap-secure.h +++ b/arch/arm/mach-omap2/omap-secure.h @@ -5,7 +5,7 @@ * Copyright (C) 2011 Texas Instruments, Inc. * Santosh Shilimkar * Copyright (C) 2012 Ivaylo Dimitrov - * Copyright (C) 2013 Pali Rohár + * Copyright (C) 2013 Pali Rohár */ #ifndef OMAP_ARCH_OMAP_SECURE_H #define OMAP_ARCH_OMAP_SECURE_H diff --git a/arch/arm/mach-omap2/omap-smc.S b/arch/arm/mach-omap2/omap-smc.S index d4832845a4e8..7376f528034d 100644 --- a/arch/arm/mach-omap2/omap-smc.S +++ b/arch/arm/mach-omap2/omap-smc.S @@ -6,7 +6,7 @@ * Written by Santosh Shilimkar * * Copyright (C) 2012 Ivaylo Dimitrov - * Copyright (C) 2013 Pali Rohár + * Copyright (C) 2013 Pali Rohár */ #include diff --git a/drivers/char/hw_random/omap3-rom-rng.c b/drivers/char/hw_random/omap3-rom-rng.c index a431c5cbe2be..e0d77fa048fb 100644 --- a/drivers/char/hw_random/omap3-rom-rng.c +++ b/drivers/char/hw_random/omap3-rom-rng.c @@ -4,7 +4,7 @@ * Copyright (C) 2009 Nokia Corporation * Author: Juha Yrjola * - * Copyright (C) 2013 Pali Rohár + * Copyright (C) 2013 Pali Rohár * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any @@ -178,5 +178,5 @@ module_platform_driver(omap3_rom_rng_driver); MODULE_ALIAS("platform:omap3-rom-rng"); MODULE_AUTHOR("Juha Yrjola"); -MODULE_AUTHOR("Pali Rohár "); +MODULE_AUTHOR("Pali Rohár "); MODULE_LICENSE("GPL"); diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index d4c83009d625..ab719d372b0d 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -7,7 +7,7 @@ * Hwmon integration: * Copyright (C) 2011 Jean Delvare * Copyright (C) 2013, 2014 Guenter Roeck - * Copyright (C) 2014, 2015 Pali Rohár + * Copyright (C) 2014, 2015 Pali Rohár */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -86,7 +86,7 @@ static unsigned int auto_fan; #define I8K_HWMON_HAVE_FAN3 (1 << 12) MODULE_AUTHOR("Massimo Dal Zotto (dz@debian.org)"); -MODULE_AUTHOR("Pali Rohár "); +MODULE_AUTHOR("Pali Rohár "); MODULE_DESCRIPTION("Dell laptop SMM BIOS hwmon driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS("i8k"); diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index 74e988f839e8..f8d3e3bd1bb5 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -4,7 +4,7 @@ * * Copyright (c) Red Hat * Copyright (c) 2014 Gabriele Mazzotta - * Copyright (c) 2014 Pali Rohár + * Copyright (c) 2014 Pali Rohár * * Based on documentation in the libsmbios package: * Copyright (C) 2005-2014 Dell Inc. @@ -2295,6 +2295,6 @@ module_exit(dell_exit); MODULE_AUTHOR("Matthew Garrett "); MODULE_AUTHOR("Gabriele Mazzotta "); -MODULE_AUTHOR("Pali Rohár "); +MODULE_AUTHOR("Pali Rohár "); MODULE_DESCRIPTION("Dell laptop driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/platform/x86/dell-rbtn.c b/drivers/platform/x86/dell-rbtn.c index a6b856cd86bd..a89fad47ff13 100644 --- a/drivers/platform/x86/dell-rbtn.c +++ b/drivers/platform/x86/dell-rbtn.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* Dell Airplane Mode Switch driver - Copyright (C) 2014-2015 Pali Rohár + Copyright (C) 2014-2015 Pali Rohár */ @@ -495,5 +495,5 @@ MODULE_PARM_DESC(auto_remove_rfkill, "Automatically remove rfkill devices when " "(default true)"); MODULE_DEVICE_TABLE(acpi, rbtn_ids); MODULE_DESCRIPTION("Dell Airplane Mode Switch driver"); -MODULE_AUTHOR("Pali Rohár "); +MODULE_AUTHOR("Pali Rohár "); MODULE_LICENSE("GPL"); diff --git a/drivers/platform/x86/dell-rbtn.h b/drivers/platform/x86/dell-rbtn.h index 0fdc81644458..5e030f926c58 100644 --- a/drivers/platform/x86/dell-rbtn.h +++ b/drivers/platform/x86/dell-rbtn.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* Dell Airplane Mode Switch driver - Copyright (C) 2014-2015 Pali Rohár + Copyright (C) 2014-2015 Pali Rohár */ diff --git a/drivers/platform/x86/dell-smbios-base.c b/drivers/platform/x86/dell-smbios-base.c index fe59b0ebff31..2e2cd565926a 100644 --- a/drivers/platform/x86/dell-smbios-base.c +++ b/drivers/platform/x86/dell-smbios-base.c @@ -4,7 +4,7 @@ * * Copyright (c) Red Hat * Copyright (c) 2014 Gabriele Mazzotta - * Copyright (c) 2014 Pali Rohár + * Copyright (c) 2014 Pali Rohár * * Based on documentation in the libsmbios package: * Copyright (C) 2005-2014 Dell Inc. @@ -645,7 +645,7 @@ module_exit(dell_smbios_exit); MODULE_AUTHOR("Matthew Garrett "); MODULE_AUTHOR("Gabriele Mazzotta "); -MODULE_AUTHOR("Pali Rohár "); +MODULE_AUTHOR("Pali Rohár "); MODULE_AUTHOR("Mario Limonciello "); MODULE_DESCRIPTION("Common functions for kernel modules using Dell SMBIOS"); MODULE_LICENSE("GPL"); diff --git a/drivers/platform/x86/dell-smbios-smm.c b/drivers/platform/x86/dell-smbios-smm.c index d6854d1c4119..97c52a839a3e 100644 --- a/drivers/platform/x86/dell-smbios-smm.c +++ b/drivers/platform/x86/dell-smbios-smm.c @@ -4,7 +4,7 @@ * * Copyright (c) Red Hat * Copyright (c) 2014 Gabriele Mazzotta - * Copyright (c) 2014 Pali Rohár + * Copyright (c) 2014 Pali Rohár * Copyright (c) 2017 Dell Inc. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/drivers/platform/x86/dell-smbios.h b/drivers/platform/x86/dell-smbios.h index a7ff9803f41a..75fa8ea0476d 100644 --- a/drivers/platform/x86/dell-smbios.h +++ b/drivers/platform/x86/dell-smbios.h @@ -4,7 +4,7 @@ * * Copyright (c) Red Hat * Copyright (c) 2014 Gabriele Mazzotta - * Copyright (c) 2014 Pali Rohár + * Copyright (c) 2014 Pali Rohár * * Based on documentation in the libsmbios package: * Copyright (C) 2005-2014 Dell Inc. diff --git a/drivers/platform/x86/dell-smo8800.c b/drivers/platform/x86/dell-smo8800.c index b531fe8ab7e0..5d9304a7de1b 100644 --- a/drivers/platform/x86/dell-smo8800.c +++ b/drivers/platform/x86/dell-smo8800.c @@ -3,7 +3,7 @@ * dell-smo8800.c - Dell Latitude ACPI SMO88XX freefall sensor driver * * Copyright (C) 2012 Sonal Santan - * Copyright (C) 2014 Pali Rohár + * Copyright (C) 2014 Pali Rohár * * This is loosely based on lis3lv02d driver. */ diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c index 6669db2555fb..86e8dd6a8b33 100644 --- a/drivers/platform/x86/dell-wmi.c +++ b/drivers/platform/x86/dell-wmi.c @@ -3,7 +3,7 @@ * Dell WMI hotkeys * * Copyright (C) 2008 Red Hat - * Copyright (C) 2014-2015 Pali Rohár + * Copyright (C) 2014-2015 Pali Rohár * * Portions based on wistron_btns.c: * Copyright (C) 2005 Miloslav Trmac @@ -29,7 +29,7 @@ #include "dell-wmi-descriptor.h" MODULE_AUTHOR("Matthew Garrett "); -MODULE_AUTHOR("Pali Rohár "); +MODULE_AUTHOR("Pali Rohár "); MODULE_DESCRIPTION("Dell laptop WMI hotkeys driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/power/supply/bq2415x_charger.c b/drivers/power/supply/bq2415x_charger.c index 532f6e4fcafb..a1f00ae1c180 100644 --- a/drivers/power/supply/bq2415x_charger.c +++ b/drivers/power/supply/bq2415x_charger.c @@ -2,7 +2,7 @@ /* * bq2415x charger driver * - * Copyright (C) 2011-2013 Pali Rohár + * Copyright (C) 2011-2013 Pali Rohár * * Datasheets: * http://www.ti.com/product/bq24150 @@ -1788,6 +1788,6 @@ static struct i2c_driver bq2415x_driver = { }; module_i2c_driver(bq2415x_driver); -MODULE_AUTHOR("Pali Rohár "); +MODULE_AUTHOR("Pali Rohár "); MODULE_DESCRIPTION("bq2415x charger driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c index 664e50103eaa..942c92127b6d 100644 --- a/drivers/power/supply/bq27xxx_battery.c +++ b/drivers/power/supply/bq27xxx_battery.c @@ -4,7 +4,7 @@ * Copyright (C) 2008 Rodolfo Giometti * Copyright (C) 2008 Eurotech S.p.A. * Copyright (C) 2010-2011 Lars-Peter Clausen - * Copyright (C) 2011 Pali Rohár + * Copyright (C) 2011 Pali Rohár * Copyright (C) 2017 Liam Breck * * Based on a previous work by Copyright (C) 2008 Texas Instruments, Inc. diff --git a/drivers/power/supply/isp1704_charger.c b/drivers/power/supply/isp1704_charger.c index 4812ac1ff2df..b6efc454e4f0 100644 --- a/drivers/power/supply/isp1704_charger.c +++ b/drivers/power/supply/isp1704_charger.c @@ -3,7 +3,7 @@ * ISP1704 USB Charger Detection driver * * Copyright (C) 2010 Nokia Corporation - * Copyright (C) 2012 - 2013 Pali Rohár + * Copyright (C) 2012 - 2013 Pali Rohár */ #include diff --git a/drivers/power/supply/rx51_battery.c b/drivers/power/supply/rx51_battery.c index 8548b639ff2f..6e488ecf4dcb 100644 --- a/drivers/power/supply/rx51_battery.c +++ b/drivers/power/supply/rx51_battery.c @@ -2,7 +2,7 @@ /* * Nokia RX-51 battery driver * - * Copyright (C) 2012 Pali Rohár + * Copyright (C) 2012 Pali Rohár */ #include @@ -278,6 +278,6 @@ static struct platform_driver rx51_battery_driver = { module_platform_driver(rx51_battery_driver); MODULE_ALIAS("platform:rx51-battery"); -MODULE_AUTHOR("Pali Rohár "); +MODULE_AUTHOR("Pali Rohár "); MODULE_DESCRIPTION("Nokia RX-51 battery driver"); MODULE_LICENSE("GPL"); diff --git a/fs/udf/ecma_167.h b/fs/udf/ecma_167.h index 3fd85464abd5..736ebc5dc441 100644 --- a/fs/udf/ecma_167.h +++ b/fs/udf/ecma_167.h @@ -5,7 +5,7 @@ * http://www.ecma.ch * * Copyright (c) 2001-2002 Ben Fennema - * Copyright (c) 2017-2019 Pali Rohár + * Copyright (c) 2017-2019 Pali Rohár * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/fs/udf/osta_udf.h b/fs/udf/osta_udf.h index 35e61b2cacfe..d5fbfab3ddb6 100644 --- a/fs/udf/osta_udf.h +++ b/fs/udf/osta_udf.h @@ -5,7 +5,7 @@ * http://www.osta.org * * Copyright (c) 2001-2004 Ben Fennema - * Copyright (c) 2017-2019 Pali Rohár + * Copyright (c) 2017-2019 Pali Rohár * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/include/linux/power/bq2415x_charger.h b/include/linux/power/bq2415x_charger.h index 7a91b357e3ac..4ca08321e251 100644 --- a/include/linux/power/bq2415x_charger.h +++ b/include/linux/power/bq2415x_charger.h @@ -2,7 +2,7 @@ /* * bq2415x charger driver * - * Copyright (C) 2011-2013 Pali Rohár + * Copyright (C) 2011-2013 Pali Rohár */ #ifndef BQ2415X_CHARGER_H diff --git a/tools/laptop/freefall/freefall.c b/tools/laptop/freefall/freefall.c index d29a86cda87f..d77d7861787c 100644 --- a/tools/laptop/freefall/freefall.c +++ b/tools/laptop/freefall/freefall.c @@ -4,7 +4,7 @@ * Copyright 2008 Eric Piel * Copyright 2009 Pavel Machek * Copyright 2012 Sonal Santan - * Copyright 2014 Pali Rohár + * Copyright 2014 Pali Rohár */ #include -- cgit From cb8d9937e85559ea0ad1f4f83df8ad1288fed47d Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Fri, 10 Apr 2020 14:34:03 -0700 Subject: drivers/dma/tegra20-apb-dma.c: fix platform_get_irq.cocci warnings Remove dev_err() messages after platform_get_irq*() failures. platform_get_irq() already prints an error. Generated by: scripts/coccinelle/api/platform_get_irq.cocci Fixes: 6c41ac96ad92 ("dmaengine: tegra-apb: Support COMPILE_TEST") Signed-off-by: kbuild test robot Signed-off-by: Julia Lawall Signed-off-by: Andrew Morton Reviewed-by: Dmitry Osipenko Acked-by: Thierry Reding Cc: Laxman Dewangan Cc: Vinod Koul Cc: Stephen Warren Cc: Jon Hunter Link: http://lkml.kernel.org/r/alpine.DEB.2.21.2002271133450.2973@hadrien Signed-off-by: Linus Torvalds --- drivers/dma/tegra20-apb-dma.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index 265303a396ca..f6a2f42ffc51 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -1493,7 +1493,6 @@ static int tegra_dma_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, i); if (irq < 0) { ret = irq; - dev_err(&pdev->dev, "No irq resource for chan %d\n", i); goto err_pm_disable; } -- cgit From 3bfa7e141b0bbb818b25e0daafb65aee92e49ac4 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 10 Apr 2020 14:34:06 -0700 Subject: fs/seq_file.c: seq_read(): add info message about buggy .next functions Patch series "seq_file .next functions should increase position index". In Aug 2018 NeilBrown noticed commit 1f4aace60b0e ("fs/seq_file.c: simplify seq_file iteration code and interface") "Some ->next functions do not increment *pos when they return NULL... Note that such ->next functions are buggy and should be fixed. A simple demonstration is dd if=/proc/swaps bs=1000 skip=1 Choose any block size larger than the size of /proc/swaps. This will always show the whole last line of /proc/swaps" Described problem is still actual. If you make lseek into middle of last output line following read will output end of last line and whole last line once again. $ dd if=/proc/swaps bs=1 # usual output Filename Type Size Used Priority /dev/dm-0 partition 4194812 97536 -2 104+0 records in 104+0 records out 104 bytes copied $ dd if=/proc/swaps bs=40 skip=1 # last line was generated twice dd: /proc/swaps: cannot skip to specified offset v/dm-0 partition 4194812 97536 -2 /dev/dm-0 partition 4194812 97536 -2 3+1 records in 3+1 records out 131 bytes copied There are lot of other affected files, I've found 30+ including /proc/net/ip_tables_matches and /proc/sysvipc/* I've sent patches into maillists of affected subsystems already, this patch-set fixes the problem in files related to pstore, tracing, gcov, sysvipc and other subsystems processed via linux-kernel@ mailing list directly https://bugzilla.kernel.org/show_bug.cgi?id=206283 This patch (of 4): Add debug code to seq_read() to detect missed or out-of-tree incorrect .next seq_file functions. [akpm@linux-foundation.org: s/pr_info/pr_info_ratelimited/, per Qian Cai] https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: Andrew Morton Cc: NeilBrown Cc: Al Viro Cc: Steven Rostedt Cc: Davidlohr Bueso Cc: Ingo Molnar Cc: Manfred Spraul Cc: Peter Oberparleiter Cc: Waiman Long Link: http://lkml.kernel.org/r/244674e5-760c-86bd-d08a-047042881748@virtuozzo.com Link: http://lkml.kernel.org/r/7c24087c-e280-e580-5b0c-0cdaeb14cd18@virtuozzo.com Signed-off-by: Linus Torvalds --- fs/seq_file.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/seq_file.c b/fs/seq_file.c index 79781ebd2145..70f5fdf99bf6 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -232,9 +232,12 @@ Fill: loff_t pos = m->index; p = m->op->next(m, p, &m->index); - if (pos == m->index) - /* Buggy ->next function */ + if (pos == m->index) { + pr_info_ratelimited("buggy seq_file .next function %ps " + "did not updated position index\n", + m->op->next); m->index++; + } if (!p || IS_ERR(p)) { err = PTR_ERR(p); break; -- cgit From f4d74ef6220c1eda0875da30457bef5c7111ab06 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 10 Apr 2020 14:34:10 -0700 Subject: kernel/gcov/fs.c: gcov_seq_next() should increase position index If seq_file .next function does not change position index, read after some lseek can generate unexpected output. https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: Andrew Morton Acked-by: Peter Oberparleiter Cc: Al Viro Cc: Davidlohr Bueso Cc: Ingo Molnar Cc: Manfred Spraul Cc: NeilBrown Cc: Steven Rostedt Cc: Waiman Long Link: http://lkml.kernel.org/r/f65c6ee7-bd00-f910-2f8a-37cc67e4ff88@virtuozzo.com Signed-off-by: Linus Torvalds --- kernel/gcov/fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c index 5e891c3c2d93..82babf5aa077 100644 --- a/kernel/gcov/fs.c +++ b/kernel/gcov/fs.c @@ -108,9 +108,9 @@ static void *gcov_seq_next(struct seq_file *seq, void *data, loff_t *pos) { struct gcov_iterator *iter = data; + (*pos)++; if (gcov_iter_next(iter)) return NULL; - (*pos)++; return iter; } -- cgit From 89163f93c6f969da5811af5377cc10173583123b Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 10 Apr 2020 14:34:13 -0700 Subject: ipc/util.c: sysvipc_find_ipc() should increase position index If seq_file .next function does not change position index, read after some lseek can generate unexpected output. https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: Andrew Morton Acked-by: Waiman Long Cc: Davidlohr Bueso Cc: Manfred Spraul Cc: Al Viro Cc: Ingo Molnar Cc: NeilBrown Cc: Peter Oberparleiter Cc: Steven Rostedt Link: http://lkml.kernel.org/r/b7a20945-e315-8bb0-21e6-3875c14a8494@virtuozzo.com Signed-off-by: Linus Torvalds --- ipc/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ipc/util.c b/ipc/util.c index 97638eb2d7cb..7acccfded7cb 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -764,13 +764,13 @@ static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos, total++; } + *new_pos = pos + 1; if (total >= ids->in_use) return NULL; for (; pos < ipc_mni; pos++) { ipc = idr_find(&ids->ipcs_idr, pos); if (ipc != NULL) { - *new_pos = pos + 1; rcu_read_lock(); ipc_lock_object(ipc); return ipc; -- cgit