From fab9963a69dbd71304357dbfe4ec5345f14cebdd Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 15 Mar 2016 14:53:38 -0700 Subject: mm: fault-inject take over bootstrap kmem_cache check Remove the SLAB specific function slab_should_failslab(), by moving the check against fault-injection for the bootstrap slab, into the shared function should_failslab() (used by both SLAB and SLUB). This is a step towards sharing alloc_hook's between SLUB and SLAB. This bootstrap slab "kmem_cache" is used for allocating struct kmem_cache objects to the allocator itself. Signed-off-by: Jesper Dangaard Brouer Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fault-inject.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 3159a7dba034..9f4956d8601c 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -62,10 +62,9 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name, #endif /* CONFIG_FAULT_INJECTION */ #ifdef CONFIG_FAILSLAB -extern bool should_failslab(size_t size, gfp_t gfpflags, unsigned long flags); +extern bool should_failslab(struct kmem_cache *s, gfp_t gfpflags); #else -static inline bool should_failslab(size_t size, gfp_t gfpflags, - unsigned long flags) +static inline bool should_failslab(struct kmem_cache *s, gfp_t gfpflags) { return false; } -- cgit From ca257195511d536308700548de008b51729221eb Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 15 Mar 2016 14:54:00 -0700 Subject: mm: new API kfree_bulk() for SLAB+SLUB allocators This patch introduce a new API call kfree_bulk() for bulk freeing memory objects not bound to a single kmem_cache. Christoph pointed out that it is possible to implement freeing of objects, without knowing the kmem_cache pointer as that information is available from the object's page->slab_cache. Proposing to remove the kmem_cache argument from the bulk free API. Jesper demonstrated that these extra steps per object comes at a performance cost. It is only in the case CONFIG_MEMCG_KMEM is compiled in and activated runtime that these steps are done anyhow. The extra cost is most visible for SLAB allocator, because the SLUB allocator does the page lookup (virt_to_head_page()) anyhow. Thus, the conclusion was to keep the kmem_cache free bulk API with a kmem_cache pointer, but we can still implement a kfree_bulk() API fairly easily. Simply by handling if kmem_cache_free_bulk() gets called with a kmem_cache NULL pointer. This does increase the code size a bit, but implementing a separate kfree_bulk() call would likely increase code size even more. Below benchmarks cost of alloc+free (obj size 256 bytes) on CPU i7-4790K @ 4.00GHz, no PREEMPT and CONFIG_MEMCG_KMEM=y. Code size increase for SLAB: add/remove: 0/0 grow/shrink: 1/0 up/down: 74/0 (74) function old new delta kmem_cache_free_bulk 660 734 +74 SLAB fastpath: 87 cycles(tsc) 21.814 sz - fallback - kmem_cache_free_bulk - kfree_bulk 1 - 103 cycles 25.878 ns - 41 cycles 10.498 ns - 81 cycles 20.312 ns 2 - 94 cycles 23.673 ns - 26 cycles 6.682 ns - 42 cycles 10.649 ns 3 - 92 cycles 23.181 ns - 21 cycles 5.325 ns - 39 cycles 9.950 ns 4 - 90 cycles 22.727 ns - 18 cycles 4.673 ns - 26 cycles 6.693 ns 8 - 89 cycles 22.270 ns - 14 cycles 3.664 ns - 23 cycles 5.835 ns 16 - 88 cycles 22.038 ns - 14 cycles 3.503 ns - 22 cycles 5.543 ns 30 - 89 cycles 22.284 ns - 13 cycles 3.310 ns - 20 cycles 5.197 ns 32 - 88 cycles 22.249 ns - 13 cycles 3.420 ns - 20 cycles 5.166 ns 34 - 88 cycles 22.224 ns - 14 cycles 3.643 ns - 20 cycles 5.170 ns 48 - 88 cycles 22.088 ns - 14 cycles 3.507 ns - 20 cycles 5.203 ns 64 - 88 cycles 22.063 ns - 13 cycles 3.428 ns - 20 cycles 5.152 ns 128 - 89 cycles 22.483 ns - 15 cycles 3.891 ns - 23 cycles 5.885 ns 158 - 89 cycles 22.381 ns - 15 cycles 3.779 ns - 22 cycles 5.548 ns 250 - 91 cycles 22.798 ns - 16 cycles 4.152 ns - 23 cycles 5.967 ns SLAB when enabling MEMCG_KMEM runtime: - kmemcg fastpath: 130 cycles(tsc) 32.684 ns (step:0) 1 - 148 cycles 37.220 ns - 66 cycles 16.622 ns - 66 cycles 16.583 ns 2 - 141 cycles 35.510 ns - 51 cycles 12.820 ns - 58 cycles 14.625 ns 3 - 140 cycles 35.017 ns - 37 cycles 9.326 ns - 33 cycles 8.474 ns 4 - 137 cycles 34.507 ns - 31 cycles 7.888 ns - 33 cycles 8.300 ns 8 - 140 cycles 35.069 ns - 25 cycles 6.461 ns - 25 cycles 6.436 ns 16 - 138 cycles 34.542 ns - 23 cycles 5.945 ns - 22 cycles 5.670 ns 30 - 136 cycles 34.227 ns - 22 cycles 5.502 ns - 22 cycles 5.587 ns 32 - 136 cycles 34.253 ns - 21 cycles 5.475 ns - 21 cycles 5.324 ns 34 - 136 cycles 34.254 ns - 21 cycles 5.448 ns - 20 cycles 5.194 ns 48 - 136 cycles 34.075 ns - 21 cycles 5.458 ns - 21 cycles 5.367 ns 64 - 135 cycles 33.994 ns - 21 cycles 5.350 ns - 21 cycles 5.259 ns 128 - 137 cycles 34.446 ns - 23 cycles 5.816 ns - 22 cycles 5.688 ns 158 - 137 cycles 34.379 ns - 22 cycles 5.727 ns - 22 cycles 5.602 ns 250 - 138 cycles 34.755 ns - 24 cycles 6.093 ns - 23 cycles 5.986 ns Code size increase for SLUB: function old new delta kmem_cache_free_bulk 717 799 +82 SLUB benchmark: SLUB fastpath: 46 cycles(tsc) 11.691 ns (step:0) sz - fallback - kmem_cache_free_bulk - kfree_bulk 1 - 61 cycles 15.486 ns - 53 cycles 13.364 ns - 57 cycles 14.464 ns 2 - 54 cycles 13.703 ns - 32 cycles 8.110 ns - 33 cycles 8.482 ns 3 - 53 cycles 13.272 ns - 25 cycles 6.362 ns - 27 cycles 6.947 ns 4 - 51 cycles 12.994 ns - 24 cycles 6.087 ns - 24 cycles 6.078 ns 8 - 50 cycles 12.576 ns - 21 cycles 5.354 ns - 22 cycles 5.513 ns 16 - 49 cycles 12.368 ns - 20 cycles 5.054 ns - 20 cycles 5.042 ns 30 - 49 cycles 12.273 ns - 18 cycles 4.748 ns - 19 cycles 4.758 ns 32 - 49 cycles 12.401 ns - 19 cycles 4.821 ns - 19 cycles 4.810 ns 34 - 98 cycles 24.519 ns - 24 cycles 6.154 ns - 24 cycles 6.157 ns 48 - 83 cycles 20.833 ns - 21 cycles 5.446 ns - 21 cycles 5.429 ns 64 - 75 cycles 18.891 ns - 20 cycles 5.247 ns - 20 cycles 5.238 ns 128 - 93 cycles 23.271 ns - 27 cycles 6.856 ns - 27 cycles 6.823 ns 158 - 102 cycles 25.581 ns - 30 cycles 7.714 ns - 30 cycles 7.695 ns 250 - 107 cycles 26.917 ns - 38 cycles 9.514 ns - 38 cycles 9.506 ns SLUB when enabling MEMCG_KMEM runtime: - kmemcg fastpath: 71 cycles(tsc) 17.897 ns (step:0) 1 - 85 cycles 21.484 ns - 78 cycles 19.569 ns - 75 cycles 18.938 ns 2 - 81 cycles 20.363 ns - 45 cycles 11.258 ns - 44 cycles 11.076 ns 3 - 78 cycles 19.709 ns - 33 cycles 8.354 ns - 32 cycles 8.044 ns 4 - 77 cycles 19.430 ns - 28 cycles 7.216 ns - 28 cycles 7.003 ns 8 - 101 cycles 25.288 ns - 23 cycles 5.849 ns - 23 cycles 5.787 ns 16 - 76 cycles 19.148 ns - 20 cycles 5.162 ns - 20 cycles 5.081 ns 30 - 76 cycles 19.067 ns - 19 cycles 4.868 ns - 19 cycles 4.821 ns 32 - 76 cycles 19.052 ns - 19 cycles 4.857 ns - 19 cycles 4.815 ns 34 - 121 cycles 30.291 ns - 25 cycles 6.333 ns - 25 cycles 6.268 ns 48 - 108 cycles 27.111 ns - 21 cycles 5.498 ns - 21 cycles 5.458 ns 64 - 100 cycles 25.164 ns - 20 cycles 5.242 ns - 20 cycles 5.229 ns 128 - 155 cycles 38.976 ns - 27 cycles 6.886 ns - 27 cycles 6.892 ns 158 - 132 cycles 33.034 ns - 30 cycles 7.711 ns - 30 cycles 7.728 ns 250 - 130 cycles 32.612 ns - 38 cycles 9.560 ns - 38 cycles 9.549 ns Signed-off-by: Jesper Dangaard Brouer Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 3627d5c1bc47..9d9a5bdb9b00 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -323,6 +323,15 @@ void kmem_cache_free(struct kmem_cache *, void *); void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); +/* + * Caller must not use kfree_bulk() on memory not originally allocated + * by kmalloc(), because the SLOB allocator cannot handle this. + */ +static __always_inline void kfree_bulk(size_t size, void **p) +{ + kmem_cache_free_bulk(NULL, size, p); +} + #ifdef CONFIG_NUMA void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment; void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment; -- cgit From 9f706d6820d3ea776d6b3fc0c1de9f81eb0d021b Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 15 Mar 2016 14:54:03 -0700 Subject: mm: fix some spelling Fix up trivial spelling errors, noticed while reading the code. Signed-off-by: Jesper Dangaard Brouer Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 +- include/linux/slab.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 792c8981e633..30b02e79610e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -765,7 +765,7 @@ int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order); void __memcg_kmem_uncharge(struct page *page, int order); /* - * helper for acessing a memcg's index. It will be used as an index in the + * helper for accessing a memcg's index. It will be used as an index in the * child cache array in kmem_cache, and also to derive its name. This function * will return -1 when this is not a kmem-limited memcg. */ diff --git a/include/linux/slab.h b/include/linux/slab.h index 9d9a5bdb9b00..5d49f0c60dcb 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -314,7 +314,7 @@ void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment void kmem_cache_free(struct kmem_cache *, void *); /* - * Bulk allocation and freeing operations. These are accellerated in an + * Bulk allocation and freeing operations. These are accelerated in an * allocator specific way to avoid taking locks repeatedly or building * metadata structures unnecessarily. * -- cgit From 40b44137971c2e5865a78f9f7de274449983ccb5 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 15 Mar 2016 14:54:21 -0700 Subject: mm/slab: clean up DEBUG_PAGEALLOC processing code Currently, open code for checking DEBUG_PAGEALLOC cache is spread to some sites. It makes code unreadable and hard to change. This patch cleans up this code. The following patch will change the criteria for DEBUG_PAGEALLOC cache so this clean-up will help it, too. [akpm@linux-foundation.org: fix build with CONFIG_DEBUG_PAGEALLOC=n] Signed-off-by: Joonsoo Kim Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Jesper Dangaard Brouer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2b6e22782699..69fd6bbb8cce 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2195,14 +2195,18 @@ kernel_map_pages(struct page *page, int numpages, int enable) } #ifdef CONFIG_HIBERNATION extern bool kernel_page_present(struct page *page); -#endif /* CONFIG_HIBERNATION */ -#else +#endif /* CONFIG_HIBERNATION */ +#else /* CONFIG_DEBUG_PAGEALLOC */ static inline void kernel_map_pages(struct page *page, int numpages, int enable) {} #ifdef CONFIG_HIBERNATION static inline bool kernel_page_present(struct page *page) { return true; } -#endif /* CONFIG_HIBERNATION */ -#endif +#endif /* CONFIG_HIBERNATION */ +static inline bool debug_pagealloc_enabled(void) +{ + return false; +} +#endif /* CONFIG_DEBUG_PAGEALLOC */ #ifdef __HAVE_ARCH_GATE_AREA extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); -- cgit From d31676dfde257cb2b3e52d4e657d8ad2251e4d49 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 15 Mar 2016 14:54:24 -0700 Subject: mm/slab: alternative implementation for DEBUG_SLAB_LEAK DEBUG_SLAB_LEAK is a debug option. It's current implementation requires status buffer so we need more memory to use it. And, it cause kmem_cache initialization step more complex. To remove this extra memory usage and to simplify initialization step, this patch implement this feature with another way. When user requests to get slab object owner information, it marks that getting information is started. And then, all free objects in caches are flushed to corresponding slab page. Now, we can distinguish all freed object so we can know all allocated objects, too. After collecting slab object owner information on allocated objects, mark is checked that there is no free during the processing. If true, we can be sure that our information is correct so information is returned to user. Although this way is rather complex, it has two important benefits mentioned above. So, I think it is worth changing. There is one drawback that it takes more time to get slab object owner information but it is just a debug option so it doesn't matter at all. To help review, this patch implements new way only. Following patch will remove useless code. Signed-off-by: Joonsoo Kim Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Jesper Dangaard Brouer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab_def.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index cf139d3fa513..e878ba35ae91 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -60,6 +60,9 @@ struct kmem_cache { atomic_t allocmiss; atomic_t freehit; atomic_t freemiss; +#ifdef CONFIG_DEBUG_SLAB_LEAK + atomic_t store_user_clean; +#endif /* * If debugging is enabled, then the allocator can add additional -- cgit From becfda68abca673d61d5cc953e8e099816db99d9 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 15 Mar 2016 14:55:06 -0700 Subject: slub: convert SLAB_DEBUG_FREE to SLAB_CONSISTENCY_CHECKS SLAB_DEBUG_FREE allows expensive consistency checks at free to be turned on or off. Expand its use to be able to turn off all consistency checks. This gives a nice speed up if you only want features such as poisoning or tracing. Credit to Mathias Krause for the original work which inspired this series Signed-off-by: Laura Abbott Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Kees Cook Cc: Mathias Krause Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 5d49f0c60dcb..e4b568738ca3 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -20,7 +20,7 @@ * Flags to pass to kmem_cache_create(). * The ones marked DEBUG are only valid if CONFIG_DEBUG_SLAB is set. */ -#define SLAB_DEBUG_FREE 0x00000100UL /* DEBUG: Perform (expensive) checks on free */ +#define SLAB_CONSISTENCY_CHECKS 0x00000100UL /* DEBUG: Perform (expensive) checks on alloc/free */ #define SLAB_RED_ZONE 0x00000400UL /* DEBUG: Red zone objs in a cache */ #define SLAB_POISON 0x00000800UL /* DEBUG: Poison objects */ #define SLAB_HWCACHE_ALIGN 0x00002000UL /* Align objs on cache lines */ -- cgit From d86bd1bece6fc41d59253002db5441fe960a37f6 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 15 Mar 2016 14:55:12 -0700 Subject: mm/slub: support left redzone SLUB already has a redzone debugging feature. But it is only positioned at the end of object (aka right redzone) so it cannot catch left oob. Although current object's right redzone acts as left redzone of next object, first object in a slab cannot take advantage of this effect. This patch explicitly adds a left red zone to each object to detect left oob more precisely. Background: Someone complained to me that left OOB doesn't catch even if KASAN is enabled which does page allocation debugging. That page is out of our control so it would be allocated when left OOB happens and, in this case, we can't find OOB. Moreover, SLUB debugging feature can be enabled without page allocator debugging and, in this case, we will miss that OOB. Before trying to implement, I expected that changes would be too complex, but, it doesn't look that complex to me now. Almost changes are applied to debug specific functions so I feel okay. Signed-off-by: Joonsoo Kim Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index b7e57927f521..ac5143f95ee6 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -81,6 +81,7 @@ struct kmem_cache { int reserved; /* Reserved bytes at the end of slabs */ const char *name; /* Name (only for display!) */ struct list_head list; /* List of slab caches */ + int red_left_pad; /* Left redzone padding size */ #ifdef CONFIG_SYSFS struct kobject kobj; /* For sysfs */ #endif -- cgit From 20f6e03a40ba536dfc7c6f83dd894d994aeb39f3 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Mar 2016 14:55:42 -0700 Subject: tracepoints: move trace_print_flags definitions to tracepoint-defs.h The following patch will need to declare array of struct trace_print_flags in a header. To prevent this header from pulling in all of RCU through trace_events.h, move the struct trace_print_flags{_64} definitions to the new lightweight tracepoint-defs.h header. Signed-off-by: Vlastimil Babka Acked-by: David Rientjes Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: Rasmus Villemoes Cc: Joonsoo Kim Cc: Minchan Kim Cc: Sasha Levin Cc: "Kirill A. Shutemov" Cc: Mel Gorman Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/trace_events.h | 10 ---------- include/linux/tracepoint-defs.h | 14 ++++++++++++-- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 925730bc9fc1..705df7db4482 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -15,16 +15,6 @@ struct tracer; struct dentry; struct bpf_prog; -struct trace_print_flags { - unsigned long mask; - const char *name; -}; - -struct trace_print_flags_u64 { - unsigned long long mask; - const char *name; -}; - const char *trace_print_flags_seq(struct trace_seq *p, const char *delim, unsigned long flags, const struct trace_print_flags *flag_array); diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index e1ee97c713bf..4ac89acb6136 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -3,13 +3,23 @@ /* * File can be included directly by headers who only want to access - * tracepoint->key to guard out of line trace calls. Otherwise - * linux/tracepoint.h should be used. + * tracepoint->key to guard out of line trace calls, or the definition of + * trace_print_flags{_u64}. Otherwise linux/tracepoint.h should be used. */ #include #include +struct trace_print_flags { + unsigned long mask; + const char *name; +}; + +struct trace_print_flags_u64 { + unsigned long long mask; + const char *name; +}; + struct tracepoint_func { void *func; void *data; -- cgit From 1f7866b4aebd19e2525775083279e171b36783a4 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Mar 2016 14:55:45 -0700 Subject: mm, tracing: make show_gfp_flags() up to date The show_gfp_flags() macro provides human-friendly printing of gfp flags in tracepoints. However, it is somewhat out of date and missing several flags. This patches fills in the missing flags, and distinguishes properly between GFP_ATOMIC and __GFP_ATOMIC which were both translated to "GFP_ATOMIC". More generally, all __GFP_X flags which were previously printed as GFP_X, are now printed as __GFP_X, since ommiting the underscores results in output that doesn't actually match the source code, and can only lead to confusion. Where both variants are defined equal (e.g. _DMA and _DMA32), the variant without underscores are preferred. Also add a note in gfp.h so hopefully future changes will be synced better. __GFP_MOVABLE is defined twice in include/linux/gfp.h with different comments. Leave just the newer one, which was intended to replace the old one. Signed-off-by: Vlastimil Babka Reviewed-by: Michal Hocko Acked-by: David Rientjes Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: Rasmus Villemoes Cc: Joonsoo Kim Cc: Minchan Kim Cc: Sasha Levin Cc: "Kirill A. Shutemov" Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 6 ++++- include/trace/events/gfpflags.h | 53 ++++++++++++++++++++++++----------------- 2 files changed, 36 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index af1f2b24bbe4..bbe5e7fae337 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -9,6 +9,11 @@ struct vm_area_struct; +/* + * In case of changes, please don't forget to update + * include/trace/events/gfpflags.h + */ + /* Plain integer GFP bitmasks. Do not use this directly. */ #define ___GFP_DMA 0x01u #define ___GFP_HIGHMEM 0x02u @@ -48,7 +53,6 @@ struct vm_area_struct; #define __GFP_DMA ((__force gfp_t)___GFP_DMA) #define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM) #define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32) -#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* Page is movable */ #define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */ #define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE) diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h index dde6bf092c8a..f53b216c9311 100644 --- a/include/trace/events/gfpflags.h +++ b/include/trace/events/gfpflags.h @@ -11,33 +11,42 @@ #define show_gfp_flags(flags) \ (flags) ? __print_flags(flags, "|", \ {(unsigned long)GFP_TRANSHUGE, "GFP_TRANSHUGE"}, \ - {(unsigned long)GFP_HIGHUSER_MOVABLE, "GFP_HIGHUSER_MOVABLE"}, \ + {(unsigned long)GFP_HIGHUSER_MOVABLE, "GFP_HIGHUSER_MOVABLE"},\ {(unsigned long)GFP_HIGHUSER, "GFP_HIGHUSER"}, \ {(unsigned long)GFP_USER, "GFP_USER"}, \ {(unsigned long)GFP_TEMPORARY, "GFP_TEMPORARY"}, \ + {(unsigned long)GFP_KERNEL_ACCOUNT, "GFP_KERNEL_ACCOUNT"}, \ {(unsigned long)GFP_KERNEL, "GFP_KERNEL"}, \ {(unsigned long)GFP_NOFS, "GFP_NOFS"}, \ {(unsigned long)GFP_ATOMIC, "GFP_ATOMIC"}, \ {(unsigned long)GFP_NOIO, "GFP_NOIO"}, \ - {(unsigned long)__GFP_HIGH, "GFP_HIGH"}, \ - {(unsigned long)__GFP_ATOMIC, "GFP_ATOMIC"}, \ - {(unsigned long)__GFP_IO, "GFP_IO"}, \ - {(unsigned long)__GFP_COLD, "GFP_COLD"}, \ - {(unsigned long)__GFP_NOWARN, "GFP_NOWARN"}, \ - {(unsigned long)__GFP_REPEAT, "GFP_REPEAT"}, \ - {(unsigned long)__GFP_NOFAIL, "GFP_NOFAIL"}, \ - {(unsigned long)__GFP_NORETRY, "GFP_NORETRY"}, \ - {(unsigned long)__GFP_COMP, "GFP_COMP"}, \ - {(unsigned long)__GFP_ZERO, "GFP_ZERO"}, \ - {(unsigned long)__GFP_NOMEMALLOC, "GFP_NOMEMALLOC"}, \ - {(unsigned long)__GFP_MEMALLOC, "GFP_MEMALLOC"}, \ - {(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \ - {(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \ - {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \ - {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \ - {(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \ - {(unsigned long)__GFP_DIRECT_RECLAIM, "GFP_DIRECT_RECLAIM"}, \ - {(unsigned long)__GFP_KSWAPD_RECLAIM, "GFP_KSWAPD_RECLAIM"}, \ - {(unsigned long)__GFP_OTHER_NODE, "GFP_OTHER_NODE"} \ - ) : "GFP_NOWAIT" + {(unsigned long)GFP_NOWAIT, "GFP_NOWAIT"}, \ + {(unsigned long)GFP_DMA, "GFP_DMA"}, \ + {(unsigned long)__GFP_HIGHMEM, "__GFP_HIGHMEM"}, \ + {(unsigned long)GFP_DMA32, "GFP_DMA32"}, \ + {(unsigned long)__GFP_HIGH, "__GFP_HIGH"}, \ + {(unsigned long)__GFP_ATOMIC, "__GFP_ATOMIC"}, \ + {(unsigned long)__GFP_IO, "__GFP_IO"}, \ + {(unsigned long)__GFP_FS, "__GFP_FS"}, \ + {(unsigned long)__GFP_COLD, "__GFP_COLD"}, \ + {(unsigned long)__GFP_NOWARN, "__GFP_NOWARN"}, \ + {(unsigned long)__GFP_REPEAT, "__GFP_REPEAT"}, \ + {(unsigned long)__GFP_NOFAIL, "__GFP_NOFAIL"}, \ + {(unsigned long)__GFP_NORETRY, "__GFP_NORETRY"}, \ + {(unsigned long)__GFP_COMP, "__GFP_COMP"}, \ + {(unsigned long)__GFP_ZERO, "__GFP_ZERO"}, \ + {(unsigned long)__GFP_NOMEMALLOC, "__GFP_NOMEMALLOC"}, \ + {(unsigned long)__GFP_MEMALLOC, "__GFP_MEMALLOC"}, \ + {(unsigned long)__GFP_HARDWALL, "__GFP_HARDWALL"}, \ + {(unsigned long)__GFP_THISNODE, "__GFP_THISNODE"}, \ + {(unsigned long)__GFP_RECLAIMABLE, "__GFP_RECLAIMABLE"}, \ + {(unsigned long)__GFP_MOVABLE, "__GFP_MOVABLE"}, \ + {(unsigned long)__GFP_ACCOUNT, "__GFP_ACCOUNT"}, \ + {(unsigned long)__GFP_NOTRACK, "__GFP_NOTRACK"}, \ + {(unsigned long)__GFP_WRITE, "__GFP_WRITE"}, \ + {(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \ + {(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\ + {(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"},\ + {(unsigned long)__GFP_OTHER_NODE, "__GFP_OTHER_NODE"} \ + ) : "none" -- cgit From 14e0a214d62d284ff40b1fd7d687cb66fca9fc67 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Mar 2016 14:55:49 -0700 Subject: tools, perf: make gfp_compact_table up to date When updating tracing's show_gfp_flags() I have noticed that perf's gfp_compact_table is also outdated. Fill in the missing flags and place a note in gfp.h to increase chance that future updates are synced. Convert the __GFP_X flags from "GFP_X" to "__GFP_X" strings in line with the previous patch. Signed-off-by: Vlastimil Babka Acked-by: David Rientjes Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: Rasmus Villemoes Cc: Joonsoo Kim Cc: Minchan Kim Cc: Sasha Levin Cc: "Kirill A. Shutemov" Cc: Mel Gorman Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index bbe5e7fae337..3d6d878c00f5 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -11,7 +11,7 @@ struct vm_area_struct; /* * In case of changes, please don't forget to update - * include/trace/events/gfpflags.h + * include/trace/events/gfpflags.h and tools/perf/builtin-kmem.c */ /* Plain integer GFP bitmasks. Do not use this directly. */ -- cgit From 420adbe9fc1a45187cfa74df9dbfd72272c4e2fa Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Mar 2016 14:55:52 -0700 Subject: mm, tracing: unify mm flags handling in tracepoints and printk In tracepoints, it's possible to print gfp flags in a human-friendly format through a macro show_gfp_flags(), which defines a translation array and passes is to __print_flags(). Since the following patch will introduce support for gfp flags printing in printk(), it would be nice to reuse the array. This is not straightforward, since __print_flags() can't simply reference an array defined in a .c file such as mm/debug.c - it has to be a macro to allow the macro magic to communicate the format to userspace tools such as trace-cmd. The solution is to create a macro __def_gfpflag_names which is used both in show_gfp_flags(), and to define the gfpflag_names[] array in mm/debug.c. On the other hand, mm/debug.c also defines translation tables for page flags and vma flags, and desire was expressed (but not implemented in this series) to use these also from tracepoints. Thus, this patch also renames the events/gfpflags.h file to events/mmflags.h and moves the table definitions there, using the same macro approach as for gfpflags. This allows translating all three kinds of mm-specific flags both in tracepoints and printk. Signed-off-by: Vlastimil Babka Reviewed-by: Michal Hocko Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: Rasmus Villemoes Cc: Joonsoo Kim Cc: Minchan Kim Cc: Sasha Levin Cc: "Kirill A. Shutemov" Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 +- include/trace/events/btrfs.h | 2 +- include/trace/events/compaction.h | 2 +- include/trace/events/gfpflags.h | 52 ------------ include/trace/events/huge_memory.h | 2 - include/trace/events/kmem.h | 2 +- include/trace/events/mmflags.h | 164 +++++++++++++++++++++++++++++++++++++ include/trace/events/vmscan.h | 2 +- 8 files changed, 169 insertions(+), 59 deletions(-) delete mode 100644 include/trace/events/gfpflags.h create mode 100644 include/trace/events/mmflags.h (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 3d6d878c00f5..06546b36eb6a 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -11,7 +11,7 @@ struct vm_area_struct; /* * In case of changes, please don't forget to update - * include/trace/events/gfpflags.h and tools/perf/builtin-kmem.c + * include/trace/events/mmflags.h and tools/perf/builtin-kmem.c */ /* Plain integer GFP bitmasks. Do not use this directly. */ diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index d866f21efbbf..677807f29a1c 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -6,7 +6,7 @@ #include #include -#include +#include struct btrfs_root; struct btrfs_fs_info; diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h index c92d1e1cbad9..111e5666e5eb 100644 --- a/include/trace/events/compaction.h +++ b/include/trace/events/compaction.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include #define COMPACTION_STATUS \ EM( COMPACT_DEFERRED, "deferred") \ diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h deleted file mode 100644 index f53b216c9311..000000000000 --- a/include/trace/events/gfpflags.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * The order of these masks is important. Matching masks will be seen - * first and the left over flags will end up showing by themselves. - * - * For example, if we have GFP_KERNEL before GFP_USER we wil get: - * - * GFP_KERNEL|GFP_HARDWALL - * - * Thus most bits set go first. - */ -#define show_gfp_flags(flags) \ - (flags) ? __print_flags(flags, "|", \ - {(unsigned long)GFP_TRANSHUGE, "GFP_TRANSHUGE"}, \ - {(unsigned long)GFP_HIGHUSER_MOVABLE, "GFP_HIGHUSER_MOVABLE"},\ - {(unsigned long)GFP_HIGHUSER, "GFP_HIGHUSER"}, \ - {(unsigned long)GFP_USER, "GFP_USER"}, \ - {(unsigned long)GFP_TEMPORARY, "GFP_TEMPORARY"}, \ - {(unsigned long)GFP_KERNEL_ACCOUNT, "GFP_KERNEL_ACCOUNT"}, \ - {(unsigned long)GFP_KERNEL, "GFP_KERNEL"}, \ - {(unsigned long)GFP_NOFS, "GFP_NOFS"}, \ - {(unsigned long)GFP_ATOMIC, "GFP_ATOMIC"}, \ - {(unsigned long)GFP_NOIO, "GFP_NOIO"}, \ - {(unsigned long)GFP_NOWAIT, "GFP_NOWAIT"}, \ - {(unsigned long)GFP_DMA, "GFP_DMA"}, \ - {(unsigned long)__GFP_HIGHMEM, "__GFP_HIGHMEM"}, \ - {(unsigned long)GFP_DMA32, "GFP_DMA32"}, \ - {(unsigned long)__GFP_HIGH, "__GFP_HIGH"}, \ - {(unsigned long)__GFP_ATOMIC, "__GFP_ATOMIC"}, \ - {(unsigned long)__GFP_IO, "__GFP_IO"}, \ - {(unsigned long)__GFP_FS, "__GFP_FS"}, \ - {(unsigned long)__GFP_COLD, "__GFP_COLD"}, \ - {(unsigned long)__GFP_NOWARN, "__GFP_NOWARN"}, \ - {(unsigned long)__GFP_REPEAT, "__GFP_REPEAT"}, \ - {(unsigned long)__GFP_NOFAIL, "__GFP_NOFAIL"}, \ - {(unsigned long)__GFP_NORETRY, "__GFP_NORETRY"}, \ - {(unsigned long)__GFP_COMP, "__GFP_COMP"}, \ - {(unsigned long)__GFP_ZERO, "__GFP_ZERO"}, \ - {(unsigned long)__GFP_NOMEMALLOC, "__GFP_NOMEMALLOC"}, \ - {(unsigned long)__GFP_MEMALLOC, "__GFP_MEMALLOC"}, \ - {(unsigned long)__GFP_HARDWALL, "__GFP_HARDWALL"}, \ - {(unsigned long)__GFP_THISNODE, "__GFP_THISNODE"}, \ - {(unsigned long)__GFP_RECLAIMABLE, "__GFP_RECLAIMABLE"}, \ - {(unsigned long)__GFP_MOVABLE, "__GFP_MOVABLE"}, \ - {(unsigned long)__GFP_ACCOUNT, "__GFP_ACCOUNT"}, \ - {(unsigned long)__GFP_NOTRACK, "__GFP_NOTRACK"}, \ - {(unsigned long)__GFP_WRITE, "__GFP_WRITE"}, \ - {(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \ - {(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\ - {(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"},\ - {(unsigned long)__GFP_OTHER_NODE, "__GFP_OTHER_NODE"} \ - ) : "none" - diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index 47c6212d8f3c..551ba4acde4d 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -6,8 +6,6 @@ #include -#include - #define SCAN_STATUS \ EM( SCAN_FAIL, "failed") \ EM( SCAN_SUCCEED, "succeeded") \ diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index f7554fd7fc62..ca7217389067 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -6,7 +6,7 @@ #include #include -#include +#include DECLARE_EVENT_CLASS(kmem_alloc, diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h new file mode 100644 index 000000000000..a849185c82f0 --- /dev/null +++ b/include/trace/events/mmflags.h @@ -0,0 +1,164 @@ +/* + * The order of these masks is important. Matching masks will be seen + * first and the left over flags will end up showing by themselves. + * + * For example, if we have GFP_KERNEL before GFP_USER we wil get: + * + * GFP_KERNEL|GFP_HARDWALL + * + * Thus most bits set go first. + */ + +#define __def_gfpflag_names \ + {(unsigned long)GFP_TRANSHUGE, "GFP_TRANSHUGE"}, \ + {(unsigned long)GFP_HIGHUSER_MOVABLE, "GFP_HIGHUSER_MOVABLE"},\ + {(unsigned long)GFP_HIGHUSER, "GFP_HIGHUSER"}, \ + {(unsigned long)GFP_USER, "GFP_USER"}, \ + {(unsigned long)GFP_TEMPORARY, "GFP_TEMPORARY"}, \ + {(unsigned long)GFP_KERNEL_ACCOUNT, "GFP_KERNEL_ACCOUNT"}, \ + {(unsigned long)GFP_KERNEL, "GFP_KERNEL"}, \ + {(unsigned long)GFP_NOFS, "GFP_NOFS"}, \ + {(unsigned long)GFP_ATOMIC, "GFP_ATOMIC"}, \ + {(unsigned long)GFP_NOIO, "GFP_NOIO"}, \ + {(unsigned long)GFP_NOWAIT, "GFP_NOWAIT"}, \ + {(unsigned long)GFP_DMA, "GFP_DMA"}, \ + {(unsigned long)__GFP_HIGHMEM, "__GFP_HIGHMEM"}, \ + {(unsigned long)GFP_DMA32, "GFP_DMA32"}, \ + {(unsigned long)__GFP_HIGH, "__GFP_HIGH"}, \ + {(unsigned long)__GFP_ATOMIC, "__GFP_ATOMIC"}, \ + {(unsigned long)__GFP_IO, "__GFP_IO"}, \ + {(unsigned long)__GFP_FS, "__GFP_FS"}, \ + {(unsigned long)__GFP_COLD, "__GFP_COLD"}, \ + {(unsigned long)__GFP_NOWARN, "__GFP_NOWARN"}, \ + {(unsigned long)__GFP_REPEAT, "__GFP_REPEAT"}, \ + {(unsigned long)__GFP_NOFAIL, "__GFP_NOFAIL"}, \ + {(unsigned long)__GFP_NORETRY, "__GFP_NORETRY"}, \ + {(unsigned long)__GFP_COMP, "__GFP_COMP"}, \ + {(unsigned long)__GFP_ZERO, "__GFP_ZERO"}, \ + {(unsigned long)__GFP_NOMEMALLOC, "__GFP_NOMEMALLOC"}, \ + {(unsigned long)__GFP_MEMALLOC, "__GFP_MEMALLOC"}, \ + {(unsigned long)__GFP_HARDWALL, "__GFP_HARDWALL"}, \ + {(unsigned long)__GFP_THISNODE, "__GFP_THISNODE"}, \ + {(unsigned long)__GFP_RECLAIMABLE, "__GFP_RECLAIMABLE"}, \ + {(unsigned long)__GFP_MOVABLE, "__GFP_MOVABLE"}, \ + {(unsigned long)__GFP_ACCOUNT, "__GFP_ACCOUNT"}, \ + {(unsigned long)__GFP_NOTRACK, "__GFP_NOTRACK"}, \ + {(unsigned long)__GFP_WRITE, "__GFP_WRITE"}, \ + {(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \ + {(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\ + {(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"},\ + {(unsigned long)__GFP_OTHER_NODE, "__GFP_OTHER_NODE"} \ + +#define show_gfp_flags(flags) \ + (flags) ? __print_flags(flags, "|", \ + __def_gfpflag_names \ + ) : "none" + +#ifdef CONFIG_MMU +#define IF_HAVE_PG_MLOCK(flag,string) ,{1UL << flag, string} +#else +#define IF_HAVE_PG_MLOCK(flag,string) +#endif + +#ifdef CONFIG_ARCH_USES_PG_UNCACHED +#define IF_HAVE_PG_UNCACHED(flag,string) ,{1UL << flag, string} +#else +#define IF_HAVE_PG_UNCACHED(flag,string) +#endif + +#ifdef CONFIG_MEMORY_FAILURE +#define IF_HAVE_PG_HWPOISON(flag,string) ,{1UL << flag, string} +#else +#define IF_HAVE_PG_HWPOISON(flag,string) +#endif + +#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) +#define IF_HAVE_PG_IDLE(flag,string) ,{1UL << flag, string} +#else +#define IF_HAVE_PG_IDLE(flag,string) +#endif + +#define __def_pageflag_names \ + {1UL << PG_locked, "locked" }, \ + {1UL << PG_error, "error" }, \ + {1UL << PG_referenced, "referenced" }, \ + {1UL << PG_uptodate, "uptodate" }, \ + {1UL << PG_dirty, "dirty" }, \ + {1UL << PG_lru, "lru" }, \ + {1UL << PG_active, "active" }, \ + {1UL << PG_slab, "slab" }, \ + {1UL << PG_owner_priv_1, "owner_priv_1" }, \ + {1UL << PG_arch_1, "arch_1" }, \ + {1UL << PG_reserved, "reserved" }, \ + {1UL << PG_private, "private" }, \ + {1UL << PG_private_2, "private_2" }, \ + {1UL << PG_writeback, "writeback" }, \ + {1UL << PG_head, "head" }, \ + {1UL << PG_swapcache, "swapcache" }, \ + {1UL << PG_mappedtodisk, "mappedtodisk" }, \ + {1UL << PG_reclaim, "reclaim" }, \ + {1UL << PG_swapbacked, "swapbacked" }, \ + {1UL << PG_unevictable, "unevictable" } \ +IF_HAVE_PG_MLOCK(PG_mlocked, "mlocked" ) \ +IF_HAVE_PG_UNCACHED(PG_uncached, "uncached" ) \ +IF_HAVE_PG_HWPOISON(PG_hwpoison, "hwpoison" ) \ +IF_HAVE_PG_IDLE(PG_young, "young" ) \ +IF_HAVE_PG_IDLE(PG_idle, "idle" ) + +#define show_page_flags(flags) \ + (flags) ? __print_flags(flags, "|", \ + __def_pageflag_names \ + ) : "none" + +#if defined(CONFIG_X86) +#define __VM_ARCH_SPECIFIC {VM_PAT, "pat" } +#elif defined(CONFIG_PPC) +#define __VM_ARCH_SPECIFIC {VM_SAO, "sao" } +#elif defined(CONFIG_PARISC) || defined(CONFIG_METAG) || defined(CONFIG_IA64) +#define __VM_ARCH_SPECIFIC {VM_GROWSUP, "growsup" } +#elif !defined(CONFIG_MMU) +#define __VM_ARCH_SPECIFIC {VM_MAPPED_COPY,"mappedcopy" } +#else +#define __VM_ARCH_SPECIFIC {VM_ARCH_1, "arch_1" } +#endif + +#ifdef CONFIG_MEM_SOFT_DIRTY +#define IF_HAVE_VM_SOFTDIRTY(flag,name) {flag, name }, +#else +#define IF_HAVE_VM_SOFTDIRTY(flag,name) +#endif + +#define __def_vmaflag_names \ + {VM_READ, "read" }, \ + {VM_WRITE, "write" }, \ + {VM_EXEC, "exec" }, \ + {VM_SHARED, "shared" }, \ + {VM_MAYREAD, "mayread" }, \ + {VM_MAYWRITE, "maywrite" }, \ + {VM_MAYEXEC, "mayexec" }, \ + {VM_MAYSHARE, "mayshare" }, \ + {VM_GROWSDOWN, "growsdown" }, \ + {VM_PFNMAP, "pfnmap" }, \ + {VM_DENYWRITE, "denywrite" }, \ + {VM_LOCKONFAULT, "lockonfault" }, \ + {VM_LOCKED, "locked" }, \ + {VM_IO, "io" }, \ + {VM_SEQ_READ, "seqread" }, \ + {VM_RAND_READ, "randread" }, \ + {VM_DONTCOPY, "dontcopy" }, \ + {VM_DONTEXPAND, "dontexpand" }, \ + {VM_ACCOUNT, "account" }, \ + {VM_NORESERVE, "noreserve" }, \ + {VM_HUGETLB, "hugetlb" }, \ + __VM_ARCH_SPECIFIC , \ + {VM_DONTDUMP, "dontdump" }, \ +IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \ + {VM_MIXEDMAP, "mixedmap" }, \ + {VM_HUGEPAGE, "hugepage" }, \ + {VM_NOHUGEPAGE, "nohugepage" }, \ + {VM_MERGEABLE, "mergeable" } \ + +#define show_vma_flags(flags) \ + (flags) ? __print_flags(flags, "|", \ + __def_vmaflag_names \ + ) : "none" diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 31763dd8db1c..0101ef37f1ee 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include #define RECLAIM_WB_ANON 0x0001u #define RECLAIM_WB_FILE 0x0002u -- cgit From 60f30350fd69a3e4d5f0f45937d3274c22565134 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Mar 2016 14:56:08 -0700 Subject: mm, page_owner: print migratetype of page and pageblock, symbolic flags The information in /sys/kernel/debug/page_owner includes the migratetype of the pageblock the page belongs to. This is also checked against the page's migratetype (as declared by gfp_flags during its allocation), and the page is reported as Fallback if its migratetype differs from the pageblock's one. t This is somewhat misleading because in fact fallback allocation is not the only reason why these two can differ. It also doesn't direcly provide the page's migratetype, although it's possible to derive that from the gfp_flags. It's arguably better to print both page and pageblock's migratetype and leave the interpretation to the consumer than to suggest fallback allocation as the only possible reason. While at it, we can print the migratetypes as string the same way as /proc/pagetypeinfo does, as some of the numeric values depend on kernel configuration. For that, this patch moves the migratetype_names array from #ifdef CONFIG_PROC_FS part of mm/vmstat.c to mm/page_alloc.c and exports it. With the new format strings for flags, we can now also provide symbolic page and gfp flags in the /sys/kernel/debug/page_owner file. This replaces the positional printing of page flags as single letters, which might have looked nicer, but was limited to a subset of flags, and required the user to remember the letters. Example page_owner entry after the patch: Page allocated via order 0, mask 0x24213ca(GFP_HIGHUSER_MOVABLE|__GFP_COLD|__GFP_NOWARN|__GFP_NORETRY) PFN 520 type Movable Block 1 type Movable Flags 0xfffff8001006c(referenced|uptodate|lru|active|mappedtodisk) [] __alloc_pages_nodemask+0x134/0x230 [] alloc_pages_current+0x88/0x120 [] __page_cache_alloc+0xe6/0x120 [] __do_page_cache_readahead+0xdc/0x240 [] ondemand_readahead+0x135/0x260 [] page_cache_sync_readahead+0x31/0x50 [] generic_file_read_iter+0x453/0x760 [] __vfs_read+0xa7/0xd0 Signed-off-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Joonsoo Kim Cc: Minchan Kim Cc: Sasha Levin Cc: "Kirill A. Shutemov" Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 7b6c2cfee390..9fc23ab550a7 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -63,6 +63,9 @@ enum { MIGRATE_TYPES }; +/* In mm/page_alloc.c; keep in sync also with show_migration_types() there */ +extern char * const migratetype_names[MIGRATE_TYPES]; + #ifdef CONFIG_CMA # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) #else -- cgit From 7dd80b8af0bcd705a9ef2fa272c082882616a499 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Mar 2016 14:56:12 -0700 Subject: mm, page_owner: convert page_owner_inited to static key CONFIG_PAGE_OWNER attempts to impose negligible runtime overhead when enabled during compilation, but not actually enabled during runtime by boot param page_owner=on. This overhead can be further reduced using the static key mechanism, which this patch does. Signed-off-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Joonsoo Kim Cc: Minchan Kim Cc: Sasha Levin Cc: "Kirill A. Shutemov" Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_owner.h | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index cacaabea8a09..8e2eb153c7b9 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -1,8 +1,10 @@ #ifndef __LINUX_PAGE_OWNER_H #define __LINUX_PAGE_OWNER_H +#include + #ifdef CONFIG_PAGE_OWNER -extern bool page_owner_inited; +extern struct static_key_false page_owner_inited; extern struct page_ext_operations page_owner_ops; extern void __reset_page_owner(struct page *page, unsigned int order); @@ -12,27 +14,23 @@ extern gfp_t __get_page_owner_gfp(struct page *page); static inline void reset_page_owner(struct page *page, unsigned int order) { - if (likely(!page_owner_inited)) - return; - - __reset_page_owner(page, order); + if (static_branch_unlikely(&page_owner_inited)) + __reset_page_owner(page, order); } static inline void set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) { - if (likely(!page_owner_inited)) - return; - - __set_page_owner(page, order, gfp_mask); + if (static_branch_unlikely(&page_owner_inited)) + __set_page_owner(page, order, gfp_mask); } static inline gfp_t get_page_owner_gfp(struct page *page) { - if (likely(!page_owner_inited)) + if (static_branch_unlikely(&page_owner_inited)) + return __get_page_owner_gfp(page); + else return 0; - - return __get_page_owner_gfp(page); } #else static inline void reset_page_owner(struct page *page, unsigned int order) -- cgit From d435edca928805074dae005ab9a42d9fa60fc702 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Mar 2016 14:56:15 -0700 Subject: mm, page_owner: copy page owner info during migration The page_owner mechanism stores gfp_flags of an allocation and stack trace that lead to it. During page migration, the original information is practically replaced by the allocation of free page as the migration target. Arguably this is less useful and might lead to all the page_owner info for migratable pages gradually converge towards compaction or numa balancing migrations. It has also lead to inaccuracies such as one fixed by commit e2cfc91120fa ("mm/page_owner: set correct gfp_mask on page_owner"). This patch thus introduces copying the page_owner info during migration. However, since the fact that the page has been migrated from its original place might be useful for debugging, the next patch will introduce a way to track that information as well. Signed-off-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Joonsoo Kim Cc: Minchan Kim Cc: Sasha Levin Cc: "Kirill A. Shutemov" Cc: Mel Gorman Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_owner.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index 8e2eb153c7b9..6440daab4ef8 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -11,6 +11,7 @@ extern void __reset_page_owner(struct page *page, unsigned int order); extern void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask); extern gfp_t __get_page_owner_gfp(struct page *page); +extern void __copy_page_owner(struct page *oldpage, struct page *newpage); static inline void reset_page_owner(struct page *page, unsigned int order) { @@ -32,6 +33,11 @@ static inline gfp_t get_page_owner_gfp(struct page *page) else return 0; } +static inline void copy_page_owner(struct page *oldpage, struct page *newpage) +{ + if (static_branch_unlikely(&page_owner_inited)) + __copy_page_owner(oldpage, newpage); +} #else static inline void reset_page_owner(struct page *page, unsigned int order) { @@ -44,6 +50,8 @@ static inline gfp_t get_page_owner_gfp(struct page *page) { return 0; } - +static inline void copy_page_owner(struct page *oldpage, struct page *newpage) +{ +} #endif /* CONFIG_PAGE_OWNER */ #endif /* __LINUX_PAGE_OWNER_H */ -- cgit From 7cd12b4abfd2f8f42414c520bbd051a5b7dc7a8c Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Mar 2016 14:56:18 -0700 Subject: mm, page_owner: track and print last migrate reason During migration, page_owner info is now copied with the rest of the page, so the stacktrace leading to free page allocation during migration is overwritten. For debugging purposes, it might be however useful to know that the page has been migrated since its initial allocation. This might happen many times during the lifetime for different reasons and fully tracking this, especially with stacktraces would incur extra memory costs. As a compromise, store and print the migrate_reason of the last migration that occurred to the page. This is enough to distinguish compaction, numa balancing etc. Example page_owner entry after the patch: Page allocated via order 0, mask 0x24200ca(GFP_HIGHUSER_MOVABLE) PFN 628753 type Movable Block 1228 type Movable Flags 0x1fffff80040030(dirty|lru|swapbacked) [] __alloc_pages_nodemask+0x134/0x230 [] alloc_pages_vma+0xb5/0x250 [] shmem_alloc_page+0x61/0x90 [] shmem_getpage_gfp+0x678/0x960 [] shmem_fallocate+0x329/0x440 [] vfs_fallocate+0x140/0x230 [] SyS_fallocate+0x44/0x70 [] entry_SYSCALL_64_fastpath+0x12/0x71 Page has been migrated, last migrate reason: compaction Signed-off-by: Vlastimil Babka Cc: Joonsoo Kim Cc: Minchan Kim Cc: Sasha Levin Cc: "Kirill A. Shutemov" Cc: Mel Gorman Cc: Michal Hocko Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 6 +++++- include/linux/page_ext.h | 1 + include/linux/page_owner.h | 9 +++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index cac1c0904d5f..9b50325e4ddf 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -23,9 +23,13 @@ enum migrate_reason { MR_SYSCALL, /* also applies to cpusets */ MR_MEMPOLICY_MBIND, MR_NUMA_MISPLACED, - MR_CMA + MR_CMA, + MR_TYPES }; +/* In mm/debug.c; also keep sync with include/trace/events/migrate.h */ +extern char *migrate_reason_names[MR_TYPES]; + #ifdef CONFIG_MIGRATION extern void putback_movable_pages(struct list_head *l); diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index 17f118a82854..e1fe7cf5bddf 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -45,6 +45,7 @@ struct page_ext { unsigned int order; gfp_t gfp_mask; unsigned int nr_entries; + int last_migrate_reason; unsigned long trace_entries[8]; #endif }; diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index 6440daab4ef8..555893bf13d7 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -12,6 +12,7 @@ extern void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask); extern gfp_t __get_page_owner_gfp(struct page *page); extern void __copy_page_owner(struct page *oldpage, struct page *newpage); +extern void __set_page_owner_migrate_reason(struct page *page, int reason); static inline void reset_page_owner(struct page *page, unsigned int order) { @@ -38,6 +39,11 @@ static inline void copy_page_owner(struct page *oldpage, struct page *newpage) if (static_branch_unlikely(&page_owner_inited)) __copy_page_owner(oldpage, newpage); } +static inline void set_page_owner_migrate_reason(struct page *page, int reason) +{ + if (static_branch_unlikely(&page_owner_inited)) + __set_page_owner_migrate_reason(page, reason); +} #else static inline void reset_page_owner(struct page *page, unsigned int order) { @@ -53,5 +59,8 @@ static inline gfp_t get_page_owner_gfp(struct page *page) static inline void copy_page_owner(struct page *oldpage, struct page *newpage) { } +static inline void set_page_owner_migrate_reason(struct page *page, int reason) +{ +} #endif /* CONFIG_PAGE_OWNER */ #endif /* __LINUX_PAGE_OWNER_H */ -- cgit From 4e462112e98f9ad6dd62e160f8b14c7df5fed2fc Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Mar 2016 14:56:21 -0700 Subject: mm, page_owner: dump page owner info from dump_page() The page_owner mechanism is useful for dealing with memory leaks. By reading /sys/kernel/debug/page_owner one can determine the stack traces leading to allocations of all pages, and find e.g. a buggy driver. This information might be also potentially useful for debugging, such as the VM_BUG_ON_PAGE() calls to dump_page(). So let's print the stored info from dump_page(). Example output: page:ffffea000292f1c0 count:1 mapcount:0 mapping:ffff8800b2f6cc18 index:0x91d flags: 0x1fffff8001002c(referenced|uptodate|lru|mappedtodisk) page dumped because: VM_BUG_ON_PAGE(1) page->mem_cgroup:ffff8801392c5000 page allocated via order 0, migratetype Movable, gfp_mask 0x24213ca(GFP_HIGHUSER_MOVABLE|__GFP_COLD|__GFP_NOWARN|__GFP_NORETRY) [] __alloc_pages_nodemask+0x134/0x230 [] alloc_pages_current+0x88/0x120 [] __page_cache_alloc+0xe6/0x120 [] __do_page_cache_readahead+0xdc/0x240 [] ondemand_readahead+0x135/0x260 [] page_cache_async_readahead+0x6c/0x70 [] generic_file_read_iter+0x3f2/0x760 [] __vfs_read+0xa7/0xd0 page has been migrated, last migrate reason: compaction Signed-off-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Joonsoo Kim Cc: Minchan Kim Cc: Sasha Levin Cc: "Kirill A. Shutemov" Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_owner.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index 555893bf13d7..46f1b939948c 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -13,6 +13,7 @@ extern void __set_page_owner(struct page *page, extern gfp_t __get_page_owner_gfp(struct page *page); extern void __copy_page_owner(struct page *oldpage, struct page *newpage); extern void __set_page_owner_migrate_reason(struct page *page, int reason); +extern void __dump_page_owner(struct page *page); static inline void reset_page_owner(struct page *page, unsigned int order) { @@ -44,6 +45,11 @@ static inline void set_page_owner_migrate_reason(struct page *page, int reason) if (static_branch_unlikely(&page_owner_inited)) __set_page_owner_migrate_reason(page, reason); } +static inline void dump_page_owner(struct page *page) +{ + if (static_branch_unlikely(&page_owner_inited)) + __dump_page_owner(page); +} #else static inline void reset_page_owner(struct page *page, unsigned int order) { @@ -62,5 +68,8 @@ static inline void copy_page_owner(struct page *oldpage, struct page *newpage) static inline void set_page_owner_migrate_reason(struct page *page, int reason) { } +static inline void dump_page_owner(struct page *page) +{ +} #endif /* CONFIG_PAGE_OWNER */ #endif /* __LINUX_PAGE_OWNER_H */ -- cgit From ff8e81163889ac4c7f59e7f7db6377d0c5d8d69c Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Mar 2016 14:56:24 -0700 Subject: mm, debug: move bad flags printing to bad_page() Since bad_page() is the only user of the badflags parameter of dump_page_badflags(), we can move the code to bad_page() and simplify a bit. The dump_page_badflags() function is renamed to __dump_page() and can still be called separately from dump_page() for temporary debug prints where page_owner info is not desired. The only user-visible change is that page->mem_cgroup is printed before the bad flags. Signed-off-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Joonsoo Kim Cc: Minchan Kim Cc: Sasha Levin Cc: "Kirill A. Shutemov" Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmdebug.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 053824b0a412..de7be78c6f0e 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -9,8 +9,7 @@ struct vm_area_struct; struct mm_struct; extern void dump_page(struct page *page, const char *reason); -extern void dump_page_badflags(struct page *page, const char *reason, - unsigned long badflags); +extern void __dump_page(struct page *page, const char *reason); void dump_vma(const struct vm_area_struct *vma); void dump_mm(const struct mm_struct *mm); -- cgit From 8823b1dbc05fab1a8bec275eeae4709257c2661d Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 15 Mar 2016 14:56:27 -0700 Subject: mm/page_poison.c: enable PAGE_POISONING as a separate option Page poisoning is currently set up as a feature if architectures don't have architecture debug page_alloc to allow unmapping of pages. It has uses apart from that though. Clearing of the pages on free provides an increase in security as it helps to limit the risk of information leaks. Allow page poisoning to be enabled as a separate option independent of kernel_map pages since the two features do separate work. Because of how hiberanation is implemented, the checks on alloc cannot occur if hibernation is enabled. The runtime alloc checks can also be enabled with an option when !HIBERNATION. Credit to Grsecurity/PaX team for inspiring this work Signed-off-by: Laura Abbott Cc: Rafael J. Wysocki Cc: "Kirill A. Shutemov" Cc: Vlastimil Babka Cc: Michal Hocko Cc: Kees Cook Cc: Mathias Krause Cc: Dave Hansen Cc: Jianyu Zhan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 69fd6bbb8cce..99dcc8f36e28 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2176,6 +2176,15 @@ extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); +#ifdef CONFIG_PAGE_POISONING +extern bool page_poisoning_enabled(void); +extern void kernel_poison_pages(struct page *page, int numpages, int enable); +#else +static inline bool page_poisoning_enabled(void) { return false; } +static inline void kernel_poison_pages(struct page *page, int numpages, + int enable) { } +#endif + #ifdef CONFIG_DEBUG_PAGEALLOC extern bool _debug_pagealloc_enabled; extern void __kernel_map_pages(struct page *page, int numpages, int enable); -- cgit From 1414c7f4f7d72d138fff35f00151d15749b5beda Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 15 Mar 2016 14:56:30 -0700 Subject: mm/page_poisoning.c: allow for zero poisoning By default, page poisoning uses a poison value (0xaa) on free. If this is changed to 0, the page is not only sanitized but zeroing on alloc with __GFP_ZERO can be skipped as well. The tradeoff is that detecting corruption from the poisoning is harder to detect. This feature also cannot be used with hibernation since pages are not guaranteed to be zeroed after hibernation. Credit to Grsecurity/PaX team for inspiring this work Signed-off-by: Laura Abbott Acked-by: Rafael J. Wysocki Cc: "Kirill A. Shutemov" Cc: Vlastimil Babka Cc: Michal Hocko Cc: Kees Cook Cc: Mathias Krause Cc: Dave Hansen Cc: Jianyu Zhan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ include/linux/poison.h | 4 ++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 99dcc8f36e28..b97243d6aa49 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2179,10 +2179,12 @@ extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, #ifdef CONFIG_PAGE_POISONING extern bool page_poisoning_enabled(void); extern void kernel_poison_pages(struct page *page, int numpages, int enable); +extern bool page_is_poisoned(struct page *page); #else static inline bool page_poisoning_enabled(void) { return false; } static inline void kernel_poison_pages(struct page *page, int numpages, int enable) { } +static inline bool page_is_poisoned(struct page *page) { return false; } #endif #ifdef CONFIG_DEBUG_PAGEALLOC diff --git a/include/linux/poison.h b/include/linux/poison.h index 4a27153574e2..51334edec506 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -30,7 +30,11 @@ #define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA) /********** mm/debug-pagealloc.c **********/ +#ifdef CONFIG_PAGE_POISONING_ZERO +#define PAGE_POISON 0x00 +#else #define PAGE_POISON 0xaa +#endif /********** mm/page_alloc.c ************/ -- cgit From 31bc3858ea3ebcc3157b3f5f0e624c5962f5a7a6 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 15 Mar 2016 14:56:48 -0700 Subject: memory-hotplug: add automatic onlining policy for the newly added memory Currently, all newly added memory blocks remain in 'offline' state unless someone onlines them, some linux distributions carry special udev rules like: SUBSYSTEM=="memory", ACTION=="add", ATTR{state}=="offline", ATTR{state}="online" to make this happen automatically. This is not a great solution for virtual machines where memory hotplug is being used to address high memory pressure situations as such onlining is slow and a userspace process doing this (udev) has a chance of being killed by the OOM killer as it will probably require to allocate some memory. Introduce default policy for the newly added memory blocks in /sys/devices/system/memory/auto_online_blocks file with two possible values: "offline" which preserves the current behavior and "online" which causes all newly added memory blocks to go online as soon as they're added. The default is "offline". Signed-off-by: Vitaly Kuznetsov Reviewed-by: Daniel Kiper Cc: Jonathan Corbet Cc: Greg Kroah-Hartman Cc: Daniel Kiper Cc: Dan Williams Cc: Tang Chen Cc: David Vrabel Acked-by: David Rientjes Cc: Naoya Horiguchi Cc: Xishi Qiu Cc: Mel Gorman Cc: "K. Y. Srinivasan" Cc: Igor Mammedov Cc: Kay Sievers Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory.h | 3 +++ include/linux/memory_hotplug.h | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memory.h b/include/linux/memory.h index 8b8d8d12348e..82730adba950 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -109,6 +109,9 @@ extern void unregister_memory_notifier(struct notifier_block *nb); extern int register_memory_isolate_notifier(struct notifier_block *nb); extern void unregister_memory_isolate_notifier(struct notifier_block *nb); extern int register_new_memory(int, struct mem_section *); +extern int memory_block_change_state(struct memory_block *mem, + unsigned long to_state, + unsigned long from_state_req); #ifdef CONFIG_MEMORY_HOTREMOVE extern int unregister_memory_section(struct mem_section *); #endif diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 43405992d027..769d76870550 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -99,6 +99,8 @@ extern void __online_page_free(struct page *page); extern int try_online_node(int nid); +extern bool memhp_auto_online; + #ifdef CONFIG_MEMORY_HOTREMOVE extern bool is_pageblock_removable_nolock(struct page *page); extern int arch_remove_memory(u64 start, u64 size); @@ -267,7 +269,7 @@ static inline void remove_memory(int nid, u64 start, u64 size) {} extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, void *arg, int (*func)(struct memory_block *, void *)); extern int add_memory(int nid, u64 start, u64 size); -extern int add_memory_resource(int nid, struct resource *resource); +extern int add_memory_resource(int nid, struct resource *resource, bool online); extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default, bool for_device); extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device); -- cgit From 81f8c3a461d16f0355ced3d56d6d1bb5923207a1 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 15 Mar 2016 14:57:04 -0700 Subject: mm: memcontrol: generalize locking for the page->mem_cgroup binding These patches tag the page cache radix tree eviction entries with the memcg an evicted page belonged to, thus making per-cgroup LRU reclaim work properly and be as adaptive to new cache workingsets as global reclaim already is. This should have been part of the original thrash detection patch series, but was deferred due to the complexity of those patches. This patch (of 5): So far the only sites that needed to exclude charge migration to stabilize page->mem_cgroup have been per-cgroup page statistics, hence the name mem_cgroup_begin_page_stat(). But per-cgroup thrash detection will add another site that needs to ensure page->mem_cgroup lifetime. Rename these locking functions to the more generic lock_page_memcg() and unlock_page_memcg(). Since charge migration is a cgroup1 feature only, we might be able to delete it at some point, and these now easy to identify locking sites along with it. Signed-off-by: Johannes Weiner Suggested-by: Vladimir Davydov Acked-by: Vladimir Davydov Cc: Michal Hocko Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 30b02e79610e..8502fd4144eb 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -429,8 +429,8 @@ bool mem_cgroup_oom_synchronize(bool wait); extern int do_swap_account; #endif -struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page); -void mem_cgroup_end_page_stat(struct mem_cgroup *memcg); +struct mem_cgroup *lock_page_memcg(struct page *page); +void unlock_page_memcg(struct mem_cgroup *memcg); /** * mem_cgroup_update_page_stat - update page state statistics @@ -438,7 +438,13 @@ void mem_cgroup_end_page_stat(struct mem_cgroup *memcg); * @idx: page state item to account * @val: number of pages (positive or negative) * - * See mem_cgroup_begin_page_stat() for locking requirements. + * Callers must use lock_page_memcg() to prevent double accounting + * when the page is concurrently being moved to another memcg: + * + * memcg = lock_page_memcg(page); + * if (TestClearPageState(page)) + * mem_cgroup_update_page_stat(memcg, state, -1); + * unlock_page_memcg(memcg); */ static inline void mem_cgroup_update_page_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx, int val) @@ -613,12 +619,12 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { } -static inline struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page) +static inline struct mem_cgroup *lock_page_memcg(struct page *page) { return NULL; } -static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg) +static inline void unlock_page_memcg(struct mem_cgroup *memcg) { } -- cgit From 23047a96d7cfcfca1a6d026ecaec526ea4803e9e Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 15 Mar 2016 14:57:16 -0700 Subject: mm: workingset: per-cgroup cache thrash detection Cache thrash detection (see a528910e12ec "mm: thrash detection-based file cache sizing" for details) currently only works on the system level, not inside cgroups. Worse, as the refaults are compared to the global number of active cache, cgroups might wrongfully get all their refaults activated when their pages are hotter than those of others. Move the refault machinery from the zone to the lruvec, and then tag eviction entries with the memcg ID. This makes the thrash detection work correctly inside cgroups. [sergey.senozhatsky@gmail.com: do not return from workingset_activation() with locked rcu and page] Signed-off-by: Johannes Weiner Signed-off-by: Sergey Senozhatsky Reviewed-by: Vladimir Davydov Cc: Michal Hocko Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 56 ++++++++++++++++++++++++++++++++++++++++------ include/linux/mmzone.h | 13 ++++++----- 2 files changed, 56 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8502fd4144eb..09b449849369 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -89,6 +89,10 @@ enum mem_cgroup_events_target { }; #ifdef CONFIG_MEMCG + +#define MEM_CGROUP_ID_SHIFT 16 +#define MEM_CGROUP_ID_MAX USHRT_MAX + struct mem_cgroup_stat_cpu { long count[MEMCG_NR_STAT]; unsigned long events[MEMCG_NR_EVENTS]; @@ -265,6 +269,11 @@ struct mem_cgroup { extern struct mem_cgroup *root_mem_cgroup; +static inline bool mem_cgroup_disabled(void) +{ + return !cgroup_subsys_enabled(memory_cgrp_subsys); +} + /** * mem_cgroup_events - count memory events against a cgroup * @memcg: the memory cgroup @@ -312,6 +321,28 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, struct mem_cgroup_reclaim_cookie *); void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); +static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) +{ + if (mem_cgroup_disabled()) + return 0; + + return memcg->css.id; +} + +/** + * mem_cgroup_from_id - look up a memcg from an id + * @id: the id to look up + * + * Caller must hold rcu_read_lock() and use css_tryget() as necessary. + */ +static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) +{ + struct cgroup_subsys_state *css; + + css = css_from_id(id, &memory_cgrp_subsys); + return mem_cgroup_from_css(css); +} + /** * parent_mem_cgroup - find the accounting parent of a memcg * @memcg: memcg whose parent to find @@ -353,11 +384,6 @@ static inline bool mm_match_cgroup(struct mm_struct *mm, struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page); ino_t page_cgroup_ino(struct page *page); -static inline bool mem_cgroup_disabled(void) -{ - return !cgroup_subsys_enabled(memory_cgrp_subsys); -} - static inline bool mem_cgroup_online(struct mem_cgroup *memcg) { if (mem_cgroup_disabled()) @@ -502,8 +528,17 @@ void mem_cgroup_split_huge_fixup(struct page *head); #endif #else /* CONFIG_MEMCG */ + +#define MEM_CGROUP_ID_SHIFT 0 +#define MEM_CGROUP_ID_MAX 0 + struct mem_cgroup; +static inline bool mem_cgroup_disabled(void) +{ + return true; +} + static inline void mem_cgroup_events(struct mem_cgroup *memcg, enum mem_cgroup_events_index idx, unsigned int nr) @@ -586,9 +621,16 @@ static inline void mem_cgroup_iter_break(struct mem_cgroup *root, { } -static inline bool mem_cgroup_disabled(void) +static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) { - return true; + return 0; +} + +static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) +{ + WARN_ON_ONCE(id); + /* XXX: This should always return root_mem_cgroup */ + return NULL; } static inline bool mem_cgroup_online(struct mem_cgroup *memcg) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9fc23ab550a7..03cbdd906f55 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -212,10 +212,12 @@ struct zone_reclaim_stat { }; struct lruvec { - struct list_head lists[NR_LRU_LISTS]; - struct zone_reclaim_stat reclaim_stat; + struct list_head lists[NR_LRU_LISTS]; + struct zone_reclaim_stat reclaim_stat; + /* Evictions & activations on the inactive file list */ + atomic_long_t inactive_age; #ifdef CONFIG_MEMCG - struct zone *zone; + struct zone *zone; #endif }; @@ -490,9 +492,6 @@ struct zone { spinlock_t lru_lock; struct lruvec lruvec; - /* Evictions & activations on the inactive file list */ - atomic_long_t inactive_age; - /* * When free pages are below this point, additional steps are taken * when reading the number of free pages to avoid per-cpu counter @@ -761,6 +760,8 @@ static inline struct zone *lruvec_zone(struct lruvec *lruvec) #endif } +extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru); + #ifdef CONFIG_HAVE_MEMORY_PRESENT void memory_present(int nid, unsigned long start, unsigned long end); #else -- cgit From 6a93ca8fde3cfce0f00f02281139a377c83e8d8c Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 15 Mar 2016 14:57:19 -0700 Subject: mm: migrate: do not touch page->mem_cgroup of live pages Changing a page's memcg association complicates dealing with the page, so we want to limit this as much as possible. Page migration e.g. does not have to do that. Just like page cache replacement, it can forcibly charge a replacement page, and then uncharge the old page when it gets freed. Temporarily overcharging the cgroup by a single page is not an issue in practice, and charging is so cheap nowadays that this is much preferrable to the headache of messing with live pages. The only place that still changes the page->mem_cgroup binding of live pages is when pages move along with a task to another cgroup. But that path isolates the page from the LRU, takes the page lock, and the move lock (lock_page_memcg()). That means page->mem_cgroup is always stable in callers that have the page isolated from the LRU or locked. Lighter unlocked paths, like writeback accounting, can use lock_page_memcg(). [akpm@linux-foundation.org: fix build] [vdavydov@virtuozzo.com: fix lockdep splat] Signed-off-by: Johannes Weiner Acked-by: Vladimir Davydov Cc: Johannes Weiner Cc: Michal Hocko Cc: Greg Thelen Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 4 ++-- include/linux/mm.h | 9 --------- 2 files changed, 2 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 09b449849369..c45ab3fb6e04 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -300,7 +300,7 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg, void mem_cgroup_uncharge(struct page *page); void mem_cgroup_uncharge_list(struct list_head *page_list); -void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage); +void mem_cgroup_migrate(struct page *oldpage, struct page *newpage); struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); @@ -580,7 +580,7 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list) { } -static inline void mem_cgroup_replace_page(struct page *old, struct page *new) +static inline void mem_cgroup_migrate(struct page *old, struct page *new) { } diff --git a/include/linux/mm.h b/include/linux/mm.h index b97243d6aa49..6b471d1fc8df 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -905,20 +905,11 @@ static inline struct mem_cgroup *page_memcg(struct page *page) { return page->mem_cgroup; } - -static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg) -{ - page->mem_cgroup = memcg; -} #else static inline struct mem_cgroup *page_memcg(struct page *page) { return NULL; } - -static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg) -{ -} #endif /* -- cgit From 62cccb8c8e7a3ca233f49d5e7dcb1557d25465cd Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 15 Mar 2016 14:57:22 -0700 Subject: mm: simplify lock_page_memcg() Now that migration doesn't clear page->mem_cgroup of live pages anymore, it's safe to make lock_page_memcg() and the memcg stat functions take pages, and spare the callers from memcg objects. [akpm@linux-foundation.org: fix warnings] Signed-off-by: Johannes Weiner Suggested-by: Vladimir Davydov Acked-by: Vladimir Davydov Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 35 +++++++++++++++++------------------ include/linux/mm.h | 5 ++--- include/linux/pagemap.h | 3 +-- 3 files changed, 20 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index c45ab3fb6e04..d560c9a3cadf 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -455,42 +455,42 @@ bool mem_cgroup_oom_synchronize(bool wait); extern int do_swap_account; #endif -struct mem_cgroup *lock_page_memcg(struct page *page); -void unlock_page_memcg(struct mem_cgroup *memcg); +void lock_page_memcg(struct page *page); +void unlock_page_memcg(struct page *page); /** * mem_cgroup_update_page_stat - update page state statistics - * @memcg: memcg to account against + * @page: the page * @idx: page state item to account * @val: number of pages (positive or negative) * * Callers must use lock_page_memcg() to prevent double accounting * when the page is concurrently being moved to another memcg: * - * memcg = lock_page_memcg(page); + * lock_page_memcg(page); * if (TestClearPageState(page)) - * mem_cgroup_update_page_stat(memcg, state, -1); - * unlock_page_memcg(memcg); + * mem_cgroup_update_page_stat(page, state, -1); + * unlock_page_memcg(page); */ -static inline void mem_cgroup_update_page_stat(struct mem_cgroup *memcg, +static inline void mem_cgroup_update_page_stat(struct page *page, enum mem_cgroup_stat_index idx, int val) { VM_BUG_ON(!rcu_read_lock_held()); - if (memcg) - this_cpu_add(memcg->stat->count[idx], val); + if (page->mem_cgroup) + this_cpu_add(page->mem_cgroup->stat->count[idx], val); } -static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg, +static inline void mem_cgroup_inc_page_stat(struct page *page, enum mem_cgroup_stat_index idx) { - mem_cgroup_update_page_stat(memcg, idx, 1); + mem_cgroup_update_page_stat(page, idx, 1); } -static inline void mem_cgroup_dec_page_stat(struct mem_cgroup *memcg, +static inline void mem_cgroup_dec_page_stat(struct page *page, enum mem_cgroup_stat_index idx) { - mem_cgroup_update_page_stat(memcg, idx, -1); + mem_cgroup_update_page_stat(page, idx, -1); } unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, @@ -661,12 +661,11 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { } -static inline struct mem_cgroup *lock_page_memcg(struct page *page) +static inline void lock_page_memcg(struct page *page) { - return NULL; } -static inline void unlock_page_memcg(struct mem_cgroup *memcg) +static inline void unlock_page_memcg(struct page *page) { } @@ -692,12 +691,12 @@ static inline bool mem_cgroup_oom_synchronize(bool wait) return false; } -static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg, +static inline void mem_cgroup_inc_page_stat(struct page *page, enum mem_cgroup_stat_index idx) { } -static inline void mem_cgroup_dec_page_stat(struct mem_cgroup *memcg, +static inline void mem_cgroup_dec_page_stat(struct page *page, enum mem_cgroup_stat_index idx) { } diff --git a/include/linux/mm.h b/include/linux/mm.h index 6b471d1fc8df..a862b4f0ac24 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1291,10 +1291,9 @@ int __set_page_dirty_nobuffers(struct page *page); int __set_page_dirty_no_writeback(struct page *page); int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page); -void account_page_dirtied(struct page *page, struct address_space *mapping, - struct mem_cgroup *memcg); +void account_page_dirtied(struct page *page, struct address_space *mapping); void account_page_cleaned(struct page *page, struct address_space *mapping, - struct mem_cgroup *memcg, struct bdi_writeback *wb); + struct bdi_writeback *wb); int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); void cancel_dirty_page(struct page *page); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 92395a0a7dc5..183b15ea052b 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -663,8 +663,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, int add_to_page_cache_lru(struct page *page, struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); extern void delete_from_page_cache(struct page *page); -extern void __delete_from_page_cache(struct page *page, void *shadow, - struct mem_cgroup *memcg); +extern void __delete_from_page_cache(struct page *page, void *shadow); int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask); /* -- cgit From fdf1cdb91b6ab7a8a91df68c384f36b8a0909cab Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 15 Mar 2016 14:57:25 -0700 Subject: mm: remove unnecessary uses of lock_page_memcg() There are several users that nest lock_page_memcg() inside lock_page() to prevent page->mem_cgroup from changing. But the page lock prevents pages from moving between cgroups, so that is unnecessary overhead. Remove lock_page_memcg() in contexts with locked contexts and fix the debug code in the page stat functions to be okay with the page lock. Signed-off-by: Johannes Weiner Acked-by: Vladimir Davydov Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d560c9a3cadf..f0c4bec6565b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -28,6 +28,7 @@ #include #include #include +#include struct mem_cgroup; struct page; @@ -464,18 +465,19 @@ void unlock_page_memcg(struct page *page); * @idx: page state item to account * @val: number of pages (positive or negative) * - * Callers must use lock_page_memcg() to prevent double accounting - * when the page is concurrently being moved to another memcg: + * The @page must be locked or the caller must use lock_page_memcg() + * to prevent double accounting when the page is concurrently being + * moved to another memcg: * - * lock_page_memcg(page); + * lock_page(page) or lock_page_memcg(page) * if (TestClearPageState(page)) * mem_cgroup_update_page_stat(page, state, -1); - * unlock_page_memcg(page); + * unlock_page(page) or unlock_page_memcg(page) */ static inline void mem_cgroup_update_page_stat(struct page *page, enum mem_cgroup_stat_index idx, int val) { - VM_BUG_ON(!rcu_read_lock_held()); + VM_BUG_ON(!(rcu_read_lock_held() || PageLocked(page))); if (page->mem_cgroup) this_cpu_add(page->mem_cgroup->stat->count[idx], val); -- cgit From 7cf91a98e607c2f935dbcc177d70011e95b8faff Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 15 Mar 2016 14:57:51 -0700 Subject: mm/compaction: speed up pageblock_pfn_to_page() when zone is contiguous There is a performance drop report due to hugepage allocation and in there half of cpu time are spent on pageblock_pfn_to_page() in compaction [1]. In that workload, compaction is triggered to make hugepage but most of pageblocks are un-available for compaction due to pageblock type and skip bit so compaction usually fails. Most costly operations in this case is to find valid pageblock while scanning whole zone range. To check if pageblock is valid to compact, valid pfn within pageblock is required and we can obtain it by calling pageblock_pfn_to_page(). This function checks whether pageblock is in a single zone and return valid pfn if possible. Problem is that we need to check it every time before scanning pageblock even if we re-visit it and this turns out to be very expensive in this workload. Although we have no way to skip this pageblock check in the system where hole exists at arbitrary position, we can use cached value for zone continuity and just do pfn_to_page() in the system where hole doesn't exist. This optimization considerably speeds up in above workload. Before vs After Max: 1096 MB/s vs 1325 MB/s Min: 635 MB/s 1015 MB/s Avg: 899 MB/s 1194 MB/s Avg is improved by roughly 30% [2]. [1]: http://www.spinics.net/lists/linux-mm/msg97378.html [2]: https://lkml.org/lkml/2015/12/9/23 [akpm@linux-foundation.org: don't forget to restore zone->contiguous on error path, per Vlastimil] Signed-off-by: Joonsoo Kim Reported-by: Aaron Lu Acked-by: Vlastimil Babka Tested-by: Aaron Lu Cc: Mel Gorman Cc: Rik van Riel Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 6 ------ include/linux/memory_hotplug.h | 3 +++ include/linux/mmzone.h | 2 ++ 3 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 06546b36eb6a..bb16dfeb917e 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -519,13 +519,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp); void drain_all_pages(struct zone *zone); void drain_local_pages(struct zone *zone); -#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT void page_alloc_init_late(void); -#else -static inline void page_alloc_init_late(void) -{ -} -#endif /* * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 769d76870550..adbef586e696 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -198,6 +198,9 @@ void put_online_mems(void); void mem_hotplug_begin(void); void mem_hotplug_done(void); +extern void set_zone_contiguous(struct zone *zone); +extern void clear_zone_contiguous(struct zone *zone); + #else /* ! CONFIG_MEMORY_HOTPLUG */ /* * Stub functions for when hotplug is off diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 03cbdd906f55..6de02ac378a0 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -522,6 +522,8 @@ struct zone { bool compact_blockskip_flush; #endif + bool contiguous; + ZONE_PADDING(_pad3_) /* Zone statistics */ atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; -- cgit From e9a7c2f1a548f34bcaa7640094201e8b29247940 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 15 Mar 2016 14:58:25 -0700 Subject: autofs4: coding style fixes Try and make the coding style completely consistent throughtout the autofs module and inline with kernel coding style recommendations. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/auto_dev-ioctl.h | 1 - include/linux/auto_fs.h | 10 +++------- include/uapi/linux/auto_fs.h | 19 ++++++++----------- include/uapi/linux/auto_fs4.h | 15 ++++----------- 4 files changed, 15 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/linux/auto_dev-ioctl.h b/include/linux/auto_dev-ioctl.h index 850f39b33e74..642781612062 100644 --- a/include/linux/auto_dev-ioctl.h +++ b/include/linux/auto_dev-ioctl.h @@ -125,7 +125,6 @@ static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in) in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR; in->size = sizeof(struct autofs_dev_ioctl); in->ioctlfd = -1; - return; } /* diff --git a/include/linux/auto_fs.h b/include/linux/auto_fs.h index fcd704d354c4..b4066bb89083 100644 --- a/include/linux/auto_fs.h +++ b/include/linux/auto_fs.h @@ -1,14 +1,10 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * linux/include/linux/auto_fs.h - * - * Copyright 1997 Transmeta Corporation - All Rights Reserved +/* + * Copyright 1997 Transmeta Corporation - All Rights Reserved * * This file is part of the Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your * option, any later version, incorporated herein by reference. - * - * ----------------------------------------------------------------------- */ + */ #ifndef _LINUX_AUTO_FS_H #define _LINUX_AUTO_FS_H diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h index bb991dfe134f..5fe176aa61d1 100644 --- a/include/uapi/linux/auto_fs.h +++ b/include/uapi/linux/auto_fs.h @@ -1,7 +1,4 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * linux/include/linux/auto_fs.h - * +/* * Copyright 1997 Transmeta Corporation - All Rights Reserved * * This file is part of the Linux kernel and is made available under @@ -63,12 +60,12 @@ struct autofs_packet_expire { char name[NAME_MAX+1]; }; -#define AUTOFS_IOC_READY _IO(0x93,0x60) -#define AUTOFS_IOC_FAIL _IO(0x93,0x61) -#define AUTOFS_IOC_CATATONIC _IO(0x93,0x62) -#define AUTOFS_IOC_PROTOVER _IOR(0x93,0x63,int) -#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,compat_ulong_t) -#define AUTOFS_IOC_SETTIMEOUT _IOWR(0x93,0x64,unsigned long) -#define AUTOFS_IOC_EXPIRE _IOR(0x93,0x65,struct autofs_packet_expire) +#define AUTOFS_IOC_READY _IO(0x93, 0x60) +#define AUTOFS_IOC_FAIL _IO(0x93, 0x61) +#define AUTOFS_IOC_CATATONIC _IO(0x93, 0x62) +#define AUTOFS_IOC_PROTOVER _IOR(0x93, 0x63, int) +#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93, 0x64, compat_ulong_t) +#define AUTOFS_IOC_SETTIMEOUT _IOWR(0x93, 0x64, unsigned long) +#define AUTOFS_IOC_EXPIRE _IOR(0x93, 0x65, struct autofs_packet_expire) #endif /* _UAPI_LINUX_AUTO_FS_H */ diff --git a/include/uapi/linux/auto_fs4.h b/include/uapi/linux/auto_fs4.h index e02982fa2953..924fb1adab0b 100644 --- a/include/uapi/linux/auto_fs4.h +++ b/include/uapi/linux/auto_fs4.h @@ -1,6 +1,4 @@ -/* -*- c -*- - * linux/include/linux/auto_fs4.h - * +/* * Copyright 1999-2000 Jeremy Fitzhardinge * * This file is part of the Linux kernel and is made available under @@ -38,7 +36,6 @@ static inline void set_autofs_type_indirect(unsigned int *type) { *type = AUTOFS_TYPE_INDIRECT; - return; } static inline unsigned int autofs_type_indirect(unsigned int type) @@ -49,7 +46,6 @@ static inline unsigned int autofs_type_indirect(unsigned int type) static inline void set_autofs_type_direct(unsigned int *type) { *type = AUTOFS_TYPE_DIRECT; - return; } static inline unsigned int autofs_type_direct(unsigned int type) @@ -60,7 +56,6 @@ static inline unsigned int autofs_type_direct(unsigned int type) static inline void set_autofs_type_offset(unsigned int *type) { *type = AUTOFS_TYPE_OFFSET; - return; } static inline unsigned int autofs_type_offset(unsigned int type) @@ -81,7 +76,6 @@ static inline unsigned int autofs_type_trigger(unsigned int type) static inline void set_autofs_type_any(unsigned int *type) { *type = AUTOFS_TYPE_ANY; - return; } static inline unsigned int autofs_type_any(unsigned int type) @@ -154,11 +148,10 @@ union autofs_v5_packet_union { autofs_packet_expire_direct_t expire_direct; }; -#define AUTOFS_IOC_EXPIRE_MULTI _IOW(0x93,0x66,int) +#define AUTOFS_IOC_EXPIRE_MULTI _IOW(0x93, 0x66, int) #define AUTOFS_IOC_EXPIRE_INDIRECT AUTOFS_IOC_EXPIRE_MULTI #define AUTOFS_IOC_EXPIRE_DIRECT AUTOFS_IOC_EXPIRE_MULTI -#define AUTOFS_IOC_PROTOSUBVER _IOR(0x93,0x67,int) -#define AUTOFS_IOC_ASKUMOUNT _IOR(0x93,0x70,int) - +#define AUTOFS_IOC_PROTOSUBVER _IOR(0x93, 0x67, int) +#define AUTOFS_IOC_ASKUMOUNT _IOR(0x93, 0x70, int) #endif /* _LINUX_AUTO_FS4_H */ -- cgit From 0266725ad4ee0f8fcf2ee73be8e68c4adbf2ac79 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 15 Mar 2016 14:58:36 -0700 Subject: autofs4: fix some white space errors Fix some white space format errors. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/auto_fs.h | 2 +- include/uapi/linux/auto_fs4.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h index 5fe176aa61d1..9175a1b4dc69 100644 --- a/include/uapi/linux/auto_fs.h +++ b/include/uapi/linux/auto_fs.h @@ -48,7 +48,7 @@ struct autofs_packet_hdr { struct autofs_packet_missing { struct autofs_packet_hdr hdr; - autofs_wqt_t wait_queue_token; + autofs_wqt_t wait_queue_token; int len; char name[NAME_MAX+1]; }; diff --git a/include/uapi/linux/auto_fs4.h b/include/uapi/linux/auto_fs4.h index 924fb1adab0b..8f8f1bdcca8c 100644 --- a/include/uapi/linux/auto_fs4.h +++ b/include/uapi/linux/auto_fs4.h @@ -108,7 +108,7 @@ enum autofs_notify { /* v4 multi expire (via pipe) */ struct autofs_packet_expire_multi { struct autofs_packet_hdr hdr; - autofs_wqt_t wait_queue_token; + autofs_wqt_t wait_queue_token; int len; char name[NAME_MAX+1]; }; -- cgit From 63c06227a22b098a3849c5c99e836aea161ca0d7 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 15 Mar 2016 14:58:47 -0700 Subject: autofs4: fix string.h include in auto_dev-ioctl.h Since including linux/string.h will now do the right thing remove the conditional check. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/auto_dev-ioctl.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/auto_dev-ioctl.h b/include/linux/auto_dev-ioctl.h index 642781612062..7caaf298f539 100644 --- a/include/linux/auto_dev-ioctl.h +++ b/include/linux/auto_dev-ioctl.h @@ -11,12 +11,7 @@ #define _LINUX_AUTO_DEV_IOCTL_H #include - -#ifdef __KERNEL__ #include -#else -#include -#endif /* __KERNEL__ */ #define AUTOFS_DEVICE_NAME "autofs" -- cgit