From 539a6fea7fdcade532bd3e77be2862a683f8f0c9 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 15 Nov 2017 17:33:04 -0800 Subject: mm, swap: introduce SWP_SYNCHRONOUS_IO If rw-page based fast storage is used for swap devices, we need to detect it to enhance swap IO operations. This patch is preparation for optimizing of swap-in operation with next patch. Link: http://lkml.kernel.org/r/1505886205-9671-4-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Cc: Hugh Dickins Cc: Christoph Hellwig Cc: Dan Williams Cc: Ilya Dryomov Cc: Jens Axboe Cc: Ross Zwisler Cc: Sergey Senozhatsky Cc: Huang Ying Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index f02fb5db8914..933d7c0c3542 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -171,8 +171,9 @@ enum { SWP_AREA_DISCARD = (1 << 8), /* single-time swap area discards */ SWP_PAGE_DISCARD = (1 << 9), /* freed swap page-cluster discards */ SWP_STABLE_WRITES = (1 << 10), /* no overwrite PG_writeback pages */ + SWP_SYNCHRONOUS_IO = (1 << 11), /* synchronous IO is efficient */ /* add others here before... */ - SWP_SCANNING = (1 << 11), /* refcount in scan_swap_map */ + SWP_SCANNING = (1 << 12), /* refcount in scan_swap_map */ }; #define SWAP_CLUSTER_MAX 32UL -- cgit From 0bcac06f27d7528591c27ac2b093ccd71c5d0168 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 15 Nov 2017 17:33:07 -0800 Subject: mm, swap: skip swapcache for swapin of synchronous device With fast swap storage, the platforms want to use swap more aggressively and swap-in is crucial to application latency. The rw_page() based synchronous devices like zram, pmem and btt are such fast storage. When I profile swapin performance with zram lz4 decompress test, S/W overhead is more than 70%. Maybe, it would be bigger in nvdimm. 
This patch aims to reduce swap-in latency by skipping the swapcache if the swap device is a synchronous device, such as an rw_page-based device. It improves my swapin test by 45% (5G sequential swapin, no readahead, from 2.41sec to 1.64sec). Link: http://lkml.kernel.org/r/1505886205-9671-5-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Cc: Dan Williams Cc: Ross Zwisler Cc: Hugh Dickins Cc: Christoph Hellwig Cc: Ilya Dryomov Cc: Jens Axboe Cc: Sergey Senozhatsky Cc: Huang Ying Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index 933d7c0c3542..32c06f028c7b 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -466,6 +466,7 @@ extern int page_swapcount(struct page *); extern int __swp_swapcount(swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); extern struct swap_info_struct *page_swap_info(struct page *); +extern struct swap_info_struct *swp_swap_info(swp_entry_t entry); extern bool reuse_swap_page(struct page *, int *); extern int try_to_free_swap(struct page *); struct backing_dev_info; @@ -474,6 +475,16 @@ extern void exit_swap_address_space(unsigned int type); #else /* CONFIG_SWAP */ +static inline int swap_readpage(struct page *page, bool do_poll) +{ + return 0; +} + +static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry) +{ + return NULL; +} + #define swap_address_space(entry) (NULL) #define get_nr_swap_pages() 0L #define total_swap_pages 0L -- cgit From aa8d22a11da933dbf880b4933b58931f4aefe91c Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 15 Nov 2017 17:33:11 -0800 Subject: mm: swap: SWP_SYNCHRONOUS_IO: skip swapcache only if swapped page has no other reference When SWP_SYNCHRONOUS_IO swapped-in pages are shared by several processes, it can cause unnecessary memory wastage by skipping swap cache. 
This is because, on a read swapin fault, the processes could share a single page if the page were in the swap cache, which avoids allocating new pages with the same content. This patch makes the swapcache skipping apply only if the swap pte is non-sharable. [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/1507620825-5537-1-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Cc: Dan Williams Cc: Ross Zwisler Cc: Hugh Dickins Cc: Christoph Hellwig Cc: Ilya Dryomov Cc: Jens Axboe Cc: Sergey Senozhatsky Cc: Huang Ying Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index 32c06f028c7b..8b8a6f965785 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -463,6 +463,7 @@ extern unsigned int count_swap_pages(int, int); extern sector_t map_swap_page(struct page *, struct block_device **); extern sector_t swapdev_block(int, pgoff_t); extern int page_swapcount(struct page *); +extern int __swap_count(struct swap_info_struct *si, swp_entry_t entry); extern int __swp_swapcount(swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); extern struct swap_info_struct *page_swap_info(struct page *); @@ -589,6 +590,11 @@ static inline int page_swapcount(struct page *page) return 0; } +static inline int __swap_count(struct swap_info_struct *si, swp_entry_t entry) +{ + return 0; +} + static inline int __swp_swapcount(swp_entry_t entry) { return 0; -- cgit From c7df8ad2910e965a6241b6d8f52fd122e26b0315 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 15 Nov 2017 17:37:41 -0800 Subject: mm, truncate: do not check mapping for every page being truncated During truncation, the mapping has already been checked for shmem and dax so it's known that workingset_update_node is required. This patch avoids the checks on mapping for each page being truncated. 
In all other cases, a lookup helper is used to determine if workingset_update_node() needs to be called. The one danger is that the API is slightly harder to use as calling workingset_update_node directly without checking for dax or shmem mappings could lead to surprises. However, the API rarely needs to be used and hopefully the comment is enough to give people the hint. sparsetruncate (tiny) 4.14.0-rc4 4.14.0-rc4 oneirq-v1r1 pickhelper-v1r1 Min Time 141.00 ( 0.00%) 140.00 ( 0.71%) 1st-qrtle Time 142.00 ( 0.00%) 141.00 ( 0.70%) 2nd-qrtle Time 142.00 ( 0.00%) 142.00 ( 0.00%) 3rd-qrtle Time 143.00 ( 0.00%) 143.00 ( 0.00%) Max-90% Time 144.00 ( 0.00%) 144.00 ( 0.00%) Max-95% Time 147.00 ( 0.00%) 145.00 ( 1.36%) Max-99% Time 195.00 ( 0.00%) 191.00 ( 2.05%) Max Time 230.00 ( 0.00%) 205.00 ( 10.87%) Amean Time 144.37 ( 0.00%) 143.82 ( 0.38%) Stddev Time 10.44 ( 0.00%) 9.00 ( 13.74%) Coeff Time 7.23 ( 0.00%) 6.26 ( 13.41%) Best99%Amean Time 143.72 ( 0.00%) 143.34 ( 0.26%) Best95%Amean Time 142.37 ( 0.00%) 142.00 ( 0.26%) Best90%Amean Time 142.19 ( 0.00%) 141.85 ( 0.24%) Best75%Amean Time 141.92 ( 0.00%) 141.58 ( 0.24%) Best50%Amean Time 141.69 ( 0.00%) 141.31 ( 0.27%) Best25%Amean Time 141.38 ( 0.00%) 140.97 ( 0.29%) As you'd expect, the gain is marginal but it can be detected. The differences in bonnie are all within the noise which is not surprising given the impact on the microbenchmark. radix_tree_update_node_t is a callback for some radix operations that optionally passes in a private field. The only user of the callback is workingset_update_node and as it no longer requires a mapping, the private field is removed. 
Link: http://lkml.kernel.org/r/20171018075952.10627-3-mgorman@techsingularity.net Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Reviewed-by: Jan Kara Cc: Andi Kleen Cc: Dave Chinner Cc: Dave Hansen Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index 8b8a6f965785..454f042bcdd5 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -298,7 +298,18 @@ struct vma_swap_readahead { void *workingset_eviction(struct address_space *mapping, struct page *page); bool workingset_refault(void *shadow); void workingset_activation(struct page *page); -void workingset_update_node(struct radix_tree_node *node, void *private); + +/* Do not use directly, use workingset_lookup_update */ +void workingset_update_node(struct radix_tree_node *node); + +/* Returns workingset_update_node() if the mapping has shadow entries. */ +#define workingset_lookup_update(mapping) \ +({ \ + radix_tree_update_node_t __helper = workingset_update_node; \ + if (dax_mapping(mapping) || shmem_mapping(mapping)) \ + __helper = NULL; \ + __helper; \ +}) /* linux/mm/page_alloc.c */ extern unsigned long totalram_pages; -- cgit From c6f92f9fbe7dbcc8903a67229aa88b4077ae4422 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 15 Nov 2017 17:37:55 -0800 Subject: mm: remove cold parameter for release_pages All callers of release_pages claim the pages being released are cache hot. As no one cares about the hotness of pages being released to the allocator, just ditch the parameter. No performance impact is expected as the overhead is marginal. The parameter is removed simply because it is a bit stupid to have a useless parameter copied everywhere. 
Link: http://lkml.kernel.org/r/20171018075952.10627-7-mgorman@techsingularity.net Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Andi Kleen Cc: Dave Chinner Cc: Dave Hansen Cc: Jan Kara Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index 454f042bcdd5..c2b8128799c1 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -510,7 +510,7 @@ static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry) #define free_page_and_swap_cache(page) \ put_page(page) #define free_pages_and_swap_cache(pages, nr) \ - release_pages((pages), (nr), false); + release_pages((pages), (nr)); static inline void show_swap_cache_info(void) { -- cgit