diff options
Diffstat (limited to 'include/linux/swap.h')
-rw-r--r-- | include/linux/swap.h | 140 |
1 files changed, 79 insertions, 61 deletions
diff --git a/include/linux/swap.h b/include/linux/swap.h index 8d28f6091a32..b13b72645db3 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -219,10 +219,10 @@ enum { SWP_STABLE_WRITES = (1 << 11), /* no overwrite PG_writeback pages */ SWP_SYNCHRONOUS_IO = (1 << 12), /* synchronous IO is efficient */ /* add others here before... */ - SWP_SCANNING = (1 << 14), /* refcount in scan_swap_map */ }; #define SWAP_CLUSTER_MAX 32UL +#define SWAP_CLUSTER_MAX_SKIPPED (SWAP_CLUSTER_MAX << 10) #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX /* Bit flag in swap_map */ @@ -243,23 +243,47 @@ enum { * free clusters are organized into a list. We fetch an entry from the list to * get a free cluster. * - * The data field stores next cluster if the cluster is free or cluster usage - * counter otherwise. The flags field determines if a cluster is free. This is - * protected by swap_info_struct.lock. + * The flags field determines if a cluster is free. This is + * protected by cluster lock. */ struct swap_cluster_info { spinlock_t lock; /* * Protect swap_cluster_info fields - * and swap_info_struct->swap_map - * elements correspond to the swap - * cluster + * other than list, and swap_info_struct->swap_map + * elements corresponding to the swap cluster. */ - unsigned int data:24; - unsigned int flags:8; + u16 count; + u8 flags; + u8 order; + struct list_head list; +}; + +/* All on-list cluster must have a non-zero flag. */ +enum swap_cluster_flags { + CLUSTER_FLAG_NONE = 0, /* For temporary off-list cluster */ + CLUSTER_FLAG_FREE, + CLUSTER_FLAG_NONFULL, + CLUSTER_FLAG_FRAG, + /* Clusters with flags above are allocatable */ + CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG, + CLUSTER_FLAG_FULL, + CLUSTER_FLAG_DISCARD, + CLUSTER_FLAG_MAX, }; -#define CLUSTER_FLAG_FREE 1 /* This cluster is free */ -#define CLUSTER_FLAG_NEXT_NULL 2 /* This cluster has no next cluster */ -#define CLUSTER_FLAG_HUGE 4 /* This cluster is backing a transparent huge page */ + +/* + * The first page in the swap file is the swap header, which is always marked + * bad to prevent it from being allocated as an entry. This also prevents the + * cluster to which it belongs being marked free. Therefore 0 is safe to use as + * a sentinel to indicate an entry is not valid. + */ +#define SWAP_ENTRY_INVALID 0 + +#ifdef CONFIG_THP_SWAP +#define SWAP_NR_ORDERS (PMD_ORDER + 1) +#else +#define SWAP_NR_ORDERS 1 +#endif /* * We assign a cluster to each CPU, so each CPU can allocate swap entry from @@ -267,13 +291,8 @@ struct swap_cluster_info { * throughput. */ struct percpu_cluster { - struct swap_cluster_info index; /* Current cluster index */ - unsigned int next; /* Likely next allocation offset */ -}; - -struct swap_cluster_list { - struct swap_cluster_info head; - struct swap_cluster_info tail; + local_lock_t lock; /* Protect the percpu_cluster above */ + unsigned int next[SWAP_NR_ORDERS]; /* Likely next allocation offset */ }; /* @@ -287,21 +306,23 @@ struct swap_info_struct { signed char type; /* strange name for an index */ unsigned int max; /* extent of the swap_map */ unsigned char *swap_map; /* vmalloc'ed array of usage counts */ + unsigned long *zeromap; /* kvmalloc'ed bitmap to track zero pages */ struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */ - struct swap_cluster_list free_clusters; /* free clusters list */ - unsigned int lowest_bit; /* index of first free in swap_map */ - unsigned int highest_bit; /* index of last free in swap_map */ + struct list_head free_clusters; /* free clusters list */ + struct list_head full_clusters; /* full clusters list */ + struct list_head nonfull_clusters[SWAP_NR_ORDERS]; + /* list of cluster that contains at least one free slot */ + struct list_head frag_clusters[SWAP_NR_ORDERS]; + /* list of cluster that are fragmented or contented */ + atomic_long_t frag_cluster_nr[SWAP_NR_ORDERS]; unsigned int pages; /* total of usable pages of swap */ - unsigned int inuse_pages; /* number of those currently in use */ - unsigned int cluster_next; /* likely index for next allocation */ - unsigned int cluster_nr; /* countdown to next cluster search */ - unsigned int __percpu *cluster_next_cpu; /*percpu index for next allocation */ + atomic_long_t inuse_pages; /* number of those currently in use */ struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */ + struct percpu_cluster *global_cluster; /* Use one global cluster for rotating device */ + spinlock_t global_cluster_lock; /* Serialize usage of global cluster */ struct rb_root swap_extent_root;/* root of the swap extent rbtree */ - struct bdev_handle *bdev_handle;/* open handle of the bdev */ struct block_device *bdev; /* swap device or bdev of swap file */ struct file *swap_file; /* seldom referenced */ - unsigned int old_block_size; /* seldom referenced */ struct completion comp; /* seldom referenced */ spinlock_t lock; /* * protect map scan related fields like @@ -321,7 +342,8 @@ struct swap_info_struct { * list. */ struct work_struct discard_work; /* discard worker */ - struct swap_cluster_list discard_clusters; /* discard clusters list */ + struct work_struct reclaim_work; /* reclaim worker */ + struct list_head discard_clusters; /* discard clusters list */ struct plist_node avail_lists[]; /* * entries in swap_avail_heads, one * entry per node. @@ -344,22 +366,13 @@ static inline swp_entry_t page_swap_entry(struct page *page) } /* linux/mm/workingset.c */ -bool workingset_test_recent(void *shadow, bool file, bool *workingset); +bool workingset_test_recent(void *shadow, bool file, bool *workingset, + bool flush); void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg); void workingset_refault(struct folio *folio, void *shadow); void workingset_activation(struct folio *folio); -/* Only track the nodes of mappings with shadow entries */ -void workingset_update_node(struct xa_node *node); -extern struct list_lru shadow_nodes; -#define mapping_set_update(xas, mapping) do { \ - if (!dax_mapping(mapping) && !shmem_mapping(mapping)) { \ - xas_set_update(xas, workingset_update_node); \ - xas_set_lru(xas, &shadow_nodes); \ - } \ -} while (0) - /* linux/mm/page_alloc.c */ extern unsigned long totalreserve_pages; @@ -404,10 +417,13 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, #define MEMCG_RECLAIM_MAY_SWAP (1 << 1) #define MEMCG_RECLAIM_PROACTIVE (1 << 2) +#define MIN_SWAPPINESS 0 +#define MAX_SWAPPINESS 200 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, - unsigned int reclaim_options); + unsigned int reclaim_options, + int *swappiness); extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap, pg_data_t *pgdat, @@ -447,9 +463,9 @@ static inline unsigned long total_swapcache_pages(void) return global_node_page_state(NR_SWAPCACHE); } -extern void free_swap_cache(struct page *page); -extern void free_page_and_swap_cache(struct page *); -extern void free_pages_and_swap_cache(struct encoded_page **, int); +void free_swap_cache(struct folio *folio); +void free_page_and_swap_cache(struct page *); +void free_pages_and_swap_cache(struct encoded_page **, int); /* linux/mm/swapfile.c */ extern atomic_long_t nr_swap_pages; extern long total_swap_pages; @@ -472,14 +488,14 @@ swp_entry_t folio_alloc_swap(struct folio *folio); bool folio_free_swap(struct folio *folio); void put_swap_folio(struct folio *folio, swp_entry_t entry); extern swp_entry_t get_swap_page_of_type(int); -extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size); +extern int get_swap_pages(int n, swp_entry_t swp_entries[], int order); extern int add_swap_count_continuation(swp_entry_t, gfp_t); -extern void swap_shmem_alloc(swp_entry_t); +extern void swap_shmem_alloc(swp_entry_t, int); extern int swap_duplicate(swp_entry_t); -extern int swapcache_prepare(swp_entry_t); -extern void swap_free(swp_entry_t); +extern int swapcache_prepare(swp_entry_t entry, int nr); +extern void swap_free_nr(swp_entry_t entry, int nr_pages); extern void swapcache_free_entries(swp_entry_t *entries, int n); -extern int free_swap_and_cache(swp_entry_t); +extern void free_swap_and_cache_nr(swp_entry_t entry, int nr); int swap_type_of(dev_t device, sector_t offset); int find_first_swap(dev_t *device); extern unsigned int count_swap_pages(int, int); @@ -528,10 +544,11 @@ static inline void put_swap_device(struct swap_info_struct *si) #define free_pages_and_swap_cache(pages, nr) \ release_pages((pages), (nr)); -/* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */ -#define free_swap_and_cache(e) is_pfn_swap_entry(e) +static inline void free_swap_and_cache_nr(swp_entry_t entry, int nr) +{ +} -static inline void free_swap_cache(struct page *page) +static inline void free_swap_cache(struct folio *folio) { } @@ -540,7 +557,7 @@ static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask) return 0; } -static inline void swap_shmem_alloc(swp_entry_t swp) +static inline void swap_shmem_alloc(swp_entry_t swp, int nr) { } @@ -549,12 +566,12 @@ static inline int swap_duplicate(swp_entry_t swp) return 0; } -static inline int swapcache_prepare(swp_entry_t swp) +static inline int swapcache_prepare(swp_entry_t swp, int nr) { return 0; } -static inline void swap_free(swp_entry_t swp) +static inline void swap_free_nr(swp_entry_t entry, int nr_pages) { } @@ -597,14 +614,15 @@ static inline int add_swap_extent(struct swap_info_struct *sis, } #endif /* CONFIG_SWAP */ -#ifdef CONFIG_THP_SWAP -extern int split_swap_cluster(swp_entry_t entry); -#else -static inline int split_swap_cluster(swp_entry_t entry) +static inline void free_swap_and_cache(swp_entry_t entry) { - return 0; + free_swap_and_cache_nr(entry, 1); +} + +static inline void swap_free(swp_entry_t entry) +{ + swap_free_nr(entry, 1); } -#endif #ifdef CONFIG_MEMCG static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg) |