diff options
Diffstat (limited to 'mm/swap.h')
| -rw-r--r-- | mm/swap.h | 425 |
1 files changed, 363 insertions, 62 deletions
diff --git a/mm/swap.h b/mm/swap.h index 8a3c7a0ace4f..d034c13d8dd2 100644 --- a/mm/swap.h +++ b/mm/swap.h @@ -2,13 +2,191 @@ #ifndef _MM_SWAP_H #define _MM_SWAP_H +#include <linux/atomic.h> /* for atomic_long_t */ +struct mempolicy; +struct swap_iocb; + +extern int page_cluster; + +#ifdef CONFIG_THP_SWAP +#define SWAPFILE_CLUSTER HPAGE_PMD_NR +#define swap_entry_order(order) (order) +#else +#define SWAPFILE_CLUSTER 256 +#define swap_entry_order(order) 0 +#endif + +extern struct swap_info_struct *swap_info[]; + +/* + * We use this to track usage of a cluster. A cluster is a block of swap disk + * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All + * free clusters are organized into a list. We fetch an entry from the list to + * get a free cluster. + * + * The flags field determines if a cluster is free. This is + * protected by cluster lock. + */ +struct swap_cluster_info { + spinlock_t lock; /* + * Protect swap_cluster_info fields + * other than list, and swap_info_struct->swap_map + * elements corresponding to the swap cluster. + */ + u16 count; + u8 flags; + u8 order; + atomic_long_t __rcu *table; /* Swap table entries, see mm/swap_table.h */ + struct list_head list; +}; + +/* All on-list cluster must have a non-zero flag. */ +enum swap_cluster_flags { + CLUSTER_FLAG_NONE = 0, /* For temporary off-list cluster */ + CLUSTER_FLAG_FREE, + CLUSTER_FLAG_NONFULL, + CLUSTER_FLAG_FRAG, + /* Clusters with flags above are allocatable */ + CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG, + CLUSTER_FLAG_FULL, + CLUSTER_FLAG_DISCARD, + CLUSTER_FLAG_MAX, +}; + #ifdef CONFIG_SWAP +#include <linux/swapops.h> /* for swp_offset */ #include <linux/blk_types.h> /* for bio_end_io_t */ +static inline unsigned int swp_cluster_offset(swp_entry_t entry) +{ + return swp_offset(entry) % SWAPFILE_CLUSTER; +} + +/* + * Callers of all helpers below must ensure the entry, type, or offset is + * valid, and protect the swap device with reference count or locks. + */ +static inline struct swap_info_struct *__swap_type_to_info(int type) +{ + struct swap_info_struct *si; + + si = READ_ONCE(swap_info[type]); /* rcu_dereference() */ + VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */ + return si; +} + +static inline struct swap_info_struct *__swap_entry_to_info(swp_entry_t entry) +{ + return __swap_type_to_info(swp_type(entry)); +} + +static inline struct swap_cluster_info *__swap_offset_to_cluster( + struct swap_info_struct *si, pgoff_t offset) +{ + VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */ + VM_WARN_ON_ONCE(offset >= si->max); + return &si->cluster_info[offset / SWAPFILE_CLUSTER]; +} + +static inline struct swap_cluster_info *__swap_entry_to_cluster(swp_entry_t entry) +{ + return __swap_offset_to_cluster(__swap_entry_to_info(entry), + swp_offset(entry)); +} + +static __always_inline struct swap_cluster_info *__swap_cluster_lock( + struct swap_info_struct *si, unsigned long offset, bool irq) +{ + struct swap_cluster_info *ci = __swap_offset_to_cluster(si, offset); + + /* + * Nothing modifies swap cache in an IRQ context. All access to + * swap cache is wrapped by swap_cache_* helpers, and swap cache + * writeback is handled outside of IRQs. Swapin or swapout never + * occurs in IRQ, and neither does in-place split or replace. + * + * Besides, modifying swap cache requires synchronization with + * swap_map, which was never IRQ safe. + */ + VM_WARN_ON_ONCE(!in_task()); + VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */ + if (irq) + spin_lock_irq(&ci->lock); + else + spin_lock(&ci->lock); + return ci; +} + +/** + * swap_cluster_lock - Lock and return the swap cluster of given offset. + * @si: swap device the cluster belongs to. + * @offset: the swap entry offset, pointing to a valid slot. + * + * Context: The caller must ensure the offset is in the valid range and + * protect the swap device with reference count or locks. + */ +static inline struct swap_cluster_info *swap_cluster_lock( + struct swap_info_struct *si, unsigned long offset) +{ + return __swap_cluster_lock(si, offset, false); +} + +static inline struct swap_cluster_info *__swap_cluster_get_and_lock( + const struct folio *folio, bool irq) +{ + VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio); + VM_WARN_ON_ONCE_FOLIO(!folio_test_swapcache(folio), folio); + return __swap_cluster_lock(__swap_entry_to_info(folio->swap), + swp_offset(folio->swap), irq); +} + +/* + * swap_cluster_get_and_lock - Locks the cluster that holds a folio's entries. + * @folio: The folio. + * + * This locks and returns the swap cluster that contains a folio's swap + * entries. The swap entries of a folio are always in one single cluster. + * The folio has to be locked so its swap entries won't change and the + * cluster won't be freed. + * + * Context: Caller must ensure the folio is locked and in the swap cache. + * Return: Pointer to the swap cluster. + */ +static inline struct swap_cluster_info *swap_cluster_get_and_lock( + const struct folio *folio) +{ + return __swap_cluster_get_and_lock(folio, false); +} + +/* + * swap_cluster_get_and_lock_irq - Locks the cluster that holds a folio's entries. + * @folio: The folio. + * + * Same as swap_cluster_get_and_lock but also disable IRQ. + * + * Context: Caller must ensure the folio is locked and in the swap cache. + * Return: Pointer to the swap cluster. + */ +static inline struct swap_cluster_info *swap_cluster_get_and_lock_irq( + const struct folio *folio) +{ + return __swap_cluster_get_and_lock(folio, true); +} + +static inline void swap_cluster_unlock(struct swap_cluster_info *ci) +{ + spin_unlock(&ci->lock); +} + +static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci) +{ + spin_unlock_irq(&ci->lock); +} + /* linux/mm/page_io.c */ int sio_pool_init(void); struct swap_iocb; -void swap_readpage(struct page *page, bool do_poll, struct swap_iocb **plug); +void swap_read_folio(struct folio *folio, struct swap_iocb **plug); void __swap_read_unplug(struct swap_iocb *plug); static inline void swap_read_unplug(struct swap_iocb *plug) { @@ -16,54 +194,166 @@ static inline void swap_read_unplug(struct swap_iocb *plug) __swap_read_unplug(plug); } void swap_write_unplug(struct swap_iocb *sio); -int swap_writepage(struct page *page, struct writeback_control *wbc); -void __swap_writepage(struct page *page, struct writeback_control *wbc); +int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug); +void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug); /* linux/mm/swap_state.c */ -/* One swap address space for each 64M swap space */ -#define SWAP_ADDRESS_SPACE_SHIFT 14 -#define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT) -extern struct address_space *swapper_spaces[]; -#define swap_address_space(entry) \ - (&swapper_spaces[swp_type(entry)][swp_offset(entry) \ - >> SWAP_ADDRESS_SPACE_SHIFT]) +extern struct address_space swap_space __ro_after_init; +static inline struct address_space *swap_address_space(swp_entry_t entry) +{ + return &swap_space; +} + +/* + * Return the swap device position of the swap entry. + */ +static inline loff_t swap_dev_pos(swp_entry_t entry) +{ + return ((loff_t)swp_offset(entry)) << PAGE_SHIFT; +} + +/** + * folio_matches_swap_entry - Check if a folio matches a given swap entry. + * @folio: The folio. + * @entry: The swap entry to check against. + * + * Context: The caller should have the folio locked to ensure it's stable + * and nothing will move it in or out of the swap cache. + * Return: true or false. + */ +static inline bool folio_matches_swap_entry(const struct folio *folio, + swp_entry_t entry) +{ + swp_entry_t folio_entry = folio->swap; + long nr_pages = folio_nr_pages(folio); + + VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio); + if (!folio_test_swapcache(folio)) + return false; + VM_WARN_ON_ONCE_FOLIO(!IS_ALIGNED(folio_entry.val, nr_pages), folio); + return folio_entry.val == round_down(entry.val, nr_pages); +} + +/* + * All swap cache helpers below require the caller to ensure the swap entries + * used are valid and stablize the device by any of the following ways: + * - Hold a reference by get_swap_device(): this ensures a single entry is + * valid and increases the swap device's refcount. + * - Locking a folio in the swap cache: this ensures the folio's swap entries + * are valid and pinned, also implies reference to the device. + * - Locking anything referencing the swap entry: e.g. PTL that protects + * swap entries in the page table, similar to locking swap cache folio. + * - See the comment of get_swap_device() for more complex usage. + */ +struct folio *swap_cache_get_folio(swp_entry_t entry); +void *swap_cache_get_shadow(swp_entry_t entry); +void swap_cache_add_folio(struct folio *folio, swp_entry_t entry, void **shadow); +void swap_cache_del_folio(struct folio *folio); +/* Below helpers require the caller to lock and pass in the swap cluster. */ +void __swap_cache_del_folio(struct swap_cluster_info *ci, + struct folio *folio, swp_entry_t entry, void *shadow); +void __swap_cache_replace_folio(struct swap_cluster_info *ci, + struct folio *old, struct folio *new); +void __swap_cache_clear_shadow(swp_entry_t entry, int nr_ents); void show_swap_cache_info(void); -bool add_to_swap(struct folio *folio); -void *get_shadow_from_swap_cache(swp_entry_t entry); -int add_to_swap_cache(struct folio *folio, swp_entry_t entry, - gfp_t gfp, void **shadowp); -void __delete_from_swap_cache(struct folio *folio, - swp_entry_t entry, void *shadow); -void delete_from_swap_cache(struct folio *folio); -void clear_shadow_from_swap_cache(int type, unsigned long begin, - unsigned long end); -struct folio *swap_cache_get_folio(swp_entry_t entry, - struct vm_area_struct *vma, unsigned long addr); -struct folio *filemap_get_incore_folio(struct address_space *mapping, - pgoff_t index); - -struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, - struct vm_area_struct *vma, - unsigned long addr, - struct swap_iocb **plug); -struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, - struct vm_area_struct *vma, - unsigned long addr, - bool *new_page_allocated); -struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t flag, - struct vm_fault *vmf); -struct page *swapin_readahead(swp_entry_t entry, gfp_t flag, - struct vm_fault *vmf); +void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr); +struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, + struct vm_area_struct *vma, unsigned long addr, + struct swap_iocb **plug); +struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_flags, + struct mempolicy *mpol, pgoff_t ilx, bool *new_page_allocated, + bool skip_if_exists); +struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t flag, + struct mempolicy *mpol, pgoff_t ilx); +struct folio *swapin_readahead(swp_entry_t entry, gfp_t flag, + struct vm_fault *vmf); +void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma, + unsigned long addr); static inline unsigned int folio_swap_flags(struct folio *folio) { - return page_swap_info(&folio->page)->flags; + return __swap_entry_to_info(folio->swap)->flags; +} + +/* + * Return the count of contiguous swap entries that share the same + * zeromap status as the starting entry. If is_zeromap is not NULL, + * it will return the zeromap status of the starting entry. + */ +static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr, + bool *is_zeromap) +{ + struct swap_info_struct *sis = __swap_entry_to_info(entry); + unsigned long start = swp_offset(entry); + unsigned long end = start + max_nr; + bool first_bit; + + first_bit = test_bit(start, sis->zeromap); + if (is_zeromap) + *is_zeromap = first_bit; + + if (max_nr <= 1) + return max_nr; + if (first_bit) + return find_next_zero_bit(sis->zeromap, end, start) - start; + else + return find_next_bit(sis->zeromap, end, start) - start; +} + +static inline int non_swapcache_batch(swp_entry_t entry, int max_nr) +{ + struct swap_info_struct *si = __swap_entry_to_info(entry); + pgoff_t offset = swp_offset(entry); + int i; + + /* + * While allocating a large folio and doing mTHP swapin, we need to + * ensure all entries are not cached, otherwise, the mTHP folio will + * be in conflict with the folio in swap cache. + */ + for (i = 0; i < max_nr; i++) { + if ((si->swap_map[offset + i] & SWAP_HAS_CACHE)) + return i; + } + + return i; } + #else /* CONFIG_SWAP */ struct swap_iocb; -static inline void swap_readpage(struct page *page, bool do_poll, - struct swap_iocb **plug) +static inline struct swap_cluster_info *swap_cluster_lock( + struct swap_info_struct *si, pgoff_t offset, bool irq) +{ + return NULL; +} + +static inline struct swap_cluster_info *swap_cluster_get_and_lock( + struct folio *folio) +{ + return NULL; +} + +static inline struct swap_cluster_info *swap_cluster_get_and_lock_irq( + struct folio *folio) +{ + return NULL; +} + +static inline void swap_cluster_unlock(struct swap_cluster_info *ci) +{ +} + +static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci) +{ +} + +static inline struct swap_info_struct *__swap_entry_to_info(swp_entry_t entry) +{ + return NULL; +} + +static inline void swap_read_folio(struct folio *folio, struct swap_iocb **plug) { } static inline void swap_write_unplug(struct swap_iocb *sio) @@ -75,67 +365,67 @@ static inline struct address_space *swap_address_space(swp_entry_t entry) return NULL; } +static inline bool folio_matches_swap_entry(const struct folio *folio, swp_entry_t entry) +{ + return false; +} + static inline void show_swap_cache_info(void) { } -static inline struct page *swap_cluster_readahead(swp_entry_t entry, - gfp_t gfp_mask, struct vm_fault *vmf) +static inline struct folio *swap_cluster_readahead(swp_entry_t entry, + gfp_t gfp_mask, struct mempolicy *mpol, pgoff_t ilx) { return NULL; } -static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask, +static inline struct folio *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask, struct vm_fault *vmf) { return NULL; } -static inline int swap_writepage(struct page *p, struct writeback_control *wbc) +static inline void swap_update_readahead(struct folio *folio, + struct vm_area_struct *vma, unsigned long addr) { - return 0; } -static inline struct folio *swap_cache_get_folio(swp_entry_t entry, - struct vm_area_struct *vma, unsigned long addr) +static inline int swap_writeout(struct folio *folio, + struct swap_iocb **swap_plug) { - return NULL; + return 0; } -static inline -struct folio *filemap_get_incore_folio(struct address_space *mapping, - pgoff_t index) +static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr) { - return filemap_get_folio(mapping, index); } -static inline bool add_to_swap(struct folio *folio) +static inline struct folio *swap_cache_get_folio(swp_entry_t entry) { - return false; + return NULL; } -static inline void *get_shadow_from_swap_cache(swp_entry_t entry) +static inline void *swap_cache_get_shadow(swp_entry_t entry) { return NULL; } -static inline int add_to_swap_cache(struct folio *folio, swp_entry_t entry, - gfp_t gfp_mask, void **shadowp) +static inline void swap_cache_add_folio(struct folio *folio, swp_entry_t entry, void **shadow) { - return -1; } -static inline void __delete_from_swap_cache(struct folio *folio, - swp_entry_t entry, void *shadow) +static inline void swap_cache_del_folio(struct folio *folio) { } -static inline void delete_from_swap_cache(struct folio *folio) +static inline void __swap_cache_del_folio(struct swap_cluster_info *ci, + struct folio *folio, swp_entry_t entry, void *shadow) { } -static inline void clear_shadow_from_swap_cache(int type, unsigned long begin, - unsigned long end) +static inline void __swap_cache_replace_folio(struct swap_cluster_info *ci, + struct folio *old, struct folio *new) { } @@ -143,5 +433,16 @@ static inline unsigned int folio_swap_flags(struct folio *folio) { return 0; } + +static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr, + bool *has_zeromap) +{ + return 0; +} + +static inline int non_swapcache_batch(swp_entry_t entry, int max_nr) +{ + return 0; +} #endif /* CONFIG_SWAP */ #endif /* _MM_SWAP_H */ |
