diff options
Diffstat (limited to 'mm/swap_state.c')
| -rw-r--r-- | mm/swap_state.c | 601 |
1 files changed, 271 insertions, 330 deletions
diff --git a/mm/swap_state.c b/mm/swap_state.c index e0c0321b8ff7..5f97c6ae70a2 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -12,7 +12,7 @@ #include <linux/kernel_stat.h> #include <linux/mempolicy.h> #include <linux/swap.h> -#include <linux/swapops.h> +#include <linux/leafops.h> #include <linux/init.h> #include <linux/pagemap.h> #include <linux/pagevec.h> @@ -20,10 +20,10 @@ #include <linux/blkdev.h> #include <linux/migrate.h> #include <linux/vmalloc.h> -#include <linux/swap_slots.h> #include <linux/huge_mm.h> #include <linux/shmem_fs.h> #include "internal.h" +#include "swap_table.h" #include "swap.h" /* @@ -31,15 +31,17 @@ * vmscan's shrink_folio_list. */ static const struct address_space_operations swap_aops = { - .writepage = swap_writepage, .dirty_folio = noop_dirty_folio, #ifdef CONFIG_MIGRATION .migrate_folio = migrate_folio, #endif }; -struct address_space *swapper_spaces[MAX_SWAPFILES] __read_mostly; -static unsigned int nr_swapper_spaces[MAX_SWAPFILES] __read_mostly; +/* Set swap_space as read only as swap cache is handled by swap table */ +struct address_space swap_space __ro_after_init = { + .a_ops = &swap_aops, +}; + static bool enable_vma_readahead __read_mostly = true; #define SWAP_RA_ORDER_CEILING 5 @@ -71,211 +73,235 @@ void show_swap_cache_info(void) printk("Total swap = %lukB\n", K(total_swap_pages)); } -void *get_shadow_from_swap_cache(swp_entry_t entry) +/** + * swap_cache_get_folio - Looks up a folio in the swap cache. + * @entry: swap entry used for the lookup. + * + * A found folio will be returned unlocked and with its refcount increased. + * + * Context: Caller must ensure @entry is valid and protect the swap device + * with reference count or locks. + * Return: Returns the found folio on success, NULL otherwise. The caller + * must lock and check if the folio still matches the swap entry before + * use (e.g., folio_matches_swap_entry). 
+ */ +struct folio *swap_cache_get_folio(swp_entry_t entry) { - struct address_space *address_space = swap_address_space(entry); - pgoff_t idx = swap_cache_index(entry); - void *shadow; + unsigned long swp_tb; + struct folio *folio; + + for (;;) { + swp_tb = swap_table_get(__swap_entry_to_cluster(entry), + swp_cluster_offset(entry)); + if (!swp_tb_is_folio(swp_tb)) + return NULL; + folio = swp_tb_to_folio(swp_tb); + if (likely(folio_try_get(folio))) + return folio; + } - shadow = xa_load(&address_space->i_pages, idx); - if (xa_is_value(shadow)) - return shadow; return NULL; } -/* - * add_to_swap_cache resembles filemap_add_folio on swapper_space, - * but sets SwapCache flag and private instead of mapping and index. +/** + * swap_cache_get_shadow - Looks up a shadow in the swap cache. + * @entry: swap entry used for the lookup. + * + * Context: Caller must ensure @entry is valid and protect the swap device + * with reference count or locks. + * Return: Returns either NULL or an XA_VALUE (shadow). */ -int add_to_swap_cache(struct folio *folio, swp_entry_t entry, - gfp_t gfp, void **shadowp) +void *swap_cache_get_shadow(swp_entry_t entry) { - struct address_space *address_space = swap_address_space(entry); - pgoff_t idx = swap_cache_index(entry); - XA_STATE_ORDER(xas, &address_space->i_pages, idx, folio_order(folio)); - unsigned long i, nr = folio_nr_pages(folio); - void *old; + unsigned long swp_tb; - xas_set_update(&xas, workingset_update_node); + swp_tb = swap_table_get(__swap_entry_to_cluster(entry), + swp_cluster_offset(entry)); + if (swp_tb_is_shadow(swp_tb)) + return swp_tb_to_shadow(swp_tb); + return NULL; +} - VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); - VM_BUG_ON_FOLIO(folio_test_swapcache(folio), folio); - VM_BUG_ON_FOLIO(!folio_test_swapbacked(folio), folio); +/** + * swap_cache_add_folio - Add a folio into the swap cache. + * @folio: The folio to be added. + * @entry: The swap entry corresponding to the folio. 
+ * @gfp: gfp_mask for XArray node allocation. + * @shadowp: If a shadow is found, return the shadow. + * + * Context: Caller must ensure @entry is valid and protect the swap device + * with reference count or locks. + * The caller also needs to update the corresponding swap_map slots with + * SWAP_HAS_CACHE bit to avoid race or conflict. + */ +void swap_cache_add_folio(struct folio *folio, swp_entry_t entry, void **shadowp) +{ + void *shadow = NULL; + unsigned long old_tb, new_tb; + struct swap_cluster_info *ci; + unsigned int ci_start, ci_off, ci_end; + unsigned long nr_pages = folio_nr_pages(folio); + + VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio); + VM_WARN_ON_ONCE_FOLIO(folio_test_swapcache(folio), folio); + VM_WARN_ON_ONCE_FOLIO(!folio_test_swapbacked(folio), folio); + + new_tb = folio_to_swp_tb(folio); + ci_start = swp_cluster_offset(entry); + ci_end = ci_start + nr_pages; + ci_off = ci_start; + ci = swap_cluster_lock(__swap_entry_to_info(entry), swp_offset(entry)); + do { + old_tb = __swap_table_xchg(ci, ci_off, new_tb); + WARN_ON_ONCE(swp_tb_is_folio(old_tb)); + if (swp_tb_is_shadow(old_tb)) + shadow = swp_tb_to_shadow(old_tb); + } while (++ci_off < ci_end); - folio_ref_add(folio, nr); + folio_ref_add(folio, nr_pages); folio_set_swapcache(folio); folio->swap = entry; + swap_cluster_unlock(ci); - do { - xas_lock_irq(&xas); - xas_create_range(&xas); - if (xas_error(&xas)) - goto unlock; - for (i = 0; i < nr; i++) { - VM_BUG_ON_FOLIO(xas.xa_index != idx + i, folio); - if (shadowp) { - old = xas_load(&xas); - if (xa_is_value(old)) - *shadowp = old; - } - xas_store(&xas, folio); - xas_next(&xas); - } - address_space->nrpages += nr; - __node_stat_mod_folio(folio, NR_FILE_PAGES, nr); - __lruvec_stat_mod_folio(folio, NR_SWAPCACHE, nr); -unlock: - xas_unlock_irq(&xas); - } while (xas_nomem(&xas, gfp)); + node_stat_mod_folio(folio, NR_FILE_PAGES, nr_pages); + lruvec_stat_mod_folio(folio, NR_SWAPCACHE, nr_pages); - if (!xas_error(&xas)) - return 0; - - 
folio_clear_swapcache(folio); - folio_ref_sub(folio, nr); - return xas_error(&xas); + if (shadowp) + *shadowp = shadow; } -/* - * This must be called only on folios that have - * been verified to be in the swap cache. +/** + * __swap_cache_del_folio - Removes a folio from the swap cache. + * @ci: The locked swap cluster. + * @folio: The folio. + * @entry: The first swap entry that the folio corresponds to. + * @shadow: shadow value to be filled in the swap cache. + * + * Removes a folio from the swap cache and fills a shadow in place. + * This won't put the folio's refcount. The caller has to do that. + * + * Context: Caller must ensure the folio is locked and in the swap cache + * using the index of @entry, and lock the cluster that holds the entries. */ -void __delete_from_swap_cache(struct folio *folio, - swp_entry_t entry, void *shadow) +void __swap_cache_del_folio(struct swap_cluster_info *ci, struct folio *folio, + swp_entry_t entry, void *shadow) { - struct address_space *address_space = swap_address_space(entry); - int i; - long nr = folio_nr_pages(folio); - pgoff_t idx = swap_cache_index(entry); - XA_STATE(xas, &address_space->i_pages, idx); - - xas_set_update(&xas, workingset_update_node); - - VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); - VM_BUG_ON_FOLIO(!folio_test_swapcache(folio), folio); - VM_BUG_ON_FOLIO(folio_test_writeback(folio), folio); - - for (i = 0; i < nr; i++) { - void *entry = xas_store(&xas, shadow); - VM_BUG_ON_PAGE(entry != folio, entry); - xas_next(&xas); - } + unsigned long old_tb, new_tb; + unsigned int ci_start, ci_off, ci_end; + unsigned long nr_pages = folio_nr_pages(folio); + + VM_WARN_ON_ONCE(__swap_entry_to_cluster(entry) != ci); + VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio); + VM_WARN_ON_ONCE_FOLIO(!folio_test_swapcache(folio), folio); + VM_WARN_ON_ONCE_FOLIO(folio_test_writeback(folio), folio); + + new_tb = shadow_swp_to_tb(shadow); + ci_start = swp_cluster_offset(entry); + ci_end = ci_start + nr_pages; + 
ci_off = ci_start; + do { + /* If shadow is NULL, we set an empty shadow */ + old_tb = __swap_table_xchg(ci, ci_off, new_tb); + WARN_ON_ONCE(!swp_tb_is_folio(old_tb) || + swp_tb_to_folio(old_tb) != folio); + } while (++ci_off < ci_end); + folio->swap.val = 0; folio_clear_swapcache(folio); - address_space->nrpages -= nr; - __node_stat_mod_folio(folio, NR_FILE_PAGES, -nr); - __lruvec_stat_mod_folio(folio, NR_SWAPCACHE, -nr); + node_stat_mod_folio(folio, NR_FILE_PAGES, -nr_pages); + lruvec_stat_mod_folio(folio, NR_SWAPCACHE, -nr_pages); } /** - * add_to_swap - allocate swap space for a folio - * @folio: folio we want to move to swap + * swap_cache_del_folio - Removes a folio from the swap cache. + * @folio: The folio. * - * Allocate swap space for the folio and add the folio to the - * swap cache. + * Same as __swap_cache_del_folio, but handles lock and refcount. The + * caller must ensure the folio is either clean or has a swap count + * equal to zero, or it may cause data loss. * - * Context: Caller needs to hold the folio lock. - * Return: Whether the folio was added to the swap cache. + * Context: Caller must ensure the folio is locked and in the swap cache. */ -bool add_to_swap(struct folio *folio) +void swap_cache_del_folio(struct folio *folio) { - swp_entry_t entry; - int err; - - VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); - VM_BUG_ON_FOLIO(!folio_test_uptodate(folio), folio); - - entry = folio_alloc_swap(folio); - if (!entry.val) - return false; - - /* - * XArray node allocations from PF_MEMALLOC contexts could - * completely exhaust the page allocator. __GFP_NOMEMALLOC - * stops emergency reserves from being allocated. - * - * TODO: this could cause a theoretical memory reclaim - * deadlock in the swap out path. - */ - /* - * Add it to the swap cache. 
- */ - err = add_to_swap_cache(folio, entry, - __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN, NULL); - if (err) - /* - * add_to_swap_cache() doesn't return -EEXIST, so we can safely - * clear SWAP_HAS_CACHE flag. - */ - goto fail; - /* - * Normally the folio will be dirtied in unmap because its - * pte should be dirty. A special case is MADV_FREE page. The - * page's pte could have dirty bit cleared but the folio's - * SwapBacked flag is still set because clearing the dirty bit - * and SwapBacked flag has no lock protected. For such folio, - * unmap will not set dirty bit for it, so folio reclaim will - * not write the folio out. This can cause data corruption when - * the folio is swapped in later. Always setting the dirty flag - * for the folio solves the problem. - */ - folio_mark_dirty(folio); + struct swap_cluster_info *ci; + swp_entry_t entry = folio->swap; - return true; + ci = swap_cluster_lock(__swap_entry_to_info(entry), swp_offset(entry)); + __swap_cache_del_folio(ci, folio, entry, NULL); + swap_cluster_unlock(ci); -fail: put_swap_folio(folio, entry); - return false; + folio_ref_sub(folio, folio_nr_pages(folio)); } -/* - * This must be called only on folios that have - * been verified to be in the swap cache and locked. - * It will never put the folio into the free list, - * the caller has a reference on the folio. +/** + * __swap_cache_replace_folio - Replace a folio in the swap cache. + * @ci: The locked swap cluster. + * @old: The old folio to be replaced. + * @new: The new folio. + * + * Replace an existing folio in the swap cache with a new folio. The + * caller is responsible for setting up the new folio's flag and swap + * entries. Replacement will take the new folio's swap entry value as + * the starting offset to override all slots covered by the new folio. + * + * Context: Caller must ensure both folios are locked, and lock the + * cluster that holds the old folio to be replaced. 
*/ -void delete_from_swap_cache(struct folio *folio) +void __swap_cache_replace_folio(struct swap_cluster_info *ci, + struct folio *old, struct folio *new) { - swp_entry_t entry = folio->swap; - struct address_space *address_space = swap_address_space(entry); - - xa_lock_irq(&address_space->i_pages); - __delete_from_swap_cache(folio, entry, NULL); - xa_unlock_irq(&address_space->i_pages); + swp_entry_t entry = new->swap; + unsigned long nr_pages = folio_nr_pages(new); + unsigned int ci_off = swp_cluster_offset(entry); + unsigned int ci_end = ci_off + nr_pages; + unsigned long old_tb, new_tb; + + VM_WARN_ON_ONCE(!folio_test_swapcache(old) || !folio_test_swapcache(new)); + VM_WARN_ON_ONCE(!folio_test_locked(old) || !folio_test_locked(new)); + VM_WARN_ON_ONCE(!entry.val); + + /* Swap cache still stores N entries instead of a high-order entry */ + new_tb = folio_to_swp_tb(new); + do { + old_tb = __swap_table_xchg(ci, ci_off, new_tb); + WARN_ON_ONCE(!swp_tb_is_folio(old_tb) || swp_tb_to_folio(old_tb) != old); + } while (++ci_off < ci_end); - put_swap_folio(folio, entry); - folio_ref_sub(folio, folio_nr_pages(folio)); + /* + * If the old folio is partially replaced (e.g., splitting a large + * folio, the old folio is shrunk, and new split sub folios replace + * the shrunk part), ensure the new folio doesn't overlap it. + */ + if (IS_ENABLED(CONFIG_DEBUG_VM) && + folio_order(old) != folio_order(new)) { + ci_off = swp_cluster_offset(old->swap); + ci_end = ci_off + folio_nr_pages(old); + while (ci_off++ < ci_end) + WARN_ON_ONCE(swp_tb_to_folio(__swap_table_get(ci, ci_off)) != old); + } } -void clear_shadow_from_swap_cache(int type, unsigned long begin, - unsigned long end) +/** + * __swap_cache_clear_shadow - Clears a set of shadows in the swap cache. + * @entry: The starting index entry. + * @nr_ents: How many slots need to be cleared. + * + * Context: Caller must ensure the range is valid, all in one single cluster, + * not occupied by any folio, and lock the cluster. 
+ */ +void __swap_cache_clear_shadow(swp_entry_t entry, int nr_ents) { - unsigned long curr = begin; - void *old; - - for (;;) { - swp_entry_t entry = swp_entry(type, curr); - unsigned long index = curr & SWAP_ADDRESS_SPACE_MASK; - struct address_space *address_space = swap_address_space(entry); - XA_STATE(xas, &address_space->i_pages, index); - - xas_set_update(&xas, workingset_update_node); - - xa_lock_irq(&address_space->i_pages); - xas_for_each(&xas, old, min(index + (end - curr), SWAP_ADDRESS_SPACE_PAGES)) { - if (!xa_is_value(old)) - continue; - xas_store(&xas, NULL); - } - xa_unlock_irq(&address_space->i_pages); + struct swap_cluster_info *ci = __swap_entry_to_cluster(entry); + unsigned int ci_off = swp_cluster_offset(entry), ci_end; + unsigned long old; - /* search the next swapcache until we meet end */ - curr >>= SWAP_ADDRESS_SPACE_SHIFT; - curr++; - curr <<= SWAP_ADDRESS_SPACE_SHIFT; - if (curr > end) - break; - } + ci_end = ci_off + nr_ents; + do { + old = __swap_table_xchg(ci, ci_off, null_to_swp_tb()); + WARN_ON_ONCE(swp_tb_is_folio(old)); + } while (++ci_off < ci_end); } /* @@ -296,13 +322,11 @@ void free_swap_cache(struct folio *folio) } /* - * Perform a free_page(), also freeing any swap cache associated with - * this page if it is the last user of the page. + * Freeing a folio and also freeing any swap cache associated with + * this folio if it is the last user. 
*/ -void free_page_and_swap_cache(struct page *page) +void free_folio_and_swap_cache(struct folio *folio) { - struct folio *folio = page_folio(page); - free_swap_cache(folio); if (!is_huge_zero_folio(folio)) folio_put(folio); @@ -317,7 +341,6 @@ void free_pages_and_swap_cache(struct encoded_page **pages, int nr) struct folio_batch folios; unsigned int refs[PAGEVEC_SIZE]; - lru_add_drain(); folio_batch_init(&folios); for (int i = 0; i < nr; i++) { struct folio *folio = page_folio(encoded_page_ptr(pages[i])); @@ -340,131 +363,71 @@ static inline bool swap_use_vma_readahead(void) return READ_ONCE(enable_vma_readahead) && !atomic_read(&nr_rotate_swap); } -/* - * Lookup a swap entry in the swap cache. A found folio will be returned - * unlocked and with its refcount incremented - we rely on the kernel - * lock getting page table operations atomic even if we drop the folio - * lock before returning. - * - * Caller must lock the swap device or hold a reference to keep it valid. +/** + * swap_update_readahead - Update the readahead statistics of VMA or globally. + * @folio: the swap cache folio that just got hit. + * @vma: the VMA that should be updated, could be NULL for global update. + * @addr: the addr that triggered the swapin, ignored if @vma is NULL. */ -struct folio *swap_cache_get_folio(swp_entry_t entry, - struct vm_area_struct *vma, unsigned long addr) +void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma, + unsigned long addr) { - struct folio *folio; + bool readahead, vma_ra = swap_use_vma_readahead(); - folio = filemap_get_folio(swap_address_space(entry), swap_cache_index(entry)); - if (!IS_ERR(folio)) { - bool vma_ra = swap_use_vma_readahead(); - bool readahead; - - /* - * At the moment, we don't support PG_readahead for anon THP - * so let's bail out rather than confusing the readahead stat. 
- */ - if (unlikely(folio_test_large(folio))) - return folio; - - readahead = folio_test_clear_readahead(folio); - if (vma && vma_ra) { - unsigned long ra_val; - int win, hits; - - ra_val = GET_SWAP_RA_VAL(vma); - win = SWAP_RA_WIN(ra_val); - hits = SWAP_RA_HITS(ra_val); - if (readahead) - hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX); - atomic_long_set(&vma->swap_readahead_info, - SWAP_RA_VAL(addr, win, hits)); - } - - if (readahead) { - count_vm_event(SWAP_RA_HIT); - if (!vma || !vma_ra) - atomic_inc(&swapin_readahead_hits); - } - } else { - folio = NULL; + /* + * At the moment, we don't support PG_readahead for anon THP + * so let's bail out rather than confusing the readahead stat. + */ + if (unlikely(folio_test_large(folio))) + return; + + readahead = folio_test_clear_readahead(folio); + if (vma && vma_ra) { + unsigned long ra_val; + int win, hits; + + ra_val = GET_SWAP_RA_VAL(vma); + win = SWAP_RA_WIN(ra_val); + hits = SWAP_RA_HITS(ra_val); + if (readahead) + hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX); + atomic_long_set(&vma->swap_readahead_info, + SWAP_RA_VAL(addr, win, hits)); } - return folio; -} - -/** - * filemap_get_incore_folio - Find and get a folio from the page or swap caches. - * @mapping: The address_space to search. - * @index: The page cache index. - * - * This differs from filemap_get_folio() in that it will also look for the - * folio in the swap cache. - * - * Return: The found folio or %NULL. - */ -struct folio *filemap_get_incore_folio(struct address_space *mapping, - pgoff_t index) -{ - swp_entry_t swp; - struct swap_info_struct *si; - struct folio *folio = filemap_get_entry(mapping, index); - - if (!folio) - return ERR_PTR(-ENOENT); - if (!xa_is_value(folio)) - return folio; - if (!shmem_mapping(mapping)) - return ERR_PTR(-ENOENT); - - swp = radix_to_swp_entry(folio); - /* There might be swapin error entries in shmem mapping. 
*/ - if (non_swap_entry(swp)) - return ERR_PTR(-ENOENT); - /* Prevent swapoff from happening to us */ - si = get_swap_device(swp); - if (!si) - return ERR_PTR(-ENOENT); - index = swap_cache_index(swp); - folio = filemap_get_folio(swap_address_space(swp), index); - put_swap_device(si); - return folio; + if (readahead) { + count_vm_event(SWAP_RA_HIT); + if (!vma || !vma_ra) + atomic_inc(&swapin_readahead_hits); + } } struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, struct mempolicy *mpol, pgoff_t ilx, bool *new_page_allocated, bool skip_if_exists) { - struct swap_info_struct *si; + struct swap_info_struct *si = __swap_entry_to_info(entry); struct folio *folio; struct folio *new_folio = NULL; struct folio *result = NULL; void *shadow = NULL; *new_page_allocated = false; - si = get_swap_device(entry); - if (!si) - return NULL; - for (;;) { int err; + /* - * First check the swap cache. Since this is normally - * called after swap_cache_get_folio() failed, re-calling - * that would confuse statistics. + * Check the swap cache first, if a cached folio is found, + * return it unlocked. The caller will lock and check it. */ - folio = filemap_get_folio(swap_address_space(entry), - swap_cache_index(entry)); - if (!IS_ERR(folio)) + folio = swap_cache_get_folio(entry); + if (folio) goto got_folio; /* * Just skip read ahead for unused swap slot. - * During swap_off when swap_slot_cache is disabled, - * we have to handle the race between putting - * swap entry in swap cache and marking swap slot - * as SWAP_HAS_CACHE. That's done in later part of code or - * else swap_off will be aborted if we return NULL. 
*/ - if (!swap_swapcount(si, entry) && swap_slot_cache_enabled) + if (!swap_entry_swapped(si, entry)) goto put_and_return; /* @@ -500,7 +463,7 @@ struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, goto put_and_return; /* - * We might race against __delete_from_swap_cache(), and + * We might race against __swap_cache_del_folio(), and * stumble across a swap_map entry whose SWAP_HAS_CACHE * has not yet been cleared. Or race against another * __read_swap_cache_async(), which has set SWAP_HAS_CACHE @@ -518,11 +481,8 @@ struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, if (mem_cgroup_swapin_charge_folio(new_folio, NULL, gfp_mask, entry)) goto fail_unlock; - /* May fail (-ENOMEM) if XArray node allocation failed. */ - if (add_to_swap_cache(new_folio, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow)) - goto fail_unlock; - - mem_cgroup_swapin_uncharge_swap(entry, 1); + swap_cache_add_folio(new_folio, entry, &shadow); + memcg1_swapin(entry, 1); if (shadow) workingset_refault(new_folio, shadow); @@ -539,7 +499,6 @@ fail_unlock: put_swap_folio(new_folio, entry); folio_unlock(new_folio); put_and_return: - put_swap_device(si); if (!(*new_page_allocated) && new_folio) folio_put(new_folio); return result; @@ -550,20 +509,21 @@ put_and_return: * and reading the disk if it is not already cached. * A failure return means that either the page allocation failed or that * the swap entry is no longer in use. - * - * get/put_swap_device() aren't needed to call this function, because - * __read_swap_cache_async() call them and swap_read_folio() holds the - * swap cache folio lock. 
*/ struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, struct vm_area_struct *vma, unsigned long addr, struct swap_iocb **plug) { + struct swap_info_struct *si; bool page_allocated; struct mempolicy *mpol; pgoff_t ilx; struct folio *folio; + si = get_swap_device(entry); + if (!si) + return NULL; + mpol = get_vma_policy(vma, addr, 0, &ilx); folio = __read_swap_cache_async(entry, gfp_mask, mpol, ilx, &page_allocated, false); @@ -571,6 +531,8 @@ struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, if (page_allocated) swap_read_folio(folio, plug); + + put_swap_device(si); return folio; } @@ -661,7 +623,7 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask, unsigned long offset = entry_offset; unsigned long start_offset, end_offset; unsigned long mask; - struct swap_info_struct *si = swp_swap_info(entry); + struct swap_info_struct *si = __swap_entry_to_info(entry); struct blk_plug plug; struct swap_iocb *splug = NULL; bool page_allocated; @@ -707,41 +669,6 @@ skip: return folio; } -int init_swap_address_space(unsigned int type, unsigned long nr_pages) -{ - struct address_space *spaces, *space; - unsigned int i, nr; - - nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES); - spaces = kvcalloc(nr, sizeof(struct address_space), GFP_KERNEL); - if (!spaces) - return -ENOMEM; - for (i = 0; i < nr; i++) { - space = spaces + i; - xa_init_flags(&space->i_pages, XA_FLAGS_LOCK_IRQ); - atomic_set(&space->i_mmap_writable, 0); - space->a_ops = &swap_aops; - /* swap cache doesn't use writeback related tags */ - mapping_set_no_writeback_tags(space); - } - nr_swapper_spaces[type] = nr; - swapper_spaces[type] = spaces; - - return 0; -} - -void exit_swap_address_space(unsigned int type) -{ - int i; - struct address_space *spaces = swapper_spaces[type]; - - for (i = 0; i < nr_swapper_spaces[type]; i++) - VM_WARN_ON_ONCE(!mapping_empty(&spaces[i])); - kvfree(spaces); - nr_swapper_spaces[type] = 0; - swapper_spaces[type] = NULL; -} 
- static int swap_vma_ra_win(struct vm_fault *vmf, unsigned long *start, unsigned long *end) { @@ -805,7 +732,6 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask, pte_t *pte = NULL, pentry; int win; unsigned long start, end, addr; - swp_entry_t entry; pgoff_t ilx; bool page_allocated; @@ -817,21 +743,34 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask, blk_start_plug(&plug); for (addr = start; addr < end; ilx++, addr += PAGE_SIZE) { + struct swap_info_struct *si = NULL; + softleaf_t entry; + if (!pte++) { pte = pte_offset_map(vmf->pmd, addr); if (!pte) break; } pentry = ptep_get_lockless(pte); - if (!is_swap_pte(pentry)) - continue; - entry = pte_to_swp_entry(pentry); - if (unlikely(non_swap_entry(entry))) + entry = softleaf_from_pte(pentry); + + if (!softleaf_is_swap(entry)) continue; pte_unmap(pte); pte = NULL; + /* + * Readahead entry may come from a device that we are not + * holding a reference to, try to grab a reference, or skip. + */ + if (swp_type(entry) != swp_type(targ_entry)) { + si = get_swap_device(entry); + if (!si) + continue; + } folio = __read_swap_cache_async(entry, gfp_mask, mpol, ilx, &page_allocated, false); + if (si) + put_swap_device(si); if (!folio) continue; if (page_allocated) { @@ -914,7 +853,7 @@ static const struct attribute_group swap_attr_group = { .attrs = swap_attrs, }; -static int __init swap_init_sysfs(void) +static int __init swap_init(void) { int err; struct kobject *swap_kobj; @@ -929,11 +868,13 @@ static int __init swap_init_sysfs(void) pr_err("failed to register swap group\n"); goto delete_obj; } + /* Swap cache writeback is LRU based, no tags for it */ + mapping_set_no_writeback_tags(&swap_space); return 0; delete_obj: kobject_put(swap_kobj); return err; } -subsys_initcall(swap_init_sysfs); +subsys_initcall(swap_init); #endif |
