-rw-r--r-- | mm/shmem.c | 110
1 file changed, 105 insertions(+), 5 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 95b80c24f6f9..42e11ab2e944 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1967,6 +1967,65 @@ unlock:
 	return ERR_PTR(error);
 }
 
+static struct folio *shmem_swap_alloc_folio(struct inode *inode,
+		struct vm_area_struct *vma, pgoff_t index,
+		swp_entry_t entry, int order, gfp_t gfp)
+{
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct folio *new;
+	void *shadow;
+	int nr_pages;
+
+	/*
+	 * We have arrived here because our zones are constrained, so don't
+	 * limit chance of success with further cpuset and node constraints.
+	 */
+	gfp &= ~GFP_CONSTRAINT_MASK;
+	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && order > 0) {
+		gfp_t huge_gfp = vma_thp_gfp_mask(vma);
+
+		gfp = limit_gfp_mask(huge_gfp, gfp);
+	}
+
+	new = shmem_alloc_folio(gfp, order, info, index);
+	if (!new)
+		return ERR_PTR(-ENOMEM);
+
+	nr_pages = folio_nr_pages(new);
+	if (mem_cgroup_swapin_charge_folio(new, vma ? vma->vm_mm : NULL,
+					   gfp, entry)) {
+		folio_put(new);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/*
+	 * Prevent parallel swapin from proceeding with the swap cache flag.
+	 *
+	 * Of course there is another possible concurrent scenario as well,
+	 * that is to say, the swap cache flag of a large folio has already
+	 * been set by swapcache_prepare(), while another thread may have
+	 * already split the large swap entry stored in the shmem mapping.
+	 * In this case, shmem_add_to_page_cache() will help identify the
+	 * concurrent swapin and return -EEXIST.
+	 */
+	if (swapcache_prepare(entry, nr_pages)) {
+		folio_put(new);
+		return ERR_PTR(-EEXIST);
+	}
+
+	__folio_set_locked(new);
+	__folio_set_swapbacked(new);
+	new->swap = entry;
+
+	mem_cgroup_swapin_uncharge_swap(entry, nr_pages);
+	shadow = get_shadow_from_swap_cache(entry);
+	if (shadow)
+		workingset_refault(new, shadow);
+	folio_add_lru(new);
+	swap_read_folio(new, NULL);
+	return new;
+}
+
 /*
  * When a page is moved from swapcache to shmem filecache (either by the
  * usual swapin of shmem_get_folio_gfp(), or by the less common swapoff of
@@ -2070,7 +2129,8 @@ static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
 }
 
 static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
-					 struct folio *folio, swp_entry_t swap)
+					 struct folio *folio, swp_entry_t swap,
+					 bool skip_swapcache)
 {
 	struct address_space *mapping = inode->i_mapping;
 	swp_entry_t swapin_error;
@@ -2086,7 +2146,8 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
 
 	nr_pages = folio_nr_pages(folio);
 	folio_wait_writeback(folio);
-	delete_from_swap_cache(folio);
+	if (!skip_swapcache)
+		delete_from_swap_cache(folio);
 	/*
 	 * Don't treat swapin error folio as alloced. Otherwise inode->i_blocks
 	 * won't be 0 when inode is released and thus trigger WARN_ON(i_blocks)
@@ -2190,6 +2251,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct swap_info_struct *si;
 	struct folio *folio = NULL;
+	bool skip_swapcache = false;
 	swp_entry_t swap;
 	int error, nr_pages;
 
@@ -2211,6 +2273,8 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 	/* Look it up and read it in.. */
 	folio = swap_cache_get_folio(swap, NULL, 0);
 	if (!folio) {
+		int order = xa_get_order(&mapping->i_pages, index);
+		bool fallback_order0 = false;
 		int split_order;
 
 		/* Or update major stats only when swapin succeeds?? */
@@ -2221,6 +2285,33 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 		}
 
 		/*
+		 * If uffd is active for the vma, we need per-page fault
+		 * fidelity to maintain the uffd semantics, then fallback
+		 * to swapin order-0 folio, as well as for zswap case.
+		 */
+		if (order > 0 && ((vma && unlikely(userfaultfd_armed(vma))) ||
+				  !zswap_never_enabled()))
+			fallback_order0 = true;
+
+		/* Skip swapcache for synchronous device. */
+		if (!fallback_order0 && data_race(si->flags & SWP_SYNCHRONOUS_IO)) {
+			folio = shmem_swap_alloc_folio(inode, vma, index, swap, order, gfp);
+			if (!IS_ERR(folio)) {
+				skip_swapcache = true;
+				goto alloced;
+			}
+
+			/*
+			 * Fallback to swapin order-0 folio unless the swap entry
+			 * already exists.
+			 */
+			error = PTR_ERR(folio);
+			folio = NULL;
+			if (error == -EEXIST)
+				goto failed;
+		}
+
+		/*
 		 * Now swap device can only swap in order 0 folio, then we
 		 * should split the large swap entry stored in the pagecache
 		 * if necessary.
@@ -2250,9 +2341,10 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 		}
 	}
 
+alloced:
 	/* We have to do this with folio locked to prevent races */
 	folio_lock(folio);
-	if (!folio_test_swapcache(folio) ||
+	if ((!skip_swapcache && !folio_test_swapcache(folio)) ||
 	    folio->swap.val != swap.val ||
 	    !shmem_confirm_swap(mapping, index, swap)) {
 		error = -EEXIST;
@@ -2288,7 +2380,12 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 	if (sgp == SGP_WRITE)
 		folio_mark_accessed(folio);
 
-	delete_from_swap_cache(folio);
+	if (skip_swapcache) {
+		folio->swap.val = 0;
+		swapcache_clear(si, swap, nr_pages);
+	} else {
+		delete_from_swap_cache(folio);
+	}
 	folio_mark_dirty(folio);
 	swap_free_nr(swap, nr_pages);
 	put_swap_device(si);
@@ -2299,8 +2396,11 @@ failed:
 	if (!shmem_confirm_swap(mapping, index, swap))
 		error = -EEXIST;
 	if (error == -EIO)
-		shmem_set_folio_swapin_error(inode, index, folio, swap);
+		shmem_set_folio_swapin_error(inode, index, folio, swap,
+					     skip_swapcache);
 unlock:
+	if (skip_swapcache)
+		swapcache_clear(si, swap, folio_nr_pages(folio));
 	if (folio) {
 		folio_unlock(folio);
 		folio_put(folio);
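
Not part of the patch: for readers who want to exercise the new swapin path, a minimal userspace sketch follows. It assumes swap is backed by a synchronous device such as zram (otherwise the SWP_SYNCHRONOUS_IO fast path is never taken), a kernel and glibc recent enough for memfd_create() and MADV_PAGEOUT, and that the kernel actually honors the pageout hint; the memfd name and buffer size are arbitrary.

/*
 * Userspace sketch, not part of the patch: push shmem pages to swap and
 * fault them back in. On a synchronous swap device the swapin side now
 * goes through shmem_swap_alloc_folio() and bypasses the swap cache.
 */
#define _GNU_SOURCE
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t len = 2 * 1024 * 1024;	/* room for a large (mTHP) folio */
	int fd = memfd_create("shmem-swapin-demo", 0);

	if (fd < 0 || ftruncate(fd, len) < 0)
		return 1;

	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;

	memset(p, 0xab, len);		/* populate the shmem pages */
	madvise(p, len, MADV_PAGEOUT);	/* hint: reclaim them to swap */

	/*
	 * Touching the range again faults through shmem_swapin_folio();
	 * the kernel is free to have ignored the MADV_PAGEOUT hint, in
	 * which case these reads are plain cache hits.
	 */
	volatile unsigned char sum = 0;
	for (size_t i = 0; i < len; i += 4096)
		sum += (unsigned char)p[i];

	munmap(p, len);
	close(fd);
	return 0;
}

Whether any swapin actually happened can be observed via the pswpin counter in /proc/vmstat while the program runs.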