Diffstat (limited to 'mm/swap.h')
-rw-r--r--  mm/swap.h | 425
1 file changed, 363 insertions(+), 62 deletions(-)
diff --git a/mm/swap.h b/mm/swap.h
index 8a3c7a0ace4f..d034c13d8dd2 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -2,13 +2,191 @@
#ifndef _MM_SWAP_H
#define _MM_SWAP_H
+#include <linux/atomic.h> /* for atomic_long_t */
+struct mempolicy;
+struct swap_iocb;
+
+extern int page_cluster;
+
+#ifdef CONFIG_THP_SWAP
+#define SWAPFILE_CLUSTER HPAGE_PMD_NR
+#define swap_entry_order(order) (order)
+#else
+#define SWAPFILE_CLUSTER 256
+#define swap_entry_order(order) 0
+#endif
+
+extern struct swap_info_struct *swap_info[];
+
+/*
+ * We use this to track usage of a cluster. A cluster is a block of swap disk
+ * space that is SWAPFILE_CLUSTER pages long and naturally aligned on disk.
+ * All free clusters are organized into a list. We fetch an entry from the
+ * list to get a free cluster.
+ *
+ * The flags field determines if a cluster is free. This is
+ * protected by the cluster lock.
+ */
+struct swap_cluster_info {
+ spinlock_t lock; /*
+ * Protect swap_cluster_info fields
+ * other than list, and swap_info_struct->swap_map
+ * elements corresponding to the swap cluster.
+ */
+ u16 count;
+ u8 flags;
+ u8 order;
+ atomic_long_t __rcu *table; /* Swap table entries, see mm/swap_table.h */
+ struct list_head list;
+};
+
+/* All on-list clusters must have a non-zero flag. */
+enum swap_cluster_flags {
+ CLUSTER_FLAG_NONE = 0, /* For temporary off-list cluster */
+ CLUSTER_FLAG_FREE,
+ CLUSTER_FLAG_NONFULL,
+ CLUSTER_FLAG_FRAG,
+ /* Clusters with flags above are allocatable */
+ CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG,
+ CLUSTER_FLAG_FULL,
+ CLUSTER_FLAG_DISCARD,
+ CLUSTER_FLAG_MAX,
+};
+
#ifdef CONFIG_SWAP
+#include <linux/swapops.h> /* for swp_offset */
#include <linux/blk_types.h> /* for bio_end_io_t */
+static inline unsigned int swp_cluster_offset(swp_entry_t entry)
+{
+ return swp_offset(entry) % SWAPFILE_CLUSTER;
+}
+
+/*
+ * Callers of all helpers below must ensure the entry, type, or offset is
+ * valid, and protect the swap device with a reference count or locks.
+ */
+static inline struct swap_info_struct *__swap_type_to_info(int type)
+{
+ struct swap_info_struct *si;
+
+ si = READ_ONCE(swap_info[type]); /* rcu_dereference() */
+ VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
+ return si;
+}
+
+static inline struct swap_info_struct *__swap_entry_to_info(swp_entry_t entry)
+{
+ return __swap_type_to_info(swp_type(entry));
+}
+
+static inline struct swap_cluster_info *__swap_offset_to_cluster(
+ struct swap_info_struct *si, pgoff_t offset)
+{
+ VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
+ VM_WARN_ON_ONCE(offset >= si->max);
+ return &si->cluster_info[offset / SWAPFILE_CLUSTER];
+}
+
+static inline struct swap_cluster_info *__swap_entry_to_cluster(swp_entry_t entry)
+{
+ return __swap_offset_to_cluster(__swap_entry_to_info(entry),
+ swp_offset(entry));
+}
+
+static __always_inline struct swap_cluster_info *__swap_cluster_lock(
+ struct swap_info_struct *si, unsigned long offset, bool irq)
+{
+ struct swap_cluster_info *ci = __swap_offset_to_cluster(si, offset);
+
+ /*
+ * Nothing modifies swap cache in an IRQ context. All access to
+ * swap cache is wrapped by swap_cache_* helpers, and swap cache
+ * writeback is handled outside of IRQs. Swapin and swapout never
+ * occur in IRQ context, and neither does in-place split or replace.
+ *
+ * Besides, modifying swap cache requires synchronization with
+ * swap_map, which was never IRQ safe.
+ */
+ VM_WARN_ON_ONCE(!in_task());
+ VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
+ if (irq)
+ spin_lock_irq(&ci->lock);
+ else
+ spin_lock(&ci->lock);
+ return ci;
+}
+
+/**
+ * swap_cluster_lock - Lock and return the swap cluster of given offset.
+ * @si: swap device the cluster belongs to.
+ * @offset: the swap entry offset, pointing to a valid slot.
+ *
+ * Context: The caller must ensure the offset is in the valid range and
+ * protect the swap device with a reference count or locks.
+ */
+static inline struct swap_cluster_info *swap_cluster_lock(
+ struct swap_info_struct *si, unsigned long offset)
+{
+ return __swap_cluster_lock(si, offset, false);
+}
+
+static inline struct swap_cluster_info *__swap_cluster_get_and_lock(
+ const struct folio *folio, bool irq)
+{
+ VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
+ VM_WARN_ON_ONCE_FOLIO(!folio_test_swapcache(folio), folio);
+ return __swap_cluster_lock(__swap_entry_to_info(folio->swap),
+ swp_offset(folio->swap), irq);
+}
+
+/**
+ * swap_cluster_get_and_lock - Locks the cluster that holds a folio's entries.
+ * @folio: The folio.
+ *
+ * This locks and returns the swap cluster that contains a folio's swap
+ * entries. The swap entries of a folio are always in one single cluster.
+ * The folio has to be locked so its swap entries won't change and the
+ * cluster won't be freed.
+ *
+ * Context: Caller must ensure the folio is locked and in the swap cache.
+ * Return: Pointer to the swap cluster.
+ */
+static inline struct swap_cluster_info *swap_cluster_get_and_lock(
+ const struct folio *folio)
+{
+ return __swap_cluster_get_and_lock(folio, false);
+}
+
+/**
+ * swap_cluster_get_and_lock_irq - Locks the cluster that holds a folio's entries.
+ * @folio: The folio.
+ *
+ * Same as swap_cluster_get_and_lock() but also disables IRQs.
+ *
+ * Context: Caller must ensure the folio is locked and in the swap cache.
+ * Return: Pointer to the swap cluster.
+ */
+static inline struct swap_cluster_info *swap_cluster_get_and_lock_irq(
+ const struct folio *folio)
+{
+ return __swap_cluster_get_and_lock(folio, true);
+}
+
+static inline void swap_cluster_unlock(struct swap_cluster_info *ci)
+{
+ spin_unlock(&ci->lock);
+}
+
+static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci)
+{
+ spin_unlock_irq(&ci->lock);
+}
+
/* linux/mm/page_io.c */
int sio_pool_init(void);
struct swap_iocb;
-void swap_readpage(struct page *page, bool do_poll, struct swap_iocb **plug);
+void swap_read_folio(struct folio *folio, struct swap_iocb **plug);
void __swap_read_unplug(struct swap_iocb *plug);
static inline void swap_read_unplug(struct swap_iocb *plug)
{
@@ -16,54 +194,166 @@ static inline void swap_read_unplug(struct swap_iocb *plug)
__swap_read_unplug(plug);
}
void swap_write_unplug(struct swap_iocb *sio);
-int swap_writepage(struct page *page, struct writeback_control *wbc);
-void __swap_writepage(struct page *page, struct writeback_control *wbc);
+int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug);
+void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug);
/* linux/mm/swap_state.c */
-/* One swap address space for each 64M swap space */
-#define SWAP_ADDRESS_SPACE_SHIFT 14
-#define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT)
-extern struct address_space *swapper_spaces[];
-#define swap_address_space(entry) \
- (&swapper_spaces[swp_type(entry)][swp_offset(entry) \
- >> SWAP_ADDRESS_SPACE_SHIFT])
+extern struct address_space swap_space __ro_after_init;
+static inline struct address_space *swap_address_space(swp_entry_t entry)
+{
+ return &swap_space;
+}
+
+/*
+ * Return the swap device position of the swap entry.
+ */
+static inline loff_t swap_dev_pos(swp_entry_t entry)
+{
+ return ((loff_t)swp_offset(entry)) << PAGE_SHIFT;
+}
+
+/**
+ * folio_matches_swap_entry - Check if a folio matches a given swap entry.
+ * @folio: The folio.
+ * @entry: The swap entry to check against.
+ *
+ * Context: The caller should have the folio locked to ensure it's stable
+ * and nothing will move it in or out of the swap cache.
+ * Return: true or false.
+ */
+static inline bool folio_matches_swap_entry(const struct folio *folio,
+ swp_entry_t entry)
+{
+ swp_entry_t folio_entry = folio->swap;
+ long nr_pages = folio_nr_pages(folio);
+
+ VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
+ if (!folio_test_swapcache(folio))
+ return false;
+ VM_WARN_ON_ONCE_FOLIO(!IS_ALIGNED(folio_entry.val, nr_pages), folio);
+ return folio_entry.val == round_down(entry.val, nr_pages);
+}
+
+/*
+ * All swap cache helpers below require the caller to ensure the swap entries
+ * used are valid and to stabilize the device in any of the following ways:
+ * - Holding a reference via get_swap_device(): this ensures a single entry is
+ * valid and increases the swap device's refcount.
+ * - Locking a folio in the swap cache: this ensures the folio's swap entries
+ * are valid and pinned, and also implies a reference to the device.
+ * - Locking anything referencing the swap entry: e.g. the PTL that protects
+ * swap entries in the page table, similar to locking a swap cache folio.
+ * - See the comment of get_swap_device() for more complex usage.
+ */
+struct folio *swap_cache_get_folio(swp_entry_t entry);
+void *swap_cache_get_shadow(swp_entry_t entry);
+void swap_cache_add_folio(struct folio *folio, swp_entry_t entry, void **shadow);
+void swap_cache_del_folio(struct folio *folio);
+/* Below helpers require the caller to lock and pass in the swap cluster. */
+void __swap_cache_del_folio(struct swap_cluster_info *ci,
+ struct folio *folio, swp_entry_t entry, void *shadow);
+void __swap_cache_replace_folio(struct swap_cluster_info *ci,
+ struct folio *old, struct folio *new);
+void __swap_cache_clear_shadow(swp_entry_t entry, int nr_ents);
void show_swap_cache_info(void);
-bool add_to_swap(struct folio *folio);
-void *get_shadow_from_swap_cache(swp_entry_t entry);
-int add_to_swap_cache(struct folio *folio, swp_entry_t entry,
- gfp_t gfp, void **shadowp);
-void __delete_from_swap_cache(struct folio *folio,
- swp_entry_t entry, void *shadow);
-void delete_from_swap_cache(struct folio *folio);
-void clear_shadow_from_swap_cache(int type, unsigned long begin,
- unsigned long end);
-struct folio *swap_cache_get_folio(swp_entry_t entry,
- struct vm_area_struct *vma, unsigned long addr);
-struct folio *filemap_get_incore_folio(struct address_space *mapping,
- pgoff_t index);
-
-struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
- struct vm_area_struct *vma,
- unsigned long addr,
- struct swap_iocb **plug);
-struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
- struct vm_area_struct *vma,
- unsigned long addr,
- bool *new_page_allocated);
-struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
- struct vm_fault *vmf);
-struct page *swapin_readahead(swp_entry_t entry, gfp_t flag,
- struct vm_fault *vmf);
+void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr);
+struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
+ struct vm_area_struct *vma, unsigned long addr,
+ struct swap_iocb **plug);
+struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_flags,
+ struct mempolicy *mpol, pgoff_t ilx, bool *new_page_allocated,
+ bool skip_if_exists);
+struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
+ struct mempolicy *mpol, pgoff_t ilx);
+struct folio *swapin_readahead(swp_entry_t entry, gfp_t flag,
+ struct vm_fault *vmf);
+void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma,
+ unsigned long addr);
static inline unsigned int folio_swap_flags(struct folio *folio)
{
- return page_swap_info(&folio->page)->flags;
+ return __swap_entry_to_info(folio->swap)->flags;
+}
+
+/*
+ * Return the count of contiguous swap entries that share the same
+ * zeromap status as the starting entry. If is_zeromap is not NULL,
+ * it is set to the zeromap status of the starting entry.
+ */
+static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
+ bool *is_zeromap)
+{
+ struct swap_info_struct *sis = __swap_entry_to_info(entry);
+ unsigned long start = swp_offset(entry);
+ unsigned long end = start + max_nr;
+ bool first_bit;
+
+ first_bit = test_bit(start, sis->zeromap);
+ if (is_zeromap)
+ *is_zeromap = first_bit;
+
+ if (max_nr <= 1)
+ return max_nr;
+ if (first_bit)
+ return find_next_zero_bit(sis->zeromap, end, start) - start;
+ else
+ return find_next_bit(sis->zeromap, end, start) - start;
+}
+
+static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
+{
+ struct swap_info_struct *si = __swap_entry_to_info(entry);
+ pgoff_t offset = swp_offset(entry);
+ int i;
+
+ /*
+ * While allocating a large folio and doing mTHP swapin, we need to
+ * ensure none of the entries is cached; otherwise, the mTHP folio will
+ * conflict with the folio already in the swap cache.
+ */
+ for (i = 0; i < max_nr; i++) {
+ if ((si->swap_map[offset + i] & SWAP_HAS_CACHE))
+ return i;
+ }
+
+ return i;
}
+
#else /* CONFIG_SWAP */
struct swap_iocb;
-static inline void swap_readpage(struct page *page, bool do_poll,
- struct swap_iocb **plug)
+static inline struct swap_cluster_info *swap_cluster_lock(
+ struct swap_info_struct *si, unsigned long offset)
+{
+ return NULL;
+}
+
+static inline struct swap_cluster_info *swap_cluster_get_and_lock(
+ const struct folio *folio)
+{
+ return NULL;
+}
+
+static inline struct swap_cluster_info *swap_cluster_get_and_lock_irq(
+ const struct folio *folio)
+{
+ return NULL;
+}
+
+static inline void swap_cluster_unlock(struct swap_cluster_info *ci)
+{
+}
+
+static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci)
+{
+}
+
+static inline struct swap_info_struct *__swap_entry_to_info(swp_entry_t entry)
+{
+ return NULL;
+}
+
+static inline void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
{
}
static inline void swap_write_unplug(struct swap_iocb *sio)
@@ -75,67 +365,67 @@ static inline struct address_space *swap_address_space(swp_entry_t entry)
return NULL;
}
+static inline bool folio_matches_swap_entry(const struct folio *folio, swp_entry_t entry)
+{
+ return false;
+}
+
static inline void show_swap_cache_info(void)
{
}
-static inline struct page *swap_cluster_readahead(swp_entry_t entry,
- gfp_t gfp_mask, struct vm_fault *vmf)
+static inline struct folio *swap_cluster_readahead(swp_entry_t entry,
+ gfp_t gfp_mask, struct mempolicy *mpol, pgoff_t ilx)
{
return NULL;
}
-static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
+static inline struct folio *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
struct vm_fault *vmf)
{
return NULL;
}
-static inline int swap_writepage(struct page *p, struct writeback_control *wbc)
+static inline void swap_update_readahead(struct folio *folio,
+ struct vm_area_struct *vma, unsigned long addr)
{
- return 0;
}
-static inline struct folio *swap_cache_get_folio(swp_entry_t entry,
- struct vm_area_struct *vma, unsigned long addr)
+static inline int swap_writeout(struct folio *folio,
+ struct swap_iocb **swap_plug)
{
- return NULL;
+ return 0;
}
-static inline
-struct folio *filemap_get_incore_folio(struct address_space *mapping,
- pgoff_t index)
+static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr)
{
- return filemap_get_folio(mapping, index);
}
-static inline bool add_to_swap(struct folio *folio)
+static inline struct folio *swap_cache_get_folio(swp_entry_t entry)
{
- return false;
+ return NULL;
}
-static inline void *get_shadow_from_swap_cache(swp_entry_t entry)
+static inline void *swap_cache_get_shadow(swp_entry_t entry)
{
return NULL;
}
-static inline int add_to_swap_cache(struct folio *folio, swp_entry_t entry,
- gfp_t gfp_mask, void **shadowp)
+static inline void swap_cache_add_folio(struct folio *folio, swp_entry_t entry, void **shadow)
{
- return -1;
}
-static inline void __delete_from_swap_cache(struct folio *folio,
- swp_entry_t entry, void *shadow)
+static inline void swap_cache_del_folio(struct folio *folio)
{
}
-static inline void delete_from_swap_cache(struct folio *folio)
+static inline void __swap_cache_del_folio(struct swap_cluster_info *ci,
+ struct folio *folio, swp_entry_t entry, void *shadow)
{
}
-static inline void clear_shadow_from_swap_cache(int type, unsigned long begin,
- unsigned long end)
+static inline void __swap_cache_replace_folio(struct swap_cluster_info *ci,
+ struct folio *old, struct folio *new)
{
}
@@ -143,5 +433,16 @@ static inline unsigned int folio_swap_flags(struct folio *folio)
{
return 0;
}
+
+static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
+ bool *is_zeromap)
+{
+ return 0;
+}
+
+static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
+{
+ return 0;
+}
#endif /* CONFIG_SWAP */
#endif /* _MM_SWAP_H */
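
A minimal usage sketch of the new per-cluster locking helpers follows, for
illustration only and not part of the change above. The function name and its
arguments are hypothetical; the caller is assumed to have already pinned the
swap device and validated the offset, as the helpers themselves require:

static void example_touch_cluster(struct swap_info_struct *si,
				  unsigned long offset)
{
	struct swap_cluster_info *ci;

	/* Lock the cluster covering 'offset'. */
	ci = swap_cluster_lock(si, offset);
	/*
	 * Fields of *ci other than ->list, and the swap_map entries of
	 * this cluster, may be inspected or modified while the lock is held.
	 */
	swap_cluster_unlock(ci);
}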
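
Similarly, a hedged sketch of how swap_zeromap_batch() is meant to be consumed:
the caller walks a run of contiguous, valid entries on one device in batches
that each share a single zeromap status. The function name and the 'start' and
'nr_total' values are hypothetical:

static void example_walk_zeromap(swp_entry_t start, int nr_total)
{
	int done = 0;

	while (done < nr_total) {
		swp_entry_t entry = swp_entry(swp_type(start),
					      swp_offset(start) + done);
		bool is_zero;
		int nr;

		/* Count entries sharing the zeromap status of 'entry'. */
		nr = swap_zeromap_batch(entry, nr_total - done, &is_zero);
		/* ... handle 'nr' entries that are either all zero or all not ... */
		done += nr;
	}
}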
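
Finally, a sketch of the device-stabilization rule documented above the swap
cache helpers: pin the device with get_swap_device() before operating on a
bare entry, and drop the reference afterwards. Illustrative only; the function
name is hypothetical:

static void example_stabilized_access(swp_entry_t entry)
{
	struct swap_info_struct *si;

	/* Pin the swap device so 'entry' cannot become invalid under us. */
	si = get_swap_device(entry);
	if (!si)
		return;
	/*
	 * Swap cache helpers such as swap_cache_get_folio(entry) or
	 * swap_cache_get_shadow(entry) can be used safely here.
	 */
	put_swap_device(si);
}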