summaryrefslogtreecommitdiff
path: root/include/linux/pagemap.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/pagemap.h')
-rw-r--r--include/linux/pagemap.h784
1 files changed, 453 insertions, 331 deletions
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 29e1f9e76eb6..31a848485ad9 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -30,13 +30,21 @@ static inline void invalidate_remote_inode(struct inode *inode)
int invalidate_inode_pages2(struct address_space *mapping);
int invalidate_inode_pages2_range(struct address_space *mapping,
pgoff_t start, pgoff_t end);
+int kiocb_invalidate_pages(struct kiocb *iocb, size_t count);
+void kiocb_invalidate_post_direct_write(struct kiocb *iocb, size_t count);
+int filemap_invalidate_pages(struct address_space *mapping,
+ loff_t pos, loff_t end, bool nowait);
+
int write_inode_now(struct inode *, int sync);
int filemap_fdatawrite(struct address_space *);
int filemap_flush(struct address_space *);
+int filemap_flush_nr(struct address_space *mapping, long *nr_to_write);
int filemap_fdatawait_keep_errors(struct address_space *mapping);
int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend);
int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
loff_t start_byte, loff_t end_byte);
+int filemap_invalidate_inode(struct inode *inode, bool flush,
+ loff_t start, loff_t end);
static inline int filemap_fdatawait(struct address_space *mapping)
{
@@ -46,14 +54,11 @@ static inline int filemap_fdatawait(struct address_space *mapping)
bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend);
int filemap_write_and_wait_range(struct address_space *mapping,
loff_t lstart, loff_t lend);
-int __filemap_fdatawrite_range(struct address_space *mapping,
- loff_t start, loff_t end, int sync_mode);
int filemap_fdatawrite_range(struct address_space *mapping,
loff_t start, loff_t end);
int filemap_check_errors(struct address_space *mapping);
void __filemap_set_wb_err(struct address_space *mapping, int err);
-int filemap_fdatawrite_wbc(struct address_space *mapping,
- struct writeback_control *wbc);
+int kiocb_write_and_wait(struct kiocb *iocb, size_t count);
static inline int filemap_write_and_wait(struct address_space *mapping)
{
@@ -132,7 +137,7 @@ static inline int inode_drain_writes(struct inode *inode)
return filemap_write_and_wait(inode->i_mapping);
}
-static inline bool mapping_empty(struct address_space *mapping)
+static inline bool mapping_empty(const struct address_space *mapping)
{
return xa_empty(&mapping->i_pages);
}
@@ -158,7 +163,7 @@ static inline bool mapping_empty(struct address_space *mapping)
* refcount and the referenced bit, which will be elevated or set in
* the process of adding new cache pages to an inode.
*/
-static inline bool mapping_shrinkable(struct address_space *mapping)
+static inline bool mapping_shrinkable(const struct address_space *mapping)
{
void *head;
@@ -198,9 +203,24 @@ enum mapping_flags {
AS_EXITING = 4, /* final truncate in progress */
/* writeback related tags are not used */
AS_NO_WRITEBACK_TAGS = 5,
- AS_LARGE_FOLIO_SUPPORT = 6,
+ AS_RELEASE_ALWAYS = 6, /* Call ->release_folio(), even if no private data */
+ AS_STABLE_WRITES = 7, /* must wait for writeback before modifying
+ folio contents */
+ AS_INACCESSIBLE = 8, /* Do not attempt direct R/W access to the mapping */
+ AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM = 9,
+ AS_KERNEL_FILE = 10, /* mapping for a fake kernel file that shouldn't
+ account usage to user cgroups */
+ /* Bits 16-25 are used for FOLIO_ORDER */
+ AS_FOLIO_ORDER_BITS = 5,
+ AS_FOLIO_ORDER_MIN = 16,
+ AS_FOLIO_ORDER_MAX = AS_FOLIO_ORDER_MIN + AS_FOLIO_ORDER_BITS,
};
+#define AS_FOLIO_ORDER_BITS_MASK ((1u << AS_FOLIO_ORDER_BITS) - 1)
+#define AS_FOLIO_ORDER_MIN_MASK (AS_FOLIO_ORDER_BITS_MASK << AS_FOLIO_ORDER_MIN)
+#define AS_FOLIO_ORDER_MAX_MASK (AS_FOLIO_ORDER_BITS_MASK << AS_FOLIO_ORDER_MAX)
+#define AS_FOLIO_ORDER_MASK (AS_FOLIO_ORDER_MIN_MASK | AS_FOLIO_ORDER_MAX_MASK)
+
/**
* mapping_set_error - record a writeback error in the address_space
* @mapping: the mapping in which an error should be set
@@ -244,7 +264,7 @@ static inline void mapping_clear_unevictable(struct address_space *mapping)
clear_bit(AS_UNEVICTABLE, &mapping->flags);
}
-static inline bool mapping_unevictable(struct address_space *mapping)
+static inline bool mapping_unevictable(const struct address_space *mapping)
{
return mapping && test_bit(AS_UNEVICTABLE, &mapping->flags);
}
@@ -254,7 +274,7 @@ static inline void mapping_set_exiting(struct address_space *mapping)
set_bit(AS_EXITING, &mapping->flags);
}
-static inline int mapping_exiting(struct address_space *mapping)
+static inline int mapping_exiting(const struct address_space *mapping)
{
return test_bit(AS_EXITING, &mapping->flags);
}
@@ -264,18 +284,74 @@ static inline void mapping_set_no_writeback_tags(struct address_space *mapping)
set_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
}
-static inline int mapping_use_writeback_tags(struct address_space *mapping)
+static inline int mapping_use_writeback_tags(const struct address_space *mapping)
{
return !test_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
}
-static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
+static inline bool mapping_release_always(const struct address_space *mapping)
+{
+ return test_bit(AS_RELEASE_ALWAYS, &mapping->flags);
+}
+
+static inline void mapping_set_release_always(struct address_space *mapping)
+{
+ set_bit(AS_RELEASE_ALWAYS, &mapping->flags);
+}
+
+static inline void mapping_clear_release_always(struct address_space *mapping)
+{
+ clear_bit(AS_RELEASE_ALWAYS, &mapping->flags);
+}
+
+static inline bool mapping_stable_writes(const struct address_space *mapping)
+{
+ return test_bit(AS_STABLE_WRITES, &mapping->flags);
+}
+
+static inline void mapping_set_stable_writes(struct address_space *mapping)
+{
+ set_bit(AS_STABLE_WRITES, &mapping->flags);
+}
+
+static inline void mapping_clear_stable_writes(struct address_space *mapping)
+{
+ clear_bit(AS_STABLE_WRITES, &mapping->flags);
+}
+
+static inline void mapping_set_inaccessible(struct address_space *mapping)
+{
+ /*
+ * It's expected inaccessible mappings are also unevictable. Compaction
+ * migrate scanner (isolate_migratepages_block()) relies on this to
+ * reduce page locking.
+ */
+ set_bit(AS_UNEVICTABLE, &mapping->flags);
+ set_bit(AS_INACCESSIBLE, &mapping->flags);
+}
+
+static inline bool mapping_inaccessible(const struct address_space *mapping)
+{
+ return test_bit(AS_INACCESSIBLE, &mapping->flags);
+}
+
+static inline void mapping_set_writeback_may_deadlock_on_reclaim(struct address_space *mapping)
+{
+ set_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags);
+}
+
+static inline bool mapping_writeback_may_deadlock_on_reclaim(const struct address_space *mapping)
+{
+ return test_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags);
+}
+
+static inline gfp_t mapping_gfp_mask(const struct address_space *mapping)
{
return mapping->gfp_mask;
}
/* Restricts the given gfp_mask to what the mapping allows. */
-static inline gfp_t mapping_gfp_constraint(struct address_space *mapping,
+static inline gfp_t mapping_gfp_constraint(const struct address_space *mapping,
gfp_t gfp_mask)
{
return mapping_gfp_mask(mapping) & gfp_mask;
@@ -290,9 +366,84 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
m->gfp_mask = mask;
}
+/*
+ * There are some parts of the kernel which assume that PMD entries
+ * are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then,
+ * limit the maximum allocation order to PMD size. I'm not aware of any
+ * assumptions about maximum order if THP are disabled, but 8 seems like
+ * a good order (that's 1MB if you're using 4kB pages)
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define PREFERRED_MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER
+#else
+#define PREFERRED_MAX_PAGECACHE_ORDER 8
+#endif
+
+/*
+ * xas_split_alloc() does not support arbitrary orders. This implies no
+ * 512MB THP on ARM64 with 64KB base page size.
+ */
+#define MAX_XAS_ORDER (XA_CHUNK_SHIFT * 2 - 1)
+#define MAX_PAGECACHE_ORDER min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER)
+
+/*
+ * mapping_max_folio_size_supported() - Check the max folio size supported
+ *
+ * The filesystem should call this function at mount time if there is a
+ * requirement on the folio mapping size in the page cache.
+ */
+static inline size_t mapping_max_folio_size_supported(void)
+{
+ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+ return 1U << (PAGE_SHIFT + MAX_PAGECACHE_ORDER);
+ return PAGE_SIZE;
+}
+
+/*
+ * mapping_set_folio_order_range() - Set the orders supported by a file.
+ * @mapping: The address space of the file.
+ * @min: Minimum folio order (between 0-MAX_PAGECACHE_ORDER inclusive).
+ * @max: Maximum folio order (between @min-MAX_PAGECACHE_ORDER inclusive).
+ *
+ * The filesystem should call this function in its inode constructor to
+ * indicate which base size (min) and maximum size (max) of folio the VFS
+ * can use to cache the contents of the file. This should only be used
+ * if the filesystem needs special handling of folio sizes (ie there is
+ * something the core cannot know).
+ * Do not tune it based on, eg, i_size.
+ *
+ * Context: This should not be called while the inode is active as it
+ * is non-atomic.
+ */
+static inline void mapping_set_folio_order_range(struct address_space *mapping,
+ unsigned int min,
+ unsigned int max)
+{
+ if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+ return;
+
+ if (min > MAX_PAGECACHE_ORDER)
+ min = MAX_PAGECACHE_ORDER;
+
+ if (max > MAX_PAGECACHE_ORDER)
+ max = MAX_PAGECACHE_ORDER;
+
+ if (max < min)
+ max = min;
+
+ mapping->flags = (mapping->flags & ~AS_FOLIO_ORDER_MASK) |
+ (min << AS_FOLIO_ORDER_MIN) | (max << AS_FOLIO_ORDER_MAX);
+}
+
+static inline void mapping_set_folio_min_order(struct address_space *mapping,
+ unsigned int min)
+{
+ mapping_set_folio_order_range(mapping, min, MAX_PAGECACHE_ORDER);
+}
+
/**
* mapping_set_large_folios() - Indicate the file supports large folios.
- * @mapping: The file.
+ * @mapping: The address space of the file.
*
* The filesystem should call this function in its inode constructor to
* indicate that the VFS can use large folios to cache the contents of
@@ -303,20 +454,72 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
*/
static inline void mapping_set_large_folios(struct address_space *mapping)
{
- __set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
+ mapping_set_folio_order_range(mapping, 0, MAX_PAGECACHE_ORDER);
+}
+
+static inline unsigned int
+mapping_max_folio_order(const struct address_space *mapping)
+{
+ if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+ return 0;
+ return (mapping->flags & AS_FOLIO_ORDER_MAX_MASK) >> AS_FOLIO_ORDER_MAX;
+}
+
+static inline unsigned int
+mapping_min_folio_order(const struct address_space *mapping)
+{
+ if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+ return 0;
+ return (mapping->flags & AS_FOLIO_ORDER_MIN_MASK) >> AS_FOLIO_ORDER_MIN;
+}
+
+static inline unsigned long
+mapping_min_folio_nrpages(const struct address_space *mapping)
+{
+ return 1UL << mapping_min_folio_order(mapping);
+}
+
+static inline unsigned long
+mapping_min_folio_nrbytes(const struct address_space *mapping)
+{
+ return mapping_min_folio_nrpages(mapping) << PAGE_SHIFT;
+}
+
+/**
+ * mapping_align_index() - Align index for this mapping.
+ * @mapping: The address_space.
+ * @index: The page index.
+ *
+ * The index of a folio must be naturally aligned. If you are adding a
+ * new folio to the page cache and need to know what index to give it,
+ * call this function.
+ */
+static inline pgoff_t mapping_align_index(const struct address_space *mapping,
+ pgoff_t index)
+{
+ return round_down(index, mapping_min_folio_nrpages(mapping));
}
/*
* Large folio support currently depends on THP. These dependencies are
* being worked on but are not yet fixed.
*/
-static inline bool mapping_large_folio_support(struct address_space *mapping)
+static inline bool mapping_large_folio_support(const struct address_space *mapping)
+{
+ /* AS_FOLIO_ORDER is only reasonable for pagecache folios */
+ VM_WARN_ONCE((unsigned long)mapping & FOLIO_MAPPING_ANON,
+ "Anonymous mapping always supports large folio");
+
+ return mapping_max_folio_order(mapping) > 0;
+}
+
+/* Return the maximum folio size for this pagecache mapping, in bytes. */
+static inline size_t mapping_max_folio_size(const struct address_space *mapping)
{
- return IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
- test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
+ return PAGE_SIZE << mapping_max_folio_order(mapping);
}
-static inline int filemap_nr_thps(struct address_space *mapping)
+static inline int filemap_nr_thps(const struct address_space *mapping)
{
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
return atomic_read(&mapping->nr_thps);
@@ -345,44 +548,25 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping)
#endif
}
-struct address_space *page_mapping(struct page *);
-struct address_space *folio_mapping(struct folio *);
-struct address_space *swapcache_mapping(struct folio *);
+struct address_space *folio_mapping(const struct folio *folio);
/**
- * folio_file_mapping - Find the mapping this folio belongs to.
+ * folio_flush_mapping - Find the file mapping this folio belongs to.
* @folio: The folio.
*
* For folios which are in the page cache, return the mapping that this
- * page belongs to. Folios in the swap cache return the mapping of the
- * swap file or swap device where the data is stored. This is different
- * from the mapping returned by folio_mapping(). The only reason to
- * use it is if, like NFS, you return 0 from ->activate_swapfile.
+ * page belongs to. Anonymous folios return NULL, even if they're in
+ * the swap cache. Other kinds of folio also return NULL.
*
- * Do not call this for folios which aren't in the page cache or swap cache.
+ * This is ONLY used by architecture cache flushing code. If you aren't
+ * writing cache flushing code, you want either folio_mapping() or
+ * folio_file_mapping().
*/
-static inline struct address_space *folio_file_mapping(struct folio *folio)
+static inline struct address_space *folio_flush_mapping(struct folio *folio)
{
if (unlikely(folio_test_swapcache(folio)))
- return swapcache_mapping(folio);
-
- return folio->mapping;
-}
-
-static inline struct address_space *page_file_mapping(struct page *page)
-{
- return folio_file_mapping(page_folio(page));
-}
-
-/*
- * For file cache pages, return the address_space, otherwise return NULL
- */
-static inline struct address_space *page_mapping_file(struct page *page)
-{
- struct folio *folio = page_folio(page);
-
- if (unlikely(folio_test_swapcache(folio)))
return NULL;
+
return folio_mapping(folio);
}
@@ -467,22 +651,22 @@ static inline void *detach_page_private(struct page *page)
}
#ifdef CONFIG_NUMA
-struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order);
+struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order,
+ struct mempolicy *policy);
#else
-static inline struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order)
+static inline struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order,
+ struct mempolicy *policy)
{
- return folio_alloc(gfp, order);
+ return folio_alloc_noprof(gfp, order);
}
#endif
-static inline struct page *__page_cache_alloc(gfp_t gfp)
-{
- return &filemap_alloc_folio(gfp, 0)->page;
-}
+#define filemap_alloc_folio(...) \
+ alloc_hooks(filemap_alloc_folio_noprof(__VA_ARGS__))
-static inline struct page *page_cache_alloc(struct address_space *x)
+static inline struct page *__page_cache_alloc(gfp_t gfp)
{
- return __page_cache_alloc(mapping_gfp_mask(x));
+ return &filemap_alloc_folio(gfp, 0, NULL)->page;
}
static inline gfp_t readahead_gfp_mask(struct address_space *x)
@@ -497,20 +681,114 @@ pgoff_t page_cache_next_miss(struct address_space *mapping,
pgoff_t page_cache_prev_miss(struct address_space *mapping,
pgoff_t index, unsigned long max_scan);
-#define FGP_ACCESSED 0x00000001
-#define FGP_LOCK 0x00000002
-#define FGP_CREAT 0x00000004
-#define FGP_WRITE 0x00000008
-#define FGP_NOFS 0x00000010
-#define FGP_NOWAIT 0x00000020
-#define FGP_FOR_MMAP 0x00000040
-#define FGP_ENTRY 0x00000080
-#define FGP_STABLE 0x00000100
-
-struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
- int fgp_flags, gfp_t gfp);
+/**
+ * typedef fgf_t - Flags for getting folios from the page cache.
+ *
+ * Most users of the page cache will not need to use these flags;
+ * there are convenience functions such as filemap_get_folio() and
+ * filemap_lock_folio(). For users which need more control over exactly
+ * what is done with the folios, these flags to __filemap_get_folio()
+ * are available.
+ *
+ * * %FGP_ACCESSED - The folio will be marked accessed.
+ * * %FGP_LOCK - The folio is returned locked.
+ * * %FGP_CREAT - If no folio is present then a new folio is allocated,
+ * added to the page cache and the VM's LRU list. The folio is
+ * returned locked.
+ * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
+ * folio is already in cache. If the folio was allocated, unlock it
+ * before returning so the caller can do the same dance.
+ * * %FGP_WRITE - The folio will be written to by the caller.
+ * * %FGP_NOFS - __GFP_FS will get cleared in gfp.
+ * * %FGP_NOWAIT - Don't block on the folio lock.
+ * * %FGP_STABLE - Wait for the folio to be stable (finished writeback)
+ * * %FGP_DONTCACHE - Uncached buffered IO
+ * * %FGP_WRITEBEGIN - The flags to use in a filesystem write_begin()
+ * implementation.
+ */
+typedef unsigned int __bitwise fgf_t;
+
+#define FGP_ACCESSED ((__force fgf_t)0x00000001)
+#define FGP_LOCK ((__force fgf_t)0x00000002)
+#define FGP_CREAT ((__force fgf_t)0x00000004)
+#define FGP_WRITE ((__force fgf_t)0x00000008)
+#define FGP_NOFS ((__force fgf_t)0x00000010)
+#define FGP_NOWAIT ((__force fgf_t)0x00000020)
+#define FGP_FOR_MMAP ((__force fgf_t)0x00000040)
+#define FGP_STABLE ((__force fgf_t)0x00000080)
+#define FGP_DONTCACHE ((__force fgf_t)0x00000100)
+#define FGF_GET_ORDER(fgf) (((__force unsigned)fgf) >> 26) /* top 6 bits */
+
+#define FGP_WRITEBEGIN (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE)
+
+static inline unsigned int filemap_get_order(size_t size)
+{
+ unsigned int shift = ilog2(size);
+
+ if (shift <= PAGE_SHIFT)
+ return 0;
+
+ return shift - PAGE_SHIFT;
+}
+
+/**
+ * fgf_set_order - Encode a length in the fgf_t flags.
+ * @size: The suggested size of the folio to create.
+ *
+ * The caller of __filemap_get_folio() can use this to suggest a preferred
+ * size for the folio that is created. If there is already a folio at
+ * the index, it will be returned, no matter what its size. If a folio
+ * is freshly created, it may be of a different size than requested
+ * due to alignment constraints, memory pressure, or the presence of
+ * other folios at nearby indices.
+ */
+static inline fgf_t fgf_set_order(size_t size)
+{
+ unsigned int order = filemap_get_order(size);
+
+ if (!order)
+ return 0;
+ return (__force fgf_t)(order << 26);
+}
+
+void *filemap_get_entry(struct address_space *mapping, pgoff_t index);
+struct folio *__filemap_get_folio_mpol(struct address_space *mapping,
+ pgoff_t index, fgf_t fgf_flags, gfp_t gfp, struct mempolicy *policy);
struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
- int fgp_flags, gfp_t gfp);
+ fgf_t fgp_flags, gfp_t gfp);
+
+static inline struct folio *__filemap_get_folio(struct address_space *mapping,
+ pgoff_t index, fgf_t fgf_flags, gfp_t gfp)
+{
+ return __filemap_get_folio_mpol(mapping, index, fgf_flags, gfp, NULL);
+}
+
+/**
+ * write_begin_get_folio - Get folio for write_begin with flags.
+ * @iocb: The kiocb passed from write_begin (may be NULL).
+ * @mapping: The address space to search.
+ * @index: The page cache index.
+ * @len: Length of data being written.
+ *
+ * This is a helper for filesystem write_begin() implementations.
+ * It wraps __filemap_get_folio(), setting appropriate flags in
+ * the write begin context.
+ *
+ * Return: A folio or an ERR_PTR.
+ */
+static inline struct folio *write_begin_get_folio(const struct kiocb *iocb,
+ struct address_space *mapping, pgoff_t index, size_t len)
+{
+ fgf_t fgp_flags = FGP_WRITEBEGIN;
+
+ fgp_flags |= fgf_set_order(len);
+
+ if (iocb && iocb->ki_flags & IOCB_DONTCACHE)
+ fgp_flags |= FGP_DONTCACHE;
+
+ return __filemap_get_folio(mapping, index, fgp_flags,
+ mapping_gfp_mask(mapping));
+}
/**
* filemap_get_folio - Find and get a folio.
@@ -520,7 +798,8 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
* Looks up the page cache entry at @mapping & @index. If a folio is
* present, it is returned with an increased refcount.
*
- * Otherwise, %NULL is returned.
+ * Return: A folio or ERR_PTR(-ENOENT) if there is no folio in the cache for
+ * this index. Will not return a shadow, swap or DAX entry.
*/
static inline struct folio *filemap_get_folio(struct address_space *mapping,
pgoff_t index)
@@ -537,8 +816,8 @@ static inline struct folio *filemap_get_folio(struct address_space *mapping,
* present, it is returned locked with an increased refcount.
*
* Context: May sleep.
- * Return: A folio or %NULL if there is no folio in the cache for this
- * index. Will not return a shadow, swap or DAX entry.
+ * Return: A folio or ERR_PTR(-ENOENT) if there is no folio in the cache for
+ * this index. Will not return a shadow, swap or DAX entry.
*/
static inline struct folio *filemap_lock_folio(struct address_space *mapping,
pgoff_t index)
@@ -547,6 +826,26 @@ static inline struct folio *filemap_lock_folio(struct address_space *mapping,
}
/**
+ * filemap_grab_folio - grab a folio from the page cache
+ * @mapping: The address space to search
+ * @index: The page index
+ *
+ * Looks up the page cache entry at @mapping & @index. If no folio is found,
+ * a new folio is created. The folio is locked, marked as accessed, and
+ * returned.
+ *
+ * Return: A found or created folio. ERR_PTR(-ENOMEM) if no folio is found
+ * and failed to create a folio.
+ */
+static inline struct folio *filemap_grab_folio(struct address_space *mapping,
+ pgoff_t index)
+{
+ return __filemap_get_folio(mapping, index,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+ mapping_gfp_mask(mapping));
+}
+
+/**
* find_get_page - find and get a page reference
* @mapping: the address_space to search
* @offset: the page index
@@ -563,7 +862,7 @@ static inline struct page *find_get_page(struct address_space *mapping,
}
static inline struct page *find_get_page_flags(struct address_space *mapping,
- pgoff_t offset, int fgp_flags)
+ pgoff_t offset, fgf_t fgp_flags)
{
return pagecache_get_page(mapping, offset, fgp_flags, 0);
}
@@ -619,7 +918,8 @@ static inline struct page *find_or_create_page(struct address_space *mapping,
* @mapping: target address_space
* @index: the page index
*
- * Same as grab_cache_page(), but do not wait if the page is unavailable.
+ * Returns locked page at given index in given cache, creating it if
+ * needed, but do not wait if the page is locked or to reclaim memory.
* This is intended for speculative data generators, where the data can
* be regenerated if the page couldn't be grabbed. This routine should
* be safe to call while holding the lock for another page.
@@ -635,35 +935,26 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
mapping_gfp_mask(mapping));
}
-#define swapcache_index(folio) __page_file_index(&(folio)->page)
-
/**
- * folio_index - File index of a folio.
- * @folio: The folio.
- *
- * For a folio which is either in the page cache or the swap cache,
- * return its index within the address_space it belongs to. If you know
- * the page is definitely in the page cache, you can look at the folio's
- * index directly.
+ * folio_next_index - Get the index of the next folio.
+ * @folio: The current folio.
*
- * Return: The index (offset in units of pages) of a folio in its file.
+ * Return: The index of the folio which follows this folio in the file.
*/
-static inline pgoff_t folio_index(struct folio *folio)
+static inline pgoff_t folio_next_index(const struct folio *folio)
{
- if (unlikely(folio_test_swapcache(folio)))
- return swapcache_index(folio);
- return folio->index;
+ return folio->index + folio_nr_pages(folio);
}
/**
- * folio_next_index - Get the index of the next folio.
+ * folio_next_pos - Get the file position of the next folio.
* @folio: The current folio.
*
- * Return: The index of the folio which follows this folio in the file.
+ * Return: The position of the folio which follows this folio in the file.
*/
-static inline pgoff_t folio_next_index(struct folio *folio)
+static inline loff_t folio_next_pos(const struct folio *folio)
{
- return folio->index + folio_nr_pages(folio);
+ return (loff_t)folio_next_index(folio) << PAGE_SHIFT;
}
/**
@@ -678,9 +969,6 @@ static inline pgoff_t folio_next_index(struct folio *folio)
*/
static inline struct page *folio_file_page(struct folio *folio, pgoff_t index)
{
- /* HugeTLBfs indexes the page cache in units of hpage_size */
- if (folio_test_hugetlb(folio))
- return &folio->page;
return folio_page(folio, index & (folio_nr_pages(folio) - 1));
}
@@ -689,61 +977,29 @@ static inline struct page *folio_file_page(struct folio *folio, pgoff_t index)
* @folio: The folio.
* @index: The page index within the file.
*
- * Context: The caller should have the page locked in order to prevent
- * (eg) shmem from moving the page between the page cache and swap cache
- * and changing its index in the middle of the operation.
+ * Context: The caller should have the folio locked and ensure
+ * e.g., shmem did not move this folio to the swap cache.
* Return: true or false.
*/
-static inline bool folio_contains(struct folio *folio, pgoff_t index)
+static inline bool folio_contains(const struct folio *folio, pgoff_t index)
{
- /* HugeTLBfs indexes the page cache in units of hpage_size */
- if (folio_test_hugetlb(folio))
- return folio->index == index;
- return index - folio_index(folio) < folio_nr_pages(folio);
-}
-
-/*
- * Given the page we found in the page cache, return the page corresponding
- * to this index in the file
- */
-static inline struct page *find_subpage(struct page *head, pgoff_t index)
-{
- /* HugeTLBfs wants the head page regardless */
- if (PageHuge(head))
- return head;
-
- return head + (index & (thp_nr_pages(head) - 1));
+ VM_WARN_ON_ONCE_FOLIO(folio_test_swapcache(folio), folio);
+ return index - folio->index < folio_nr_pages(folio);
}
unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start,
pgoff_t end, struct folio_batch *fbatch);
unsigned filemap_get_folios_contig(struct address_space *mapping,
pgoff_t *start, pgoff_t end, struct folio_batch *fbatch);
-unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
- pgoff_t end, xa_mark_t tag, unsigned int nr_pages,
- struct page **pages);
-static inline unsigned find_get_pages_tag(struct address_space *mapping,
- pgoff_t *index, xa_mark_t tag, unsigned int nr_pages,
- struct page **pages)
-{
- return find_get_pages_range_tag(mapping, index, (pgoff_t)-1, tag,
- nr_pages, pages);
-}
-
-struct page *grab_cache_page_write_begin(struct address_space *mapping,
- pgoff_t index);
-
-/*
- * Returns locked page at given index in given cache, creating it if needed.
- */
-static inline struct page *grab_cache_page(struct address_space *mapping,
- pgoff_t index)
-{
- return find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
-}
+unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start,
+ pgoff_t end, xa_mark_t tag, struct folio_batch *fbatch);
+unsigned filemap_get_folios_dirty(struct address_space *mapping,
+ pgoff_t *start, pgoff_t end, struct folio_batch *fbatch);
struct folio *read_cache_folio(struct address_space *, pgoff_t index,
filler_t *filler, struct file *file);
+struct folio *mapping_read_folio_gfp(struct address_space *, pgoff_t index,
+ gfp_t flags);
struct page *read_cache_page(struct address_space *, pgoff_t index,
filler_t *filler, struct file *file);
extern struct page * read_cache_page_gfp(struct address_space *mapping,
@@ -761,36 +1017,34 @@ static inline struct folio *read_mapping_folio(struct address_space *mapping,
return read_cache_folio(mapping, index, NULL, file);
}
-/*
- * Get index of the page within radix-tree (but not for hugetlb pages).
- * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE)
+/**
+ * page_pgoff - Calculate the logical page offset of this page.
+ * @folio: The folio containing this page.
+ * @page: The page which we need the offset of.
+ *
+ * For file pages, this is the offset from the beginning of the file
+ * in units of PAGE_SIZE. For anonymous pages, this is the offset from
+ * the beginning of the anon_vma in units of PAGE_SIZE. This will
+ * return nonsense for KSM pages.
+ *
+ * Context: Caller must have a reference on the folio or otherwise
+ * prevent it from being split or freed.
+ *
+ * Return: The offset in units of PAGE_SIZE.
*/
-static inline pgoff_t page_to_index(struct page *page)
+static inline pgoff_t page_pgoff(const struct folio *folio,
+ const struct page *page)
{
- struct page *head;
-
- if (likely(!PageTransTail(page)))
- return page->index;
-
- head = compound_head(page);
- /*
- * We don't initialize ->index for tail pages: calculate based on
- * head page
- */
- return head->index + page - head;
+ return folio->index + folio_page_idx(folio, page);
}
-extern pgoff_t hugetlb_basepage_index(struct page *page);
-
-/*
- * Get the offset in PAGE_SIZE (even for hugetlb pages).
- * (TODO: hugetlb pages should have ->index in PAGE_SIZE)
+/**
+ * folio_pos - Returns the byte position of this folio in its file.
+ * @folio: The folio.
*/
-static inline pgoff_t page_to_pgoff(struct page *page)
+static inline loff_t folio_pos(const struct folio *folio)
{
- if (unlikely(PageHuge(page)))
- return hugetlb_basepage_index(page);
- return page_to_index(page);
+ return ((loff_t)folio->index) * PAGE_SIZE;
}
/*
@@ -798,55 +1052,23 @@ static inline pgoff_t page_to_pgoff(struct page *page)
*/
static inline loff_t page_offset(struct page *page)
{
- return ((loff_t)page->index) << PAGE_SHIFT;
-}
-
-static inline loff_t page_file_offset(struct page *page)
-{
- return ((loff_t)page_index(page)) << PAGE_SHIFT;
-}
-
-/**
- * folio_pos - Returns the byte position of this folio in its file.
- * @folio: The folio.
- */
-static inline loff_t folio_pos(struct folio *folio)
-{
- return page_offset(&folio->page);
-}
+ struct folio *folio = page_folio(page);
-/**
- * folio_file_pos - Returns the byte position of this folio in its file.
- * @folio: The folio.
- *
- * This differs from folio_pos() for folios which belong to a swap file.
- * NFS is the only filesystem today which needs to use folio_file_pos().
- */
-static inline loff_t folio_file_pos(struct folio *folio)
-{
- return page_file_offset(&folio->page);
+ return folio_pos(folio) + folio_page_idx(folio, page) * PAGE_SIZE;
}
/*
* Get the offset in PAGE_SIZE (even for hugetlb folios).
- * (TODO: hugetlb folios should have ->index in PAGE_SIZE)
*/
-static inline pgoff_t folio_pgoff(struct folio *folio)
+static inline pgoff_t folio_pgoff(const struct folio *folio)
{
- if (unlikely(folio_test_hugetlb(folio)))
- return hugetlb_basepage_index(&folio->page);
return folio->index;
}
-extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
- unsigned long address);
-
-static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
- unsigned long address)
+static inline pgoff_t linear_page_index(const struct vm_area_struct *vma,
+ const unsigned long address)
{
pgoff_t pgoff;
- if (unlikely(is_vm_hugetlb_page(vma)))
- return linear_hugepage_index(vma, address);
pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
pgoff += vma->vm_pgoff;
return pgoff;
@@ -879,8 +1101,7 @@ static inline bool wake_page_match(struct wait_page_queue *wait_page,
void __folio_lock(struct folio *folio);
int __folio_lock_killable(struct folio *folio);
-bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm,
- unsigned int flags);
+vm_fault_t __folio_lock_or_retry(struct folio *folio, struct vm_fault *vmf);
void unlock_page(struct page *page);
void folio_unlock(struct folio *folio);
@@ -904,7 +1125,7 @@ static inline bool folio_trylock(struct folio *folio)
/*
* Return true if the page was successfully locked
*/
-static inline int trylock_page(struct page *page)
+static inline bool trylock_page(struct page *page)
{
return folio_trylock(page_folio(page));
}
@@ -978,27 +1199,19 @@ static inline int folio_lock_killable(struct folio *folio)
}
/*
- * lock_page_killable is like lock_page but can be interrupted by fatal
- * signals. It returns 0 if it locked the page and -EINTR if it was
- * killed while waiting.
- */
-static inline int lock_page_killable(struct page *page)
-{
- return folio_lock_killable(page_folio(page));
-}
-
-/*
* folio_lock_or_retry - Lock the folio, unless this would block and the
* caller indicated that it can handle a retry.
*
* Return value and mmap_lock implications depend on flags; see
* __folio_lock_or_retry().
*/
-static inline bool folio_lock_or_retry(struct folio *folio,
- struct mm_struct *mm, unsigned int flags)
+static inline vm_fault_t folio_lock_or_retry(struct folio *folio,
+ struct vm_fault *vmf)
{
might_sleep();
- return folio_trylock(folio) || __folio_lock_or_retry(folio, mm, flags);
+ if (!folio_trylock(folio))
+ return __folio_lock_or_retry(folio, vmf);
+ return 0;
}
/*
@@ -1028,29 +1241,16 @@ static inline int folio_wait_locked_killable(struct folio *folio)
return folio_wait_bit_killable(folio, PG_locked);
}
-static inline void wait_on_page_locked(struct page *page)
-{
- folio_wait_locked(page_folio(page));
-}
-
-static inline int wait_on_page_locked_killable(struct page *page)
-{
- return folio_wait_locked_killable(page_folio(page));
-}
-
+void folio_end_read(struct folio *folio, bool success);
void wait_on_page_writeback(struct page *page);
void folio_wait_writeback(struct folio *folio);
int folio_wait_writeback_killable(struct folio *folio);
void end_page_writeback(struct page *page);
void folio_end_writeback(struct folio *folio);
-void wait_for_stable_page(struct page *page);
+void folio_end_writeback_no_dropbehind(struct folio *folio);
+void folio_end_dropbehind(struct folio *folio);
void folio_wait_stable(struct folio *folio);
void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn);
-static inline void __set_page_dirty(struct page *page,
- struct address_space *mapping, int warn)
-{
- __folio_mark_dirty(page_folio(page), mapping, warn);
-}
void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb);
void __folio_cancel_dirty(struct folio *folio);
static inline void folio_cancel_dirty(struct folio *folio)
@@ -1062,13 +1262,6 @@ static inline void folio_cancel_dirty(struct folio *folio)
bool folio_clear_dirty_for_io(struct folio *folio);
bool clear_page_dirty_for_io(struct page *page);
void folio_invalidate(struct folio *folio, size_t offset, size_t length);
-int __must_check folio_write_one(struct folio *folio);
-static inline int __must_check write_one_page(struct page *page)
-{
- return folio_write_one(page_folio(page));
-}
-
-int __set_page_dirty_nobuffers(struct page *page);
bool noop_dirty_folio(struct address_space *mapping, struct folio *folio);
#ifdef CONFIG_MIGRATION
@@ -1077,18 +1270,11 @@ int filemap_migrate_folio(struct address_space *mapping, struct folio *dst,
#else
#define filemap_migrate_folio NULL
#endif
-void page_endio(struct page *page, bool is_write, int err);
-
void folio_end_private_2(struct folio *folio);
void folio_wait_private_2(struct folio *folio);
int folio_wait_private_2_killable(struct folio *folio);
/*
- * Add an arbitrary waiter to a page's wait queue
- */
-void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter);
-
-/*
* Fault in userspace address range.
*/
size_t fault_in_writeable(char __user *uaddr, size_t size);
@@ -1146,9 +1332,9 @@ static inline bool filemap_range_needs_writeback(struct address_space *mapping,
* struct readahead_control - Describes a readahead request.
*
* A readahead request is for consecutive pages. Filesystems which
- * implement the ->readahead method should call readahead_page() or
- * readahead_page_batch() in a loop and attempt to start I/O against
- * each page in the request.
+ * implement the ->readahead method should call readahead_folio() or
+ * __readahead_batch() in a loop and attempt to start reads into each
+ * folio in the request.
*
* Most of the fields in this struct are private and should be accessed
* by the functions below.
@@ -1166,6 +1352,7 @@ struct readahead_control {
pgoff_t _index;
unsigned int _nr_pages;
unsigned int _batch_count;
+ bool dropbehind;
bool _workingset;
unsigned long _pflags;
};
@@ -1215,8 +1402,7 @@ void page_cache_sync_readahead(struct address_space *mapping,
* @mapping: address_space which holds the pagecache and I/O vectors
* @ra: file_ra_state which holds the readahead state
* @file: Used by the filesystem for authentication.
- * @folio: The folio at @index which triggered the readahead call.
- * @index: Index of first page to be read.
+ * @folio: The folio which triggered the readahead call.
* @req_count: Total number of pages being read by the caller.
*
* page_cache_async_readahead() should be called when a page is used which
@@ -1227,9 +1413,9 @@ void page_cache_sync_readahead(struct address_space *mapping,
static inline
void page_cache_async_readahead(struct address_space *mapping,
struct file_ra_state *ra, struct file *file,
- struct folio *folio, pgoff_t index, unsigned long req_count)
+ struct folio *folio, unsigned long req_count)
{
- DEFINE_READAHEAD(ractl, file, ra, mapping, index);
+ DEFINE_READAHEAD(ractl, file, ra, mapping, folio->index);
page_cache_async_ra(&ractl, folio, req_count);
}
@@ -1254,22 +1440,6 @@ static inline struct folio *__readahead_folio(struct readahead_control *ractl)
}
/**
- * readahead_page - Get the next page to read.
- * @ractl: The current readahead request.
- *
- * Context: The page is locked and has an elevated refcount. The caller
- * should decreases the refcount once the page has been submitted for I/O
- * and unlock the page once all I/O to that page has completed.
- * Return: A pointer to the next page, or %NULL if we are done.
- */
-static inline struct page *readahead_page(struct readahead_control *ractl)
-{
- struct folio *folio = __readahead_folio(ractl);
-
- return &folio->page;
-}
-
-/**
* readahead_folio - Get the next folio to read.
* @ractl: The current readahead request.
*
@@ -1291,7 +1461,7 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac,
{
unsigned int i = 0;
XA_STATE(xas, &rac->mapping->i_pages, 0);
- struct page *page;
+ struct folio *folio;
BUG_ON(rac->_batch_count > rac->_nr_pages);
rac->_nr_pages -= rac->_batch_count;
@@ -1300,13 +1470,12 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac,
xas_set(&xas, rac->_index);
rcu_read_lock();
- xas_for_each(&xas, page, rac->_index + rac->_nr_pages - 1) {
- if (xas_retry(&xas, page))
+ xas_for_each(&xas, folio, rac->_index + rac->_nr_pages - 1) {
+ if (xas_retry(&xas, folio))
continue;
- VM_BUG_ON_PAGE(!PageLocked(page), page);
- VM_BUG_ON_PAGE(PageTail(page), page);
- array[i++] = page;
- rac->_batch_count += thp_nr_pages(page);
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+ array[i++] = folio_page(folio, 0);
+ rac->_batch_count += folio_nr_pages(folio);
if (i == array_sz)
break;
}
@@ -1316,24 +1485,10 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac,
}
/**
- * readahead_page_batch - Get a batch of pages to read.
- * @rac: The current readahead request.
- * @array: An array of pointers to struct page.
- *
- * Context: The pages are locked and have an elevated refcount. The caller
- * should decreases the refcount once the page has been submitted for I/O
- * and unlock the page once all I/O to that page has completed.
- * Return: The number of pages placed in the array. 0 indicates the request
- * is complete.
- */
-#define readahead_page_batch(rac, array) \
- __readahead_batch(rac, array, ARRAY_SIZE(array))
-
-/**
* readahead_pos - The byte offset into the file of this readahead request.
* @rac: The readahead request.
*/
-static inline loff_t readahead_pos(struct readahead_control *rac)
+static inline loff_t readahead_pos(const struct readahead_control *rac)
{
return (loff_t)rac->_index * PAGE_SIZE;
}
@@ -1342,7 +1497,7 @@ static inline loff_t readahead_pos(struct readahead_control *rac)
* readahead_length - The number of bytes in this readahead request.
* @rac: The readahead request.
*/
-static inline size_t readahead_length(struct readahead_control *rac)
+static inline size_t readahead_length(const struct readahead_control *rac)
{
return rac->_nr_pages * PAGE_SIZE;
}
@@ -1351,7 +1506,7 @@ static inline size_t readahead_length(struct readahead_control *rac)
* readahead_index - The index of the first page in this readahead request.
* @rac: The readahead request.
*/
-static inline pgoff_t readahead_index(struct readahead_control *rac)
+static inline pgoff_t readahead_index(const struct readahead_control *rac)
{
return rac->_index;
}
@@ -1360,7 +1515,7 @@ static inline pgoff_t readahead_index(struct readahead_control *rac)
* readahead_count - The number of pages in this readahead request.
* @rac: The readahead request.
*/
-static inline unsigned int readahead_count(struct readahead_control *rac)
+static inline unsigned int readahead_count(const struct readahead_control *rac)
{
return rac->_nr_pages;
}
@@ -1369,12 +1524,12 @@ static inline unsigned int readahead_count(struct readahead_control *rac)
* readahead_batch_length - The number of bytes in the current batch.
* @rac: The readahead request.
*/
-static inline size_t readahead_batch_length(struct readahead_control *rac)
+static inline size_t readahead_batch_length(const struct readahead_control *rac)
{
return rac->_batch_count * PAGE_SIZE;
}
-static inline unsigned long dir_pages(struct inode *inode)
+static inline unsigned long dir_pages(const struct inode *inode)
{
return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >>
PAGE_SHIFT;
@@ -1388,8 +1543,8 @@ static inline unsigned long dir_pages(struct inode *inode)
* Return: the number of bytes in the folio up to EOF,
* or -EFAULT if the folio was truncated.
*/
-static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio,
- struct inode *inode)
+static inline ssize_t folio_mkwrite_check_truncate(const struct folio *folio,
+ const struct inode *inode)
{
loff_t size = i_size_read(inode);
pgoff_t index = size >> PAGE_SHIFT;
@@ -1409,34 +1564,6 @@ static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio,
}
/**
- * page_mkwrite_check_truncate - check if page was truncated
- * @page: the page to check
- * @inode: the inode to check the page against
- *
- * Returns the number of bytes in the page up to EOF,
- * or -EFAULT if the page was truncated.
- */
-static inline int page_mkwrite_check_truncate(struct page *page,
- struct inode *inode)
-{
- loff_t size = i_size_read(inode);
- pgoff_t index = size >> PAGE_SHIFT;
- int offset = offset_in_page(size);
-
- if (page->mapping != inode->i_mapping)
- return -EFAULT;
-
- /* page is wholly inside EOF */
- if (page->index < index)
- return PAGE_SIZE;
- /* page is wholly past EOF */
- if (page->index > index || !offset)
- return -EFAULT;
- /* page is partially inside EOF */
- return offset;
-}
-
-/**
* i_blocks_per_folio - How many blocks fit in this folio.
* @inode: The inode which contains the blocks.
* @folio: The folio.
@@ -1448,14 +1575,9 @@ static inline int page_mkwrite_check_truncate(struct page *page,
* Return: The number of filesystem blocks covered by this folio.
*/
static inline
-unsigned int i_blocks_per_folio(struct inode *inode, struct folio *folio)
+unsigned int i_blocks_per_folio(const struct inode *inode,
+ const struct folio *folio)
{
return folio_size(folio) >> inode->i_blkbits;
}
-
-static inline
-unsigned int i_blocks_per_page(struct inode *inode, struct page *page)
-{
- return i_blocks_per_folio(inode, page_folio(page));
-}
#endif /* _LINUX_PAGEMAP_H */