diff options
Diffstat (limited to 'fs/btrfs/extent_io.h')
| -rw-r--r-- | fs/btrfs/extent_io.h | 334 |
1 files changed, 197 insertions, 137 deletions
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 0399cf8e3c32..02ebb2f238af 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -7,48 +7,62 @@ #include <linux/refcount.h> #include <linux/fiemap.h> #include <linux/btrfs_tree.h> +#include <linux/spinlock.h> +#include <linux/atomic.h> +#include <linux/rwsem.h> +#include <linux/list.h> +#include <linux/slab.h> +#include "messages.h" #include "ulist.h" - -/* - * flags for bio submission. The high bits indicate the compression - * type for this bio - */ -#define EXTENT_BIO_COMPRESSED 1 -#define EXTENT_BIO_FLAG_SHIFT 16 +#include "misc.h" + +struct page; +struct file; +struct folio; +struct inode; +struct fiemap_extent_info; +struct readahead_control; +struct address_space; +struct writeback_control; +struct extent_io_tree; +struct extent_map_tree; +struct extent_state; +struct btrfs_block_group; +struct btrfs_fs_info; +struct btrfs_inode; +struct btrfs_root; +struct btrfs_trans_handle; +struct btrfs_tree_parent_check; enum { EXTENT_BUFFER_UPTODATE, EXTENT_BUFFER_DIRTY, - EXTENT_BUFFER_CORRUPT, - /* this got triggered by readahead */ - EXTENT_BUFFER_READAHEAD, EXTENT_BUFFER_TREE_REF, EXTENT_BUFFER_STALE, EXTENT_BUFFER_WRITEBACK, - /* read IO error */ - EXTENT_BUFFER_READ_ERR, EXTENT_BUFFER_UNMAPPED, - EXTENT_BUFFER_IN_TREE, /* write IO error */ EXTENT_BUFFER_WRITE_ERR, - EXTENT_BUFFER_NO_CHECK, - EXTENT_BUFFER_ZONE_FINISH, + /* Indicate the extent buffer is written zeroed out (for zoned) */ + EXTENT_BUFFER_ZONED_ZEROOUT, + /* Indicate that extent buffer pages a being read */ + EXTENT_BUFFER_READING, }; /* these are flags for __process_pages_contig */ -#define PAGE_UNLOCK (1 << 0) -/* Page starts writeback, clear dirty bit and set writeback bit */ -#define PAGE_START_WRITEBACK (1 << 1) -#define PAGE_END_WRITEBACK (1 << 2) -#define PAGE_SET_ORDERED (1 << 3) -#define PAGE_SET_ERROR (1 << 4) -#define PAGE_LOCK (1 << 5) +enum { + ENUM_BIT(PAGE_UNLOCK), + /* Page starts writeback, clear dirty bit and set writeback bit */ + ENUM_BIT(PAGE_START_WRITEBACK), + ENUM_BIT(PAGE_END_WRITEBACK), + ENUM_BIT(PAGE_SET_ORDERED), +}; /* - * page->private values. Every page that is controlled by the extent - * map has page->private set to one. + * Folio private values. Every page that is controlled by the extent map has + * folio private set to this value. */ -#define EXTENT_PAGE_PRIVATE 1 +#define EXTENT_FOLIO_PRIVATE 1 /* * The extent buffer bitmap operations are done with byte granularity instead of @@ -58,67 +72,119 @@ enum { * single word in a bitmap may straddle two pages in the extent buffer. */ #define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE) -#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1) +#define BYTE_MASK ((1U << BITS_PER_BYTE) - 1) #define BITMAP_FIRST_BYTE_MASK(start) \ ((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK) #define BITMAP_LAST_BYTE_MASK(nbits) \ (BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1))) -struct btrfs_root; -struct btrfs_inode; -struct btrfs_io_bio; -struct btrfs_fs_info; -struct io_failure_record; -struct extent_io_tree; - -typedef blk_status_t (submit_bio_hook_t)(struct inode *inode, struct bio *bio, - int mirror_num, - unsigned long bio_flags); -typedef blk_status_t (extent_submit_bio_start_t)(struct inode *inode, - struct bio *bio, u64 dio_file_offset); +int __init extent_buffer_init_cachep(void); +void __cold extent_buffer_free_cachep(void); #define INLINE_EXTENT_BUFFER_PAGES (BTRFS_MAX_METADATA_BLOCKSIZE / PAGE_SIZE) struct extent_buffer { u64 start; - unsigned long len; + u32 len; + u32 folio_size; unsigned long bflags; struct btrfs_fs_info *fs_info; + + /* + * The address where the eb can be accessed without any cross-page handling. + * This can be NULL if not possible. + */ + void *addr; + spinlock_t refs_lock; - atomic_t refs; - atomic_t io_pages; + refcount_t refs; int read_mirror; - struct rcu_head rcu_head; - pid_t lock_owner; /* >= 0 if eb belongs to a log tree, -1 otherwise */ s8 log_index; + u8 folio_shift; + struct rcu_head rcu_head; struct rw_semaphore lock; - struct page *pages[INLINE_EXTENT_BUFFER_PAGES]; - struct list_head release_list; + /* + * Pointers to all the folios of the extent buffer. + * + * For now the folio is always order 0 (aka, a single page). + */ + struct folio *folios[INLINE_EXTENT_BUFFER_PAGES]; #ifdef CONFIG_BTRFS_DEBUG struct list_head leak_list; + pid_t lock_owner; #endif }; +struct btrfs_eb_write_context { + struct writeback_control *wbc; + struct extent_buffer *eb; + /* Block group @eb resides in. Only used for zoned mode. */ + struct btrfs_block_group *zoned_bg; +}; + +static inline unsigned long offset_in_eb_folio(const struct extent_buffer *eb, + u64 start) +{ + ASSERT(eb->folio_size); + return start & (eb->folio_size - 1); +} + /* - * Structure to record info about the bio being assembled, and other info like - * how many bytes are there before stripe/ordered extent boundary. + * Get the correct offset inside the page of extent buffer. + * + * @eb: target extent buffer + * @start: offset inside the extent buffer + * + * Will handle both sectorsize == PAGE_SIZE and sectorsize < PAGE_SIZE cases. */ -struct btrfs_bio_ctrl { - struct bio *bio; - unsigned long bio_flags; - u32 len_to_stripe_boundary; - u32 len_to_oe_boundary; -}; +static inline size_t get_eb_offset_in_folio(const struct extent_buffer *eb, + unsigned long offset) +{ + /* + * 1) sectorsize == PAGE_SIZE and nodesize >= PAGE_SIZE case + * 1.1) One large folio covering the whole eb + * The eb->start is aligned to folio size, thus adding it + * won't cause any difference. + * 1.2) Several page sized folios + * The eb->start is aligned to folio (page) size, thus + * adding it won't cause any difference. + * + * 2) sectorsize < PAGE_SIZE and nodesize < PAGE_SIZE case + * In this case there would only be one page sized folio, and there + * may be several different extent buffers in the page/folio. + * We need to add eb->start to properly access the offset inside + * that eb. + */ + return offset_in_folio(eb->folios[0], offset + eb->start); +} + +static inline unsigned long get_eb_folio_index(const struct extent_buffer *eb, + unsigned long offset) +{ + /* + * 1) sectorsize == PAGE_SIZE and nodesize >= PAGE_SIZE case + * 1.1) One large folio covering the whole eb. + * the folio_shift would be large enough to always make us + * return 0 as index. + * 1.2) Several page sized folios + * The folio_shift would be PAGE_SHIFT, giving us the correct + * index. + * + * 2) sectorsize < PAGE_SIZE and nodesize < PAGE_SIZE case + * The folio would only be page sized, and always give us 0 as index. + */ + return offset >> eb->folio_shift; +} /* * Structure to record how many bytes and which ranges are set/cleared */ struct extent_changeset { /* How many bytes are set/cleared in this operation */ - unsigned int bytes_changed; + u64 bytes_changed; /* Changed ranges */ struct ulist range_changed; @@ -142,6 +208,11 @@ static inline struct extent_changeset *extent_changeset_alloc(void) return ret; } +static inline void extent_changeset_prealloc(struct extent_changeset *changeset, gfp_t gfp_mask) +{ + ulist_prealloc(&changeset->range_changed, gfp_mask); +} + static inline void extent_changeset_release(struct extent_changeset *changeset) { if (!changeset) @@ -158,47 +229,23 @@ static inline void extent_changeset_free(struct extent_changeset *changeset) kfree(changeset); } -static inline void extent_set_compress_type(unsigned long *bio_flags, - int compress_type) -{ - *bio_flags |= compress_type << EXTENT_BIO_FLAG_SHIFT; -} +bool try_release_extent_mapping(struct folio *folio, gfp_t mask); +int try_release_extent_buffer(struct folio *folio); -static inline int extent_compress_type(unsigned long bio_flags) -{ - return bio_flags >> EXTENT_BIO_FLAG_SHIFT; -} - -struct extent_map_tree; - -typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode, - struct page *page, size_t pg_offset, - u64 start, u64 len); - -int try_release_extent_mapping(struct page *page, gfp_t mask); -int try_release_extent_buffer(struct page *page); - -int __must_check submit_one_bio(struct bio *bio, int mirror_num, - unsigned long bio_flags); -int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, - struct btrfs_bio_ctrl *bio_ctrl, - unsigned int read_flags, u64 *prev_em_start); -int extent_write_full_page(struct page *page, struct writeback_control *wbc); -int extent_write_locked_range(struct inode *inode, u64 start, u64 end); -int extent_writepages(struct address_space *mapping, - struct writeback_control *wbc); +int btrfs_read_folio(struct file *file, struct folio *folio); +void extent_write_locked_range(struct inode *inode, const struct folio *locked_folio, + u64 start, u64 end, struct writeback_control *wbc, + bool pages_dirty); +int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc); int btree_write_cache_pages(struct address_space *mapping, struct writeback_control *wbc); -void extent_readahead(struct readahead_control *rac); -int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, - u64 start, u64 len); -int set_page_extent_mapped(struct page *page); -void clear_page_extent_mapped(struct page *page); +void btrfs_btree_wait_writeback_range(struct btrfs_fs_info *fs_info, u64 start, u64 end); +void btrfs_readahead(struct readahead_control *rac); +int set_folio_extent_mapped(struct folio *folio); +void clear_folio_extent_mapped(struct folio *folio); struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, u64 owner_root, int level); -struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start, unsigned long len); struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, u64 start); struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src); @@ -206,17 +253,23 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, u64 start); void free_extent_buffer(struct extent_buffer *eb); void free_extent_buffer_stale(struct extent_buffer *eb); -#define WAIT_NONE 0 -#define WAIT_COMPLETE 1 -#define WAIT_PAGE_LOCK 2 -int read_extent_buffer_pages(struct extent_buffer *eb, int wait, - int mirror_num); -void wait_on_extent_buffer_writeback(struct extent_buffer *eb); +int read_extent_buffer_pages(struct extent_buffer *eb, int mirror_num, + const struct btrfs_tree_parent_check *parent_check); +int read_extent_buffer_pages_nowait(struct extent_buffer *eb, int mirror_num, + const struct btrfs_tree_parent_check *parent_check); + +static inline void wait_on_extent_buffer_writeback(struct extent_buffer *eb) +{ + wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK, + TASK_UNINTERRUPTIBLE); +} + void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, u64 owner_root, u64 gen, int level); void btrfs_readahead_node_child(struct extent_buffer *node, int slot); -static inline int num_extent_pages(const struct extent_buffer *eb) +/* Note: this can be used in for loops without caching the value in a variable. */ +static inline int __pure num_extent_pages(const struct extent_buffer *eb) { /* * For sectorsize == PAGE_SIZE case, since nodesize is always aligned to @@ -228,6 +281,24 @@ static inline int num_extent_pages(const struct extent_buffer *eb) return (eb->len >> PAGE_SHIFT) ?: 1; } +/* + * This can only be determined at runtime by checking eb::folios[0]. + * + * As we can have either one large folio covering the whole eb + * (either nodesize <= PAGE_SIZE, or high order folio), or multiple + * single-paged folios. + * + * Note: this can be used in for loops without caching the value in a variable. + */ +static inline int __pure num_extent_folios(const struct extent_buffer *eb) +{ + if (!eb->folios[0]) + return 0; + if (folio_order(eb->folios[0])) + return 1; + return num_extent_pages(eb); +} + static inline int extent_buffer_uptodate(const struct extent_buffer *eb) { return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); @@ -241,11 +312,24 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dst, int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb, void __user *dst, unsigned long start, unsigned long len); -void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *src); -void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb, - const void *src); void write_extent_buffer(const struct extent_buffer *eb, const void *src, unsigned long start, unsigned long len); + +static inline void write_extent_buffer_chunk_tree_uuid( + const struct extent_buffer *eb, const void *chunk_tree_uuid) +{ + write_extent_buffer(eb, chunk_tree_uuid, + offsetof(struct btrfs_header, chunk_tree_uuid), + BTRFS_FSID_SIZE); +} + +static inline void write_extent_buffer_fsid(const struct extent_buffer *eb, + const void *fsid) +{ + write_extent_buffer(eb, fsid, offsetof(struct btrfs_header, fsid), + BTRFS_FSID_SIZE); +} + void copy_extent_buffer_full(const struct extent_buffer *dst, const struct extent_buffer *src); void copy_extent_buffer(const struct extent_buffer *dst, @@ -260,57 +344,33 @@ void memmove_extent_buffer(const struct extent_buffer *dst, unsigned long len); void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start, unsigned long len); -int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start, - unsigned long pos); +bool extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start, + unsigned long pos); void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long start, unsigned long pos, unsigned long len); void extent_buffer_bitmap_clear(const struct extent_buffer *eb, unsigned long start, unsigned long pos, unsigned long len); -void clear_extent_buffer_dirty(const struct extent_buffer *eb); -bool set_extent_buffer_dirty(struct extent_buffer *eb); +void set_extent_buffer_dirty(struct extent_buffer *eb); void set_extent_buffer_uptodate(struct extent_buffer *eb); void clear_extent_buffer_uptodate(struct extent_buffer *eb); -int extent_buffer_under_io(const struct extent_buffer *eb); -void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); -void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end, - struct page *locked_page, + const struct folio *locked_folio, + struct extent_state **cached, u32 bits_to_clear, unsigned long page_ops); -struct bio *btrfs_bio_alloc(unsigned int nr_iovecs); -struct bio *btrfs_bio_clone(struct bio *bio); -struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size); - -void end_extent_writepage(struct page *page, int err, u64 start, u64 end); -int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num); - -/* - * When IO fails, either with EIO or csum verification fails, we - * try other mirrors that might have a good copy of the data. This - * io_failure_record is used to record state as we go through all the - * mirrors. If another mirror has good data, the sector is set up to date - * and things continue. If a good mirror can't be found, the original - * bio end_io callback is called to indicate things have failed. - */ -struct io_failure_record { - struct page *page; - u64 start; - u64 len; - u64 logical; - unsigned long bio_flags; - int this_mirror; - int failed_mirror; -}; +int extent_invalidate_folio(struct extent_io_tree *tree, + struct folio *folio, size_t offset); +void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans, + struct extent_buffer *buf); -int btrfs_repair_one_sector(struct inode *inode, - struct bio *failed_bio, u32 bio_offset, - struct page *page, unsigned int pgoff, - u64 start, int failed_mirror, - submit_bio_hook_t *submit_bio_hook); +int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array, + bool nofail); +int btrfs_alloc_folio_array(unsigned int nr_folios, unsigned int order, + struct folio **folio_array); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS bool find_lock_delalloc_range(struct inode *inode, - struct page *locked_page, u64 *start, + struct folio *locked_folio, u64 *start, u64 *end); #endif struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, |
