author     Linus Torvalds <torvalds@linux-foundation.org>   2022-12-12 20:47:51 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>   2022-12-12 20:47:51 -0800
commit     149c51f876322d9bfbd5e2d6ffae7aff3d794384
tree       a61c7dd828356e307fca06fc66dbdbf9b109c18f /fs/btrfs/inode.c
parent     97971df811b8854882c0f6c6631e23ab8cdcc44f
parent     b7af0635c87ff78d6bd523298ab7471f9ffd3ce5
Merge tag 'for-6.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"This round there are a lot of cleanups and moved code so the diffstat
looks huge, otherwise there are some nice performance improvements and
an update to raid56 reliability.
User visible features:
- raid56 reliability vs performance trade off:
- fix destructive RMW for raid5 data (raid6 still needs work): do
full checksum verification for all data during RMW cycle, this
should prevent rewriting potentially corrupted data without
notice
- stripes are cached in memory which should reduce the performance
impact but still can hurt some workloads
- checksums are verified after repair again
- this is the last option without introducing additional features
(write intent bitmap, journal, another tree), the extra checksum
read/verification was supposed to be avoided by the original
implementation exactly for performance reasons but that caused
all the reliability problems
- discard=async by default for devices that support it
- implement emergency flush reserve to avoid almost all unnecessary
transaction aborts due to ENOSPC in cases where there are too many
delayed refs or delayed allocation
- skip block group synchronization if there's no change in used
bytes, can reduce transaction commit count for some workloads
Performance improvements:
- fiemap and lseek:
- overall speedup due to skipping unnecessary or duplicate
searches (-40% run time)
- cache some data structures and sharedness of extents (-30% run
time)
- send:
- faster backref resolution when finding clones
- cached leaf to root mapping for faster backref walking
- improved clone/sharing detection
- overall run time improvements (-70%)
Core:
- module initialization converted to a table of function pointers run
in a sequence
- preparation for fscrypt, extend passing file names across calls,
dir item can store encryption status
- raid56 updates:
- more accurate error tracking of sectors within stripe
- simplify recovery path and remove dedicated endio worker kthread
- simplify scrub call paths
- refactoring to support the extra data checksum verification
during RMW cycle
- tree block parentness checks consolidated and done at metadata read
time
- improved error handling
- cleanups:
- move a lot of code for better synchronization between kernel and
user space sources, split big files
- enum cleanups
- GFP flag cleanups
- header file cleanups, prototypes, dependencies
- redundant parameter cleanups
- inline extent handling simplifications
- inode parameter conversion
- data structure cleanups, reductions, renames, merges"
* tag 'for-6.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (249 commits)
btrfs: print transaction aborted messages with an error level
btrfs: sync some cleanups from progs into uapi/btrfs.h
btrfs: do not BUG_ON() on ENOMEM when dropping extent items for a range
btrfs: fix extent map use-after-free when handling missing device in read_one_chunk
btrfs: remove outdated logic from overwrite_item() and add assertion
btrfs: unify overwrite_item() and do_overwrite_item()
btrfs: replace strncpy() with strscpy()
btrfs: fix uninitialized variable in find_first_clear_extent_bit
btrfs: fix uninitialized parent in insert_state
btrfs: add might_sleep() annotations
btrfs: add stack helpers for a few btrfs items
btrfs: add nr_global_roots to the super block definition
btrfs: remove BTRFS_LEAF_DATA_OFFSET
btrfs: add helpers for manipulating leaf items and data
btrfs: add eb to btrfs_node_key_ptr_offset
btrfs: pass the extent buffer for the btrfs_item_nr helpers
btrfs: move the csum helpers into ctree.h
btrfs: move eb offset helpers into extent_io.h
btrfs: move file_extent_item helpers into file-item.h
btrfs: move leaf_data_end into ctree.c
...
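
The Core item above about "module initialization converted to a table of function pointers run in a sequence" refers to a common C pattern: keep the init/exit pairs in an array and walk it, unwinding on failure. A minimal standalone sketch of that pattern, with purely hypothetical names rather than the actual btrfs code, could look like this:

/* Illustrative sketch only: hypothetical names, not the btrfs code. */
#include <stdio.h>

struct init_entry {
        int (*init)(void);   /* returns 0 on success */
        void (*exit)(void);  /* undoes the matching init */
};

static int init_a(void) { puts("init a"); return 0; }
static void exit_a(void) { puts("exit a"); }
static int init_b(void) { puts("init b"); return 0; }
static void exit_b(void) { puts("exit b"); }

/* The sequence is data, not a hand-written chain of calls. */
static const struct init_entry init_seq[] = {
        { .init = init_a, .exit = exit_a },
        { .init = init_b, .exit = exit_b },
};

static int run_init_sequence(void)
{
        int i;

        for (i = 0; i < (int)(sizeof(init_seq) / sizeof(init_seq[0])); i++) {
                int ret = init_seq[i].init();

                if (ret) {
                        /* Unwind already-initialized entries in reverse order. */
                        while (--i >= 0)
                                init_seq[i].exit();
                        return ret;
                }
        }
        return 0;
}

int main(void)
{
        return run_init_sequence();
}

Keeping the sequence as data makes the error-unwind path mechanical and keeps init and exit symmetric.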
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--  fs/btrfs/inode.c | 904
1 file changed, 484 insertions(+), 420 deletions(-)
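The diff below, limited to fs/btrfs/inode.c and taken against parent 97971df811b8854882c0f6c6631e23ab8cdcc44f, can be reproduced from a kernel tree with: git diff 97971df811b8854882c0f6c6631e23ab8cdcc44f 149c51f876322d9bfbd5e2d6ffae7aff3d794384 -- fs/btrfs/inode.c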
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5a54bb93c413..8bcad9940154 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -43,7 +43,7 @@ #include "ordered-data.h" #include "xattr.h" #include "tree-log.h" -#include "volumes.h" +#include "bio.h" #include "compression.h" #include "locking.h" #include "free-space-cache.h" @@ -55,6 +55,21 @@ #include "zoned.h" #include "subpage.h" #include "inode-item.h" +#include "fs.h" +#include "accessors.h" +#include "extent-tree.h" +#include "root-tree.h" +#include "defrag.h" +#include "dir-item.h" +#include "file-item.h" +#include "uuid-tree.h" +#include "ioctl.h" +#include "file.h" +#include "acl.h" +#include "relocation.h" +#include "verity.h" +#include "super.h" +#include "orphan.h" struct btrfs_iget_args { u64 ino; @@ -69,7 +84,7 @@ struct btrfs_dio_data { }; struct btrfs_dio_private { - struct inode *inode; + struct btrfs_inode *inode; /* * Since DIO can use anonymous page, we cannot use page_offset() to @@ -107,13 +122,9 @@ static const struct address_space_operations btrfs_aops; static const struct file_operations btrfs_dir_file_operations; static struct kmem_cache *btrfs_inode_cachep; -struct kmem_cache *btrfs_trans_handle_cachep; -struct kmem_cache *btrfs_path_cachep; -struct kmem_cache *btrfs_free_space_cachep; -struct kmem_cache *btrfs_free_space_bitmap_cachep; static int btrfs_setsize(struct inode *inode, struct iattr *attr); -static int btrfs_truncate(struct inode *inode, bool skip_writeback); +static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback); static noinline int cow_file_range(struct btrfs_inode *inode, struct page *locked_page, u64 start, u64 end, int *page_started, @@ -125,6 +136,32 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, u64 ram_bytes, int compress_type, int type); +static void __cold btrfs_print_data_csum_error(struct btrfs_inode *inode, + u64 logical_start, u8 *csum, u8 *csum_expected, int mirror_num) +{ + struct btrfs_root *root = inode->root; + const u32 csum_size = root->fs_info->csum_size; + + /* Output without objectid, which is more meaningful */ + if (root->root_key.objectid >= BTRFS_LAST_FREE_OBJECTID) { + btrfs_warn_rl(root->fs_info, +"csum failed root %lld ino %lld off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d", + root->root_key.objectid, btrfs_ino(inode), + logical_start, + CSUM_FMT_VALUE(csum_size, csum), + CSUM_FMT_VALUE(csum_size, csum_expected), + mirror_num); + } else { + btrfs_warn_rl(root->fs_info, +"csum failed root %llu ino %llu off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d", + root->root_key.objectid, btrfs_ino(inode), + logical_start, + CSUM_FMT_VALUE(csum_size, csum), + CSUM_FMT_VALUE(csum_size, csum_expected), + mirror_num); + } +} + /* * btrfs_inode_lock - lock inode i_rwsem based on arguments passed * @@ -135,27 +172,27 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, * return -EAGAIN * BTRFS_ILOCK_MMAP - acquire a write lock on the i_mmap_lock */ -int btrfs_inode_lock(struct inode *inode, unsigned int ilock_flags) +int btrfs_inode_lock(struct btrfs_inode *inode, unsigned int ilock_flags) { if (ilock_flags & BTRFS_ILOCK_SHARED) { if (ilock_flags & BTRFS_ILOCK_TRY) { - if (!inode_trylock_shared(inode)) + if (!inode_trylock_shared(&inode->vfs_inode)) return -EAGAIN; else return 0; } - inode_lock_shared(inode); + inode_lock_shared(&inode->vfs_inode); } else { if (ilock_flags & BTRFS_ILOCK_TRY) { - if (!inode_trylock(inode)) + if (!inode_trylock(&inode->vfs_inode)) 
return -EAGAIN; else return 0; } - inode_lock(inode); + inode_lock(&inode->vfs_inode); } if (ilock_flags & BTRFS_ILOCK_MMAP) - down_write(&BTRFS_I(inode)->i_mmap_lock); + down_write(&inode->i_mmap_lock); return 0; } @@ -165,14 +202,14 @@ int btrfs_inode_lock(struct inode *inode, unsigned int ilock_flags) * ilock_flags should contain the same bits set as passed to btrfs_inode_lock() * to decide whether the lock acquired is shared or exclusive. */ -void btrfs_inode_unlock(struct inode *inode, unsigned int ilock_flags) +void btrfs_inode_unlock(struct btrfs_inode *inode, unsigned int ilock_flags) { if (ilock_flags & BTRFS_ILOCK_MMAP) - up_write(&BTRFS_I(inode)->i_mmap_lock); + up_write(&inode->i_mmap_lock); if (ilock_flags & BTRFS_ILOCK_SHARED) - inode_unlock_shared(inode); + inode_unlock_shared(&inode->vfs_inode); else - inode_unlock(inode); + inode_unlock(&inode->vfs_inode); } /* @@ -249,7 +286,7 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode, return btrfs_mark_ordered_io_finished(inode, NULL, offset, bytes, false); } -static int btrfs_dirty_inode(struct inode *inode); +static int btrfs_dirty_inode(struct btrfs_inode *inode); static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, struct btrfs_new_inode_args *args) @@ -483,7 +520,7 @@ struct async_extent { }; struct async_chunk { - struct inode *inode; + struct btrfs_inode *inode; struct page *locked_page; u64 start; u64 end; @@ -611,8 +648,8 @@ static inline void inode_should_defrag(struct btrfs_inode *inode, */ static noinline int compress_file_range(struct async_chunk *async_chunk) { - struct inode *inode = async_chunk->inode; - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_inode *inode = async_chunk->inode; + struct btrfs_fs_info *fs_info = inode->root->fs_info; u64 blocksize = fs_info->sectorsize; u64 start = async_chunk->start; u64 end = async_chunk->end; @@ -629,8 +666,7 @@ static noinline int compress_file_range(struct async_chunk *async_chunk) int compressed_extents = 0; int redirty = 0; - inode_should_defrag(BTRFS_I(inode), start, end, end - start + 1, - SZ_16K); + inode_should_defrag(inode, start, end, end - start + 1, SZ_16K); /* * We need to save i_size before now because it could change in between @@ -642,7 +678,7 @@ static noinline int compress_file_range(struct async_chunk *async_chunk) * does that for us. */ barrier(); - i_size = i_size_read(inode); + i_size = i_size_read(&inode->vfs_inode); barrier(); actual_end = min_t(u64, i_size, end + 1); again: @@ -671,7 +707,7 @@ again: * isn't an inline extent, since it doesn't save disk space at all. */ if (total_compressed <= blocksize && - (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) + (start > 0 || end + 1 < inode->disk_i_size)) goto cleanup_and_bail_uncompressed; /* @@ -695,7 +731,7 @@ again: * inode has not been flagged as nocompress. This flag can * change at any time if we discover bad compression ratios. 
*/ - if (inode_need_compress(BTRFS_I(inode), start, end)) { + if (inode_need_compress(inode, start, end)) { WARN_ON(pages); pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); if (!pages) { @@ -704,10 +740,10 @@ again: goto cont; } - if (BTRFS_I(inode)->defrag_compress) - compress_type = BTRFS_I(inode)->defrag_compress; - else if (BTRFS_I(inode)->prop_compress) - compress_type = BTRFS_I(inode)->prop_compress; + if (inode->defrag_compress) + compress_type = inode->defrag_compress; + else if (inode->prop_compress) + compress_type = inode->prop_compress; /* * we need to call clear_page_dirty_for_io on each @@ -722,14 +758,14 @@ again: * has moved, the end is the original one. */ if (!redirty) { - extent_range_clear_dirty_for_io(inode, start, end); + extent_range_clear_dirty_for_io(&inode->vfs_inode, start, end); redirty = 1; } /* Compression level is applied here and only here */ ret = btrfs_compress_pages( compress_type | (fs_info->compress_level << 4), - inode->i_mapping, start, + inode->vfs_inode.i_mapping, start, pages, &nr_pages, &total_in, @@ -758,12 +794,12 @@ cont: /* we didn't compress the entire range, try * to make an uncompressed inline extent. */ - ret = cow_file_range_inline(BTRFS_I(inode), actual_end, + ret = cow_file_range_inline(inode, actual_end, 0, BTRFS_COMPRESS_NONE, NULL, false); } else { /* try making a compressed inline extent */ - ret = cow_file_range_inline(BTRFS_I(inode), actual_end, + ret = cow_file_range_inline(inode, actual_end, total_compressed, compress_type, pages, false); @@ -786,7 +822,7 @@ cont: * our outstanding extent for clearing delalloc for this * range. */ - extent_clear_unlock_delalloc(BTRFS_I(inode), start, end, + extent_clear_unlock_delalloc(inode, start, end, NULL, clear_flags, PAGE_UNLOCK | @@ -861,8 +897,8 @@ cont: /* flag the file so we don't compress in the future */ if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) && - !(BTRFS_I(inode)->prop_compress)) { - BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; + !(inode->prop_compress)) { + inode->flags |= BTRFS_INODE_NOCOMPRESS; } } cleanup_and_bail_uncompressed: @@ -880,7 +916,7 @@ cleanup_and_bail_uncompressed: } if (redirty) - extent_range_redirty_for_io(inode, start, end); + extent_range_redirty_for_io(&inode->vfs_inode, start, end); add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0, BTRFS_COMPRESS_NONE); compressed_extents++; @@ -1076,7 +1112,7 @@ out_free: */ static noinline void submit_compressed_extents(struct async_chunk *async_chunk) { - struct btrfs_inode *inode = BTRFS_I(async_chunk->inode); + struct btrfs_inode *inode = async_chunk->inode; struct btrfs_fs_info *fs_info = inode->root->fs_info; struct async_extent *async_extent; u64 alloc_hint = 0; @@ -1565,7 +1601,7 @@ static int cow_file_range_async(struct btrfs_inode *inode, */ ihold(&inode->vfs_inode); async_chunk[i].async_cow = ctx; - async_chunk[i].inode = &inode->vfs_inode; + async_chunk[i].inode = inode; async_chunk[i].start = start; async_chunk[i].end = cur_end; async_chunk[i].write_flags = write_flags; @@ -1673,9 +1709,8 @@ static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info, int ret; LIST_HEAD(list); - ret = btrfs_lookup_csums_range(csum_root, bytenr, - bytenr + num_bytes - 1, &list, 0, - nowait); + ret = btrfs_lookup_csums_list(csum_root, bytenr, bytenr + num_bytes - 1, + &list, 0, nowait); if (ret == 0 && list_empty(&list)) return 0; @@ -1733,7 +1768,7 @@ static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page, * when starting writeback. 
*/ count = count_range_bits(io_tree, &range_start, end, range_bytes, - EXTENT_NORESERVE, 0); + EXTENT_NORESERVE, 0, NULL); if (count > 0 || is_space_ino || is_reloc_ino) { u64 bytes = count; struct btrfs_fs_info *fs_info = inode->root->fs_info; @@ -2240,10 +2275,10 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page return ret; } -void btrfs_split_delalloc_extent(struct inode *inode, +void btrfs_split_delalloc_extent(struct btrfs_inode *inode, struct extent_state *orig, u64 split) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; u64 size; /* not delalloc, ignore it */ @@ -2267,9 +2302,9 @@ void btrfs_split_delalloc_extent(struct inode *inode, return; } - spin_lock(&BTRFS_I(inode)->lock); - btrfs_mod_outstanding_extents(BTRFS_I(inode), 1); - spin_unlock(&BTRFS_I(inode)->lock); + spin_lock(&inode->lock); + btrfs_mod_outstanding_extents(inode, 1); + spin_unlock(&inode->lock); } /* @@ -2277,10 +2312,10 @@ void btrfs_split_delalloc_extent(struct inode *inode, * that are just merged onto old extents, such as when we are doing sequential * writes, so we can properly account for the metadata space we'll need. */ -void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new, +void btrfs_merge_delalloc_extent(struct btrfs_inode *inode, struct extent_state *new, struct extent_state *other) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; u64 new_size, old_size; u32 num_extents; @@ -2295,9 +2330,9 @@ void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new, /* we're not bigger than the max, unreserve the space and go */ if (new_size <= fs_info->max_extent_size) { - spin_lock(&BTRFS_I(inode)->lock); - btrfs_mod_outstanding_extents(BTRFS_I(inode), -1); - spin_unlock(&BTRFS_I(inode)->lock); + spin_lock(&inode->lock); + btrfs_mod_outstanding_extents(inode, -1); + spin_unlock(&inode->lock); return; } @@ -2326,22 +2361,20 @@ void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new, if (count_max_extents(fs_info, new_size) >= num_extents) return; - spin_lock(&BTRFS_I(inode)->lock); - btrfs_mod_outstanding_extents(BTRFS_I(inode), -1); - spin_unlock(&BTRFS_I(inode)->lock); + spin_lock(&inode->lock); + btrfs_mod_outstanding_extents(inode, -1); + spin_unlock(&inode->lock); } static void btrfs_add_delalloc_inodes(struct btrfs_root *root, - struct inode *inode) + struct btrfs_inode *inode) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; spin_lock(&root->delalloc_lock); - if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { - list_add_tail(&BTRFS_I(inode)->delalloc_inodes, - &root->delalloc_inodes); - set_bit(BTRFS_INODE_IN_DELALLOC_LIST, - &BTRFS_I(inode)->runtime_flags); + if (list_empty(&inode->delalloc_inodes)) { + list_add_tail(&inode->delalloc_inodes, &root->delalloc_inodes); + set_bit(BTRFS_INODE_IN_DELALLOC_LIST, &inode->runtime_flags); root->nr_delalloc_inodes++; if (root->nr_delalloc_inodes == 1) { spin_lock(&fs_info->delalloc_root_lock); @@ -2354,7 +2387,6 @@ static void btrfs_add_delalloc_inodes(struct btrfs_root *root, spin_unlock(&root->delalloc_lock); } - void __btrfs_del_delalloc_inode(struct btrfs_root *root, struct btrfs_inode *inode) { @@ -2387,10 +2419,10 @@ static void btrfs_del_delalloc_inode(struct btrfs_root *root, * Properly track delayed allocation bytes in the inode and to maintain the * list of inodes 
that have pending delalloc work to be done. */ -void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state, +void btrfs_set_delalloc_extent(struct btrfs_inode *inode, struct extent_state *state, u32 bits) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; if ((bits & EXTENT_DEFRAG) && !(bits & EXTENT_DELALLOC)) WARN_ON(1); @@ -2400,14 +2432,14 @@ void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state, * bit, which is only set or cleared with irqs on */ if (!(state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_root *root = inode->root; u64 len = state->end + 1 - state->start; u32 num_extents = count_max_extents(fs_info, len); - bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode)); + bool do_list = !btrfs_is_free_space_inode(inode); - spin_lock(&BTRFS_I(inode)->lock); - btrfs_mod_outstanding_extents(BTRFS_I(inode), num_extents); - spin_unlock(&BTRFS_I(inode)->lock); + spin_lock(&inode->lock); + btrfs_mod_outstanding_extents(inode, num_extents); + spin_unlock(&inode->lock); /* For sanity tests */ if (btrfs_is_testing(fs_info)) @@ -2415,22 +2447,21 @@ void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state, percpu_counter_add_batch(&fs_info->delalloc_bytes, len, fs_info->delalloc_batch); - spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->delalloc_bytes += len; + spin_lock(&inode->lock); + inode->delalloc_bytes += len; if (bits & EXTENT_DEFRAG) - BTRFS_I(inode)->defrag_bytes += len; + inode->defrag_bytes += len; if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST, - &BTRFS_I(inode)->runtime_flags)) + &inode->runtime_flags)) btrfs_add_delalloc_inodes(root, inode); - spin_unlock(&BTRFS_I(inode)->lock); + spin_unlock(&inode->lock); } if (!(state->state & EXTENT_DELALLOC_NEW) && (bits & EXTENT_DELALLOC_NEW)) { - spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->new_delalloc_bytes += state->end + 1 - - state->start; - spin_unlock(&BTRFS_I(inode)->lock); + spin_lock(&inode->lock); + inode->new_delalloc_bytes += state->end + 1 - state->start; + spin_unlock(&inode->lock); } } @@ -2438,11 +2469,10 @@ void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state, * Once a range is no longer delalloc this function ensures that proper * accounting happens. 
*/ -void btrfs_clear_delalloc_extent(struct inode *vfs_inode, +void btrfs_clear_delalloc_extent(struct btrfs_inode *inode, struct extent_state *state, u32 bits) { - struct btrfs_inode *inode = BTRFS_I(vfs_inode); - struct btrfs_fs_info *fs_info = btrfs_sb(vfs_inode->i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; u64 len = state->end + 1 - state->start; u32 num_extents = count_max_extents(fs_info, len); @@ -2513,10 +2543,9 @@ void btrfs_clear_delalloc_extent(struct inode *vfs_inode, * At IO completion time the cums attached on the ordered extent record * are inserted into the btree */ -static blk_status_t btrfs_submit_bio_start(struct inode *inode, struct bio *bio, - u64 dio_file_offset) +blk_status_t btrfs_submit_bio_start(struct btrfs_inode *inode, struct bio *bio) { - return btrfs_csum_one_bio(BTRFS_I(inode), bio, (u64)-1, false); + return btrfs_csum_one_bio(inode, bio, (u64)-1, false); } /* @@ -2694,14 +2723,13 @@ out: return errno_to_blk_status(ret); } -void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio, int mirror_num) +void btrfs_submit_data_write_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_inode *bi = BTRFS_I(inode); + struct btrfs_fs_info *fs_info = inode->root->fs_info; blk_status_t ret; if (bio_op(bio) == REQ_OP_ZONE_APPEND) { - ret = extract_ordered_extent(bi, bio, + ret = extract_ordered_extent(inode, bio, page_offset(bio_first_bvec_all(bio)->bv_page)); if (ret) { btrfs_bio_end_io(btrfs_bio(bio), ret); @@ -2717,15 +2745,14 @@ void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio, int mirro * Csum items for reloc roots have already been cloned at this point, * so they are handled as part of the no-checksum case. */ - if (!(bi->flags & BTRFS_INODE_NODATASUM) && + if (!(inode->flags & BTRFS_INODE_NODATASUM) && !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state) && - !btrfs_is_data_reloc_root(bi->root)) { - if (!atomic_read(&bi->sync_writers) && - btrfs_wq_submit_bio(inode, bio, mirror_num, 0, - btrfs_submit_bio_start)) + !btrfs_is_data_reloc_root(inode->root)) { + if (!atomic_read(&inode->sync_writers) && + btrfs_wq_submit_bio(inode, bio, mirror_num, 0, WQ_SUBMIT_DATA)) return; - ret = btrfs_csum_one_bio(bi, bio, (u64)-1, false); + ret = btrfs_csum_one_bio(inode, bio, (u64)-1, false); if (ret) { btrfs_bio_end_io(btrfs_bio(bio), ret); return; @@ -2734,10 +2761,10 @@ void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio, int mirro btrfs_submit_bio(fs_info, bio, mirror_num); } -void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio, +void btrfs_submit_data_read_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num, enum btrfs_compression_type compress_type) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; blk_status_t ret; if (compress_type != BTRFS_COMPRESS_NONE) { @@ -2745,7 +2772,7 @@ void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio, * btrfs_submit_compressed_read will handle completing the bio * if there were any errors, so just return here. */ - btrfs_submit_compressed_read(inode, bio, mirror_num); + btrfs_submit_compressed_read(&inode->vfs_inode, bio, mirror_num); return; } @@ -2756,7 +2783,7 @@ void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio, * Lookup bio sums does extra checks around whether we need to csum or * not, which is why we ignore skip_sum here. 
*/ - ret = btrfs_lookup_bio_sums(inode, bio, NULL); + ret = btrfs_lookup_bio_sums(&inode->vfs_inode, bio, NULL); if (ret) { btrfs_bio_end_io(btrfs_bio(bio), ret); return; @@ -2859,7 +2886,7 @@ int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end, /* see btrfs_writepage_start_hook for details on why this is required */ struct btrfs_writepage_fixup { struct page *page; - struct inode *inode; + struct btrfs_inode *inode; struct btrfs_work work; }; @@ -2878,7 +2905,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work) fixup = container_of(work, struct btrfs_writepage_fixup, work); page = fixup->page; - inode = BTRFS_I(fixup->inode); + inode = fixup->inode; page_start = page_offset(page); page_end = page_offset(page) + PAGE_SIZE - 1; @@ -2988,7 +3015,7 @@ out_page: * that could need flushing space. Recursing back to fixup worker would * deadlock. */ - btrfs_add_delayed_iput(&inode->vfs_inode); + btrfs_add_delayed_iput(inode); } /* @@ -3037,7 +3064,7 @@ int btrfs_writepage_cow_fixup(struct page *page) get_page(page); btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL); fixup->page = page; - fixup->inode = inode; + fixup->inode = BTRFS_I(inode); btrfs_queue_work(fs_info->fixup_workers, &fixup->work); return -EAGAIN; @@ -3459,10 +3486,10 @@ static u8 *btrfs_csum_ptr(const struct btrfs_fs_info *fs_info, u8 *csums, u64 of * When csum mismatch is detected, we will also report the error and fill the * corrupted range with zero. (Thus it needs the extra parameters) */ -int btrfs_check_data_csum(struct inode *inode, struct btrfs_bio *bbio, +int btrfs_check_data_csum(struct btrfs_inode *inode, struct btrfs_bio *bbio, u32 bio_offset, struct page *page, u32 pgoff) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; u32 len = fs_info->sectorsize; u8 *csum_expected; u8 csum[BTRFS_CSUM_SIZE]; @@ -3476,8 +3503,7 @@ int btrfs_check_data_csum(struct inode *inode, struct btrfs_bio *bbio, return 0; zeroit: - btrfs_print_data_csum_error(BTRFS_I(inode), - bbio->file_offset + bio_offset, + btrfs_print_data_csum_error(inode, bbio->file_offset + bio_offset, csum, csum_expected, bbio->mirror_num); if (bbio->device) btrfs_dev_stat_inc_and_print(bbio->device, @@ -3502,10 +3528,10 @@ unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio, u32 bio_offset, struct page *page, u64 start, u64 end) { - struct inode *inode = page->mapping->host; - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_inode *inode = BTRFS_I(page->mapping->host); + struct btrfs_root *root = inode->root; + struct btrfs_fs_info *fs_info = root->fs_info; + struct extent_io_tree *io_tree = &inode->io_tree; const u32 sectorsize = root->fs_info->sectorsize; u32 pg_off; unsigned int result = 0; @@ -3518,7 +3544,7 @@ unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio, if (bbio->csum == NULL) return 0; - if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) + if (inode->flags & BTRFS_INODE_NODATASUM) return 0; if (unlikely(test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state))) @@ -3563,18 +3589,17 @@ unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio, * the inode to the delayed iput machinery. Delayed iputs are processed at * transaction commit time/superblock commit/cleaner kthread. 
*/ -void btrfs_add_delayed_iput(struct inode *inode) +void btrfs_add_delayed_iput(struct btrfs_inode *inode) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_inode *binode = BTRFS_I(inode); + struct btrfs_fs_info *fs_info = inode->root->fs_info; - if (atomic_add_unless(&inode->i_count, -1, 1)) + if (atomic_add_unless(&inode->vfs_inode.i_count, -1, 1)) return; atomic_inc(&fs_info->nr_delayed_iputs); spin_lock(&fs_info->delayed_iput_lock); - ASSERT(list_empty(&binode->delayed_iput)); - list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs); + ASSERT(list_empty(&inode->delayed_iput)); + list_add_tail(&inode->delayed_iput, &fs_info->delayed_iputs); spin_unlock(&fs_info->delayed_iput_lock); if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags)) wake_up_process(fs_info->cleaner_kthread); @@ -3617,7 +3642,7 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info) spin_unlock(&fs_info->delayed_iput_lock); } -/** +/* * Wait for flushing all delayed iputs * * @fs_info: the filesystem @@ -4262,7 +4287,7 @@ int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, struct btrfs_inode *dir, struct btrfs_inode *inode, - const char *name, int name_len, + const struct fscrypt_str *name, struct btrfs_rename_ctx *rename_ctx) { struct btrfs_root *root = dir->root; @@ -4280,8 +4305,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, goto out; } - di = btrfs_lookup_dir_item(trans, root, path, dir_ino, - name, name_len, -1); + di = btrfs_lookup_dir_item(trans, root, path, dir_ino, name, -1); if (IS_ERR_OR_NULL(di)) { ret = di ? PTR_ERR(di) : -ENOENT; goto err; @@ -4309,12 +4333,11 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, } } - ret = btrfs_del_inode_ref(trans, root, name, name_len, ino, - dir_ino, &index); + ret = btrfs_del_inode_ref(trans, root, name, ino, dir_ino, &index); if (ret) { btrfs_info(fs_info, "failed to delete reference to %.*s, inode %llu parent %llu", - name_len, name, ino, dir_ino); + name->len, name->name, ino, dir_ino); btrfs_abort_transaction(trans, ret); goto err; } @@ -4335,10 +4358,8 @@ skip_backref: * operations on the log tree, increasing latency for applications. */ if (!rename_ctx) { - btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode, - dir_ino); - btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir, - index); + btrfs_del_inode_ref_in_log(trans, root, name, inode, dir_ino); + btrfs_del_dir_entries_in_log(trans, root, name, dir, index); } /* @@ -4356,7 +4377,7 @@ err: if (ret) goto out; - btrfs_i_size_write(dir, dir->vfs_inode.i_size - name_len * 2); + btrfs_i_size_write(dir, dir->vfs_inode.i_size - name->len * 2); inode_inc_iversion(&inode->vfs_inode); inode_inc_iversion(&dir->vfs_inode); inode->vfs_inode.i_ctime = current_time(&inode->vfs_inode); @@ -4369,10 +4390,11 @@ out: int btrfs_unlink_inode(struct btrfs_trans_handle *trans, struct btrfs_inode *dir, struct btrfs_inode *inode, - const char *name, int name_len) + const struct fscrypt_str *name) { int ret; - ret = __btrfs_unlink_inode(trans, dir, inode, name, name_len, NULL); + + ret = __btrfs_unlink_inode(trans, dir, inode, name, NULL); if (!ret) { drop_nlink(&inode->vfs_inode); ret = btrfs_update_inode(trans, inode->root, inode); @@ -4388,9 +4410,9 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, * plenty of slack room in the global reserve to migrate, otherwise we cannot * allow the unlink to occur. 
*/ -static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir) +static struct btrfs_trans_handle *__unlink_start_trans(struct btrfs_inode *dir) { - struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_root *root = dir->root; /* * 1 for the possible orphan item @@ -4408,47 +4430,62 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) struct btrfs_trans_handle *trans; struct inode *inode = d_inode(dentry); int ret; + struct fscrypt_name fname; - trans = __unlink_start_trans(dir); - if (IS_ERR(trans)) - return PTR_ERR(trans); + ret = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname); + if (ret) + return ret; + + /* This needs to handle no-key deletions later on */ + + trans = __unlink_start_trans(BTRFS_I(dir)); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto fscrypt_free; + } btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), 0); - ret = btrfs_unlink_inode(trans, BTRFS_I(dir), - BTRFS_I(d_inode(dentry)), dentry->d_name.name, - dentry->d_name.len); + ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), + &fname.disk_name); if (ret) - goto out; + goto end_trans; if (inode->i_nlink == 0) { ret = btrfs_orphan_add(trans, BTRFS_I(inode)); if (ret) - goto out; + goto end_trans; } -out: +end_trans: btrfs_end_transaction(trans); btrfs_btree_balance_dirty(BTRFS_I(dir)->root->fs_info); +fscrypt_free: + fscrypt_free_filename(&fname); return ret; } static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, - struct inode *dir, struct dentry *dentry) + struct btrfs_inode *dir, struct dentry *dentry) { - struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_root *root = dir->root; struct btrfs_inode *inode = BTRFS_I(d_inode(dentry)); struct btrfs_path *path; struct extent_buffer *leaf; struct btrfs_dir_item *di; struct btrfs_key key; - const char *name = dentry->d_name.name; - int name_len = dentry->d_name.len; u64 index; int ret; u64 objectid; - u64 dir_ino = btrfs_ino(BTRFS_I(dir)); + u64 dir_ino = btrfs_ino(dir); + struct fscrypt_name fname; + + ret = fscrypt_setup_filename(&dir->vfs_inode, &dentry->d_name, 1, &fname); + if (ret) + return ret; + + /* This needs to handle no-key deletions later on */ if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) { objectid = inode->root->root_key.objectid; @@ -4456,15 +4493,18 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, objectid = inode->location.objectid; } else { WARN_ON(1); + fscrypt_free_filename(&fname); return -EINVAL; } path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + if (!path) { + ret = -ENOMEM; + goto out; + } di = btrfs_lookup_dir_item(trans, root, path, dir_ino, - name, name_len, -1); + &fname.disk_name, -1); if (IS_ERR_OR_NULL(di)) { ret = di ? PTR_ERR(di) : -ENOENT; goto out; @@ -4490,8 +4530,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, * call btrfs_del_root_ref, and it _shouldn't_ fail. 
*/ if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) { - di = btrfs_search_dir_index_item(root, path, dir_ino, - name, name_len); + di = btrfs_search_dir_index_item(root, path, dir_ino, &fname.disk_name); if (IS_ERR_OR_NULL(di)) { if (!di) ret = -ENOENT; @@ -4508,28 +4547,29 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, } else { ret = btrfs_del_root_ref(trans, objectid, root->root_key.objectid, dir_ino, - &index, name, name_len); + &index, &fname.disk_name); if (ret) { btrfs_abort_transaction(trans, ret); goto out; } } - ret = btrfs_delete_delayed_dir_index(trans, BTRFS_I(dir), index); + ret = btrfs_delete_delayed_dir_index(trans, dir, index); if (ret) { btrfs_abort_transaction(trans, ret); goto out; } - btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name_len * 2); - inode_inc_iversion(dir); - dir->i_mtime = current_time(dir); - dir->i_ctime = dir->i_mtime; - ret = btrfs_update_inode_fallback(trans, root, BTRFS_I(dir)); + btrfs_i_size_write(dir, dir->vfs_inode.i_size - fname.disk_name.len * 2); + inode_inc_iversion(&dir->vfs_inode); + dir->vfs_inode.i_mtime = current_time(&dir->vfs_inode); + dir->vfs_inode.i_ctime = dir->vfs_inode.i_mtime; + ret = btrfs_update_inode_fallback(trans, root, dir); if (ret) btrfs_abort_transaction(trans, ret); out: btrfs_free_path(path); + fscrypt_free_filename(&fname); return ret; } @@ -4543,6 +4583,7 @@ static noinline int may_destroy_subvol(struct btrfs_root *root) struct btrfs_path *path; struct btrfs_dir_item *di; struct btrfs_key key; + struct fscrypt_str name = FSTR_INIT("default", 7); u64 dir_id; int ret; @@ -4553,7 +4594,7 @@ static noinline int may_destroy_subvol(struct btrfs_root *root) /* Make sure this root isn't set as the default subvol */ dir_id = btrfs_super_root_dir(fs_info->super_copy); di = btrfs_lookup_dir_item(NULL, fs_info->tree_root, path, - dir_id, "default", 7, 0); + dir_id, &name, 0); if (di && !IS_ERR(di)) { btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); if (key.objectid == root->root_key.objectid) { @@ -4652,10 +4693,10 @@ again: spin_unlock(&root->inode_lock); } -int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) +int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry) { struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb); - struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_root *root = dir->root; struct inode *inode = d_inode(dentry); struct btrfs_root *dest = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; @@ -4712,7 +4753,7 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) trans->block_rsv = &block_rsv; trans->bytes_reserved = block_rsv.size; - btrfs_record_snapshot_destroy(trans, BTRFS_I(dir)); + btrfs_record_snapshot_destroy(trans, dir); ret = btrfs_unlink_subvol(trans, dir, dentry); if (ret) { @@ -4792,6 +4833,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) int err = 0; struct btrfs_trans_handle *trans; u64 last_unlink_trans; + struct fscrypt_name fname; if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) return -ENOTEMPTY; @@ -4801,15 +4843,23 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) "extent tree v2 doesn't support snapshot deletion yet"); return -EOPNOTSUPP; } - return btrfs_delete_subvolume(dir, dentry); + return btrfs_delete_subvolume(BTRFS_I(dir), dentry); } - trans = __unlink_start_trans(dir); - if (IS_ERR(trans)) - return PTR_ERR(trans); + err = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname); + if (err) + return err; + + /* This needs to handle no-key 
deletions later on */ + + trans = __unlink_start_trans(BTRFS_I(dir)); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto out_notrans; + } if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { - err = btrfs_unlink_subvol(trans, dir, dentry); + err = btrfs_unlink_subvol(trans, BTRFS_I(dir), dentry); goto out; } @@ -4820,9 +4870,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) last_unlink_trans = BTRFS_I(inode)->last_unlink_trans; /* now the directory is empty */ - err = btrfs_unlink_inode(trans, BTRFS_I(dir), - BTRFS_I(d_inode(dentry)), dentry->d_name.name, - dentry->d_name.len); + err = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), + &fname.disk_name); if (!err) { btrfs_i_size_write(BTRFS_I(inode), 0); /* @@ -4841,7 +4890,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) } out: btrfs_end_transaction(trans); +out_notrans: btrfs_btree_balance_dirty(fs_info); + fscrypt_free_filename(&fname); return err; } @@ -5210,7 +5261,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) inode_dio_wait(inode); - ret = btrfs_truncate(inode, newsize == oldsize); + ret = btrfs_truncate(BTRFS_I(inode), newsize == oldsize); if (ret && inode->i_nlink) { int err; @@ -5253,7 +5304,7 @@ static int btrfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentr if (attr->ia_valid) { setattr_copy(mnt_userns, inode, attr); inode_inc_iversion(inode); - err = btrfs_dirty_inode(inode); + err = btrfs_dirty_inode(BTRFS_I(inode)); if (!err && attr->ia_valid & ATTR_MODE) err = posix_acl_chmod(mnt_userns, dentry, inode->i_mode); @@ -5511,22 +5562,27 @@ no_delete: * If no dir entries were found, returns -ENOENT. * If found a corrupted location in dir entry, returns -EUCLEAN. */ -static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, +static int btrfs_inode_by_name(struct btrfs_inode *dir, struct dentry *dentry, struct btrfs_key *location, u8 *type) { - const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; struct btrfs_dir_item *di; struct btrfs_path *path; - struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_root *root = dir->root; int ret = 0; + struct fscrypt_name fname; path = btrfs_alloc_path(); if (!path) return -ENOMEM; - di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)), - name, namelen, 0); + ret = fscrypt_setup_filename(&dir->vfs_inode, &dentry->d_name, 1, &fname); + if (ret) + goto out; + + /* This needs to handle no-key deletions later on */ + + di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), + &fname.disk_name, 0); if (IS_ERR_OR_NULL(di)) { ret = di ? PTR_ERR(di) : -ENOENT; goto out; @@ -5538,12 +5594,13 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, ret = -EUCLEAN; btrfs_warn(root->fs_info, "%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))", - __func__, name, btrfs_ino(BTRFS_I(dir)), + __func__, fname.disk_name.name, btrfs_ino(dir), location->objectid, location->type, location->offset); } if (!ret) - *type = btrfs_dir_type(path->nodes[0], di); + *type = btrfs_dir_ftype(path->nodes[0], di); out: + fscrypt_free_filename(&fname); btrfs_free_path(path); return ret; } @@ -5554,7 +5611,7 @@ out: * is kind of like crossing a mount point. 
*/ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info, - struct inode *dir, + struct btrfs_inode *dir, struct dentry *dentry, struct btrfs_key *location, struct btrfs_root **sub_root) @@ -5566,6 +5623,11 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info, struct btrfs_key key; int ret; int err = 0; + struct fscrypt_name fname; + + ret = fscrypt_setup_filename(&dir->vfs_inode, &dentry->d_name, 0, &fname); + if (ret) + return ret; path = btrfs_alloc_path(); if (!path) { @@ -5574,7 +5636,7 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info, } err = -ENOENT; - key.objectid = BTRFS_I(dir)->root->root_key.objectid; + key.objectid = dir->root->root_key.objectid; key.type = BTRFS_ROOT_REF_KEY; key.offset = location->objectid; @@ -5587,13 +5649,12 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info, leaf = path->nodes[0]; ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); - if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(BTRFS_I(dir)) || - btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len) + if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(dir) || + btrfs_root_ref_name_len(leaf, ref) != fname.disk_name.len) goto out; - ret = memcmp_extent_buffer(leaf, dentry->d_name.name, - (unsigned long)(ref + 1), - dentry->d_name.len); + ret = memcmp_extent_buffer(leaf, fname.disk_name.name, + (unsigned long)(ref + 1), fname.disk_name.len); if (ret) goto out; @@ -5612,19 +5673,20 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info, err = 0; out: btrfs_free_path(path); + fscrypt_free_filename(&fname); return err; } -static void inode_tree_add(struct inode *inode) +static void inode_tree_add(struct btrfs_inode *inode) { - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_root *root = inode->root; struct btrfs_inode *entry; struct rb_node **p; struct rb_node *parent; - struct rb_node *new = &BTRFS_I(inode)->rb_node; - u64 ino = btrfs_ino(BTRFS_I(inode)); + struct rb_node *new = &inode->rb_node; + u64 ino = btrfs_ino(inode); - if (inode_unhashed(inode)) + if (inode_unhashed(&inode->vfs_inode)) return; parent = NULL; spin_lock(&root->inode_lock); @@ -5736,7 +5798,7 @@ struct inode *btrfs_iget_path(struct super_block *s, u64 ino, ret = btrfs_read_locked_inode(inode, path); if (!ret) { - inode_tree_add(inode); + inode_tree_add(BTRFS_I(inode)); unlock_new_inode(inode); } else { iget_failed(inode); @@ -5816,7 +5878,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - ret = btrfs_inode_by_name(dir, dentry, &location, &di_type); + ret = btrfs_inode_by_name(BTRFS_I(dir), dentry, &location, &di_type); if (ret < 0) return ERR_PTR(ret); @@ -5837,7 +5899,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) return inode; } - ret = fixup_tree_root_location(fs_info, dir, dentry, + ret = fixup_tree_root_location(fs_info, BTRFS_I(dir), dentry, &location, &sub_root); if (ret < 0) { if (ret != -ENOENT) @@ -5985,6 +6047,7 @@ again: btrfs_for_each_slot(root, &key, &found_key, path, ret) { struct dir_entry *entry; struct extent_buffer *leaf = path->nodes[0]; + u8 ftype; if (found_key.objectid != key.objectid) break; @@ -6008,13 +6071,13 @@ again: goto again; } + ftype = btrfs_dir_flags_to_ftype(btrfs_dir_flags(leaf, di)); entry = addr; - put_unaligned(name_len, &entry->name_len); name_ptr = (char *)(entry + 1); - read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1), - name_len); - 
put_unaligned(fs_ftype_to_dtype(btrfs_dir_type(leaf, di)), - &entry->type); + read_extent_buffer(leaf, name_ptr, + (unsigned long)(di + 1), name_len); + put_unaligned(name_len, &entry->name_len); + put_unaligned(fs_ftype_to_dtype(ftype), &entry->type); btrfs_dir_item_key_to_cpu(leaf, di, &location); put_unaligned(location.objectid, &entry->ino); put_unaligned(found_key.offset, &entry->offset); @@ -6072,21 +6135,21 @@ err: * FIXME, needs more benchmarking...there are no reasons other than performance * to keep or drop this code. */ -static int btrfs_dirty_inode(struct inode *inode) +static int btrfs_dirty_inode(struct btrfs_inode *inode) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_root *root = inode->root; + struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_trans_handle *trans; int ret; - if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags)) + if (test_bit(BTRFS_INODE_DUMMY, &inode->runtime_flags)) return 0; trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); - ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); + ret = btrfs_update_inode(trans, root, inode); if (ret && (ret == -ENOSPC || ret == -EDQUOT)) { /* whoops, lets try again with the full transaction */ btrfs_end_transaction(trans); @@ -6094,10 +6157,10 @@ static int btrfs_dirty_inode(struct inode *inode) if (IS_ERR(trans)) return PTR_ERR(trans); - ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); + ret = btrfs_update_inode(trans, root, inode); } btrfs_end_transaction(trans); - if (BTRFS_I(inode)->delayed_node) + if (inode->delayed_node) btrfs_balance_delayed_items(fs_info); return ret; @@ -6124,7 +6187,7 @@ static int btrfs_update_time(struct inode *inode, struct timespec64 *now, inode->i_mtime = *now; if (flags & S_ATIME) inode->i_atime = *now; - return dirty ? btrfs_dirty_inode(inode) : 0; + return dirty ? btrfs_dirty_inode(BTRFS_I(inode)) : 0; } /* @@ -6220,9 +6283,18 @@ int btrfs_new_inode_prepare(struct btrfs_new_inode_args *args, struct inode *inode = args->inode; int ret; + if (!args->orphan) { + ret = fscrypt_setup_filename(dir, &args->dentry->d_name, 0, + &args->fname); + if (ret) + return ret; + } + ret = posix_acl_create(dir, &inode->i_mode, &args->default_acl, &args->acl); - if (ret) + if (ret) { + fscrypt_free_filename(&args->fname); return ret; + } /* 1 to add inode item */ *trans_num_items = 1; @@ -6262,6 +6334,7 @@ void btrfs_new_inode_args_destroy(struct btrfs_new_inode_args *args) { posix_acl_release(args->acl); posix_acl_release(args->default_acl); + fscrypt_free_filename(&args->fname); } /* @@ -6269,27 +6342,27 @@ void btrfs_new_inode_args_destroy(struct btrfs_new_inode_args *args) * * Currently only the compression flags and the cow flags are inherited. 
*/ -static void btrfs_inherit_iflags(struct inode *inode, struct inode *dir) +static void btrfs_inherit_iflags(struct btrfs_inode *inode, struct btrfs_inode *dir) { unsigned int flags; - flags = BTRFS_I(dir)->flags; + flags = dir->flags; if (flags & BTRFS_INODE_NOCOMPRESS) { - BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS; - BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; + inode->flags &= ~BTRFS_INODE_COMPRESS; + inode->flags |= BTRFS_INODE_NOCOMPRESS; } else if (flags & BTRFS_INODE_COMPRESS) { - BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS; - BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS; + inode->flags &= ~BTRFS_INODE_NOCOMPRESS; + inode->flags |= BTRFS_INODE_COMPRESS; } if (flags & BTRFS_INODE_NODATACOW) { - BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; - if (S_ISREG(inode->i_mode)) - BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; + inode->flags |= BTRFS_INODE_NODATACOW; + if (S_ISREG(inode->vfs_inode.i_mode)) + inode->flags |= BTRFS_INODE_NODATASUM; } - btrfs_sync_inode_flags_to_i_flags(inode); + btrfs_sync_inode_flags_to_i_flags(&inode->vfs_inode); } int btrfs_create_new_inode(struct btrfs_trans_handle *trans, @@ -6297,8 +6370,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, { struct inode *dir = args->dir; struct inode *inode = args->inode; - const char *name = args->orphan ? NULL : args->dentry->d_name.name; - int name_len = args->orphan ? 0 : args->dentry->d_name.len; + const struct fscrypt_str *name = args->orphan ? NULL : &args->fname.disk_name; struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb); struct btrfs_root *root; struct btrfs_inode_item *inode_item; @@ -6349,7 +6421,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, * change it now without compatibility issues. */ if (!args->subvol) - btrfs_inherit_iflags(inode, dir); + btrfs_inherit_iflags(BTRFS_I(inode), BTRFS_I(dir)); if (S_ISREG(inode->i_mode)) { if (btrfs_test_opt(fs_info, NODATASUM)) @@ -6399,7 +6471,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, sizes[1] = 2 + sizeof(*ref); } else { key[1].offset = btrfs_ino(BTRFS_I(dir)); - sizes[1] = name_len + sizeof(*ref); + sizes[1] = name->len + sizeof(*ref); } } @@ -6438,10 +6510,12 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, btrfs_set_inode_ref_index(path->nodes[0], ref, 0); write_extent_buffer(path->nodes[0], "..", ptr, 2); } else { - btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); + btrfs_set_inode_ref_name_len(path->nodes[0], ref, + name->len); btrfs_set_inode_ref_index(path->nodes[0], ref, BTRFS_I(inode)->dir_index); - write_extent_buffer(path->nodes[0], name, ptr, name_len); + write_extent_buffer(path->nodes[0], name->name, ptr, + name->len); } } @@ -6491,7 +6565,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, } } - inode_tree_add(inode); + inode_tree_add(BTRFS_I(inode)); trace_btrfs_inode_new(inode); btrfs_set_inode_last_trans(trans, BTRFS_I(inode)); @@ -6502,7 +6576,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, ret = btrfs_orphan_add(trans, BTRFS_I(inode)); } else { ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), name, - name_len, 0, BTRFS_I(inode)->dir_index); + 0, BTRFS_I(inode)->dir_index); } if (ret) { btrfs_abort_transaction(trans, ret); @@ -6531,7 +6605,7 @@ out: */ int btrfs_add_link(struct btrfs_trans_handle *trans, struct btrfs_inode *parent_inode, struct btrfs_inode *inode, - const char *name, int name_len, int add_backref, u64 index) + const struct fscrypt_str *name, int add_backref, u64 index) { int ret = 0; struct 
btrfs_key key; @@ -6550,17 +6624,17 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) { ret = btrfs_add_root_ref(trans, key.objectid, root->root_key.objectid, parent_ino, - index, name, name_len); + index, name); } else if (add_backref) { - ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino, - parent_ino, index); + ret = btrfs_insert_inode_ref(trans, root, name, + ino, parent_ino, index); } /* Nothing to clean up yet */ if (ret) return ret; - ret = btrfs_insert_dir_item(trans, name, name_len, parent_inode, &key, + ret = btrfs_insert_dir_item(trans, name, parent_inode, &key, btrfs_inode_type(&inode->vfs_inode), index); if (ret == -EEXIST || ret == -EOVERFLOW) goto fail_dir_item; @@ -6570,7 +6644,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, } btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size + - name_len * 2); + name->len * 2); inode_inc_iversion(&parent_inode->vfs_inode); /* * If we are replaying a log tree, we do not want to update the mtime @@ -6595,15 +6669,15 @@ fail_dir_item: int err; err = btrfs_del_root_ref(trans, key.objectid, root->root_key.objectid, parent_ino, - &local_index, name, name_len); + &local_index, name); if (err) btrfs_abort_transaction(trans, err); } else if (add_backref) { u64 local_index; int err; - err = btrfs_del_inode_ref(trans, root, name, name_len, - ino, parent_ino, &local_index); + err = btrfs_del_inode_ref(trans, root, name, ino, parent_ino, + &local_index); if (err) btrfs_abort_transaction(trans, err); } @@ -6686,6 +6760,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, struct btrfs_root *root = BTRFS_I(dir)->root; struct inode *inode = d_inode(old_dentry); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct fscrypt_name fname; u64 index; int err; int drop_inode = 0; @@ -6697,6 +6772,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (inode->i_nlink >= BTRFS_LINK_MAX) return -EMLINK; + err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &fname); + if (err) + goto fail; + err = btrfs_set_inode_index(BTRFS_I(dir), &index); if (err) goto fail; @@ -6723,7 +6802,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), - dentry->d_name.name, dentry->d_name.len, 1, index); + &fname.disk_name, 1, index); if (err) { drop_inode = 1; @@ -6747,6 +6826,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, } fail: + fscrypt_free_filename(&fname); if (trans) btrfs_end_transaction(trans); if (drop_inode) { @@ -6773,7 +6853,6 @@ static int btrfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, static noinline int uncompress_inline(struct btrfs_path *path, struct page *page, - size_t pg_offset, u64 extent_offset, struct btrfs_file_extent_item *item) { int ret; @@ -6784,7 +6863,6 @@ static noinline int uncompress_inline(struct btrfs_path *path, unsigned long ptr; int compress_type; - WARN_ON(pg_offset != 0); compress_type = btrfs_file_extent_compression(leaf, item); max_size = btrfs_file_extent_ram_bytes(leaf, item); inline_size = btrfs_file_extent_inline_item_len(leaf, path->slots[0]); @@ -6796,8 +6874,7 @@ static noinline int uncompress_inline(struct btrfs_path *path, read_extent_buffer(leaf, tmp, ptr, inline_size); max_size = min_t(unsigned long, PAGE_SIZE, max_size); - ret = btrfs_decompress(compress_type, tmp, page, - extent_offset, inline_size, 
max_size); + ret = btrfs_decompress(compress_type, tmp, page, 0, inline_size, max_size); /* * decompression code contains a memset to fill in any space between the end @@ -6807,25 +6884,52 @@ static noinline int uncompress_inline(struct btrfs_path *path, * cover that region here. */ - if (max_size + pg_offset < PAGE_SIZE) - memzero_page(page, pg_offset + max_size, - PAGE_SIZE - max_size - pg_offset); + if (max_size < PAGE_SIZE) + memzero_page(page, max_size, PAGE_SIZE - max_size); kfree(tmp); return ret; } -/** - * btrfs_get_extent - Lookup the first extent overlapping a range in a file. +static int read_inline_extent(struct btrfs_inode *inode, struct btrfs_path *path, + struct page *page) +{ + struct btrfs_file_extent_item *fi; + void *kaddr; + size_t copy_size; + + if (!page || PageUptodate(page)) + return 0; + + ASSERT(page_offset(page) == 0); + + fi = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + if (btrfs_file_extent_compression(path->nodes[0], fi) != BTRFS_COMPRESS_NONE) + return uncompress_inline(path, page, fi); + + copy_size = min_t(u64, PAGE_SIZE, + btrfs_file_extent_ram_bytes(path->nodes[0], fi)); + kaddr = kmap_local_page(page); + read_extent_buffer(path->nodes[0], kaddr, + btrfs_file_extent_inline_start(fi), copy_size); + kunmap_local(kaddr); + if (copy_size < PAGE_SIZE) + memzero_page(page, copy_size, PAGE_SIZE - copy_size); + return 0; +} + +/* + * Lookup the first extent overlapping a range in a file. + * * @inode: file to search in * @page: page to read extent data into if the extent is inline * @pg_offset: offset into @page to copy to * @start: file offset * @len: length of range starting at @start * - * This returns the first &struct extent_map which overlaps with the given - * range, reading it from the B-tree and caching it if necessary. Note that - * there may be more extents which overlap the given range after the returned - * extent_map. + * Return the first &struct extent_map which overlaps the given range, reading + * it from the B-tree and caching it if necessary. Note that there may be more + * extents which overlap the given range after the returned extent_map. * * If @page is not NULL and the extent is inline, this also reads the extent * data directly into the page and marks the extent up to date in the io_tree. @@ -6966,51 +7070,33 @@ next: goto insert; } - btrfs_extent_item_to_extent_map(inode, path, item, !page, em); + btrfs_extent_item_to_extent_map(inode, path, item, em); if (extent_type == BTRFS_FILE_EXTENT_REG || extent_type == BTRFS_FILE_EXTENT_PREALLOC) { goto insert; } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { - unsigned long ptr; - char *map; - size_t size; - size_t extent_offset; - size_t copy_size; - - if (!page) - goto out; + /* + * Inline extent can only exist at file offset 0. This is + * ensured by tree-checker and inline extent creation path. + * Thus all members representing file offsets should be zero. + */ + ASSERT(pg_offset == 0); + ASSERT(extent_start == 0); + ASSERT(em->start == 0); - size = btrfs_file_extent_ram_bytes(leaf, item); - extent_offset = page_offset(page) + pg_offset - extent_start; - copy_size = min_t(u64, PAGE_SIZE - pg_offset, - size - extent_offset); - em->start = extent_start + extent_offset; - em->len = ALIGN(copy_size, fs_info->sectorsize); - em->orig_block_len = em->len; - em->orig_start = em->start; - ptr = btrfs_file_extent_inline_start(item) + extent_offset; + /* + * btrfs_extent_item_to_extent_map() should have properly + * initialized em members already. 
+		 *
+		 * Other members are not utilized for inline extents.
+		 */
+		ASSERT(em->block_start == EXTENT_MAP_INLINE);
+		ASSERT(em->len == fs_info->sectorsize);
 
-		if (!PageUptodate(page)) {
-			if (btrfs_file_extent_compression(leaf, item) !=
-			    BTRFS_COMPRESS_NONE) {
-				ret = uncompress_inline(path, page, pg_offset,
-							extent_offset, item);
-				if (ret)
-					goto out;
-			} else {
-				map = kmap_local_page(page);
-				read_extent_buffer(leaf, map + pg_offset, ptr,
-						   copy_size);
-				if (pg_offset + copy_size < PAGE_SIZE) {
-					memset(map + pg_offset + copy_size, 0,
-					       PAGE_SIZE - pg_offset -
-					       copy_size);
-				}
-				kunmap_local(map);
-			}
-			flush_dcache_page(page);
-		}
+		ret = read_inline_extent(inode, path, page);
+		if (ret < 0)
+			goto out;
 		goto insert;
 	}
 not_found:
@@ -7255,7 +7341,8 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
 
 	while (1) {
 		if (nowait) {
-			if (!try_lock_extent(io_tree, lockstart, lockend))
+			if (!try_lock_extent(io_tree, lockstart, lockend,
+					     cached_state))
 				return -EAGAIN;
 		} else {
 			lock_extent(io_tree, lockstart, lockend, cached_state);
@@ -7811,11 +7898,11 @@ static void btrfs_dio_private_put(struct btrfs_dio_private *dip)
 		return;
 
 	if (btrfs_op(&dip->bio) == BTRFS_MAP_WRITE) {
-		btrfs_mark_ordered_io_finished(BTRFS_I(dip->inode), NULL,
+		btrfs_mark_ordered_io_finished(dip->inode, NULL,
 					       dip->file_offset, dip->bytes,
 					       !dip->bio.bi_status);
 	} else {
-		unlock_extent(&BTRFS_I(dip->inode)->io_tree,
+		unlock_extent(&dip->inode->io_tree,
 			      dip->file_offset,
 			      dip->file_offset + dip->bytes - 1, NULL);
 	}
@@ -7824,24 +7911,21 @@ static void btrfs_dio_private_put(struct btrfs_dio_private *dip)
 	bio_endio(&dip->bio);
 }
 
-static void submit_dio_repair_bio(struct inode *inode, struct bio *bio,
-				  int mirror_num,
-				  enum btrfs_compression_type compress_type)
+void btrfs_submit_dio_repair_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num)
 {
 	struct btrfs_dio_private *dip = btrfs_bio(bio)->private;
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 
 	BUG_ON(bio_op(bio) == REQ_OP_WRITE);
 
 	refcount_inc(&dip->refs);
-	btrfs_submit_bio(fs_info, bio, mirror_num);
+	btrfs_submit_bio(inode->root->fs_info, bio, mirror_num);
 }
 
 static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
 					     struct btrfs_bio *bbio,
 					     const bool uptodate)
 {
-	struct inode *inode = dip->inode;
+	struct inode *inode = &dip->inode->vfs_inode;
 	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
 	const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
 	blk_status_t err = BLK_STS_OK;
@@ -7853,16 +7937,15 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
 		u64 start = bbio->file_offset + offset;
 
 		if (uptodate &&
-		    (!csum || !btrfs_check_data_csum(inode, bbio, offset, bv.bv_page,
-						     bv.bv_offset))) {
+		    (!csum || !btrfs_check_data_csum(BTRFS_I(inode), bbio, offset,
+						     bv.bv_page, bv.bv_offset))) {
 			btrfs_clean_io_failure(BTRFS_I(inode), start,
 					       bv.bv_page, bv.bv_offset);
 		} else {
 			int ret;
 
-			ret = btrfs_repair_one_sector(inode, bbio, offset,
-						      bv.bv_page, bv.bv_offset,
-						      submit_dio_repair_bio);
+			ret = btrfs_repair_one_sector(BTRFS_I(inode), bbio, offset,
+						      bv.bv_page, bv.bv_offset, false);
 			if (ret)
 				err = errno_to_blk_status(ret);
 		}
@@ -7871,11 +7954,11 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
 	return err;
 }
 
-static blk_status_t btrfs_submit_bio_start_direct_io(struct inode *inode,
-						      struct bio *bio,
-						      u64 dio_file_offset)
+blk_status_t btrfs_submit_bio_start_direct_io(struct btrfs_inode *inode,
+					      struct bio *bio,
+					      u64 
dio_file_offset) { - return btrfs_csum_one_bio(BTRFS_I(inode), bio, dio_file_offset, false); + return btrfs_csum_one_bio(inode, bio, dio_file_offset, false); } static void btrfs_end_dio_bio(struct btrfs_bio *bbio) @@ -7885,9 +7968,9 @@ static void btrfs_end_dio_bio(struct btrfs_bio *bbio) blk_status_t err = bio->bi_status; if (err) - btrfs_warn(BTRFS_I(dip->inode)->root->fs_info, + btrfs_warn(dip->inode->root->fs_info, "direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d", - btrfs_ino(BTRFS_I(dip->inode)), bio_op(bio), + btrfs_ino(dip->inode), bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, err); @@ -7897,16 +7980,16 @@ static void btrfs_end_dio_bio(struct btrfs_bio *bbio) if (err) dip->bio.bi_status = err; - btrfs_record_physical_zoned(dip->inode, bbio->file_offset, bio); + btrfs_record_physical_zoned(&dip->inode->vfs_inode, bbio->file_offset, bio); bio_put(bio); btrfs_dio_private_put(dip); } -static void btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, +static void btrfs_submit_dio_bio(struct bio *bio, struct btrfs_inode *inode, u64 file_offset, int async_submit) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_dio_private *dip = btrfs_bio(bio)->private; blk_status_t ret; @@ -7914,21 +7997,21 @@ static void btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, if (btrfs_op(bio) == BTRFS_MAP_READ) btrfs_bio(bio)->iter = bio->bi_iter; - if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) + if (inode->flags & BTRFS_INODE_NODATASUM) goto map; if (btrfs_op(bio) == BTRFS_MAP_WRITE) { /* Check btrfs_submit_data_write_bio() for async submit rules */ - if (async_submit && !atomic_read(&BTRFS_I(inode)->sync_writers) && + if (async_submit && !atomic_read(&inode->sync_writers) && btrfs_wq_submit_bio(inode, bio, 0, file_offset, - btrfs_submit_bio_start_direct_io)) + WQ_SUBMIT_DATA_DIO)) return; /* * If we aren't doing async submit, calculate the csum of the * bio now. 
*/ - ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, file_offset, false); + ret = btrfs_csum_one_bio(inode, bio, file_offset, false); if (ret) { btrfs_bio_end_io(btrfs_bio(bio), ret); return; @@ -7964,7 +8047,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter, struct btrfs_dio_data *dio_data = iter->private; struct extent_map *em = NULL; - dip->inode = inode; + dip->inode = BTRFS_I(inode); dip->file_offset = file_offset; dip->bytes = dio_bio->bi_iter.bi_size; refcount_set(&dip->refs, 1); @@ -8050,7 +8133,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter, async_submit = 1; } - btrfs_submit_dio_bio(bio, inode, file_offset, async_submit); + btrfs_submit_dio_bio(bio, BTRFS_I(inode), file_offset, async_submit); dio_data->submitted += clone_len; clone_offset += clone_len; @@ -8540,16 +8623,16 @@ out_noreserve: return ret; } -static int btrfs_truncate(struct inode *inode, bool skip_writeback) +static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback) { struct btrfs_truncate_control control = { - .inode = BTRFS_I(inode), - .ino = btrfs_ino(BTRFS_I(inode)), + .inode = inode, + .ino = btrfs_ino(inode), .min_type = BTRFS_EXTENT_DATA_KEY, .clear_extent_range = true, }; - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_root *root = inode->root; + struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_block_rsv *rsv; int ret; struct btrfs_trans_handle *trans; @@ -8557,7 +8640,8 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback) u64 min_size = btrfs_calc_metadata_size(fs_info, 1); if (!skip_writeback) { - ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask), + ret = btrfs_wait_ordered_range(&inode->vfs_inode, + inode->vfs_inode.i_size & (~mask), (u64)-1); if (ret) return ret; @@ -8616,34 +8700,32 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback) while (1) { struct extent_state *cached_state = NULL; - const u64 new_size = inode->i_size; + const u64 new_size = inode->vfs_inode.i_size; const u64 lock_start = ALIGN_DOWN(new_size, fs_info->sectorsize); control.new_size = new_size; - lock_extent(&BTRFS_I(inode)->io_tree, lock_start, (u64)-1, - &cached_state); + lock_extent(&inode->io_tree, lock_start, (u64)-1, &cached_state); /* * We want to drop from the next block forward in case this new * size is not block aligned since we will be keeping the last * block of the extent just the way it is. 
*/ - btrfs_drop_extent_map_range(BTRFS_I(inode), + btrfs_drop_extent_map_range(inode, ALIGN(new_size, fs_info->sectorsize), (u64)-1, false); ret = btrfs_truncate_inode_items(trans, root, &control); - inode_sub_bytes(inode, control.sub_bytes); - btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), control.last_size); + inode_sub_bytes(&inode->vfs_inode, control.sub_bytes); + btrfs_inode_safe_disk_i_size_write(inode, control.last_size); - unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, (u64)-1, - &cached_state); + unlock_extent(&inode->io_tree, lock_start, (u64)-1, &cached_state); trans->block_rsv = &fs_info->trans_block_rsv; if (ret != -ENOSPC && ret != -EAGAIN) break; - ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); + ret = btrfs_update_inode(trans, root, inode); if (ret) break; @@ -8674,7 +8756,7 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback) btrfs_end_transaction(trans); btrfs_btree_balance_dirty(fs_info); - ret = btrfs_truncate_block(BTRFS_I(inode), inode->i_size, 0, 0); + ret = btrfs_truncate_block(inode, inode->vfs_inode.i_size, 0, 0); if (ret) goto out; trans = btrfs_start_transaction(root, 1); @@ -8682,14 +8764,14 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback) ret = PTR_ERR(trans); goto out; } - btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0); + btrfs_inode_safe_disk_i_size_write(inode, 0); } if (trans) { int ret2; trans->block_rsv = &fs_info->trans_block_rsv; - ret2 = btrfs_update_inode(trans, root, BTRFS_I(inode)); + ret2 = btrfs_update_inode(trans, root, inode); if (ret2 && !ret) ret = ret2; @@ -8715,7 +8797,7 @@ out: * extents beyond i_size to drop. */ if (control.extents_found > 0) - btrfs_set_inode_full_sync(BTRFS_I(inode)); + btrfs_set_inode_full_sync(inode); return ret; } @@ -8784,9 +8866,10 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) inode = &ei->vfs_inode; extent_map_tree_init(&ei->extent_tree); - extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO, inode); + extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO); + ei->io_tree.inode = ei; extent_io_tree_init(fs_info, &ei->file_extent_tree, - IO_TREE_INODE_FILE_EXTENT, NULL); + IO_TREE_INODE_FILE_EXTENT); ei->io_failure_tree = RB_ROOT; atomic_set(&ei->sync_writers, 0); mutex_init(&ei->log_mutex); @@ -8899,10 +8982,6 @@ void __cold btrfs_destroy_cachep(void) rcu_barrier(); bioset_exit(&btrfs_dio_bioset); kmem_cache_destroy(btrfs_inode_cachep); - kmem_cache_destroy(btrfs_trans_handle_cachep); - kmem_cache_destroy(btrfs_path_cachep); - kmem_cache_destroy(btrfs_free_space_cachep); - kmem_cache_destroy(btrfs_free_space_bitmap_cachep); } int __init btrfs_init_cachep(void) @@ -8914,30 +8993,6 @@ int __init btrfs_init_cachep(void) if (!btrfs_inode_cachep) goto fail; - btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle", - sizeof(struct btrfs_trans_handle), 0, - SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL); - if (!btrfs_trans_handle_cachep) - goto fail; - - btrfs_path_cachep = kmem_cache_create("btrfs_path", - sizeof(struct btrfs_path), 0, - SLAB_MEM_SPREAD, NULL); - if (!btrfs_path_cachep) - goto fail; - - btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space", - sizeof(struct btrfs_free_space), 0, - SLAB_MEM_SPREAD, NULL); - if (!btrfs_free_space_cachep) - goto fail; - - btrfs_free_space_bitmap_cachep = kmem_cache_create("btrfs_free_space_bitmap", - PAGE_SIZE, PAGE_SIZE, - SLAB_MEM_SPREAD, NULL); - if (!btrfs_free_space_bitmap_cachep) - goto fail; - if (bioset_init(&btrfs_dio_bioset, BIO_POOL_SIZE, offsetof(struct 
btrfs_dio_private, bio), BIOSET_NEED_BVECS)) @@ -9013,6 +9068,8 @@ static int btrfs_rename_exchange(struct inode *old_dir, int ret; int ret2; bool need_abort = false; + struct fscrypt_name old_fname, new_fname; + struct fscrypt_str *old_name, *new_name; /* * For non-subvolumes allow exchange only within one subvolume, in the @@ -9024,6 +9081,19 @@ static int btrfs_rename_exchange(struct inode *old_dir, new_ino != BTRFS_FIRST_FREE_OBJECTID)) return -EXDEV; + ret = fscrypt_setup_filename(old_dir, &old_dentry->d_name, 0, &old_fname); + if (ret) + return ret; + + ret = fscrypt_setup_filename(new_dir, &new_dentry->d_name, 0, &new_fname); + if (ret) { + fscrypt_free_filename(&old_fname); + return ret; + } + + old_name = &old_fname.disk_name; + new_name = &new_fname.disk_name; + /* close the race window with snapshot create/destroy ioctl */ if (old_ino == BTRFS_FIRST_FREE_OBJECTID || new_ino == BTRFS_FIRST_FREE_OBJECTID) @@ -9091,10 +9161,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, /* force full log commit if subvolume involved. */ btrfs_set_log_full_commit(trans); } else { - ret = btrfs_insert_inode_ref(trans, dest, - new_dentry->d_name.name, - new_dentry->d_name.len, - old_ino, + ret = btrfs_insert_inode_ref(trans, dest, new_name, old_ino, btrfs_ino(BTRFS_I(new_dir)), old_idx); if (ret) @@ -9107,10 +9174,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, /* force full log commit if subvolume involved. */ btrfs_set_log_full_commit(trans); } else { - ret = btrfs_insert_inode_ref(trans, root, - old_dentry->d_name.name, - old_dentry->d_name.len, - new_ino, + ret = btrfs_insert_inode_ref(trans, root, old_name, new_ino, btrfs_ino(BTRFS_I(old_dir)), new_idx); if (ret) { @@ -9141,13 +9205,11 @@ static int btrfs_rename_exchange(struct inode *old_dir, /* src is a subvolume */ if (old_ino == BTRFS_FIRST_FREE_OBJECTID) { - ret = btrfs_unlink_subvol(trans, old_dir, old_dentry); + ret = btrfs_unlink_subvol(trans, BTRFS_I(old_dir), old_dentry); } else { /* src is an inode */ ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir), BTRFS_I(old_dentry->d_inode), - old_dentry->d_name.name, - old_dentry->d_name.len, - &old_rename_ctx); + old_name, &old_rename_ctx); if (!ret) ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode)); } @@ -9158,13 +9220,11 @@ static int btrfs_rename_exchange(struct inode *old_dir, /* dest is a subvolume */ if (new_ino == BTRFS_FIRST_FREE_OBJECTID) { - ret = btrfs_unlink_subvol(trans, new_dir, new_dentry); + ret = btrfs_unlink_subvol(trans, BTRFS_I(new_dir), new_dentry); } else { /* dest is an inode */ ret = __btrfs_unlink_inode(trans, BTRFS_I(new_dir), BTRFS_I(new_dentry->d_inode), - new_dentry->d_name.name, - new_dentry->d_name.len, - &new_rename_ctx); + new_name, &new_rename_ctx); if (!ret) ret = btrfs_update_inode(trans, dest, BTRFS_I(new_inode)); } @@ -9174,16 +9234,14 @@ static int btrfs_rename_exchange(struct inode *old_dir, } ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode), - new_dentry->d_name.name, - new_dentry->d_name.len, 0, old_idx); + new_name, 0, old_idx); if (ret) { btrfs_abort_transaction(trans, ret); goto out_fail; } ret = btrfs_add_link(trans, BTRFS_I(old_dir), BTRFS_I(new_inode), - old_dentry->d_name.name, - old_dentry->d_name.len, 0, new_idx); + old_name, 0, new_idx); if (ret) { btrfs_abort_transaction(trans, ret); goto out_fail; @@ -9226,6 +9284,8 @@ out_notrans: old_ino == BTRFS_FIRST_FREE_OBJECTID) up_read(&fs_info->subvol_sem); + fscrypt_free_filename(&new_fname); + fscrypt_free_filename(&old_fname); return ret; } @@ 
-9265,6 +9325,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns, int ret; int ret2; u64 old_ino = btrfs_ino(BTRFS_I(old_inode)); + struct fscrypt_name old_fname, new_fname; if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) return -EPERM; @@ -9281,22 +9342,28 @@ static int btrfs_rename(struct user_namespace *mnt_userns, new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) return -ENOTEMPTY; + ret = fscrypt_setup_filename(old_dir, &old_dentry->d_name, 0, &old_fname); + if (ret) + return ret; - /* check for collisions, even if the name isn't there */ - ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino, - new_dentry->d_name.name, - new_dentry->d_name.len); + ret = fscrypt_setup_filename(new_dir, &new_dentry->d_name, 0, &new_fname); + if (ret) { + fscrypt_free_filename(&old_fname); + return ret; + } + /* check for collisions, even if the name isn't there */ + ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino, &new_fname.disk_name); if (ret) { if (ret == -EEXIST) { /* we shouldn't get * eexist without a new_inode */ if (WARN_ON(!new_inode)) { - return ret; + goto out_fscrypt_names; } } else { /* maybe -EOVERFLOW */ - return ret; + goto out_fscrypt_names; } } ret = 0; @@ -9379,11 +9446,9 @@ static int btrfs_rename(struct user_namespace *mnt_userns, /* force full log commit if subvolume involved. */ btrfs_set_log_full_commit(trans); } else { - ret = btrfs_insert_inode_ref(trans, dest, - new_dentry->d_name.name, - new_dentry->d_name.len, - old_ino, - btrfs_ino(BTRFS_I(new_dir)), index); + ret = btrfs_insert_inode_ref(trans, dest, &new_fname.disk_name, + old_ino, btrfs_ino(BTRFS_I(new_dir)), + index); if (ret) goto out_fail; } @@ -9402,13 +9467,11 @@ static int btrfs_rename(struct user_namespace *mnt_userns, BTRFS_I(old_inode), 1); if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) { - ret = btrfs_unlink_subvol(trans, old_dir, old_dentry); + ret = btrfs_unlink_subvol(trans, BTRFS_I(old_dir), old_dentry); } else { ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir), - BTRFS_I(d_inode(old_dentry)), - old_dentry->d_name.name, - old_dentry->d_name.len, - &rename_ctx); + BTRFS_I(d_inode(old_dentry)), + &old_fname.disk_name, &rename_ctx); if (!ret) ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode)); } @@ -9422,13 +9485,12 @@ static int btrfs_rename(struct user_namespace *mnt_userns, new_inode->i_ctime = current_time(new_inode); if (unlikely(btrfs_ino(BTRFS_I(new_inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { - ret = btrfs_unlink_subvol(trans, new_dir, new_dentry); + ret = btrfs_unlink_subvol(trans, BTRFS_I(new_dir), new_dentry); BUG_ON(new_inode->i_nlink == 0); } else { ret = btrfs_unlink_inode(trans, BTRFS_I(new_dir), BTRFS_I(d_inode(new_dentry)), - new_dentry->d_name.name, - new_dentry->d_name.len); + &new_fname.disk_name); } if (!ret && new_inode->i_nlink == 0) ret = btrfs_orphan_add(trans, @@ -9440,8 +9502,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns, } ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode), - new_dentry->d_name.name, - new_dentry->d_name.len, 0, index); + &new_fname.disk_name, 0, index); if (ret) { btrfs_abort_transaction(trans, ret); goto out_fail; @@ -9476,6 +9537,9 @@ out_notrans: out_whiteout_inode: if (flags & RENAME_WHITEOUT) iput(whiteout_args.inode); +out_fscrypt_names: + fscrypt_free_filename(&old_fname); + fscrypt_free_filename(&new_fname); return ret; } @@ -9595,7 +9659,7 @@ static int start_delalloc_inodes(struct btrfs_root *root, &work->work); } else { ret = filemap_fdatawrite_wbc(inode->i_mapping, 
wbc);
-			btrfs_add_delayed_iput(inode);
+			btrfs_add_delayed_iput(BTRFS_I(inode));
 			if (ret || wbc->nr_to_write <= 0)
 				goto out;
 		}
@@ -10205,7 +10269,7 @@ static ssize_t btrfs_encoded_read_inline(
 	read_extent_buffer(leaf, tmp, ptr, count);
 	btrfs_release_path(path);
 	unlock_extent(io_tree, start, lockend, cached_state);
-	btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
+	btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
 	*unlocked = true;
 
 	ret = copy_to_iter(tmp, count, iter);
@@ -10265,7 +10329,7 @@ static blk_status_t btrfs_encoded_read_verify_csum(struct btrfs_bio *bbio)
 		pgoff = bvec->bv_offset;
 		for (i = 0; i < nr_sectors; i++) {
 			ASSERT(pgoff < PAGE_SIZE);
-			if (btrfs_check_data_csum(&inode->vfs_inode, bbio, bio_offset,
+			if (btrfs_check_data_csum(inode, bbio, bio_offset,
 						  bvec->bv_page, pgoff))
 				return BLK_STS_IOERR;
 			bio_offset += sectorsize;
@@ -10408,7 +10472,7 @@ static ssize_t btrfs_encoded_read_regular(struct kiocb *iocb,
 		goto out;
 
 	unlock_extent(io_tree, start, lockend, cached_state);
-	btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
+	btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
 	*unlocked = true;
 
 	if (compressed) {
@@ -10457,10 +10521,10 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
 
 	file_accessed(iocb->ki_filp);
 
-	btrfs_inode_lock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
+	btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
 
 	if (iocb->ki_pos >= inode->vfs_inode.i_size) {
-		btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
+		btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
 		return 0;
 	}
 	start = ALIGN_DOWN(iocb->ki_pos, fs_info->sectorsize);
@@ -10558,7 +10622,7 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
 
 	if (disk_bytenr == EXTENT_MAP_HOLE) {
 		unlock_extent(io_tree, start, lockend, &cached_state);
-		btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
+		btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
 		unlocked = true;
 		ret = iov_iter_zero(count, iter);
 		if (ret != count)
@@ -10581,7 +10645,7 @@ out_unlock_extent:
 	unlock_extent(io_tree, start, lockend, &cached_state);
 out_unlock_inode:
 	if (!unlocked)
-		btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
+		btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
 	return ret;
 }
 
@@ -11247,7 +11311,7 @@ void btrfs_update_inode_bytes(struct btrfs_inode *inode,
 	spin_unlock(&inode->lock);
 }
 
-/**
+/*
  * Verify that there are no ordered extents for a given file range.
  *
  * @inode: The target inode.
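
For readers following the new read_inline_extent() helper in the diff above, its core behaviour is: copy at most one page of inline data into the destination page, then zero the remainder so no stale bytes are exposed past the extent's ram_bytes. Below is a minimal standalone C sketch of that copy-then-zero pattern only; the function and buffer names are hypothetical, PAGE_SIZE is assumed to be 4096, and plain memcpy()/memset() stand in for the kernel's read_extent_buffer()/memzero_page() helpers.

/*
 * Illustrative userspace sketch of the copy-then-zero-fill done by
 * read_inline_extent(): copy min(ram_bytes, PAGE_SIZE) bytes and zero
 * the rest of the page. Not kernel code; names are hypothetical.
 */
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096UL

static void fill_page_from_inline(unsigned char *page,
				  const unsigned char *inline_data,
				  unsigned long ram_bytes)
{
	unsigned long copy_size = ram_bytes < PAGE_SIZE ? ram_bytes : PAGE_SIZE;

	memcpy(page, inline_data, copy_size);
	/* Mirrors memzero_page(page, copy_size, PAGE_SIZE - copy_size). */
	if (copy_size < PAGE_SIZE)
		memset(page + copy_size, 0, PAGE_SIZE - copy_size);
}

int main(void)
{
	unsigned char page[PAGE_SIZE];
	unsigned char inline_data[100];

	memset(inline_data, 0xaa, sizeof(inline_data));
	fill_page_from_inline(page, inline_data, sizeof(inline_data));

	/* Bytes past the inline data must read back as zero. */
	printf("page[99]=0x%02x page[100]=0x%02x\n", page[99], page[100]);
	return 0;
}

The same concern is why uncompress_inline() keeps its memzero_page() call after dropping the pg_offset parameter: the decompressed inline data may not cover the whole page, so the trailing region is cleared explicitly.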