summaryrefslogtreecommitdiff
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c612
1 files changed, 370 insertions, 242 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 19c707bc8801..dbbb67293e34 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -70,6 +70,7 @@
#include "verity.h"
#include "super.h"
#include "orphan.h"
+#include "backref.h"
struct btrfs_iget_args {
u64 ino;
@@ -100,6 +101,18 @@ struct btrfs_rename_ctx {
u64 index;
};
+/*
+ * Used by data_reloc_print_warning_inode() to pass needed info for filename
+ * resolution and output of error message.
+ */
+struct data_reloc_warn {
+ struct btrfs_path path;
+ struct btrfs_fs_info *fs_info;
+ u64 extent_item_size;
+ u64 logical;
+ int mirror_num;
+};
+
static const struct inode_operations btrfs_dir_inode_operations;
static const struct inode_operations btrfs_symlink_inode_operations;
static const struct inode_operations btrfs_special_inode_operations;
@@ -122,12 +135,198 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
u64 ram_bytes, int compress_type,
int type);
+static int data_reloc_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
+ u64 root, void *warn_ctx)
+{
+ struct data_reloc_warn *warn = warn_ctx;
+ struct btrfs_fs_info *fs_info = warn->fs_info;
+ struct extent_buffer *eb;
+ struct btrfs_inode_item *inode_item;
+ struct inode_fs_paths *ipath = NULL;
+ struct btrfs_root *local_root;
+ struct btrfs_key key;
+ unsigned int nofs_flag;
+ u32 nlink;
+ int ret;
+
+ local_root = btrfs_get_fs_root(fs_info, root, true);
+ if (IS_ERR(local_root)) {
+ ret = PTR_ERR(local_root);
+ goto err;
+ }
+
+ /* This makes the path point to (inum INODE_ITEM ioff). */
+ key.objectid = inum;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ key.offset = 0;
+
+ ret = btrfs_search_slot(NULL, local_root, &key, &warn->path, 0, 0);
+ if (ret) {
+ btrfs_put_root(local_root);
+ btrfs_release_path(&warn->path);
+ goto err;
+ }
+
+ eb = warn->path.nodes[0];
+ inode_item = btrfs_item_ptr(eb, warn->path.slots[0], struct btrfs_inode_item);
+ nlink = btrfs_inode_nlink(eb, inode_item);
+ btrfs_release_path(&warn->path);
+
+ nofs_flag = memalloc_nofs_save();
+ ipath = init_ipath(4096, local_root, &warn->path);
+ memalloc_nofs_restore(nofs_flag);
+ if (IS_ERR(ipath)) {
+ btrfs_put_root(local_root);
+ ret = PTR_ERR(ipath);
+ ipath = NULL;
+ /*
+ * -ENOMEM, not a critical error, just output an generic error
+ * without filename.
+ */
+ btrfs_warn(fs_info,
+"checksum error at logical %llu mirror %u root %llu, inode %llu offset %llu",
+ warn->logical, warn->mirror_num, root, inum, offset);
+ return ret;
+ }
+ ret = paths_from_inode(inum, ipath);
+ if (ret < 0)
+ goto err;
+
+ /*
+ * We deliberately ignore the bit ipath might have been too small to
+ * hold all of the paths here
+ */
+ for (int i = 0; i < ipath->fspath->elem_cnt; i++) {
+ btrfs_warn(fs_info,
+"checksum error at logical %llu mirror %u root %llu inode %llu offset %llu length %u links %u (path: %s)",
+ warn->logical, warn->mirror_num, root, inum, offset,
+ fs_info->sectorsize, nlink,
+ (char *)(unsigned long)ipath->fspath->val[i]);
+ }
+
+ btrfs_put_root(local_root);
+ free_ipath(ipath);
+ return 0;
+
+err:
+ btrfs_warn(fs_info,
+"checksum error at logical %llu mirror %u root %llu inode %llu offset %llu, path resolving failed with ret=%d",
+ warn->logical, warn->mirror_num, root, inum, offset, ret);
+
+ free_ipath(ipath);
+ return ret;
+}
+
+/*
+ * Do extra user-friendly error output (e.g. lookup all the affected files).
+ *
+ * Return true if we succeeded doing the backref lookup.
+ * Return false if such lookup failed, and has to fallback to the old error message.
+ */
+static void print_data_reloc_error(const struct btrfs_inode *inode, u64 file_off,
+ const u8 *csum, const u8 *csum_expected,
+ int mirror_num)
+{
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct btrfs_path path = { 0 };
+ struct btrfs_key found_key = { 0 };
+ struct extent_buffer *eb;
+ struct btrfs_extent_item *ei;
+ const u32 csum_size = fs_info->csum_size;
+ u64 logical;
+ u64 flags;
+ u32 item_size;
+ int ret;
+
+ mutex_lock(&fs_info->reloc_mutex);
+ logical = btrfs_get_reloc_bg_bytenr(fs_info);
+ mutex_unlock(&fs_info->reloc_mutex);
+
+ if (logical == U64_MAX) {
+ btrfs_warn_rl(fs_info, "has data reloc tree but no running relocation");
+ btrfs_warn_rl(fs_info,
+"csum failed root %lld ino %llu off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d",
+ inode->root->root_key.objectid, btrfs_ino(inode), file_off,
+ CSUM_FMT_VALUE(csum_size, csum),
+ CSUM_FMT_VALUE(csum_size, csum_expected),
+ mirror_num);
+ return;
+ }
+
+ logical += file_off;
+ btrfs_warn_rl(fs_info,
+"csum failed root %lld ino %llu off %llu logical %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d",
+ inode->root->root_key.objectid,
+ btrfs_ino(inode), file_off, logical,
+ CSUM_FMT_VALUE(csum_size, csum),
+ CSUM_FMT_VALUE(csum_size, csum_expected),
+ mirror_num);
+
+ ret = extent_from_logical(fs_info, logical, &path, &found_key, &flags);
+ if (ret < 0) {
+ btrfs_err_rl(fs_info, "failed to lookup extent item for logical %llu: %d",
+ logical, ret);
+ return;
+ }
+ eb = path.nodes[0];
+ ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
+ item_size = btrfs_item_size(eb, path.slots[0]);
+ if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+ unsigned long ptr = 0;
+ u64 ref_root;
+ u8 ref_level;
+
+ while (true) {
+ ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
+ item_size, &ref_root,
+ &ref_level);
+ if (ret < 0) {
+ btrfs_warn_rl(fs_info,
+ "failed to resolve tree backref for logical %llu: %d",
+ logical, ret);
+ break;
+ }
+ if (ret > 0)
+ break;
+
+ btrfs_warn_rl(fs_info,
+"csum error at logical %llu mirror %u: metadata %s (level %d) in tree %llu",
+ logical, mirror_num,
+ (ref_level ? "node" : "leaf"),
+ ref_level, ref_root);
+ }
+ btrfs_release_path(&path);
+ } else {
+ struct btrfs_backref_walk_ctx ctx = { 0 };
+ struct data_reloc_warn reloc_warn = { 0 };
+
+ btrfs_release_path(&path);
+
+ ctx.bytenr = found_key.objectid;
+ ctx.extent_item_pos = logical - found_key.objectid;
+ ctx.fs_info = fs_info;
+
+ reloc_warn.logical = logical;
+ reloc_warn.extent_item_size = found_key.offset;
+ reloc_warn.mirror_num = mirror_num;
+ reloc_warn.fs_info = fs_info;
+
+ iterate_extent_inodes(&ctx, true,
+ data_reloc_print_warning_inode, &reloc_warn);
+ }
+}
+
static void __cold btrfs_print_data_csum_error(struct btrfs_inode *inode,
u64 logical_start, u8 *csum, u8 *csum_expected, int mirror_num)
{
struct btrfs_root *root = inode->root;
const u32 csum_size = root->fs_info->csum_size;
+ /* For data reloc tree, it's better to do a backref lookup instead. */
+ if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
+ return print_data_reloc_error(inode, logical_start, csum,
+ csum_expected, mirror_num);
+
/* Output without objectid, which is more meaningful */
if (root->root_key.objectid >= BTRFS_LAST_FREE_OBJECTID) {
btrfs_warn_rl(root->fs_info,
@@ -636,6 +835,7 @@ static noinline int compress_file_range(struct async_chunk *async_chunk)
{
struct btrfs_inode *inode = async_chunk->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct address_space *mapping = inode->vfs_inode.i_mapping;
u64 blocksize = fs_info->sectorsize;
u64 start = async_chunk->start;
u64 end = async_chunk->end;
@@ -750,7 +950,7 @@ again:
/* Compression level is applied here and only here */
ret = btrfs_compress_pages(
compress_type | (fs_info->compress_level << 4),
- inode->vfs_inode.i_mapping, start,
+ mapping, start,
pages,
&nr_pages,
&total_in,
@@ -793,9 +993,9 @@ cont:
unsigned long clear_flags = EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
EXTENT_DO_ACCOUNTING;
- unsigned long page_error_op;
- page_error_op = ret < 0 ? PAGE_SET_ERROR : 0;
+ if (ret < 0)
+ mapping_set_error(mapping, -EIO);
/*
* inline extent creation worked or returned error,
@@ -812,7 +1012,6 @@ cont:
clear_flags,
PAGE_UNLOCK |
PAGE_START_WRITEBACK |
- page_error_op |
PAGE_END_WRITEBACK);
/*
@@ -934,6 +1133,12 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
unsigned long nr_written = 0;
int page_started = 0;
int ret;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .range_start = start,
+ .range_end = end,
+ .no_cgroup_owner = 1,
+ };
/*
* Call cow_file_range() to run the delalloc range directly, since we
@@ -954,8 +1159,6 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
const u64 page_start = page_offset(locked_page);
const u64 page_end = page_start + PAGE_SIZE - 1;
- btrfs_page_set_error(inode->root->fs_info, locked_page,
- page_start, PAGE_SIZE);
set_page_writeback(locked_page);
end_page_writeback(locked_page);
end_extent_writepage(locked_page, ret, page_start, page_end);
@@ -965,7 +1168,10 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
}
/* All pages will be unlocked, including @locked_page */
- return extent_write_locked_range(&inode->vfs_inode, start, end);
+ wbc_attach_fdatawrite_inode(&wbc, &inode->vfs_inode);
+ ret = extent_write_locked_range(&inode->vfs_inode, start, end, &wbc);
+ wbc_detach_inode(&wbc);
+ return ret;
}
static int submit_one_async_extent(struct btrfs_inode *inode,
@@ -976,6 +1182,7 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
struct extent_io_tree *io_tree = &inode->io_tree;
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_ordered_extent *ordered;
struct btrfs_key ins;
struct page *locked_page = NULL;
struct extent_map *em;
@@ -1037,7 +1244,7 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
}
free_extent_map(em);
- ret = btrfs_add_ordered_extent(inode, start, /* file_offset */
+ ordered = btrfs_alloc_ordered_extent(inode, start, /* file_offset */
async_extent->ram_size, /* num_bytes */
async_extent->ram_size, /* ram_bytes */
ins.objectid, /* disk_bytenr */
@@ -1045,8 +1252,9 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
0, /* offset */
1 << BTRFS_ORDERED_COMPRESSED,
async_extent->compress_type);
- if (ret) {
+ if (IS_ERR(ordered)) {
btrfs_drop_extent_map_range(inode, start, end, false);
+ ret = PTR_ERR(ordered);
goto out_free_reserve;
}
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
@@ -1055,11 +1263,7 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
extent_clear_unlock_delalloc(inode, start, end,
NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
PAGE_UNLOCK | PAGE_START_WRITEBACK);
-
- btrfs_submit_compressed_write(inode, start, /* file_offset */
- async_extent->ram_size, /* num_bytes */
- ins.objectid, /* disk_bytenr */
- ins.offset, /* compressed_len */
+ btrfs_submit_compressed_write(ordered,
async_extent->pages, /* compressed_pages */
async_extent->nr_pages,
async_chunk->write_flags, true);
@@ -1074,12 +1278,13 @@ out_free_reserve:
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
out_free:
+ mapping_set_error(inode->vfs_inode.i_mapping, -EIO);
extent_clear_unlock_delalloc(inode, start, end,
NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW |
EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
PAGE_UNLOCK | PAGE_START_WRITEBACK |
- PAGE_END_WRITEBACK | PAGE_SET_ERROR);
+ PAGE_END_WRITEBACK);
free_async_extent_pages(async_extent);
goto done;
}
@@ -1287,6 +1492,8 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
min_alloc_size = fs_info->sectorsize;
while (num_bytes > 0) {
+ struct btrfs_ordered_extent *ordered;
+
cur_alloc_size = num_bytes;
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
min_alloc_size, 0, alloc_hint,
@@ -1311,16 +1518,18 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
}
free_extent_map(em);
- ret = btrfs_add_ordered_extent(inode, start, ram_size, ram_size,
- ins.objectid, cur_alloc_size, 0,
- 1 << BTRFS_ORDERED_REGULAR,
- BTRFS_COMPRESS_NONE);
- if (ret)
+ ordered = btrfs_alloc_ordered_extent(inode, start, ram_size,
+ ram_size, ins.objectid, cur_alloc_size,
+ 0, 1 << BTRFS_ORDERED_REGULAR,
+ BTRFS_COMPRESS_NONE);
+ if (IS_ERR(ordered)) {
+ ret = PTR_ERR(ordered);
goto out_drop_extent_cache;
+ }
if (btrfs_is_data_reloc_root(root)) {
- ret = btrfs_reloc_clone_csums(inode, start,
- cur_alloc_size);
+ ret = btrfs_reloc_clone_csums(ordered);
+
/*
* Only drop cache here, and process as normal.
*
@@ -1337,6 +1546,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
start + ram_size - 1,
false);
}
+ btrfs_put_ordered_extent(ordered);
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
@@ -1494,7 +1704,7 @@ static noinline void async_cow_submit(struct btrfs_work *work)
* ->inode could be NULL if async_chunk_start has failed to compress,
* in which case we don't have anything to submit, yet we need to
* always adjust ->async_delalloc_pages as its paired with the init
- * happening in cow_file_range_async
+ * happening in run_delalloc_compressed
*/
if (async_chunk->inode)
submit_compressed_extents(async_chunk);
@@ -1521,58 +1731,36 @@ static noinline void async_cow_free(struct btrfs_work *work)
kvfree(async_cow);
}
-static int cow_file_range_async(struct btrfs_inode *inode,
- struct writeback_control *wbc,
- struct page *locked_page,
- u64 start, u64 end, int *page_started,
- unsigned long *nr_written)
+static bool run_delalloc_compressed(struct btrfs_inode *inode,
+ struct writeback_control *wbc,
+ struct page *locked_page,
+ u64 start, u64 end, int *page_started,
+ unsigned long *nr_written)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc);
struct async_cow *ctx;
struct async_chunk *async_chunk;
unsigned long nr_pages;
- u64 cur_end;
u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
int i;
- bool should_compress;
unsigned nofs_flag;
const blk_opf_t write_flags = wbc_to_write_flags(wbc);
- unlock_extent(&inode->io_tree, start, end, NULL);
-
- if (inode->flags & BTRFS_INODE_NOCOMPRESS &&
- !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
- num_chunks = 1;
- should_compress = false;
- } else {
- should_compress = true;
- }
-
nofs_flag = memalloc_nofs_save();
ctx = kvmalloc(struct_size(ctx, chunks, num_chunks), GFP_KERNEL);
memalloc_nofs_restore(nofs_flag);
+ if (!ctx)
+ return false;
- if (!ctx) {
- unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC |
- EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
- EXTENT_DO_ACCOUNTING;
- unsigned long page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK |
- PAGE_END_WRITEBACK | PAGE_SET_ERROR;
-
- extent_clear_unlock_delalloc(inode, start, end, locked_page,
- clear_bits, page_ops);
- return -ENOMEM;
- }
+ unlock_extent(&inode->io_tree, start, end, NULL);
+ set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
async_chunk = ctx->chunks;
atomic_set(&ctx->num_chunks, num_chunks);
for (i = 0; i < num_chunks; i++) {
- if (should_compress)
- cur_end = min(end, start + SZ_512K - 1);
- else
- cur_end = end;
+ u64 cur_end = min(end, start + SZ_512K - 1);
/*
* igrab is called higher up in the call chain, take only the
@@ -1633,13 +1821,14 @@ static int cow_file_range_async(struct btrfs_inode *inode,
start = cur_end + 1;
}
*page_started = 1;
- return 0;
+ return true;
}
static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
struct page *locked_page, u64 start,
u64 end, int *page_started,
- unsigned long *nr_written)
+ unsigned long *nr_written,
+ struct writeback_control *wbc)
{
u64 done_offset = end;
int ret;
@@ -1671,8 +1860,8 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
account_page_redirty(locked_page);
}
locked_page_done = true;
- extent_write_locked_range(&inode->vfs_inode, start, done_offset);
-
+ extent_write_locked_range(&inode->vfs_inode, start, done_offset,
+ wbc);
start = done_offset + 1;
}
@@ -1864,7 +2053,7 @@ static int can_nocow_file_extent(struct btrfs_path *path,
ret = btrfs_cross_ref_exist(root, btrfs_ino(inode),
key->offset - args->extent_offset,
- args->disk_bytenr, false, path);
+ args->disk_bytenr, args->strict, path);
WARN_ON_ONCE(ret > 0 && is_freespace_inode);
if (ret != 0)
goto out;
@@ -1947,6 +2136,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
nocow_args.writeback_path = true;
while (1) {
+ struct btrfs_ordered_extent *ordered;
struct btrfs_key found_key;
struct btrfs_file_extent_item *fi;
struct extent_buffer *leaf;
@@ -1954,6 +2144,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
u64 ram_bytes;
u64 nocow_end;
int extent_type;
+ bool is_prealloc;
nocow = false;
@@ -2092,8 +2283,8 @@ out_check:
}
nocow_end = cur_offset + nocow_args.num_bytes - 1;
-
- if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
+ is_prealloc = extent_type == BTRFS_FILE_EXTENT_PREALLOC;
+ if (is_prealloc) {
u64 orig_start = found_key.offset - nocow_args.extent_offset;
struct extent_map *em;
@@ -2109,29 +2300,22 @@ out_check:
goto error;
}
free_extent_map(em);
- ret = btrfs_add_ordered_extent(inode,
- cur_offset, nocow_args.num_bytes,
- nocow_args.num_bytes,
- nocow_args.disk_bytenr,
- nocow_args.num_bytes, 0,
- 1 << BTRFS_ORDERED_PREALLOC,
- BTRFS_COMPRESS_NONE);
- if (ret) {
+ }
+
+ ordered = btrfs_alloc_ordered_extent(inode, cur_offset,
+ nocow_args.num_bytes, nocow_args.num_bytes,
+ nocow_args.disk_bytenr, nocow_args.num_bytes, 0,
+ is_prealloc
+ ? (1 << BTRFS_ORDERED_PREALLOC)
+ : (1 << BTRFS_ORDERED_NOCOW),
+ BTRFS_COMPRESS_NONE);
+ if (IS_ERR(ordered)) {
+ if (is_prealloc) {
btrfs_drop_extent_map_range(inode, cur_offset,
nocow_end, false);
- goto error;
}
- } else {
- ret = btrfs_add_ordered_extent(inode, cur_offset,
- nocow_args.num_bytes,
- nocow_args.num_bytes,
- nocow_args.disk_bytenr,
- nocow_args.num_bytes,
- 0,
- 1 << BTRFS_ORDERED_NOCOW,
- BTRFS_COMPRESS_NONE);
- if (ret)
- goto error;
+ ret = PTR_ERR(ordered);
+ goto error;
}
if (nocow) {
@@ -2145,8 +2329,8 @@ out_check:
* extent_clear_unlock_delalloc() in error handler
* from freeing metadata of created ordered extent.
*/
- ret = btrfs_reloc_clone_csums(inode, cur_offset,
- nocow_args.num_bytes);
+ ret = btrfs_reloc_clone_csums(ordered);
+ btrfs_put_ordered_extent(ordered);
extent_clear_unlock_delalloc(inode, cur_offset, nocow_end,
locked_page, EXTENT_LOCKED |
@@ -2214,7 +2398,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
u64 start, u64 end, int *page_started, unsigned long *nr_written,
struct writeback_control *wbc)
{
- int ret;
+ int ret = 0;
const bool zoned = btrfs_is_zoned(inode->root->fs_info);
/*
@@ -2235,19 +2419,23 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
ASSERT(!zoned || btrfs_is_data_reloc_root(inode->root));
ret = run_delalloc_nocow(inode, locked_page, start, end,
page_started, nr_written);
- } else if (!btrfs_inode_can_compress(inode) ||
- !inode_need_compress(inode, start, end)) {
- if (zoned)
- ret = run_delalloc_zoned(inode, locked_page, start, end,
- page_started, nr_written);
- else
- ret = cow_file_range(inode, locked_page, start, end,
- page_started, nr_written, 1, NULL);
- } else {
- set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
- ret = cow_file_range_async(inode, wbc, locked_page, start, end,
- page_started, nr_written);
+ goto out;
}
+
+ if (btrfs_inode_can_compress(inode) &&
+ inode_need_compress(inode, start, end) &&
+ run_delalloc_compressed(inode, wbc, locked_page, start,
+ end, page_started, nr_written))
+ goto out;
+
+ if (zoned)
+ ret = run_delalloc_zoned(inode, locked_page, start, end,
+ page_started, nr_written, wbc);
+ else
+ ret = cow_file_range(inode, locked_page, start, end,
+ page_started, nr_written, 1, NULL);
+
+out:
ASSERT(ret <= 0);
if (ret)
btrfs_cleanup_ordered_extents(inode, locked_page, start,
@@ -2515,125 +2703,42 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode,
}
}
-/*
- * Split off the first pre bytes from the extent_map at [start, start + len]
- *
- * This function is intended to be used only for extract_ordered_extent().
- */
-static int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre)
-{
- struct extent_map_tree *em_tree = &inode->extent_tree;
- struct extent_map *em;
- struct extent_map *split_pre = NULL;
- struct extent_map *split_mid = NULL;
- int ret = 0;
- unsigned long flags;
-
- ASSERT(pre != 0);
- ASSERT(pre < len);
-
- split_pre = alloc_extent_map();
- if (!split_pre)
- return -ENOMEM;
- split_mid = alloc_extent_map();
- if (!split_mid) {
- ret = -ENOMEM;
- goto out_free_pre;
- }
-
- lock_extent(&inode->io_tree, start, start + len - 1, NULL);
- write_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, start, len);
- if (!em) {
- ret = -EIO;
- goto out_unlock;
- }
-
- ASSERT(em->len == len);
- ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
- ASSERT(em->block_start < EXTENT_MAP_LAST_BYTE);
- ASSERT(test_bit(EXTENT_FLAG_PINNED, &em->flags));
- ASSERT(!test_bit(EXTENT_FLAG_LOGGING, &em->flags));
- ASSERT(!list_empty(&em->list));
-
- flags = em->flags;
- clear_bit(EXTENT_FLAG_PINNED, &em->flags);
-
- /* First, replace the em with a new extent_map starting from * em->start */
- split_pre->start = em->start;
- split_pre->len = pre;
- split_pre->orig_start = split_pre->start;
- split_pre->block_start = em->block_start;
- split_pre->block_len = split_pre->len;
- split_pre->orig_block_len = split_pre->block_len;
- split_pre->ram_bytes = split_pre->len;
- split_pre->flags = flags;
- split_pre->compress_type = em->compress_type;
- split_pre->generation = em->generation;
-
- replace_extent_mapping(em_tree, em, split_pre, 1);
-
- /*
- * Now we only have an extent_map at:
- * [em->start, em->start + pre]
- */
-
- /* Insert the middle extent_map. */
- split_mid->start = em->start + pre;
- split_mid->len = em->len - pre;
- split_mid->orig_start = split_mid->start;
- split_mid->block_start = em->block_start + pre;
- split_mid->block_len = split_mid->len;
- split_mid->orig_block_len = split_mid->block_len;
- split_mid->ram_bytes = split_mid->len;
- split_mid->flags = flags;
- split_mid->compress_type = em->compress_type;
- split_mid->generation = em->generation;
- add_extent_mapping(em_tree, split_mid, 1);
-
- /* Once for us */
- free_extent_map(em);
- /* Once for the tree */
- free_extent_map(em);
-
-out_unlock:
- write_unlock(&em_tree->lock);
- unlock_extent(&inode->io_tree, start, start + len - 1, NULL);
- free_extent_map(split_mid);
-out_free_pre:
- free_extent_map(split_pre);
- return ret;
-}
-
-int btrfs_extract_ordered_extent(struct btrfs_bio *bbio,
- struct btrfs_ordered_extent *ordered)
+static int btrfs_extract_ordered_extent(struct btrfs_bio *bbio,
+ struct btrfs_ordered_extent *ordered)
{
u64 start = (u64)bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
u64 len = bbio->bio.bi_iter.bi_size;
- struct btrfs_inode *inode = bbio->inode;
- u64 ordered_len = ordered->num_bytes;
- int ret = 0;
+ struct btrfs_ordered_extent *new;
+ int ret;
/* Must always be called for the beginning of an ordered extent. */
if (WARN_ON_ONCE(start != ordered->disk_bytenr))
return -EINVAL;
/* No need to split if the ordered extent covers the entire bio. */
- if (ordered->disk_num_bytes == len)
+ if (ordered->disk_num_bytes == len) {
+ refcount_inc(&ordered->refs);
+ bbio->ordered = ordered;
return 0;
-
- ret = btrfs_split_ordered_extent(ordered, len);
- if (ret)
- return ret;
+ }
/*
* Don't split the extent_map for NOCOW extents, as we're writing into
* a pre-existing one.
*/
- if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
- return 0;
+ if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
+ ret = split_extent_map(bbio->inode, bbio->file_offset,
+ ordered->num_bytes, len,
+ ordered->disk_bytenr);
+ if (ret)
+ return ret;
+ }
- return split_extent_map(inode, bbio->file_offset, ordered_len, len);
+ new = btrfs_split_ordered_extent(ordered, len);
+ if (IS_ERR(new))
+ return PTR_ERR(new);
+ bbio->ordered = new;
+ return 0;
}
/*
@@ -2651,7 +2756,7 @@ static int add_pending_csums(struct btrfs_trans_handle *trans,
trans->adding_csums = true;
if (!csum_root)
csum_root = btrfs_csum_root(trans->fs_info,
- sum->bytenr);
+ sum->logical);
ret = btrfs_csum_file_blocks(trans, csum_root, sum);
trans->adding_csums = false;
if (ret)
@@ -2689,8 +2794,7 @@ static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
ret = set_extent_bit(&inode->io_tree, search_start,
search_start + em_len - 1,
- EXTENT_DELALLOC_NEW, cached_state,
- GFP_NOFS);
+ EXTENT_DELALLOC_NEW, cached_state);
next:
search_start = extent_map_end(em);
free_extent_map(em);
@@ -2723,8 +2827,8 @@ int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
return ret;
}
- return set_extent_delalloc(&inode->io_tree, start, end, extra_bits,
- cached_state);
+ return set_extent_bit(&inode->io_tree, start, end,
+ EXTENT_DELALLOC | extra_bits, cached_state);
}
/* see btrfs_writepage_start_hook for details on why this is required */
@@ -2847,7 +2951,6 @@ out_page:
mapping_set_error(page->mapping, ret);
end_extent_writepage(page, ret, page_start, page_end);
clear_page_dirty_for_io(page);
- SetPageError(page);
}
btrfs_page_clear_checked(inode->root->fs_info, page, page_start, PAGE_SIZE);
unlock_page(page);
@@ -3068,7 +3171,7 @@ static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
* an ordered extent if the range of bytes in the file it covers are
* fully written.
*/
-int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
+int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
{
struct btrfs_inode *inode = BTRFS_I(ordered_extent->inode);
struct btrfs_root *root = inode->root;
@@ -3103,15 +3206,9 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
goto out;
}
- /* A valid ->physical implies a write on a sequential zone. */
- if (ordered_extent->physical != (u64)-1) {
- btrfs_rewrite_logical_zoned(ordered_extent);
- btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
- ordered_extent->disk_num_bytes);
- } else if (btrfs_is_data_reloc_root(inode->root)) {
+ if (btrfs_is_zoned(fs_info))
btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
ordered_extent->disk_num_bytes);
- }
if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
truncated = true;
@@ -3279,6 +3376,14 @@ out:
return ret;
}
+int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered)
+{
+ if (btrfs_is_zoned(btrfs_sb(ordered->inode->i_sb)) &&
+ !test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))
+ btrfs_finish_ordered_zoned(ordered);
+ return btrfs_finish_one_ordered(ordered);
+}
+
void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
struct page *page, u64 start,
u64 end, bool uptodate)
@@ -4226,7 +4331,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
}
btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
- 0);
+ false);
ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
&fname.disk_name);
@@ -4801,7 +4906,7 @@ again:
if (only_release_metadata)
set_extent_bit(&inode->io_tree, block_start, block_end,
- EXTENT_NORESERVE, NULL, GFP_NOFS);
+ EXTENT_NORESERVE, NULL);
out_unlock:
if (ret) {
@@ -7264,7 +7369,7 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
static int btrfs_get_blocks_direct_write(struct extent_map **map,
struct inode *inode,
struct btrfs_dio_data *dio_data,
- u64 start, u64 len,
+ u64 start, u64 *lenp,
unsigned int iomap_flags)
{
const bool nowait = (iomap_flags & IOMAP_NOWAIT);
@@ -7275,6 +7380,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
struct btrfs_block_group *bg;
bool can_nocow = false;
bool space_reserved = false;
+ u64 len = *lenp;
u64 prev_len;
int ret = 0;
@@ -7345,15 +7451,19 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
free_extent_map(em);
*map = NULL;
- if (nowait)
- return -EAGAIN;
+ if (nowait) {
+ ret = -EAGAIN;
+ goto out;
+ }
/*
* If we could not allocate data space before locking the file
* range and we can't do a NOCOW write, then we have to fail.
*/
- if (!dio_data->data_space_reserved)
- return -ENOSPC;
+ if (!dio_data->data_space_reserved) {
+ ret = -ENOSPC;
+ goto out;
+ }
/*
* We have to COW and we have already reserved data space before,
@@ -7394,6 +7504,7 @@ out:
btrfs_delalloc_release_extents(BTRFS_I(inode), len);
btrfs_delalloc_release_metadata(BTRFS_I(inode), len, true);
}
+ *lenp = len;
return ret;
}
@@ -7570,7 +7681,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
if (write) {
ret = btrfs_get_blocks_direct_write(&em, inode, dio_data,
- start, len, flags);
+ start, &len, flags);
if (ret < 0)
goto unlock_err;
unlock_extents = true;
@@ -7664,8 +7775,8 @@ static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
pos += submitted;
length -= submitted;
if (write)
- btrfs_mark_ordered_io_finished(BTRFS_I(inode), NULL,
- pos, length, false);
+ btrfs_finish_ordered_extent(dio_data->ordered, NULL,
+ pos, length, false);
else
unlock_extent(&BTRFS_I(inode)->io_tree, pos,
pos + length - 1, NULL);
@@ -7695,12 +7806,14 @@ static void btrfs_dio_end_io(struct btrfs_bio *bbio)
dip->file_offset, dip->bytes, bio->bi_status);
}
- if (btrfs_op(bio) == BTRFS_MAP_WRITE)
- btrfs_mark_ordered_io_finished(inode, NULL, dip->file_offset,
- dip->bytes, !bio->bi_status);
- else
+ if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
+ btrfs_finish_ordered_extent(bbio->ordered, NULL,
+ dip->file_offset, dip->bytes,
+ !bio->bi_status);
+ } else {
unlock_extent(&inode->io_tree, dip->file_offset,
dip->file_offset + dip->bytes - 1, NULL);
+ }
bbio->bio.bi_private = bbio->private;
iomap_dio_bio_end_io(bio);
@@ -7736,7 +7849,8 @@ static void btrfs_dio_submit_io(const struct iomap_iter *iter, struct bio *bio,
ret = btrfs_extract_ordered_extent(bbio, dio_data->ordered);
if (ret) {
- btrfs_bio_end_io(bbio, errno_to_blk_status(ret));
+ bbio->bio.bi_status = errno_to_blk_status(ret);
+ btrfs_dio_end_io(bbio);
return;
}
}
@@ -8230,7 +8344,7 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback)
int ret;
struct btrfs_trans_handle *trans;
u64 mask = fs_info->sectorsize - 1;
- u64 min_size = btrfs_calc_metadata_size(fs_info, 1);
+ const u64 min_size = btrfs_calc_metadata_size(fs_info, 1);
if (!skip_writeback) {
ret = btrfs_wait_ordered_range(&inode->vfs_inode,
@@ -8287,7 +8401,15 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback)
/* Migrate the slack space for the truncate to our reserve */
ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv,
min_size, false);
- BUG_ON(ret);
+ /*
+ * We have reserved 2 metadata units when we started the transaction and
+ * min_size matches 1 unit, so this should never fail, but if it does,
+ * it's not critical we just fail truncation.
+ */
+ if (WARN_ON(ret)) {
+ btrfs_end_transaction(trans);
+ goto out;
+ }
trans->block_rsv = rsv;
@@ -8335,7 +8457,14 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback)
btrfs_block_rsv_release(fs_info, rsv, -1, NULL);
ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv,
rsv, min_size, false);
- BUG_ON(ret); /* shouldn't happen */
+ /*
+ * We have reserved 2 metadata units when we started the
+ * transaction and min_size matches 1 unit, so this should never
+ * fail, but if it does, it's not critical we just fail truncation.
+ */
+ if (WARN_ON(ret))
+ break;
+
trans->block_rsv = rsv;
}
@@ -8462,7 +8591,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->io_tree.inode = ei;
extent_io_tree_init(fs_info, &ei->file_extent_tree,
IO_TREE_INODE_FILE_EXTENT);
- atomic_set(&ei->sync_writers, 0);
mutex_init(&ei->log_mutex);
btrfs_ordered_inode_tree_init(&ei->ordered_tree);
INIT_LIST_HEAD(&ei->delalloc_inodes);
@@ -8633,7 +8761,7 @@ static int btrfs_getattr(struct mnt_idmap *idmap,
inode_bytes = inode_get_bytes(inode);
spin_unlock(&BTRFS_I(inode)->lock);
stat->blocks = (ALIGN(inode_bytes, blocksize) +
- ALIGN(delalloc_bytes, blocksize)) >> 9;
+ ALIGN(delalloc_bytes, blocksize)) >> SECTOR_SHIFT;
return 0;
}
@@ -8789,9 +8917,9 @@ static int btrfs_rename_exchange(struct inode *old_dir,
if (old_dentry->d_parent != new_dentry->d_parent) {
btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
- BTRFS_I(old_inode), 1);
+ BTRFS_I(old_inode), true);
btrfs_record_unlink_dir(trans, BTRFS_I(new_dir),
- BTRFS_I(new_inode), 1);
+ BTRFS_I(new_inode), true);
}
/* src is a subvolume */
@@ -9057,7 +9185,7 @@ static int btrfs_rename(struct mnt_idmap *idmap,
if (old_dentry->d_parent != new_dentry->d_parent)
btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
- BTRFS_I(old_inode), 1);
+ BTRFS_I(old_inode), true);
if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
ret = btrfs_unlink_subvol(trans, BTRFS_I(old_dir), old_dentry);
@@ -10164,6 +10292,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
struct extent_io_tree *io_tree = &inode->io_tree;
struct extent_changeset *data_reserved = NULL;
struct extent_state *cached_state = NULL;
+ struct btrfs_ordered_extent *ordered;
int compression;
size_t orig_count;
u64 start, end;
@@ -10340,14 +10469,15 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
}
free_extent_map(em);
- ret = btrfs_add_ordered_extent(inode, start, num_bytes, ram_bytes,
+ ordered = btrfs_alloc_ordered_extent(inode, start, num_bytes, ram_bytes,
ins.objectid, ins.offset,
encoded->unencoded_offset,
(1 << BTRFS_ORDERED_ENCODED) |
(1 << BTRFS_ORDERED_COMPRESSED),
compression);
- if (ret) {
+ if (IS_ERR(ordered)) {
btrfs_drop_extent_map_range(inode, start, end, false);
+ ret = PTR_ERR(ordered);
goto out_free_reserved;
}
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
@@ -10359,8 +10489,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
btrfs_delalloc_release_extents(inode, num_bytes);
- btrfs_submit_compressed_write(inode, start, num_bytes, ins.objectid,
- ins.offset, pages, nr_pages, 0, false);
+ btrfs_submit_compressed_write(ordered, pages, nr_pages, 0, false);
ret = orig_count;
goto out;
@@ -10897,7 +11026,6 @@ static const struct address_space_operations btrfs_aops = {
.read_folio = btrfs_read_folio,
.writepages = btrfs_writepages,
.readahead = btrfs_readahead,
- .direct_IO = noop_direct_IO,
.invalidate_folio = btrfs_invalidate_folio,
.release_folio = btrfs_release_folio,
.migrate_folio = btrfs_migrate_folio,