summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorRussell King (Oracle) <rmk+kernel@armlinux.org.uk>2023-06-27 14:17:19 +0100
committerRussell King (Oracle) <rmk+kernel@armlinux.org.uk>2023-06-27 14:17:19 +0100
commit53ae158f6ddc14df5c44d62c06e33fdb66de1196 (patch)
treed0d0485f5f614e0070bab6ff1b53d56aec7c2d8e /fs
parentac9a78681b921877518763ba0e89202254349d1b (diff)
parent47ba5f39eab3c2a9a1ba878159a6050f2bbfc0e2 (diff)
Merge tag 'arm-vfp-refactor-for-rmk' of git://git.kernel.org/pub/scm/linux/kernel/git/ardb/linux into devel-stabledevel-stable
Refactor VFP support code and reimplement in C The VFP related changes to permit kernel mode NEON in softirq context resulted in some issues regarding en/disabling of sofirqs from asm code, and this made it clear that it would be better to handle more of it from C code. Given that we already have infrastructure that associates undefined instruction exceptions with handler code based on value/mask pairs, we can easily move the dispatch of VFP and NEON instructions to C code once we reimplement the actual VFP support routine (which reasons about how to deal with the exception and whether any emulation is needed) in C code first. With those out of the way, we can drop the partial decoding logic in asm that reasons about which ISA is being used by user space, as the remaining cases are all 32-bit ARM only. This leaves a FPE specific routine with some iWMMXT logic that is easily duplicated in C as well, allowing us to move the FPE asm code into the FPE asm source file, and out of the shared entry code.
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/backref.c19
-rw-r--r--fs/btrfs/backref.h6
-rw-r--r--fs/btrfs/block-rsv.c3
-rw-r--r--fs/btrfs/ctree.c38
-rw-r--r--fs/btrfs/disk-io.c25
-rw-r--r--fs/btrfs/file-item.c5
-rw-r--r--fs/btrfs/free-space-cache.c7
-rw-r--r--fs/btrfs/free-space-tree.c50
-rw-r--r--fs/btrfs/free-space-tree.h3
-rw-r--r--fs/btrfs/inode.c3
-rw-r--r--fs/btrfs/ioctl.c4
-rw-r--r--fs/btrfs/print-tree.c6
-rw-r--r--fs/btrfs/relocation.c2
-rw-r--r--fs/btrfs/super.c6
-rw-r--r--fs/btrfs/volumes.c1
-rw-r--r--fs/btrfs/zoned.c17
-rw-r--r--fs/cifs/cifsfs.c18
-rw-r--r--fs/cifs/connect.c7
-rw-r--r--fs/cifs/smb2ops.c2
-rw-r--r--fs/cifs/smb2pdu.c3
-rw-r--r--fs/ext4/balloc.c43
-rw-r--r--fs/ext4/ext4.h39
-rw-r--r--fs/ext4/extents_status.c30
-rw-r--r--fs/ext4/hash.c6
-rw-r--r--fs/ext4/ialloc.c12
-rw-r--r--fs/ext4/inline.c17
-rw-r--r--fs/ext4/inode.c20
-rw-r--r--fs/ext4/mballoc.c70
-rw-r--r--fs/ext4/migrate.c11
-rw-r--r--fs/ext4/mmp.c30
-rw-r--r--fs/ext4/namei.c53
-rw-r--r--fs/ext4/super.c37
-rw-r--r--fs/ext4/xattr.c5
-rw-r--r--fs/gfs2/super.c8
-rw-r--r--fs/nfs/dir.c2
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c11
-rw-r--r--fs/pipe.c6
-rw-r--r--fs/xfs/libxfs/xfs_ag.c19
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c5
-rw-r--r--fs/xfs/scrub/bmap.c4
-rw-r--r--fs/xfs/scrub/common.c26
-rw-r--r--fs/xfs/scrub/common.h2
-rw-r--r--fs/xfs/scrub/fscounters.c13
-rw-r--r--fs/xfs/scrub/scrub.c2
-rw-r--r--fs/xfs/scrub/scrub.h1
-rw-r--r--fs/xfs/scrub/trace.h1
-rw-r--r--fs/xfs/xfs_bmap_util.c4
-rw-r--r--fs/xfs/xfs_icache.c40
-rw-r--r--fs/xfs/xfs_iomap.c5
-rw-r--r--fs/xfs/xfs_mount.h3
-rw-r--r--fs/xfs/xfs_super.c3
51 files changed, 539 insertions, 214 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index e54f0884802a..79336fa853db 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -45,7 +45,8 @@ static int check_extent_in_eb(struct btrfs_backref_walk_ctx *ctx,
int root_count;
bool cached;
- if (!btrfs_file_extent_compression(eb, fi) &&
+ if (!ctx->ignore_extent_item_pos &&
+ !btrfs_file_extent_compression(eb, fi) &&
!btrfs_file_extent_encryption(eb, fi) &&
!btrfs_file_extent_other_encoding(eb, fi)) {
u64 data_offset;
@@ -552,7 +553,7 @@ static int add_all_parents(struct btrfs_backref_walk_ctx *ctx,
count++;
else
goto next;
- if (!ctx->ignore_extent_item_pos) {
+ if (!ctx->skip_inode_ref_list) {
ret = check_extent_in_eb(ctx, &key, eb, fi, &eie);
if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP ||
ret < 0)
@@ -564,7 +565,7 @@ static int add_all_parents(struct btrfs_backref_walk_ctx *ctx,
eie, (void **)&old, GFP_NOFS);
if (ret < 0)
break;
- if (!ret && !ctx->ignore_extent_item_pos) {
+ if (!ret && !ctx->skip_inode_ref_list) {
while (old->next)
old = old->next;
old->next = eie;
@@ -1606,7 +1607,7 @@ again:
goto out;
}
if (ref->count && ref->parent) {
- if (!ctx->ignore_extent_item_pos && !ref->inode_list &&
+ if (!ctx->skip_inode_ref_list && !ref->inode_list &&
ref->level == 0) {
struct btrfs_tree_parent_check check = { 0 };
struct extent_buffer *eb;
@@ -1647,7 +1648,7 @@ again:
(void **)&eie, GFP_NOFS);
if (ret < 0)
goto out;
- if (!ret && !ctx->ignore_extent_item_pos) {
+ if (!ret && !ctx->skip_inode_ref_list) {
/*
* We've recorded that parent, so we must extend
* its inode list here.
@@ -1743,7 +1744,7 @@ int btrfs_find_all_leafs(struct btrfs_backref_walk_ctx *ctx)
static int btrfs_find_all_roots_safe(struct btrfs_backref_walk_ctx *ctx)
{
const u64 orig_bytenr = ctx->bytenr;
- const bool orig_ignore_extent_item_pos = ctx->ignore_extent_item_pos;
+ const bool orig_skip_inode_ref_list = ctx->skip_inode_ref_list;
bool roots_ulist_allocated = false;
struct ulist_iterator uiter;
int ret = 0;
@@ -1764,7 +1765,7 @@ static int btrfs_find_all_roots_safe(struct btrfs_backref_walk_ctx *ctx)
roots_ulist_allocated = true;
}
- ctx->ignore_extent_item_pos = true;
+ ctx->skip_inode_ref_list = true;
ULIST_ITER_INIT(&uiter);
while (1) {
@@ -1789,7 +1790,7 @@ static int btrfs_find_all_roots_safe(struct btrfs_backref_walk_ctx *ctx)
ulist_free(ctx->refs);
ctx->refs = NULL;
ctx->bytenr = orig_bytenr;
- ctx->ignore_extent_item_pos = orig_ignore_extent_item_pos;
+ ctx->skip_inode_ref_list = orig_skip_inode_ref_list;
return ret;
}
@@ -1912,7 +1913,7 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
goto out_trans;
}
- walk_ctx.ignore_extent_item_pos = true;
+ walk_ctx.skip_inode_ref_list = true;
walk_ctx.trans = trans;
walk_ctx.fs_info = fs_info;
walk_ctx.refs = &ctx->refs;
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index ef6bbea3f456..1616e3e3f1e4 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -60,6 +60,12 @@ struct btrfs_backref_walk_ctx {
* @extent_item_pos is ignored.
*/
bool ignore_extent_item_pos;
+ /*
+ * If true and bytenr corresponds to a data extent, then the inode list
+ * (each member describing inode number, file offset and root) is not
+ * added to each reference added to the @refs ulist.
+ */
+ bool skip_inode_ref_list;
/* A valid transaction handle or NULL. */
struct btrfs_trans_handle *trans;
/*
diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
index 3ab707e26fa2..ac18c43fadad 100644
--- a/fs/btrfs/block-rsv.c
+++ b/fs/btrfs/block-rsv.c
@@ -124,7 +124,8 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
} else {
num_bytes = 0;
}
- if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
+ if (qgroup_to_release_ret &&
+ block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
qgroup_to_release = block_rsv->qgroup_rsv_reserved -
block_rsv->qgroup_rsv_size;
block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 3c983c70028a..2ff2961b1183 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2627,6 +2627,10 @@ static bool check_sibling_keys(struct extent_buffer *left,
}
if (btrfs_comp_cpu_keys(&left_last, &right_first) >= 0) {
+ btrfs_crit(left->fs_info, "left extent buffer:");
+ btrfs_print_tree(left, false);
+ btrfs_crit(left->fs_info, "right extent buffer:");
+ btrfs_print_tree(right, false);
btrfs_crit(left->fs_info,
"bad key order, sibling blocks, left last (%llu %u %llu) right first (%llu %u %llu)",
left_last.objectid, left_last.type,
@@ -3215,6 +3219,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (check_sibling_keys(left, right)) {
ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
btrfs_tree_unlock(right);
free_extent_buffer(right);
return ret;
@@ -3433,6 +3438,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
if (check_sibling_keys(left, right)) {
ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
goto out;
}
return __push_leaf_left(trans, path, min_data_size, empty, left,
@@ -4478,10 +4484,12 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
struct btrfs_key key;
+ struct btrfs_key orig_key;
struct btrfs_disk_key found_key;
int ret;
btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
+ orig_key = key;
if (key.offset > 0) {
key.offset--;
@@ -4498,8 +4506,36 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
+ if (ret <= 0)
return ret;
+
+ /*
+ * Previous key not found. Even if we were at slot 0 of the leaf we had
+ * before releasing the path and calling btrfs_search_slot(), we now may
+ * be in a slot pointing to the same original key - this can happen if
+ * after we released the path, one of more items were moved from a
+ * sibling leaf into the front of the leaf we had due to an insertion
+ * (see push_leaf_right()).
+ * If we hit this case and our slot is > 0 and just decrement the slot
+ * so that the caller does not process the same key again, which may or
+ * may not break the caller, depending on its logic.
+ */
+ if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) {
+ btrfs_item_key(path->nodes[0], &found_key, path->slots[0]);
+ ret = comp_keys(&found_key, &orig_key);
+ if (ret == 0) {
+ if (path->slots[0] > 0) {
+ path->slots[0]--;
+ return 0;
+ }
+ /*
+ * At slot 0, same key as before, it means orig_key is
+ * the lowest, leftmost, key in the tree. We're done.
+ */
+ return 1;
+ }
+ }
+
btrfs_item_key(path->nodes[0], &found_key, 0);
ret = comp_keys(&found_key, &key);
/*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 59ea049fe7ee..fbf9006c6234 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3121,23 +3121,34 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
{
int ret;
const bool cache_opt = btrfs_test_opt(fs_info, SPACE_CACHE);
- bool clear_free_space_tree = false;
+ bool rebuild_free_space_tree = false;
if (btrfs_test_opt(fs_info, CLEAR_CACHE) &&
btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
- clear_free_space_tree = true;
+ rebuild_free_space_tree = true;
} else if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID)) {
btrfs_warn(fs_info, "free space tree is invalid");
- clear_free_space_tree = true;
+ rebuild_free_space_tree = true;
}
- if (clear_free_space_tree) {
- btrfs_info(fs_info, "clearing free space tree");
- ret = btrfs_clear_free_space_tree(fs_info);
+ if (rebuild_free_space_tree) {
+ btrfs_info(fs_info, "rebuilding free space tree");
+ ret = btrfs_rebuild_free_space_tree(fs_info);
if (ret) {
btrfs_warn(fs_info,
- "failed to clear free space tree: %d", ret);
+ "failed to rebuild free space tree: %d", ret);
+ goto out;
+ }
+ }
+
+ if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
+ !btrfs_test_opt(fs_info, FREE_SPACE_TREE)) {
+ btrfs_info(fs_info, "disabling free space tree");
+ ret = btrfs_delete_free_space_tree(fs_info);
+ if (ret) {
+ btrfs_warn(fs_info,
+ "failed to disable free space tree: %d", ret);
goto out;
}
}
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 018c711a0bc8..cd4cce9ba443 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -52,13 +52,13 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz
u64 start, end, i_size;
int ret;
+ spin_lock(&inode->lock);
i_size = new_i_size ?: i_size_read(&inode->vfs_inode);
if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
inode->disk_i_size = i_size;
- return;
+ goto out_unlock;
}
- spin_lock(&inode->lock);
ret = find_contiguous_extent_bit(&inode->file_extent_tree, 0, &start,
&end, EXTENT_DIRTY);
if (!ret && start == 0)
@@ -66,6 +66,7 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz
else
i_size = 0;
inode->disk_i_size = i_size;
+out_unlock:
spin_unlock(&inode->lock);
}
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index d84cef89cdff..cf98a3c05480 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -870,15 +870,16 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
}
spin_lock(&ctl->tree_lock);
ret = link_free_space(ctl, e);
- ctl->total_bitmaps++;
- recalculate_thresholds(ctl);
- spin_unlock(&ctl->tree_lock);
if (ret) {
+ spin_unlock(&ctl->tree_lock);
btrfs_err(fs_info,
"Duplicate entries in free space cache, dumping");
kmem_cache_free(btrfs_free_space_cachep, e);
goto free_cache;
}
+ ctl->total_bitmaps++;
+ recalculate_thresholds(ctl);
+ spin_unlock(&ctl->tree_lock);
list_add_tail(&e->list, &bitmaps);
}
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 4d155a48ec59..b21da1446f2a 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1252,7 +1252,7 @@ out:
return ret;
}
-int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
+int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *tree_root = fs_info->tree_root;
@@ -1298,6 +1298,54 @@ abort:
return ret;
}
+int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_key key = {
+ .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
+ .type = BTRFS_ROOT_ITEM_KEY,
+ .offset = 0,
+ };
+ struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
+ struct rb_node *node;
+ int ret;
+
+ trans = btrfs_start_transaction(free_space_root, 1);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
+ set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
+
+ ret = clear_free_space_tree(trans, free_space_root);
+ if (ret)
+ goto abort;
+
+ node = rb_first_cached(&fs_info->block_group_cache_tree);
+ while (node) {
+ struct btrfs_block_group *block_group;
+
+ block_group = rb_entry(node, struct btrfs_block_group,
+ cache_node);
+ ret = populate_free_space_tree(trans, block_group);
+ if (ret)
+ goto abort;
+ node = rb_next(node);
+ }
+
+ btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+ btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
+ clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
+
+ ret = btrfs_commit_transaction(trans);
+ clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
+ return ret;
+abort:
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ return ret;
+}
+
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
struct btrfs_path *path)
diff --git a/fs/btrfs/free-space-tree.h b/fs/btrfs/free-space-tree.h
index dc2463e4cfe3..6d5551d0ced8 100644
--- a/fs/btrfs/free-space-tree.h
+++ b/fs/btrfs/free-space-tree.h
@@ -18,7 +18,8 @@ struct btrfs_caching_control;
void set_free_space_tree_thresholds(struct btrfs_block_group *block_group);
int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info);
-int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info);
+int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info);
+int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info);
int load_free_space_tree(struct btrfs_caching_control *caching_ctl);
int add_block_group_free_space(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 57d070025c7a..19c707bc8801 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3108,6 +3108,9 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
btrfs_rewrite_logical_zoned(ordered_extent);
btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
ordered_extent->disk_num_bytes);
+ } else if (btrfs_is_data_reloc_root(inode->root)) {
+ btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
+ ordered_extent->disk_num_bytes);
}
if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 25833b4eeaf5..2fa36f694daa 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -454,7 +454,9 @@ void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
case BTRFS_EXCLOP_BALANCE_PAUSED:
spin_lock(&fs_info->super_lock);
ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE ||
- fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD);
+ fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD ||
+ fs_info->exclusive_operation == BTRFS_EXCLOP_NONE ||
+ fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE_PAUSED;
spin_unlock(&fs_info->super_lock);
break;
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index b93c96213304..497b9dbd8a13 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -151,10 +151,10 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
pr_cont("shared data backref parent %llu count %u\n",
offset, btrfs_shared_data_ref_count(eb, sref));
/*
- * offset is supposed to be a tree block which
- * must be aligned to nodesize.
+ * Offset is supposed to be a tree block which must be
+ * aligned to sectorsize.
*/
- if (!IS_ALIGNED(offset, eb->fs_info->nodesize))
+ if (!IS_ALIGNED(offset, eb->fs_info->sectorsize))
pr_info(
"\t\t\t(parent %llu not aligned to sectorsize %u)\n",
offset, eb->fs_info->sectorsize);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 09b1988d1791..59a06499c647 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3422,7 +3422,7 @@ int add_data_references(struct reloc_control *rc,
btrfs_release_path(path);
ctx.bytenr = extent_key->objectid;
- ctx.ignore_extent_item_pos = true;
+ ctx.skip_inode_ref_list = true;
ctx.fs_info = rc->extent_root->fs_info;
ret = btrfs_find_all_leafs(&ctx);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 6cb97efee976..ec18e2210602 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -826,7 +826,11 @@ out:
!btrfs_test_opt(info, CLEAR_CACHE)) {
btrfs_err(info, "cannot disable free space tree");
ret = -EINVAL;
-
+ }
+ if (btrfs_fs_compat_ro(info, BLOCK_GROUP_TREE) &&
+ !btrfs_test_opt(info, FREE_SPACE_TREE)) {
+ btrfs_err(info, "cannot disable free space tree with block-group-tree feature");
+ ret = -EINVAL;
}
if (!ret)
ret = btrfs_check_mountopts_zoned(info);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 03f52e4a20aa..841e799dece5 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -395,6 +395,7 @@ void btrfs_free_device(struct btrfs_device *device)
{
WARN_ON(!list_empty(&device->post_commit_list));
rcu_string_free(device->name);
+ extent_io_tree_release(&device->alloc_state);
btrfs_destroy_dev_zone_info(device);
kfree(device);
}
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index a9b32ba6b2ce..39828af4a4e8 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -122,10 +122,9 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
int i;
for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) {
- u64 bytenr;
-
- bytenr = ((zones[i].start + zones[i].len)
- << SECTOR_SHIFT) - BTRFS_SUPER_INFO_SIZE;
+ u64 zone_end = (zones[i].start + zones[i].capacity) << SECTOR_SHIFT;
+ u64 bytenr = ALIGN_DOWN(zone_end, BTRFS_SUPER_INFO_SIZE) -
+ BTRFS_SUPER_INFO_SIZE;
page[i] = read_cache_page_gfp(mapping,
bytenr >> PAGE_SHIFT, GFP_NOFS);
@@ -1168,12 +1167,12 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
return -ERANGE;
/* All the zones are conventional */
- if (find_next_bit(zinfo->seq_zones, begin, end) == end)
+ if (find_next_bit(zinfo->seq_zones, end, begin) == end)
return 0;
/* All the zones are sequential and empty */
- if (find_next_zero_bit(zinfo->seq_zones, begin, end) == end &&
- find_next_zero_bit(zinfo->empty_zones, begin, end) == end)
+ if (find_next_zero_bit(zinfo->seq_zones, end, begin) == end &&
+ find_next_zero_bit(zinfo->empty_zones, end, begin) == end)
return 0;
for (pos = start; pos < start + size; pos += zinfo->zone_size) {
@@ -1610,11 +1609,11 @@ void btrfs_redirty_list_add(struct btrfs_transaction *trans,
!list_empty(&eb->release_list))
return;
+ memzero_extent_buffer(eb, 0, eb->len);
+ set_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags);
set_extent_buffer_dirty(eb);
set_extent_bits_nowait(&trans->dirty_pages, eb->start,
eb->start + eb->len - 1, EXTENT_DIRTY);
- memzero_extent_buffer(eb, 0, eb->len);
- set_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags);
spin_lock(&trans->releasing_ebs_lock);
list_add_tail(&eb->release_list, &trans->releasing_ebs);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 32f7c81a7b89..43a4d8603db3 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -246,7 +246,7 @@ cifs_read_super(struct super_block *sb)
if (cifs_sb->ctx->rasize)
sb->s_bdi->ra_pages = cifs_sb->ctx->rasize / PAGE_SIZE;
else
- sb->s_bdi->ra_pages = cifs_sb->ctx->rsize / PAGE_SIZE;
+ sb->s_bdi->ra_pages = 2 * (cifs_sb->ctx->rsize / PAGE_SIZE);
sb->s_blocksize = CIFS_MAX_MSGSIZE;
sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */
@@ -744,6 +744,7 @@ static void cifs_umount_begin(struct super_block *sb)
spin_unlock(&tcon->tc_lock);
spin_unlock(&cifs_tcp_ses_lock);
+ cifs_close_all_deferred_files(tcon);
/* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */
/* cancel_notify_requests(tcon); */
if (tcon->ses && tcon->ses->server) {
@@ -759,6 +760,20 @@ static void cifs_umount_begin(struct super_block *sb)
return;
}
+static int cifs_freeze(struct super_block *sb)
+{
+ struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
+ struct cifs_tcon *tcon;
+
+ if (cifs_sb == NULL)
+ return 0;
+
+ tcon = cifs_sb_master_tcon(cifs_sb);
+
+ cifs_close_all_deferred_files(tcon);
+ return 0;
+}
+
#ifdef CONFIG_CIFS_STATS2
static int cifs_show_stats(struct seq_file *s, struct dentry *root)
{
@@ -797,6 +812,7 @@ static const struct super_operations cifs_super_ops = {
as opens */
.show_options = cifs_show_options,
.umount_begin = cifs_umount_begin,
+ .freeze_fs = cifs_freeze,
#ifdef CONFIG_CIFS_STATS2
.show_stats = cifs_show_stats,
#endif
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index eeeed6fda13b..8e9a672320ab 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2709,6 +2709,13 @@ cifs_match_super(struct super_block *sb, void *data)
spin_lock(&cifs_tcp_ses_lock);
cifs_sb = CIFS_SB(sb);
+
+ /* We do not want to use a superblock that has been shutdown */
+ if (CIFS_MOUNT_SHUTDOWN & cifs_sb->mnt_cifs_flags) {
+ spin_unlock(&cifs_tcp_ses_lock);
+ return 0;
+ }
+
tlink = cifs_get_tlink(cifs_sb_master_tlink(cifs_sb));
if (tlink == NULL) {
/* can not match superblock if tlink were ever null */
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index a81758225fcd..a295e4c2d54e 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1682,7 +1682,7 @@ smb2_copychunk_range(const unsigned int xid,
pcchunk->SourceOffset = cpu_to_le64(src_off);
pcchunk->TargetOffset = cpu_to_le64(dest_off);
pcchunk->Length =
- cpu_to_le32(min_t(u32, len, tcon->max_bytes_chunk));
+ cpu_to_le32(min_t(u64, len, tcon->max_bytes_chunk));
/* Request server copy to target from src identified by key */
kfree(retbuf);
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index e33ca0d33906..9ed61b6f9b21 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1947,6 +1947,9 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
init_copy_chunk_defaults(tcon);
if (server->ops->validate_negotiate)
rc = server->ops->validate_negotiate(xid, tcon);
+ if (rc == 0) /* See MS-SMB2 2.2.10 and 3.2.5.5 */
+ if (tcon->share_flags & SMB2_SHAREFLAG_ISOLATED_TRANSPORT)
+ server->nosharesock = true;
tcon_exit:
free_rsp_buf(resp_buftype, rsp);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 094269488183..c1edde817be8 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -305,6 +305,38 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
return desc;
}
+static ext4_fsblk_t ext4_valid_block_bitmap_padding(struct super_block *sb,
+ ext4_group_t block_group,
+ struct buffer_head *bh)
+{
+ ext4_grpblk_t next_zero_bit;
+ unsigned long bitmap_size = sb->s_blocksize * 8;
+ unsigned int offset = num_clusters_in_group(sb, block_group);
+
+ if (bitmap_size <= offset)
+ return 0;
+
+ next_zero_bit = ext4_find_next_zero_bit(bh->b_data, bitmap_size, offset);
+
+ return (next_zero_bit < bitmap_size ? next_zero_bit : 0);
+}
+
+struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
+ ext4_group_t group)
+{
+ struct ext4_group_info **grp_info;
+ long indexv, indexh;
+
+ if (unlikely(group >= EXT4_SB(sb)->s_groups_count)) {
+ ext4_error(sb, "invalid group %u", group);
+ return NULL;
+ }
+ indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
+ indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
+ grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv);
+ return grp_info[indexh];
+}
+
/*
* Return the block number which was discovered to be invalid, or 0 if
* the block bitmap is valid.
@@ -379,7 +411,7 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
if (buffer_verified(bh))
return 0;
- if (EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+ if (!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
return -EFSCORRUPTED;
ext4_lock_group(sb, block_group);
@@ -402,6 +434,15 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
EXT4_GROUP_INFO_BBITMAP_CORRUPT);
return -EFSCORRUPTED;
}
+ blk = ext4_valid_block_bitmap_padding(sb, block_group, bh);
+ if (unlikely(blk != 0)) {
+ ext4_unlock_group(sb, block_group);
+ ext4_error(sb, "bg %u: block %llu: padding at end of block bitmap is not set",
+ block_group, blk);
+ ext4_mark_group_bitmap_corrupted(sb, block_group,
+ EXT4_GROUP_INFO_BBITMAP_CORRUPT);
+ return -EFSCORRUPTED;
+ }
set_buffer_verified(bh);
verified:
ext4_unlock_group(sb, block_group);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 18cb2680dc39..6948d673bba2 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1684,6 +1684,30 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
return container_of(inode, struct ext4_inode_info, vfs_inode);
}
+static inline int ext4_writepages_down_read(struct super_block *sb)
+{
+ percpu_down_read(&EXT4_SB(sb)->s_writepages_rwsem);
+ return memalloc_nofs_save();
+}
+
+static inline void ext4_writepages_up_read(struct super_block *sb, int ctx)
+{
+ memalloc_nofs_restore(ctx);
+ percpu_up_read(&EXT4_SB(sb)->s_writepages_rwsem);
+}
+
+static inline int ext4_writepages_down_write(struct super_block *sb)
+{
+ percpu_down_write(&EXT4_SB(sb)->s_writepages_rwsem);
+ return memalloc_nofs_save();
+}
+
+static inline void ext4_writepages_up_write(struct super_block *sb, int ctx)
+{
+ memalloc_nofs_restore(ctx);
+ percpu_up_write(&EXT4_SB(sb)->s_writepages_rwsem);
+}
+
static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
{
return ino == EXT4_ROOT_INO ||
@@ -2625,6 +2649,8 @@ extern void ext4_check_blocks_bitmap(struct super_block *);
extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
ext4_group_t block_group,
struct buffer_head ** bh);
+extern struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
+ ext4_group_t group);
extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb,
@@ -3232,19 +3258,6 @@ static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
raw_inode->i_size_high = cpu_to_le32(i_size >> 32);
}
-static inline
-struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
- ext4_group_t group)
-{
- struct ext4_group_info **grp_info;
- long indexv, indexh;
- BUG_ON(group >= EXT4_SB(sb)->s_groups_count);
- indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
- indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
- grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv);
- return grp_info[indexh];
-}
-
/*
* Reading s_groups_count requires using smp_rmb() afterwards. See
* the locking protocol documented in the comments of ext4_group_add()
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 7bc221038c6c..595abb9e7d74 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -267,14 +267,12 @@ static void __es_find_extent_range(struct inode *inode,
/* see if the extent has been cached */
es->es_lblk = es->es_len = es->es_pblk = 0;
- if (tree->cache_es) {
- es1 = tree->cache_es;
- if (in_range(lblk, es1->es_lblk, es1->es_len)) {
- es_debug("%u cached by [%u/%u) %llu %x\n",
- lblk, es1->es_lblk, es1->es_len,
- ext4_es_pblock(es1), ext4_es_status(es1));
- goto out;
- }
+ es1 = READ_ONCE(tree->cache_es);
+ if (es1 && in_range(lblk, es1->es_lblk, es1->es_len)) {
+ es_debug("%u cached by [%u/%u) %llu %x\n",
+ lblk, es1->es_lblk, es1->es_len,
+ ext4_es_pblock(es1), ext4_es_status(es1));
+ goto out;
}
es1 = __es_tree_search(&tree->root, lblk);
@@ -293,7 +291,7 @@ out:
}
if (es1 && matching_fn(es1)) {
- tree->cache_es = es1;
+ WRITE_ONCE(tree->cache_es, es1);
es->es_lblk = es1->es_lblk;
es->es_len = es1->es_len;
es->es_pblk = es1->es_pblk;
@@ -931,14 +929,12 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
/* find extent in cache firstly */
es->es_lblk = es->es_len = es->es_pblk = 0;
- if (tree->cache_es) {
- es1 = tree->cache_es;
- if (in_range(lblk, es1->es_lblk, es1->es_len)) {
- es_debug("%u cached by [%u/%u)\n",
- lblk, es1->es_lblk, es1->es_len);
- found = 1;
- goto out;
- }
+ es1 = READ_ONCE(tree->cache_es);
+ if (es1 && in_range(lblk, es1->es_lblk, es1->es_len)) {
+ es_debug("%u cached by [%u/%u)\n",
+ lblk, es1->es_lblk, es1->es_len);
+ found = 1;
+ goto out;
}
node = tree->root.rb_node;
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 147b5241dd94..46c3423ddfa1 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -277,7 +277,11 @@ static int __ext4fs_dirhash(const struct inode *dir, const char *name, int len,
}
default:
hinfo->hash = 0;
- return -1;
+ hinfo->minor_hash = 0;
+ ext4_warning(dir->i_sb,
+ "invalid/unsupported hash tree version %u",
+ hinfo->hash_version);
+ return -EINVAL;
}
hash = hash & ~1;
if (hash == (EXT4_HTREE_EOF_32BIT << 1))
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 787ab89c2c26..754f961cd9fd 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -91,7 +91,7 @@ static int ext4_validate_inode_bitmap(struct super_block *sb,
if (buffer_verified(bh))
return 0;
- if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
+ if (!grp || EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
return -EFSCORRUPTED;
ext4_lock_group(sb, block_group);
@@ -293,7 +293,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
}
if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
grp = ext4_get_group_info(sb, block_group);
- if (unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp))) {
+ if (!grp || unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp))) {
fatal = -EFSCORRUPTED;
goto error_return;
}
@@ -1046,7 +1046,7 @@ got_group:
* Skip groups with already-known suspicious inode
* tables
*/
- if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
+ if (!grp || EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
goto next_group;
}
@@ -1183,6 +1183,10 @@ got:
if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
grp = ext4_get_group_info(sb, group);
+ if (!grp) {
+ err = -EFSCORRUPTED;
+ goto out;
+ }
down_read(&grp->alloc_sem); /*
* protect vs itable
* lazyinit
@@ -1526,7 +1530,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
}
gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
- if (!gdp)
+ if (!gdp || !grp)
goto out;
/*
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 859bc4e2c9b0..5854bd5a3352 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -34,6 +34,7 @@ static int get_max_inline_xattr_value_size(struct inode *inode,
struct ext4_xattr_ibody_header *header;
struct ext4_xattr_entry *entry;
struct ext4_inode *raw_inode;
+ void *end;
int free, min_offs;
if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
@@ -57,14 +58,23 @@ static int get_max_inline_xattr_value_size(struct inode *inode,
raw_inode = ext4_raw_inode(iloc);
header = IHDR(inode, raw_inode);
entry = IFIRST(header);
+ end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
/* Compute min_offs. */
- for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
+ while (!IS_LAST_ENTRY(entry)) {
+ void *next = EXT4_XATTR_NEXT(entry);
+
+ if (next >= end) {
+ EXT4_ERROR_INODE(inode,
+ "corrupt xattr in inline inode");
+ return 0;
+ }
if (!entry->e_value_inum && entry->e_value_size) {
size_t offs = le16_to_cpu(entry->e_value_offs);
if (offs < min_offs)
min_offs = offs;
}
+ entry = next;
}
free = min_offs -
((void *)entry - (void *)IFIRST(header)) - sizeof(__u32);
@@ -350,7 +360,7 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
error = ext4_xattr_ibody_get(inode, i.name_index, i.name,
value, len);
- if (error == -ENODATA)
+ if (error < 0)
goto out;
BUFFER_TRACE(is.iloc.bh, "get_write_access");
@@ -1175,6 +1185,7 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
ext4_initialize_dirent_tail(dir_block,
inode->i_sb->s_blocksize);
set_buffer_uptodate(dir_block);
+ unlock_buffer(dir_block);
err = ext4_handle_dirty_dirblock(handle, inode, dir_block);
if (err)
return err;
@@ -1249,6 +1260,7 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
if (!S_ISDIR(inode->i_mode)) {
memcpy(data_bh->b_data, buf, inline_size);
set_buffer_uptodate(data_bh);
+ unlock_buffer(data_bh);
error = ext4_handle_dirty_metadata(handle,
inode, data_bh);
} else {
@@ -1256,7 +1268,6 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
buf, inline_size);
}
- unlock_buffer(data_bh);
out_restore:
if (error)
ext4_restore_inline_data(handle, inode, iloc, buf, inline_size);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0d5ba922e411..ce5f21b6c2b3 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2783,11 +2783,12 @@ static int ext4_writepages(struct address_space *mapping,
.can_map = 1,
};
int ret;
+ int alloc_ctx;
if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
return -EIO;
- percpu_down_read(&EXT4_SB(sb)->s_writepages_rwsem);
+ alloc_ctx = ext4_writepages_down_read(sb);
ret = ext4_do_writepages(&mpd);
/*
* For data=journal writeback we could have come across pages marked
@@ -2796,7 +2797,7 @@ static int ext4_writepages(struct address_space *mapping,
*/
if (!ret && mpd.journalled_more_data)
ret = ext4_do_writepages(&mpd);
- percpu_up_read(&EXT4_SB(sb)->s_writepages_rwsem);
+ ext4_writepages_up_read(sb, alloc_ctx);
return ret;
}
@@ -2824,17 +2825,18 @@ static int ext4_dax_writepages(struct address_space *mapping,
long nr_to_write = wbc->nr_to_write;
struct inode *inode = mapping->host;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
+ int alloc_ctx;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
- percpu_down_read(&sbi->s_writepages_rwsem);
+ alloc_ctx = ext4_writepages_down_read(inode->i_sb);
trace_ext4_writepages(inode, wbc);
ret = dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc);
trace_ext4_writepages_result(inode, wbc, ret,
nr_to_write - wbc->nr_to_write);
- percpu_up_read(&sbi->s_writepages_rwsem);
+ ext4_writepages_up_read(inode->i_sb, alloc_ctx);
return ret;
}
@@ -3375,7 +3377,7 @@ static int ext4_iomap_overwrite_begin(struct inode *inode, loff_t offset,
*/
flags &= ~IOMAP_WRITE;
ret = ext4_iomap_begin(inode, offset, length, flags, iomap, srcmap);
- WARN_ON_ONCE(iomap->type != IOMAP_MAPPED);
+ WARN_ON_ONCE(!ret && iomap->type != IOMAP_MAPPED);
return ret;
}
@@ -5928,7 +5930,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
journal_t *journal;
handle_t *handle;
int err;
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ int alloc_ctx;
/*
* We have to be very careful here: changing a data block's
@@ -5966,7 +5968,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
}
}
- percpu_down_write(&sbi->s_writepages_rwsem);
+ alloc_ctx = ext4_writepages_down_write(inode->i_sb);
jbd2_journal_lock_updates(journal);
/*
@@ -5983,7 +5985,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
err = jbd2_journal_flush(journal, 0);
if (err < 0) {
jbd2_journal_unlock_updates(journal);
- percpu_up_write(&sbi->s_writepages_rwsem);
+ ext4_writepages_up_write(inode->i_sb, alloc_ctx);
return err;
}
ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
@@ -5991,7 +5993,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
ext4_set_aops(inode);
jbd2_journal_unlock_updates(journal);
- percpu_up_write(&sbi->s_writepages_rwsem);
+ ext4_writepages_up_write(inode->i_sb, alloc_ctx);
if (val)
filemap_invalidate_unlock(inode->i_mapping);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 78259bddbc4d..7b2e36d103cb 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -745,6 +745,8 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);
grp = ext4_get_group_info(sb, e4b->bd_group);
+ if (!grp)
+ return NULL;
list_for_each(cur, &grp->bb_prealloc_list) {
ext4_group_t groupnr;
struct ext4_prealloc_space *pa;
@@ -1060,9 +1062,9 @@ mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
static noinline_for_stack
void ext4_mb_generate_buddy(struct super_block *sb,
- void *buddy, void *bitmap, ext4_group_t group)
+ void *buddy, void *bitmap, ext4_group_t group,
+ struct ext4_group_info *grp)
{
- struct ext4_group_info *grp = ext4_get_group_info(sb, group);
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
ext4_grpblk_t i = 0;
@@ -1181,6 +1183,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
break;
grinfo = ext4_get_group_info(sb, group);
+ if (!grinfo)
+ continue;
/*
* If page is uptodate then we came here after online resize
* which added some new uninitialized group info structs, so
@@ -1246,6 +1250,10 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
group, page->index, i * blocksize);
trace_ext4_mb_buddy_bitmap_load(sb, group);
grinfo = ext4_get_group_info(sb, group);
+ if (!grinfo) {
+ err = -EFSCORRUPTED;
+ goto out;
+ }
grinfo->bb_fragments = 0;
memset(grinfo->bb_counters, 0,
sizeof(*grinfo->bb_counters) *
@@ -1256,7 +1264,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
ext4_lock_group(sb, group);
/* init the buddy */
memset(data, 0xff, blocksize);
- ext4_mb_generate_buddy(sb, data, incore, group);
+ ext4_mb_generate_buddy(sb, data, incore, group, grinfo);
ext4_unlock_group(sb, group);
incore = NULL;
} else {
@@ -1370,6 +1378,9 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
might_sleep();
mb_debug(sb, "init group %u\n", group);
this_grp = ext4_get_group_info(sb, group);
+ if (!this_grp)
+ return -EFSCORRUPTED;
+
/*
* This ensures that we don't reinit the buddy cache
* page which map to the group from which we are already
@@ -1444,6 +1455,8 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
blocks_per_page = PAGE_SIZE / sb->s_blocksize;
grp = ext4_get_group_info(sb, group);
+ if (!grp)
+ return -EFSCORRUPTED;
e4b->bd_blkbits = sb->s_blocksize_bits;
e4b->bd_info = grp;
@@ -2159,6 +2172,8 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
struct ext4_free_extent ex;
+ if (!grp)
+ return -EFSCORRUPTED;
if (!(ac->ac_flags & (EXT4_MB_HINT_TRY_GOAL | EXT4_MB_HINT_GOAL_ONLY)))
return 0;
if (grp->bb_free == 0)
@@ -2385,7 +2400,7 @@ static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
BUG_ON(cr < 0 || cr >= 4);
- if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
+ if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp) || !grp))
return false;
free = grp->bb_free;
@@ -2454,6 +2469,8 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
ext4_grpblk_t free;
int ret = 0;
+ if (!grp)
+ return -EFSCORRUPTED;
if (sbi->s_mb_stats)
atomic64_inc(&sbi->s_bal_cX_groups_considered[ac->ac_criteria]);
if (should_lock) {
@@ -2534,7 +2551,7 @@ ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
* prefetch once, so we avoid getblk() call, which can
* be expensive.
*/
- if (!EXT4_MB_GRP_TEST_AND_SET_READ(grp) &&
+ if (gdp && grp && !EXT4_MB_GRP_TEST_AND_SET_READ(grp) &&
EXT4_MB_GRP_NEED_INIT(grp) &&
ext4_free_group_clusters(sb, gdp) > 0 &&
!(ext4_has_group_desc_csum(sb) &&
@@ -2578,7 +2595,7 @@ void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
gdp = ext4_get_group_desc(sb, group, NULL);
grp = ext4_get_group_info(sb, group);
- if (EXT4_MB_GRP_NEED_INIT(grp) &&
+ if (grp && gdp && EXT4_MB_GRP_NEED_INIT(grp) &&
ext4_free_group_clusters(sb, gdp) > 0 &&
!(ext4_has_group_desc_csum(sb) &&
(gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) {
@@ -2837,6 +2854,8 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
sizeof(struct ext4_group_info);
grinfo = ext4_get_group_info(sb, group);
+ if (!grinfo)
+ return 0;
/* Load the group info in memory only if not already loaded. */
if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
err = ext4_mb_load_buddy(sb, group, &e4b);
@@ -2847,7 +2866,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
buddy_loaded = 1;
}
- memcpy(&sg, ext4_get_group_info(sb, group), i);
+ memcpy(&sg, grinfo, i);
if (buddy_loaded)
ext4_mb_unload_buddy(&e4b);
@@ -3208,8 +3227,12 @@ static int ext4_mb_init_backend(struct super_block *sb)
err_freebuddy:
cachep = get_groupinfo_cache(sb->s_blocksize_bits);
- while (i-- > 0)
- kmem_cache_free(cachep, ext4_get_group_info(sb, i));
+ while (i-- > 0) {
+ struct ext4_group_info *grp = ext4_get_group_info(sb, i);
+
+ if (grp)
+ kmem_cache_free(cachep, grp);
+ }
i = sbi->s_group_info_size;
rcu_read_lock();
group_info = rcu_dereference(sbi->s_group_info);
@@ -3522,6 +3545,8 @@ int ext4_mb_release(struct super_block *sb)
for (i = 0; i < ngroups; i++) {
cond_resched();
grinfo = ext4_get_group_info(sb, i);
+ if (!grinfo)
+ continue;
mb_group_bb_bitmap_free(grinfo);
ext4_lock_group(sb, i);
count = ext4_mb_cleanup_pa(grinfo);
@@ -4606,6 +4631,8 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
struct ext4_free_data *entry;
grp = ext4_get_group_info(sb, group);
+ if (!grp)
+ return;
n = rb_first(&(grp->bb_free_root));
while (n) {
@@ -4633,6 +4660,9 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
int preallocated = 0;
int len;
+ if (!grp)
+ return;
+
/* all form of preallocation discards first load group,
* so the only competing code is preallocation use.
* we don't need any locking here
@@ -4869,6 +4899,8 @@ adjust_bex:
ei = EXT4_I(ac->ac_inode);
grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
+ if (!grp)
+ return;
pa->pa_node_lock.inode_lock = &ei->i_prealloc_lock;
pa->pa_inode = ac->ac_inode;
@@ -4918,6 +4950,8 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
+ if (!grp)
+ return;
lg = ac->ac_lg;
BUG_ON(lg == NULL);
@@ -5013,7 +5047,11 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
trace_ext4_mb_release_group_pa(sb, pa);
BUG_ON(pa->pa_deleted == 0);
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
- BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
+ if (unlikely(group != e4b->bd_group && pa->pa_len != 0)) {
+ ext4_warning(sb, "bad group: expected %u, group %u, pa_start %llu",
+ e4b->bd_group, group, pa->pa_pstart);
+ return 0;
+ }
mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
@@ -5043,6 +5081,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
int err;
int free = 0;
+ if (!grp)
+ return 0;
mb_debug(sb, "discard preallocation for group %u\n", group);
if (list_empty(&grp->bb_prealloc_list))
goto out_dbg;
@@ -5297,6 +5337,9 @@ static inline void ext4_mb_show_pa(struct super_block *sb)
struct ext4_prealloc_space *pa;
ext4_grpblk_t start;
struct list_head *cur;
+
+ if (!grp)
+ continue;
ext4_lock_group(sb, i);
list_for_each(cur, &grp->bb_prealloc_list) {
pa = list_entry(cur, struct ext4_prealloc_space,
@@ -6064,6 +6107,7 @@ static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode,
struct buffer_head *bitmap_bh = NULL;
struct super_block *sb = inode->i_sb;
struct ext4_group_desc *gdp;
+ struct ext4_group_info *grp;
unsigned int overflow;
ext4_grpblk_t bit;
struct buffer_head *gd_bh;
@@ -6089,8 +6133,8 @@ do_more:
overflow = 0;
ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
- if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(
- ext4_get_group_info(sb, block_group))))
+ grp = ext4_get_group_info(sb, block_group);
+ if (unlikely(!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
return;
/*
@@ -6692,6 +6736,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
for (group = first_group; group <= last_group; group++) {
grp = ext4_get_group_info(sb, group);
+ if (!grp)
+ continue;
/* We only do this if the grp has never been initialized */
if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
ret = ext4_mb_init_group(sb, group, GFP_NOFS);
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index a19a9661646e..d98ac2af8199 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -408,7 +408,6 @@ static int free_ext_block(handle_t *handle, struct inode *inode)
int ext4_ext_migrate(struct inode *inode)
{
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
handle_t *handle;
int retval = 0, i;
__le32 *i_data;
@@ -418,6 +417,7 @@ int ext4_ext_migrate(struct inode *inode)
unsigned long max_entries;
__u32 goal, tmp_csum_seed;
uid_t owner[2];
+ int alloc_ctx;
/*
* If the filesystem does not support extents, or the inode
@@ -434,7 +434,7 @@ int ext4_ext_migrate(struct inode *inode)
*/
return retval;
- percpu_down_write(&sbi->s_writepages_rwsem);
+ alloc_ctx = ext4_writepages_down_write(inode->i_sb);
/*
* Worst case we can touch the allocation bitmaps and a block
@@ -586,7 +586,7 @@ out_tmp_inode:
unlock_new_inode(tmp_inode);
iput(tmp_inode);
out_unlock:
- percpu_up_write(&sbi->s_writepages_rwsem);
+ ext4_writepages_up_write(inode->i_sb, alloc_ctx);
return retval;
}
@@ -605,6 +605,7 @@ int ext4_ind_migrate(struct inode *inode)
ext4_fsblk_t blk;
handle_t *handle;
int ret, ret2 = 0;
+ int alloc_ctx;
if (!ext4_has_feature_extents(inode->i_sb) ||
(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
@@ -621,7 +622,7 @@ int ext4_ind_migrate(struct inode *inode)
if (test_opt(inode->i_sb, DELALLOC))
ext4_alloc_da_blocks(inode);
- percpu_down_write(&sbi->s_writepages_rwsem);
+ alloc_ctx = ext4_writepages_down_write(inode->i_sb);
handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
if (IS_ERR(handle)) {
@@ -665,6 +666,6 @@ errout:
ext4_journal_stop(handle);
up_write(&EXT4_I(inode)->i_data_sem);
out_unlock:
- percpu_up_write(&sbi->s_writepages_rwsem);
+ ext4_writepages_up_write(inode->i_sb, alloc_ctx);
return ret;
}
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 4022bc713421..0aaf38ffcb6e 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -39,28 +39,36 @@ static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
* Write the MMP block using REQ_SYNC to try to get the block on-disk
* faster.
*/
-static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
+static int write_mmp_block_thawed(struct super_block *sb,
+ struct buffer_head *bh)
{
struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
- /*
- * We protect against freezing so that we don't create dirty buffers
- * on frozen filesystem.
- */
- sb_start_write(sb);
ext4_mmp_csum_set(sb, mmp);
lock_buffer(bh);
bh->b_end_io = end_buffer_write_sync;
get_bh(bh);
submit_bh(REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO, bh);
wait_on_buffer(bh);
- sb_end_write(sb);
if (unlikely(!buffer_uptodate(bh)))
return -EIO;
-
return 0;
}
+static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
+{
+ int err;
+
+ /*
+ * We protect against freezing so that we don't create dirty buffers
+ * on frozen filesystem.
+ */
+ sb_start_write(sb);
+ err = write_mmp_block_thawed(sb, bh);
+ sb_end_write(sb);
+ return err;
+}
+
/*
* Read the MMP block. It _must_ be read from disk and hence we clear the
* uptodate flag on the buffer.
@@ -344,7 +352,11 @@ skip:
seq = mmp_new_seq();
mmp->mmp_seq = cpu_to_le32(seq);
- retval = write_mmp_block(sb, bh);
+ /*
+ * On mount / remount we are protected against fs freezing (by s_umount
+ * semaphore) and grabbing freeze protection upsets lockdep
+ */
+ retval = write_mmp_block_thawed(sb, bh);
if (retval)
goto failed;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a5010b5b8a8c..45b579805c95 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -674,7 +674,7 @@ static struct stats dx_show_leaf(struct inode *dir,
len = de->name_len;
if (!IS_ENCRYPTED(dir)) {
/* Directory is not encrypted */
- ext4fs_dirhash(dir, de->name,
+ (void) ext4fs_dirhash(dir, de->name,
de->name_len, &h);
printk("%*.s:(U)%x.%u ", len,
name, h.hash,
@@ -709,8 +709,9 @@ static struct stats dx_show_leaf(struct inode *dir,
if (IS_CASEFOLDED(dir))
h.hash = EXT4_DIRENT_HASH(de);
else
- ext4fs_dirhash(dir, de->name,
- de->name_len, &h);
+ (void) ext4fs_dirhash(dir,
+ de->name,
+ de->name_len, &h);
printk("%*.s:(E)%x.%u ", len, name,
h.hash, (unsigned) ((char *) de
- base));
@@ -720,7 +721,8 @@ static struct stats dx_show_leaf(struct inode *dir,
#else
int len = de->name_len;
char *name = de->name;
- ext4fs_dirhash(dir, de->name, de->name_len, &h);
+ (void) ext4fs_dirhash(dir, de->name,
+ de->name_len, &h);
printk("%*.s:%x.%u ", len, name, h.hash,
(unsigned) ((char *) de - base));
#endif
@@ -849,8 +851,14 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
/* hash is already computed for encrypted casefolded directory */
if (fname && fname_name(fname) &&
- !(IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir)))
- ext4fs_dirhash(dir, fname_name(fname), fname_len(fname), hinfo);
+ !(IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir))) {
+ int ret = ext4fs_dirhash(dir, fname_name(fname),
+ fname_len(fname), hinfo);
+ if (ret < 0) {
+ ret_err = ERR_PTR(ret);
+ goto fail;
+ }
+ }
hash = hinfo->hash;
if (root->info.unused_flags & 1) {
@@ -1111,7 +1119,12 @@ static int htree_dirblock_to_tree(struct file *dir_file,
hinfo->minor_hash = 0;
}
} else {
- ext4fs_dirhash(dir, de->name, de->name_len, hinfo);
+ err = ext4fs_dirhash(dir, de->name,
+ de->name_len, hinfo);
+ if (err < 0) {
+ count = err;
+ goto errout;
+ }
}
if ((hinfo->hash < start_hash) ||
((hinfo->hash == start_hash) &&
@@ -1313,8 +1326,12 @@ static int dx_make_map(struct inode *dir, struct buffer_head *bh,
if (de->name_len && de->inode) {
if (ext4_hash_in_dirent(dir))
h.hash = EXT4_DIRENT_HASH(de);
- else
- ext4fs_dirhash(dir, de->name, de->name_len, &h);
+ else {
+ int err = ext4fs_dirhash(dir, de->name,
+ de->name_len, &h);
+ if (err < 0)
+ return err;
+ }
map_tail--;
map_tail->hash = h.hash;
map_tail->offs = ((char *) de - base)>>2;
@@ -1452,10 +1469,9 @@ int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
hinfo->hash_version = DX_HASH_SIPHASH;
hinfo->seed = NULL;
if (cf_name->name)
- ext4fs_dirhash(dir, cf_name->name, cf_name->len, hinfo);
+ return ext4fs_dirhash(dir, cf_name->name, cf_name->len, hinfo);
else
- ext4fs_dirhash(dir, iname->name, iname->len, hinfo);
- return 0;
+ return ext4fs_dirhash(dir, iname->name, iname->len, hinfo);
}
#endif
@@ -2298,10 +2314,15 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
fname->hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
/* casefolded encrypted hashes are computed on fname setup */
- if (!ext4_hash_in_dirent(dir))
- ext4fs_dirhash(dir, fname_name(fname),
- fname_len(fname), &fname->hinfo);
-
+ if (!ext4_hash_in_dirent(dir)) {
+ int err = ext4fs_dirhash(dir, fname_name(fname),
+ fname_len(fname), &fname->hinfo);
+ if (err < 0) {
+ brelse(bh2);
+ brelse(bh);
+ return err;
+ }
+ }
memset(frames, 0, sizeof(frames));
frame = frames;
frame->entries = entries;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d39f386e9baf..9680fe753e59 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1048,6 +1048,8 @@ void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
int ret;
+ if (!grp || !gdp)
+ return;
if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) {
ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
&grp->bb_state);
@@ -3238,11 +3240,9 @@ static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
crc = crc16(crc, (__u8 *)gdp, offset);
offset += sizeof(gdp->bg_checksum); /* skip checksum */
/* for checksum of struct ext4_group_desc do the rest...*/
- if (ext4_has_feature_64bit(sb) &&
- offset < le16_to_cpu(sbi->s_es->s_desc_size))
+ if (ext4_has_feature_64bit(sb) && offset < sbi->s_desc_size)
crc = crc16(crc, (__u8 *)gdp + offset,
- le16_to_cpu(sbi->s_es->s_desc_size) -
- offset);
+ sbi->s_desc_size - offset);
out:
return cpu_to_le16(crc);
@@ -5684,8 +5684,9 @@ static int ext4_fill_super(struct super_block *sb, struct fs_context *fc)
descr = "out journal";
if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
- ext4_msg(sb, KERN_INFO, "mounted filesystem %pU with%s. "
- "Quota mode: %s.", &sb->s_uuid, descr,
+ ext4_msg(sb, KERN_INFO, "mounted filesystem %pU %s with%s. "
+ "Quota mode: %s.", &sb->s_uuid,
+ sb_rdonly(sb) ? "ro" : "r/w", descr,
ext4_quota_mode(sb));
/* Update the s_overhead_clusters if necessary */
@@ -6387,6 +6388,7 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
struct ext4_mount_options old_opts;
ext4_group_t g;
int err = 0;
+ int enable_rw = 0;
#ifdef CONFIG_QUOTA
int enable_quota = 0;
int i, j;
@@ -6573,7 +6575,7 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
if (err)
goto restore_opts;
- sb->s_flags &= ~SB_RDONLY;
+ enable_rw = 1;
if (ext4_has_feature_mmp(sb)) {
err = ext4_multi_mount_protect(sb,
le64_to_cpu(es->s_mmp_block));
@@ -6616,9 +6618,6 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
}
#ifdef CONFIG_QUOTA
- /* Release old quota file names */
- for (i = 0; i < EXT4_MAXQUOTAS; i++)
- kfree(old_opts.s_qf_names[i]);
if (enable_quota) {
if (sb_any_quota_suspended(sb))
dquot_resume(sb, -1);
@@ -6628,16 +6627,29 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
goto restore_opts;
}
}
+ /* Release old quota file names */
+ for (i = 0; i < EXT4_MAXQUOTAS; i++)
+ kfree(old_opts.s_qf_names[i]);
#endif
if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
ext4_release_system_zone(sb);
+ if (enable_rw)
+ sb->s_flags &= ~SB_RDONLY;
+
if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
ext4_stop_mmpd(sbi);
return 0;
restore_opts:
+ /*
+ * If there was a failing r/w to ro transition, we may need to
+ * re-enable quota
+ */
+ if ((sb->s_flags & SB_RDONLY) && !(old_sb_flags & SB_RDONLY) &&
+ sb_any_quota_suspended(sb))
+ dquot_resume(sb, -1);
sb->s_flags = old_sb_flags;
sbi->s_mount_opt = old_opts.s_mount_opt;
sbi->s_mount_opt2 = old_opts.s_mount_opt2;
@@ -6678,8 +6690,9 @@ static int ext4_reconfigure(struct fs_context *fc)
if (ret < 0)
return ret;
- ext4_msg(sb, KERN_INFO, "re-mounted %pU. Quota mode: %s.",
- &sb->s_uuid, ext4_quota_mode(sb));
+ ext4_msg(sb, KERN_INFO, "re-mounted %pU %s. Quota mode: %s.",
+ &sb->s_uuid, sb_rdonly(sb) ? "ro" : "r/w",
+ ext4_quota_mode(sb));
return 0;
}
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index dadad29bd81b..dfc2e223bd10 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -2614,6 +2614,7 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
.in_inode = !!entry->e_value_inum,
};
struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode);
+ int needs_kvfree = 0;
int error;
is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
@@ -2636,7 +2637,7 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
error = -ENOMEM;
goto out;
}
-
+ needs_kvfree = 1;
error = ext4_xattr_inode_get(inode, entry, buffer, value_size);
if (error)
goto out;
@@ -2675,7 +2676,7 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
out:
kfree(b_entry_name);
- if (entry->e_value_inum && buffer)
+ if (needs_kvfree && buffer)
kvfree(buffer);
if (is)
brelse(is->iloc.bh);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 5eed8c237500..a84bf6444bba 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1419,6 +1419,14 @@ static void gfs2_evict_inode(struct inode *inode)
if (inode->i_nlink || sb_rdonly(sb) || !ip->i_no_addr)
goto out;
+ /*
+ * In case of an incomplete mount, gfs2_evict_inode() may be called for
+ * system files without having an active journal to write to. In that
+ * case, skip the filesystem evict.
+ */
+ if (!sdp->sd_jdesc)
+ goto out;
+
gfs2_holder_mark_uninitialized(&gh);
ret = evict_should_delete(inode, &gh);
if (ret == SHOULD_DEFER_EVICTION)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index bacad0c57810..e63c1d46f189 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -402,7 +402,7 @@ static struct folio *nfs_readdir_folio_get_locked(struct address_space *mapping,
struct folio *folio;
folio = filemap_grab_folio(mapping, index);
- if (!folio)
+ if (IS_ERR(folio))
return NULL;
nfs_readdir_folio_init_and_validate(folio, cookie, change_attr);
return folio;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 49cfe2ae6d23..993375f0db67 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -65,7 +65,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
struct fsnotify_event *fsn_event;
struct fsnotify_group *group = inode_mark->group;
int ret;
- int len = 0;
+ int len = 0, wd;
int alloc_len = sizeof(struct inotify_event_info);
struct mem_cgroup *old_memcg;
@@ -81,6 +81,13 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
fsn_mark);
/*
+ * We can be racing with mark being detached. Don't report event with
+ * invalid wd.
+ */
+ wd = READ_ONCE(i_mark->wd);
+ if (wd == -1)
+ return 0;
+ /*
* Whoever is interested in the event, pays for the allocation. Do not
* trigger OOM killer in the target monitoring memcg as it may have
* security repercussion.
@@ -110,7 +117,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
fsn_event = &event->fse;
fsnotify_init_event(fsn_event);
event->mask = mask;
- event->wd = i_mark->wd;
+ event->wd = wd;
event->sync_cookie = cookie;
event->name_len = len;
if (len)
diff --git a/fs/pipe.c b/fs/pipe.c
index ceb17d2dfa19..2d88f73f585a 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -342,7 +342,8 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
break;
if (ret)
break;
- if (filp->f_flags & O_NONBLOCK) {
+ if ((filp->f_flags & O_NONBLOCK) ||
+ (iocb->ki_flags & IOCB_NOWAIT)) {
ret = -EAGAIN;
break;
}
@@ -547,7 +548,8 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
continue;
/* Wait for buffer space to become available. */
- if (filp->f_flags & O_NONBLOCK) {
+ if ((filp->f_flags & O_NONBLOCK) ||
+ (iocb->ki_flags & IOCB_NOWAIT)) {
if (!ret)
ret = -EAGAIN;
break;
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 1b078bbbf225..9b373a0c7aaf 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -495,10 +495,12 @@ xfs_freesp_init_recs(
ASSERT(start >= mp->m_ag_prealloc_blocks);
if (start != mp->m_ag_prealloc_blocks) {
/*
- * Modify first record to pad stripe align of log
+ * Modify first record to pad stripe align of log and
+ * bump the record count.
*/
arec->ar_blockcount = cpu_to_be32(start -
mp->m_ag_prealloc_blocks);
+ be16_add_cpu(&block->bb_numrecs, 1);
nrec = arec + 1;
/*
@@ -509,7 +511,6 @@ xfs_freesp_init_recs(
be32_to_cpu(arec->ar_startblock) +
be32_to_cpu(arec->ar_blockcount));
arec = nrec;
- be16_add_cpu(&block->bb_numrecs, 1);
}
/*
* Change record start to after the internal log
@@ -518,15 +519,13 @@ xfs_freesp_init_recs(
}
/*
- * Calculate the record block count and check for the case where
- * the log might have consumed all available space in the AG. If
- * so, reset the record count to 0 to avoid exposure of an invalid
- * record start block.
+ * Calculate the block count of this record; if it is nonzero,
+ * increment the record count.
*/
arec->ar_blockcount = cpu_to_be32(id->agsize -
be32_to_cpu(arec->ar_startblock));
- if (!arec->ar_blockcount)
- block->bb_numrecs = 0;
+ if (arec->ar_blockcount)
+ be16_add_cpu(&block->bb_numrecs, 1);
}
/*
@@ -538,7 +537,7 @@ xfs_bnoroot_init(
struct xfs_buf *bp,
struct aghdr_init_data *id)
{
- xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno);
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 0, id->agno);
xfs_freesp_init_recs(mp, bp, id);
}
@@ -548,7 +547,7 @@ xfs_cntroot_init(
struct xfs_buf *bp,
struct aghdr_init_data *id)
{
- xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno);
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 0, id->agno);
xfs_freesp_init_recs(mp, bp, id);
}
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index b512de0540d5..cd8870a16fd1 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3494,8 +3494,10 @@ xfs_bmap_btalloc_at_eof(
if (!caller_pag)
args->pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ap->blkno));
error = xfs_alloc_vextent_exact_bno(args, ap->blkno);
- if (!caller_pag)
+ if (!caller_pag) {
xfs_perag_put(args->pag);
+ args->pag = NULL;
+ }
if (error)
return error;
@@ -3505,7 +3507,6 @@ xfs_bmap_btalloc_at_eof(
* Exact allocation failed. Reset to try an aligned allocation
* according to the original allocation specification.
*/
- args->pag = NULL;
args->alignment = stripe_align;
args->minlen = nextminlen;
args->minalignslop = 0;
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 87ab9f95a487..69bc89d0fc68 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -42,12 +42,12 @@ xchk_setup_inode_bmap(
xfs_ilock(sc->ip, XFS_IOLOCK_EXCL);
/*
- * We don't want any ephemeral data fork updates sitting around
+ * We don't want any ephemeral data/cow fork updates sitting around
* while we inspect block mappings, so wait for directio to finish
* and flush dirty data if we have delalloc reservations.
*/
if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
- sc->sm->sm_type == XFS_SCRUB_TYPE_BMBTD) {
+ sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) {
struct address_space *mapping = VFS_I(sc->ip)->i_mapping;
sc->ilock_flags |= XFS_MMAPLOCK_EXCL;
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 9aa79665c608..7a20256be969 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -1164,32 +1164,6 @@ xchk_metadata_inode_forks(
return 0;
}
-/* Pause background reaping of resources. */
-void
-xchk_stop_reaping(
- struct xfs_scrub *sc)
-{
- sc->flags |= XCHK_REAPING_DISABLED;
- xfs_blockgc_stop(sc->mp);
- xfs_inodegc_stop(sc->mp);
-}
-
-/* Restart background reaping of resources. */
-void
-xchk_start_reaping(
- struct xfs_scrub *sc)
-{
- /*
- * Readonly filesystems do not perform inactivation or speculative
- * preallocation, so there's no need to restart the workers.
- */
- if (!xfs_is_readonly(sc->mp)) {
- xfs_inodegc_start(sc->mp);
- xfs_blockgc_start(sc->mp);
- }
- sc->flags &= ~XCHK_REAPING_DISABLED;
-}
-
/*
* Enable filesystem hooks (i.e. runtime code patching) before starting a scrub
* operation. Callers must not hold any locks that intersect with the CPU
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 18b5f2b62f13..791235cd9b00 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -156,8 +156,6 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm)
}
int xchk_metadata_inode_forks(struct xfs_scrub *sc);
-void xchk_stop_reaping(struct xfs_scrub *sc);
-void xchk_start_reaping(struct xfs_scrub *sc);
/*
* Setting up a hook to wait for intents to drain is costly -- we have to take
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index faa315be7978..e382a35e98d8 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -150,13 +150,6 @@ xchk_setup_fscounters(
if (error)
return error;
- /*
- * Pause background reclaim while we're scrubbing to reduce the
- * likelihood of background perturbations to the counters throwing off
- * our calculations.
- */
- xchk_stop_reaping(sc);
-
return xchk_trans_alloc(sc, 0);
}
@@ -454,6 +447,12 @@ xchk_fscounters(
xchk_set_corrupt(sc);
/*
+ * XXX: We can't quiesce percpu counter updates, so exit early.
+ * This can be re-enabled when we gain exclusive freeze functionality.
+ */
+ return 0;
+
+ /*
* If ifree exceeds icount by more than the minimum variance then
* something's probably wrong with the counters.
*/
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 02819bedc5b1..3d98f604765e 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -186,8 +186,6 @@ xchk_teardown(
}
if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
mnt_drop_write_file(sc->file);
- if (sc->flags & XCHK_REAPING_DISABLED)
- xchk_start_reaping(sc);
if (sc->buf) {
if (sc->buf_cleanup)
sc->buf_cleanup(sc->buf);
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index e71903474cd7..b38e93830dde 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -106,7 +106,6 @@ struct xfs_scrub {
/* XCHK state flags grow up from zero, XREP state flags grown down from 2^31 */
#define XCHK_TRY_HARDER (1 << 0) /* can't get resources, try again */
-#define XCHK_REAPING_DISABLED (1 << 1) /* background block reaping paused */
#define XCHK_FSGATES_DRAIN (1 << 2) /* defer ops draining enabled */
#define XCHK_NEED_DRAIN (1 << 3) /* scrub needs to drain defer ops */
#define XREP_ALREADY_FIXED (1 << 31) /* checking our repair work */
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 68efd6fda61c..b3894daeb86a 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -98,7 +98,6 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS);
#define XFS_SCRUB_STATE_STRINGS \
{ XCHK_TRY_HARDER, "try_harder" }, \
- { XCHK_REAPING_DISABLED, "reaping_disabled" }, \
{ XCHK_FSGATES_DRAIN, "fsgates_drain" }, \
{ XCHK_NEED_DRAIN, "need_drain" }, \
{ XREP_ALREADY_FIXED, "already_fixed" }
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index f032d3a4b727..fbb675563208 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -558,7 +558,9 @@ xfs_getbmap(
if (!xfs_iext_next_extent(ifp, &icur, &got)) {
xfs_fileoff_t end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
- out[bmv->bmv_entries - 1].bmv_oflags |= BMV_OF_LAST;
+ if (bmv->bmv_entries > 0)
+ out[bmv->bmv_entries - 1].bmv_oflags |=
+ BMV_OF_LAST;
if (whichfork != XFS_ATTR_FORK && bno < end &&
!xfs_getbmap_full(bmv)) {
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 351849fc18ff..0f60e301eb1f 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -435,18 +435,23 @@ xfs_iget_check_free_state(
}
/* Make all pending inactivation work start immediately. */
-static void
+static bool
xfs_inodegc_queue_all(
struct xfs_mount *mp)
{
struct xfs_inodegc *gc;
int cpu;
+ bool ret = false;
for_each_online_cpu(cpu) {
gc = per_cpu_ptr(mp->m_inodegc, cpu);
- if (!llist_empty(&gc->list))
+ if (!llist_empty(&gc->list)) {
mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0);
+ ret = true;
+ }
}
+
+ return ret;
}
/*
@@ -1856,6 +1861,8 @@ xfs_inodegc_worker(
struct xfs_inode *ip, *n;
unsigned int nofs_flag;
+ ASSERT(gc->cpu == smp_processor_id());
+
WRITE_ONCE(gc->items, 0);
if (!node)
@@ -1909,24 +1916,41 @@ xfs_inodegc_flush(
/*
* Flush all the pending work and then disable the inode inactivation background
- * workers and wait for them to stop.
+ * workers and wait for them to stop. Caller must hold sb->s_umount to
+ * coordinate changes in the inodegc_enabled state.
*/
void
xfs_inodegc_stop(
struct xfs_mount *mp)
{
+ bool rerun;
+
if (!xfs_clear_inodegc_enabled(mp))
return;
+ /*
+ * Drain all pending inodegc work, including inodes that could be
+ * queued by racing xfs_inodegc_queue or xfs_inodegc_shrinker_scan
+ * threads that sample the inodegc state just prior to us clearing it.
+ * The inodegc flag state prevents new threads from queuing more
+ * inodes, so we queue pending work items and flush the workqueue until
+ * all inodegc lists are empty. IOWs, we cannot use drain_workqueue
+ * here because it does not allow other unserialized mechanisms to
+ * reschedule inodegc work while this draining is in progress.
+ */
xfs_inodegc_queue_all(mp);
- drain_workqueue(mp->m_inodegc_wq);
+ do {
+ flush_workqueue(mp->m_inodegc_wq);
+ rerun = xfs_inodegc_queue_all(mp);
+ } while (rerun);
trace_xfs_inodegc_stop(mp, __return_address);
}
/*
* Enable the inode inactivation background workers and schedule deferred inode
- * inactivation work if there is any.
+ * inactivation work if there is any. Caller must hold sb->s_umount to
+ * coordinate changes in the inodegc_enabled state.
*/
void
xfs_inodegc_start(
@@ -2069,7 +2093,8 @@ xfs_inodegc_queue(
queue_delay = 0;
trace_xfs_inodegc_queue(mp, __return_address);
- mod_delayed_work(mp->m_inodegc_wq, &gc->work, queue_delay);
+ mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work,
+ queue_delay);
put_cpu_ptr(gc);
if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) {
@@ -2113,7 +2138,8 @@ xfs_inodegc_cpu_dead(
if (xfs_is_inodegc_enabled(mp)) {
trace_xfs_inodegc_queue(mp, __return_address);
- mod_delayed_work(mp->m_inodegc_wq, &gc->work, 0);
+ mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work,
+ 0);
}
put_cpu_ptr(gc);
}
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 285885c308bd..18c8f168b153 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1006,8 +1006,9 @@ xfs_buffered_write_iomap_begin(
if (eof)
imap.br_startoff = end_fsb; /* fake hole until the end */
- /* We never need to allocate blocks for zeroing a hole. */
- if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) {
+ /* We never need to allocate blocks for zeroing or unsharing a hole. */
+ if ((flags & (IOMAP_UNSHARE | IOMAP_ZERO)) &&
+ imap.br_startoff > offset_fsb) {
xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff);
goto out_unlock;
}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index f3269c0626f0..aaaf5ec13492 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -66,6 +66,9 @@ struct xfs_inodegc {
/* approximate count of inodes in the list */
unsigned int items;
unsigned int shrinker_hits;
+#if defined(DEBUG) || defined(XFS_WARN)
+ unsigned int cpu;
+#endif
};
/*
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 4d2e87462ac4..7e706255f165 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1095,6 +1095,9 @@ xfs_inodegc_init_percpu(
for_each_possible_cpu(cpu) {
gc = per_cpu_ptr(mp->m_inodegc, cpu);
+#if defined(DEBUG) || defined(XFS_WARN)
+ gc->cpu = cpu;
+#endif
init_llist_head(&gc->list);
gc->items = 0;
INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);