diff options
Diffstat (limited to 'fs/ext4/super.c')
-rw-r--r-- | fs/ext4/super.c | 396 |
1 files changed, 222 insertions, 174 deletions
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0f931d0c227d..a50e5c31b937 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -161,8 +161,14 @@ MODULE_ALIAS("ext3"); static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, - bh_end_io_t *end_io) + bh_end_io_t *end_io, bool simu_fail) { + if (simu_fail) { + clear_buffer_uptodate(bh); + unlock_buffer(bh); + return; + } + /* * buffer's verified bit is no longer valid after reading from * disk again due to write out error, clear it to make sure we @@ -176,7 +182,7 @@ static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, } void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags, - bh_end_io_t *end_io) + bh_end_io_t *end_io, bool simu_fail) { BUG_ON(!buffer_locked(bh)); @@ -184,10 +190,11 @@ void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags, unlock_buffer(bh); return; } - __ext4_read_bh(bh, op_flags, end_io); + __ext4_read_bh(bh, op_flags, end_io, simu_fail); } -int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, bh_end_io_t *end_io) +int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, + bh_end_io_t *end_io, bool simu_fail) { BUG_ON(!buffer_locked(bh)); @@ -196,7 +203,7 @@ int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, bh_end_io_t *end_io return 0; } - __ext4_read_bh(bh, op_flags, end_io); + __ext4_read_bh(bh, op_flags, end_io, simu_fail); wait_on_buffer(bh); if (buffer_uptodate(bh)) @@ -208,10 +215,10 @@ int ext4_read_bh_lock(struct buffer_head *bh, blk_opf_t op_flags, bool wait) { lock_buffer(bh); if (!wait) { - ext4_read_bh_nowait(bh, op_flags, NULL); + ext4_read_bh_nowait(bh, op_flags, NULL, false); return 0; } - return ext4_read_bh(bh, op_flags, NULL); + return ext4_read_bh(bh, op_flags, NULL, false); } /* @@ -244,7 +251,7 @@ static struct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb, struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block, blk_opf_t op_flags) { - gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_inode->i_mapping, + gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_mapping, ~__GFP_FS) | __GFP_MOVABLE; return __ext4_sb_bread_gfp(sb, block, op_flags, gfp); @@ -253,7 +260,7 @@ struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block, struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb, sector_t block) { - gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_inode->i_mapping, + gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_mapping, ~__GFP_FS); return __ext4_sb_bread_gfp(sb, block, 0, gfp); @@ -266,7 +273,7 @@ void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block) if (likely(bh)) { if (trylock_buffer(bh)) - ext4_read_bh_nowait(bh, REQ_RAHEAD, NULL); + ext4_read_bh_nowait(bh, REQ_RAHEAD, NULL, false); brelse(bh); } } @@ -346,9 +353,9 @@ __u32 ext4_free_group_clusters(struct super_block *sb, __u32 ext4_free_inodes_count(struct super_block *sb, struct ext4_group_desc *bg) { - return le16_to_cpu(bg->bg_free_inodes_count_lo) | + return le16_to_cpu(READ_ONCE(bg->bg_free_inodes_count_lo)) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); + (__u32)le16_to_cpu(READ_ONCE(bg->bg_free_inodes_count_hi)) << 16 : 0); } __u32 ext4_used_dirs_count(struct super_block *sb, @@ -402,9 +409,9 @@ void ext4_free_group_clusters_set(struct super_block *sb, void ext4_free_inodes_set(struct super_block *sb, struct ext4_group_desc *bg, __u32 count) { - bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count); + WRITE_ONCE(bg->bg_free_inodes_count_lo, cpu_to_le16((__u16)count)); if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) - bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16); + WRITE_ONCE(bg->bg_free_inodes_count_hi, cpu_to_le16(count >> 16)); } void ext4_used_dirs_set(struct super_block *sb, @@ -492,22 +499,6 @@ static void ext4_maybe_update_superblock(struct super_block *sb) schedule_work(&EXT4_SB(sb)->s_sb_upd_work); } -/* - * The del_gendisk() function uninitializes the disk-specific data - * structures, including the bdi structure, without telling anyone - * else. Once this happens, any attempt to call mark_buffer_dirty() - * (for example, by ext4_commit_super), will cause a kernel OOPS. - * This is a kludge to prevent these oops until we can put in a proper - * hook in del_gendisk() to inform the VFS and file system layers. - */ -static int block_device_ejected(struct super_block *sb) -{ - struct inode *bd_inode = sb->s_bdev->bd_inode; - struct backing_dev_info *bdi = inode_to_bdi(bd_inode); - - return bdi->dev == NULL; -} - static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) { struct super_block *sb = journal->j_private; @@ -751,11 +742,12 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error, ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); /* - * Make sure updated value of ->s_mount_flags will be visible before - * ->s_flags update + * EXT4_FLAGS_SHUTDOWN was set which stops all filesystem + * modifications. We don't set SB_RDONLY because that requires + * sb->s_umount semaphore and setting it without proper remount + * procedure is confusing code such as freeze_super() leading to + * deadlocks and other problems. */ - smp_wmb(); - sb->s_flags |= SB_RDONLY; } static void update_super_work(struct work_struct *work) @@ -1343,6 +1335,9 @@ static void ext4_put_super(struct super_block *sb) ext4_group_desc_free(sbi); ext4_flex_groups_free(sbi); + + WARN_ON_ONCE(!(sbi->s_mount_state & EXT4_ERROR_FS) && + percpu_counter_sum(&sbi->s_dirtyclusters_counter)); ext4_percpu_param_destroy(sbi); #ifdef CONFIG_QUOTA for (int i = 0; i < EXT4_MAXQUOTAS; i++) @@ -1359,14 +1354,14 @@ static void ext4_put_super(struct super_block *sb) sync_blockdev(sb->s_bdev); invalidate_bdev(sb->s_bdev); - if (sbi->s_journal_bdev_handle) { + if (sbi->s_journal_bdev_file) { /* * Invalidate the journal device's buffers. We don't want them * floating about in memory - the physical journal device may * hotswapped, and it breaks the `ro-after' testing code. */ - sync_blockdev(sbi->s_journal_bdev_handle->bdev); - invalidate_bdev(sbi->s_journal_bdev_handle->bdev); + sync_blockdev(file_bdev(sbi->s_journal_bdev_file)); + invalidate_bdev(file_bdev(sbi->s_journal_bdev_file)); } ext4_xattr_destroy_cache(sbi->s_ea_inode_cache); @@ -1385,8 +1380,6 @@ static void ext4_put_super(struct super_block *sb) */ kobject_put(&sbi->s_kobj); wait_for_completion(&sbi->s_kobj_unregister); - if (sbi->s_chksum_driver) - crypto_free_shash(sbi->s_chksum_driver); kfree(sbi->s_blockgroup_lock); fs_put_dax(sbi->s_daxdev, NULL); fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy); @@ -1473,7 +1466,8 @@ static void ext4_destroy_inode(struct inode *inode) dump_stack(); } - if (EXT4_I(inode)->i_reserved_data_blocks) + if (!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ERROR_FS) && + WARN_ON_ONCE(EXT4_I(inode)->i_reserved_data_blocks)) ext4_msg(inode->i_sb, KERN_ERR, "Inode %lu (%p): i_reserved_data_blocks (%u) not cleared!", inode->i_ino, EXT4_I(inode), @@ -1500,8 +1494,7 @@ static int __init init_inodecache(void) { ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache", sizeof(struct ext4_inode_info), 0, - (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| - SLAB_ACCOUNT), + SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT, offsetof(struct ext4_inode_info, i_data), sizeof_field(struct ext4_inode_info, i_data), init_once); @@ -1600,7 +1593,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, static int ext4_quota_enable(struct super_block *sb, int type, int format_id, unsigned int flags); -static struct dquot **ext4_get_dquots(struct inode *inode) +static struct dquot __rcu **ext4_get_dquots(struct inode *inode) { return EXT4_I(inode)->i_dquot; } @@ -1724,10 +1717,6 @@ static const struct constant_table ext4_param_dax[] = { {} }; -/* String parameter that allows empty argument */ -#define fsparam_string_empty(NAME, OPT) \ - __fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL) - /* * Mount option specification * We don't use fsparam_flag_no because of the way we set the @@ -1742,8 +1731,8 @@ static const struct fs_parameter_spec ext4_param_specs[] = { fsparam_flag ("bsdgroups", Opt_grpid), fsparam_flag ("nogrpid", Opt_nogrpid), fsparam_flag ("sysvgroups", Opt_nogrpid), - fsparam_u32 ("resgid", Opt_resgid), - fsparam_u32 ("resuid", Opt_resuid), + fsparam_gid ("resgid", Opt_resgid), + fsparam_uid ("resuid", Opt_resuid), fsparam_u32 ("sb", Opt_sb), fsparam_enum ("errors", Opt_errors, ext4_param_errors), fsparam_flag ("nouid32", Opt_nouid32), @@ -2079,8 +2068,7 @@ static int unnote_qf_name(struct fs_context *fc, int qtype) { struct ext4_fs_context *ctx = fc->fs_private; - if (ctx->s_qf_names[qtype]) - kfree(ctx->s_qf_names[qtype]); + kfree(ctx->s_qf_names[qtype]); ctx->s_qf_names[qtype] = NULL; ctx->qname_spec |= 1 << qtype; @@ -2113,16 +2101,16 @@ static int ext4_parse_test_dummy_encryption(const struct fs_parameter *param, } #define EXT4_SET_CTX(name) \ -static inline void ctx_set_##name(struct ext4_fs_context *ctx, \ - unsigned long flag) \ +static inline __maybe_unused \ +void ctx_set_##name(struct ext4_fs_context *ctx, unsigned long flag) \ { \ ctx->mask_s_##name |= flag; \ ctx->vals_s_##name |= flag; \ } #define EXT4_CLEAR_CTX(name) \ -static inline void ctx_clear_##name(struct ext4_fs_context *ctx, \ - unsigned long flag) \ +static inline __maybe_unused \ +void ctx_clear_##name(struct ext4_fs_context *ctx, unsigned long flag) \ { \ ctx->mask_s_##name |= flag; \ ctx->vals_s_##name &= ~flag; \ @@ -2149,8 +2137,6 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) struct fs_parse_result result; const struct mount_opts *m; int is_remount; - kuid_t uid; - kgid_t gid; int token; token = fs_parse(fc, ext4_param_specs, param, &result); @@ -2292,23 +2278,11 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) ctx->spec |= EXT4_SPEC_s_stripe; return 0; case Opt_resuid: - uid = make_kuid(current_user_ns(), result.uint_32); - if (!uid_valid(uid)) { - ext4_msg(NULL, KERN_ERR, "Invalid uid value %d", - result.uint_32); - return -EINVAL; - } - ctx->s_resuid = uid; + ctx->s_resuid = result.uid; ctx->spec |= EXT4_SPEC_s_resuid; return 0; case Opt_resgid: - gid = make_kgid(current_user_ns(), result.uint_32); - if (!gid_valid(gid)) { - ext4_msg(NULL, KERN_ERR, "Invalid gid value %d", - result.uint_32); - return -EINVAL; - } - ctx->s_resgid = gid; + ctx->s_resgid = result.gid; ctx->spec |= EXT4_SPEC_s_resgid; return 0; case Opt_journal_dev: @@ -2485,8 +2459,7 @@ static int parse_options(struct fs_context *fc, char *options) param.size = v_len; ret = ext4_parse_param(fc, ¶m); - if (param.string) - kfree(param.string); + kfree(param.string); if (ret < 0) return ret; } @@ -3062,6 +3035,9 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, SEQ_OPTS_PUTS("mb_optimize_scan=1"); } + if (nodefs && !test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS)) + SEQ_OPTS_PUTS("prefetch_block_bitmaps"); + ext4_show_quota_options(seq, sb); return 0; } @@ -3078,7 +3054,7 @@ int ext4_seq_options_show(struct seq_file *seq, void *offset) seq_puts(seq, sb_rdonly(sb) ? "ro" : "rw"); rc = _ext4_show_options(seq, sb, 1); - seq_puts(seq, "\n"); + seq_putc(seq, '\n'); return rc; } @@ -3609,14 +3585,12 @@ int ext4_feature_set_ok(struct super_block *sb, int readonly) return 0; } -#if !IS_ENABLED(CONFIG_UNICODE) - if (ext4_has_feature_casefold(sb)) { + if (!IS_ENABLED(CONFIG_UNICODE) && ext4_has_feature_casefold(sb)) { ext4_msg(sb, KERN_ERR, "Filesystem with casefold feature cannot be " "mounted without CONFIG_UNICODE"); return 0; } -#endif if (readonly) return 1; @@ -3743,12 +3717,12 @@ static int ext4_run_li_request(struct ext4_li_request *elr) ret = 1; if (!ret) { - start_time = ktime_get_real_ns(); + start_time = ktime_get_ns(); ret = ext4_init_inode_table(sb, group, elr->lr_timeout ? 0 : 1); trace_ext4_lazy_itable_init(sb, group); if (elr->lr_timeout == 0) { - elr->lr_timeout = nsecs_to_jiffies((ktime_get_real_ns() - start_time) * + elr->lr_timeout = nsecs_to_jiffies((ktime_get_ns() - start_time) * EXT4_SB(elr->lr_super)->s_li_wait_mult); } elr->lr_next_sched = jiffies + elr->lr_timeout; @@ -3808,8 +3782,9 @@ static int ext4_lazyinit_thread(void *arg) cont_thread: while (true) { - next_wakeup = MAX_JIFFY_OFFSET; + bool next_wakeup_initialized = false; + next_wakeup = 0; mutex_lock(&eli->li_list_mtx); if (list_empty(&eli->li_request_list)) { mutex_unlock(&eli->li_list_mtx); @@ -3822,8 +3797,11 @@ cont_thread: lr_request); if (time_before(jiffies, elr->lr_next_sched)) { - if (time_before(elr->lr_next_sched, next_wakeup)) + if (!next_wakeup_initialized || + time_before(elr->lr_next_sched, next_wakeup)) { next_wakeup = elr->lr_next_sched; + next_wakeup_initialized = true; + } continue; } if (down_read_trylock(&elr->lr_super->s_umount)) { @@ -3851,16 +3829,18 @@ cont_thread: elr->lr_next_sched = jiffies + get_random_u32_below(EXT4_DEF_LI_MAX_START_DELAY * HZ); } - if (time_before(elr->lr_next_sched, next_wakeup)) + if (!next_wakeup_initialized || + time_before(elr->lr_next_sched, next_wakeup)) { next_wakeup = elr->lr_next_sched; + next_wakeup_initialized = true; + } } mutex_unlock(&eli->li_list_mtx); try_to_freeze(); cur = jiffies; - if ((time_after_eq(cur, next_wakeup)) || - (MAX_JIFFY_OFFSET == next_wakeup)) { + if (!next_wakeup_initialized || time_after_eq(cur, next_wakeup)) { cond_resched(); continue; } @@ -4233,7 +4213,7 @@ int ext4_calculate_overhead(struct super_block *sb) * Add the internal journal blocks whether the journal has been * loaded or not */ - if (sbi->s_journal && !sbi->s_journal_bdev_handle) + if (sbi->s_journal && !sbi->s_journal_bdev_file) overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len); else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) { /* j_inum for internal journal is non-zero */ @@ -4422,22 +4402,6 @@ static int ext4_handle_clustersize(struct super_block *sb) } sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) - le32_to_cpu(es->s_log_block_size); - sbi->s_clusters_per_group = - le32_to_cpu(es->s_clusters_per_group); - if (sbi->s_clusters_per_group > sb->s_blocksize * 8) { - ext4_msg(sb, KERN_ERR, - "#clusters per group too big: %lu", - sbi->s_clusters_per_group); - return -EINVAL; - } - if (sbi->s_blocks_per_group != - (sbi->s_clusters_per_group * (clustersize / sb->s_blocksize))) { - ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and " - "clusters per group (%lu) inconsistent", - sbi->s_blocks_per_group, - sbi->s_clusters_per_group); - return -EINVAL; - } } else { if (clustersize != sb->s_blocksize) { ext4_msg(sb, KERN_ERR, @@ -4451,9 +4415,21 @@ static int ext4_handle_clustersize(struct super_block *sb) sbi->s_blocks_per_group); return -EINVAL; } - sbi->s_clusters_per_group = sbi->s_blocks_per_group; sbi->s_cluster_bits = 0; } + sbi->s_clusters_per_group = le32_to_cpu(es->s_clusters_per_group); + if (sbi->s_clusters_per_group > sb->s_blocksize * 8) { + ext4_msg(sb, KERN_ERR, "#clusters per group too big: %lu", + sbi->s_clusters_per_group); + return -EINVAL; + } + if (sbi->s_blocks_per_group != + (sbi->s_clusters_per_group * (clustersize / sb->s_blocksize))) { + ext4_msg(sb, KERN_ERR, + "blocks per group (%lu) and clusters per group (%lu) inconsistent", + sbi->s_blocks_per_group, sbi->s_clusters_per_group); + return -EINVAL; + } sbi->s_cluster_ratio = clustersize / sb->s_blocksize; /* Do we have standard group size of clustersize * 8 blocks ? */ @@ -4463,6 +4439,36 @@ static int ext4_handle_clustersize(struct super_block *sb) return 0; } +/* + * ext4_atomic_write_init: Initializes filesystem min & max atomic write units. + * @sb: super block + * TODO: Later add support for bigalloc + */ +static void ext4_atomic_write_init(struct super_block *sb) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct block_device *bdev = sb->s_bdev; + + if (!bdev_can_atomic_write(bdev)) + return; + + if (!ext4_has_feature_extents(sb)) + return; + + sbi->s_awu_min = max(sb->s_blocksize, + bdev_atomic_write_unit_min_bytes(bdev)); + sbi->s_awu_max = min(sb->s_blocksize, + bdev_atomic_write_unit_max_bytes(bdev)); + if (sbi->s_awu_min && sbi->s_awu_max && + sbi->s_awu_min <= sbi->s_awu_max) { + ext4_msg(sb, KERN_NOTICE, "Supports (experimental) DIO atomic writes awu_min: %u, awu_max: %u", + sbi->s_awu_min, sbi->s_awu_max); + } else { + sbi->s_awu_min = 0; + sbi->s_awu_max = 0; + } +} + static void ext4_fast_commit_init(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -4626,15 +4632,6 @@ static int ext4_init_metadata_csum(struct super_block *sb, struct ext4_super_blo ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE, ext4_orphan_file_block_trigger); - /* Load the checksum driver */ - sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); - if (IS_ERR(sbi->s_chksum_driver)) { - int ret = PTR_ERR(sbi->s_chksum_driver); - ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); - sbi->s_chksum_driver = NULL; - return ret; - } - /* Check superblock checksum */ if (!ext4_superblock_csum_verify(sb, es)) { ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " @@ -5126,16 +5123,27 @@ out: return ret; } -static void ext4_hash_info_init(struct super_block *sb) +static int ext4_hash_info_init(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; unsigned int i; + sbi->s_def_hash_version = es->s_def_hash_version; + + if (sbi->s_def_hash_version > DX_HASH_LAST) { + ext4_msg(sb, KERN_ERR, + "Invalid default hash set in the superblock"); + return -EINVAL; + } else if (sbi->s_def_hash_version == DX_HASH_SIPHASH) { + ext4_msg(sb, KERN_ERR, + "SIPHASH is not a valid default hash value"); + return -EINVAL; + } + for (i = 0; i < 4; i++) sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); - sbi->s_def_hash_version = es->s_def_hash_version; if (ext4_has_feature_dir_index(sb)) { i = le32_to_cpu(es->s_flags); if (i & EXT2_FLAGS_UNSIGNED_HASH) @@ -5153,6 +5161,7 @@ static void ext4_hash_info_init(struct super_block *sb) #endif } } + return 0; } static int ext4_block_group_meta_init(struct super_block *sb, int silent) @@ -5204,6 +5213,18 @@ static int ext4_block_group_meta_init(struct super_block *sb, int silent) return 0; } +/* + * It's hard to get stripe aligned blocks if stripe is not aligned with + * cluster, just disable stripe and alert user to simplify code and avoid + * stripe aligned allocation which will rarely succeed. + */ +static bool ext4_is_stripe_incompatible(struct super_block *sb, unsigned long stripe) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + return (stripe > 0 && sbi->s_cluster_ratio > 1 && + stripe % sbi->s_cluster_ratio != 0); +} + static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) { struct ext4_super_block *es = NULL; @@ -5280,6 +5301,9 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) /* i_version is always enabled now */ sb->s_flags |= SB_I_VERSION; + /* HSM events are allowed by default. */ + sb->s_iflags |= SB_I_ALLOW_HSM; + err = ext4_check_feature_compatibility(sb, es, silent); if (err) goto failed_mount; @@ -5288,7 +5312,9 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) if (err) goto failed_mount; - ext4_hash_info_init(sb); + err = ext4_hash_info_init(sb); + if (err) + goto failed_mount; err = ext4_handle_clustersize(sb); if (err) @@ -5311,13 +5337,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) goto failed_mount3; sbi->s_stripe = ext4_get_stripe_size(sbi); - /* - * It's hard to get stripe aligned blocks if stripe is not aligned with - * cluster, just disable stripe and alert user to simpfy code and avoid - * stripe aligned allocation which will rarely successes. - */ - if (sbi->s_stripe > 0 && sbi->s_cluster_ratio > 1 && - sbi->s_stripe % sbi->s_cluster_ratio != 0) { + if (ext4_is_stripe_incompatible(sb, sbi->s_stripe)) { ext4_msg(sb, KERN_WARNING, "stripe (%lu) is not aligned with cluster size (%u), " "stripe is disabled", @@ -5346,11 +5366,15 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) sb->s_qcop = &ext4_qctl_operations; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; #endif - memcpy(&sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); + super_set_uuid(sb, es->s_uuid, sizeof(es->s_uuid)); + super_set_sysfs_name_bdev(sb); INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ mutex_init(&sbi->s_orphan_lock); + spin_lock_init(&sbi->s_bdev_wb_lock); + + ext4_atomic_write_init(sb); ext4_fast_commit_init(sb); sb->s_root = NULL; @@ -5484,6 +5508,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) goto failed_mount4; } + generic_set_sb_d_ops(sb); sb->s_root = d_make_root(root); if (!sb->s_root) { ext4_msg(sb, KERN_ERR, "get root dentry failed"); @@ -5555,19 +5580,15 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) if (err) goto failed_mount6; - err = ext4_register_sysfs(sb); - if (err) - goto failed_mount7; - err = ext4_init_orphan_info(sb); if (err) - goto failed_mount8; + goto failed_mount7; #ifdef CONFIG_QUOTA /* Enable quota usage during mount. */ if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) { err = ext4_enable_quotas(sb); if (err) - goto failed_mount9; + goto failed_mount8; } #endif /* CONFIG_QUOTA */ @@ -5575,8 +5596,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) * Save the original bdev mapping's wb_err value which could be * used to detect the metadata async write error. */ - spin_lock_init(&sbi->s_bdev_wb_lock); - errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err, + errseq_check_and_advance(&sb->s_bdev->bd_mapping->wb_err, &sbi->s_bdev_wb_err); EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; ext4_orphan_cleanup(sb, es); @@ -5593,7 +5613,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) ext4_msg(sb, KERN_INFO, "recovery complete"); err = ext4_mark_recovery_complete(sb, es); if (err) - goto failed_mount10; + goto failed_mount9; } if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev)) @@ -5610,15 +5630,17 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) atomic_set(&sbi->s_warning_count, 0); atomic_set(&sbi->s_msg_count, 0); + /* Register sysfs after all initializations are complete. */ + err = ext4_register_sysfs(sb); + if (err) + goto failed_mount9; + return 0; -failed_mount10: +failed_mount9: ext4_quotas_off(sb, EXT4_MAXQUOTAS); -failed_mount9: __maybe_unused +failed_mount8: __maybe_unused ext4_release_orphan_info(sb); -failed_mount8: - ext4_unregister_sysfs(sb); - kobject_put(&sbi->s_kobj); failed_mount7: ext4_unregister_li_request(sb); failed_mount6: @@ -5653,13 +5675,10 @@ failed_mount3a: failed_mount3: /* flush s_sb_upd_work before sbi destroy */ flush_work(&sbi->s_sb_upd_work); - del_timer_sync(&sbi->s_err_report); ext4_stop_mmpd(sbi); + del_timer_sync(&sbi->s_err_report); ext4_group_desc_free(sbi); failed_mount: - if (sbi->s_chksum_driver) - crypto_free_shash(sbi->s_chksum_driver); - #if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); #endif @@ -5670,9 +5689,9 @@ failed_mount: #endif fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy); brelse(sbi->s_sbh); - if (sbi->s_journal_bdev_handle) { - invalidate_bdev(sbi->s_journal_bdev_handle->bdev); - bdev_release(sbi->s_journal_bdev_handle); + if (sbi->s_journal_bdev_file) { + invalidate_bdev(file_bdev(sbi->s_journal_bdev_file)); + bdev_fput(sbi->s_journal_bdev_file); } out_fail: invalidate_bdev(sb->s_bdev); @@ -5842,30 +5861,30 @@ static journal_t *ext4_open_inode_journal(struct super_block *sb, return journal; } -static struct bdev_handle *ext4_get_journal_blkdev(struct super_block *sb, +static struct file *ext4_get_journal_blkdev(struct super_block *sb, dev_t j_dev, ext4_fsblk_t *j_start, ext4_fsblk_t *j_len) { struct buffer_head *bh; struct block_device *bdev; - struct bdev_handle *bdev_handle; + struct file *bdev_file; int hblock, blocksize; ext4_fsblk_t sb_block; unsigned long offset; struct ext4_super_block *es; int errno; - bdev_handle = bdev_open_by_dev(j_dev, + bdev_file = bdev_file_open_by_dev(j_dev, BLK_OPEN_READ | BLK_OPEN_WRITE | BLK_OPEN_RESTRICT_WRITES, sb, &fs_holder_ops); - if (IS_ERR(bdev_handle)) { + if (IS_ERR(bdev_file)) { ext4_msg(sb, KERN_ERR, "failed to open journal device unknown-block(%u,%u) %ld", - MAJOR(j_dev), MINOR(j_dev), PTR_ERR(bdev_handle)); - return bdev_handle; + MAJOR(j_dev), MINOR(j_dev), PTR_ERR(bdev_file)); + return bdev_file; } - bdev = bdev_handle->bdev; + bdev = file_bdev(bdev_file); blocksize = sb->s_blocksize; hblock = bdev_logical_block_size(bdev); if (blocksize < hblock) { @@ -5877,7 +5896,7 @@ static struct bdev_handle *ext4_get_journal_blkdev(struct super_block *sb, sb_block = EXT4_MIN_BLOCK_SIZE / blocksize; offset = EXT4_MIN_BLOCK_SIZE % blocksize; - set_blocksize(bdev, blocksize); + set_blocksize(bdev_file, blocksize); bh = __bread(bdev, sb_block, blocksize); if (!bh) { ext4_msg(sb, KERN_ERR, "couldn't read superblock of " @@ -5912,12 +5931,12 @@ static struct bdev_handle *ext4_get_journal_blkdev(struct super_block *sb, *j_start = sb_block + 1; *j_len = ext4_blocks_count(es); brelse(bh); - return bdev_handle; + return bdev_file; out_bh: brelse(bh); out_bdev: - bdev_release(bdev_handle); + bdev_fput(bdev_file); return ERR_PTR(errno); } @@ -5927,14 +5946,14 @@ static journal_t *ext4_open_dev_journal(struct super_block *sb, journal_t *journal; ext4_fsblk_t j_start; ext4_fsblk_t j_len; - struct bdev_handle *bdev_handle; + struct file *bdev_file; int errno = 0; - bdev_handle = ext4_get_journal_blkdev(sb, j_dev, &j_start, &j_len); - if (IS_ERR(bdev_handle)) - return ERR_CAST(bdev_handle); + bdev_file = ext4_get_journal_blkdev(sb, j_dev, &j_start, &j_len); + if (IS_ERR(bdev_file)) + return ERR_CAST(bdev_file); - journal = jbd2_journal_init_dev(bdev_handle->bdev, sb->s_bdev, j_start, + journal = jbd2_journal_init_dev(file_bdev(bdev_file), sb->s_bdev, j_start, j_len, sb->s_blocksize); if (IS_ERR(journal)) { ext4_msg(sb, KERN_ERR, "failed to create device journal"); @@ -5949,14 +5968,14 @@ static journal_t *ext4_open_dev_journal(struct super_block *sb, goto out_journal; } journal->j_private = sb; - EXT4_SB(sb)->s_journal_bdev_handle = bdev_handle; + EXT4_SB(sb)->s_journal_bdev_file = bdev_file; ext4_init_journal_params(sb, journal); return journal; out_journal: jbd2_journal_destroy(journal); out_bdev: - bdev_release(bdev_handle); + bdev_fput(bdev_file); return ERR_PTR(errno); } @@ -6134,8 +6153,8 @@ static void ext4_update_super(struct super_block *sb) __ext4_update_tstamp(&es->s_first_error_time, &es->s_first_error_time_hi, sbi->s_first_error_time); - strncpy(es->s_first_error_func, sbi->s_first_error_func, - sizeof(es->s_first_error_func)); + strtomem_pad(es->s_first_error_func, + sbi->s_first_error_func, 0); es->s_first_error_line = cpu_to_le32(sbi->s_first_error_line); es->s_first_error_ino = @@ -6148,8 +6167,7 @@ static void ext4_update_super(struct super_block *sb) __ext4_update_tstamp(&es->s_last_error_time, &es->s_last_error_time_hi, sbi->s_last_error_time); - strncpy(es->s_last_error_func, sbi->s_last_error_func, - sizeof(es->s_last_error_func)); + strtomem_pad(es->s_last_error_func, sbi->s_last_error_func, 0); es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line); es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino); es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block); @@ -6176,8 +6194,6 @@ static int ext4_commit_super(struct super_block *sb) if (!sbh) return -EINVAL; - if (block_device_ejected(sb)) - return -ENODEV; ext4_update_super(sb); @@ -6321,7 +6337,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait) struct ext4_sb_info *sbi = EXT4_SB(sb); if (unlikely(ext4_forced_shutdown(sb))) - return 0; + return -EIO; trace_ext4_sync_fs(sb, wait); flush_workqueue(sbi->rsv_conversion_wq); @@ -6483,6 +6499,15 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb) } + if ((ctx->spec & EXT4_SPEC_s_stripe) && + ext4_is_stripe_incompatible(sb, ctx->s_stripe)) { + ext4_msg(sb, KERN_WARNING, + "stripe (%lu) is not aligned with cluster size (%u), " + "stripe is disabled", + ctx->s_stripe, sbi->s_cluster_ratio); + ctx->s_stripe = 0; + } + /* * Changing the DIOREAD_NOLOCK or DELALLOC mount options may cause * two calls to ext4_should_dioread_nolock() to return inconsistent @@ -6529,8 +6554,12 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb) goto restore_opts; } - if (test_opt2(sb, ABORT)) - ext4_abort(sb, ESHUTDOWN, "Abort forced by user"); + if ((old_opts.s_mount_opt & EXT4_MOUNT_DELALLOC) && + !test_opt(sb, DELALLOC)) { + ext4_msg(sb, KERN_ERR, "can't disable delalloc during remount"); + err = -EINVAL; + goto restore_opts; + } sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); @@ -6700,6 +6729,14 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb) if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb)) ext4_stop_mmpd(sbi); + /* + * Handle aborting the filesystem as the last thing during remount to + * avoid obsure errors during remount when some option changes fail to + * apply due to shutdown filesystem. + */ + if (test_opt2(sb, ABORT)) + ext4_abort(sb, ESHUTDOWN, "Abort forced by user"); + return 0; restore_opts: @@ -6864,6 +6901,10 @@ static int ext4_write_dquot(struct dquot *dquot) if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_commit(dquot); + if (ret < 0) + ext4_error_err(dquot->dq_sb, -ret, + "Failed to commit dquot type %d", + dquot->dq_id.type); err = ext4_journal_stop(handle); if (!ret) ret = err; @@ -6880,6 +6921,10 @@ static int ext4_acquire_dquot(struct dquot *dquot) if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_acquire(dquot); + if (ret < 0) + ext4_error_err(dquot->dq_sb, -ret, + "Failed to acquire dquot type %d", + dquot->dq_id.type); err = ext4_journal_stop(handle); if (!ret) ret = err; @@ -6899,6 +6944,10 @@ static int ext4_release_dquot(struct dquot *dquot) return PTR_ERR(handle); } ret = dquot_release(dquot); + if (ret < 0) + ext4_error_err(dquot->dq_sb, -ret, + "Failed to release dquot type %d", + dquot->dq_id.type); err = ext4_journal_stop(handle); if (!ret) ret = err; @@ -7314,12 +7363,12 @@ static inline int ext3_feature_set_ok(struct super_block *sb) static void ext4_kill_sb(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); - struct bdev_handle *handle = sbi ? sbi->s_journal_bdev_handle : NULL; + struct file *bdev_file = sbi ? sbi->s_journal_bdev_file : NULL; kill_block_super(sb); - if (handle) - bdev_release(handle); + if (bdev_file) + bdev_fput(bdev_file); } static struct file_system_type ext4_fs_type = { @@ -7328,7 +7377,7 @@ static struct file_system_type ext4_fs_type = { .init_fs_context = ext4_init_fs_context, .parameters = ext4_param_specs, .kill_sb = ext4_kill_sb, - .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, + .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME, }; MODULE_ALIAS_FS("ext4"); @@ -7434,6 +7483,5 @@ static void __exit ext4_exit_fs(void) MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); MODULE_DESCRIPTION("Fourth Extended Filesystem"); MODULE_LICENSE("GPL"); -MODULE_SOFTDEP("pre: crc32c"); module_init(ext4_init_fs) module_exit(ext4_exit_fs) |