diff options
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/Kconfig | 27 | ||||
-rw-r--r-- | fs/ext4/crypto.c | 2 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 36 | ||||
-rw-r--r-- | fs/ext4/fast_commit.c | 2 | ||||
-rw-r--r-- | fs/ext4/file.c | 2 | ||||
-rw-r--r-- | fs/ext4/fsmap.c | 14 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 4 | ||||
-rw-r--r-- | fs/ext4/indirect.c | 2 | ||||
-rw-r--r-- | fs/ext4/inode.c | 47 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 312 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 12 | ||||
-rw-r--r-- | fs/ext4/mmp.c | 6 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 2 | ||||
-rw-r--r-- | fs/ext4/orphan.c | 19 | ||||
-rw-r--r-- | fs/ext4/super.c | 46 | ||||
-rw-r--r-- | fs/ext4/verity.c | 2 | ||||
-rw-r--r-- | fs/ext4/xattr.c | 21 |
17 files changed, 435 insertions, 121 deletions
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index c9ca41d91a6c..01873c2a34ad 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -1,31 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -# Ext3 configs are here for backward compatibility with old configs which may -# have EXT3_FS set but not EXT4_FS set and thus would result in non-bootable -# kernels after the removal of ext3 driver. -config EXT3_FS - tristate "The Extended 3 (ext3) filesystem" - select EXT4_FS - help - This config option is here only for backward compatibility. ext3 - filesystem is now handled by the ext4 driver. - -config EXT3_FS_POSIX_ACL - bool "Ext3 POSIX Access Control Lists" - depends on EXT3_FS - select EXT4_FS_POSIX_ACL - select FS_POSIX_ACL - help - This config option is here only for backward compatibility. ext3 - filesystem is now handled by the ext4 driver. - -config EXT3_FS_SECURITY - bool "Ext3 Security Labels" - depends on EXT3_FS - select EXT4_FS_SECURITY - help - This config option is here only for backward compatibility. ext3 - filesystem is now handled by the ext4 driver. - config EXT4_FS tristate "The Extended 4 (ext4) filesystem" select BUFFER_HEAD diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index 0a056d97e640..cf0a0970c095 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -227,6 +227,8 @@ static bool ext4_has_stable_inodes(struct super_block *sb) } const struct fscrypt_operations ext4_cryptops = { + .inode_info_offs = (int)offsetof(struct ext4_inode_info, i_crypt_info) - + (int)offsetof(struct ext4_inode_info, vfs_inode), .needs_bounce_pages = 1, .has_32bit_inodes = 1, .supports_subblock_data_units = 1, diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 01a6e2de7fc3..57087da6c7be 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1182,6 +1182,14 @@ struct ext4_inode_info { __u32 i_csum_seed; kprojid_t i_projid; + +#ifdef CONFIG_FS_ENCRYPTION + struct fscrypt_inode_info *i_crypt_info; +#endif + +#ifdef CONFIG_FS_VERITY + struct fsverity_info *i_verity_info; +#endif }; /* @@ -1442,7 +1450,9 @@ struct ext4_super_block { __le16 s_encoding; /* Filename charset encoding */ __le16 s_encoding_flags; /* Filename charset encoding flags */ __le32 s_orphan_file_inum; /* Inode for tracking orphan inodes */ - __le32 s_reserved[94]; /* Padding to the end of the block */ + __le16 s_def_resuid_hi; + __le16 s_def_resgid_hi; + __le32 s_reserved[93]; /* Padding to the end of the block */ __le32 s_checksum; /* crc32c(superblock) */ }; @@ -1812,6 +1822,18 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); } +static inline int ext4_get_resuid(struct ext4_super_block *es) +{ + return le16_to_cpu(es->s_def_resuid) | + le16_to_cpu(es->s_def_resuid_hi) << 16; +} + +static inline int ext4_get_resgid(struct ext4_super_block *es) +{ + return le16_to_cpu(es->s_def_resgid) | + le16_to_cpu(es->s_def_resgid_hi) << 16; +} + /* * Returns: sbi->field[index] * Used to access an array element from the following sbi fields which require @@ -1982,6 +2004,16 @@ static inline bool ext4_verity_in_progress(struct inode *inode) #define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime /* + * Check whether the inode is tracked as orphan (either in orphan file or + * orphan list). + */ +static inline bool ext4_inode_orphan_tracked(struct inode *inode) +{ + return ext4_test_inode_state(inode, EXT4_STATE_ORPHAN_FILE) || + !list_empty(&EXT4_I(inode)->i_orphan); +} + +/* * Codes for operating systems */ #define EXT4_OS_LINUX 0 @@ -3134,6 +3166,8 @@ extern struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block, blk_opf_t op_flags); extern struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb, sector_t block); +extern struct buffer_head *ext4_sb_bread_nofail(struct super_block *sb, + sector_t block); extern void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags, bh_end_io_t *end_io, bool simu_fail); extern int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 42bee1d4f9f9..fa66b08de999 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -663,7 +663,7 @@ void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t star static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail) { - blk_opf_t write_flags = REQ_SYNC; + blk_opf_t write_flags = JBD2_JOURNAL_REQ_FLAGS; struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh; /* Add REQ_FUA | REQ_PREFLUSH only its tail */ diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 93240e35ee36..7a8b30932189 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -354,7 +354,7 @@ static void ext4_inode_extension_cleanup(struct inode *inode, bool need_trunc) * to cleanup the orphan list in ext4_handle_inode_extension(). Do it * now. */ - if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) { + if (ext4_inode_orphan_tracked(inode) && inode->i_nlink) { handle_t *handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); if (IS_ERR(handle)) { diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c index 91185c40f755..22fc333244ef 100644 --- a/fs/ext4/fsmap.c +++ b/fs/ext4/fsmap.c @@ -74,7 +74,8 @@ static int ext4_getfsmap_dev_compare(const void *p1, const void *p2) static bool ext4_getfsmap_rec_before_low_key(struct ext4_getfsmap_info *info, struct ext4_fsmap *rec) { - return rec->fmr_physical < info->gfi_low.fmr_physical; + return rec->fmr_physical + rec->fmr_length <= + info->gfi_low.fmr_physical; } /* @@ -200,15 +201,18 @@ static int ext4_getfsmap_meta_helper(struct super_block *sb, ext4_group_first_block_no(sb, agno)); fs_end = fs_start + EXT4_C2B(sbi, len); - /* Return relevant extents from the meta_list */ + /* + * Return relevant extents from the meta_list. We emit all extents that + * partially/fully overlap with the query range + */ list_for_each_entry_safe(p, tmp, &info->gfi_meta_list, fmr_list) { - if (p->fmr_physical < info->gfi_next_fsblk) { + if (p->fmr_physical + p->fmr_length <= info->gfi_next_fsblk) { list_del(&p->fmr_list); kfree(p); continue; } - if (p->fmr_physical <= fs_start || - p->fmr_physical + p->fmr_length <= fs_end) { + if (p->fmr_physical <= fs_end && + p->fmr_physical + p->fmr_length > fs_start) { /* Emit the retained free extent record if present */ if (info->gfi_lastfree.fmr_owner) { error = ext4_getfsmap_helper(sb, info, diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index df4051613b29..ba4fd9aba1c1 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -252,10 +252,10 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) "nonexistent device\n", __func__, __LINE__); return; } - if (atomic_read(&inode->i_count) > 1) { + if (icount_read(inode) > 1) { ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: count=%d", __func__, __LINE__, inode->i_ino, - atomic_read(&inode->i_count)); + icount_read(inode)); return; } if (inode->i_nlink) { diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index d45124318200..da76353b3a57 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -1025,7 +1025,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, } /* Go read the buffer for the next level down */ - bh = ext4_sb_bread(inode->i_sb, nr, 0); + bh = ext4_sb_bread_nofail(inode->i_sb, nr); /* * A read failure? Report error and clear slot diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5b7a15db4953..f9e4ac87211e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3872,47 +3872,12 @@ static int ext4_iomap_overwrite_begin(struct inode *inode, loff_t offset, return ret; } -static inline bool ext4_want_directio_fallback(unsigned flags, ssize_t written) -{ - /* must be a directio to fall back to buffered */ - if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != - (IOMAP_WRITE | IOMAP_DIRECT)) - return false; - - /* atomic writes are all-or-nothing */ - if (flags & IOMAP_ATOMIC) - return false; - - /* can only try again if we wrote nothing */ - return written == 0; -} - -static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length, - ssize_t written, unsigned flags, struct iomap *iomap) -{ - /* - * Check to see whether an error occurred while writing out the data to - * the allocated blocks. If so, return the magic error code for - * non-atomic write so that we fallback to buffered I/O and attempt to - * complete the remainder of the I/O. - * For non-atomic writes, any blocks that may have been - * allocated in preparation for the direct I/O will be reused during - * buffered I/O. For atomic write, we never fallback to buffered-io. - */ - if (ext4_want_directio_fallback(flags, written)) - return -ENOTBLK; - - return 0; -} - const struct iomap_ops ext4_iomap_ops = { .iomap_begin = ext4_iomap_begin, - .iomap_end = ext4_iomap_end, }; const struct iomap_ops ext4_iomap_overwrite_ops = { .iomap_begin = ext4_iomap_overwrite_begin, - .iomap_end = ext4_iomap_end, }; static int ext4_iomap_begin_report(struct inode *inode, loff_t offset, @@ -4287,7 +4252,11 @@ int ext4_can_truncate(struct inode *inode) * We have to make sure i_disksize gets properly updated before we truncate * page cache due to hole punching or zero range. Otherwise i_disksize update * can get lost as it may have been postponed to submission of writeback but - * that will never happen after we truncate page cache. + * that will never happen if we remove the folio containing i_size from the + * page cache. Also if we punch hole within i_size but above i_disksize, + * following ext4_page_mkwrite() may mistakenly allocate written blocks over + * the hole and thus introduce allocated blocks beyond i_disksize which is + * not allowed (e2fsck would complain in case of crash). */ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset, loff_t len) @@ -4298,9 +4267,11 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset, loff_t size = i_size_read(inode); WARN_ON(!inode_is_locked(inode)); - if (offset > size || offset + len < size) + if (offset > size) return 0; + if (offset + len < size) + size = offset + len; if (EXT4_I(inode)->i_disksize >= size) return 0; @@ -4748,7 +4719,7 @@ static int ext4_fill_raw_inode(struct inode *inode, struct ext4_inode *raw_inode * old inodes get re-used with the upper 16 bits of the * uid/gid intact. */ - if (ei->i_dtime && list_empty(&ei->i_orphan)) { + if (ei->i_dtime && !ext4_inode_orphan_tracked(inode)) { raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } else { diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 84e3c73952d7..a93a7baae990 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -27,14 +27,16 @@ #include "fsmap.h" #include <trace/events/ext4.h> -typedef void ext4_update_sb_callback(struct ext4_super_block *es, - const void *arg); +typedef void ext4_update_sb_callback(struct ext4_sb_info *sbi, + struct ext4_super_block *es, + const void *arg); /* * Superblock modification callback function for changing file system * label */ -static void ext4_sb_setlabel(struct ext4_super_block *es, const void *arg) +static void ext4_sb_setlabel(struct ext4_sb_info *sbi, + struct ext4_super_block *es, const void *arg) { /* Sanity check, this should never happen */ BUILD_BUG_ON(sizeof(es->s_volume_name) < EXT4_LABEL_MAX); @@ -46,7 +48,8 @@ static void ext4_sb_setlabel(struct ext4_super_block *es, const void *arg) * Superblock modification callback function for changing file system * UUID. */ -static void ext4_sb_setuuid(struct ext4_super_block *es, const void *arg) +static void ext4_sb_setuuid(struct ext4_sb_info *sbi, + struct ext4_super_block *es, const void *arg) { memcpy(es->s_uuid, (__u8 *)arg, UUID_SIZE); } @@ -71,7 +74,7 @@ int ext4_update_primary_sb(struct super_block *sb, handle_t *handle, goto out_err; lock_buffer(bh); - func(es, arg); + func(sbi, es, arg); ext4_superblock_csum_set(sb); unlock_buffer(bh); @@ -149,7 +152,7 @@ static int ext4_update_backup_sb(struct super_block *sb, unlock_buffer(bh); goto out_bh; } - func(es, arg); + func(EXT4_SB(sb), es, arg); if (ext4_has_feature_metadata_csum(sb)) es->s_checksum = ext4_superblock_csum(es); set_buffer_uptodate(bh); @@ -1230,6 +1233,295 @@ static int ext4_ioctl_setuuid(struct file *filp, return ret; } + +#define TUNE_OPS_SUPPORTED (EXT4_TUNE_FL_ERRORS_BEHAVIOR | \ + EXT4_TUNE_FL_MNT_COUNT | EXT4_TUNE_FL_MAX_MNT_COUNT | \ + EXT4_TUNE_FL_CHECKINTRVAL | EXT4_TUNE_FL_LAST_CHECK_TIME | \ + EXT4_TUNE_FL_RESERVED_BLOCKS | EXT4_TUNE_FL_RESERVED_UID | \ + EXT4_TUNE_FL_RESERVED_GID | EXT4_TUNE_FL_DEFAULT_MNT_OPTS | \ + EXT4_TUNE_FL_DEF_HASH_ALG | EXT4_TUNE_FL_RAID_STRIDE | \ + EXT4_TUNE_FL_RAID_STRIPE_WIDTH | EXT4_TUNE_FL_MOUNT_OPTS | \ + EXT4_TUNE_FL_FEATURES | EXT4_TUNE_FL_EDIT_FEATURES | \ + EXT4_TUNE_FL_FORCE_FSCK | EXT4_TUNE_FL_ENCODING | \ + EXT4_TUNE_FL_ENCODING_FLAGS) + +#define EXT4_TUNE_SET_COMPAT_SUPP \ + (EXT4_FEATURE_COMPAT_DIR_INDEX | \ + EXT4_FEATURE_COMPAT_STABLE_INODES) +#define EXT4_TUNE_SET_INCOMPAT_SUPP \ + (EXT4_FEATURE_INCOMPAT_EXTENTS | \ + EXT4_FEATURE_INCOMPAT_EA_INODE | \ + EXT4_FEATURE_INCOMPAT_ENCRYPT | \ + EXT4_FEATURE_INCOMPAT_CSUM_SEED | \ + EXT4_FEATURE_INCOMPAT_LARGEDIR | \ + EXT4_FEATURE_INCOMPAT_CASEFOLD) +#define EXT4_TUNE_SET_RO_COMPAT_SUPP \ + (EXT4_FEATURE_RO_COMPAT_LARGE_FILE | \ + EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \ + EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ + EXT4_FEATURE_RO_COMPAT_PROJECT | \ + EXT4_FEATURE_RO_COMPAT_VERITY) + +#define EXT4_TUNE_CLEAR_COMPAT_SUPP (0) +#define EXT4_TUNE_CLEAR_INCOMPAT_SUPP (0) +#define EXT4_TUNE_CLEAR_RO_COMPAT_SUPP (0) + +#define SB_ENC_SUPP_MASK (SB_ENC_STRICT_MODE_FL | \ + SB_ENC_NO_COMPAT_FALLBACK_FL) + +static int ext4_ioctl_get_tune_sb(struct ext4_sb_info *sbi, + struct ext4_tune_sb_params __user *params) +{ + struct ext4_tune_sb_params ret; + struct ext4_super_block *es = sbi->s_es; + + memset(&ret, 0, sizeof(ret)); + ret.set_flags = TUNE_OPS_SUPPORTED; + ret.errors_behavior = le16_to_cpu(es->s_errors); + ret.mnt_count = le16_to_cpu(es->s_mnt_count); + ret.max_mnt_count = le16_to_cpu(es->s_max_mnt_count); + ret.checkinterval = le32_to_cpu(es->s_checkinterval); + ret.last_check_time = le32_to_cpu(es->s_lastcheck); + ret.reserved_blocks = ext4_r_blocks_count(es); + ret.blocks_count = ext4_blocks_count(es); + ret.reserved_uid = ext4_get_resuid(es); + ret.reserved_gid = ext4_get_resgid(es); + ret.default_mnt_opts = le32_to_cpu(es->s_default_mount_opts); + ret.def_hash_alg = es->s_def_hash_version; + ret.raid_stride = le16_to_cpu(es->s_raid_stride); + ret.raid_stripe_width = le32_to_cpu(es->s_raid_stripe_width); + ret.encoding = le16_to_cpu(es->s_encoding); + ret.encoding_flags = le16_to_cpu(es->s_encoding_flags); + strscpy_pad(ret.mount_opts, es->s_mount_opts); + ret.feature_compat = le32_to_cpu(es->s_feature_compat); + ret.feature_incompat = le32_to_cpu(es->s_feature_incompat); + ret.feature_ro_compat = le32_to_cpu(es->s_feature_ro_compat); + ret.set_feature_compat_mask = EXT4_TUNE_SET_COMPAT_SUPP; + ret.set_feature_incompat_mask = EXT4_TUNE_SET_INCOMPAT_SUPP; + ret.set_feature_ro_compat_mask = EXT4_TUNE_SET_RO_COMPAT_SUPP; + ret.clear_feature_compat_mask = EXT4_TUNE_CLEAR_COMPAT_SUPP; + ret.clear_feature_incompat_mask = EXT4_TUNE_CLEAR_INCOMPAT_SUPP; + ret.clear_feature_ro_compat_mask = EXT4_TUNE_CLEAR_RO_COMPAT_SUPP; + if (copy_to_user(params, &ret, sizeof(ret))) + return -EFAULT; + return 0; +} + +static void ext4_sb_setparams(struct ext4_sb_info *sbi, + struct ext4_super_block *es, const void *arg) +{ + const struct ext4_tune_sb_params *params = arg; + + if (params->set_flags & EXT4_TUNE_FL_ERRORS_BEHAVIOR) + es->s_errors = cpu_to_le16(params->errors_behavior); + if (params->set_flags & EXT4_TUNE_FL_MNT_COUNT) + es->s_mnt_count = cpu_to_le16(params->mnt_count); + if (params->set_flags & EXT4_TUNE_FL_MAX_MNT_COUNT) + es->s_max_mnt_count = cpu_to_le16(params->max_mnt_count); + if (params->set_flags & EXT4_TUNE_FL_CHECKINTRVAL) + es->s_checkinterval = cpu_to_le32(params->checkinterval); + if (params->set_flags & EXT4_TUNE_FL_LAST_CHECK_TIME) + es->s_lastcheck = cpu_to_le32(params->last_check_time); + if (params->set_flags & EXT4_TUNE_FL_RESERVED_BLOCKS) { + ext4_fsblk_t blk = params->reserved_blocks; + + es->s_r_blocks_count_lo = cpu_to_le32((u32)blk); + es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32); + } + if (params->set_flags & EXT4_TUNE_FL_RESERVED_UID) { + int uid = params->reserved_uid; + + es->s_def_resuid = cpu_to_le16(uid & 0xFFFF); + es->s_def_resuid_hi = cpu_to_le16(uid >> 16); + } + if (params->set_flags & EXT4_TUNE_FL_RESERVED_GID) { + int gid = params->reserved_gid; + + es->s_def_resgid = cpu_to_le16(gid & 0xFFFF); + es->s_def_resgid_hi = cpu_to_le16(gid >> 16); + } + if (params->set_flags & EXT4_TUNE_FL_DEFAULT_MNT_OPTS) + es->s_default_mount_opts = cpu_to_le32(params->default_mnt_opts); + if (params->set_flags & EXT4_TUNE_FL_DEF_HASH_ALG) + es->s_def_hash_version = params->def_hash_alg; + if (params->set_flags & EXT4_TUNE_FL_RAID_STRIDE) + es->s_raid_stride = cpu_to_le16(params->raid_stride); + if (params->set_flags & EXT4_TUNE_FL_RAID_STRIPE_WIDTH) + es->s_raid_stripe_width = + cpu_to_le32(params->raid_stripe_width); + if (params->set_flags & EXT4_TUNE_FL_ENCODING) + es->s_encoding = cpu_to_le16(params->encoding); + if (params->set_flags & EXT4_TUNE_FL_ENCODING_FLAGS) + es->s_encoding_flags = cpu_to_le16(params->encoding_flags); + strscpy_pad(es->s_mount_opts, params->mount_opts); + if (params->set_flags & EXT4_TUNE_FL_EDIT_FEATURES) { + es->s_feature_compat |= + cpu_to_le32(params->set_feature_compat_mask); + es->s_feature_incompat |= + cpu_to_le32(params->set_feature_incompat_mask); + es->s_feature_ro_compat |= + cpu_to_le32(params->set_feature_ro_compat_mask); + es->s_feature_compat &= + ~cpu_to_le32(params->clear_feature_compat_mask); + es->s_feature_incompat &= + ~cpu_to_le32(params->clear_feature_incompat_mask); + es->s_feature_ro_compat &= + ~cpu_to_le32(params->clear_feature_ro_compat_mask); + if (params->set_feature_compat_mask & + EXT4_FEATURE_COMPAT_DIR_INDEX) + es->s_def_hash_version = sbi->s_def_hash_version; + if (params->set_feature_incompat_mask & + EXT4_FEATURE_INCOMPAT_CSUM_SEED) + es->s_checksum_seed = cpu_to_le32(sbi->s_csum_seed); + } + if (params->set_flags & EXT4_TUNE_FL_FORCE_FSCK) + es->s_state |= cpu_to_le16(EXT4_ERROR_FS); +} + +static int ext4_ioctl_set_tune_sb(struct file *filp, + struct ext4_tune_sb_params __user *in) +{ + struct ext4_tune_sb_params params; + struct super_block *sb = file_inode(filp)->i_sb; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + int enabling_casefold = 0; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(¶ms, in, sizeof(params))) + return -EFAULT; + + if ((params.set_flags & ~TUNE_OPS_SUPPORTED) != 0) + return -EOPNOTSUPP; + + if ((params.set_flags & EXT4_TUNE_FL_ERRORS_BEHAVIOR) && + (params.errors_behavior > EXT4_ERRORS_PANIC)) + return -EINVAL; + + if ((params.set_flags & EXT4_TUNE_FL_RESERVED_BLOCKS) && + (params.reserved_blocks > ext4_blocks_count(sbi->s_es) / 2)) + return -EINVAL; + if ((params.set_flags & EXT4_TUNE_FL_DEF_HASH_ALG) && + ((params.def_hash_alg > DX_HASH_LAST) || + (params.def_hash_alg == DX_HASH_SIPHASH))) + return -EINVAL; + if ((params.set_flags & EXT4_TUNE_FL_FEATURES) && + (params.set_flags & EXT4_TUNE_FL_EDIT_FEATURES)) + return -EINVAL; + + if (params.set_flags & EXT4_TUNE_FL_FEATURES) { + params.set_feature_compat_mask = + params.feature_compat & + ~le32_to_cpu(es->s_feature_compat); + params.set_feature_incompat_mask = + params.feature_incompat & + ~le32_to_cpu(es->s_feature_incompat); + params.set_feature_ro_compat_mask = + params.feature_ro_compat & + ~le32_to_cpu(es->s_feature_ro_compat); + params.clear_feature_compat_mask = + ~params.feature_compat & + le32_to_cpu(es->s_feature_compat); + params.clear_feature_incompat_mask = + ~params.feature_incompat & + le32_to_cpu(es->s_feature_incompat); + params.clear_feature_ro_compat_mask = + ~params.feature_ro_compat & + le32_to_cpu(es->s_feature_ro_compat); + params.set_flags |= EXT4_TUNE_FL_EDIT_FEATURES; + } + if (params.set_flags & EXT4_TUNE_FL_EDIT_FEATURES) { + if ((params.set_feature_compat_mask & + ~EXT4_TUNE_SET_COMPAT_SUPP) || + (params.set_feature_incompat_mask & + ~EXT4_TUNE_SET_INCOMPAT_SUPP) || + (params.set_feature_ro_compat_mask & + ~EXT4_TUNE_SET_RO_COMPAT_SUPP) || + (params.clear_feature_compat_mask & + ~EXT4_TUNE_CLEAR_COMPAT_SUPP) || + (params.clear_feature_incompat_mask & + ~EXT4_TUNE_CLEAR_INCOMPAT_SUPP) || + (params.clear_feature_ro_compat_mask & + ~EXT4_TUNE_CLEAR_RO_COMPAT_SUPP)) + return -EOPNOTSUPP; + + /* + * Filter out the features that are already set from + * the set_mask. + */ + params.set_feature_compat_mask &= + ~le32_to_cpu(es->s_feature_compat); + params.set_feature_incompat_mask &= + ~le32_to_cpu(es->s_feature_incompat); + params.set_feature_ro_compat_mask &= + ~le32_to_cpu(es->s_feature_ro_compat); + if ((params.set_feature_incompat_mask & + EXT4_FEATURE_INCOMPAT_CASEFOLD)) { + enabling_casefold = 1; + if (!(params.set_flags & EXT4_TUNE_FL_ENCODING)) { + params.encoding = EXT4_ENC_UTF8_12_1; + params.set_flags |= EXT4_TUNE_FL_ENCODING; + } + if (!(params.set_flags & EXT4_TUNE_FL_ENCODING_FLAGS)) { + params.encoding_flags = 0; + params.set_flags |= EXT4_TUNE_FL_ENCODING_FLAGS; + } + } + if ((params.set_feature_compat_mask & + EXT4_FEATURE_COMPAT_DIR_INDEX)) { + uuid_t uu; + + memcpy(&uu, sbi->s_hash_seed, UUID_SIZE); + if (uuid_is_null(&uu)) + generate_random_uuid((char *) + &sbi->s_hash_seed); + if (params.set_flags & EXT4_TUNE_FL_DEF_HASH_ALG) + sbi->s_def_hash_version = params.def_hash_alg; + else if (sbi->s_def_hash_version == 0) + sbi->s_def_hash_version = DX_HASH_HALF_MD4; + if (!(es->s_flags & + cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH)) && + !(es->s_flags & + cpu_to_le32(EXT2_FLAGS_SIGNED_HASH))) { +#ifdef __CHAR_UNSIGNED__ + sbi->s_hash_unsigned = 3; +#else + sbi->s_hash_unsigned = 0; +#endif + } + } + } + if (params.set_flags & EXT4_TUNE_FL_ENCODING) { + if (!enabling_casefold) + return -EINVAL; + if (params.encoding == 0) + params.encoding = EXT4_ENC_UTF8_12_1; + else if (params.encoding != EXT4_ENC_UTF8_12_1) + return -EINVAL; + } + if (params.set_flags & EXT4_TUNE_FL_ENCODING_FLAGS) { + if (!enabling_casefold) + return -EINVAL; + if (params.encoding_flags & ~SB_ENC_SUPP_MASK) + return -EINVAL; + } + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + ret = ext4_update_superblocks_fn(sb, ext4_sb_setparams, ¶ms); + mnt_drop_write_file(filp); + + if (params.set_flags & EXT4_TUNE_FL_DEF_HASH_ALG) + sbi->s_def_hash_version = params.def_hash_alg; + + return ret; +} + static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -1616,6 +1908,11 @@ resizefs_out: return ext4_ioctl_getuuid(EXT4_SB(sb), (void __user *)arg); case EXT4_IOC_SETFSUUID: return ext4_ioctl_setuuid(filp, (const void __user *)arg); + case EXT4_IOC_GET_TUNE_SB_PARAM: + return ext4_ioctl_get_tune_sb(EXT4_SB(sb), + (void __user *)arg); + case EXT4_IOC_SET_TUNE_SB_PARAM: + return ext4_ioctl_set_tune_sb(filp, (void __user *)arg); default: return -ENOTTY; } @@ -1703,7 +2000,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } #endif -static void set_overhead(struct ext4_super_block *es, const void *arg) +static void set_overhead(struct ext4_sb_info *sbi, + struct ext4_super_block *es, const void *arg) { es->s_overhead_clusters = cpu_to_le32(*((unsigned long *) arg)); } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 5898d92ba19f..9087183602e4 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3655,16 +3655,26 @@ static void ext4_discard_work(struct work_struct *work) static inline void ext4_mb_avg_fragment_size_destroy(struct ext4_sb_info *sbi) { + if (!sbi->s_mb_avg_fragment_size) + return; + for (int i = 0; i < MB_NUM_ORDERS(sbi->s_sb); i++) xa_destroy(&sbi->s_mb_avg_fragment_size[i]); + kfree(sbi->s_mb_avg_fragment_size); + sbi->s_mb_avg_fragment_size = NULL; } static inline void ext4_mb_largest_free_orders_destroy(struct ext4_sb_info *sbi) { + if (!sbi->s_mb_largest_free_orders) + return; + for (int i = 0; i < MB_NUM_ORDERS(sbi->s_sb); i++) xa_destroy(&sbi->s_mb_largest_free_orders[i]); + kfree(sbi->s_mb_largest_free_orders); + sbi->s_mb_largest_free_orders = NULL; } int ext4_mb_init(struct super_block *sb) @@ -3995,7 +4005,7 @@ void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid) list_splice_tail(&freed_data_list, &sbi->s_discard_list); spin_unlock(&sbi->s_md_lock); if (wake) - queue_work(system_unbound_wq, &sbi->s_discard_work); + queue_work(system_dfl_wq, &sbi->s_discard_work); } else { list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list) kmem_cache_free(ext4_free_data_cachep, entry); diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 51661570cf3b..ab1ff51302fb 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -231,9 +231,9 @@ static int kmmpd(void *data) * Adjust the mmp_check_interval depending on how much time * it took for the MMP block to be written. */ - mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ, - EXT4_MMP_MAX_CHECK_INTERVAL), - EXT4_MMP_MIN_CHECK_INTERVAL); + mmp_check_interval = clamp(EXT4_MMP_CHECK_MULT * diff / HZ, + EXT4_MMP_MIN_CHECK_INTERVAL, + EXT4_MMP_MAX_CHECK_INTERVAL); mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); } diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index adae3caf175a..4b091c21908f 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -225,7 +225,7 @@ static int mext_page_mkuptodate(struct folio *folio, size_t from, size_t to) do { if (bh_offset(bh) + blocksize <= from) continue; - if (bh_offset(bh) > to) + if (bh_offset(bh) >= to) break; wait_on_buffer(bh); if (buffer_uptodate(bh)) diff --git a/fs/ext4/orphan.c b/fs/ext4/orphan.c index 524d4658fa40..33c3a89396b1 100644 --- a/fs/ext4/orphan.c +++ b/fs/ext4/orphan.c @@ -109,11 +109,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) && !inode_is_locked(inode)); - /* - * Inode orphaned in orphan file or in orphan list? - */ - if (ext4_test_inode_state(inode, EXT4_STATE_ORPHAN_FILE) || - !list_empty(&EXT4_I(inode)->i_orphan)) + if (ext4_inode_orphan_tracked(inode)) return 0; /* @@ -587,9 +583,20 @@ int ext4_init_orphan_info(struct super_block *sb) ext4_msg(sb, KERN_ERR, "get orphan inode failed"); return PTR_ERR(inode); } + /* + * This is just an artificial limit to prevent corrupted fs from + * consuming absurd amounts of memory when pinning blocks of orphan + * file in memory. + */ + if (inode->i_size > 8 << 20) { + ext4_msg(sb, KERN_ERR, "orphan file too big: %llu", + (unsigned long long)inode->i_size); + ret = -EFSCORRUPTED; + goto out_put; + } oi->of_blocks = inode->i_size >> sb->s_blocksize_bits; oi->of_csum_seed = EXT4_I(inode)->i_csum_seed; - oi->of_binfo = kmalloc_array(oi->of_blocks, + oi->of_binfo = kvmalloc_array(oi->of_blocks, sizeof(struct ext4_orphan_block), GFP_KERNEL); if (!oi->of_binfo) { diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 699c15db28a8..33e7c08c9529 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -265,6 +265,15 @@ struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb, return __ext4_sb_bread_gfp(sb, block, 0, gfp); } +struct buffer_head *ext4_sb_bread_nofail(struct super_block *sb, + sector_t block) +{ + gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_mapping, + ~__GFP_FS) | __GFP_MOVABLE | __GFP_NOFAIL; + + return __ext4_sb_bread_gfp(sb, block, 0, gfp); +} + void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block) { struct buffer_head *bh = bdev_getblk(sb->s_bdev, block, @@ -1417,7 +1426,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) static int ext4_drop_inode(struct inode *inode) { - int drop = generic_drop_inode(inode); + int drop = inode_generic_drop(inode); if (!drop) drop = fscrypt_drop_inode(inode); @@ -1438,9 +1447,9 @@ static void ext4_free_in_core_inode(struct inode *inode) static void ext4_destroy_inode(struct inode *inode) { - if (!list_empty(&(EXT4_I(inode)->i_orphan))) { + if (ext4_inode_orphan_tracked(inode)) { ext4_msg(inode->i_sb, KERN_ERR, - "Inode %lu (%p): orphan list check failed!", + "Inode %lu (%p): inode tracked as orphan!", inode->i_ino, EXT4_I(inode)); print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, EXT4_I(inode), sizeof(struct ext4_inode_info), @@ -1470,6 +1479,12 @@ static void init_once(void *foo) init_rwsem(&ei->i_data_sem); inode_init_once(&ei->vfs_inode); ext4_fc_init_inode(&ei->vfs_inode); +#ifdef CONFIG_FS_ENCRYPTION + ei->i_crypt_info = NULL; +#endif +#ifdef CONFIG_FS_VERITY + ei->i_verity_info = NULL; +#endif } static int __init init_inodecache(void) @@ -2460,7 +2475,7 @@ static int parse_apply_sb_mount_options(struct super_block *sb, struct ext4_fs_context *m_ctx) { struct ext4_sb_info *sbi = EXT4_SB(sb); - char *s_mount_opts = NULL; + char s_mount_opts[65]; struct ext4_fs_context *s_ctx = NULL; struct fs_context *fc = NULL; int ret = -ENOMEM; @@ -2468,15 +2483,11 @@ static int parse_apply_sb_mount_options(struct super_block *sb, if (!sbi->s_es->s_mount_opts[0]) return 0; - s_mount_opts = kstrndup(sbi->s_es->s_mount_opts, - sizeof(sbi->s_es->s_mount_opts), - GFP_KERNEL); - if (!s_mount_opts) - return ret; + strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts); fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL); if (!fc) - goto out_free; + return -ENOMEM; s_ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL); if (!s_ctx) @@ -2508,11 +2519,8 @@ parse_failed: ret = 0; out_free: - if (fc) { - ext4_fc_free(fc); - kfree(fc); - } - kfree(s_mount_opts); + ext4_fc_free(fc); + kfree(fc); return ret; } @@ -2958,11 +2966,11 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, } if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) || - le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) + ext4_get_resuid(es) != EXT4_DEF_RESUID) SEQ_OPTS_PRINT("resuid=%u", from_kuid_munged(&init_user_ns, sbi->s_resuid)); if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) || - le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) + ext4_get_resgid(es) != EXT4_DEF_RESGID) SEQ_OPTS_PRINT("resgid=%u", from_kgid_munged(&init_user_ns, sbi->s_resgid)); def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors); @@ -5277,8 +5285,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) ext4_set_def_opts(sb, es); - sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid)); - sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid)); + sbi->s_resuid = make_kuid(&init_user_ns, ext4_get_resuid(es)); + sbi->s_resgid = make_kgid(&init_user_ns, ext4_get_resuid(es)); sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c index d9203228ce97..b0acb0c50313 100644 --- a/fs/ext4/verity.c +++ b/fs/ext4/verity.c @@ -389,6 +389,8 @@ static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf, } const struct fsverity_operations ext4_verityops = { + .inode_info_offs = (int)offsetof(struct ext4_inode_info, i_verity_info) - + (int)offsetof(struct ext4_inode_info, vfs_inode), .begin_enable_verity = ext4_begin_enable_verity, .end_enable_verity = ext4_end_enable_verity, .get_verity_descriptor = ext4_get_verity_descriptor, diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 5a6fe1513fd2..ce7253b3f549 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -251,6 +251,10 @@ check_xattrs(struct inode *inode, struct buffer_head *bh, err_str = "invalid ea_ino"; goto errout; } + if (ea_ino && !size) { + err_str = "invalid size in ea xattr"; + goto errout; + } if (size > EXT4_XATTR_SIZE_MAX) { err_str = "e_value size too large"; goto errout; @@ -1019,7 +1023,7 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode, int ref_change) { struct ext4_iloc iloc; - s64 ref_count; + u64 ref_count; int ret; inode_lock_nested(ea_inode, I_MUTEX_XATTR); @@ -1029,13 +1033,17 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode, goto out; ref_count = ext4_xattr_inode_get_ref(ea_inode); + if ((ref_count == 0 && ref_change < 0) || (ref_count == U64_MAX && ref_change > 0)) { + ext4_error_inode(ea_inode, __func__, __LINE__, 0, + "EA inode %lu ref wraparound: ref_count=%lld ref_change=%d", + ea_inode->i_ino, ref_count, ref_change); + ret = -EFSCORRUPTED; + goto out; + } ref_count += ref_change; ext4_xattr_inode_set_ref(ea_inode, ref_count); if (ref_change > 0) { - WARN_ONCE(ref_count <= 0, "EA inode %lu ref_count=%lld", - ea_inode->i_ino, ref_count); - if (ref_count == 1) { WARN_ONCE(ea_inode->i_nlink, "EA inode %lu i_nlink=%u", ea_inode->i_ino, ea_inode->i_nlink); @@ -1044,9 +1052,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode, ext4_orphan_del(handle, ea_inode); } } else { - WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld", - ea_inode->i_ino, ref_count); - if (ref_count == 0) { WARN_ONCE(ea_inode->i_nlink != 1, "EA inode %lu i_nlink=%u", @@ -1530,7 +1535,7 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value, WARN_ON_ONCE(ext4_handle_valid(journal_current_handle()) && !(current->flags & PF_MEMALLOC_NOFS)); - ea_data = kvmalloc(value_len, GFP_KERNEL); + ea_data = kvmalloc(value_len, GFP_NOFS); if (!ea_data) { mb_cache_entry_put(ea_inode_cache, ce); return NULL; |