summaryrefslogtreecommitdiff
path: root/fs/ext4
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/Kconfig27
-rw-r--r--fs/ext4/crypto.c2
-rw-r--r--fs/ext4/ext4.h36
-rw-r--r--fs/ext4/fast_commit.c2
-rw-r--r--fs/ext4/file.c2
-rw-r--r--fs/ext4/fsmap.c14
-rw-r--r--fs/ext4/ialloc.c4
-rw-r--r--fs/ext4/indirect.c2
-rw-r--r--fs/ext4/inode.c47
-rw-r--r--fs/ext4/ioctl.c312
-rw-r--r--fs/ext4/mballoc.c12
-rw-r--r--fs/ext4/mmp.c6
-rw-r--r--fs/ext4/move_extent.c2
-rw-r--r--fs/ext4/orphan.c19
-rw-r--r--fs/ext4/super.c46
-rw-r--r--fs/ext4/verity.c2
-rw-r--r--fs/ext4/xattr.c21
17 files changed, 435 insertions, 121 deletions
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index c9ca41d91a6c..01873c2a34ad 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -1,31 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-# Ext3 configs are here for backward compatibility with old configs which may
-# have EXT3_FS set but not EXT4_FS set and thus would result in non-bootable
-# kernels after the removal of ext3 driver.
-config EXT3_FS
- tristate "The Extended 3 (ext3) filesystem"
- select EXT4_FS
- help
- This config option is here only for backward compatibility. ext3
- filesystem is now handled by the ext4 driver.
-
-config EXT3_FS_POSIX_ACL
- bool "Ext3 POSIX Access Control Lists"
- depends on EXT3_FS
- select EXT4_FS_POSIX_ACL
- select FS_POSIX_ACL
- help
- This config option is here only for backward compatibility. ext3
- filesystem is now handled by the ext4 driver.
-
-config EXT3_FS_SECURITY
- bool "Ext3 Security Labels"
- depends on EXT3_FS
- select EXT4_FS_SECURITY
- help
- This config option is here only for backward compatibility. ext3
- filesystem is now handled by the ext4 driver.
-
config EXT4_FS
tristate "The Extended 4 (ext4) filesystem"
select BUFFER_HEAD
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index 0a056d97e640..cf0a0970c095 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -227,6 +227,8 @@ static bool ext4_has_stable_inodes(struct super_block *sb)
}
const struct fscrypt_operations ext4_cryptops = {
+ .inode_info_offs = (int)offsetof(struct ext4_inode_info, i_crypt_info) -
+ (int)offsetof(struct ext4_inode_info, vfs_inode),
.needs_bounce_pages = 1,
.has_32bit_inodes = 1,
.supports_subblock_data_units = 1,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 01a6e2de7fc3..57087da6c7be 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1182,6 +1182,14 @@ struct ext4_inode_info {
__u32 i_csum_seed;
kprojid_t i_projid;
+
+#ifdef CONFIG_FS_ENCRYPTION
+ struct fscrypt_inode_info *i_crypt_info;
+#endif
+
+#ifdef CONFIG_FS_VERITY
+ struct fsverity_info *i_verity_info;
+#endif
};
/*
@@ -1442,7 +1450,9 @@ struct ext4_super_block {
__le16 s_encoding; /* Filename charset encoding */
__le16 s_encoding_flags; /* Filename charset encoding flags */
__le32 s_orphan_file_inum; /* Inode for tracking orphan inodes */
- __le32 s_reserved[94]; /* Padding to the end of the block */
+ __le16 s_def_resuid_hi;
+ __le16 s_def_resgid_hi;
+ __le32 s_reserved[93]; /* Padding to the end of the block */
__le32 s_checksum; /* crc32c(superblock) */
};
@@ -1812,6 +1822,18 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
}
+static inline int ext4_get_resuid(struct ext4_super_block *es)
+{
+ return le16_to_cpu(es->s_def_resuid) |
+ le16_to_cpu(es->s_def_resuid_hi) << 16;
+}
+
+static inline int ext4_get_resgid(struct ext4_super_block *es)
+{
+ return le16_to_cpu(es->s_def_resgid) |
+ le16_to_cpu(es->s_def_resgid_hi) << 16;
+}
+
/*
* Returns: sbi->field[index]
* Used to access an array element from the following sbi fields which require
@@ -1982,6 +2004,16 @@ static inline bool ext4_verity_in_progress(struct inode *inode)
#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime
/*
+ * Check whether the inode is tracked as orphan (either in orphan file or
+ * orphan list).
+ */
+static inline bool ext4_inode_orphan_tracked(struct inode *inode)
+{
+ return ext4_test_inode_state(inode, EXT4_STATE_ORPHAN_FILE) ||
+ !list_empty(&EXT4_I(inode)->i_orphan);
+}
+
+/*
* Codes for operating systems
*/
#define EXT4_OS_LINUX 0
@@ -3134,6 +3166,8 @@ extern struct buffer_head *ext4_sb_bread(struct super_block *sb,
sector_t block, blk_opf_t op_flags);
extern struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
sector_t block);
+extern struct buffer_head *ext4_sb_bread_nofail(struct super_block *sb,
+ sector_t block);
extern void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags,
bh_end_io_t *end_io, bool simu_fail);
extern int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 42bee1d4f9f9..fa66b08de999 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -663,7 +663,7 @@ void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t star
static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail)
{
- blk_opf_t write_flags = REQ_SYNC;
+ blk_opf_t write_flags = JBD2_JOURNAL_REQ_FLAGS;
struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;
/* Add REQ_FUA | REQ_PREFLUSH only its tail */
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 93240e35ee36..7a8b30932189 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -354,7 +354,7 @@ static void ext4_inode_extension_cleanup(struct inode *inode, bool need_trunc)
* to cleanup the orphan list in ext4_handle_inode_extension(). Do it
* now.
*/
- if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
+ if (ext4_inode_orphan_tracked(inode) && inode->i_nlink) {
handle_t *handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
if (IS_ERR(handle)) {
diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c
index 91185c40f755..22fc333244ef 100644
--- a/fs/ext4/fsmap.c
+++ b/fs/ext4/fsmap.c
@@ -74,7 +74,8 @@ static int ext4_getfsmap_dev_compare(const void *p1, const void *p2)
static bool ext4_getfsmap_rec_before_low_key(struct ext4_getfsmap_info *info,
struct ext4_fsmap *rec)
{
- return rec->fmr_physical < info->gfi_low.fmr_physical;
+ return rec->fmr_physical + rec->fmr_length <=
+ info->gfi_low.fmr_physical;
}
/*
@@ -200,15 +201,18 @@ static int ext4_getfsmap_meta_helper(struct super_block *sb,
ext4_group_first_block_no(sb, agno));
fs_end = fs_start + EXT4_C2B(sbi, len);
- /* Return relevant extents from the meta_list */
+ /*
+ * Return relevant extents from the meta_list. We emit all extents that
+ * partially/fully overlap with the query range
+ */
list_for_each_entry_safe(p, tmp, &info->gfi_meta_list, fmr_list) {
- if (p->fmr_physical < info->gfi_next_fsblk) {
+ if (p->fmr_physical + p->fmr_length <= info->gfi_next_fsblk) {
list_del(&p->fmr_list);
kfree(p);
continue;
}
- if (p->fmr_physical <= fs_start ||
- p->fmr_physical + p->fmr_length <= fs_end) {
+ if (p->fmr_physical <= fs_end &&
+ p->fmr_physical + p->fmr_length > fs_start) {
/* Emit the retained free extent record if present */
if (info->gfi_lastfree.fmr_owner) {
error = ext4_getfsmap_helper(sb, info,
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index df4051613b29..ba4fd9aba1c1 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -252,10 +252,10 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
"nonexistent device\n", __func__, __LINE__);
return;
}
- if (atomic_read(&inode->i_count) > 1) {
+ if (icount_read(inode) > 1) {
ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: count=%d",
__func__, __LINE__, inode->i_ino,
- atomic_read(&inode->i_count));
+ icount_read(inode));
return;
}
if (inode->i_nlink) {
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index d45124318200..da76353b3a57 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -1025,7 +1025,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
}
/* Go read the buffer for the next level down */
- bh = ext4_sb_bread(inode->i_sb, nr, 0);
+ bh = ext4_sb_bread_nofail(inode->i_sb, nr);
/*
* A read failure? Report error and clear slot
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5b7a15db4953..f9e4ac87211e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3872,47 +3872,12 @@ static int ext4_iomap_overwrite_begin(struct inode *inode, loff_t offset,
return ret;
}
-static inline bool ext4_want_directio_fallback(unsigned flags, ssize_t written)
-{
- /* must be a directio to fall back to buffered */
- if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) !=
- (IOMAP_WRITE | IOMAP_DIRECT))
- return false;
-
- /* atomic writes are all-or-nothing */
- if (flags & IOMAP_ATOMIC)
- return false;
-
- /* can only try again if we wrote nothing */
- return written == 0;
-}
-
-static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
- ssize_t written, unsigned flags, struct iomap *iomap)
-{
- /*
- * Check to see whether an error occurred while writing out the data to
- * the allocated blocks. If so, return the magic error code for
- * non-atomic write so that we fallback to buffered I/O and attempt to
- * complete the remainder of the I/O.
- * For non-atomic writes, any blocks that may have been
- * allocated in preparation for the direct I/O will be reused during
- * buffered I/O. For atomic write, we never fallback to buffered-io.
- */
- if (ext4_want_directio_fallback(flags, written))
- return -ENOTBLK;
-
- return 0;
-}
-
const struct iomap_ops ext4_iomap_ops = {
.iomap_begin = ext4_iomap_begin,
- .iomap_end = ext4_iomap_end,
};
const struct iomap_ops ext4_iomap_overwrite_ops = {
.iomap_begin = ext4_iomap_overwrite_begin,
- .iomap_end = ext4_iomap_end,
};
static int ext4_iomap_begin_report(struct inode *inode, loff_t offset,
@@ -4287,7 +4252,11 @@ int ext4_can_truncate(struct inode *inode)
* We have to make sure i_disksize gets properly updated before we truncate
* page cache due to hole punching or zero range. Otherwise i_disksize update
* can get lost as it may have been postponed to submission of writeback but
- * that will never happen after we truncate page cache.
+ * that will never happen if we remove the folio containing i_size from the
+ * page cache. Also if we punch hole within i_size but above i_disksize,
+ * following ext4_page_mkwrite() may mistakenly allocate written blocks over
+ * the hole and thus introduce allocated blocks beyond i_disksize which is
+ * not allowed (e2fsck would complain in case of crash).
*/
int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
loff_t len)
@@ -4298,9 +4267,11 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
loff_t size = i_size_read(inode);
WARN_ON(!inode_is_locked(inode));
- if (offset > size || offset + len < size)
+ if (offset > size)
return 0;
+ if (offset + len < size)
+ size = offset + len;
if (EXT4_I(inode)->i_disksize >= size)
return 0;
@@ -4748,7 +4719,7 @@ static int ext4_fill_raw_inode(struct inode *inode, struct ext4_inode *raw_inode
* old inodes get re-used with the upper 16 bits of the
* uid/gid intact.
*/
- if (ei->i_dtime && list_empty(&ei->i_orphan)) {
+ if (ei->i_dtime && !ext4_inode_orphan_tracked(inode)) {
raw_inode->i_uid_high = 0;
raw_inode->i_gid_high = 0;
} else {
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 84e3c73952d7..a93a7baae990 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -27,14 +27,16 @@
#include "fsmap.h"
#include <trace/events/ext4.h>
-typedef void ext4_update_sb_callback(struct ext4_super_block *es,
- const void *arg);
+typedef void ext4_update_sb_callback(struct ext4_sb_info *sbi,
+ struct ext4_super_block *es,
+ const void *arg);
/*
* Superblock modification callback function for changing file system
* label
*/
-static void ext4_sb_setlabel(struct ext4_super_block *es, const void *arg)
+static void ext4_sb_setlabel(struct ext4_sb_info *sbi,
+ struct ext4_super_block *es, const void *arg)
{
/* Sanity check, this should never happen */
BUILD_BUG_ON(sizeof(es->s_volume_name) < EXT4_LABEL_MAX);
@@ -46,7 +48,8 @@ static void ext4_sb_setlabel(struct ext4_super_block *es, const void *arg)
* Superblock modification callback function for changing file system
* UUID.
*/
-static void ext4_sb_setuuid(struct ext4_super_block *es, const void *arg)
+static void ext4_sb_setuuid(struct ext4_sb_info *sbi,
+ struct ext4_super_block *es, const void *arg)
{
memcpy(es->s_uuid, (__u8 *)arg, UUID_SIZE);
}
@@ -71,7 +74,7 @@ int ext4_update_primary_sb(struct super_block *sb, handle_t *handle,
goto out_err;
lock_buffer(bh);
- func(es, arg);
+ func(sbi, es, arg);
ext4_superblock_csum_set(sb);
unlock_buffer(bh);
@@ -149,7 +152,7 @@ static int ext4_update_backup_sb(struct super_block *sb,
unlock_buffer(bh);
goto out_bh;
}
- func(es, arg);
+ func(EXT4_SB(sb), es, arg);
if (ext4_has_feature_metadata_csum(sb))
es->s_checksum = ext4_superblock_csum(es);
set_buffer_uptodate(bh);
@@ -1230,6 +1233,295 @@ static int ext4_ioctl_setuuid(struct file *filp,
return ret;
}
+
+#define TUNE_OPS_SUPPORTED (EXT4_TUNE_FL_ERRORS_BEHAVIOR | \
+ EXT4_TUNE_FL_MNT_COUNT | EXT4_TUNE_FL_MAX_MNT_COUNT | \
+ EXT4_TUNE_FL_CHECKINTRVAL | EXT4_TUNE_FL_LAST_CHECK_TIME | \
+ EXT4_TUNE_FL_RESERVED_BLOCKS | EXT4_TUNE_FL_RESERVED_UID | \
+ EXT4_TUNE_FL_RESERVED_GID | EXT4_TUNE_FL_DEFAULT_MNT_OPTS | \
+ EXT4_TUNE_FL_DEF_HASH_ALG | EXT4_TUNE_FL_RAID_STRIDE | \
+ EXT4_TUNE_FL_RAID_STRIPE_WIDTH | EXT4_TUNE_FL_MOUNT_OPTS | \
+ EXT4_TUNE_FL_FEATURES | EXT4_TUNE_FL_EDIT_FEATURES | \
+ EXT4_TUNE_FL_FORCE_FSCK | EXT4_TUNE_FL_ENCODING | \
+ EXT4_TUNE_FL_ENCODING_FLAGS)
+
+#define EXT4_TUNE_SET_COMPAT_SUPP \
+ (EXT4_FEATURE_COMPAT_DIR_INDEX | \
+ EXT4_FEATURE_COMPAT_STABLE_INODES)
+#define EXT4_TUNE_SET_INCOMPAT_SUPP \
+ (EXT4_FEATURE_INCOMPAT_EXTENTS | \
+ EXT4_FEATURE_INCOMPAT_EA_INODE | \
+ EXT4_FEATURE_INCOMPAT_ENCRYPT | \
+ EXT4_FEATURE_INCOMPAT_CSUM_SEED | \
+ EXT4_FEATURE_INCOMPAT_LARGEDIR | \
+ EXT4_FEATURE_INCOMPAT_CASEFOLD)
+#define EXT4_TUNE_SET_RO_COMPAT_SUPP \
+ (EXT4_FEATURE_RO_COMPAT_LARGE_FILE | \
+ EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
+ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
+ EXT4_FEATURE_RO_COMPAT_PROJECT | \
+ EXT4_FEATURE_RO_COMPAT_VERITY)
+
+#define EXT4_TUNE_CLEAR_COMPAT_SUPP (0)
+#define EXT4_TUNE_CLEAR_INCOMPAT_SUPP (0)
+#define EXT4_TUNE_CLEAR_RO_COMPAT_SUPP (0)
+
+#define SB_ENC_SUPP_MASK (SB_ENC_STRICT_MODE_FL | \
+ SB_ENC_NO_COMPAT_FALLBACK_FL)
+
+static int ext4_ioctl_get_tune_sb(struct ext4_sb_info *sbi,
+ struct ext4_tune_sb_params __user *params)
+{
+ struct ext4_tune_sb_params ret;
+ struct ext4_super_block *es = sbi->s_es;
+
+ memset(&ret, 0, sizeof(ret));
+ ret.set_flags = TUNE_OPS_SUPPORTED;
+ ret.errors_behavior = le16_to_cpu(es->s_errors);
+ ret.mnt_count = le16_to_cpu(es->s_mnt_count);
+ ret.max_mnt_count = le16_to_cpu(es->s_max_mnt_count);
+ ret.checkinterval = le32_to_cpu(es->s_checkinterval);
+ ret.last_check_time = le32_to_cpu(es->s_lastcheck);
+ ret.reserved_blocks = ext4_r_blocks_count(es);
+ ret.blocks_count = ext4_blocks_count(es);
+ ret.reserved_uid = ext4_get_resuid(es);
+ ret.reserved_gid = ext4_get_resgid(es);
+ ret.default_mnt_opts = le32_to_cpu(es->s_default_mount_opts);
+ ret.def_hash_alg = es->s_def_hash_version;
+ ret.raid_stride = le16_to_cpu(es->s_raid_stride);
+ ret.raid_stripe_width = le32_to_cpu(es->s_raid_stripe_width);
+ ret.encoding = le16_to_cpu(es->s_encoding);
+ ret.encoding_flags = le16_to_cpu(es->s_encoding_flags);
+ strscpy_pad(ret.mount_opts, es->s_mount_opts);
+ ret.feature_compat = le32_to_cpu(es->s_feature_compat);
+ ret.feature_incompat = le32_to_cpu(es->s_feature_incompat);
+ ret.feature_ro_compat = le32_to_cpu(es->s_feature_ro_compat);
+ ret.set_feature_compat_mask = EXT4_TUNE_SET_COMPAT_SUPP;
+ ret.set_feature_incompat_mask = EXT4_TUNE_SET_INCOMPAT_SUPP;
+ ret.set_feature_ro_compat_mask = EXT4_TUNE_SET_RO_COMPAT_SUPP;
+ ret.clear_feature_compat_mask = EXT4_TUNE_CLEAR_COMPAT_SUPP;
+ ret.clear_feature_incompat_mask = EXT4_TUNE_CLEAR_INCOMPAT_SUPP;
+ ret.clear_feature_ro_compat_mask = EXT4_TUNE_CLEAR_RO_COMPAT_SUPP;
+ if (copy_to_user(params, &ret, sizeof(ret)))
+ return -EFAULT;
+ return 0;
+}
+
+static void ext4_sb_setparams(struct ext4_sb_info *sbi,
+ struct ext4_super_block *es, const void *arg)
+{
+ const struct ext4_tune_sb_params *params = arg;
+
+ if (params->set_flags & EXT4_TUNE_FL_ERRORS_BEHAVIOR)
+ es->s_errors = cpu_to_le16(params->errors_behavior);
+ if (params->set_flags & EXT4_TUNE_FL_MNT_COUNT)
+ es->s_mnt_count = cpu_to_le16(params->mnt_count);
+ if (params->set_flags & EXT4_TUNE_FL_MAX_MNT_COUNT)
+ es->s_max_mnt_count = cpu_to_le16(params->max_mnt_count);
+ if (params->set_flags & EXT4_TUNE_FL_CHECKINTRVAL)
+ es->s_checkinterval = cpu_to_le32(params->checkinterval);
+ if (params->set_flags & EXT4_TUNE_FL_LAST_CHECK_TIME)
+ es->s_lastcheck = cpu_to_le32(params->last_check_time);
+ if (params->set_flags & EXT4_TUNE_FL_RESERVED_BLOCKS) {
+ ext4_fsblk_t blk = params->reserved_blocks;
+
+ es->s_r_blocks_count_lo = cpu_to_le32((u32)blk);
+ es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
+ }
+ if (params->set_flags & EXT4_TUNE_FL_RESERVED_UID) {
+ int uid = params->reserved_uid;
+
+ es->s_def_resuid = cpu_to_le16(uid & 0xFFFF);
+ es->s_def_resuid_hi = cpu_to_le16(uid >> 16);
+ }
+ if (params->set_flags & EXT4_TUNE_FL_RESERVED_GID) {
+ int gid = params->reserved_gid;
+
+ es->s_def_resgid = cpu_to_le16(gid & 0xFFFF);
+ es->s_def_resgid_hi = cpu_to_le16(gid >> 16);
+ }
+ if (params->set_flags & EXT4_TUNE_FL_DEFAULT_MNT_OPTS)
+ es->s_default_mount_opts = cpu_to_le32(params->default_mnt_opts);
+ if (params->set_flags & EXT4_TUNE_FL_DEF_HASH_ALG)
+ es->s_def_hash_version = params->def_hash_alg;
+ if (params->set_flags & EXT4_TUNE_FL_RAID_STRIDE)
+ es->s_raid_stride = cpu_to_le16(params->raid_stride);
+ if (params->set_flags & EXT4_TUNE_FL_RAID_STRIPE_WIDTH)
+ es->s_raid_stripe_width =
+ cpu_to_le32(params->raid_stripe_width);
+ if (params->set_flags & EXT4_TUNE_FL_ENCODING)
+ es->s_encoding = cpu_to_le16(params->encoding);
+ if (params->set_flags & EXT4_TUNE_FL_ENCODING_FLAGS)
+ es->s_encoding_flags = cpu_to_le16(params->encoding_flags);
+ strscpy_pad(es->s_mount_opts, params->mount_opts);
+ if (params->set_flags & EXT4_TUNE_FL_EDIT_FEATURES) {
+ es->s_feature_compat |=
+ cpu_to_le32(params->set_feature_compat_mask);
+ es->s_feature_incompat |=
+ cpu_to_le32(params->set_feature_incompat_mask);
+ es->s_feature_ro_compat |=
+ cpu_to_le32(params->set_feature_ro_compat_mask);
+ es->s_feature_compat &=
+ ~cpu_to_le32(params->clear_feature_compat_mask);
+ es->s_feature_incompat &=
+ ~cpu_to_le32(params->clear_feature_incompat_mask);
+ es->s_feature_ro_compat &=
+ ~cpu_to_le32(params->clear_feature_ro_compat_mask);
+ if (params->set_feature_compat_mask &
+ EXT4_FEATURE_COMPAT_DIR_INDEX)
+ es->s_def_hash_version = sbi->s_def_hash_version;
+ if (params->set_feature_incompat_mask &
+ EXT4_FEATURE_INCOMPAT_CSUM_SEED)
+ es->s_checksum_seed = cpu_to_le32(sbi->s_csum_seed);
+ }
+ if (params->set_flags & EXT4_TUNE_FL_FORCE_FSCK)
+ es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
+}
+
+static int ext4_ioctl_set_tune_sb(struct file *filp,
+ struct ext4_tune_sb_params __user *in)
+{
+ struct ext4_tune_sb_params params;
+ struct super_block *sb = file_inode(filp)->i_sb;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+ int enabling_casefold = 0;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (copy_from_user(&params, in, sizeof(params)))
+ return -EFAULT;
+
+ if ((params.set_flags & ~TUNE_OPS_SUPPORTED) != 0)
+ return -EOPNOTSUPP;
+
+ if ((params.set_flags & EXT4_TUNE_FL_ERRORS_BEHAVIOR) &&
+ (params.errors_behavior > EXT4_ERRORS_PANIC))
+ return -EINVAL;
+
+ if ((params.set_flags & EXT4_TUNE_FL_RESERVED_BLOCKS) &&
+ (params.reserved_blocks > ext4_blocks_count(sbi->s_es) / 2))
+ return -EINVAL;
+ if ((params.set_flags & EXT4_TUNE_FL_DEF_HASH_ALG) &&
+ ((params.def_hash_alg > DX_HASH_LAST) ||
+ (params.def_hash_alg == DX_HASH_SIPHASH)))
+ return -EINVAL;
+ if ((params.set_flags & EXT4_TUNE_FL_FEATURES) &&
+ (params.set_flags & EXT4_TUNE_FL_EDIT_FEATURES))
+ return -EINVAL;
+
+ if (params.set_flags & EXT4_TUNE_FL_FEATURES) {
+ params.set_feature_compat_mask =
+ params.feature_compat &
+ ~le32_to_cpu(es->s_feature_compat);
+ params.set_feature_incompat_mask =
+ params.feature_incompat &
+ ~le32_to_cpu(es->s_feature_incompat);
+ params.set_feature_ro_compat_mask =
+ params.feature_ro_compat &
+ ~le32_to_cpu(es->s_feature_ro_compat);
+ params.clear_feature_compat_mask =
+ ~params.feature_compat &
+ le32_to_cpu(es->s_feature_compat);
+ params.clear_feature_incompat_mask =
+ ~params.feature_incompat &
+ le32_to_cpu(es->s_feature_incompat);
+ params.clear_feature_ro_compat_mask =
+ ~params.feature_ro_compat &
+ le32_to_cpu(es->s_feature_ro_compat);
+ params.set_flags |= EXT4_TUNE_FL_EDIT_FEATURES;
+ }
+ if (params.set_flags & EXT4_TUNE_FL_EDIT_FEATURES) {
+ if ((params.set_feature_compat_mask &
+ ~EXT4_TUNE_SET_COMPAT_SUPP) ||
+ (params.set_feature_incompat_mask &
+ ~EXT4_TUNE_SET_INCOMPAT_SUPP) ||
+ (params.set_feature_ro_compat_mask &
+ ~EXT4_TUNE_SET_RO_COMPAT_SUPP) ||
+ (params.clear_feature_compat_mask &
+ ~EXT4_TUNE_CLEAR_COMPAT_SUPP) ||
+ (params.clear_feature_incompat_mask &
+ ~EXT4_TUNE_CLEAR_INCOMPAT_SUPP) ||
+ (params.clear_feature_ro_compat_mask &
+ ~EXT4_TUNE_CLEAR_RO_COMPAT_SUPP))
+ return -EOPNOTSUPP;
+
+ /*
+ * Filter out the features that are already set from
+ * the set_mask.
+ */
+ params.set_feature_compat_mask &=
+ ~le32_to_cpu(es->s_feature_compat);
+ params.set_feature_incompat_mask &=
+ ~le32_to_cpu(es->s_feature_incompat);
+ params.set_feature_ro_compat_mask &=
+ ~le32_to_cpu(es->s_feature_ro_compat);
+ if ((params.set_feature_incompat_mask &
+ EXT4_FEATURE_INCOMPAT_CASEFOLD)) {
+ enabling_casefold = 1;
+ if (!(params.set_flags & EXT4_TUNE_FL_ENCODING)) {
+ params.encoding = EXT4_ENC_UTF8_12_1;
+ params.set_flags |= EXT4_TUNE_FL_ENCODING;
+ }
+ if (!(params.set_flags & EXT4_TUNE_FL_ENCODING_FLAGS)) {
+ params.encoding_flags = 0;
+ params.set_flags |= EXT4_TUNE_FL_ENCODING_FLAGS;
+ }
+ }
+ if ((params.set_feature_compat_mask &
+ EXT4_FEATURE_COMPAT_DIR_INDEX)) {
+ uuid_t uu;
+
+ memcpy(&uu, sbi->s_hash_seed, UUID_SIZE);
+ if (uuid_is_null(&uu))
+ generate_random_uuid((char *)
+ &sbi->s_hash_seed);
+ if (params.set_flags & EXT4_TUNE_FL_DEF_HASH_ALG)
+ sbi->s_def_hash_version = params.def_hash_alg;
+ else if (sbi->s_def_hash_version == 0)
+ sbi->s_def_hash_version = DX_HASH_HALF_MD4;
+ if (!(es->s_flags &
+ cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH)) &&
+ !(es->s_flags &
+ cpu_to_le32(EXT2_FLAGS_SIGNED_HASH))) {
+#ifdef __CHAR_UNSIGNED__
+ sbi->s_hash_unsigned = 3;
+#else
+ sbi->s_hash_unsigned = 0;
+#endif
+ }
+ }
+ }
+ if (params.set_flags & EXT4_TUNE_FL_ENCODING) {
+ if (!enabling_casefold)
+ return -EINVAL;
+ if (params.encoding == 0)
+ params.encoding = EXT4_ENC_UTF8_12_1;
+ else if (params.encoding != EXT4_ENC_UTF8_12_1)
+ return -EINVAL;
+ }
+ if (params.set_flags & EXT4_TUNE_FL_ENCODING_FLAGS) {
+ if (!enabling_casefold)
+ return -EINVAL;
+ if (params.encoding_flags & ~SB_ENC_SUPP_MASK)
+ return -EINVAL;
+ }
+
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
+
+ ret = ext4_update_superblocks_fn(sb, ext4_sb_setparams, &params);
+ mnt_drop_write_file(filp);
+
+ if (params.set_flags & EXT4_TUNE_FL_DEF_HASH_ALG)
+ sbi->s_def_hash_version = params.def_hash_alg;
+
+ return ret;
+}
+
static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -1616,6 +1908,11 @@ resizefs_out:
return ext4_ioctl_getuuid(EXT4_SB(sb), (void __user *)arg);
case EXT4_IOC_SETFSUUID:
return ext4_ioctl_setuuid(filp, (const void __user *)arg);
+ case EXT4_IOC_GET_TUNE_SB_PARAM:
+ return ext4_ioctl_get_tune_sb(EXT4_SB(sb),
+ (void __user *)arg);
+ case EXT4_IOC_SET_TUNE_SB_PARAM:
+ return ext4_ioctl_set_tune_sb(filp, (void __user *)arg);
default:
return -ENOTTY;
}
@@ -1703,7 +2000,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
}
#endif
-static void set_overhead(struct ext4_super_block *es, const void *arg)
+static void set_overhead(struct ext4_sb_info *sbi,
+ struct ext4_super_block *es, const void *arg)
{
es->s_overhead_clusters = cpu_to_le32(*((unsigned long *) arg));
}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 5898d92ba19f..9087183602e4 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3655,16 +3655,26 @@ static void ext4_discard_work(struct work_struct *work)
static inline void ext4_mb_avg_fragment_size_destroy(struct ext4_sb_info *sbi)
{
+ if (!sbi->s_mb_avg_fragment_size)
+ return;
+
for (int i = 0; i < MB_NUM_ORDERS(sbi->s_sb); i++)
xa_destroy(&sbi->s_mb_avg_fragment_size[i]);
+
kfree(sbi->s_mb_avg_fragment_size);
+ sbi->s_mb_avg_fragment_size = NULL;
}
static inline void ext4_mb_largest_free_orders_destroy(struct ext4_sb_info *sbi)
{
+ if (!sbi->s_mb_largest_free_orders)
+ return;
+
for (int i = 0; i < MB_NUM_ORDERS(sbi->s_sb); i++)
xa_destroy(&sbi->s_mb_largest_free_orders[i]);
+
kfree(sbi->s_mb_largest_free_orders);
+ sbi->s_mb_largest_free_orders = NULL;
}
int ext4_mb_init(struct super_block *sb)
@@ -3995,7 +4005,7 @@ void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
list_splice_tail(&freed_data_list, &sbi->s_discard_list);
spin_unlock(&sbi->s_md_lock);
if (wake)
- queue_work(system_unbound_wq, &sbi->s_discard_work);
+ queue_work(system_dfl_wq, &sbi->s_discard_work);
} else {
list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
kmem_cache_free(ext4_free_data_cachep, entry);
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 51661570cf3b..ab1ff51302fb 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -231,9 +231,9 @@ static int kmmpd(void *data)
* Adjust the mmp_check_interval depending on how much time
* it took for the MMP block to be written.
*/
- mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
- EXT4_MMP_MAX_CHECK_INTERVAL),
- EXT4_MMP_MIN_CHECK_INTERVAL);
+ mmp_check_interval = clamp(EXT4_MMP_CHECK_MULT * diff / HZ,
+ EXT4_MMP_MIN_CHECK_INTERVAL,
+ EXT4_MMP_MAX_CHECK_INTERVAL);
mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
}
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index adae3caf175a..4b091c21908f 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -225,7 +225,7 @@ static int mext_page_mkuptodate(struct folio *folio, size_t from, size_t to)
do {
if (bh_offset(bh) + blocksize <= from)
continue;
- if (bh_offset(bh) > to)
+ if (bh_offset(bh) >= to)
break;
wait_on_buffer(bh);
if (buffer_uptodate(bh))
diff --git a/fs/ext4/orphan.c b/fs/ext4/orphan.c
index 524d4658fa40..33c3a89396b1 100644
--- a/fs/ext4/orphan.c
+++ b/fs/ext4/orphan.c
@@ -109,11 +109,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
!inode_is_locked(inode));
- /*
- * Inode orphaned in orphan file or in orphan list?
- */
- if (ext4_test_inode_state(inode, EXT4_STATE_ORPHAN_FILE) ||
- !list_empty(&EXT4_I(inode)->i_orphan))
+ if (ext4_inode_orphan_tracked(inode))
return 0;
/*
@@ -587,9 +583,20 @@ int ext4_init_orphan_info(struct super_block *sb)
ext4_msg(sb, KERN_ERR, "get orphan inode failed");
return PTR_ERR(inode);
}
+ /*
+ * This is just an artificial limit to prevent corrupted fs from
+ * consuming absurd amounts of memory when pinning blocks of orphan
+ * file in memory.
+ */
+ if (inode->i_size > 8 << 20) {
+ ext4_msg(sb, KERN_ERR, "orphan file too big: %llu",
+ (unsigned long long)inode->i_size);
+ ret = -EFSCORRUPTED;
+ goto out_put;
+ }
oi->of_blocks = inode->i_size >> sb->s_blocksize_bits;
oi->of_csum_seed = EXT4_I(inode)->i_csum_seed;
- oi->of_binfo = kmalloc_array(oi->of_blocks,
+ oi->of_binfo = kvmalloc_array(oi->of_blocks,
sizeof(struct ext4_orphan_block),
GFP_KERNEL);
if (!oi->of_binfo) {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 699c15db28a8..33e7c08c9529 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -265,6 +265,15 @@ struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
return __ext4_sb_bread_gfp(sb, block, 0, gfp);
}
+struct buffer_head *ext4_sb_bread_nofail(struct super_block *sb,
+ sector_t block)
+{
+ gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_mapping,
+ ~__GFP_FS) | __GFP_MOVABLE | __GFP_NOFAIL;
+
+ return __ext4_sb_bread_gfp(sb, block, 0, gfp);
+}
+
void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block)
{
struct buffer_head *bh = bdev_getblk(sb->s_bdev, block,
@@ -1417,7 +1426,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
static int ext4_drop_inode(struct inode *inode)
{
- int drop = generic_drop_inode(inode);
+ int drop = inode_generic_drop(inode);
if (!drop)
drop = fscrypt_drop_inode(inode);
@@ -1438,9 +1447,9 @@ static void ext4_free_in_core_inode(struct inode *inode)
static void ext4_destroy_inode(struct inode *inode)
{
- if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
+ if (ext4_inode_orphan_tracked(inode)) {
ext4_msg(inode->i_sb, KERN_ERR,
- "Inode %lu (%p): orphan list check failed!",
+ "Inode %lu (%p): inode tracked as orphan!",
inode->i_ino, EXT4_I(inode));
print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
EXT4_I(inode), sizeof(struct ext4_inode_info),
@@ -1470,6 +1479,12 @@ static void init_once(void *foo)
init_rwsem(&ei->i_data_sem);
inode_init_once(&ei->vfs_inode);
ext4_fc_init_inode(&ei->vfs_inode);
+#ifdef CONFIG_FS_ENCRYPTION
+ ei->i_crypt_info = NULL;
+#endif
+#ifdef CONFIG_FS_VERITY
+ ei->i_verity_info = NULL;
+#endif
}
static int __init init_inodecache(void)
@@ -2460,7 +2475,7 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
struct ext4_fs_context *m_ctx)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- char *s_mount_opts = NULL;
+ char s_mount_opts[65];
struct ext4_fs_context *s_ctx = NULL;
struct fs_context *fc = NULL;
int ret = -ENOMEM;
@@ -2468,15 +2483,11 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
if (!sbi->s_es->s_mount_opts[0])
return 0;
- s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
- sizeof(sbi->s_es->s_mount_opts),
- GFP_KERNEL);
- if (!s_mount_opts)
- return ret;
+ strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts);
fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
if (!fc)
- goto out_free;
+ return -ENOMEM;
s_ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
if (!s_ctx)
@@ -2508,11 +2519,8 @@ parse_failed:
ret = 0;
out_free:
- if (fc) {
- ext4_fc_free(fc);
- kfree(fc);
- }
- kfree(s_mount_opts);
+ ext4_fc_free(fc);
+ kfree(fc);
return ret;
}
@@ -2958,11 +2966,11 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
}
if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
- le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
+ ext4_get_resuid(es) != EXT4_DEF_RESUID)
SEQ_OPTS_PRINT("resuid=%u",
from_kuid_munged(&init_user_ns, sbi->s_resuid));
if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
- le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
+ ext4_get_resgid(es) != EXT4_DEF_RESGID)
SEQ_OPTS_PRINT("resgid=%u",
from_kgid_munged(&init_user_ns, sbi->s_resgid));
def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
@@ -5277,8 +5285,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
ext4_set_def_opts(sb, es);
- sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
- sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
+ sbi->s_resuid = make_kuid(&init_user_ns, ext4_get_resuid(es));
+ sbi->s_resgid = make_kgid(&init_user_ns, ext4_get_resuid(es));
sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
index d9203228ce97..b0acb0c50313 100644
--- a/fs/ext4/verity.c
+++ b/fs/ext4/verity.c
@@ -389,6 +389,8 @@ static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf,
}
const struct fsverity_operations ext4_verityops = {
+ .inode_info_offs = (int)offsetof(struct ext4_inode_info, i_verity_info) -
+ (int)offsetof(struct ext4_inode_info, vfs_inode),
.begin_enable_verity = ext4_begin_enable_verity,
.end_enable_verity = ext4_end_enable_verity,
.get_verity_descriptor = ext4_get_verity_descriptor,
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 5a6fe1513fd2..ce7253b3f549 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -251,6 +251,10 @@ check_xattrs(struct inode *inode, struct buffer_head *bh,
err_str = "invalid ea_ino";
goto errout;
}
+ if (ea_ino && !size) {
+ err_str = "invalid size in ea xattr";
+ goto errout;
+ }
if (size > EXT4_XATTR_SIZE_MAX) {
err_str = "e_value size too large";
goto errout;
@@ -1019,7 +1023,7 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
int ref_change)
{
struct ext4_iloc iloc;
- s64 ref_count;
+ u64 ref_count;
int ret;
inode_lock_nested(ea_inode, I_MUTEX_XATTR);
@@ -1029,13 +1033,17 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
goto out;
ref_count = ext4_xattr_inode_get_ref(ea_inode);
+ if ((ref_count == 0 && ref_change < 0) || (ref_count == U64_MAX && ref_change > 0)) {
+ ext4_error_inode(ea_inode, __func__, __LINE__, 0,
+ "EA inode %lu ref wraparound: ref_count=%lld ref_change=%d",
+ ea_inode->i_ino, ref_count, ref_change);
+ ret = -EFSCORRUPTED;
+ goto out;
+ }
ref_count += ref_change;
ext4_xattr_inode_set_ref(ea_inode, ref_count);
if (ref_change > 0) {
- WARN_ONCE(ref_count <= 0, "EA inode %lu ref_count=%lld",
- ea_inode->i_ino, ref_count);
-
if (ref_count == 1) {
WARN_ONCE(ea_inode->i_nlink, "EA inode %lu i_nlink=%u",
ea_inode->i_ino, ea_inode->i_nlink);
@@ -1044,9 +1052,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
ext4_orphan_del(handle, ea_inode);
}
} else {
- WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
- ea_inode->i_ino, ref_count);
-
if (ref_count == 0) {
WARN_ONCE(ea_inode->i_nlink != 1,
"EA inode %lu i_nlink=%u",
@@ -1530,7 +1535,7 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
WARN_ON_ONCE(ext4_handle_valid(journal_current_handle()) &&
!(current->flags & PF_MEMALLOC_NOFS));
- ea_data = kvmalloc(value_len, GFP_KERNEL);
+ ea_data = kvmalloc(value_len, GFP_NOFS);
if (!ea_data) {
mb_cache_entry_put(ea_inode_cache, ce);
return NULL;