From ff694ab60c29cfeba81b3d5068d3c908f22110ed Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 28 May 2020 07:59:55 -0700 Subject: fs/ext4: Narrow scope of DAX check in setflags When preventing DAX and journaling on an inode, use the effective DAX check rather than the mount option. This will be required to support per-inode DAX flags. Reviewed-by: Jan Kara Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20200528150003.828793-2-ira.weiny@intel.com Signed-off-by: Theodore Ts'o --- fs/ext4/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ext4/ioctl.c') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index bfc1281fc4cb..5813e5e73eab 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -393,9 +393,9 @@ flags_err: if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { /* * Changes to the journaling mode can cause unsafe changes to - * S_DAX if we are using the DAX mount option. + * S_DAX if the inode is DAX */ - if (test_opt(inode->i_sb, DAX)) { + if (IS_DAX(inode)) { err = -EBUSY; goto flags_out; } -- cgit From 043546e46dc70c25ff7e2cf6d09cbb0424fc9978 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 28 May 2020 07:59:59 -0700 Subject: fs/ext4: Only change S_DAX on inode load To prevent complications with in-memory inodes we only set S_DAX on inode load. FS_XFLAG_DAX can be changed at any time and S_DAX will change after inode eviction and reload. Add init bool to ext4_set_inode_flags() to indicate if the inode is being newly initialized. Assert that S_DAX is not set on an inode which is just being loaded. 
Reviewed-by: Jan Kara Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20200528150003.828793-6-ira.weiny@intel.com Signed-off-by: Theodore Ts'o --- fs/ext4/ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/ext4/ioctl.c') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 5813e5e73eab..145083e8cd1e 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -381,7 +381,8 @@ static int ext4_ioctl_setflags(struct inode *inode, ext4_clear_inode_flag(inode, i); } - ext4_set_inode_flags(inode); + ext4_set_inode_flags(inode, false); + inode->i_ctime = current_time(inode); err = ext4_mark_iloc_dirty(handle, inode, &iloc); -- cgit From fcebc7949cd2ff97407e5b77ed99a7211674c6de Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 28 May 2020 08:00:01 -0700 Subject: fs/ext4: Remove jflag variable The jflag variable serves almost no purpose. Remove it. Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20200528150003.828793-8-ira.weiny@intel.com Signed-off-by: Theodore Ts'o --- fs/ext4/ioctl.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'fs/ext4/ioctl.c') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 145083e8cd1e..779631e8e849 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -300,7 +300,6 @@ static int ext4_ioctl_setflags(struct inode *inode, int err = -EPERM, migrate = 0; struct ext4_iloc iloc; unsigned int oldflags, mask, i; - unsigned int jflag; struct super_block *sb = inode->i_sb; /* Is it quota file? Do not allow user to mess with it */ @@ -309,9 +308,6 @@ static int ext4_ioctl_setflags(struct inode *inode, oldflags = ei->i_flags; - /* The JOURNAL_DATA flag is modifiable only by root */ - jflag = flags & EXT4_JOURNAL_DATA_FL; - err = vfs_ioc_setflags_prepare(inode, oldflags, flags); if (err) goto flags_out; @@ -320,7 +316,7 @@ static int ext4_ioctl_setflags(struct inode *inode, * The JOURNAL_DATA flag can only be changed by * the relevant capability. 
*/ - if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { + if ((flags ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { if (!capable(CAP_SYS_RESOURCE)) goto flags_out; } @@ -391,7 +387,7 @@ flags_err: if (err) goto flags_out; - if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { + if ((flags ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { /* * Changes to the journaling mode can cause unsafe changes to * S_DAX if the inode is DAX @@ -401,7 +397,8 @@ flags_err: goto flags_out; } - err = ext4_change_inode_journal_flag(inode, jflag); + err = ext4_change_inode_journal_flag(inode, + flags & EXT4_JOURNAL_DATA_FL); if (err) goto flags_out; } -- cgit From b383a73f2b832491a2f9e6e8ada26aad53b5763d Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 28 May 2020 08:00:02 -0700 Subject: fs/ext4: Introduce DAX inode flag Add a flag ([EXT4|FS]_DAX_FL) to preserve FS_XFLAG_DAX in the ext4 inode. Set the flag to be user visible and changeable. Set the flag to be inherited. Allow applications to change the flag at any time except if it conflicts with the set of mutually exclusive flags (Currently VERITY, ENCRYPT, JOURNAL_DATA). Furthermore, restrict setting any of the exclusive flags if DAX is set. While conceptually possible, we do not allow setting EXT4_DAX_FL while at the same time clearing exclusion flags (or vice versa) for 2 reasons: 1) The DAX flag does not take effect immediately which introduces quite a bit of complexity 2) There is no clear use case for being this flexible Finally, on regular files, flag the inode to not be cached to facilitate changing S_DAX on the next creation of the inode. 
Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20200528150003.828793-9-ira.weiny@intel.com Signed-off-by: Theodore Ts'o --- fs/ext4/ioctl.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) (limited to 'fs/ext4/ioctl.c') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 779631e8e849..1b520d07d371 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -292,6 +292,38 @@ static int ext4_ioctl_check_immutable(struct inode *inode, __u32 new_projid, return 0; } +static void ext4_dax_dontcache(struct inode *inode, unsigned int flags) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + + if (S_ISDIR(inode->i_mode)) + return; + + if (test_opt2(inode->i_sb, DAX_NEVER) || + test_opt(inode->i_sb, DAX_ALWAYS)) + return; + + if ((ei->i_flags ^ flags) & EXT4_DAX_FL) + d_mark_dontcache(inode); +} + +static bool dax_compatible(struct inode *inode, unsigned int oldflags, + unsigned int flags) +{ + if (flags & EXT4_DAX_FL) { + if ((oldflags & EXT4_DAX_MUT_EXCL) || + ext4_test_inode_state(inode, + EXT4_STATE_VERITY_IN_PROGRESS)) { + return false; + } + } + + if ((flags & EXT4_DAX_MUT_EXCL) && (oldflags & EXT4_DAX_FL)) + return false; + + return true; +} + static int ext4_ioctl_setflags(struct inode *inode, unsigned int flags) { @@ -320,6 +352,12 @@ static int ext4_ioctl_setflags(struct inode *inode, if (!capable(CAP_SYS_RESOURCE)) goto flags_out; } + + if (!dax_compatible(inode, oldflags, flags)) { + err = -EOPNOTSUPP; + goto flags_out; + } + if ((flags ^ oldflags) & EXT4_EXTENTS_FL) migrate = 1; @@ -365,6 +403,8 @@ static int ext4_ioctl_setflags(struct inode *inode, if (err) goto flags_err; + ext4_dax_dontcache(inode, flags); + for (i = 0, mask = 1; i < 32; i++, mask <<= 1) { if (!(mask & EXT4_FL_USER_MODIFIABLE)) continue; @@ -525,12 +565,15 @@ static inline __u32 ext4_iflags_to_xflags(unsigned long iflags) xflags |= FS_XFLAG_NOATIME; if (iflags & EXT4_PROJINHERIT_FL) xflags |= FS_XFLAG_PROJINHERIT; + if (iflags & 
EXT4_DAX_FL) + xflags |= FS_XFLAG_DAX; return xflags; } #define EXT4_SUPPORTED_FS_XFLAGS (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | \ FS_XFLAG_APPEND | FS_XFLAG_NODUMP | \ - FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT) + FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT | \ + FS_XFLAG_DAX) /* Transfer xflags flags to internal */ static inline unsigned long ext4_xflags_to_iflags(__u32 xflags) @@ -549,6 +592,8 @@ static inline unsigned long ext4_xflags_to_iflags(__u32 xflags) iflags |= EXT4_NOATIME_FL; if (xflags & FS_XFLAG_PROJINHERIT) iflags |= EXT4_PROJINHERIT_FL; + if (xflags & FS_XFLAG_DAX) + iflags |= EXT4_DAX_FL; return iflags; } -- cgit From cb29a02d3a9d51c749f556b3bbf6551fbc0454eb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 14 Jul 2020 16:09:09 -0700 Subject: ext4: use generic names for generic ioctls Don't define EXT4_IOC_* aliases to ioctls that already have a generic FS_IOC_* name. These aliases are unnecessary, and they make it unclear which ioctls are ext4-specific and which are generic. Exception: leave EXT4_IOC_GETVERSION_OLD and EXT4_IOC_SETVERSION_OLD as-is for now, since renaming them to FS_IOC_GETVERSION and FS_IOC_SETVERSION would probably make them more likely to be confused with EXT4_IOC_GETVERSION and EXT4_IOC_SETVERSION which also exist. 
Signed-off-by: Eric Biggers Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20200714230909.56349-1-ebiggers@kernel.org Signed-off-by: Theodore Ts'o --- fs/ext4/ioctl.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'fs/ext4/ioctl.c') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 999cf6add39c..6e70a63dcca7 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -819,12 +819,12 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) switch (cmd) { case FS_IOC_GETFSMAP: return ext4_ioc_getfsmap(sb, (void __user *)arg); - case EXT4_IOC_GETFLAGS: + case FS_IOC_GETFLAGS: flags = ei->i_flags & EXT4_FL_USER_VISIBLE; if (S_ISREG(inode->i_mode)) flags &= ~EXT4_PROJINHERIT_FL; return put_user(flags, (int __user *) arg); - case EXT4_IOC_SETFLAGS: { + case FS_IOC_SETFLAGS: { int err; if (!inode_owner_or_capable(inode)) @@ -1129,12 +1129,12 @@ resizefs_out: case EXT4_IOC_PRECACHE_EXTENTS: return ext4_ext_precache(inode); - case EXT4_IOC_SET_ENCRYPTION_POLICY: + case FS_IOC_SET_ENCRYPTION_POLICY: if (!ext4_has_feature_encrypt(sb)) return -EOPNOTSUPP; return fscrypt_ioctl_set_policy(filp, (const void __user *)arg); - case EXT4_IOC_GET_ENCRYPTION_PWSALT: { + case FS_IOC_GET_ENCRYPTION_PWSALT: { #ifdef CONFIG_FS_ENCRYPTION int err, err2; struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -1174,7 +1174,7 @@ resizefs_out: return -EOPNOTSUPP; #endif } - case EXT4_IOC_GET_ENCRYPTION_POLICY: + case FS_IOC_GET_ENCRYPTION_POLICY: if (!ext4_has_feature_encrypt(sb)) return -EOPNOTSUPP; return fscrypt_ioctl_get_policy(filp, (void __user *)arg); @@ -1236,7 +1236,7 @@ resizefs_out: case EXT4_IOC_GET_ES_CACHE: return ext4_ioctl_get_es_cache(filp, arg); - case EXT4_IOC_FSGETXATTR: + case FS_IOC_FSGETXATTR: { struct fsxattr fa; @@ -1247,7 +1247,7 @@ resizefs_out: return -EFAULT; return 0; } - case EXT4_IOC_FSSETXATTR: + case FS_IOC_FSSETXATTR: { struct fsxattr fa, old_fa; int err; @@ -1313,11 +1313,11 @@ long 
ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { /* These are just misnamed, they actually get/put from/to user an int */ switch (cmd) { - case EXT4_IOC32_GETFLAGS: - cmd = EXT4_IOC_GETFLAGS; + case FS_IOC32_GETFLAGS: + cmd = FS_IOC_GETFLAGS; break; - case EXT4_IOC32_SETFLAGS: - cmd = EXT4_IOC_SETFLAGS; + case FS_IOC32_SETFLAGS: + cmd = FS_IOC_SETFLAGS; break; case EXT4_IOC32_GETVERSION: cmd = EXT4_IOC_GETVERSION; @@ -1361,9 +1361,9 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case EXT4_IOC_RESIZE_FS: case FITRIM: case EXT4_IOC_PRECACHE_EXTENTS: - case EXT4_IOC_SET_ENCRYPTION_POLICY: - case EXT4_IOC_GET_ENCRYPTION_PWSALT: - case EXT4_IOC_GET_ENCRYPTION_POLICY: + case FS_IOC_SET_ENCRYPTION_POLICY: + case FS_IOC_GET_ENCRYPTION_PWSALT: + case FS_IOC_GET_ENCRYPTION_POLICY: case FS_IOC_GET_ENCRYPTION_POLICY_EX: case FS_IOC_ADD_ENCRYPTION_KEY: case FS_IOC_REMOVE_ENCRYPTION_KEY: @@ -1377,8 +1377,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case EXT4_IOC_CLEAR_ES_CACHE: case EXT4_IOC_GETSTATE: case EXT4_IOC_GET_ES_CACHE: - case EXT4_IOC_FSGETXATTR: - case EXT4_IOC_FSSETXATTR: + case FS_IOC_FSGETXATTR: + case FS_IOC_FSSETXATTR: break; default: return -ENOIOCTLCMD; -- cgit From 27bc446e2def38db3244a6eb4bb1d6312936610a Mon Sep 17 00:00:00 2001 From: brookxu Date: Mon, 17 Aug 2020 15:36:15 +0800 Subject: ext4: limit the length of per-inode prealloc list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the scenario of writing sparse files, the per-inode prealloc list may be very long, resulting in high overhead for ext4_mb_use_preallocated(). To circumvent this problem, we limit the maximum length of per-inode prealloc list to 512 and allow users to modify it. After patching, we observed that the sys ratio of cpu has dropped, and the system throughput has increased significantly. 
We created a process to write the sparse file, and the running time of the process on the fixed kernel was significantly reduced, as follows: Running time on unfixed kernel: [root@TENCENT64 ~]# time taskset 0x01 ./sparse /data1/sparce.dat real 0m2.051s user 0m0.008s sys 0m2.026s Running time on fixed kernel: [root@TENCENT64 ~]# time taskset 0x01 ./sparse /data1/sparce.dat real 0m0.471s user 0m0.004s sys 0m0.395s Signed-off-by: Chunguang Xu Link: https://lore.kernel.org/r/d7a98178-056b-6db5-6bce-4ead23f4a257@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/ioctl.c') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 6e70a63dcca7..36eca3bc036a 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -202,7 +202,7 @@ static long swap_inode_boot_loader(struct super_block *sb, reset_inode_seed(inode); reset_inode_seed(inode_bl); - ext4_discard_preallocations(inode); + ext4_discard_preallocations(inode, 0); err = ext4_mark_inode_dirty(handle, inode); if (err < 0) { -- cgit