From 38f1755a3e59a3f88e33030f8e4ee0421de2f05a Mon Sep 17 00:00:00 2001 From: Min-Hua Chen Date: Fri, 12 May 2023 00:46:25 +0800 Subject: fs: use correct __poll_t type Fix the following sparse warnings by using __poll_t instead of unsigned type. fs/eventpoll.c:541:9: sparse: warning: restricted __poll_t degrades to integer fs/eventfd.c:67:17: sparse: warning: restricted __poll_t degrades to integer Signed-off-by: Min-Hua Chen Message-Id: <20230511164628.336586-1-minhuadotchen@gmail.com> Signed-off-by: Christian Brauner --- fs/eventfd.c | 2 +- fs/eventpoll.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/eventfd.c b/fs/eventfd.c index 95850a13ce8d..6c06a527747f 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -43,7 +43,7 @@ struct eventfd_ctx { int id; }; -__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask) +__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, __poll_t mask) { unsigned long flags; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 980483455cc0..e0eabaae7402 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -536,7 +536,7 @@ static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi, #else static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi, - unsigned pollflags) + __poll_t pollflags) { wake_up_poll(&ep->poll_wait, EPOLLIN | pollflags); } -- cgit From c642256b91770e201519d037a91f255a617a4602 Mon Sep 17 00:00:00 2001 From: Azeem Shaikh Date: Wed, 10 May 2023 22:11:19 +0000 Subject: vfs: Replace all non-returning strlcpy with strscpy strlcpy() reads the entire source buffer first. This read may exceed the destination size limit. This is both inefficient and can lead to linear read overflows if a source string is not NUL-terminated [1]. In an effort to remove strlcpy() completely [2], replace strlcpy() here with strscpy(). No return values were used, so direct replacement is safe. 
[1] https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy [2] https://github.com/KSPP/linux/issues/89 Signed-off-by: Azeem Shaikh Reviewed-by: Kees Cook Message-Id: <20230510221119.3508930-1-azeemshaikh38@gmail.com> Signed-off-by: Christian Brauner --- fs/char_dev.c | 2 +- fs/super.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/char_dev.c b/fs/char_dev.c index 13deb45f1ec6..950b6919fb87 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -150,7 +150,7 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor, cd->major = major; cd->baseminor = baseminor; cd->minorct = minorct; - strlcpy(cd->name, name, sizeof(cd->name)); + strscpy(cd->name, name, sizeof(cd->name)); if (!prev) { cd->next = curr; diff --git a/fs/super.c b/fs/super.c index 34afe411cf2b..8d8d68799b34 100644 --- a/fs/super.c +++ b/fs/super.c @@ -595,7 +595,7 @@ retry: fc->s_fs_info = NULL; s->s_type = fc->fs_type; s->s_iflags |= fc->s_iflags; - strlcpy(s->s_id, s->s_type->name, sizeof(s->s_id)); + strscpy(s->s_id, s->s_type->name, sizeof(s->s_id)); list_add_tail(&s->s_list, &super_blocks); hlist_add_head(&s->s_instances, &s->s_type->fs_supers); spin_unlock(&sb_lock); @@ -674,7 +674,7 @@ retry: return ERR_PTR(err); } s->s_type = type; - strlcpy(s->s_id, type->name, sizeof(s->s_id)); + strscpy(s->s_id, type->name, sizeof(s->s_id)); list_add_tail(&s->s_list, &super_blocks); hlist_add_head(&s->s_instances, &type->fs_supers); spin_unlock(&sb_lock); -- cgit From 55650b2fddb958e42036c5b07ed82983ce532865 Mon Sep 17 00:00:00 2001 From: Anuradha Weeraman Date: Sat, 6 May 2023 23:59:27 +0530 Subject: fs/open.c: Fix W=1 kernel doc warnings fs/open.c: In functions 'setattr_vfsuid' and 'setattr_vfsgid': warning: Function parameter or member 'attr' not described - Fix warning by removing kernel-doc for these as they are static inline functions and not required to be exposed via kernel-doc. 
fs/open.c: warning: Excess function parameter 'opened' description in 'finish_open' warning: Excess function parameter 'cred' description in 'vfs_open' - Fix by removing the parameters from the kernel-doc as they are no longer required by the function. Signed-off-by: Anuradha Weeraman Message-Id: <20230506182928.384105-1-anuradha@debian.org> Signed-off-by: Christian Brauner --- fs/open.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index 4478adcc4f3a..d8b87caaa1c2 100644 --- a/fs/open.c +++ b/fs/open.c @@ -700,10 +700,7 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode) return do_fchmodat(AT_FDCWD, filename, mode); } -/** - * setattr_vfsuid - check and set ia_fsuid attribute - * @kuid: new inode owner - * +/* * Check whether @kuid is valid and if so generate and set vfsuid_t in * ia_vfsuid. * @@ -718,10 +715,7 @@ static inline bool setattr_vfsuid(struct iattr *attr, kuid_t kuid) return true; } -/** - * setattr_vfsgid - check and set ia_fsgid attribute - * @kgid: new inode owner - * +/* * Check whether @kgid is valid and if so generate and set vfsgid_t in * ia_vfsgid. * @@ -989,7 +983,6 @@ cleanup_file: * @file: file pointer * @dentry: pointer to dentry * @open: open callback - * @opened: state of open * * This can be used to finish opening a file passed to i_op->atomic_open(). * @@ -1043,7 +1036,6 @@ EXPORT_SYMBOL(file_path); * vfs_open - open the file at the given path * @path: path to open * @file: newly allocated file with f_flag initialized - * @cred: credentials to use */ int vfs_open(const struct path *path, struct file *file) { -- cgit From 1168f095417643f663caa341211e117db552989f Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Sat, 6 May 2023 06:56:12 +0200 Subject: jffs2: reduce stack usage in jffs2_build_xattr_subsystem() Use kcalloc() for allocation/flush of 128 pointers table to reduce stack usage. Function now returns -ENOMEM or 0 on success. 
stackusage Before: ./fs/jffs2/xattr.c:775 jffs2_build_xattr_subsystem 1208 dynamic,bounded After: ./fs/jffs2/xattr.c:775 jffs2_build_xattr_subsystem 192 dynamic,bounded Also update the definition when CONFIG_JFFS2_FS_XATTR is not enabled. Tested with an MTD mount point and some user set/getfattr. Many current targets on OpenWRT also suffer from a compilation warning (that becomes an error with CONFIG_WERROR) with the following output: fs/jffs2/xattr.c: In function 'jffs2_build_xattr_subsystem': fs/jffs2/xattr.c:887:1: error: the frame size of 1088 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] 887 | } | ^ Using dynamic allocation fixes this compilation warning. Fixes: c9f700f840bd ("[JFFS2][XATTR] using 'delete marker' for xdatum/xref deletion") Reported-by: Tim Gardner Reported-by: kernel test robot Reported-by: Ron Economos Reported-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Fabian Frederick Signed-off-by: Christian Marangi Cc: stable@vger.kernel.org Message-Id: <20230506045612.16616-1-ansuelsmth@gmail.com> Signed-off-by: Christian Brauner --- fs/jffs2/build.c | 5 ++++- fs/jffs2/xattr.c | 13 +++++++++---- fs/jffs2/xattr.h | 4 ++-- 3 files changed, 15 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c index 837cd55fd4c5..6ae9d6fefb86 100644 --- a/fs/jffs2/build.c +++ b/fs/jffs2/build.c @@ -211,7 +211,10 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) ic->scan_dents = NULL; cond_resched(); } - jffs2_build_xattr_subsystem(c); + ret = jffs2_build_xattr_subsystem(c); + if (ret) + goto exit; + c->flags &= ~JFFS2_SB_FLAG_BUILDING; dbg_fsbuild("FS build complete\n"); diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index aa4048a27f31..3b6bdc9a49e1 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -772,10 +772,10 @@ void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c) } #define XREF_TMPHASH_SIZE (128) -void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c) +int
jffs2_build_xattr_subsystem(struct jffs2_sb_info *c) { struct jffs2_xattr_ref *ref, *_ref; - struct jffs2_xattr_ref *xref_tmphash[XREF_TMPHASH_SIZE]; + struct jffs2_xattr_ref **xref_tmphash; struct jffs2_xattr_datum *xd, *_xd; struct jffs2_inode_cache *ic; struct jffs2_raw_node_ref *raw; @@ -784,9 +784,12 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c) BUG_ON(!(c->flags & JFFS2_SB_FLAG_BUILDING)); + xref_tmphash = kcalloc(XREF_TMPHASH_SIZE, + sizeof(struct jffs2_xattr_ref *), GFP_KERNEL); + if (!xref_tmphash) + return -ENOMEM; + /* Phase.1 : Merge same xref */ - for (i=0; i < XREF_TMPHASH_SIZE; i++) - xref_tmphash[i] = NULL; for (ref=c->xref_temp; ref; ref=_ref) { struct jffs2_xattr_ref *tmp; @@ -884,6 +887,8 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c) "%u of xref (%u dead, %u orphan) found.\n", xdatum_count, xdatum_unchecked_count, xdatum_orphan_count, xref_count, xref_dead_count, xref_orphan_count); + kfree(xref_tmphash); + return 0; } struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c, diff --git a/fs/jffs2/xattr.h b/fs/jffs2/xattr.h index 720007b2fd65..1b5030a3349d 100644 --- a/fs/jffs2/xattr.h +++ b/fs/jffs2/xattr.h @@ -71,7 +71,7 @@ static inline int is_xattr_ref_dead(struct jffs2_xattr_ref *ref) #ifdef CONFIG_JFFS2_FS_XATTR extern void jffs2_init_xattr_subsystem(struct jffs2_sb_info *c); -extern void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c); +extern int jffs2_build_xattr_subsystem(struct jffs2_sb_info *c); extern void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c); extern struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c, @@ -103,7 +103,7 @@ extern ssize_t jffs2_listxattr(struct dentry *, char *, size_t); #else #define jffs2_init_xattr_subsystem(c) -#define jffs2_build_xattr_subsystem(c) +#define jffs2_build_xattr_subsystem(c) (0) #define jffs2_clear_xattr_subsystem(c) #define jffs2_xattr_do_crccheck_inode(c, ic) -- cgit From 
cedd0bdc166001fef26da475143ba4ebaf230261 Mon Sep 17 00:00:00 2001 From: Min-Hua Chen Date: Wed, 3 May 2023 07:22:08 +0800 Subject: fs: fix incorrect fmode_t casts Use __FMODE_NONOTIFY instead of FMODE_NONOTIFY to fix the following sparse warnings: fs/overlayfs/file.c:48:37: sparse: warning: restricted fmode_t degrades to integer fs/overlayfs/file.c:128:13: sparse: warning: restricted fmode_t degrades to integer fs/open.c:1159:21: sparse: warning: restricted fmode_t degrades to integer Signed-off-by: Min-Hua Chen Message-Id: <20230502232210.119063-1-minhuadotchen@gmail.com> Signed-off-by: Christian Brauner --- fs/open.c | 2 +- fs/overlayfs/file.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index d8b87caaa1c2..fa5d53282dfe 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1148,7 +1148,7 @@ inline struct open_how build_open_how(int flags, umode_t mode) inline int build_open_flags(const struct open_how *how, struct open_flags *op) { u64 flags = how->flags; - u64 strip = FMODE_NONOTIFY | O_CLOEXEC; + u64 strip = __FMODE_NONOTIFY | O_CLOEXEC; int lookup_flags = 0; int acc_mode = ACC_MODE(flags); diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 7c04f033aadd..0801917f932e 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -35,7 +35,7 @@ static char ovl_whatisit(struct inode *inode, struct inode *realinode) } /* No atime modification nor notify on underlying */ -#define OVL_OPEN_FLAGS (O_NOATIME | FMODE_NONOTIFY) +#define OVL_OPEN_FLAGS (O_NOATIME | __FMODE_NONOTIFY) static struct file *ovl_open_realfile(const struct file *file, const struct path *realpath) -- cgit From 88e4607034ee49e09e32d91d083dced5c2f4f127 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 20 Apr 2023 15:04:09 +0300 Subject: coredump: require O_WRONLY instead of O_RDWR The motivation for this patch has been to enable using a stricter apparmor profile to prevent programs from reading any coredump in the
system. However, this became something else. The following details are based on Christian's and Linus' archeology into the history of the number "2" in the coredump handling code. To make sure we're not accidentally introducing some subtle behavioral change into the coredump code we set out on a voyage into the depths of history.git to figure out why this was O_RDWR in the first place. Coredump handling was introduced over 30 years ago in commit ddc733f452e0 ("[PATCH] Linux-0.97 (August 1, 1992)"). The original code used O_WRONLY: open_namei("core",O_CREAT | O_WRONLY | O_TRUNC,0600,&inode,NULL) However, this changed in 1993 and starting with commit 9cb9f18b5d26 ("[PATCH] Linux-0.99.10 (June 7, 1993)") the coredump code suddenly used the constant "2": open_namei("core",O_CREAT | 2 | O_TRUNC,0600,&inode,NULL) This was curious as in the same commit the kernel switched from constants to proper defines in other places such as KERNEL_DS and USER_DS and O_RDWR did already exist. So why was "2" used? It turns out that open_namei() - an early version of what later turned into filp_open() - didn't accept O_RDWR. A semantic quirk of the open() uapi is the definition of the O_RDONLY flag. It would seem natural to define: #define O_RDWR (O_RDONLY | O_WRONLY) but that isn't possible because: #define O_RDONLY 0 This makes O_RDONLY effectively meaningless when passed to the kernel. In other words, there has never been a way - until O_PATH at least - to open a file without any permission; O_RDONLY was always implied on the uapi side while the kernel does in fact allow opening files without permissions. The trouble comes when trying to map the uapi flags onto the corresponding file mode flags FMODE_{READ,WRITE}. This mapping still happens today and is causing issues to this day (we ran into this during additions for openat2(), for example).
So the special value "3" was used to indicate that the file was opened for special access: f->f_flags = flag = flags; f->f_mode = (flag+1) & O_ACCMODE; if (f->f_mode) flag++; This allowed the file mode to be set to FMODE_READ | FMODE_WRITE mapping the O_{RDONLY,WRONLY,RDWR} flags into the FMODE_{READ,WRITE} flags. The special access then required read-write permissions and 0 was used to access symlinks. But back when ddc733f452e0 ("[PATCH] Linux-0.97 (August 1, 1992)") added coredump handling open_namei() took the FMODE_{READ,WRITE} flags as an argument. So the coredump handling introduced in ddc733f452e0 ("[PATCH] Linux-0.97 (August 1, 1992)") was buggy because O_WRONLY shouldn't have been passed. Since O_WRONLY is 1 but open_namei() took FMODE_{READ,WRITE} it was passed FMODE_READ on accident. So 9cb9f18b5d26 ("[PATCH] Linux-0.99.10 (June 7, 1993)") was a bugfix for this and the 2 didn't really mean O_RDWR, it meant FMODE_WRITE which was correct. The clue is that FMODE_{READ,WRITE} didn't exist yet and thus a raw "2" value was passed. Fast forward 5 years when around 2.2.4pre4 (February 16, 1999) this code was changed to: - dentry = open_namei(corefile,O_CREAT | 2 | O_TRUNC | O_NOFOLLOW, 0600); ... + file = filp_open(corefile,O_CREAT | 2 | O_TRUNC | O_NOFOLLOW, 0600); At this point the raw "2" should have become O_WRONLY again as filp_open() didn't take FMODE_{READ,WRITE} but O_{RDONLY,WRONLY,RDWR}. Another 17 years later, the code was changed again cementing the mistake and making it almost impossible to detect when commit 378c6520e7d2 ("fs/coredump: prevent fsuid=0 dumps into user-controlled directories") replaced the raw "2" with O_RDWR. And now, here we are with this patch that sent us on a quest to answer the big questions in life such as "Why are coredump files opened with O_RDWR?" and "Is it safe to just use O_WRONLY?". 
So with this commit we're reintroducing O_WRONLY again and bringing this code back to its original state when it was first introduced in commit ddc733f452e0 ("[PATCH] Linux-0.97 (August 1, 1992)") over 30 years ago. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20230420120409.602576-1-vsementsov@yandex-team.ru> [brauner@kernel.org: completely rewritten commit message] Signed-off-by: Linus Torvalds Signed-off-by: Christian Brauner --- fs/coredump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index ece7badf701b..ead3b05fb8f4 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -646,7 +646,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) } else { struct mnt_idmap *idmap; struct inode *inode; - int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW | + int open_flags = O_CREAT | O_WRONLY | O_NOFOLLOW | O_LARGEFILE | O_EXCL; if (cprm.limit < binfmt->min_coredump) -- cgit From df67cb4c58fbb80399a99d47a554a67829f90dda Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 May 2023 21:54:38 +0200 Subject: fs: d_path: include internal.h make W=1 warns about a missing prototype that is defined but not visible at point where simple_dname() is defined: fs/d_path.c:317:7: error: no previous prototype for 'simple_dname' [-Werror=missing-prototypes] Signed-off-by: Arnd Bergmann Message-Id: <20230516195444.551461-1-arnd@kernel.org> Signed-off-by: Christian Brauner --- fs/d_path.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/d_path.c b/fs/d_path.c index 56a6ee4c6331..5f4da5c8d5db 100644 --- a/fs/d_path.c +++ b/fs/d_path.c @@ -7,6 +7,7 @@ #include #include #include "mount.h" +#include "internal.h" struct prepend_buffer { char *buf; -- cgit From b7a9a503c38d665c05a789132b632d81ec0b2703 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 23 May 2023 18:26:28 +0200 Subject: fs: use UB-safe check for signed addition overflow in remap_verify_area The following warning pops up with 
enabled UBSAN in tests fstests/generic/303: [23127.529395] UBSAN: Undefined behaviour in fs/read_write.c:1725:7 [23127.529400] signed integer overflow: [23127.529403] 4611686018427322368 + 9223372036854775807 cannot be represented in type 'long long int' [23127.529412] CPU: 4 PID: 26180 Comm: xfs_io Not tainted 5.2.0-rc2-1.ge195904-vanilla+ #450 [23127.556999] Hardware name: empty empty/S3993, BIOS PAQEX0-3 02/24/2008 [23127.557001] Call Trace: [23127.557060] dump_stack+0x67/0x9b [23127.557070] ubsan_epilogue+0x9/0x40 [23127.573496] handle_overflow+0xb3/0xc0 [23127.573514] do_clone_file_range+0x28f/0x2a0 [23127.573547] vfs_clone_file_range+0x35/0xb0 [23127.573564] ioctl_file_clone+0x8d/0xc0 [23127.590144] do_vfs_ioctl+0x300/0x700 [23127.590160] ksys_ioctl+0x70/0x80 [23127.590203] ? trace_hardirqs_off_thunk+0x1a/0x1c [23127.590210] __x64_sys_ioctl+0x16/0x20 [23127.590215] do_syscall_64+0x5c/0x1d0 [23127.590224] entry_SYSCALL_64_after_hwframe+0x49/0xbe [23127.590231] RIP: 0033:0x7ff6d7250327 [23127.590241] RSP: 002b:00007ffe3a38f1d8 EFLAGS: 00000206 ORIG_RAX: 0000000000000010 [23127.590246] RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007ff6d7250327 [23127.590249] RDX: 00007ffe3a38f220 RSI: 000000004020940d RDI: 0000000000000003 [23127.590252] RBP: 0000000000000000 R08: 00007ffe3a3c80a0 R09: 00007ffe3a3c8080 [23127.590255] R10: 000000000fa99fa0 R11: 0000000000000206 R12: 0000000000000000 [23127.590260] R13: 0000000000000000 R14: 3fffffffffff0000 R15: 00007ff6d750a20c As loff_t is a signed type, we should use the safe overflow checks instead of relying on compiler implementation. The bogus values are intentional and the test is supposed to verify the boundary conditions. 
Signed-off-by: David Sterba Message-Id: <20230523162628.17071-1-dsterba@suse.com> Signed-off-by: Christian Brauner --- fs/remap_range.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/remap_range.c b/fs/remap_range.c index 1331a890f2f2..87ae4f0dc3aa 100644 --- a/fs/remap_range.c +++ b/fs/remap_range.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "internal.h" #include @@ -101,10 +102,12 @@ static int generic_remap_checks(struct file *file_in, loff_t pos_in, static int remap_verify_area(struct file *file, loff_t pos, loff_t len, bool write) { + loff_t tmp; + if (unlikely(pos < 0 || len < 0)) return -EINVAL; - if (unlikely((loff_t) (pos + len) < 0)) + if (unlikely(check_add_overflow(pos, len, &tmp))) return -EINVAL; return security_file_permission(file, write ? MAY_WRITE : MAY_READ); -- cgit From ea2b62f305893992156a798f665847e0663c9f41 Mon Sep 17 00:00:00 2001 From: Prince Kumar Maurya Date: Tue, 30 May 2023 18:31:41 -0700 Subject: fs/sysv: Null check to prevent null-ptr-deref bug sb_getblk(inode->i_sb, parent) can return a NULL pointer, and taking the lock on it leads to a null-ptr-deref bug.
Reported-by: syzbot+aad58150cbc64ba41bdc@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=aad58150cbc64ba41bdc Signed-off-by: Prince Kumar Maurya Message-Id: <20230531013141.19487-1-princekumarmaurya06@gmail.com> Signed-off-by: Christian Brauner --- fs/sysv/itree.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index b22764fe669c..58d7f43a1371 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -145,6 +145,10 @@ static int alloc_branch(struct inode *inode, */ parent = block_to_cpu(SYSV_SB(inode->i_sb), branch[n-1].key); bh = sb_getblk(inode->i_sb, parent); + if (!bh) { + sysv_free_block(inode->i_sb, branch[n].key); + break; + } lock_buffer(bh); memset(bh->b_data, 0, blocksize); branch[n].bh = bh; -- cgit From 820eb59da8c7ca7e705a02f37dda2be316807847 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 4 Feb 2023 10:33:56 -0800 Subject: jfs: Use unsigned variable for length calculations To avoid confusing the compiler about possible negative sizes, switch "ssize" which can never be negative from int to u32. Seen with GCC 13: ../fs/jfs/namei.c: In function 'jfs_symlink': ../include/linux/fortify-string.h:57:33: warning: '__builtin_memcpy' pointer overflow between offset 0 and size [-2147483648, -1] [-Warray-bounds=] 57 | #define __underlying_memcpy __builtin_memcpy | ^ ... 
../fs/jfs/namei.c:950:17: note: in expansion of macro 'memcpy' 950 | memcpy(ip->i_link, name, ssize); | ^~~~~~ Cc: Dave Kleikamp Cc: Christian Brauner Cc: Dave Chinner Cc: jfs-discussion@lists.sourceforge.net Signed-off-by: Kees Cook Acked-by: Jeff Xu Message-Id: <20230204183355.never.877-kees@kernel.org> Signed-off-by: Christian Brauner --- fs/jfs/namei.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index b29d68b5eec5..494b9f4043cf 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -876,7 +876,7 @@ static int jfs_symlink(struct mnt_idmap *idmap, struct inode *dip, tid_t tid; ino_t ino = 0; struct component_name dname; - int ssize; /* source pathname size */ + u32 ssize; /* source pathname size */ struct btstack btstack; struct inode *ip = d_inode(dentry); s64 xlen = 0; @@ -957,7 +957,7 @@ static int jfs_symlink(struct mnt_idmap *idmap, struct inode *dip, if (ssize > sizeof (JFS_IP(ip)->i_inline)) JFS_IP(ip)->mode2 &= ~INLINEEA; - jfs_info("jfs_symlink: fast symlink added ssize:%d name:%s ", + jfs_info("jfs_symlink: fast symlink added ssize:%u name:%s ", ssize, name); } /* @@ -987,7 +987,7 @@ static int jfs_symlink(struct mnt_idmap *idmap, struct inode *dip, ip->i_size = ssize - 1; while (ssize) { /* This is kind of silly since PATH_MAX == 4K */ - int copy_size = min(ssize, PSIZE); + u32 copy_size = min_t(u32, ssize, PSIZE); mp = get_metapage(ip, xaddr, PSIZE, 1); -- cgit From 79aa28494638f03a9e664163cb4620eb0482aaa2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 19 May 2023 18:21:20 +0100 Subject: cachefiles: Allow the cache to be non-root Set mode 0600 on files in the cache so that cachefilesd can run as an unprivileged user rather than leaving the files all with 0. Directories are already set to 0700. Userspace then needs to set the uid and gid before issuing the "bind" command and the cache must've been chown'd to those IDs. 
Signed-off-by: David Howells Reviewed-by: Jeff Layton Reviewed-by: Gao Xiang cc: David Howells cc: Jeff Layton cc: linux-cachefs@redhat.com cc: linux-erofs@lists.ozlabs.org cc: linux-fsdevel@vger.kernel.org Message-Id: <1853230.1684516880@warthog.procyon.org.uk> Signed-off-by: Christian Brauner --- fs/cachefiles/namei.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 82219a8f6084..66482c193e86 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -451,7 +451,8 @@ struct file *cachefiles_create_tmpfile(struct cachefiles_object *object) ret = cachefiles_inject_write_error(); if (ret == 0) { - file = vfs_tmpfile_open(&nop_mnt_idmap, &parentpath, S_IFREG, + file = vfs_tmpfile_open(&nop_mnt_idmap, &parentpath, + S_IFREG | 0600, O_RDWR | O_LARGEFILE | O_DIRECT, cache->cache_cred); ret = PTR_ERR_OR_ZERO(file); -- cgit From d0e135408e196921da2c85ee424235382c9ed614 Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Fri, 2 Jun 2023 12:33:07 +0200 Subject: highmem: Rename put_and_unmap_page() to unmap_and_put_page() With commit 849ad04cf562a ("new helper: put_and_unmap_page()"), Al Viro introduced the put_and_unmap_page() to use in those many places where we have a common pattern consisting of calls to kunmap_local() + put_page(). Obviously, first we unmap and then we put pages. Instead, the original name of this helper seems to imply that we first put and then unmap. Therefore, rename the helper and change the only known upstreamed user (i.e., fs/sysv) before this helper enters common use and might become difficult to find all call sites and instead easy to break the builds. Cc: Al Viro Signed-off-by: Fabio M. 
De Francesco Reviewed-by: Eric Biggers Message-Id: <20230602103307.5637-1-fmdefrancesco@gmail.com> Signed-off-by: Christian Brauner --- fs/sysv/dir.c | 22 +++++++++++----------- fs/sysv/namei.c | 8 ++++---- 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index cdb3d632c63d..0140010aa0c3 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -52,7 +52,7 @@ static int sysv_handle_dirsync(struct inode *dir) } /* - * Calls to dir_get_page()/put_and_unmap_page() must be nested according to the + * Calls to dir_get_page()/unmap_and_put_page() must be nested according to the * rules documented in mm/highmem.rst. * * NOTE: sysv_find_entry() and sysv_dotdot() act as calls to dir_get_page() @@ -103,11 +103,11 @@ static int sysv_readdir(struct file *file, struct dir_context *ctx) if (!dir_emit(ctx, name, strnlen(name,SYSV_NAMELEN), fs16_to_cpu(SYSV_SB(sb), de->inode), DT_UNKNOWN)) { - put_and_unmap_page(page, kaddr); + unmap_and_put_page(page, kaddr); return 0; } } - put_and_unmap_page(page, kaddr); + unmap_and_put_page(page, kaddr); } return 0; } @@ -131,7 +131,7 @@ static inline int namecompare(int len, int maxlen, * itself (as a parameter - res_dir). It does NOT read the inode of the * entry - you'll have to do that yourself if you want to. * - * On Success put_and_unmap_page() should be called on *res_page. + * On Success unmap_and_put_page() should be called on *res_page. * * sysv_find_entry() acts as a call to dir_get_page() and must be treated * accordingly for nesting purposes. 
@@ -166,7 +166,7 @@ struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_ name, de->name)) goto found; } - put_and_unmap_page(page, kaddr); + unmap_and_put_page(page, kaddr); } if (++n >= npages) @@ -209,7 +209,7 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode) goto out_page; de++; } - put_and_unmap_page(page, kaddr); + unmap_and_put_page(page, kaddr); } BUG(); return -EINVAL; @@ -228,7 +228,7 @@ got_it: mark_inode_dirty(dir); err = sysv_handle_dirsync(dir); out_page: - put_and_unmap_page(page, kaddr); + unmap_and_put_page(page, kaddr); return err; out_unlock: unlock_page(page); @@ -321,12 +321,12 @@ int sysv_empty_dir(struct inode * inode) if (de->name[1] != '.' || de->name[2]) goto not_empty; } - put_and_unmap_page(page, kaddr); + unmap_and_put_page(page, kaddr); } return 1; not_empty: - put_and_unmap_page(page, kaddr); + unmap_and_put_page(page, kaddr); return 0; } @@ -352,7 +352,7 @@ int sysv_set_link(struct sysv_dir_entry *de, struct page *page, } /* - * Calls to dir_get_page()/put_and_unmap_page() must be nested according to the + * Calls to dir_get_page()/unmap_and_put_page() must be nested according to the * rules documented in mm/highmem.rst. 
* * sysv_dotdot() acts as a call to dir_get_page() and must be treated @@ -376,7 +376,7 @@ ino_t sysv_inode_by_name(struct dentry *dentry) if (de) { res = fs16_to_cpu(SYSV_SB(dentry->d_sb), de->inode); - put_and_unmap_page(page, de); + unmap_and_put_page(page, de); } return res; } diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index 2b2dba4c4f56..fcf163fea3ad 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -164,7 +164,7 @@ static int sysv_unlink(struct inode * dir, struct dentry * dentry) inode->i_ctime = dir->i_ctime; inode_dec_link_count(inode); } - put_and_unmap_page(page, de); + unmap_and_put_page(page, de); return err; } @@ -227,7 +227,7 @@ static int sysv_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (!new_de) goto out_dir; err = sysv_set_link(new_de, new_page, old_inode); - put_and_unmap_page(new_page, new_de); + unmap_and_put_page(new_page, new_de); if (err) goto out_dir; new_inode->i_ctime = current_time(new_inode); @@ -256,9 +256,9 @@ static int sysv_rename(struct mnt_idmap *idmap, struct inode *old_dir, out_dir: if (dir_de) - put_and_unmap_page(dir_page, dir_de); + unmap_and_put_page(dir_page, dir_de); out_old: - put_and_unmap_page(old_page, old_de); + unmap_and_put_page(old_page, old_de); out: return err; } -- cgit From 62176420274db5b5127cd7a0083a9aeb461756ee Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 7 Jun 2023 19:28:48 +0200 Subject: fs: avoid empty option when generating legacy mount string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As each option string fragment is always prepended with a comma it would happen that the whole string always starts with a comma. This could be interpreted by filesystem drivers as an empty option and may produce errors. For example the NTFS driver from ntfs.ko behaves like this and fails when mounted via the new API. 
Link: https://github.com/util-linux/util-linux/issues/2298 Signed-off-by: Thomas Weißschuh Fixes: 3e1aeb00e6d1 ("vfs: Implement a filesystem superblock creation/configuration context") Cc: stable@vger.kernel.org Message-Id: <20230607-fs-empty-option-v1-1-20c8dbf4671b@weissschuh.net> Signed-off-by: Christian Brauner --- fs/fs_context.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fs_context.c b/fs/fs_context.c index 24ce12f0db32..851214d1d013 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -561,7 +561,8 @@ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param) return -ENOMEM; } - ctx->legacy_data[size++] = ','; + if (size) + ctx->legacy_data[size++] = ','; len = strlen(param->key); memcpy(ctx->legacy_data + size, param->key, len); size += len; -- cgit From 4bb218a65a43782b1e75f5510744cb44795a1105 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 8 Jun 2023 14:29:58 +0200 Subject: fs: unexport buffer_check_dirty_writeback buffer_check_dirty_writeback is only used by the block device aops, remove the export. Signed-off-by: Christoph Hellwig Message-Id: <20230608122958.276954-1-hch@lst.de> Signed-off-by: Christian Brauner --- fs/buffer.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index a7fc561758b1..fe64356e89b8 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -111,7 +111,6 @@ void buffer_check_dirty_writeback(struct folio *folio, bh = bh->b_this_page; } while (bh != head); } -EXPORT_SYMBOL(buffer_check_dirty_writeback); /* * Block until a buffer comes unlocked. This doesn't stop it -- cgit From 5c075c5b8fc4ebc34aac188be4eccc238521eb6f Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Fri, 9 Jun 2023 16:59:37 +0200 Subject: fs/aio: Stop allocating aio rings from HIGHMEM There is no need to allocate aio rings from HIGHMEM because of very little memory needed here. 
Therefore, use GFP_USER flag in find_or_create_page() and get rid of kmap*() mappings. Cc: Al Viro Cc: Ira Weiny Suggested-by: Matthew Wilcox Signed-off-by: Fabio M. De Francesco Reviewed-by: Ira Weiny Message-Id: <20230609145937.17610-1-fmdefrancesco@gmail.com> Signed-off-by: Christian Brauner --- fs/aio.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index b0b17bd098bb..77e33619de40 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -530,7 +530,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events) for (i = 0; i < nr_pages; i++) { struct page *page; page = find_or_create_page(file->f_mapping, - i, GFP_HIGHUSER | __GFP_ZERO); + i, GFP_USER | __GFP_ZERO); if (!page) break; pr_debug("pid(%d) page[%d]->count=%d\n", @@ -571,7 +571,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events) ctx->user_id = ctx->mmap_base; ctx->nr_events = nr_events; /* trusted copy */ - ring = kmap_atomic(ctx->ring_pages[0]); + ring = page_address(ctx->ring_pages[0]); ring->nr = nr_events; /* user copy */ ring->id = ~0U; ring->head = ring->tail = 0; @@ -579,7 +579,6 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events) ring->compat_features = AIO_RING_COMPAT_FEATURES; ring->incompat_features = AIO_RING_INCOMPAT_FEATURES; ring->header_length = sizeof(struct aio_ring); - kunmap_atomic(ring); flush_dcache_page(ctx->ring_pages[0]); return 0; @@ -682,9 +681,8 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) * we are protected from page migration * changes ring_pages by ->ring_lock. */ - ring = kmap_atomic(ctx->ring_pages[0]); + ring = page_address(ctx->ring_pages[0]); ring->id = ctx->id; - kunmap_atomic(ring); return 0; } @@ -1025,9 +1023,8 @@ static void user_refill_reqs_available(struct kioctx *ctx) * against ctx->completed_events below will make sure we do the * safe/right thing. 
*/ - ring = kmap_atomic(ctx->ring_pages[0]); + ring = page_address(ctx->ring_pages[0]); head = ring->head; - kunmap_atomic(ring); refill_reqs_available(ctx, head, ctx->tail); } @@ -1133,12 +1130,11 @@ static void aio_complete(struct aio_kiocb *iocb) if (++tail >= ctx->nr_events) tail = 0; - ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); + ev_page = page_address(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); event = ev_page + pos % AIO_EVENTS_PER_PAGE; *event = iocb->ki_res; - kunmap_atomic(ev_page); flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb, @@ -1152,10 +1148,9 @@ static void aio_complete(struct aio_kiocb *iocb) ctx->tail = tail; - ring = kmap_atomic(ctx->ring_pages[0]); + ring = page_address(ctx->ring_pages[0]); head = ring->head; ring->tail = tail; - kunmap_atomic(ring); flush_dcache_page(ctx->ring_pages[0]); ctx->completed_events++; @@ -1215,10 +1210,9 @@ static long aio_read_events_ring(struct kioctx *ctx, mutex_lock(&ctx->ring_lock); /* Access to ->ring_pages here is protected by ctx->ring_lock. 
*/ - ring = kmap_atomic(ctx->ring_pages[0]); + ring = page_address(ctx->ring_pages[0]); head = ring->head; tail = ring->tail; - kunmap_atomic(ring); /* * Ensure that once we've read the current tail pointer, that @@ -1250,10 +1244,9 @@ static long aio_read_events_ring(struct kioctx *ctx, avail = min(avail, nr - ret); avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos); - ev = kmap(page); + ev = page_address(page); copy_ret = copy_to_user(event + ret, ev + pos, sizeof(*ev) * avail); - kunmap(page); if (unlikely(copy_ret)) { ret = -EFAULT; @@ -1265,9 +1258,8 @@ static long aio_read_events_ring(struct kioctx *ctx, head %= ctx->nr_events; } - ring = kmap_atomic(ctx->ring_pages[0]); + ring = page_address(ctx->ring_pages[0]); ring->head = head; - kunmap_atomic(ring); flush_dcache_page(ctx->ring_pages[0]); pr_debug("%li h%u t%u\n", ret, head, tail); -- cgit From 33d8b5d7824c7175ed968b8e89e6db3566e9c177 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Wed, 14 Jun 2023 01:01:22 +0800 Subject: eventfd: show the EFD_SEMAPHORE flag in fdinfo The EFD_SEMAPHORE flag should be displayed in fdinfo, as different values could affect the behavior of eventfd. Suggested-by: Christian Brauner Signed-off-by: Wen Yang Cc: Alexander Viro Cc: Jens Axboe Cc: Christian Brauner Cc: Christoph Hellwig Cc: Dylan Yudaken Cc: David Woodhouse Cc: Matthew Wilcox Cc: Eric Biggers Cc: linux-fsdevel@vger.kernel.org Cc: linux-kernel@vger.kernel.org Message-Id: Signed-off-by: Christian Brauner --- fs/eventfd.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/eventfd.c b/fs/eventfd.c index 6c06a527747f..8aa36cd37351 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -33,10 +33,10 @@ struct eventfd_ctx { /* * Every time that a write(2) is performed on an eventfd, the * value of the __u64 being written is added to "count" and a - * wakeup is performed on "wqh". A read(2) will return the "count" - * value to userspace, and will reset "count" to zero.
The kernel - * side eventfd_signal() also, adds to the "count" counter and - * issue a wakeup. + * wakeup is performed on "wqh". If EFD_SEMAPHORE flag was not + * specified, a read(2) will return the "count" value to userspace, + * and will reset "count" to zero. The kernel side eventfd_signal() + * also, adds to the "count" counter and issue a wakeup. */ __u64 count; unsigned int flags; @@ -301,6 +301,8 @@ static void eventfd_show_fdinfo(struct seq_file *m, struct file *f) (unsigned long long)ctx->count); spin_unlock_irq(&ctx->wqh.lock); seq_printf(m, "eventfd-id: %d\n", ctx->id); + seq_printf(m, "eventfd-semaphore: %d\n", + !!(ctx->flags & EFD_SEMAPHORE)); } #endif -- cgit From 797a1d894d7b7586f422cabf8d7807cd39d0b5aa Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 12 Jun 2023 06:45:19 -0400 Subject: autofs: set ctime as well when mtime changes on a dir When adding entries to a directory, POSIX generally requires that the ctime also be updated alongside the mtime. Signed-off-by: Jeff Layton Acked-by: Ian Kent Message-Id: <20230612104524.17058-4-jlayton@kernel.org> Signed-off-by: Christian Brauner --- fs/autofs/root.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/autofs/root.c b/fs/autofs/root.c index 6baf90b08e0e..93046c9dc461 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -600,7 +600,7 @@ static int autofs_dir_symlink(struct mnt_idmap *idmap, p_ino = autofs_dentry_ino(dentry->d_parent); p_ino->count++; - dir->i_mtime = current_time(dir); + dir->i_mtime = dir->i_ctime = current_time(dir); return 0; } @@ -633,7 +633,7 @@ static int autofs_dir_unlink(struct inode *dir, struct dentry *dentry) d_inode(dentry)->i_size = 0; clear_nlink(d_inode(dentry)); - dir->i_mtime = current_time(dir); + dir->i_mtime = dir->i_ctime = current_time(dir); spin_lock(&sbi->lookup_lock); __autofs_add_expiring(dentry); @@ -749,7 +749,7 @@ static int autofs_dir_mkdir(struct mnt_idmap *idmap, p_ino = 
autofs_dentry_ino(dentry->d_parent); p_ino->count++; inc_nlink(dir); - dir->i_mtime = current_time(dir); + dir->i_mtime = dir->i_ctime = current_time(dir); return 0; } -- cgit From c541dce86c537714b6761a79a969c1623dfa222b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 15 Jun 2023 13:38:48 +0200 Subject: fs: Protect reconfiguration of sb read-write from racing writes The reconfigure / remount code takes a lot of effort to protect filesystem's reconfiguration code from racing writes on remounting read-only. However, when remounting a read-only filesystem to read-write mode, userspace writes can start immediately once we clear the SB_RDONLY flag. This is inconvenient for example for ext4 because we need to do some writes to the filesystem (such as preparation of quota files) before we can take userspace writes so we are clearing SB_RDONLY flag before we are fully ready to accept userspace writes and syzbot has found a way to exploit this [1]. Also as far as I'm reading the code the filesystem remount code was protected from racing writes in the legacy mount path by the mount's MNT_READONLY flag so this is a relatively new problem. It is actually fairly easy to protect remount read-write from racing writes using sb->s_readonly_remount flag so let's just do that instead of having to work around these races in the filesystem code.
[1] https://lore.kernel.org/all/00000000000006a0df05f6667499@google.com/T/ Signed-off-by: Jan Kara Message-Id: <20230615113848.8439-1-jack@suse.cz> Signed-off-by: Christian Brauner --- fs/super.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/super.c b/fs/super.c index 8d8d68799b34..5bf056087acc 100644 --- a/fs/super.c +++ b/fs/super.c @@ -903,6 +903,7 @@ int reconfigure_super(struct fs_context *fc) struct super_block *sb = fc->root->d_sb; int retval; bool remount_ro = false; + bool remount_rw = false; bool force = fc->sb_flags & SB_FORCE; if (fc->sb_flags_mask & ~MS_RMT_MASK) @@ -920,7 +921,7 @@ int reconfigure_super(struct fs_context *fc) bdev_read_only(sb->s_bdev)) return -EACCES; #endif - + remount_rw = !(fc->sb_flags & SB_RDONLY) && sb_rdonly(sb); remount_ro = (fc->sb_flags & SB_RDONLY) && !sb_rdonly(sb); } @@ -950,6 +951,14 @@ int reconfigure_super(struct fs_context *fc) if (retval) return retval; } + } else if (remount_rw) { + /* + * We set s_readonly_remount here to protect filesystem's + * reconfigure code from writes from userspace until + * reconfigure finishes. + */ + sb->s_readonly_remount = 1; + smp_wmb(); } if (fc->ops->reconfigure) { -- cgit From d7439fb1f4338fffd0bc68bb62d78f7712725f26 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 20 Jun 2023 13:28:32 +0200 Subject: fs: Provide helpers for manipulating sb->s_readonly_remount Provide helpers to set and clear sb->s_readonly_remount including appropriate memory barriers. Also use this opportunity to document what the barriers pair with and why they are needed. 
Suggested-by: Dave Chinner Signed-off-by: Jan Kara Reviewed-by: Dave Chinner Message-Id: <20230620112832.5158-1-jack@suse.cz> Signed-off-by: Christian Brauner --- fs/internal.h | 41 +++++++++++++++++++++++++++++++++++++++++ fs/namespace.c | 25 ++++++++++++++++--------- fs/super.c | 17 ++++++----------- 3 files changed, 63 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/internal.h b/fs/internal.h index bd3b2810a36b..b916b84809f3 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -120,6 +120,47 @@ void put_super(struct super_block *sb); extern bool mount_capable(struct fs_context *); int sb_init_dio_done_wq(struct super_block *sb); +/* + * Prepare superblock for changing its read-only state (i.e., either remount + * read-write superblock read-only or vice versa). After this function returns + * mnt_is_readonly() will return true for any mount of the superblock if its + * caller is able to observe any changes done by the remount. This holds until + * sb_end_ro_state_change() is called. + */ +static inline void sb_start_ro_state_change(struct super_block *sb) +{ + WRITE_ONCE(sb->s_readonly_remount, 1); + /* + * For RO->RW transition, the barrier pairs with the barrier in + * mnt_is_readonly() making sure if mnt_is_readonly() sees SB_RDONLY + * cleared, it will see s_readonly_remount set. + * For RW->RO transition, the barrier pairs with the barrier in + * __mnt_want_write() before the mnt_is_readonly() check. The barrier + * makes sure if __mnt_want_write() sees MNT_WRITE_HOLD already + * cleared, it will see s_readonly_remount set. + */ + smp_wmb(); +} + +/* + * Ends section changing read-only state of the superblock. After this function + * returns if mnt_is_readonly() returns false, the caller will be able to + * observe all the changes remount did to the superblock. 
+ */ +static inline void sb_end_ro_state_change(struct super_block *sb) +{ + /* + * This barrier provides release semantics that pairs with + * the smp_rmb() acquire semantics in mnt_is_readonly(). + * This barrier pair ensure that when mnt_is_readonly() sees + * 0 for sb->s_readonly_remount, it will also see all the + * preceding flag changes that were made during the RO state + * change. + */ + smp_wmb(); + WRITE_ONCE(sb->s_readonly_remount, 0); +} + /* * open.c */ diff --git a/fs/namespace.c b/fs/namespace.c index 54847db5b819..5ba1eca6f720 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -309,9 +309,16 @@ static unsigned int mnt_get_writers(struct mount *mnt) static int mnt_is_readonly(struct vfsmount *mnt) { - if (mnt->mnt_sb->s_readonly_remount) + if (READ_ONCE(mnt->mnt_sb->s_readonly_remount)) return 1; - /* Order wrt setting s_flags/s_readonly_remount in do_remount() */ + /* + * The barrier pairs with the barrier in sb_start_ro_state_change() + * making sure if we don't see s_readonly_remount set yet, we also will + * not see any superblock / mount flag changes done by remount. + * It also pairs with the barrier in sb_end_ro_state_change() + * assuring that if we see s_readonly_remount already cleared, we will + * see the values of superblock / mount flags updated by remount. + */ smp_rmb(); return __mnt_is_readonly(mnt); } @@ -364,9 +371,11 @@ int __mnt_want_write(struct vfsmount *m) } } /* - * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will - * be set to match its requirements. So we must not load that until - * MNT_WRITE_HOLD is cleared. + * The barrier pairs with the barrier sb_start_ro_state_change() making + * sure that if we see MNT_WRITE_HOLD cleared, we will also see + * s_readonly_remount set (or even SB_RDONLY / MNT_READONLY flags) in + * mnt_is_readonly() and bail in case we are racing with remount + * read-only. 
*/ smp_rmb(); if (mnt_is_readonly(m)) { @@ -588,10 +597,8 @@ int sb_prepare_remount_readonly(struct super_block *sb) if (!err && atomic_long_read(&sb->s_remove_count)) err = -EBUSY; - if (!err) { - sb->s_readonly_remount = 1; - smp_wmb(); - } + if (!err) + sb_start_ro_state_change(sb); list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; diff --git a/fs/super.c b/fs/super.c index 5bf056087acc..5ee5da1fd498 100644 --- a/fs/super.c +++ b/fs/super.c @@ -944,8 +944,7 @@ int reconfigure_super(struct fs_context *fc) */ if (remount_ro) { if (force) { - sb->s_readonly_remount = 1; - smp_wmb(); + sb_start_ro_state_change(sb); } else { retval = sb_prepare_remount_readonly(sb); if (retval) @@ -953,12 +952,10 @@ int reconfigure_super(struct fs_context *fc) } } else if (remount_rw) { /* - * We set s_readonly_remount here to protect filesystem's - * reconfigure code from writes from userspace until - * reconfigure finishes. + * Protect filesystem's reconfigure code from writes from + * userspace until reconfigure finishes. */ - sb->s_readonly_remount = 1; - smp_wmb(); + sb_start_ro_state_change(sb); } if (fc->ops->reconfigure) { @@ -974,9 +971,7 @@ int reconfigure_super(struct fs_context *fc) WRITE_ONCE(sb->s_flags, ((sb->s_flags & ~fc->sb_flags_mask) | (fc->sb_flags & fc->sb_flags_mask))); - /* Needs to be ordered wrt mnt_is_readonly() */ - smp_wmb(); - sb->s_readonly_remount = 0; + sb_end_ro_state_change(sb); /* * Some filesystems modify their metadata via some other path than the @@ -991,7 +986,7 @@ int reconfigure_super(struct fs_context *fc) return 0; cancel_readonly: - sb->s_readonly_remount = 0; + sb_end_ro_state_change(sb); return retval; } -- cgit From 2507135e4ff231a368eae38000a501da0b96c662 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Tue, 20 Jun 2023 11:30:36 -0600 Subject: readdir: Replace one-element arrays with flexible-array members One-element arrays are deprecated, and we are replacing them with flexible array members instead. So, replace one-element arrays with flexible-array members in multiple structures. Address the following -Wstringop-overflow warnings seen when building for the m68k architecture with the m5307c3_defconfig configuration: In function '__put_user_fn', inlined from 'fillonedir' at fs/readdir.c:170:2: include/asm-generic/uaccess.h:49:35: warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=] 49 | *(u8 __force *)to = *(u8 *)from; | ~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~ fs/readdir.c: In function 'fillonedir': fs/readdir.c:134:25: note: at offset 1 into destination object 'd_name' of size 1 134 | char d_name[1]; | ^~~~~~ In function '__put_user_fn', inlined from 'filldir' at fs/readdir.c:257:2: include/asm-generic/uaccess.h:49:35: warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=] 49 | *(u8 __force *)to = *(u8 *)from; | ~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~ fs/readdir.c: In function 'filldir': fs/readdir.c:211:25: note: at offset 1 into destination object 'd_name' of size 1 211 | char d_name[1]; | ^~~~~~ This helps with the ongoing efforts to globally enable -Wstringop-overflow. This results in no differences in binary output. Link: https://github.com/KSPP/linux/issues/79 Link: https://github.com/KSPP/linux/issues/312 Signed-off-by: Gustavo A. R.
Silva Reviewed-by: Kees Cook Message-Id: Signed-off-by: Christian Brauner --- fs/readdir.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/readdir.c b/fs/readdir.c index 9c53edb60c03..b264ce60114d 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -131,7 +131,7 @@ struct old_linux_dirent { unsigned long d_ino; unsigned long d_offset; unsigned short d_namlen; - char d_name[1]; + char d_name[]; }; struct readdir_callback { @@ -208,7 +208,7 @@ struct linux_dirent { unsigned long d_ino; unsigned long d_off; unsigned short d_reclen; - char d_name[1]; + char d_name[]; }; struct getdents_callback { @@ -388,7 +388,7 @@ struct compat_old_linux_dirent { compat_ulong_t d_ino; compat_ulong_t d_offset; unsigned short d_namlen; - char d_name[1]; + char d_name[]; }; struct compat_readdir_callback { @@ -460,7 +460,7 @@ struct compat_linux_dirent { compat_ulong_t d_ino; compat_ulong_t d_off; unsigned short d_reclen; - char d_name[1]; + char d_name[]; }; struct compat_getdents_callback { -- cgit