summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/alpha/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/arm/tools/syscall.tbl1
-rw-r--r--arch/arm64/include/asm/unistd.h2
-rw-r--r--arch/arm64/include/asm/unistd32.h2
-rw-r--r--arch/ia64/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/m68k/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/microblaze/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_n32.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_n64.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_o32.tbl1
-rw-r--r--arch/parisc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/sh/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/sparc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl1
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl1
-rw-r--r--arch/xtensa/kernel/syscalls/syscall.tbl1
-rw-r--r--fs/aio.c8
-rw-r--r--fs/char_dev.c3
-rw-r--r--fs/fs_context.c30
-rw-r--r--fs/internal.h1
-rw-r--r--fs/mount.h12
-rw-r--r--fs/namei.c21
-rw-r--r--fs/namespace.c91
-rw-r--r--fs/open.c58
-rw-r--r--fs/proc_namespace.c4
-rw-r--r--fs/stat.c11
-rw-r--r--fs/utimes.c6
-rw-r--r--include/linux/device_cgroup.h3
-rw-r--r--include/linux/fs.h6
-rw-r--r--include/linux/mount.h4
-rw-r--r--include/linux/stat.h1
-rw-r--r--include/linux/syscalls.h6
-rw-r--r--include/uapi/asm-generic/unistd.h4
-rw-r--r--include/uapi/linux/fcntl.h10
-rw-r--r--include/uapi/linux/stat.h18
-rw-r--r--samples/vfs/test-statx.c2
-rw-r--r--tools/include/uapi/linux/stat.h11
39 files changed, 234 insertions, 96 deletions
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 36d42da7466a..5ddd128d4b7a 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -477,3 +477,4 @@
# 545 reserved for clone3
547 common openat2 sys_openat2
548 common pidfd_getfd sys_pidfd_getfd
+549 common faccessat2 sys_faccessat2
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 4d1cf74a2caa..d5cae5ffede0 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -451,3 +451,4 @@
435 common clone3 sys_clone3
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 803039d504de..3b859596840d 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -38,7 +38,7 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
-#define __NR_compat_syscalls 439
+#define __NR_compat_syscalls 440
#endif
#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index c1c61635f89c..6d95d0c8bf2f 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -883,6 +883,8 @@ __SYSCALL(__NR_clone3, sys_clone3)
__SYSCALL(__NR_openat2, sys_openat2)
#define __NR_pidfd_getfd 438
__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
+#define __NR_faccessat2 439
+__SYSCALL(__NR_faccessat2, sys_faccessat2)
/*
* Please add new compat syscalls above this comment and update
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 042911e670b8..49e325b604b3 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -358,3 +358,4 @@
# 435 reserved for clone3
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index f4f49fcb76d0..f71b1bbcc198 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -437,3 +437,4 @@
435 common clone3 __sys_clone3
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 4c67b11f9c9e..edacc4561f2b 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -443,3 +443,4 @@
435 common clone3 sys_clone3
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 1f9e8ad636cc..f777141f5256 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -376,3 +376,4 @@
435 n32 clone3 __sys_clone3
437 n32 openat2 sys_openat2
438 n32 pidfd_getfd sys_pidfd_getfd
+439 n32 faccessat2 sys_faccessat2
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index c0b9d802dbf6..da8c76394e17 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -352,3 +352,4 @@
435 n64 clone3 __sys_clone3
437 n64 openat2 sys_openat2
438 n64 pidfd_getfd sys_pidfd_getfd
+439 n64 faccessat2 sys_faccessat2
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index ac586774c980..13280625d312 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -425,3 +425,4 @@
435 o32 clone3 __sys_clone3
437 o32 openat2 sys_openat2
438 o32 pidfd_getfd sys_pidfd_getfd
+439 o32 faccessat2 sys_faccessat2
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 52a15f5cd130..5a758fa6ec52 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -435,3 +435,4 @@
435 common clone3 sys_clone3_wrapper
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 220ae11555f2..f833a3190822 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -527,3 +527,4 @@
435 spu clone3 sys_ni_syscall
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index bd7bd3581a0f..bfdcb7633957 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -440,3 +440,4 @@
435 common clone3 sys_clone3 sys_clone3
437 common openat2 sys_openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2 sys_faccessat2
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index c7a30fcd135f..acc35daa1b79 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -440,3 +440,4 @@
# 435 reserved for clone3
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index f13615ecdecc..8004a276cb74 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -483,3 +483,4 @@
# 435 reserved for clone3
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 54581ac671b4..d8f8a1a69ed1 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -442,3 +442,4 @@
435 i386 clone3 sys_clone3
437 i386 openat2 sys_openat2
438 i386 pidfd_getfd sys_pidfd_getfd
+439 i386 faccessat2 sys_faccessat2
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 37b844f839bc..78847b32e137 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -359,6 +359,7 @@
435 common clone3 sys_clone3
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 85a9ab1bc04d..69d0d73876b3 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -408,3 +408,4 @@
435 common clone3 sys_clone3
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
diff --git a/fs/aio.c b/fs/aio.c
index 5f3d3d814928..6483f9274d5e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -176,6 +176,7 @@ struct fsync_iocb {
struct file *file;
struct work_struct work;
bool datasync;
+ struct cred *creds;
};
struct poll_iocb {
@@ -1589,8 +1590,11 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
static void aio_fsync_work(struct work_struct *work)
{
struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, fsync.work);
+ const struct cred *old_cred = override_creds(iocb->fsync.creds);
iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync);
+ revert_creds(old_cred);
+ put_cred(iocb->fsync.creds);
iocb_put(iocb);
}
@@ -1604,6 +1608,10 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
if (unlikely(!req->file->f_op->fsync))
return -EINVAL;
+ req->creds = prepare_creds();
+ if (!req->creds)
+ return -ENOMEM;
+
req->datasync = datasync;
INIT_WORK(&req->work, aio_fsync_work);
schedule_work(&req->work);
diff --git a/fs/char_dev.c b/fs/char_dev.c
index c5e6eff5a381..ba0ded7842a7 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -483,6 +483,9 @@ int cdev_add(struct cdev *p, dev_t dev, unsigned count)
p->dev = dev;
p->count = count;
+ if (WARN_ON(dev == WHITEOUT_DEV))
+ return -EBUSY;
+
error = kobj_map(cdev_map, dev, count, NULL,
exact_match, exact_lock, p);
if (error)
diff --git a/fs/fs_context.c b/fs/fs_context.c
index fc9f6ef93b55..7d5c5dd2b1d5 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -42,7 +42,6 @@ static const struct constant_table common_set_sb_flag[] = {
{ "dirsync", SB_DIRSYNC },
{ "lazytime", SB_LAZYTIME },
{ "mand", SB_MANDLOCK },
- { "posixacl", SB_POSIXACL },
{ "ro", SB_RDONLY },
{ "sync", SB_SYNCHRONOUS },
{ },
@@ -53,44 +52,15 @@ static const struct constant_table common_clear_sb_flag[] = {
{ "nolazytime", SB_LAZYTIME },
{ "nomand", SB_MANDLOCK },
{ "rw", SB_RDONLY },
- { "silent", SB_SILENT },
{ },
};
-static const char *const forbidden_sb_flag[] = {
- "bind",
- "dev",
- "exec",
- "move",
- "noatime",
- "nodev",
- "nodiratime",
- "noexec",
- "norelatime",
- "nostrictatime",
- "nosuid",
- "private",
- "rec",
- "relatime",
- "remount",
- "shared",
- "slave",
- "strictatime",
- "suid",
- "unbindable",
-};
-
/*
* Check for a common mount option that manipulates s_flags.
*/
static int vfs_parse_sb_flag(struct fs_context *fc, const char *key)
{
unsigned int token;
- unsigned int i;
-
- for (i = 0; i < ARRAY_SIZE(forbidden_sb_flag); i++)
- if (strcmp(key, forbidden_sb_flag[i]) == 0)
- return -EINVAL;
token = lookup_constant(common_set_sb_flag, key, 0);
if (token) {
diff --git a/fs/internal.h b/fs/internal.h
index aa5d45524e87..0d467e32dd7e 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -126,7 +126,6 @@ extern struct open_how build_open_how(int flags, umode_t mode);
extern int build_open_flags(const struct open_how *how, struct open_flags *op);
long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
-long do_faccessat(int dfd, const char __user *filename, int mode);
int do_fchmodat(int dfd, const char __user *filename, umode_t mode);
int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
int flag);
diff --git a/fs/mount.h b/fs/mount.h
index 711a4093e475..c7abb7b394d8 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -9,7 +9,13 @@ struct mnt_namespace {
atomic_t count;
struct ns_common ns;
struct mount * root;
+ /*
+ * Traversal and modification of .list is protected by either
+ * - taking namespace_sem for write, OR
+ * - taking namespace_sem for read AND taking .ns_lock.
+ */
struct list_head list;
+ spinlock_t ns_lock;
struct user_namespace *user_ns;
struct ucounts *ucounts;
u64 seq; /* Sequence number to prevent loops */
@@ -133,9 +139,7 @@ struct proc_mounts {
struct mnt_namespace *ns;
struct path root;
int (*show)(struct seq_file *, struct vfsmount *);
- void *cached_mount;
- u64 cached_event;
- loff_t cached_index;
+ struct mount cursor;
};
extern const struct seq_operations mounts_op;
@@ -153,3 +157,5 @@ static inline bool is_anon_ns(struct mnt_namespace *ns)
{
return ns->seq == 0;
}
+
+extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor);
diff --git a/fs/namei.c b/fs/namei.c
index a320371899cf..d81f73ff1a8b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3505,12 +3505,14 @@ EXPORT_SYMBOL(user_path_create);
int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
{
+ bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV;
int error = may_create(dir, dentry);
if (error)
return error;
- if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
+ if ((S_ISCHR(mode) || S_ISBLK(mode)) && !is_whiteout &&
+ !capable(CAP_MKNOD))
return -EPERM;
if (!dir->i_op->mknod)
@@ -4345,9 +4347,6 @@ static int do_renameat2(int olddfd, const char __user *oldname, int newdfd,
(flags & RENAME_EXCHANGE))
return -EINVAL;
- if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD))
- return -EPERM;
-
if (flags & RENAME_EXCHANGE)
target_flags = 0;
@@ -4483,20 +4482,6 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna
return do_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
}
-int vfs_whiteout(struct inode *dir, struct dentry *dentry)
-{
- int error = may_create(dir, dentry);
- if (error)
- return error;
-
- if (!dir->i_op->mknod)
- return -EPERM;
-
- return dir->i_op->mknod(dir, dentry,
- S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
-}
-EXPORT_SYMBOL(vfs_whiteout);
-
int readlink_copy(char __user *buffer, int buflen, const char *link)
{
int len = PTR_ERR(link);
diff --git a/fs/namespace.c b/fs/namespace.c
index 5f036dc711b6..a6baee3c7904 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -648,6 +648,21 @@ struct vfsmount *lookup_mnt(const struct path *path)
return m;
}
+static inline void lock_ns_list(struct mnt_namespace *ns)
+{
+ spin_lock(&ns->ns_lock);
+}
+
+static inline void unlock_ns_list(struct mnt_namespace *ns)
+{
+ spin_unlock(&ns->ns_lock);
+}
+
+static inline bool mnt_is_cursor(struct mount *mnt)
+{
+ return mnt->mnt.mnt_flags & MNT_CURSOR;
+}
+
/*
* __is_local_mountpoint - Test to see if dentry is a mountpoint in the
* current mount namespace.
@@ -673,11 +688,15 @@ bool __is_local_mountpoint(struct dentry *dentry)
goto out;
down_read(&namespace_sem);
+ lock_ns_list(ns);
list_for_each_entry(mnt, &ns->list, mnt_list) {
+ if (mnt_is_cursor(mnt))
+ continue;
is_covered = (mnt->mnt_mountpoint == dentry);
if (is_covered)
break;
}
+ unlock_ns_list(ns);
up_read(&namespace_sem);
out:
return is_covered;
@@ -1245,46 +1264,71 @@ struct vfsmount *mnt_clone_internal(const struct path *path)
}
#ifdef CONFIG_PROC_FS
+static struct mount *mnt_list_next(struct mnt_namespace *ns,
+ struct list_head *p)
+{
+ struct mount *mnt, *ret = NULL;
+
+ lock_ns_list(ns);
+ list_for_each_continue(p, &ns->list) {
+ mnt = list_entry(p, typeof(*mnt), mnt_list);
+ if (!mnt_is_cursor(mnt)) {
+ ret = mnt;
+ break;
+ }
+ }
+ unlock_ns_list(ns);
+
+ return ret;
+}
+
/* iterator; we want it to have access to namespace_sem, thus here... */
static void *m_start(struct seq_file *m, loff_t *pos)
{
struct proc_mounts *p = m->private;
+ struct list_head *prev;
down_read(&namespace_sem);
- if (p->cached_event == p->ns->event) {
- void *v = p->cached_mount;
- if (*pos == p->cached_index)
- return v;
- if (*pos == p->cached_index + 1) {
- v = seq_list_next(v, &p->ns->list, &p->cached_index);
- return p->cached_mount = v;
- }
+ if (!*pos) {
+ prev = &p->ns->list;
+ } else {
+ prev = &p->cursor.mnt_list;
+
+ /* Read after we'd reached the end? */
+ if (list_empty(prev))
+ return NULL;
}
- p->cached_event = p->ns->event;
- p->cached_mount = seq_list_start(&p->ns->list, *pos);
- p->cached_index = *pos;
- return p->cached_mount;
+ return mnt_list_next(p->ns, prev);
}
static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
struct proc_mounts *p = m->private;
+ struct mount *mnt = v;
- p->cached_mount = seq_list_next(v, &p->ns->list, pos);
- p->cached_index = *pos;
- return p->cached_mount;
+ ++*pos;
+ return mnt_list_next(p->ns, &mnt->mnt_list);
}
static void m_stop(struct seq_file *m, void *v)
{
+ struct proc_mounts *p = m->private;
+ struct mount *mnt = v;
+
+ lock_ns_list(p->ns);
+ if (mnt)
+ list_move_tail(&p->cursor.mnt_list, &mnt->mnt_list);
+ else
+ list_del_init(&p->cursor.mnt_list);
+ unlock_ns_list(p->ns);
up_read(&namespace_sem);
}
static int m_show(struct seq_file *m, void *v)
{
struct proc_mounts *p = m->private;
- struct mount *r = list_entry(v, struct mount, mnt_list);
+ struct mount *r = v;
return p->show(m, &r->mnt);
}
@@ -1294,6 +1338,15 @@ const struct seq_operations mounts_op = {
.stop = m_stop,
.show = m_show,
};
+
+void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor)
+{
+ down_read(&namespace_sem);
+ lock_ns_list(ns);
+ list_del(&cursor->mnt_list);
+ unlock_ns_list(ns);
+ up_read(&namespace_sem);
+}
#endif /* CONFIG_PROC_FS */
/**
@@ -3202,6 +3255,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
atomic_set(&new_ns->count, 1);
INIT_LIST_HEAD(&new_ns->list);
init_waitqueue_head(&new_ns->poll);
+ spin_lock_init(&new_ns->ns_lock);
new_ns->user_ns = get_user_ns(user_ns);
new_ns->ucounts = ucounts;
return new_ns;
@@ -3842,10 +3896,14 @@ static bool mnt_already_visible(struct mnt_namespace *ns,
bool visible = false;
down_read(&namespace_sem);
+ lock_ns_list(ns);
list_for_each_entry(mnt, &ns->list, mnt_list) {
struct mount *child;
int mnt_flags;
+ if (mnt_is_cursor(mnt))
+ continue;
+
if (mnt->mnt.mnt_sb->s_type != sb->s_type)
continue;
@@ -3893,6 +3951,7 @@ static bool mnt_already_visible(struct mnt_namespace *ns,
next: ;
}
found:
+ unlock_ns_list(ns);
up_read(&namespace_sem);
return visible;
}
diff --git a/fs/open.c b/fs/open.c
index 719b320ede52..e62b1db06638 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -345,21 +345,14 @@ SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len)
* We do this by temporarily clearing all FS-related capabilities and
* switching the fsuid/fsgid around to the real ones.
*/
-long do_faccessat(int dfd, const char __user *filename, int mode)
+static const struct cred *access_override_creds(void)
{
const struct cred *old_cred;
struct cred *override_cred;
- struct path path;
- struct inode *inode;
- int res;
- unsigned int lookup_flags = LOOKUP_FOLLOW;
-
- if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
- return -EINVAL;
override_cred = prepare_creds();
if (!override_cred)
- return -ENOMEM;
+ return NULL;
override_cred->fsuid = override_cred->uid;
override_cred->fsgid = override_cred->gid;
@@ -394,6 +387,38 @@ long do_faccessat(int dfd, const char __user *filename, int mode)
override_cred->non_rcu = 1;
old_cred = override_creds(override_cred);
+
+ /* override_cred() gets its own ref */
+ put_cred(override_cred);
+
+ return old_cred;
+}
+
+long do_faccessat(int dfd, const char __user *filename, int mode, int flags)
+{
+ struct path path;
+ struct inode *inode;
+ int res;
+ unsigned int lookup_flags = LOOKUP_FOLLOW;
+ const struct cred *old_cred = NULL;
+
+ if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
+ return -EINVAL;
+
+ if (flags & ~(AT_EACCESS | AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH))
+ return -EINVAL;
+
+ if (flags & AT_SYMLINK_NOFOLLOW)
+ lookup_flags &= ~LOOKUP_FOLLOW;
+ if (flags & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
+
+ if (!(flags & AT_EACCESS)) {
+ old_cred = access_override_creds();
+ if (!old_cred)
+ return -ENOMEM;
+ }
+
retry:
res = user_path_at(dfd, filename, lookup_flags, &path);
if (res)
@@ -435,19 +460,26 @@ out_path_release:
goto retry;
}
out:
- revert_creds(old_cred);
- put_cred(override_cred);
+ if (old_cred)
+ revert_creds(old_cred);
+
return res;
}
SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
{
- return do_faccessat(dfd, filename, mode);
+ return do_faccessat(dfd, filename, mode, 0);
+}
+
+SYSCALL_DEFINE4(faccessat2, int, dfd, const char __user *, filename, int, mode,
+ int, flags)
+{
+ return do_faccessat(dfd, filename, mode, flags);
}
SYSCALL_DEFINE2(access, const char __user *, filename, int, mode)
{
- return do_faccessat(AT_FDCWD, filename, mode);
+ return do_faccessat(AT_FDCWD, filename, mode, 0);
}
int ksys_chdir(const char __user *filename)
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 273ee82d8aa9..e4d70c0dffe9 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -279,7 +279,8 @@ static int mounts_open_common(struct inode *inode, struct file *file,
p->ns = ns;
p->root = root;
p->show = show;
- p->cached_event = ~0ULL;
+ INIT_LIST_HEAD(&p->cursor.mnt_list);
+ p->cursor.mnt.mnt_flags = MNT_CURSOR;
return 0;
@@ -296,6 +297,7 @@ static int mounts_release(struct inode *inode, struct file *file)
struct seq_file *m = file->private_data;
struct proc_mounts *p = m->private;
path_put(&p->root);
+ mnt_cursor_del(p->ns, &p->cursor);
put_mnt_ns(p->ns);
return seq_release_private(inode, file);
}
diff --git a/fs/stat.c b/fs/stat.c
index 030008796479..b9faa6cafafe 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -22,6 +22,7 @@
#include <asm/unistd.h>
#include "internal.h"
+#include "mount.h"
/**
* generic_fillattr - Fill in the basic attributes from the inode struct
@@ -70,11 +71,11 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
memset(stat, 0, sizeof(*stat));
stat->result_mask |= STATX_BASIC_STATS;
- request_mask &= STATX_ALL;
query_flags &= KSTAT_QUERY_FLAGS;
/* allow the fs to override these if it really wants to */
- if (IS_NOATIME(inode))
+ /* SB_NOATIME means filesystem supplies dummy atime value */
+ if (inode->i_sb->s_flags & SB_NOATIME)
stat->result_mask &= ~STATX_ATIME;
if (IS_AUTOMOUNT(inode))
stat->attributes |= STATX_ATTR_AUTOMOUNT;
@@ -199,6 +200,11 @@ retry:
goto out;
error = vfs_getattr(&path, stat, request_mask, flags);
+ stat->mnt_id = real_mount(path.mnt)->mnt_id;
+ stat->result_mask |= STATX_MNT_ID;
+ if (path.mnt->mnt_root == path.dentry)
+ stat->attributes |= STATX_ATTR_MOUNT_ROOT;
+ stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT;
path_put(&path);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
@@ -563,6 +569,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
tmp.stx_rdev_minor = MINOR(stat->rdev);
tmp.stx_dev_major = MAJOR(stat->dev);
tmp.stx_dev_minor = MINOR(stat->dev);
+ tmp.stx_mnt_id = stat->mnt_id;
return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
}
diff --git a/fs/utimes.c b/fs/utimes.c
index 1d17ce98cb80..b7b927502d6e 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -95,13 +95,13 @@ long do_utimes(int dfd, const char __user *filename, struct timespec64 *times,
goto out;
}
- if (flags & ~AT_SYMLINK_NOFOLLOW)
+ if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH))
goto out;
if (filename == NULL && dfd != AT_FDCWD) {
struct fd f;
- if (flags & AT_SYMLINK_NOFOLLOW)
+ if (flags)
goto out;
f = fdget(dfd);
@@ -117,6 +117,8 @@ long do_utimes(int dfd, const char __user *filename, struct timespec64 *times,
if (!(flags & AT_SYMLINK_NOFOLLOW))
lookup_flags |= LOOKUP_FOLLOW;
+ if (flags & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
retry:
error = user_path_at(dfd, filename, lookup_flags, &path);
if (error)
diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h
index 9a72214496e5..d02f32b7514e 100644
--- a/include/linux/device_cgroup.h
+++ b/include/linux/device_cgroup.h
@@ -44,6 +44,9 @@ static inline int devcgroup_inode_mknod(int mode, dev_t dev)
if (!S_ISBLK(mode) && !S_ISCHR(mode))
return 0;
+ if (S_ISCHR(mode) && dev == WHITEOUT_DEV)
+ return 0;
+
if (S_ISBLK(mode))
type = DEVCG_DEV_BLOCK;
else
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 45cc10cdf6dd..109b5d9dbdc7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1721,7 +1721,11 @@ extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct ino
extern int vfs_rmdir(struct inode *, struct dentry *);
extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
-extern int vfs_whiteout(struct inode *, struct dentry *);
+
+static inline int vfs_whiteout(struct inode *dir, struct dentry *dentry)
+{
+ return vfs_mknod(dir, dentry, S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
+}
extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode,
int open_flag);
diff --git a/include/linux/mount.h b/include/linux/mount.h
index bf8cc4108b8f..7edac8c7a9c1 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -50,7 +50,8 @@ struct fs_context;
#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME )
#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
- MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED)
+ MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | \
+ MNT_CURSOR)
#define MNT_INTERNAL 0x4000
@@ -64,6 +65,7 @@ struct fs_context;
#define MNT_SYNC_UMOUNT 0x2000000
#define MNT_MARKED 0x4000000
#define MNT_UMOUNT 0x8000000
+#define MNT_CURSOR 0x10000000
struct vfsmount {
struct dentry *mnt_root; /* root of the mounted tree */
diff --git a/include/linux/stat.h b/include/linux/stat.h
index 528c4baad091..56614af83d4a 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -47,6 +47,7 @@ struct kstat {
struct timespec64 ctime;
struct timespec64 btime; /* File creation time */
u64 blocks;
+ u64 mnt_id;
};
#endif
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 1815065d52f3..7c354c2955f5 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -428,6 +428,8 @@ asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length);
#endif
asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode);
+asmlinkage long sys_faccessat2(int dfd, const char __user *filename, int mode,
+ int flags);
asmlinkage long sys_chdir(const char __user *filename);
asmlinkage long sys_fchdir(unsigned int fd);
asmlinkage long sys_chroot(const char __user *filename);
@@ -1333,11 +1335,11 @@ static inline int ksys_chmod(const char __user *filename, umode_t mode)
return do_fchmodat(AT_FDCWD, filename, mode);
}
-extern long do_faccessat(int dfd, const char __user *filename, int mode);
+long do_faccessat(int dfd, const char __user *filename, int mode, int flags);
static inline long ksys_access(const char __user *filename, int mode)
{
- return do_faccessat(AT_FDCWD, filename, mode);
+ return do_faccessat(AT_FDCWD, filename, mode, 0);
}
extern int do_fchownat(int dfd, const char __user *filename, uid_t user,
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 3a3201e4618e..f4a01305d9a6 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -855,9 +855,11 @@ __SYSCALL(__NR_clone3, sys_clone3)
__SYSCALL(__NR_openat2, sys_openat2)
#define __NR_pidfd_getfd 438
__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
+#define __NR_faccessat2 439
+__SYSCALL(__NR_faccessat2, sys_faccessat2)
#undef __NR_syscalls
-#define __NR_syscalls 439
+#define __NR_syscalls 440
/*
* 32 bit systems traditionally used different
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index ca88b7bce553..2f86b2ad6d7e 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -84,10 +84,20 @@
#define DN_ATTRIB 0x00000020 /* File changed attibutes */
#define DN_MULTISHOT 0x80000000 /* Don't remove notifier */
+/*
+ * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS is
+ * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to
+ * unlinkat. The two functions do completely different things and therefore,
+ * the flags can be allowed to overlap. For example, passing AT_REMOVEDIR to
+ * faccessat would be undefined behavior and thus treating it equivalent to
+ * AT_EACCESS is valid undefined behavior.
+ */
#define AT_FDCWD -100 /* Special value used to indicate
openat should use the current
working directory. */
#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */
+#define AT_EACCESS 0x200 /* Test access permitted for
+ effective IDs, not real IDs. */
#define AT_REMOVEDIR 0x200 /* Remove directory instead of
unlinking file. */
#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index ad80a5c885d5..6df9348bb277 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -123,7 +123,10 @@ struct statx {
__u32 stx_dev_major; /* ID of device containing file [uncond] */
__u32 stx_dev_minor;
/* 0x90 */
- __u64 __spare2[14]; /* Spare space for future expansion */
+ __u64 stx_mnt_id;
+ __u64 __spare2;
+ /* 0xa0 */
+ __u64 __spare3[12]; /* Spare space for future expansion */
/* 0x100 */
};
@@ -148,9 +151,19 @@ struct statx {
#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */
#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */
#define STATX_BTIME 0x00000800U /* Want/got stx_btime */
-#define STATX_ALL 0x00000fffU /* All currently supported flags */
+#define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */
+
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
+#ifndef __KERNEL__
+/*
+ * This is deprecated, and shall remain the same value in the future. To avoid
+ * confusion please use the equivalent (STATX_BASIC_STATS | STATX_BTIME)
+ * instead.
+ */
+#define STATX_ALL 0x00000fffU
+#endif
+
/*
* Attributes to be found in stx_attributes and masked in stx_attributes_mask.
*
@@ -168,6 +181,7 @@ struct statx {
#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */
#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */
#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */
+#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */
#define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */
diff --git a/samples/vfs/test-statx.c b/samples/vfs/test-statx.c
index a3d68159fb51..76c577ea4fd8 100644
--- a/samples/vfs/test-statx.c
+++ b/samples/vfs/test-statx.c
@@ -216,7 +216,7 @@ int main(int argc, char **argv)
struct statx stx;
int ret, raw = 0, atflag = AT_SYMLINK_NOFOLLOW;
- unsigned int mask = STATX_ALL;
+ unsigned int mask = STATX_BASIC_STATS | STATX_BTIME;
for (argv++; *argv; argv++) {
if (strcmp(*argv, "-F") == 0) {
diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
index ad80a5c885d5..d1192783139a 100644
--- a/tools/include/uapi/linux/stat.h
+++ b/tools/include/uapi/linux/stat.h
@@ -148,9 +148,18 @@ struct statx {
#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */
#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */
#define STATX_BTIME 0x00000800U /* Want/got stx_btime */
-#define STATX_ALL 0x00000fffU /* All currently supported flags */
+
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
+#ifndef __KERNEL__
+/*
+ * This is deprecated, and shall remain the same value in the future. To avoid
+ * confusion please use the equivalent (STATX_BASIC_STATS | STATX_BTIME)
+ * instead.
+ */
+#define STATX_ALL 0x00000fffU
+#endif
+
/*
* Attributes to be found in stx_attributes and masked in stx_attributes_mask.
*