summaryrefslogtreecommitdiff
path: root/fs/overlayfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/overlayfs')
-rw-r--r--fs/overlayfs/Kconfig20
-rw-r--r--fs/overlayfs/Makefile3
-rw-r--r--fs/overlayfs/copy_up.c875
-rw-r--r--fs/overlayfs/dir.c1037
-rw-r--r--fs/overlayfs/export.c278
-rw-r--r--fs/overlayfs/file.c645
-rw-r--r--fs/overlayfs/inode.c857
-rw-r--r--fs/overlayfs/namei.c1007
-rw-r--r--fs/overlayfs/overlayfs.h772
-rw-r--r--fs/overlayfs/ovl_entry.h155
-rw-r--r--fs/overlayfs/params.c1081
-rw-r--r--fs/overlayfs/params.h44
-rw-r--r--fs/overlayfs/readdir.c599
-rw-r--r--fs/overlayfs/super.c1996
-rw-r--r--fs/overlayfs/util.c1012
-rw-r--r--fs/overlayfs/xattrs.c261
16 files changed, 7442 insertions, 3200 deletions
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index 2ef91be2a04e..2ac67e04a6fb 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -1,5 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
config OVERLAY_FS
tristate "Overlay filesystem support"
+ select FS_STACK
select EXPORTFS
help
An overlay filesystem combines two filesystems - an 'upper' filesystem
@@ -8,7 +10,7 @@ config OVERLAY_FS
'lower' filesystem is either hidden or, in the case of directories,
merged with the 'upper' object.
- For more information see Documentation/filesystems/overlayfs.txt
+ For more information see Documentation/filesystems/overlayfs.rst
config OVERLAY_FS_REDIRECT_DIR
bool "Overlayfs: turn on redirect directory feature by default"
@@ -37,7 +39,7 @@ config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW
If backward compatibility is not an issue, then it is safe and
recommended to say N here.
- For more information, see Documentation/filesystems/overlayfs.txt
+ For more information, see Documentation/filesystems/overlayfs.rst
If unsure, say Y.
@@ -92,16 +94,17 @@ config OVERLAY_FS_XINO_AUTO
bool "Overlayfs: auto enable inode number mapping"
default n
depends on OVERLAY_FS
+ depends on 64BIT
help
If this config option is enabled then overlay filesystems will use
- unused high bits in undelying filesystem inode numbers to map all
+ unused high bits in underlying filesystem inode numbers to map all
inodes to a unified address space. The mapped 64bit inode numbers
might not be compatible with applications that expect 32bit inodes.
If compatibility with applications that expect 32bit inodes is not an
issue, then it is safe and recommended to say Y here.
- For more information, see Documentation/filesystems/overlayfs.txt
+ For more information, see Documentation/filesystems/overlayfs.rst
If unsure, say N.
@@ -122,3 +125,12 @@ config OVERLAY_FS_METACOPY
that doesn't support this feature will have unexpected results.
If unsure, say N.
+
+config OVERLAY_FS_DEBUG
+ bool "Overlayfs: turn on extra debugging checks"
+ default n
+ depends on OVERLAY_FS
+ help
+ Say Y here to enable extra debugging checks in overlayfs.
+
+ If unsure, say N.
diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile
index 46e1ff8ac056..5648954f8588 100644
--- a/fs/overlayfs/Makefile
+++ b/fs/overlayfs/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
#
# Makefile for the overlay filesystem.
#
@@ -5,4 +6,4 @@
obj-$(CONFIG_OVERLAY_FS) += overlay.o
overlay-objs := super.o namei.o util.o inode.o file.o dir.o readdir.o \
- copy_up.o export.o
+ copy_up.o export.o params.o xattrs.o
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 9e62dcf06fc4..758611ee4475 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -1,16 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
*
* Copyright (C) 2011 Novell Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/file.h>
+#include <linux/fileattr.h>
#include <linux/splice.h>
#include <linux/xattr.h>
#include <linux/security.h>
@@ -18,7 +16,6 @@
#include <linux/sched/signal.h>
#include <linux/cred.h>
#include <linux/namei.h>
-#include <linux/fdtable.h>
#include <linux/ratelimit.h>
#include <linux/exportfs.h>
#include "overlayfs.h"
@@ -27,7 +24,7 @@
static int ovl_ccup_set(const char *buf, const struct kernel_param *param)
{
- pr_warn("overlayfs: \"check_copy_up\" module option is obsolete\n");
+ pr_warn("\"check_copy_up\" module option is obsolete\n");
return 0;
}
@@ -37,17 +34,53 @@ static int ovl_ccup_get(char *buf, const struct kernel_param *param)
}
module_param_call(check_copy_up, ovl_ccup_set, ovl_ccup_get, NULL, 0644);
-MODULE_PARM_DESC(ovl_check_copy_up, "Obsolete; does nothing");
+MODULE_PARM_DESC(check_copy_up, "Obsolete; does nothing");
-int ovl_copy_xattr(struct dentry *old, struct dentry *new)
+static bool ovl_must_copy_xattr(const char *name)
{
+ return !strcmp(name, XATTR_POSIX_ACL_ACCESS) ||
+ !strcmp(name, XATTR_POSIX_ACL_DEFAULT) ||
+ !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN);
+}
+
+static int ovl_copy_acl(struct ovl_fs *ofs, const struct path *path,
+ struct dentry *dentry, const char *acl_name)
+{
+ int err;
+ struct posix_acl *clone, *real_acl = NULL;
+
+ real_acl = ovl_get_acl_path(path, acl_name, false);
+ if (!real_acl)
+ return 0;
+
+ if (IS_ERR(real_acl)) {
+ err = PTR_ERR(real_acl);
+ if (err == -ENODATA || err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ clone = posix_acl_clone(real_acl, GFP_KERNEL);
+ posix_acl_release(real_acl); /* release original acl */
+ if (!clone)
+ return -ENOMEM;
+
+ err = ovl_do_set_acl(ofs, dentry, acl_name, clone);
+
+ /* release cloned acl */
+ posix_acl_release(clone);
+ return err;
+}
+
+int ovl_copy_xattr(struct super_block *sb, const struct path *oldpath, struct dentry *new)
+{
+ struct dentry *old = oldpath->dentry;
ssize_t list_size, size, value_size = 0;
char *buf, *name, *value = NULL;
- int uninitialized_var(error);
+ int error = 0;
size_t slen;
- if (!(old->d_inode->i_opflags & IOP_XATTR) ||
- !(new->d_inode->i_opflags & IOP_XATTR))
+ if (!old->d_inode->i_op->listxattr || !new->d_inode->i_op->listxattr)
return 0;
list_size = vfs_listxattr(old, NULL, 0);
@@ -57,7 +90,7 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
return list_size;
}
- buf = kzalloc(list_size, GFP_KERNEL);
+ buf = kvzalloc(list_size, GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -77,12 +110,29 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
}
list_size -= slen;
- if (ovl_is_private_xattr(name))
+ if (ovl_is_private_xattr(sb, name))
continue;
+
+ error = security_inode_copy_up_xattr(old, name);
+ if (error == -ECANCELED) {
+ error = 0;
+ continue; /* Discard */
+ }
+ if (error < 0 && error != -EOPNOTSUPP)
+ break;
+
+ if (is_posix_acl_xattr(name)) {
+ error = ovl_copy_acl(OVL_FS(sb), oldpath, new, name);
+ if (!error)
+ continue;
+ /* POSIX ACLs must be copied. */
+ break;
+ }
+
retry:
- size = vfs_getxattr(old, name, value, value_size);
+ size = ovl_do_getxattr(oldpath, name, value, value_size);
if (size == -ERANGE)
- size = vfs_getxattr(old, name, NULL, 0);
+ size = ovl_do_getxattr(oldpath, name, NULL, 0);
if (size < 0) {
error = size;
@@ -92,65 +142,163 @@ retry:
if (size > value_size) {
void *new;
- new = krealloc(value, size, GFP_KERNEL);
+ new = kvmalloc(size, GFP_KERNEL);
if (!new) {
error = -ENOMEM;
break;
}
+ kvfree(value);
value = new;
value_size = size;
goto retry;
}
- error = security_inode_copy_up_xattr(name);
- if (error < 0 && error != -EOPNOTSUPP)
- break;
- if (error == 1) {
+ error = ovl_do_setxattr(OVL_FS(sb), new, name, value, size, 0);
+ if (error) {
+ if (error != -EOPNOTSUPP || ovl_must_copy_xattr(name))
+ break;
+
+ /* Ignore failure to copy unknown xattrs */
error = 0;
- continue; /* Discard */
}
- error = vfs_setxattr(new, name, value, size, 0);
- if (error)
- break;
}
- kfree(value);
+ kvfree(value);
out:
- kfree(buf);
+ kvfree(buf);
return error;
}
-static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
+static int ovl_copy_fileattr(struct inode *inode, const struct path *old,
+ const struct path *new)
+{
+ struct file_kattr oldfa = { .flags_valid = true };
+ struct file_kattr newfa = { .flags_valid = true };
+ int err;
+
+ err = ovl_real_fileattr_get(old, &oldfa);
+ if (err) {
+ /* Ntfs-3g returns -EINVAL for "no fileattr support" */
+ if (err == -ENOTTY || err == -EINVAL)
+ return 0;
+ pr_warn("failed to retrieve lower fileattr (%pd2, err=%i)\n",
+ old->dentry, err);
+ return err;
+ }
+
+ /*
+ * We cannot set immutable and append-only flags on upper inode,
+ * because we would not be able to link upper inode to upper dir
+ * not set overlay private xattr on upper inode.
+ * Store these flags in overlay.protattr xattr instead.
+ */
+ if (oldfa.flags & OVL_PROT_FS_FLAGS_MASK) {
+ err = ovl_set_protattr(inode, new->dentry, &oldfa);
+ if (err == -EPERM)
+ pr_warn_once("copying fileattr: no xattr on upper\n");
+ else if (err)
+ return err;
+ }
+
+ /* Don't bother copying flags if none are set */
+ if (!(oldfa.flags & OVL_COPY_FS_FLAGS_MASK))
+ return 0;
+
+ err = ovl_real_fileattr_get(new, &newfa);
+ if (err) {
+ /*
+ * Returning an error if upper doesn't support fileattr will
+ * result in a regression, so revert to the old behavior.
+ */
+ if (err == -ENOTTY || err == -EINVAL) {
+ pr_warn_once("copying fileattr: no support on upper\n");
+ return 0;
+ }
+ pr_warn("failed to retrieve upper fileattr (%pd2, err=%i)\n",
+ new->dentry, err);
+ return err;
+ }
+
+ BUILD_BUG_ON(OVL_COPY_FS_FLAGS_MASK & ~FS_COMMON_FL);
+ newfa.flags &= ~OVL_COPY_FS_FLAGS_MASK;
+ newfa.flags |= (oldfa.flags & OVL_COPY_FS_FLAGS_MASK);
+
+ BUILD_BUG_ON(OVL_COPY_FSX_FLAGS_MASK & ~FS_XFLAG_COMMON);
+ newfa.fsx_xflags &= ~OVL_COPY_FSX_FLAGS_MASK;
+ newfa.fsx_xflags |= (oldfa.fsx_xflags & OVL_COPY_FSX_FLAGS_MASK);
+
+ return ovl_real_fileattr_set(new, &newfa);
+}
+
+static int ovl_verify_area(loff_t pos, loff_t pos2, loff_t len, loff_t totlen)
+{
+ loff_t tmp;
+
+ if (pos != pos2)
+ return -EIO;
+ if (pos < 0 || len < 0 || totlen < 0)
+ return -EIO;
+ if (check_add_overflow(pos, len, &tmp))
+ return -EIO;
+ return 0;
+}
+
+static int ovl_sync_file(const struct path *path)
{
- struct file *old_file;
struct file *new_file;
+ int err;
+
+ new_file = ovl_path_open(path, O_LARGEFILE | O_RDONLY);
+ if (IS_ERR(new_file))
+ return PTR_ERR(new_file);
+
+ err = vfs_fsync(new_file, 0);
+ fput(new_file);
+
+ return err;
+}
+
+static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
+ struct file *new_file, loff_t len,
+ bool datasync)
+{
+ struct path datapath;
+ struct file *old_file;
loff_t old_pos = 0;
loff_t new_pos = 0;
loff_t cloned;
+ loff_t data_pos = -1;
+ loff_t hole_len;
+ bool skip_hole = false;
int error = 0;
- if (len == 0)
- return 0;
+ ovl_path_lowerdata(dentry, &datapath);
+ if (WARN_ON_ONCE(datapath.dentry == NULL) ||
+ WARN_ON_ONCE(len < 0))
+ return -EIO;
- old_file = ovl_path_open(old, O_LARGEFILE | O_RDONLY);
+ old_file = ovl_path_open(&datapath, O_LARGEFILE | O_RDONLY);
if (IS_ERR(old_file))
return PTR_ERR(old_file);
- new_file = ovl_path_open(new, O_LARGEFILE | O_WRONLY);
- if (IS_ERR(new_file)) {
- error = PTR_ERR(new_file);
- goto out_fput;
- }
-
/* Try to use clone_file_range to clone up within the same fs */
- cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
+ cloned = vfs_clone_file_range(old_file, 0, new_file, 0, len, 0);
if (cloned == len)
- goto out;
+ goto out_fput;
+
/* Couldn't clone, so now we try to copy the data */
+ error = rw_verify_area(READ, old_file, &old_pos, len);
+ if (!error)
+ error = rw_verify_area(WRITE, new_file, &new_pos, len);
+ if (error)
+ goto out_fput;
+
+ /* Check if lower fs supports seek operation */
+ if (old_file->f_mode & FMODE_LSEEK)
+ skip_hole = true;
- /* FIXME: copy up sparse files efficiently */
while (len) {
size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
- long bytes;
+ ssize_t bytes;
if (len < this_len)
this_len = len;
@@ -160,6 +308,44 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
break;
}
+ /*
+ * Fill zero for hole will cost unnecessary disk space
+ * and meanwhile slow down the copy-up speed, so we do
+ * an optimization for hole during copy-up, it relies
+ * on SEEK_DATA implementation in lower fs so if lower
+ * fs does not support it, copy-up will behave as before.
+ *
+ * Detail logic of hole detection as below:
+ * When we detect next data position is larger than current
+ * position we will skip that hole, otherwise we copy
+ * data in the size of OVL_COPY_UP_CHUNK_SIZE. Actually,
+ * it may not recognize all kind of holes and sometimes
+ * only skips partial of hole area. However, it will be
+ * enough for most of the use cases.
+ *
+ * We do not hold upper sb_writers throughout the loop to avert
+ * lockdep warning with llseek of lower file in nested overlay:
+ * - upper sb_writers
+ * -- lower ovl_inode_lock (ovl_llseek)
+ */
+ if (skip_hole && data_pos < old_pos) {
+ data_pos = vfs_llseek(old_file, old_pos, SEEK_DATA);
+ if (data_pos > old_pos) {
+ hole_len = data_pos - old_pos;
+ len -= hole_len;
+ old_pos = new_pos = data_pos;
+ continue;
+ } else if (data_pos == -ENXIO) {
+ break;
+ } else if (data_pos < 0) {
+ skip_hole = false;
+ }
+ }
+
+ error = ovl_verify_area(old_pos, new_pos, this_len, len);
+ if (error)
+ break;
+
bytes = do_splice_direct(old_file, &old_pos,
new_file, &new_pos,
this_len, SPLICE_F_MOVE);
@@ -171,38 +357,40 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
len -= bytes;
}
-out:
- if (!error)
+ /* call fsync once, either now or later along with metadata */
+ if (!error && ovl_should_sync(ofs) && datasync)
error = vfs_fsync(new_file, 0);
- fput(new_file);
out_fput:
fput(old_file);
return error;
}
-static int ovl_set_size(struct dentry *upperdentry, struct kstat *stat)
+static int ovl_set_size(struct ovl_fs *ofs,
+ struct dentry *upperdentry, struct kstat *stat)
{
struct iattr attr = {
.ia_valid = ATTR_SIZE,
.ia_size = stat->size,
};
- return notify_change(upperdentry, &attr, NULL);
+ return ovl_do_notify_change(ofs, upperdentry, &attr);
}
-static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
+static int ovl_set_timestamps(struct ovl_fs *ofs, struct dentry *upperdentry,
+ struct kstat *stat)
{
struct iattr attr = {
.ia_valid =
- ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
+ ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_CTIME,
.ia_atime = stat->atime,
.ia_mtime = stat->mtime,
};
- return notify_change(upperdentry, &attr, NULL);
+ return ovl_do_notify_change(ofs, upperdentry, &attr);
}
-int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
+int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upperdentry,
+ struct kstat *stat)
{
int err = 0;
@@ -211,32 +399,37 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
.ia_valid = ATTR_MODE,
.ia_mode = stat->mode,
};
- err = notify_change(upperdentry, &attr, NULL);
+ err = ovl_do_notify_change(ofs, upperdentry, &attr);
}
if (!err) {
struct iattr attr = {
.ia_valid = ATTR_UID | ATTR_GID,
- .ia_uid = stat->uid,
- .ia_gid = stat->gid,
+ .ia_vfsuid = VFSUIDT_INIT(stat->uid),
+ .ia_vfsgid = VFSGIDT_INIT(stat->gid),
};
- err = notify_change(upperdentry, &attr, NULL);
+ err = ovl_do_notify_change(ofs, upperdentry, &attr);
}
if (!err)
- ovl_set_timestamps(upperdentry, stat);
+ ovl_set_timestamps(ofs, upperdentry, stat);
return err;
}
-struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
+struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct inode *realinode,
+ bool is_upper)
{
struct ovl_fh *fh;
- int fh_type, fh_len, dwords;
- void *buf;
+ int fh_type, dwords;
int buflen = MAX_HANDLE_SZ;
- uuid_t *uuid = &real->d_sb->s_uuid;
+ uuid_t *uuid = &realinode->i_sb->s_uuid;
+ int err;
- buf = kmalloc(buflen, GFP_KERNEL);
- if (!buf)
+ /* Make sure the real fid stays 32bit aligned */
+ BUILD_BUG_ON(OVL_FH_FID_OFFSET % 4);
+ BUILD_BUG_ON(MAX_HANDLE_SZ + OVL_FH_FID_OFFSET > 255);
+
+ fh = kzalloc(buflen + OVL_FH_FID_OFFSET, GFP_KERNEL);
+ if (!fh)
return ERR_PTR(-ENOMEM);
/*
@@ -245,27 +438,19 @@ struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
* the price or reconnecting the dentry.
*/
dwords = buflen >> 2;
- fh_type = exportfs_encode_fh(real, buf, &dwords, 0);
+ fh_type = exportfs_encode_inode_fh(realinode, (void *)fh->fb.fid,
+ &dwords, NULL, 0);
buflen = (dwords << 2);
- fh = ERR_PTR(-EIO);
- if (WARN_ON(fh_type < 0) ||
- WARN_ON(buflen > MAX_HANDLE_SZ) ||
- WARN_ON(fh_type == FILEID_INVALID))
- goto out;
-
- BUILD_BUG_ON(MAX_HANDLE_SZ + offsetof(struct ovl_fh, fid) > 255);
- fh_len = offsetof(struct ovl_fh, fid) + buflen;
- fh = kmalloc(fh_len, GFP_KERNEL);
- if (!fh) {
- fh = ERR_PTR(-ENOMEM);
- goto out;
- }
+ err = -EIO;
+ if (fh_type < 0 || fh_type == FILEID_INVALID ||
+ WARN_ON(buflen > MAX_HANDLE_SZ))
+ goto out_err;
- fh->version = OVL_FH_VERSION;
- fh->magic = OVL_FH_MAGIC;
- fh->type = fh_type;
- fh->flags = OVL_FH_FLAG_CPU_ENDIAN;
+ fh->fb.version = OVL_FH_VERSION;
+ fh->fb.magic = OVL_FH_MAGIC;
+ fh->fb.type = fh_type;
+ fh->fb.flags = OVL_FH_FLAG_CPU_ENDIAN;
/*
* When we will want to decode an overlay dentry from this handle
* and all layers are on the same fs, if we get a disconncted real
@@ -273,54 +458,58 @@ struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
* it to upperdentry or to lowerstack is by checking this flag.
*/
if (is_upper)
- fh->flags |= OVL_FH_FLAG_PATH_UPPER;
- fh->len = fh_len;
- fh->uuid = *uuid;
- memcpy(fh->fid, buf, buflen);
+ fh->fb.flags |= OVL_FH_FLAG_PATH_UPPER;
+ fh->fb.len = sizeof(fh->fb) + buflen;
+ if (ovl_origin_uuid(ofs))
+ fh->fb.uuid = *uuid;
-out:
- kfree(buf);
return fh;
+
+out_err:
+ kfree(fh);
+ return ERR_PTR(err);
}
-int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
- struct dentry *upper)
+struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin)
{
- const struct ovl_fh *fh = NULL;
- int err;
-
/*
* When lower layer doesn't support export operations store a 'null' fh,
* so we can use the overlay.origin xattr to distignuish between a copy
* up and a pure upper inode.
*/
- if (ovl_can_decode_fh(lower->d_sb)) {
- fh = ovl_encode_real_fh(lower, false);
- if (IS_ERR(fh))
- return PTR_ERR(fh);
- }
+ if (!ovl_can_decode_fh(origin->d_sb))
+ return NULL;
+
+ return ovl_encode_real_fh(ofs, d_inode(origin), false);
+}
+
+int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh,
+ struct dentry *upper)
+{
+ int err;
/*
* Do not fail when upper doesn't support xattrs.
*/
- err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh,
- fh ? fh->len : 0, 0);
- kfree(fh);
+ err = ovl_check_setxattr(ofs, upper, OVL_XATTR_ORIGIN, fh->buf,
+ fh ? fh->fb.len : 0, 0);
- return err;
+ /* Ignore -EPERM from setting "user.*" on symlink/special */
+ return err == -EPERM ? 0 : err;
}
/* Store file handle of @upper dir in @index dir entry */
-static int ovl_set_upper_fh(struct dentry *upper, struct dentry *index)
+static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper,
+ struct dentry *index)
{
const struct ovl_fh *fh;
int err;
- fh = ovl_encode_real_fh(upper, true);
+ fh = ovl_encode_real_fh(ofs, d_inode(upper), true);
if (IS_ERR(fh))
return PTR_ERR(fh);
- err = ovl_do_setxattr(index, OVL_XATTR_UPPER, fh, fh->len, 0);
+ err = ovl_setxattr(ofs, index, OVL_XATTR_UPPER, fh->buf, fh->fb.len);
kfree(fh);
return err;
@@ -328,16 +517,14 @@ static int ovl_set_upper_fh(struct dentry *upper, struct dentry *index)
/*
* Create and install index entry.
- *
- * Caller must hold i_mutex on indexdir.
*/
-static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
+static int ovl_create_index(struct dentry *dentry, const struct ovl_fh *fh,
struct dentry *upper)
{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
- struct inode *dir = d_inode(indexdir);
- struct dentry *index = NULL;
struct dentry *temp = NULL;
+ struct renamedata rd = {};
struct qstr name = { };
int err;
@@ -356,29 +543,31 @@ static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry))))
return -EIO;
- err = ovl_get_index_name(origin, &name);
+ err = ovl_get_index_name_fh(fh, &name);
if (err)
return err;
- temp = ovl_create_temp(indexdir, OVL_CATTR(S_IFDIR | 0));
+ temp = ovl_create_temp(ofs, indexdir, OVL_CATTR(S_IFDIR | 0));
err = PTR_ERR(temp);
if (IS_ERR(temp))
goto free_name;
- err = ovl_set_upper_fh(upper, temp);
+ err = ovl_set_upper_fh(ofs, upper, temp);
if (err)
goto out;
- index = lookup_one_len(name.name, indexdir, name.len);
- if (IS_ERR(index)) {
- err = PTR_ERR(index);
- } else {
- err = ovl_do_rename(dir, temp, dir, index, 0);
- dput(index);
- }
+ rd.mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.old_parent = indexdir;
+ rd.new_parent = indexdir;
+ err = start_renaming_dentry(&rd, 0, temp, &name);
+ if (err)
+ goto out;
+
+ err = ovl_do_rename_rd(&rd);
+ end_renaming(&rd);
out:
if (err)
- ovl_cleanup(dir, temp);
+ ovl_cleanup(ofs, indexdir, temp);
dput(temp);
free_name:
kfree(name.name);
@@ -395,9 +584,12 @@ struct ovl_copy_up_ctx {
struct dentry *destdir;
struct qstr destname;
struct dentry *workdir;
+ const struct ovl_fh *origin_fh;
bool origin;
bool indexed;
bool metacopy;
+ bool metacopy_digest;
+ bool metadata_fsync;
};
static int ovl_link_up(struct ovl_copy_up_ctx *c)
@@ -405,48 +597,87 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
int err;
struct dentry *upper;
struct dentry *upperdir = ovl_dentry_upper(c->parent);
+ struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
struct inode *udir = d_inode(upperdir);
+ ovl_start_write(c->dentry);
+
/* Mark parent "impure" because it may now contain non-pure upper */
err = ovl_set_impure(c->parent, upperdir);
if (err)
- return err;
+ goto out;
err = ovl_set_nlink_lower(c->dentry);
if (err)
- return err;
+ goto out;
- inode_lock_nested(udir, I_MUTEX_PARENT);
- upper = lookup_one_len(c->dentry->d_name.name, upperdir,
- c->dentry->d_name.len);
+ upper = ovl_start_creating_upper(ofs, upperdir,
+ &QSTR_LEN(c->dentry->d_name.name,
+ c->dentry->d_name.len));
err = PTR_ERR(upper);
if (!IS_ERR(upper)) {
- err = ovl_do_link(ovl_dentry_upper(c->dentry), udir, upper);
- dput(upper);
+ err = ovl_do_link(ofs, ovl_dentry_upper(c->dentry), udir, upper);
if (!err) {
/* Restore timestamps on parent (best effort) */
- ovl_set_timestamps(upperdir, &c->pstat);
+ ovl_set_timestamps(ofs, upperdir, &c->pstat);
ovl_dentry_set_upper_alias(c->dentry);
+ ovl_dentry_update_reval(c->dentry, upper);
}
+ end_creating(upper);
}
- inode_unlock(udir);
if (err)
- return err;
+ goto out;
err = ovl_set_nlink_upper(c->dentry);
+out:
+ ovl_end_write(c->dentry);
return err;
}
-static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
+static int ovl_copy_up_data(struct ovl_copy_up_ctx *c, const struct path *temp)
{
+ struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
+ struct file *new_file;
int err;
- err = ovl_copy_xattr(c->lowerpath.dentry, temp);
+ if (!S_ISREG(c->stat.mode) || c->metacopy || !c->stat.size)
+ return 0;
+
+ new_file = ovl_path_open(temp, O_LARGEFILE | O_WRONLY);
+ if (IS_ERR(new_file))
+ return PTR_ERR(new_file);
+
+ err = ovl_copy_up_file(ofs, c->dentry, new_file, c->stat.size,
+ !c->metadata_fsync);
+ fput(new_file);
+
+ return err;
+}
+
+static int ovl_copy_up_metadata(struct ovl_copy_up_ctx *c, struct dentry *temp)
+{
+ struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
+ struct inode *inode = d_inode(c->dentry);
+ struct path upperpath = { .mnt = ovl_upper_mnt(ofs), .dentry = temp };
+ int err;
+
+ err = ovl_copy_xattr(c->dentry->d_sb, &c->lowerpath, temp);
if (err)
return err;
+ if (inode->i_flags & OVL_FATTR_I_FLAGS_MASK &&
+ (S_ISREG(c->stat.mode) || S_ISDIR(c->stat.mode))) {
+ /*
+ * Copy the fileattr inode flags that are the source of already
+ * copied i_flags
+ */
+ err = ovl_copy_fileattr(inode, &c->lowerpath, &upperpath);
+ if (err)
+ return err;
+ }
+
/*
* Store identifier of lower inode in upper inode xattr to
* allow lookup of the copy up origin inode.
@@ -455,79 +686,82 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
* hard link.
*/
if (c->origin) {
- err = ovl_set_origin(c->dentry, c->lowerpath.dentry, temp);
+ err = ovl_set_origin_fh(ofs, c->origin_fh, temp);
if (err)
return err;
}
- if (S_ISREG(c->stat.mode) && !c->metacopy) {
- struct path upperpath, datapath;
-
- ovl_path_upper(c->dentry, &upperpath);
- BUG_ON(upperpath.dentry != NULL);
- upperpath.dentry = temp;
+ if (c->metacopy) {
+ struct path lowerdatapath;
+ struct ovl_metacopy metacopy_data = OVL_METACOPY_INIT;
- ovl_path_lowerdata(c->dentry, &datapath);
- err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size);
+ ovl_path_lowerdata(c->dentry, &lowerdatapath);
+ if (WARN_ON_ONCE(lowerdatapath.dentry == NULL))
+ return -EIO;
+ err = ovl_get_verity_digest(ofs, &lowerdatapath, &metacopy_data);
if (err)
return err;
- }
- if (c->metacopy) {
- err = ovl_check_setxattr(c->dentry, temp, OVL_XATTR_METACOPY,
- NULL, 0, -EOPNOTSUPP);
+ if (metacopy_data.digest_algo)
+ c->metacopy_digest = true;
+
+ err = ovl_set_metacopy_xattr(ofs, temp, &metacopy_data);
if (err)
return err;
}
inode_lock(temp->d_inode);
- if (c->metacopy)
- err = ovl_set_size(temp, &c->stat);
+ if (S_ISREG(c->stat.mode))
+ err = ovl_set_size(ofs, temp, &c->stat);
if (!err)
- err = ovl_set_attr(temp, &c->stat);
+ err = ovl_set_attr(ofs, temp, &c->stat);
inode_unlock(temp->d_inode);
+ /* fsync metadata before moving it into upper dir */
+ if (!err && ovl_should_sync(ofs) && c->metadata_fsync)
+ err = ovl_sync_file(&upperpath);
+
return err;
}
-struct ovl_cu_creds {
- const struct cred *old;
- struct cred *new;
-};
-
-static int ovl_prep_cu_creds(struct dentry *dentry, struct ovl_cu_creds *cc)
+static const struct cred *ovl_prepare_copy_up_creds(struct dentry *dentry)
{
+ struct cred *copy_up_cred = NULL;
int err;
- cc->old = cc->new = NULL;
- err = security_inode_copy_up(dentry, &cc->new);
+ err = security_inode_copy_up(dentry, &copy_up_cred);
if (err < 0)
- return err;
+ return ERR_PTR(err);
- if (cc->new)
- cc->old = override_creds(cc->new);
+ if (!copy_up_cred)
+ return NULL;
- return 0;
+ return override_creds(copy_up_cred);
}
-static void ovl_revert_cu_creds(struct ovl_cu_creds *cc)
+static void ovl_revert_copy_up_creds(const struct cred *orig_cred)
{
- if (cc->new) {
- revert_creds(cc->old);
- put_cred(cc->new);
- }
+ const struct cred *copy_up_cred;
+
+ copy_up_cred = revert_creds(orig_cred);
+ put_cred(copy_up_cred);
}
+DEFINE_CLASS(copy_up_creds, const struct cred *,
+ if (!IS_ERR_OR_NULL(_T)) ovl_revert_copy_up_creds(_T),
+ ovl_prepare_copy_up_creds(dentry), struct dentry *dentry)
+
/*
* Copyup using workdir to prepare temp file. Used when copying up directories,
* special files or when upper fs doesn't support O_TMPFILE.
*/
static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
{
+ struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
struct inode *inode;
- struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir);
- struct dentry *temp, *upper;
- struct ovl_cu_creds cc;
+ struct path path = { .mnt = ovl_upper_mnt(ofs) };
+ struct renamedata rd = {};
+ struct dentry *temp;
int err;
struct ovl_cattr cattr = {
/* Can't properly set mode on creation because of the umask */
@@ -536,101 +770,145 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
.link = c->link
};
- err = ovl_lock_rename_workdir(c->workdir, c->destdir);
- if (err)
- return err;
-
- err = ovl_prep_cu_creds(c->dentry, &cc);
- if (err)
- goto unlock;
+ scoped_class(copy_up_creds, copy_up_creds, c->dentry) {
+ if (IS_ERR(copy_up_creds))
+ return PTR_ERR(copy_up_creds);
- temp = ovl_create_temp(c->workdir, &cattr);
- ovl_revert_cu_creds(&cc);
+ ovl_start_write(c->dentry);
+ temp = ovl_create_temp(ofs, c->workdir, &cattr);
+ ovl_end_write(c->dentry);
+ }
- err = PTR_ERR(temp);
if (IS_ERR(temp))
- goto unlock;
+ return PTR_ERR(temp);
- err = ovl_copy_up_inode(c, temp);
+ /*
+ * Copy up data first and then xattrs. Writing data after
+ * xattrs will remove security.capability xattr automatically.
+ */
+ path.dentry = temp;
+ err = ovl_copy_up_data(c, &path);
+ ovl_start_write(c->dentry);
if (err)
- goto cleanup;
+ goto cleanup_unlocked;
if (S_ISDIR(c->stat.mode) && c->indexed) {
- err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp);
+ err = ovl_create_index(c->dentry, c->origin_fh, temp);
if (err)
- goto cleanup;
+ goto cleanup_unlocked;
}
- upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
- err = PTR_ERR(upper);
- if (IS_ERR(upper))
- goto cleanup;
+ /*
+ * We cannot hold lock_rename() throughout this helper, because of
+ * lock ordering with sb_writers, which shouldn't be held when calling
+ * ovl_copy_up_data(), so lock workdir and destdir and make sure that
+ * temp wasn't moved before copy up completion or cleanup.
+ */
+ rd.mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.old_parent = c->workdir;
+ rd.new_parent = c->destdir;
+ rd.flags = 0;
+ err = start_renaming_dentry(&rd, 0, temp,
+ &QSTR_LEN(c->destname.name, c->destname.len));
+ if (err) {
+ /* temp or workdir moved underneath us? map to -EIO */
+ err = -EIO;
+ }
+ if (err)
+ goto cleanup_unlocked;
+
+ err = ovl_copy_up_metadata(c, temp);
+ if (!err)
+ err = ovl_do_rename_rd(&rd);
+ end_renaming(&rd);
- err = ovl_do_rename(wdir, temp, udir, upper, 0);
- dput(upper);
if (err)
- goto cleanup;
+ goto cleanup_unlocked;
- if (!c->metacopy)
- ovl_set_upperdata(d_inode(c->dentry));
inode = d_inode(c->dentry);
+ if (c->metacopy_digest)
+ ovl_set_flag(OVL_HAS_DIGEST, inode);
+ else
+ ovl_clear_flag(OVL_HAS_DIGEST, inode);
+ ovl_clear_flag(OVL_VERIFIED_DIGEST, inode);
+
+ if (!c->metacopy)
+ ovl_set_upperdata(inode);
ovl_inode_update(inode, temp);
if (S_ISDIR(inode->i_mode))
ovl_set_flag(OVL_WHITEOUTS, inode);
-unlock:
- unlock_rename(c->workdir, c->destdir);
+out:
+ ovl_end_write(c->dentry);
return err;
-cleanup:
- ovl_cleanup(wdir, temp);
+cleanup_unlocked:
+ ovl_cleanup(ofs, c->workdir, temp);
dput(temp);
- goto unlock;
+ goto out;
}
/* Copyup using O_TMPFILE which does not require cross dir locking */
static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
{
+ struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
struct inode *udir = d_inode(c->destdir);
struct dentry *temp, *upper;
- struct ovl_cu_creds cc;
+ struct file *tmpfile;
int err;
- err = ovl_prep_cu_creds(c->dentry, &cc);
- if (err)
- return err;
+ scoped_class(copy_up_creds, copy_up_creds, c->dentry) {
+ if (IS_ERR(copy_up_creds))
+ return PTR_ERR(copy_up_creds);
- temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
- ovl_revert_cu_creds(&cc);
+ ovl_start_write(c->dentry);
+ tmpfile = ovl_do_tmpfile(ofs, c->workdir, c->stat.mode);
+ ovl_end_write(c->dentry);
+ }
- if (IS_ERR(temp))
- return PTR_ERR(temp);
+ if (IS_ERR(tmpfile))
+ return PTR_ERR(tmpfile);
- err = ovl_copy_up_inode(c, temp);
- if (err)
- goto out_dput;
+ temp = tmpfile->f_path.dentry;
+ if (!c->metacopy && c->stat.size) {
+ err = ovl_copy_up_file(ofs, c->dentry, tmpfile, c->stat.size,
+ !c->metadata_fsync);
+ if (err)
+ goto out_fput;
+ }
- inode_lock_nested(udir, I_MUTEX_PARENT);
+ ovl_start_write(c->dentry);
- upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
+ err = ovl_copy_up_metadata(c, temp);
+ if (err)
+ goto out;
+
+ upper = ovl_start_creating_upper(ofs, c->destdir,
+ &QSTR_LEN(c->destname.name,
+ c->destname.len));
err = PTR_ERR(upper);
if (!IS_ERR(upper)) {
- err = ovl_do_link(temp, udir, upper);
- dput(upper);
+ err = ovl_do_link(ofs, temp, udir, upper);
+ end_creating(upper);
}
- inode_unlock(udir);
if (err)
- goto out_dput;
+ goto out;
+
+ if (c->metacopy_digest)
+ ovl_set_flag(OVL_HAS_DIGEST, d_inode(c->dentry));
+ else
+ ovl_clear_flag(OVL_HAS_DIGEST, d_inode(c->dentry));
+ ovl_clear_flag(OVL_VERIFIED_DIGEST, d_inode(c->dentry));
if (!c->metacopy)
ovl_set_upperdata(d_inode(c->dentry));
- ovl_inode_update(d_inode(c->dentry), temp);
-
- return 0;
+ ovl_inode_update(d_inode(c->dentry), dget(temp));
-out_dput:
- dput(temp);
+out:
+ ovl_end_write(c->dentry);
+out_fput:
+ fput(tmpfile);
return err;
}
@@ -646,7 +924,9 @@ out_dput:
static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
{
int err;
- struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info;
+ struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
+ struct dentry *origin = c->lowerpath.dentry;
+ struct ovl_fh *fh = NULL;
bool to_index = false;
/*
@@ -663,25 +943,42 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
to_index = true;
}
- if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index)
+ if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index) {
+ fh = ovl_get_origin_fh(ofs, origin);
+ if (IS_ERR(fh))
+ return PTR_ERR(fh);
+
+ /* origin_fh may be NULL */
+ c->origin_fh = fh;
c->origin = true;
+ }
if (to_index) {
c->destdir = ovl_indexdir(c->dentry->d_sb);
- err = ovl_get_index_name(c->lowerpath.dentry, &c->destname);
+ err = ovl_get_index_name(ofs, origin, &c->destname);
if (err)
- return err;
+ goto out_free_fh;
} else if (WARN_ON(!c->parent)) {
/* Disconnected dentry must be copied up to index dir */
- return -EIO;
+ err = -EIO;
+ goto out_free_fh;
} else {
/*
+ * c->dentry->d_name is stabilzed by ovl_copy_up_start(),
+ * because if we got here, it means that c->dentry has no upper
+ * alias and changing ->d_name means going through ovl_rename()
+ * that will call ovl_copy_up() on source and target dentry.
+ */
+ c->destname = c->dentry->d_name;
+ /*
* Mark parent "impure" because it may now contain non-pure
* upper
*/
+ ovl_start_write(c->dentry);
err = ovl_set_impure(c->parent, c->destdir);
+ ovl_end_write(c->dentry);
if (err)
- return err;
+ goto out_free_fh;
}
/* Should we copyup with O_TMPFILE or with workdir? */
@@ -695,6 +992,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
if (c->indexed)
ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
+ ovl_start_write(c->dentry);
if (to_index) {
/* Initialize nlink for copy up of disconnected dentry */
err = ovl_set_nlink_upper(c->dentry);
@@ -703,22 +1001,26 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
/* Restore timestamps on parent (best effort) */
inode_lock(udir);
- ovl_set_timestamps(c->destdir, &c->pstat);
+ ovl_set_timestamps(ofs, c->destdir, &c->pstat);
inode_unlock(udir);
ovl_dentry_set_upper_alias(c->dentry);
+ ovl_dentry_update_reval(c->dentry, ovl_dentry_upper(c->dentry));
}
+ ovl_end_write(c->dentry);
out:
if (to_index)
kfree(c->destname.name);
+out_free_fh:
+ kfree(fh);
return err;
}
static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode,
int flags)
{
- struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
if (!ofs->config.metacopy)
return false;
@@ -729,32 +1031,92 @@ static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode,
if (flags && ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)))
return false;
+ /* Fall back to full copy if no fsverity on source data and we require verity */
+ if (ofs->config.verity_mode == OVL_VERITY_REQUIRE) {
+ struct path lowerdata;
+
+ ovl_path_lowerdata(dentry, &lowerdata);
+
+ if (WARN_ON_ONCE(lowerdata.dentry == NULL) ||
+ ovl_ensure_verity_loaded(&lowerdata) ||
+ !fsverity_active(d_inode(lowerdata.dentry))) {
+ return false;
+ }
+ }
+
return true;
}
+static ssize_t ovl_getxattr_value(const struct path *path, char *name, char **value)
+{
+ ssize_t res;
+ char *buf;
+
+ res = ovl_do_getxattr(path, name, NULL, 0);
+ if (res == -ENODATA || res == -EOPNOTSUPP)
+ res = 0;
+
+ if (res > 0) {
+ buf = kzalloc(res, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ res = ovl_do_getxattr(path, name, buf, res);
+ if (res < 0)
+ kfree(buf);
+ else
+ *value = buf;
+ }
+ return res;
+}
+
/* Copy up data of an inode which was copied up metadata only in the past. */
static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
{
- struct path upperpath, datapath;
+ struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
+ struct path upperpath;
int err;
+ char *capability = NULL;
+ ssize_t cap_size;
ovl_path_upper(c->dentry, &upperpath);
if (WARN_ON(upperpath.dentry == NULL))
return -EIO;
- ovl_path_lowerdata(c->dentry, &datapath);
- if (WARN_ON(datapath.dentry == NULL))
- return -EIO;
+ if (c->stat.size) {
+ err = cap_size = ovl_getxattr_value(&upperpath, XATTR_NAME_CAPS,
+ &capability);
+ if (cap_size < 0)
+ goto out;
+ }
- err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size);
+ err = ovl_copy_up_data(c, &upperpath);
if (err)
- return err;
+ goto out_free;
- err = vfs_removexattr(upperpath.dentry, OVL_XATTR_METACOPY);
+ /*
+ * Writing to upper file will clear security.capability xattr. We
+ * don't want that to happen for normal copy-up operation.
+ */
+ ovl_start_write(c->dentry);
+ if (capability) {
+ err = ovl_do_setxattr(ofs, upperpath.dentry, XATTR_NAME_CAPS,
+ capability, cap_size, 0);
+ }
+ if (!err) {
+ err = ovl_removexattr(ofs, upperpath.dentry,
+ OVL_XATTR_METACOPY);
+ }
+ ovl_end_write(c->dentry);
if (err)
- return err;
+ goto out_free;
+ ovl_clear_flag(OVL_HAS_DIGEST, d_inode(c->dentry));
+ ovl_clear_flag(OVL_VERIFIED_DIGEST, d_inode(c->dentry));
ovl_set_upperdata(d_inode(c->dentry));
+out_free:
+ kfree(capability);
+out:
return err;
}
@@ -779,12 +1141,26 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
if (err)
return err;
+ if (!kuid_has_mapping(current_user_ns(), ctx.stat.uid) ||
+ !kgid_has_mapping(current_user_ns(), ctx.stat.gid))
+ return -EOVERFLOW;
+
+ /*
+ * With metacopy disabled, we fsync after final metadata copyup, for
+ * both regular files and directories to get atomic copyup semantics
+ * on filesystems that do not use strict metadata ordering (e.g. ubifs).
+ *
+ * With metacopy enabled we want to avoid fsync on all meta copyup
+ * that will hurt performance of workloads such as chown -R, so we
+ * only fsync on data copyup as legacy behavior.
+ */
+ ctx.metadata_fsync = !OVL_FS(dentry->d_sb)->config.metacopy &&
+ (S_ISREG(ctx.stat.mode) || S_ISDIR(ctx.stat.mode));
ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags);
if (parent) {
ovl_path_upper(parent, &parentpath);
ctx.destdir = parentpath.dentry;
- ctx.destname = dentry->d_name;
err = vfs_getattr(&parentpath, &ctx.pstat,
STATX_ATIME | STATX_MTIME,
@@ -822,10 +1198,9 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
return err;
}
-int ovl_copy_up_flags(struct dentry *dentry, int flags)
+static int ovl_copy_up_flags(struct dentry *dentry, int flags)
{
int err = 0;
- const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED);
/*
@@ -836,6 +1211,15 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
if (WARN_ON(disconnected && d_is_dir(dentry)))
return -EIO;
+ /*
+ * We may not need lowerdata if we are only doing metacopy up, but it is
+ * not very important to optimize this case, so do lazy lowerdata lookup
+ * before any copy up, so we can do it before taking ovl_inode_lock().
+ */
+ err = ovl_verify_lowerdata(dentry);
+ if (err)
+ return err;
+
while (!err) {
struct dentry *next;
struct dentry *parent = NULL;
@@ -855,12 +1239,12 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
next = parent;
}
- err = ovl_copy_up_one(parent, next, flags);
+ with_ovl_creds(dentry->d_sb)
+ err = ovl_copy_up_one(parent, next, flags);
dput(parent);
dput(next);
}
- revert_creds(old_cred);
return err;
}
@@ -880,19 +1264,12 @@ static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
return true;
}
-int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags)
+int ovl_maybe_copy_up(struct dentry *dentry, int flags)
{
- int err = 0;
-
- if (ovl_open_need_copy_up(dentry, file_flags)) {
- err = ovl_want_write(dentry);
- if (!err) {
- err = ovl_copy_up_flags(dentry, file_flags);
- ovl_drop_write(dentry);
- }
- }
+ if (!ovl_open_need_copy_up(dentry, flags))
+ return 0;
- return err;
+ return ovl_copy_up_flags(dentry, flags);
}
int ovl_copy_up_with_data(struct dentry *dentry)
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 82c129bfe58d..ff3dbd1ca61f 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
*
* Copyright (C) 2011 Novell Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
*/
#include <linux/fs.h>
@@ -17,83 +14,120 @@
#include <linux/posix_acl_xattr.h>
#include <linux/atomic.h>
#include <linux/ratelimit.h>
+#include <linux/backing-file.h>
#include "overlayfs.h"
static unsigned short ovl_redirect_max = 256;
module_param_named(redirect_max, ovl_redirect_max, ushort, 0644);
-MODULE_PARM_DESC(ovl_redirect_max,
+MODULE_PARM_DESC(redirect_max,
"Maximum length of absolute redirect xattr value");
static int ovl_set_redirect(struct dentry *dentry, bool samedir);
-int ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
+static int ovl_cleanup_locked(struct ovl_fs *ofs, struct inode *wdir,
+ struct dentry *wdentry)
{
int err;
dget(wdentry);
if (d_is_dir(wdentry))
- err = ovl_do_rmdir(wdir, wdentry);
+ err = ovl_do_rmdir(ofs, wdir, wdentry);
else
- err = ovl_do_unlink(wdir, wdentry);
+ err = ovl_do_unlink(ofs, wdir, wdentry);
dput(wdentry);
if (err) {
- pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n",
+ pr_err("cleanup of '%pd2' failed (%i)\n",
wdentry, err);
}
return err;
}
-static struct dentry *ovl_lookup_temp(struct dentry *workdir)
+int ovl_cleanup(struct ovl_fs *ofs, struct dentry *workdir,
+ struct dentry *wdentry)
+{
+ wdentry = start_removing_dentry(workdir, wdentry);
+ if (IS_ERR(wdentry))
+ return PTR_ERR(wdentry);
+
+ ovl_cleanup_locked(ofs, workdir->d_inode, wdentry);
+ end_removing(wdentry);
+
+ return 0;
+}
+
+void ovl_tempname(char name[OVL_TEMPNAME_SIZE])
{
- struct dentry *temp;
- char name[20];
static atomic_t temp_id = ATOMIC_INIT(0);
/* counter is allowed to wrap, since temp dentries are ephemeral */
- snprintf(name, sizeof(name), "#%x", atomic_inc_return(&temp_id));
+ snprintf(name, OVL_TEMPNAME_SIZE, "#%x", atomic_inc_return(&temp_id));
+}
- temp = lookup_one_len(name, workdir, strlen(name));
- if (!IS_ERR(temp) && temp->d_inode) {
- pr_err("overlayfs: workdir/%s already exists\n", name);
- dput(temp);
- temp = ERR_PTR(-EIO);
- }
+static struct dentry *ovl_start_creating_temp(struct ovl_fs *ofs,
+ struct dentry *workdir)
+{
+ char name[OVL_TEMPNAME_SIZE];
- return temp;
+ ovl_tempname(name);
+ return start_creating(ovl_upper_mnt_idmap(ofs), workdir,
+ &QSTR(name));
}
-/* caller holds i_mutex on workdir */
-static struct dentry *ovl_whiteout(struct dentry *workdir)
+static struct dentry *ovl_whiteout(struct ovl_fs *ofs)
{
int err;
- struct dentry *whiteout;
+ struct dentry *whiteout, *link;
+ struct dentry *workdir = ofs->workdir;
struct inode *wdir = workdir->d_inode;
- whiteout = ovl_lookup_temp(workdir);
- if (IS_ERR(whiteout))
- return whiteout;
+ guard(mutex)(&ofs->whiteout_lock);
- err = ovl_do_whiteout(wdir, whiteout);
- if (err) {
- dput(whiteout);
- whiteout = ERR_PTR(err);
+ if (!ofs->whiteout) {
+ whiteout = ovl_start_creating_temp(ofs, workdir);
+ if (IS_ERR(whiteout))
+ return whiteout;
+ err = ovl_do_whiteout(ofs, wdir, whiteout);
+ if (!err)
+ ofs->whiteout = dget(whiteout);
+ end_creating(whiteout);
+ if (err)
+ return ERR_PTR(err);
}
+ if (!ofs->no_shared_whiteout) {
+ link = ovl_start_creating_temp(ofs, workdir);
+ if (IS_ERR(link))
+ return link;
+ err = ovl_do_link(ofs, ofs->whiteout, wdir, link);
+ if (!err)
+ whiteout = dget(link);
+ end_creating(link);
+ if (!err)
+ return whiteout;
+
+ if (err != -EMLINK) {
+ pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%u)\n",
+ ofs->whiteout->d_inode->i_nlink,
+ err);
+ ofs->no_shared_whiteout = true;
+ }
+ }
+ whiteout = ofs->whiteout;
+ ofs->whiteout = NULL;
return whiteout;
}
-/* Caller must hold i_mutex on both workdir and dir */
-int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
+int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *dir,
struct dentry *dentry)
{
- struct inode *wdir = workdir->d_inode;
struct dentry *whiteout;
+ struct renamedata rd = {};
int err;
int flags = 0;
- whiteout = ovl_whiteout(workdir);
+ whiteout = ovl_whiteout(ofs);
err = PTR_ERR(whiteout);
if (IS_ERR(whiteout))
return err;
@@ -101,55 +135,33 @@ int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
if (d_is_dir(dentry))
flags = RENAME_EXCHANGE;
- err = ovl_do_rename(wdir, whiteout, dir, dentry, flags);
+ rd.mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.old_parent = ofs->workdir;
+ rd.new_parent = dir;
+ rd.flags = flags;
+ err = start_renaming_two_dentries(&rd, whiteout, dentry);
+ if (!err) {
+ err = ovl_do_rename_rd(&rd);
+ end_renaming(&rd);
+ }
if (err)
goto kill_whiteout;
if (flags)
- ovl_cleanup(wdir, dentry);
+ ovl_cleanup(ofs, ofs->workdir, dentry);
out:
dput(whiteout);
return err;
kill_whiteout:
- ovl_cleanup(wdir, whiteout);
+ ovl_cleanup(ofs, ofs->workdir, whiteout);
goto out;
}
-static int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry,
- umode_t mode)
-{
- int err;
- struct dentry *d, *dentry = *newdentry;
-
- err = ovl_do_mkdir(dir, dentry, mode);
- if (err)
- return err;
-
- if (likely(!d_unhashed(dentry)))
- return 0;
-
- /*
- * vfs_mkdir() may succeed and leave the dentry passed
- * to it unhashed and negative. If that happens, try to
- * lookup a new hashed and positive dentry.
- */
- d = lookup_one_len(dentry->d_name.name, dentry->d_parent,
- dentry->d_name.len);
- if (IS_ERR(d)) {
- pr_warn("overlayfs: failed lookup after mkdir (%pd2, err=%i).\n",
- dentry, err);
- return PTR_ERR(d);
- }
- dput(dentry);
- *newdentry = d;
-
- return 0;
-}
-
-struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry,
- struct ovl_cattr *attr)
+struct dentry *ovl_create_real(struct ovl_fs *ofs, struct dentry *parent,
+ struct dentry *newdentry, struct ovl_cattr *attr)
{
+ struct inode *dir = parent->d_inode;
int err;
if (IS_ERR(newdentry))
@@ -160,61 +172,95 @@ struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry,
goto out;
if (attr->hardlink) {
- err = ovl_do_link(attr->hardlink, dir, newdentry);
+ err = ovl_do_link(ofs, attr->hardlink, dir, newdentry);
} else {
switch (attr->mode & S_IFMT) {
case S_IFREG:
- err = ovl_do_create(dir, newdentry, attr->mode);
+ err = ovl_do_create(ofs, dir, newdentry, attr->mode);
break;
case S_IFDIR:
/* mkdir is special... */
- err = ovl_mkdir_real(dir, &newdentry, attr->mode);
+ newdentry = ovl_do_mkdir(ofs, dir, newdentry, attr->mode);
+ err = PTR_ERR_OR_ZERO(newdentry);
+ /* expect to inherit casefolding from workdir/upperdir */
+ if (!err && ofs->casefold != ovl_dentry_casefolded(newdentry)) {
+ pr_warn_ratelimited("wrong inherited casefold (%pd2)\n",
+ newdentry);
+ end_creating(newdentry);
+ err = -EINVAL;
+ }
break;
case S_IFCHR:
case S_IFBLK:
case S_IFIFO:
case S_IFSOCK:
- err = ovl_do_mknod(dir, newdentry, attr->mode,
+ err = ovl_do_mknod(ofs, dir, newdentry, attr->mode,
attr->rdev);
break;
case S_IFLNK:
- err = ovl_do_symlink(dir, newdentry, attr->link);
+ err = ovl_do_symlink(ofs, dir, newdentry, attr->link);
break;
default:
err = -EPERM;
}
}
- if (!err && WARN_ON(!newdentry->d_inode)) {
+ if (err)
+ goto out;
+
+ if (WARN_ON(!newdentry->d_inode)) {
/*
* Not quite sure if non-instantiated dentry is legal or not.
* VFS doesn't seem to care so check and warn here.
*/
err = -EIO;
+ } else if (d_unhashed(newdentry)) {
+ struct dentry *d;
+ /*
+ * Some filesystems (i.e. casefolded) may return an unhashed
+ * negative dentry from the ovl_lookup_upper() call before
+ * ovl_create_real().
+ * In that case, lookup again after making the newdentry
+ * positive, so ovl_create_upper() always returns a hashed
+ * positive dentry.
+ */
+ d = ovl_lookup_upper(ofs, newdentry->d_name.name, parent,
+ newdentry->d_name.len);
+ dput(newdentry);
+ if (IS_ERR_OR_NULL(d))
+ err = d ? PTR_ERR(d) : -ENOENT;
+ else
+ return d;
}
out:
if (err) {
- dput(newdentry);
+ end_creating(newdentry);
return ERR_PTR(err);
}
return newdentry;
}
-struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr)
+struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir,
+ struct ovl_cattr *attr)
{
- return ovl_create_real(d_inode(workdir), ovl_lookup_temp(workdir),
- attr);
+ struct dentry *ret;
+ ret = ovl_start_creating_temp(ofs, workdir);
+ if (IS_ERR(ret))
+ return ret;
+ ret = ovl_create_real(ofs, workdir, ret, attr);
+ return end_creating_keep(ret);
}
static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper,
int xerr)
{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
int err;
- err = ovl_check_setxattr(dentry, upper, OVL_XATTR_OPAQUE, "y", 1, xerr);
+ err = ovl_check_setxattr(ofs, upper, OVL_XATTR_OPAQUE, "y", 1, xerr);
if (!err)
ovl_dentry_set_opaque(dentry);
@@ -237,15 +283,16 @@ static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
* may not use to instantiate the new dentry.
*/
static int ovl_instantiate(struct dentry *dentry, struct inode *inode,
- struct dentry *newdentry, bool hardlink)
+ struct dentry *newdentry, bool hardlink, struct file *tmpfile)
{
struct ovl_inode_params oip = {
.upperdentry = newdentry,
.newinode = inode,
};
- ovl_dir_modified(dentry->d_parent, false);
ovl_dentry_set_upper_alias(dentry);
+ ovl_dentry_init_reval(dentry, newdentry, NULL);
+
if (!hardlink) {
/*
* ovl_obtain_alias() can be called after ovl_create_real()
@@ -257,20 +304,26 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode,
* XXX: if we ever use ovl_obtain_alias() to decode directory
* file handles, need to use ovl_get_inode_locked() and
* d_instantiate_new() here to prevent from creating two
- * hashed directory inode aliases.
+ * hashed directory inode aliases. We then need to return
+ * the obtained alias to ovl_mkdir().
*/
inode = ovl_get_inode(dentry->d_sb, &oip);
- if (WARN_ON(IS_ERR(inode)))
+ if (IS_ERR(inode))
return PTR_ERR(inode);
+ if (inode == oip.newinode)
+ ovl_set_flag(OVL_UPPERDATA, inode);
} else {
WARN_ON(ovl_inode_real(inode) != d_inode(newdentry));
dput(newdentry);
inc_nlink(inode);
}
+ if (tmpfile)
+ d_mark_tmpfile(tmpfile, inode);
+
d_instantiate(dentry, inode);
if (inode != oip.newinode) {
- pr_warn_ratelimited("overlayfs: newly created inode found in cache (%pd2)\n",
+ pr_warn_ratelimited("newly created inode found in cache (%pd2)\n",
dentry);
}
@@ -294,49 +347,47 @@ static bool ovl_type_origin(struct dentry *dentry)
static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
struct ovl_cattr *attr)
{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
- struct inode *udir = upperdir->d_inode;
struct dentry *newdentry;
int err;
- if (!attr->hardlink && !IS_POSIXACL(udir))
- attr->mode &= ~current_umask();
-
- inode_lock_nested(udir, I_MUTEX_PARENT);
- newdentry = ovl_create_real(udir,
- lookup_one_len(dentry->d_name.name,
- upperdir,
- dentry->d_name.len),
- attr);
- err = PTR_ERR(newdentry);
+ newdentry = ovl_start_creating_upper(ofs, upperdir,
+ &QSTR_LEN(dentry->d_name.name,
+ dentry->d_name.len));
if (IS_ERR(newdentry))
- goto out_unlock;
+ return PTR_ERR(newdentry);
+ newdentry = ovl_create_real(ofs, upperdir, newdentry, attr);
+ if (IS_ERR(newdentry))
+ return PTR_ERR(newdentry);
+
+ end_creating_keep(newdentry);
- if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry)) {
+ if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry) &&
+ !ovl_allow_offline_changes(ofs)) {
/* Setting opaque here is just an optimization, allow to fail */
ovl_set_opaque(dentry, newdentry);
}
- err = ovl_instantiate(dentry, inode, newdentry, !!attr->hardlink);
+ ovl_dir_modified(dentry->d_parent, false);
+ err = ovl_instantiate(dentry, inode, newdentry, !!attr->hardlink, NULL);
if (err)
goto out_cleanup;
-out_unlock:
- inode_unlock(udir);
- return err;
+ return 0;
out_cleanup:
- ovl_cleanup(udir, newdentry);
+ ovl_cleanup(ofs, upperdir, newdentry);
dput(newdentry);
- goto out_unlock;
+ return err;
}
static struct dentry *ovl_clear_empty(struct dentry *dentry,
struct list_head *list)
{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *workdir = ovl_workdir(dentry);
- struct inode *wdir = workdir->d_inode;
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
- struct inode *udir = upperdir->d_inode;
+ struct renamedata rd = {};
struct path upperpath;
struct dentry *upper;
struct dentry *opaquedir;
@@ -346,29 +397,31 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
if (WARN_ON(!workdir))
return ERR_PTR(-EROFS);
- err = ovl_lock_rename_workdir(workdir, upperdir);
- if (err)
- goto out;
-
ovl_path_upper(dentry, &upperpath);
err = vfs_getattr(&upperpath, &stat,
STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
if (err)
- goto out_unlock;
+ goto out;
err = -ESTALE;
if (!S_ISDIR(stat.mode))
- goto out_unlock;
+ goto out;
upper = upperpath.dentry;
- if (upper->d_parent->d_inode != udir)
- goto out_unlock;
- opaquedir = ovl_create_temp(workdir, OVL_CATTR(stat.mode));
+ opaquedir = ovl_create_temp(ofs, workdir, OVL_CATTR(stat.mode));
err = PTR_ERR(opaquedir);
if (IS_ERR(opaquedir))
- goto out_unlock;
+ goto out;
+
+ rd.mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.old_parent = workdir;
+ rd.new_parent = upperdir;
+ rd.flags = RENAME_EXCHANGE;
+ err = start_renaming_two_dentries(&rd, opaquedir, upper);
+ if (err)
+ goto out_cleanup_unlocked;
- err = ovl_copy_xattr(upper, opaquedir);
+ err = ovl_copy_xattr(dentry->d_sb, &upperpath, opaquedir);
if (err)
goto out_cleanup;
@@ -377,18 +430,18 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
goto out_cleanup;
inode_lock(opaquedir->d_inode);
- err = ovl_set_attr(opaquedir, &stat);
+ err = ovl_set_attr(ofs, opaquedir, &stat);
inode_unlock(opaquedir->d_inode);
if (err)
goto out_cleanup;
- err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
+ err = ovl_do_rename_rd(&rd);
+ end_renaming(&rd);
if (err)
- goto out_cleanup;
+ goto out_cleanup_unlocked;
- ovl_cleanup_whiteouts(upper, list);
- ovl_cleanup(wdir, upper);
- unlock_rename(workdir, upperdir);
+ ovl_cleanup_whiteouts(ofs, upper, list);
+ ovl_cleanup(ofs, workdir, upper);
/* dentry's upper doesn't match now, get rid of it */
d_drop(dentry);
@@ -396,46 +449,30 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
return opaquedir;
out_cleanup:
- ovl_cleanup(wdir, opaquedir);
+ end_renaming(&rd);
+out_cleanup_unlocked:
+ ovl_cleanup(ofs, workdir, opaquedir);
dput(opaquedir);
-out_unlock:
- unlock_rename(workdir, upperdir);
out:
return ERR_PTR(err);
}
-static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name,
- const struct posix_acl *acl)
+static int ovl_set_upper_acl(struct ovl_fs *ofs, struct dentry *upperdentry,
+ const char *acl_name, struct posix_acl *acl)
{
- void *buffer;
- size_t size;
- int err;
-
if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !acl)
return 0;
- size = posix_acl_xattr_size(acl->a_count);
- buffer = kmalloc(size, GFP_KERNEL);
- if (!buffer)
- return -ENOMEM;
-
- err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
- if (err < 0)
- goto out_free;
-
- err = vfs_setxattr(upperdentry, name, buffer, size, XATTR_CREATE);
-out_free:
- kfree(buffer);
- return err;
+ return ovl_do_set_acl(ofs, upperdentry, acl_name, acl);
}
static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
struct ovl_cattr *cattr)
{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *workdir = ovl_workdir(dentry);
- struct inode *wdir = workdir->d_inode;
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
- struct inode *udir = upperdir->d_inode;
+ struct renamedata rd = {};
struct dentry *upper;
struct dentry *newdentry;
int err;
@@ -452,25 +489,29 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
return err;
}
- err = ovl_lock_rename_workdir(workdir, upperdir);
- if (err)
- goto out;
-
- upper = lookup_one_len(dentry->d_name.name, upperdir,
- dentry->d_name.len);
+ upper = ovl_lookup_upper_unlocked(ofs, dentry->d_name.name, upperdir,
+ dentry->d_name.len);
err = PTR_ERR(upper);
if (IS_ERR(upper))
- goto out_unlock;
+ goto out;
err = -ESTALE;
- if (d_is_negative(upper) || !IS_WHITEOUT(d_inode(upper)))
+ if (d_is_negative(upper) || !ovl_upper_is_whiteout(ofs, upper))
goto out_dput;
- newdentry = ovl_create_temp(workdir, cattr);
+ newdentry = ovl_create_temp(ofs, workdir, cattr);
err = PTR_ERR(newdentry);
if (IS_ERR(newdentry))
goto out_dput;
+ rd.mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.old_parent = workdir;
+ rd.new_parent = upperdir;
+ rd.flags = 0;
+ err = start_renaming_two_dentries(&rd, newdentry, upper);
+ if (err)
+ goto out_cleanup_unlocked;
+
/*
* mode could have been mutilated due to umask (e.g. sgid directory)
*/
@@ -482,19 +523,19 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
.ia_mode = cattr->mode,
};
inode_lock(newdentry->d_inode);
- err = notify_change(newdentry, &attr, NULL);
+ err = ovl_do_notify_change(ofs, newdentry, &attr);
inode_unlock(newdentry->d_inode);
if (err)
goto out_cleanup;
}
if (!hardlink) {
- err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_ACCESS,
- acl);
+ err = ovl_set_upper_acl(ofs, newdentry,
+ XATTR_NAME_POSIX_ACL_ACCESS, acl);
if (err)
goto out_cleanup;
- err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_DEFAULT,
- default_acl);
+ err = ovl_set_upper_acl(ofs, newdentry,
+ XATTR_NAME_POSIX_ACL_DEFAULT, default_acl);
if (err)
goto out_cleanup;
}
@@ -504,24 +545,27 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
if (err)
goto out_cleanup;
- err = ovl_do_rename(wdir, newdentry, udir, upper,
- RENAME_EXCHANGE);
+ rd.flags = RENAME_EXCHANGE;
+ err = ovl_do_rename_rd(&rd);
+ end_renaming(&rd);
if (err)
- goto out_cleanup;
+ goto out_cleanup_unlocked;
- ovl_cleanup(wdir, upper);
+ ovl_cleanup(ofs, workdir, upper);
} else {
- err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
+ err = ovl_do_rename_rd(&rd);
+ end_renaming(&rd);
if (err)
- goto out_cleanup;
+ goto out_cleanup_unlocked;
+ }
+ ovl_dir_modified(dentry->d_parent, false);
+ err = ovl_instantiate(dentry, inode, newdentry, hardlink, NULL);
+ if (err) {
+ ovl_cleanup(ofs, upperdir, newdentry);
+ dput(newdentry);
}
- err = ovl_instantiate(dentry, inode, newdentry, hardlink);
- if (err)
- goto out_cleanup;
out_dput:
dput(upper);
-out_unlock:
- unlock_rename(workdir, upperdir);
out:
if (!hardlink) {
posix_acl_release(acl);
@@ -530,59 +574,103 @@ out:
return err;
out_cleanup:
- ovl_cleanup(wdir, newdentry);
+ end_renaming(&rd);
+out_cleanup_unlocked:
+ ovl_cleanup(ofs, workdir, newdentry);
dput(newdentry);
goto out_dput;
}
+static const struct cred *ovl_override_creator_creds(const struct cred *original_creds,
+ struct dentry *dentry, struct inode *inode, umode_t mode)
+{
+ int err;
+
+ if (WARN_ON_ONCE(current->cred != ovl_creds(dentry->d_sb)))
+ return ERR_PTR(-EINVAL);
+
+ CLASS(prepare_creds, override_cred)();
+ if (!override_cred)
+ return ERR_PTR(-ENOMEM);
+
+ override_cred->fsuid = inode->i_uid;
+ override_cred->fsgid = inode->i_gid;
+
+ err = security_dentry_create_files_as(dentry, mode, &dentry->d_name,
+ original_creds, override_cred);
+ if (err)
+ return ERR_PTR(err);
+
+ return override_creds(no_free_ptr(override_cred));
+}
+
+static void ovl_revert_creator_creds(const struct cred *old_cred)
+{
+ const struct cred *override_cred;
+
+ override_cred = revert_creds(old_cred);
+ put_cred(override_cred);
+}
+
+DEFINE_CLASS(ovl_override_creator_creds,
+ const struct cred *,
+ if (!IS_ERR_OR_NULL(_T)) ovl_revert_creator_creds(_T),
+ ovl_override_creator_creds(original_creds, dentry, inode, mode),
+ const struct cred *original_creds,
+ struct dentry *dentry,
+ struct inode *inode,
+ umode_t mode)
+
+static int ovl_create_handle_whiteouts(struct dentry *dentry,
+ struct inode *inode,
+ struct ovl_cattr *attr)
+{
+ if (!ovl_dentry_is_whiteout(dentry))
+ return ovl_create_upper(dentry, inode, attr);
+
+ return ovl_create_over_whiteout(dentry, inode, attr);
+}
+
static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
struct ovl_cattr *attr, bool origin)
{
int err;
- const struct cred *old_cred;
- struct cred *override_cred;
struct dentry *parent = dentry->d_parent;
- err = ovl_copy_up(parent);
- if (err)
- return err;
+ scoped_class(override_creds_ovl, original_creds, dentry->d_sb) {
+ /*
+ * When linking a file with copy up origin into a new parent, mark the
+ * new parent dir "impure".
+ */
+ if (origin) {
+ err = ovl_set_impure(parent, ovl_dentry_upper(parent));
+ if (err)
+ return err;
+ }
- old_cred = ovl_override_creds(dentry->d_sb);
+ /*
+ * In the creation cases(create, mkdir, mknod, symlink),
+ * ovl should transfer current's fs{u,g}id to underlying
+ * fs. Because underlying fs want to initialize its new
+ * inode owner using current's fs{u,g}id. And in this
+ * case, the @inode is a new inode that is initialized
+ * in inode_init_owner() to current's fs{u,g}id. So use
+ * the inode's i_{u,g}id to override the cred's fs{u,g}id.
+ *
+ * But in the other hardlink case, ovl_link() does not
+ * create a new inode, so just use the ovl mounter's
+ * fs{u,g}id.
+ */
- /*
- * When linking a file with copy up origin into a new parent, mark the
- * new parent dir "impure".
- */
- if (origin) {
- err = ovl_set_impure(parent, ovl_dentry_upper(parent));
- if (err)
- goto out_revert_creds;
- }
+ if (attr->hardlink)
+ return ovl_create_handle_whiteouts(dentry, inode, attr);
- err = -ENOMEM;
- override_cred = prepare_creds();
- if (override_cred) {
- override_cred->fsuid = inode->i_uid;
- override_cred->fsgid = inode->i_gid;
- if (!attr->hardlink) {
- err = security_dentry_create_files_as(dentry,
- attr->mode, &dentry->d_name, old_cred,
- override_cred);
- if (err) {
- put_cred(override_cred);
- goto out_revert_creds;
- }
+ scoped_class(ovl_override_creator_creds, cred, original_creds, dentry, inode, attr->mode) {
+ if (IS_ERR(cred))
+ return PTR_ERR(cred);
+ return ovl_create_handle_whiteouts(dentry, inode, attr);
}
- put_cred(override_creds(override_cred));
- put_cred(override_cred);
-
- if (!ovl_dentry_is_whiteout(dentry))
- err = ovl_create_upper(dentry, inode, attr);
- else
- err = ovl_create_over_whiteout(dentry, inode, attr);
}
-out_revert_creds:
- revert_creds(old_cred);
return err;
}
@@ -596,6 +684,10 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
.link = link,
};
+ err = ovl_copy_up(dentry->d_parent);
+ if (err)
+ return err;
+
err = ovl_want_write(dentry);
if (err)
goto out;
@@ -607,10 +699,10 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
goto out_drop_write;
spin_lock(&inode->i_lock);
- inode->i_state |= I_CREATING;
+ inode_state_set(inode, I_CREATING);
spin_unlock(&inode->i_lock);
- inode_init_owner(inode, dentry->d_parent->d_inode, mode);
+ inode_init_owner(&nop_mnt_idmap, inode, dentry->d_parent->d_inode, mode);
attr.mode = inode->i_mode;
err = ovl_create_or_link(dentry, inode, &attr, false);
@@ -624,19 +716,20 @@ out:
return err;
}
-static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl)
+static int ovl_create(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
}
-static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static struct dentry *ovl_mkdir(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
- return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
+ return ERR_PTR(ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL));
}
-static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
- dev_t rdev)
+static int ovl_mknod(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
{
/* Don't allow creation of "whiteout" on overlay */
if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
@@ -645,22 +738,16 @@ static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
return ovl_create_object(dentry, mode, rdev, NULL);
}
-static int ovl_symlink(struct inode *dir, struct dentry *dentry,
- const char *link)
+static int ovl_symlink(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *dentry, const char *link)
{
return ovl_create_object(dentry, S_IFLNK, 0, link);
}
static int ovl_set_link_redirect(struct dentry *dentry)
{
- const struct cred *old_cred;
- int err;
-
- old_cred = ovl_override_creds(dentry->d_sb);
- err = ovl_set_redirect(dentry, false);
- revert_creds(old_cred);
-
- return err;
+ with_ovl_creds(dentry->d_sb)
+ return ovl_set_redirect(dentry, false);
}
static int ovl_link(struct dentry *old, struct inode *newdir,
@@ -669,28 +756,24 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
int err;
struct inode *inode;
- err = ovl_want_write(old);
+ err = ovl_copy_up(old);
if (err)
goto out;
- err = ovl_copy_up(old);
+ err = ovl_copy_up(new->d_parent);
if (err)
- goto out_drop_write;
+ goto out;
- err = ovl_copy_up(new->d_parent);
+ err = ovl_nlink_start(old);
if (err)
- goto out_drop_write;
+ goto out;
if (ovl_is_metacopy_dentry(old)) {
err = ovl_set_link_redirect(old);
if (err)
- goto out_drop_write;
+ goto out_nlink_end;
}
- err = ovl_nlink_start(old);
- if (err)
- goto out_drop_write;
-
inode = d_inode(old);
ihold(inode);
@@ -700,9 +783,8 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
if (err)
iput(inode);
+out_nlink_end:
ovl_nlink_end(old);
-out_drop_write:
- ovl_drop_write(old);
out:
return err;
}
@@ -715,6 +797,7 @@ static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
static int ovl_remove_and_whiteout(struct dentry *dentry,
struct list_head *list)
{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *workdir = ovl_workdir(dentry);
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
struct dentry *upper;
@@ -731,15 +814,11 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
goto out;
}
- err = ovl_lock_rename_workdir(workdir, upperdir);
- if (err)
- goto out_dput;
-
- upper = lookup_one_len(dentry->d_name.name, upperdir,
- dentry->d_name.len);
+ upper = ovl_lookup_upper_unlocked(ofs, dentry->d_name.name, upperdir,
+ dentry->d_name.len);
err = PTR_ERR(upper);
if (IS_ERR(upper))
- goto out_unlock;
+ goto out_dput;
err = -ESTALE;
if ((opaquedir && upper != opaquedir) ||
@@ -748,17 +827,13 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
goto out_dput_upper;
}
- err = ovl_cleanup_and_whiteout(workdir, d_inode(upperdir), upper);
- if (err)
- goto out_d_drop;
+ err = ovl_cleanup_and_whiteout(ofs, upperdir, upper);
+ if (!err)
+ ovl_dir_modified(dentry->d_parent, true);
- ovl_dir_modified(dentry->d_parent, true);
-out_d_drop:
d_drop(dentry);
out_dput_upper:
dput(upper);
-out_unlock:
- unlock_rename(workdir, upperdir);
out_dput:
dput(opaquedir);
out:
@@ -768,6 +843,7 @@ out:
static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
struct list_head *list)
{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
struct inode *dir = upperdir->d_inode;
struct dentry *upper;
@@ -781,22 +857,22 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
goto out;
}
- inode_lock_nested(dir, I_MUTEX_PARENT);
- upper = lookup_one_len(dentry->d_name.name, upperdir,
- dentry->d_name.len);
+ upper = ovl_start_removing_upper(ofs, upperdir,
+ &QSTR_LEN(dentry->d_name.name,
+ dentry->d_name.len));
err = PTR_ERR(upper);
if (IS_ERR(upper))
- goto out_unlock;
+ goto out_dput;
err = -ESTALE;
if ((opaquedir && upper != opaquedir) ||
(!opaquedir && !ovl_matches_upper(dentry, upper)))
- goto out_dput_upper;
+ goto out_unlock;
if (is_dir)
- err = vfs_rmdir(dir, upper);
+ err = ovl_do_rmdir(ofs, dir, upper);
else
- err = vfs_unlink(dir, upper, NULL);
+ err = ovl_do_unlink(ofs, dir, upper);
ovl_dir_modified(dentry->d_parent, ovl_type_origin(dentry));
/*
@@ -807,10 +883,9 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
*/
if (!err)
d_drop(dentry);
-out_dput_upper:
- dput(upper);
out_unlock:
- inode_unlock(dir);
+ end_removing(upper);
+out_dput:
dput(opaquedir);
out:
return err;
@@ -822,11 +897,31 @@ static bool ovl_pure_upper(struct dentry *dentry)
!ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry));
}
+static void ovl_drop_nlink(struct dentry *dentry)
+{
+ struct inode *inode = d_inode(dentry);
+ struct dentry *alias;
+
+ /* Try to find another, hashed alias */
+ spin_lock(&inode->i_lock);
+ hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
+ if (alias != dentry && !d_unhashed(alias))
+ break;
+ }
+ spin_unlock(&inode->i_lock);
+
+ /*
+ * Changes to underlying layers may cause i_nlink to lose sync with
+ * reality. In this case prevent the link count from going to zero
+ * prematurely.
+ */
+ if (inode->i_nlink > !!alias)
+ drop_nlink(inode);
+}
+
static int ovl_do_remove(struct dentry *dentry, bool is_dir)
{
int err;
- const struct cred *old_cred;
- struct dentry *upperdentry;
bool lower_positive = ovl_lower_positive(dentry);
LIST_HEAD(list);
@@ -837,29 +932,25 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
goto out;
}
- err = ovl_want_write(dentry);
- if (err)
- goto out;
-
err = ovl_copy_up(dentry->d_parent);
if (err)
- goto out_drop_write;
+ goto out;
err = ovl_nlink_start(dentry);
if (err)
- goto out_drop_write;
+ goto out;
- old_cred = ovl_override_creds(dentry->d_sb);
- if (!lower_positive)
- err = ovl_remove_upper(dentry, is_dir, &list);
- else
- err = ovl_remove_and_whiteout(dentry, &list);
- revert_creds(old_cred);
+ with_ovl_creds(dentry->d_sb) {
+ if (!lower_positive)
+ err = ovl_remove_upper(dentry, is_dir, &list);
+ else
+ err = ovl_remove_and_whiteout(dentry, &list);
+ }
if (!err) {
if (is_dir)
clear_nlink(dentry->d_inode);
else
- drop_nlink(dentry->d_inode);
+ ovl_drop_nlink(dentry);
}
ovl_nlink_end(dentry);
@@ -869,12 +960,9 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
* Note: we fail to update ctime if there was no copy-up, only a
* whiteout
*/
- upperdentry = ovl_dentry_upper(dentry);
- if (upperdentry)
- ovl_copyattr(d_inode(upperdentry), d_inode(dentry));
+ if (ovl_dentry_upper(dentry))
+ ovl_copyattr(d_inode(dentry));
-out_drop_write:
- ovl_drop_write(dentry);
out:
ovl_cache_free(&list);
return err;
@@ -899,7 +987,7 @@ static bool ovl_type_merge_or_lower(struct dentry *dentry)
static bool ovl_can_move(struct dentry *dentry)
{
- return ovl_redirect_dir(dentry->d_sb) ||
+ return ovl_redirect_dir(OVL_FS(dentry->d_sb)) ||
!d_is_dir(dentry) || !ovl_type_merge_or_lower(dentry);
}
@@ -943,8 +1031,8 @@ static char *ovl_get_redirect(struct dentry *dentry, bool abs_redirect)
buflen -= thislen;
memcpy(&buf[buflen], name, thislen);
- tmp = dget_dlock(d->d_parent);
spin_unlock(&d->d_lock);
+ tmp = dget_parent(d);
dput(d);
d = tmp;
@@ -993,6 +1081,7 @@ static bool ovl_need_absolute_redirect(struct dentry *dentry, bool samedir)
static int ovl_set_redirect(struct dentry *dentry, bool samedir)
{
int err;
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
const char *redirect = ovl_dentry_get_redirect(dentry);
bool absolute_redirect = ovl_need_absolute_redirect(dentry, samedir);
@@ -1003,7 +1092,7 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir)
if (IS_ERR(redirect))
return PTR_ERR(redirect);
- err = ovl_check_setxattr(dentry, ovl_dentry_upper(dentry),
+ err = ovl_check_setxattr(ofs, ovl_dentry_upper(dentry),
OVL_XATTR_REDIRECT,
redirect, strlen(redirect), -EXDEV);
if (!err) {
@@ -1012,7 +1101,7 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir)
spin_unlock(&dentry->d_lock);
} else {
kfree(redirect);
- pr_warn_ratelimited("overlayfs: failed to set redirect (%i)\n",
+ pr_warn_ratelimited("failed to set redirect (%i)\n",
err);
/* Fall back to userspace copy-up */
err = -EXDEV;
@@ -1020,98 +1109,107 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir)
return err;
}
-static int ovl_rename(struct inode *olddir, struct dentry *old,
- struct inode *newdir, struct dentry *new,
- unsigned int flags)
+struct ovl_renamedata {
+ struct renamedata;
+ struct dentry *opaquedir;
+ bool cleanup_whiteout;
+ bool update_nlink;
+ bool overwrite;
+};
+
+static int ovl_rename_start(struct ovl_renamedata *ovlrd, struct list_head *list)
{
- int err;
- struct dentry *old_upperdir;
- struct dentry *new_upperdir;
- struct dentry *olddentry;
- struct dentry *newdentry;
- struct dentry *trap;
- bool old_opaque;
- bool new_opaque;
- bool cleanup_whiteout = false;
- bool update_nlink = false;
- bool overwrite = !(flags & RENAME_EXCHANGE);
+ struct dentry *old = ovlrd->old_dentry;
+ struct dentry *new = ovlrd->new_dentry;
bool is_dir = d_is_dir(old);
bool new_is_dir = d_is_dir(new);
- bool samedir = olddir == newdir;
- struct dentry *opaquedir = NULL;
- const struct cred *old_cred = NULL;
- LIST_HEAD(list);
+ int err;
- err = -EINVAL;
- if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
- goto out;
+ if (ovlrd->flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
+ return -EINVAL;
- flags &= ~RENAME_NOREPLACE;
+ ovlrd->flags &= ~RENAME_NOREPLACE;
/* Don't copy up directory trees */
err = -EXDEV;
if (!ovl_can_move(old))
- goto out;
- if (!overwrite && !ovl_can_move(new))
- goto out;
+ return err;
+ if (!ovlrd->overwrite && !ovl_can_move(new))
+ return err;
- if (overwrite && new_is_dir && !ovl_pure_upper(new)) {
- err = ovl_check_empty_dir(new, &list);
+ if (ovlrd->overwrite && new_is_dir && !ovl_pure_upper(new)) {
+ err = ovl_check_empty_dir(new, list);
if (err)
- goto out;
+ return err;
}
- if (overwrite) {
+ if (ovlrd->overwrite) {
if (ovl_lower_positive(old)) {
if (!ovl_dentry_is_whiteout(new)) {
/* Whiteout source */
- flags |= RENAME_WHITEOUT;
+ ovlrd->flags |= RENAME_WHITEOUT;
} else {
/* Switch whiteouts */
- flags |= RENAME_EXCHANGE;
+ ovlrd->flags |= RENAME_EXCHANGE;
}
} else if (is_dir && ovl_dentry_is_whiteout(new)) {
- flags |= RENAME_EXCHANGE;
- cleanup_whiteout = true;
+ ovlrd->flags |= RENAME_EXCHANGE;
+ ovlrd->cleanup_whiteout = true;
}
}
- err = ovl_want_write(old);
- if (err)
- goto out;
-
err = ovl_copy_up(old);
if (err)
- goto out_drop_write;
+ return err;
err = ovl_copy_up(new->d_parent);
if (err)
- goto out_drop_write;
- if (!overwrite) {
+ return err;
+
+ if (!ovlrd->overwrite) {
err = ovl_copy_up(new);
if (err)
- goto out_drop_write;
+ return err;
} else if (d_inode(new)) {
err = ovl_nlink_start(new);
if (err)
- goto out_drop_write;
+ return err;
- update_nlink = true;
+ ovlrd->update_nlink = true;
}
- old_cred = ovl_override_creds(old->d_sb);
-
- if (!list_empty(&list)) {
- opaquedir = ovl_clear_empty(new, &list);
- err = PTR_ERR(opaquedir);
- if (IS_ERR(opaquedir)) {
- opaquedir = NULL;
- goto out_revert_creds;
- }
+ if (!ovlrd->update_nlink) {
+ /* ovl_nlink_start() took ovl_want_write() */
+ err = ovl_want_write(old);
+ if (err)
+ return err;
}
- old_upperdir = ovl_dentry_upper(old->d_parent);
- new_upperdir = ovl_dentry_upper(new->d_parent);
+ return 0;
+}
+
+static int ovl_rename_upper(struct ovl_renamedata *ovlrd, struct list_head *list)
+{
+ struct dentry *old = ovlrd->old_dentry;
+ struct dentry *new = ovlrd->new_dentry;
+ struct ovl_fs *ofs = OVL_FS(old->d_sb);
+ struct dentry *old_upperdir = ovl_dentry_upper(old->d_parent);
+ struct dentry *new_upperdir = ovl_dentry_upper(new->d_parent);
+ bool is_dir = d_is_dir(old);
+ bool new_is_dir = d_is_dir(new);
+ bool samedir = old->d_parent == new->d_parent;
+ struct renamedata rd = {};
+ struct dentry *de;
+ struct dentry *whiteout = NULL;
+ bool old_opaque, new_opaque;
+ int err;
+
+ if (!list_empty(list)) {
+ de = ovl_clear_empty(new, list);
+ if (IS_ERR(de))
+ return PTR_ERR(de);
+ ovlrd->opaquedir = de;
+ }
if (!samedir) {
/*
@@ -1123,118 +1221,242 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
if (ovl_type_origin(old)) {
err = ovl_set_impure(new->d_parent, new_upperdir);
if (err)
- goto out_revert_creds;
+ return err;
}
- if (!overwrite && ovl_type_origin(new)) {
+ if (!ovlrd->overwrite && ovl_type_origin(new)) {
err = ovl_set_impure(old->d_parent, old_upperdir);
if (err)
- goto out_revert_creds;
+ return err;
}
}
- trap = lock_rename(new_upperdir, old_upperdir);
+ rd.mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.old_parent = old_upperdir;
+ rd.new_parent = new_upperdir;
+ rd.flags = ovlrd->flags;
- olddentry = lookup_one_len(old->d_name.name, old_upperdir,
- old->d_name.len);
- err = PTR_ERR(olddentry);
- if (IS_ERR(olddentry))
- goto out_unlock;
+ err = start_renaming(&rd, 0,
+ &QSTR_LEN(old->d_name.name, old->d_name.len),
+ &QSTR_LEN(new->d_name.name, new->d_name.len));
+ if (err)
+ return err;
err = -ESTALE;
- if (!ovl_matches_upper(old, olddentry))
- goto out_dput_old;
-
- newdentry = lookup_one_len(new->d_name.name, new_upperdir,
- new->d_name.len);
- err = PTR_ERR(newdentry);
- if (IS_ERR(newdentry))
- goto out_dput_old;
+ if (!ovl_matches_upper(old, rd.old_dentry))
+ goto out_unlock;
old_opaque = ovl_dentry_is_opaque(old);
new_opaque = ovl_dentry_is_opaque(new);
err = -ESTALE;
if (d_inode(new) && ovl_dentry_upper(new)) {
- if (opaquedir) {
- if (newdentry != opaquedir)
- goto out_dput;
+ if (ovlrd->opaquedir) {
+ if (rd.new_dentry != ovlrd->opaquedir)
+ goto out_unlock;
} else {
- if (!ovl_matches_upper(new, newdentry))
- goto out_dput;
+ if (!ovl_matches_upper(new, rd.new_dentry))
+ goto out_unlock;
}
} else {
- if (!d_is_negative(newdentry) &&
- (!new_opaque || !ovl_is_whiteout(newdentry)))
- goto out_dput;
+ if (!d_is_negative(rd.new_dentry)) {
+ if (!new_opaque || !ovl_upper_is_whiteout(ofs, rd.new_dentry))
+ goto out_unlock;
+ } else {
+ if (ovlrd->flags & RENAME_EXCHANGE)
+ goto out_unlock;
+ }
}
- if (olddentry == trap)
- goto out_dput;
- if (newdentry == trap)
- goto out_dput;
-
- if (WARN_ON(olddentry->d_inode == newdentry->d_inode))
- goto out_dput;
+ if (rd.old_dentry->d_inode == rd.new_dentry->d_inode)
+ goto out_unlock;
err = 0;
if (ovl_type_merge_or_lower(old))
err = ovl_set_redirect(old, samedir);
else if (is_dir && !old_opaque && ovl_type_merge(new->d_parent))
- err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
+ err = ovl_set_opaque_xerr(old, rd.old_dentry, -EXDEV);
if (err)
- goto out_dput;
+ goto out_unlock;
- if (!overwrite && ovl_type_merge_or_lower(new))
+ if (!ovlrd->overwrite && ovl_type_merge_or_lower(new))
err = ovl_set_redirect(new, samedir);
- else if (!overwrite && new_is_dir && !new_opaque &&
+ else if (!ovlrd->overwrite && new_is_dir && !new_opaque &&
ovl_type_merge(old->d_parent))
- err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
+ err = ovl_set_opaque_xerr(new, rd.new_dentry, -EXDEV);
if (err)
- goto out_dput;
+ goto out_unlock;
+
+ err = ovl_do_rename_rd(&rd);
+
+ if (!err && ovlrd->cleanup_whiteout)
+ whiteout = dget(rd.new_dentry);
+
+out_unlock:
+ end_renaming(&rd);
- err = ovl_do_rename(old_upperdir->d_inode, olddentry,
- new_upperdir->d_inode, newdentry, flags);
if (err)
- goto out_dput;
+ return err;
- if (cleanup_whiteout)
- ovl_cleanup(old_upperdir->d_inode, newdentry);
+ if (whiteout) {
+ ovl_cleanup(ofs, old_upperdir, whiteout);
+ dput(whiteout);
+ }
- if (overwrite && d_inode(new)) {
+ if (ovlrd->overwrite && d_inode(new)) {
if (new_is_dir)
clear_nlink(d_inode(new));
else
- drop_nlink(d_inode(new));
+ ovl_drop_nlink(new);
}
ovl_dir_modified(old->d_parent, ovl_type_origin(old) ||
- (!overwrite && ovl_type_origin(new)));
+ (!ovlrd->overwrite && ovl_type_origin(new)));
ovl_dir_modified(new->d_parent, ovl_type_origin(old) ||
(d_inode(new) && ovl_type_origin(new)));
/* copy ctime: */
- ovl_copyattr(d_inode(olddentry), d_inode(old));
+ ovl_copyattr(d_inode(old));
if (d_inode(new) && ovl_dentry_upper(new))
- ovl_copyattr(d_inode(newdentry), d_inode(new));
+ ovl_copyattr(d_inode(new));
-out_dput:
- dput(newdentry);
-out_dput_old:
- dput(olddentry);
-out_unlock:
- unlock_rename(new_upperdir, old_upperdir);
-out_revert_creds:
- revert_creds(old_cred);
- if (update_nlink)
- ovl_nlink_end(new);
-out_drop_write:
- ovl_drop_write(old);
-out:
- dput(opaquedir);
+ return err;
+}
+
+static void ovl_rename_end(struct ovl_renamedata *ovlrd)
+{
+ if (ovlrd->update_nlink)
+ ovl_nlink_end(ovlrd->new_dentry);
+ else
+ ovl_drop_write(ovlrd->old_dentry);
+}
+
+static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
+ struct dentry *old, struct inode *newdir,
+ struct dentry *new, unsigned int flags)
+{
+ struct ovl_renamedata ovlrd = {
+ .old_parent = old->d_parent,
+ .old_dentry = old,
+ .new_parent = new->d_parent,
+ .new_dentry = new,
+ .flags = flags,
+ .overwrite = !(flags & RENAME_EXCHANGE),
+ };
+ LIST_HEAD(list);
+ int err;
+
+ err = ovl_rename_start(&ovlrd, &list);
+ if (!err) {
+ with_ovl_creds(old->d_sb)
+ err = ovl_rename_upper(&ovlrd, &list);
+ ovl_rename_end(&ovlrd);
+ }
+
+ dput(ovlrd.opaquedir);
ovl_cache_free(&list);
return err;
}
+static int ovl_create_tmpfile(struct file *file, struct dentry *dentry,
+ struct inode *inode, umode_t mode)
+{
+ struct path realparentpath;
+ struct file *realfile;
+ struct ovl_file *of;
+ struct dentry *newdentry;
+ /* It's okay to set O_NOATIME, since the owner will be current fsuid */
+ int flags = file->f_flags | OVL_OPEN_FLAGS;
+ int err;
+
+ scoped_class(override_creds_ovl, original_creds, dentry->d_sb) {
+ scoped_class(ovl_override_creator_creds, cred, original_creds, dentry, inode, mode) {
+ if (IS_ERR(cred))
+ return PTR_ERR(cred);
+
+ ovl_path_upper(dentry->d_parent, &realparentpath);
+ realfile = backing_tmpfile_open(&file->f_path, flags, &realparentpath,
+ mode, current_cred());
+ err = PTR_ERR_OR_ZERO(realfile);
+ pr_debug("tmpfile/open(%pd2, 0%o) = %i\n", realparentpath.dentry, mode, err);
+ if (err)
+ return err;
+
+ of = ovl_file_alloc(realfile);
+ if (!of) {
+ fput(realfile);
+ return -ENOMEM;
+ }
+
+ /* ovl_instantiate() consumes the newdentry reference on success */
+ newdentry = dget(realfile->f_path.dentry);
+ err = ovl_instantiate(dentry, inode, newdentry, false, file);
+ if (!err) {
+ file->private_data = of;
+ } else {
+ dput(newdentry);
+ ovl_file_free(of);
+ }
+ }
+ }
+ return err;
+}
+
+static int ovl_dummy_open(struct inode *inode, struct file *file)
+{
+ return 0;
+}
+
+static int ovl_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
+ struct file *file, umode_t mode)
+{
+ int err;
+ struct dentry *dentry = file->f_path.dentry;
+ struct inode *inode;
+
+ if (!OVL_FS(dentry->d_sb)->tmpfile)
+ return -EOPNOTSUPP;
+
+ err = ovl_copy_up(dentry->d_parent);
+ if (err)
+ return err;
+
+ err = ovl_want_write(dentry);
+ if (err)
+ return err;
+
+ err = -ENOMEM;
+ inode = ovl_new_inode(dentry->d_sb, mode, 0);
+ if (!inode)
+ goto drop_write;
+
+ inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
+ err = ovl_create_tmpfile(file, dentry, inode, inode->i_mode);
+ if (err)
+ goto put_inode;
+
+ /*
+ * Check if the preallocated inode was actually used. Having something
+ * else assigned to the dentry shouldn't happen as that would indicate
+ * that the backing tmpfile "leaked" out of overlayfs.
+ */
+ err = -EIO;
+ if (WARN_ON(inode != d_inode(dentry)))
+ goto put_realfile;
+
+ /* inode reference was transferred to dentry */
+ inode = NULL;
+ err = finish_open(file, dentry, ovl_dummy_open);
+put_realfile:
+ /* Without FMODE_OPENED ->release() won't be called on @file */
+ if (!(file->f_mode & FMODE_OPENED))
+ ovl_file_free(file->private_data);
+put_inode:
+ iput(inode);
+drop_write:
+ ovl_drop_write(dentry);
+ return err;
+}
+
const struct inode_operations ovl_dir_inode_operations = {
.lookup = ovl_lookup,
.mkdir = ovl_mkdir,
@@ -1249,6 +1471,11 @@ const struct inode_operations ovl_dir_inode_operations = {
.permission = ovl_permission,
.getattr = ovl_getattr,
.listxattr = ovl_listxattr,
+ .get_inode_acl = ovl_get_inode_acl,
.get_acl = ovl_get_acl,
+ .set_acl = ovl_set_acl,
.update_time = ovl_update_time,
+ .fileattr_get = ovl_fileattr_get,
+ .fileattr_set = ovl_fileattr_set,
+ .tmpfile = ovl_tmpfile,
};
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 54e5d17d7f3e..83f80fdb1567 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Overlayfs NFS export support.
*
* Amir Goldstein <amir73il@gmail.com>
*
* Copyright (C) 2017-2018 CTERA Networks. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
*/
#include <linux/fs.h>
@@ -26,14 +23,9 @@ static int ovl_encode_maybe_copy_up(struct dentry *dentry)
if (ovl_dentry_upper(dentry))
return 0;
- err = ovl_want_write(dentry);
- if (!err) {
- err = ovl_copy_up(dentry);
- ovl_drop_write(dentry);
- }
-
+ err = ovl_copy_up(dentry);
if (err) {
- pr_warn_ratelimited("overlayfs: failed to copy up on encode (%pd2, err=%i)\n",
+ pr_warn_ratelimited("failed to copy up on encode (%pd2, err=%i)\n",
dentry, err);
}
@@ -83,7 +75,7 @@ static int ovl_connectable_layer(struct dentry *dentry)
/* We can get overlay root from root of any layer */
if (dentry == dentry->d_sb->s_root)
- return oe->numlower;
+ return ovl_numlower(oe);
/*
* If it's an unindexed merge dir, then it's not connectable with any
@@ -94,7 +86,7 @@ static int ovl_connectable_layer(struct dentry *dentry)
return 0;
/* We can get upper/overlay path from indexed/lower dentry */
- return oe->lowerstack[0].layer->idx;
+ return ovl_lowerstack(oe)->layer->idx;
}
/*
@@ -108,6 +100,7 @@ static int ovl_connectable_layer(struct dentry *dentry)
static int ovl_connect_layer(struct dentry *dentry)
{
struct dentry *next, *parent = NULL;
+ struct ovl_entry *oe = OVL_E(dentry);
int origin_layer;
int err = 0;
@@ -115,7 +108,7 @@ static int ovl_connect_layer(struct dentry *dentry)
WARN_ON(!ovl_dentry_lower(dentry)))
return -EIO;
- origin_layer = OVL_E(dentry)->lowerstack[0].layer->idx;
+ origin_layer = ovl_lowerstack(oe)->layer->idx;
if (ovl_dentry_test_flag(OVL_E_CONNECTED, dentry))
return origin_layer;
@@ -176,29 +169,44 @@ static int ovl_connect_layer(struct dentry *dentry)
* U = upper file handle
* L = lower file handle
*
- * (*) Connecting an overlay dir from real lower dentry is not always
+ * (*) Decoding a connected overlay dir from real lower dentry is not always
* possible when there are redirects in lower layers and non-indexed merge dirs.
* To mitigate those case, we may copy up the lower dir ancestor before encode
- * a lower dir file handle.
+ * of a decodable file handle for non-upper dir.
*
* Return 0 for upper file handle, > 0 for lower file handle or < 0 on error.
*/
-static int ovl_check_encode_origin(struct dentry *dentry)
+static int ovl_check_encode_origin(struct inode *inode)
{
- struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+ struct ovl_fs *ofs = OVL_FS(inode->i_sb);
+ bool decodable = ofs->config.nfs_export;
+ struct dentry *dentry;
+ int err;
+
+ /* No upper layer? */
+ if (!ovl_upper_mnt(ofs))
+ return 1;
+
+ /* Lower file handle for non-upper non-decodable */
+ if (!ovl_inode_upper(inode) && !decodable)
+ return 1;
/* Upper file handle for pure upper */
- if (!ovl_dentry_lower(dentry))
+ if (!ovl_inode_lower(inode))
return 0;
/*
- * Upper file handle for non-indexed upper.
- *
* Root is never indexed, so if there's an upper layer, encode upper for
* root.
*/
- if (ovl_dentry_upper(dentry) &&
- !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+ if (inode == d_inode(inode->i_sb->s_root))
+ return 0;
+
+ /*
+ * Upper decodable file handle for non-indexed upper.
+ */
+ if (ovl_inode_upper(inode) && decodable &&
+ !ovl_test_flag(OVL_INDEX, inode))
return 0;
/*
@@ -207,84 +215,77 @@ static int ovl_check_encode_origin(struct dentry *dentry)
* ovl_connect_layer() will try to make origin's layer "connected" by
* copying up a "connectable" ancestor.
*/
- if (d_is_dir(dentry) && ofs->upper_mnt)
- return ovl_connect_layer(dentry);
+ if (!decodable || !S_ISDIR(inode->i_mode))
+ return 1;
+
+ dentry = d_find_any_alias(inode);
+ if (!dentry)
+ return -ENOENT;
+
+ err = ovl_connect_layer(dentry);
+ dput(dentry);
+ if (err < 0)
+ return err;
/* Lower file handle for indexed and non-upper dir/non-dir */
return 1;
}
-static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen)
+static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct inode *inode,
+ u32 *fid, int buflen)
{
struct ovl_fh *fh = NULL;
int err, enc_lower;
+ int len;
/*
* Check if we should encode a lower or upper file handle and maybe
* copy up an ancestor to make lower file handle connectable.
*/
- err = enc_lower = ovl_check_encode_origin(dentry);
+ err = enc_lower = ovl_check_encode_origin(inode);
if (enc_lower < 0)
goto fail;
/* Encode an upper or lower file handle */
- fh = ovl_encode_real_fh(enc_lower ? ovl_dentry_lower(dentry) :
- ovl_dentry_upper(dentry), !enc_lower);
- err = PTR_ERR(fh);
+ fh = ovl_encode_real_fh(ofs, enc_lower ? ovl_inode_lower(inode) :
+ ovl_inode_upper(inode), !enc_lower);
if (IS_ERR(fh))
- goto fail;
-
- err = -EOVERFLOW;
- if (fh->len > buflen)
- goto fail;
+ return PTR_ERR(fh);
- memcpy(buf, (char *)fh, fh->len);
- err = fh->len;
+ len = OVL_FH_LEN(fh);
+ if (len <= buflen)
+ memcpy(fid, fh, len);
+ err = len;
out:
kfree(fh);
return err;
fail:
- pr_warn_ratelimited("overlayfs: failed to encode file handle (%pd2, err=%i, buflen=%d, len=%d, type=%d)\n",
- dentry, err, buflen, fh ? (int)fh->len : 0,
- fh ? fh->type : 0);
+ pr_warn_ratelimited("failed to encode file handle (ino=%lu, err=%i)\n",
+ inode->i_ino, err);
goto out;
}
-static int ovl_dentry_to_fh(struct dentry *dentry, u32 *fid, int *max_len)
-{
- int res, len = *max_len << 2;
-
- res = ovl_d_to_fh(dentry, (char *)fid, len);
- if (res <= 0)
- return FILEID_INVALID;
-
- len = res;
-
- /* Round up to dwords */
- *max_len = (len + 3) >> 2;
- return OVL_FILEID;
-}
-
static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len,
struct inode *parent)
{
- struct dentry *dentry;
- int type;
+ struct ovl_fs *ofs = OVL_FS(inode->i_sb);
+ int bytes, buflen = *max_len << 2;
/* TODO: encode connectable file handles */
if (parent)
return FILEID_INVALID;
- dentry = d_find_any_alias(inode);
- if (WARN_ON(!dentry))
+ bytes = ovl_dentry_to_fid(ofs, inode, fid, buflen);
+ if (bytes <= 0)
return FILEID_INVALID;
- type = ovl_dentry_to_fh(dentry, fid, max_len);
+ *max_len = bytes >> 2;
+ if (bytes > buflen)
+ return FILEID_INVALID;
- dput(dentry);
- return type;
+ return OVL_FILEID_V1;
}
/*
@@ -297,22 +298,29 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb,
{
struct dentry *lower = lowerpath ? lowerpath->dentry : NULL;
struct dentry *upper = upper_alias ?: index;
- struct dentry *dentry;
- struct inode *inode;
+ struct inode *inode = NULL;
struct ovl_entry *oe;
struct ovl_inode_params oip = {
- .lowerpath = lowerpath,
.index = index,
- .numlower = !!lower
};
/* We get overlay directory dentries with ovl_lookup_real() */
if (d_is_dir(upper ?: lower))
return ERR_PTR(-EIO);
+ oe = ovl_alloc_entry(!!lower);
+ if (!oe)
+ return ERR_PTR(-ENOMEM);
+
oip.upperdentry = dget(upper);
+ if (lower) {
+ ovl_lowerstack(oe)->dentry = dget(lower);
+ ovl_lowerstack(oe)->layer = lowerpath->layer;
+ }
+ oip.oe = oe;
inode = ovl_get_inode(sb, &oip);
if (IS_ERR(inode)) {
+ ovl_free_entry(oe);
dput(upper);
return ERR_CAST(inode);
}
@@ -320,44 +328,22 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb,
if (upper)
ovl_set_flag(OVL_UPPERDATA, inode);
- dentry = d_find_any_alias(inode);
- if (!dentry) {
- dentry = d_alloc_anon(inode->i_sb);
- if (!dentry)
- goto nomem;
- oe = ovl_alloc_entry(lower ? 1 : 0);
- if (!oe)
- goto nomem;
-
- if (lower) {
- oe->lowerstack->dentry = dget(lower);
- oe->lowerstack->layer = lowerpath->layer;
- }
- dentry->d_fsdata = oe;
- if (upper_alias)
- ovl_dentry_set_upper_alias(dentry);
- }
-
- return d_instantiate_anon(dentry, inode);
-
-nomem:
- iput(inode);
- dput(dentry);
- return ERR_PTR(-ENOMEM);
+ return d_obtain_alias(inode);
}
-/* Get the upper or lower dentry in stach whose on layer @idx */
+/* Get the upper or lower dentry in stack whose on layer @idx */
static struct dentry *ovl_dentry_real_at(struct dentry *dentry, int idx)
{
- struct ovl_entry *oe = dentry->d_fsdata;
+ struct ovl_entry *oe = OVL_E(dentry);
+ struct ovl_path *lowerstack = ovl_lowerstack(oe);
int i;
if (!idx)
return ovl_dentry_upper(dentry);
- for (i = 0; i < oe->numlower; i++) {
- if (oe->lowerstack[i].layer->idx == idx)
- return oe->lowerstack[i].dentry;
+ for (i = 0; i < ovl_numlower(oe); i++) {
+ if (lowerstack[i].layer->idx == idx)
+ return lowerstack[i].dentry;
}
return NULL;
@@ -371,7 +357,7 @@ static struct dentry *ovl_dentry_real_at(struct dentry *dentry, int idx)
*/
static struct dentry *ovl_lookup_real_one(struct dentry *connected,
struct dentry *real,
- struct ovl_layer *layer)
+ const struct ovl_layer *layer)
{
struct inode *dir = d_inode(connected);
struct dentry *this, *parent = NULL;
@@ -398,7 +384,11 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected,
* pointer because we hold no lock on the real dentry.
*/
take_dentry_name_snapshot(&name, real);
- this = lookup_one_len(name.name, connected, strlen(name.name));
+ /*
+ * No idmap handling here: it's an internal lookup.
+ */
+ this = lookup_noperm(&name.name, connected);
+ release_dentry_name_snapshot(&name);
err = PTR_ERR(this);
if (IS_ERR(this)) {
goto fail;
@@ -413,13 +403,12 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected,
}
out:
- release_dentry_name_snapshot(&name);
dput(parent);
inode_unlock(dir);
return this;
fail:
- pr_warn_ratelimited("overlayfs: failed to lookup one by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
+ pr_warn_ratelimited("failed to lookup one by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
real, layer->idx, connected, err);
this = ERR_PTR(err);
goto out;
@@ -427,17 +416,16 @@ fail:
static struct dentry *ovl_lookup_real(struct super_block *sb,
struct dentry *real,
- struct ovl_layer *layer);
+ const struct ovl_layer *layer);
/*
* Lookup an indexed or hashed overlay dentry by real inode.
*/
static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
struct dentry *real,
- struct ovl_layer *layer)
+ const struct ovl_layer *layer)
{
- struct ovl_fs *ofs = sb->s_fs_info;
- struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt };
+ struct ovl_fs *ofs = OVL_FS(sb);
struct dentry *index = NULL;
struct dentry *this = NULL;
struct inode *inode;
@@ -458,7 +446,7 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
* For decoded lower dir file handle, lookup index by origin to check
* if lower dir was copied up and and/or removed.
*/
- if (!this && layer->idx && ofs->indexdir && !WARN_ON(!d_is_dir(real))) {
+ if (!this && layer->idx && ovl_indexdir(sb) && !WARN_ON(!d_is_dir(real))) {
index = ovl_lookup_index(ofs, NULL, real, false);
if (IS_ERR(index))
return index;
@@ -466,7 +454,7 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
/* Get connected upper overlay dir from index */
if (index) {
- struct dentry *upper = ovl_index_upper(ofs, index);
+ struct dentry *upper = ovl_index_upper(ofs, index, true);
dput(index);
if (IS_ERR_OR_NULL(upper))
@@ -479,14 +467,14 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
* recursive call walks back from indexed upper to the topmost
* connected/hashed upper parent (or up to root).
*/
- this = ovl_lookup_real(sb, upper, &upper_layer);
+ this = ovl_lookup_real(sb, upper, &ofs->layers[0]);
dput(upper);
}
if (IS_ERR_OR_NULL(this))
return this;
- if (WARN_ON(ovl_dentry_real_at(this, layer->idx) != real)) {
+ if (ovl_dentry_real_at(this, layer->idx) != real) {
dput(this);
this = ERR_PTR(-EIO);
}
@@ -500,7 +488,7 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
*/
static struct dentry *ovl_lookup_real_ancestor(struct super_block *sb,
struct dentry *real,
- struct ovl_layer *layer)
+ const struct ovl_layer *layer)
{
struct dentry *next, *parent = NULL;
struct dentry *ancestor = ERR_PTR(-EIO);
@@ -553,7 +541,7 @@ static struct dentry *ovl_lookup_real_ancestor(struct super_block *sb,
*/
static struct dentry *ovl_lookup_real(struct super_block *sb,
struct dentry *real,
- struct ovl_layer *layer)
+ const struct ovl_layer *layer)
{
struct dentry *connected;
int err = 0;
@@ -644,7 +632,7 @@ static struct dentry *ovl_lookup_real(struct super_block *sb,
return connected;
fail:
- pr_warn_ratelimited("overlayfs: failed to lookup by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
+ pr_warn_ratelimited("failed to lookup by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
real, layer->idx, connected, err);
dput(connected);
return ERR_PTR(err);
@@ -658,9 +646,8 @@ static struct dentry *ovl_get_dentry(struct super_block *sb,
struct ovl_path *lowerpath,
struct dentry *index)
{
- struct ovl_fs *ofs = sb->s_fs_info;
- struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt };
- struct ovl_layer *layer = upper ? &upper_layer : lowerpath->layer;
+ struct ovl_fs *ofs = OVL_FS(sb);
+ const struct ovl_layer *layer = upper ? &ofs->layers[0] : lowerpath->layer;
struct dentry *real = upper ?: (index ?: lowerpath->dentry);
/*
@@ -684,14 +671,14 @@ static struct dentry *ovl_get_dentry(struct super_block *sb,
static struct dentry *ovl_upper_fh_to_d(struct super_block *sb,
struct ovl_fh *fh)
{
- struct ovl_fs *ofs = sb->s_fs_info;
+ struct ovl_fs *ofs = OVL_FS(sb);
struct dentry *dentry;
struct dentry *upper;
- if (!ofs->upper_mnt)
+ if (!ovl_upper_mnt(ofs))
return ERR_PTR(-EACCES);
- upper = ovl_decode_real_fh(fh, ofs->upper_mnt, true);
+ upper = ovl_decode_real_fh(ofs, fh, ovl_upper_mnt(ofs), true);
if (IS_ERR_OR_NULL(upper))
return upper;
@@ -704,7 +691,7 @@ static struct dentry *ovl_upper_fh_to_d(struct super_block *sb,
static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
struct ovl_fh *fh)
{
- struct ovl_fs *ofs = sb->s_fs_info;
+ struct ovl_fs *ofs = OVL_FS(sb);
struct ovl_path origin = { };
struct ovl_path *stack = &origin;
struct dentry *dentry = NULL;
@@ -732,7 +719,7 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
}
/* Then lookup indexed upper/whiteout by origin fh */
- if (ofs->indexdir) {
+ if (ovl_indexdir(sb)) {
index = ovl_get_index_fh(ofs, fh);
err = PTR_ERR(index);
if (IS_ERR(index)) {
@@ -743,7 +730,7 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
/* Then try to get a connected upper dir by index */
if (index && d_is_dir(index)) {
- struct dentry *upper = ovl_index_upper(ofs, index);
+ struct dentry *upper = ovl_index_upper(ofs, index, true);
err = PTR_ERR(upper);
if (IS_ERR_OR_NULL(upper))
@@ -763,7 +750,7 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
goto out_err;
}
if (index) {
- err = ovl_verify_origin(index, origin.dentry, false);
+ err = ovl_verify_origin(ofs, index, origin.dentry, false);
if (err)
goto out_err;
}
@@ -781,24 +768,48 @@ out_err:
goto out;
}
+static struct ovl_fh *ovl_fid_to_fh(struct fid *fid, int buflen, int fh_type)
+{
+ struct ovl_fh *fh;
+
+ /* If on-wire inner fid is aligned - nothing to do */
+ if (fh_type == OVL_FILEID_V1)
+ return (struct ovl_fh *)fid;
+
+ if (fh_type != OVL_FILEID_V0)
+ return ERR_PTR(-EINVAL);
+
+ if (buflen <= OVL_FH_WIRE_OFFSET)
+ return ERR_PTR(-EINVAL);
+
+ fh = kzalloc(buflen, GFP_KERNEL);
+ if (!fh)
+ return ERR_PTR(-ENOMEM);
+
+ /* Copy unaligned inner fh into aligned buffer */
+ memcpy(fh->buf, fid, buflen - OVL_FH_WIRE_OFFSET);
+ return fh;
+}
+
static struct dentry *ovl_fh_to_dentry(struct super_block *sb, struct fid *fid,
int fh_len, int fh_type)
{
struct dentry *dentry = NULL;
- struct ovl_fh *fh = (struct ovl_fh *) fid;
+ struct ovl_fh *fh = NULL;
int len = fh_len << 2;
unsigned int flags = 0;
int err;
- err = -EINVAL;
- if (fh_type != OVL_FILEID)
+ fh = ovl_fid_to_fh(fid, len, fh_type);
+ err = PTR_ERR(fh);
+ if (IS_ERR(fh))
goto out_err;
err = ovl_check_fh_len(fh, len);
if (err)
goto out_err;
- flags = fh->flags;
+ flags = fh->fb.flags;
dentry = (flags & OVL_FH_FLAG_PATH_UPPER) ?
ovl_upper_fh_to_d(sb, fh) :
ovl_lower_fh_to_d(sb, fh);
@@ -806,18 +817,24 @@ static struct dentry *ovl_fh_to_dentry(struct super_block *sb, struct fid *fid,
if (IS_ERR(dentry) && err != -ESTALE)
goto out_err;
+out:
+ /* We may have needed to re-align OVL_FILEID_V0 */
+ if (!IS_ERR_OR_NULL(fh) && fh != (void *)fid)
+ kfree(fh);
+
return dentry;
out_err:
- pr_warn_ratelimited("overlayfs: failed to decode file handle (len=%d, type=%d, flags=%x, err=%i)\n",
- len, fh_type, flags, err);
- return ERR_PTR(err);
+ pr_warn_ratelimited("failed to decode file handle (len=%d, type=%d, flags=%x, err=%i)\n",
+ fh_len, fh_type, flags, err);
+ dentry = ERR_PTR(err);
+ goto out;
}
static struct dentry *ovl_fh_to_parent(struct super_block *sb, struct fid *fid,
int fh_len, int fh_type)
{
- pr_warn_ratelimited("overlayfs: connectable file handles not supported; use 'no_subtree_check' exportfs option.\n");
+ pr_warn_ratelimited("connectable file handles not supported; use 'no_subtree_check' exportfs option.\n");
return ERR_PTR(-EACCES);
}
@@ -849,3 +866,8 @@ const struct export_operations ovl_export_operations = {
.get_name = ovl_get_name,
.get_parent = ovl_get_parent,
};
+
+/* encode_fh() encodes non-decodable file handles with nfs_export=off */
+const struct export_operations ovl_export_fid_operations = {
+ .encode_fh = ovl_encode_fh,
+};
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 84dd957efa24..cbae89457234 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2017 Red Hat, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
*/
#include <linux/cred.h>
@@ -11,6 +8,10 @@
#include <linux/mount.h>
#include <linux/xattr.h>
#include <linux/uio.h>
+#include <linux/uaccess.h>
+#include <linux/security.h>
+#include <linux/fs.h>
+#include <linux/backing-file.h>
#include "overlayfs.h"
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
@@ -24,16 +25,32 @@ static char ovl_whatisit(struct inode *inode, struct inode *realinode)
}
static struct file *ovl_open_realfile(const struct file *file,
- struct inode *realinode)
+ const struct path *realpath)
{
+ struct inode *realinode = d_inode(realpath->dentry);
struct inode *inode = file_inode(file);
+ struct mnt_idmap *real_idmap;
struct file *realfile;
- const struct cred *old_cred;
+ int flags = file->f_flags | OVL_OPEN_FLAGS;
+ int acc_mode = ACC_MODE(flags);
+ int err;
- old_cred = ovl_override_creds(inode->i_sb);
- realfile = open_with_fake_path(&file->f_path, file->f_flags | O_NOATIME,
- realinode, current_cred());
- revert_creds(old_cred);
+ if (flags & O_APPEND)
+ acc_mode |= MAY_APPEND;
+
+ with_ovl_creds(inode->i_sb) {
+ real_idmap = mnt_idmap(realpath->mnt);
+ err = inode_permission(real_idmap, realinode, MAY_OPEN | acc_mode);
+ if (err) {
+ realfile = ERR_PTR(err);
+ } else {
+ if (!inode_owner_or_capable(real_idmap, realinode))
+ flags &= ~O_NOATIME;
+
+ realfile = backing_file_open(file_user_path(file),
+ flags, realpath, current_cred());
+ }
+ }
pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
file, file, ovl_whatisit(inode, realinode), file->f_flags,
@@ -49,23 +66,13 @@ static int ovl_change_flags(struct file *file, unsigned int flags)
struct inode *inode = file_inode(file);
int err;
- /* No atime modificaton on underlying */
- flags |= O_NOATIME;
-
- /* If some flag changed that cannot be changed then something's amiss */
- if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK))
- return -EIO;
-
flags &= OVL_SETFL_MASK;
if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
return -EPERM;
- if (flags & O_DIRECT) {
- if (!file->f_mapping->a_ops ||
- !file->f_mapping->a_ops->direct_IO)
- return -EINVAL;
- }
+ if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT))
+ return -EINVAL;
if (file->f_op->check_flags) {
err = file->f_op->check_flags(flags);
@@ -75,86 +82,220 @@ static int ovl_change_flags(struct file *file, unsigned int flags)
spin_lock(&file->f_lock);
file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
+ file->f_iocb_flags = iocb_flags(file);
spin_unlock(&file->f_lock);
return 0;
}
-static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
- bool allow_meta)
+struct ovl_file {
+ struct file *realfile;
+ struct file *upperfile;
+};
+
+struct ovl_file *ovl_file_alloc(struct file *realfile)
{
- struct inode *inode = file_inode(file);
- struct inode *realinode;
+ struct ovl_file *of = kzalloc(sizeof(struct ovl_file), GFP_KERNEL);
- real->flags = 0;
- real->file = file->private_data;
+ if (unlikely(!of))
+ return NULL;
- if (allow_meta)
- realinode = ovl_inode_real(inode);
- else
- realinode = ovl_inode_realdata(inode);
+ of->realfile = realfile;
+ return of;
+}
+
+void ovl_file_free(struct ovl_file *of)
+{
+ fput(of->realfile);
+ if (of->upperfile)
+ fput(of->upperfile);
+ kfree(of);
+}
+
+static bool ovl_is_real_file(const struct file *realfile,
+ const struct path *realpath)
+{
+ return file_inode(realfile) == d_inode(realpath->dentry);
+}
+
+static struct file *ovl_real_file_path(const struct file *file,
+ const struct path *realpath)
+{
+ struct ovl_file *of = file->private_data;
+ struct file *realfile = of->realfile;
- /* Has it been copied up since we'd opened it? */
- if (unlikely(file_inode(real->file) != realinode)) {
- real->flags = FDPUT_FPUT;
- real->file = ovl_open_realfile(file, realinode);
+ if (WARN_ON_ONCE(!realpath->dentry))
+ return ERR_PTR(-EIO);
- return PTR_ERR_OR_ZERO(real->file);
+ /*
+ * If the realfile that we want is not where the data used to be at
+ * open time, either we'd been copied up, or it's an fsync of a
+ * metacopied file. We need the upperfile either way, so see if it
+ * is already opened and if it is not then open and store it.
+ */
+ if (unlikely(!ovl_is_real_file(realfile, realpath))) {
+ struct file *upperfile = READ_ONCE(of->upperfile);
+ struct file *old;
+
+ if (!upperfile) { /* Nobody opened upperfile yet */
+ upperfile = ovl_open_realfile(file, realpath);
+ if (IS_ERR(upperfile))
+ return upperfile;
+
+ /* Store the upperfile for later */
+ old = cmpxchg_release(&of->upperfile, NULL, upperfile);
+ if (old) { /* Someone opened upperfile before us */
+ fput(upperfile);
+ upperfile = old;
+ }
+ }
+ /*
+ * Stored file must be from the right inode, unless someone's
+ * been corrupting the upper layer.
+ */
+ if (WARN_ON_ONCE(!ovl_is_real_file(upperfile, realpath)))
+ return ERR_PTR(-EIO);
+
+ realfile = upperfile;
}
/* Did the flags change since open? */
- if (unlikely((file->f_flags ^ real->file->f_flags) & ~O_NOATIME))
- return ovl_change_flags(real->file, file->f_flags);
+ if (unlikely((file->f_flags ^ realfile->f_flags) & ~OVL_OPEN_FLAGS)) {
+ int err = ovl_change_flags(realfile, file->f_flags);
- return 0;
+ if (err)
+ return ERR_PTR(err);
+ }
+
+ return realfile;
}
-static int ovl_real_fdget(const struct file *file, struct fd *real)
+static struct file *ovl_real_file(const struct file *file)
{
- return ovl_real_fdget_meta(file, real, false);
+ struct dentry *dentry = file_dentry(file);
+ struct path realpath;
+ int err;
+
+ if (d_is_dir(dentry)) {
+ struct file *f = ovl_dir_real_file(file, false);
+
+ if (WARN_ON_ONCE(!f))
+ return ERR_PTR(-EIO);
+ return f;
+ }
+
+ /* lazy lookup and verify of lowerdata */
+ err = ovl_verify_lowerdata(dentry);
+ if (err)
+ return ERR_PTR(err);
+
+ ovl_path_realdata(dentry, &realpath);
+
+ return ovl_real_file_path(file, &realpath);
}
static int ovl_open(struct inode *inode, struct file *file)
{
struct dentry *dentry = file_dentry(file);
struct file *realfile;
+ struct path realpath;
+ struct ovl_file *of;
int err;
- err = ovl_open_maybe_copy_up(dentry, file->f_flags);
+ /* lazy lookup and verify lowerdata */
+ err = ovl_verify_lowerdata(dentry);
+ if (err)
+ return err;
+
+ err = ovl_maybe_copy_up(dentry, file->f_flags);
if (err)
return err;
/* No longer need these flags, so don't pass them on to underlying fs */
file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
- realfile = ovl_open_realfile(file, ovl_inode_realdata(inode));
+ ovl_path_realdata(dentry, &realpath);
+ if (!realpath.dentry)
+ return -EIO;
+
+ realfile = ovl_open_realfile(file, &realpath);
if (IS_ERR(realfile))
return PTR_ERR(realfile);
- file->private_data = realfile;
+ of = ovl_file_alloc(realfile);
+ if (!of) {
+ fput(realfile);
+ return -ENOMEM;
+ }
+
+ file->private_data = of;
return 0;
}
static int ovl_release(struct inode *inode, struct file *file)
{
- fput(file->private_data);
-
+ ovl_file_free(file->private_data);
return 0;
}
static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
{
- struct inode *realinode = ovl_inode_real(file_inode(file));
+ struct inode *inode = file_inode(file);
+ struct file *realfile;
+ loff_t ret;
+
+ /*
+ * The two special cases below do not need to involve real fs,
+ * so we can optimizing concurrent callers.
+ */
+ if (offset == 0) {
+ if (whence == SEEK_CUR)
+ return file->f_pos;
- return generic_file_llseek_size(file, offset, whence,
- realinode->i_sb->s_maxbytes,
- i_size_read(realinode));
+ if (whence == SEEK_SET)
+ return vfs_setpos(file, 0, 0);
+ }
+
+ realfile = ovl_real_file(file);
+ if (IS_ERR(realfile))
+ return PTR_ERR(realfile);
+
+ /*
+ * Overlay file f_pos is the master copy that is preserved
+ * through copy up and modified on read/write, but only real
+ * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
+ * limitations that are more strict than ->s_maxbytes for specific
+ * files, so we use the real file to perform seeks.
+ */
+ ovl_inode_lock(inode);
+ realfile->f_pos = file->f_pos;
+
+ with_ovl_creds(inode->i_sb)
+ ret = vfs_llseek(realfile, offset, whence);
+
+ file->f_pos = realfile->f_pos;
+ ovl_inode_unlock(inode);
+
+ return ret;
+}
+
+static void ovl_file_modified(struct file *file)
+{
+ /* Update size/mtime */
+ ovl_copyattr(file_inode(file));
+}
+
+static void ovl_file_end_write(struct kiocb *iocb, ssize_t ret)
+{
+ ovl_file_modified(iocb->ki_filp);
}
static void ovl_file_accessed(struct file *file)
{
struct inode *inode, *upperinode;
+ struct timespec64 ctime, uctime;
+ struct timespec64 mtime, umtime;
if (file->f_flags & O_NOATIME)
return;
@@ -165,91 +306,67 @@ static void ovl_file_accessed(struct file *file)
if (!upperinode)
return;
- if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) ||
- !timespec64_equal(&inode->i_ctime, &upperinode->i_ctime))) {
- inode->i_mtime = upperinode->i_mtime;
- inode->i_ctime = upperinode->i_ctime;
+ ctime = inode_get_ctime(inode);
+ uctime = inode_get_ctime(upperinode);
+ mtime = inode_get_mtime(inode);
+ umtime = inode_get_mtime(upperinode);
+ if ((!timespec64_equal(&mtime, &umtime)) ||
+ !timespec64_equal(&ctime, &uctime)) {
+ inode_set_mtime_to_ts(inode, inode_get_mtime(upperinode));
+ inode_set_ctime_to_ts(inode, uctime);
}
touch_atime(&file->f_path);
}
-static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
-{
- int ifl = iocb->ki_flags;
- rwf_t flags = 0;
-
- if (ifl & IOCB_NOWAIT)
- flags |= RWF_NOWAIT;
- if (ifl & IOCB_HIPRI)
- flags |= RWF_HIPRI;
- if (ifl & IOCB_DSYNC)
- flags |= RWF_DSYNC;
- if (ifl & IOCB_SYNC)
- flags |= RWF_SYNC;
-
- return flags;
-}
-
static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct file *file = iocb->ki_filp;
- struct fd real;
- const struct cred *old_cred;
- ssize_t ret;
+ struct file *realfile;
+ struct backing_file_ctx ctx = {
+ .cred = ovl_creds(file_inode(file)->i_sb),
+ .accessed = ovl_file_accessed,
+ };
if (!iov_iter_count(iter))
return 0;
- ret = ovl_real_fdget(file, &real);
- if (ret)
- return ret;
-
- old_cred = ovl_override_creds(file_inode(file)->i_sb);
- ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
- ovl_iocb_to_rwf(iocb));
- revert_creds(old_cred);
-
- ovl_file_accessed(file);
-
- fdput(real);
+ realfile = ovl_real_file(file);
+ if (IS_ERR(realfile))
+ return PTR_ERR(realfile);
- return ret;
+ return backing_file_read_iter(realfile, iter, iocb, iocb->ki_flags,
+ &ctx);
}
static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
- struct fd real;
- const struct cred *old_cred;
+ struct file *realfile;
ssize_t ret;
+ int ifl = iocb->ki_flags;
+ struct backing_file_ctx ctx = {
+ .cred = ovl_creds(inode->i_sb),
+ .end_write = ovl_file_end_write,
+ };
if (!iov_iter_count(iter))
return 0;
inode_lock(inode);
/* Update mode */
- ovl_copyattr(ovl_inode_real(inode), inode);
- ret = file_remove_privs(file);
- if (ret)
- goto out_unlock;
+ ovl_copyattr(inode);
- ret = ovl_real_fdget(file, &real);
- if (ret)
+ realfile = ovl_real_file(file);
+ ret = PTR_ERR(realfile);
+ if (IS_ERR(realfile))
goto out_unlock;
- old_cred = ovl_override_creds(file_inode(file)->i_sb);
- file_start_write(real.file);
- ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
- ovl_iocb_to_rwf(iocb));
- file_end_write(real.file);
- revert_creds(old_cred);
-
- /* Update size */
- ovl_copyattr(ovl_inode_real(inode), inode);
+ if (!ovl_should_sync(OVL_FS(inode->i_sb)))
+ ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
- fdput(real);
+ ret = backing_file_write_iter(realfile, iter, iocb, ifl, &ctx);
out_unlock:
inode_unlock(inode);
@@ -257,175 +374,147 @@ out_unlock:
return ret;
}
-static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
-{
- struct fd real;
- const struct cred *old_cred;
- int ret;
-
- ret = ovl_real_fdget_meta(file, &real, !datasync);
- if (ret)
- return ret;
-
- /* Don't sync lower file for fear of receiving EROFS error */
- if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) {
- old_cred = ovl_override_creds(file_inode(file)->i_sb);
- ret = vfs_fsync_range(real.file, start, end, datasync);
- revert_creds(old_cred);
- }
-
- fdput(real);
-
- return ret;
-}
-
-static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
+static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
{
- struct file *realfile = file->private_data;
- const struct cred *old_cred;
- int ret;
-
- if (!realfile->f_op->mmap)
- return -ENODEV;
-
- if (WARN_ON(file != vma->vm_file))
- return -EIO;
-
- vma->vm_file = get_file(realfile);
-
- old_cred = ovl_override_creds(file_inode(file)->i_sb);
- ret = call_mmap(vma->vm_file, vma);
- revert_creds(old_cred);
+ struct file *realfile;
+ ssize_t ret;
+ struct backing_file_ctx ctx = {
+ .cred = ovl_creds(file_inode(in)->i_sb),
+ .accessed = ovl_file_accessed,
+ };
+ struct kiocb iocb;
- if (ret) {
- /* Drop reference count from new vm_file value */
- fput(realfile);
- } else {
- /* Drop reference count from previous vm_file value */
- fput(file);
- }
+ realfile = ovl_real_file(in);
+ if (IS_ERR(realfile))
+ return PTR_ERR(realfile);
- ovl_file_accessed(file);
+ init_sync_kiocb(&iocb, in);
+ iocb.ki_pos = *ppos;
+ ret = backing_file_splice_read(realfile, &iocb, pipe, len, flags, &ctx);
+ *ppos = iocb.ki_pos;
return ret;
}
-static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
+/*
+ * Calling iter_file_splice_write() directly from overlay's f_op may deadlock
+ * due to lock order inversion between pipe->mutex in iter_file_splice_write()
+ * and file_start_write(realfile) in ovl_write_iter().
+ *
+ * So do everything ovl_write_iter() does and call iter_file_splice_write() on
+ * the real file.
+ */
+static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
+ loff_t *ppos, size_t len, unsigned int flags)
{
- struct inode *inode = file_inode(file);
- struct fd real;
- const struct cred *old_cred;
- int ret;
+ struct file *realfile;
+ struct inode *inode = file_inode(out);
+ ssize_t ret;
+ struct backing_file_ctx ctx = {
+ .cred = ovl_creds(inode->i_sb),
+ .end_write = ovl_file_end_write,
+ };
+ struct kiocb iocb;
- ret = ovl_real_fdget(file, &real);
- if (ret)
- return ret;
+ inode_lock(inode);
+ /* Update mode */
+ ovl_copyattr(inode);
- old_cred = ovl_override_creds(file_inode(file)->i_sb);
- ret = vfs_fallocate(real.file, mode, offset, len);
- revert_creds(old_cred);
+ realfile = ovl_real_file(out);
+ ret = PTR_ERR(realfile);
+ if (IS_ERR(realfile))
+ goto out_unlock;
- /* Update size */
- ovl_copyattr(ovl_inode_real(inode), inode);
+ init_sync_kiocb(&iocb, out);
+ iocb.ki_pos = *ppos;
+ ret = backing_file_splice_write(pipe, realfile, &iocb, len, flags, &ctx);
+ *ppos = iocb.ki_pos;
- fdput(real);
+out_unlock:
+ inode_unlock(inode);
return ret;
}
-static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
+static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
- struct fd real;
- const struct cred *old_cred;
+ struct dentry *dentry = file_dentry(file);
+ enum ovl_path_type type;
+ struct path upperpath;
+ struct file *upperfile;
int ret;
- ret = ovl_real_fdget(file, &real);
- if (ret)
+ ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
+ if (ret <= 0)
return ret;
- old_cred = ovl_override_creds(file_inode(file)->i_sb);
- ret = vfs_fadvise(real.file, offset, len, advice);
- revert_creds(old_cred);
+ /* Don't sync lower file for fear of receiving EROFS error */
+ type = ovl_path_type(dentry);
+ if (!OVL_TYPE_UPPER(type) || (datasync && OVL_TYPE_MERGE(type)))
+ return 0;
- fdput(real);
+ ovl_path_upper(dentry, &upperpath);
+ upperfile = ovl_real_file_path(file, &upperpath);
+ if (IS_ERR(upperfile))
+ return PTR_ERR(upperfile);
- return ret;
+ with_ovl_creds(file_inode(file)->i_sb)
+ return vfs_fsync_range(upperfile, start, end, datasync);
}
-static long ovl_real_ioctl(struct file *file, unsigned int cmd,
- unsigned long arg)
+static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
{
- struct fd real;
- const struct cred *old_cred;
- long ret;
-
- ret = ovl_real_fdget(file, &real);
- if (ret)
- return ret;
-
- old_cred = ovl_override_creds(file_inode(file)->i_sb);
- ret = vfs_ioctl(real.file, cmd, arg);
- revert_creds(old_cred);
-
- fdput(real);
+ struct ovl_file *of = file->private_data;
+ struct backing_file_ctx ctx = {
+ .cred = ovl_creds(file_inode(file)->i_sb),
+ .accessed = ovl_file_accessed,
+ };
- return ret;
+ return backing_file_mmap(of->realfile, vma, &ctx);
}
-static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
- long ret;
struct inode *inode = file_inode(file);
+ struct file *realfile;
+ int ret;
- switch (cmd) {
- case FS_IOC_GETFLAGS:
- ret = ovl_real_ioctl(file, cmd, arg);
- break;
-
- case FS_IOC_SETFLAGS:
- if (!inode_owner_or_capable(inode))
- return -EACCES;
-
- ret = mnt_want_write_file(file);
- if (ret)
- return ret;
+ inode_lock(inode);
+ /* Update mode */
+ ovl_copyattr(inode);
+ ret = file_remove_privs(file);
+ if (ret)
+ goto out_unlock;
- ret = ovl_copy_up_with_data(file_dentry(file));
- if (!ret) {
- ret = ovl_real_ioctl(file, cmd, arg);
+ realfile = ovl_real_file(file);
+ ret = PTR_ERR(realfile);
+ if (IS_ERR(realfile))
+ goto out_unlock;
- inode_lock(inode);
- ovl_copyflags(ovl_inode_real(inode), inode);
- inode_unlock(inode);
- }
+ with_ovl_creds(inode->i_sb)
+ ret = vfs_fallocate(realfile, mode, offset, len);
- mnt_drop_write_file(file);
- break;
+ /* Update size */
+ ovl_file_modified(file);
- default:
- ret = -ENOTTY;
- }
+out_unlock:
+ inode_unlock(inode);
return ret;
}
-static long ovl_compat_ioctl(struct file *file, unsigned int cmd,
- unsigned long arg)
+static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
{
- switch (cmd) {
- case FS_IOC32_GETFLAGS:
- cmd = FS_IOC_GETFLAGS;
- break;
-
- case FS_IOC32_SETFLAGS:
- cmd = FS_IOC_SETFLAGS;
- break;
+ struct file *realfile;
- default:
- return -ENOIOCTLCMD;
- }
+ realfile = ovl_real_file(file);
+ if (IS_ERR(realfile))
+ return PTR_ERR(realfile);
- return ovl_ioctl(file, cmd, arg);
+ with_ovl_creds(file_inode(file)->i_sb)
+ return vfs_fadvise(realfile, offset, len, advice);
}
enum ovl_copyop {
@@ -439,45 +528,53 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
loff_t len, unsigned int flags, enum ovl_copyop op)
{
struct inode *inode_out = file_inode(file_out);
- struct fd real_in, real_out;
- const struct cred *old_cred;
+ struct file *realfile_in, *realfile_out;
loff_t ret;
- ret = ovl_real_fdget(file_out, &real_out);
- if (ret)
- return ret;
-
- ret = ovl_real_fdget(file_in, &real_in);
- if (ret) {
- fdput(real_out);
- return ret;
+ inode_lock(inode_out);
+ if (op != OVL_DEDUPE) {
+ /* Update mode */
+ ovl_copyattr(inode_out);
+ ret = file_remove_privs(file_out);
+ if (ret)
+ goto out_unlock;
}
- old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
- switch (op) {
- case OVL_COPY:
- ret = vfs_copy_file_range(real_in.file, pos_in,
- real_out.file, pos_out, len, flags);
- break;
-
- case OVL_CLONE:
- ret = vfs_clone_file_range(real_in.file, pos_in,
- real_out.file, pos_out, len, flags);
- break;
-
- case OVL_DEDUPE:
- ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
- real_out.file, pos_out, len,
- flags);
- break;
+ realfile_out = ovl_real_file(file_out);
+ ret = PTR_ERR(realfile_out);
+ if (IS_ERR(realfile_out))
+ goto out_unlock;
+
+ realfile_in = ovl_real_file(file_in);
+ ret = PTR_ERR(realfile_in);
+ if (IS_ERR(realfile_in))
+ goto out_unlock;
+
+ with_ovl_creds(file_inode(file_out)->i_sb) {
+ switch (op) {
+ case OVL_COPY:
+ ret = vfs_copy_file_range(realfile_in, pos_in,
+ realfile_out, pos_out, len, flags);
+ break;
+
+ case OVL_CLONE:
+ ret = vfs_clone_file_range(realfile_in, pos_in,
+ realfile_out, pos_out, len, flags);
+ break;
+
+ case OVL_DEDUPE:
+ ret = vfs_dedupe_file_range_one(realfile_in, pos_in,
+ realfile_out, pos_out, len,
+ flags);
+ break;
+ }
}
- revert_creds(old_cred);
/* Update size */
- ovl_copyattr(ovl_inode_real(inode_out), inode_out);
+ ovl_file_modified(file_out);
- fdput(real_in);
- fdput(real_out);
+out_unlock:
+ inode_unlock(inode_out);
return ret;
}
@@ -517,6 +614,23 @@ static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
remap_flags, op);
}
+static int ovl_flush(struct file *file, fl_owner_t id)
+{
+ struct file *realfile;
+ int err = 0;
+
+ realfile = ovl_real_file(file);
+ if (IS_ERR(realfile))
+ return PTR_ERR(realfile);
+
+ if (realfile->f_op->flush) {
+ with_ovl_creds(file_inode(file)->i_sb)
+ err = realfile->f_op->flush(realfile, id);
+ }
+
+ return err;
+}
+
const struct file_operations ovl_file_operations = {
.open = ovl_open,
.release = ovl_release,
@@ -527,8 +641,9 @@ const struct file_operations ovl_file_operations = {
.mmap = ovl_mmap,
.fallocate = ovl_fallocate,
.fadvise = ovl_fadvise,
- .unlocked_ioctl = ovl_ioctl,
- .compat_ioctl = ovl_compat_ioctl,
+ .flush = ovl_flush,
+ .splice_read = ovl_splice_read,
+ .splice_write = ovl_splice_write,
.copy_file_range = ovl_copy_file_range,
.remap_file_range = ovl_remap_file_range,
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 3b7ed5d2279c..bdbf86b56a9b 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -1,43 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
*
* Copyright (C) 2011 Novell Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/cred.h>
#include <linux/xattr.h>
-#include <linux/posix_acl.h>
#include <linux/ratelimit.h>
+#include <linux/fiemap.h>
+#include <linux/fileattr.h>
+#include <linux/security.h>
+#include <linux/namei.h>
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
#include "overlayfs.h"
-int ovl_setattr(struct dentry *dentry, struct iattr *attr)
+int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct iattr *attr)
{
int err;
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
bool full_copy_up = false;
struct dentry *upperdentry;
- const struct cred *old_cred;
- err = setattr_prepare(dentry, attr);
+ err = setattr_prepare(&nop_mnt_idmap, dentry, attr);
if (err)
return err;
- err = ovl_want_write(dentry);
- if (err)
- goto out;
-
if (attr->ia_valid & ATTR_SIZE) {
- struct inode *realinode = d_inode(ovl_dentry_real(dentry));
-
- err = -ETXTBSY;
- if (atomic_read(&realinode->i_writecount) < 0)
- goto out_drop_write;
-
/* Truncate should trigger data copy up as well */
full_copy_up = true;
}
@@ -55,34 +48,56 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
winode = d_inode(upperdentry);
err = get_write_access(winode);
if (err)
- goto out_drop_write;
+ goto out;
}
if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
attr->ia_valid &= ~ATTR_MODE;
+ /*
+ * We might have to translate ovl file into real file object
+ * once use cases emerge. For now, simply don't let underlying
+ * filesystem rely on attr->ia_file
+ */
+ attr->ia_valid &= ~ATTR_FILE;
+
+ /*
+ * If open(O_TRUNC) is done, VFS calls ->setattr with ATTR_OPEN
+ * set. Overlayfs does not pass O_TRUNC flag to underlying
+ * filesystem during open -> do not pass ATTR_OPEN. This
+ * disables optimization in fuse which assumes open(O_TRUNC)
+ * already set file size to 0. But we never passed O_TRUNC to
+ * fuse. So by clearing ATTR_OPEN, fuse will be forced to send
+ * setattr request to server.
+ */
+ attr->ia_valid &= ~ATTR_OPEN;
+
+ err = ovl_want_write(dentry);
+ if (err)
+ goto out_put_write;
+
inode_lock(upperdentry->d_inode);
- old_cred = ovl_override_creds(dentry->d_sb);
- err = notify_change(upperdentry, attr, NULL);
- revert_creds(old_cred);
+ with_ovl_creds(dentry->d_sb)
+ err = ovl_do_notify_change(ofs, upperdentry, attr);
if (!err)
- ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
+ ovl_copyattr(dentry->d_inode);
inode_unlock(upperdentry->d_inode);
+ ovl_drop_write(dentry);
+out_put_write:
if (winode)
put_write_access(winode);
}
-out_drop_write:
- ovl_drop_write(dentry);
out:
return err;
}
-static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat,
- struct ovl_layer *lower_layer)
+static void ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid)
{
- bool samefs = ovl_same_sb(dentry->d_sb);
- unsigned int xinobits = ovl_xino_bits(dentry->d_sb);
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+ bool samefs = ovl_same_fs(ofs);
+ unsigned int xinobits = ovl_xino_bits(ofs);
+ unsigned int xinoshift = 64 - xinobits;
if (samefs) {
/*
@@ -91,26 +106,24 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat,
* which is friendly to du -x.
*/
stat->dev = dentry->d_sb->s_dev;
- return 0;
+ return;
} else if (xinobits) {
- unsigned int shift = 64 - xinobits;
/*
* All inode numbers of underlying fs should not be using the
* high xinobits, so we use high xinobits to partition the
* overlay st_ino address space. The high bits holds the fsid
- * (upper fsid is 0). This way overlay inode numbers are unique
- * and all inodes use overlay st_dev. Inode numbers are also
- * persistent for a given layer configuration.
+ * (upper fsid is 0). The lowest xinobit is reserved for mapping
+ * the non-persistent inode numbers range in case of overflow.
+ * This way all overlay inode numbers are unique and use the
+ * overlay st_dev.
*/
- if (stat->ino >> shift) {
- pr_warn_ratelimited("overlayfs: inode number too big (%pd2, ino=%llu, xinobits=%d)\n",
- dentry, stat->ino, xinobits);
- } else {
- if (lower_layer)
- stat->ino |= ((u64)lower_layer->fsid) << shift;
-
+ if (likely(!(stat->ino >> xinoshift))) {
+ stat->ino |= ((u64)fsid) << (xinoshift + 1);
stat->dev = dentry->d_sb->s_dev;
- return 0;
+ return;
+ } else if (ovl_xino_warn(ofs)) {
+ pr_warn_ratelimited("inode number too big (%pd2, ino=%llu, xinobits=%d)\n",
+ dentry, stat->ino, xinobits);
}
}
@@ -127,40 +140,48 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat,
*/
stat->dev = dentry->d_sb->s_dev;
stat->ino = dentry->d_inode->i_ino;
- } else if (lower_layer && lower_layer->fsid) {
+ } else {
/*
* For non-samefs setup, if we cannot map all layers st_ino
* to a unified address space, we need to make sure that st_dev
- * is unique per lower fs. Upper layer uses real st_dev and
- * lower layers use the unique anonymous bdev assigned to the
- * lower fs.
+ * is unique per underlying fs, so we use the unique anonymous
+ * bdev assigned to the underlying fs.
*/
- stat->dev = lower_layer->fs->pseudo_dev;
+ stat->dev = ofs->fs[fsid].pseudo_dev;
}
+}
- return 0;
+static inline int ovl_real_getattr_nosec(struct super_block *sb,
+ const struct path *path,
+ struct kstat *stat, u32 request_mask,
+ unsigned int flags)
+{
+ with_ovl_creds(sb)
+ return vfs_getattr_nosec(path, stat, request_mask, flags);
}
-int ovl_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags)
+int ovl_getattr(struct mnt_idmap *idmap, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct dentry *dentry = path->dentry;
+ struct super_block *sb = dentry->d_sb;
enum ovl_path_type type;
struct path realpath;
- const struct cred *old_cred;
- bool is_dir = S_ISDIR(dentry->d_inode->i_mode);
- bool samefs = ovl_same_sb(dentry->d_sb);
- struct ovl_layer *lower_layer = NULL;
+ struct inode *inode = d_inode(dentry);
+ bool is_dir = S_ISDIR(inode->i_mode);
+ int fsid = 0;
int err;
bool metacopy_blocks = false;
metacopy_blocks = ovl_is_metacopy_dentry(dentry);
type = ovl_path_real(dentry, &realpath);
- old_cred = ovl_override_creds(dentry->d_sb);
- err = vfs_getattr(&realpath, stat, request_mask, flags);
+ err = ovl_real_getattr_nosec(sb, &realpath, stat, request_mask, flags);
if (err)
- goto out;
+ return err;
+
+ /* Report the effective immutable/append-only STATX flags */
+ generic_fill_statx_attr(inode, stat);
/*
* For non-dir or same fs, we use st_ino of the copy up origin.
@@ -171,19 +192,18 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
* If lower filesystem supports NFS file handles, this also guaranties
* persistent st_ino across mount cycle.
*/
- if (!is_dir || samefs || ovl_xino_bits(dentry->d_sb)) {
+ if (!is_dir || ovl_same_dev(OVL_FS(dentry->d_sb))) {
if (!OVL_TYPE_UPPER(type)) {
- lower_layer = ovl_layer_lower(dentry);
+ fsid = ovl_layer_lower(dentry)->fsid;
} else if (OVL_TYPE_ORIGIN(type)) {
struct kstat lowerstat;
u32 lowermask = STATX_INO | STATX_BLOCKS |
(!is_dir ? STATX_NLINK : 0);
ovl_path_lower(dentry, &realpath);
- err = vfs_getattr(&realpath, &lowerstat,
- lowermask, flags);
+ err = ovl_real_getattr_nosec(sb, &realpath, &lowerstat, lowermask, flags);
if (err)
- goto out;
+ return err;
/*
* Lower hardlinks may be broken on copy up to different
@@ -203,8 +223,8 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) ||
(!ovl_verify_lower(dentry->d_sb) &&
(is_dir || lowerstat.nlink == 1))) {
+ fsid = ovl_layer_lower(dentry)->fsid;
stat->ino = lowerstat.ino;
- lower_layer = ovl_layer_lower(dentry);
}
/*
@@ -225,22 +245,27 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
/*
* If lower is not same as lowerdata or if there was
* no origin on upper, we can end up here.
+ * With lazy lowerdata lookup, guess lowerdata blocks
+ * from size to avoid lowerdata lookup on stat(2).
*/
struct kstat lowerdatastat;
u32 lowermask = STATX_BLOCKS;
ovl_path_lowerdata(dentry, &realpath);
- err = vfs_getattr(&realpath, &lowerdatastat,
- lowermask, flags);
- if (err)
- goto out;
+ if (realpath.dentry) {
+ err = ovl_real_getattr_nosec(sb, &realpath, &lowerdatastat,
+ lowermask, flags);
+ if (err)
+ return err;
+ } else {
+ lowerdatastat.blocks =
+ round_up(stat->size, stat->blksize) >> 9;
+ }
stat->blocks = lowerdatastat.blocks;
}
}
- err = ovl_map_dev_ino(dentry, stat, lower_layer);
- if (err)
- goto out;
+ ovl_map_dev_ino(dentry, stat, fsid);
/*
* It's probably not worth it to count subdirs to get the
@@ -259,20 +284,19 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
if (!is_dir && ovl_test_flag(OVL_INDEX, d_inode(dentry)))
stat->nlink = dentry->d_inode->i_nlink;
-out:
- revert_creds(old_cred);
-
return err;
}
-int ovl_permission(struct inode *inode, int mask)
+int ovl_permission(struct mnt_idmap *idmap,
+ struct inode *inode, int mask)
{
struct inode *upperinode = ovl_inode_upper(inode);
- struct inode *realinode = upperinode ?: ovl_inode_lower(inode);
- const struct cred *old_cred;
+ struct inode *realinode;
+ struct path realpath;
int err;
/* Careful in RCU walk mode */
+ realinode = ovl_i_path_real(inode, &realpath);
if (!realinode) {
WARN_ON(!(mask & MAY_NOT_BLOCK));
return -ECHILD;
@@ -282,197 +306,419 @@ int ovl_permission(struct inode *inode, int mask)
* Check overlay inode with the creds of task and underlying inode
* with creds of mounter
*/
- err = generic_permission(inode, mask);
+ err = generic_permission(&nop_mnt_idmap, inode, mask);
if (err)
return err;
- old_cred = ovl_override_creds(inode->i_sb);
if (!upperinode &&
!special_file(realinode->i_mode) && mask & MAY_WRITE) {
mask &= ~(MAY_WRITE | MAY_APPEND);
/* Make sure mounter can read file for copy up later */
mask |= MAY_READ;
}
- err = inode_permission(realinode, mask);
- revert_creds(old_cred);
- return err;
+ with_ovl_creds(inode->i_sb)
+ return inode_permission(mnt_idmap(realpath.mnt), realinode, mask);
}
static const char *ovl_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
- const struct cred *old_cred;
- const char *p;
-
if (!dentry)
return ERR_PTR(-ECHILD);
- old_cred = ovl_override_creds(dentry->d_sb);
- p = vfs_get_link(ovl_dentry_real(dentry), done);
- revert_creds(old_cred);
- return p;
+ with_ovl_creds(dentry->d_sb)
+ return vfs_get_link(ovl_dentry_real(dentry), done);
+}
+
+#ifdef CONFIG_FS_POSIX_ACL
+/*
+ * Apply the idmapping of the layer to POSIX ACLs. The caller must pass a clone
+ * of the POSIX ACLs retrieved from the lower layer to this function to not
+ * alter the POSIX ACLs for the underlying filesystem.
+ */
+static void ovl_idmap_posix_acl(const struct inode *realinode,
+ struct mnt_idmap *idmap,
+ struct posix_acl *acl)
+{
+ struct user_namespace *fs_userns = i_user_ns(realinode);
+
+ for (unsigned int i = 0; i < acl->a_count; i++) {
+ vfsuid_t vfsuid;
+ vfsgid_t vfsgid;
+
+ struct posix_acl_entry *e = &acl->a_entries[i];
+ switch (e->e_tag) {
+ case ACL_USER:
+ vfsuid = make_vfsuid(idmap, fs_userns, e->e_uid);
+ e->e_uid = vfsuid_into_kuid(vfsuid);
+ break;
+ case ACL_GROUP:
+ vfsgid = make_vfsgid(idmap, fs_userns, e->e_gid);
+ e->e_gid = vfsgid_into_kgid(vfsgid);
+ break;
+ }
+ }
+}
+
+/*
+ * The @noperm argument is used to skip permission checking and is a temporary
+ * measure. Quoting Miklos from an earlier discussion:
+ *
+ * > So there are two paths to getting an acl:
+ * > 1) permission checking and 2) retrieving the value via getxattr(2).
+ * > This is a similar situation as reading a symlink vs. following it.
+ * > When following a symlink overlayfs always reads the link on the
+ * > underlying fs just as if it was a readlink(2) call, calling
+ * > security_inode_readlink() instead of security_inode_follow_link().
+ * > This is logical: we are reading the link from the underlying storage,
+ * > and following it on overlayfs.
+ * >
+ * > Applying the same logic to acl: we do need to call the
+ * > security_inode_getxattr() on the underlying fs, even if just want to
+ * > check permissions on overlay. This is currently not done, which is an
+ * > inconsistency.
+ * >
+ * > Maybe adding the check to ovl_get_acl() is the right way to go, but
+ * > I'm a little afraid of a performance regression. Will look into that.
+ *
+ * Until we have made a decision allow this helper to take the @noperm
+ * argument. We should hopefully be able to remove it soon.
+ */
+struct posix_acl *ovl_get_acl_path(const struct path *path,
+ const char *acl_name, bool noperm)
+{
+ struct posix_acl *real_acl, *clone;
+ struct mnt_idmap *idmap;
+ struct inode *realinode = d_inode(path->dentry);
+
+ idmap = mnt_idmap(path->mnt);
+
+ if (noperm)
+ real_acl = get_inode_acl(realinode, posix_acl_type(acl_name));
+ else
+ real_acl = vfs_get_acl(idmap, path->dentry, acl_name);
+ if (IS_ERR_OR_NULL(real_acl))
+ return real_acl;
+
+ if (!is_idmapped_mnt(path->mnt))
+ return real_acl;
+
+ /*
+ * We cannot alter the ACLs returned from the relevant layer as that
+ * would alter the cached values filesystem wide for the lower
+ * filesystem. Instead we can clone the ACLs and then apply the
+ * relevant idmapping of the layer.
+ */
+ clone = posix_acl_clone(real_acl, GFP_KERNEL);
+ posix_acl_release(real_acl); /* release original acl */
+ if (!clone)
+ return ERR_PTR(-ENOMEM);
+
+ ovl_idmap_posix_acl(realinode, idmap, clone);
+ return clone;
}
-bool ovl_is_private_xattr(const char *name)
+/*
+ * When the relevant layer is an idmapped mount we need to take the idmapping
+ * of the layer into account and translate any ACL_{GROUP,USER} values
+ * according to the idmapped mount.
+ *
+ * We cannot alter the ACLs returned from the relevant layer as that would
+ * alter the cached values filesystem wide for the lower filesystem. Instead we
+ * can clone the ACLs and then apply the relevant idmapping of the layer.
+ *
+ * This is obviously only relevant when idmapped layers are used.
+ */
+struct posix_acl *do_ovl_get_acl(struct mnt_idmap *idmap,
+ struct inode *inode, int type,
+ bool rcu, bool noperm)
{
- return strncmp(name, OVL_XATTR_PREFIX,
- sizeof(OVL_XATTR_PREFIX) - 1) == 0;
+ struct inode *realinode;
+ struct posix_acl *acl;
+ struct path realpath;
+
+ /* Careful in RCU walk mode */
+ realinode = ovl_i_path_real(inode, &realpath);
+ if (!realinode) {
+ WARN_ON(!rcu);
+ return ERR_PTR(-ECHILD);
+ }
+
+ if (!IS_POSIXACL(realinode))
+ return NULL;
+
+ if (rcu) {
+ /*
+ * If the layer is idmapped drop out of RCU path walk
+ * so we can clone the ACLs.
+ */
+ if (is_idmapped_mnt(realpath.mnt))
+ return ERR_PTR(-ECHILD);
+
+ acl = get_cached_acl_rcu(realinode, type);
+ } else {
+ with_ovl_creds(inode->i_sb)
+ acl = ovl_get_acl_path(&realpath, posix_acl_xattr_name(type), noperm);
+ }
+
+ return acl;
}
-int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
+static int ovl_set_or_remove_acl(struct dentry *dentry, struct inode *inode,
+ struct posix_acl *acl, int type)
{
int err;
- struct dentry *upperdentry = ovl_i_dentry_upper(inode);
+ struct path realpath;
+ const char *acl_name;
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+ struct dentry *upperdentry = ovl_dentry_upper(dentry);
struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry);
- const struct cred *old_cred;
- err = ovl_want_write(dentry);
- if (err)
- goto out;
-
- if (!value && !upperdentry) {
- err = vfs_getxattr(realdentry, name, NULL, 0);
- if (err < 0)
- goto out_drop_write;
+ /*
+ * If ACL is to be removed from a lower file, check if it exists in
+ * the first place before copying it up.
+ */
+ acl_name = posix_acl_xattr_name(type);
+ if (!acl && !upperdentry) {
+ struct posix_acl *real_acl;
+
+ ovl_path_lower(dentry, &realpath);
+ with_ovl_creds(dentry->d_sb)
+ real_acl = vfs_get_acl(mnt_idmap(realpath.mnt), realdentry, acl_name);
+ if (IS_ERR(real_acl)) {
+ err = PTR_ERR(real_acl);
+ goto out;
+ }
+ posix_acl_release(real_acl);
}
if (!upperdentry) {
err = ovl_copy_up(dentry);
if (err)
- goto out_drop_write;
+ goto out;
realdentry = ovl_dentry_upper(dentry);
}
- old_cred = ovl_override_creds(dentry->d_sb);
- if (value)
- err = vfs_setxattr(realdentry, name, value, size, flags);
- else {
- WARN_ON(flags != XATTR_REPLACE);
- err = vfs_removexattr(realdentry, name);
+ err = ovl_want_write(dentry);
+ if (err)
+ goto out;
+
+ with_ovl_creds(dentry->d_sb) {
+ if (acl)
+ err = ovl_do_set_acl(ofs, realdentry, acl_name, acl);
+ else
+ err = ovl_do_remove_acl(ofs, realdentry, acl_name);
}
- revert_creds(old_cred);
+ ovl_drop_write(dentry);
/* copy c/mtime */
- ovl_copyattr(d_inode(realdentry), inode);
-
-out_drop_write:
- ovl_drop_write(dentry);
+ ovl_copyattr(inode);
out:
return err;
}
-int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
- void *value, size_t size)
+int ovl_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct posix_acl *acl, int type)
{
- ssize_t res;
- const struct cred *old_cred;
- struct dentry *realdentry =
- ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry);
+ int err;
+ struct inode *inode = d_inode(dentry);
+ struct dentry *workdir = ovl_workdir(dentry);
+ struct inode *realinode = ovl_inode_real(inode);
- old_cred = ovl_override_creds(dentry->d_sb);
- res = vfs_getxattr(realdentry, name, value, size);
- revert_creds(old_cred);
- return res;
+ if (!IS_POSIXACL(d_inode(workdir)))
+ return -EOPNOTSUPP;
+ if (!realinode->i_op->set_acl)
+ return -EOPNOTSUPP;
+ if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
+ return acl ? -EACCES : 0;
+ if (!inode_owner_or_capable(&nop_mnt_idmap, inode))
+ return -EPERM;
+
+ /*
+ * Check if sgid bit needs to be cleared (actual setacl operation will
+ * be done with mounter's capabilities and so that won't do it for us).
+ */
+ if (unlikely(inode->i_mode & S_ISGID) && type == ACL_TYPE_ACCESS &&
+ !in_group_p(inode->i_gid) &&
+ !capable_wrt_inode_uidgid(&nop_mnt_idmap, inode, CAP_FSETID)) {
+ struct iattr iattr = { .ia_valid = ATTR_KILL_SGID };
+
+ err = ovl_setattr(&nop_mnt_idmap, dentry, &iattr);
+ if (err)
+ return err;
+ }
+
+ return ovl_set_or_remove_acl(dentry, inode, acl, type);
}
+#endif
-static bool ovl_can_list(const char *s)
+int ovl_update_time(struct inode *inode, int flags)
{
- /* List all non-trusted xatts */
- if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
- return true;
+ if (flags & S_ATIME) {
+ struct ovl_fs *ofs = OVL_FS(inode->i_sb);
+ struct path upperpath = {
+ .mnt = ovl_upper_mnt(ofs),
+ .dentry = ovl_upperdentry_dereference(OVL_I(inode)),
+ };
+
+ if (upperpath.dentry) {
+ touch_atime(&upperpath);
+ inode_set_atime_to_ts(inode,
+ inode_get_atime(d_inode(upperpath.dentry)));
+ }
+ }
+ return 0;
+}
+
+static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+ u64 start, u64 len)
+{
+ struct inode *realinode = ovl_inode_realdata(inode);
+
+ if (!realinode)
+ return -EIO;
+
+ if (!realinode->i_op->fiemap)
+ return -EOPNOTSUPP;
- /* Never list trusted.overlay, list other trusted for superuser only */
- return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN);
+ with_ovl_creds(inode->i_sb)
+ return realinode->i_op->fiemap(realinode, fieinfo, start, len);
}
-ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
+/*
+ * Work around the fact that security_file_ioctl() takes a file argument.
+ * Introducing security_inode_fileattr_get/set() hooks would solve this issue
+ * properly.
+ */
+static int ovl_security_fileattr(const struct path *realpath, struct file_kattr *fa,
+ bool set)
{
- struct dentry *realdentry = ovl_dentry_real(dentry);
- ssize_t res;
- size_t len;
- char *s;
- const struct cred *old_cred;
+ struct file *file;
+ unsigned int cmd;
+ int err;
+ unsigned int flags;
- old_cred = ovl_override_creds(dentry->d_sb);
- res = vfs_listxattr(realdentry, list, size);
- revert_creds(old_cred);
- if (res <= 0 || size == 0)
- return res;
+ flags = O_RDONLY;
+ if (force_o_largefile())
+ flags |= O_LARGEFILE;
- /* filter out private xattrs */
- for (s = list, len = res; len;) {
- size_t slen = strnlen(s, len) + 1;
+ file = dentry_open(realpath, flags, current_cred());
+ if (IS_ERR(file))
+ return PTR_ERR(file);
- /* underlying fs providing us with an broken xattr list? */
- if (WARN_ON(slen > len))
- return -EIO;
+ if (set)
+ cmd = fa->fsx_valid ? FS_IOC_FSSETXATTR : FS_IOC_SETFLAGS;
+ else
+ cmd = fa->fsx_valid ? FS_IOC_FSGETXATTR : FS_IOC_GETFLAGS;
- len -= slen;
- if (!ovl_can_list(s)) {
- res -= slen;
- memmove(s, s + slen, len);
- } else {
- s += slen;
- }
- }
+ err = security_file_ioctl(file, cmd, 0);
+ fput(file);
- return res;
+ return err;
}
-struct posix_acl *ovl_get_acl(struct inode *inode, int type)
+int ovl_real_fileattr_set(const struct path *realpath, struct file_kattr *fa)
{
- struct inode *realinode = ovl_inode_real(inode);
- const struct cred *old_cred;
- struct posix_acl *acl;
-
- if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode))
- return NULL;
+ int err;
- old_cred = ovl_override_creds(inode->i_sb);
- acl = get_acl(realinode, type);
- revert_creds(old_cred);
+ err = ovl_security_fileattr(realpath, fa, true);
+ if (err)
+ return err;
- return acl;
+ return vfs_fileattr_set(mnt_idmap(realpath->mnt), realpath->dentry, fa);
}
-int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags)
+int ovl_fileattr_set(struct mnt_idmap *idmap,
+ struct dentry *dentry, struct file_kattr *fa)
{
- if (flags & S_ATIME) {
- struct ovl_fs *ofs = inode->i_sb->s_fs_info;
- struct path upperpath = {
- .mnt = ofs->upper_mnt,
- .dentry = ovl_upperdentry_dereference(OVL_I(inode)),
- };
+ struct inode *inode = d_inode(dentry);
+ struct path upperpath;
+ unsigned int flags;
+ int err;
- if (upperpath.dentry) {
- touch_atime(&upperpath);
- inode->i_atime = d_inode(upperpath.dentry)->i_atime;
+ err = ovl_copy_up(dentry);
+ if (!err) {
+ ovl_path_real(dentry, &upperpath);
+
+ err = ovl_want_write(dentry);
+ if (err)
+ goto out;
+
+ with_ovl_creds(inode->i_sb) {
+ /*
+ * Store immutable/append-only flags in xattr and clear them
+ * in upper fileattr (in case they were set by older kernel)
+ * so children of "ovl-immutable" directories lower aliases of
+ * "ovl-immutable" hardlinks could be copied up.
+ * Clear xattr when flags are cleared.
+ */
+ err = ovl_set_protattr(inode, upperpath.dentry, fa);
+ if (!err)
+ err = ovl_real_fileattr_set(&upperpath, fa);
}
+ ovl_drop_write(dentry);
+
+ /*
+ * Merge real inode flags with inode flags read from
+ * overlay.protattr xattr
+ */
+ flags = ovl_inode_real(inode)->i_flags & OVL_COPY_I_FLAGS_MASK;
+
+ BUILD_BUG_ON(OVL_PROT_I_FLAGS_MASK & ~OVL_COPY_I_FLAGS_MASK);
+ flags |= inode->i_flags & OVL_PROT_I_FLAGS_MASK;
+ inode_set_flags(inode, flags, OVL_COPY_I_FLAGS_MASK);
+
+ /* Update ctime */
+ ovl_copyattr(inode);
}
- return 0;
+out:
+ return err;
}
-static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
- u64 start, u64 len)
+/* Convert inode protection flags to fileattr flags */
+static void ovl_fileattr_prot_flags(struct inode *inode, struct file_kattr *fa)
+{
+ BUILD_BUG_ON(OVL_PROT_FS_FLAGS_MASK & ~FS_COMMON_FL);
+ BUILD_BUG_ON(OVL_PROT_FSX_FLAGS_MASK & ~FS_XFLAG_COMMON);
+
+ if (inode->i_flags & S_APPEND) {
+ fa->flags |= FS_APPEND_FL;
+ fa->fsx_xflags |= FS_XFLAG_APPEND;
+ }
+ if (inode->i_flags & S_IMMUTABLE) {
+ fa->flags |= FS_IMMUTABLE_FL;
+ fa->fsx_xflags |= FS_XFLAG_IMMUTABLE;
+ }
+}
+
+int ovl_real_fileattr_get(const struct path *realpath, struct file_kattr *fa)
{
int err;
- struct inode *realinode = ovl_inode_real(inode);
- const struct cred *old_cred;
- if (!realinode->i_op->fiemap)
- return -EOPNOTSUPP;
+ err = ovl_security_fileattr(realpath, fa, false);
+ if (err)
+ return err;
- old_cred = ovl_override_creds(inode->i_sb);
+ err = vfs_fileattr_get(realpath->dentry, fa);
+ if (err == -ENOIOCTLCMD)
+ err = -ENOTTY;
+ return err;
+}
+
+int ovl_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
+{
+ struct inode *inode = d_inode(dentry);
+ struct path realpath;
+ int err;
- if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC)
- filemap_write_and_wait(realinode->i_mapping);
+ ovl_path_real(dentry, &realpath);
- err = realinode->i_op->fiemap(realinode, fieinfo, start, len);
- revert_creds(old_cred);
+ with_ovl_creds(inode->i_sb)
+ err = ovl_real_fileattr_get(&realpath, fa);
+ ovl_fileattr_prot_flags(inode, fa);
return err;
}
@@ -482,9 +728,13 @@ static const struct inode_operations ovl_file_inode_operations = {
.permission = ovl_permission,
.getattr = ovl_getattr,
.listxattr = ovl_listxattr,
+ .get_inode_acl = ovl_get_inode_acl,
.get_acl = ovl_get_acl,
+ .set_acl = ovl_set_acl,
.update_time = ovl_update_time,
.fiemap = ovl_fiemap,
+ .fileattr_get = ovl_fileattr_get,
+ .fileattr_set = ovl_fileattr_set,
};
static const struct inode_operations ovl_symlink_inode_operations = {
@@ -500,7 +750,9 @@ static const struct inode_operations ovl_special_inode_operations = {
.permission = ovl_permission,
.getattr = ovl_getattr,
.listxattr = ovl_listxattr,
+ .get_inode_acl = ovl_get_inode_acl,
.get_acl = ovl_get_acl,
+ .set_acl = ovl_set_acl,
.update_time = ovl_update_time,
};
@@ -511,11 +763,11 @@ static const struct address_space_operations ovl_aops = {
/*
* It is possible to stack overlayfs instance on top of another
- * overlayfs instance as lower layer. We need to annonate the
+ * overlayfs instance as lower layer. We need to annotate the
* stackable i_mutex locks according to stack level of the super
* block instance. An overlayfs instance can never be in stack
* depth 0 (there is always a real fs below it). An overlayfs
- * inode lock will use the lockdep annotaion ovl_i_mutex_key[depth].
+ * inode lock will use the lockdep annotation ovl_i_mutex_key[depth].
*
* For example, here is a snip from /proc/lockdep_chains after
* dir_iterate of nested overlayfs:
@@ -523,6 +775,27 @@ static const struct address_space_operations ovl_aops = {
* [...] &ovl_i_mutex_dir_key[depth] (stack_depth=2)
* [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1)
* [...] &type->i_mutex_dir_key (stack_depth=0)
+ *
+ * Locking order w.r.t ovl_want_write() is important for nested overlayfs.
+ *
+ * This chain is valid:
+ * - inode->i_rwsem (inode_lock[2])
+ * - upper_mnt->mnt_sb->s_writers (ovl_want_write[0])
+ * - OVL_I(inode)->lock (ovl_inode_lock[2])
+ * - OVL_I(lowerinode)->lock (ovl_inode_lock[1])
+ *
+ * And this chain is valid:
+ * - inode->i_rwsem (inode_lock[2])
+ * - OVL_I(inode)->lock (ovl_inode_lock[2])
+ * - lowerinode->i_rwsem (inode_lock[1])
+ * - OVL_I(lowerinode)->lock (ovl_inode_lock[1])
+ *
+ * But lowerinode->i_rwsem SHOULD NOT be acquired while ovl_want_write() is
+ * held, because it is in reverse order of the non-nested case using the same
+ * upper fs:
+ * - inode->i_rwsem (inode_lock[1])
+ * - upper_mnt->mnt_sb->s_writers (ovl_want_write[0])
+ * - OVL_I(inode)->lock (ovl_inode_lock[1])
*/
#define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH
@@ -547,27 +820,73 @@ static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode)
#endif
}
-static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
- unsigned long ino, int fsid)
+static void ovl_next_ino(struct inode *inode)
+{
+ struct ovl_fs *ofs = OVL_FS(inode->i_sb);
+
+ inode->i_ino = atomic_long_inc_return(&ofs->last_ino);
+ if (unlikely(!inode->i_ino))
+ inode->i_ino = atomic_long_inc_return(&ofs->last_ino);
+}
+
+static void ovl_map_ino(struct inode *inode, unsigned long ino, int fsid)
{
- int xinobits = ovl_xino_bits(inode->i_sb);
+ struct ovl_fs *ofs = OVL_FS(inode->i_sb);
+ int xinobits = ovl_xino_bits(ofs);
+ unsigned int xinoshift = 64 - xinobits;
/*
- * When NFS export is enabled and d_ino is consistent with st_ino
- * (samefs or i_ino has enough bits to encode layer), set the same
- * value used for d_ino to i_ino, because nfsd readdirplus compares
- * d_ino values to i_ino values of child entries. When called from
- * ovl_new_inode(), ino arg is 0, so i_ino will be updated to real
- * upper inode i_ino on ovl_inode_init() or ovl_inode_update().
+ * When d_ino is consistent with st_ino (samefs or i_ino has enough
+ * bits to encode layer), set the same value used for st_ino to i_ino,
+ * so inode number exposed via /proc/locks and a like will be
+ * consistent with d_ino and st_ino values. An i_ino value inconsistent
+ * with d_ino also causes nfsd readdirplus to fail.
*/
- if (inode->i_sb->s_export_op &&
- (ovl_same_sb(inode->i_sb) || xinobits)) {
- inode->i_ino = ino;
- if (xinobits && fsid && !(ino >> (64 - xinobits)))
- inode->i_ino |= (unsigned long)fsid << (64 - xinobits);
- } else {
- inode->i_ino = get_next_ino();
+ inode->i_ino = ino;
+ if (ovl_same_fs(ofs)) {
+ return;
+ } else if (xinobits && likely(!(ino >> xinoshift))) {
+ inode->i_ino |= (unsigned long)fsid << (xinoshift + 1);
+ return;
+ }
+
+ /*
+ * For directory inodes on non-samefs with xino disabled or xino
+ * overflow, we allocate a non-persistent inode number, to be used for
+ * resolving st_ino collisions in ovl_map_dev_ino().
+ *
+ * To avoid ino collision with legitimate xino values from upper
+ * layer (fsid 0), use the lowest xinobit to map the non
+ * persistent inode numbers to the unified st_ino address space.
+ */
+ if (S_ISDIR(inode->i_mode)) {
+ ovl_next_ino(inode);
+ if (xinobits) {
+ inode->i_ino &= ~0UL >> xinobits;
+ inode->i_ino |= 1UL << xinoshift;
+ }
}
+}
+
+void ovl_inode_init(struct inode *inode, struct ovl_inode_params *oip,
+ unsigned long ino, int fsid)
+{
+ struct inode *realinode;
+ struct ovl_inode *oi = OVL_I(inode);
+
+ oi->__upperdentry = oip->upperdentry;
+ oi->oe = oip->oe;
+ oi->redirect = oip->redirect;
+ oi->lowerdata_redirect = oip->lowerdata_redirect;
+
+ realinode = ovl_inode_real(inode);
+ ovl_copyattr(inode);
+ ovl_copyflags(realinode, inode);
+ ovl_map_ino(inode, ino, fsid);
+}
+
+static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
+{
inode->i_mode = mode;
inode->i_flags |= S_NOCMTIME;
#ifdef CONFIG_FS_POSIX_ACL
@@ -611,7 +930,7 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
* For the first, copy up case, the union nlink does not change, whether the
* operation succeeds or fails, but the upper inode nlink may change.
* Therefore, before copy up, we store the union nlink value relative to the
- * lower inode nlink in the index inode xattr trusted.overlay.nlink.
+ * lower inode nlink in the index inode xattr .overlay.nlink.
*
* For the second, upper hardlink case, the union nlink should be incremented
* or decremented IFF the operation succeeds, aligned with nlink change of the
@@ -646,8 +965,8 @@ static int ovl_set_nlink_common(struct dentry *dentry,
if (WARN_ON(len >= sizeof(buf)))
return -EIO;
- return ovl_do_setxattr(ovl_dentry_upper(dentry),
- OVL_XATTR_NLINK, buf, len, 0);
+ return ovl_setxattr(OVL_FS(inode->i_sb), ovl_dentry_upper(dentry),
+ OVL_XATTR_NLINK, buf, len);
}
int ovl_set_nlink_upper(struct dentry *dentry)
@@ -660,7 +979,7 @@ int ovl_set_nlink_lower(struct dentry *dentry)
return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i");
}
-unsigned int ovl_get_nlink(struct dentry *lowerdentry,
+unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry,
struct dentry *upperdentry,
unsigned int fallback)
{
@@ -672,7 +991,8 @@ unsigned int ovl_get_nlink(struct dentry *lowerdentry,
if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1)
return fallback;
- err = vfs_getxattr(upperdentry, OVL_XATTR_NLINK, &buf, sizeof(buf) - 1);
+ err = ovl_getxattr_upper(ofs, upperdentry, OVL_XATTR_NLINK,
+ &buf, sizeof(buf) - 1);
if (err < 0)
goto fail;
@@ -694,7 +1014,7 @@ unsigned int ovl_get_nlink(struct dentry *lowerdentry,
return nlink;
fail:
- pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n",
+ pr_warn_ratelimited("failed to get index nlink (%pd2, err=%i)\n",
upperdentry, err);
return fallback;
}
@@ -705,7 +1025,7 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev)
inode = new_inode(sb);
if (inode)
- ovl_fill_inode(inode, mode, rdev, 0, 0);
+ ovl_fill_inode(inode, mode, rdev);
return inode;
}
@@ -777,13 +1097,61 @@ struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
return inode;
}
+bool ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir)
+{
+ struct inode *key = d_inode(dir);
+ struct inode *trap;
+ bool res;
+
+ trap = ilookup5(sb, (unsigned long) key, ovl_inode_test, key);
+ if (!trap)
+ return false;
+
+ res = IS_DEADDIR(trap) && !ovl_inode_upper(trap) &&
+ !ovl_inode_lower(trap);
+
+ iput(trap);
+ return res;
+}
+
+/*
+ * Create an inode cache entry for layer root dir, that will intentionally
+ * fail ovl_verify_inode(), so any lookup that will find some layer root
+ * will fail.
+ */
+struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir)
+{
+ struct inode *key = d_inode(dir);
+ struct inode *trap;
+
+ if (!d_is_dir(dir))
+ return ERR_PTR(-ENOTDIR);
+
+ trap = iget5_locked(sb, (unsigned long) key, ovl_inode_test,
+ ovl_inode_set, key);
+ if (!trap)
+ return ERR_PTR(-ENOMEM);
+
+ if (!(inode_state_read_once(trap) & I_NEW)) {
+ /* Conflicting layer roots? */
+ iput(trap);
+ return ERR_PTR(-ELOOP);
+ }
+
+ trap->i_mode = S_IFDIR;
+ trap->i_flags = S_DEAD;
+ unlock_new_inode(trap);
+
+ return trap;
+}
+
/*
* Does overlay inode need to be hashed by lower inode?
*/
static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper,
- struct dentry *lower, struct dentry *index)
+ struct dentry *lower, bool index)
{
- struct ovl_fs *ofs = sb->s_fs_info;
+ struct ovl_fs *ofs = OVL_FS(sb);
/* No, if pure upper */
if (!lower)
@@ -794,7 +1162,7 @@ static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper,
return true;
/* Yes, if won't be copied up */
- if (!ofs->upper_mnt)
+ if (!ovl_upper_mnt(ofs))
return true;
/* No, if lower hardlink is or will be broken on copy up */
@@ -803,7 +1171,7 @@ static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper,
return false;
/* No, if non-indexed upper with NFS export */
- if (sb->s_export_op && upper)
+ if (ofs->config.nfs_export && upper)
return false;
/* Otherwise, hash by lower inode for fsnotify */
@@ -822,17 +1190,22 @@ static struct inode *ovl_iget5(struct super_block *sb, struct inode *newinode,
struct inode *ovl_get_inode(struct super_block *sb,
struct ovl_inode_params *oip)
{
+ struct ovl_fs *ofs = OVL_FS(sb);
struct dentry *upperdentry = oip->upperdentry;
- struct ovl_path *lowerpath = oip->lowerpath;
+ struct ovl_path *lowerpath = ovl_lowerpath(oip->oe);
struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
struct inode *inode;
struct dentry *lowerdentry = lowerpath ? lowerpath->dentry : NULL;
+ struct path realpath = {
+ .dentry = upperdentry ?: lowerdentry,
+ .mnt = upperdentry ? ovl_upper_mnt(ofs) : lowerpath->layer->mnt,
+ };
bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry,
oip->index);
- int fsid = bylower ? oip->lowerpath->layer->fsid : 0;
- bool is_dir, metacopy = false;
+ int fsid = bylower ? lowerpath->layer->fsid : 0;
+ bool is_dir;
unsigned long ino = 0;
- int err = -ENOMEM;
+ int err = oip->newinode ? -EEXIST : -ENOMEM;
if (!realinode)
realinode = d_inode(lowerdentry);
@@ -850,7 +1223,7 @@ struct inode *ovl_get_inode(struct super_block *sb,
inode = ovl_iget5(sb, oip->newinode, key);
if (!inode)
goto out_err;
- if (!(inode->i_state & I_NEW)) {
+ if (!(inode_state_read_once(inode) & I_NEW)) {
/*
* Verify that the underlying files stored in the inode
* match those in the dentry.
@@ -863,13 +1236,16 @@ struct inode *ovl_get_inode(struct super_block *sb,
}
dput(upperdentry);
+ ovl_free_entry(oip->oe);
kfree(oip->redirect);
+ kfree(oip->lowerdata_redirect);
goto out;
}
/* Recalculate nlink for non-dir due to indexing */
if (!is_dir)
- nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink);
+ nlink = ovl_get_nlink(ofs, lowerdentry, upperdentry,
+ nlink);
set_nlink(inode, nlink);
ino = key->i_ino;
} else {
@@ -879,44 +1255,41 @@ struct inode *ovl_get_inode(struct super_block *sb,
err = -ENOMEM;
goto out_err;
}
+ ino = realinode->i_ino;
+ fsid = lowerpath->layer->fsid;
}
- ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid);
- ovl_inode_init(inode, upperdentry, lowerdentry, oip->lowerdata);
+ ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev);
+ ovl_inode_init(inode, oip, ino, fsid);
+ WARN_ON_ONCE(!!IS_CASEFOLDED(inode) != ofs->casefold);
- if (upperdentry && ovl_is_impuredir(upperdentry))
+ if (upperdentry && ovl_is_impuredir(sb, upperdentry))
ovl_set_flag(OVL_IMPURE, inode);
if (oip->index)
ovl_set_flag(OVL_INDEX, inode);
- if (upperdentry) {
- err = ovl_check_metacopy_xattr(upperdentry);
- if (err < 0)
- goto out_err;
- metacopy = err;
- if (!metacopy)
- ovl_set_flag(OVL_UPPERDATA, inode);
- }
-
- OVL_I(inode)->redirect = oip->redirect;
-
if (bylower)
ovl_set_flag(OVL_CONST_INO, inode);
/* Check for non-merge dir that may have whiteouts */
if (is_dir) {
- if (((upperdentry && lowerdentry) || oip->numlower > 1) ||
- ovl_check_origin_xattr(upperdentry ?: lowerdentry)) {
+ if (((upperdentry && lowerdentry) || ovl_numlower(oip->oe) > 1) ||
+ ovl_path_check_origin_xattr(ofs, &realpath)) {
ovl_set_flag(OVL_WHITEOUTS, inode);
}
}
- if (inode->i_state & I_NEW)
+ /* Check for immutable/append-only inode flags in xattr */
+ if (upperdentry)
+ ovl_check_protattr(inode, upperdentry);
+
+ if (inode_state_read_once(inode) & I_NEW)
unlock_new_inode(inode);
out:
return inode;
out_err:
+ pr_warn_ratelimited("failed to get inode (%i)\n", err);
inode = ERR_PTR(err);
goto out;
}
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index efd372312ef1..e9a69c95be91 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2011 Novell Inc.
* Copyright (C) 2016 Red Hat, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
*/
#include <linux/fs.h>
@@ -18,31 +15,41 @@
#include "overlayfs.h"
struct ovl_lookup_data {
+ struct super_block *sb;
+ struct dentry *dentry;
+ const struct ovl_layer *layer;
struct qstr name;
bool is_dir;
bool opaque;
+ bool xwhiteouts;
bool stop;
bool last;
char *redirect;
- bool metacopy;
+ char *upperredirect;
+ int metacopy;
+ /* Referring to last redirect xattr */
+ bool absolute_redirect;
};
-static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
+static int ovl_check_redirect(const struct path *path, struct ovl_lookup_data *d,
size_t prelen, const char *post)
{
int res;
char *buf;
+ struct ovl_fs *ofs = OVL_FS(d->sb);
- buf = ovl_get_redirect_xattr(dentry, prelen + strlen(post));
+ d->absolute_redirect = false;
+ buf = ovl_get_redirect_xattr(ofs, path, prelen + strlen(post));
if (IS_ERR_OR_NULL(buf))
return PTR_ERR(buf);
if (buf[0] == '/') {
+ d->absolute_redirect = true;
/*
* One of the ancestor path elements in an absolute path
* lookup in ovl_lookup_layer() could have been opaque and
* that will stop further lookup in lower layers (d->stop=true)
- * But we have found an absolute redirect in decendant path
+ * But we have found an absolute redirect in descendant path
* element and that should force continue lookup in lower
* layers (reset d->stop).
*/
@@ -86,32 +93,33 @@ static int ovl_acceptable(void *ctx, struct dentry *dentry)
* Return -ENODATA for "origin unknown".
* Return <0 for an invalid file handle.
*/
-int ovl_check_fh_len(struct ovl_fh *fh, int fh_len)
+int ovl_check_fb_len(struct ovl_fb *fb, int fb_len)
{
- if (fh_len < sizeof(struct ovl_fh) || fh_len < fh->len)
+ if (fb_len < sizeof(struct ovl_fb) || fb_len < fb->len)
return -EINVAL;
- if (fh->magic != OVL_FH_MAGIC)
+ if (fb->magic != OVL_FH_MAGIC)
return -EINVAL;
/* Treat larger version and unknown flags as "origin unknown" */
- if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL)
+ if (fb->version > OVL_FH_VERSION || fb->flags & ~OVL_FH_FLAG_ALL)
return -ENODATA;
/* Treat endianness mismatch as "origin unknown" */
- if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
- (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
+ if (!(fb->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
+ (fb->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
return -ENODATA;
return 0;
}
-static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name)
+static struct ovl_fh *ovl_get_fh(struct ovl_fs *ofs, struct dentry *upperdentry,
+ enum ovl_xattr ox)
{
int res, err;
struct ovl_fh *fh = NULL;
- res = vfs_getxattr(dentry, name, NULL, 0);
+ res = ovl_getxattr_upper(ofs, upperdentry, ox, NULL, 0);
if (res < 0) {
if (res == -ENODATA || res == -EOPNOTSUPP)
return NULL;
@@ -121,15 +129,15 @@ static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name)
if (res == 0)
return NULL;
- fh = kzalloc(res, GFP_KERNEL);
+ fh = kzalloc(res + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
if (!fh)
return ERR_PTR(-ENOMEM);
- res = vfs_getxattr(dentry, name, fh, res);
+ res = ovl_getxattr_upper(ofs, upperdentry, ox, fh->buf, res);
if (res < 0)
goto fail;
- err = ovl_check_fh_len(fh, res);
+ err = ovl_check_fb_len(&fh->fb, res);
if (err < 0) {
if (err == -ENODATA)
goto out;
@@ -143,29 +151,35 @@ out:
return NULL;
fail:
- pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res);
+ pr_warn_ratelimited("failed to get origin (%i)\n", res);
goto out;
invalid:
- pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh);
+ pr_warn_ratelimited("invalid origin (%*phN)\n", res, fh);
goto out;
}
-struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
- bool connected)
+struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
+ struct vfsmount *mnt, bool connected)
{
struct dentry *real;
int bytes;
+ if (!capable(CAP_DAC_READ_SEARCH))
+ return NULL;
+
/*
* Make sure that the stored uuid matches the uuid of the lower
* layer where file handle will be decoded.
+ * In case of uuid=off option just make sure that stored uuid is null.
*/
- if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
+ if (ovl_origin_uuid(ofs) ?
+ !uuid_equal(&fh->fb.uuid, &mnt->mnt_sb->s_uuid) :
+ !uuid_is_null(&fh->fb.uuid))
return NULL;
- bytes = (fh->len - offsetof(struct ovl_fh, fid));
- real = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
- bytes >> 2, (int)fh->type,
+ bytes = (fh->fb.len - offsetof(struct ovl_fb, fid));
+ real = exportfs_decode_fh(mnt, (struct fid *)fh->fb.fid,
+ bytes >> 2, (int)fh->fb.type,
connected ? ovl_acceptable : NULL, mnt);
if (IS_ERR(real)) {
/*
@@ -175,7 +189,7 @@ struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
* index entries correctly.
*/
if (real == ERR_PTR(-ESTALE) &&
- !(fh->flags & OVL_FH_FLAG_PATH_UPPER))
+ !(fh->fb.flags & OVL_FH_FLAG_PATH_UPPER))
real = NULL;
return real;
}
@@ -188,21 +202,56 @@ struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
return real;
}
-static bool ovl_is_opaquedir(struct dentry *dentry)
+static struct dentry *ovl_lookup_positive_unlocked(struct ovl_lookup_data *d,
+ const char *name,
+ struct dentry *base, int len,
+ bool drop_negative)
{
- return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE);
+ struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->layer->mnt),
+ &QSTR_LEN(name, len), base);
+
+ if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
+ if (drop_negative && ret->d_lockref.count == 1) {
+ spin_lock(&ret->d_lock);
+ /* Recheck condition under lock */
+ if (d_is_negative(ret) && ret->d_lockref.count == 1)
+ __d_drop(ret);
+ spin_unlock(&ret->d_lock);
+ }
+ dput(ret);
+ ret = ERR_PTR(-ENOENT);
+ }
+ return ret;
}
static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
const char *name, unsigned int namelen,
size_t prelen, const char *post,
- struct dentry **ret)
+ struct dentry **ret, bool drop_negative)
{
- struct dentry *this;
+ struct ovl_fs *ofs = OVL_FS(d->sb);
+ struct dentry *this = NULL;
+ const char *warn;
+ struct path path;
int err;
bool last_element = !post[0];
+ bool is_upper = d->layer->idx == 0;
+ char val;
+
+ /*
+ * We allow filesystems that are case-folding capable as long as the
+ * layers are consistently enabled in the stack, enabled for every dir
+ * or disabled in all dirs. If someone has modified case folding on a
+ * directory on underlying layer, the warranty of the ovl stack is
+ * voided.
+ */
+ if (ofs->casefold != ovl_dentry_casefolded(base)) {
+ warn = "parent wrong casefold";
+ err = -ESTALE;
+ goto out_warn;
+ }
- this = lookup_one_len_unlocked(name, base, namelen);
+ this = ovl_lookup_positive_unlocked(d, name, base, namelen, drop_negative);
if (IS_ERR(this)) {
err = PTR_ERR(this);
this = NULL;
@@ -210,15 +259,23 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
goto out;
goto out_err;
}
- if (!this->d_inode)
- goto put_and_out;
+
+ if (ofs->casefold != ovl_dentry_casefolded(this)) {
+ warn = "child wrong casefold";
+ err = -EREMOTE;
+ goto out_warn;
+ }
if (ovl_dentry_weird(this)) {
/* Don't support traversing automounts and other weirdness */
+ warn = "unsupported object type";
err = -EREMOTE;
- goto out_err;
+ goto out_warn;
}
- if (ovl_is_whiteout(this)) {
+
+ path.dentry = this;
+ path.mnt = d->layer->mnt;
+ if (ovl_path_is_whiteout(ofs, &path)) {
d->stop = d->opaque = true;
goto put_and_out;
}
@@ -230,12 +287,13 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
d->stop = true;
goto put_and_out;
}
+
if (!d_can_lookup(this)) {
if (d->is_dir || !last_element) {
d->stop = true;
goto put_and_out;
}
- err = ovl_check_metacopy_xattr(this);
+ err = ovl_check_metacopy_xattr(ofs, &path, NULL);
if (err < 0)
goto out_err;
@@ -244,19 +302,31 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
if (!d->metacopy || d->last)
goto out;
} else {
+ if (ovl_lookup_trap_inode(d->sb, this)) {
+ /* Caught in a trap of overlapping layers */
+ warn = "overlapping layers";
+ err = -ELOOP;
+ goto out_warn;
+ }
+
if (last_element)
d->is_dir = true;
if (d->last)
goto out;
- if (ovl_is_opaquedir(this)) {
+ /* overlay.opaque=x means xwhiteouts directory */
+ val = ovl_get_opaquedir_val(ofs, &path);
+ if (last_element && !is_upper && val == 'x') {
+ d->xwhiteouts = true;
+ ovl_layer_set_xwhiteouts(ofs, d->layer);
+ } else if (val == 'y') {
d->stop = true;
if (last_element)
d->opaque = true;
goto out;
}
}
- err = ovl_check_redirect(this, d, prelen, post);
+ err = ovl_check_redirect(&path, d, prelen, post);
if (err)
goto out_err;
out:
@@ -268,13 +338,17 @@ put_and_out:
this = NULL;
goto out;
+out_warn:
+ pr_warn_ratelimited("failed lookup in %s (%pd2, name='%.*s', err=%i): %s\n",
+ is_upper ? "upper" : "lower", base,
+ namelen, name, err, warn);
out_err:
dput(this);
return err;
}
static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
- struct dentry **ret)
+ struct dentry **ret, bool drop_negative)
{
/* Counting down from the end, since the prefix can change */
size_t rem = d->name.len - 1;
@@ -283,7 +357,7 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
if (d->name.name[0] != '/')
return ovl_lookup_single(base, d, d->name.name, d->name.len,
- 0, "", ret);
+ 0, "", ret, drop_negative);
while (!IS_ERR_OR_NULL(base) && d_can_lookup(base)) {
const char *s = d->name.name + d->name.len - rem;
@@ -296,7 +370,8 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
return -EIO;
err = ovl_lookup_single(base, d, s, thislen,
- d->name.len - rem, next, &base);
+ d->name.len - rem, next, &base,
+ drop_negative);
dput(dentry);
if (err)
return err;
@@ -313,6 +388,61 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
return 0;
}
+static int ovl_lookup_data_layer(struct dentry *dentry, const char *redirect,
+ const struct ovl_layer *layer,
+ struct path *datapath)
+{
+ int err;
+
+ err = vfs_path_lookup(layer->mnt->mnt_root, layer->mnt, redirect,
+ LOOKUP_BENEATH | LOOKUP_NO_SYMLINKS | LOOKUP_NO_XDEV,
+ datapath);
+ pr_debug("lookup lowerdata (%pd2, redirect=\"%s\", layer=%d, err=%i)\n",
+ dentry, redirect, layer->idx, err);
+
+ if (err)
+ return err;
+
+ err = -EREMOTE;
+ if (ovl_dentry_weird(datapath->dentry))
+ goto out_path_put;
+
+ err = -ENOENT;
+ /* Only regular file is acceptable as lower data */
+ if (!d_is_reg(datapath->dentry))
+ goto out_path_put;
+
+ return 0;
+
+out_path_put:
+ path_put(datapath);
+
+ return err;
+}
+
+/* Lookup in data-only layers by absolute redirect to layer root */
+static int ovl_lookup_data_layers(struct dentry *dentry, const char *redirect,
+ struct ovl_path *lowerdata)
+{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+ const struct ovl_layer *layer;
+ struct path datapath;
+ int err = -ENOENT;
+ int i;
+
+ layer = &ofs->layers[ofs->numlayer - ofs->numdatalayer];
+ for (i = 0; i < ofs->numdatalayer; i++, layer++) {
+ err = ovl_lookup_data_layer(dentry, redirect, layer, &datapath);
+ if (!err) {
+ mntput(datapath.mnt);
+ lowerdata->dentry = datapath.dentry;
+ lowerdata->layer = layer;
+ return 0;
+ }
+ }
+
+ return err;
+}
int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
struct dentry *upperdentry, struct ovl_path **stackp)
@@ -320,8 +450,16 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
struct dentry *origin = NULL;
int i;
- for (i = 0; i < ofs->numlower; i++) {
- origin = ovl_decode_real_fh(fh, ofs->lower_layers[i].mnt,
+ for (i = 1; i <= ovl_numlowerlayer(ofs); i++) {
+ /*
+ * If lower fs uuid is not unique among lower fs we cannot match
+ * fh->uuid to layer.
+ */
+ if (ofs->layers[i].fsid &&
+ ofs->layers[i].fs->bad_uuid)
+ continue;
+
+ origin = ovl_decode_real_fh(ofs, fh, ofs->layers[i].mnt,
connected);
if (origin)
break;
@@ -332,8 +470,8 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
else if (IS_ERR(origin))
return PTR_ERR(origin);
- if (upperdentry && !ovl_is_whiteout(upperdentry) &&
- ((d_inode(origin)->i_mode ^ d_inode(upperdentry)->i_mode) & S_IFMT))
+ if (upperdentry && !ovl_upper_is_whiteout(ofs, upperdentry) &&
+ inode_wrong_type(d_inode(upperdentry), d_inode(origin)->i_mode))
goto invalid;
if (!*stackp)
@@ -344,23 +482,23 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
}
**stackp = (struct ovl_path){
.dentry = origin,
- .layer = &ofs->lower_layers[i]
+ .layer = &ofs->layers[i]
};
return 0;
invalid:
- pr_warn_ratelimited("overlayfs: invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
+ pr_warn_ratelimited("invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
d_inode(origin)->i_mode & S_IFMT);
dput(origin);
- return -EIO;
+ return -ESTALE;
}
static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
- struct ovl_path **stackp, unsigned int *ctrp)
+ struct ovl_path **stackp)
{
- struct ovl_fh *fh = ovl_get_fh(upperdentry, OVL_XATTR_ORIGIN);
+ struct ovl_fh *fh = ovl_get_fh(ofs, upperdentry, OVL_XATTR_ORIGIN);
int err;
if (IS_ERR_OR_NULL(fh))
@@ -375,10 +513,6 @@ static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
return err;
}
- if (WARN_ON(*ctrp))
- return -EIO;
-
- *ctrp = 1;
return 0;
}
@@ -386,10 +520,10 @@ static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
* Verify that @fh matches the file handle stored in xattr @name.
* Return 0 on match, -ESTALE on mismatch, < 0 on error.
*/
-static int ovl_verify_fh(struct dentry *dentry, const char *name,
- const struct ovl_fh *fh)
+static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry,
+ enum ovl_xattr ox, const struct ovl_fh *fh)
{
- struct ovl_fh *ofh = ovl_get_fh(dentry, name);
+ struct ovl_fh *ofh = ovl_get_fh(ofs, dentry, ox);
int err = 0;
if (!ofh)
@@ -398,13 +532,26 @@ static int ovl_verify_fh(struct dentry *dentry, const char *name,
if (IS_ERR(ofh))
return PTR_ERR(ofh);
- if (fh->len != ofh->len || memcmp(fh, ofh, fh->len))
+ if (fh->fb.len != ofh->fb.len || memcmp(&fh->fb, &ofh->fb, fh->fb.len))
err = -ESTALE;
kfree(ofh);
return err;
}
+int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
+ enum ovl_xattr ox, const struct ovl_fh *fh,
+ bool is_upper, bool set)
+{
+ int err;
+
+ err = ovl_verify_fh(ofs, dentry, ox, fh);
+ if (set && err == -ENODATA)
+ err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len);
+
+ return err;
+}
+
/*
* Verify that @real dentry matches the file handle stored in xattr @name.
*
@@ -413,23 +560,22 @@ static int ovl_verify_fh(struct dentry *dentry, const char *name,
*
* Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
*/
-int ovl_verify_set_fh(struct dentry *dentry, const char *name,
- struct dentry *real, bool is_upper, bool set)
+int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry,
+ enum ovl_xattr ox, struct dentry *real,
+ bool is_upper, bool set)
{
struct inode *inode;
struct ovl_fh *fh;
int err;
- fh = ovl_encode_real_fh(real, is_upper);
+ fh = ovl_encode_real_fh(ofs, d_inode(real), is_upper);
err = PTR_ERR(fh);
if (IS_ERR(fh)) {
fh = NULL;
goto fail;
}
- err = ovl_verify_fh(dentry, name, fh);
- if (set && err == -ENODATA)
- err = ovl_do_setxattr(dentry, name, fh, fh->len, 0);
+ err = ovl_verify_set_fh(ofs, dentry, ox, fh, is_upper, set);
if (err)
goto fail;
@@ -439,14 +585,16 @@ out:
fail:
inode = d_inode(real);
- pr_warn_ratelimited("overlayfs: failed to verify %s (%pd2, ino=%lu, err=%i)\n",
+ pr_warn_ratelimited("failed to verify %s (%pd2, ino=%lu, err=%i)\n",
is_upper ? "upper" : "origin", real,
inode ? inode->i_ino : 0, err);
goto out;
}
+
/* Get upper dentry from index */
-struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
+struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index,
+ bool connected)
{
struct ovl_fh *fh;
struct dentry *upper;
@@ -454,18 +602,18 @@ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
if (!d_is_dir(index))
return dget(index);
- fh = ovl_get_fh(index, OVL_XATTR_UPPER);
+ fh = ovl_get_fh(ofs, index, OVL_XATTR_UPPER);
if (IS_ERR_OR_NULL(fh))
return ERR_CAST(fh);
- upper = ovl_decode_real_fh(fh, ofs->upper_mnt, true);
+ upper = ovl_decode_real_fh(ofs, fh, ovl_upper_mnt(ofs), connected);
kfree(fh);
if (IS_ERR_OR_NULL(upper))
return upper ?: ERR_PTR(-ESTALE);
if (!d_is_dir(upper)) {
- pr_warn_ratelimited("overlayfs: invalid index upper (%pd2, upper=%pd2).\n",
+ pr_warn_ratelimited("invalid index upper (%pd2, upper=%pd2).\n",
index, upper);
dput(upper);
return ERR_PTR(-EIO);
@@ -474,12 +622,6 @@ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
return upper;
}
-/* Is this a leftover from create/whiteout of directory index entry? */
-static bool ovl_is_temp_index(struct dentry *index)
-{
- return index->d_name.name[0] == '#';
-}
-
/*
* Verify that an index entry name matches the origin file handle stored in
* OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
@@ -497,26 +639,21 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
if (!d_inode(index))
return 0;
- /* Cleanup leftover from index create/cleanup attempt */
- err = -ESTALE;
- if (ovl_is_temp_index(index))
- goto fail;
-
err = -EINVAL;
- if (index->d_name.len < sizeof(struct ovl_fh)*2)
+ if (index->d_name.len < sizeof(struct ovl_fb)*2)
goto fail;
err = -ENOMEM;
len = index->d_name.len / 2;
- fh = kzalloc(len, GFP_KERNEL);
+ fh = kzalloc(len + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
if (!fh)
goto fail;
err = -EINVAL;
- if (hex2bin((u8 *)fh, index->d_name.name, len))
+ if (hex2bin(fh->buf, index->d_name.name, len))
goto fail;
- err = ovl_check_fh_len(fh, len);
+ err = ovl_check_fb_len(&fh->fb, len);
if (err)
goto fail;
@@ -542,7 +679,7 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
* directly from the index dentry, but for dir index we first need to
* decode the upper directory.
*/
- upper = ovl_index_upper(ofs, index);
+ upper = ovl_index_upper(ofs, index, false);
if (IS_ERR_OR_NULL(upper)) {
err = PTR_ERR(upper);
/*
@@ -558,7 +695,7 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
goto fail;
}
- err = ovl_verify_fh(upper, OVL_XATTR_ORIGIN, fh);
+ err = ovl_verify_fh(ofs, upper, OVL_XATTR_ORIGIN, fh);
dput(upper);
if (err)
goto fail;
@@ -569,7 +706,7 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
if (err)
goto fail;
- if (ovl_get_nlink(origin.dentry, index, 0) == 0)
+ if (ovl_get_nlink(ofs, origin.dentry, index, 0) == 0)
goto orphan;
}
@@ -579,27 +716,27 @@ out:
return err;
fail:
- pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n",
+ pr_warn_ratelimited("failed to verify index (%pd2, ftype=%x, err=%i)\n",
index, d_inode(index)->i_mode & S_IFMT, err);
goto out;
orphan:
- pr_warn_ratelimited("overlayfs: orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
+ pr_warn_ratelimited("orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
index, d_inode(index)->i_mode & S_IFMT,
d_inode(index)->i_nlink);
err = -ENOENT;
goto out;
}
-static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
+int ovl_get_index_name_fh(const struct ovl_fh *fh, struct qstr *name)
{
char *n, *s;
- n = kcalloc(fh->len, 2, GFP_KERNEL);
+ n = kcalloc(fh->fb.len, 2, GFP_KERNEL);
if (!n)
return -ENOMEM;
- s = bin2hex(n, fh, fh->len);
+ s = bin2hex(n, fh->buf, fh->fb.len);
*name = (struct qstr) QSTR_INIT(n, s - n);
return 0;
@@ -618,15 +755,16 @@ static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
* If the index dentry for a copy up origin inode is positive, but points
* to an inode different than the upper inode, then either the upper inode
* has been copied up and not indexed or it was indexed, but since then
- * index dir was cleared. Either way, that index cannot be used to indentify
+ * index dir was cleared. Either way, that index cannot be used to identify
* the overlay inode.
*/
-int ovl_get_index_name(struct dentry *origin, struct qstr *name)
+int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
+ struct qstr *name)
{
struct ovl_fh *fh;
int err;
- fh = ovl_encode_real_fh(origin, false);
+ fh = ovl_encode_real_fh(ofs, d_inode(origin), false);
if (IS_ERR(fh))
return PTR_ERR(fh);
@@ -647,7 +785,7 @@ struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
if (err)
return ERR_PTR(err);
- index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
+ index = lookup_noperm_positive_unlocked(&name, ofs->workdir);
kfree(name.name);
if (IS_ERR(index)) {
if (PTR_ERR(index) == -ENOENT)
@@ -655,9 +793,7 @@ struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
return index;
}
- if (d_is_negative(index))
- err = 0;
- else if (ovl_is_whiteout(index))
+ if (ovl_is_whiteout(index))
err = -ESTALE;
else if (ovl_dentry_weird(index))
err = -EIO;
@@ -677,18 +813,19 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
bool is_dir = d_is_dir(origin);
int err;
- err = ovl_get_index_name(origin, &name);
+ err = ovl_get_index_name(ofs, origin, &name);
if (err)
return ERR_PTR(err);
- index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
+ index = lookup_one_positive_unlocked(ovl_upper_mnt_idmap(ofs), &name,
+ ofs->workdir);
if (IS_ERR(index)) {
err = PTR_ERR(index);
if (err == -ENOENT) {
index = NULL;
goto out;
}
- pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
+ pr_warn_ratelimited("failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
"overlayfs: mount with '-o index=off' to disable inodes index.\n",
d_inode(origin)->i_ino, name.len, name.name,
err);
@@ -696,9 +833,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
}
inode = d_inode(index);
- if (d_is_negative(index)) {
- goto out_dput;
- } else if (ovl_is_whiteout(index) && !verify) {
+ if (ovl_is_whiteout(index) && !verify) {
/*
* When index lookup is called with !verify for decoding an
* overlay file handle, a whiteout index implies that decode
@@ -709,7 +844,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
index = ERR_PTR(-ESTALE);
goto out;
} else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
- ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) {
+ inode_wrong_type(inode, d_inode(origin)->i_mode)) {
/*
* Index should always be of the same file type as origin
* except for the case of a whiteout index. A whiteout
@@ -717,22 +852,22 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
* unlinked, which means that finding a lower origin on lookup
* whose index is a whiteout should be treated as an error.
*/
- pr_warn_ratelimited("overlayfs: bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
+ pr_warn_ratelimited("bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
index, d_inode(index)->i_mode & S_IFMT,
d_inode(origin)->i_mode & S_IFMT);
goto fail;
} else if (is_dir && verify) {
if (!upper) {
- pr_warn_ratelimited("overlayfs: suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
+ pr_warn_ratelimited("suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
origin, index);
goto fail;
}
/* Verify that dir index 'upper' xattr points to upper dir */
- err = ovl_verify_upper(index, upper, false);
+ err = ovl_verify_upper(ofs, index, upper, false);
if (err) {
if (err == -ESTALE) {
- pr_warn_ratelimited("overlayfs: suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
+ pr_warn_ratelimited("suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
upper, origin, index);
}
goto fail;
@@ -759,93 +894,220 @@ fail:
* Returns next layer in stack starting from top.
* Returns -1 if this is the last layer.
*/
-int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
+int ovl_path_next(int idx, struct dentry *dentry, struct path *path,
+ const struct ovl_layer **layer)
{
- struct ovl_entry *oe = dentry->d_fsdata;
+ struct ovl_entry *oe = OVL_E(dentry);
+ struct ovl_path *lowerstack = ovl_lowerstack(oe);
BUG_ON(idx < 0);
if (idx == 0) {
ovl_path_upper(dentry, path);
- if (path->dentry)
- return oe->numlower ? 1 : -1;
+ if (path->dentry) {
+ *layer = &OVL_FS(dentry->d_sb)->layers[0];
+ return ovl_numlower(oe) ? 1 : -1;
+ }
idx++;
}
- BUG_ON(idx > oe->numlower);
- path->dentry = oe->lowerstack[idx - 1].dentry;
- path->mnt = oe->lowerstack[idx - 1].layer->mnt;
+ BUG_ON(idx > ovl_numlower(oe));
+ path->dentry = lowerstack[idx - 1].dentry;
+ *layer = lowerstack[idx - 1].layer;
+ path->mnt = (*layer)->mnt;
- return (idx < oe->numlower) ? idx + 1 : -1;
+ return (idx < ovl_numlower(oe)) ? idx + 1 : -1;
}
/* Fix missing 'origin' xattr */
-static int ovl_fix_origin(struct dentry *dentry, struct dentry *lower,
- struct dentry *upper)
+static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry,
+ struct dentry *lower, struct dentry *upper)
{
+ const struct ovl_fh *fh;
int err;
- if (ovl_check_origin_xattr(upper))
+ if (ovl_check_origin_xattr(ofs, upper))
return 0;
+ fh = ovl_get_origin_fh(ofs, lower);
+ if (IS_ERR(fh))
+ return PTR_ERR(fh);
+
err = ovl_want_write(dentry);
if (err)
- return err;
+ goto out;
- err = ovl_set_origin(dentry, lower, upper);
+ err = ovl_set_origin_fh(ofs, fh, upper);
if (!err)
err = ovl_set_impure(dentry->d_parent, upper->d_parent);
ovl_drop_write(dentry);
+out:
+ kfree(fh);
return err;
}
-struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
- unsigned int flags)
+static int ovl_maybe_validate_verity(struct dentry *dentry)
+{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+ struct inode *inode = d_inode(dentry);
+ struct path datapath, metapath;
+ int err;
+
+ if (!ofs->config.verity_mode ||
+ !ovl_is_metacopy_dentry(dentry) ||
+ ovl_test_flag(OVL_VERIFIED_DIGEST, inode))
+ return 0;
+
+ if (!ovl_test_flag(OVL_HAS_DIGEST, inode)) {
+ if (ofs->config.verity_mode == OVL_VERITY_REQUIRE) {
+ pr_warn_ratelimited("metacopy file '%pd' has no digest specified\n",
+ dentry);
+ return -EIO;
+ }
+ return 0;
+ }
+
+ ovl_path_lowerdata(dentry, &datapath);
+ if (!datapath.dentry)
+ return -EIO;
+
+ ovl_path_real(dentry, &metapath);
+ if (!metapath.dentry)
+ return -EIO;
+
+ err = ovl_inode_lock_interruptible(inode);
+ if (err)
+ return err;
+
+ if (!ovl_test_flag(OVL_VERIFIED_DIGEST, inode)) {
+ with_ovl_creds(dentry->d_sb)
+ err = ovl_validate_verity(ofs, &metapath, &datapath);
+ if (err == 0)
+ ovl_set_flag(OVL_VERIFIED_DIGEST, inode);
+ }
+
+ ovl_inode_unlock(inode);
+
+ return err;
+}
+
+/* Lazy lookup of lowerdata */
+static int ovl_maybe_lookup_lowerdata(struct dentry *dentry)
+{
+ struct inode *inode = d_inode(dentry);
+ const char *redirect = ovl_lowerdata_redirect(inode);
+ struct ovl_path datapath = {};
+ int err;
+
+ if (!redirect || ovl_dentry_lowerdata(dentry))
+ return 0;
+
+ if (redirect[0] != '/')
+ return -EIO;
+
+ err = ovl_inode_lock_interruptible(inode);
+ if (err)
+ return err;
+
+ err = 0;
+ /* Someone got here before us? */
+ if (ovl_dentry_lowerdata(dentry))
+ goto out;
+
+ with_ovl_creds(dentry->d_sb)
+ err = ovl_lookup_data_layers(dentry, redirect, &datapath);
+ if (err)
+ goto out_err;
+
+ err = ovl_dentry_set_lowerdata(dentry, &datapath);
+ if (err)
+ goto out_err;
+
+out:
+ ovl_inode_unlock(inode);
+ dput(datapath.dentry);
+
+ return err;
+
+out_err:
+ pr_warn_ratelimited("lazy lowerdata lookup failed (%pd2, err=%i)\n",
+ dentry, err);
+ goto out;
+}
+
+int ovl_verify_lowerdata(struct dentry *dentry)
{
+ int err;
+
+ err = ovl_maybe_lookup_lowerdata(dentry);
+ if (err)
+ return err;
+
+ return ovl_maybe_validate_verity(dentry);
+}
+
+/*
+ * Following redirects/metacopy can have security consequences: it's like a
+ * symlink into the lower layer without the permission checks.
+ *
+ * This is only a problem if the upper layer is untrusted (e.g comes from an USB
+ * drive). This can allow a non-readable file or directory to become readable.
+ *
+ * Only following redirects when redirects are enabled disables this attack
+ * vector when not necessary.
+ */
+static bool ovl_check_follow_redirect(struct ovl_lookup_data *d)
+{
+ struct ovl_fs *ofs = OVL_FS(d->sb);
+
+ if (d->metacopy && !ofs->config.metacopy) {
+ pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", d->dentry);
+ return false;
+ }
+ if ((d->redirect || d->upperredirect) && !ovl_redirect_follow(ofs)) {
+ pr_warn_ratelimited("refusing to follow redirect for (%pd2)\n", d->dentry);
+ return false;
+ }
+ return true;
+}
+
+struct ovl_lookup_ctx {
+ struct dentry *dentry;
struct ovl_entry *oe;
- const struct cred *old_cred;
- struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
- struct ovl_entry *poe = dentry->d_parent->d_fsdata;
- struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
- struct ovl_path *stack = NULL, *origin_path = NULL;
- struct dentry *upperdir, *upperdentry = NULL;
+ struct ovl_path *stack;
+ struct ovl_path *origin_path;
+ struct dentry *upperdentry;
+ struct dentry *index;
+ struct inode *inode;
+ unsigned int ctr;
+};
+
+static int ovl_lookup_layers(struct ovl_lookup_ctx *ctx, struct ovl_lookup_data *d)
+{
+ struct dentry *dentry = ctx->dentry;
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+ struct ovl_entry *poe = OVL_E(dentry->d_parent);
+ struct ovl_entry *roe = OVL_E(dentry->d_sb->s_root);
+ bool check_redirect = (ovl_redirect_follow(ofs) || ofs->numdatalayer);
+ struct dentry *upperdir;
+ struct dentry *this;
struct dentry *origin = NULL;
- struct dentry *index = NULL;
- unsigned int ctr = 0;
- struct inode *inode = NULL;
bool upperopaque = false;
- char *upperredirect = NULL;
- struct dentry *this;
+ bool uppermetacopy = false;
+ int metacopy_size = 0;
unsigned int i;
int err;
- bool metacopy = false;
- struct ovl_lookup_data d = {
- .name = dentry->d_name,
- .is_dir = false,
- .opaque = false,
- .stop = false,
- .last = ofs->config.redirect_follow ? false : !poe->numlower,
- .redirect = NULL,
- .metacopy = false,
- };
-
- if (dentry->d_name.len > ofs->namelen)
- return ERR_PTR(-ENAMETOOLONG);
- old_cred = ovl_override_creds(dentry->d_sb);
upperdir = ovl_dentry_upper(dentry->d_parent);
if (upperdir) {
- err = ovl_lookup_layer(upperdir, &d, &upperdentry);
+ d->layer = &ofs->layers[0];
+ err = ovl_lookup_layer(upperdir, d, &ctx->upperdentry, true);
if (err)
- goto out;
+ return err;
- if (upperdentry && unlikely(ovl_dentry_remote(upperdentry))) {
- dput(upperdentry);
- err = -EREMOTE;
- goto out;
- }
- if (upperdentry && !d.is_dir) {
- unsigned int origin_ctr = 0;
+ if (ctx->upperdentry && ctx->upperdentry->d_flags & DCACHE_OP_REAL)
+ return -EREMOTE;
+ if (ctx->upperdentry && !d->is_dir) {
/*
* Lookup copy up origin by decoding origin file handle.
* We may get a disconnected dentry, which is fine,
@@ -856,45 +1118,50 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
* number - it's the same as if we held a reference
* to a dentry in lower layer that was moved under us.
*/
- err = ovl_check_origin(ofs, upperdentry, &origin_path,
- &origin_ctr);
+ err = ovl_check_origin(ofs, ctx->upperdentry, &ctx->origin_path);
if (err)
- goto out_put_upper;
+ return err;
- if (d.metacopy)
- metacopy = true;
+ if (d->metacopy)
+ uppermetacopy = true;
+ metacopy_size = d->metacopy;
}
- if (d.redirect) {
+ if (d->redirect) {
err = -ENOMEM;
- upperredirect = kstrdup(d.redirect, GFP_KERNEL);
- if (!upperredirect)
- goto out_put_upper;
- if (d.redirect[0] == '/')
+ d->upperredirect = kstrdup(d->redirect, GFP_KERNEL);
+ if (!d->upperredirect)
+ return err;
+ if (d->redirect[0] == '/')
poe = roe;
}
- upperopaque = d.opaque;
+ upperopaque = d->opaque;
}
- if (!d.stop && poe->numlower) {
+ if (!d->stop && ovl_numlower(poe)) {
err = -ENOMEM;
- stack = kcalloc(ofs->numlower, sizeof(struct ovl_path),
- GFP_KERNEL);
- if (!stack)
- goto out_put_upper;
+ ctx->stack = ovl_stack_alloc(ofs->numlayer - 1);
+ if (!ctx->stack)
+ return err;
}
- for (i = 0; !d.stop && i < poe->numlower; i++) {
- struct ovl_path lower = poe->lowerstack[i];
+ for (i = 0; !d->stop && i < ovl_numlower(poe); i++) {
+ struct ovl_path lower = ovl_lowerstack(poe)[i];
+
+ if (!ovl_check_follow_redirect(d)) {
+ err = -EPERM;
+ return err;
+ }
- if (!ofs->config.redirect_follow)
- d.last = i == poe->numlower - 1;
- else
- d.last = lower.layer->idx == roe->numlower;
+ if (!check_redirect)
+ d->last = i == ovl_numlower(poe) - 1;
+ else if (d->is_dir || !ofs->numdatalayer)
+ d->last = lower.layer->idx == ovl_numlower(roe);
- err = ovl_lookup_layer(lower.dentry, &d, &this);
+ d->layer = lower.layer;
+ err = ovl_lookup_layer(lower.dentry, d, &this, false);
if (err)
- goto out_put;
+ return err;
if (!this)
continue;
@@ -903,11 +1170,11 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
* If no origin fh is stored in upper of a merge dir, store fh
* of lower dir and set upper parent "impure".
*/
- if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) {
- err = ovl_fix_origin(dentry, this, upperdentry);
+ if (ctx->upperdentry && !ctx->ctr && !ofs->noxattr && d->is_dir) {
+ err = ovl_fix_origin(ofs, dentry, this, ctx->upperdentry);
if (err) {
dput(this);
- goto out_put;
+ return err;
}
}
@@ -920,191 +1187,237 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
* matches the dentry found using path based lookup,
* otherwise error out.
*/
- if (upperdentry && !ctr &&
- ((d.is_dir && ovl_verify_lower(dentry->d_sb)) ||
- (!d.is_dir && ofs->config.index && origin_path))) {
- err = ovl_verify_origin(upperdentry, this, false);
+ if (ctx->upperdentry && !ctx->ctr &&
+ ((d->is_dir && ovl_verify_lower(dentry->d_sb)) ||
+ (!d->is_dir && ofs->config.index && ctx->origin_path))) {
+ err = ovl_verify_origin(ofs, ctx->upperdentry, this, false);
if (err) {
dput(this);
- if (d.is_dir)
+ if (d->is_dir)
break;
- goto out_put;
+ return err;
}
origin = this;
}
- if (d.metacopy)
- metacopy = true;
- /*
- * Do not store intermediate metacopy dentries in chain,
- * except top most lower metacopy dentry
- */
- if (d.metacopy && ctr) {
- dput(this);
- continue;
- }
-
- stack[ctr].dentry = this;
- stack[ctr].layer = lower.layer;
- ctr++;
+ if (!ctx->upperdentry && !d->is_dir && !ctx->ctr && d->metacopy)
+ metacopy_size = d->metacopy;
- /*
- * Following redirects can have security consequences: it's like
- * a symlink into the lower layer without the permission checks.
- * This is only a problem if the upper layer is untrusted (e.g
- * comes from an USB drive). This can allow a non-readable file
- * or directory to become readable.
- *
- * Only following redirects when redirects are enabled disables
- * this attack vector when not necessary.
- */
- err = -EPERM;
- if (d.redirect && !ofs->config.redirect_follow) {
- pr_warn_ratelimited("overlayfs: refusing to follow redirect for (%pd2)\n",
- dentry);
- goto out_put;
+ if (d->metacopy && ctx->ctr) {
+ /*
+ * Do not store intermediate metacopy dentries in
+ * lower chain, except top most lower metacopy dentry.
+ * Continue the loop so that if there is an absolute
+ * redirect on this dentry, poe can be reset to roe.
+ */
+ dput(this);
+ this = NULL;
+ } else {
+ ctx->stack[ctx->ctr].dentry = this;
+ ctx->stack[ctx->ctr].layer = lower.layer;
+ ctx->ctr++;
}
- if (d.stop)
+ if (d->stop)
break;
- if (d.redirect && d.redirect[0] == '/' && poe != roe) {
+ if (d->redirect && d->redirect[0] == '/' && poe != roe) {
poe = roe;
/* Find the current layer on the root dentry */
i = lower.layer->idx - 1;
}
}
- if (metacopy) {
- /*
- * Found a metacopy dentry but did not find corresponding
- * data dentry
- */
- if (d.metacopy) {
- err = -EIO;
- goto out_put;
- }
-
+ /*
+ * Defer lookup of lowerdata in data-only layers to first access.
+ * Don't require redirect=follow and metacopy=on in this case.
+ */
+ if (d->metacopy && ctx->ctr && ofs->numdatalayer && d->absolute_redirect) {
+ d->metacopy = 0;
+ ctx->ctr++;
+ } else if (!ovl_check_follow_redirect(d)) {
err = -EPERM;
- if (!ofs->config.metacopy) {
- pr_warn_ratelimited("overlay: refusing to follow metacopy origin for (%pd2)\n",
- dentry);
- goto out_put;
- }
- } else if (!d.is_dir && upperdentry && !ctr && origin_path) {
- if (WARN_ON(stack != NULL)) {
+ return err;
+ }
+
+ /*
+ * For regular non-metacopy upper dentries, there is no lower
+ * path based lookup, hence ctr will be zero. If a dentry is found
+ * using ORIGIN xattr on upper, install it in stack.
+ *
+ * For metacopy dentry, path based lookup will find lower dentries.
+ * Just make sure a corresponding data dentry has been found.
+ */
+ if (d->metacopy || (uppermetacopy && !ctx->ctr)) {
+ pr_warn_ratelimited("metacopy with no lower data found - abort lookup (%pd2)\n",
+ dentry);
+ err = -EIO;
+ return err;
+ } else if (!d->is_dir && ctx->upperdentry && !ctx->ctr && ctx->origin_path) {
+ if (WARN_ON(ctx->stack != NULL)) {
err = -EIO;
- goto out_put;
+ return err;
}
- stack = origin_path;
- ctr = 1;
- origin_path = NULL;
+ ctx->stack = ctx->origin_path;
+ ctx->ctr = 1;
+ origin = ctx->origin_path->dentry;
+ ctx->origin_path = NULL;
}
/*
- * Lookup index by lower inode and verify it matches upper inode.
- * We only trust dir index if we verified that lower dir matches
- * origin, otherwise dir index entries may be inconsistent and we
- * ignore them.
+ * Always lookup index if there is no-upperdentry.
+ *
+ * For the case of upperdentry, we have set origin by now if it
+ * needed to be set. There are basically three cases.
*
- * For non-dir upper metacopy dentry, we already set "origin" if we
- * verified that lower matched upper origin. If upper origin was
- * not present (because lower layer did not support fh encode/decode),
- * or indexing is not enabled, do not set "origin" and skip looking up
- * index. This case should be handled in same way as a non-dir upper
- * without ORIGIN is handled.
+ * For directories, lookup index by lower inode and verify it matches
+ * upper inode. We only trust dir index if we verified that lower dir
+ * matches origin, otherwise dir index entries may be inconsistent
+ * and we ignore them.
+ *
+ * For regular upper, we already set origin if upper had ORIGIN
+ * xattr. There is no verification though as there is no path
+ * based dentry lookup in lower in this case.
+ *
+ * For metacopy upper, we set a verified origin already if index
+ * is enabled and if upper had an ORIGIN xattr.
*
- * Always lookup index of non-dir non-metacopy and non-upper.
*/
- if (ctr && (!upperdentry || (!d.is_dir && !metacopy)))
- origin = stack[0].dentry;
+ if (!ctx->upperdentry && ctx->ctr)
+ origin = ctx->stack[0].dentry;
if (origin && ovl_indexdir(dentry->d_sb) &&
- (!d.is_dir || ovl_index_all(dentry->d_sb))) {
- index = ovl_lookup_index(ofs, upperdentry, origin, true);
- if (IS_ERR(index)) {
- err = PTR_ERR(index);
- index = NULL;
- goto out_put;
+ (!d->is_dir || ovl_index_all(dentry->d_sb))) {
+ ctx->index = ovl_lookup_index(ofs, ctx->upperdentry, origin, true);
+ if (IS_ERR(ctx->index)) {
+ err = PTR_ERR(ctx->index);
+ ctx->index = NULL;
+ return err;
}
}
- oe = ovl_alloc_entry(ctr);
- err = -ENOMEM;
- if (!oe)
- goto out_put;
+ if (ctx->ctr) {
+ ctx->oe = ovl_alloc_entry(ctx->ctr);
+ err = -ENOMEM;
+ if (!ctx->oe)
+ return err;
- memcpy(oe->lowerstack, stack, sizeof(struct ovl_path) * ctr);
- dentry->d_fsdata = oe;
+ ovl_stack_cpy(ovl_lowerstack(ctx->oe), ctx->stack, ctx->ctr);
+ }
if (upperopaque)
ovl_dentry_set_opaque(dentry);
+ if (d->xwhiteouts)
+ ovl_dentry_set_xwhiteouts(dentry);
- if (upperdentry)
+ if (ctx->upperdentry)
ovl_dentry_set_upper_alias(dentry);
- else if (index) {
- upperdentry = dget(index);
- upperredirect = ovl_get_redirect_xattr(upperdentry, 0);
- if (IS_ERR(upperredirect)) {
- err = PTR_ERR(upperredirect);
- upperredirect = NULL;
- goto out_free_oe;
+ else if (ctx->index) {
+ char *upperredirect;
+ struct path upperpath = {
+ .dentry = ctx->upperdentry = dget(ctx->index),
+ .mnt = ovl_upper_mnt(ofs),
+ };
+
+ /*
+ * It's safe to assign upperredirect here: the previous
+ * assignment happens only if upperdentry is non-NULL, and
+ * this one only if upperdentry is NULL.
+ */
+ upperredirect = ovl_get_redirect_xattr(ofs, &upperpath, 0);
+ if (IS_ERR(upperredirect))
+ return PTR_ERR(upperredirect);
+ d->upperredirect = upperredirect;
+
+ err = ovl_check_metacopy_xattr(ofs, &upperpath, NULL);
+ if (err < 0)
+ return err;
+ d->metacopy = uppermetacopy = err;
+ metacopy_size = err;
+
+ if (!ovl_check_follow_redirect(d)) {
+ err = -EPERM;
+ return err;
}
}
- if (upperdentry || ctr) {
+ if (ctx->upperdentry || ctx->ctr) {
+ struct inode *inode;
struct ovl_inode_params oip = {
- .upperdentry = upperdentry,
- .lowerpath = stack,
- .index = index,
- .numlower = ctr,
- .redirect = upperredirect,
- .lowerdata = (ctr > 1 && !d.is_dir) ?
- stack[ctr - 1].dentry : NULL,
+ .upperdentry = ctx->upperdentry,
+ .oe = ctx->oe,
+ .index = ctx->index,
+ .redirect = d->upperredirect,
};
+ /* Store lowerdata redirect for lazy lookup */
+ if (ctx->ctr > 1 && !d->is_dir && !ctx->stack[ctx->ctr - 1].dentry) {
+ oip.lowerdata_redirect = d->redirect;
+ d->redirect = NULL;
+ }
+
inode = ovl_get_inode(dentry->d_sb, &oip);
- err = PTR_ERR(inode);
if (IS_ERR(inode))
- goto out_free_oe;
+ return PTR_ERR(inode);
+
+ ctx->inode = inode;
+ if (ctx->upperdentry && !uppermetacopy)
+ ovl_set_flag(OVL_UPPERDATA, ctx->inode);
+
+ if (metacopy_size > OVL_METACOPY_MIN_SIZE)
+ ovl_set_flag(OVL_HAS_DIGEST, ctx->inode);
}
- revert_creds(old_cred);
- if (origin_path) {
- dput(origin_path->dentry);
- kfree(origin_path);
+ ovl_dentry_init_reval(dentry, ctx->upperdentry, OVL_I_E(ctx->inode));
+
+ return 0;
+}
+
+struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
+ unsigned int flags)
+{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+ struct ovl_entry *poe = OVL_E(dentry->d_parent);
+ bool check_redirect = (ovl_redirect_follow(ofs) || ofs->numdatalayer);
+ int err;
+ struct ovl_lookup_ctx ctx = {
+ .dentry = dentry,
+ };
+ struct ovl_lookup_data d = {
+ .sb = dentry->d_sb,
+ .dentry = dentry,
+ .name = dentry->d_name,
+ .last = check_redirect ? false : !ovl_numlower(poe),
+ };
+
+ if (dentry->d_name.len > ofs->namelen)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ with_ovl_creds(dentry->d_sb)
+ err = ovl_lookup_layers(&ctx, &d);
+
+ if (ctx.origin_path) {
+ dput(ctx.origin_path->dentry);
+ kfree(ctx.origin_path);
}
- dput(index);
- kfree(stack);
+ dput(ctx.index);
+ ovl_stack_free(ctx.stack, ctx.ctr);
kfree(d.redirect);
- return d_splice_alias(inode, dentry);
-out_free_oe:
- dentry->d_fsdata = NULL;
- kfree(oe);
-out_put:
- dput(index);
- for (i = 0; i < ctr; i++)
- dput(stack[i].dentry);
- kfree(stack);
-out_put_upper:
- if (origin_path) {
- dput(origin_path->dentry);
- kfree(origin_path);
+ if (err) {
+ ovl_free_entry(ctx.oe);
+ dput(ctx.upperdentry);
+ kfree(d.upperredirect);
+ return ERR_PTR(err);
}
- dput(upperdentry);
- kfree(upperredirect);
-out:
- kfree(d.redirect);
- revert_creds(old_cred);
- return ERR_PTR(err);
+
+ return d_splice_alias(ctx.inode, dentry);
}
bool ovl_lower_positive(struct dentry *dentry)
{
- struct ovl_entry *poe = dentry->d_parent->d_fsdata;
+ struct ovl_entry *poe = OVL_E(dentry->d_parent);
const struct qstr *name = &dentry->d_name;
- const struct cred *old_cred;
unsigned int i;
bool positive = false;
bool done = false;
@@ -1120,37 +1433,45 @@ bool ovl_lower_positive(struct dentry *dentry)
if (!ovl_dentry_upper(dentry))
return true;
- old_cred = ovl_override_creds(dentry->d_sb);
- /* Positive upper -> have to look up lower to see whether it exists */
- for (i = 0; !done && !positive && i < poe->numlower; i++) {
- struct dentry *this;
- struct dentry *lowerdir = poe->lowerstack[i].dentry;
-
- this = lookup_one_len_unlocked(name->name, lowerdir,
- name->len);
- if (IS_ERR(this)) {
- switch (PTR_ERR(this)) {
- case -ENOENT:
- case -ENAMETOOLONG:
- break;
-
- default:
- /*
- * Assume something is there, we just couldn't
- * access it.
- */
- positive = true;
- break;
- }
- } else {
- if (this->d_inode) {
- positive = !ovl_is_whiteout(this);
+ with_ovl_creds(dentry->d_sb) {
+ /* Positive upper -> have to look up lower to see whether it exists */
+ for (i = 0; !done && !positive && i < ovl_numlower(poe); i++) {
+ struct dentry *this;
+ struct ovl_path *parentpath = &ovl_lowerstack(poe)[i];
+
+ /*
+ * We need to make a non-const copy of dentry->d_name,
+ * because lookup_one_positive_unlocked() will hash name
+ * with parentpath base, which is on another (lower fs).
+ */
+ this = lookup_one_positive_unlocked(mnt_idmap(parentpath->layer->mnt),
+ &QSTR_LEN(name->name, name->len),
+ parentpath->dentry);
+ if (IS_ERR(this)) {
+ switch (PTR_ERR(this)) {
+ case -ENOENT:
+ case -ENAMETOOLONG:
+ break;
+
+ default:
+ /*
+ * Assume something is there, we just couldn't
+ * access it.
+ */
+ positive = true;
+ break;
+ }
+ } else {
+ struct path path = {
+ .dentry = this,
+ .mnt = parentpath->layer->mnt,
+ };
+ positive = !ovl_path_is_whiteout(OVL_FS(dentry->d_sb), &path);
done = true;
+ dput(this);
}
- dput(this);
}
}
- revert_creds(old_cred);
return positive;
}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 5e45cb3630a0..f9ac9bdde830 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -1,17 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
*
* Copyright (C) 2011 Novell Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
*/
#include <linux/kernel.h>
#include <linux/uuid.h>
#include <linux/fs.h>
+#include <linux/fsverity.h>
+#include <linux/namei.h>
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
#include "ovl_entry.h"
+#undef pr_fmt
+#define pr_fmt(fmt) "overlayfs: " fmt
+
enum ovl_path_type {
__OVL_PATH_UPPER = (1 << 0),
__OVL_PATH_MERGE = (1 << 1),
@@ -22,14 +26,31 @@ enum ovl_path_type {
#define OVL_TYPE_MERGE(type) ((type) & __OVL_PATH_MERGE)
#define OVL_TYPE_ORIGIN(type) ((type) & __OVL_PATH_ORIGIN)
-#define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay."
-#define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque"
-#define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect"
-#define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin"
-#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure"
-#define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink"
-#define OVL_XATTR_UPPER OVL_XATTR_PREFIX "upper"
-#define OVL_XATTR_METACOPY OVL_XATTR_PREFIX "metacopy"
+#define OVL_XATTR_NAMESPACE "overlay."
+#define OVL_XATTR_TRUSTED_PREFIX XATTR_TRUSTED_PREFIX OVL_XATTR_NAMESPACE
+#define OVL_XATTR_TRUSTED_PREFIX_LEN (sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1)
+#define OVL_XATTR_USER_PREFIX XATTR_USER_PREFIX OVL_XATTR_NAMESPACE
+#define OVL_XATTR_USER_PREFIX_LEN (sizeof(OVL_XATTR_USER_PREFIX) - 1)
+
+#define OVL_XATTR_ESCAPE_PREFIX OVL_XATTR_NAMESPACE
+#define OVL_XATTR_ESCAPE_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_PREFIX) - 1)
+#define OVL_XATTR_ESCAPE_TRUSTED_PREFIX OVL_XATTR_TRUSTED_PREFIX OVL_XATTR_ESCAPE_PREFIX
+#define OVL_XATTR_ESCAPE_TRUSTED_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_TRUSTED_PREFIX) - 1)
+#define OVL_XATTR_ESCAPE_USER_PREFIX OVL_XATTR_USER_PREFIX OVL_XATTR_ESCAPE_PREFIX
+#define OVL_XATTR_ESCAPE_USER_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_USER_PREFIX) - 1)
+
+enum ovl_xattr {
+ OVL_XATTR_OPAQUE,
+ OVL_XATTR_REDIRECT,
+ OVL_XATTR_ORIGIN,
+ OVL_XATTR_IMPURE,
+ OVL_XATTR_NLINK,
+ OVL_XATTR_UPPER,
+ OVL_XATTR_UUID,
+ OVL_XATTR_METACOPY,
+ OVL_XATTR_PROTATTR,
+ OVL_XATTR_XWHITEOUT,
+};
enum ovl_inode_flag {
/* Pure upper dir that may contain non pure upper entries */
@@ -40,12 +61,42 @@ enum ovl_inode_flag {
OVL_UPPERDATA,
/* Inode number will remain constant over copy up. */
OVL_CONST_INO,
+ OVL_HAS_DIGEST,
+ OVL_VERIFIED_DIGEST,
};
enum ovl_entry_flag {
OVL_E_UPPER_ALIAS,
OVL_E_OPAQUE,
OVL_E_CONNECTED,
+ /* Lower stack may contain xwhiteout entries */
+ OVL_E_XWHITEOUTS,
+};
+
+enum {
+ OVL_REDIRECT_OFF, /* "off" mode is never used. In effect */
+ OVL_REDIRECT_FOLLOW, /* ...it translates to either "follow" */
+ OVL_REDIRECT_NOFOLLOW, /* ...or "nofollow". */
+ OVL_REDIRECT_ON,
+};
+
+enum {
+ OVL_UUID_OFF,
+ OVL_UUID_NULL,
+ OVL_UUID_AUTO,
+ OVL_UUID_ON,
+};
+
+enum {
+ OVL_XINO_OFF,
+ OVL_XINO_AUTO,
+ OVL_XINO_ON,
+};
+
+enum {
+ OVL_VERITY_OFF,
+ OVL_VERITY_ON,
+ OVL_VERITY_REQUIRE,
};
/*
@@ -74,125 +125,317 @@ enum ovl_entry_flag {
#error Endianness not defined
#endif
-/* The type returned by overlay exportfs ops when encoding an ovl_fh handle */
-#define OVL_FILEID 0xfb
+/* The type used to be returned by overlay exportfs for misaligned fid */
+#define OVL_FILEID_V0 0xfb
+/* The type returned by overlay exportfs for 32bit aligned fid */
+#define OVL_FILEID_V1 0xf8
-/* On-disk and in-memeory format for redirect by file handle */
-struct ovl_fh {
+/* On-disk format for "origin" file handle */
+struct ovl_fb {
u8 version; /* 0 */
u8 magic; /* 0xfb */
u8 len; /* size of this header + size of fid */
u8 flags; /* OVL_FH_FLAG_* */
u8 type; /* fid_type of fid */
uuid_t uuid; /* uuid of filesystem */
- u8 fid[0]; /* file identifier */
+ u32 fid[]; /* file identifier should be 32bit aligned in-memory */
+} __packed;
+
+/* In-memory and on-wire format for overlay file handle */
+struct ovl_fh {
+ u8 padding[3]; /* make sure fb.fid is 32bit aligned */
+ union {
+ struct ovl_fb fb;
+ DECLARE_FLEX_ARRAY(u8, buf);
+ };
} __packed;
-static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
+#define OVL_FH_WIRE_OFFSET offsetof(struct ovl_fh, fb)
+#define OVL_FH_LEN(fh) (OVL_FH_WIRE_OFFSET + (fh)->fb.len)
+#define OVL_FH_FID_OFFSET (OVL_FH_WIRE_OFFSET + \
+ offsetof(struct ovl_fb, fid))
+
+/* On-disk format for "metacopy" xattr (if non-zero size) */
+struct ovl_metacopy {
+ u8 version; /* 0 */
+ u8 len; /* size of this header + used digest bytes */
+ u8 flags;
+ u8 digest_algo; /* FS_VERITY_HASH_ALG_* constant, 0 for no digest */
+ u8 digest[FS_VERITY_MAX_DIGEST_SIZE]; /* Only the used part on disk */
+} __packed;
+
+#define OVL_METACOPY_MAX_SIZE (sizeof(struct ovl_metacopy))
+#define OVL_METACOPY_MIN_SIZE (OVL_METACOPY_MAX_SIZE - FS_VERITY_MAX_DIGEST_SIZE)
+#define OVL_METACOPY_INIT { 0, OVL_METACOPY_MIN_SIZE }
+
+static inline int ovl_metadata_digest_size(const struct ovl_metacopy *metacopy)
+{
+ if (metacopy->len < OVL_METACOPY_MIN_SIZE)
+ return 0;
+ return (int)metacopy->len - OVL_METACOPY_MIN_SIZE;
+}
+
+/* No atime modification on underlying */
+#define OVL_OPEN_FLAGS (O_NOATIME)
+
+extern const char *const ovl_xattr_table[][2];
+static inline const char *ovl_xattr(struct ovl_fs *ofs, enum ovl_xattr ox)
{
- int err = vfs_rmdir(dir, dentry);
+ return ovl_xattr_table[ox][ofs->config.userxattr];
+}
+
+/*
+ * When changing ownership of an upper object map the intended ownership
+ * according to the upper layer's idmapping. When an upper mount idmaps files
+ * that are stored on-disk as owned by id 1001 to id 1000 this means stat on
+ * this object will report it as being owned by id 1000 when calling stat via
+ * the upper mount.
+ * In order to change ownership of an object so stat reports id 1000 when
+ * called on an idmapped upper mount the value written to disk - i.e., the
+ * value stored in ia_*id - must 1001. The mount mapping helper will thus take
+ * care to map 1000 to 1001.
+ * The mnt idmapping helpers are nops if the upper layer isn't idmapped.
+ */
+static inline int ovl_do_notify_change(struct ovl_fs *ofs,
+ struct dentry *upperdentry,
+ struct iattr *attr)
+{
+ return notify_change(ovl_upper_mnt_idmap(ofs), upperdentry, attr, NULL);
+}
+
+static inline int ovl_do_rmdir(struct ovl_fs *ofs,
+ struct inode *dir, struct dentry *dentry)
+{
+ int err = vfs_rmdir(ovl_upper_mnt_idmap(ofs), dir, dentry, NULL);
pr_debug("rmdir(%pd2) = %i\n", dentry, err);
return err;
}
-static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry)
+static inline int ovl_do_unlink(struct ovl_fs *ofs, struct inode *dir,
+ struct dentry *dentry)
{
- int err = vfs_unlink(dir, dentry, NULL);
+ int err = vfs_unlink(ovl_upper_mnt_idmap(ofs), dir, dentry, NULL);
pr_debug("unlink(%pd2) = %i\n", dentry, err);
return err;
}
-static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir,
- struct dentry *new_dentry)
+static inline int ovl_do_link(struct ovl_fs *ofs, struct dentry *old_dentry,
+ struct inode *dir, struct dentry *new_dentry)
{
- int err = vfs_link(old_dentry, dir, new_dentry, NULL);
+ int err = vfs_link(old_dentry, ovl_upper_mnt_idmap(ofs), dir,
+ new_dentry, NULL);
pr_debug("link(%pd2, %pd2) = %i\n", old_dentry, new_dentry, err);
return err;
}
-static inline int ovl_do_create(struct inode *dir, struct dentry *dentry,
+static inline int ovl_do_create(struct ovl_fs *ofs,
+ struct inode *dir, struct dentry *dentry,
umode_t mode)
{
- int err = vfs_create(dir, dentry, mode, true);
+ int err = vfs_create(ovl_upper_mnt_idmap(ofs), dentry, mode, NULL);
pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err);
return err;
}
-static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry,
- umode_t mode)
+static inline struct dentry *ovl_do_mkdir(struct ovl_fs *ofs,
+ struct inode *dir,
+ struct dentry *dentry,
+ umode_t mode)
{
- int err = vfs_mkdir(dir, dentry, mode);
- pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err);
- return err;
+ struct dentry *ret;
+
+ ret = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, NULL);
+ pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, PTR_ERR_OR_ZERO(ret));
+ return ret;
}
-static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry,
+static inline int ovl_do_mknod(struct ovl_fs *ofs,
+ struct inode *dir, struct dentry *dentry,
umode_t mode, dev_t dev)
{
- int err = vfs_mknod(dir, dentry, mode, dev);
+ int err = vfs_mknod(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, dev, NULL);
pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n", dentry, mode, dev, err);
return err;
}
-static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry,
+static inline int ovl_do_symlink(struct ovl_fs *ofs,
+ struct inode *dir, struct dentry *dentry,
const char *oldname)
{
- int err = vfs_symlink(dir, dentry, oldname);
+ int err = vfs_symlink(ovl_upper_mnt_idmap(ofs), dir, dentry, oldname, NULL);
pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err);
return err;
}
-static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags)
+static inline ssize_t ovl_do_getxattr(const struct path *path, const char *name,
+ void *value, size_t size)
+{
+ int err, len;
+
+ WARN_ON(path->dentry->d_sb != path->mnt->mnt_sb);
+
+ err = vfs_getxattr(mnt_idmap(path->mnt), path->dentry,
+ name, value, size);
+ len = (value && err > 0) ? err : 0;
+
+ pr_debug("getxattr(%pd2, \"%s\", \"%*pE\", %zu, 0) = %i\n",
+ path->dentry, name, min(len, 48), value, size, err);
+ return err;
+}
+
+static inline ssize_t ovl_getxattr_upper(struct ovl_fs *ofs,
+ struct dentry *upperdentry,
+ enum ovl_xattr ox, void *value,
+ size_t size)
+{
+ struct path upperpath = {
+ .dentry = upperdentry,
+ .mnt = ovl_upper_mnt(ofs),
+ };
+
+ return ovl_do_getxattr(&upperpath, ovl_xattr(ofs, ox), value, size);
+}
+
+static inline ssize_t ovl_path_getxattr(struct ovl_fs *ofs,
+ const struct path *path,
+ enum ovl_xattr ox, void *value,
+ size_t size)
+{
+ return ovl_do_getxattr(path, ovl_xattr(ofs, ox), value, size);
+}
+
+static inline int ovl_do_setxattr(struct ovl_fs *ofs, struct dentry *dentry,
+ const char *name, const void *value,
+ size_t size, int flags)
{
- int err = vfs_setxattr(dentry, name, value, size, flags);
- pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i\n",
+ int err = vfs_setxattr(ovl_upper_mnt_idmap(ofs), dentry, name,
+ value, size, flags);
+
+ pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, %d) = %i\n",
dentry, name, min((int)size, 48), value, size, flags, err);
return err;
}
-static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
+static inline int ovl_setxattr(struct ovl_fs *ofs, struct dentry *dentry,
+ enum ovl_xattr ox, const void *value,
+ size_t size)
{
- int err = vfs_removexattr(dentry, name);
+ return ovl_do_setxattr(ofs, dentry, ovl_xattr(ofs, ox), value, size, 0);
+}
+
+static inline int ovl_do_removexattr(struct ovl_fs *ofs, struct dentry *dentry,
+ const char *name)
+{
+ int err = vfs_removexattr(ovl_upper_mnt_idmap(ofs), dentry, name);
pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err);
return err;
}
-static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry,
- struct inode *newdir, struct dentry *newdentry,
- unsigned int flags)
+static inline int ovl_removexattr(struct ovl_fs *ofs, struct dentry *dentry,
+ enum ovl_xattr ox)
+{
+ return ovl_do_removexattr(ofs, dentry, ovl_xattr(ofs, ox));
+}
+
+static inline int ovl_do_set_acl(struct ovl_fs *ofs, struct dentry *dentry,
+ const char *acl_name, struct posix_acl *acl)
+{
+ return vfs_set_acl(ovl_upper_mnt_idmap(ofs), dentry, acl_name, acl);
+}
+
+static inline int ovl_do_remove_acl(struct ovl_fs *ofs, struct dentry *dentry,
+ const char *acl_name)
+{
+ return vfs_remove_acl(ovl_upper_mnt_idmap(ofs), dentry, acl_name);
+}
+
+static inline int ovl_do_rename_rd(struct renamedata *rd)
{
int err;
- pr_debug("rename(%pd2, %pd2, 0x%x)\n", olddentry, newdentry, flags);
- err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags);
+ pr_debug("rename(%pd2, %pd2, 0x%x)\n", rd->old_dentry, rd->new_dentry,
+ rd->flags);
+ err = vfs_rename(rd);
if (err) {
pr_debug("...rename(%pd2, %pd2, ...) = %i\n",
- olddentry, newdentry, err);
+ rd->old_dentry, rd->new_dentry, err);
}
return err;
}
-static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
+static inline int ovl_do_rename(struct ovl_fs *ofs, struct dentry *olddir,
+ struct dentry *olddentry, struct dentry *newdir,
+ struct dentry *newdentry, unsigned int flags)
{
- int err = vfs_whiteout(dir, dentry);
+ struct renamedata rd = {
+ .mnt_idmap = ovl_upper_mnt_idmap(ofs),
+ .old_parent = olddir,
+ .old_dentry = olddentry,
+ .new_parent = newdir,
+ .new_dentry = newdentry,
+ .flags = flags,
+ };
+
+ return ovl_do_rename_rd(&rd);
+}
+
+static inline int ovl_do_whiteout(struct ovl_fs *ofs,
+ struct inode *dir, struct dentry *dentry)
+{
+ int err = vfs_whiteout(ovl_upper_mnt_idmap(ofs), dir, dentry);
pr_debug("whiteout(%pd2) = %i\n", dentry, err);
return err;
}
-static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode)
+static inline struct file *ovl_do_tmpfile(struct ovl_fs *ofs,
+ struct dentry *dentry, umode_t mode)
{
- struct dentry *ret = vfs_tmpfile(dentry, mode, 0);
- int err = PTR_ERR_OR_ZERO(ret);
+ struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = dentry };
+ struct file *file = kernel_tmpfile_open(ovl_upper_mnt_idmap(ofs), &path,
+ mode, O_LARGEFILE | O_WRONLY,
+ current_cred());
+ int err = PTR_ERR_OR_ZERO(file);
pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err);
- return ret;
+ return file;
+}
+
+static inline struct dentry *ovl_lookup_upper(struct ovl_fs *ofs,
+ const char *name,
+ struct dentry *base, int len)
+{
+ return lookup_one(ovl_upper_mnt_idmap(ofs), &QSTR_LEN(name, len), base);
+}
+
+static inline struct dentry *ovl_lookup_upper_unlocked(struct ovl_fs *ofs,
+ const char *name,
+ struct dentry *base,
+ int len)
+{
+ return lookup_one_unlocked(ovl_upper_mnt_idmap(ofs),
+ &QSTR_LEN(name, len), base);
+}
+
+static inline struct dentry *ovl_start_creating_upper(struct ovl_fs *ofs,
+ struct dentry *parent,
+ struct qstr *name)
+{
+ return start_creating(ovl_upper_mnt_idmap(ofs),
+ parent, name);
+}
+
+static inline struct dentry *ovl_start_removing_upper(struct ovl_fs *ofs,
+ struct dentry *parent,
+ struct qstr *name)
+{
+ return start_removing(ovl_upper_mnt_idmap(ofs),
+ parent, name);
}
static inline bool ovl_open_flags_need_copy_up(int flags)
@@ -204,27 +447,61 @@ static inline bool ovl_open_flags_need_copy_up(int flags)
}
/* util.c */
+int ovl_get_write_access(struct dentry *dentry);
+void ovl_put_write_access(struct dentry *dentry);
+void ovl_start_write(struct dentry *dentry);
+void ovl_end_write(struct dentry *dentry);
int ovl_want_write(struct dentry *dentry);
void ovl_drop_write(struct dentry *dentry);
struct dentry *ovl_workdir(struct dentry *dentry);
const struct cred *ovl_override_creds(struct super_block *sb);
-struct super_block *ovl_same_sb(struct super_block *sb);
+
+EXTEND_CLASS(override_creds, _ovl, ovl_override_creds(sb), struct super_block *sb)
+
+#define with_ovl_creds(sb) \
+ scoped_class(override_creds_ovl, __UNIQUE_ID(label), sb)
+
+static inline const struct cred *ovl_creds(struct super_block *sb)
+{
+ return OVL_FS(sb)->creator_cred;
+}
+
int ovl_can_decode_fh(struct super_block *sb);
struct dentry *ovl_indexdir(struct super_block *sb);
bool ovl_index_all(struct super_block *sb);
bool ovl_verify_lower(struct super_block *sb);
+struct ovl_path *ovl_stack_alloc(unsigned int n);
+void ovl_stack_cpy(struct ovl_path *dst, struct ovl_path *src, unsigned int n);
+void ovl_stack_put(struct ovl_path *stack, unsigned int n);
+void ovl_stack_free(struct ovl_path *stack, unsigned int n);
struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
+void ovl_free_entry(struct ovl_entry *oe);
bool ovl_dentry_remote(struct dentry *dentry);
+void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *realdentry);
+void ovl_dentry_init_reval(struct dentry *dentry, struct dentry *upperdentry,
+ struct ovl_entry *oe);
+void ovl_dentry_init_flags(struct dentry *dentry, struct dentry *upperdentry,
+ struct ovl_entry *oe, unsigned int mask);
bool ovl_dentry_weird(struct dentry *dentry);
+
+static inline bool ovl_dentry_casefolded(struct dentry *dentry)
+{
+ return sb_has_encoding(dentry->d_sb) && IS_CASEFOLDED(d_inode(dentry));
+}
+
enum ovl_path_type ovl_path_type(struct dentry *dentry);
void ovl_path_upper(struct dentry *dentry, struct path *path);
void ovl_path_lower(struct dentry *dentry, struct path *path);
void ovl_path_lowerdata(struct dentry *dentry, struct path *path);
+struct inode *ovl_i_path_real(struct inode *inode, struct path *path);
enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
+enum ovl_path_type ovl_path_realdata(struct dentry *dentry, struct path *path);
struct dentry *ovl_dentry_upper(struct dentry *dentry);
struct dentry *ovl_dentry_lower(struct dentry *dentry);
struct dentry *ovl_dentry_lowerdata(struct dentry *dentry);
-struct ovl_layer *ovl_layer_lower(struct dentry *dentry);
+int ovl_dentry_set_lowerdata(struct dentry *dentry, struct ovl_path *datapath);
+const struct ovl_layer *ovl_i_layer_lower(struct inode *inode);
+const struct ovl_layer *ovl_layer_lower(struct dentry *dentry);
struct dentry *ovl_dentry_real(struct dentry *dentry);
struct dentry *ovl_i_dentry_upper(struct inode *inode);
struct inode *ovl_inode_upper(struct inode *inode);
@@ -232,6 +509,7 @@ struct inode *ovl_inode_lower(struct inode *inode);
struct inode *ovl_inode_lowerdata(struct inode *inode);
struct inode *ovl_inode_real(struct inode *inode);
struct inode *ovl_inode_realdata(struct inode *inode);
+const char *ovl_lowerdata_redirect(struct inode *inode);
struct ovl_dir_cache *ovl_dir_cache(struct inode *inode);
void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache);
void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry);
@@ -240,57 +518,177 @@ bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry);
bool ovl_dentry_is_opaque(struct dentry *dentry);
bool ovl_dentry_is_whiteout(struct dentry *dentry);
void ovl_dentry_set_opaque(struct dentry *dentry);
+bool ovl_dentry_has_xwhiteouts(struct dentry *dentry);
+void ovl_dentry_set_xwhiteouts(struct dentry *dentry);
+void ovl_layer_set_xwhiteouts(struct ovl_fs *ofs,
+ const struct ovl_layer *layer);
bool ovl_dentry_has_upper_alias(struct dentry *dentry);
void ovl_dentry_set_upper_alias(struct dentry *dentry);
bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags);
bool ovl_dentry_needs_data_copy_up_locked(struct dentry *dentry, int flags);
bool ovl_has_upperdata(struct inode *inode);
void ovl_set_upperdata(struct inode *inode);
-bool ovl_redirect_dir(struct super_block *sb);
const char *ovl_dentry_get_redirect(struct dentry *dentry);
void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
-void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
- struct dentry *lowerdentry, struct dentry *lowerdata);
void ovl_inode_update(struct inode *inode, struct dentry *upperdentry);
void ovl_dir_modified(struct dentry *dentry, bool impurity);
-u64 ovl_dentry_version_get(struct dentry *dentry);
+u64 ovl_inode_version_get(struct inode *inode);
bool ovl_is_whiteout(struct dentry *dentry);
-struct file *ovl_path_open(struct path *path, int flags);
+bool ovl_path_is_whiteout(struct ovl_fs *ofs, const struct path *path);
+struct file *ovl_path_open(const struct path *path, int flags);
int ovl_copy_up_start(struct dentry *dentry, int flags);
void ovl_copy_up_end(struct dentry *dentry);
bool ovl_already_copied_up(struct dentry *dentry, int flags);
-bool ovl_check_origin_xattr(struct dentry *dentry);
-bool ovl_check_dir_xattr(struct dentry *dentry, const char *name);
-int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
- const char *name, const void *value, size_t size,
+char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path *path,
+ enum ovl_xattr ox);
+bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path);
+bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path);
+bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs,
+ const struct path *upperpath);
+
+static inline bool ovl_upper_is_whiteout(struct ovl_fs *ofs,
+ struct dentry *upperdentry)
+{
+ struct path upperpath = {
+ .dentry = upperdentry,
+ .mnt = ovl_upper_mnt(ofs),
+ };
+ return ovl_path_is_whiteout(ofs, &upperpath);
+}
+
+static inline bool ovl_check_origin_xattr(struct ovl_fs *ofs,
+ struct dentry *upperdentry)
+{
+ struct path upperpath = {
+ .dentry = upperdentry,
+ .mnt = ovl_upper_mnt(ofs),
+ };
+ return ovl_path_check_origin_xattr(ofs, &upperpath);
+}
+
+int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry,
+ enum ovl_xattr ox, const void *value, size_t size,
int xerr);
int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
-void ovl_set_flag(unsigned long flag, struct inode *inode);
-void ovl_clear_flag(unsigned long flag, struct inode *inode);
-bool ovl_test_flag(unsigned long flag, struct inode *inode);
bool ovl_inuse_trylock(struct dentry *dentry);
void ovl_inuse_unlock(struct dentry *dentry);
+bool ovl_is_inuse(struct dentry *dentry);
bool ovl_need_index(struct dentry *dentry);
int ovl_nlink_start(struct dentry *dentry);
void ovl_nlink_end(struct dentry *dentry);
-int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
-int ovl_check_metacopy_xattr(struct dentry *dentry);
+int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *work,
+ struct dentry *upperdir, struct dentry *upper);
+int ovl_check_metacopy_xattr(struct ovl_fs *ofs, const struct path *path,
+ struct ovl_metacopy *data);
+int ovl_set_metacopy_xattr(struct ovl_fs *ofs, struct dentry *d,
+ struct ovl_metacopy *metacopy);
bool ovl_is_metacopy_dentry(struct dentry *dentry);
-char *ovl_get_redirect_xattr(struct dentry *dentry, int padding);
+char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int padding);
+int ovl_ensure_verity_loaded(const struct path *path);
+int ovl_validate_verity(struct ovl_fs *ofs,
+ const struct path *metapath,
+ const struct path *datapath);
+int ovl_get_verity_digest(struct ovl_fs *ofs, const struct path *src,
+ struct ovl_metacopy *metacopy);
+int ovl_sync_status(struct ovl_fs *ofs);
+
+static inline void ovl_set_flag(unsigned long flag, struct inode *inode)
+{
+ set_bit(flag, &OVL_I(inode)->flags);
+}
+
+static inline void ovl_clear_flag(unsigned long flag, struct inode *inode)
+{
+ clear_bit(flag, &OVL_I(inode)->flags);
+}
+
+static inline bool ovl_test_flag(unsigned long flag, struct inode *inode)
+{
+ return test_bit(flag, &OVL_I(inode)->flags);
+}
+
+static inline bool ovl_is_impuredir(struct super_block *sb,
+ struct dentry *upperdentry)
+{
+ struct ovl_fs *ofs = OVL_FS(sb);
+ struct path upperpath = {
+ .dentry = upperdentry,
+ .mnt = ovl_upper_mnt(ofs),
+ };
+
+ return ovl_get_dir_xattr_val(ofs, &upperpath, OVL_XATTR_IMPURE) == 'y';
+}
+
+static inline char ovl_get_opaquedir_val(struct ovl_fs *ofs,
+ const struct path *path)
+{
+ return ovl_get_dir_xattr_val(ofs, path, OVL_XATTR_OPAQUE);
+}
+
+static inline bool ovl_redirect_follow(struct ovl_fs *ofs)
+{
+ return ofs->config.redirect_mode != OVL_REDIRECT_NOFOLLOW;
+}
+
+static inline bool ovl_redirect_dir(struct ovl_fs *ofs)
+{
+ return ofs->config.redirect_mode == OVL_REDIRECT_ON;
+}
+
+static inline bool ovl_origin_uuid(struct ovl_fs *ofs)
+{
+ return ofs->config.uuid != OVL_UUID_OFF;
+}
+
+static inline bool ovl_has_fsid(struct ovl_fs *ofs)
+{
+ return ofs->config.uuid == OVL_UUID_ON ||
+ ofs->config.uuid == OVL_UUID_AUTO;
+}
-static inline bool ovl_is_impuredir(struct dentry *dentry)
+/*
+ * With xino=auto, we do best effort to keep all inodes on same st_dev and
+ * d_ino consistent with st_ino.
+ * With xino=on, we do the same effort but we warn if we failed.
+ */
+static inline bool ovl_xino_warn(struct ovl_fs *ofs)
{
- return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE);
+ return ofs->config.xino == OVL_XINO_ON;
}
-static inline unsigned int ovl_xino_bits(struct super_block *sb)
+/*
+ * To avoid regressions in existing setups with overlay lower offline changes,
+ * we allow lower changes only if none of the new features are used.
+ */
+static inline bool ovl_allow_offline_changes(struct ovl_fs *ofs)
{
- struct ovl_fs *ofs = sb->s_fs_info;
+ return (!ofs->config.index && !ofs->config.metacopy &&
+ !ovl_redirect_dir(ofs) && !ovl_xino_warn(ofs));
+}
- return ofs->xino_bits;
+/* All layers on same fs? */
+static inline bool ovl_same_fs(struct ovl_fs *ofs)
+{
+ return ofs->xino_mode == 0;
}
-static inline int ovl_inode_lock(struct inode *inode)
+/* All overlay inodes have same st_dev? */
+static inline bool ovl_same_dev(struct ovl_fs *ofs)
+{
+ return ofs->xino_mode >= 0;
+}
+
+static inline unsigned int ovl_xino_bits(struct ovl_fs *ofs)
+{
+ return ovl_same_dev(ofs) ? ofs->xino_mode : 0;
+}
+
+static inline void ovl_inode_lock(struct inode *inode)
+{
+ mutex_lock(&OVL_I(inode)->lock);
+}
+
+static inline int ovl_inode_lock_interruptible(struct inode *inode)
{
return mutex_lock_interruptible(&OVL_I(inode)->lock);
}
@@ -302,101 +700,179 @@ static inline void ovl_inode_unlock(struct inode *inode)
/* namei.c */
-int ovl_check_fh_len(struct ovl_fh *fh, int fh_len);
-struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
- bool connected);
+int ovl_check_fb_len(struct ovl_fb *fb, int fb_len);
+
+static inline int ovl_check_fh_len(struct ovl_fh *fh, int fh_len)
+{
+ if (fh_len < sizeof(struct ovl_fh))
+ return -EINVAL;
+
+ return ovl_check_fb_len(&fh->fb, fh_len - OVL_FH_WIRE_OFFSET);
+}
+
+struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
+ struct vfsmount *mnt, bool connected);
int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
struct dentry *upperdentry, struct ovl_path **stackp);
-int ovl_verify_set_fh(struct dentry *dentry, const char *name,
- struct dentry *real, bool is_upper, bool set);
-struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index);
+int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
+ enum ovl_xattr ox, const struct ovl_fh *fh,
+ bool is_upper, bool set);
+int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry,
+ enum ovl_xattr ox, struct dentry *real,
+ bool is_upper, bool set);
+struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index,
+ bool connected);
int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index);
-int ovl_get_index_name(struct dentry *origin, struct qstr *name);
+int ovl_get_index_name_fh(const struct ovl_fh *fh, struct qstr *name);
+int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
+ struct qstr *name);
struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh);
struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
struct dentry *origin, bool verify);
-int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
+int ovl_path_next(int idx, struct dentry *dentry, struct path *path,
+ const struct ovl_layer **layer);
+int ovl_verify_lowerdata(struct dentry *dentry);
struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags);
bool ovl_lower_positive(struct dentry *dentry);
-static inline int ovl_verify_origin(struct dentry *upper,
+static inline int ovl_verify_origin_fh(struct ovl_fs *ofs, struct dentry *upper,
+ const struct ovl_fh *fh, bool set)
+{
+ return ovl_verify_set_fh(ofs, upper, OVL_XATTR_ORIGIN, fh, false, set);
+}
+
+static inline int ovl_verify_origin(struct ovl_fs *ofs, struct dentry *upper,
struct dentry *origin, bool set)
{
- return ovl_verify_set_fh(upper, OVL_XATTR_ORIGIN, origin, false, set);
+ return ovl_verify_origin_xattr(ofs, upper, OVL_XATTR_ORIGIN, origin,
+ false, set);
}
-static inline int ovl_verify_upper(struct dentry *index,
- struct dentry *upper, bool set)
+static inline int ovl_verify_upper(struct ovl_fs *ofs, struct dentry *index,
+ struct dentry *upper, bool set)
{
- return ovl_verify_set_fh(index, OVL_XATTR_UPPER, upper, true, set);
+ return ovl_verify_origin_xattr(ofs, index, OVL_XATTR_UPPER, upper,
+ true, set);
}
/* readdir.c */
extern const struct file_operations ovl_dir_operations;
+struct file *ovl_dir_real_file(const struct file *file, bool want_upper);
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
-void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
+void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper,
+ struct list_head *list);
void ovl_cache_free(struct list_head *list);
void ovl_dir_cache_free(struct inode *inode);
-int ovl_check_d_type_supported(struct path *realpath);
-void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
- struct dentry *dentry, int level);
+int ovl_check_d_type_supported(const struct path *realpath);
+int ovl_workdir_cleanup(struct ovl_fs *ofs, struct dentry *parent,
+ struct vfsmount *mnt, struct dentry *dentry, int level);
int ovl_indexdir_cleanup(struct ovl_fs *ofs);
+/*
+ * Can we iterate real dir directly?
+ *
+ * Non-merge dir may contain whiteouts from a time it was a merge upper, before
+ * lower dir was removed under it and possibly before it was rotated from upper
+ * to lower layer.
+ */
+static inline bool ovl_dir_is_real(struct inode *dir)
+{
+ return !ovl_test_flag(OVL_WHITEOUTS, dir);
+}
+
/* inode.c */
int ovl_set_nlink_upper(struct dentry *dentry);
int ovl_set_nlink_lower(struct dentry *dentry);
-unsigned int ovl_get_nlink(struct dentry *lowerdentry,
+unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry,
struct dentry *upperdentry,
unsigned int fallback);
-int ovl_setattr(struct dentry *dentry, struct iattr *attr);
-int ovl_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags);
-int ovl_permission(struct inode *inode, int mask);
-int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
- const void *value, size_t size, int flags);
-int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
- void *value, size_t size);
-ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
-struct posix_acl *ovl_get_acl(struct inode *inode, int type);
-int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags);
-bool ovl_is_private_xattr(const char *name);
+int ovl_permission(struct mnt_idmap *idmap, struct inode *inode,
+ int mask);
+
+#ifdef CONFIG_FS_POSIX_ACL
+struct posix_acl *do_ovl_get_acl(struct mnt_idmap *idmap,
+ struct inode *inode, int type,
+ bool rcu, bool noperm);
+static inline struct posix_acl *ovl_get_inode_acl(struct inode *inode, int type,
+ bool rcu)
+{
+ return do_ovl_get_acl(&nop_mnt_idmap, inode, type, rcu, true);
+}
+static inline struct posix_acl *ovl_get_acl(struct mnt_idmap *idmap,
+ struct dentry *dentry, int type)
+{
+ return do_ovl_get_acl(idmap, d_inode(dentry), type, false, false);
+}
+int ovl_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct posix_acl *acl, int type);
+struct posix_acl *ovl_get_acl_path(const struct path *path,
+ const char *acl_name, bool noperm);
+#else
+#define ovl_get_inode_acl NULL
+#define ovl_get_acl NULL
+#define ovl_set_acl NULL
+static inline struct posix_acl *ovl_get_acl_path(const struct path *path,
+ const char *acl_name,
+ bool noperm)
+{
+ return NULL;
+}
+#endif
+
+int ovl_update_time(struct inode *inode, int flags);
+bool ovl_is_private_xattr(struct super_block *sb, const char *name);
struct ovl_inode_params {
struct inode *newinode;
struct dentry *upperdentry;
- struct ovl_path *lowerpath;
- struct dentry *index;
- unsigned int numlower;
+ struct ovl_entry *oe;
+ bool index;
char *redirect;
- struct dentry *lowerdata;
+ char *lowerdata_redirect;
};
+void ovl_inode_init(struct inode *inode, struct ovl_inode_params *oip,
+ unsigned long ino, int fsid);
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
bool is_upper);
+bool ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir);
+struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir);
struct inode *ovl_get_inode(struct super_block *sb,
struct ovl_inode_params *oip);
-static inline void ovl_copyattr(struct inode *from, struct inode *to)
-{
- to->i_uid = from->i_uid;
- to->i_gid = from->i_gid;
- to->i_mode = from->i_mode;
- to->i_atime = from->i_atime;
- to->i_mtime = from->i_mtime;
- to->i_ctime = from->i_ctime;
- i_size_write(to, i_size_read(from));
-}
+void ovl_copyattr(struct inode *to);
+
+/* vfs fileattr flags read from overlay.protattr xattr to ovl inode */
+#define OVL_PROT_I_FLAGS_MASK (S_APPEND | S_IMMUTABLE)
+/* vfs fileattr flags copied from real to ovl inode */
+#define OVL_FATTR_I_FLAGS_MASK (OVL_PROT_I_FLAGS_MASK | S_SYNC | S_NOATIME)
+/* vfs inode flags copied from real to ovl inode */
+#define OVL_COPY_I_FLAGS_MASK (OVL_FATTR_I_FLAGS_MASK | S_CASEFOLD)
+
+/*
+ * fileattr flags copied from lower to upper inode on copy up.
+ * We cannot copy up immutable/append-only flags, because that would prevent
+ * linking temp inode to upper dir, so we store them in xattr instead.
+ */
+#define OVL_COPY_FS_FLAGS_MASK (FS_SYNC_FL | FS_NOATIME_FL)
+#define OVL_COPY_FSX_FLAGS_MASK (FS_XFLAG_SYNC | FS_XFLAG_NOATIME)
+#define OVL_PROT_FS_FLAGS_MASK (FS_APPEND_FL | FS_IMMUTABLE_FL)
+#define OVL_PROT_FSX_FLAGS_MASK (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE)
+
+void ovl_check_protattr(struct inode *inode, struct dentry *upper);
+int ovl_set_protattr(struct inode *inode, struct dentry *upper,
+ struct file_kattr *fa);
static inline void ovl_copyflags(struct inode *from, struct inode *to)
{
- unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME;
+ unsigned int mask = OVL_COPY_I_FLAGS_MASK;
inode_set_flags(to, from->i_flags & mask, mask);
}
/* dir.c */
extern const struct inode_operations ovl_dir_inode_operations;
-int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
+int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *dir,
struct dentry *dentry);
struct ovl_cattr {
dev_t rdev;
@@ -407,24 +883,56 @@ struct ovl_cattr {
#define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) })
-struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry,
+struct dentry *ovl_create_real(struct ovl_fs *ofs,
+ struct dentry *parent, struct dentry *newdentry,
+ struct ovl_cattr *attr);
+int ovl_cleanup(struct ovl_fs *ofs, struct dentry *workdir, struct dentry *dentry);
+#define OVL_TEMPNAME_SIZE 20
+void ovl_tempname(char name[OVL_TEMPNAME_SIZE]);
+struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir,
struct ovl_cattr *attr);
-int ovl_cleanup(struct inode *dir, struct dentry *dentry);
-struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr);
/* file.c */
extern const struct file_operations ovl_file_operations;
+int ovl_real_fileattr_get(const struct path *realpath, struct file_kattr *fa);
+int ovl_real_fileattr_set(const struct path *realpath, struct file_kattr *fa);
+int ovl_fileattr_get(struct dentry *dentry, struct file_kattr *fa);
+int ovl_fileattr_set(struct mnt_idmap *idmap,
+ struct dentry *dentry, struct file_kattr *fa);
+struct ovl_file;
+struct ovl_file *ovl_file_alloc(struct file *realfile);
+void ovl_file_free(struct ovl_file *of);
/* copy_up.c */
int ovl_copy_up(struct dentry *dentry);
int ovl_copy_up_with_data(struct dentry *dentry);
-int ovl_copy_up_flags(struct dentry *dentry, int flags);
-int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags);
-int ovl_copy_xattr(struct dentry *old, struct dentry *new);
-int ovl_set_attr(struct dentry *upper, struct kstat *stat);
-struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper);
-int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
- struct dentry *upper);
+int ovl_maybe_copy_up(struct dentry *dentry, int flags);
+int ovl_copy_xattr(struct super_block *sb, const struct path *path, struct dentry *new);
+int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upper, struct kstat *stat);
+struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct inode *realinode,
+ bool is_upper);
+struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin);
+int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh,
+ struct dentry *upper);
/* export.c */
extern const struct export_operations ovl_export_operations;
+extern const struct export_operations ovl_export_fid_operations;
+
+/* super.c */
+int ovl_fill_super(struct super_block *sb, struct fs_context *fc);
+
+/* Will this overlay be forced to mount/remount ro? */
+static inline bool ovl_force_readonly(struct ovl_fs *ofs)
+{
+ return (!ovl_upper_mnt(ofs) || !ofs->workdir);
+}
+
+/* xattr.c */
+
+const struct xattr_handler * const *ovl_xattr_handlers(struct ovl_fs *ofs);
+int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct iattr *attr);
+int ovl_getattr(struct mnt_idmap *idmap, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int flags);
+ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index ec237035333a..1d4828dbcf7a 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -1,60 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
*
* Copyright (C) 2011 Novell Inc.
* Copyright (C) 2016 Red Hat, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
*/
struct ovl_config {
- char *lowerdir;
char *upperdir;
char *workdir;
+ char **lowerdirs;
bool default_permissions;
- bool redirect_dir;
- bool redirect_follow;
- const char *redirect_mode;
+ int redirect_mode;
+ int verity_mode;
bool index;
+ int uuid;
bool nfs_export;
int xino;
bool metacopy;
+ bool userxattr;
+ bool ovl_volatile;
};
struct ovl_sb {
struct super_block *sb;
dev_t pseudo_dev;
+ /* Unusable (conflicting) uuid */
+ bool bad_uuid;
+ /* Used as a lower layer (but maybe also as upper) */
+ bool is_lower;
};
struct ovl_layer {
+ /* ovl_free_fs() relies on @mnt being the first member! */
struct vfsmount *mnt;
+ /* Trap in ovl inode cache */
+ struct inode *trap;
struct ovl_sb *fs;
/* Index of this layer in fs root (upper idx == 0) */
int idx;
/* One fsid per unique underlying sb (upper fsid == 0) */
int fsid;
+ /* xwhiteouts were found on this layer */
+ bool has_xwhiteouts;
};
struct ovl_path {
- struct ovl_layer *layer;
+ const struct ovl_layer *layer;
struct dentry *dentry;
};
+struct ovl_entry {
+ unsigned int __numlower;
+ struct ovl_path __lowerstack[] __counted_by(__numlower);
+};
+
/* private information held for overlayfs's superblock */
struct ovl_fs {
- struct vfsmount *upper_mnt;
- unsigned int numlower;
- /* Number of unique lower sb that differ from upper sb */
- unsigned int numlowerfs;
- struct ovl_layer *lower_layers;
- struct ovl_sb *lower_fs;
+ unsigned int numlayer;
+ /* Number of unique fs among layers including upper fs */
+ unsigned int numfs;
+ /* Number of data-only lower layers */
+ unsigned int numdatalayer;
+ struct ovl_layer *layers;
+ struct ovl_sb *fs;
/* workbasedir is the path at workdir= mount option */
struct dentry *workbasedir;
- /* workdir is the 'work' directory under workbasedir */
+ /* workdir is the 'work' or 'index' directory under workbasedir */
struct dentry *workdir;
- /* index directory listing overlay inodes by origin file handle */
- struct dentry *indexdir;
long namelen;
/* pathnames of lower and upper dirs, for show_options */
struct ovl_config config;
@@ -62,43 +74,104 @@ struct ovl_fs {
const struct cred *creator_cred;
bool tmpfile;
bool noxattr;
+ bool nofh;
/* Did we take the inuse lock? */
bool upperdir_locked;
bool workdir_locked;
- /* Inode numbers in all layers do not use the high xino_bits */
- unsigned int xino_bits;
+ /* Traps in ovl inode cache */
+ struct inode *workbasedir_trap;
+ struct inode *workdir_trap;
+ /* -1: disabled, 0: same fs, 1..32: number of unused ino bits */
+ int xino_mode;
+ /* For allocation of non-persistent inode numbers */
+ atomic_long_t last_ino;
+ /* Shared whiteout cache */
+ struct dentry *whiteout;
+ bool no_shared_whiteout;
+ struct mutex whiteout_lock;
+ /* r/o snapshot of upperdir sb's only taken on volatile mounts */
+ errseq_t errseq;
+ bool casefold;
};
-/* private information held for every overlayfs dentry */
-struct ovl_entry {
- union {
- struct {
- unsigned long flags;
- };
- struct rcu_head rcu;
- };
- unsigned numlower;
- struct ovl_path lowerstack[];
-};
+/* Number of lower layers, not including data-only layers */
+static inline unsigned int ovl_numlowerlayer(struct ovl_fs *ofs)
+{
+ return ofs->numlayer - ofs->numdatalayer - 1;
+}
+
+static inline struct vfsmount *ovl_upper_mnt(struct ovl_fs *ofs)
+{
+ return ofs->layers[0].mnt;
+}
-struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
+static inline struct mnt_idmap *ovl_upper_mnt_idmap(struct ovl_fs *ofs)
+{
+ return mnt_idmap(ovl_upper_mnt(ofs));
+}
-static inline struct ovl_entry *OVL_E(struct dentry *dentry)
+extern struct file_system_type ovl_fs_type;
+
+static inline struct ovl_fs *OVL_FS(struct super_block *sb)
+{
+ if (IS_ENABLED(CONFIG_OVERLAY_FS_DEBUG))
+ WARN_ON_ONCE(sb->s_type != &ovl_fs_type);
+
+ return (struct ovl_fs *)sb->s_fs_info;
+}
+
+static inline bool ovl_should_sync(struct ovl_fs *ofs)
+{
+ return !ofs->config.ovl_volatile;
+}
+
+static inline unsigned int ovl_numlower(struct ovl_entry *oe)
{
- return (struct ovl_entry *) dentry->d_fsdata;
+ return oe ? oe->__numlower : 0;
+}
+
+static inline struct ovl_path *ovl_lowerstack(struct ovl_entry *oe)
+{
+ return ovl_numlower(oe) ? oe->__lowerstack : NULL;
+}
+
+static inline struct ovl_path *ovl_lowerpath(struct ovl_entry *oe)
+{
+ return ovl_lowerstack(oe);
+}
+
+static inline struct ovl_path *ovl_lowerdata(struct ovl_entry *oe)
+{
+ struct ovl_path *lowerstack = ovl_lowerstack(oe);
+
+ return lowerstack ? &lowerstack[oe->__numlower - 1] : NULL;
+}
+
+/* May return NULL if lazy lookup of lowerdata is needed */
+static inline struct dentry *ovl_lowerdata_dentry(struct ovl_entry *oe)
+{
+ struct ovl_path *lowerdata = ovl_lowerdata(oe);
+
+ return lowerdata ? READ_ONCE(lowerdata->dentry) : NULL;
+}
+
+/* private information held for every overlayfs dentry */
+static inline unsigned long *OVL_E_FLAGS(struct dentry *dentry)
+{
+ return (unsigned long *) &dentry->d_fsdata;
}
struct ovl_inode {
union {
struct ovl_dir_cache *cache; /* directory */
- struct inode *lowerdata; /* regular file */
+ const char *lowerdata_redirect; /* regular file */
};
const char *redirect;
u64 version;
unsigned long flags;
struct inode vfs_inode;
struct dentry *__upperdentry;
- struct inode *lower;
+ struct ovl_entry *oe;
/* synchronize copy up and more */
struct mutex lock;
@@ -109,6 +182,16 @@ static inline struct ovl_inode *OVL_I(struct inode *inode)
return container_of(inode, struct ovl_inode, vfs_inode);
}
+static inline struct ovl_entry *OVL_I_E(struct inode *inode)
+{
+ return inode ? OVL_I(inode)->oe : NULL;
+}
+
+static inline struct ovl_entry *OVL_E(struct dentry *dentry)
+{
+ return OVL_I_E(d_inode(dentry));
+}
+
static inline struct dentry *ovl_upperdentry_dereference(struct ovl_inode *oi)
{
return READ_ONCE(oi->__upperdentry);
diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c
new file mode 100644
index 000000000000..63b7346c5ee1
--- /dev/null
+++ b/fs/overlayfs/params.c
@@ -0,0 +1,1081 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/namei.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/seq_file.h>
+#include <linux/xattr.h>
+#include "overlayfs.h"
+#include "params.h"
+
+static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
+module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
+MODULE_PARM_DESC(redirect_dir,
+ "Default to on or off for the redirect_dir feature");
+
+static bool ovl_redirect_always_follow =
+ IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
+module_param_named(redirect_always_follow, ovl_redirect_always_follow,
+ bool, 0644);
+MODULE_PARM_DESC(redirect_always_follow,
+ "Follow redirects even if redirect_dir feature is turned off");
+
+static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO);
+module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
+MODULE_PARM_DESC(xino_auto,
+ "Auto enable xino feature");
+
+static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
+module_param_named(index, ovl_index_def, bool, 0644);
+MODULE_PARM_DESC(index,
+ "Default to on or off for the inodes index feature");
+
+static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
+module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
+MODULE_PARM_DESC(nfs_export,
+ "Default to on or off for the NFS export feature");
+
+static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY);
+module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
+MODULE_PARM_DESC(metacopy,
+ "Default to on or off for the metadata only copy up feature");
+
+enum ovl_opt {
+ Opt_lowerdir,
+ Opt_lowerdir_add,
+ Opt_datadir_add,
+ Opt_upperdir,
+ Opt_workdir,
+ Opt_default_permissions,
+ Opt_redirect_dir,
+ Opt_index,
+ Opt_uuid,
+ Opt_nfs_export,
+ Opt_userxattr,
+ Opt_xino,
+ Opt_metacopy,
+ Opt_verity,
+ Opt_volatile,
+ Opt_override_creds,
+};
+
+static const struct constant_table ovl_parameter_bool[] = {
+ { "on", true },
+ { "off", false },
+ {}
+};
+
+static const struct constant_table ovl_parameter_uuid[] = {
+ { "off", OVL_UUID_OFF },
+ { "null", OVL_UUID_NULL },
+ { "auto", OVL_UUID_AUTO },
+ { "on", OVL_UUID_ON },
+ {}
+};
+
+static const char *ovl_uuid_mode(struct ovl_config *config)
+{
+ return ovl_parameter_uuid[config->uuid].name;
+}
+
+static int ovl_uuid_def(void)
+{
+ return OVL_UUID_AUTO;
+}
+
+static const struct constant_table ovl_parameter_xino[] = {
+ { "off", OVL_XINO_OFF },
+ { "auto", OVL_XINO_AUTO },
+ { "on", OVL_XINO_ON },
+ {}
+};
+
+const char *ovl_xino_mode(struct ovl_config *config)
+{
+ return ovl_parameter_xino[config->xino].name;
+}
+
+static int ovl_xino_def(void)
+{
+ return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF;
+}
+
+const struct constant_table ovl_parameter_redirect_dir[] = {
+ { "off", OVL_REDIRECT_OFF },
+ { "follow", OVL_REDIRECT_FOLLOW },
+ { "nofollow", OVL_REDIRECT_NOFOLLOW },
+ { "on", OVL_REDIRECT_ON },
+ {}
+};
+
+static const char *ovl_redirect_mode(struct ovl_config *config)
+{
+ return ovl_parameter_redirect_dir[config->redirect_mode].name;
+}
+
+static int ovl_redirect_mode_def(void)
+{
+ return ovl_redirect_dir_def ? OVL_REDIRECT_ON :
+ ovl_redirect_always_follow ? OVL_REDIRECT_FOLLOW :
+ OVL_REDIRECT_NOFOLLOW;
+}
+
+static const struct constant_table ovl_parameter_verity[] = {
+ { "off", OVL_VERITY_OFF },
+ { "on", OVL_VERITY_ON },
+ { "require", OVL_VERITY_REQUIRE },
+ {}
+};
+
+static const char *ovl_verity_mode(struct ovl_config *config)
+{
+ return ovl_parameter_verity[config->verity_mode].name;
+}
+
+static int ovl_verity_mode_def(void)
+{
+ return OVL_VERITY_OFF;
+}
+
+const struct fs_parameter_spec ovl_parameter_spec[] = {
+ fsparam_string_empty("lowerdir", Opt_lowerdir),
+ fsparam_file_or_string("lowerdir+", Opt_lowerdir_add),
+ fsparam_file_or_string("datadir+", Opt_datadir_add),
+ fsparam_file_or_string("upperdir", Opt_upperdir),
+ fsparam_file_or_string("workdir", Opt_workdir),
+ fsparam_flag("default_permissions", Opt_default_permissions),
+ fsparam_enum("redirect_dir", Opt_redirect_dir, ovl_parameter_redirect_dir),
+ fsparam_enum("index", Opt_index, ovl_parameter_bool),
+ fsparam_enum("uuid", Opt_uuid, ovl_parameter_uuid),
+ fsparam_enum("nfs_export", Opt_nfs_export, ovl_parameter_bool),
+ fsparam_flag("userxattr", Opt_userxattr),
+ fsparam_enum("xino", Opt_xino, ovl_parameter_xino),
+ fsparam_enum("metacopy", Opt_metacopy, ovl_parameter_bool),
+ fsparam_enum("verity", Opt_verity, ovl_parameter_verity),
+ fsparam_flag("volatile", Opt_volatile),
+ fsparam_flag_no("override_creds", Opt_override_creds),
+ {}
+};
+
+static char *ovl_next_opt(char **s)
+{
+ char *sbegin = *s;
+ char *p;
+
+ if (sbegin == NULL)
+ return NULL;
+
+ for (p = sbegin; *p; p++) {
+ if (*p == '\\') {
+ p++;
+ if (!*p)
+ break;
+ } else if (*p == ',') {
+ *p = '\0';
+ *s = p + 1;
+ return sbegin;
+ }
+ }
+ *s = NULL;
+ return sbegin;
+}
+
+static int ovl_parse_monolithic(struct fs_context *fc, void *data)
+{
+ return vfs_parse_monolithic_sep(fc, data, ovl_next_opt);
+}
+
+static ssize_t ovl_parse_param_split_lowerdirs(char *str)
+{
+ ssize_t nr_layers = 1, nr_colons = 0;
+ char *s, *d;
+
+ for (s = d = str;; s++, d++) {
+ if (*s == '\\') {
+ /* keep esc chars in split lowerdir */
+ *d++ = *s++;
+ } else if (*s == ':') {
+ bool next_colon = (*(s + 1) == ':');
+
+ nr_colons++;
+ if (nr_colons == 2 && next_colon) {
+ pr_err("only single ':' or double '::' sequences of unescaped colons in lowerdir mount option allowed.\n");
+ return -EINVAL;
+ }
+ /* count layers, not colons */
+ if (!next_colon)
+ nr_layers++;
+
+ *d = '\0';
+ continue;
+ }
+
+ *d = *s;
+ if (!*s) {
+ /* trailing colons */
+ if (nr_colons) {
+ pr_err("unescaped trailing colons in lowerdir mount option.\n");
+ return -EINVAL;
+ }
+ break;
+ }
+ nr_colons = 0;
+ }
+
+ return nr_layers;
+}
+
+static int ovl_mount_dir_noesc(const char *name, struct path *path)
+{
+ int err = -EINVAL;
+
+ if (!*name) {
+ pr_err("empty lowerdir\n");
+ goto out;
+ }
+ err = kern_path(name, LOOKUP_FOLLOW, path);
+ if (err) {
+ pr_err("failed to resolve '%s': %i\n", name, err);
+ goto out;
+ }
+ return 0;
+
+out:
+ return err;
+}
+
+static void ovl_unescape(char *s)
+{
+ char *d = s;
+
+ for (;; s++, d++) {
+ if (*s == '\\')
+ s++;
+ *d = *s;
+ if (!*s)
+ break;
+ }
+}
+
+static int ovl_mount_dir(const char *name, struct path *path)
+{
+ int err = -ENOMEM;
+ char *tmp = kstrdup(name, GFP_KERNEL);
+
+ if (tmp) {
+ ovl_unescape(tmp);
+ err = ovl_mount_dir_noesc(tmp, path);
+ kfree(tmp);
+ }
+ return err;
+}
+
+static int ovl_mount_dir_check(struct fs_context *fc, const struct path *path,
+ enum ovl_opt layer, const char *name, bool upper)
+{
+ bool is_casefolded = ovl_dentry_casefolded(path->dentry);
+ struct ovl_fs_context *ctx = fc->fs_private;
+ struct ovl_fs *ofs = fc->s_fs_info;
+
+ if (!d_is_dir(path->dentry))
+ return invalfc(fc, "%s is not a directory", name);
+
+ /*
+ * Allow filesystems that are case-folding capable but deny composing
+ * ovl stack from inconsistent case-folded directories.
+ */
+ if (!ctx->casefold_set) {
+ ofs->casefold = is_casefolded;
+ ctx->casefold_set = true;
+ }
+
+ if (ofs->casefold != is_casefolded) {
+ return invalfc(fc, "case-%ssensitive directory on %s is inconsistent",
+ is_casefolded ? "in" : "", name);
+ }
+
+ if (ovl_dentry_weird(path->dentry))
+ return invalfc(fc, "filesystem on %s not supported", name);
+
+ /*
+ * Check whether upper path is read-only here to report failures
+ * early. Don't forget to recheck when the superblock is created
+ * as the mount attributes could change.
+ */
+ if (upper) {
+ if (path->dentry->d_flags & DCACHE_OP_REAL)
+ return invalfc(fc, "filesystem on %s not supported as upperdir", name);
+ if (__mnt_is_readonly(path->mnt))
+ return invalfc(fc, "filesystem on %s is read-only", name);
+ } else {
+ if (ctx->lowerdir_all && layer != Opt_lowerdir)
+ return invalfc(fc, "lowerdir+ and datadir+ cannot follow lowerdir");
+ if (ctx->nr_data && layer == Opt_lowerdir_add)
+ return invalfc(fc, "regular lower layers cannot follow data layers");
+ if (ctx->nr == OVL_MAX_STACK)
+ return invalfc(fc, "too many lower directories, limit is %d",
+ OVL_MAX_STACK);
+ }
+ return 0;
+}
+
+static int ovl_ctx_realloc_lower(struct fs_context *fc)
+{
+ struct ovl_fs_context *ctx = fc->fs_private;
+ struct ovl_fs_context_layer *l;
+ size_t nr;
+
+ if (ctx->nr < ctx->capacity)
+ return 0;
+
+ nr = min_t(size_t, max(4096 / sizeof(*l), ctx->capacity * 2),
+ OVL_MAX_STACK);
+ l = krealloc_array(ctx->lower, nr, sizeof(*l), GFP_KERNEL_ACCOUNT);
+ if (!l)
+ return -ENOMEM;
+
+ ctx->lower = l;
+ ctx->capacity = nr;
+ return 0;
+}
+
+static void ovl_add_layer(struct fs_context *fc, enum ovl_opt layer,
+ struct path *path, char **pname)
+{
+ struct ovl_fs *ofs = fc->s_fs_info;
+ struct ovl_config *config = &ofs->config;
+ struct ovl_fs_context *ctx = fc->fs_private;
+ struct ovl_fs_context_layer *l;
+
+ switch (layer) {
+ case Opt_workdir:
+ swap(config->workdir, *pname);
+ swap(ctx->work, *path);
+ break;
+ case Opt_upperdir:
+ swap(config->upperdir, *pname);
+ swap(ctx->upper, *path);
+ break;
+ case Opt_datadir_add:
+ ctx->nr_data++;
+ fallthrough;
+ case Opt_lowerdir:
+ fallthrough;
+ case Opt_lowerdir_add:
+ WARN_ON(ctx->nr >= ctx->capacity);
+ l = &ctx->lower[ctx->nr++];
+ memset(l, 0, sizeof(*l));
+ swap(l->name, *pname);
+ swap(l->path, *path);
+ break;
+ default:
+ WARN_ON(1);
+ }
+}
+
+static inline bool is_upper_layer(enum ovl_opt layer)
+{
+ return layer == Opt_upperdir || layer == Opt_workdir;
+}
+
+/* Handle non-file descriptor-based layer options that require path lookup. */
+static inline int ovl_kern_path(const char *layer_name, struct path *layer_path,
+ enum ovl_opt layer)
+{
+ int err;
+
+ switch (layer) {
+ case Opt_upperdir:
+ fallthrough;
+ case Opt_workdir:
+ fallthrough;
+ case Opt_lowerdir:
+ err = ovl_mount_dir(layer_name, layer_path);
+ break;
+ case Opt_lowerdir_add:
+ fallthrough;
+ case Opt_datadir_add:
+ err = ovl_mount_dir_noesc(layer_name, layer_path);
+ break;
+ default:
+ WARN_ON_ONCE(true);
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+static int ovl_do_parse_layer(struct fs_context *fc, const char *layer_name,
+ struct path *layer_path, enum ovl_opt layer)
+{
+ char *name __free(kfree) = kstrdup(layer_name, GFP_KERNEL);
+ bool upper;
+ int err = 0;
+
+ if (!name)
+ return -ENOMEM;
+
+ upper = is_upper_layer(layer);
+ err = ovl_mount_dir_check(fc, layer_path, layer, name, upper);
+ if (err)
+ return err;
+
+ if (!upper) {
+ err = ovl_ctx_realloc_lower(fc);
+ if (err)
+ return err;
+ }
+
+ /* Store the user provided path string in ctx to show in mountinfo */
+ ovl_add_layer(fc, layer, layer_path, &name);
+ return err;
+}
+
+static int ovl_parse_layer(struct fs_context *fc, struct fs_parameter *param,
+ enum ovl_opt layer)
+{
+ struct path layer_path __free(path_put) = {};
+ int err = 0;
+
+ switch (param->type) {
+ case fs_value_is_string:
+ err = ovl_kern_path(param->string, &layer_path, layer);
+ if (err)
+ return err;
+ err = ovl_do_parse_layer(fc, param->string, &layer_path, layer);
+ break;
+ case fs_value_is_file: {
+ char *buf __free(kfree);
+ char *layer_name;
+
+ buf = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT);
+ if (!buf)
+ return -ENOMEM;
+
+ layer_path = param->file->f_path;
+ path_get(&layer_path);
+
+ layer_name = d_path(&layer_path, buf, PATH_MAX);
+ if (IS_ERR(layer_name))
+ return PTR_ERR(layer_name);
+
+ err = ovl_do_parse_layer(fc, layer_name, &layer_path, layer);
+ break;
+ }
+ default:
+ WARN_ON_ONCE(true);
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+static void ovl_reset_lowerdirs(struct ovl_fs_context *ctx)
+{
+ struct ovl_fs_context_layer *l = ctx->lower;
+
+ // Reset old user provided lowerdir string
+ kfree(ctx->lowerdir_all);
+ ctx->lowerdir_all = NULL;
+
+ for (size_t nr = 0; nr < ctx->nr; nr++, l++) {
+ path_put(&l->path);
+ kfree(l->name);
+ l->name = NULL;
+ }
+ ctx->nr = 0;
+ ctx->nr_data = 0;
+}
+
+/*
+ * Parse lowerdir= mount option:
+ *
+ * e.g.: lowerdir=/lower1:/lower2:/lower3::/data1::/data2
+ * Set "/lower1", "/lower2", and "/lower3" as lower layers and
+ * "/data1" and "/data2" as data lower layers. Any existing lower
+ * layers are replaced.
+ */
+static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
+{
+ int err;
+ struct ovl_fs_context *ctx = fc->fs_private;
+ char *dup = NULL, *iter;
+ ssize_t nr_lower, nr;
+ bool data_layer = false;
+
+ /*
+ * Ensure we're backwards compatible with mount(2)
+ * by allowing relative paths.
+ */
+
+ /* drop all existing lower layers */
+ ovl_reset_lowerdirs(ctx);
+
+ if (!*name)
+ return 0;
+
+ if (*name == ':') {
+ pr_err("cannot append lower layer\n");
+ return -EINVAL;
+ }
+
+ // Store user provided lowerdir string to show in mount options
+ ctx->lowerdir_all = kstrdup(name, GFP_KERNEL);
+ if (!ctx->lowerdir_all)
+ return -ENOMEM;
+
+ dup = kstrdup(name, GFP_KERNEL);
+ if (!dup)
+ return -ENOMEM;
+
+ err = -EINVAL;
+ nr_lower = ovl_parse_param_split_lowerdirs(dup);
+ if (nr_lower < 0)
+ goto out_err;
+
+ if (nr_lower > OVL_MAX_STACK) {
+ pr_err("too many lower directories, limit is %d\n", OVL_MAX_STACK);
+ goto out_err;
+ }
+
+ iter = dup;
+ for (nr = 0; nr < nr_lower; nr++) {
+ struct path path __free(path_put) = {};
+
+ err = ovl_kern_path(iter, &path, Opt_lowerdir);
+ if (err)
+ goto out_err;
+
+ err = ovl_do_parse_layer(fc, iter, &path, Opt_lowerdir);
+ if (err)
+ goto out_err;
+
+ if (data_layer)
+ ctx->nr_data++;
+
+ /* Calling strchr() again would overrun. */
+ if (ctx->nr == nr_lower)
+ break;
+
+ err = -EINVAL;
+ iter = strchr(iter, '\0') + 1;
+ if (*iter) {
+ /*
+ * This is a regular layer so we require that
+ * there are no data layers.
+ */
+ if (ctx->nr_data > 0) {
+ pr_err("regular lower layers cannot follow data lower layers\n");
+ goto out_err;
+ }
+
+ data_layer = false;
+ continue;
+ }
+
+ /* This is a data lower layer. */
+ data_layer = true;
+ iter++;
+ }
+ kfree(dup);
+ return 0;
+
+out_err:
+ kfree(dup);
+
+ /* Intentionally don't realloc to a smaller size. */
+ return err;
+}
+
+static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ int err = 0;
+ struct fs_parse_result result;
+ struct ovl_fs *ofs = fc->s_fs_info;
+ struct ovl_config *config = &ofs->config;
+ struct ovl_fs_context *ctx = fc->fs_private;
+ int opt;
+
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+ /*
+ * On remount overlayfs has always ignored all mount
+ * options no matter if malformed or not so for
+ * backwards compatibility we do the same here.
+ */
+ if (fc->oldapi)
+ return 0;
+
+ /*
+ * Give us the freedom to allow changing mount options
+ * with the new mount api in the future. So instead of
+ * silently ignoring everything we report a proper
+ * error. This is only visible for users of the new
+ * mount api.
+ */
+ return invalfc(fc, "No changes allowed in reconfigure");
+ }
+
+ opt = fs_parse(fc, ovl_parameter_spec, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_lowerdir:
+ err = ovl_parse_param_lowerdir(param->string, fc);
+ break;
+ case Opt_lowerdir_add:
+ case Opt_datadir_add:
+ case Opt_upperdir:
+ case Opt_workdir:
+ err = ovl_parse_layer(fc, param, opt);
+ break;
+ case Opt_default_permissions:
+ config->default_permissions = true;
+ break;
+ case Opt_redirect_dir:
+ config->redirect_mode = result.uint_32;
+ if (config->redirect_mode == OVL_REDIRECT_OFF) {
+ config->redirect_mode = ovl_redirect_always_follow ?
+ OVL_REDIRECT_FOLLOW :
+ OVL_REDIRECT_NOFOLLOW;
+ }
+ ctx->set.redirect = true;
+ break;
+ case Opt_index:
+ config->index = result.uint_32;
+ ctx->set.index = true;
+ break;
+ case Opt_uuid:
+ config->uuid = result.uint_32;
+ break;
+ case Opt_nfs_export:
+ config->nfs_export = result.uint_32;
+ ctx->set.nfs_export = true;
+ break;
+ case Opt_xino:
+ config->xino = result.uint_32;
+ break;
+ case Opt_metacopy:
+ config->metacopy = result.uint_32;
+ ctx->set.metacopy = true;
+ break;
+ case Opt_verity:
+ config->verity_mode = result.uint_32;
+ break;
+ case Opt_volatile:
+ config->ovl_volatile = true;
+ break;
+ case Opt_userxattr:
+ config->userxattr = true;
+ break;
+ case Opt_override_creds: {
+ const struct cred *cred = NULL;
+
+ if (result.negated) {
+ swap(cred, ofs->creator_cred);
+ put_cred(cred);
+ break;
+ }
+
+ if (!current_in_userns(fc->user_ns)) {
+ err = -EINVAL;
+ break;
+ }
+
+ cred = prepare_creds();
+ if (cred)
+ swap(cred, ofs->creator_cred);
+ else
+ err = -ENOMEM;
+
+ put_cred(cred);
+ break;
+ }
+ default:
+ pr_err("unrecognized mount option \"%s\" or missing value\n",
+ param->key);
+ return -EINVAL;
+ }
+
+ return err;
+}
+
+static int ovl_get_tree(struct fs_context *fc)
+{
+ return get_tree_nodev(fc, ovl_fill_super);
+}
+
+static inline void ovl_fs_context_free(struct ovl_fs_context *ctx)
+{
+ ovl_reset_lowerdirs(ctx);
+ path_put(&ctx->upper);
+ path_put(&ctx->work);
+ kfree(ctx->lower);
+ kfree(ctx);
+}
+
+static void ovl_free(struct fs_context *fc)
+{
+ struct ovl_fs *ofs = fc->s_fs_info;
+ struct ovl_fs_context *ctx = fc->fs_private;
+
+ /*
+ * ofs is stored in the fs_context when it is initialized.
+ * ofs is transferred to the superblock on a successful mount,
+ * but if an error occurs before the transfer we have to free
+ * it here.
+ */
+ if (ofs)
+ ovl_free_fs(ofs);
+
+ if (ctx)
+ ovl_fs_context_free(ctx);
+}
+
+static int ovl_reconfigure(struct fs_context *fc)
+{
+ struct super_block *sb = fc->root->d_sb;
+ struct ovl_fs *ofs = OVL_FS(sb);
+ struct super_block *upper_sb;
+ int ret = 0;
+
+ if (!(fc->sb_flags & SB_RDONLY) && ovl_force_readonly(ofs))
+ return -EROFS;
+
+ if (fc->sb_flags & SB_RDONLY && !sb_rdonly(sb)) {
+ upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
+ if (ovl_should_sync(ofs)) {
+ down_read(&upper_sb->s_umount);
+ ret = sync_filesystem(upper_sb);
+ up_read(&upper_sb->s_umount);
+ }
+ }
+
+ return ret;
+}
+
+static const struct fs_context_operations ovl_context_ops = {
+ .parse_monolithic = ovl_parse_monolithic,
+ .parse_param = ovl_parse_param,
+ .get_tree = ovl_get_tree,
+ .reconfigure = ovl_reconfigure,
+ .free = ovl_free,
+};
+
+/*
+ * This is called during fsopen() and will record the user namespace of
+ * the caller in fc->user_ns since we've raised FS_USERNS_MOUNT. We'll
+ * need it when we actually create the superblock to verify that the
+ * process creating the superblock is in the same user namespace as
+ * process that called fsopen().
+ */
+int ovl_init_fs_context(struct fs_context *fc)
+{
+ struct ovl_fs_context *ctx;
+ struct ovl_fs *ofs;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
+ if (!ctx)
+ return -ENOMEM;
+
+ /*
+ * By default we allocate for three lower layers. It's likely
+ * that it'll cover most users.
+ */
+ ctx->lower = kmalloc_array(3, sizeof(*ctx->lower), GFP_KERNEL_ACCOUNT);
+ if (!ctx->lower)
+ goto out_err;
+ ctx->capacity = 3;
+
+ ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
+ if (!ofs)
+ goto out_err;
+
+ ofs->config.redirect_mode = ovl_redirect_mode_def();
+ ofs->config.index = ovl_index_def;
+ ofs->config.uuid = ovl_uuid_def();
+ ofs->config.nfs_export = ovl_nfs_export_def;
+ ofs->config.xino = ovl_xino_def();
+ ofs->config.metacopy = ovl_metacopy_def;
+
+ fc->s_fs_info = ofs;
+ fc->fs_private = ctx;
+ fc->ops = &ovl_context_ops;
+
+ mutex_init(&ofs->whiteout_lock);
+ return 0;
+
+out_err:
+ ovl_fs_context_free(ctx);
+ return -ENOMEM;
+
+}
+
+void ovl_free_fs(struct ovl_fs *ofs)
+{
+ struct vfsmount **mounts;
+ unsigned i;
+
+ iput(ofs->workbasedir_trap);
+ iput(ofs->workdir_trap);
+ dput(ofs->whiteout);
+ dput(ofs->workdir);
+ if (ofs->workdir_locked)
+ ovl_inuse_unlock(ofs->workbasedir);
+ dput(ofs->workbasedir);
+ if (ofs->upperdir_locked)
+ ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root);
+
+ /* Reuse ofs->config.lowerdirs as a vfsmount array before freeing it */
+ mounts = (struct vfsmount **) ofs->config.lowerdirs;
+ for (i = 0; i < ofs->numlayer; i++) {
+ iput(ofs->layers[i].trap);
+ kfree(ofs->config.lowerdirs[i]);
+ mounts[i] = ofs->layers[i].mnt;
+ }
+ kern_unmount_array(mounts, ofs->numlayer);
+ kfree(ofs->layers);
+ for (i = 0; i < ofs->numfs; i++)
+ free_anon_bdev(ofs->fs[i].pseudo_dev);
+ kfree(ofs->fs);
+
+ kfree(ofs->config.lowerdirs);
+ kfree(ofs->config.upperdir);
+ kfree(ofs->config.workdir);
+ if (ofs->creator_cred)
+ put_cred(ofs->creator_cred);
+ kfree(ofs);
+}
+
+int ovl_fs_params_verify(const struct ovl_fs_context *ctx,
+ struct ovl_config *config)
+{
+ struct ovl_opt_set set = ctx->set;
+
+ /* Workdir/index are useless in non-upper mount */
+ if (!config->upperdir) {
+ if (config->workdir) {
+ pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
+ config->workdir);
+ kfree(config->workdir);
+ config->workdir = NULL;
+ }
+ if (config->index && set.index) {
+ pr_info("option \"index=on\" is useless in a non-upper mount, ignore\n");
+ set.index = false;
+ }
+ config->index = false;
+ }
+
+ if (!config->upperdir && config->ovl_volatile) {
+ pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n");
+ config->ovl_volatile = false;
+ }
+
+ if (!config->upperdir && config->uuid == OVL_UUID_ON) {
+ pr_info("option \"uuid=on\" requires an upper fs, falling back to uuid=null.\n");
+ config->uuid = OVL_UUID_NULL;
+ }
+
+ /*
+ * This is to make the logic below simpler. It doesn't make any other
+ * difference, since redirect_dir=on is only used for upper.
+ */
+ if (!config->upperdir && config->redirect_mode == OVL_REDIRECT_FOLLOW)
+ config->redirect_mode = OVL_REDIRECT_ON;
+
+ /* metacopy -> redirect_dir dependency */
+ if (config->metacopy && config->redirect_mode != OVL_REDIRECT_ON) {
+ if (set.metacopy && set.redirect) {
+ pr_err("conflicting options: metacopy=on,redirect_dir=%s\n",
+ ovl_redirect_mode(config));
+ return -EINVAL;
+ }
+ if (set.redirect) {
+ /*
+ * There was an explicit redirect_dir=... that resulted
+ * in this conflict.
+ */
+ pr_info("disabling metacopy due to redirect_dir=%s\n",
+ ovl_redirect_mode(config));
+ config->metacopy = false;
+ } else {
+ /* Automatically enable redirect otherwise. */
+ config->redirect_mode = OVL_REDIRECT_ON;
+ }
+ }
+
+ /* Resolve nfs_export -> index dependency */
+ if (config->nfs_export && !config->index) {
+ if (!config->upperdir &&
+ config->redirect_mode != OVL_REDIRECT_NOFOLLOW) {
+ pr_info("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
+ config->nfs_export = false;
+ } else if (set.nfs_export && set.index) {
+ pr_err("conflicting options: nfs_export=on,index=off\n");
+ return -EINVAL;
+ } else if (set.index) {
+ /*
+ * There was an explicit index=off that resulted
+ * in this conflict.
+ */
+ pr_info("disabling nfs_export due to index=off\n");
+ config->nfs_export = false;
+ } else {
+ /* Automatically enable index otherwise. */
+ config->index = true;
+ }
+ }
+
+ /* Resolve nfs_export -> !metacopy && !verity dependency */
+ if (config->nfs_export && config->metacopy) {
+ if (set.nfs_export && set.metacopy) {
+ pr_err("conflicting options: nfs_export=on,metacopy=on\n");
+ return -EINVAL;
+ }
+ if (set.metacopy) {
+ /*
+ * There was an explicit metacopy=on that resulted
+ * in this conflict.
+ */
+ pr_info("disabling nfs_export due to metacopy=on\n");
+ config->nfs_export = false;
+ } else if (config->verity_mode) {
+ /*
+ * There was an explicit verity=.. that resulted
+ * in this conflict.
+ */
+ pr_info("disabling nfs_export due to verity=%s\n",
+ ovl_verity_mode(config));
+ config->nfs_export = false;
+ } else {
+ /*
+ * There was an explicit nfs_export=on that resulted
+ * in this conflict.
+ */
+ pr_info("disabling metacopy due to nfs_export=on\n");
+ config->metacopy = false;
+ }
+ }
+
+
+ /* Resolve userxattr -> !redirect && !metacopy dependency */
+ if (config->userxattr) {
+ if (set.redirect &&
+ config->redirect_mode != OVL_REDIRECT_NOFOLLOW) {
+ pr_err("conflicting options: userxattr,redirect_dir=%s\n",
+ ovl_redirect_mode(config));
+ return -EINVAL;
+ }
+ if (config->metacopy && set.metacopy) {
+ pr_err("conflicting options: userxattr,metacopy=on\n");
+ return -EINVAL;
+ }
+ /*
+ * Silently disable default setting of redirect and metacopy.
+ * This shall be the default in the future as well: these
+ * options must be explicitly enabled if used together with
+ * userxattr.
+ */
+ config->redirect_mode = OVL_REDIRECT_NOFOLLOW;
+ config->metacopy = false;
+ }
+
+ /*
+ * Fail if we don't have trusted xattr capability and a feature was
+ * explicitly requested that requires them.
+ */
+ if (!config->userxattr && !capable(CAP_SYS_ADMIN)) {
+ if (set.redirect &&
+ config->redirect_mode != OVL_REDIRECT_NOFOLLOW) {
+ pr_err("redirect_dir requires permission to access trusted xattrs\n");
+ return -EPERM;
+ }
+ if (config->metacopy && set.metacopy) {
+ pr_err("metacopy requires permission to access trusted xattrs\n");
+ return -EPERM;
+ }
+ if (config->verity_mode) {
+ pr_err("verity requires permission to access trusted xattrs\n");
+ return -EPERM;
+ }
+ if (ctx->nr_data > 0) {
+ pr_err("lower data-only dirs require permission to access trusted xattrs\n");
+ return -EPERM;
+ }
+ /*
+ * Other xattr-dependent features should be disabled without
+ * great disturbance to the user in ovl_make_workdir().
+ */
+ }
+
+ return 0;
+}
+
+/**
+ * ovl_show_options
+ * @m: the seq_file handle
+ * @dentry: The dentry to query
+ *
+ * Prints the mount options for a given superblock.
+ * Returns zero; does not fail.
+ */
+int ovl_show_options(struct seq_file *m, struct dentry *dentry)
+{
+ struct super_block *sb = dentry->d_sb;
+ struct ovl_fs *ofs = OVL_FS(sb);
+ size_t nr, nr_merged_lower, nr_lower = 0;
+ char **lowerdirs = ofs->config.lowerdirs;
+
+ /*
+ * lowerdirs[0] holds the colon separated list that user provided
+ * with lowerdir mount option.
+ * lowerdirs[1..numlayer] hold the lowerdir paths that were added
+ * using the lowerdir+ and datadir+ mount options.
+ * For now, we do not allow mixing the legacy lowerdir mount option
+ * with the new lowerdir+ and datadir+ mount options.
+ */
+ if (lowerdirs[0]) {
+ seq_show_option(m, "lowerdir", lowerdirs[0]);
+ } else {
+ nr_lower = ofs->numlayer;
+ nr_merged_lower = nr_lower - ofs->numdatalayer;
+ }
+ for (nr = 1; nr < nr_lower; nr++) {
+ if (nr < nr_merged_lower)
+ seq_show_option(m, "lowerdir+", lowerdirs[nr]);
+ else
+ seq_show_option(m, "datadir+", lowerdirs[nr]);
+ }
+ if (ofs->config.upperdir) {
+ seq_show_option(m, "upperdir", ofs->config.upperdir);
+ seq_show_option(m, "workdir", ofs->config.workdir);
+ }
+ if (ofs->config.default_permissions)
+ seq_puts(m, ",default_permissions");
+ if (ofs->config.redirect_mode != ovl_redirect_mode_def())
+ seq_printf(m, ",redirect_dir=%s",
+ ovl_redirect_mode(&ofs->config));
+ if (ofs->config.index != ovl_index_def)
+ seq_printf(m, ",index=%s", str_on_off(ofs->config.index));
+ if (ofs->config.uuid != ovl_uuid_def())
+ seq_printf(m, ",uuid=%s", ovl_uuid_mode(&ofs->config));
+ if (ofs->config.nfs_export != ovl_nfs_export_def)
+ seq_printf(m, ",nfs_export=%s",
+ str_on_off(ofs->config.nfs_export));
+ if (ofs->config.xino != ovl_xino_def() && !ovl_same_fs(ofs))
+ seq_printf(m, ",xino=%s", ovl_xino_mode(&ofs->config));
+ if (ofs->config.metacopy != ovl_metacopy_def)
+ seq_printf(m, ",metacopy=%s", str_on_off(ofs->config.metacopy));
+ if (ofs->config.ovl_volatile)
+ seq_puts(m, ",volatile");
+ if (ofs->config.userxattr)
+ seq_puts(m, ",userxattr");
+ if (ofs->config.verity_mode != ovl_verity_mode_def())
+ seq_printf(m, ",verity=%s",
+ ovl_verity_mode(&ofs->config));
+ return 0;
+}
diff --git a/fs/overlayfs/params.h b/fs/overlayfs/params.h
new file mode 100644
index 000000000000..ffd53cdd8482
--- /dev/null
+++ b/fs/overlayfs/params.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
+
+struct ovl_fs;
+struct ovl_config;
+
+extern const struct fs_parameter_spec ovl_parameter_spec[];
+extern const struct constant_table ovl_parameter_redirect_dir[];
+
+/* The set of options that user requested explicitly via mount options */
+struct ovl_opt_set {
+ bool metacopy;
+ bool redirect;
+ bool nfs_export;
+ bool index;
+};
+
+#define OVL_MAX_STACK 500
+
+struct ovl_fs_context_layer {
+ char *name;
+ struct path path;
+};
+
+struct ovl_fs_context {
+ struct path upper;
+ struct path work;
+ size_t capacity;
+ size_t nr; /* includes nr_data */
+ size_t nr_data;
+ struct ovl_opt_set set;
+ struct ovl_fs_context_layer *lower;
+ char *lowerdir_all; /* user provided lowerdir string */
+ bool casefold_set;
+};
+
+int ovl_init_fs_context(struct fs_context *fc);
+void ovl_free_fs(struct ovl_fs *ofs);
+int ovl_fs_params_verify(const struct ovl_fs_context *ctx,
+ struct ovl_config *config);
+int ovl_show_options(struct seq_file *m, struct dentry *dentry);
+const char *ovl_xino_mode(struct ovl_config *config);
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index cc8303a806b4..160960bb0ad0 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
*
* Copyright (C) 2011 Novell Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
*/
#include <linux/fs.h>
@@ -16,6 +13,7 @@
#include <linux/security.h>
#include <linux/cred.h>
#include <linux/ratelimit.h>
+#include <linux/overflow.h>
#include "overlayfs.h"
struct ovl_cache_entry {
@@ -28,6 +26,9 @@ struct ovl_cache_entry {
struct ovl_cache_entry *next_maybe_whiteout;
bool is_upper;
bool is_whiteout;
+ bool check_xwhiteout;
+ const char *c_name;
+ int c_len;
char name[];
};
@@ -46,10 +47,12 @@ struct ovl_readdir_data {
struct list_head *list;
struct list_head middle;
struct ovl_cache_entry *first_maybe_whiteout;
+ struct unicode_map *map;
int count;
int err;
bool is_upper;
bool d_type_supported;
+ bool in_xwhiteouts_dir;
};
struct ovl_dir_file {
@@ -66,6 +69,31 @@ static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
return rb_entry(n, struct ovl_cache_entry, node);
}
+static int ovl_casefold(struct ovl_readdir_data *rdd, const char *str, int len,
+ char **dst)
+{
+ const struct qstr qstr = { .name = str, .len = len };
+ char *cf_name;
+ int cf_len;
+
+ if (!IS_ENABLED(CONFIG_UNICODE) || !rdd->map || is_dot_dotdot(str, len))
+ return 0;
+
+ cf_name = kmalloc(NAME_MAX, GFP_KERNEL);
+ if (!cf_name) {
+ rdd->err = -ENOMEM;
+ return -ENOMEM;
+ }
+
+ cf_len = utf8_casefold(rdd->map, &qstr, cf_name, NAME_MAX);
+ if (cf_len > 0)
+ *dst = cf_name;
+ else
+ kfree(cf_name);
+
+ return cf_len;
+}
+
static bool ovl_cache_entry_find_link(const char *name, int len,
struct rb_node ***link,
struct rb_node **parent)
@@ -79,10 +107,10 @@ static bool ovl_cache_entry_find_link(const char *name, int len,
*parent = *newp;
tmp = ovl_cache_entry_from_node(*newp);
- cmp = strncmp(name, tmp->name, len);
+ cmp = strncmp(name, tmp->c_name, len);
if (cmp > 0)
newp = &tmp->node.rb_right;
- else if (cmp < 0 || len < tmp->len)
+ else if (cmp < 0 || len < tmp->c_len)
newp = &tmp->node.rb_left;
else
found = true;
@@ -101,10 +129,10 @@ static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
while (node) {
struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
- cmp = strncmp(name, p->name, len);
+ cmp = strncmp(name, p->c_name, len);
if (cmp > 0)
node = p->node.rb_right;
- else if (cmp < 0 || len < p->len)
+ else if (cmp < 0 || len < p->c_len)
node = p->node.rb_left;
else
return p;
@@ -121,7 +149,7 @@ static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
return false;
/* Always recalc d_ino when remapping lower inode numbers */
- if (ovl_xino_bits(rdd->dentry->d_sb))
+ if (ovl_xino_bits(OVL_FS(rdd->dentry->d_sb)))
return true;
/* Always recalc d_ino for parent */
@@ -145,12 +173,12 @@ static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
const char *name, int len,
+ const char *c_name, int c_len,
u64 ino, unsigned int d_type)
{
struct ovl_cache_entry *p;
- size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);
- p = kmalloc(size, GFP_KERNEL);
+ p = kmalloc(struct_size(p, name, len + 1), GFP_KERNEL);
if (!p)
return NULL;
@@ -165,6 +193,16 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
p->ino = 0;
p->is_upper = rdd->is_upper;
p->is_whiteout = false;
+ /* Defer check for overlay.whiteout to ovl_iterate() */
+ p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG;
+
+ if (c_name && c_name != name) {
+ p->c_name = c_name;
+ p->c_len = c_len;
+ } else {
+ p->c_name = p->name;
+ p->c_len = len;
+ }
if (d_type == DT_CHR) {
p->next_maybe_whiteout = rdd->first_maybe_whiteout;
@@ -173,18 +211,21 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
return p;
}
+/* Return 0 for found, 1 for added, <0 for error */
static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
- const char *name, int len, u64 ino,
+ const char *name, int len,
+ const char *c_name, int c_len,
+ u64 ino,
unsigned int d_type)
{
struct rb_node **newp = &rdd->root->rb_node;
struct rb_node *parent = NULL;
struct ovl_cache_entry *p;
- if (ovl_cache_entry_find_link(name, len, &newp, &parent))
+ if (ovl_cache_entry_find_link(c_name, c_len, &newp, &parent))
return 0;
- p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
+ p = ovl_cache_entry_new(rdd, name, len, c_name, c_len, ino, d_type);
if (p == NULL) {
rdd->err = -ENOMEM;
return -ENOMEM;
@@ -194,27 +235,38 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
rb_link_node(&p->node, parent, newp);
rb_insert_color(&p->node, rdd->root);
- return 0;
+ return 1;
}
+/* Return 0 for found, 1 for added, <0 for error */
static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
const char *name, int namelen,
+ const char *c_name, int c_len,
loff_t offset, u64 ino, unsigned int d_type)
{
struct ovl_cache_entry *p;
- p = ovl_cache_entry_find(rdd->root, name, namelen);
+ p = ovl_cache_entry_find(rdd->root, c_name, c_len);
if (p) {
list_move_tail(&p->l_node, &rdd->middle);
+ return 0;
} else {
- p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
+ p = ovl_cache_entry_new(rdd, name, namelen, c_name, c_len,
+ ino, d_type);
if (p == NULL)
rdd->err = -ENOMEM;
else
list_add_tail(&p->l_node, &rdd->middle);
}
- return rdd->err;
+ return rdd->err ?: 1;
+}
+
+static void ovl_cache_entry_free(struct ovl_cache_entry *p)
+{
+ if (p->c_name != p->name)
+ kfree(p->c_name);
+ kfree(p);
}
void ovl_cache_free(struct list_head *list)
@@ -223,7 +275,7 @@ void ovl_cache_free(struct list_head *list)
struct ovl_cache_entry *n;
list_for_each_entry_safe(p, n, list, l_node)
- kfree(p);
+ ovl_cache_entry_free(p);
INIT_LIST_HEAD(list);
}
@@ -238,69 +290,91 @@ void ovl_dir_cache_free(struct inode *inode)
}
}
-static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
+static void ovl_cache_put(struct ovl_dir_file *od, struct inode *inode)
{
struct ovl_dir_cache *cache = od->cache;
WARN_ON(cache->refcount <= 0);
cache->refcount--;
if (!cache->refcount) {
- if (ovl_dir_cache(d_inode(dentry)) == cache)
- ovl_set_dir_cache(d_inode(dentry), NULL);
+ if (ovl_dir_cache(inode) == cache)
+ ovl_set_dir_cache(inode, NULL);
ovl_cache_free(&cache->entries);
kfree(cache);
}
}
-static int ovl_fill_merge(struct dir_context *ctx, const char *name,
+static bool ovl_fill_merge(struct dir_context *ctx, const char *name,
int namelen, loff_t offset, u64 ino,
unsigned int d_type)
{
struct ovl_readdir_data *rdd =
container_of(ctx, struct ovl_readdir_data, ctx);
+ struct ovl_fs *ofs = OVL_FS(rdd->dentry->d_sb);
+ const char *c_name = NULL;
+ char *cf_name = NULL;
+ int c_len = 0, ret;
+
+ if (ofs->casefold)
+ c_len = ovl_casefold(rdd, name, namelen, &cf_name);
+
+ if (rdd->err)
+ return false;
+
+ if (c_len <= 0) {
+ c_name = name;
+ c_len = namelen;
+ } else {
+ c_name = cf_name;
+ }
rdd->count++;
if (!rdd->is_lowest)
- return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
+ ret = ovl_cache_entry_add_rb(rdd, name, namelen, c_name, c_len, ino, d_type);
else
- return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
+ ret = ovl_fill_lowest(rdd, name, namelen, c_name, c_len, offset, ino, d_type);
+
+ /*
+ * If ret == 1, that means that c_name is being used as part of struct
+ * ovl_cache_entry and will be freed at ovl_cache_free(). Otherwise,
+ * c_name was found in the rb-tree so we can free it here.
+ */
+ if (ret != 1 && c_name != name)
+ kfree(c_name);
+
+ return ret >= 0;
}
-static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
+static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd)
{
- int err;
- struct ovl_cache_entry *p;
- struct dentry *dentry;
- const struct cred *old_cred;
-
- old_cred = ovl_override_creds(rdd->dentry->d_sb);
-
- err = down_write_killable(&dir->d_inode->i_rwsem);
- if (!err) {
- while (rdd->first_maybe_whiteout) {
- p = rdd->first_maybe_whiteout;
- rdd->first_maybe_whiteout = p->next_maybe_whiteout;
- dentry = lookup_one_len(p->name, dir, p->len);
- if (!IS_ERR(dentry)) {
- p->is_whiteout = ovl_is_whiteout(dentry);
- dput(dentry);
- }
+ struct dentry *dentry, *dir = path->dentry;
+
+ while (rdd->first_maybe_whiteout) {
+ struct ovl_cache_entry *p =
+ rdd->first_maybe_whiteout;
+ rdd->first_maybe_whiteout = p->next_maybe_whiteout;
+ dentry = lookup_one_positive_killable(mnt_idmap(path->mnt),
+ &QSTR_LEN(p->name, p->len),
+ dir);
+ if (!IS_ERR(dentry)) {
+ p->is_whiteout = ovl_is_whiteout(dentry);
+ dput(dentry);
+ } else if (PTR_ERR(dentry) == -EINTR) {
+ return -EINTR;
}
- inode_unlock(dir->d_inode);
}
- revert_creds(old_cred);
- return err;
+ return 0;
}
-static inline int ovl_dir_read(struct path *realpath,
+static inline int ovl_dir_read(const struct path *realpath,
struct ovl_readdir_data *rdd)
{
struct file *realfile;
int err;
- realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
+ realfile = ovl_path_open(realpath, O_RDONLY | O_LARGEFILE);
if (IS_ERR(realfile))
return PTR_ERR(realfile);
@@ -315,38 +389,26 @@ static inline int ovl_dir_read(struct path *realpath,
} while (!err && rdd->count);
if (!err && rdd->first_maybe_whiteout && rdd->dentry)
- err = ovl_check_whiteouts(realpath->dentry, rdd);
+ err = ovl_check_whiteouts(realpath, rdd);
fput(realfile);
return err;
}
-/*
- * Can we iterate real dir directly?
- *
- * Non-merge dir may contain whiteouts from a time it was a merge upper, before
- * lower dir was removed under it and possibly before it was rotated from upper
- * to lower layer.
- */
-static bool ovl_dir_is_real(struct dentry *dir)
-{
- return !ovl_test_flag(OVL_WHITEOUTS, d_inode(dir));
-}
-
static void ovl_dir_reset(struct file *file)
{
struct ovl_dir_file *od = file->private_data;
struct ovl_dir_cache *cache = od->cache;
- struct dentry *dentry = file->f_path.dentry;
+ struct inode *inode = file_inode(file);
bool is_real;
- if (cache && ovl_dentry_version_get(dentry) != cache->version) {
- ovl_cache_put(od, dentry);
+ if (cache && ovl_inode_version_get(inode) != cache->version) {
+ ovl_cache_put(od, inode);
od->cache = NULL;
od->cursor = NULL;
}
- is_real = ovl_dir_is_real(dentry);
+ is_real = ovl_dir_is_real(inode);
if (od->is_real != is_real) {
/* is_real can only become false when dir is copied up */
if (WARN_ON(is_real))
@@ -362,16 +424,26 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
struct path realpath;
struct ovl_readdir_data rdd = {
.ctx.actor = ovl_fill_merge,
+ .ctx.count = INT_MAX,
.dentry = dentry,
.list = list,
.root = root,
.is_lowest = false,
+ .map = NULL,
};
int idx, next;
+ const struct ovl_layer *layer;
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
for (idx = 0; idx != -1; idx = next) {
- next = ovl_path_next(idx, dentry, &realpath);
+ next = ovl_path_next(idx, dentry, &realpath, &layer);
+
+ if (ofs->casefold)
+ rdd.map = sb_encoding(realpath.dentry->d_sb);
+
rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
+ rdd.in_xwhiteouts_dir = layer->has_xwhiteouts &&
+ ovl_dentry_has_xwhiteouts(dentry);
if (next != -1) {
err = ovl_dir_read(&realpath, &rdd);
@@ -409,9 +481,10 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
{
int res;
struct ovl_dir_cache *cache;
+ struct inode *inode = d_inode(dentry);
- cache = ovl_dir_cache(d_inode(dentry));
- if (cache && ovl_dentry_version_get(dentry) == cache->version) {
+ cache = ovl_dir_cache(inode);
+ if (cache && ovl_inode_version_get(inode) == cache->version) {
WARN_ON(!cache->refcount);
cache->refcount++;
return cache;
@@ -433,27 +506,35 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
return ERR_PTR(res);
}
- cache->version = ovl_dentry_version_get(dentry);
- ovl_set_dir_cache(d_inode(dentry), cache);
+ cache->version = ovl_inode_version_get(inode);
+ ovl_set_dir_cache(inode, cache);
return cache;
}
/* Map inode number to lower fs unique range */
static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
- const char *name, int namelen)
+ const char *name, int namelen, bool warn)
{
- if (ino >> (64 - xinobits)) {
- pr_warn_ratelimited("overlayfs: d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
- namelen, name, ino, xinobits);
+ unsigned int xinoshift = 64 - xinobits;
+
+ if (unlikely(ino >> xinoshift)) {
+ if (warn) {
+ pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
+ namelen, name, ino, xinobits);
+ }
return ino;
}
- return ino | ((u64)fsid) << (64 - xinobits);
+ /*
+ * The lowest xinobit is reserved for mapping the non-peresistent inode
+ * numbers range, but this range is only exposed via st_ino, not here.
+ */
+ return ino | ((u64)fsid) << (xinoshift + 1);
}
/*
- * Set d_ino for upper entries. Non-upper entries should always report
+ * Set d_ino for upper entries if needed. Non-upper entries should always report
* the uppermost real inode ino and should not call this function.
*
* When not all layer are on same fs, report real ino also for upper.
@@ -461,18 +542,22 @@ static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
* When all layers are on the same fs, and upper has a reference to
* copy up origin, call vfs_getattr() on the overlay entry to make
* sure that d_ino will be consistent with st_ino from stat(2).
+ *
+ * Also checks the overlay.whiteout xattr by doing a full lookup which will return
+ * negative in this case.
*/
-static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p)
+static int ovl_cache_update(const struct path *path, struct ovl_cache_entry *p, bool update_ino)
{
struct dentry *dir = path->dentry;
+ struct ovl_fs *ofs = OVL_FS(dir->d_sb);
struct dentry *this = NULL;
enum ovl_path_type type;
u64 ino = p->real_ino;
- int xinobits = ovl_xino_bits(dir->d_sb);
+ int xinobits = ovl_xino_bits(ofs);
int err = 0;
- if (!ovl_same_sb(dir->d_sb) && !xinobits)
+ if (!ovl_same_dev(ofs) && !p->check_xwhiteout)
goto out;
if (p->name[0] == '.') {
@@ -486,8 +571,11 @@ static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p)
goto get;
}
}
- this = lookup_one_len(p->name, dir, p->len);
+ /* This checks also for xwhiteouts */
+ this = lookup_one(mnt_idmap(path->mnt), &QSTR_LEN(p->name, p->len), dir);
if (IS_ERR_OR_NULL(this) || !this->d_inode) {
+ /* Mark a stale entry */
+ p->is_whiteout = true;
if (IS_ERR(this)) {
err = PTR_ERR(this);
this = NULL;
@@ -497,6 +585,9 @@ static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p)
}
get:
+ if (!ovl_same_dev(ofs) || !update_ino)
+ goto out;
+
type = ovl_path_type(this);
if (OVL_TYPE_ORIGIN(type)) {
struct kstat stat;
@@ -507,12 +598,19 @@ get:
if (err)
goto fail;
- WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev);
+ /*
+ * Directory inode is always on overlay st_dev.
+ * Non-dir with ovl_same_dev() could be on pseudo st_dev in case
+ * of xino bits overflow.
+ */
+ WARN_ON_ONCE(S_ISDIR(stat.mode) &&
+ dir->d_sb->s_dev != stat.dev);
ino = stat.ino;
} else if (xinobits && !OVL_TYPE_UPPER(type)) {
ino = ovl_remap_lower_ino(ino, xinobits,
ovl_layer_lower(this)->fsid,
- p->name, p->len);
+ p->name, p->len,
+ ovl_xino_warn(ofs));
}
out:
@@ -521,12 +619,12 @@ out:
return err;
fail:
- pr_warn_ratelimited("overlayfs: failed to look up (%s) for ino (%i)\n",
+ pr_warn_ratelimited("failed to look up (%s) for ino (%i)\n",
p->name, err);
goto out;
}
-static int ovl_fill_plain(struct dir_context *ctx, const char *name,
+static bool ovl_fill_plain(struct dir_context *ctx, const char *name,
int namelen, loff_t offset, u64 ino,
unsigned int d_type)
{
@@ -535,17 +633,17 @@ static int ovl_fill_plain(struct dir_context *ctx, const char *name,
container_of(ctx, struct ovl_readdir_data, ctx);
rdd->count++;
- p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
+ p = ovl_cache_entry_new(rdd, name, namelen, NULL, 0, ino, d_type);
if (p == NULL) {
rdd->err = -ENOMEM;
- return -ENOMEM;
+ return false;
}
list_add_tail(&p->l_node, rdd->list);
- return 0;
+ return true;
}
-static int ovl_dir_read_impure(struct path *path, struct list_head *list,
+static int ovl_dir_read_impure(const struct path *path, struct list_head *list,
struct rb_root *root)
{
int err;
@@ -553,6 +651,7 @@ static int ovl_dir_read_impure(struct path *path, struct list_head *list,
struct ovl_cache_entry *p, *n;
struct ovl_readdir_data rdd = {
.ctx.actor = ovl_fill_plain,
+ .ctx.count = INT_MAX,
.list = list,
.root = root,
};
@@ -568,13 +667,13 @@ static int ovl_dir_read_impure(struct path *path, struct list_head *list,
list_for_each_entry_safe(p, n, list, l_node) {
if (strcmp(p->name, ".") != 0 &&
strcmp(p->name, "..") != 0) {
- err = ovl_cache_update_ino(path, p);
+ err = ovl_cache_update(path, p, true);
if (err)
return err;
}
if (p->ino == p->real_ino) {
list_del(&p->l_node);
- kfree(p);
+ ovl_cache_entry_free(p);
} else {
struct rb_node **newp = &root->rb_node;
struct rb_node *parent = NULL;
@@ -590,19 +689,21 @@ static int ovl_dir_read_impure(struct path *path, struct list_head *list,
return 0;
}
-static struct ovl_dir_cache *ovl_cache_get_impure(struct path *path)
+static struct ovl_dir_cache *ovl_cache_get_impure(const struct path *path)
{
int res;
struct dentry *dentry = path->dentry;
+ struct inode *inode = d_inode(dentry);
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct ovl_dir_cache *cache;
- cache = ovl_dir_cache(d_inode(dentry));
- if (cache && ovl_dentry_version_get(dentry) == cache->version)
+ cache = ovl_dir_cache(inode);
+ if (cache && ovl_inode_version_get(inode) == cache->version)
return cache;
/* Impure cache is not refcounted, free it here */
- ovl_dir_cache_free(d_inode(dentry));
- ovl_set_dir_cache(d_inode(dentry), NULL);
+ ovl_dir_cache_free(inode);
+ ovl_set_dir_cache(inode, NULL);
cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
if (!cache)
@@ -620,17 +721,17 @@ static struct ovl_dir_cache *ovl_cache_get_impure(struct path *path)
* Removing the "impure" xattr is best effort.
*/
if (!ovl_want_write(dentry)) {
- ovl_do_removexattr(ovl_dentry_upper(dentry),
- OVL_XATTR_IMPURE);
+ ovl_removexattr(ofs, ovl_dentry_upper(dentry),
+ OVL_XATTR_IMPURE);
ovl_drop_write(dentry);
}
- ovl_clear_flag(OVL_IMPURE, d_inode(dentry));
+ ovl_clear_flag(OVL_IMPURE, inode);
kfree(cache);
return NULL;
}
- cache->version = ovl_dentry_version_get(dentry);
- ovl_set_dir_cache(d_inode(dentry), cache);
+ cache->version = ovl_inode_version_get(inode);
+ ovl_set_dir_cache(inode, cache);
return cache;
}
@@ -642,15 +743,17 @@ struct ovl_readdir_translate {
u64 parent_ino;
int fsid;
int xinobits;
+ bool xinowarn;
};
-static int ovl_fill_real(struct dir_context *ctx, const char *name,
+static bool ovl_fill_real(struct dir_context *ctx, const char *name,
int namelen, loff_t offset, u64 ino,
unsigned int d_type)
{
struct ovl_readdir_translate *rdt =
container_of(ctx, struct ovl_readdir_translate, ctx);
struct dir_context *orig_ctx = rdt->orig_ctx;
+ bool res;
if (rdt->parent_ino && strcmp(name, "..") == 0) {
ino = rdt->parent_ino;
@@ -662,16 +765,19 @@ static int ovl_fill_real(struct dir_context *ctx, const char *name,
ino = p->ino;
} else if (rdt->xinobits) {
ino = ovl_remap_lower_ino(ino, rdt->xinobits, rdt->fsid,
- name, namelen);
+ name, namelen, rdt->xinowarn);
}
- return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
+ res = orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
+ ctx->count = orig_ctx->count;
+
+ return res;
}
static bool ovl_is_impure_dir(struct file *file)
{
struct ovl_dir_file *od = file->private_data;
- struct inode *dir = d_inode(file->f_path.dentry);
+ struct inode *dir = file_inode(file);
/*
* Only upper dir can be impure, but if we are in the middle of
@@ -688,11 +794,14 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
int err;
struct ovl_dir_file *od = file->private_data;
struct dentry *dir = file->f_path.dentry;
- struct ovl_layer *lower_layer = ovl_layer_lower(dir);
+ struct ovl_fs *ofs = OVL_FS(dir->d_sb);
+ const struct ovl_layer *lower_layer = ovl_layer_lower(dir);
struct ovl_readdir_translate rdt = {
.ctx.actor = ovl_fill_real,
+ .ctx.count = ctx->count,
.orig_ctx = ctx,
- .xinobits = ovl_xino_bits(dir->d_sb),
+ .xinobits = ovl_xino_bits(ofs),
+ .xinowarn = ovl_xino_warn(ofs),
};
if (rdt.xinobits && lower_layer)
@@ -723,38 +832,20 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
return err;
}
-
-static int ovl_iterate(struct file *file, struct dir_context *ctx)
+static int ovl_iterate_merged(struct file *file, struct dir_context *ctx)
{
struct ovl_dir_file *od = file->private_data;
struct dentry *dentry = file->f_path.dentry;
struct ovl_cache_entry *p;
- int err;
-
- if (!ctx->pos)
- ovl_dir_reset(file);
-
- if (od->is_real) {
- /*
- * If parent is merge, then need to adjust d_ino for '..', if
- * dir is impure then need to adjust d_ino for copied up
- * entries.
- */
- if (ovl_xino_bits(dentry->d_sb) ||
- (ovl_same_sb(dentry->d_sb) &&
- (ovl_is_impure_dir(file) ||
- OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) {
- return ovl_iterate_real(file, ctx);
- }
- return iterate_dir(od->realfile, ctx);
- }
+ int err = 0;
if (!od->cache) {
struct ovl_dir_cache *cache;
cache = ovl_cache_get(dentry);
+ err = PTR_ERR(cache);
if (IS_ERR(cache))
- return PTR_ERR(cache);
+ return err;
od->cache = cache;
ovl_seek_cursor(od, ctx->pos);
@@ -763,18 +854,62 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx)
while (od->cursor != &od->cache->entries) {
p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
if (!p->is_whiteout) {
- if (!p->ino) {
- err = ovl_cache_update_ino(&file->f_path, p);
+ if (!p->ino || p->check_xwhiteout) {
+ err = ovl_cache_update(&file->f_path, p, !p->ino);
if (err)
return err;
}
+ }
+ /* ovl_cache_update() sets is_whiteout on stale entry */
+ if (!p->is_whiteout) {
if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
break;
}
od->cursor = p->l_node.next;
ctx->pos++;
}
- return 0;
+ return err;
+}
+
+static bool ovl_need_adjust_d_ino(struct file *file)
+{
+ struct dentry *dentry = file->f_path.dentry;
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+
+ /* If parent is merge, then need to adjust d_ino for '..' */
+ if (ovl_xino_bits(ofs))
+ return true;
+
+ /* Can't do consistent inode numbering */
+ if (!ovl_same_fs(ofs))
+ return false;
+
+ /* If dir is impure then need to adjust d_ino for copied up entries */
+ if (ovl_is_impure_dir(file) ||
+ OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent)))
+ return true;
+
+ /* Pure: no need to adjust d_ino */
+ return false;
+}
+
+
+static int ovl_iterate(struct file *file, struct dir_context *ctx)
+{
+ struct ovl_dir_file *od = file->private_data;
+
+ if (!ctx->pos)
+ ovl_dir_reset(file);
+
+ with_ovl_creds(file_dentry(file)->d_sb) {
+ if (!od->is_real)
+ return ovl_iterate_merged(file, ctx);
+
+ if (ovl_need_adjust_d_ino(file))
+ return ovl_iterate_real(file, ctx);
+
+ return iterate_dir(od->realfile, ctx);
+ }
}
static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
@@ -817,47 +952,71 @@ out_unlock:
return res;
}
-static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
- int datasync)
+static struct file *ovl_dir_open_realfile(const struct file *file,
+ const struct path *realpath)
{
+ with_ovl_creds(file_inode(file)->i_sb)
+ return ovl_path_open(realpath, O_RDONLY | (file->f_flags & O_LARGEFILE));
+}
+
+/*
+ * Like ovl_real_fdget(), returns upperfile if dir was copied up since open.
+ * Unlike ovl_real_fdget(), this caches upperfile in file->private_data.
+ *
+ * TODO: use same abstract type for file->private_data of dir and file so
+ * upperfile could also be cached for files as well.
+ */
+struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
+{
+
struct ovl_dir_file *od = file->private_data;
struct dentry *dentry = file->f_path.dentry;
- struct file *realfile = od->realfile;
+ struct file *old, *realfile = od->realfile;
- /* Nothing to sync for lower */
if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
- return 0;
+ return want_upper ? NULL : realfile;
/*
* Need to check if we started out being a lower dir, but got copied up
*/
if (!od->is_upper) {
- struct inode *inode = file_inode(file);
-
realfile = READ_ONCE(od->upperfile);
if (!realfile) {
struct path upperpath;
ovl_path_upper(dentry, &upperpath);
- realfile = ovl_path_open(&upperpath, O_RDONLY);
-
- inode_lock(inode);
- if (!od->upperfile) {
- if (IS_ERR(realfile)) {
- inode_unlock(inode);
- return PTR_ERR(realfile);
- }
- smp_store_release(&od->upperfile, realfile);
- } else {
- /* somebody has beaten us to it */
- if (!IS_ERR(realfile))
- fput(realfile);
- realfile = od->upperfile;
+ realfile = ovl_dir_open_realfile(file, &upperpath);
+ if (IS_ERR(realfile))
+ return realfile;
+
+ old = cmpxchg_release(&od->upperfile, NULL, realfile);
+ if (old) {
+ fput(realfile);
+ realfile = old;
}
- inode_unlock(inode);
}
}
+ return realfile;
+}
+
+static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync)
+{
+ struct file *realfile;
+ int err;
+
+ err = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
+ if (err <= 0)
+ return err;
+
+ realfile = ovl_dir_real_file(file, true);
+ err = PTR_ERR_OR_ZERO(realfile);
+
+ /* Nothing to sync for lower */
+ if (!realfile || err)
+ return err;
+
return vfs_fsync_range(realfile, start, end, datasync);
}
@@ -867,7 +1026,7 @@ static int ovl_dir_release(struct inode *inode, struct file *file)
if (od->cache) {
inode_lock(inode);
- ovl_cache_put(od, file->f_path.dentry);
+ ovl_cache_put(od, inode);
inode_unlock(inode);
}
fput(od->realfile);
@@ -890,23 +1049,24 @@ static int ovl_dir_open(struct inode *inode, struct file *file)
return -ENOMEM;
type = ovl_path_real(file->f_path.dentry, &realpath);
- realfile = ovl_path_open(&realpath, file->f_flags);
+ realfile = ovl_dir_open_realfile(file, &realpath);
if (IS_ERR(realfile)) {
kfree(od);
return PTR_ERR(realfile);
}
od->realfile = realfile;
- od->is_real = ovl_dir_is_real(file->f_path.dentry);
+ od->is_real = ovl_dir_is_real(inode);
od->is_upper = OVL_TYPE_UPPER(type);
file->private_data = od;
return 0;
}
+WRAP_DIR_ITER(ovl_iterate) // FIXME!
const struct file_operations ovl_dir_operations = {
.read = generic_read_dir,
.open = ovl_dir_open,
- .iterate = ovl_iterate,
+ .iterate_shared = shared_ovl_iterate,
.llseek = ovl_dir_llseek,
.fsync = ovl_dir_fsync,
.release = ovl_dir_release,
@@ -917,11 +1077,9 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
int err;
struct ovl_cache_entry *p, *n;
struct rb_root root = RB_ROOT;
- const struct cred *old_cred;
- old_cred = ovl_override_creds(dentry->d_sb);
- err = ovl_dir_read_merged(dentry, list, &root);
- revert_creds(old_cred);
+ with_ovl_creds(dentry->d_sb)
+ err = ovl_dir_read_merged(dentry, list, &root);
if (err)
return err;
@@ -949,38 +1107,37 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
del_entry:
list_del(&p->l_node);
- kfree(p);
+ ovl_cache_entry_free(p);
}
return err;
}
-void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
+void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper,
+ struct list_head *list)
{
struct ovl_cache_entry *p;
- inode_lock_nested(upper->d_inode, I_MUTEX_CHILD);
list_for_each_entry(p, list, l_node) {
struct dentry *dentry;
if (WARN_ON(!p->is_whiteout || !p->is_upper))
continue;
- dentry = lookup_one_len(p->name, upper, p->len);
+ dentry = ovl_lookup_upper_unlocked(ofs, p->name, upper, p->len);
if (IS_ERR(dentry)) {
- pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
+ pr_err("lookup '%s/%.*s' failed (%i)\n",
upper->d_name.name, p->len, p->name,
(int) PTR_ERR(dentry));
continue;
}
if (dentry->d_inode)
- ovl_cleanup(upper->d_inode, dentry);
+ ovl_cleanup(ofs, upper, dentry);
dput(dentry);
}
- inode_unlock(upper->d_inode);
}
-static int ovl_check_d_type(struct dir_context *ctx, const char *name,
+static bool ovl_check_d_type(struct dir_context *ctx, const char *name,
int namelen, loff_t offset, u64 ino,
unsigned int d_type)
{
@@ -989,23 +1146,24 @@ static int ovl_check_d_type(struct dir_context *ctx, const char *name,
/* Even if d_type is not supported, DT_DIR is returned for . and .. */
if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen))
- return 0;
+ return true;
if (d_type != DT_UNKNOWN)
rdd->d_type_supported = true;
- return 0;
+ return true;
}
/*
* Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
* if error is encountered.
*/
-int ovl_check_d_type_supported(struct path *realpath)
+int ovl_check_d_type_supported(const struct path *realpath)
{
int err;
struct ovl_readdir_data rdd = {
.ctx.actor = ovl_check_d_type,
+ .ctx.count = INT_MAX,
.d_type_supported = false,
};
@@ -1016,26 +1174,37 @@ int ovl_check_d_type_supported(struct path *realpath)
return rdd.d_type_supported;
}
-static void ovl_workdir_cleanup_recurse(struct path *path, int level)
+#define OVL_INCOMPATDIR_NAME "incompat"
+
+static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *path,
+ int level)
{
int err;
- struct inode *dir = path->dentry->d_inode;
LIST_HEAD(list);
- struct rb_root root = RB_ROOT;
struct ovl_cache_entry *p;
struct ovl_readdir_data rdd = {
- .ctx.actor = ovl_fill_merge,
- .dentry = NULL,
+ .ctx.actor = ovl_fill_plain,
+ .ctx.count = INT_MAX,
.list = &list,
- .root = &root,
- .is_lowest = false,
};
+ bool incompat = false;
+
+ /*
+ * The "work/incompat" directory is treated specially - if it is not
+ * empty, instead of printing a generic error and mounting read-only,
+ * we will error about incompat features and fail the mount.
+ *
+ * When called from ovl_indexdir_cleanup(), path->dentry->d_name.name
+ * starts with '#'.
+ */
+ if (level == 2 &&
+ !strcmp(path->dentry->d_name.name, OVL_INCOMPATDIR_NAME))
+ incompat = true;
err = ovl_dir_read(path, &rdd);
if (err)
goto out;
- inode_lock_nested(dir, I_MUTEX_PARENT);
list_for_each_entry(p, &list, l_node) {
struct dentry *dentry;
@@ -1044,63 +1213,69 @@ static void ovl_workdir_cleanup_recurse(struct path *path, int level)
continue;
if (p->len == 2 && p->name[1] == '.')
continue;
+ } else if (incompat) {
+ pr_err("overlay with incompat feature '%s' cannot be mounted\n",
+ p->name);
+ err = -EINVAL;
+ break;
}
- dentry = lookup_one_len(p->name, path->dentry, p->len);
+ dentry = ovl_lookup_upper_unlocked(ofs, p->name, path->dentry, p->len);
if (IS_ERR(dentry))
continue;
if (dentry->d_inode)
- ovl_workdir_cleanup(dir, path->mnt, dentry, level);
+ err = ovl_workdir_cleanup(ofs, path->dentry, path->mnt,
+ dentry, level);
dput(dentry);
+ if (err)
+ break;
}
- inode_unlock(dir);
out:
ovl_cache_free(&list);
+ return err;
}
-void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
- struct dentry *dentry, int level)
+int ovl_workdir_cleanup(struct ovl_fs *ofs, struct dentry *parent,
+ struct vfsmount *mnt, struct dentry *dentry, int level)
{
int err;
- if (!d_is_dir(dentry) || level > 1) {
- ovl_cleanup(dir, dentry);
- return;
- }
+ if (!d_is_dir(dentry) || level > 1)
+ return ovl_cleanup(ofs, parent, dentry);
- err = ovl_do_rmdir(dir, dentry);
+ dentry = start_removing_dentry(parent, dentry);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+ err = ovl_do_rmdir(ofs, parent->d_inode, dentry);
+ end_removing(dentry);
if (err) {
struct path path = { .mnt = mnt, .dentry = dentry };
- inode_unlock(dir);
- ovl_workdir_cleanup_recurse(&path, level + 1);
- inode_lock_nested(dir, I_MUTEX_PARENT);
- ovl_cleanup(dir, dentry);
+ err = ovl_workdir_cleanup_recurse(ofs, &path, level + 1);
+ if (!err)
+ err = ovl_cleanup(ofs, parent, dentry);
}
+
+ return err;
}
int ovl_indexdir_cleanup(struct ovl_fs *ofs)
{
int err;
- struct dentry *indexdir = ofs->indexdir;
+ struct dentry *indexdir = ofs->workdir;
struct dentry *index = NULL;
- struct inode *dir = indexdir->d_inode;
- struct path path = { .mnt = ofs->upper_mnt, .dentry = indexdir };
+ struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir };
LIST_HEAD(list);
- struct rb_root root = RB_ROOT;
struct ovl_cache_entry *p;
struct ovl_readdir_data rdd = {
- .ctx.actor = ovl_fill_merge,
- .dentry = NULL,
+ .ctx.actor = ovl_fill_plain,
+ .ctx.count = INT_MAX,
.list = &list,
- .root = &root,
- .is_lowest = false,
};
err = ovl_dir_read(&path, &rdd);
if (err)
goto out;
- inode_lock_nested(dir, I_MUTEX_PARENT);
list_for_each_entry(p, &list, l_node) {
if (p->name[0] == '.') {
if (p->len == 1)
@@ -1108,18 +1283,25 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
if (p->len == 2 && p->name[1] == '.')
continue;
}
- index = lookup_one_len(p->name, indexdir, p->len);
+ index = ovl_lookup_upper_unlocked(ofs, p->name, indexdir, p->len);
if (IS_ERR(index)) {
err = PTR_ERR(index);
index = NULL;
break;
}
+ /* Cleanup leftover from index create/cleanup attempt */
+ if (index->d_name.name[0] == '#') {
+ err = ovl_workdir_cleanup(ofs, indexdir, path.mnt, index, 1);
+ if (err)
+ break;
+ goto next;
+ }
err = ovl_verify_index(ofs, index);
if (!err) {
goto next;
} else if (err == -ESTALE) {
/* Cleanup stale index entries */
- err = ovl_cleanup(dir, index);
+ err = ovl_cleanup(ofs, indexdir, index);
} else if (err != -ENOENT) {
/*
* Abort mount to avoid corrupting the index if
@@ -1132,10 +1314,10 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
* Whiteout orphan index to block future open by
* handle after overlay nlink dropped to zero.
*/
- err = ovl_cleanup_and_whiteout(indexdir, dir, index);
+ err = ovl_cleanup_and_whiteout(ofs, indexdir, index);
} else {
/* Cleanup orphan index entries */
- err = ovl_cleanup(dir, index);
+ err = ovl_cleanup(ofs, indexdir, index);
}
if (err)
@@ -1146,10 +1328,9 @@ next:
index = NULL;
}
dput(index);
- inode_unlock(dir);
out:
ovl_cache_free(&list);
if (err)
- pr_err("overlayfs: failed index dir cleanup (%i)\n", err);
+ pr_err("failed index dir cleanup (%i)\n", err);
return err;
}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 0116735cc321..ba9146f22a2c 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
*
* Copyright (C) 2011 Novell Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
*/
#include <uapi/linux/magic.h>
@@ -18,7 +15,11 @@
#include <linux/seq_file.h>
#include <linux/posix_acl_xattr.h>
#include <linux/exportfs.h>
+#include <linux/file.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include "overlayfs.h"
+#include "params.h"
MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Overlay filesystem");
@@ -27,153 +28,162 @@ MODULE_LICENSE("GPL");
struct ovl_dir_cache;
-#define OVL_MAX_STACK 500
-
-static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
-module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
-MODULE_PARM_DESC(ovl_redirect_dir_def,
- "Default to on or off for the redirect_dir feature");
-
-static bool ovl_redirect_always_follow =
- IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
-module_param_named(redirect_always_follow, ovl_redirect_always_follow,
- bool, 0644);
-MODULE_PARM_DESC(ovl_redirect_always_follow,
- "Follow redirects even if redirect_dir feature is turned off");
-
-static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
-module_param_named(index, ovl_index_def, bool, 0644);
-MODULE_PARM_DESC(ovl_index_def,
- "Default to on or off for the inodes index feature");
-
-static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
-module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
-MODULE_PARM_DESC(ovl_nfs_export_def,
- "Default to on or off for the NFS export feature");
-
-static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO);
-module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
-MODULE_PARM_DESC(ovl_xino_auto_def,
- "Auto enable xino feature");
-
-static void ovl_entry_stack_free(struct ovl_entry *oe)
-{
- unsigned int i;
-
- for (i = 0; i < oe->numlower; i++)
- dput(oe->lowerstack[i].dentry);
-}
-
-static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY);
-module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
-MODULE_PARM_DESC(ovl_metacopy_def,
- "Default to on or off for the metadata only copy up feature");
-
-static void ovl_dentry_release(struct dentry *dentry)
+static struct dentry *ovl_d_real(struct dentry *dentry, enum d_real_type type)
{
- struct ovl_entry *oe = dentry->d_fsdata;
+ struct dentry *upper, *lower;
+ int err;
- if (oe) {
- ovl_entry_stack_free(oe);
- kfree_rcu(oe, rcu);
+ switch (type) {
+ case D_REAL_DATA:
+ case D_REAL_METADATA:
+ break;
+ default:
+ goto bug;
}
-}
-
-static struct dentry *ovl_d_real(struct dentry *dentry,
- const struct inode *inode)
-{
- struct dentry *real;
-
- /* It's an overlay file */
- if (inode && d_inode(dentry) == inode)
- return dentry;
if (!d_is_reg(dentry)) {
- if (!inode || inode == d_inode(dentry))
- return dentry;
- goto bug;
+ /* d_real_inode() is only relevant for regular files */
+ return dentry;
}
- real = ovl_dentry_upper(dentry);
- if (real && (inode == d_inode(real)))
- return real;
+ upper = ovl_dentry_upper(dentry);
+ if (upper && (type == D_REAL_METADATA ||
+ ovl_has_upperdata(d_inode(dentry))))
+ return upper;
- if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
- return real;
+ if (type == D_REAL_METADATA) {
+ lower = ovl_dentry_lower(dentry);
+ goto real_lower;
+ }
- real = ovl_dentry_lowerdata(dentry);
- if (!real)
+ /*
+ * Best effort lazy lookup of lowerdata for D_REAL_DATA case to return
+ * the real lowerdata dentry. The only current caller of d_real() with
+ * D_REAL_DATA is d_real_inode() from trace_uprobe and this caller is
+ * likely going to be followed reading from the file, before placing
+ * uprobes on offset within the file, so lowerdata should be available
+ * when setting the uprobe.
+ */
+ err = ovl_verify_lowerdata(dentry);
+ if (err)
+ goto bug;
+ lower = ovl_dentry_lowerdata(dentry);
+ if (!lower)
goto bug;
- /* Handle recursion */
- real = d_real(real, inode);
+real_lower:
+ /* Handle recursion into stacked lower fs */
+ return d_real(lower, type);
- if (!inode || inode == d_inode(real))
- return real;
bug:
- WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
- inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
+ WARN(1, "%s(%pd4, %d): real dentry not found\n", __func__, dentry, type);
return dentry;
}
-static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
+static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak)
{
- struct ovl_entry *oe = dentry->d_fsdata;
- unsigned int i;
int ret = 1;
- for (i = 0; i < oe->numlower; i++) {
- struct dentry *d = oe->lowerstack[i].dentry;
-
- if (d->d_flags & DCACHE_OP_REVALIDATE) {
- ret = d->d_op->d_revalidate(d, flags);
- if (ret < 0)
- return ret;
- if (!ret) {
- if (!(flags & LOOKUP_RCU))
- d_invalidate(d);
- return -ESTALE;
- }
+ if (!d)
+ return 1;
+
+ if (weak) {
+ if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE)
+ ret = d->d_op->d_weak_revalidate(d, flags);
+ } else if (d->d_flags & DCACHE_OP_REVALIDATE) {
+ struct dentry *parent;
+ struct inode *dir;
+ struct name_snapshot n;
+
+ if (flags & LOOKUP_RCU) {
+ parent = READ_ONCE(d->d_parent);
+ dir = d_inode_rcu(parent);
+ if (!dir)
+ return -ECHILD;
+ } else {
+ parent = dget_parent(d);
+ dir = d_inode(parent);
+ }
+ take_dentry_name_snapshot(&n, d);
+ ret = d->d_op->d_revalidate(dir, &n.name, d, flags);
+ release_dentry_name_snapshot(&n);
+ if (!(flags & LOOKUP_RCU))
+ dput(parent);
+ if (!ret) {
+ if (!(flags & LOOKUP_RCU))
+ d_invalidate(d);
+ ret = -ESTALE;
}
}
- return 1;
+ return ret;
}
-static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
+static int ovl_dentry_revalidate_common(struct dentry *dentry,
+ unsigned int flags, bool weak)
{
- struct ovl_entry *oe = dentry->d_fsdata;
+ struct ovl_entry *oe;
+ struct ovl_path *lowerstack;
+ struct inode *inode = d_inode_rcu(dentry);
+ struct dentry *upper;
unsigned int i;
int ret = 1;
- for (i = 0; i < oe->numlower; i++) {
- struct dentry *d = oe->lowerstack[i].dentry;
-
- if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) {
- ret = d->d_op->d_weak_revalidate(d, flags);
- if (ret <= 0)
- break;
- }
+ if (!inode) {
+ /*
+ * Lookup of negative dentries will call ovl_dentry_init_flags()
+ * with NULL upperdentry and NULL oe, resulting in the
+ * DCACHE_OP*_REVALIDATE flags being cleared. Hence the only
+ * way to get a negative inode is due to a race with dentry
+ * destruction.
+ */
+ WARN_ON(!(flags & LOOKUP_RCU));
+ return -ECHILD;
}
+
+ oe = OVL_I_E(inode);
+ lowerstack = ovl_lowerstack(oe);
+ upper = ovl_i_dentry_upper(inode);
+ if (upper)
+ ret = ovl_revalidate_real(upper, flags, weak);
+
+ for (i = 0; ret > 0 && i < ovl_numlower(oe); i++)
+ ret = ovl_revalidate_real(lowerstack[i].dentry, flags, weak);
+
return ret;
}
+static int ovl_dentry_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
+{
+ return ovl_dentry_revalidate_common(dentry, flags, false);
+}
+
+static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ return ovl_dentry_revalidate_common(dentry, flags, true);
+}
+
static const struct dentry_operations ovl_dentry_operations = {
- .d_release = ovl_dentry_release,
.d_real = ovl_d_real,
+ .d_revalidate = ovl_dentry_revalidate,
+ .d_weak_revalidate = ovl_dentry_weak_revalidate,
};
-static const struct dentry_operations ovl_reval_dentry_operations = {
- .d_release = ovl_dentry_release,
+#if IS_ENABLED(CONFIG_UNICODE)
+static const struct dentry_operations ovl_dentry_ci_operations = {
.d_real = ovl_d_real,
.d_revalidate = ovl_dentry_revalidate,
.d_weak_revalidate = ovl_dentry_weak_revalidate,
+ .d_hash = generic_ci_d_hash,
+ .d_compare = generic_ci_d_compare,
};
+#endif
static struct kmem_cache *ovl_inode_cachep;
static struct inode *ovl_alloc_inode(struct super_block *sb)
{
- struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
+ struct ovl_inode *oi = alloc_inode_sb(sb, ovl_inode_cachep, GFP_KERNEL);
if (!oi)
return NULL;
@@ -183,18 +193,21 @@ static struct inode *ovl_alloc_inode(struct super_block *sb)
oi->version = 0;
oi->flags = 0;
oi->__upperdentry = NULL;
- oi->lower = NULL;
- oi->lowerdata = NULL;
+ oi->lowerdata_redirect = NULL;
+ oi->oe = NULL;
mutex_init(&oi->lock);
return &oi->vfs_inode;
}
-static void ovl_i_callback(struct rcu_head *head)
+static void ovl_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct ovl_inode *oi = OVL_I(inode);
- kmem_cache_free(ovl_inode_cachep, OVL_I(inode));
+ kfree(oi->redirect);
+ kfree(oi->oe);
+ mutex_destroy(&oi->lock);
+ kmem_cache_free(ovl_inode_cachep, oi);
}
static void ovl_destroy_inode(struct inode *inode)
@@ -202,65 +215,39 @@ static void ovl_destroy_inode(struct inode *inode)
struct ovl_inode *oi = OVL_I(inode);
dput(oi->__upperdentry);
- iput(oi->lower);
+ ovl_stack_put(ovl_lowerstack(oi->oe), ovl_numlower(oi->oe));
if (S_ISDIR(inode->i_mode))
ovl_dir_cache_free(inode);
else
- iput(oi->lowerdata);
- kfree(oi->redirect);
- mutex_destroy(&oi->lock);
-
- call_rcu(&inode->i_rcu, ovl_i_callback);
-}
-
-static void ovl_free_fs(struct ovl_fs *ofs)
-{
- unsigned i;
-
- dput(ofs->indexdir);
- dput(ofs->workdir);
- if (ofs->workdir_locked)
- ovl_inuse_unlock(ofs->workbasedir);
- dput(ofs->workbasedir);
- if (ofs->upperdir_locked)
- ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
- mntput(ofs->upper_mnt);
- for (i = 0; i < ofs->numlower; i++)
- mntput(ofs->lower_layers[i].mnt);
- for (i = 0; i < ofs->numlowerfs; i++)
- free_anon_bdev(ofs->lower_fs[i].pseudo_dev);
- kfree(ofs->lower_layers);
- kfree(ofs->lower_fs);
-
- kfree(ofs->config.lowerdir);
- kfree(ofs->config.upperdir);
- kfree(ofs->config.workdir);
- kfree(ofs->config.redirect_mode);
- if (ofs->creator_cred)
- put_cred(ofs->creator_cred);
- kfree(ofs);
+ kfree(oi->lowerdata_redirect);
}
static void ovl_put_super(struct super_block *sb)
{
- struct ovl_fs *ofs = sb->s_fs_info;
+ struct ovl_fs *ofs = OVL_FS(sb);
- ovl_free_fs(ofs);
+ if (ofs)
+ ovl_free_fs(ofs);
}
/* Sync real dirty inodes in upper filesystem (if it exists) */
static int ovl_sync_fs(struct super_block *sb, int wait)
{
- struct ovl_fs *ofs = sb->s_fs_info;
+ struct ovl_fs *ofs = OVL_FS(sb);
struct super_block *upper_sb;
int ret;
- if (!ofs->upper_mnt)
- return 0;
+ ret = ovl_sync_status(ofs);
+
+ if (ret < 0)
+ return -EIO;
+
+ if (!ret)
+ return ret;
/*
- * If this is a sync(2) call or an emergency sync, all the super blocks
- * will be iterated, including upper_sb, so no need to do anything.
+ * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC).
+ * All the super blocks will be iterated, including upper_sb.
*
* If this is a syncfs(2) call, then we do need to call
* sync_filesystem() on upper_sb, but enough if we do it when being
@@ -269,7 +256,7 @@ static int ovl_sync_fs(struct super_block *sb, int wait)
if (!wait)
return 0;
- upper_sb = ofs->upper_mnt->mnt_sb;
+ upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
down_read(&upper_sb->s_umount);
ret = sync_filesystem(upper_sb);
@@ -280,7 +267,7 @@ static int ovl_sync_fs(struct super_block *sb, int wait)
/**
* ovl_statfs
- * @sb: The overlayfs super block
+ * @dentry: The dentry to query
* @buf: The struct kstatfs to fill in with stats
*
* Get the filesystem statistics. As writes always target the upper layer
@@ -288,8 +275,9 @@ static int ovl_sync_fs(struct super_block *sb, int wait)
*/
static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
{
- struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
- struct dentry *root_dentry = dentry->d_sb->s_root;
+ struct super_block *sb = dentry->d_sb;
+ struct ovl_fs *ofs = OVL_FS(sb);
+ struct dentry *root_dentry = sb->s_root;
struct path path;
int err;
@@ -299,313 +287,24 @@ static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
if (!err) {
buf->f_namelen = ofs->namelen;
buf->f_type = OVERLAYFS_SUPER_MAGIC;
+ if (ovl_has_fsid(ofs))
+ buf->f_fsid = uuid_to_fsid(sb->s_uuid.b);
}
return err;
}
-/* Will this overlay be forced to mount/remount ro? */
-static bool ovl_force_readonly(struct ovl_fs *ofs)
-{
- return (!ofs->upper_mnt || !ofs->workdir);
-}
-
-static const char *ovl_redirect_mode_def(void)
-{
- return ovl_redirect_dir_def ? "on" : "off";
-}
-
-enum {
- OVL_XINO_OFF,
- OVL_XINO_AUTO,
- OVL_XINO_ON,
-};
-
-static const char * const ovl_xino_str[] = {
- "off",
- "auto",
- "on",
-};
-
-static inline int ovl_xino_def(void)
-{
- return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF;
-}
-
-/**
- * ovl_show_options
- *
- * Prints the mount options for a given superblock.
- * Returns zero; does not fail.
- */
-static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
-{
- struct super_block *sb = dentry->d_sb;
- struct ovl_fs *ofs = sb->s_fs_info;
-
- seq_show_option(m, "lowerdir", ofs->config.lowerdir);
- if (ofs->config.upperdir) {
- seq_show_option(m, "upperdir", ofs->config.upperdir);
- seq_show_option(m, "workdir", ofs->config.workdir);
- }
- if (ofs->config.default_permissions)
- seq_puts(m, ",default_permissions");
- if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
- seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
- if (ofs->config.index != ovl_index_def)
- seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
- if (ofs->config.nfs_export != ovl_nfs_export_def)
- seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
- "on" : "off");
- if (ofs->config.xino != ovl_xino_def())
- seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
- if (ofs->config.metacopy != ovl_metacopy_def)
- seq_printf(m, ",metacopy=%s",
- ofs->config.metacopy ? "on" : "off");
- return 0;
-}
-
-static int ovl_remount(struct super_block *sb, int *flags, char *data)
-{
- struct ovl_fs *ofs = sb->s_fs_info;
-
- if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs))
- return -EROFS;
-
- return 0;
-}
-
static const struct super_operations ovl_super_operations = {
.alloc_inode = ovl_alloc_inode,
+ .free_inode = ovl_free_inode,
.destroy_inode = ovl_destroy_inode,
- .drop_inode = generic_delete_inode,
+ .drop_inode = inode_just_drop,
.put_super = ovl_put_super,
.sync_fs = ovl_sync_fs,
.statfs = ovl_statfs,
.show_options = ovl_show_options,
- .remount_fs = ovl_remount,
};
-enum {
- OPT_LOWERDIR,
- OPT_UPPERDIR,
- OPT_WORKDIR,
- OPT_DEFAULT_PERMISSIONS,
- OPT_REDIRECT_DIR,
- OPT_INDEX_ON,
- OPT_INDEX_OFF,
- OPT_NFS_EXPORT_ON,
- OPT_NFS_EXPORT_OFF,
- OPT_XINO_ON,
- OPT_XINO_OFF,
- OPT_XINO_AUTO,
- OPT_METACOPY_ON,
- OPT_METACOPY_OFF,
- OPT_ERR,
-};
-
-static const match_table_t ovl_tokens = {
- {OPT_LOWERDIR, "lowerdir=%s"},
- {OPT_UPPERDIR, "upperdir=%s"},
- {OPT_WORKDIR, "workdir=%s"},
- {OPT_DEFAULT_PERMISSIONS, "default_permissions"},
- {OPT_REDIRECT_DIR, "redirect_dir=%s"},
- {OPT_INDEX_ON, "index=on"},
- {OPT_INDEX_OFF, "index=off"},
- {OPT_NFS_EXPORT_ON, "nfs_export=on"},
- {OPT_NFS_EXPORT_OFF, "nfs_export=off"},
- {OPT_XINO_ON, "xino=on"},
- {OPT_XINO_OFF, "xino=off"},
- {OPT_XINO_AUTO, "xino=auto"},
- {OPT_METACOPY_ON, "metacopy=on"},
- {OPT_METACOPY_OFF, "metacopy=off"},
- {OPT_ERR, NULL}
-};
-
-static char *ovl_next_opt(char **s)
-{
- char *sbegin = *s;
- char *p;
-
- if (sbegin == NULL)
- return NULL;
-
- for (p = sbegin; *p; p++) {
- if (*p == '\\') {
- p++;
- if (!*p)
- break;
- } else if (*p == ',') {
- *p = '\0';
- *s = p + 1;
- return sbegin;
- }
- }
- *s = NULL;
- return sbegin;
-}
-
-static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
-{
- if (strcmp(mode, "on") == 0) {
- config->redirect_dir = true;
- /*
- * Does not make sense to have redirect creation without
- * redirect following.
- */
- config->redirect_follow = true;
- } else if (strcmp(mode, "follow") == 0) {
- config->redirect_follow = true;
- } else if (strcmp(mode, "off") == 0) {
- if (ovl_redirect_always_follow)
- config->redirect_follow = true;
- } else if (strcmp(mode, "nofollow") != 0) {
- pr_err("overlayfs: bad mount option \"redirect_dir=%s\"\n",
- mode);
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int ovl_parse_opt(char *opt, struct ovl_config *config)
-{
- char *p;
- int err;
- bool metacopy_opt = false, redirect_opt = false;
-
- config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
- if (!config->redirect_mode)
- return -ENOMEM;
-
- while ((p = ovl_next_opt(&opt)) != NULL) {
- int token;
- substring_t args[MAX_OPT_ARGS];
-
- if (!*p)
- continue;
-
- token = match_token(p, ovl_tokens, args);
- switch (token) {
- case OPT_UPPERDIR:
- kfree(config->upperdir);
- config->upperdir = match_strdup(&args[0]);
- if (!config->upperdir)
- return -ENOMEM;
- break;
-
- case OPT_LOWERDIR:
- kfree(config->lowerdir);
- config->lowerdir = match_strdup(&args[0]);
- if (!config->lowerdir)
- return -ENOMEM;
- break;
-
- case OPT_WORKDIR:
- kfree(config->workdir);
- config->workdir = match_strdup(&args[0]);
- if (!config->workdir)
- return -ENOMEM;
- break;
-
- case OPT_DEFAULT_PERMISSIONS:
- config->default_permissions = true;
- break;
-
- case OPT_REDIRECT_DIR:
- kfree(config->redirect_mode);
- config->redirect_mode = match_strdup(&args[0]);
- if (!config->redirect_mode)
- return -ENOMEM;
- redirect_opt = true;
- break;
-
- case OPT_INDEX_ON:
- config->index = true;
- break;
-
- case OPT_INDEX_OFF:
- config->index = false;
- break;
-
- case OPT_NFS_EXPORT_ON:
- config->nfs_export = true;
- break;
-
- case OPT_NFS_EXPORT_OFF:
- config->nfs_export = false;
- break;
-
- case OPT_XINO_ON:
- config->xino = OVL_XINO_ON;
- break;
-
- case OPT_XINO_OFF:
- config->xino = OVL_XINO_OFF;
- break;
-
- case OPT_XINO_AUTO:
- config->xino = OVL_XINO_AUTO;
- break;
-
- case OPT_METACOPY_ON:
- config->metacopy = true;
- metacopy_opt = true;
- break;
-
- case OPT_METACOPY_OFF:
- config->metacopy = false;
- break;
-
- default:
- pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
- return -EINVAL;
- }
- }
-
- /* Workdir is useless in non-upper mount */
- if (!config->upperdir && config->workdir) {
- pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
- config->workdir);
- kfree(config->workdir);
- config->workdir = NULL;
- }
-
- err = ovl_parse_redirect_mode(config, config->redirect_mode);
- if (err)
- return err;
-
- /*
- * This is to make the logic below simpler. It doesn't make any other
- * difference, since config->redirect_dir is only used for upper.
- */
- if (!config->upperdir && config->redirect_follow)
- config->redirect_dir = true;
-
- /* Resolve metacopy -> redirect_dir dependency */
- if (config->metacopy && !config->redirect_dir) {
- if (metacopy_opt && redirect_opt) {
- pr_err("overlayfs: conflicting options: metacopy=on,redirect_dir=%s\n",
- config->redirect_mode);
- return -EINVAL;
- }
- if (redirect_opt) {
- /*
- * There was an explicit redirect_dir=... that resulted
- * in this conflict.
- */
- pr_info("overlayfs: disabling metacopy due to redirect_dir=%s\n",
- config->redirect_mode);
- config->metacopy = false;
- } else {
- /* Automatically enable redirect otherwise. */
- config->redirect_follow = config->redirect_dir = true;
- }
- }
-
- return 0;
-}
-
#define OVL_WORKDIR_NAME "work"
#define OVL_INDEXDIR_NAME "index"
@@ -613,17 +312,13 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
const char *name, bool persist)
{
struct inode *dir = ofs->workbasedir->d_inode;
- struct vfsmount *mnt = ofs->upper_mnt;
+ struct vfsmount *mnt = ovl_upper_mnt(ofs);
struct dentry *work;
int err;
bool retried = false;
- bool locked = false;
-
- inode_lock_nested(dir, I_MUTEX_PARENT);
- locked = true;
retry:
- work = lookup_one_len(name, ofs->workbasedir, strlen(name));
+ work = ovl_start_creating_upper(ofs, ofs->workbasedir, &QSTR(name));
if (!IS_ERR(work)) {
struct iattr attr = {
@@ -632,24 +327,32 @@ retry:
};
if (work->d_inode) {
+ end_creating_keep(work);
+ if (persist)
+ return work;
err = -EEXIST;
if (retried)
goto out_dput;
-
- if (persist)
- goto out_unlock;
-
retried = true;
- ovl_workdir_cleanup(dir, mnt, work, 0);
+ err = ovl_workdir_cleanup(ofs, ofs->workbasedir, mnt, work, 0);
dput(work);
+ if (err == -EINVAL)
+ return ERR_PTR(err);
+
goto retry;
}
- work = ovl_create_real(dir, work, OVL_CATTR(attr.ia_mode));
+ work = ovl_do_mkdir(ofs, dir, work, attr.ia_mode);
+ end_creating_keep(work);
err = PTR_ERR(work);
if (IS_ERR(work))
goto out_err;
+ /* Weird filesystem returning with hashed negative (kernfs)? */
+ err = -EINVAL;
+ if (d_really_is_negative(work))
+ goto out_dput;
+
/*
* Try to remove POSIX ACL xattrs from workdir. We are good if:
*
@@ -663,17 +366,17 @@ retry:
* allowed as upper are limited to "normal" ones, where checking
* for the above two errors is sufficient.
*/
- err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
+ err = ovl_do_remove_acl(ofs, work, XATTR_NAME_POSIX_ACL_DEFAULT);
if (err && err != -ENODATA && err != -EOPNOTSUPP)
goto out_dput;
- err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS);
+ err = ovl_do_remove_acl(ofs, work, XATTR_NAME_POSIX_ACL_ACCESS);
if (err && err != -ENODATA && err != -EOPNOTSUPP)
goto out_dput;
/* Clear any inherited mode bits */
inode_lock(work->d_inode);
- err = notify_change(work, &attr, NULL);
+ err = ovl_do_notify_change(ofs, work, &attr);
inode_unlock(work->d_inode);
if (err)
goto out_dput;
@@ -681,118 +384,42 @@ retry:
err = PTR_ERR(work);
goto out_err;
}
-out_unlock:
- if (locked)
- inode_unlock(dir);
-
return work;
out_dput:
dput(work);
out_err:
- pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
+ pr_warn("failed to create directory %s/%s (errno: %i); mounting read-only\n",
ofs->config.workdir, name, -err);
- work = NULL;
- goto out_unlock;
-}
-
-static void ovl_unescape(char *s)
-{
- char *d = s;
-
- for (;; s++, d++) {
- if (*s == '\\')
- s++;
- *d = *s;
- if (!*s)
- break;
- }
-}
-
-static int ovl_mount_dir_noesc(const char *name, struct path *path)
-{
- int err = -EINVAL;
-
- if (!*name) {
- pr_err("overlayfs: empty lowerdir\n");
- goto out;
- }
- err = kern_path(name, LOOKUP_FOLLOW, path);
- if (err) {
- pr_err("overlayfs: failed to resolve '%s': %i\n", name, err);
- goto out;
- }
- err = -EINVAL;
- if (ovl_dentry_weird(path->dentry)) {
- pr_err("overlayfs: filesystem on '%s' not supported\n", name);
- goto out_put;
- }
- if (!d_is_dir(path->dentry)) {
- pr_err("overlayfs: '%s' not a directory\n", name);
- goto out_put;
- }
- return 0;
-
-out_put:
- path_put_init(path);
-out:
- return err;
+ return NULL;
}
-static int ovl_mount_dir(const char *name, struct path *path)
-{
- int err = -ENOMEM;
- char *tmp = kstrdup(name, GFP_KERNEL);
-
- if (tmp) {
- ovl_unescape(tmp);
- err = ovl_mount_dir_noesc(tmp, path);
-
- if (!err)
- if (ovl_dentry_remote(path->dentry)) {
- pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n",
- tmp);
- path_put_init(path);
- err = -EINVAL;
- }
- kfree(tmp);
- }
- return err;
-}
-
-static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs,
+static int ovl_check_namelen(const struct path *path, struct ovl_fs *ofs,
const char *name)
{
struct kstatfs statfs;
int err = vfs_statfs(path, &statfs);
if (err)
- pr_err("overlayfs: statfs failed on '%s'\n", name);
+ pr_err("statfs failed on '%s'\n", name);
else
ofs->namelen = max(ofs->namelen, statfs.f_namelen);
return err;
}
-static int ovl_lower_dir(const char *name, struct path *path,
- struct ovl_fs *ofs, int *stack_depth, bool *remote)
+static int ovl_lower_dir(const char *name, const struct path *path,
+ struct ovl_fs *ofs, int *stack_depth)
{
int fh_type;
int err;
- err = ovl_mount_dir_noesc(name, path);
- if (err)
- goto out;
-
err = ovl_check_namelen(path, ofs, name);
if (err)
- goto out_put;
+ return err;
*stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);
- if (ovl_dentry_remote(path->dentry))
- *remote = true;
-
/*
* The inodes index feature and NFS export need to encode and decode
* file handles, so they require that all layers support them.
@@ -802,20 +429,26 @@ static int ovl_lower_dir(const char *name, struct path *path,
(ofs->config.index && ofs->config.upperdir)) && !fh_type) {
ofs->config.index = false;
ofs->config.nfs_export = false;
- pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
+ pr_warn("fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
+ name);
+ }
+ ofs->nofh |= !fh_type;
+ /*
+ * Decoding origin file handle is required for persistent st_ino.
+ * Without persistent st_ino, xino=auto falls back to xino=off.
+ */
+ if (ofs->config.xino == OVL_XINO_AUTO &&
+ ofs->config.upperdir && !fh_type) {
+ ofs->config.xino = OVL_XINO_OFF;
+ pr_warn("fs on '%s' does not support file handles, falling back to xino=off.\n",
name);
}
/* Check if lower fs has 32bit inode numbers */
if (fh_type != FILEID_INO32_GEN)
- ofs->xino_bits = 0;
+ ofs->xino_mode = -1;
return 0;
-
-out_put:
- path_put_init(path);
-out:
- return err;
}
/* Workdir should not be subdir of upperdir and vice versa */
@@ -824,178 +457,61 @@ static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
bool ok = false;
if (workdir != upperdir) {
- ok = (lock_rename(workdir, upperdir) == NULL);
- unlock_rename(workdir, upperdir);
+ struct dentry *trap = lock_rename(workdir, upperdir);
+ if (!IS_ERR(trap))
+ unlock_rename(workdir, upperdir);
+ ok = (trap == NULL);
}
return ok;
}
-static unsigned int ovl_split_lowerdirs(char *str)
+static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
+ struct inode **ptrap, const char *name)
{
- unsigned int ctr = 1;
- char *s, *d;
-
- for (s = d = str;; s++, d++) {
- if (*s == '\\') {
- s++;
- } else if (*s == ':') {
- *d = '\0';
- ctr++;
- continue;
- }
- *d = *s;
- if (!*s)
- break;
- }
- return ctr;
-}
-
-static int __maybe_unused
-ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
- struct dentry *dentry, struct inode *inode,
- const char *name, void *buffer, size_t size)
-{
- return ovl_xattr_get(dentry, inode, handler->name, buffer, size);
-}
-
-static int __maybe_unused
-ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
- struct dentry *dentry, struct inode *inode,
- const char *name, const void *value,
- size_t size, int flags)
-{
- struct dentry *workdir = ovl_workdir(dentry);
- struct inode *realinode = ovl_inode_real(inode);
- struct posix_acl *acl = NULL;
+ struct inode *trap;
int err;
- /* Check that everything is OK before copy-up */
- if (value) {
- acl = posix_acl_from_xattr(&init_user_ns, value, size);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- }
- err = -EOPNOTSUPP;
- if (!IS_POSIXACL(d_inode(workdir)))
- goto out_acl_release;
- if (!realinode->i_op->set_acl)
- goto out_acl_release;
- if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) {
- err = acl ? -EACCES : 0;
- goto out_acl_release;
- }
- err = -EPERM;
- if (!inode_owner_or_capable(inode))
- goto out_acl_release;
-
- posix_acl_release(acl);
-
- /*
- * Check if sgid bit needs to be cleared (actual setacl operation will
- * be done with mounter's capabilities and so that won't do it for us).
- */
- if (unlikely(inode->i_mode & S_ISGID) &&
- handler->flags == ACL_TYPE_ACCESS &&
- !in_group_p(inode->i_gid) &&
- !capable_wrt_inode_uidgid(inode, CAP_FSETID)) {
- struct iattr iattr = { .ia_valid = ATTR_KILL_SGID };
-
- err = ovl_setattr(dentry, &iattr);
- if (err)
- return err;
+ trap = ovl_get_trap_inode(sb, dir);
+ err = PTR_ERR_OR_ZERO(trap);
+ if (err) {
+ if (err == -ELOOP)
+ pr_err("conflicting %s path\n", name);
+ return err;
}
- err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags);
- if (!err)
- ovl_copyattr(ovl_inode_real(inode), inode);
-
- return err;
-
-out_acl_release:
- posix_acl_release(acl);
- return err;
-}
-
-static int ovl_own_xattr_get(const struct xattr_handler *handler,
- struct dentry *dentry, struct inode *inode,
- const char *name, void *buffer, size_t size)
-{
- return -EOPNOTSUPP;
-}
-
-static int ovl_own_xattr_set(const struct xattr_handler *handler,
- struct dentry *dentry, struct inode *inode,
- const char *name, const void *value,
- size_t size, int flags)
-{
- return -EOPNOTSUPP;
-}
-
-static int ovl_other_xattr_get(const struct xattr_handler *handler,
- struct dentry *dentry, struct inode *inode,
- const char *name, void *buffer, size_t size)
-{
- return ovl_xattr_get(dentry, inode, name, buffer, size);
+ *ptrap = trap;
+ return 0;
}
-static int ovl_other_xattr_set(const struct xattr_handler *handler,
- struct dentry *dentry, struct inode *inode,
- const char *name, const void *value,
- size_t size, int flags)
+/*
+ * Determine how we treat concurrent use of upperdir/workdir based on the
+ * index feature. This is papering over mount leaks of container runtimes,
+ * for example, an old overlay mount is leaked and now its upperdir is
+ * attempted to be used as a lower layer in a new overlay mount.
+ */
+static int ovl_report_in_use(struct ovl_fs *ofs, const char *name)
{
- return ovl_xattr_set(dentry, inode, name, value, size, flags);
+ if (ofs->config.index) {
+ pr_err("%s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n",
+ name);
+ return -EBUSY;
+ } else {
+ pr_warn("%s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n",
+ name);
+ return 0;
+ }
}
-static const struct xattr_handler __maybe_unused
-ovl_posix_acl_access_xattr_handler = {
- .name = XATTR_NAME_POSIX_ACL_ACCESS,
- .flags = ACL_TYPE_ACCESS,
- .get = ovl_posix_acl_xattr_get,
- .set = ovl_posix_acl_xattr_set,
-};
-
-static const struct xattr_handler __maybe_unused
-ovl_posix_acl_default_xattr_handler = {
- .name = XATTR_NAME_POSIX_ACL_DEFAULT,
- .flags = ACL_TYPE_DEFAULT,
- .get = ovl_posix_acl_xattr_get,
- .set = ovl_posix_acl_xattr_set,
-};
-
-static const struct xattr_handler ovl_own_xattr_handler = {
- .prefix = OVL_XATTR_PREFIX,
- .get = ovl_own_xattr_get,
- .set = ovl_own_xattr_set,
-};
-
-static const struct xattr_handler ovl_other_xattr_handler = {
- .prefix = "", /* catch all */
- .get = ovl_other_xattr_get,
- .set = ovl_other_xattr_set,
-};
-
-static const struct xattr_handler *ovl_xattr_handlers[] = {
-#ifdef CONFIG_FS_POSIX_ACL
- &ovl_posix_acl_access_xattr_handler,
- &ovl_posix_acl_default_xattr_handler,
-#endif
- &ovl_own_xattr_handler,
- &ovl_other_xattr_handler,
- NULL
-};
-
-static int ovl_get_upper(struct ovl_fs *ofs, struct path *upperpath)
+static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
+ struct ovl_layer *upper_layer,
+ const struct path *upperpath)
{
struct vfsmount *upper_mnt;
int err;
- err = ovl_mount_dir(ofs->config.upperdir, upperpath);
- if (err)
- goto out;
-
- /* Upper fs should not be r/o */
- if (sb_rdonly(upperpath->mnt->mnt_sb)) {
- pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
+ /* Upperdir path should not be r/o */
+ if (__mnt_is_readonly(upperpath->mnt)) {
+ pr_err("upper fs is r/o, try multi-lower layers mount\n");
err = -EINVAL;
goto out;
}
@@ -1004,25 +520,42 @@ static int ovl_get_upper(struct ovl_fs *ofs, struct path *upperpath)
if (err)
goto out;
+ err = ovl_setup_trap(sb, upperpath->dentry, &upper_layer->trap,
+ "upperdir");
+ if (err)
+ goto out;
+
upper_mnt = clone_private_mount(upperpath);
err = PTR_ERR(upper_mnt);
if (IS_ERR(upper_mnt)) {
- pr_err("overlayfs: failed to clone upperpath\n");
+ pr_err("failed to clone upperpath\n");
goto out;
}
/* Don't inherit atime flags */
upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
- ofs->upper_mnt = upper_mnt;
+ upper_layer->mnt = upper_mnt;
+ upper_layer->idx = 0;
+ upper_layer->fsid = 0;
+
+ /*
+ * Inherit SB_NOSEC flag from upperdir.
+ *
+ * This optimization changes behavior when a security related attribute
+ * (suid/sgid/security.*) is changed on an underlying layer. This is
+ * okay because we don't yet have guarantees in that case, but it will
+ * need careful treatment once we want to honour changes to underlying
+ * filesystems.
+ */
+ if (upper_mnt->mnt_sb->s_flags & SB_NOSEC)
+ sb->s_flags |= SB_NOSEC;
- err = -EBUSY;
- if (ovl_inuse_trylock(ofs->upper_mnt->mnt_root)) {
+ if (ovl_inuse_trylock(ovl_upper_mnt(ofs)->mnt_root)) {
ofs->upperdir_locked = true;
- } else if (ofs->config.index) {
- pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n");
- goto out;
} else {
- pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
+ err = ovl_report_in_use(ofs, "upperdir");
+ if (err)
+ goto out;
}
err = 0;
@@ -1030,10 +563,115 @@ out:
return err;
}
-static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
+/*
+ * Returns 1 if RENAME_WHITEOUT is supported, 0 if not supported and
+ * negative values if error is encountered.
+ */
+static int ovl_check_rename_whiteout(struct ovl_fs *ofs)
{
- struct vfsmount *mnt = ofs->upper_mnt;
+ struct dentry *workdir = ofs->workdir;
struct dentry *temp;
+ struct dentry *whiteout;
+ struct name_snapshot name;
+ struct renamedata rd = {};
+ char name2[OVL_TEMPNAME_SIZE];
+ int err;
+
+ temp = ovl_create_temp(ofs, workdir, OVL_CATTR(S_IFREG | 0));
+ err = PTR_ERR(temp);
+ if (IS_ERR(temp))
+ return err;
+
+ rd.mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.old_parent = workdir;
+ rd.new_parent = workdir;
+ rd.flags = RENAME_WHITEOUT;
+ ovl_tempname(name2);
+ err = start_renaming_dentry(&rd, 0, temp, &QSTR(name2));
+ if (err) {
+ dput(temp);
+ return err;
+ }
+
+ /* Name is inline and stable - using snapshot as a copy helper */
+ take_dentry_name_snapshot(&name, temp);
+ err = ovl_do_rename_rd(&rd);
+ end_renaming(&rd);
+ if (err) {
+ if (err == -EINVAL)
+ err = 0;
+ goto cleanup_temp;
+ }
+
+ whiteout = ovl_lookup_upper_unlocked(ofs, name.name.name,
+ workdir, name.name.len);
+ err = PTR_ERR(whiteout);
+ if (IS_ERR(whiteout))
+ goto cleanup_temp;
+
+ err = ovl_upper_is_whiteout(ofs, whiteout);
+
+ /* Best effort cleanup of whiteout and temp file */
+ if (err)
+ ovl_cleanup(ofs, workdir, whiteout);
+ dput(whiteout);
+
+cleanup_temp:
+ ovl_cleanup(ofs, workdir, temp);
+ release_dentry_name_snapshot(&name);
+ dput(temp);
+
+ return err;
+}
+
+static struct dentry *ovl_lookup_or_create(struct ovl_fs *ofs,
+ struct dentry *parent,
+ const char *name, umode_t mode)
+{
+ struct dentry *child;
+
+ child = ovl_start_creating_upper(ofs, parent, &QSTR(name));
+ if (!IS_ERR(child)) {
+ if (!child->d_inode)
+ child = ovl_create_real(ofs, parent, child,
+ OVL_CATTR(mode));
+ end_creating_keep(child);
+ }
+ dput(parent);
+
+ return child;
+}
+
+/*
+ * Creates $workdir/work/incompat/volatile/dirty file if it is not already
+ * present.
+ */
+static int ovl_create_volatile_dirty(struct ovl_fs *ofs)
+{
+ unsigned int ctr;
+ struct dentry *d = dget(ofs->workbasedir);
+ static const char *const volatile_path[] = {
+ OVL_WORKDIR_NAME, "incompat", "volatile", "dirty"
+ };
+ const char *const *name = volatile_path;
+
+ for (ctr = ARRAY_SIZE(volatile_path); ctr; ctr--, name++) {
+ d = ovl_lookup_or_create(ofs, d, *name, ctr > 1 ? S_IFDIR : S_IFREG);
+ if (IS_ERR(d))
+ return PTR_ERR(d);
+ }
+ dput(d);
+ return 0;
+}
+
+static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
+ const struct path *workpath)
+{
+ struct vfsmount *mnt = ovl_upper_mnt(ofs);
+ struct dentry *workdir;
+ struct file *tmpfile;
+ bool rename_whiteout;
+ bool d_type;
int fh_type;
int err;
@@ -1041,8 +679,15 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
if (err)
return err;
- ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
- if (!ofs->workdir)
+ workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
+ err = PTR_ERR(workdir);
+ if (IS_ERR_OR_NULL(workdir))
+ goto out;
+
+ ofs->workdir = workdir;
+
+ err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir");
+ if (err)
goto out;
/*
@@ -1055,49 +700,105 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
if (err < 0)
goto out;
- /*
- * We allowed this configuration and don't want to break users over
- * kernel upgrade. So warn instead of erroring out.
- */
- if (!err)
- pr_warn("overlayfs: upper fs needs to support d_type.\n");
+ d_type = err;
+ if (!d_type)
+ pr_warn("upper fs needs to support d_type.\n");
/* Check if upper/work fs supports O_TMPFILE */
- temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0);
- ofs->tmpfile = !IS_ERR(temp);
+ tmpfile = ovl_do_tmpfile(ofs, ofs->workdir, S_IFREG | 0);
+ ofs->tmpfile = !IS_ERR(tmpfile);
if (ofs->tmpfile)
- dput(temp);
+ fput(tmpfile);
else
- pr_warn("overlayfs: upper fs does not support tmpfile.\n");
+ pr_warn("upper fs does not support tmpfile.\n");
+
+
+ /* Check if upper/work fs supports RENAME_WHITEOUT */
+ err = ovl_check_rename_whiteout(ofs);
+ if (err < 0)
+ goto out;
+
+ rename_whiteout = err;
+ if (!rename_whiteout)
+ pr_warn("upper fs does not support RENAME_WHITEOUT.\n");
/*
- * Check if upper/work fs supports trusted.overlay.* xattr
+ * Check if upper/work fs supports (trusted|user).overlay.* xattr
*/
- err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0);
+ err = ovl_setxattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE, "0", 1);
if (err) {
+ pr_warn("failed to set xattr on upper\n");
ofs->noxattr = true;
- ofs->config.index = false;
- ofs->config.metacopy = false;
- pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off and metacopy=off.\n");
+ if (ovl_redirect_follow(ofs)) {
+ ofs->config.redirect_mode = OVL_REDIRECT_NOFOLLOW;
+ pr_warn("...falling back to redirect_dir=nofollow.\n");
+ }
+ if (ofs->config.metacopy) {
+ ofs->config.metacopy = false;
+ pr_warn("...falling back to metacopy=off.\n");
+ }
+ if (ofs->config.index) {
+ ofs->config.index = false;
+ pr_warn("...falling back to index=off.\n");
+ }
+ if (ovl_has_fsid(ofs)) {
+ ofs->config.uuid = OVL_UUID_NULL;
+ pr_warn("...falling back to uuid=null.\n");
+ }
+ /*
+ * xattr support is required for persistent st_ino.
+ * Without persistent st_ino, xino=auto falls back to xino=off.
+ */
+ if (ofs->config.xino == OVL_XINO_AUTO) {
+ ofs->config.xino = OVL_XINO_OFF;
+ pr_warn("...falling back to xino=off.\n");
+ }
+ if (err == -EPERM && !ofs->config.userxattr)
+ pr_info("try mounting with 'userxattr' option\n");
err = 0;
} else {
- vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
+ ovl_removexattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE);
+ }
+
+ /*
+ * We allowed sub-optimal upper fs configuration and don't want to break
+ * users over kernel upgrade, but we never allowed remote upper fs, so
+ * we can enforce strict requirements for remote upper fs.
+ */
+ if (ovl_dentry_remote(ofs->workdir) &&
+ (!d_type || !rename_whiteout || ofs->noxattr)) {
+ pr_err("upper fs missing required features.\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * For volatile mount, create a incompat/volatile/dirty file to keep
+ * track of it.
+ */
+ if (ofs->config.ovl_volatile) {
+ err = ovl_create_volatile_dirty(ofs);
+ if (err < 0) {
+ pr_err("Failed to create volatile/dirty file.\n");
+ goto out;
+ }
}
/* Check if upper/work fs supports file handles */
fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
if (ofs->config.index && !fh_type) {
ofs->config.index = false;
- pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
+ pr_warn("upper fs does not support file handles, falling back to index=off.\n");
}
+ ofs->nofh |= !fh_type;
/* Check if upper fs has 32bit inode numbers */
if (fh_type != FILEID_INO32_GEN)
- ofs->xino_bits = 0;
+ ofs->xino_mode = -1;
/* NFS export of r/w mount depends on index */
if (ofs->config.nfs_export && !ofs->config.index) {
- pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n");
+ pr_warn("NFS export requires \"index=on\", falling back to nfs_export=off.\n");
ofs->config.nfs_export = false;
}
out:
@@ -1105,95 +806,110 @@ out:
return err;
}
-static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath)
+static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs,
+ const struct path *upperpath,
+ const struct path *workpath)
{
int err;
- struct path workpath = { };
-
- err = ovl_mount_dir(ofs->config.workdir, &workpath);
- if (err)
- goto out;
err = -EINVAL;
- if (upperpath->mnt != workpath.mnt) {
- pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
- goto out;
+ if (upperpath->mnt != workpath->mnt) {
+ pr_err("workdir and upperdir must reside under the same mount\n");
+ return err;
}
- if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
- pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
- goto out;
+ if (!ovl_workdir_ok(workpath->dentry, upperpath->dentry)) {
+ pr_err("workdir and upperdir must be separate subtrees\n");
+ return err;
}
- ofs->workbasedir = dget(workpath.dentry);
+ ofs->workbasedir = dget(workpath->dentry);
- err = -EBUSY;
if (ovl_inuse_trylock(ofs->workbasedir)) {
ofs->workdir_locked = true;
- } else if (ofs->config.index) {
- pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n");
- goto out;
} else {
- pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
+ err = ovl_report_in_use(ofs, "workdir");
+ if (err)
+ return err;
}
- err = ovl_make_workdir(ofs, &workpath);
+ err = ovl_setup_trap(sb, ofs->workbasedir, &ofs->workbasedir_trap,
+ "workdir");
if (err)
- goto out;
-
- err = 0;
-out:
- path_put(&workpath);
+ return err;
- return err;
+ return ovl_make_workdir(sb, ofs, workpath);
}
-static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe,
- struct path *upperpath)
+static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
+ struct ovl_entry *oe, const struct path *upperpath)
{
- struct vfsmount *mnt = ofs->upper_mnt;
+ struct vfsmount *mnt = ovl_upper_mnt(ofs);
+ struct dentry *indexdir;
+ struct dentry *origin = ovl_lowerstack(oe)->dentry;
+ const struct ovl_fh *fh;
int err;
+ fh = ovl_get_origin_fh(ofs, origin);
+ if (IS_ERR(fh))
+ return PTR_ERR(fh);
+
err = mnt_want_write(mnt);
if (err)
- return err;
+ goto out_free_fh;
/* Verify lower root is upper root origin */
- err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry,
- true);
+ err = ovl_verify_origin_fh(ofs, upperpath->dentry, fh, true);
if (err) {
- pr_err("overlayfs: failed to verify upper root origin\n");
+ pr_err("failed to verify upper root origin\n");
goto out;
}
- ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
- if (ofs->indexdir) {
+ /* index dir will act also as workdir */
+ iput(ofs->workdir_trap);
+ ofs->workdir_trap = NULL;
+ dput(ofs->workdir);
+ ofs->workdir = NULL;
+ indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
+ if (IS_ERR(indexdir)) {
+ err = PTR_ERR(indexdir);
+ } else if (indexdir) {
+ ofs->workdir = indexdir;
+ err = ovl_setup_trap(sb, indexdir, &ofs->workdir_trap,
+ "indexdir");
+ if (err)
+ goto out;
+
/*
* Verify upper root is exclusively associated with index dir.
- * Older kernels stored upper fh in "trusted.overlay.origin"
+ * Older kernels stored upper fh in ".overlay.origin"
* xattr. If that xattr exists, verify that it is a match to
* upper dir file handle. In any case, verify or set xattr
- * "trusted.overlay.upper" to indicate that index may have
+ * ".overlay.upper" to indicate that index may have
* directory entries.
*/
- if (ovl_check_origin_xattr(ofs->indexdir)) {
- err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN,
- upperpath->dentry, true, false);
+ if (ovl_check_origin_xattr(ofs, indexdir)) {
+ err = ovl_verify_origin_xattr(ofs, indexdir,
+ OVL_XATTR_ORIGIN,
+ upperpath->dentry, true,
+ false);
if (err)
- pr_err("overlayfs: failed to verify index dir 'origin' xattr\n");
+ pr_err("failed to verify index dir 'origin' xattr\n");
}
- err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true);
+ err = ovl_verify_upper(ofs, indexdir, upperpath->dentry, true);
if (err)
- pr_err("overlayfs: failed to verify index dir 'upper' xattr\n");
+ pr_err("failed to verify index dir 'upper' xattr\n");
/* Cleanup bad/stale/orphan index entries */
if (!err)
err = ovl_indexdir_cleanup(ofs);
}
- if (err || !ofs->indexdir)
- pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
+ if (err || !indexdir)
+ pr_warn("try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
out:
mnt_drop_write(mnt);
+out_free_fh:
+ kfree(fh);
return err;
}
@@ -1201,17 +917,33 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
{
unsigned int i;
- if (!ofs->config.nfs_export && !(ofs->config.index && ofs->upper_mnt))
+ if (!ofs->config.nfs_export && !ovl_upper_mnt(ofs))
return true;
- for (i = 0; i < ofs->numlowerfs; i++) {
+ /*
+ * We allow using single lower with null uuid for index and nfs_export
+ * for example to support those features with single lower squashfs.
+ * To avoid regressions in setups of overlay with re-formatted lower
+ * squashfs, do not allow decoding origin with lower null uuid unless
+ * user opted-in to one of the new features that require following the
+ * lower inode of non-dir upper.
+ */
+ if (ovl_allow_offline_changes(ofs) && uuid_is_null(uuid))
+ return false;
+
+ for (i = 0; i < ofs->numfs; i++) {
/*
* We use uuid to associate an overlay lower file handle with a
* lower layer, so we can accept lower fs with null uuid as long
* as all lower layers with null uuid are on the same fs.
+ * if we detect multiple lower fs with the same uuid, we
+ * disable lower file handle decoding on all of them.
*/
- if (uuid_equal(&ofs->lower_fs[i].sb->s_uuid, uuid))
+ if (ofs->fs[i].is_lower &&
+ uuid_equal(&ofs->fs[i].sb->s_uuid, uuid)) {
+ ofs->fs[i].bad_uuid = true;
return false;
+ }
}
return true;
}
@@ -1223,67 +955,154 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
unsigned int i;
dev_t dev;
int err;
+ bool bad_uuid = false;
+ bool warn = false;
- /* fsid 0 is reserved for upper fs even with non upper overlay */
- if (ofs->upper_mnt && ofs->upper_mnt->mnt_sb == sb)
- return 0;
-
- for (i = 0; i < ofs->numlowerfs; i++) {
- if (ofs->lower_fs[i].sb == sb)
- return i + 1;
+ for (i = 0; i < ofs->numfs; i++) {
+ if (ofs->fs[i].sb == sb)
+ return i;
}
if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) {
- ofs->config.index = false;
- ofs->config.nfs_export = false;
- pr_warn("overlayfs: %s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n",
- uuid_is_null(&sb->s_uuid) ? "null" : "conflicting",
- path->dentry);
+ bad_uuid = true;
+ if (ofs->config.xino == OVL_XINO_AUTO) {
+ ofs->config.xino = OVL_XINO_OFF;
+ warn = true;
+ }
+ if (ofs->config.index || ofs->config.nfs_export) {
+ ofs->config.index = false;
+ ofs->config.nfs_export = false;
+ warn = true;
+ }
+ if (warn) {
+ pr_warn("%s uuid detected in lower fs '%pd2', falling back to xino=%s,index=off,nfs_export=off.\n",
+ uuid_is_null(&sb->s_uuid) ? "null" :
+ "conflicting",
+ path->dentry, ovl_xino_mode(&ofs->config));
+ }
}
err = get_anon_bdev(&dev);
if (err) {
- pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n");
+ pr_err("failed to get anonymous bdev for lowerpath\n");
return err;
}
- ofs->lower_fs[ofs->numlowerfs].sb = sb;
- ofs->lower_fs[ofs->numlowerfs].pseudo_dev = dev;
- ofs->numlowerfs++;
+ ofs->fs[ofs->numfs].sb = sb;
+ ofs->fs[ofs->numfs].pseudo_dev = dev;
+ ofs->fs[ofs->numfs].bad_uuid = bad_uuid;
+
+ return ofs->numfs++;
+}
+
+/*
+ * The fsid after the last lower fsid is used for the data layers.
+ * It is a "null fs" with a null sb, null uuid, and no pseudo dev.
+ */
+static int ovl_get_data_fsid(struct ovl_fs *ofs)
+{
+ return ofs->numfs;
+}
+
+/*
+ * Set the ovl sb encoding as the same one used by the first layer
+ */
+static int ovl_set_encoding(struct super_block *sb, struct super_block *fs_sb)
+{
+ if (!sb_has_encoding(fs_sb))
+ return 0;
+
+#if IS_ENABLED(CONFIG_UNICODE)
+ if (sb_has_strict_encoding(fs_sb)) {
+ pr_err("strict encoding not supported\n");
+ return -EINVAL;
+ }
- return ofs->numlowerfs;
+ sb->s_encoding = fs_sb->s_encoding;
+ sb->s_encoding_flags = fs_sb->s_encoding_flags;
+#endif
+ return 0;
}
-static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack,
- unsigned int numlower)
+static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
+ struct ovl_fs_context *ctx, struct ovl_layer *layers)
{
int err;
unsigned int i;
+ size_t nr_merged_lower;
- err = -ENOMEM;
- ofs->lower_layers = kcalloc(numlower, sizeof(struct ovl_layer),
- GFP_KERNEL);
- if (ofs->lower_layers == NULL)
- goto out;
+ ofs->fs = kcalloc(ctx->nr + 2, sizeof(struct ovl_sb), GFP_KERNEL);
+ if (ofs->fs == NULL)
+ return -ENOMEM;
- ofs->lower_fs = kcalloc(numlower, sizeof(struct ovl_sb),
- GFP_KERNEL);
- if (ofs->lower_fs == NULL)
- goto out;
+ /*
+ * idx/fsid 0 are reserved for upper fs even with lower only overlay
+ * and the last fsid is reserved for "null fs" of the data layers.
+ */
+ ofs->numfs++;
- for (i = 0; i < numlower; i++) {
+ /*
+ * All lower layers that share the same fs as upper layer, use the same
+ * pseudo_dev as upper layer. Allocate fs[0].pseudo_dev even for lower
+ * only overlay to simplify ovl_fs_free().
+ * is_lower will be set if upper fs is shared with a lower layer.
+ */
+ err = get_anon_bdev(&ofs->fs[0].pseudo_dev);
+ if (err) {
+ pr_err("failed to get anonymous bdev for upper fs\n");
+ return err;
+ }
+
+ if (ovl_upper_mnt(ofs)) {
+ ofs->fs[0].sb = ovl_upper_mnt(ofs)->mnt_sb;
+ ofs->fs[0].is_lower = false;
+
+ if (ofs->casefold) {
+ err = ovl_set_encoding(sb, ofs->fs[0].sb);
+ if (err)
+ return err;
+ }
+ }
+
+ nr_merged_lower = ctx->nr - ctx->nr_data;
+ for (i = 0; i < ctx->nr; i++) {
+ struct ovl_fs_context_layer *l = &ctx->lower[i];
struct vfsmount *mnt;
+ struct inode *trap;
int fsid;
- err = fsid = ovl_get_fsid(ofs, &stack[i]);
- if (err < 0)
- goto out;
+ if (i < nr_merged_lower)
+ fsid = ovl_get_fsid(ofs, &l->path);
+ else
+ fsid = ovl_get_data_fsid(ofs);
+ if (fsid < 0)
+ return fsid;
+
+ /*
+ * Check if lower root conflicts with this overlay layers before
+ * checking if it is in-use as upperdir/workdir of "another"
+ * mount, because we do not bother to check in ovl_is_inuse() if
+ * the upperdir/workdir is in fact in-use by our
+ * upperdir/workdir.
+ */
+ err = ovl_setup_trap(sb, l->path.dentry, &trap, "lowerdir");
+ if (err)
+ return err;
- mnt = clone_private_mount(&stack[i]);
+ if (ovl_is_inuse(l->path.dentry)) {
+ err = ovl_report_in_use(ofs, "lowerdir");
+ if (err) {
+ iput(trap);
+ return err;
+ }
+ }
+
+ mnt = clone_private_mount(&l->path);
err = PTR_ERR(mnt);
if (IS_ERR(mnt)) {
- pr_err("overlayfs: failed to clone lowerpath\n");
- goto out;
+ pr_err("failed to clone lowerpath\n");
+ iput(trap);
+ return err;
}
/*
@@ -1292,287 +1111,477 @@ static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack,
*/
mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
- ofs->lower_layers[ofs->numlower].mnt = mnt;
- ofs->lower_layers[ofs->numlower].idx = i + 1;
- ofs->lower_layers[ofs->numlower].fsid = fsid;
- if (fsid) {
- ofs->lower_layers[ofs->numlower].fs =
- &ofs->lower_fs[fsid - 1];
+ layers[ofs->numlayer].trap = trap;
+ layers[ofs->numlayer].mnt = mnt;
+ layers[ofs->numlayer].idx = ofs->numlayer;
+ layers[ofs->numlayer].fsid = fsid;
+ layers[ofs->numlayer].fs = &ofs->fs[fsid];
+ /* Store for printing lowerdir=... in ovl_show_options() */
+ ofs->config.lowerdirs[ofs->numlayer] = l->name;
+ l->name = NULL;
+ ofs->numlayer++;
+ ofs->fs[fsid].is_lower = true;
+
+ if (ofs->casefold) {
+ if (!ovl_upper_mnt(ofs) && !sb_has_encoding(sb)) {
+ err = ovl_set_encoding(sb, ofs->fs[fsid].sb);
+ if (err)
+ return err;
+ }
+
+ if (!sb_same_encoding(sb, mnt->mnt_sb)) {
+ pr_err("all layers must have the same encoding\n");
+ return -EINVAL;
+ }
}
- ofs->numlower++;
}
/*
* When all layers on same fs, overlay can use real inode numbers.
- * With mount option "xino=on", mounter declares that there are enough
- * free high bits in underlying fs to hold the unique fsid.
+ * With mount option "xino=<on|auto>", mounter declares that there are
+ * enough free high bits in underlying fs to hold the unique fsid.
* If overlayfs does encounter underlying inodes using the high xino
* bits reserved for fsid, it emits a warning and uses the original
- * inode number.
+ * inode number or a non persistent inode number allocated from a
+ * dedicated range.
*/
- if (!ofs->numlowerfs || (ofs->numlowerfs == 1 && !ofs->upper_mnt)) {
- ofs->xino_bits = 0;
- ofs->config.xino = OVL_XINO_OFF;
- } else if (ofs->config.xino == OVL_XINO_ON && !ofs->xino_bits) {
+ if (ofs->numfs - !ovl_upper_mnt(ofs) == 1) {
+ if (ofs->config.xino == OVL_XINO_ON)
+ pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n");
+ ofs->xino_mode = 0;
+ } else if (ofs->config.xino == OVL_XINO_OFF) {
+ ofs->xino_mode = -1;
+ } else if (ofs->xino_mode < 0) {
/*
- * This is a roundup of number of bits needed for numlowerfs+1
- * (i.e. ilog2(numlowerfs+1 - 1) + 1). fsid 0 is reserved for
- * upper fs even with non upper overlay.
+ * This is a roundup of number of bits needed for encoding
+ * fsid, where fsid 0 is reserved for upper fs (even with
+ * lower only overlay) +1 extra bit is reserved for the non
+ * persistent inode number range that is used for resolving
+ * xino lower bits overflow.
*/
- BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31);
- ofs->xino_bits = ilog2(ofs->numlowerfs) + 1;
+ BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 30);
+ ofs->xino_mode = ilog2(ofs->numfs - 1) + 2;
}
- if (ofs->xino_bits) {
- pr_info("overlayfs: \"xino\" feature enabled using %d upper inode bits.\n",
- ofs->xino_bits);
+ if (ofs->xino_mode > 0) {
+ pr_info("\"xino\" feature enabled using %d upper inode bits.\n",
+ ofs->xino_mode);
}
- err = 0;
-out:
- return err;
+ return 0;
}
static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
- struct ovl_fs *ofs)
+ struct ovl_fs_context *ctx,
+ struct ovl_fs *ofs,
+ struct ovl_layer *layers)
{
int err;
- char *lowertmp, *lower;
- struct path *stack = NULL;
- unsigned int stacklen, numlower = 0, i;
- bool remote = false;
+ unsigned int i;
+ size_t nr_merged_lower;
struct ovl_entry *oe;
+ struct ovl_path *lowerstack;
- err = -ENOMEM;
- lowertmp = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
- if (!lowertmp)
- goto out_err;
+ struct ovl_fs_context_layer *l;
- err = -EINVAL;
- stacklen = ovl_split_lowerdirs(lowertmp);
- if (stacklen > OVL_MAX_STACK) {
- pr_err("overlayfs: too many lower directories, limit is %d\n",
- OVL_MAX_STACK);
- goto out_err;
- } else if (!ofs->config.upperdir && stacklen == 1) {
- pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
- goto out_err;
- } else if (!ofs->config.upperdir && ofs->config.nfs_export &&
- ofs->config.redirect_follow) {
- pr_warn("overlayfs: NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
- ofs->config.nfs_export = false;
+ if (!ofs->config.upperdir && ctx->nr == 1) {
+ pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n");
+ return ERR_PTR(-EINVAL);
}
- err = -ENOMEM;
- stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
- if (!stack)
- goto out_err;
+ if (ctx->nr == ctx->nr_data) {
+ pr_err("at least one non-data lowerdir is required\n");
+ return ERR_PTR(-EINVAL);
+ }
err = -EINVAL;
- lower = lowertmp;
- for (numlower = 0; numlower < stacklen; numlower++) {
- err = ovl_lower_dir(lower, &stack[numlower], ofs,
- &sb->s_stack_depth, &remote);
- if (err)
- goto out_err;
+ for (i = 0; i < ctx->nr; i++) {
+ l = &ctx->lower[i];
- lower = strchr(lower, '\0') + 1;
+ err = ovl_lower_dir(l->name, &l->path, ofs, &sb->s_stack_depth);
+ if (err)
+ return ERR_PTR(err);
}
err = -EINVAL;
sb->s_stack_depth++;
if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
- pr_err("overlayfs: maximum fs stacking depth exceeded\n");
- goto out_err;
+ pr_err("maximum fs stacking depth exceeded\n");
+ return ERR_PTR(err);
}
- err = ovl_get_lower_layers(ofs, stack, numlower);
+ err = ovl_get_layers(sb, ofs, ctx, layers);
if (err)
- goto out_err;
+ return ERR_PTR(err);
err = -ENOMEM;
- oe = ovl_alloc_entry(numlower);
+ /* Data-only layers are not merged in root directory */
+ nr_merged_lower = ctx->nr - ctx->nr_data;
+ oe = ovl_alloc_entry(nr_merged_lower);
if (!oe)
- goto out_err;
+ return ERR_PTR(err);
- for (i = 0; i < numlower; i++) {
- oe->lowerstack[i].dentry = dget(stack[i].dentry);
- oe->lowerstack[i].layer = &ofs->lower_layers[i];
+ lowerstack = ovl_lowerstack(oe);
+ for (i = 0; i < nr_merged_lower; i++) {
+ l = &ctx->lower[i];
+ lowerstack[i].dentry = dget(l->path.dentry);
+ lowerstack[i].layer = &ofs->layers[i + 1];
}
+ ofs->numdatalayer = ctx->nr_data;
- if (remote)
- sb->s_d_op = &ovl_reval_dentry_operations;
- else
- sb->s_d_op = &ovl_dentry_operations;
+ return oe;
+}
-out:
- for (i = 0; i < numlower; i++)
- path_put(&stack[i]);
- kfree(stack);
- kfree(lowertmp);
+/*
+ * Check if this layer root is a descendant of:
+ * - another layer of this overlayfs instance
+ * - upper/work dir of any overlayfs instance
+ */
+static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs,
+ struct dentry *dentry, const char *name,
+ bool is_lower)
+{
+ struct dentry *next = dentry, *parent;
+ int err = 0;
- return oe;
+ if (!dentry)
+ return 0;
-out_err:
- oe = ERR_PTR(err);
- goto out;
+ parent = dget_parent(next);
+
+ /* Walk back ancestors to root (inclusive) looking for traps */
+ while (!err && parent != next) {
+ if (is_lower && ovl_lookup_trap_inode(sb, parent)) {
+ err = -ELOOP;
+ pr_err("overlapping %s path\n", name);
+ } else if (ovl_is_inuse(parent)) {
+ err = ovl_report_in_use(ofs, name);
+ }
+ next = parent;
+ parent = dget_parent(next);
+ dput(next);
+ }
+
+ dput(parent);
+
+ return err;
}
-static int ovl_fill_super(struct super_block *sb, void *data, int silent)
+/*
+ * Check if any of the layers or work dirs overlap.
+ */
+static int ovl_check_overlapping_layers(struct super_block *sb,
+ struct ovl_fs *ofs)
{
- struct path upperpath = { };
- struct dentry *root_dentry;
- struct ovl_entry *oe;
- struct ovl_fs *ofs;
- struct cred *cred;
- int err;
+ int i, err;
- err = -ENOMEM;
- ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
- if (!ofs)
- goto out;
+ if (ovl_upper_mnt(ofs)) {
+ err = ovl_check_layer(sb, ofs, ovl_upper_mnt(ofs)->mnt_root,
+ "upperdir", false);
+ if (err)
+ return err;
- ofs->creator_cred = cred = prepare_creds();
- if (!cred)
- goto out_err;
+ /*
+ * Checking workbasedir avoids hitting ovl_is_inuse(parent) of
+ * this instance and covers overlapping work and index dirs,
+ * unless work or index dir have been moved since created inside
+ * workbasedir. In that case, we already have their traps in
+ * inode cache and we will catch that case on lookup.
+ */
+ err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir",
+ false);
+ if (err)
+ return err;
+ }
- ofs->config.index = ovl_index_def;
- ofs->config.nfs_export = ovl_nfs_export_def;
- ofs->config.xino = ovl_xino_def();
- ofs->config.metacopy = ovl_metacopy_def;
- err = ovl_parse_opt((char *) data, &ofs->config);
+ for (i = 1; i < ofs->numlayer; i++) {
+ err = ovl_check_layer(sb, ofs,
+ ofs->layers[i].mnt->mnt_root,
+ "lowerdir", true);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static struct dentry *ovl_get_root(struct super_block *sb,
+ struct dentry *upperdentry,
+ struct ovl_entry *oe)
+{
+ struct dentry *root;
+ struct ovl_fs *ofs = OVL_FS(sb);
+ struct ovl_path *lowerpath = ovl_lowerstack(oe);
+ unsigned long ino = d_inode(lowerpath->dentry)->i_ino;
+ int fsid = lowerpath->layer->fsid;
+ struct ovl_inode_params oip = {
+ .upperdentry = upperdentry,
+ .oe = oe,
+ };
+
+ root = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
+ if (!root)
+ return NULL;
+
+ if (upperdentry) {
+ /* Root inode uses upper st_ino/i_ino */
+ ino = d_inode(upperdentry)->i_ino;
+ fsid = 0;
+ ovl_dentry_set_upper_alias(root);
+ if (ovl_is_impuredir(sb, upperdentry))
+ ovl_set_flag(OVL_IMPURE, d_inode(root));
+ }
+
+ /* Look for xwhiteouts marker except in the lowermost layer */
+ for (int i = 0; i < ovl_numlower(oe) - 1; i++, lowerpath++) {
+ struct path path = {
+ .mnt = lowerpath->layer->mnt,
+ .dentry = lowerpath->dentry,
+ };
+
+ /* overlay.opaque=x means xwhiteouts directory */
+ if (ovl_get_opaquedir_val(ofs, &path) == 'x') {
+ ovl_layer_set_xwhiteouts(ofs, lowerpath->layer);
+ ovl_dentry_set_xwhiteouts(root);
+ }
+ }
+
+ /* Root is always merge -> can have whiteouts */
+ ovl_set_flag(OVL_WHITEOUTS, d_inode(root));
+ ovl_dentry_set_flag(OVL_E_CONNECTED, root);
+ ovl_set_upperdata(d_inode(root));
+ ovl_inode_init(d_inode(root), &oip, ino, fsid);
+ WARN_ON(!!IS_CASEFOLDED(d_inode(root)) != ofs->casefold);
+ ovl_dentry_init_flags(root, upperdentry, oe, DCACHE_OP_WEAK_REVALIDATE);
+ /* root keeps a reference of upperdentry */
+ dget(upperdentry);
+
+ return root;
+}
+
+static void ovl_set_d_op(struct super_block *sb)
+{
+#if IS_ENABLED(CONFIG_UNICODE)
+ struct ovl_fs *ofs = sb->s_fs_info;
+
+ if (ofs->casefold) {
+ set_default_d_op(sb, &ovl_dentry_ci_operations);
+ return;
+ }
+#endif
+ set_default_d_op(sb, &ovl_dentry_operations);
+}
+
+static int ovl_fill_super_creds(struct fs_context *fc, struct super_block *sb)
+{
+ struct ovl_fs *ofs = sb->s_fs_info;
+ struct cred *creator_cred = (struct cred *)ofs->creator_cred;
+ struct ovl_fs_context *ctx = fc->fs_private;
+ struct ovl_layer *layers;
+ struct ovl_entry *oe = NULL;
+ int err;
+
+ err = ovl_fs_params_verify(ctx, &ofs->config);
if (err)
- goto out_err;
+ return err;
err = -EINVAL;
- if (!ofs->config.lowerdir) {
- if (!silent)
- pr_err("overlayfs: missing 'lowerdir'\n");
- goto out_err;
+ if (ctx->nr == 0) {
+ if (!(fc->sb_flags & SB_SILENT))
+ pr_err("missing 'lowerdir'\n");
+ return err;
}
+ err = -ENOMEM;
+ layers = kcalloc(ctx->nr + 1, sizeof(struct ovl_layer), GFP_KERNEL);
+ if (!layers)
+ return err;
+
+ ofs->config.lowerdirs = kcalloc(ctx->nr + 1, sizeof(char *), GFP_KERNEL);
+ if (!ofs->config.lowerdirs) {
+ kfree(layers);
+ return err;
+ }
+ ofs->layers = layers;
+ /*
+ * Layer 0 is reserved for upper even if there's no upper.
+ * config.lowerdirs[0] is used for storing the user provided colon
+ * separated lowerdir string.
+ */
+ ofs->config.lowerdirs[0] = ctx->lowerdir_all;
+ ctx->lowerdir_all = NULL;
+ ofs->numlayer = 1;
+
sb->s_stack_depth = 0;
sb->s_maxbytes = MAX_LFS_FILESIZE;
- /* Assume underlaying fs uses 32bit inodes unless proven otherwise */
- if (ofs->config.xino != OVL_XINO_OFF)
- ofs->xino_bits = BITS_PER_LONG - 32;
+ atomic_long_set(&ofs->last_ino, 1);
+ /* Assume underlying fs uses 32bit inodes unless proven otherwise */
+ if (ofs->config.xino != OVL_XINO_OFF) {
+ ofs->xino_mode = BITS_PER_LONG - 32;
+ if (!ofs->xino_mode) {
+ pr_warn("xino not supported on 32bit kernel, falling back to xino=off.\n");
+ ofs->config.xino = OVL_XINO_OFF;
+ }
+ }
+
+ /* alloc/destroy_inode needed for setting up traps in inode cache */
+ sb->s_op = &ovl_super_operations;
if (ofs->config.upperdir) {
+ struct super_block *upper_sb;
+
+ err = -EINVAL;
if (!ofs->config.workdir) {
- pr_err("overlayfs: missing 'workdir'\n");
- goto out_err;
+ pr_err("missing 'workdir'\n");
+ return err;
}
- err = ovl_get_upper(ofs, &upperpath);
+ err = ovl_get_upper(sb, ofs, &layers[0], &ctx->upper);
if (err)
- goto out_err;
+ return err;
- err = ovl_get_workdir(ofs, &upperpath);
+ upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
+ if (!ovl_should_sync(ofs)) {
+ ofs->errseq = errseq_sample(&upper_sb->s_wb_err);
+ if (errseq_check(&upper_sb->s_wb_err, ofs->errseq)) {
+ err = -EIO;
+ pr_err("Cannot mount volatile when upperdir has an unseen error. Sync upperdir fs to clear state.\n");
+ return err;
+ }
+ }
+
+ err = ovl_get_workdir(sb, ofs, &ctx->upper, &ctx->work);
if (err)
- goto out_err;
+ return err;
if (!ofs->workdir)
sb->s_flags |= SB_RDONLY;
- sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth;
- sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran;
-
+ sb->s_stack_depth = upper_sb->s_stack_depth;
+ sb->s_time_gran = upper_sb->s_time_gran;
}
- oe = ovl_get_lowerstack(sb, ofs);
+ oe = ovl_get_lowerstack(sb, ctx, ofs, layers);
err = PTR_ERR(oe);
if (IS_ERR(oe))
- goto out_err;
+ return err;
/* If the upper fs is nonexistent, we mark overlayfs r/o too */
- if (!ofs->upper_mnt)
+ if (!ovl_upper_mnt(ofs))
sb->s_flags |= SB_RDONLY;
- if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
- err = ovl_get_indexdir(ofs, oe, &upperpath);
+ if (!ovl_origin_uuid(ofs) && ofs->numfs > 1) {
+ pr_warn("The uuid=off requires a single fs for lower and upper, falling back to uuid=null.\n");
+ ofs->config.uuid = OVL_UUID_NULL;
+ } else if (ovl_has_fsid(ofs) && ovl_upper_mnt(ofs)) {
+ /* Use per instance persistent uuid/fsid */
+ ovl_init_uuid_xattr(sb, ofs, &ctx->upper);
+ }
+
+ if (!ovl_force_readonly(ofs) && ofs->config.index) {
+ err = ovl_get_indexdir(sb, ofs, oe, &ctx->upper);
if (err)
goto out_free_oe;
/* Force r/o mount with no index dir */
- if (!ofs->indexdir) {
- dput(ofs->workdir);
- ofs->workdir = NULL;
+ if (!ofs->workdir)
sb->s_flags |= SB_RDONLY;
- }
-
}
+ err = ovl_check_overlapping_layers(sb, ofs);
+ if (err)
+ goto out_free_oe;
+
/* Show index=off in /proc/mounts for forced r/o mount */
- if (!ofs->indexdir) {
+ if (!ofs->workdir) {
ofs->config.index = false;
- if (ofs->upper_mnt && ofs->config.nfs_export) {
- pr_warn("overlayfs: NFS export requires an index dir, falling back to nfs_export=off.\n");
+ if (ovl_upper_mnt(ofs) && ofs->config.nfs_export) {
+ pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n");
ofs->config.nfs_export = false;
}
}
if (ofs->config.metacopy && ofs->config.nfs_export) {
- pr_warn("overlayfs: NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
+ pr_warn("NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
ofs->config.nfs_export = false;
}
+ /*
+ * Support encoding decodable file handles with nfs_export=on
+ * and encoding non-decodable file handles with nfs_export=off
+ * if all layers support file handles.
+ */
if (ofs->config.nfs_export)
sb->s_export_op = &ovl_export_operations;
+ else if (!ofs->nofh)
+ sb->s_export_op = &ovl_export_fid_operations;
/* Never override disk quota limits or use reserved space */
- cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
+ cap_lower(creator_cred->cap_effective, CAP_SYS_RESOURCE);
sb->s_magic = OVERLAYFS_SUPER_MAGIC;
- sb->s_op = &ovl_super_operations;
- sb->s_xattr = ovl_xattr_handlers;
+ sb->s_xattr = ovl_xattr_handlers(ofs);
sb->s_fs_info = ofs;
+#ifdef CONFIG_FS_POSIX_ACL
sb->s_flags |= SB_POSIXACL;
+#endif
+ sb->s_iflags |= SB_I_SKIP_SYNC;
+ /*
+ * Ensure that umask handling is done by the filesystems used
+ * for the the upper layer instead of overlayfs as that would
+ * lead to unexpected results.
+ */
+ sb->s_iflags |= SB_I_NOUMASK;
+ sb->s_iflags |= SB_I_EVM_HMAC_UNSUPPORTED;
err = -ENOMEM;
- root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
- if (!root_dentry)
+ sb->s_root = ovl_get_root(sb, ctx->upper.dentry, oe);
+ if (!sb->s_root)
goto out_free_oe;
- root_dentry->d_fsdata = oe;
-
- mntput(upperpath.mnt);
- if (upperpath.dentry) {
- ovl_dentry_set_upper_alias(root_dentry);
- if (ovl_is_impuredir(upperpath.dentry))
- ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
- }
-
- /* Root is always merge -> can have whiteouts */
- ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
- ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry);
- ovl_set_upperdata(d_inode(root_dentry));
- ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
- ovl_dentry_lower(root_dentry), NULL);
-
- sb->s_root = root_dentry;
-
return 0;
out_free_oe:
- ovl_entry_stack_free(oe);
- kfree(oe);
-out_err:
- path_put(&upperpath);
- ovl_free_fs(ofs);
-out:
+ ovl_free_entry(oe);
return err;
}
-static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *raw_data)
+int ovl_fill_super(struct super_block *sb, struct fs_context *fc)
{
- return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
+ struct ovl_fs *ofs = sb->s_fs_info;
+ int err;
+
+ err = -EIO;
+ if (WARN_ON(fc->user_ns != current_user_ns()))
+ goto out_err;
+
+ ovl_set_d_op(sb);
+
+ if (!ofs->creator_cred) {
+ err = -ENOMEM;
+ ofs->creator_cred = prepare_creds();
+ if (!ofs->creator_cred)
+ goto out_err;
+ }
+
+ with_ovl_creds(sb)
+ err = ovl_fill_super_creds(fc, sb);
+
+out_err:
+ if (err) {
+ ovl_free_fs(ofs);
+ sb->s_fs_info = NULL;
+ }
+
+ return err;
}
-static struct file_system_type ovl_fs_type = {
- .owner = THIS_MODULE,
- .name = "overlay",
- .mount = ovl_mount,
- .kill_sb = kill_anon_super,
+struct file_system_type ovl_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "overlay",
+ .init_fs_context = ovl_init_fs_context,
+ .parameters = ovl_parameter_spec,
+ .fs_flags = FS_USERNS_MOUNT,
+ .kill_sb = kill_anon_super,
};
MODULE_ALIAS_FS("overlay");
@@ -1590,14 +1599,16 @@ static int __init ovl_init(void)
ovl_inode_cachep = kmem_cache_create("ovl_inode",
sizeof(struct ovl_inode), 0,
(SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
ovl_inode_init_once);
if (ovl_inode_cachep == NULL)
return -ENOMEM;
err = register_filesystem(&ovl_fs_type);
- if (err)
- kmem_cache_destroy(ovl_inode_cachep);
+ if (!err)
+ return 0;
+
+ kmem_cache_destroy(ovl_inode_cachep);
return err;
}
@@ -1612,7 +1623,6 @@ static void __exit ovl_exit(void)
*/
rcu_barrier();
kmem_cache_destroy(ovl_inode_cachep);
-
}
module_init(ovl_init);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 7c01327b1852..94986d11a166 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2011 Novell Inc.
* Copyright (C) 2016 Red Hat, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
*/
#include <linux/fs.h>
@@ -13,48 +10,65 @@
#include <linux/cred.h>
#include <linux/xattr.h>
#include <linux/exportfs.h>
+#include <linux/file.h>
+#include <linux/fileattr.h>
#include <linux/uuid.h>
#include <linux/namei.h>
#include <linux/ratelimit.h>
+#include <linux/overflow.h>
#include "overlayfs.h"
+/* Get write access to upper mnt - may fail if upper sb was remounted ro */
+int ovl_get_write_access(struct dentry *dentry)
+{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+ return mnt_get_write_access(ovl_upper_mnt(ofs));
+}
+
+/* Get write access to upper sb - may block if upper sb is frozen */
+void ovl_start_write(struct dentry *dentry)
+{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+ sb_start_write(ovl_upper_mnt(ofs)->mnt_sb);
+}
+
int ovl_want_write(struct dentry *dentry)
{
- struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
- return mnt_want_write(ofs->upper_mnt);
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+ return mnt_want_write(ovl_upper_mnt(ofs));
+}
+
+void ovl_put_write_access(struct dentry *dentry)
+{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+ mnt_put_write_access(ovl_upper_mnt(ofs));
+}
+
+void ovl_end_write(struct dentry *dentry)
+{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+ sb_end_write(ovl_upper_mnt(ofs)->mnt_sb);
}
void ovl_drop_write(struct dentry *dentry)
{
- struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
- mnt_drop_write(ofs->upper_mnt);
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+ mnt_drop_write(ovl_upper_mnt(ofs));
}
struct dentry *ovl_workdir(struct dentry *dentry)
{
- struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
return ofs->workdir;
}
const struct cred *ovl_override_creds(struct super_block *sb)
{
- struct ovl_fs *ofs = sb->s_fs_info;
+ struct ovl_fs *ofs = OVL_FS(sb);
return override_creds(ofs->creator_cred);
}
-struct super_block *ovl_same_sb(struct super_block *sb)
-{
- struct ovl_fs *ofs = sb->s_fs_info;
-
- if (!ofs->numlowerfs)
- return ofs->upper_mnt->mnt_sb;
- else if (ofs->numlowerfs == 1 && !ofs->upper_mnt)
- return ofs->lower_fs[0].sb;
- else
- return NULL;
-}
-
/*
* Check if underlying fs supports file handles and try to determine encoding
* type, in order to deduce maximum inode number used by fs.
@@ -65,7 +79,10 @@ struct super_block *ovl_same_sb(struct super_block *sb)
*/
int ovl_can_decode_fh(struct super_block *sb)
{
- if (!sb->s_export_op || !sb->s_export_op->fh_to_dentry)
+ if (!capable(CAP_DAC_READ_SEARCH))
+ return 0;
+
+ if (!exportfs_can_decode_fh(sb->s_export_op))
return 0;
return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN;
@@ -73,15 +90,15 @@ int ovl_can_decode_fh(struct super_block *sb)
struct dentry *ovl_indexdir(struct super_block *sb)
{
- struct ovl_fs *ofs = sb->s_fs_info;
+ struct ovl_fs *ofs = OVL_FS(sb);
- return ofs->indexdir;
+ return ofs->config.index ? ofs->workdir : NULL;
}
/* Index all files on copy up. For now only enabled for NFS export */
bool ovl_index_all(struct super_block *sb)
{
- struct ovl_fs *ofs = sb->s_fs_info;
+ struct ovl_fs *ofs = OVL_FS(sb);
return ofs->config.nfs_export && ofs->config.index;
}
@@ -89,40 +106,117 @@ bool ovl_index_all(struct super_block *sb)
/* Verify lower origin on lookup. For now only enabled for NFS export */
bool ovl_verify_lower(struct super_block *sb)
{
- struct ovl_fs *ofs = sb->s_fs_info;
+ struct ovl_fs *ofs = OVL_FS(sb);
return ofs->config.nfs_export && ofs->config.index;
}
+struct ovl_path *ovl_stack_alloc(unsigned int n)
+{
+ return kcalloc(n, sizeof(struct ovl_path), GFP_KERNEL);
+}
+
+void ovl_stack_cpy(struct ovl_path *dst, struct ovl_path *src, unsigned int n)
+{
+ unsigned int i;
+
+ memcpy(dst, src, sizeof(struct ovl_path) * n);
+ for (i = 0; i < n; i++)
+ dget(src[i].dentry);
+}
+
+void ovl_stack_put(struct ovl_path *stack, unsigned int n)
+{
+ unsigned int i;
+
+ for (i = 0; stack && i < n; i++)
+ dput(stack[i].dentry);
+}
+
+void ovl_stack_free(struct ovl_path *stack, unsigned int n)
+{
+ ovl_stack_put(stack, n);
+ kfree(stack);
+}
+
struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
{
- size_t size = offsetof(struct ovl_entry, lowerstack[numlower]);
- struct ovl_entry *oe = kzalloc(size, GFP_KERNEL);
+ struct ovl_entry *oe;
+ oe = kzalloc(struct_size(oe, __lowerstack, numlower), GFP_KERNEL);
if (oe)
- oe->numlower = numlower;
+ oe->__numlower = numlower;
return oe;
}
+void ovl_free_entry(struct ovl_entry *oe)
+{
+ ovl_stack_put(ovl_lowerstack(oe), ovl_numlower(oe));
+ kfree(oe);
+}
+
+#define OVL_D_REVALIDATE (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE)
+
bool ovl_dentry_remote(struct dentry *dentry)
{
- return dentry->d_flags &
- (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE |
- DCACHE_OP_REAL);
+ return dentry->d_flags & OVL_D_REVALIDATE;
+}
+
+void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *realdentry)
+{
+ if (!ovl_dentry_remote(realdentry))
+ return;
+
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags |= realdentry->d_flags & OVL_D_REVALIDATE;
+ spin_unlock(&dentry->d_lock);
+}
+
+void ovl_dentry_init_reval(struct dentry *dentry, struct dentry *upperdentry,
+ struct ovl_entry *oe)
+{
+ return ovl_dentry_init_flags(dentry, upperdentry, oe, OVL_D_REVALIDATE);
+}
+
+void ovl_dentry_init_flags(struct dentry *dentry, struct dentry *upperdentry,
+ struct ovl_entry *oe, unsigned int mask)
+{
+ struct ovl_path *lowerstack = ovl_lowerstack(oe);
+ unsigned int i, flags = 0;
+
+ if (upperdentry)
+ flags |= upperdentry->d_flags;
+ for (i = 0; i < ovl_numlower(oe) && lowerstack[i].dentry; i++)
+ flags |= lowerstack[i].dentry->d_flags;
+
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags &= ~mask;
+ dentry->d_flags |= flags & mask;
+ spin_unlock(&dentry->d_lock);
}
bool ovl_dentry_weird(struct dentry *dentry)
{
- return dentry->d_flags & (DCACHE_NEED_AUTOMOUNT |
- DCACHE_MANAGE_TRANSIT |
- DCACHE_OP_HASH |
- DCACHE_OP_COMPARE);
+ if (!d_can_lookup(dentry) && !d_is_file(dentry) && !d_is_symlink(dentry))
+ return true;
+
+ if (dentry->d_flags & (DCACHE_NEED_AUTOMOUNT | DCACHE_MANAGE_TRANSIT))
+ return true;
+
+ /*
+ * Exceptionally for layers with casefold, we accept that they have
+ * their own hash and compare operations
+ */
+ if (sb_has_encoding(dentry->d_sb))
+ return false;
+
+ return dentry->d_flags & (DCACHE_OP_HASH | DCACHE_OP_COMPARE);
}
enum ovl_path_type ovl_path_type(struct dentry *dentry)
{
- struct ovl_entry *oe = dentry->d_fsdata;
+ struct ovl_entry *oe = OVL_E(dentry);
enum ovl_path_type type = 0;
if (ovl_dentry_upper(dentry)) {
@@ -131,7 +225,7 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry)
/*
* Non-dir dentry can hold lower dentry of its copy up origin.
*/
- if (oe->numlower) {
+ if (ovl_numlower(oe)) {
if (ovl_test_flag(OVL_CONST_INO, d_inode(dentry)))
type |= __OVL_PATH_ORIGIN;
if (d_is_dir(dentry) ||
@@ -139,7 +233,7 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry)
type |= __OVL_PATH_MERGE;
}
} else {
- if (oe->numlower > 1)
+ if (ovl_numlower(oe) > 1)
type |= __OVL_PATH_MERGE;
}
return type;
@@ -147,19 +241,20 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry)
void ovl_path_upper(struct dentry *dentry, struct path *path)
{
- struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
- path->mnt = ofs->upper_mnt;
+ path->mnt = ovl_upper_mnt(ofs);
path->dentry = ovl_dentry_upper(dentry);
}
void ovl_path_lower(struct dentry *dentry, struct path *path)
{
- struct ovl_entry *oe = dentry->d_fsdata;
+ struct ovl_entry *oe = OVL_E(dentry);
+ struct ovl_path *lowerpath = ovl_lowerstack(oe);
- if (oe->numlower) {
- path->mnt = oe->lowerstack[0].layer->mnt;
- path->dentry = oe->lowerstack[0].dentry;
+ if (ovl_numlower(oe)) {
+ path->mnt = lowerpath->layer->mnt;
+ path->dentry = lowerpath->dentry;
} else {
*path = (struct path) { };
}
@@ -167,11 +262,19 @@ void ovl_path_lower(struct dentry *dentry, struct path *path)
void ovl_path_lowerdata(struct dentry *dentry, struct path *path)
{
- struct ovl_entry *oe = dentry->d_fsdata;
+ struct ovl_entry *oe = OVL_E(dentry);
+ struct ovl_path *lowerdata = ovl_lowerdata(oe);
+ struct dentry *lowerdata_dentry = ovl_lowerdata_dentry(oe);
- if (oe->numlower) {
- path->mnt = oe->lowerstack[oe->numlower - 1].layer->mnt;
- path->dentry = oe->lowerstack[oe->numlower - 1].dentry;
+ if (lowerdata_dentry) {
+ path->dentry = lowerdata_dentry;
+ /*
+ * Pairs with smp_wmb() in ovl_dentry_set_lowerdata().
+ * Make sure that if lowerdata->dentry is visible, then
+ * datapath->layer is visible as well.
+ */
+ smp_rmb();
+ path->mnt = READ_ONCE(lowerdata->layer)->mnt;
} else {
*path = (struct path) { };
}
@@ -189,36 +292,73 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
return type;
}
+enum ovl_path_type ovl_path_realdata(struct dentry *dentry, struct path *path)
+{
+ enum ovl_path_type type = ovl_path_type(dentry);
+
+ WARN_ON_ONCE(d_is_dir(dentry));
+
+ if (!OVL_TYPE_UPPER(type) || OVL_TYPE_MERGE(type))
+ ovl_path_lowerdata(dentry, path);
+ else
+ ovl_path_upper(dentry, path);
+
+ return type;
+}
+
struct dentry *ovl_dentry_upper(struct dentry *dentry)
{
- return ovl_upperdentry_dereference(OVL_I(d_inode(dentry)));
+ struct inode *inode = d_inode(dentry);
+
+ return inode ? ovl_upperdentry_dereference(OVL_I(inode)) : NULL;
}
struct dentry *ovl_dentry_lower(struct dentry *dentry)
{
- struct ovl_entry *oe = dentry->d_fsdata;
+ struct ovl_entry *oe = OVL_E(dentry);
- return oe->numlower ? oe->lowerstack[0].dentry : NULL;
+ return ovl_numlower(oe) ? ovl_lowerstack(oe)->dentry : NULL;
}
-struct ovl_layer *ovl_layer_lower(struct dentry *dentry)
+const struct ovl_layer *ovl_layer_lower(struct dentry *dentry)
{
- struct ovl_entry *oe = dentry->d_fsdata;
+ struct ovl_entry *oe = OVL_E(dentry);
- return oe->numlower ? oe->lowerstack[0].layer : NULL;
+ return ovl_numlower(oe) ? ovl_lowerstack(oe)->layer : NULL;
}
/*
* ovl_dentry_lower() could return either a data dentry or metacopy dentry
- * dependig on what is stored in lowerstack[0]. At times we need to find
+ * depending on what is stored in lowerstack[0]. At times we need to find
* lower dentry which has data (and not metacopy dentry). This helper
* returns the lower data dentry.
*/
struct dentry *ovl_dentry_lowerdata(struct dentry *dentry)
{
- struct ovl_entry *oe = dentry->d_fsdata;
+ return ovl_lowerdata_dentry(OVL_E(dentry));
+}
+
+int ovl_dentry_set_lowerdata(struct dentry *dentry, struct ovl_path *datapath)
+{
+ struct ovl_entry *oe = OVL_E(dentry);
+ struct ovl_path *lowerdata = ovl_lowerdata(oe);
+ struct dentry *datadentry = datapath->dentry;
+
+ if (WARN_ON_ONCE(ovl_numlower(oe) <= 1))
+ return -EIO;
+
+ WRITE_ONCE(lowerdata->layer, datapath->layer);
+ /*
+ * Pairs with smp_rmb() in ovl_path_lowerdata().
+ * Make sure that if lowerdata->dentry is visible, then
+ * lowerdata->layer is visible as well.
+ */
+ smp_wmb();
+ WRITE_ONCE(lowerdata->dentry, dget(datadentry));
- return oe->numlower ? oe->lowerstack[oe->numlower - 1].dentry : NULL;
+ ovl_dentry_update_reval(dentry, datadentry);
+
+ return 0;
}
struct dentry *ovl_dentry_real(struct dentry *dentry)
@@ -231,6 +371,21 @@ struct dentry *ovl_i_dentry_upper(struct inode *inode)
return ovl_upperdentry_dereference(OVL_I(inode));
}
+struct inode *ovl_i_path_real(struct inode *inode, struct path *path)
+{
+ struct ovl_path *lowerpath = ovl_lowerpath(OVL_I_E(inode));
+
+ path->dentry = ovl_i_dentry_upper(inode);
+ if (!path->dentry) {
+ path->dentry = lowerpath->dentry;
+ path->mnt = lowerpath->layer->mnt;
+ } else {
+ path->mnt = ovl_upper_mnt(OVL_FS(inode->i_sb));
+ }
+
+ return path->dentry ? d_inode_rcu(path->dentry) : NULL;
+}
+
struct inode *ovl_inode_upper(struct inode *inode)
{
struct dentry *upperdentry = ovl_i_dentry_upper(inode);
@@ -240,7 +395,9 @@ struct inode *ovl_inode_upper(struct inode *inode)
struct inode *ovl_inode_lower(struct inode *inode)
{
- return OVL_I(inode)->lower;
+ struct ovl_path *lowerpath = ovl_lowerpath(OVL_I_E(inode));
+
+ return lowerpath ? d_inode(lowerpath->dentry) : NULL;
}
struct inode *ovl_inode_real(struct inode *inode)
@@ -251,10 +408,12 @@ struct inode *ovl_inode_real(struct inode *inode)
/* Return inode which contains lower data. Do not return metacopy */
struct inode *ovl_inode_lowerdata(struct inode *inode)
{
+ struct dentry *lowerdata = ovl_lowerdata_dentry(OVL_I_E(inode));
+
if (WARN_ON(!S_ISREG(inode->i_mode)))
return NULL;
- return OVL_I(inode)->lowerdata ?: ovl_inode_lower(inode);
+ return lowerdata ? d_inode(lowerdata) : NULL;
}
/* Return real inode which contains data. Does not return metacopy inode */
@@ -269,9 +428,15 @@ struct inode *ovl_inode_realdata(struct inode *inode)
return ovl_inode_lowerdata(inode);
}
+const char *ovl_lowerdata_redirect(struct inode *inode)
+{
+ return inode && S_ISREG(inode->i_mode) ?
+ OVL_I(inode)->lowerdata_redirect : NULL;
+}
+
struct ovl_dir_cache *ovl_dir_cache(struct inode *inode)
{
- return OVL_I(inode)->cache;
+ return inode && S_ISDIR(inode->i_mode) ? OVL_I(inode)->cache : NULL;
}
void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache)
@@ -281,17 +446,17 @@ void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache)
void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry)
{
- set_bit(flag, &OVL_E(dentry)->flags);
+ set_bit(flag, OVL_E_FLAGS(dentry));
}
void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry)
{
- clear_bit(flag, &OVL_E(dentry)->flags);
+ clear_bit(flag, OVL_E_FLAGS(dentry));
}
bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry)
{
- return test_bit(flag, &OVL_E(dentry)->flags);
+ return test_bit(flag, OVL_E_FLAGS(dentry));
}
bool ovl_dentry_is_opaque(struct dentry *dentry)
@@ -309,6 +474,33 @@ void ovl_dentry_set_opaque(struct dentry *dentry)
ovl_dentry_set_flag(OVL_E_OPAQUE, dentry);
}
+bool ovl_dentry_has_xwhiteouts(struct dentry *dentry)
+{
+ return ovl_dentry_test_flag(OVL_E_XWHITEOUTS, dentry);
+}
+
+void ovl_dentry_set_xwhiteouts(struct dentry *dentry)
+{
+ ovl_dentry_set_flag(OVL_E_XWHITEOUTS, dentry);
+}
+
+/*
+ * ovl_layer_set_xwhiteouts() is called before adding the overlay dir
+ * dentry to dcache, while readdir of that same directory happens after
+ * the overlay dir dentry is in dcache, so if some cpu observes that
+ * ovl_dentry_is_xwhiteouts(), it will also observe layer->has_xwhiteouts
+ * for the layers where xwhiteouts marker was found in that merge dir.
+ */
+void ovl_layer_set_xwhiteouts(struct ovl_fs *ofs,
+ const struct ovl_layer *layer)
+{
+ if (layer->has_xwhiteouts)
+ return;
+
+ /* Write once to read-mostly layer properties */
+ ofs->layers[layer->idx].has_xwhiteouts = true;
+}
+
/*
* For hard links and decoded file handles, it's possible for ovl_dentry_upper()
* to return positive, while there's no actual upper alias for the inode.
@@ -381,13 +573,6 @@ bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags)
return !ovl_has_upperdata(d_inode(dentry));
}
-bool ovl_redirect_dir(struct super_block *sb)
-{
- struct ovl_fs *ofs = sb->s_fs_info;
-
- return ofs->config.redirect_dir && !ofs->noxattr;
-}
-
const char *ovl_dentry_get_redirect(struct dentry *dentry)
{
return OVL_I(d_inode(dentry))->redirect;
@@ -401,24 +586,6 @@ void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect)
oi->redirect = redirect;
}
-void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
- struct dentry *lowerdentry, struct dentry *lowerdata)
-{
- struct inode *realinode = d_inode(upperdentry ?: lowerdentry);
-
- if (upperdentry)
- OVL_I(inode)->__upperdentry = upperdentry;
- if (lowerdentry)
- OVL_I(inode)->lower = igrab(d_inode(lowerdentry));
- if (lowerdata)
- OVL_I(inode)->lowerdata = igrab(d_inode(lowerdata));
-
- ovl_copyattr(realinode, inode);
- ovl_copyflags(realinode, inode);
- if (!inode->i_ino)
- inode->i_ino = realinode->i_ino;
-}
-
void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
{
struct inode *upperinode = d_inode(upperdentry);
@@ -431,40 +598,38 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
smp_wmb();
OVL_I(inode)->__upperdentry = upperdentry;
if (inode_unhashed(inode)) {
- if (!inode->i_ino)
- inode->i_ino = upperinode->i_ino;
inode->i_private = upperinode;
__insert_inode_hash(inode, (unsigned long) upperinode);
}
}
-static void ovl_dentry_version_inc(struct dentry *dentry, bool impurity)
+static void ovl_dir_version_inc(struct dentry *dentry, bool impurity)
{
struct inode *inode = d_inode(dentry);
WARN_ON(!inode_is_locked(inode));
+ WARN_ON(!d_is_dir(dentry));
/*
- * Version is used by readdir code to keep cache consistent. For merge
- * dirs all changes need to be noted. For non-merge dirs, cache only
- * contains impure (ones which have been copied up and have origins)
- * entries, so only need to note changes to impure entries.
+ * Version is used by readdir code to keep cache consistent.
+ * For merge dirs (or dirs with origin) all changes need to be noted.
+ * For non-merge dirs, cache contains only impure entries (i.e. ones
+ * which have been copied up and have origins), so only need to note
+ * changes to impure entries.
*/
- if (OVL_TYPE_MERGE(ovl_path_type(dentry)) || impurity)
+ if (!ovl_dir_is_real(inode) || impurity)
OVL_I(inode)->version++;
}
void ovl_dir_modified(struct dentry *dentry, bool impurity)
{
/* Copy mtime/ctime */
- ovl_copyattr(d_inode(ovl_dentry_upper(dentry)), d_inode(dentry));
+ ovl_copyattr(d_inode(dentry));
- ovl_dentry_version_inc(dentry, impurity);
+ ovl_dir_version_inc(dentry, impurity);
}
-u64 ovl_dentry_version_get(struct dentry *dentry)
+u64 ovl_inode_version_get(struct inode *inode)
{
- struct inode *inode = d_inode(dentry);
-
WARN_ON(!inode_is_locked(inode));
return OVL_I(inode)->version;
}
@@ -476,9 +641,45 @@ bool ovl_is_whiteout(struct dentry *dentry)
return inode && IS_WHITEOUT(inode);
}
-struct file *ovl_path_open(struct path *path, int flags)
+/*
+ * Use this over ovl_is_whiteout for upper and lower files, as it also
+ * handles overlay.whiteout xattr whiteout files.
+ */
+bool ovl_path_is_whiteout(struct ovl_fs *ofs, const struct path *path)
+{
+ return ovl_is_whiteout(path->dentry) ||
+ ovl_path_check_xwhiteout_xattr(ofs, path);
+}
+
+struct file *ovl_path_open(const struct path *path, int flags)
{
- return dentry_open(path, flags | O_NOATIME, current_cred());
+ struct inode *inode = d_inode(path->dentry);
+ struct mnt_idmap *real_idmap = mnt_idmap(path->mnt);
+ int err, acc_mode;
+
+ if (flags & ~(O_ACCMODE | O_LARGEFILE))
+ BUG();
+
+ switch (flags & O_ACCMODE) {
+ case O_RDONLY:
+ acc_mode = MAY_READ;
+ break;
+ case O_WRONLY:
+ acc_mode = MAY_WRITE;
+ break;
+ default:
+ BUG();
+ }
+
+ err = inode_permission(real_idmap, inode, acc_mode | MAY_OPEN);
+ if (err)
+ return ERR_PTR(err);
+
+ /* O_NOATIME is an optimization, don't fail if not permitted */
+ if (inode_owner_or_capable(real_idmap, inode))
+ flags |= O_NOATIME;
+
+ return dentry_open(path, flags, current_cred());
}
/* Caller should hold ovl_inode->lock */
@@ -519,30 +720,44 @@ bool ovl_already_copied_up(struct dentry *dentry, int flags)
return false;
}
+/*
+ * The copy up "transaction" keeps an elevated mnt write count on upper mnt,
+ * but leaves taking freeze protection on upper sb to lower level helpers.
+ */
int ovl_copy_up_start(struct dentry *dentry, int flags)
{
struct inode *inode = d_inode(dentry);
int err;
- err = ovl_inode_lock(inode);
- if (!err && ovl_already_copied_up_locked(dentry, flags)) {
+ err = ovl_inode_lock_interruptible(inode);
+ if (err)
+ return err;
+
+ if (ovl_already_copied_up_locked(dentry, flags))
err = 1; /* Already copied up */
- ovl_inode_unlock(inode);
- }
+ else
+ err = ovl_get_write_access(dentry);
+ if (err)
+ goto out_unlock;
+
+ return 0;
+out_unlock:
+ ovl_inode_unlock(inode);
return err;
}
void ovl_copy_up_end(struct dentry *dentry)
{
+ ovl_put_write_access(dentry);
ovl_inode_unlock(d_inode(dentry));
}
-bool ovl_check_origin_xattr(struct dentry *dentry)
+bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path)
{
int res;
- res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0);
+ res = ovl_path_getxattr(ofs, path, OVL_XATTR_ORIGIN, NULL, 0);
/* Zero size value means "copied up but origin unknown" */
if (res >= 0)
@@ -551,35 +766,136 @@ bool ovl_check_origin_xattr(struct dentry *dentry)
return false;
}
-bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
+bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path)
{
+ struct dentry *dentry = path->dentry;
int res;
- char val;
- if (!d_is_dir(dentry))
+ /* xattr.whiteout must be a zero size regular file */
+ if (!d_is_reg(dentry) || i_size_read(d_inode(dentry)) != 0)
return false;
- res = vfs_getxattr(dentry, name, &val, 1);
- if (res == 1 && val == 'y')
- return true;
+ res = ovl_path_getxattr(ofs, path, OVL_XATTR_XWHITEOUT, NULL, 0);
+ return res >= 0;
+}
+
+/*
+ * Load persistent uuid from xattr into s_uuid if found, or store a new
+ * random generated value in s_uuid and in xattr.
+ */
+bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs,
+ const struct path *upperpath)
+{
+ bool set = false;
+ uuid_t uuid;
+ int res;
+
+ /* Try to load existing persistent uuid */
+ res = ovl_path_getxattr(ofs, upperpath, OVL_XATTR_UUID, uuid.b,
+ UUID_SIZE);
+ if (res == UUID_SIZE)
+ goto set_uuid;
+
+ if (res != -ENODATA)
+ goto fail;
+
+ /*
+ * With uuid=auto, if uuid xattr is found, it will be used.
+ * If uuid xattrs is not found, generate a persistent uuid only on mount
+ * of new overlays where upper root dir is not yet marked as impure.
+ * An upper dir is marked as impure on copy up or lookup of its subdirs.
+ */
+ if (ofs->config.uuid == OVL_UUID_AUTO) {
+ res = ovl_path_getxattr(ofs, upperpath, OVL_XATTR_IMPURE, NULL,
+ 0);
+ if (res > 0) {
+ /* Any mount of old overlay - downgrade to uuid=null */
+ ofs->config.uuid = OVL_UUID_NULL;
+ return true;
+ } else if (res == -ENODATA) {
+ /* First mount of new overlay - upgrade to uuid=on */
+ ofs->config.uuid = OVL_UUID_ON;
+ } else if (res < 0) {
+ goto fail;
+ }
+
+ }
+
+ /* Generate overlay instance uuid */
+ uuid_gen(&uuid);
+
+ /* Try to store persistent uuid */
+ set = true;
+ res = ovl_setxattr(ofs, upperpath->dentry, OVL_XATTR_UUID, uuid.b,
+ UUID_SIZE);
+ if (res)
+ goto fail;
+set_uuid:
+ super_set_uuid(sb, uuid.b, sizeof(uuid));
+ return true;
+
+fail:
+ ofs->config.uuid = OVL_UUID_NULL;
+ pr_warn("failed to %s uuid (%pd2, err=%i); falling back to uuid=null.\n",
+ set ? "set" : "get", upperpath->dentry, res);
return false;
}
-int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
- const char *name, const void *value, size_t size,
+char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path *path,
+ enum ovl_xattr ox)
+{
+ int res;
+ char val;
+
+ if (!d_is_dir(path->dentry))
+ return 0;
+
+ res = ovl_path_getxattr(ofs, path, ox, &val, 1);
+ return res == 1 ? val : 0;
+}
+
+#define OVL_XATTR_OPAQUE_POSTFIX "opaque"
+#define OVL_XATTR_REDIRECT_POSTFIX "redirect"
+#define OVL_XATTR_ORIGIN_POSTFIX "origin"
+#define OVL_XATTR_IMPURE_POSTFIX "impure"
+#define OVL_XATTR_NLINK_POSTFIX "nlink"
+#define OVL_XATTR_UPPER_POSTFIX "upper"
+#define OVL_XATTR_UUID_POSTFIX "uuid"
+#define OVL_XATTR_METACOPY_POSTFIX "metacopy"
+#define OVL_XATTR_PROTATTR_POSTFIX "protattr"
+#define OVL_XATTR_XWHITEOUT_POSTFIX "whiteout"
+
+#define OVL_XATTR_TAB_ENTRY(x) \
+ [x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \
+ [true] = OVL_XATTR_USER_PREFIX x ## _POSTFIX }
+
+const char *const ovl_xattr_table[][2] = {
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_OPAQUE),
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_REDIRECT),
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_ORIGIN),
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_IMPURE),
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_NLINK),
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_UPPER),
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_UUID),
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY),
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR),
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUT),
+};
+
+int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry,
+ enum ovl_xattr ox, const void *value, size_t size,
int xerr)
{
int err;
- struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
if (ofs->noxattr)
return xerr;
- err = ovl_do_setxattr(upperdentry, name, value, size, 0);
+ err = ovl_setxattr(ofs, upperdentry, ox, value, size);
if (err == -EOPNOTSUPP) {
- pr_warn("overlayfs: cannot set %s xattr on upper\n", name);
+ pr_warn("cannot set %s xattr on upper\n", ovl_xattr(ofs, ox));
ofs->noxattr = true;
return xerr;
}
@@ -589,6 +905,7 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
int err;
if (ovl_test_flag(OVL_IMPURE, d_inode(dentry)))
@@ -598,30 +915,96 @@ int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
* Do not fail when upper doesn't support xattrs.
* Upper inodes won't have origin nor redirect xattr anyway.
*/
- err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE,
- "y", 1, 0);
+ err = ovl_check_setxattr(ofs, upperdentry, OVL_XATTR_IMPURE, "y", 1, 0);
if (!err)
ovl_set_flag(OVL_IMPURE, d_inode(dentry));
return err;
}
-void ovl_set_flag(unsigned long flag, struct inode *inode)
-{
- set_bit(flag, &OVL_I(inode)->flags);
-}
-void ovl_clear_flag(unsigned long flag, struct inode *inode)
+#define OVL_PROTATTR_MAX 32 /* Reserved for future flags */
+
+void ovl_check_protattr(struct inode *inode, struct dentry *upper)
{
- clear_bit(flag, &OVL_I(inode)->flags);
+ struct ovl_fs *ofs = OVL_FS(inode->i_sb);
+ u32 iflags = inode->i_flags & OVL_PROT_I_FLAGS_MASK;
+ char buf[OVL_PROTATTR_MAX+1];
+ int res, n;
+
+ res = ovl_getxattr_upper(ofs, upper, OVL_XATTR_PROTATTR, buf,
+ OVL_PROTATTR_MAX);
+ if (res < 0)
+ return;
+
+ /*
+ * Initialize inode flags from overlay.protattr xattr and upper inode
+ * flags. If upper inode has those fileattr flags set (i.e. from old
+ * kernel), we do not clear them on ovl_get_inode(), but we will clear
+ * them on next fileattr_set().
+ */
+ for (n = 0; n < res; n++) {
+ if (buf[n] == 'a')
+ iflags |= S_APPEND;
+ else if (buf[n] == 'i')
+ iflags |= S_IMMUTABLE;
+ else
+ break;
+ }
+
+ if (!res || n < res) {
+ pr_warn_ratelimited("incompatible overlay.protattr format (%pd2, len=%d)\n",
+ upper, res);
+ } else {
+ inode_set_flags(inode, iflags, OVL_PROT_I_FLAGS_MASK);
+ }
}
-bool ovl_test_flag(unsigned long flag, struct inode *inode)
+int ovl_set_protattr(struct inode *inode, struct dentry *upper,
+ struct file_kattr *fa)
{
- return test_bit(flag, &OVL_I(inode)->flags);
+ struct ovl_fs *ofs = OVL_FS(inode->i_sb);
+ char buf[OVL_PROTATTR_MAX];
+ int len = 0, err = 0;
+ u32 iflags = 0;
+
+ BUILD_BUG_ON(HWEIGHT32(OVL_PROT_FS_FLAGS_MASK) > OVL_PROTATTR_MAX);
+
+ if (fa->flags & FS_APPEND_FL) {
+ buf[len++] = 'a';
+ iflags |= S_APPEND;
+ }
+ if (fa->flags & FS_IMMUTABLE_FL) {
+ buf[len++] = 'i';
+ iflags |= S_IMMUTABLE;
+ }
+
+ /*
+ * Do not allow to set protection flags when upper doesn't support
+ * xattrs, because we do not set those fileattr flags on upper inode.
+ * Remove xattr if it exist and all protection flags are cleared.
+ */
+ if (len) {
+ err = ovl_check_setxattr(ofs, upper, OVL_XATTR_PROTATTR,
+ buf, len, -EPERM);
+ } else if (inode->i_flags & OVL_PROT_I_FLAGS_MASK) {
+ err = ovl_removexattr(ofs, upper, OVL_XATTR_PROTATTR);
+ if (err == -EOPNOTSUPP || err == -ENODATA)
+ err = 0;
+ }
+ if (err)
+ return err;
+
+ inode_set_flags(inode, iflags, OVL_PROT_I_FLAGS_MASK);
+
+ /* Mask out the fileattr flags that should not be set in upper inode */
+ fa->flags &= ~OVL_PROT_FS_FLAGS_MASK;
+ fa->fsx_xflags &= ~OVL_PROT_FSX_FLAGS_MASK;
+
+ return 0;
}
-/**
+/*
* Caller must hold a reference to inode to prevent it from being freed while
* it is marked inuse.
*/
@@ -631,8 +1014,8 @@ bool ovl_inuse_trylock(struct dentry *dentry)
bool locked = false;
spin_lock(&inode->i_lock);
- if (!(inode->i_state & I_OVL_INUSE)) {
- inode->i_state |= I_OVL_INUSE;
+ if (!(inode_state_read(inode) & I_OVL_INUSE)) {
+ inode_state_set(inode, I_OVL_INUSE);
locked = true;
}
spin_unlock(&inode->i_lock);
@@ -646,12 +1029,24 @@ void ovl_inuse_unlock(struct dentry *dentry)
struct inode *inode = d_inode(dentry);
spin_lock(&inode->i_lock);
- WARN_ON(!(inode->i_state & I_OVL_INUSE));
- inode->i_state &= ~I_OVL_INUSE;
+ WARN_ON(!(inode_state_read(inode) & I_OVL_INUSE));
+ inode_state_clear(inode, I_OVL_INUSE);
spin_unlock(&inode->i_lock);
}
}
+bool ovl_is_inuse(struct dentry *dentry)
+{
+ struct inode *inode = d_inode(dentry);
+ bool inuse;
+
+ spin_lock(&inode->i_lock);
+ inuse = (inode_state_read(inode) & I_OVL_INUSE);
+ spin_unlock(&inode->i_lock);
+
+ return inuse;
+}
+
/*
* Does this overlay dentry need to be indexed on copy up?
*/
@@ -676,22 +1071,28 @@ bool ovl_need_index(struct dentry *dentry)
/* Caller must hold OVL_I(inode)->lock */
static void ovl_cleanup_index(struct dentry *dentry)
{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
- struct inode *dir = indexdir->d_inode;
struct dentry *lowerdentry = ovl_dentry_lower(dentry);
struct dentry *upperdentry = ovl_dentry_upper(dentry);
struct dentry *index = NULL;
struct inode *inode;
struct qstr name = { };
+ bool got_write = false;
int err;
- err = ovl_get_index_name(lowerdentry, &name);
+ err = ovl_get_index_name(ofs, lowerdentry, &name);
+ if (err)
+ goto fail;
+
+ err = ovl_want_write(dentry);
if (err)
goto fail;
+ got_write = true;
inode = d_inode(upperdentry);
if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) {
- pr_warn_ratelimited("overlayfs: cleanup linked index (%pd2, ino=%lu, nlink=%u)\n",
+ pr_warn_ratelimited("cleanup linked index (%pd2, ino=%lu, nlink=%u)\n",
upperdentry, inode->i_ino, inode->i_nlink);
/*
* We either have a bug with persistent union nlink or a lower
@@ -707,30 +1108,30 @@ static void ovl_cleanup_index(struct dentry *dentry)
goto out;
}
- inode_lock_nested(dir, I_MUTEX_PARENT);
- index = lookup_one_len(name.name, indexdir, name.len);
+ index = ovl_lookup_upper_unlocked(ofs, name.name, indexdir, name.len);
err = PTR_ERR(index);
if (IS_ERR(index)) {
index = NULL;
} else if (ovl_index_all(dentry->d_sb)) {
/* Whiteout orphan index to block future open by handle */
- err = ovl_cleanup_and_whiteout(indexdir, dir, index);
+ err = ovl_cleanup_and_whiteout(OVL_FS(dentry->d_sb),
+ indexdir, index);
} else {
/* Cleanup orphan index entries */
- err = ovl_cleanup(dir, index);
+ err = ovl_cleanup(ofs, indexdir, index);
}
-
- inode_unlock(dir);
if (err)
goto fail;
out:
+ if (got_write)
+ ovl_drop_write(dentry);
kfree(name.name);
dput(index);
return;
fail:
- pr_err("overlayfs: cleanup index of '%pd2' failed (%i)\n", dentry, err);
+ pr_err("cleanup index of '%pd2' failed (%i)\n", dentry, err);
goto out;
}
@@ -741,7 +1142,6 @@ fail:
int ovl_nlink_start(struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
- const struct cred *old_cred;
int err;
if (WARN_ON(!inode))
@@ -767,26 +1167,34 @@ int ovl_nlink_start(struct dentry *dentry)
return err;
}
- err = ovl_inode_lock(inode);
+ err = ovl_inode_lock_interruptible(inode);
if (err)
return err;
+ err = ovl_want_write(dentry);
+ if (err)
+ goto out_unlock;
+
if (d_is_dir(dentry) || !ovl_test_flag(OVL_INDEX, inode))
- goto out;
+ return 0;
- old_cred = ovl_override_creds(dentry->d_sb);
/*
* The overlay inode nlink should be incremented/decremented IFF the
* upper operation succeeds, along with nlink change of upper inode.
* Therefore, before link/unlink/rename, we store the union nlink
* value relative to the upper inode nlink in an upper inode xattr.
*/
- err = ovl_set_nlink_upper(dentry);
- revert_creds(old_cred);
-
-out:
+ with_ovl_creds(dentry->d_sb)
+ err = ovl_set_nlink_upper(dentry);
if (err)
- ovl_inode_unlock(inode);
+ goto out_drop_write;
+
+ return 0;
+
+out_drop_write:
+ ovl_drop_write(dentry);
+out_unlock:
+ ovl_inode_unlock(inode);
return err;
}
@@ -795,25 +1203,30 @@ void ovl_nlink_end(struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
- if (ovl_test_flag(OVL_INDEX, inode) && inode->i_nlink == 0) {
- const struct cred *old_cred;
+ ovl_drop_write(dentry);
- old_cred = ovl_override_creds(dentry->d_sb);
- ovl_cleanup_index(dentry);
- revert_creds(old_cred);
+ if (ovl_test_flag(OVL_INDEX, inode) && inode->i_nlink == 0) {
+ with_ovl_creds(dentry->d_sb)
+ ovl_cleanup_index(dentry);
}
ovl_inode_unlock(inode);
}
-int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
+int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *work,
+ struct dentry *upperdir, struct dentry *upper)
{
- /* Workdir should not be the same as upperdir */
- if (workdir == upperdir)
- goto err;
+ struct dentry *trap;
/* Workdir should not be subdir of upperdir and vice versa */
- if (lock_rename(workdir, upperdir) != NULL)
+ trap = lock_rename(workdir, upperdir);
+ if (IS_ERR(trap))
+ goto err;
+ if (trap)
+ goto err_unlock;
+ if (work && (work->d_parent != workdir || d_unhashed(work)))
+ goto err_unlock;
+ if (upper && (upper->d_parent != upperdir || d_unhashed(upper)))
goto err_unlock;
return 0;
@@ -821,35 +1234,83 @@ int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
err_unlock:
unlock_rename(workdir, upperdir);
err:
- pr_err("overlayfs: failed to lock workdir+upperdir\n");
+ pr_err("failed to lock workdir+upperdir\n");
return -EIO;
}
-/* err < 0, 0 if no metacopy xattr, 1 if metacopy xattr found */
-int ovl_check_metacopy_xattr(struct dentry *dentry)
+/*
+ * err < 0, 0 if no metacopy xattr, metacopy data size if xattr found.
+ * an empty xattr returns OVL_METACOPY_MIN_SIZE to distinguish from no xattr value.
+ */
+int ovl_check_metacopy_xattr(struct ovl_fs *ofs, const struct path *path,
+ struct ovl_metacopy *data)
{
int res;
/* Only regular files can have metacopy xattr */
- if (!S_ISREG(d_inode(dentry)->i_mode))
+ if (!S_ISREG(d_inode(path->dentry)->i_mode))
return 0;
- res = vfs_getxattr(dentry, OVL_XATTR_METACOPY, NULL, 0);
+ res = ovl_path_getxattr(ofs, path, OVL_XATTR_METACOPY,
+ data, data ? OVL_METACOPY_MAX_SIZE : 0);
if (res < 0) {
if (res == -ENODATA || res == -EOPNOTSUPP)
return 0;
+ /*
+ * getxattr on user.* may fail with EACCES in case there's no
+ * read permission on the inode. Not much we can do, other than
+ * tell the caller that this is not a metacopy inode.
+ */
+ if (ofs->config.userxattr && res == -EACCES)
+ return 0;
goto out;
}
- return 1;
+ if (res == 0) {
+ /* Emulate empty data for zero size metacopy xattr */
+ res = OVL_METACOPY_MIN_SIZE;
+ if (data) {
+ memset(data, 0, res);
+ data->len = res;
+ }
+ } else if (res < OVL_METACOPY_MIN_SIZE) {
+ pr_warn_ratelimited("metacopy file '%pd' has too small xattr\n",
+ path->dentry);
+ return -EIO;
+ } else if (data) {
+ if (data->version != 0) {
+ pr_warn_ratelimited("metacopy file '%pd' has unsupported version\n",
+ path->dentry);
+ return -EIO;
+ }
+ if (res != data->len) {
+ pr_warn_ratelimited("metacopy file '%pd' has invalid xattr size\n",
+ path->dentry);
+ return -EIO;
+ }
+ }
+
+ return res;
out:
- pr_warn_ratelimited("overlayfs: failed to get metacopy (%i)\n", res);
+ pr_warn_ratelimited("failed to get metacopy (%i)\n", res);
return res;
}
+int ovl_set_metacopy_xattr(struct ovl_fs *ofs, struct dentry *d, struct ovl_metacopy *metacopy)
+{
+ size_t len = metacopy->len;
+
+ /* If no flags or digest fall back to empty metacopy file */
+ if (metacopy->version == 0 && metacopy->flags == 0 && metacopy->digest_algo == 0)
+ len = 0;
+
+ return ovl_check_setxattr(ofs, d, OVL_XATTR_METACOPY,
+ metacopy, len, -EOPNOTSUPP);
+}
+
bool ovl_is_metacopy_dentry(struct dentry *dentry)
{
- struct ovl_entry *oe = dentry->d_fsdata;
+ struct ovl_entry *oe = OVL_E(dentry);
if (!d_is_reg(dentry))
return false;
@@ -860,29 +1321,27 @@ bool ovl_is_metacopy_dentry(struct dentry *dentry)
return false;
}
- return (oe->numlower > 1);
+ return (ovl_numlower(oe) > 1);
}
-char *ovl_get_redirect_xattr(struct dentry *dentry, int padding)
+char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int padding)
{
int res;
char *s, *next, *buf = NULL;
- res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, NULL, 0);
- if (res < 0) {
- if (res == -ENODATA || res == -EOPNOTSUPP)
- return NULL;
+ res = ovl_path_getxattr(ofs, path, OVL_XATTR_REDIRECT, NULL, 0);
+ if (res == -ENODATA || res == -EOPNOTSUPP)
+ return NULL;
+ if (res < 0)
goto fail;
- }
+ if (res == 0)
+ goto invalid;
buf = kzalloc(res + padding + 1, GFP_KERNEL);
if (!buf)
return ERR_PTR(-ENOMEM);
- if (res == 0)
- goto invalid;
-
- res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, buf, res);
+ res = ovl_path_getxattr(ofs, path, OVL_XATTR_REDIRECT, buf, res);
if (res < 0)
goto fail;
if (res == 0)
@@ -900,15 +1359,182 @@ char *ovl_get_redirect_xattr(struct dentry *dentry, int padding)
}
return buf;
-
-err_free:
- kfree(buf);
- return ERR_PTR(res);
-fail:
- pr_warn_ratelimited("overlayfs: failed to get redirect (%i)\n", res);
- goto err_free;
invalid:
- pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf);
+ pr_warn_ratelimited("invalid redirect (%s)\n", buf);
res = -EINVAL;
goto err_free;
+fail:
+ pr_warn_ratelimited("failed to get redirect (%i)\n", res);
+err_free:
+ kfree(buf);
+ return ERR_PTR(res);
+}
+
+/* Call with mounter creds as it may open the file */
+int ovl_ensure_verity_loaded(const struct path *datapath)
+{
+ struct inode *inode = d_inode(datapath->dentry);
+ struct file *filp;
+
+ if (!fsverity_active(inode) && IS_VERITY(inode)) {
+ /*
+ * If this inode was not yet opened, the verity info hasn't been
+ * loaded yet, so we need to do that here to force it into memory.
+ */
+ filp = kernel_file_open(datapath, O_RDONLY, current_cred());
+ if (IS_ERR(filp))
+ return PTR_ERR(filp);
+ fput(filp);
+ }
+
+ return 0;
+}
+
+int ovl_validate_verity(struct ovl_fs *ofs,
+ const struct path *metapath,
+ const struct path *datapath)
+{
+ struct ovl_metacopy metacopy_data;
+ u8 actual_digest[FS_VERITY_MAX_DIGEST_SIZE];
+ int xattr_digest_size, digest_size;
+ int xattr_size, err;
+ u8 verity_algo;
+
+ if (!ofs->config.verity_mode ||
+ /* Verity only works on regular files */
+ !S_ISREG(d_inode(metapath->dentry)->i_mode))
+ return 0;
+
+ xattr_size = ovl_check_metacopy_xattr(ofs, metapath, &metacopy_data);
+ if (xattr_size < 0)
+ return xattr_size;
+
+ if (!xattr_size || !metacopy_data.digest_algo) {
+ if (ofs->config.verity_mode == OVL_VERITY_REQUIRE) {
+ pr_warn_ratelimited("metacopy file '%pd' has no digest specified\n",
+ metapath->dentry);
+ return -EIO;
+ }
+ return 0;
+ }
+
+ xattr_digest_size = ovl_metadata_digest_size(&metacopy_data);
+
+ err = ovl_ensure_verity_loaded(datapath);
+ if (err < 0) {
+ pr_warn_ratelimited("lower file '%pd' failed to load fs-verity info\n",
+ datapath->dentry);
+ return -EIO;
+ }
+
+ digest_size = fsverity_get_digest(d_inode(datapath->dentry), actual_digest,
+ &verity_algo, NULL);
+ if (digest_size == 0) {
+ pr_warn_ratelimited("lower file '%pd' has no fs-verity digest\n", datapath->dentry);
+ return -EIO;
+ }
+
+ if (xattr_digest_size != digest_size ||
+ metacopy_data.digest_algo != verity_algo ||
+ memcmp(metacopy_data.digest, actual_digest, xattr_digest_size) != 0) {
+ pr_warn_ratelimited("lower file '%pd' has the wrong fs-verity digest\n",
+ datapath->dentry);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+int ovl_get_verity_digest(struct ovl_fs *ofs, const struct path *src,
+ struct ovl_metacopy *metacopy)
+{
+ int err, digest_size;
+
+ if (!ofs->config.verity_mode || !S_ISREG(d_inode(src->dentry)->i_mode))
+ return 0;
+
+ err = ovl_ensure_verity_loaded(src);
+ if (err < 0) {
+ pr_warn_ratelimited("lower file '%pd' failed to load fs-verity info\n",
+ src->dentry);
+ return -EIO;
+ }
+
+ digest_size = fsverity_get_digest(d_inode(src->dentry),
+ metacopy->digest, &metacopy->digest_algo, NULL);
+ if (digest_size == 0 ||
+ WARN_ON_ONCE(digest_size > FS_VERITY_MAX_DIGEST_SIZE)) {
+ if (ofs->config.verity_mode == OVL_VERITY_REQUIRE) {
+ pr_warn_ratelimited("lower file '%pd' has no fs-verity digest\n",
+ src->dentry);
+ return -EIO;
+ }
+ return 0;
+ }
+
+ metacopy->len += digest_size;
+ return 0;
+}
+
+/*
+ * ovl_sync_status() - Check fs sync status for volatile mounts
+ *
+ * Returns 1 if this is not a volatile mount and a real sync is required.
+ *
+ * Returns 0 if syncing can be skipped because mount is volatile, and no errors
+ * have occurred on the upperdir since the mount.
+ *
+ * Returns -errno if it is a volatile mount, and the error that occurred since
+ * the last mount. If the error code changes, it'll return the latest error
+ * code.
+ */
+
+int ovl_sync_status(struct ovl_fs *ofs)
+{
+ struct vfsmount *mnt;
+
+ if (ovl_should_sync(ofs))
+ return 1;
+
+ mnt = ovl_upper_mnt(ofs);
+ if (!mnt)
+ return 0;
+
+ return errseq_check(&mnt->mnt_sb->s_wb_err, ofs->errseq);
+}
+
+/*
+ * ovl_copyattr() - copy inode attributes from layer to ovl inode
+ *
+ * When overlay copies inode information from an upper or lower layer to the
+ * relevant overlay inode it will apply the idmapping of the upper or lower
+ * layer when doing so ensuring that the ovl inode ownership will correctly
+ * reflect the ownership of the idmapped upper or lower layer. For example, an
+ * idmapped upper or lower layer mapping id 1001 to id 1000 will take care to
+ * map any lower or upper inode owned by id 1001 to id 1000. These mapping
+ * helpers are nops when the relevant layer isn't idmapped.
+ */
+void ovl_copyattr(struct inode *inode)
+{
+ struct path realpath;
+ struct inode *realinode;
+ struct mnt_idmap *real_idmap;
+ vfsuid_t vfsuid;
+ vfsgid_t vfsgid;
+
+ realinode = ovl_i_path_real(inode, &realpath);
+ real_idmap = mnt_idmap(realpath.mnt);
+
+ spin_lock(&inode->i_lock);
+ vfsuid = i_uid_into_vfsuid(real_idmap, realinode);
+ vfsgid = i_gid_into_vfsgid(real_idmap, realinode);
+
+ inode->i_uid = vfsuid_into_kuid(vfsuid);
+ inode->i_gid = vfsgid_into_kgid(vfsgid);
+ inode->i_mode = realinode->i_mode;
+ inode_set_atime_to_ts(inode, inode_get_atime(realinode));
+ inode_set_mtime_to_ts(inode, inode_get_mtime(realinode));
+ inode_set_ctime_to_ts(inode, inode_get_ctime(realinode));
+ i_size_write(inode, i_size_read(realinode));
+ spin_unlock(&inode->i_lock);
}
diff --git a/fs/overlayfs/xattrs.c b/fs/overlayfs/xattrs.c
new file mode 100644
index 000000000000..aa95855c7023
--- /dev/null
+++ b/fs/overlayfs/xattrs.c
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/fs.h>
+#include <linux/xattr.h>
+#include "overlayfs.h"
+
+static bool ovl_is_escaped_xattr(struct super_block *sb, const char *name)
+{
+ struct ovl_fs *ofs = sb->s_fs_info;
+
+ if (ofs->config.userxattr)
+ return strncmp(name, OVL_XATTR_ESCAPE_USER_PREFIX,
+ OVL_XATTR_ESCAPE_USER_PREFIX_LEN) == 0;
+ else
+ return strncmp(name, OVL_XATTR_ESCAPE_TRUSTED_PREFIX,
+ OVL_XATTR_ESCAPE_TRUSTED_PREFIX_LEN - 1) == 0;
+}
+
+static bool ovl_is_own_xattr(struct super_block *sb, const char *name)
+{
+ struct ovl_fs *ofs = OVL_FS(sb);
+
+ if (ofs->config.userxattr)
+ return strncmp(name, OVL_XATTR_USER_PREFIX,
+ OVL_XATTR_USER_PREFIX_LEN) == 0;
+ else
+ return strncmp(name, OVL_XATTR_TRUSTED_PREFIX,
+ OVL_XATTR_TRUSTED_PREFIX_LEN) == 0;
+}
+
+bool ovl_is_private_xattr(struct super_block *sb, const char *name)
+{
+ return ovl_is_own_xattr(sb, name) && !ovl_is_escaped_xattr(sb, name);
+}
+
+static int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ int err;
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+ struct dentry *upperdentry = ovl_i_dentry_upper(inode);
+ struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry);
+ struct path realpath;
+
+ if (!value && !upperdentry) {
+ ovl_path_lower(dentry, &realpath);
+ with_ovl_creds(dentry->d_sb)
+ err = vfs_getxattr(mnt_idmap(realpath.mnt), realdentry, name, NULL, 0);
+ if (err < 0)
+ goto out;
+ }
+
+ if (!upperdentry) {
+ err = ovl_copy_up(dentry);
+ if (err)
+ goto out;
+
+ realdentry = ovl_dentry_upper(dentry);
+ }
+
+ err = ovl_want_write(dentry);
+ if (err)
+ goto out;
+
+ with_ovl_creds(dentry->d_sb) {
+ if (value) {
+ err = ovl_do_setxattr(ofs, realdentry, name, value, size, flags);
+ } else {
+ WARN_ON(flags != XATTR_REPLACE);
+ err = ovl_do_removexattr(ofs, realdentry, name);
+ }
+ }
+ ovl_drop_write(dentry);
+
+ /* copy c/mtime */
+ ovl_copyattr(inode);
+out:
+ return err;
+}
+
+static int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
+ void *value, size_t size)
+{
+ struct path realpath;
+
+ ovl_i_path_real(inode, &realpath);
+ with_ovl_creds(dentry->d_sb)
+ return vfs_getxattr(mnt_idmap(realpath.mnt), realpath.dentry, name, value, size);
+}
+
+static bool ovl_can_list(struct super_block *sb, const char *s)
+{
+ /* Never list private (.overlay) */
+ if (ovl_is_private_xattr(sb, s))
+ return false;
+
+ /* List all non-trusted xattrs */
+ if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
+ return true;
+
+ /* list other trusted for superuser only */
+ return ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
+}
+
+ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
+{
+ struct dentry *realdentry = ovl_dentry_real(dentry);
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+ ssize_t res;
+ size_t len;
+ char *s;
+ size_t prefix_len, name_len;
+
+ with_ovl_creds(dentry->d_sb)
+ res = vfs_listxattr(realdentry, list, size);
+ if (res <= 0 || size == 0)
+ return res;
+
+ prefix_len = ofs->config.userxattr ?
+ OVL_XATTR_USER_PREFIX_LEN : OVL_XATTR_TRUSTED_PREFIX_LEN;
+
+ /* filter out private xattrs */
+ for (s = list, len = res; len;) {
+ size_t slen = strnlen(s, len) + 1;
+
+ /* underlying fs providing us with an broken xattr list? */
+ if (WARN_ON(slen > len))
+ return -EIO;
+
+ len -= slen;
+ if (!ovl_can_list(dentry->d_sb, s)) {
+ res -= slen;
+ memmove(s, s + slen, len);
+ } else if (ovl_is_escaped_xattr(dentry->d_sb, s)) {
+ res -= OVL_XATTR_ESCAPE_PREFIX_LEN;
+ name_len = slen - prefix_len - OVL_XATTR_ESCAPE_PREFIX_LEN;
+ s += prefix_len;
+ memmove(s, s + OVL_XATTR_ESCAPE_PREFIX_LEN, name_len + len);
+ s += name_len;
+ } else {
+ s += slen;
+ }
+ }
+
+ return res;
+}
+
+static char *ovl_xattr_escape_name(const char *prefix, const char *name)
+{
+ size_t prefix_len = strlen(prefix);
+ size_t name_len = strlen(name);
+ size_t escaped_len;
+ char *escaped, *s;
+
+ escaped_len = prefix_len + OVL_XATTR_ESCAPE_PREFIX_LEN + name_len;
+ if (escaped_len > XATTR_NAME_MAX)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ escaped = kmalloc(escaped_len + 1, GFP_KERNEL);
+ if (escaped == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ s = escaped;
+ memcpy(s, prefix, prefix_len);
+ s += prefix_len;
+ memcpy(s, OVL_XATTR_ESCAPE_PREFIX, OVL_XATTR_ESCAPE_PREFIX_LEN);
+ s += OVL_XATTR_ESCAPE_PREFIX_LEN;
+ memcpy(s, name, name_len + 1);
+
+ return escaped;
+}
+
+static int ovl_own_xattr_get(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, void *buffer, size_t size)
+{
+ char *escaped;
+ int r;
+
+ escaped = ovl_xattr_escape_name(handler->prefix, name);
+ if (IS_ERR(escaped))
+ return PTR_ERR(escaped);
+
+ r = ovl_xattr_get(dentry, inode, escaped, buffer, size);
+
+ kfree(escaped);
+
+ return r;
+}
+
+static int ovl_own_xattr_set(const struct xattr_handler *handler,
+ struct mnt_idmap *idmap,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, const void *value,
+ size_t size, int flags)
+{
+ char *escaped;
+ int r;
+
+ escaped = ovl_xattr_escape_name(handler->prefix, name);
+ if (IS_ERR(escaped))
+ return PTR_ERR(escaped);
+
+ r = ovl_xattr_set(dentry, inode, escaped, value, size, flags);
+
+ kfree(escaped);
+
+ return r;
+}
+
+static int ovl_other_xattr_get(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, void *buffer, size_t size)
+{
+ return ovl_xattr_get(dentry, inode, name, buffer, size);
+}
+
+static int ovl_other_xattr_set(const struct xattr_handler *handler,
+ struct mnt_idmap *idmap,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, const void *value,
+ size_t size, int flags)
+{
+ return ovl_xattr_set(dentry, inode, name, value, size, flags);
+}
+
+static const struct xattr_handler ovl_own_trusted_xattr_handler = {
+ .prefix = OVL_XATTR_TRUSTED_PREFIX,
+ .get = ovl_own_xattr_get,
+ .set = ovl_own_xattr_set,
+};
+
+static const struct xattr_handler ovl_own_user_xattr_handler = {
+ .prefix = OVL_XATTR_USER_PREFIX,
+ .get = ovl_own_xattr_get,
+ .set = ovl_own_xattr_set,
+};
+
+static const struct xattr_handler ovl_other_xattr_handler = {
+ .prefix = "", /* catch all */
+ .get = ovl_other_xattr_get,
+ .set = ovl_other_xattr_set,
+};
+
+static const struct xattr_handler * const ovl_trusted_xattr_handlers[] = {
+ &ovl_own_trusted_xattr_handler,
+ &ovl_other_xattr_handler,
+ NULL
+};
+
+static const struct xattr_handler * const ovl_user_xattr_handlers[] = {
+ &ovl_own_user_xattr_handler,
+ &ovl_other_xattr_handler,
+ NULL
+};
+
+const struct xattr_handler * const *ovl_xattr_handlers(struct ovl_fs *ofs)
+{
+ return ofs->config.userxattr ? ovl_user_xattr_handlers :
+ ovl_trusted_xattr_handlers;
+}