summaryrefslogtreecommitdiff
path: root/fs/overlayfs/namei.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/overlayfs/namei.c')
-rw-r--r--fs/overlayfs/namei.c1396
1 files changed, 1038 insertions, 358 deletions
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 8aef2b304b2d..e9a69c95be91 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -1,65 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2011 Novell Inc.
* Copyright (C) 2016 Red Hat, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/cred.h>
+#include <linux/ctype.h>
#include <linux/namei.h>
#include <linux/xattr.h>
#include <linux/ratelimit.h>
#include <linux/mount.h>
#include <linux/exportfs.h>
#include "overlayfs.h"
-#include "ovl_entry.h"
struct ovl_lookup_data {
+ struct super_block *sb;
+ struct dentry *dentry;
+ const struct ovl_layer *layer;
struct qstr name;
bool is_dir;
bool opaque;
+ bool xwhiteouts;
bool stop;
bool last;
char *redirect;
+ char *upperredirect;
+ int metacopy;
+ /* Referring to last redirect xattr */
+ bool absolute_redirect;
};
-static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
+static int ovl_check_redirect(const struct path *path, struct ovl_lookup_data *d,
size_t prelen, const char *post)
{
int res;
- char *s, *next, *buf = NULL;
+ char *buf;
+ struct ovl_fs *ofs = OVL_FS(d->sb);
- res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, NULL, 0);
- if (res < 0) {
- if (res == -ENODATA || res == -EOPNOTSUPP)
- return 0;
- goto fail;
- }
- buf = kzalloc(prelen + res + strlen(post) + 1, GFP_TEMPORARY);
- if (!buf)
- return -ENOMEM;
+ d->absolute_redirect = false;
+ buf = ovl_get_redirect_xattr(ofs, path, prelen + strlen(post));
+ if (IS_ERR_OR_NULL(buf))
+ return PTR_ERR(buf);
- if (res == 0)
- goto invalid;
-
- res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, buf, res);
- if (res < 0)
- goto fail;
- if (res == 0)
- goto invalid;
if (buf[0] == '/') {
- for (s = buf; *s++ == '/'; s = next) {
- next = strchrnul(s, '/');
- if (s == next)
- goto invalid;
- }
+ d->absolute_redirect = true;
+ /*
+ * One of the ancestor path elements in an absolute path
+ * lookup in ovl_lookup_layer() could have been opaque and
+ * that will stop further lookup in lower layers (d->stop=true)
+ * But we have found an absolute redirect in descendant path
+ * element and that should force continue lookup in lower
+ * layers (reset d->stop).
+ */
+ d->stop = false;
} else {
- if (strchr(buf, '/') != NULL)
- goto invalid;
-
+ res = strlen(buf) + 1;
memmove(buf + prelen, buf, res);
memcpy(buf, d->name.name, prelen);
}
@@ -71,29 +67,59 @@ static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
d->name.len = strlen(d->redirect);
return 0;
-
-err_free:
- kfree(buf);
- return 0;
-fail:
- pr_warn_ratelimited("overlayfs: failed to get redirect (%i)\n", res);
- goto err_free;
-invalid:
- pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf);
- goto err_free;
}
static int ovl_acceptable(void *ctx, struct dentry *dentry)
{
- return 1;
+ /*
+ * A non-dir origin may be disconnected, which is fine, because
+ * we only need it for its unique inode number.
+ */
+ if (!d_is_dir(dentry))
+ return 1;
+
+ /* Don't decode a deleted empty directory */
+ if (d_unhashed(dentry))
+ return 0;
+
+ /* Check if directory belongs to the layer we are decoding from */
+ return is_subdir(dentry, ((struct vfsmount *)ctx)->mnt_root);
}
-static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry)
+/*
+ * Check validity of an overlay file handle buffer.
+ *
+ * Return 0 for a valid file handle.
+ * Return -ENODATA for "origin unknown".
+ * Return <0 for an invalid file handle.
+ */
+int ovl_check_fb_len(struct ovl_fb *fb, int fb_len)
{
- int res;
+ if (fb_len < sizeof(struct ovl_fb) || fb_len < fb->len)
+ return -EINVAL;
+
+ if (fb->magic != OVL_FH_MAGIC)
+ return -EINVAL;
+
+ /* Treat larger version and unknown flags as "origin unknown" */
+ if (fb->version > OVL_FH_VERSION || fb->flags & ~OVL_FH_FLAG_ALL)
+ return -ENODATA;
+
+ /* Treat endianness mismatch as "origin unknown" */
+ if (!(fb->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
+ (fb->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
+ return -ENODATA;
+
+ return 0;
+}
+
+static struct ovl_fh *ovl_get_fh(struct ovl_fs *ofs, struct dentry *upperdentry,
+ enum ovl_xattr ox)
+{
+ int res, err;
struct ovl_fh *fh = NULL;
- res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0);
+ res = ovl_getxattr_upper(ofs, upperdentry, ox, NULL, 0);
if (res < 0) {
if (res == -ENODATA || res == -EOPNOTSUPP)
return NULL;
@@ -103,28 +129,20 @@ static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry)
if (res == 0)
return NULL;
- fh = kzalloc(res, GFP_TEMPORARY);
+ fh = kzalloc(res + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
if (!fh)
return ERR_PTR(-ENOMEM);
- res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, fh, res);
+ res = ovl_getxattr_upper(ofs, upperdentry, ox, fh->buf, res);
if (res < 0)
goto fail;
- if (res < sizeof(struct ovl_fh) || res < fh->len)
- goto invalid;
-
- if (fh->magic != OVL_FH_MAGIC)
+ err = ovl_check_fb_len(&fh->fb, res);
+ if (err < 0) {
+ if (err == -ENODATA)
+ goto out;
goto invalid;
-
- /* Treat larger version and unknown flags as "origin unknown" */
- if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL)
- goto out;
-
- /* Treat endianness mismatch as "origin unknown" */
- if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
- (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
- goto out;
+ }
return fh;
@@ -133,70 +151,107 @@ out:
return NULL;
fail:
- pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res);
+ pr_warn_ratelimited("failed to get origin (%i)\n", res);
goto out;
invalid:
- pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh);
+ pr_warn_ratelimited("invalid origin (%*phN)\n", res, fh);
goto out;
}
-static struct dentry *ovl_get_origin(struct dentry *dentry,
- struct vfsmount *mnt)
+struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
+ struct vfsmount *mnt, bool connected)
{
- struct dentry *origin = NULL;
- struct ovl_fh *fh = ovl_get_origin_fh(dentry);
+ struct dentry *real;
int bytes;
- if (IS_ERR_OR_NULL(fh))
- return (struct dentry *)fh;
+ if (!capable(CAP_DAC_READ_SEARCH))
+ return NULL;
/*
* Make sure that the stored uuid matches the uuid of the lower
* layer where file handle will be decoded.
+ * In case of uuid=off option just make sure that stored uuid is null.
*/
- if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
- goto out;
+ if (ovl_origin_uuid(ofs) ?
+ !uuid_equal(&fh->fb.uuid, &mnt->mnt_sb->s_uuid) :
+ !uuid_is_null(&fh->fb.uuid))
+ return NULL;
- bytes = (fh->len - offsetof(struct ovl_fh, fid));
- origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
- bytes >> 2, (int)fh->type,
- ovl_acceptable, NULL);
- if (IS_ERR(origin)) {
- /* Treat stale file handle as "origin unknown" */
- if (origin == ERR_PTR(-ESTALE))
- origin = NULL;
- goto out;
+ bytes = (fh->fb.len - offsetof(struct ovl_fb, fid));
+ real = exportfs_decode_fh(mnt, (struct fid *)fh->fb.fid,
+ bytes >> 2, (int)fh->fb.type,
+ connected ? ovl_acceptable : NULL, mnt);
+ if (IS_ERR(real)) {
+ /*
+ * Treat stale file handle to lower file as "origin unknown".
+ * upper file handle could become stale when upper file is
+ * unlinked and this information is needed to handle stale
+ * index entries correctly.
+ */
+ if (real == ERR_PTR(-ESTALE) &&
+ !(fh->fb.flags & OVL_FH_FLAG_PATH_UPPER))
+ real = NULL;
+ return real;
}
- if (ovl_dentry_weird(origin) ||
- ((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT))
- goto invalid;
-
-out:
- kfree(fh);
- return origin;
+ if (ovl_dentry_weird(real)) {
+ dput(real);
+ return NULL;
+ }
-invalid:
- pr_warn_ratelimited("overlayfs: invalid origin (%pd2)\n", origin);
- dput(origin);
- origin = NULL;
- goto out;
+ return real;
}
-static bool ovl_is_opaquedir(struct dentry *dentry)
+static struct dentry *ovl_lookup_positive_unlocked(struct ovl_lookup_data *d,
+ const char *name,
+ struct dentry *base, int len,
+ bool drop_negative)
{
- return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE);
+ struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->layer->mnt),
+ &QSTR_LEN(name, len), base);
+
+ if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
+ if (drop_negative && ret->d_lockref.count == 1) {
+ spin_lock(&ret->d_lock);
+ /* Recheck condition under lock */
+ if (d_is_negative(ret) && ret->d_lockref.count == 1)
+ __d_drop(ret);
+ spin_unlock(&ret->d_lock);
+ }
+ dput(ret);
+ ret = ERR_PTR(-ENOENT);
+ }
+ return ret;
}
static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
const char *name, unsigned int namelen,
size_t prelen, const char *post,
- struct dentry **ret)
+ struct dentry **ret, bool drop_negative)
{
- struct dentry *this;
+ struct ovl_fs *ofs = OVL_FS(d->sb);
+ struct dentry *this = NULL;
+ const char *warn;
+ struct path path;
int err;
+ bool last_element = !post[0];
+ bool is_upper = d->layer->idx == 0;
+ char val;
+
+ /*
+ * We allow filesystems that are case-folding capable as long as the
+ * layers are consistently enabled in the stack, enabled for every dir
+ * or disabled in all dirs. If someone has modified case folding on a
+ * directory on underlying layer, the warranty of the ovl stack is
+ * voided.
+ */
+ if (ofs->casefold != ovl_dentry_casefolded(base)) {
+ warn = "parent wrong casefold";
+ err = -ESTALE;
+ goto out_warn;
+ }
- this = lookup_one_len_unlocked(name, base, namelen);
+ this = ovl_lookup_positive_unlocked(d, name, base, namelen, drop_negative);
if (IS_ERR(this)) {
err = PTR_ERR(this);
this = NULL;
@@ -204,30 +259,74 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
goto out;
goto out_err;
}
- if (!this->d_inode)
- goto put_and_out;
+
+ if (ofs->casefold != ovl_dentry_casefolded(this)) {
+ warn = "child wrong casefold";
+ err = -EREMOTE;
+ goto out_warn;
+ }
if (ovl_dentry_weird(this)) {
/* Don't support traversing automounts and other weirdness */
+ warn = "unsupported object type";
err = -EREMOTE;
- goto out_err;
+ goto out_warn;
}
- if (ovl_is_whiteout(this)) {
+
+ path.dentry = this;
+ path.mnt = d->layer->mnt;
+ if (ovl_path_is_whiteout(ofs, &path)) {
d->stop = d->opaque = true;
goto put_and_out;
}
- if (!d_can_lookup(this)) {
+ /*
+ * This dentry should be a regular file if previous layer lookup
+ * found a metacopy dentry.
+ */
+ if (last_element && d->metacopy && !d_is_reg(this)) {
d->stop = true;
- if (d->is_dir)
- goto put_and_out;
- goto out;
+ goto put_and_out;
}
- d->is_dir = true;
- if (!d->last && ovl_is_opaquedir(this)) {
- d->stop = d->opaque = true;
- goto out;
+
+ if (!d_can_lookup(this)) {
+ if (d->is_dir || !last_element) {
+ d->stop = true;
+ goto put_and_out;
+ }
+ err = ovl_check_metacopy_xattr(ofs, &path, NULL);
+ if (err < 0)
+ goto out_err;
+
+ d->metacopy = err;
+ d->stop = !d->metacopy;
+ if (!d->metacopy || d->last)
+ goto out;
+ } else {
+ if (ovl_lookup_trap_inode(d->sb, this)) {
+ /* Caught in a trap of overlapping layers */
+ warn = "overlapping layers";
+ err = -ELOOP;
+ goto out_warn;
+ }
+
+ if (last_element)
+ d->is_dir = true;
+ if (d->last)
+ goto out;
+
+ /* overlay.opaque=x means xwhiteouts directory */
+ val = ovl_get_opaquedir_val(ofs, &path);
+ if (last_element && !is_upper && val == 'x') {
+ d->xwhiteouts = true;
+ ovl_layer_set_xwhiteouts(ofs, d->layer);
+ } else if (val == 'y') {
+ d->stop = true;
+ if (last_element)
+ d->opaque = true;
+ goto out;
+ }
}
- err = ovl_check_redirect(this, d, prelen, post);
+ err = ovl_check_redirect(&path, d, prelen, post);
if (err)
goto out_err;
out:
@@ -239,13 +338,17 @@ put_and_out:
this = NULL;
goto out;
+out_warn:
+ pr_warn_ratelimited("failed lookup in %s (%pd2, name='%.*s', err=%i): %s\n",
+ is_upper ? "upper" : "lower", base,
+ namelen, name, err, warn);
out_err:
dput(this);
return err;
}
static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
- struct dentry **ret)
+ struct dentry **ret, bool drop_negative)
{
/* Counting down from the end, since the prefix can change */
size_t rem = d->name.len - 1;
@@ -254,7 +357,7 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
if (d->name.name[0] != '/')
return ovl_lookup_single(base, d, d->name.name, d->name.len,
- 0, "", ret);
+ 0, "", ret, drop_negative);
while (!IS_ERR_OR_NULL(base) && d_can_lookup(base)) {
const char *s = d->name.name + d->name.len - rem;
@@ -267,7 +370,8 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
return -EIO;
err = ovl_lookup_single(base, d, s, thislen,
- d->name.len - rem, next, &base);
+ d->name.len - rem, next, &base,
+ drop_negative);
dput(dentry);
if (err)
return err;
@@ -284,49 +388,142 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
return 0;
}
+static int ovl_lookup_data_layer(struct dentry *dentry, const char *redirect,
+ const struct ovl_layer *layer,
+ struct path *datapath)
+{
+ int err;
+
+ err = vfs_path_lookup(layer->mnt->mnt_root, layer->mnt, redirect,
+ LOOKUP_BENEATH | LOOKUP_NO_SYMLINKS | LOOKUP_NO_XDEV,
+ datapath);
+ pr_debug("lookup lowerdata (%pd2, redirect=\"%s\", layer=%d, err=%i)\n",
+ dentry, redirect, layer->idx, err);
+
+ if (err)
+ return err;
+
+ err = -EREMOTE;
+ if (ovl_dentry_weird(datapath->dentry))
+ goto out_path_put;
+
+ err = -ENOENT;
+ /* Only regular file is acceptable as lower data */
+ if (!d_is_reg(datapath->dentry))
+ goto out_path_put;
+
+ return 0;
+
+out_path_put:
+ path_put(datapath);
+
+ return err;
+}
+
+/* Lookup in data-only layers by absolute redirect to layer root */
+static int ovl_lookup_data_layers(struct dentry *dentry, const char *redirect,
+ struct ovl_path *lowerdata)
+{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+ const struct ovl_layer *layer;
+ struct path datapath;
+ int err = -ENOENT;
+ int i;
+
+ layer = &ofs->layers[ofs->numlayer - ofs->numdatalayer];
+ for (i = 0; i < ofs->numdatalayer; i++, layer++) {
+ err = ovl_lookup_data_layer(dentry, redirect, layer, &datapath);
+ if (!err) {
+ mntput(datapath.mnt);
+ lowerdata->dentry = datapath.dentry;
+ lowerdata->layer = layer;
+ return 0;
+ }
+ }
+
+ return err;
+}
-static int ovl_check_origin(struct dentry *upperdentry,
- struct path *lowerstack, unsigned int numlower,
- struct path **stackp, unsigned int *ctrp)
+int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
+ struct dentry *upperdentry, struct ovl_path **stackp)
{
- struct vfsmount *mnt;
struct dentry *origin = NULL;
int i;
+ for (i = 1; i <= ovl_numlowerlayer(ofs); i++) {
+ /*
+ * If lower fs uuid is not unique among lower fs we cannot match
+ * fh->uuid to layer.
+ */
+ if (ofs->layers[i].fsid &&
+ ofs->layers[i].fs->bad_uuid)
+ continue;
- for (i = 0; i < numlower; i++) {
- mnt = lowerstack[i].mnt;
- origin = ovl_get_origin(upperdentry, mnt);
- if (IS_ERR(origin))
- return PTR_ERR(origin);
-
+ origin = ovl_decode_real_fh(ofs, fh, ofs->layers[i].mnt,
+ connected);
if (origin)
break;
}
if (!origin)
- return 0;
+ return -ESTALE;
+ else if (IS_ERR(origin))
+ return PTR_ERR(origin);
+
+ if (upperdentry && !ovl_upper_is_whiteout(ofs, upperdentry) &&
+ inode_wrong_type(d_inode(upperdentry), d_inode(origin)->i_mode))
+ goto invalid;
- BUG_ON(*ctrp);
if (!*stackp)
- *stackp = kmalloc(sizeof(struct path), GFP_TEMPORARY);
+ *stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL);
if (!*stackp) {
dput(origin);
return -ENOMEM;
}
- **stackp = (struct path) { .dentry = origin, .mnt = mnt };
- *ctrp = 1;
+ **stackp = (struct ovl_path){
+ .dentry = origin,
+ .layer = &ofs->layers[i]
+ };
+
+ return 0;
+
+invalid:
+ pr_warn_ratelimited("invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
+ upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
+ d_inode(origin)->i_mode & S_IFMT);
+ dput(origin);
+ return -ESTALE;
+}
+
+static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
+ struct ovl_path **stackp)
+{
+ struct ovl_fh *fh = ovl_get_fh(ofs, upperdentry, OVL_XATTR_ORIGIN);
+ int err;
+
+ if (IS_ERR_OR_NULL(fh))
+ return PTR_ERR(fh);
+
+ err = ovl_check_origin_fh(ofs, fh, false, upperdentry, stackp);
+ kfree(fh);
+
+ if (err) {
+ if (err == -ESTALE)
+ return 0;
+ return err;
+ }
return 0;
}
/*
- * Verify that @fh matches the origin file handle stored in OVL_XATTR_ORIGIN.
+ * Verify that @fh matches the file handle stored in xattr @name.
* Return 0 on match, -ESTALE on mismatch, < 0 on error.
*/
-static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh)
+static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry,
+ enum ovl_xattr ox, const struct ovl_fh *fh)
{
- struct ovl_fh *ofh = ovl_get_origin_fh(dentry);
+ struct ovl_fh *ofh = ovl_get_fh(ofs, dentry, ox);
int err = 0;
if (!ofh)
@@ -335,36 +532,50 @@ static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh)
if (IS_ERR(ofh))
return PTR_ERR(ofh);
- if (fh->len != ofh->len || memcmp(fh, ofh, fh->len))
+ if (fh->fb.len != ofh->fb.len || memcmp(&fh->fb, &ofh->fb, fh->fb.len))
err = -ESTALE;
kfree(ofh);
return err;
}
+int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
+ enum ovl_xattr ox, const struct ovl_fh *fh,
+ bool is_upper, bool set)
+{
+ int err;
+
+ err = ovl_verify_fh(ofs, dentry, ox, fh);
+ if (set && err == -ENODATA)
+ err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len);
+
+ return err;
+}
+
/*
- * Verify that an inode matches the origin file handle stored in upper inode.
+ * Verify that @real dentry matches the file handle stored in xattr @name.
*
- * If @set is true and there is no stored file handle, encode and store origin
- * file handle in OVL_XATTR_ORIGIN.
+ * If @set is true and there is no stored file handle, encode @real and store
+ * file handle in xattr @name.
*
- * Return 0 on match, -ESTALE on mismatch, < 0 on error.
+ * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
*/
-int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt,
- struct dentry *origin, bool is_upper, bool set)
+int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry,
+ enum ovl_xattr ox, struct dentry *real,
+ bool is_upper, bool set)
{
struct inode *inode;
struct ovl_fh *fh;
int err;
- fh = ovl_encode_fh(origin, is_upper);
+ fh = ovl_encode_real_fh(ofs, d_inode(real), is_upper);
err = PTR_ERR(fh);
- if (IS_ERR(fh))
+ if (IS_ERR(fh)) {
+ fh = NULL;
goto fail;
+ }
- err = ovl_verify_origin_fh(dentry, fh);
- if (set && err == -ENODATA)
- err = ovl_do_setxattr(dentry, OVL_XATTR_ORIGIN, fh, fh->len, 0);
+ err = ovl_verify_set_fh(ofs, dentry, ox, fh, is_upper, set);
if (err)
goto fail;
@@ -373,83 +584,163 @@ out:
return err;
fail:
- inode = d_inode(origin);
- pr_warn_ratelimited("overlayfs: failed to verify origin (%pd2, ino=%lu, err=%i)\n",
- origin, inode ? inode->i_ino : 0, err);
+ inode = d_inode(real);
+ pr_warn_ratelimited("failed to verify %s (%pd2, ino=%lu, err=%i)\n",
+ is_upper ? "upper" : "origin", real,
+ inode ? inode->i_ino : 0, err);
goto out;
}
+
+/* Get upper dentry from index */
+struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index,
+ bool connected)
+{
+ struct ovl_fh *fh;
+ struct dentry *upper;
+
+ if (!d_is_dir(index))
+ return dget(index);
+
+ fh = ovl_get_fh(ofs, index, OVL_XATTR_UPPER);
+ if (IS_ERR_OR_NULL(fh))
+ return ERR_CAST(fh);
+
+ upper = ovl_decode_real_fh(ofs, fh, ovl_upper_mnt(ofs), connected);
+ kfree(fh);
+
+ if (IS_ERR_OR_NULL(upper))
+ return upper ?: ERR_PTR(-ESTALE);
+
+ if (!d_is_dir(upper)) {
+ pr_warn_ratelimited("invalid index upper (%pd2, upper=%pd2).\n",
+ index, upper);
+ dput(upper);
+ return ERR_PTR(-EIO);
+ }
+
+ return upper;
+}
+
/*
* Verify that an index entry name matches the origin file handle stored in
* OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
* Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
*/
-int ovl_verify_index(struct dentry *index, struct path *lowerstack,
- unsigned int numlower)
+int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
{
struct ovl_fh *fh = NULL;
size_t len;
- struct path origin = { };
- struct path *stack = &origin;
- unsigned int ctr = 0;
+ struct ovl_path origin = { };
+ struct ovl_path *stack = &origin;
+ struct dentry *upper = NULL;
int err;
if (!d_inode(index))
return 0;
- /*
- * Directory index entries are going to be used for looking up
- * redirected upper dirs by lower dir fh when decoding an overlay
- * file handle of a merge dir. Whiteout index entries are going to be
- * used as an indication that an exported overlay file handle should
- * be treated as stale (i.e. after unlink of the overlay inode).
- * We don't know the verification rules for directory and whiteout
- * index entries, because they have not been implemented yet, so return
- * EROFS if those entries are found to avoid corrupting an index that
- * was created by a newer kernel.
- */
- err = -EROFS;
- if (d_is_dir(index) || ovl_is_whiteout(index))
- goto fail;
-
err = -EINVAL;
- if (index->d_name.len < sizeof(struct ovl_fh)*2)
+ if (index->d_name.len < sizeof(struct ovl_fb)*2)
goto fail;
err = -ENOMEM;
len = index->d_name.len / 2;
- fh = kzalloc(len, GFP_TEMPORARY);
+ fh = kzalloc(len + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
if (!fh)
goto fail;
err = -EINVAL;
- if (hex2bin((u8 *)fh, index->d_name.name, len) || len != fh->len)
+ if (hex2bin(fh->buf, index->d_name.name, len))
goto fail;
- err = ovl_verify_origin_fh(index, fh);
+ err = ovl_check_fb_len(&fh->fb, len);
if (err)
goto fail;
- err = ovl_check_origin(index, lowerstack, numlower, &stack, &ctr);
- if (!err && !ctr)
- err = -ESTALE;
+ /*
+ * Whiteout index entries are used as an indication that an exported
+ * overlay file handle should be treated as stale (i.e. after unlink
+ * of the overlay inode). These entries contain no origin xattr.
+ */
+ if (ovl_is_whiteout(index))
+ goto out;
+
+ /*
+ * Verifying directory index entries are not stale is expensive, so
+ * only verify stale dir index if NFS export is enabled.
+ */
+ if (d_is_dir(index) && !ofs->config.nfs_export)
+ goto out;
+
+ /*
+ * Directory index entries should have 'upper' xattr pointing to the
+ * real upper dir. Non-dir index entries are hardlinks to the upper
+ * real inode. For non-dir index, we can read the copy up origin xattr
+ * directly from the index dentry, but for dir index we first need to
+ * decode the upper directory.
+ */
+ upper = ovl_index_upper(ofs, index, false);
+ if (IS_ERR_OR_NULL(upper)) {
+ err = PTR_ERR(upper);
+ /*
+ * Directory index entries with no 'upper' xattr need to be
+ * removed. When dir index entry has a stale 'upper' xattr,
+ * we assume that upper dir was removed and we treat the dir
+ * index as orphan entry that needs to be whited out.
+ */
+ if (err == -ESTALE)
+ goto orphan;
+ else if (!err)
+ err = -ESTALE;
+ goto fail;
+ }
+
+ err = ovl_verify_fh(ofs, upper, OVL_XATTR_ORIGIN, fh);
+ dput(upper);
if (err)
goto fail;
- /* Check if index is orphan and don't warn before cleaning it */
- if (d_inode(index)->i_nlink == 1 &&
- ovl_get_nlink(index, origin.dentry, 0) == 0)
- err = -ENOENT;
+ /* Check if non-dir index is orphan and don't warn before cleaning it */
+ if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) {
+ err = ovl_check_origin_fh(ofs, fh, false, index, &stack);
+ if (err)
+ goto fail;
+
+ if (ovl_get_nlink(ofs, origin.dentry, index, 0) == 0)
+ goto orphan;
+ }
- dput(origin.dentry);
out:
+ dput(origin.dentry);
kfree(fh);
return err;
fail:
- pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n",
+ pr_warn_ratelimited("failed to verify index (%pd2, ftype=%x, err=%i)\n",
index, d_inode(index)->i_mode & S_IFMT, err);
goto out;
+
+orphan:
+ pr_warn_ratelimited("orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
+ index, d_inode(index)->i_mode & S_IFMT,
+ d_inode(index)->i_nlink);
+ err = -ENOENT;
+ goto out;
+}
+
+int ovl_get_index_name_fh(const struct ovl_fh *fh, struct qstr *name)
+{
+ char *n, *s;
+
+ n = kcalloc(fh->fb.len, 2, GFP_KERNEL);
+ if (!n)
+ return -ENOMEM;
+
+ s = bin2hex(n, fh->buf, fh->fb.len);
+ *name = (struct qstr) QSTR_INIT(n, s - n);
+
+ return 0;
+
}
/*
@@ -464,49 +755,77 @@ fail:
* If the index dentry for a copy up origin inode is positive, but points
* to an inode different than the upper inode, then either the upper inode
* has been copied up and not indexed or it was indexed, but since then
- * index dir was cleared. Either way, that index cannot be used to indentify
+ * index dir was cleared. Either way, that index cannot be used to identify
* the overlay inode.
*/
-int ovl_get_index_name(struct dentry *origin, struct qstr *name)
+int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
+ struct qstr *name)
{
- int err;
struct ovl_fh *fh;
- char *n, *s;
+ int err;
- fh = ovl_encode_fh(origin, false);
+ fh = ovl_encode_real_fh(ofs, d_inode(origin), false);
if (IS_ERR(fh))
return PTR_ERR(fh);
- err = -ENOMEM;
- n = kzalloc(fh->len * 2, GFP_TEMPORARY);
- if (n) {
- s = bin2hex(n, fh, fh->len);
- *name = (struct qstr) QSTR_INIT(n, s - n);
- err = 0;
- }
- kfree(fh);
+ err = ovl_get_index_name_fh(fh, name);
+ kfree(fh);
return err;
+}
+
+/* Lookup index by file handle for NFS export */
+struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
+{
+ struct dentry *index;
+ struct qstr name;
+ int err;
+ err = ovl_get_index_name_fh(fh, &name);
+ if (err)
+ return ERR_PTR(err);
+
+ index = lookup_noperm_positive_unlocked(&name, ofs->workdir);
+ kfree(name.name);
+ if (IS_ERR(index)) {
+ if (PTR_ERR(index) == -ENOENT)
+ index = NULL;
+ return index;
+ }
+
+ if (ovl_is_whiteout(index))
+ err = -ESTALE;
+ else if (ovl_dentry_weird(index))
+ err = -EIO;
+ else
+ return index;
+
+ dput(index);
+ return ERR_PTR(err);
}
-static struct dentry *ovl_lookup_index(struct dentry *dentry,
- struct dentry *upper,
- struct dentry *origin)
+struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
+ struct dentry *origin, bool verify)
{
- struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
struct dentry *index;
struct inode *inode;
struct qstr name;
+ bool is_dir = d_is_dir(origin);
int err;
- err = ovl_get_index_name(origin, &name);
+ err = ovl_get_index_name(ofs, origin, &name);
if (err)
return ERR_PTR(err);
- index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
+ index = lookup_one_positive_unlocked(ovl_upper_mnt_idmap(ofs), &name,
+ ofs->workdir);
if (IS_ERR(index)) {
- pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n"
+ err = PTR_ERR(index);
+ if (err == -ENOENT) {
+ index = NULL;
+ goto out;
+ }
+ pr_warn_ratelimited("failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
"overlayfs: mount with '-o index=off' to disable inodes index.\n",
d_inode(origin)->i_ino, name.len, name.name,
err);
@@ -514,21 +833,18 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry,
}
inode = d_inode(index);
- if (d_is_negative(index)) {
- if (upper && d_inode(origin)->i_nlink > 1) {
- pr_warn_ratelimited("overlayfs: hard link with origin but no index (ino=%lu).\n",
- d_inode(origin)->i_ino);
- goto fail;
- }
-
+ if (ovl_is_whiteout(index) && !verify) {
+ /*
+ * When index lookup is called with !verify for decoding an
+ * overlay file handle, a whiteout index implies that decode
+ * should treat file handle as stale and no need to print a
+ * warning about it.
+ */
dput(index);
- index = NULL;
- } else if (upper && d_inode(upper) != inode) {
- pr_warn_ratelimited("overlayfs: wrong index found (index=%pd2, ino=%lu, upper ino=%lu).\n",
- index, inode->i_ino, d_inode(upper)->i_ino);
- goto fail;
+ index = ERR_PTR(-ESTALE);
+ goto out;
} else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
- ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) {
+ inode_wrong_type(inode, d_inode(origin)->i_mode)) {
/*
* Index should always be of the same file type as origin
* except for the case of a whiteout index. A whiteout
@@ -536,16 +852,38 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry,
* unlinked, which means that finding a lower origin on lookup
* whose index is a whiteout should be treated as an error.
*/
- pr_warn_ratelimited("overlayfs: bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
+ pr_warn_ratelimited("bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
index, d_inode(index)->i_mode & S_IFMT,
d_inode(origin)->i_mode & S_IFMT);
goto fail;
- }
+ } else if (is_dir && verify) {
+ if (!upper) {
+ pr_warn_ratelimited("suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
+ origin, index);
+ goto fail;
+ }
+ /* Verify that dir index 'upper' xattr points to upper dir */
+ err = ovl_verify_upper(ofs, index, upper, false);
+ if (err) {
+ if (err == -ESTALE) {
+ pr_warn_ratelimited("suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
+ upper, origin, index);
+ }
+ goto fail;
+ }
+ } else if (upper && d_inode(upper) != inode) {
+ goto out_dput;
+ }
out:
kfree(name.name);
return index;
+out_dput:
+ dput(index);
+ index = NULL;
+ goto out;
+
fail:
dput(index);
index = ERR_PTR(-EIO);
@@ -556,67 +894,220 @@ fail:
* Returns next layer in stack starting from top.
* Returns -1 if this is the last layer.
*/
-int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
+int ovl_path_next(int idx, struct dentry *dentry, struct path *path,
+ const struct ovl_layer **layer)
{
- struct ovl_entry *oe = dentry->d_fsdata;
+ struct ovl_entry *oe = OVL_E(dentry);
+ struct ovl_path *lowerstack = ovl_lowerstack(oe);
BUG_ON(idx < 0);
if (idx == 0) {
ovl_path_upper(dentry, path);
- if (path->dentry)
- return oe->numlower ? 1 : -1;
+ if (path->dentry) {
+ *layer = &OVL_FS(dentry->d_sb)->layers[0];
+ return ovl_numlower(oe) ? 1 : -1;
+ }
idx++;
}
- BUG_ON(idx > oe->numlower);
- *path = oe->lowerstack[idx - 1];
+ BUG_ON(idx > ovl_numlower(oe));
+ path->dentry = lowerstack[idx - 1].dentry;
+ *layer = lowerstack[idx - 1].layer;
+ path->mnt = (*layer)->mnt;
- return (idx < oe->numlower) ? idx + 1 : -1;
+ return (idx < ovl_numlower(oe)) ? idx + 1 : -1;
}
-struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
- unsigned int flags)
+/* Fix missing 'origin' xattr */
+static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry,
+ struct dentry *lower, struct dentry *upper)
+{
+ const struct ovl_fh *fh;
+ int err;
+
+ if (ovl_check_origin_xattr(ofs, upper))
+ return 0;
+
+ fh = ovl_get_origin_fh(ofs, lower);
+ if (IS_ERR(fh))
+ return PTR_ERR(fh);
+
+ err = ovl_want_write(dentry);
+ if (err)
+ goto out;
+
+ err = ovl_set_origin_fh(ofs, fh, upper);
+ if (!err)
+ err = ovl_set_impure(dentry->d_parent, upper->d_parent);
+
+ ovl_drop_write(dentry);
+out:
+ kfree(fh);
+ return err;
+}
+
+static int ovl_maybe_validate_verity(struct dentry *dentry)
+{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+ struct inode *inode = d_inode(dentry);
+ struct path datapath, metapath;
+ int err;
+
+ if (!ofs->config.verity_mode ||
+ !ovl_is_metacopy_dentry(dentry) ||
+ ovl_test_flag(OVL_VERIFIED_DIGEST, inode))
+ return 0;
+
+ if (!ovl_test_flag(OVL_HAS_DIGEST, inode)) {
+ if (ofs->config.verity_mode == OVL_VERITY_REQUIRE) {
+ pr_warn_ratelimited("metacopy file '%pd' has no digest specified\n",
+ dentry);
+ return -EIO;
+ }
+ return 0;
+ }
+
+ ovl_path_lowerdata(dentry, &datapath);
+ if (!datapath.dentry)
+ return -EIO;
+
+ ovl_path_real(dentry, &metapath);
+ if (!metapath.dentry)
+ return -EIO;
+
+ err = ovl_inode_lock_interruptible(inode);
+ if (err)
+ return err;
+
+ if (!ovl_test_flag(OVL_VERIFIED_DIGEST, inode)) {
+ with_ovl_creds(dentry->d_sb)
+ err = ovl_validate_verity(ofs, &metapath, &datapath);
+ if (err == 0)
+ ovl_set_flag(OVL_VERIFIED_DIGEST, inode);
+ }
+
+ ovl_inode_unlock(inode);
+
+ return err;
+}
+
+/* Lazy lookup of lowerdata */
+static int ovl_maybe_lookup_lowerdata(struct dentry *dentry)
+{
+ struct inode *inode = d_inode(dentry);
+ const char *redirect = ovl_lowerdata_redirect(inode);
+ struct ovl_path datapath = {};
+ int err;
+
+ if (!redirect || ovl_dentry_lowerdata(dentry))
+ return 0;
+
+ if (redirect[0] != '/')
+ return -EIO;
+
+ err = ovl_inode_lock_interruptible(inode);
+ if (err)
+ return err;
+
+ err = 0;
+ /* Someone got here before us? */
+ if (ovl_dentry_lowerdata(dentry))
+ goto out;
+
+ with_ovl_creds(dentry->d_sb)
+ err = ovl_lookup_data_layers(dentry, redirect, &datapath);
+ if (err)
+ goto out_err;
+
+ err = ovl_dentry_set_lowerdata(dentry, &datapath);
+ if (err)
+ goto out_err;
+
+out:
+ ovl_inode_unlock(inode);
+ dput(datapath.dentry);
+
+ return err;
+
+out_err:
+ pr_warn_ratelimited("lazy lowerdata lookup failed (%pd2, err=%i)\n",
+ dentry, err);
+ goto out;
+}
+
+int ovl_verify_lowerdata(struct dentry *dentry)
+{
+ int err;
+
+ err = ovl_maybe_lookup_lowerdata(dentry);
+ if (err)
+ return err;
+
+ return ovl_maybe_validate_verity(dentry);
+}
+
+/*
+ * Following redirects/metacopy can have security consequences: it's like a
+ * symlink into the lower layer without the permission checks.
+ *
+ * This is only a problem if the upper layer is untrusted (e.g comes from an USB
+ * drive). This can allow a non-readable file or directory to become readable.
+ *
+ * Only following redirects when redirects are enabled disables this attack
+ * vector when not necessary.
+ */
+static bool ovl_check_follow_redirect(struct ovl_lookup_data *d)
{
+ struct ovl_fs *ofs = OVL_FS(d->sb);
+
+ if (d->metacopy && !ofs->config.metacopy) {
+ pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", d->dentry);
+ return false;
+ }
+ if ((d->redirect || d->upperredirect) && !ovl_redirect_follow(ofs)) {
+ pr_warn_ratelimited("refusing to follow redirect for (%pd2)\n", d->dentry);
+ return false;
+ }
+ return true;
+}
+
+struct ovl_lookup_ctx {
+ struct dentry *dentry;
struct ovl_entry *oe;
- const struct cred *old_cred;
- struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
- struct ovl_entry *poe = dentry->d_parent->d_fsdata;
- struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
- struct path *stack = NULL;
- struct dentry *upperdir, *upperdentry = NULL;
- struct dentry *index = NULL;
- unsigned int ctr = 0;
- struct inode *inode = NULL;
- bool upperopaque = false;
- char *upperredirect = NULL;
+ struct ovl_path *stack;
+ struct ovl_path *origin_path;
+ struct dentry *upperdentry;
+ struct dentry *index;
+ struct inode *inode;
+ unsigned int ctr;
+};
+
+static int ovl_lookup_layers(struct ovl_lookup_ctx *ctx, struct ovl_lookup_data *d)
+{
+ struct dentry *dentry = ctx->dentry;
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+ struct ovl_entry *poe = OVL_E(dentry->d_parent);
+ struct ovl_entry *roe = OVL_E(dentry->d_sb->s_root);
+ bool check_redirect = (ovl_redirect_follow(ofs) || ofs->numdatalayer);
+ struct dentry *upperdir;
struct dentry *this;
+ struct dentry *origin = NULL;
+ bool upperopaque = false;
+ bool uppermetacopy = false;
+ int metacopy_size = 0;
unsigned int i;
int err;
- struct ovl_lookup_data d = {
- .name = dentry->d_name,
- .is_dir = false,
- .opaque = false,
- .stop = false,
- .last = !poe->numlower,
- .redirect = NULL,
- };
- if (dentry->d_name.len > ofs->namelen)
- return ERR_PTR(-ENAMETOOLONG);
-
- old_cred = ovl_override_creds(dentry->d_sb);
upperdir = ovl_dentry_upper(dentry->d_parent);
if (upperdir) {
- err = ovl_lookup_layer(upperdir, &d, &upperdentry);
+ d->layer = &ofs->layers[0];
+ err = ovl_lookup_layer(upperdir, d, &ctx->upperdentry, true);
if (err)
- goto out;
+ return err;
- if (upperdentry && unlikely(ovl_dentry_remote(upperdentry))) {
- dput(upperdentry);
- err = -EREMOTE;
- goto out;
- }
- if (upperdentry && !d.is_dir) {
- BUG_ON(!d.stop || d.redirect);
+ if (ctx->upperdentry && ctx->upperdentry->d_flags & DCACHE_OP_REAL)
+ return -EREMOTE;
+
+ if (ctx->upperdentry && !d->is_dir) {
/*
* Lookup copy up origin by decoding origin file handle.
* We may get a disconnected dentry, which is fine,
@@ -627,126 +1118,305 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
* number - it's the same as if we held a reference
* to a dentry in lower layer that was moved under us.
*/
- err = ovl_check_origin(upperdentry, roe->lowerstack,
- roe->numlower, &stack, &ctr);
+ err = ovl_check_origin(ofs, ctx->upperdentry, &ctx->origin_path);
if (err)
- goto out;
+ return err;
+
+ if (d->metacopy)
+ uppermetacopy = true;
+ metacopy_size = d->metacopy;
}
- if (d.redirect) {
- upperredirect = kstrdup(d.redirect, GFP_KERNEL);
- if (!upperredirect)
- goto out_put_upper;
- if (d.redirect[0] == '/')
+ if (d->redirect) {
+ err = -ENOMEM;
+ d->upperredirect = kstrdup(d->redirect, GFP_KERNEL);
+ if (!d->upperredirect)
+ return err;
+ if (d->redirect[0] == '/')
poe = roe;
}
- upperopaque = d.opaque;
+ upperopaque = d->opaque;
}
- if (!d.stop && poe->numlower) {
+ if (!d->stop && ovl_numlower(poe)) {
err = -ENOMEM;
- stack = kcalloc(ofs->numlower, sizeof(struct path),
- GFP_TEMPORARY);
- if (!stack)
- goto out_put_upper;
+ ctx->stack = ovl_stack_alloc(ofs->numlayer - 1);
+ if (!ctx->stack)
+ return err;
}
- for (i = 0; !d.stop && i < poe->numlower; i++) {
- struct path lowerpath = poe->lowerstack[i];
+ for (i = 0; !d->stop && i < ovl_numlower(poe); i++) {
+ struct ovl_path lower = ovl_lowerstack(poe)[i];
+
+ if (!ovl_check_follow_redirect(d)) {
+ err = -EPERM;
+ return err;
+ }
+
+ if (!check_redirect)
+ d->last = i == ovl_numlower(poe) - 1;
+ else if (d->is_dir || !ofs->numdatalayer)
+ d->last = lower.layer->idx == ovl_numlower(roe);
- d.last = i == poe->numlower - 1;
- err = ovl_lookup_layer(lowerpath.dentry, &d, &this);
+ d->layer = lower.layer;
+ err = ovl_lookup_layer(lower.dentry, d, &this, false);
if (err)
- goto out_put;
+ return err;
if (!this)
continue;
- stack[ctr].dentry = this;
- stack[ctr].mnt = lowerpath.mnt;
- ctr++;
+ /*
+ * If no origin fh is stored in upper of a merge dir, store fh
+ * of lower dir and set upper parent "impure".
+ */
+ if (ctx->upperdentry && !ctx->ctr && !ofs->noxattr && d->is_dir) {
+ err = ovl_fix_origin(ofs, dentry, this, ctx->upperdentry);
+ if (err) {
+ dput(this);
+ return err;
+ }
+ }
+
+ /*
+ * When "verify_lower" feature is enabled, do not merge with a
+ * lower dir that does not match a stored origin xattr. In any
+ * case, only verified origin is used for index lookup.
+ *
+ * For non-dir dentry, if index=on, then ensure origin
+ * matches the dentry found using path based lookup,
+ * otherwise error out.
+ */
+ if (ctx->upperdentry && !ctx->ctr &&
+ ((d->is_dir && ovl_verify_lower(dentry->d_sb)) ||
+ (!d->is_dir && ofs->config.index && ctx->origin_path))) {
+ err = ovl_verify_origin(ofs, ctx->upperdentry, this, false);
+ if (err) {
+ dput(this);
+ if (d->is_dir)
+ break;
+ return err;
+ }
+ origin = this;
+ }
+
+ if (!ctx->upperdentry && !d->is_dir && !ctx->ctr && d->metacopy)
+ metacopy_size = d->metacopy;
+
+ if (d->metacopy && ctx->ctr) {
+ /*
+ * Do not store intermediate metacopy dentries in
+ * lower chain, except top most lower metacopy dentry.
+ * Continue the loop so that if there is an absolute
+ * redirect on this dentry, poe can be reset to roe.
+ */
+ dput(this);
+ this = NULL;
+ } else {
+ ctx->stack[ctx->ctr].dentry = this;
+ ctx->stack[ctx->ctr].layer = lower.layer;
+ ctx->ctr++;
+ }
- if (d.stop)
+ if (d->stop)
break;
- if (d.redirect && d.redirect[0] == '/' && poe != roe) {
+ if (d->redirect && d->redirect[0] == '/' && poe != roe) {
poe = roe;
-
/* Find the current layer on the root dentry */
- for (i = 0; i < poe->numlower; i++)
- if (poe->lowerstack[i].mnt == lowerpath.mnt)
- break;
- if (WARN_ON(i == poe->numlower))
- break;
+ i = lower.layer->idx - 1;
}
}
- /* Lookup index by lower inode and verify it matches upper inode */
- if (ctr && !d.is_dir && ovl_indexdir(dentry->d_sb)) {
- struct dentry *origin = stack[0].dentry;
+ /*
+ * Defer lookup of lowerdata in data-only layers to first access.
+ * Don't require redirect=follow and metacopy=on in this case.
+ */
+ if (d->metacopy && ctx->ctr && ofs->numdatalayer && d->absolute_redirect) {
+ d->metacopy = 0;
+ ctx->ctr++;
+ } else if (!ovl_check_follow_redirect(d)) {
+ err = -EPERM;
+ return err;
+ }
- index = ovl_lookup_index(dentry, upperdentry, origin);
- if (IS_ERR(index)) {
- err = PTR_ERR(index);
- index = NULL;
- goto out_put;
+ /*
+ * For regular non-metacopy upper dentries, there is no lower
+ * path based lookup, hence ctr will be zero. If a dentry is found
+ * using ORIGIN xattr on upper, install it in stack.
+ *
+ * For metacopy dentry, path based lookup will find lower dentries.
+ * Just make sure a corresponding data dentry has been found.
+ */
+ if (d->metacopy || (uppermetacopy && !ctx->ctr)) {
+ pr_warn_ratelimited("metacopy with no lower data found - abort lookup (%pd2)\n",
+ dentry);
+ err = -EIO;
+ return err;
+ } else if (!d->is_dir && ctx->upperdentry && !ctx->ctr && ctx->origin_path) {
+ if (WARN_ON(ctx->stack != NULL)) {
+ err = -EIO;
+ return err;
}
+ ctx->stack = ctx->origin_path;
+ ctx->ctr = 1;
+ origin = ctx->origin_path->dentry;
+ ctx->origin_path = NULL;
}
- oe = ovl_alloc_entry(ctr);
- err = -ENOMEM;
- if (!oe)
- goto out_put;
+ /*
+ * Always lookup index if there is no-upperdentry.
+ *
+ * For the case of upperdentry, we have set origin by now if it
+ * needed to be set. There are basically three cases.
+ *
+ * For directories, lookup index by lower inode and verify it matches
+ * upper inode. We only trust dir index if we verified that lower dir
+ * matches origin, otherwise dir index entries may be inconsistent
+ * and we ignore them.
+ *
+ * For regular upper, we already set origin if upper had ORIGIN
+ * xattr. There is no verification though as there is no path
+ * based dentry lookup in lower in this case.
+ *
+ * For metacopy upper, we set a verified origin already if index
+ * is enabled and if upper had an ORIGIN xattr.
+ *
+ */
+ if (!ctx->upperdentry && ctx->ctr)
+ origin = ctx->stack[0].dentry;
+
+ if (origin && ovl_indexdir(dentry->d_sb) &&
+ (!d->is_dir || ovl_index_all(dentry->d_sb))) {
+ ctx->index = ovl_lookup_index(ofs, ctx->upperdentry, origin, true);
+ if (IS_ERR(ctx->index)) {
+ err = PTR_ERR(ctx->index);
+ ctx->index = NULL;
+ return err;
+ }
+ }
+
+ if (ctx->ctr) {
+ ctx->oe = ovl_alloc_entry(ctx->ctr);
+ err = -ENOMEM;
+ if (!ctx->oe)
+ return err;
+
+ ovl_stack_cpy(ovl_lowerstack(ctx->oe), ctx->stack, ctx->ctr);
+ }
- oe->opaque = upperopaque;
- memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
- dentry->d_fsdata = oe;
+ if (upperopaque)
+ ovl_dentry_set_opaque(dentry);
+ if (d->xwhiteouts)
+ ovl_dentry_set_xwhiteouts(dentry);
- if (upperdentry)
+ if (ctx->upperdentry)
ovl_dentry_set_upper_alias(dentry);
- else if (index)
- upperdentry = dget(index);
+ else if (ctx->index) {
+ char *upperredirect;
+ struct path upperpath = {
+ .dentry = ctx->upperdentry = dget(ctx->index),
+ .mnt = ovl_upper_mnt(ofs),
+ };
+
+ /*
+ * It's safe to assign upperredirect here: the previous
+ * assignment happens only if upperdentry is non-NULL, and
+ * this one only if upperdentry is NULL.
+ */
+ upperredirect = ovl_get_redirect_xattr(ofs, &upperpath, 0);
+ if (IS_ERR(upperredirect))
+ return PTR_ERR(upperredirect);
+ d->upperredirect = upperredirect;
+
+ err = ovl_check_metacopy_xattr(ofs, &upperpath, NULL);
+ if (err < 0)
+ return err;
+ d->metacopy = uppermetacopy = err;
+ metacopy_size = err;
+
+ if (!ovl_check_follow_redirect(d)) {
+ err = -EPERM;
+ return err;
+ }
+ }
+
+ if (ctx->upperdentry || ctx->ctr) {
+ struct inode *inode;
+ struct ovl_inode_params oip = {
+ .upperdentry = ctx->upperdentry,
+ .oe = ctx->oe,
+ .index = ctx->index,
+ .redirect = d->upperredirect,
+ };
+
+ /* Store lowerdata redirect for lazy lookup */
+ if (ctx->ctr > 1 && !d->is_dir && !ctx->stack[ctx->ctr - 1].dentry) {
+ oip.lowerdata_redirect = d->redirect;
+ d->redirect = NULL;
+ }
- if (upperdentry || ctr) {
- inode = ovl_get_inode(dentry, upperdentry);
- err = PTR_ERR(inode);
+ inode = ovl_get_inode(dentry->d_sb, &oip);
if (IS_ERR(inode))
- goto out_free_oe;
+ return PTR_ERR(inode);
- OVL_I(inode)->redirect = upperredirect;
- if (index)
- ovl_set_flag(OVL_INDEX, inode);
+ ctx->inode = inode;
+ if (ctx->upperdentry && !uppermetacopy)
+ ovl_set_flag(OVL_UPPERDATA, ctx->inode);
+
+ if (metacopy_size > OVL_METACOPY_MIN_SIZE)
+ ovl_set_flag(OVL_HAS_DIGEST, ctx->inode);
}
- revert_creds(old_cred);
- dput(index);
- kfree(stack);
- kfree(d.redirect);
- d_add(dentry, inode);
+ ovl_dentry_init_reval(dentry, ctx->upperdentry, OVL_I_E(ctx->inode));
- return NULL;
+ return 0;
+}
-out_free_oe:
- dentry->d_fsdata = NULL;
- kfree(oe);
-out_put:
- dput(index);
- for (i = 0; i < ctr; i++)
- dput(stack[i].dentry);
- kfree(stack);
-out_put_upper:
- dput(upperdentry);
- kfree(upperredirect);
-out:
+struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
+ unsigned int flags)
+{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+ struct ovl_entry *poe = OVL_E(dentry->d_parent);
+ bool check_redirect = (ovl_redirect_follow(ofs) || ofs->numdatalayer);
+ int err;
+ struct ovl_lookup_ctx ctx = {
+ .dentry = dentry,
+ };
+ struct ovl_lookup_data d = {
+ .sb = dentry->d_sb,
+ .dentry = dentry,
+ .name = dentry->d_name,
+ .last = check_redirect ? false : !ovl_numlower(poe),
+ };
+
+ if (dentry->d_name.len > ofs->namelen)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ with_ovl_creds(dentry->d_sb)
+ err = ovl_lookup_layers(&ctx, &d);
+
+ if (ctx.origin_path) {
+ dput(ctx.origin_path->dentry);
+ kfree(ctx.origin_path);
+ }
+ dput(ctx.index);
+ ovl_stack_free(ctx.stack, ctx.ctr);
kfree(d.redirect);
- revert_creds(old_cred);
- return ERR_PTR(err);
+
+ if (err) {
+ ovl_free_entry(ctx.oe);
+ dput(ctx.upperdentry);
+ kfree(d.upperredirect);
+ return ERR_PTR(err);
+ }
+
+ return d_splice_alias(ctx.inode, dentry);
}
bool ovl_lower_positive(struct dentry *dentry)
{
- struct ovl_entry *oe = dentry->d_fsdata;
- struct ovl_entry *poe = dentry->d_parent->d_fsdata;
+ struct ovl_entry *poe = OVL_E(dentry->d_parent);
const struct qstr *name = &dentry->d_name;
unsigned int i;
bool positive = false;
@@ -757,39 +1427,49 @@ bool ovl_lower_positive(struct dentry *dentry)
* whiteout.
*/
if (!dentry->d_inode)
- return oe->opaque;
+ return ovl_dentry_is_opaque(dentry);
/* Negative upper -> positive lower */
if (!ovl_dentry_upper(dentry))
return true;
- /* Positive upper -> have to look up lower to see whether it exists */
- for (i = 0; !done && !positive && i < poe->numlower; i++) {
- struct dentry *this;
- struct dentry *lowerdir = poe->lowerstack[i].dentry;
-
- this = lookup_one_len_unlocked(name->name, lowerdir,
- name->len);
- if (IS_ERR(this)) {
- switch (PTR_ERR(this)) {
- case -ENOENT:
- case -ENAMETOOLONG:
- break;
-
- default:
- /*
- * Assume something is there, we just couldn't
- * access it.
- */
- positive = true;
- break;
- }
- } else {
- if (this->d_inode) {
- positive = !ovl_is_whiteout(this);
+ with_ovl_creds(dentry->d_sb) {
+ /* Positive upper -> have to look up lower to see whether it exists */
+ for (i = 0; !done && !positive && i < ovl_numlower(poe); i++) {
+ struct dentry *this;
+ struct ovl_path *parentpath = &ovl_lowerstack(poe)[i];
+
+ /*
+ * We need to make a non-const copy of dentry->d_name,
+ * because lookup_one_positive_unlocked() will hash name
+ * with parentpath base, which is on another (lower fs).
+ */
+ this = lookup_one_positive_unlocked(mnt_idmap(parentpath->layer->mnt),
+ &QSTR_LEN(name->name, name->len),
+ parentpath->dentry);
+ if (IS_ERR(this)) {
+ switch (PTR_ERR(this)) {
+ case -ENOENT:
+ case -ENAMETOOLONG:
+ break;
+
+ default:
+ /*
+ * Assume something is there, we just couldn't
+ * access it.
+ */
+ positive = true;
+ break;
+ }
+ } else {
+ struct path path = {
+ .dentry = this,
+ .mnt = parentpath->layer->mnt,
+ };
+ positive = !ovl_path_is_whiteout(OVL_FS(dentry->d_sb), &path);
done = true;
+ dput(this);
}
- dput(this);
}
}