diff options
Diffstat (limited to 'fs/overlayfs/namei.c')
| -rw-r--r-- | fs/overlayfs/namei.c | 1396 |
1 files changed, 1038 insertions, 358 deletions
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 8aef2b304b2d..e9a69c95be91 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -1,65 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2011 Novell Inc. * Copyright (C) 2016 Red Hat, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/fs.h> #include <linux/cred.h> +#include <linux/ctype.h> #include <linux/namei.h> #include <linux/xattr.h> #include <linux/ratelimit.h> #include <linux/mount.h> #include <linux/exportfs.h> #include "overlayfs.h" -#include "ovl_entry.h" struct ovl_lookup_data { + struct super_block *sb; + struct dentry *dentry; + const struct ovl_layer *layer; struct qstr name; bool is_dir; bool opaque; + bool xwhiteouts; bool stop; bool last; char *redirect; + char *upperredirect; + int metacopy; + /* Referring to last redirect xattr */ + bool absolute_redirect; }; -static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d, +static int ovl_check_redirect(const struct path *path, struct ovl_lookup_data *d, size_t prelen, const char *post) { int res; - char *s, *next, *buf = NULL; + char *buf; + struct ovl_fs *ofs = OVL_FS(d->sb); - res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, NULL, 0); - if (res < 0) { - if (res == -ENODATA || res == -EOPNOTSUPP) - return 0; - goto fail; - } - buf = kzalloc(prelen + res + strlen(post) + 1, GFP_TEMPORARY); - if (!buf) - return -ENOMEM; + d->absolute_redirect = false; + buf = ovl_get_redirect_xattr(ofs, path, prelen + strlen(post)); + if (IS_ERR_OR_NULL(buf)) + return PTR_ERR(buf); - if (res == 0) - goto invalid; - - res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, buf, res); - if (res < 0) - goto fail; - if (res == 0) - goto invalid; if (buf[0] == '/') { - for (s = buf; *s++ == '/'; s = next) { - next = strchrnul(s, '/'); - if (s == next) - goto invalid; - } + d->absolute_redirect = true; + /* + * One of the ancestor path elements in an absolute path + * lookup in ovl_lookup_layer() could have been opaque and + * that will stop further lookup in lower layers (d->stop=true) + * But we have found an absolute redirect in descendant path + * element and that should force continue lookup in lower + * layers (reset d->stop). + */ + d->stop = false; } else { - if (strchr(buf, '/') != NULL) - goto invalid; - + res = strlen(buf) + 1; memmove(buf + prelen, buf, res); memcpy(buf, d->name.name, prelen); } @@ -71,29 +67,59 @@ static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d, d->name.len = strlen(d->redirect); return 0; - -err_free: - kfree(buf); - return 0; -fail: - pr_warn_ratelimited("overlayfs: failed to get redirect (%i)\n", res); - goto err_free; -invalid: - pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf); - goto err_free; } static int ovl_acceptable(void *ctx, struct dentry *dentry) { - return 1; + /* + * A non-dir origin may be disconnected, which is fine, because + * we only need it for its unique inode number. + */ + if (!d_is_dir(dentry)) + return 1; + + /* Don't decode a deleted empty directory */ + if (d_unhashed(dentry)) + return 0; + + /* Check if directory belongs to the layer we are decoding from */ + return is_subdir(dentry, ((struct vfsmount *)ctx)->mnt_root); } -static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry) +/* + * Check validity of an overlay file handle buffer. + * + * Return 0 for a valid file handle. + * Return -ENODATA for "origin unknown". + * Return <0 for an invalid file handle. + */ +int ovl_check_fb_len(struct ovl_fb *fb, int fb_len) { - int res; + if (fb_len < sizeof(struct ovl_fb) || fb_len < fb->len) + return -EINVAL; + + if (fb->magic != OVL_FH_MAGIC) + return -EINVAL; + + /* Treat larger version and unknown flags as "origin unknown" */ + if (fb->version > OVL_FH_VERSION || fb->flags & ~OVL_FH_FLAG_ALL) + return -ENODATA; + + /* Treat endianness mismatch as "origin unknown" */ + if (!(fb->flags & OVL_FH_FLAG_ANY_ENDIAN) && + (fb->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN) + return -ENODATA; + + return 0; +} + +static struct ovl_fh *ovl_get_fh(struct ovl_fs *ofs, struct dentry *upperdentry, + enum ovl_xattr ox) +{ + int res, err; struct ovl_fh *fh = NULL; - res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0); + res = ovl_getxattr_upper(ofs, upperdentry, ox, NULL, 0); if (res < 0) { if (res == -ENODATA || res == -EOPNOTSUPP) return NULL; @@ -103,28 +129,20 @@ static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry) if (res == 0) return NULL; - fh = kzalloc(res, GFP_TEMPORARY); + fh = kzalloc(res + OVL_FH_WIRE_OFFSET, GFP_KERNEL); if (!fh) return ERR_PTR(-ENOMEM); - res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, fh, res); + res = ovl_getxattr_upper(ofs, upperdentry, ox, fh->buf, res); if (res < 0) goto fail; - if (res < sizeof(struct ovl_fh) || res < fh->len) - goto invalid; - - if (fh->magic != OVL_FH_MAGIC) + err = ovl_check_fb_len(&fh->fb, res); + if (err < 0) { + if (err == -ENODATA) + goto out; goto invalid; - - /* Treat larger version and unknown flags as "origin unknown" */ - if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL) - goto out; - - /* Treat endianness mismatch as "origin unknown" */ - if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) && - (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN) - goto out; + } return fh; @@ -133,70 +151,107 @@ out: return NULL; fail: - pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res); + pr_warn_ratelimited("failed to get origin (%i)\n", res); goto out; invalid: - pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh); + pr_warn_ratelimited("invalid origin (%*phN)\n", res, fh); goto out; } -static struct dentry *ovl_get_origin(struct dentry *dentry, - struct vfsmount *mnt) +struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh, + struct vfsmount *mnt, bool connected) { - struct dentry *origin = NULL; - struct ovl_fh *fh = ovl_get_origin_fh(dentry); + struct dentry *real; int bytes; - if (IS_ERR_OR_NULL(fh)) - return (struct dentry *)fh; + if (!capable(CAP_DAC_READ_SEARCH)) + return NULL; /* * Make sure that the stored uuid matches the uuid of the lower * layer where file handle will be decoded. + * In case of uuid=off option just make sure that stored uuid is null. */ - if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid)) - goto out; + if (ovl_origin_uuid(ofs) ? + !uuid_equal(&fh->fb.uuid, &mnt->mnt_sb->s_uuid) : + !uuid_is_null(&fh->fb.uuid)) + return NULL; - bytes = (fh->len - offsetof(struct ovl_fh, fid)); - origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid, - bytes >> 2, (int)fh->type, - ovl_acceptable, NULL); - if (IS_ERR(origin)) { - /* Treat stale file handle as "origin unknown" */ - if (origin == ERR_PTR(-ESTALE)) - origin = NULL; - goto out; + bytes = (fh->fb.len - offsetof(struct ovl_fb, fid)); + real = exportfs_decode_fh(mnt, (struct fid *)fh->fb.fid, + bytes >> 2, (int)fh->fb.type, + connected ? ovl_acceptable : NULL, mnt); + if (IS_ERR(real)) { + /* + * Treat stale file handle to lower file as "origin unknown". + * upper file handle could become stale when upper file is + * unlinked and this information is needed to handle stale + * index entries correctly. + */ + if (real == ERR_PTR(-ESTALE) && + !(fh->fb.flags & OVL_FH_FLAG_PATH_UPPER)) + real = NULL; + return real; } - if (ovl_dentry_weird(origin) || - ((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT)) - goto invalid; - -out: - kfree(fh); - return origin; + if (ovl_dentry_weird(real)) { + dput(real); + return NULL; + } -invalid: - pr_warn_ratelimited("overlayfs: invalid origin (%pd2)\n", origin); - dput(origin); - origin = NULL; - goto out; + return real; } -static bool ovl_is_opaquedir(struct dentry *dentry) +static struct dentry *ovl_lookup_positive_unlocked(struct ovl_lookup_data *d, + const char *name, + struct dentry *base, int len, + bool drop_negative) { - return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE); + struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->layer->mnt), + &QSTR_LEN(name, len), base); + + if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) { + if (drop_negative && ret->d_lockref.count == 1) { + spin_lock(&ret->d_lock); + /* Recheck condition under lock */ + if (d_is_negative(ret) && ret->d_lockref.count == 1) + __d_drop(ret); + spin_unlock(&ret->d_lock); + } + dput(ret); + ret = ERR_PTR(-ENOENT); + } + return ret; } static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, const char *name, unsigned int namelen, size_t prelen, const char *post, - struct dentry **ret) + struct dentry **ret, bool drop_negative) { - struct dentry *this; + struct ovl_fs *ofs = OVL_FS(d->sb); + struct dentry *this = NULL; + const char *warn; + struct path path; int err; + bool last_element = !post[0]; + bool is_upper = d->layer->idx == 0; + char val; + + /* + * We allow filesystems that are case-folding capable as long as the + * layers are consistently enabled in the stack, enabled for every dir + * or disabled in all dirs. If someone has modified case folding on a + * directory on underlying layer, the warranty of the ovl stack is + * voided. + */ + if (ofs->casefold != ovl_dentry_casefolded(base)) { + warn = "parent wrong casefold"; + err = -ESTALE; + goto out_warn; + } - this = lookup_one_len_unlocked(name, base, namelen); + this = ovl_lookup_positive_unlocked(d, name, base, namelen, drop_negative); if (IS_ERR(this)) { err = PTR_ERR(this); this = NULL; @@ -204,30 +259,74 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, goto out; goto out_err; } - if (!this->d_inode) - goto put_and_out; + + if (ofs->casefold != ovl_dentry_casefolded(this)) { + warn = "child wrong casefold"; + err = -EREMOTE; + goto out_warn; + } if (ovl_dentry_weird(this)) { /* Don't support traversing automounts and other weirdness */ + warn = "unsupported object type"; err = -EREMOTE; - goto out_err; + goto out_warn; } - if (ovl_is_whiteout(this)) { + + path.dentry = this; + path.mnt = d->layer->mnt; + if (ovl_path_is_whiteout(ofs, &path)) { d->stop = d->opaque = true; goto put_and_out; } - if (!d_can_lookup(this)) { + /* + * This dentry should be a regular file if previous layer lookup + * found a metacopy dentry. + */ + if (last_element && d->metacopy && !d_is_reg(this)) { d->stop = true; - if (d->is_dir) - goto put_and_out; - goto out; + goto put_and_out; } - d->is_dir = true; - if (!d->last && ovl_is_opaquedir(this)) { - d->stop = d->opaque = true; - goto out; + + if (!d_can_lookup(this)) { + if (d->is_dir || !last_element) { + d->stop = true; + goto put_and_out; + } + err = ovl_check_metacopy_xattr(ofs, &path, NULL); + if (err < 0) + goto out_err; + + d->metacopy = err; + d->stop = !d->metacopy; + if (!d->metacopy || d->last) + goto out; + } else { + if (ovl_lookup_trap_inode(d->sb, this)) { + /* Caught in a trap of overlapping layers */ + warn = "overlapping layers"; + err = -ELOOP; + goto out_warn; + } + + if (last_element) + d->is_dir = true; + if (d->last) + goto out; + + /* overlay.opaque=x means xwhiteouts directory */ + val = ovl_get_opaquedir_val(ofs, &path); + if (last_element && !is_upper && val == 'x') { + d->xwhiteouts = true; + ovl_layer_set_xwhiteouts(ofs, d->layer); + } else if (val == 'y') { + d->stop = true; + if (last_element) + d->opaque = true; + goto out; + } } - err = ovl_check_redirect(this, d, prelen, post); + err = ovl_check_redirect(&path, d, prelen, post); if (err) goto out_err; out: @@ -239,13 +338,17 @@ put_and_out: this = NULL; goto out; +out_warn: + pr_warn_ratelimited("failed lookup in %s (%pd2, name='%.*s', err=%i): %s\n", + is_upper ? "upper" : "lower", base, + namelen, name, err, warn); out_err: dput(this); return err; } static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d, - struct dentry **ret) + struct dentry **ret, bool drop_negative) { /* Counting down from the end, since the prefix can change */ size_t rem = d->name.len - 1; @@ -254,7 +357,7 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d, if (d->name.name[0] != '/') return ovl_lookup_single(base, d, d->name.name, d->name.len, - 0, "", ret); + 0, "", ret, drop_negative); while (!IS_ERR_OR_NULL(base) && d_can_lookup(base)) { const char *s = d->name.name + d->name.len - rem; @@ -267,7 +370,8 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d, return -EIO; err = ovl_lookup_single(base, d, s, thislen, - d->name.len - rem, next, &base); + d->name.len - rem, next, &base, + drop_negative); dput(dentry); if (err) return err; @@ -284,49 +388,142 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d, return 0; } +static int ovl_lookup_data_layer(struct dentry *dentry, const char *redirect, + const struct ovl_layer *layer, + struct path *datapath) +{ + int err; + + err = vfs_path_lookup(layer->mnt->mnt_root, layer->mnt, redirect, + LOOKUP_BENEATH | LOOKUP_NO_SYMLINKS | LOOKUP_NO_XDEV, + datapath); + pr_debug("lookup lowerdata (%pd2, redirect=\"%s\", layer=%d, err=%i)\n", + dentry, redirect, layer->idx, err); + + if (err) + return err; + + err = -EREMOTE; + if (ovl_dentry_weird(datapath->dentry)) + goto out_path_put; + + err = -ENOENT; + /* Only regular file is acceptable as lower data */ + if (!d_is_reg(datapath->dentry)) + goto out_path_put; + + return 0; + +out_path_put: + path_put(datapath); + + return err; +} + +/* Lookup in data-only layers by absolute redirect to layer root */ +static int ovl_lookup_data_layers(struct dentry *dentry, const char *redirect, + struct ovl_path *lowerdata) +{ + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + const struct ovl_layer *layer; + struct path datapath; + int err = -ENOENT; + int i; + + layer = &ofs->layers[ofs->numlayer - ofs->numdatalayer]; + for (i = 0; i < ofs->numdatalayer; i++, layer++) { + err = ovl_lookup_data_layer(dentry, redirect, layer, &datapath); + if (!err) { + mntput(datapath.mnt); + lowerdata->dentry = datapath.dentry; + lowerdata->layer = layer; + return 0; + } + } + + return err; +} -static int ovl_check_origin(struct dentry *upperdentry, - struct path *lowerstack, unsigned int numlower, - struct path **stackp, unsigned int *ctrp) +int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected, + struct dentry *upperdentry, struct ovl_path **stackp) { - struct vfsmount *mnt; struct dentry *origin = NULL; int i; + for (i = 1; i <= ovl_numlowerlayer(ofs); i++) { + /* + * If lower fs uuid is not unique among lower fs we cannot match + * fh->uuid to layer. + */ + if (ofs->layers[i].fsid && + ofs->layers[i].fs->bad_uuid) + continue; - for (i = 0; i < numlower; i++) { - mnt = lowerstack[i].mnt; - origin = ovl_get_origin(upperdentry, mnt); - if (IS_ERR(origin)) - return PTR_ERR(origin); - + origin = ovl_decode_real_fh(ofs, fh, ofs->layers[i].mnt, + connected); if (origin) break; } if (!origin) - return 0; + return -ESTALE; + else if (IS_ERR(origin)) + return PTR_ERR(origin); + + if (upperdentry && !ovl_upper_is_whiteout(ofs, upperdentry) && + inode_wrong_type(d_inode(upperdentry), d_inode(origin)->i_mode)) + goto invalid; - BUG_ON(*ctrp); if (!*stackp) - *stackp = kmalloc(sizeof(struct path), GFP_TEMPORARY); + *stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL); if (!*stackp) { dput(origin); return -ENOMEM; } - **stackp = (struct path) { .dentry = origin, .mnt = mnt }; - *ctrp = 1; + **stackp = (struct ovl_path){ + .dentry = origin, + .layer = &ofs->layers[i] + }; + + return 0; + +invalid: + pr_warn_ratelimited("invalid origin (%pd2, ftype=%x, origin ftype=%x).\n", + upperdentry, d_inode(upperdentry)->i_mode & S_IFMT, + d_inode(origin)->i_mode & S_IFMT); + dput(origin); + return -ESTALE; +} + +static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry, + struct ovl_path **stackp) +{ + struct ovl_fh *fh = ovl_get_fh(ofs, upperdentry, OVL_XATTR_ORIGIN); + int err; + + if (IS_ERR_OR_NULL(fh)) + return PTR_ERR(fh); + + err = ovl_check_origin_fh(ofs, fh, false, upperdentry, stackp); + kfree(fh); + + if (err) { + if (err == -ESTALE) + return 0; + return err; + } return 0; } /* - * Verify that @fh matches the origin file handle stored in OVL_XATTR_ORIGIN. + * Verify that @fh matches the file handle stored in xattr @name. * Return 0 on match, -ESTALE on mismatch, < 0 on error. */ -static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh) +static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry, + enum ovl_xattr ox, const struct ovl_fh *fh) { - struct ovl_fh *ofh = ovl_get_origin_fh(dentry); + struct ovl_fh *ofh = ovl_get_fh(ofs, dentry, ox); int err = 0; if (!ofh) @@ -335,36 +532,50 @@ static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh) if (IS_ERR(ofh)) return PTR_ERR(ofh); - if (fh->len != ofh->len || memcmp(fh, ofh, fh->len)) + if (fh->fb.len != ofh->fb.len || memcmp(&fh->fb, &ofh->fb, fh->fb.len)) err = -ESTALE; kfree(ofh); return err; } +int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry, + enum ovl_xattr ox, const struct ovl_fh *fh, + bool is_upper, bool set) +{ + int err; + + err = ovl_verify_fh(ofs, dentry, ox, fh); + if (set && err == -ENODATA) + err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len); + + return err; +} + /* - * Verify that an inode matches the origin file handle stored in upper inode. + * Verify that @real dentry matches the file handle stored in xattr @name. * - * If @set is true and there is no stored file handle, encode and store origin - * file handle in OVL_XATTR_ORIGIN. + * If @set is true and there is no stored file handle, encode @real and store + * file handle in xattr @name. * - * Return 0 on match, -ESTALE on mismatch, < 0 on error. + * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error. */ -int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt, - struct dentry *origin, bool is_upper, bool set) +int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry, + enum ovl_xattr ox, struct dentry *real, + bool is_upper, bool set) { struct inode *inode; struct ovl_fh *fh; int err; - fh = ovl_encode_fh(origin, is_upper); + fh = ovl_encode_real_fh(ofs, d_inode(real), is_upper); err = PTR_ERR(fh); - if (IS_ERR(fh)) + if (IS_ERR(fh)) { + fh = NULL; goto fail; + } - err = ovl_verify_origin_fh(dentry, fh); - if (set && err == -ENODATA) - err = ovl_do_setxattr(dentry, OVL_XATTR_ORIGIN, fh, fh->len, 0); + err = ovl_verify_set_fh(ofs, dentry, ox, fh, is_upper, set); if (err) goto fail; @@ -373,83 +584,163 @@ out: return err; fail: - inode = d_inode(origin); - pr_warn_ratelimited("overlayfs: failed to verify origin (%pd2, ino=%lu, err=%i)\n", - origin, inode ? inode->i_ino : 0, err); + inode = d_inode(real); + pr_warn_ratelimited("failed to verify %s (%pd2, ino=%lu, err=%i)\n", + is_upper ? "upper" : "origin", real, + inode ? inode->i_ino : 0, err); goto out; } + +/* Get upper dentry from index */ +struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index, + bool connected) +{ + struct ovl_fh *fh; + struct dentry *upper; + + if (!d_is_dir(index)) + return dget(index); + + fh = ovl_get_fh(ofs, index, OVL_XATTR_UPPER); + if (IS_ERR_OR_NULL(fh)) + return ERR_CAST(fh); + + upper = ovl_decode_real_fh(ofs, fh, ovl_upper_mnt(ofs), connected); + kfree(fh); + + if (IS_ERR_OR_NULL(upper)) + return upper ?: ERR_PTR(-ESTALE); + + if (!d_is_dir(upper)) { + pr_warn_ratelimited("invalid index upper (%pd2, upper=%pd2).\n", + index, upper); + dput(upper); + return ERR_PTR(-EIO); + } + + return upper; +} + /* * Verify that an index entry name matches the origin file handle stored in * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path. * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error. */ -int ovl_verify_index(struct dentry *index, struct path *lowerstack, - unsigned int numlower) +int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index) { struct ovl_fh *fh = NULL; size_t len; - struct path origin = { }; - struct path *stack = &origin; - unsigned int ctr = 0; + struct ovl_path origin = { }; + struct ovl_path *stack = &origin; + struct dentry *upper = NULL; int err; if (!d_inode(index)) return 0; - /* - * Directory index entries are going to be used for looking up - * redirected upper dirs by lower dir fh when decoding an overlay - * file handle of a merge dir. Whiteout index entries are going to be - * used as an indication that an exported overlay file handle should - * be treated as stale (i.e. after unlink of the overlay inode). - * We don't know the verification rules for directory and whiteout - * index entries, because they have not been implemented yet, so return - * EROFS if those entries are found to avoid corrupting an index that - * was created by a newer kernel. - */ - err = -EROFS; - if (d_is_dir(index) || ovl_is_whiteout(index)) - goto fail; - err = -EINVAL; - if (index->d_name.len < sizeof(struct ovl_fh)*2) + if (index->d_name.len < sizeof(struct ovl_fb)*2) goto fail; err = -ENOMEM; len = index->d_name.len / 2; - fh = kzalloc(len, GFP_TEMPORARY); + fh = kzalloc(len + OVL_FH_WIRE_OFFSET, GFP_KERNEL); if (!fh) goto fail; err = -EINVAL; - if (hex2bin((u8 *)fh, index->d_name.name, len) || len != fh->len) + if (hex2bin(fh->buf, index->d_name.name, len)) goto fail; - err = ovl_verify_origin_fh(index, fh); + err = ovl_check_fb_len(&fh->fb, len); if (err) goto fail; - err = ovl_check_origin(index, lowerstack, numlower, &stack, &ctr); - if (!err && !ctr) - err = -ESTALE; + /* + * Whiteout index entries are used as an indication that an exported + * overlay file handle should be treated as stale (i.e. after unlink + * of the overlay inode). These entries contain no origin xattr. + */ + if (ovl_is_whiteout(index)) + goto out; + + /* + * Verifying directory index entries are not stale is expensive, so + * only verify stale dir index if NFS export is enabled. + */ + if (d_is_dir(index) && !ofs->config.nfs_export) + goto out; + + /* + * Directory index entries should have 'upper' xattr pointing to the + * real upper dir. Non-dir index entries are hardlinks to the upper + * real inode. For non-dir index, we can read the copy up origin xattr + * directly from the index dentry, but for dir index we first need to + * decode the upper directory. + */ + upper = ovl_index_upper(ofs, index, false); + if (IS_ERR_OR_NULL(upper)) { + err = PTR_ERR(upper); + /* + * Directory index entries with no 'upper' xattr need to be + * removed. When dir index entry has a stale 'upper' xattr, + * we assume that upper dir was removed and we treat the dir + * index as orphan entry that needs to be whited out. + */ + if (err == -ESTALE) + goto orphan; + else if (!err) + err = -ESTALE; + goto fail; + } + + err = ovl_verify_fh(ofs, upper, OVL_XATTR_ORIGIN, fh); + dput(upper); if (err) goto fail; - /* Check if index is orphan and don't warn before cleaning it */ - if (d_inode(index)->i_nlink == 1 && - ovl_get_nlink(index, origin.dentry, 0) == 0) - err = -ENOENT; + /* Check if non-dir index is orphan and don't warn before cleaning it */ + if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) { + err = ovl_check_origin_fh(ofs, fh, false, index, &stack); + if (err) + goto fail; + + if (ovl_get_nlink(ofs, origin.dentry, index, 0) == 0) + goto orphan; + } - dput(origin.dentry); out: + dput(origin.dentry); kfree(fh); return err; fail: - pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n", + pr_warn_ratelimited("failed to verify index (%pd2, ftype=%x, err=%i)\n", index, d_inode(index)->i_mode & S_IFMT, err); goto out; + +orphan: + pr_warn_ratelimited("orphan index entry (%pd2, ftype=%x, nlink=%u)\n", + index, d_inode(index)->i_mode & S_IFMT, + d_inode(index)->i_nlink); + err = -ENOENT; + goto out; +} + +int ovl_get_index_name_fh(const struct ovl_fh *fh, struct qstr *name) +{ + char *n, *s; + + n = kcalloc(fh->fb.len, 2, GFP_KERNEL); + if (!n) + return -ENOMEM; + + s = bin2hex(n, fh->buf, fh->fb.len); + *name = (struct qstr) QSTR_INIT(n, s - n); + + return 0; + } /* @@ -464,49 +755,77 @@ fail: * If the index dentry for a copy up origin inode is positive, but points * to an inode different than the upper inode, then either the upper inode * has been copied up and not indexed or it was indexed, but since then - * index dir was cleared. Either way, that index cannot be used to indentify + * index dir was cleared. Either way, that index cannot be used to identify * the overlay inode. */ -int ovl_get_index_name(struct dentry *origin, struct qstr *name) +int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin, + struct qstr *name) { - int err; struct ovl_fh *fh; - char *n, *s; + int err; - fh = ovl_encode_fh(origin, false); + fh = ovl_encode_real_fh(ofs, d_inode(origin), false); if (IS_ERR(fh)) return PTR_ERR(fh); - err = -ENOMEM; - n = kzalloc(fh->len * 2, GFP_TEMPORARY); - if (n) { - s = bin2hex(n, fh, fh->len); - *name = (struct qstr) QSTR_INIT(n, s - n); - err = 0; - } - kfree(fh); + err = ovl_get_index_name_fh(fh, name); + kfree(fh); return err; +} + +/* Lookup index by file handle for NFS export */ +struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh) +{ + struct dentry *index; + struct qstr name; + int err; + err = ovl_get_index_name_fh(fh, &name); + if (err) + return ERR_PTR(err); + + index = lookup_noperm_positive_unlocked(&name, ofs->workdir); + kfree(name.name); + if (IS_ERR(index)) { + if (PTR_ERR(index) == -ENOENT) + index = NULL; + return index; + } + + if (ovl_is_whiteout(index)) + err = -ESTALE; + else if (ovl_dentry_weird(index)) + err = -EIO; + else + return index; + + dput(index); + return ERR_PTR(err); } -static struct dentry *ovl_lookup_index(struct dentry *dentry, - struct dentry *upper, - struct dentry *origin) +struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, + struct dentry *origin, bool verify) { - struct ovl_fs *ofs = dentry->d_sb->s_fs_info; struct dentry *index; struct inode *inode; struct qstr name; + bool is_dir = d_is_dir(origin); int err; - err = ovl_get_index_name(origin, &name); + err = ovl_get_index_name(ofs, origin, &name); if (err) return ERR_PTR(err); - index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len); + index = lookup_one_positive_unlocked(ovl_upper_mnt_idmap(ofs), &name, + ofs->workdir); if (IS_ERR(index)) { - pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n" + err = PTR_ERR(index); + if (err == -ENOENT) { + index = NULL; + goto out; + } + pr_warn_ratelimited("failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n" "overlayfs: mount with '-o index=off' to disable inodes index.\n", d_inode(origin)->i_ino, name.len, name.name, err); @@ -514,21 +833,18 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, } inode = d_inode(index); - if (d_is_negative(index)) { - if (upper && d_inode(origin)->i_nlink > 1) { - pr_warn_ratelimited("overlayfs: hard link with origin but no index (ino=%lu).\n", - d_inode(origin)->i_ino); - goto fail; - } - + if (ovl_is_whiteout(index) && !verify) { + /* + * When index lookup is called with !verify for decoding an + * overlay file handle, a whiteout index implies that decode + * should treat file handle as stale and no need to print a + * warning about it. + */ dput(index); - index = NULL; - } else if (upper && d_inode(upper) != inode) { - pr_warn_ratelimited("overlayfs: wrong index found (index=%pd2, ino=%lu, upper ino=%lu).\n", - index, inode->i_ino, d_inode(upper)->i_ino); - goto fail; + index = ERR_PTR(-ESTALE); + goto out; } else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) || - ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) { + inode_wrong_type(inode, d_inode(origin)->i_mode)) { /* * Index should always be of the same file type as origin * except for the case of a whiteout index. A whiteout @@ -536,16 +852,38 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, * unlinked, which means that finding a lower origin on lookup * whose index is a whiteout should be treated as an error. */ - pr_warn_ratelimited("overlayfs: bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n", + pr_warn_ratelimited("bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n", index, d_inode(index)->i_mode & S_IFMT, d_inode(origin)->i_mode & S_IFMT); goto fail; - } + } else if (is_dir && verify) { + if (!upper) { + pr_warn_ratelimited("suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n", + origin, index); + goto fail; + } + /* Verify that dir index 'upper' xattr points to upper dir */ + err = ovl_verify_upper(ofs, index, upper, false); + if (err) { + if (err == -ESTALE) { + pr_warn_ratelimited("suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n", + upper, origin, index); + } + goto fail; + } + } else if (upper && d_inode(upper) != inode) { + goto out_dput; + } out: kfree(name.name); return index; +out_dput: + dput(index); + index = NULL; + goto out; + fail: dput(index); index = ERR_PTR(-EIO); @@ -556,67 +894,220 @@ fail: * Returns next layer in stack starting from top. * Returns -1 if this is the last layer. */ -int ovl_path_next(int idx, struct dentry *dentry, struct path *path) +int ovl_path_next(int idx, struct dentry *dentry, struct path *path, + const struct ovl_layer **layer) { - struct ovl_entry *oe = dentry->d_fsdata; + struct ovl_entry *oe = OVL_E(dentry); + struct ovl_path *lowerstack = ovl_lowerstack(oe); BUG_ON(idx < 0); if (idx == 0) { ovl_path_upper(dentry, path); - if (path->dentry) - return oe->numlower ? 1 : -1; + if (path->dentry) { + *layer = &OVL_FS(dentry->d_sb)->layers[0]; + return ovl_numlower(oe) ? 1 : -1; + } idx++; } - BUG_ON(idx > oe->numlower); - *path = oe->lowerstack[idx - 1]; + BUG_ON(idx > ovl_numlower(oe)); + path->dentry = lowerstack[idx - 1].dentry; + *layer = lowerstack[idx - 1].layer; + path->mnt = (*layer)->mnt; - return (idx < oe->numlower) ? idx + 1 : -1; + return (idx < ovl_numlower(oe)) ? idx + 1 : -1; } -struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, - unsigned int flags) +/* Fix missing 'origin' xattr */ +static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry, + struct dentry *lower, struct dentry *upper) +{ + const struct ovl_fh *fh; + int err; + + if (ovl_check_origin_xattr(ofs, upper)) + return 0; + + fh = ovl_get_origin_fh(ofs, lower); + if (IS_ERR(fh)) + return PTR_ERR(fh); + + err = ovl_want_write(dentry); + if (err) + goto out; + + err = ovl_set_origin_fh(ofs, fh, upper); + if (!err) + err = ovl_set_impure(dentry->d_parent, upper->d_parent); + + ovl_drop_write(dentry); +out: + kfree(fh); + return err; +} + +static int ovl_maybe_validate_verity(struct dentry *dentry) +{ + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + struct inode *inode = d_inode(dentry); + struct path datapath, metapath; + int err; + + if (!ofs->config.verity_mode || + !ovl_is_metacopy_dentry(dentry) || + ovl_test_flag(OVL_VERIFIED_DIGEST, inode)) + return 0; + + if (!ovl_test_flag(OVL_HAS_DIGEST, inode)) { + if (ofs->config.verity_mode == OVL_VERITY_REQUIRE) { + pr_warn_ratelimited("metacopy file '%pd' has no digest specified\n", + dentry); + return -EIO; + } + return 0; + } + + ovl_path_lowerdata(dentry, &datapath); + if (!datapath.dentry) + return -EIO; + + ovl_path_real(dentry, &metapath); + if (!metapath.dentry) + return -EIO; + + err = ovl_inode_lock_interruptible(inode); + if (err) + return err; + + if (!ovl_test_flag(OVL_VERIFIED_DIGEST, inode)) { + with_ovl_creds(dentry->d_sb) + err = ovl_validate_verity(ofs, &metapath, &datapath); + if (err == 0) + ovl_set_flag(OVL_VERIFIED_DIGEST, inode); + } + + ovl_inode_unlock(inode); + + return err; +} + +/* Lazy lookup of lowerdata */ +static int ovl_maybe_lookup_lowerdata(struct dentry *dentry) +{ + struct inode *inode = d_inode(dentry); + const char *redirect = ovl_lowerdata_redirect(inode); + struct ovl_path datapath = {}; + int err; + + if (!redirect || ovl_dentry_lowerdata(dentry)) + return 0; + + if (redirect[0] != '/') + return -EIO; + + err = ovl_inode_lock_interruptible(inode); + if (err) + return err; + + err = 0; + /* Someone got here before us? */ + if (ovl_dentry_lowerdata(dentry)) + goto out; + + with_ovl_creds(dentry->d_sb) + err = ovl_lookup_data_layers(dentry, redirect, &datapath); + if (err) + goto out_err; + + err = ovl_dentry_set_lowerdata(dentry, &datapath); + if (err) + goto out_err; + +out: + ovl_inode_unlock(inode); + dput(datapath.dentry); + + return err; + +out_err: + pr_warn_ratelimited("lazy lowerdata lookup failed (%pd2, err=%i)\n", + dentry, err); + goto out; +} + +int ovl_verify_lowerdata(struct dentry *dentry) +{ + int err; + + err = ovl_maybe_lookup_lowerdata(dentry); + if (err) + return err; + + return ovl_maybe_validate_verity(dentry); +} + +/* + * Following redirects/metacopy can have security consequences: it's like a + * symlink into the lower layer without the permission checks. + * + * This is only a problem if the upper layer is untrusted (e.g comes from an USB + * drive). This can allow a non-readable file or directory to become readable. + * + * Only following redirects when redirects are enabled disables this attack + * vector when not necessary. + */ +static bool ovl_check_follow_redirect(struct ovl_lookup_data *d) { + struct ovl_fs *ofs = OVL_FS(d->sb); + + if (d->metacopy && !ofs->config.metacopy) { + pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", d->dentry); + return false; + } + if ((d->redirect || d->upperredirect) && !ovl_redirect_follow(ofs)) { + pr_warn_ratelimited("refusing to follow redirect for (%pd2)\n", d->dentry); + return false; + } + return true; +} + +struct ovl_lookup_ctx { + struct dentry *dentry; struct ovl_entry *oe; - const struct cred *old_cred; - struct ovl_fs *ofs = dentry->d_sb->s_fs_info; - struct ovl_entry *poe = dentry->d_parent->d_fsdata; - struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata; - struct path *stack = NULL; - struct dentry *upperdir, *upperdentry = NULL; - struct dentry *index = NULL; - unsigned int ctr = 0; - struct inode *inode = NULL; - bool upperopaque = false; - char *upperredirect = NULL; + struct ovl_path *stack; + struct ovl_path *origin_path; + struct dentry *upperdentry; + struct dentry *index; + struct inode *inode; + unsigned int ctr; +}; + +static int ovl_lookup_layers(struct ovl_lookup_ctx *ctx, struct ovl_lookup_data *d) +{ + struct dentry *dentry = ctx->dentry; + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + struct ovl_entry *poe = OVL_E(dentry->d_parent); + struct ovl_entry *roe = OVL_E(dentry->d_sb->s_root); + bool check_redirect = (ovl_redirect_follow(ofs) || ofs->numdatalayer); + struct dentry *upperdir; struct dentry *this; + struct dentry *origin = NULL; + bool upperopaque = false; + bool uppermetacopy = false; + int metacopy_size = 0; unsigned int i; int err; - struct ovl_lookup_data d = { - .name = dentry->d_name, - .is_dir = false, - .opaque = false, - .stop = false, - .last = !poe->numlower, - .redirect = NULL, - }; - if (dentry->d_name.len > ofs->namelen) - return ERR_PTR(-ENAMETOOLONG); - - old_cred = ovl_override_creds(dentry->d_sb); upperdir = ovl_dentry_upper(dentry->d_parent); if (upperdir) { - err = ovl_lookup_layer(upperdir, &d, &upperdentry); + d->layer = &ofs->layers[0]; + err = ovl_lookup_layer(upperdir, d, &ctx->upperdentry, true); if (err) - goto out; + return err; - if (upperdentry && unlikely(ovl_dentry_remote(upperdentry))) { - dput(upperdentry); - err = -EREMOTE; - goto out; - } - if (upperdentry && !d.is_dir) { - BUG_ON(!d.stop || d.redirect); + if (ctx->upperdentry && ctx->upperdentry->d_flags & DCACHE_OP_REAL) + return -EREMOTE; + + if (ctx->upperdentry && !d->is_dir) { /* * Lookup copy up origin by decoding origin file handle. * We may get a disconnected dentry, which is fine, @@ -627,126 +1118,305 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, * number - it's the same as if we held a reference * to a dentry in lower layer that was moved under us. */ - err = ovl_check_origin(upperdentry, roe->lowerstack, - roe->numlower, &stack, &ctr); + err = ovl_check_origin(ofs, ctx->upperdentry, &ctx->origin_path); if (err) - goto out; + return err; + + if (d->metacopy) + uppermetacopy = true; + metacopy_size = d->metacopy; } - if (d.redirect) { - upperredirect = kstrdup(d.redirect, GFP_KERNEL); - if (!upperredirect) - goto out_put_upper; - if (d.redirect[0] == '/') + if (d->redirect) { + err = -ENOMEM; + d->upperredirect = kstrdup(d->redirect, GFP_KERNEL); + if (!d->upperredirect) + return err; + if (d->redirect[0] == '/') poe = roe; } - upperopaque = d.opaque; + upperopaque = d->opaque; } - if (!d.stop && poe->numlower) { + if (!d->stop && ovl_numlower(poe)) { err = -ENOMEM; - stack = kcalloc(ofs->numlower, sizeof(struct path), - GFP_TEMPORARY); - if (!stack) - goto out_put_upper; + ctx->stack = ovl_stack_alloc(ofs->numlayer - 1); + if (!ctx->stack) + return err; } - for (i = 0; !d.stop && i < poe->numlower; i++) { - struct path lowerpath = poe->lowerstack[i]; + for (i = 0; !d->stop && i < ovl_numlower(poe); i++) { + struct ovl_path lower = ovl_lowerstack(poe)[i]; + + if (!ovl_check_follow_redirect(d)) { + err = -EPERM; + return err; + } + + if (!check_redirect) + d->last = i == ovl_numlower(poe) - 1; + else if (d->is_dir || !ofs->numdatalayer) + d->last = lower.layer->idx == ovl_numlower(roe); - d.last = i == poe->numlower - 1; - err = ovl_lookup_layer(lowerpath.dentry, &d, &this); + d->layer = lower.layer; + err = ovl_lookup_layer(lower.dentry, d, &this, false); if (err) - goto out_put; + return err; if (!this) continue; - stack[ctr].dentry = this; - stack[ctr].mnt = lowerpath.mnt; - ctr++; + /* + * If no origin fh is stored in upper of a merge dir, store fh + * of lower dir and set upper parent "impure". + */ + if (ctx->upperdentry && !ctx->ctr && !ofs->noxattr && d->is_dir) { + err = ovl_fix_origin(ofs, dentry, this, ctx->upperdentry); + if (err) { + dput(this); + return err; + } + } + + /* + * When "verify_lower" feature is enabled, do not merge with a + * lower dir that does not match a stored origin xattr. In any + * case, only verified origin is used for index lookup. + * + * For non-dir dentry, if index=on, then ensure origin + * matches the dentry found using path based lookup, + * otherwise error out. + */ + if (ctx->upperdentry && !ctx->ctr && + ((d->is_dir && ovl_verify_lower(dentry->d_sb)) || + (!d->is_dir && ofs->config.index && ctx->origin_path))) { + err = ovl_verify_origin(ofs, ctx->upperdentry, this, false); + if (err) { + dput(this); + if (d->is_dir) + break; + return err; + } + origin = this; + } + + if (!ctx->upperdentry && !d->is_dir && !ctx->ctr && d->metacopy) + metacopy_size = d->metacopy; + + if (d->metacopy && ctx->ctr) { + /* + * Do not store intermediate metacopy dentries in + * lower chain, except top most lower metacopy dentry. + * Continue the loop so that if there is an absolute + * redirect on this dentry, poe can be reset to roe. + */ + dput(this); + this = NULL; + } else { + ctx->stack[ctx->ctr].dentry = this; + ctx->stack[ctx->ctr].layer = lower.layer; + ctx->ctr++; + } - if (d.stop) + if (d->stop) break; - if (d.redirect && d.redirect[0] == '/' && poe != roe) { + if (d->redirect && d->redirect[0] == '/' && poe != roe) { poe = roe; - /* Find the current layer on the root dentry */ - for (i = 0; i < poe->numlower; i++) - if (poe->lowerstack[i].mnt == lowerpath.mnt) - break; - if (WARN_ON(i == poe->numlower)) - break; + i = lower.layer->idx - 1; } } - /* Lookup index by lower inode and verify it matches upper inode */ - if (ctr && !d.is_dir && ovl_indexdir(dentry->d_sb)) { - struct dentry *origin = stack[0].dentry; + /* + * Defer lookup of lowerdata in data-only layers to first access. + * Don't require redirect=follow and metacopy=on in this case. + */ + if (d->metacopy && ctx->ctr && ofs->numdatalayer && d->absolute_redirect) { + d->metacopy = 0; + ctx->ctr++; + } else if (!ovl_check_follow_redirect(d)) { + err = -EPERM; + return err; + } - index = ovl_lookup_index(dentry, upperdentry, origin); - if (IS_ERR(index)) { - err = PTR_ERR(index); - index = NULL; - goto out_put; + /* + * For regular non-metacopy upper dentries, there is no lower + * path based lookup, hence ctr will be zero. If a dentry is found + * using ORIGIN xattr on upper, install it in stack. + * + * For metacopy dentry, path based lookup will find lower dentries. + * Just make sure a corresponding data dentry has been found. + */ + if (d->metacopy || (uppermetacopy && !ctx->ctr)) { + pr_warn_ratelimited("metacopy with no lower data found - abort lookup (%pd2)\n", + dentry); + err = -EIO; + return err; + } else if (!d->is_dir && ctx->upperdentry && !ctx->ctr && ctx->origin_path) { + if (WARN_ON(ctx->stack != NULL)) { + err = -EIO; + return err; } + ctx->stack = ctx->origin_path; + ctx->ctr = 1; + origin = ctx->origin_path->dentry; + ctx->origin_path = NULL; } - oe = ovl_alloc_entry(ctr); - err = -ENOMEM; - if (!oe) - goto out_put; + /* + * Always lookup index if there is no-upperdentry. + * + * For the case of upperdentry, we have set origin by now if it + * needed to be set. There are basically three cases. + * + * For directories, lookup index by lower inode and verify it matches + * upper inode. We only trust dir index if we verified that lower dir + * matches origin, otherwise dir index entries may be inconsistent + * and we ignore them. + * + * For regular upper, we already set origin if upper had ORIGIN + * xattr. There is no verification though as there is no path + * based dentry lookup in lower in this case. + * + * For metacopy upper, we set a verified origin already if index + * is enabled and if upper had an ORIGIN xattr. + * + */ + if (!ctx->upperdentry && ctx->ctr) + origin = ctx->stack[0].dentry; + + if (origin && ovl_indexdir(dentry->d_sb) && + (!d->is_dir || ovl_index_all(dentry->d_sb))) { + ctx->index = ovl_lookup_index(ofs, ctx->upperdentry, origin, true); + if (IS_ERR(ctx->index)) { + err = PTR_ERR(ctx->index); + ctx->index = NULL; + return err; + } + } + + if (ctx->ctr) { + ctx->oe = ovl_alloc_entry(ctx->ctr); + err = -ENOMEM; + if (!ctx->oe) + return err; + + ovl_stack_cpy(ovl_lowerstack(ctx->oe), ctx->stack, ctx->ctr); + } - oe->opaque = upperopaque; - memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr); - dentry->d_fsdata = oe; + if (upperopaque) + ovl_dentry_set_opaque(dentry); + if (d->xwhiteouts) + ovl_dentry_set_xwhiteouts(dentry); - if (upperdentry) + if (ctx->upperdentry) ovl_dentry_set_upper_alias(dentry); - else if (index) - upperdentry = dget(index); + else if (ctx->index) { + char *upperredirect; + struct path upperpath = { + .dentry = ctx->upperdentry = dget(ctx->index), + .mnt = ovl_upper_mnt(ofs), + }; + + /* + * It's safe to assign upperredirect here: the previous + * assignment happens only if upperdentry is non-NULL, and + * this one only if upperdentry is NULL. + */ + upperredirect = ovl_get_redirect_xattr(ofs, &upperpath, 0); + if (IS_ERR(upperredirect)) + return PTR_ERR(upperredirect); + d->upperredirect = upperredirect; + + err = ovl_check_metacopy_xattr(ofs, &upperpath, NULL); + if (err < 0) + return err; + d->metacopy = uppermetacopy = err; + metacopy_size = err; + + if (!ovl_check_follow_redirect(d)) { + err = -EPERM; + return err; + } + } + + if (ctx->upperdentry || ctx->ctr) { + struct inode *inode; + struct ovl_inode_params oip = { + .upperdentry = ctx->upperdentry, + .oe = ctx->oe, + .index = ctx->index, + .redirect = d->upperredirect, + }; + + /* Store lowerdata redirect for lazy lookup */ + if (ctx->ctr > 1 && !d->is_dir && !ctx->stack[ctx->ctr - 1].dentry) { + oip.lowerdata_redirect = d->redirect; + d->redirect = NULL; + } - if (upperdentry || ctr) { - inode = ovl_get_inode(dentry, upperdentry); - err = PTR_ERR(inode); + inode = ovl_get_inode(dentry->d_sb, &oip); if (IS_ERR(inode)) - goto out_free_oe; + return PTR_ERR(inode); - OVL_I(inode)->redirect = upperredirect; - if (index) - ovl_set_flag(OVL_INDEX, inode); + ctx->inode = inode; + if (ctx->upperdentry && !uppermetacopy) + ovl_set_flag(OVL_UPPERDATA, ctx->inode); + + if (metacopy_size > OVL_METACOPY_MIN_SIZE) + ovl_set_flag(OVL_HAS_DIGEST, ctx->inode); } - revert_creds(old_cred); - dput(index); - kfree(stack); - kfree(d.redirect); - d_add(dentry, inode); + ovl_dentry_init_reval(dentry, ctx->upperdentry, OVL_I_E(ctx->inode)); - return NULL; + return 0; +} -out_free_oe: - dentry->d_fsdata = NULL; - kfree(oe); -out_put: - dput(index); - for (i = 0; i < ctr; i++) - dput(stack[i].dentry); - kfree(stack); -out_put_upper: - dput(upperdentry); - kfree(upperredirect); -out: +struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + struct ovl_entry *poe = OVL_E(dentry->d_parent); + bool check_redirect = (ovl_redirect_follow(ofs) || ofs->numdatalayer); + int err; + struct ovl_lookup_ctx ctx = { + .dentry = dentry, + }; + struct ovl_lookup_data d = { + .sb = dentry->d_sb, + .dentry = dentry, + .name = dentry->d_name, + .last = check_redirect ? false : !ovl_numlower(poe), + }; + + if (dentry->d_name.len > ofs->namelen) + return ERR_PTR(-ENAMETOOLONG); + + with_ovl_creds(dentry->d_sb) + err = ovl_lookup_layers(&ctx, &d); + + if (ctx.origin_path) { + dput(ctx.origin_path->dentry); + kfree(ctx.origin_path); + } + dput(ctx.index); + ovl_stack_free(ctx.stack, ctx.ctr); kfree(d.redirect); - revert_creds(old_cred); - return ERR_PTR(err); + + if (err) { + ovl_free_entry(ctx.oe); + dput(ctx.upperdentry); + kfree(d.upperredirect); + return ERR_PTR(err); + } + + return d_splice_alias(ctx.inode, dentry); } bool ovl_lower_positive(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; - struct ovl_entry *poe = dentry->d_parent->d_fsdata; + struct ovl_entry *poe = OVL_E(dentry->d_parent); const struct qstr *name = &dentry->d_name; unsigned int i; bool positive = false; @@ -757,39 +1427,49 @@ bool ovl_lower_positive(struct dentry *dentry) * whiteout. */ if (!dentry->d_inode) - return oe->opaque; + return ovl_dentry_is_opaque(dentry); /* Negative upper -> positive lower */ if (!ovl_dentry_upper(dentry)) return true; - /* Positive upper -> have to look up lower to see whether it exists */ - for (i = 0; !done && !positive && i < poe->numlower; i++) { - struct dentry *this; - struct dentry *lowerdir = poe->lowerstack[i].dentry; - - this = lookup_one_len_unlocked(name->name, lowerdir, - name->len); - if (IS_ERR(this)) { - switch (PTR_ERR(this)) { - case -ENOENT: - case -ENAMETOOLONG: - break; - - default: - /* - * Assume something is there, we just couldn't - * access it. - */ - positive = true; - break; - } - } else { - if (this->d_inode) { - positive = !ovl_is_whiteout(this); + with_ovl_creds(dentry->d_sb) { + /* Positive upper -> have to look up lower to see whether it exists */ + for (i = 0; !done && !positive && i < ovl_numlower(poe); i++) { + struct dentry *this; + struct ovl_path *parentpath = &ovl_lowerstack(poe)[i]; + + /* + * We need to make a non-const copy of dentry->d_name, + * because lookup_one_positive_unlocked() will hash name + * with parentpath base, which is on another (lower fs). + */ + this = lookup_one_positive_unlocked(mnt_idmap(parentpath->layer->mnt), + &QSTR_LEN(name->name, name->len), + parentpath->dentry); + if (IS_ERR(this)) { + switch (PTR_ERR(this)) { + case -ENOENT: + case -ENAMETOOLONG: + break; + + default: + /* + * Assume something is there, we just couldn't + * access it. + */ + positive = true; + break; + } + } else { + struct path path = { + .dentry = this, + .mnt = parentpath->layer->mnt, + }; + positive = !ovl_path_is_whiteout(OVL_FS(dentry->d_sb), &path); done = true; + dput(this); } - dput(this); } } |
