diff options
Diffstat (limited to 'fs/overlayfs')
| -rw-r--r-- | fs/overlayfs/Kconfig | 1 | ||||
| -rw-r--r-- | fs/overlayfs/Makefile | 2 | ||||
| -rw-r--r-- | fs/overlayfs/copy_up.c | 388 | ||||
| -rw-r--r-- | fs/overlayfs/dir.c | 868 | ||||
| -rw-r--r-- | fs/overlayfs/export.c | 95 | ||||
| -rw-r--r-- | fs/overlayfs/file.c | 598 | ||||
| -rw-r--r-- | fs/overlayfs/inode.c | 313 | ||||
| -rw-r--r-- | fs/overlayfs/namei.c | 575 | ||||
| -rw-r--r-- | fs/overlayfs/overlayfs.h | 248 | ||||
| -rw-r--r-- | fs/overlayfs/ovl_entry.h | 23 | ||||
| -rw-r--r-- | fs/overlayfs/params.c | 620 | ||||
| -rw-r--r-- | fs/overlayfs/params.h | 2 | ||||
| -rw-r--r-- | fs/overlayfs/readdir.c | 345 | ||||
| -rw-r--r-- | fs/overlayfs/super.c | 497 | ||||
| -rw-r--r-- | fs/overlayfs/util.c | 254 | ||||
| -rw-r--r-- | fs/overlayfs/xattrs.c | 261 |
16 files changed, 2957 insertions, 2133 deletions
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index fec5020c3495..2ac67e04a6fb 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config OVERLAY_FS tristate "Overlay filesystem support" + select FS_STACK select EXPORTFS help An overlay filesystem combines two filesystems - an 'upper' filesystem diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile index 4e173d56b11f..5648954f8588 100644 --- a/fs/overlayfs/Makefile +++ b/fs/overlayfs/Makefile @@ -6,4 +6,4 @@ obj-$(CONFIG_OVERLAY_FS) += overlay.o overlay-objs := super.o namei.o util.o inode.o file.o dir.o readdir.o \ - copy_up.o export.o params.o + copy_up.o export.o params.o xattrs.o diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index bae404a1bad4..758611ee4475 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -16,7 +16,6 @@ #include <linux/sched/signal.h> #include <linux/cred.h> #include <linux/namei.h> -#include <linux/fdtable.h> #include <linux/ratelimit.h> #include <linux/exportfs.h> #include "overlayfs.h" @@ -114,13 +113,13 @@ int ovl_copy_xattr(struct super_block *sb, const struct path *oldpath, struct de if (ovl_is_private_xattr(sb, name)) continue; - error = security_inode_copy_up_xattr(name); - if (error < 0 && error != -EOPNOTSUPP) - break; - if (error == 1) { + error = security_inode_copy_up_xattr(old, name); + if (error == -ECANCELED) { error = 0; continue; /* Discard */ } + if (error < 0 && error != -EOPNOTSUPP) + break; if (is_posix_acl_xattr(name)) { error = ovl_copy_acl(OVL_FS(sb), oldpath, new, name); @@ -172,8 +171,8 @@ out: static int ovl_copy_fileattr(struct inode *inode, const struct path *old, const struct path *new) { - struct fileattr oldfa = { .flags_valid = true }; - struct fileattr newfa = { .flags_valid = true }; + struct file_kattr oldfa = { .flags_valid = true }; + struct file_kattr newfa = { .flags_valid = true }; int err; err = ovl_real_fileattr_get(old, &oldfa); @@ -230,8 +229,37 @@ static int ovl_copy_fileattr(struct inode *inode, const struct path *old, return ovl_real_fileattr_set(new, &newfa); } +static int ovl_verify_area(loff_t pos, loff_t pos2, loff_t len, loff_t totlen) +{ + loff_t tmp; + + if (pos != pos2) + return -EIO; + if (pos < 0 || len < 0 || totlen < 0) + return -EIO; + if (check_add_overflow(pos, len, &tmp)) + return -EIO; + return 0; +} + +static int ovl_sync_file(const struct path *path) +{ + struct file *new_file; + int err; + + new_file = ovl_path_open(path, O_LARGEFILE | O_RDONLY); + if (IS_ERR(new_file)) + return PTR_ERR(new_file); + + err = vfs_fsync(new_file, 0); + fput(new_file); + + return err; +} + static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, - struct file *new_file, loff_t len) + struct file *new_file, loff_t len, + bool datasync) { struct path datapath; struct file *old_file; @@ -244,7 +272,8 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, int error = 0; ovl_path_lowerdata(dentry, &datapath); - if (WARN_ON(datapath.dentry == NULL)) + if (WARN_ON_ONCE(datapath.dentry == NULL) || + WARN_ON_ONCE(len < 0)) return -EIO; old_file = ovl_path_open(&datapath, O_LARGEFILE | O_RDONLY); @@ -252,10 +281,16 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, return PTR_ERR(old_file); /* Try to use clone_file_range to clone up within the same fs */ - cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0); + cloned = vfs_clone_file_range(old_file, 0, new_file, 0, len, 0); if (cloned == len) goto out_fput; + /* Couldn't clone, so now we try to copy the data */ + error = rw_verify_area(READ, old_file, &old_pos, len); + if (!error) + error = rw_verify_area(WRITE, new_file, &new_pos, len); + if (error) + goto out_fput; /* Check if lower fs supports seek operation */ if (old_file->f_mode & FMODE_LSEEK) @@ -263,7 +298,7 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, while (len) { size_t this_len = OVL_COPY_UP_CHUNK_SIZE; - long bytes; + ssize_t bytes; if (len < this_len) this_len = len; @@ -287,8 +322,12 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, * it may not recognize all kind of holes and sometimes * only skips partial of hole area. However, it will be * enough for most of the use cases. + * + * We do not hold upper sb_writers throughout the loop to avert + * lockdep warning with llseek of lower file in nested overlay: + * - upper sb_writers + * -- lower ovl_inode_lock (ovl_llseek) */ - if (skip_hole && data_pos < old_pos) { data_pos = vfs_llseek(old_file, old_pos, SEEK_DATA); if (data_pos > old_pos) { @@ -303,6 +342,10 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, } } + error = ovl_verify_area(old_pos, new_pos, this_len, len); + if (error) + break; + bytes = do_splice_direct(old_file, &old_pos, new_file, &new_pos, this_len, SPLICE_F_MOVE); @@ -314,7 +357,8 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, len -= bytes; } - if (!error && ovl_should_sync(ofs)) + /* call fsync once, either now or later along with metadata */ + if (!error && ovl_should_sync(ofs) && datasync) error = vfs_fsync(new_file, 0); out_fput: fput(old_file); @@ -337,7 +381,7 @@ static int ovl_set_timestamps(struct ovl_fs *ofs, struct dentry *upperdentry, { struct iattr attr = { .ia_valid = - ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET, + ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_CTIME, .ia_atime = stat->atime, .ia_mtime = stat->mtime, }; @@ -371,13 +415,13 @@ int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upperdentry, return err; } -struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real, +struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct inode *realinode, bool is_upper) { struct ovl_fh *fh; int fh_type, dwords; int buflen = MAX_HANDLE_SZ; - uuid_t *uuid = &real->d_sb->s_uuid; + uuid_t *uuid = &realinode->i_sb->s_uuid; int err; /* Make sure the real fid stays 32bit aligned */ @@ -394,13 +438,13 @@ struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real, * the price or reconnecting the dentry. */ dwords = buflen >> 2; - fh_type = exportfs_encode_fh(real, (void *)fh->fb.fid, &dwords, 0); + fh_type = exportfs_encode_inode_fh(realinode, (void *)fh->fb.fid, + &dwords, NULL, 0); buflen = (dwords << 2); err = -EIO; - if (WARN_ON(fh_type < 0) || - WARN_ON(buflen > MAX_HANDLE_SZ) || - WARN_ON(fh_type == FILEID_INVALID)) + if (fh_type < 0 || fh_type == FILEID_INVALID || + WARN_ON(buflen > MAX_HANDLE_SZ)) goto out_err; fh->fb.version = OVL_FH_VERSION; @@ -426,29 +470,29 @@ out_err: return ERR_PTR(err); } -int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower, - struct dentry *upper) +struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin) { - const struct ovl_fh *fh = NULL; - int err; - /* * When lower layer doesn't support export operations store a 'null' fh, * so we can use the overlay.origin xattr to distignuish between a copy * up and a pure upper inode. */ - if (ovl_can_decode_fh(lower->d_sb)) { - fh = ovl_encode_real_fh(ofs, lower, false); - if (IS_ERR(fh)) - return PTR_ERR(fh); - } + if (!ovl_can_decode_fh(origin->d_sb)) + return NULL; + + return ovl_encode_real_fh(ofs, d_inode(origin), false); +} + +int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh, + struct dentry *upper) +{ + int err; /* * Do not fail when upper doesn't support xattrs. */ err = ovl_check_setxattr(ofs, upper, OVL_XATTR_ORIGIN, fh->buf, fh ? fh->fb.len : 0, 0); - kfree(fh); /* Ignore -EPERM from setting "user.*" on symlink/special */ return err == -EPERM ? 0 : err; @@ -461,7 +505,7 @@ static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper, const struct ovl_fh *fh; int err; - fh = ovl_encode_real_fh(ofs, upper, true); + fh = ovl_encode_real_fh(ofs, d_inode(upper), true); if (IS_ERR(fh)) return PTR_ERR(fh); @@ -473,17 +517,14 @@ static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper, /* * Create and install index entry. - * - * Caller must hold i_mutex on indexdir. */ -static int ovl_create_index(struct dentry *dentry, struct dentry *origin, +static int ovl_create_index(struct dentry *dentry, const struct ovl_fh *fh, struct dentry *upper) { struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *indexdir = ovl_indexdir(dentry->d_sb); - struct inode *dir = d_inode(indexdir); - struct dentry *index = NULL; struct dentry *temp = NULL; + struct renamedata rd = {}; struct qstr name = { }; int err; @@ -502,7 +543,7 @@ static int ovl_create_index(struct dentry *dentry, struct dentry *origin, if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry)))) return -EIO; - err = ovl_get_index_name(ofs, origin, &name); + err = ovl_get_index_name_fh(fh, &name); if (err) return err; @@ -515,16 +556,18 @@ static int ovl_create_index(struct dentry *dentry, struct dentry *origin, if (err) goto out; - index = ovl_lookup_upper(ofs, name.name, indexdir, name.len); - if (IS_ERR(index)) { - err = PTR_ERR(index); - } else { - err = ovl_do_rename(ofs, dir, temp, dir, index, 0); - dput(index); - } + rd.mnt_idmap = ovl_upper_mnt_idmap(ofs); + rd.old_parent = indexdir; + rd.new_parent = indexdir; + err = start_renaming_dentry(&rd, 0, temp, &name); + if (err) + goto out; + + err = ovl_do_rename_rd(&rd); + end_renaming(&rd); out: if (err) - ovl_cleanup(ofs, dir, temp); + ovl_cleanup(ofs, indexdir, temp); dput(temp); free_name: kfree(name.name); @@ -541,10 +584,12 @@ struct ovl_copy_up_ctx { struct dentry *destdir; struct qstr destname; struct dentry *workdir; + const struct ovl_fh *origin_fh; bool origin; bool indexed; bool metacopy; bool metacopy_digest; + bool metadata_fsync; }; static int ovl_link_up(struct ovl_copy_up_ctx *c) @@ -555,22 +600,23 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); struct inode *udir = d_inode(upperdir); + ovl_start_write(c->dentry); + /* Mark parent "impure" because it may now contain non-pure upper */ err = ovl_set_impure(c->parent, upperdir); if (err) - return err; + goto out; err = ovl_set_nlink_lower(c->dentry); if (err) - return err; + goto out; - inode_lock_nested(udir, I_MUTEX_PARENT); - upper = ovl_lookup_upper(ofs, c->dentry->d_name.name, upperdir, - c->dentry->d_name.len); + upper = ovl_start_creating_upper(ofs, upperdir, + &QSTR_LEN(c->dentry->d_name.name, + c->dentry->d_name.len)); err = PTR_ERR(upper); if (!IS_ERR(upper)) { err = ovl_do_link(ofs, ovl_dentry_upper(c->dentry), udir, upper); - dput(upper); if (!err) { /* Restore timestamps on parent (best effort) */ @@ -578,13 +624,15 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) ovl_dentry_set_upper_alias(c->dentry); ovl_dentry_update_reval(c->dentry, upper); } + end_creating(upper); } - inode_unlock(udir); if (err) - return err; + goto out; err = ovl_set_nlink_upper(c->dentry); +out: + ovl_end_write(c->dentry); return err; } @@ -601,7 +649,8 @@ static int ovl_copy_up_data(struct ovl_copy_up_ctx *c, const struct path *temp) if (IS_ERR(new_file)) return PTR_ERR(new_file); - err = ovl_copy_up_file(ofs, c->dentry, new_file, c->stat.size); + err = ovl_copy_up_file(ofs, c->dentry, new_file, c->stat.size, + !c->metadata_fsync); fput(new_file); return err; @@ -618,7 +667,8 @@ static int ovl_copy_up_metadata(struct ovl_copy_up_ctx *c, struct dentry *temp) if (err) return err; - if (inode->i_flags & OVL_COPY_I_FLAGS_MASK) { + if (inode->i_flags & OVL_FATTR_I_FLAGS_MASK && + (S_ISREG(c->stat.mode) || S_ISDIR(c->stat.mode))) { /* * Copy the fileattr inode flags that are the source of already * copied i_flags @@ -636,7 +686,7 @@ static int ovl_copy_up_metadata(struct ovl_copy_up_ctx *c, struct dentry *temp) * hard link. */ if (c->origin) { - err = ovl_set_origin(ofs, c->lowerpath.dentry, temp); + err = ovl_set_origin_fh(ofs, c->origin_fh, temp); if (err) return err; } @@ -667,37 +717,40 @@ static int ovl_copy_up_metadata(struct ovl_copy_up_ctx *c, struct dentry *temp) err = ovl_set_attr(ofs, temp, &c->stat); inode_unlock(temp->d_inode); + /* fsync metadata before moving it into upper dir */ + if (!err && ovl_should_sync(ofs) && c->metadata_fsync) + err = ovl_sync_file(&upperpath); + return err; } -struct ovl_cu_creds { - const struct cred *old; - struct cred *new; -}; - -static int ovl_prep_cu_creds(struct dentry *dentry, struct ovl_cu_creds *cc) +static const struct cred *ovl_prepare_copy_up_creds(struct dentry *dentry) { + struct cred *copy_up_cred = NULL; int err; - cc->old = cc->new = NULL; - err = security_inode_copy_up(dentry, &cc->new); + err = security_inode_copy_up(dentry, ©_up_cred); if (err < 0) - return err; + return ERR_PTR(err); - if (cc->new) - cc->old = override_creds(cc->new); + if (!copy_up_cred) + return NULL; - return 0; + return override_creds(copy_up_cred); } -static void ovl_revert_cu_creds(struct ovl_cu_creds *cc) +static void ovl_revert_copy_up_creds(const struct cred *orig_cred) { - if (cc->new) { - revert_creds(cc->old); - put_cred(cc->new); - } + const struct cred *copy_up_cred; + + copy_up_cred = revert_creds(orig_cred); + put_cred(copy_up_cred); } +DEFINE_CLASS(copy_up_creds, const struct cred *, + if (!IS_ERR_OR_NULL(_T)) ovl_revert_copy_up_creds(_T), + ovl_prepare_copy_up_creds(dentry), struct dentry *dentry) + /* * Copyup using workdir to prepare temp file. Used when copying up directories, * special files or when upper fs doesn't support O_TMPFILE. @@ -706,10 +759,9 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) { struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); struct inode *inode; - struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir); struct path path = { .mnt = ovl_upper_mnt(ofs) }; - struct dentry *temp, *upper; - struct ovl_cu_creds cc; + struct renamedata rd = {}; + struct dentry *temp; int err; struct ovl_cattr cattr = { /* Can't properly set mode on creation because of the umask */ @@ -718,21 +770,17 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) .link = c->link }; - /* workdir and destdir could be the same when copying up to indexdir */ - err = -EIO; - if (lock_rename(c->workdir, c->destdir) != NULL) - goto unlock; - - err = ovl_prep_cu_creds(c->dentry, &cc); - if (err) - goto unlock; + scoped_class(copy_up_creds, copy_up_creds, c->dentry) { + if (IS_ERR(copy_up_creds)) + return PTR_ERR(copy_up_creds); - temp = ovl_create_temp(ofs, c->workdir, &cattr); - ovl_revert_cu_creds(&cc); + ovl_start_write(c->dentry); + temp = ovl_create_temp(ofs, c->workdir, &cattr); + ovl_end_write(c->dentry); + } - err = PTR_ERR(temp); if (IS_ERR(temp)) - goto unlock; + return PTR_ERR(temp); /* * Copy up data first and then xattrs. Writing data after @@ -740,29 +788,42 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) */ path.dentry = temp; err = ovl_copy_up_data(c, &path); + ovl_start_write(c->dentry); if (err) - goto cleanup; - - err = ovl_copy_up_metadata(c, temp); - if (err) - goto cleanup; + goto cleanup_unlocked; if (S_ISDIR(c->stat.mode) && c->indexed) { - err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp); + err = ovl_create_index(c->dentry, c->origin_fh, temp); if (err) - goto cleanup; + goto cleanup_unlocked; } - upper = ovl_lookup_upper(ofs, c->destname.name, c->destdir, - c->destname.len); - err = PTR_ERR(upper); - if (IS_ERR(upper)) - goto cleanup; + /* + * We cannot hold lock_rename() throughout this helper, because of + * lock ordering with sb_writers, which shouldn't be held when calling + * ovl_copy_up_data(), so lock workdir and destdir and make sure that + * temp wasn't moved before copy up completion or cleanup. + */ + rd.mnt_idmap = ovl_upper_mnt_idmap(ofs); + rd.old_parent = c->workdir; + rd.new_parent = c->destdir; + rd.flags = 0; + err = start_renaming_dentry(&rd, 0, temp, + &QSTR_LEN(c->destname.name, c->destname.len)); + if (err) { + /* temp or workdir moved underneath us? map to -EIO */ + err = -EIO; + } + if (err) + goto cleanup_unlocked; + + err = ovl_copy_up_metadata(c, temp); + if (!err) + err = ovl_do_rename_rd(&rd); + end_renaming(&rd); - err = ovl_do_rename(ofs, wdir, temp, udir, upper, 0); - dput(upper); if (err) - goto cleanup; + goto cleanup_unlocked; inode = d_inode(c->dentry); if (c->metacopy_digest) @@ -776,15 +837,15 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) ovl_inode_update(inode, temp); if (S_ISDIR(inode->i_mode)) ovl_set_flag(OVL_WHITEOUTS, inode); -unlock: - unlock_rename(c->workdir, c->destdir); +out: + ovl_end_write(c->dentry); return err; -cleanup: - ovl_cleanup(ofs, wdir, temp); +cleanup_unlocked: + ovl_cleanup(ofs, c->workdir, temp); dput(temp); - goto unlock; + goto out; } /* Copyup using O_TMPFILE which does not require cross dir locking */ @@ -794,43 +855,45 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c) struct inode *udir = d_inode(c->destdir); struct dentry *temp, *upper; struct file *tmpfile; - struct ovl_cu_creds cc; int err; - err = ovl_prep_cu_creds(c->dentry, &cc); - if (err) - return err; + scoped_class(copy_up_creds, copy_up_creds, c->dentry) { + if (IS_ERR(copy_up_creds)) + return PTR_ERR(copy_up_creds); - tmpfile = ovl_do_tmpfile(ofs, c->workdir, c->stat.mode); - ovl_revert_cu_creds(&cc); + ovl_start_write(c->dentry); + tmpfile = ovl_do_tmpfile(ofs, c->workdir, c->stat.mode); + ovl_end_write(c->dentry); + } if (IS_ERR(tmpfile)) return PTR_ERR(tmpfile); temp = tmpfile->f_path.dentry; if (!c->metacopy && c->stat.size) { - err = ovl_copy_up_file(ofs, c->dentry, tmpfile, c->stat.size); + err = ovl_copy_up_file(ofs, c->dentry, tmpfile, c->stat.size, + !c->metadata_fsync); if (err) goto out_fput; } + ovl_start_write(c->dentry); + err = ovl_copy_up_metadata(c, temp); if (err) - goto out_fput; - - inode_lock_nested(udir, I_MUTEX_PARENT); + goto out; - upper = ovl_lookup_upper(ofs, c->destname.name, c->destdir, - c->destname.len); + upper = ovl_start_creating_upper(ofs, c->destdir, + &QSTR_LEN(c->destname.name, + c->destname.len)); err = PTR_ERR(upper); if (!IS_ERR(upper)) { err = ovl_do_link(ofs, temp, udir, upper); - dput(upper); + end_creating(upper); } - inode_unlock(udir); if (err) - goto out_fput; + goto out; if (c->metacopy_digest) ovl_set_flag(OVL_HAS_DIGEST, d_inode(c->dentry)); @@ -842,6 +905,8 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c) ovl_set_upperdata(d_inode(c->dentry)); ovl_inode_update(d_inode(c->dentry), dget(temp)); +out: + ovl_end_write(c->dentry); out_fput: fput(tmpfile); return err; @@ -860,6 +925,8 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) { int err; struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); + struct dentry *origin = c->lowerpath.dentry; + struct ovl_fh *fh = NULL; bool to_index = false; /* @@ -876,25 +943,42 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) to_index = true; } - if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index) + if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index) { + fh = ovl_get_origin_fh(ofs, origin); + if (IS_ERR(fh)) + return PTR_ERR(fh); + + /* origin_fh may be NULL */ + c->origin_fh = fh; c->origin = true; + } if (to_index) { c->destdir = ovl_indexdir(c->dentry->d_sb); - err = ovl_get_index_name(ofs, c->lowerpath.dentry, &c->destname); + err = ovl_get_index_name(ofs, origin, &c->destname); if (err) - return err; + goto out_free_fh; } else if (WARN_ON(!c->parent)) { /* Disconnected dentry must be copied up to index dir */ - return -EIO; + err = -EIO; + goto out_free_fh; } else { /* + * c->dentry->d_name is stabilzed by ovl_copy_up_start(), + * because if we got here, it means that c->dentry has no upper + * alias and changing ->d_name means going through ovl_rename() + * that will call ovl_copy_up() on source and target dentry. + */ + c->destname = c->dentry->d_name; + /* * Mark parent "impure" because it may now contain non-pure * upper */ + ovl_start_write(c->dentry); err = ovl_set_impure(c->parent, c->destdir); + ovl_end_write(c->dentry); if (err) - return err; + goto out_free_fh; } /* Should we copyup with O_TMPFILE or with workdir? */ @@ -908,6 +992,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) if (c->indexed) ovl_set_flag(OVL_INDEX, d_inode(c->dentry)); + ovl_start_write(c->dentry); if (to_index) { /* Initialize nlink for copy up of disconnected dentry */ err = ovl_set_nlink_upper(c->dentry); @@ -922,10 +1007,13 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) ovl_dentry_set_upper_alias(c->dentry); ovl_dentry_update_reval(c->dentry, ovl_dentry_upper(c->dentry)); } + ovl_end_write(c->dentry); out: if (to_index) kfree(c->destname.name); +out_free_fh: + kfree(fh); return err; } @@ -1010,15 +1098,16 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c) * Writing to upper file will clear security.capability xattr. We * don't want that to happen for normal copy-up operation. */ + ovl_start_write(c->dentry); if (capability) { err = ovl_do_setxattr(ofs, upperpath.dentry, XATTR_NAME_CAPS, capability, cap_size, 0); - if (err) - goto out_free; } - - - err = ovl_removexattr(ofs, upperpath.dentry, OVL_XATTR_METACOPY); + if (!err) { + err = ovl_removexattr(ofs, upperpath.dentry, + OVL_XATTR_METACOPY); + } + ovl_end_write(c->dentry); if (err) goto out_free; @@ -1056,12 +1145,22 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, !kgid_has_mapping(current_user_ns(), ctx.stat.gid)) return -EOVERFLOW; + /* + * With metacopy disabled, we fsync after final metadata copyup, for + * both regular files and directories to get atomic copyup semantics + * on filesystems that do not use strict metadata ordering (e.g. ubifs). + * + * With metacopy enabled we want to avoid fsync on all meta copyup + * that will hurt performance of workloads such as chown -R, so we + * only fsync on data copyup as legacy behavior. + */ + ctx.metadata_fsync = !OVL_FS(dentry->d_sb)->config.metacopy && + (S_ISREG(ctx.stat.mode) || S_ISDIR(ctx.stat.mode)); ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags); if (parent) { ovl_path_upper(parent, &parentpath); ctx.destdir = parentpath.dentry; - ctx.destname = dentry->d_name; err = vfs_getattr(&parentpath, &ctx.pstat, STATX_ATIME | STATX_MTIME, @@ -1102,7 +1201,6 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, static int ovl_copy_up_flags(struct dentry *dentry, int flags) { int err = 0; - const struct cred *old_cred; bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED); /* @@ -1122,7 +1220,6 @@ static int ovl_copy_up_flags(struct dentry *dentry, int flags) if (err) return err; - old_cred = ovl_override_creds(dentry->d_sb); while (!err) { struct dentry *next; struct dentry *parent = NULL; @@ -1142,12 +1239,12 @@ static int ovl_copy_up_flags(struct dentry *dentry, int flags) next = parent; } - err = ovl_copy_up_one(parent, next, flags); + with_ovl_creds(dentry->d_sb) + err = ovl_copy_up_one(parent, next, flags); dput(parent); dput(next); } - revert_creds(old_cred); return err; } @@ -1169,17 +1266,10 @@ static bool ovl_open_need_copy_up(struct dentry *dentry, int flags) int ovl_maybe_copy_up(struct dentry *dentry, int flags) { - int err = 0; - - if (ovl_open_need_copy_up(dentry, flags)) { - err = ovl_want_write(dentry); - if (!err) { - err = ovl_copy_up_flags(dentry, flags); - ovl_drop_write(dentry); - } - } + if (!ovl_open_need_copy_up(dentry, flags)) + return 0; - return err; + return ovl_copy_up_flags(dentry, flags); } int ovl_copy_up_with_data(struct dentry *dentry) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 033fc0458a3d..ff3dbd1ca61f 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -14,6 +14,7 @@ #include <linux/posix_acl_xattr.h> #include <linux/atomic.h> #include <linux/ratelimit.h> +#include <linux/backing-file.h> #include "overlayfs.h" static unsigned short ovl_redirect_max = 256; @@ -23,7 +24,8 @@ MODULE_PARM_DESC(redirect_max, static int ovl_set_redirect(struct dentry *dentry, bool samedir); -int ovl_cleanup(struct ovl_fs *ofs, struct inode *wdir, struct dentry *wdentry) +static int ovl_cleanup_locked(struct ovl_fs *ofs, struct inode *wdir, + struct dentry *wdentry) { int err; @@ -42,75 +44,86 @@ int ovl_cleanup(struct ovl_fs *ofs, struct inode *wdir, struct dentry *wdentry) return err; } -struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir) +int ovl_cleanup(struct ovl_fs *ofs, struct dentry *workdir, + struct dentry *wdentry) +{ + wdentry = start_removing_dentry(workdir, wdentry); + if (IS_ERR(wdentry)) + return PTR_ERR(wdentry); + + ovl_cleanup_locked(ofs, workdir->d_inode, wdentry); + end_removing(wdentry); + + return 0; +} + +void ovl_tempname(char name[OVL_TEMPNAME_SIZE]) { - struct dentry *temp; - char name[20]; static atomic_t temp_id = ATOMIC_INIT(0); /* counter is allowed to wrap, since temp dentries are ephemeral */ - snprintf(name, sizeof(name), "#%x", atomic_inc_return(&temp_id)); + snprintf(name, OVL_TEMPNAME_SIZE, "#%x", atomic_inc_return(&temp_id)); +} - temp = ovl_lookup_upper(ofs, name, workdir, strlen(name)); - if (!IS_ERR(temp) && temp->d_inode) { - pr_err("workdir/%s already exists\n", name); - dput(temp); - temp = ERR_PTR(-EIO); - } +static struct dentry *ovl_start_creating_temp(struct ovl_fs *ofs, + struct dentry *workdir) +{ + char name[OVL_TEMPNAME_SIZE]; - return temp; + ovl_tempname(name); + return start_creating(ovl_upper_mnt_idmap(ofs), workdir, + &QSTR(name)); } -/* caller holds i_mutex on workdir */ static struct dentry *ovl_whiteout(struct ovl_fs *ofs) { int err; - struct dentry *whiteout; + struct dentry *whiteout, *link; struct dentry *workdir = ofs->workdir; struct inode *wdir = workdir->d_inode; + guard(mutex)(&ofs->whiteout_lock); + if (!ofs->whiteout) { - whiteout = ovl_lookup_temp(ofs, workdir); + whiteout = ovl_start_creating_temp(ofs, workdir); if (IS_ERR(whiteout)) - goto out; - + return whiteout; err = ovl_do_whiteout(ofs, wdir, whiteout); - if (err) { - dput(whiteout); - whiteout = ERR_PTR(err); - goto out; - } - ofs->whiteout = whiteout; + if (!err) + ofs->whiteout = dget(whiteout); + end_creating(whiteout); + if (err) + return ERR_PTR(err); } if (!ofs->no_shared_whiteout) { - whiteout = ovl_lookup_temp(ofs, workdir); - if (IS_ERR(whiteout)) - goto out; - - err = ovl_do_link(ofs, ofs->whiteout, wdir, whiteout); + link = ovl_start_creating_temp(ofs, workdir); + if (IS_ERR(link)) + return link; + err = ovl_do_link(ofs, ofs->whiteout, wdir, link); if (!err) - goto out; + whiteout = dget(link); + end_creating(link); + if (!err) + return whiteout; if (err != -EMLINK) { - pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%i)\n", - ofs->whiteout->d_inode->i_nlink, err); + pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%u)\n", + ofs->whiteout->d_inode->i_nlink, + err); ofs->no_shared_whiteout = true; } - dput(whiteout); } whiteout = ofs->whiteout; ofs->whiteout = NULL; -out: return whiteout; } -/* Caller must hold i_mutex on both workdir and dir */ -int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir, +int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *dir, struct dentry *dentry) { - struct inode *wdir = ofs->workdir->d_inode; struct dentry *whiteout; + struct renamedata rd = {}; int err; int flags = 0; @@ -122,55 +135,33 @@ int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir, if (d_is_dir(dentry)) flags = RENAME_EXCHANGE; - err = ovl_do_rename(ofs, wdir, whiteout, dir, dentry, flags); + rd.mnt_idmap = ovl_upper_mnt_idmap(ofs); + rd.old_parent = ofs->workdir; + rd.new_parent = dir; + rd.flags = flags; + err = start_renaming_two_dentries(&rd, whiteout, dentry); + if (!err) { + err = ovl_do_rename_rd(&rd); + end_renaming(&rd); + } if (err) goto kill_whiteout; if (flags) - ovl_cleanup(ofs, wdir, dentry); + ovl_cleanup(ofs, ofs->workdir, dentry); out: dput(whiteout); return err; kill_whiteout: - ovl_cleanup(ofs, wdir, whiteout); + ovl_cleanup(ofs, ofs->workdir, whiteout); goto out; } -int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir, - struct dentry **newdentry, umode_t mode) -{ - int err; - struct dentry *d, *dentry = *newdentry; - - err = ovl_do_mkdir(ofs, dir, dentry, mode); - if (err) - return err; - - if (likely(!d_unhashed(dentry))) - return 0; - - /* - * vfs_mkdir() may succeed and leave the dentry passed - * to it unhashed and negative. If that happens, try to - * lookup a new hashed and positive dentry. - */ - d = ovl_lookup_upper(ofs, dentry->d_name.name, dentry->d_parent, - dentry->d_name.len); - if (IS_ERR(d)) { - pr_warn("failed lookup after mkdir (%pd2, err=%i).\n", - dentry, err); - return PTR_ERR(d); - } - dput(dentry); - *newdentry = d; - - return 0; -} - -struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, +struct dentry *ovl_create_real(struct ovl_fs *ofs, struct dentry *parent, struct dentry *newdentry, struct ovl_cattr *attr) { + struct inode *dir = parent->d_inode; int err; if (IS_ERR(newdentry)) @@ -190,7 +181,15 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, case S_IFDIR: /* mkdir is special... */ - err = ovl_mkdir_real(ofs, dir, &newdentry, attr->mode); + newdentry = ovl_do_mkdir(ofs, dir, newdentry, attr->mode); + err = PTR_ERR_OR_ZERO(newdentry); + /* expect to inherit casefolding from workdir/upperdir */ + if (!err && ofs->casefold != ovl_dentry_casefolded(newdentry)) { + pr_warn_ratelimited("wrong inherited casefold (%pd2)\n", + newdentry); + end_creating(newdentry); + err = -EINVAL; + } break; case S_IFCHR: @@ -209,16 +208,36 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, err = -EPERM; } } - if (!err && WARN_ON(!newdentry->d_inode)) { + if (err) + goto out; + + if (WARN_ON(!newdentry->d_inode)) { /* * Not quite sure if non-instantiated dentry is legal or not. * VFS doesn't seem to care so check and warn here. */ err = -EIO; + } else if (d_unhashed(newdentry)) { + struct dentry *d; + /* + * Some filesystems (i.e. casefolded) may return an unhashed + * negative dentry from the ovl_lookup_upper() call before + * ovl_create_real(). + * In that case, lookup again after making the newdentry + * positive, so ovl_create_upper() always returns a hashed + * positive dentry. + */ + d = ovl_lookup_upper(ofs, newdentry->d_name.name, parent, + newdentry->d_name.len); + dput(newdentry); + if (IS_ERR_OR_NULL(d)) + err = d ? PTR_ERR(d) : -ENOENT; + else + return d; } out: if (err) { - dput(newdentry); + end_creating(newdentry); return ERR_PTR(err); } return newdentry; @@ -227,8 +246,12 @@ out: struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir, struct ovl_cattr *attr) { - return ovl_create_real(ofs, d_inode(workdir), - ovl_lookup_temp(ofs, workdir), attr); + struct dentry *ret; + ret = ovl_start_creating_temp(ofs, workdir); + if (IS_ERR(ret)) + return ret; + ret = ovl_create_real(ofs, workdir, ret, attr); + return end_creating_keep(ret); } static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper, @@ -260,14 +283,13 @@ static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry) * may not use to instantiate the new dentry. */ static int ovl_instantiate(struct dentry *dentry, struct inode *inode, - struct dentry *newdentry, bool hardlink) + struct dentry *newdentry, bool hardlink, struct file *tmpfile) { struct ovl_inode_params oip = { .upperdentry = newdentry, .newinode = inode, }; - ovl_dir_modified(dentry->d_parent, false); ovl_dentry_set_upper_alias(dentry); ovl_dentry_init_reval(dentry, newdentry, NULL); @@ -282,7 +304,8 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode, * XXX: if we ever use ovl_obtain_alias() to decode directory * file handles, need to use ovl_get_inode_locked() and * d_instantiate_new() here to prevent from creating two - * hashed directory inode aliases. + * hashed directory inode aliases. We then need to return + * the obtained alias to ovl_mkdir(). */ inode = ovl_get_inode(dentry->d_sb, &oip); if (IS_ERR(inode)) @@ -295,6 +318,9 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode, inc_nlink(inode); } + if (tmpfile) + d_mark_tmpfile(tmpfile, inode); + d_instantiate(dentry, inode); if (inode != oip.newinode) { pr_warn_ratelimited("newly created inode found in cache (%pd2)\n", @@ -323,21 +349,19 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode, { struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); - struct inode *udir = upperdir->d_inode; struct dentry *newdentry; int err; - if (!attr->hardlink && !IS_POSIXACL(udir)) - attr->mode &= ~current_umask(); - - inode_lock_nested(udir, I_MUTEX_PARENT); - newdentry = ovl_create_real(ofs, udir, - ovl_lookup_upper(ofs, dentry->d_name.name, - upperdir, dentry->d_name.len), - attr); - err = PTR_ERR(newdentry); + newdentry = ovl_start_creating_upper(ofs, upperdir, + &QSTR_LEN(dentry->d_name.name, + dentry->d_name.len)); if (IS_ERR(newdentry)) - goto out_unlock; + return PTR_ERR(newdentry); + newdentry = ovl_create_real(ofs, upperdir, newdentry, attr); + if (IS_ERR(newdentry)) + return PTR_ERR(newdentry); + + end_creating_keep(newdentry); if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry) && !ovl_allow_offline_changes(ofs)) { @@ -345,17 +369,16 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode, ovl_set_opaque(dentry, newdentry); } - err = ovl_instantiate(dentry, inode, newdentry, !!attr->hardlink); + ovl_dir_modified(dentry->d_parent, false); + err = ovl_instantiate(dentry, inode, newdentry, !!attr->hardlink, NULL); if (err) goto out_cleanup; -out_unlock: - inode_unlock(udir); - return err; + return 0; out_cleanup: - ovl_cleanup(ofs, udir, newdentry); + ovl_cleanup(ofs, upperdir, newdentry); dput(newdentry); - goto out_unlock; + return err; } static struct dentry *ovl_clear_empty(struct dentry *dentry, @@ -363,9 +386,8 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, { struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *workdir = ovl_workdir(dentry); - struct inode *wdir = workdir->d_inode; struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); - struct inode *udir = upperdir->d_inode; + struct renamedata rd = {}; struct path upperpath; struct dentry *upper; struct dentry *opaquedir; @@ -375,27 +397,29 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, if (WARN_ON(!workdir)) return ERR_PTR(-EROFS); - err = ovl_lock_rename_workdir(workdir, upperdir); - if (err) - goto out; - ovl_path_upper(dentry, &upperpath); err = vfs_getattr(&upperpath, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); if (err) - goto out_unlock; + goto out; err = -ESTALE; if (!S_ISDIR(stat.mode)) - goto out_unlock; + goto out; upper = upperpath.dentry; - if (upper->d_parent->d_inode != udir) - goto out_unlock; opaquedir = ovl_create_temp(ofs, workdir, OVL_CATTR(stat.mode)); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) - goto out_unlock; + goto out; + + rd.mnt_idmap = ovl_upper_mnt_idmap(ofs); + rd.old_parent = workdir; + rd.new_parent = upperdir; + rd.flags = RENAME_EXCHANGE; + err = start_renaming_two_dentries(&rd, opaquedir, upper); + if (err) + goto out_cleanup_unlocked; err = ovl_copy_xattr(dentry->d_sb, &upperpath, opaquedir); if (err) @@ -411,13 +435,13 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, if (err) goto out_cleanup; - err = ovl_do_rename(ofs, wdir, opaquedir, udir, upper, RENAME_EXCHANGE); + err = ovl_do_rename_rd(&rd); + end_renaming(&rd); if (err) - goto out_cleanup; + goto out_cleanup_unlocked; ovl_cleanup_whiteouts(ofs, upper, list); - ovl_cleanup(ofs, wdir, upper); - unlock_rename(workdir, upperdir); + ovl_cleanup(ofs, workdir, upper); /* dentry's upper doesn't match now, get rid of it */ d_drop(dentry); @@ -425,10 +449,10 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, return opaquedir; out_cleanup: - ovl_cleanup(ofs, wdir, opaquedir); + end_renaming(&rd); +out_cleanup_unlocked: + ovl_cleanup(ofs, workdir, opaquedir); dput(opaquedir); -out_unlock: - unlock_rename(workdir, upperdir); out: return ERR_PTR(err); } @@ -447,9 +471,8 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, { struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *workdir = ovl_workdir(dentry); - struct inode *wdir = workdir->d_inode; struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); - struct inode *udir = upperdir->d_inode; + struct renamedata rd = {}; struct dentry *upper; struct dentry *newdentry; int err; @@ -466,18 +489,14 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, return err; } - err = ovl_lock_rename_workdir(workdir, upperdir); - if (err) - goto out; - - upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir, - dentry->d_name.len); + upper = ovl_lookup_upper_unlocked(ofs, dentry->d_name.name, upperdir, + dentry->d_name.len); err = PTR_ERR(upper); if (IS_ERR(upper)) - goto out_unlock; + goto out; err = -ESTALE; - if (d_is_negative(upper) || !IS_WHITEOUT(d_inode(upper))) + if (d_is_negative(upper) || !ovl_upper_is_whiteout(ofs, upper)) goto out_dput; newdentry = ovl_create_temp(ofs, workdir, cattr); @@ -485,6 +504,14 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, if (IS_ERR(newdentry)) goto out_dput; + rd.mnt_idmap = ovl_upper_mnt_idmap(ofs); + rd.old_parent = workdir; + rd.new_parent = upperdir; + rd.flags = 0; + err = start_renaming_two_dentries(&rd, newdentry, upper); + if (err) + goto out_cleanup_unlocked; + /* * mode could have been mutilated due to umask (e.g. sgid directory) */ @@ -518,26 +545,27 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, if (err) goto out_cleanup; - err = ovl_do_rename(ofs, wdir, newdentry, udir, upper, - RENAME_EXCHANGE); + rd.flags = RENAME_EXCHANGE; + err = ovl_do_rename_rd(&rd); + end_renaming(&rd); if (err) - goto out_cleanup; + goto out_cleanup_unlocked; - ovl_cleanup(ofs, wdir, upper); + ovl_cleanup(ofs, workdir, upper); } else { - err = ovl_do_rename(ofs, wdir, newdentry, udir, upper, 0); + err = ovl_do_rename_rd(&rd); + end_renaming(&rd); if (err) - goto out_cleanup; + goto out_cleanup_unlocked; } - err = ovl_instantiate(dentry, inode, newdentry, hardlink); + ovl_dir_modified(dentry->d_parent, false); + err = ovl_instantiate(dentry, inode, newdentry, hardlink, NULL); if (err) { - ovl_cleanup(ofs, udir, newdentry); + ovl_cleanup(ofs, upperdir, newdentry); dput(newdentry); } out_dput: dput(upper); -out_unlock: - unlock_rename(workdir, upperdir); out: if (!hardlink) { posix_acl_release(acl); @@ -546,40 +574,80 @@ out: return err; out_cleanup: - ovl_cleanup(ofs, wdir, newdentry); + end_renaming(&rd); +out_cleanup_unlocked: + ovl_cleanup(ofs, workdir, newdentry); dput(newdentry); goto out_dput; } -static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, - struct ovl_cattr *attr, bool origin) +static const struct cred *ovl_override_creator_creds(const struct cred *original_creds, + struct dentry *dentry, struct inode *inode, umode_t mode) { int err; - const struct cred *old_cred; - struct cred *override_cred; - struct dentry *parent = dentry->d_parent; - err = ovl_copy_up(parent); + if (WARN_ON_ONCE(current->cred != ovl_creds(dentry->d_sb))) + return ERR_PTR(-EINVAL); + + CLASS(prepare_creds, override_cred)(); + if (!override_cred) + return ERR_PTR(-ENOMEM); + + override_cred->fsuid = inode->i_uid; + override_cred->fsgid = inode->i_gid; + + err = security_dentry_create_files_as(dentry, mode, &dentry->d_name, + original_creds, override_cred); if (err) - return err; + return ERR_PTR(err); - old_cred = ovl_override_creds(dentry->d_sb); + return override_creds(no_free_ptr(override_cred)); +} - /* - * When linking a file with copy up origin into a new parent, mark the - * new parent dir "impure". - */ - if (origin) { - err = ovl_set_impure(parent, ovl_dentry_upper(parent)); - if (err) - goto out_revert_creds; - } +static void ovl_revert_creator_creds(const struct cred *old_cred) +{ + const struct cred *override_cred; + + override_cred = revert_creds(old_cred); + put_cred(override_cred); +} + +DEFINE_CLASS(ovl_override_creator_creds, + const struct cred *, + if (!IS_ERR_OR_NULL(_T)) ovl_revert_creator_creds(_T), + ovl_override_creator_creds(original_creds, dentry, inode, mode), + const struct cred *original_creds, + struct dentry *dentry, + struct inode *inode, + umode_t mode) + +static int ovl_create_handle_whiteouts(struct dentry *dentry, + struct inode *inode, + struct ovl_cattr *attr) +{ + if (!ovl_dentry_is_whiteout(dentry)) + return ovl_create_upper(dentry, inode, attr); + + return ovl_create_over_whiteout(dentry, inode, attr); +} + +static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, + struct ovl_cattr *attr, bool origin) +{ + int err; + struct dentry *parent = dentry->d_parent; + + scoped_class(override_creds_ovl, original_creds, dentry->d_sb) { + /* + * When linking a file with copy up origin into a new parent, mark the + * new parent dir "impure". + */ + if (origin) { + err = ovl_set_impure(parent, ovl_dentry_upper(parent)); + if (err) + return err; + } - if (!attr->hardlink) { - err = -ENOMEM; - override_cred = prepare_creds(); - if (!override_cred) - goto out_revert_creds; /* * In the creation cases(create, mkdir, mknod, symlink), * ovl should transfer current's fs{u,g}id to underlying @@ -593,26 +661,16 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, * create a new inode, so just use the ovl mounter's * fs{u,g}id. */ - override_cred->fsuid = inode->i_uid; - override_cred->fsgid = inode->i_gid; - err = security_dentry_create_files_as(dentry, - attr->mode, &dentry->d_name, old_cred, - override_cred); - if (err) { - put_cred(override_cred); - goto out_revert_creds; - } - put_cred(override_creds(override_cred)); - put_cred(override_cred); - } - if (!ovl_dentry_is_whiteout(dentry)) - err = ovl_create_upper(dentry, inode, attr); - else - err = ovl_create_over_whiteout(dentry, inode, attr); + if (attr->hardlink) + return ovl_create_handle_whiteouts(dentry, inode, attr); -out_revert_creds: - revert_creds(old_cred); + scoped_class(ovl_override_creator_creds, cred, original_creds, dentry, inode, attr->mode) { + if (IS_ERR(cred)) + return PTR_ERR(cred); + return ovl_create_handle_whiteouts(dentry, inode, attr); + } + } return err; } @@ -626,6 +684,10 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev, .link = link, }; + err = ovl_copy_up(dentry->d_parent); + if (err) + return err; + err = ovl_want_write(dentry); if (err) goto out; @@ -637,7 +699,7 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev, goto out_drop_write; spin_lock(&inode->i_lock); - inode->i_state |= I_CREATING; + inode_state_set(inode, I_CREATING); spin_unlock(&inode->i_lock); inode_init_owner(&nop_mnt_idmap, inode, dentry->d_parent->d_inode, mode); @@ -660,10 +722,10 @@ static int ovl_create(struct mnt_idmap *idmap, struct inode *dir, return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL); } -static int ovl_mkdir(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, umode_t mode) +static struct dentry *ovl_mkdir(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, umode_t mode) { - return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL); + return ERR_PTR(ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL)); } static int ovl_mknod(struct mnt_idmap *idmap, struct inode *dir, @@ -684,14 +746,8 @@ static int ovl_symlink(struct mnt_idmap *idmap, struct inode *dir, static int ovl_set_link_redirect(struct dentry *dentry) { - const struct cred *old_cred; - int err; - - old_cred = ovl_override_creds(dentry->d_sb); - err = ovl_set_redirect(dentry, false); - revert_creds(old_cred); - - return err; + with_ovl_creds(dentry->d_sb) + return ovl_set_redirect(dentry, false); } static int ovl_link(struct dentry *old, struct inode *newdir, @@ -700,28 +756,24 @@ static int ovl_link(struct dentry *old, struct inode *newdir, int err; struct inode *inode; - err = ovl_want_write(old); + err = ovl_copy_up(old); if (err) goto out; - err = ovl_copy_up(old); + err = ovl_copy_up(new->d_parent); if (err) - goto out_drop_write; + goto out; - err = ovl_copy_up(new->d_parent); + err = ovl_nlink_start(old); if (err) - goto out_drop_write; + goto out; if (ovl_is_metacopy_dentry(old)) { err = ovl_set_link_redirect(old); if (err) - goto out_drop_write; + goto out_nlink_end; } - err = ovl_nlink_start(old); - if (err) - goto out_drop_write; - inode = d_inode(old); ihold(inode); @@ -731,9 +783,8 @@ static int ovl_link(struct dentry *old, struct inode *newdir, if (err) iput(inode); +out_nlink_end: ovl_nlink_end(old); -out_drop_write: - ovl_drop_write(old); out: return err; } @@ -763,15 +814,11 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, goto out; } - err = ovl_lock_rename_workdir(workdir, upperdir); - if (err) - goto out_dput; - - upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir, - dentry->d_name.len); + upper = ovl_lookup_upper_unlocked(ofs, dentry->d_name.name, upperdir, + dentry->d_name.len); err = PTR_ERR(upper); if (IS_ERR(upper)) - goto out_unlock; + goto out_dput; err = -ESTALE; if ((opaquedir && upper != opaquedir) || @@ -780,17 +827,13 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, goto out_dput_upper; } - err = ovl_cleanup_and_whiteout(ofs, d_inode(upperdir), upper); - if (err) - goto out_d_drop; + err = ovl_cleanup_and_whiteout(ofs, upperdir, upper); + if (!err) + ovl_dir_modified(dentry->d_parent, true); - ovl_dir_modified(dentry->d_parent, true); -out_d_drop: d_drop(dentry); out_dput_upper: dput(upper); -out_unlock: - unlock_rename(workdir, upperdir); out_dput: dput(opaquedir); out: @@ -814,17 +857,17 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir, goto out; } - inode_lock_nested(dir, I_MUTEX_PARENT); - upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir, - dentry->d_name.len); + upper = ovl_start_removing_upper(ofs, upperdir, + &QSTR_LEN(dentry->d_name.name, + dentry->d_name.len)); err = PTR_ERR(upper); if (IS_ERR(upper)) - goto out_unlock; + goto out_dput; err = -ESTALE; if ((opaquedir && upper != opaquedir) || (!opaquedir && !ovl_matches_upper(dentry, upper))) - goto out_dput_upper; + goto out_unlock; if (is_dir) err = ovl_do_rmdir(ofs, dir, upper); @@ -840,10 +883,9 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir, */ if (!err) d_drop(dentry); -out_dput_upper: - dput(upper); out_unlock: - inode_unlock(dir); + end_removing(upper); +out_dput: dput(opaquedir); out: return err; @@ -880,7 +922,6 @@ static void ovl_drop_nlink(struct dentry *dentry) static int ovl_do_remove(struct dentry *dentry, bool is_dir) { int err; - const struct cred *old_cred; bool lower_positive = ovl_lower_positive(dentry); LIST_HEAD(list); @@ -891,24 +932,20 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) goto out; } - err = ovl_want_write(dentry); - if (err) - goto out; - err = ovl_copy_up(dentry->d_parent); if (err) - goto out_drop_write; + goto out; err = ovl_nlink_start(dentry); if (err) - goto out_drop_write; + goto out; - old_cred = ovl_override_creds(dentry->d_sb); - if (!lower_positive) - err = ovl_remove_upper(dentry, is_dir, &list); - else - err = ovl_remove_and_whiteout(dentry, &list); - revert_creds(old_cred); + with_ovl_creds(dentry->d_sb) { + if (!lower_positive) + err = ovl_remove_upper(dentry, is_dir, &list); + else + err = ovl_remove_and_whiteout(dentry, &list); + } if (!err) { if (is_dir) clear_nlink(dentry->d_inode); @@ -926,8 +963,6 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) if (ovl_dentry_upper(dentry)) ovl_copyattr(d_inode(dentry)); -out_drop_write: - ovl_drop_write(dentry); out: ovl_cache_free(&list); return err; @@ -1074,99 +1109,107 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir) return err; } -static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir, - struct dentry *old, struct inode *newdir, - struct dentry *new, unsigned int flags) +struct ovl_renamedata { + struct renamedata; + struct dentry *opaquedir; + bool cleanup_whiteout; + bool update_nlink; + bool overwrite; +}; + +static int ovl_rename_start(struct ovl_renamedata *ovlrd, struct list_head *list) { - int err; - struct dentry *old_upperdir; - struct dentry *new_upperdir; - struct dentry *olddentry; - struct dentry *newdentry; - struct dentry *trap; - bool old_opaque; - bool new_opaque; - bool cleanup_whiteout = false; - bool update_nlink = false; - bool overwrite = !(flags & RENAME_EXCHANGE); + struct dentry *old = ovlrd->old_dentry; + struct dentry *new = ovlrd->new_dentry; bool is_dir = d_is_dir(old); bool new_is_dir = d_is_dir(new); - bool samedir = olddir == newdir; - struct dentry *opaquedir = NULL; - const struct cred *old_cred = NULL; - struct ovl_fs *ofs = OVL_FS(old->d_sb); - LIST_HEAD(list); + int err; - err = -EINVAL; - if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE)) - goto out; + if (ovlrd->flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE)) + return -EINVAL; - flags &= ~RENAME_NOREPLACE; + ovlrd->flags &= ~RENAME_NOREPLACE; /* Don't copy up directory trees */ err = -EXDEV; if (!ovl_can_move(old)) - goto out; - if (!overwrite && !ovl_can_move(new)) - goto out; + return err; + if (!ovlrd->overwrite && !ovl_can_move(new)) + return err; - if (overwrite && new_is_dir && !ovl_pure_upper(new)) { - err = ovl_check_empty_dir(new, &list); + if (ovlrd->overwrite && new_is_dir && !ovl_pure_upper(new)) { + err = ovl_check_empty_dir(new, list); if (err) - goto out; + return err; } - if (overwrite) { + if (ovlrd->overwrite) { if (ovl_lower_positive(old)) { if (!ovl_dentry_is_whiteout(new)) { /* Whiteout source */ - flags |= RENAME_WHITEOUT; + ovlrd->flags |= RENAME_WHITEOUT; } else { /* Switch whiteouts */ - flags |= RENAME_EXCHANGE; + ovlrd->flags |= RENAME_EXCHANGE; } } else if (is_dir && ovl_dentry_is_whiteout(new)) { - flags |= RENAME_EXCHANGE; - cleanup_whiteout = true; + ovlrd->flags |= RENAME_EXCHANGE; + ovlrd->cleanup_whiteout = true; } } - err = ovl_want_write(old); - if (err) - goto out; - err = ovl_copy_up(old); if (err) - goto out_drop_write; + return err; err = ovl_copy_up(new->d_parent); if (err) - goto out_drop_write; - if (!overwrite) { + return err; + + if (!ovlrd->overwrite) { err = ovl_copy_up(new); if (err) - goto out_drop_write; + return err; } else if (d_inode(new)) { err = ovl_nlink_start(new); if (err) - goto out_drop_write; + return err; - update_nlink = true; + ovlrd->update_nlink = true; } - old_cred = ovl_override_creds(old->d_sb); - - if (!list_empty(&list)) { - opaquedir = ovl_clear_empty(new, &list); - err = PTR_ERR(opaquedir); - if (IS_ERR(opaquedir)) { - opaquedir = NULL; - goto out_revert_creds; - } + if (!ovlrd->update_nlink) { + /* ovl_nlink_start() took ovl_want_write() */ + err = ovl_want_write(old); + if (err) + return err; } - old_upperdir = ovl_dentry_upper(old->d_parent); - new_upperdir = ovl_dentry_upper(new->d_parent); + return 0; +} + +static int ovl_rename_upper(struct ovl_renamedata *ovlrd, struct list_head *list) +{ + struct dentry *old = ovlrd->old_dentry; + struct dentry *new = ovlrd->new_dentry; + struct ovl_fs *ofs = OVL_FS(old->d_sb); + struct dentry *old_upperdir = ovl_dentry_upper(old->d_parent); + struct dentry *new_upperdir = ovl_dentry_upper(new->d_parent); + bool is_dir = d_is_dir(old); + bool new_is_dir = d_is_dir(new); + bool samedir = old->d_parent == new->d_parent; + struct renamedata rd = {}; + struct dentry *de; + struct dentry *whiteout = NULL; + bool old_opaque, new_opaque; + int err; + + if (!list_empty(list)) { + de = ovl_clear_empty(new, list); + if (IS_ERR(de)) + return PTR_ERR(de); + ovlrd->opaquedir = de; + } if (!samedir) { /* @@ -1178,88 +1221,88 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir, if (ovl_type_origin(old)) { err = ovl_set_impure(new->d_parent, new_upperdir); if (err) - goto out_revert_creds; + return err; } - if (!overwrite && ovl_type_origin(new)) { + if (!ovlrd->overwrite && ovl_type_origin(new)) { err = ovl_set_impure(old->d_parent, old_upperdir); if (err) - goto out_revert_creds; + return err; } } - trap = lock_rename(new_upperdir, old_upperdir); + rd.mnt_idmap = ovl_upper_mnt_idmap(ofs); + rd.old_parent = old_upperdir; + rd.new_parent = new_upperdir; + rd.flags = ovlrd->flags; - olddentry = ovl_lookup_upper(ofs, old->d_name.name, old_upperdir, - old->d_name.len); - err = PTR_ERR(olddentry); - if (IS_ERR(olddentry)) - goto out_unlock; + err = start_renaming(&rd, 0, + &QSTR_LEN(old->d_name.name, old->d_name.len), + &QSTR_LEN(new->d_name.name, new->d_name.len)); + if (err) + return err; err = -ESTALE; - if (!ovl_matches_upper(old, olddentry)) - goto out_dput_old; - - newdentry = ovl_lookup_upper(ofs, new->d_name.name, new_upperdir, - new->d_name.len); - err = PTR_ERR(newdentry); - if (IS_ERR(newdentry)) - goto out_dput_old; + if (!ovl_matches_upper(old, rd.old_dentry)) + goto out_unlock; old_opaque = ovl_dentry_is_opaque(old); new_opaque = ovl_dentry_is_opaque(new); err = -ESTALE; if (d_inode(new) && ovl_dentry_upper(new)) { - if (opaquedir) { - if (newdentry != opaquedir) - goto out_dput; + if (ovlrd->opaquedir) { + if (rd.new_dentry != ovlrd->opaquedir) + goto out_unlock; } else { - if (!ovl_matches_upper(new, newdentry)) - goto out_dput; + if (!ovl_matches_upper(new, rd.new_dentry)) + goto out_unlock; } } else { - if (!d_is_negative(newdentry)) { - if (!new_opaque || !ovl_is_whiteout(newdentry)) - goto out_dput; + if (!d_is_negative(rd.new_dentry)) { + if (!new_opaque || !ovl_upper_is_whiteout(ofs, rd.new_dentry)) + goto out_unlock; } else { - if (flags & RENAME_EXCHANGE) - goto out_dput; + if (ovlrd->flags & RENAME_EXCHANGE) + goto out_unlock; } } - if (olddentry == trap) - goto out_dput; - if (newdentry == trap) - goto out_dput; - - if (olddentry->d_inode == newdentry->d_inode) - goto out_dput; + if (rd.old_dentry->d_inode == rd.new_dentry->d_inode) + goto out_unlock; err = 0; if (ovl_type_merge_or_lower(old)) err = ovl_set_redirect(old, samedir); else if (is_dir && !old_opaque && ovl_type_merge(new->d_parent)) - err = ovl_set_opaque_xerr(old, olddentry, -EXDEV); + err = ovl_set_opaque_xerr(old, rd.old_dentry, -EXDEV); if (err) - goto out_dput; + goto out_unlock; - if (!overwrite && ovl_type_merge_or_lower(new)) + if (!ovlrd->overwrite && ovl_type_merge_or_lower(new)) err = ovl_set_redirect(new, samedir); - else if (!overwrite && new_is_dir && !new_opaque && + else if (!ovlrd->overwrite && new_is_dir && !new_opaque && ovl_type_merge(old->d_parent)) - err = ovl_set_opaque_xerr(new, newdentry, -EXDEV); + err = ovl_set_opaque_xerr(new, rd.new_dentry, -EXDEV); if (err) - goto out_dput; + goto out_unlock; + + err = ovl_do_rename_rd(&rd); + + if (!err && ovlrd->cleanup_whiteout) + whiteout = dget(rd.new_dentry); + +out_unlock: + end_renaming(&rd); - err = ovl_do_rename(ofs, old_upperdir->d_inode, olddentry, - new_upperdir->d_inode, newdentry, flags); if (err) - goto out_dput; + return err; - if (cleanup_whiteout) - ovl_cleanup(ofs, old_upperdir->d_inode, newdentry); + if (whiteout) { + ovl_cleanup(ofs, old_upperdir, whiteout); + dput(whiteout); + } - if (overwrite && d_inode(new)) { + if (ovlrd->overwrite && d_inode(new)) { if (new_is_dir) clear_nlink(d_inode(new)); else @@ -1267,7 +1310,7 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir, } ovl_dir_modified(old->d_parent, ovl_type_origin(old) || - (!overwrite && ovl_type_origin(new))); + (!ovlrd->overwrite && ovl_type_origin(new))); ovl_dir_modified(new->d_parent, ovl_type_origin(old) || (d_inode(new) && ovl_type_origin(new))); @@ -1276,24 +1319,144 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir, if (d_inode(new) && ovl_dentry_upper(new)) ovl_copyattr(d_inode(new)); -out_dput: - dput(newdentry); -out_dput_old: - dput(olddentry); -out_unlock: - unlock_rename(new_upperdir, old_upperdir); -out_revert_creds: - revert_creds(old_cred); - if (update_nlink) - ovl_nlink_end(new); -out_drop_write: - ovl_drop_write(old); -out: - dput(opaquedir); + return err; +} + +static void ovl_rename_end(struct ovl_renamedata *ovlrd) +{ + if (ovlrd->update_nlink) + ovl_nlink_end(ovlrd->new_dentry); + else + ovl_drop_write(ovlrd->old_dentry); +} + +static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir, + struct dentry *old, struct inode *newdir, + struct dentry *new, unsigned int flags) +{ + struct ovl_renamedata ovlrd = { + .old_parent = old->d_parent, + .old_dentry = old, + .new_parent = new->d_parent, + .new_dentry = new, + .flags = flags, + .overwrite = !(flags & RENAME_EXCHANGE), + }; + LIST_HEAD(list); + int err; + + err = ovl_rename_start(&ovlrd, &list); + if (!err) { + with_ovl_creds(old->d_sb) + err = ovl_rename_upper(&ovlrd, &list); + ovl_rename_end(&ovlrd); + } + + dput(ovlrd.opaquedir); ovl_cache_free(&list); return err; } +static int ovl_create_tmpfile(struct file *file, struct dentry *dentry, + struct inode *inode, umode_t mode) +{ + struct path realparentpath; + struct file *realfile; + struct ovl_file *of; + struct dentry *newdentry; + /* It's okay to set O_NOATIME, since the owner will be current fsuid */ + int flags = file->f_flags | OVL_OPEN_FLAGS; + int err; + + scoped_class(override_creds_ovl, original_creds, dentry->d_sb) { + scoped_class(ovl_override_creator_creds, cred, original_creds, dentry, inode, mode) { + if (IS_ERR(cred)) + return PTR_ERR(cred); + + ovl_path_upper(dentry->d_parent, &realparentpath); + realfile = backing_tmpfile_open(&file->f_path, flags, &realparentpath, + mode, current_cred()); + err = PTR_ERR_OR_ZERO(realfile); + pr_debug("tmpfile/open(%pd2, 0%o) = %i\n", realparentpath.dentry, mode, err); + if (err) + return err; + + of = ovl_file_alloc(realfile); + if (!of) { + fput(realfile); + return -ENOMEM; + } + + /* ovl_instantiate() consumes the newdentry reference on success */ + newdentry = dget(realfile->f_path.dentry); + err = ovl_instantiate(dentry, inode, newdentry, false, file); + if (!err) { + file->private_data = of; + } else { + dput(newdentry); + ovl_file_free(of); + } + } + } + return err; +} + +static int ovl_dummy_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static int ovl_tmpfile(struct mnt_idmap *idmap, struct inode *dir, + struct file *file, umode_t mode) +{ + int err; + struct dentry *dentry = file->f_path.dentry; + struct inode *inode; + + if (!OVL_FS(dentry->d_sb)->tmpfile) + return -EOPNOTSUPP; + + err = ovl_copy_up(dentry->d_parent); + if (err) + return err; + + err = ovl_want_write(dentry); + if (err) + return err; + + err = -ENOMEM; + inode = ovl_new_inode(dentry->d_sb, mode, 0); + if (!inode) + goto drop_write; + + inode_init_owner(&nop_mnt_idmap, inode, dir, mode); + err = ovl_create_tmpfile(file, dentry, inode, inode->i_mode); + if (err) + goto put_inode; + + /* + * Check if the preallocated inode was actually used. Having something + * else assigned to the dentry shouldn't happen as that would indicate + * that the backing tmpfile "leaked" out of overlayfs. + */ + err = -EIO; + if (WARN_ON(inode != d_inode(dentry))) + goto put_realfile; + + /* inode reference was transferred to dentry */ + inode = NULL; + err = finish_open(file, dentry, ovl_dummy_open); +put_realfile: + /* Without FMODE_OPENED ->release() won't be called on @file */ + if (!(file->f_mode & FMODE_OPENED)) + ovl_file_free(file->private_data); +put_inode: + iput(inode); +drop_write: + ovl_drop_write(dentry); + return err; +} + const struct inode_operations ovl_dir_inode_operations = { .lookup = ovl_lookup, .mkdir = ovl_mkdir, @@ -1314,4 +1477,5 @@ const struct inode_operations ovl_dir_inode_operations = { .update_time = ovl_update_time, .fileattr_get = ovl_fileattr_get, .fileattr_set = ovl_fileattr_set, + .tmpfile = ovl_tmpfile, }; diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index c8c8588bd98c..83f80fdb1567 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -23,12 +23,7 @@ static int ovl_encode_maybe_copy_up(struct dentry *dentry) if (ovl_dentry_upper(dentry)) return 0; - err = ovl_want_write(dentry); - if (!err) { - err = ovl_copy_up(dentry); - ovl_drop_write(dentry); - } - + err = ovl_copy_up(dentry); if (err) { pr_warn_ratelimited("failed to copy up on encode (%pd2, err=%i)\n", dentry, err); @@ -181,31 +176,37 @@ static int ovl_connect_layer(struct dentry *dentry) * * Return 0 for upper file handle, > 0 for lower file handle or < 0 on error. */ -static int ovl_check_encode_origin(struct dentry *dentry) +static int ovl_check_encode_origin(struct inode *inode) { - struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + struct ovl_fs *ofs = OVL_FS(inode->i_sb); bool decodable = ofs->config.nfs_export; + struct dentry *dentry; + int err; + + /* No upper layer? */ + if (!ovl_upper_mnt(ofs)) + return 1; /* Lower file handle for non-upper non-decodable */ - if (!ovl_dentry_upper(dentry) && !decodable) - return 0; + if (!ovl_inode_upper(inode) && !decodable) + return 1; /* Upper file handle for pure upper */ - if (!ovl_dentry_lower(dentry)) + if (!ovl_inode_lower(inode)) return 0; /* * Root is never indexed, so if there's an upper layer, encode upper for * root. */ - if (dentry == dentry->d_sb->s_root) + if (inode == d_inode(inode->i_sb->s_root)) return 0; /* * Upper decodable file handle for non-indexed upper. */ - if (ovl_dentry_upper(dentry) && decodable && - !ovl_test_flag(OVL_INDEX, d_inode(dentry))) + if (ovl_inode_upper(inode) && decodable && + !ovl_test_flag(OVL_INDEX, inode)) return 0; /* @@ -214,14 +215,23 @@ static int ovl_check_encode_origin(struct dentry *dentry) * ovl_connect_layer() will try to make origin's layer "connected" by * copying up a "connectable" ancestor. */ - if (d_is_dir(dentry) && ovl_upper_mnt(ofs) && decodable) - return ovl_connect_layer(dentry); + if (!decodable || !S_ISDIR(inode->i_mode)) + return 1; + + dentry = d_find_any_alias(inode); + if (!dentry) + return -ENOENT; + + err = ovl_connect_layer(dentry); + dput(dentry); + if (err < 0) + return err; /* Lower file handle for indexed and non-upper dir/non-dir */ return 1; } -static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct dentry *dentry, +static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct inode *inode, u32 *fid, int buflen) { struct ovl_fh *fh = NULL; @@ -232,13 +242,13 @@ static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct dentry *dentry, * Check if we should encode a lower or upper file handle and maybe * copy up an ancestor to make lower file handle connectable. */ - err = enc_lower = ovl_check_encode_origin(dentry); + err = enc_lower = ovl_check_encode_origin(inode); if (enc_lower < 0) goto fail; /* Encode an upper or lower file handle */ - fh = ovl_encode_real_fh(ofs, enc_lower ? ovl_dentry_lower(dentry) : - ovl_dentry_upper(dentry), !enc_lower); + fh = ovl_encode_real_fh(ofs, enc_lower ? ovl_inode_lower(inode) : + ovl_inode_upper(inode), !enc_lower); if (IS_ERR(fh)) return PTR_ERR(fh); @@ -252,8 +262,8 @@ out: return err; fail: - pr_warn_ratelimited("failed to encode file handle (%pd2, err=%i)\n", - dentry, err); + pr_warn_ratelimited("failed to encode file handle (ino=%lu, err=%i)\n", + inode->i_ino, err); goto out; } @@ -261,19 +271,13 @@ static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len, struct inode *parent) { struct ovl_fs *ofs = OVL_FS(inode->i_sb); - struct dentry *dentry; int bytes, buflen = *max_len << 2; /* TODO: encode connectable file handles */ if (parent) return FILEID_INVALID; - dentry = d_find_any_alias(inode); - if (!dentry) - return FILEID_INVALID; - - bytes = ovl_dentry_to_fid(ofs, dentry, fid, buflen); - dput(dentry); + bytes = ovl_dentry_to_fid(ofs, inode, fid, buflen); if (bytes <= 0) return FILEID_INVALID; @@ -294,7 +298,6 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb, { struct dentry *lower = lowerpath ? lowerpath->dentry : NULL; struct dentry *upper = upper_alias ?: index; - struct dentry *dentry; struct inode *inode = NULL; struct ovl_entry *oe; struct ovl_inode_params oip = { @@ -325,27 +328,7 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb, if (upper) ovl_set_flag(OVL_UPPERDATA, inode); - dentry = d_find_any_alias(inode); - if (dentry) - goto out_iput; - - dentry = d_alloc_anon(inode->i_sb); - if (unlikely(!dentry)) - goto nomem; - - if (upper_alias) - ovl_dentry_set_upper_alias(dentry); - - ovl_dentry_init_reval(dentry, upper, OVL_I_E(inode)); - - return d_instantiate_anon(dentry, inode); - -nomem: - dput(dentry); - dentry = ERR_PTR(-ENOMEM); -out_iput: - iput(inode); - return dentry; + return d_obtain_alias(inode); } /* Get the upper or lower dentry in stack whose on layer @idx */ @@ -402,11 +385,9 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected, */ take_dentry_name_snapshot(&name, real); /* - * No idmap handling here: it's an internal lookup. Could skip - * permission checking altogether, but for now just use non-idmap - * transformed ids. + * No idmap handling here: it's an internal lookup. */ - this = lookup_one_len(name.name.name, connected, name.name.len); + this = lookup_noperm(&name.name, connected); release_dentry_name_snapshot(&name); err = PTR_ERR(this); if (IS_ERR(this)) { @@ -465,7 +446,7 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb, * For decoded lower dir file handle, lookup index by origin to check * if lower dir was copied up and and/or removed. */ - if (!this && layer->idx && ofs->indexdir && !WARN_ON(!d_is_dir(real))) { + if (!this && layer->idx && ovl_indexdir(sb) && !WARN_ON(!d_is_dir(real))) { index = ovl_lookup_index(ofs, NULL, real, false); if (IS_ERR(index)) return index; @@ -738,7 +719,7 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb, } /* Then lookup indexed upper/whiteout by origin fh */ - if (ofs->indexdir) { + if (ovl_indexdir(sb)) { index = ovl_get_index_fh(ofs, fh); err = PTR_ERR(index); if (IS_ERR(index)) { diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 3b4cc633d763..cbae89457234 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -9,21 +9,11 @@ #include <linux/xattr.h> #include <linux/uio.h> #include <linux/uaccess.h> -#include <linux/splice.h> #include <linux/security.h> -#include <linux/mm.h> #include <linux/fs.h> +#include <linux/backing-file.h> #include "overlayfs.h" -struct ovl_aio_req { - struct kiocb iocb; - refcount_t ref; - struct kiocb *orig_iocb; - struct fd fd; -}; - -static struct kmem_cache *ovl_aio_request_cachep; - static char ovl_whatisit(struct inode *inode, struct inode *realinode) { if (realinode != ovl_inode_upper(inode)) @@ -34,9 +24,6 @@ static char ovl_whatisit(struct inode *inode, struct inode *realinode) return 'm'; } -/* No atime modification on underlying */ -#define OVL_OPEN_FLAGS (O_NOATIME) - static struct file *ovl_open_realfile(const struct file *file, const struct path *realpath) { @@ -44,7 +31,6 @@ static struct file *ovl_open_realfile(const struct file *file, struct inode *inode = file_inode(file); struct mnt_idmap *real_idmap; struct file *realfile; - const struct cred *old_cred; int flags = file->f_flags | OVL_OPEN_FLAGS; int acc_mode = ACC_MODE(flags); int err; @@ -52,19 +38,19 @@ static struct file *ovl_open_realfile(const struct file *file, if (flags & O_APPEND) acc_mode |= MAY_APPEND; - old_cred = ovl_override_creds(inode->i_sb); - real_idmap = mnt_idmap(realpath->mnt); - err = inode_permission(real_idmap, realinode, MAY_OPEN | acc_mode); - if (err) { - realfile = ERR_PTR(err); - } else { - if (!inode_owner_or_capable(real_idmap, realinode)) - flags &= ~O_NOATIME; - - realfile = backing_file_open(&file->f_path, flags, realpath, - current_cred()); + with_ovl_creds(inode->i_sb) { + real_idmap = mnt_idmap(realpath->mnt); + err = inode_permission(real_idmap, realinode, MAY_OPEN | acc_mode); + if (err) { + realfile = ERR_PTR(err); + } else { + if (!inode_owner_or_capable(real_idmap, realinode)) + flags &= ~O_NOATIME; + + realfile = backing_file_open(file_user_path(file), + flags, realpath, current_cred()); + } } - revert_creds(old_cred); pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n", file, file, ovl_whatisit(inode, realinode), file->f_flags, @@ -102,54 +88,110 @@ static int ovl_change_flags(struct file *file, unsigned int flags) return 0; } -static int ovl_real_fdget_meta(const struct file *file, struct fd *real, - bool allow_meta) +struct ovl_file { + struct file *realfile; + struct file *upperfile; +}; + +struct ovl_file *ovl_file_alloc(struct file *realfile) { - struct dentry *dentry = file_dentry(file); - struct path realpath; - int err; + struct ovl_file *of = kzalloc(sizeof(struct ovl_file), GFP_KERNEL); - real->flags = 0; - real->file = file->private_data; + if (unlikely(!of)) + return NULL; - if (allow_meta) { - ovl_path_real(dentry, &realpath); - } else { - /* lazy lookup and verify of lowerdata */ - err = ovl_verify_lowerdata(dentry); - if (err) - return err; + of->realfile = realfile; + return of; +} - ovl_path_realdata(dentry, &realpath); - } - if (!realpath.dentry) - return -EIO; +void ovl_file_free(struct ovl_file *of) +{ + fput(of->realfile); + if (of->upperfile) + fput(of->upperfile); + kfree(of); +} - /* Has it been copied up since we'd opened it? */ - if (unlikely(file_inode(real->file) != d_inode(realpath.dentry))) { - real->flags = FDPUT_FPUT; - real->file = ovl_open_realfile(file, &realpath); +static bool ovl_is_real_file(const struct file *realfile, + const struct path *realpath) +{ + return file_inode(realfile) == d_inode(realpath->dentry); +} + +static struct file *ovl_real_file_path(const struct file *file, + const struct path *realpath) +{ + struct ovl_file *of = file->private_data; + struct file *realfile = of->realfile; - return PTR_ERR_OR_ZERO(real->file); + if (WARN_ON_ONCE(!realpath->dentry)) + return ERR_PTR(-EIO); + + /* + * If the realfile that we want is not where the data used to be at + * open time, either we'd been copied up, or it's an fsync of a + * metacopied file. We need the upperfile either way, so see if it + * is already opened and if it is not then open and store it. + */ + if (unlikely(!ovl_is_real_file(realfile, realpath))) { + struct file *upperfile = READ_ONCE(of->upperfile); + struct file *old; + + if (!upperfile) { /* Nobody opened upperfile yet */ + upperfile = ovl_open_realfile(file, realpath); + if (IS_ERR(upperfile)) + return upperfile; + + /* Store the upperfile for later */ + old = cmpxchg_release(&of->upperfile, NULL, upperfile); + if (old) { /* Someone opened upperfile before us */ + fput(upperfile); + upperfile = old; + } + } + /* + * Stored file must be from the right inode, unless someone's + * been corrupting the upper layer. + */ + if (WARN_ON_ONCE(!ovl_is_real_file(upperfile, realpath))) + return ERR_PTR(-EIO); + + realfile = upperfile; } /* Did the flags change since open? */ - if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS)) - return ovl_change_flags(real->file, file->f_flags); + if (unlikely((file->f_flags ^ realfile->f_flags) & ~OVL_OPEN_FLAGS)) { + int err = ovl_change_flags(realfile, file->f_flags); - return 0; + if (err) + return ERR_PTR(err); + } + + return realfile; } -static int ovl_real_fdget(const struct file *file, struct fd *real) +static struct file *ovl_real_file(const struct file *file) { - if (d_is_dir(file_dentry(file))) { - real->flags = 0; - real->file = ovl_dir_real_file(file, false); + struct dentry *dentry = file_dentry(file); + struct path realpath; + int err; + + if (d_is_dir(dentry)) { + struct file *f = ovl_dir_real_file(file, false); - return PTR_ERR_OR_ZERO(real->file); + if (WARN_ON_ONCE(!f)) + return ERR_PTR(-EIO); + return f; } - return ovl_real_fdget_meta(file, real, false); + /* lazy lookup and verify of lowerdata */ + err = ovl_verify_lowerdata(dentry); + if (err) + return ERR_PTR(err); + + ovl_path_realdata(dentry, &realpath); + + return ovl_real_file_path(file, &realpath); } static int ovl_open(struct inode *inode, struct file *file) @@ -157,6 +199,7 @@ static int ovl_open(struct inode *inode, struct file *file) struct dentry *dentry = file_dentry(file); struct file *realfile; struct path realpath; + struct ovl_file *of; int err; /* lazy lookup and verify lowerdata */ @@ -179,23 +222,27 @@ static int ovl_open(struct inode *inode, struct file *file) if (IS_ERR(realfile)) return PTR_ERR(realfile); - file->private_data = realfile; + of = ovl_file_alloc(realfile); + if (!of) { + fput(realfile); + return -ENOMEM; + } + + file->private_data = of; return 0; } static int ovl_release(struct inode *inode, struct file *file) { - fput(file->private_data); - + ovl_file_free(file->private_data); return 0; } static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file_inode(file); - struct fd real; - const struct cred *old_cred; + struct file *realfile; loff_t ret; /* @@ -210,9 +257,9 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) return vfs_setpos(file, 0, 0); } - ret = ovl_real_fdget(file, &real); - if (ret) - return ret; + realfile = ovl_real_file(file); + if (IS_ERR(realfile)) + return PTR_ERR(realfile); /* * Overlay file f_pos is the master copy that is preserved @@ -222,24 +269,33 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) * files, so we use the real file to perform seeks. */ ovl_inode_lock(inode); - real.file->f_pos = file->f_pos; + realfile->f_pos = file->f_pos; - old_cred = ovl_override_creds(inode->i_sb); - ret = vfs_llseek(real.file, offset, whence); - revert_creds(old_cred); + with_ovl_creds(inode->i_sb) + ret = vfs_llseek(realfile, offset, whence); - file->f_pos = real.file->f_pos; + file->f_pos = realfile->f_pos; ovl_inode_unlock(inode); - fdput(real); - return ret; } +static void ovl_file_modified(struct file *file) +{ + /* Update size/mtime */ + ovl_copyattr(file_inode(file)); +} + +static void ovl_file_end_write(struct kiocb *iocb, ssize_t ret) +{ + ovl_file_modified(iocb->ki_filp); +} + static void ovl_file_accessed(struct file *file) { struct inode *inode, *upperinode; struct timespec64 ctime, uctime; + struct timespec64 mtime, umtime; if (file->f_flags & O_NOATIME) return; @@ -252,124 +308,48 @@ static void ovl_file_accessed(struct file *file) ctime = inode_get_ctime(inode); uctime = inode_get_ctime(upperinode); - if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) || - !timespec64_equal(&ctime, &uctime))) { - inode->i_mtime = upperinode->i_mtime; + mtime = inode_get_mtime(inode); + umtime = inode_get_mtime(upperinode); + if ((!timespec64_equal(&mtime, &umtime)) || + !timespec64_equal(&ctime, &uctime)) { + inode_set_mtime_to_ts(inode, inode_get_mtime(upperinode)); inode_set_ctime_to_ts(inode, uctime); } touch_atime(&file->f_path); } -static rwf_t ovl_iocb_to_rwf(int ifl) -{ - rwf_t flags = 0; - - if (ifl & IOCB_NOWAIT) - flags |= RWF_NOWAIT; - if (ifl & IOCB_HIPRI) - flags |= RWF_HIPRI; - if (ifl & IOCB_DSYNC) - flags |= RWF_DSYNC; - if (ifl & IOCB_SYNC) - flags |= RWF_SYNC; - - return flags; -} - -static inline void ovl_aio_put(struct ovl_aio_req *aio_req) -{ - if (refcount_dec_and_test(&aio_req->ref)) { - fdput(aio_req->fd); - kmem_cache_free(ovl_aio_request_cachep, aio_req); - } -} - -static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) -{ - struct kiocb *iocb = &aio_req->iocb; - struct kiocb *orig_iocb = aio_req->orig_iocb; - - if (iocb->ki_flags & IOCB_WRITE) { - struct inode *inode = file_inode(orig_iocb->ki_filp); - - kiocb_end_write(iocb); - ovl_copyattr(inode); - } - - orig_iocb->ki_pos = iocb->ki_pos; - ovl_aio_put(aio_req); -} - -static void ovl_aio_rw_complete(struct kiocb *iocb, long res) -{ - struct ovl_aio_req *aio_req = container_of(iocb, - struct ovl_aio_req, iocb); - struct kiocb *orig_iocb = aio_req->orig_iocb; - - ovl_aio_cleanup_handler(aio_req); - orig_iocb->ki_complete(orig_iocb, res); -} - static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; - struct fd real; - const struct cred *old_cred; - ssize_t ret; + struct file *realfile; + struct backing_file_ctx ctx = { + .cred = ovl_creds(file_inode(file)->i_sb), + .accessed = ovl_file_accessed, + }; if (!iov_iter_count(iter)) return 0; - ret = ovl_real_fdget(file, &real); - if (ret) - return ret; - - ret = -EINVAL; - if (iocb->ki_flags & IOCB_DIRECT && - !(real.file->f_mode & FMODE_CAN_ODIRECT)) - goto out_fdput; - - old_cred = ovl_override_creds(file_inode(file)->i_sb); - if (is_sync_kiocb(iocb)) { - ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, - ovl_iocb_to_rwf(iocb->ki_flags)); - } else { - struct ovl_aio_req *aio_req; - - ret = -ENOMEM; - aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL); - if (!aio_req) - goto out; - - aio_req->fd = real; - real.flags = 0; - aio_req->orig_iocb = iocb; - kiocb_clone(&aio_req->iocb, iocb, real.file); - aio_req->iocb.ki_complete = ovl_aio_rw_complete; - refcount_set(&aio_req->ref, 2); - ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter); - ovl_aio_put(aio_req); - if (ret != -EIOCBQUEUED) - ovl_aio_cleanup_handler(aio_req); - } -out: - revert_creds(old_cred); - ovl_file_accessed(file); -out_fdput: - fdput(real); + realfile = ovl_real_file(file); + if (IS_ERR(realfile)) + return PTR_ERR(realfile); - return ret; + return backing_file_read_iter(realfile, iter, iocb, iocb->ki_flags, + &ctx); } static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); - struct fd real; - const struct cred *old_cred; + struct file *realfile; ssize_t ret; int ifl = iocb->ki_flags; + struct backing_file_ctx ctx = { + .cred = ovl_creds(inode->i_sb), + .end_write = ovl_file_end_write, + }; if (!iov_iter_count(iter)) return 0; @@ -377,55 +357,16 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) inode_lock(inode); /* Update mode */ ovl_copyattr(inode); - ret = file_remove_privs(file); - if (ret) - goto out_unlock; - ret = ovl_real_fdget(file, &real); - if (ret) + realfile = ovl_real_file(file); + ret = PTR_ERR(realfile); + if (IS_ERR(realfile)) goto out_unlock; - ret = -EINVAL; - if (iocb->ki_flags & IOCB_DIRECT && - !(real.file->f_mode & FMODE_CAN_ODIRECT)) - goto out_fdput; - if (!ovl_should_sync(OVL_FS(inode->i_sb))) ifl &= ~(IOCB_DSYNC | IOCB_SYNC); - old_cred = ovl_override_creds(file_inode(file)->i_sb); - if (is_sync_kiocb(iocb)) { - file_start_write(real.file); - ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, - ovl_iocb_to_rwf(ifl)); - file_end_write(real.file); - /* Update size */ - ovl_copyattr(inode); - } else { - struct ovl_aio_req *aio_req; - - ret = -ENOMEM; - aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL); - if (!aio_req) - goto out; - - aio_req->fd = real; - real.flags = 0; - aio_req->orig_iocb = iocb; - kiocb_clone(&aio_req->iocb, iocb, real.file); - aio_req->iocb.ki_flags = ifl; - aio_req->iocb.ki_complete = ovl_aio_rw_complete; - refcount_set(&aio_req->ref, 2); - kiocb_start_write(&aio_req->iocb); - ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter); - ovl_aio_put(aio_req); - if (ret != -EIOCBQUEUED) - ovl_aio_cleanup_handler(aio_req); - } -out: - revert_creds(old_cred); -out_fdput: - fdput(real); + ret = backing_file_write_iter(realfile, iter, iocb, ifl, &ctx); out_unlock: inode_unlock(inode); @@ -437,27 +378,30 @@ static ssize_t ovl_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { - const struct cred *old_cred; - struct fd real; + struct file *realfile; ssize_t ret; + struct backing_file_ctx ctx = { + .cred = ovl_creds(file_inode(in)->i_sb), + .accessed = ovl_file_accessed, + }; + struct kiocb iocb; - ret = ovl_real_fdget(in, &real); - if (ret) - return ret; + realfile = ovl_real_file(in); + if (IS_ERR(realfile)) + return PTR_ERR(realfile); - old_cred = ovl_override_creds(file_inode(in)->i_sb); - ret = vfs_splice_read(real.file, ppos, pipe, len, flags); - revert_creds(old_cred); - ovl_file_accessed(in); + init_sync_kiocb(&iocb, in); + iocb.ki_pos = *ppos; + ret = backing_file_splice_read(realfile, &iocb, pipe, len, flags, &ctx); + *ppos = iocb.ki_pos; - fdput(real); return ret; } /* * Calling iter_file_splice_write() directly from overlay's f_op may deadlock * due to lock order inversion between pipe->mutex in iter_file_splice_write() - * and file_start_write(real.file) in ovl_write_iter(). + * and file_start_write(realfile) in ovl_write_iter(). * * So do everything ovl_write_iter() does and call iter_file_splice_write() on * the real file. @@ -465,32 +409,28 @@ static ssize_t ovl_splice_read(struct file *in, loff_t *ppos, static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { - struct fd real; - const struct cred *old_cred; + struct file *realfile; struct inode *inode = file_inode(out); ssize_t ret; + struct backing_file_ctx ctx = { + .cred = ovl_creds(inode->i_sb), + .end_write = ovl_file_end_write, + }; + struct kiocb iocb; inode_lock(inode); /* Update mode */ ovl_copyattr(inode); - ret = file_remove_privs(out); - if (ret) - goto out_unlock; - ret = ovl_real_fdget(out, &real); - if (ret) + realfile = ovl_real_file(out); + ret = PTR_ERR(realfile); + if (IS_ERR(realfile)) goto out_unlock; - old_cred = ovl_override_creds(inode->i_sb); - file_start_write(real.file); - - ret = iter_file_splice_write(pipe, real.file, ppos, len, flags); - - file_end_write(real.file); - /* Update size */ - ovl_copyattr(inode); - revert_creds(old_cred); - fdput(real); + init_sync_kiocb(&iocb, out); + iocb.ki_pos = *ppos; + ret = backing_file_splice_write(pipe, realfile, &iocb, len, flags, &ctx); + *ppos = iocb.ki_pos; out_unlock: inode_unlock(inode); @@ -500,57 +440,45 @@ out_unlock: static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - struct fd real; - const struct cred *old_cred; + struct dentry *dentry = file_dentry(file); + enum ovl_path_type type; + struct path upperpath; + struct file *upperfile; int ret; ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb)); if (ret <= 0) return ret; - ret = ovl_real_fdget_meta(file, &real, !datasync); - if (ret) - return ret; - /* Don't sync lower file for fear of receiving EROFS error */ - if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) { - old_cred = ovl_override_creds(file_inode(file)->i_sb); - ret = vfs_fsync_range(real.file, start, end, datasync); - revert_creds(old_cred); - } + type = ovl_path_type(dentry); + if (!OVL_TYPE_UPPER(type) || (datasync && OVL_TYPE_MERGE(type))) + return 0; - fdput(real); + ovl_path_upper(dentry, &upperpath); + upperfile = ovl_real_file_path(file, &upperpath); + if (IS_ERR(upperfile)) + return PTR_ERR(upperfile); - return ret; + with_ovl_creds(file_inode(file)->i_sb) + return vfs_fsync_range(upperfile, start, end, datasync); } static int ovl_mmap(struct file *file, struct vm_area_struct *vma) { - struct file *realfile = file->private_data; - const struct cred *old_cred; - int ret; + struct ovl_file *of = file->private_data; + struct backing_file_ctx ctx = { + .cred = ovl_creds(file_inode(file)->i_sb), + .accessed = ovl_file_accessed, + }; - if (!realfile->f_op->mmap) - return -ENODEV; - - if (WARN_ON(file != vma->vm_file)) - return -EIO; - - vma_set_file(vma, realfile); - - old_cred = ovl_override_creds(file_inode(file)->i_sb); - ret = call_mmap(vma->vm_file, vma); - revert_creds(old_cred); - ovl_file_accessed(file); - - return ret; + return backing_file_mmap(of->realfile, vma, &ctx); } static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); - struct fd real; - const struct cred *old_cred; + struct file *realfile; int ret; inode_lock(inode); @@ -560,18 +488,16 @@ static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len if (ret) goto out_unlock; - ret = ovl_real_fdget(file, &real); - if (ret) + realfile = ovl_real_file(file); + ret = PTR_ERR(realfile); + if (IS_ERR(realfile)) goto out_unlock; - old_cred = ovl_override_creds(file_inode(file)->i_sb); - ret = vfs_fallocate(real.file, mode, offset, len); - revert_creds(old_cred); + with_ovl_creds(inode->i_sb) + ret = vfs_fallocate(realfile, mode, offset, len); /* Update size */ - ovl_copyattr(inode); - - fdput(real); + ovl_file_modified(file); out_unlock: inode_unlock(inode); @@ -581,21 +507,14 @@ out_unlock: static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice) { - struct fd real; - const struct cred *old_cred; - int ret; - - ret = ovl_real_fdget(file, &real); - if (ret) - return ret; - - old_cred = ovl_override_creds(file_inode(file)->i_sb); - ret = vfs_fadvise(real.file, offset, len, advice); - revert_creds(old_cred); + struct file *realfile; - fdput(real); + realfile = ovl_real_file(file); + if (IS_ERR(realfile)) + return PTR_ERR(realfile); - return ret; + with_ovl_creds(file_inode(file)->i_sb) + return vfs_fadvise(realfile, offset, len, advice); } enum ovl_copyop { @@ -609,8 +528,7 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, loff_t len, unsigned int flags, enum ovl_copyop op) { struct inode *inode_out = file_inode(file_out); - struct fd real_in, real_out; - const struct cred *old_cred; + struct file *realfile_in, *realfile_out; loff_t ret; inode_lock(inode_out); @@ -622,41 +540,38 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, goto out_unlock; } - ret = ovl_real_fdget(file_out, &real_out); - if (ret) + realfile_out = ovl_real_file(file_out); + ret = PTR_ERR(realfile_out); + if (IS_ERR(realfile_out)) goto out_unlock; - ret = ovl_real_fdget(file_in, &real_in); - if (ret) { - fdput(real_out); + realfile_in = ovl_real_file(file_in); + ret = PTR_ERR(realfile_in); + if (IS_ERR(realfile_in)) goto out_unlock; - } - old_cred = ovl_override_creds(file_inode(file_out)->i_sb); - switch (op) { - case OVL_COPY: - ret = vfs_copy_file_range(real_in.file, pos_in, - real_out.file, pos_out, len, flags); - break; - - case OVL_CLONE: - ret = vfs_clone_file_range(real_in.file, pos_in, - real_out.file, pos_out, len, flags); - break; - - case OVL_DEDUPE: - ret = vfs_dedupe_file_range_one(real_in.file, pos_in, - real_out.file, pos_out, len, - flags); - break; + with_ovl_creds(file_inode(file_out)->i_sb) { + switch (op) { + case OVL_COPY: + ret = vfs_copy_file_range(realfile_in, pos_in, + realfile_out, pos_out, len, flags); + break; + + case OVL_CLONE: + ret = vfs_clone_file_range(realfile_in, pos_in, + realfile_out, pos_out, len, flags); + break; + + case OVL_DEDUPE: + ret = vfs_dedupe_file_range_one(realfile_in, pos_in, + realfile_out, pos_out, len, + flags); + break; + } } - revert_creds(old_cred); /* Update size */ - ovl_copyattr(inode_out); - - fdput(real_in); - fdput(real_out); + ovl_file_modified(file_out); out_unlock: inode_unlock(inode_out); @@ -701,20 +616,17 @@ static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in, static int ovl_flush(struct file *file, fl_owner_t id) { - struct fd real; - const struct cred *old_cred; - int err; + struct file *realfile; + int err = 0; - err = ovl_real_fdget(file, &real); - if (err) - return err; + realfile = ovl_real_file(file); + if (IS_ERR(realfile)) + return PTR_ERR(realfile); - if (real.file->f_op->flush) { - old_cred = ovl_override_creds(file_inode(file)->i_sb); - err = real.file->f_op->flush(real.file, id); - revert_creds(old_cred); + if (realfile->f_op->flush) { + with_ovl_creds(file_inode(file)->i_sb) + err = realfile->f_op->flush(realfile, id); } - fdput(real); return err; } @@ -736,19 +648,3 @@ const struct file_operations ovl_file_operations = { .copy_file_range = ovl_copy_file_range, .remap_file_range = ovl_remap_file_range, }; - -int __init ovl_aio_request_cache_init(void) -{ - ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req", - sizeof(struct ovl_aio_req), - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!ovl_aio_request_cachep) - return -ENOMEM; - - return 0; -} - -void ovl_aio_request_cache_destroy(void) -{ - kmem_cache_destroy(ovl_aio_request_cachep); -} diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 83ef66644c21..bdbf86b56a9b 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -8,7 +8,6 @@ #include <linux/slab.h> #include <linux/cred.h> #include <linux/xattr.h> -#include <linux/posix_acl.h> #include <linux/ratelimit.h> #include <linux/fiemap.h> #include <linux/fileattr.h> @@ -26,16 +25,11 @@ int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct ovl_fs *ofs = OVL_FS(dentry->d_sb); bool full_copy_up = false; struct dentry *upperdentry; - const struct cred *old_cred; err = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (err) return err; - err = ovl_want_write(dentry); - if (err) - goto out; - if (attr->ia_valid & ATTR_SIZE) { /* Truncate should trigger data copy up as well */ full_copy_up = true; @@ -54,7 +48,7 @@ int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, winode = d_inode(upperdentry); err = get_write_access(winode); if (err) - goto out_drop_write; + goto out; } if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) @@ -78,19 +72,22 @@ int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, */ attr->ia_valid &= ~ATTR_OPEN; + err = ovl_want_write(dentry); + if (err) + goto out_put_write; + inode_lock(upperdentry->d_inode); - old_cred = ovl_override_creds(dentry->d_sb); - err = ovl_do_notify_change(ofs, upperdentry, attr); - revert_creds(old_cred); + with_ovl_creds(dentry->d_sb) + err = ovl_do_notify_change(ofs, upperdentry, attr); if (!err) ovl_copyattr(dentry->d_inode); inode_unlock(upperdentry->d_inode); + ovl_drop_write(dentry); +out_put_write: if (winode) put_write_access(winode); } -out_drop_write: - ovl_drop_write(dentry); out: return err; } @@ -154,13 +151,22 @@ static void ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid) } } +static inline int ovl_real_getattr_nosec(struct super_block *sb, + const struct path *path, + struct kstat *stat, u32 request_mask, + unsigned int flags) +{ + with_ovl_creds(sb) + return vfs_getattr_nosec(path, stat, request_mask, flags); +} + int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { struct dentry *dentry = path->dentry; + struct super_block *sb = dentry->d_sb; enum ovl_path_type type; struct path realpath; - const struct cred *old_cred; struct inode *inode = d_inode(dentry); bool is_dir = S_ISDIR(inode->i_mode); int fsid = 0; @@ -170,10 +176,9 @@ int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, metacopy_blocks = ovl_is_metacopy_dentry(dentry); type = ovl_path_real(dentry, &realpath); - old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_getattr(&realpath, stat, request_mask, flags); + err = ovl_real_getattr_nosec(sb, &realpath, stat, request_mask, flags); if (err) - goto out; + return err; /* Report the effective immutable/append-only STATX flags */ generic_fill_statx_attr(inode, stat); @@ -196,10 +201,9 @@ int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, (!is_dir ? STATX_NLINK : 0); ovl_path_lower(dentry, &realpath); - err = vfs_getattr(&realpath, &lowerstat, - lowermask, flags); + err = ovl_real_getattr_nosec(sb, &realpath, &lowerstat, lowermask, flags); if (err) - goto out; + return err; /* * Lower hardlinks may be broken on copy up to different @@ -249,10 +253,10 @@ int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, ovl_path_lowerdata(dentry, &realpath); if (realpath.dentry) { - err = vfs_getattr(&realpath, &lowerdatastat, - lowermask, flags); + err = ovl_real_getattr_nosec(sb, &realpath, &lowerdatastat, + lowermask, flags); if (err) - goto out; + return err; } else { lowerdatastat.blocks = round_up(stat->size, stat->blksize) >> 9; @@ -280,9 +284,6 @@ int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, if (!is_dir && ovl_test_flag(OVL_INDEX, d_inode(dentry))) stat->nlink = dentry->d_inode->i_nlink; -out: - revert_creds(old_cred); - return err; } @@ -292,7 +293,6 @@ int ovl_permission(struct mnt_idmap *idmap, struct inode *upperinode = ovl_inode_upper(inode); struct inode *realinode; struct path realpath; - const struct cred *old_cred; int err; /* Careful in RCU walk mode */ @@ -310,157 +310,26 @@ int ovl_permission(struct mnt_idmap *idmap, if (err) return err; - old_cred = ovl_override_creds(inode->i_sb); if (!upperinode && !special_file(realinode->i_mode) && mask & MAY_WRITE) { mask &= ~(MAY_WRITE | MAY_APPEND); /* Make sure mounter can read file for copy up later */ mask |= MAY_READ; } - err = inode_permission(mnt_idmap(realpath.mnt), realinode, mask); - revert_creds(old_cred); - return err; + with_ovl_creds(inode->i_sb) + return inode_permission(mnt_idmap(realpath.mnt), realinode, mask); } static const char *ovl_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { - const struct cred *old_cred; - const char *p; - if (!dentry) return ERR_PTR(-ECHILD); - old_cred = ovl_override_creds(dentry->d_sb); - p = vfs_get_link(ovl_dentry_real(dentry), done); - revert_creds(old_cred); - return p; -} - -bool ovl_is_private_xattr(struct super_block *sb, const char *name) -{ - struct ovl_fs *ofs = OVL_FS(sb); - - if (ofs->config.userxattr) - return strncmp(name, OVL_XATTR_USER_PREFIX, - sizeof(OVL_XATTR_USER_PREFIX) - 1) == 0; - else - return strncmp(name, OVL_XATTR_TRUSTED_PREFIX, - sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1) == 0; -} - -int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - int err; - struct ovl_fs *ofs = OVL_FS(dentry->d_sb); - struct dentry *upperdentry = ovl_i_dentry_upper(inode); - struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); - struct path realpath; - const struct cred *old_cred; - - err = ovl_want_write(dentry); - if (err) - goto out; - - if (!value && !upperdentry) { - ovl_path_lower(dentry, &realpath); - old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_getxattr(mnt_idmap(realpath.mnt), realdentry, name, NULL, 0); - revert_creds(old_cred); - if (err < 0) - goto out_drop_write; - } - - if (!upperdentry) { - err = ovl_copy_up(dentry); - if (err) - goto out_drop_write; - - realdentry = ovl_dentry_upper(dentry); - } - - old_cred = ovl_override_creds(dentry->d_sb); - if (value) { - err = ovl_do_setxattr(ofs, realdentry, name, value, size, - flags); - } else { - WARN_ON(flags != XATTR_REPLACE); - err = ovl_do_removexattr(ofs, realdentry, name); - } - revert_creds(old_cred); - - /* copy c/mtime */ - ovl_copyattr(inode); - -out_drop_write: - ovl_drop_write(dentry); -out: - return err; -} - -int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, - void *value, size_t size) -{ - ssize_t res; - const struct cred *old_cred; - struct path realpath; - - ovl_i_path_real(inode, &realpath); - old_cred = ovl_override_creds(dentry->d_sb); - res = vfs_getxattr(mnt_idmap(realpath.mnt), realpath.dentry, name, value, size); - revert_creds(old_cred); - return res; -} - -static bool ovl_can_list(struct super_block *sb, const char *s) -{ - /* Never list private (.overlay) */ - if (ovl_is_private_xattr(sb, s)) - return false; - - /* List all non-trusted xattrs */ - if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) - return true; - - /* list other trusted for superuser only */ - return ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN); -} - -ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) -{ - struct dentry *realdentry = ovl_dentry_real(dentry); - ssize_t res; - size_t len; - char *s; - const struct cred *old_cred; - - old_cred = ovl_override_creds(dentry->d_sb); - res = vfs_listxattr(realdentry, list, size); - revert_creds(old_cred); - if (res <= 0 || size == 0) - return res; - - /* filter out private xattrs */ - for (s = list, len = res; len;) { - size_t slen = strnlen(s, len) + 1; - - /* underlying fs providing us with an broken xattr list? */ - if (WARN_ON(slen > len)) - return -EIO; - - len -= slen; - if (!ovl_can_list(dentry->d_sb, s)) { - res -= slen; - memmove(s, s + slen, len); - } else { - s += slen; - } - } - - return res; + with_ovl_creds(dentry->d_sb) + return vfs_get_link(ovl_dentry_real(dentry), done); } #ifdef CONFIG_FS_POSIX_ACL @@ -590,11 +459,8 @@ struct posix_acl *do_ovl_get_acl(struct mnt_idmap *idmap, acl = get_cached_acl_rcu(realinode, type); } else { - const struct cred *old_cred; - - old_cred = ovl_override_creds(inode->i_sb); - acl = ovl_get_acl_path(&realpath, posix_acl_xattr_name(type), noperm); - revert_creds(old_cred); + with_ovl_creds(inode->i_sb) + acl = ovl_get_acl_path(&realpath, posix_acl_xattr_name(type), noperm); } return acl; @@ -606,15 +472,10 @@ static int ovl_set_or_remove_acl(struct dentry *dentry, struct inode *inode, int err; struct path realpath; const char *acl_name; - const struct cred *old_cred; struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *upperdentry = ovl_dentry_upper(dentry); struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); - err = ovl_want_write(dentry); - if (err) - return err; - /* * If ACL is to be removed from a lower file, check if it exists in * the first place before copying it up. @@ -624,13 +485,11 @@ static int ovl_set_or_remove_acl(struct dentry *dentry, struct inode *inode, struct posix_acl *real_acl; ovl_path_lower(dentry, &realpath); - old_cred = ovl_override_creds(dentry->d_sb); - real_acl = vfs_get_acl(mnt_idmap(realpath.mnt), realdentry, - acl_name); - revert_creds(old_cred); + with_ovl_creds(dentry->d_sb) + real_acl = vfs_get_acl(mnt_idmap(realpath.mnt), realdentry, acl_name); if (IS_ERR(real_acl)) { err = PTR_ERR(real_acl); - goto out_drop_write; + goto out; } posix_acl_release(real_acl); } @@ -638,23 +497,26 @@ static int ovl_set_or_remove_acl(struct dentry *dentry, struct inode *inode, if (!upperdentry) { err = ovl_copy_up(dentry); if (err) - goto out_drop_write; + goto out; realdentry = ovl_dentry_upper(dentry); } - old_cred = ovl_override_creds(dentry->d_sb); - if (acl) - err = ovl_do_set_acl(ofs, realdentry, acl_name, acl); - else - err = ovl_do_remove_acl(ofs, realdentry, acl_name); - revert_creds(old_cred); + err = ovl_want_write(dentry); + if (err) + goto out; + + with_ovl_creds(dentry->d_sb) { + if (acl) + err = ovl_do_set_acl(ofs, realdentry, acl_name, acl); + else + err = ovl_do_remove_acl(ofs, realdentry, acl_name); + } + ovl_drop_write(dentry); /* copy c/mtime */ ovl_copyattr(inode); - -out_drop_write: - ovl_drop_write(dentry); +out: return err; } @@ -704,7 +566,8 @@ int ovl_update_time(struct inode *inode, int flags) if (upperpath.dentry) { touch_atime(&upperpath); - inode->i_atime = d_inode(upperpath.dentry)->i_atime; + inode_set_atime_to_ts(inode, + inode_get_atime(d_inode(upperpath.dentry))); } } return 0; @@ -713,9 +576,7 @@ int ovl_update_time(struct inode *inode, int flags) static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { - int err; struct inode *realinode = ovl_inode_realdata(inode); - const struct cred *old_cred; if (!realinode) return -EIO; @@ -723,11 +584,8 @@ static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, if (!realinode->i_op->fiemap) return -EOPNOTSUPP; - old_cred = ovl_override_creds(inode->i_sb); - err = realinode->i_op->fiemap(realinode, fieinfo, start, len); - revert_creds(old_cred); - - return err; + with_ovl_creds(inode->i_sb) + return realinode->i_op->fiemap(realinode, fieinfo, start, len); } /* @@ -735,14 +593,19 @@ static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, * Introducing security_inode_fileattr_get/set() hooks would solve this issue * properly. */ -static int ovl_security_fileattr(const struct path *realpath, struct fileattr *fa, +static int ovl_security_fileattr(const struct path *realpath, struct file_kattr *fa, bool set) { struct file *file; unsigned int cmd; int err; + unsigned int flags; + + flags = O_RDONLY; + if (force_o_largefile()) + flags |= O_LARGEFILE; - file = dentry_open(realpath, O_RDONLY, current_cred()); + file = dentry_open(realpath, flags, current_cred()); if (IS_ERR(file)) return PTR_ERR(file); @@ -757,7 +620,7 @@ static int ovl_security_fileattr(const struct path *realpath, struct fileattr *f return err; } -int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa) +int ovl_real_fileattr_set(const struct path *realpath, struct file_kattr *fa) { int err; @@ -769,34 +632,34 @@ int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa) } int ovl_fileattr_set(struct mnt_idmap *idmap, - struct dentry *dentry, struct fileattr *fa) + struct dentry *dentry, struct file_kattr *fa) { struct inode *inode = d_inode(dentry); struct path upperpath; - const struct cred *old_cred; unsigned int flags; int err; - err = ovl_want_write(dentry); - if (err) - goto out; - err = ovl_copy_up(dentry); if (!err) { ovl_path_real(dentry, &upperpath); - old_cred = ovl_override_creds(inode->i_sb); - /* - * Store immutable/append-only flags in xattr and clear them - * in upper fileattr (in case they were set by older kernel) - * so children of "ovl-immutable" directories lower aliases of - * "ovl-immutable" hardlinks could be copied up. - * Clear xattr when flags are cleared. - */ - err = ovl_set_protattr(inode, upperpath.dentry, fa); - if (!err) - err = ovl_real_fileattr_set(&upperpath, fa); - revert_creds(old_cred); + err = ovl_want_write(dentry); + if (err) + goto out; + + with_ovl_creds(inode->i_sb) { + /* + * Store immutable/append-only flags in xattr and clear them + * in upper fileattr (in case they were set by older kernel) + * so children of "ovl-immutable" directories lower aliases of + * "ovl-immutable" hardlinks could be copied up. + * Clear xattr when flags are cleared. + */ + err = ovl_set_protattr(inode, upperpath.dentry, fa); + if (!err) + err = ovl_real_fileattr_set(&upperpath, fa); + } + ovl_drop_write(dentry); /* * Merge real inode flags with inode flags read from @@ -811,13 +674,12 @@ int ovl_fileattr_set(struct mnt_idmap *idmap, /* Update ctime */ ovl_copyattr(inode); } - ovl_drop_write(dentry); out: return err; } /* Convert inode protection flags to fileattr flags */ -static void ovl_fileattr_prot_flags(struct inode *inode, struct fileattr *fa) +static void ovl_fileattr_prot_flags(struct inode *inode, struct file_kattr *fa) { BUILD_BUG_ON(OVL_PROT_FS_FLAGS_MASK & ~FS_COMMON_FL); BUILD_BUG_ON(OVL_PROT_FSX_FLAGS_MASK & ~FS_XFLAG_COMMON); @@ -832,7 +694,7 @@ static void ovl_fileattr_prot_flags(struct inode *inode, struct fileattr *fa) } } -int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa) +int ovl_real_fileattr_get(const struct path *realpath, struct file_kattr *fa) { int err; @@ -846,19 +708,17 @@ int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa) return err; } -int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa) +int ovl_fileattr_get(struct dentry *dentry, struct file_kattr *fa) { struct inode *inode = d_inode(dentry); struct path realpath; - const struct cred *old_cred; int err; ovl_path_real(dentry, &realpath); - old_cred = ovl_override_creds(inode->i_sb); - err = ovl_real_fileattr_get(&realpath, fa); + with_ovl_creds(inode->i_sb) + err = ovl_real_fileattr_get(&realpath, fa); ovl_fileattr_prot_flags(inode, fa); - revert_creds(old_cred); return err; } @@ -1272,7 +1132,7 @@ struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir) if (!trap) return ERR_PTR(-ENOMEM); - if (!(trap->i_state & I_NEW)) { + if (!(inode_state_read_once(trap) & I_NEW)) { /* Conflicting layer roots? */ iput(trap); return ERR_PTR(-ELOOP); @@ -1363,7 +1223,7 @@ struct inode *ovl_get_inode(struct super_block *sb, inode = ovl_iget5(sb, oip->newinode, key); if (!inode) goto out_err; - if (!(inode->i_state & I_NEW)) { + if (!(inode_state_read_once(inode) & I_NEW)) { /* * Verify that the underlying files stored in the inode * match those in the dentry. @@ -1400,6 +1260,7 @@ struct inode *ovl_get_inode(struct super_block *sb, } ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev); ovl_inode_init(inode, oip, ino, fsid); + WARN_ON_ONCE(!!IS_CASEFOLDED(inode) != ofs->casefold); if (upperdentry && ovl_is_impuredir(sb, upperdentry)) ovl_set_flag(OVL_IMPURE, inode); @@ -1422,7 +1283,7 @@ struct inode *ovl_get_inode(struct super_block *sb, if (upperdentry) ovl_check_protattr(inode, upperdentry); - if (inode->i_state & I_NEW) + if (inode_state_read_once(inode) & I_NEW) unlock_new_inode(inode); out: return inode; diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 80391c687c2a..e9a69c95be91 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -14,17 +14,18 @@ #include <linux/exportfs.h> #include "overlayfs.h" -#include "../internal.h" /* for vfs_path_lookup */ - struct ovl_lookup_data { struct super_block *sb; - struct vfsmount *mnt; + struct dentry *dentry; + const struct ovl_layer *layer; struct qstr name; bool is_dir; bool opaque; + bool xwhiteouts; bool stop; bool last; char *redirect; + char *upperredirect; int metacopy; /* Referring to last redirect xattr */ bool absolute_redirect; @@ -201,17 +202,13 @@ struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh, return real; } -static bool ovl_is_opaquedir(struct ovl_fs *ofs, const struct path *path) -{ - return ovl_path_check_dir_xattr(ofs, path, OVL_XATTR_OPAQUE); -} - static struct dentry *ovl_lookup_positive_unlocked(struct ovl_lookup_data *d, const char *name, struct dentry *base, int len, bool drop_negative) { - struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->mnt), name, base, len); + struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->layer->mnt), + &QSTR_LEN(name, len), base); if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) { if (drop_negative && ret->d_lockref.count == 1) { @@ -232,10 +229,27 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, size_t prelen, const char *post, struct dentry **ret, bool drop_negative) { - struct dentry *this; + struct ovl_fs *ofs = OVL_FS(d->sb); + struct dentry *this = NULL; + const char *warn; struct path path; int err; bool last_element = !post[0]; + bool is_upper = d->layer->idx == 0; + char val; + + /* + * We allow filesystems that are case-folding capable as long as the + * layers are consistently enabled in the stack, enabled for every dir + * or disabled in all dirs. If someone has modified case folding on a + * directory on underlying layer, the warranty of the ovl stack is + * voided. + */ + if (ofs->casefold != ovl_dentry_casefolded(base)) { + warn = "parent wrong casefold"; + err = -ESTALE; + goto out_warn; + } this = ovl_lookup_positive_unlocked(d, name, base, namelen, drop_negative); if (IS_ERR(this)) { @@ -246,12 +260,22 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, goto out_err; } + if (ofs->casefold != ovl_dentry_casefolded(this)) { + warn = "child wrong casefold"; + err = -EREMOTE; + goto out_warn; + } + if (ovl_dentry_weird(this)) { /* Don't support traversing automounts and other weirdness */ + warn = "unsupported object type"; err = -EREMOTE; - goto out_err; + goto out_warn; } - if (ovl_is_whiteout(this)) { + + path.dentry = this; + path.mnt = d->layer->mnt; + if (ovl_path_is_whiteout(ofs, &path)) { d->stop = d->opaque = true; goto put_and_out; } @@ -264,14 +288,12 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, goto put_and_out; } - path.dentry = this; - path.mnt = d->mnt; if (!d_can_lookup(this)) { if (d->is_dir || !last_element) { d->stop = true; goto put_and_out; } - err = ovl_check_metacopy_xattr(OVL_FS(d->sb), &path, NULL); + err = ovl_check_metacopy_xattr(ofs, &path, NULL); if (err < 0) goto out_err; @@ -282,8 +304,9 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, } else { if (ovl_lookup_trap_inode(d->sb, this)) { /* Caught in a trap of overlapping layers */ + warn = "overlapping layers"; err = -ELOOP; - goto out_err; + goto out_warn; } if (last_element) @@ -291,7 +314,12 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, if (d->last) goto out; - if (ovl_is_opaquedir(OVL_FS(d->sb), &path)) { + /* overlay.opaque=x means xwhiteouts directory */ + val = ovl_get_opaquedir_val(ofs, &path); + if (last_element && !is_upper && val == 'x') { + d->xwhiteouts = true; + ovl_layer_set_xwhiteouts(ofs, d->layer); + } else if (val == 'y') { d->stop = true; if (last_element) d->opaque = true; @@ -310,6 +338,10 @@ put_and_out: this = NULL; goto out; +out_warn: + pr_warn_ratelimited("failed lookup in %s (%pd2, name='%.*s', err=%i): %s\n", + is_upper ? "upper" : "lower", base, + namelen, name, err, warn); out_err: dput(this); return err; @@ -438,7 +470,7 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected, else if (IS_ERR(origin)) return PTR_ERR(origin); - if (upperdentry && !ovl_is_whiteout(upperdentry) && + if (upperdentry && !ovl_upper_is_whiteout(ofs, upperdentry) && inode_wrong_type(d_inode(upperdentry), d_inode(origin)->i_mode)) goto invalid; @@ -507,6 +539,19 @@ static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry, return err; } +int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry, + enum ovl_xattr ox, const struct ovl_fh *fh, + bool is_upper, bool set) +{ + int err; + + err = ovl_verify_fh(ofs, dentry, ox, fh); + if (set && err == -ENODATA) + err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len); + + return err; +} + /* * Verify that @real dentry matches the file handle stored in xattr @name. * @@ -515,24 +560,22 @@ static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry, * * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error. */ -int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry, - enum ovl_xattr ox, struct dentry *real, bool is_upper, - bool set) +int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry, + enum ovl_xattr ox, struct dentry *real, + bool is_upper, bool set) { struct inode *inode; struct ovl_fh *fh; int err; - fh = ovl_encode_real_fh(ofs, real, is_upper); + fh = ovl_encode_real_fh(ofs, d_inode(real), is_upper); err = PTR_ERR(fh); if (IS_ERR(fh)) { fh = NULL; goto fail; } - err = ovl_verify_fh(ofs, dentry, ox, fh); - if (set && err == -ENODATA) - err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len); + err = ovl_verify_set_fh(ofs, dentry, ox, fh, is_upper, set); if (err) goto fail; @@ -548,6 +591,7 @@ fail: goto out; } + /* Get upper dentry from index */ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index, bool connected) @@ -684,7 +728,7 @@ orphan: goto out; } -static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name) +int ovl_get_index_name_fh(const struct ovl_fh *fh, struct qstr *name) { char *n, *s; @@ -720,7 +764,7 @@ int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin, struct ovl_fh *fh; int err; - fh = ovl_encode_real_fh(ofs, origin, false); + fh = ovl_encode_real_fh(ofs, d_inode(origin), false); if (IS_ERR(fh)) return PTR_ERR(fh); @@ -741,7 +785,7 @@ struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh) if (err) return ERR_PTR(err); - index = lookup_positive_unlocked(name.name, ofs->indexdir, name.len); + index = lookup_noperm_positive_unlocked(&name, ofs->workdir); kfree(name.name); if (IS_ERR(index)) { if (PTR_ERR(index) == -ENOENT) @@ -773,8 +817,8 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, if (err) return ERR_PTR(err); - index = lookup_one_positive_unlocked(ovl_upper_mnt_idmap(ofs), name.name, - ofs->indexdir, name.len); + index = lookup_one_positive_unlocked(ovl_upper_mnt_idmap(ofs), &name, + ofs->workdir); if (IS_ERR(index)) { err = PTR_ERR(index); if (err == -ENOENT) { @@ -850,7 +894,8 @@ fail: * Returns next layer in stack starting from top. * Returns -1 if this is the last layer. */ -int ovl_path_next(int idx, struct dentry *dentry, struct path *path) +int ovl_path_next(int idx, struct dentry *dentry, struct path *path, + const struct ovl_layer **layer) { struct ovl_entry *oe = OVL_E(dentry); struct ovl_path *lowerstack = ovl_lowerstack(oe); @@ -858,13 +903,16 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path) BUG_ON(idx < 0); if (idx == 0) { ovl_path_upper(dentry, path); - if (path->dentry) + if (path->dentry) { + *layer = &OVL_FS(dentry->d_sb)->layers[0]; return ovl_numlower(oe) ? 1 : -1; + } idx++; } BUG_ON(idx > ovl_numlower(oe)); path->dentry = lowerstack[idx - 1].dentry; - path->mnt = lowerstack[idx - 1].layer->mnt; + *layer = lowerstack[idx - 1].layer; + path->mnt = (*layer)->mnt; return (idx < ovl_numlower(oe)) ? idx + 1 : -1; } @@ -873,20 +921,27 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path) static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry, struct dentry *lower, struct dentry *upper) { + const struct ovl_fh *fh; int err; if (ovl_check_origin_xattr(ofs, upper)) return 0; + fh = ovl_get_origin_fh(ofs, lower); + if (IS_ERR(fh)) + return PTR_ERR(fh); + err = ovl_want_write(dentry); if (err) - return err; + goto out; - err = ovl_set_origin(ofs, lower, upper); + err = ovl_set_origin_fh(ofs, fh, upper); if (!err) err = ovl_set_impure(dentry->d_parent, upper->d_parent); ovl_drop_write(dentry); +out: + kfree(fh); return err; } @@ -924,15 +979,10 @@ static int ovl_maybe_validate_verity(struct dentry *dentry) return err; if (!ovl_test_flag(OVL_VERIFIED_DIGEST, inode)) { - const struct cred *old_cred; - - old_cred = ovl_override_creds(dentry->d_sb); - - err = ovl_validate_verity(ofs, &metapath, &datapath); + with_ovl_creds(dentry->d_sb) + err = ovl_validate_verity(ofs, &metapath, &datapath); if (err == 0) ovl_set_flag(OVL_VERIFIED_DIGEST, inode); - - revert_creds(old_cred); } ovl_inode_unlock(inode); @@ -946,7 +996,6 @@ static int ovl_maybe_lookup_lowerdata(struct dentry *dentry) struct inode *inode = d_inode(dentry); const char *redirect = ovl_lowerdata_redirect(inode); struct ovl_path datapath = {}; - const struct cred *old_cred; int err; if (!redirect || ovl_dentry_lowerdata(dentry)) @@ -964,9 +1013,8 @@ static int ovl_maybe_lookup_lowerdata(struct dentry *dentry) if (ovl_dentry_lowerdata(dentry)) goto out; - old_cred = ovl_override_creds(dentry->d_sb); - err = ovl_lookup_data_layers(dentry, redirect, &datapath); - revert_creds(old_cred); + with_ovl_creds(dentry->d_sb) + err = ovl_lookup_data_layers(dentry, redirect, &datapath); if (err) goto out_err; @@ -997,55 +1045,69 @@ int ovl_verify_lowerdata(struct dentry *dentry) return ovl_maybe_validate_verity(dentry); } -struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, - unsigned int flags) +/* + * Following redirects/metacopy can have security consequences: it's like a + * symlink into the lower layer without the permission checks. + * + * This is only a problem if the upper layer is untrusted (e.g comes from an USB + * drive). This can allow a non-readable file or directory to become readable. + * + * Only following redirects when redirects are enabled disables this attack + * vector when not necessary. + */ +static bool ovl_check_follow_redirect(struct ovl_lookup_data *d) { - struct ovl_entry *oe = NULL; - const struct cred *old_cred; + struct ovl_fs *ofs = OVL_FS(d->sb); + + if (d->metacopy && !ofs->config.metacopy) { + pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", d->dentry); + return false; + } + if ((d->redirect || d->upperredirect) && !ovl_redirect_follow(ofs)) { + pr_warn_ratelimited("refusing to follow redirect for (%pd2)\n", d->dentry); + return false; + } + return true; +} + +struct ovl_lookup_ctx { + struct dentry *dentry; + struct ovl_entry *oe; + struct ovl_path *stack; + struct ovl_path *origin_path; + struct dentry *upperdentry; + struct dentry *index; + struct inode *inode; + unsigned int ctr; +}; + +static int ovl_lookup_layers(struct ovl_lookup_ctx *ctx, struct ovl_lookup_data *d) +{ + struct dentry *dentry = ctx->dentry; struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct ovl_entry *poe = OVL_E(dentry->d_parent); struct ovl_entry *roe = OVL_E(dentry->d_sb->s_root); - struct ovl_path *stack = NULL, *origin_path = NULL; - struct dentry *upperdir, *upperdentry = NULL; + bool check_redirect = (ovl_redirect_follow(ofs) || ofs->numdatalayer); + struct dentry *upperdir; + struct dentry *this; struct dentry *origin = NULL; - struct dentry *index = NULL; - unsigned int ctr = 0; - struct inode *inode = NULL; bool upperopaque = false; - char *upperredirect = NULL; - struct dentry *this; - unsigned int i; - int err; bool uppermetacopy = false; int metacopy_size = 0; - struct ovl_lookup_data d = { - .sb = dentry->d_sb, - .name = dentry->d_name, - .is_dir = false, - .opaque = false, - .stop = false, - .last = ovl_redirect_follow(ofs) ? false : !ovl_numlower(poe), - .redirect = NULL, - .metacopy = 0, - }; - - if (dentry->d_name.len > ofs->namelen) - return ERR_PTR(-ENAMETOOLONG); + unsigned int i; + int err; - old_cred = ovl_override_creds(dentry->d_sb); upperdir = ovl_dentry_upper(dentry->d_parent); if (upperdir) { - d.mnt = ovl_upper_mnt(ofs); - err = ovl_lookup_layer(upperdir, &d, &upperdentry, true); + d->layer = &ofs->layers[0]; + err = ovl_lookup_layer(upperdir, d, &ctx->upperdentry, true); if (err) - goto out; + return err; - if (upperdentry && upperdentry->d_flags & DCACHE_OP_REAL) { - dput(upperdentry); - err = -EREMOTE; - goto out; - } - if (upperdentry && !d.is_dir) { + if (ctx->upperdentry && ctx->upperdentry->d_flags & DCACHE_OP_REAL) + return -EREMOTE; + + if (ctx->upperdentry && !d->is_dir) { /* * Lookup copy up origin by decoding origin file handle. * We may get a disconnected dentry, which is fine, @@ -1056,65 +1118,63 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, * number - it's the same as if we held a reference * to a dentry in lower layer that was moved under us. */ - err = ovl_check_origin(ofs, upperdentry, &origin_path); + err = ovl_check_origin(ofs, ctx->upperdentry, &ctx->origin_path); if (err) - goto out_put_upper; + return err; - if (d.metacopy) + if (d->metacopy) uppermetacopy = true; - metacopy_size = d.metacopy; + metacopy_size = d->metacopy; } - if (d.redirect) { + if (d->redirect) { err = -ENOMEM; - upperredirect = kstrdup(d.redirect, GFP_KERNEL); - if (!upperredirect) - goto out_put_upper; - if (d.redirect[0] == '/') + d->upperredirect = kstrdup(d->redirect, GFP_KERNEL); + if (!d->upperredirect) + return err; + if (d->redirect[0] == '/') poe = roe; } - upperopaque = d.opaque; + upperopaque = d->opaque; } - if (!d.stop && ovl_numlower(poe)) { + if (!d->stop && ovl_numlower(poe)) { err = -ENOMEM; - stack = ovl_stack_alloc(ofs->numlayer - 1); - if (!stack) - goto out_put_upper; + ctx->stack = ovl_stack_alloc(ofs->numlayer - 1); + if (!ctx->stack) + return err; } - for (i = 0; !d.stop && i < ovl_numlower(poe); i++) { + for (i = 0; !d->stop && i < ovl_numlower(poe); i++) { struct ovl_path lower = ovl_lowerstack(poe)[i]; - if (!ovl_redirect_follow(ofs)) - d.last = i == ovl_numlower(poe) - 1; - else if (d.is_dir || !ofs->numdatalayer) - d.last = lower.layer->idx == ovl_numlower(roe); + if (!ovl_check_follow_redirect(d)) { + err = -EPERM; + return err; + } + + if (!check_redirect) + d->last = i == ovl_numlower(poe) - 1; + else if (d->is_dir || !ofs->numdatalayer) + d->last = lower.layer->idx == ovl_numlower(roe); - d.mnt = lower.layer->mnt; - err = ovl_lookup_layer(lower.dentry, &d, &this, false); + d->layer = lower.layer; + err = ovl_lookup_layer(lower.dentry, d, &this, false); if (err) - goto out_put; + return err; if (!this) continue; - if ((uppermetacopy || d.metacopy) && !ofs->config.metacopy) { - dput(this); - err = -EPERM; - pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", dentry); - goto out_put; - } - /* * If no origin fh is stored in upper of a merge dir, store fh * of lower dir and set upper parent "impure". */ - if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) { - err = ovl_fix_origin(ofs, dentry, this, upperdentry); + if (ctx->upperdentry && !ctx->ctr && !ofs->noxattr && d->is_dir) { + err = ovl_fix_origin(ofs, dentry, this, ctx->upperdentry); if (err) { dput(this); - goto out_put; + return err; } } @@ -1127,23 +1187,23 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, * matches the dentry found using path based lookup, * otherwise error out. */ - if (upperdentry && !ctr && - ((d.is_dir && ovl_verify_lower(dentry->d_sb)) || - (!d.is_dir && ofs->config.index && origin_path))) { - err = ovl_verify_origin(ofs, upperdentry, this, false); + if (ctx->upperdentry && !ctx->ctr && + ((d->is_dir && ovl_verify_lower(dentry->d_sb)) || + (!d->is_dir && ofs->config.index && ctx->origin_path))) { + err = ovl_verify_origin(ofs, ctx->upperdentry, this, false); if (err) { dput(this); - if (d.is_dir) + if (d->is_dir) break; - goto out_put; + return err; } origin = this; } - if (!upperdentry && !d.is_dir && !ctr && d.metacopy) - metacopy_size = d.metacopy; + if (!ctx->upperdentry && !d->is_dir && !ctx->ctr && d->metacopy) + metacopy_size = d->metacopy; - if (d.metacopy && ctr) { + if (d->metacopy && ctx->ctr) { /* * Do not store intermediate metacopy dentries in * lower chain, except top most lower metacopy dentry. @@ -1153,42 +1213,31 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, dput(this); this = NULL; } else { - stack[ctr].dentry = this; - stack[ctr].layer = lower.layer; - ctr++; + ctx->stack[ctx->ctr].dentry = this; + ctx->stack[ctx->ctr].layer = lower.layer; + ctx->ctr++; } - /* - * Following redirects can have security consequences: it's like - * a symlink into the lower layer without the permission checks. - * This is only a problem if the upper layer is untrusted (e.g - * comes from an USB drive). This can allow a non-readable file - * or directory to become readable. - * - * Only following redirects when redirects are enabled disables - * this attack vector when not necessary. - */ - err = -EPERM; - if (d.redirect && !ovl_redirect_follow(ofs)) { - pr_warn_ratelimited("refusing to follow redirect for (%pd2)\n", - dentry); - goto out_put; - } - - if (d.stop) + if (d->stop) break; - if (d.redirect && d.redirect[0] == '/' && poe != roe) { + if (d->redirect && d->redirect[0] == '/' && poe != roe) { poe = roe; /* Find the current layer on the root dentry */ i = lower.layer->idx - 1; } } - /* Defer lookup of lowerdata in data-only layers to first access */ - if (d.metacopy && ctr && ofs->numdatalayer && d.absolute_redirect) { - d.metacopy = 0; - ctr++; + /* + * Defer lookup of lowerdata in data-only layers to first access. + * Don't require redirect=follow and metacopy=on in this case. + */ + if (d->metacopy && ctx->ctr && ofs->numdatalayer && d->absolute_redirect) { + d->metacopy = 0; + ctx->ctr++; + } else if (!ovl_check_follow_redirect(d)) { + err = -EPERM; + return err; } /* @@ -1199,20 +1248,20 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, * For metacopy dentry, path based lookup will find lower dentries. * Just make sure a corresponding data dentry has been found. */ - if (d.metacopy || (uppermetacopy && !ctr)) { + if (d->metacopy || (uppermetacopy && !ctx->ctr)) { pr_warn_ratelimited("metacopy with no lower data found - abort lookup (%pd2)\n", dentry); err = -EIO; - goto out_put; - } else if (!d.is_dir && upperdentry && !ctr && origin_path) { - if (WARN_ON(stack != NULL)) { + return err; + } else if (!d->is_dir && ctx->upperdentry && !ctx->ctr && ctx->origin_path) { + if (WARN_ON(ctx->stack != NULL)) { err = -EIO; - goto out_put; + return err; } - stack = origin_path; - ctr = 1; - origin = origin_path->dentry; - origin_path = NULL; + ctx->stack = ctx->origin_path; + ctx->ctr = 1; + origin = ctx->origin_path->dentry; + ctx->origin_path = NULL; } /* @@ -1234,116 +1283,141 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, * is enabled and if upper had an ORIGIN xattr. * */ - if (!upperdentry && ctr) - origin = stack[0].dentry; + if (!ctx->upperdentry && ctx->ctr) + origin = ctx->stack[0].dentry; if (origin && ovl_indexdir(dentry->d_sb) && - (!d.is_dir || ovl_index_all(dentry->d_sb))) { - index = ovl_lookup_index(ofs, upperdentry, origin, true); - if (IS_ERR(index)) { - err = PTR_ERR(index); - index = NULL; - goto out_put; + (!d->is_dir || ovl_index_all(dentry->d_sb))) { + ctx->index = ovl_lookup_index(ofs, ctx->upperdentry, origin, true); + if (IS_ERR(ctx->index)) { + err = PTR_ERR(ctx->index); + ctx->index = NULL; + return err; } } - if (ctr) { - oe = ovl_alloc_entry(ctr); + if (ctx->ctr) { + ctx->oe = ovl_alloc_entry(ctx->ctr); err = -ENOMEM; - if (!oe) - goto out_put; + if (!ctx->oe) + return err; - ovl_stack_cpy(ovl_lowerstack(oe), stack, ctr); + ovl_stack_cpy(ovl_lowerstack(ctx->oe), ctx->stack, ctx->ctr); } if (upperopaque) ovl_dentry_set_opaque(dentry); + if (d->xwhiteouts) + ovl_dentry_set_xwhiteouts(dentry); - if (upperdentry) + if (ctx->upperdentry) ovl_dentry_set_upper_alias(dentry); - else if (index) { + else if (ctx->index) { + char *upperredirect; struct path upperpath = { - .dentry = upperdentry = dget(index), + .dentry = ctx->upperdentry = dget(ctx->index), .mnt = ovl_upper_mnt(ofs), }; /* * It's safe to assign upperredirect here: the previous - * assignment of happens only if upperdentry is non-NULL, and + * assignment happens only if upperdentry is non-NULL, and * this one only if upperdentry is NULL. */ upperredirect = ovl_get_redirect_xattr(ofs, &upperpath, 0); - if (IS_ERR(upperredirect)) { - err = PTR_ERR(upperredirect); - upperredirect = NULL; - goto out_free_oe; - } + if (IS_ERR(upperredirect)) + return PTR_ERR(upperredirect); + d->upperredirect = upperredirect; + err = ovl_check_metacopy_xattr(ofs, &upperpath, NULL); if (err < 0) - goto out_free_oe; - uppermetacopy = err; + return err; + d->metacopy = uppermetacopy = err; metacopy_size = err; + + if (!ovl_check_follow_redirect(d)) { + err = -EPERM; + return err; + } } - if (upperdentry || ctr) { + if (ctx->upperdentry || ctx->ctr) { + struct inode *inode; struct ovl_inode_params oip = { - .upperdentry = upperdentry, - .oe = oe, - .index = index, - .redirect = upperredirect, + .upperdentry = ctx->upperdentry, + .oe = ctx->oe, + .index = ctx->index, + .redirect = d->upperredirect, }; /* Store lowerdata redirect for lazy lookup */ - if (ctr > 1 && !d.is_dir && !stack[ctr - 1].dentry) { - oip.lowerdata_redirect = d.redirect; - d.redirect = NULL; + if (ctx->ctr > 1 && !d->is_dir && !ctx->stack[ctx->ctr - 1].dentry) { + oip.lowerdata_redirect = d->redirect; + d->redirect = NULL; } + inode = ovl_get_inode(dentry->d_sb, &oip); - err = PTR_ERR(inode); if (IS_ERR(inode)) - goto out_free_oe; - if (upperdentry && !uppermetacopy) - ovl_set_flag(OVL_UPPERDATA, inode); + return PTR_ERR(inode); + + ctx->inode = inode; + if (ctx->upperdentry && !uppermetacopy) + ovl_set_flag(OVL_UPPERDATA, ctx->inode); if (metacopy_size > OVL_METACOPY_MIN_SIZE) - ovl_set_flag(OVL_HAS_DIGEST, inode); + ovl_set_flag(OVL_HAS_DIGEST, ctx->inode); } - ovl_dentry_init_reval(dentry, upperdentry, OVL_I_E(inode)); + ovl_dentry_init_reval(dentry, ctx->upperdentry, OVL_I_E(ctx->inode)); + + return 0; +} + +struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + struct ovl_entry *poe = OVL_E(dentry->d_parent); + bool check_redirect = (ovl_redirect_follow(ofs) || ofs->numdatalayer); + int err; + struct ovl_lookup_ctx ctx = { + .dentry = dentry, + }; + struct ovl_lookup_data d = { + .sb = dentry->d_sb, + .dentry = dentry, + .name = dentry->d_name, + .last = check_redirect ? false : !ovl_numlower(poe), + }; + + if (dentry->d_name.len > ofs->namelen) + return ERR_PTR(-ENAMETOOLONG); - revert_creds(old_cred); - if (origin_path) { - dput(origin_path->dentry); - kfree(origin_path); + with_ovl_creds(dentry->d_sb) + err = ovl_lookup_layers(&ctx, &d); + + if (ctx.origin_path) { + dput(ctx.origin_path->dentry); + kfree(ctx.origin_path); } - dput(index); - ovl_stack_free(stack, ctr); + dput(ctx.index); + ovl_stack_free(ctx.stack, ctx.ctr); kfree(d.redirect); - return d_splice_alias(inode, dentry); -out_free_oe: - ovl_free_entry(oe); -out_put: - dput(index); - ovl_stack_free(stack, ctr); -out_put_upper: - if (origin_path) { - dput(origin_path->dentry); - kfree(origin_path); + if (err) { + ovl_free_entry(ctx.oe); + dput(ctx.upperdentry); + kfree(d.upperredirect); + return ERR_PTR(err); } - dput(upperdentry); - kfree(upperredirect); -out: - kfree(d.redirect); - revert_creds(old_cred); - return ERR_PTR(err); + + return d_splice_alias(ctx.inode, dentry); } bool ovl_lower_positive(struct dentry *dentry) { struct ovl_entry *poe = OVL_E(dentry->d_parent); const struct qstr *name = &dentry->d_name; - const struct cred *old_cred; unsigned int i; bool positive = false; bool done = false; @@ -1359,36 +1433,45 @@ bool ovl_lower_positive(struct dentry *dentry) if (!ovl_dentry_upper(dentry)) return true; - old_cred = ovl_override_creds(dentry->d_sb); - /* Positive upper -> have to look up lower to see whether it exists */ - for (i = 0; !done && !positive && i < ovl_numlower(poe); i++) { - struct dentry *this; - struct ovl_path *parentpath = &ovl_lowerstack(poe)[i]; - - this = lookup_one_positive_unlocked( - mnt_idmap(parentpath->layer->mnt), - name->name, parentpath->dentry, name->len); - if (IS_ERR(this)) { - switch (PTR_ERR(this)) { - case -ENOENT: - case -ENAMETOOLONG: - break; - - default: - /* - * Assume something is there, we just couldn't - * access it. - */ - positive = true; - break; + with_ovl_creds(dentry->d_sb) { + /* Positive upper -> have to look up lower to see whether it exists */ + for (i = 0; !done && !positive && i < ovl_numlower(poe); i++) { + struct dentry *this; + struct ovl_path *parentpath = &ovl_lowerstack(poe)[i]; + + /* + * We need to make a non-const copy of dentry->d_name, + * because lookup_one_positive_unlocked() will hash name + * with parentpath base, which is on another (lower fs). + */ + this = lookup_one_positive_unlocked(mnt_idmap(parentpath->layer->mnt), + &QSTR_LEN(name->name, name->len), + parentpath->dentry); + if (IS_ERR(this)) { + switch (PTR_ERR(this)) { + case -ENOENT: + case -ENAMETOOLONG: + break; + + default: + /* + * Assume something is there, we just couldn't + * access it. + */ + positive = true; + break; + } + } else { + struct path path = { + .dentry = this, + .mnt = parentpath->layer->mnt, + }; + positive = !ovl_path_is_whiteout(OVL_FS(dentry->d_sb), &path); + done = true; + dput(this); } - } else { - positive = !ovl_is_whiteout(this); - done = true; - dput(this); } } - revert_creds(old_cred); return positive; } diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 9817b2dcb132..f9ac9bdde830 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -28,7 +28,16 @@ enum ovl_path_type { #define OVL_XATTR_NAMESPACE "overlay." #define OVL_XATTR_TRUSTED_PREFIX XATTR_TRUSTED_PREFIX OVL_XATTR_NAMESPACE +#define OVL_XATTR_TRUSTED_PREFIX_LEN (sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1) #define OVL_XATTR_USER_PREFIX XATTR_USER_PREFIX OVL_XATTR_NAMESPACE +#define OVL_XATTR_USER_PREFIX_LEN (sizeof(OVL_XATTR_USER_PREFIX) - 1) + +#define OVL_XATTR_ESCAPE_PREFIX OVL_XATTR_NAMESPACE +#define OVL_XATTR_ESCAPE_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_PREFIX) - 1) +#define OVL_XATTR_ESCAPE_TRUSTED_PREFIX OVL_XATTR_TRUSTED_PREFIX OVL_XATTR_ESCAPE_PREFIX +#define OVL_XATTR_ESCAPE_TRUSTED_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_TRUSTED_PREFIX) - 1) +#define OVL_XATTR_ESCAPE_USER_PREFIX OVL_XATTR_USER_PREFIX OVL_XATTR_ESCAPE_PREFIX +#define OVL_XATTR_ESCAPE_USER_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_USER_PREFIX) - 1) enum ovl_xattr { OVL_XATTR_OPAQUE, @@ -40,6 +49,7 @@ enum ovl_xattr { OVL_XATTR_UUID, OVL_XATTR_METACOPY, OVL_XATTR_PROTATTR, + OVL_XATTR_XWHITEOUT, }; enum ovl_inode_flag { @@ -59,6 +69,8 @@ enum ovl_entry_flag { OVL_E_UPPER_ALIAS, OVL_E_OPAQUE, OVL_E_CONNECTED, + /* Lower stack may contain xwhiteout entries */ + OVL_E_XWHITEOUTS, }; enum { @@ -163,6 +175,9 @@ static inline int ovl_metadata_digest_size(const struct ovl_metacopy *metacopy) return (int)metacopy->len - OVL_METACOPY_MIN_SIZE; } +/* No atime modification on underlying */ +#define OVL_OPEN_FLAGS (O_NOATIME) + extern const char *const ovl_xattr_table[][2]; static inline const char *ovl_xattr(struct ovl_fs *ofs, enum ovl_xattr ox) { @@ -191,7 +206,7 @@ static inline int ovl_do_notify_change(struct ovl_fs *ofs, static inline int ovl_do_rmdir(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry) { - int err = vfs_rmdir(ovl_upper_mnt_idmap(ofs), dir, dentry); + int err = vfs_rmdir(ovl_upper_mnt_idmap(ofs), dir, dentry, NULL); pr_debug("rmdir(%pd2) = %i\n", dentry, err); return err; @@ -220,26 +235,29 @@ static inline int ovl_do_create(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry, umode_t mode) { - int err = vfs_create(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, true); + int err = vfs_create(ovl_upper_mnt_idmap(ofs), dentry, mode, NULL); pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err); return err; } -static inline int ovl_do_mkdir(struct ovl_fs *ofs, - struct inode *dir, struct dentry *dentry, - umode_t mode) +static inline struct dentry *ovl_do_mkdir(struct ovl_fs *ofs, + struct inode *dir, + struct dentry *dentry, + umode_t mode) { - int err = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); - pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err); - return err; + struct dentry *ret; + + ret = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, NULL); + pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, PTR_ERR_OR_ZERO(ret)); + return ret; } static inline int ovl_do_mknod(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { - int err = vfs_mknod(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, dev); + int err = vfs_mknod(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, dev, NULL); pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n", dentry, mode, dev, err); return err; @@ -249,7 +267,7 @@ static inline int ovl_do_symlink(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry, const char *oldname) { - int err = vfs_symlink(ovl_upper_mnt_idmap(ofs), dir, dentry, oldname); + int err = vfs_symlink(ovl_upper_mnt_idmap(ofs), dir, dentry, oldname, NULL); pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err); return err; @@ -337,30 +355,36 @@ static inline int ovl_do_remove_acl(struct ovl_fs *ofs, struct dentry *dentry, return vfs_remove_acl(ovl_upper_mnt_idmap(ofs), dentry, acl_name); } -static inline int ovl_do_rename(struct ovl_fs *ofs, struct inode *olddir, - struct dentry *olddentry, struct inode *newdir, - struct dentry *newdentry, unsigned int flags) +static inline int ovl_do_rename_rd(struct renamedata *rd) { int err; - struct renamedata rd = { - .old_mnt_idmap = ovl_upper_mnt_idmap(ofs), - .old_dir = olddir, - .old_dentry = olddentry, - .new_mnt_idmap = ovl_upper_mnt_idmap(ofs), - .new_dir = newdir, - .new_dentry = newdentry, - .flags = flags, - }; - pr_debug("rename(%pd2, %pd2, 0x%x)\n", olddentry, newdentry, flags); - err = vfs_rename(&rd); + pr_debug("rename(%pd2, %pd2, 0x%x)\n", rd->old_dentry, rd->new_dentry, + rd->flags); + err = vfs_rename(rd); if (err) { pr_debug("...rename(%pd2, %pd2, ...) = %i\n", - olddentry, newdentry, err); + rd->old_dentry, rd->new_dentry, err); } return err; } +static inline int ovl_do_rename(struct ovl_fs *ofs, struct dentry *olddir, + struct dentry *olddentry, struct dentry *newdir, + struct dentry *newdentry, unsigned int flags) +{ + struct renamedata rd = { + .mnt_idmap = ovl_upper_mnt_idmap(ofs), + .old_parent = olddir, + .old_dentry = olddentry, + .new_parent = newdir, + .new_dentry = newdentry, + .flags = flags, + }; + + return ovl_do_rename_rd(&rd); +} + static inline int ovl_do_whiteout(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry) { @@ -386,7 +410,32 @@ static inline struct dentry *ovl_lookup_upper(struct ovl_fs *ofs, const char *name, struct dentry *base, int len) { - return lookup_one(ovl_upper_mnt_idmap(ofs), name, base, len); + return lookup_one(ovl_upper_mnt_idmap(ofs), &QSTR_LEN(name, len), base); +} + +static inline struct dentry *ovl_lookup_upper_unlocked(struct ovl_fs *ofs, + const char *name, + struct dentry *base, + int len) +{ + return lookup_one_unlocked(ovl_upper_mnt_idmap(ofs), + &QSTR_LEN(name, len), base); +} + +static inline struct dentry *ovl_start_creating_upper(struct ovl_fs *ofs, + struct dentry *parent, + struct qstr *name) +{ + return start_creating(ovl_upper_mnt_idmap(ofs), + parent, name); +} + +static inline struct dentry *ovl_start_removing_upper(struct ovl_fs *ofs, + struct dentry *parent, + struct qstr *name) +{ + return start_removing(ovl_upper_mnt_idmap(ofs), + parent, name); } static inline bool ovl_open_flags_need_copy_up(int flags) @@ -398,10 +447,25 @@ static inline bool ovl_open_flags_need_copy_up(int flags) } /* util.c */ +int ovl_get_write_access(struct dentry *dentry); +void ovl_put_write_access(struct dentry *dentry); +void ovl_start_write(struct dentry *dentry); +void ovl_end_write(struct dentry *dentry); int ovl_want_write(struct dentry *dentry); void ovl_drop_write(struct dentry *dentry); struct dentry *ovl_workdir(struct dentry *dentry); const struct cred *ovl_override_creds(struct super_block *sb); + +EXTEND_CLASS(override_creds, _ovl, ovl_override_creds(sb), struct super_block *sb) + +#define with_ovl_creds(sb) \ + scoped_class(override_creds_ovl, __UNIQUE_ID(label), sb) + +static inline const struct cred *ovl_creds(struct super_block *sb) +{ + return OVL_FS(sb)->creator_cred; +} + int ovl_can_decode_fh(struct super_block *sb); struct dentry *ovl_indexdir(struct super_block *sb); bool ovl_index_all(struct super_block *sb); @@ -419,6 +483,12 @@ void ovl_dentry_init_reval(struct dentry *dentry, struct dentry *upperdentry, void ovl_dentry_init_flags(struct dentry *dentry, struct dentry *upperdentry, struct ovl_entry *oe, unsigned int mask); bool ovl_dentry_weird(struct dentry *dentry); + +static inline bool ovl_dentry_casefolded(struct dentry *dentry) +{ + return sb_has_encoding(dentry->d_sb) && IS_CASEFOLDED(d_inode(dentry)); +} + enum ovl_path_type ovl_path_type(struct dentry *dentry); void ovl_path_upper(struct dentry *dentry, struct path *path); void ovl_path_lower(struct dentry *dentry, struct path *path); @@ -448,6 +518,10 @@ bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry); bool ovl_dentry_is_opaque(struct dentry *dentry); bool ovl_dentry_is_whiteout(struct dentry *dentry); void ovl_dentry_set_opaque(struct dentry *dentry); +bool ovl_dentry_has_xwhiteouts(struct dentry *dentry); +void ovl_dentry_set_xwhiteouts(struct dentry *dentry); +void ovl_layer_set_xwhiteouts(struct ovl_fs *ofs, + const struct ovl_layer *layer); bool ovl_dentry_has_upper_alias(struct dentry *dentry); void ovl_dentry_set_upper_alias(struct dentry *dentry); bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags); @@ -460,16 +534,28 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry); void ovl_dir_modified(struct dentry *dentry, bool impurity); u64 ovl_inode_version_get(struct inode *inode); bool ovl_is_whiteout(struct dentry *dentry); +bool ovl_path_is_whiteout(struct ovl_fs *ofs, const struct path *path); struct file *ovl_path_open(const struct path *path, int flags); int ovl_copy_up_start(struct dentry *dentry, int flags); void ovl_copy_up_end(struct dentry *dentry); bool ovl_already_copied_up(struct dentry *dentry, int flags); -bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path, - enum ovl_xattr ox); +char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path *path, + enum ovl_xattr ox); bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path); +bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path); bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs, const struct path *upperpath); +static inline bool ovl_upper_is_whiteout(struct ovl_fs *ofs, + struct dentry *upperdentry) +{ + struct path upperpath = { + .dentry = upperdentry, + .mnt = ovl_upper_mnt(ofs), + }; + return ovl_path_is_whiteout(ofs, &upperpath); +} + static inline bool ovl_check_origin_xattr(struct ovl_fs *ofs, struct dentry *upperdentry) { @@ -490,20 +576,19 @@ bool ovl_is_inuse(struct dentry *dentry); bool ovl_need_index(struct dentry *dentry); int ovl_nlink_start(struct dentry *dentry); void ovl_nlink_end(struct dentry *dentry); -int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir); +int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *work, + struct dentry *upperdir, struct dentry *upper); int ovl_check_metacopy_xattr(struct ovl_fs *ofs, const struct path *path, struct ovl_metacopy *data); int ovl_set_metacopy_xattr(struct ovl_fs *ofs, struct dentry *d, struct ovl_metacopy *metacopy); bool ovl_is_metacopy_dentry(struct dentry *dentry); char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int padding); -int ovl_ensure_verity_loaded(struct path *path); -int ovl_get_verity_xattr(struct ovl_fs *ofs, const struct path *path, - u8 *digest_buf, int *buf_length); +int ovl_ensure_verity_loaded(const struct path *path); int ovl_validate_verity(struct ovl_fs *ofs, - struct path *metapath, - struct path *datapath); -int ovl_get_verity_digest(struct ovl_fs *ofs, struct path *src, + const struct path *metapath, + const struct path *datapath); +int ovl_get_verity_digest(struct ovl_fs *ofs, const struct path *src, struct ovl_metacopy *metacopy); int ovl_sync_status(struct ovl_fs *ofs); @@ -531,7 +616,13 @@ static inline bool ovl_is_impuredir(struct super_block *sb, .mnt = ovl_upper_mnt(ofs), }; - return ovl_path_check_dir_xattr(ofs, &upperpath, OVL_XATTR_IMPURE); + return ovl_get_dir_xattr_val(ofs, &upperpath, OVL_XATTR_IMPURE) == 'y'; +} + +static inline char ovl_get_opaquedir_val(struct ovl_fs *ofs, + const struct path *path) +{ + return ovl_get_dir_xattr_val(ofs, path, OVL_XATTR_OPAQUE); } static inline bool ovl_redirect_follow(struct ovl_fs *ofs) @@ -624,33 +715,45 @@ struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh, int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected, struct dentry *upperdentry, struct ovl_path **stackp); int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry, - enum ovl_xattr ox, struct dentry *real, bool is_upper, - bool set); + enum ovl_xattr ox, const struct ovl_fh *fh, + bool is_upper, bool set); +int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry, + enum ovl_xattr ox, struct dentry *real, + bool is_upper, bool set); struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index, bool connected); int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index); +int ovl_get_index_name_fh(const struct ovl_fh *fh, struct qstr *name); int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin, struct qstr *name); struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh); struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, struct dentry *origin, bool verify); -int ovl_path_next(int idx, struct dentry *dentry, struct path *path); +int ovl_path_next(int idx, struct dentry *dentry, struct path *path, + const struct ovl_layer **layer); int ovl_verify_lowerdata(struct dentry *dentry); struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); bool ovl_lower_positive(struct dentry *dentry); +static inline int ovl_verify_origin_fh(struct ovl_fs *ofs, struct dentry *upper, + const struct ovl_fh *fh, bool set) +{ + return ovl_verify_set_fh(ofs, upper, OVL_XATTR_ORIGIN, fh, false, set); +} + static inline int ovl_verify_origin(struct ovl_fs *ofs, struct dentry *upper, struct dentry *origin, bool set) { - return ovl_verify_set_fh(ofs, upper, OVL_XATTR_ORIGIN, origin, - false, set); + return ovl_verify_origin_xattr(ofs, upper, OVL_XATTR_ORIGIN, origin, + false, set); } static inline int ovl_verify_upper(struct ovl_fs *ofs, struct dentry *index, struct dentry *upper, bool set) { - return ovl_verify_set_fh(ofs, index, OVL_XATTR_UPPER, upper, true, set); + return ovl_verify_origin_xattr(ofs, index, OVL_XATTR_UPPER, upper, + true, set); } /* readdir.c */ @@ -662,7 +765,7 @@ void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper, void ovl_cache_free(struct list_head *list); void ovl_dir_cache_free(struct inode *inode); int ovl_check_d_type_supported(const struct path *realpath); -int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir, +int ovl_workdir_cleanup(struct ovl_fs *ofs, struct dentry *parent, struct vfsmount *mnt, struct dentry *dentry, int level); int ovl_indexdir_cleanup(struct ovl_fs *ofs); @@ -684,17 +787,8 @@ int ovl_set_nlink_lower(struct dentry *dentry); unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry, struct dentry *upperdentry, unsigned int fallback); -int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, - struct iattr *attr); -int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, - struct kstat *stat, u32 request_mask, unsigned int flags); int ovl_permission(struct mnt_idmap *idmap, struct inode *inode, int mask); -int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, - const void *value, size_t size, int flags); -int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, - void *value, size_t size); -ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *do_ovl_get_acl(struct mnt_idmap *idmap, @@ -748,10 +842,12 @@ struct inode *ovl_get_inode(struct super_block *sb, struct ovl_inode_params *oip); void ovl_copyattr(struct inode *to); +/* vfs fileattr flags read from overlay.protattr xattr to ovl inode */ +#define OVL_PROT_I_FLAGS_MASK (S_APPEND | S_IMMUTABLE) +/* vfs fileattr flags copied from real to ovl inode */ +#define OVL_FATTR_I_FLAGS_MASK (OVL_PROT_I_FLAGS_MASK | S_SYNC | S_NOATIME) /* vfs inode flags copied from real to ovl inode */ -#define OVL_COPY_I_FLAGS_MASK (S_SYNC | S_NOATIME | S_APPEND | S_IMMUTABLE) -/* vfs inode flags read from overlay.protattr xattr to ovl inode */ -#define OVL_PROT_I_FLAGS_MASK (S_APPEND | S_IMMUTABLE) +#define OVL_COPY_I_FLAGS_MASK (OVL_FATTR_I_FLAGS_MASK | S_CASEFOLD) /* * fileattr flags copied from lower to upper inode on copy up. @@ -765,7 +861,7 @@ void ovl_copyattr(struct inode *to); void ovl_check_protattr(struct inode *inode, struct dentry *upper); int ovl_set_protattr(struct inode *inode, struct dentry *upper, - struct fileattr *fa); + struct file_kattr *fa); static inline void ovl_copyflags(struct inode *from, struct inode *to) { @@ -776,7 +872,7 @@ static inline void ovl_copyflags(struct inode *from, struct inode *to) /* dir.c */ extern const struct inode_operations ovl_dir_inode_operations; -int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir, +int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *dir, struct dentry *dentry); struct ovl_cattr { dev_t rdev; @@ -787,25 +883,25 @@ struct ovl_cattr { #define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) }) -int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir, - struct dentry **newdentry, umode_t mode); struct dentry *ovl_create_real(struct ovl_fs *ofs, - struct inode *dir, struct dentry *newdentry, + struct dentry *parent, struct dentry *newdentry, struct ovl_cattr *attr); -int ovl_cleanup(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry); -struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir); +int ovl_cleanup(struct ovl_fs *ofs, struct dentry *workdir, struct dentry *dentry); +#define OVL_TEMPNAME_SIZE 20 +void ovl_tempname(char name[OVL_TEMPNAME_SIZE]); struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir, struct ovl_cattr *attr); /* file.c */ extern const struct file_operations ovl_file_operations; -int __init ovl_aio_request_cache_init(void); -void ovl_aio_request_cache_destroy(void); -int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa); -int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa); -int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa); +int ovl_real_fileattr_get(const struct path *realpath, struct file_kattr *fa); +int ovl_real_fileattr_set(const struct path *realpath, struct file_kattr *fa); +int ovl_fileattr_get(struct dentry *dentry, struct file_kattr *fa); int ovl_fileattr_set(struct mnt_idmap *idmap, - struct dentry *dentry, struct fileattr *fa); + struct dentry *dentry, struct file_kattr *fa); +struct ovl_file; +struct ovl_file *ovl_file_alloc(struct file *realfile); +void ovl_file_free(struct ovl_file *of); /* copy_up.c */ int ovl_copy_up(struct dentry *dentry); @@ -813,10 +909,11 @@ int ovl_copy_up_with_data(struct dentry *dentry); int ovl_maybe_copy_up(struct dentry *dentry, int flags); int ovl_copy_xattr(struct super_block *sb, const struct path *path, struct dentry *new); int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upper, struct kstat *stat); -struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real, +struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct inode *realinode, bool is_upper); -int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower, - struct dentry *upper); +struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin); +int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh, + struct dentry *upper); /* export.c */ extern const struct export_operations ovl_export_operations; @@ -830,3 +927,12 @@ static inline bool ovl_force_readonly(struct ovl_fs *ofs) { return (!ovl_upper_mnt(ofs) || !ofs->workdir); } + +/* xattr.c */ + +const struct xattr_handler * const *ovl_xattr_handlers(struct ovl_fs *ofs); +int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + struct iattr *attr); +int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, + struct kstat *stat, u32 request_mask, unsigned int flags); +ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index e9539f98e86a..1d4828dbcf7a 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -8,6 +8,7 @@ struct ovl_config { char *upperdir; char *workdir; + char **lowerdirs; bool default_permissions; int redirect_mode; int verity_mode; @@ -39,17 +40,10 @@ struct ovl_layer { int idx; /* One fsid per unique underlying sb (upper fsid == 0) */ int fsid; - char *name; + /* xwhiteouts were found on this layer */ + bool has_xwhiteouts; }; -/* - * ovl_free_fs() relies on @mnt being the first member when unmounting - * the private mounts created for each layer. Let's check both the - * offset and type. - */ -static_assert(offsetof(struct ovl_layer, mnt) == 0); -static_assert(__same_type(typeof_member(struct ovl_layer, mnt), struct vfsmount *)); - struct ovl_path { const struct ovl_layer *layer; struct dentry *dentry; @@ -57,7 +51,7 @@ struct ovl_path { struct ovl_entry { unsigned int __numlower; - struct ovl_path __lowerstack[]; + struct ovl_path __lowerstack[] __counted_by(__numlower); }; /* private information held for overlayfs's superblock */ @@ -67,14 +61,12 @@ struct ovl_fs { unsigned int numfs; /* Number of data-only lower layers */ unsigned int numdatalayer; - const struct ovl_layer *layers; + struct ovl_layer *layers; struct ovl_sb *fs; /* workbasedir is the path at workdir= mount option */ struct dentry *workbasedir; - /* workdir is the 'work' directory under workbasedir */ + /* workdir is the 'work' or 'index' directory under workbasedir */ struct dentry *workdir; - /* index directory listing overlay inodes by origin file handle */ - struct dentry *indexdir; long namelen; /* pathnames of lower and upper dirs, for show_options */ struct ovl_config config; @@ -89,7 +81,6 @@ struct ovl_fs { /* Traps in ovl inode cache */ struct inode *workbasedir_trap; struct inode *workdir_trap; - struct inode *indexdir_trap; /* -1: disabled, 0: same fs, 1..32: number of unused ino bits */ int xino_mode; /* For allocation of non-persistent inode numbers */ @@ -97,8 +88,10 @@ struct ovl_fs { /* Shared whiteout cache */ struct dentry *whiteout; bool no_shared_whiteout; + struct mutex whiteout_lock; /* r/o snapshot of upperdir sb's only taken on volatile mounts */ errseq_t errseq; + bool casefold; }; /* Number of lower layers, not including data-only layers */ diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c index b9355bb6d75a..63b7346c5ee1 100644 --- a/fs/overlayfs/params.c +++ b/fs/overlayfs/params.c @@ -43,8 +43,10 @@ module_param_named(metacopy, ovl_metacopy_def, bool, 0644); MODULE_PARM_DESC(metacopy, "Default to on or off for the metadata only copy up feature"); -enum { +enum ovl_opt { Opt_lowerdir, + Opt_lowerdir_add, + Opt_datadir_add, Opt_upperdir, Opt_workdir, Opt_default_permissions, @@ -57,6 +59,7 @@ enum { Opt_metacopy, Opt_verity, Opt_volatile, + Opt_override_creds, }; static const struct constant_table ovl_parameter_bool[] = { @@ -137,13 +140,12 @@ static int ovl_verity_mode_def(void) return OVL_VERITY_OFF; } -#define fsparam_string_empty(NAME, OPT) \ - __fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL) - const struct fs_parameter_spec ovl_parameter_spec[] = { fsparam_string_empty("lowerdir", Opt_lowerdir), - fsparam_string("upperdir", Opt_upperdir), - fsparam_string("workdir", Opt_workdir), + fsparam_file_or_string("lowerdir+", Opt_lowerdir_add), + fsparam_file_or_string("datadir+", Opt_datadir_add), + fsparam_file_or_string("upperdir", Opt_upperdir), + fsparam_file_or_string("workdir", Opt_workdir), fsparam_flag("default_permissions", Opt_default_permissions), fsparam_enum("redirect_dir", Opt_redirect_dir, ovl_parameter_redirect_dir), fsparam_enum("index", Opt_index, ovl_parameter_bool), @@ -154,9 +156,38 @@ const struct fs_parameter_spec ovl_parameter_spec[] = { fsparam_enum("metacopy", Opt_metacopy, ovl_parameter_bool), fsparam_enum("verity", Opt_verity, ovl_parameter_verity), fsparam_flag("volatile", Opt_volatile), + fsparam_flag_no("override_creds", Opt_override_creds), {} }; +static char *ovl_next_opt(char **s) +{ + char *sbegin = *s; + char *p; + + if (sbegin == NULL) + return NULL; + + for (p = sbegin; *p; p++) { + if (*p == '\\') { + p++; + if (!*p) + break; + } else if (*p == ',') { + *p = '\0'; + *s = p + 1; + return sbegin; + } + } + *s = NULL; + return sbegin; +} + +static int ovl_parse_monolithic(struct fs_context *fc, void *data) +{ + return vfs_parse_monolithic_sep(fc, data, ovl_next_opt); +} + static ssize_t ovl_parse_param_split_lowerdirs(char *str) { ssize_t nr_layers = 1, nr_colons = 0; @@ -164,7 +195,8 @@ static ssize_t ovl_parse_param_split_lowerdirs(char *str) for (s = d = str;; s++, d++) { if (*s == '\\') { - s++; + /* keep esc chars in split lowerdir */ + *d++ = *s++; } else if (*s == ':') { bool next_colon = (*(s + 1) == ':'); @@ -209,19 +241,8 @@ static int ovl_mount_dir_noesc(const char *name, struct path *path) pr_err("failed to resolve '%s': %i\n", name, err); goto out; } - err = -EINVAL; - if (ovl_dentry_weird(path->dentry)) { - pr_err("filesystem on '%s' not supported\n", name); - goto out_put; - } - if (!d_is_dir(path->dentry)) { - pr_err("'%s' not a directory\n", name); - goto out_put; - } return 0; -out_put: - path_put_init(path); out: return err; } @@ -247,68 +268,223 @@ static int ovl_mount_dir(const char *name, struct path *path) if (tmp) { ovl_unescape(tmp); err = ovl_mount_dir_noesc(tmp, path); - - if (!err && path->dentry->d_flags & DCACHE_OP_REAL) { - pr_err("filesystem on '%s' not supported as upperdir\n", - tmp); - path_put_init(path); - err = -EINVAL; - } kfree(tmp); } return err; } -static int ovl_parse_param_upperdir(const char *name, struct fs_context *fc, - bool workdir) +static int ovl_mount_dir_check(struct fs_context *fc, const struct path *path, + enum ovl_opt layer, const char *name, bool upper) { - int err; - struct ovl_fs *ofs = fc->s_fs_info; - struct ovl_config *config = &ofs->config; + bool is_casefolded = ovl_dentry_casefolded(path->dentry); struct ovl_fs_context *ctx = fc->fs_private; - struct path path; - char *dup; + struct ovl_fs *ofs = fc->s_fs_info; - err = ovl_mount_dir(name, &path); - if (err) - return err; + if (!d_is_dir(path->dentry)) + return invalfc(fc, "%s is not a directory", name); + + /* + * Allow filesystems that are case-folding capable but deny composing + * ovl stack from inconsistent case-folded directories. + */ + if (!ctx->casefold_set) { + ofs->casefold = is_casefolded; + ctx->casefold_set = true; + } + + if (ofs->casefold != is_casefolded) { + return invalfc(fc, "case-%ssensitive directory on %s is inconsistent", + is_casefolded ? "in" : "", name); + } + + if (ovl_dentry_weird(path->dentry)) + return invalfc(fc, "filesystem on %s not supported", name); /* * Check whether upper path is read-only here to report failures * early. Don't forget to recheck when the superblock is created * as the mount attributes could change. */ - if (__mnt_is_readonly(path.mnt)) { - path_put(&path); - return -EINVAL; + if (upper) { + if (path->dentry->d_flags & DCACHE_OP_REAL) + return invalfc(fc, "filesystem on %s not supported as upperdir", name); + if (__mnt_is_readonly(path->mnt)) + return invalfc(fc, "filesystem on %s is read-only", name); + } else { + if (ctx->lowerdir_all && layer != Opt_lowerdir) + return invalfc(fc, "lowerdir+ and datadir+ cannot follow lowerdir"); + if (ctx->nr_data && layer == Opt_lowerdir_add) + return invalfc(fc, "regular lower layers cannot follow data layers"); + if (ctx->nr == OVL_MAX_STACK) + return invalfc(fc, "too many lower directories, limit is %d", + OVL_MAX_STACK); } + return 0; +} - dup = kstrdup(name, GFP_KERNEL); - if (!dup) { - path_put(&path); +static int ovl_ctx_realloc_lower(struct fs_context *fc) +{ + struct ovl_fs_context *ctx = fc->fs_private; + struct ovl_fs_context_layer *l; + size_t nr; + + if (ctx->nr < ctx->capacity) + return 0; + + nr = min_t(size_t, max(4096 / sizeof(*l), ctx->capacity * 2), + OVL_MAX_STACK); + l = krealloc_array(ctx->lower, nr, sizeof(*l), GFP_KERNEL_ACCOUNT); + if (!l) return -ENOMEM; + + ctx->lower = l; + ctx->capacity = nr; + return 0; +} + +static void ovl_add_layer(struct fs_context *fc, enum ovl_opt layer, + struct path *path, char **pname) +{ + struct ovl_fs *ofs = fc->s_fs_info; + struct ovl_config *config = &ofs->config; + struct ovl_fs_context *ctx = fc->fs_private; + struct ovl_fs_context_layer *l; + + switch (layer) { + case Opt_workdir: + swap(config->workdir, *pname); + swap(ctx->work, *path); + break; + case Opt_upperdir: + swap(config->upperdir, *pname); + swap(ctx->upper, *path); + break; + case Opt_datadir_add: + ctx->nr_data++; + fallthrough; + case Opt_lowerdir: + fallthrough; + case Opt_lowerdir_add: + WARN_ON(ctx->nr >= ctx->capacity); + l = &ctx->lower[ctx->nr++]; + memset(l, 0, sizeof(*l)); + swap(l->name, *pname); + swap(l->path, *path); + break; + default: + WARN_ON(1); } +} - if (workdir) { - kfree(config->workdir); - config->workdir = dup; - path_put(&ctx->work); - ctx->work = path; - } else { - kfree(config->upperdir); - config->upperdir = dup; - path_put(&ctx->upper); - ctx->upper = path; +static inline bool is_upper_layer(enum ovl_opt layer) +{ + return layer == Opt_upperdir || layer == Opt_workdir; +} + +/* Handle non-file descriptor-based layer options that require path lookup. */ +static inline int ovl_kern_path(const char *layer_name, struct path *layer_path, + enum ovl_opt layer) +{ + int err; + + switch (layer) { + case Opt_upperdir: + fallthrough; + case Opt_workdir: + fallthrough; + case Opt_lowerdir: + err = ovl_mount_dir(layer_name, layer_path); + break; + case Opt_lowerdir_add: + fallthrough; + case Opt_datadir_add: + err = ovl_mount_dir_noesc(layer_name, layer_path); + break; + default: + WARN_ON_ONCE(true); + err = -EINVAL; } - return 0; + + return err; +} + +static int ovl_do_parse_layer(struct fs_context *fc, const char *layer_name, + struct path *layer_path, enum ovl_opt layer) +{ + char *name __free(kfree) = kstrdup(layer_name, GFP_KERNEL); + bool upper; + int err = 0; + + if (!name) + return -ENOMEM; + + upper = is_upper_layer(layer); + err = ovl_mount_dir_check(fc, layer_path, layer, name, upper); + if (err) + return err; + + if (!upper) { + err = ovl_ctx_realloc_lower(fc); + if (err) + return err; + } + + /* Store the user provided path string in ctx to show in mountinfo */ + ovl_add_layer(fc, layer, layer_path, &name); + return err; } -static void ovl_parse_param_drop_lowerdir(struct ovl_fs_context *ctx) +static int ovl_parse_layer(struct fs_context *fc, struct fs_parameter *param, + enum ovl_opt layer) { - for (size_t nr = 0; nr < ctx->nr; nr++) { - path_put(&ctx->lower[nr].path); - kfree(ctx->lower[nr].name); - ctx->lower[nr].name = NULL; + struct path layer_path __free(path_put) = {}; + int err = 0; + + switch (param->type) { + case fs_value_is_string: + err = ovl_kern_path(param->string, &layer_path, layer); + if (err) + return err; + err = ovl_do_parse_layer(fc, param->string, &layer_path, layer); + break; + case fs_value_is_file: { + char *buf __free(kfree); + char *layer_name; + + buf = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT); + if (!buf) + return -ENOMEM; + + layer_path = param->file->f_path; + path_get(&layer_path); + + layer_name = d_path(&layer_path, buf, PATH_MAX); + if (IS_ERR(layer_name)) + return PTR_ERR(layer_name); + + err = ovl_do_parse_layer(fc, layer_name, &layer_path, layer); + break; + } + default: + WARN_ON_ONCE(true); + err = -EINVAL; + } + + return err; +} + +static void ovl_reset_lowerdirs(struct ovl_fs_context *ctx) +{ + struct ovl_fs_context_layer *l = ctx->lower; + + // Reset old user provided lowerdir string + kfree(ctx->lowerdir_all); + ctx->lowerdir_all = NULL; + + for (size_t nr = 0; nr < ctx->nr; nr++, l++) { + path_put(&l->path); + kfree(l->name); + l->name = NULL; } ctx->nr = 0; ctx->nr_data = 0; @@ -317,25 +493,18 @@ static void ovl_parse_param_drop_lowerdir(struct ovl_fs_context *ctx) /* * Parse lowerdir= mount option: * - * (1) lowerdir=/lower1:/lower2:/lower3::/data1::/data2 + * e.g.: lowerdir=/lower1:/lower2:/lower3::/data1::/data2 * Set "/lower1", "/lower2", and "/lower3" as lower layers and * "/data1" and "/data2" as data lower layers. Any existing lower * layers are replaced. - * (2) lowerdir=:/lower4 - * Append "/lower4" to current stack of lower layers. This requires - * that there already is at least one lower layer configured. - * (3) lowerdir=::/lower5 - * Append data "/lower5" as data lower layer. This requires that - * there's at least one regular lower layer present. */ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) { int err; struct ovl_fs_context *ctx = fc->fs_private; - struct ovl_fs_context_layer *l; - char *dup = NULL, *dup_iter; - ssize_t nr_lower = 0, nr = 0, nr_data = 0; - bool append = false, data_layer = false; + char *dup = NULL, *iter; + ssize_t nr_lower, nr; + bool data_layer = false; /* * Ensure we're backwards compatible with mount(2) @@ -343,56 +512,21 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) */ /* drop all existing lower layers */ - if (!*name) { - ovl_parse_param_drop_lowerdir(ctx); - return 0; - } - - if (strncmp(name, "::", 2) == 0) { - /* - * This is a data layer. - * There must be at least one regular lower layer - * specified. - */ - if (ctx->nr == 0) { - pr_err("data lower layers without regular lower layers not allowed"); - return -EINVAL; - } + ovl_reset_lowerdirs(ctx); - /* Skip the leading "::". */ - name += 2; - data_layer = true; - /* - * A data layer is automatically an append as there - * must've been at least one regular lower layer. - */ - append = true; - } else if (*name == ':') { - /* - * This is a regular lower layer. - * If users want to append a layer enforce that they - * have already specified a first layer before. It's - * better to be strict. - */ - if (ctx->nr == 0) { - pr_err("cannot append layer if no previous layer has been specified"); - return -EINVAL; - } - - /* - * Once a sequence of data layers has started regular - * lower layers are forbidden. - */ - if (ctx->nr_data > 0) { - pr_err("regular lower layers cannot follow data lower layers"); - return -EINVAL; - } + if (!*name) + return 0; - /* Skip the leading ":". */ - name++; - append = true; + if (*name == ':') { + pr_err("cannot append lower layer\n"); + return -EINVAL; } + // Store user provided lowerdir string to show in mount options + ctx->lowerdir_all = kstrdup(name, GFP_KERNEL); + if (!ctx->lowerdir_all) + return -ENOMEM; + dup = kstrdup(name, GFP_KERNEL); if (!dup) return -ENOMEM; @@ -402,102 +536,40 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) if (nr_lower < 0) goto out_err; - if ((nr_lower > OVL_MAX_STACK) || - (append && (size_add(ctx->nr, nr_lower) > OVL_MAX_STACK))) { + if (nr_lower > OVL_MAX_STACK) { pr_err("too many lower directories, limit is %d\n", OVL_MAX_STACK); goto out_err; } - if (!append) - ovl_parse_param_drop_lowerdir(ctx); + iter = dup; + for (nr = 0; nr < nr_lower; nr++) { + struct path path __free(path_put) = {}; - /* - * (1) append - * - * We want nr <= nr_lower <= capacity We know nr > 0 and nr <= - * capacity. If nr == 0 this wouldn't be append. If nr + - * nr_lower is <= capacity then nr <= nr_lower <= capacity - * already holds. If nr + nr_lower exceeds capacity, we realloc. - * - * (2) replace - * - * Ensure we're backwards compatible with mount(2) which allows - * "lowerdir=/a:/b:/c,lowerdir=/d:/e:/f" causing the last - * specified lowerdir mount option to win. - * - * We want nr <= nr_lower <= capacity We know either (i) nr == 0 - * or (ii) nr > 0. We also know nr_lower > 0. The capacity - * could've been changed multiple times already so we only know - * nr <= capacity. If nr + nr_lower > capacity we realloc, - * otherwise nr <= nr_lower <= capacity holds already. - */ - nr_lower += ctx->nr; - if (nr_lower > ctx->capacity) { - err = -ENOMEM; - l = krealloc_array(ctx->lower, nr_lower, sizeof(*ctx->lower), - GFP_KERNEL_ACCOUNT); - if (!l) + err = ovl_kern_path(iter, &path, Opt_lowerdir); + if (err) goto out_err; - ctx->lower = l; - ctx->capacity = nr_lower; - } - - /* - * (3) By (1) and (2) we know nr <= nr_lower <= capacity. - * (4) If ctx->nr == 0 => replace - * We have verified above that the lowerdir mount option - * isn't an append, i.e., the lowerdir mount option - * doesn't start with ":" or "::". - * (4.1) The lowerdir mount options only contains regular lower - * layers ":". - * => Nothing to verify. - * (4.2) The lowerdir mount options contains regular ":" and - * data "::" layers. - * => We need to verify that data lower layers "::" aren't - * followed by regular ":" lower layers - * (5) If ctx->nr > 0 => append - * We know that there's at least one regular layer - * otherwise we would've failed when parsing the previous - * lowerdir mount option. - * (5.1) The lowerdir mount option is a regular layer ":" append - * => We need to verify that no data layers have been - * specified before. - * (5.2) The lowerdir mount option is a data layer "::" append - * We know that there's at least one regular layer or - * other data layers. => There's nothing to verify. - */ - dup_iter = dup; - for (nr = ctx->nr; nr < nr_lower; nr++) { - l = &ctx->lower[nr]; - memset(l, 0, sizeof(*l)); - - err = ovl_mount_dir_noesc(dup_iter, &l->path); + err = ovl_do_parse_layer(fc, iter, &path, Opt_lowerdir); if (err) - goto out_put; - - err = -ENOMEM; - l->name = kstrdup(dup_iter, GFP_KERNEL_ACCOUNT); - if (!l->name) - goto out_put; + goto out_err; if (data_layer) - nr_data++; + ctx->nr_data++; /* Calling strchr() again would overrun. */ - if ((nr + 1) == nr_lower) + if (ctx->nr == nr_lower) break; err = -EINVAL; - dup_iter = strchr(dup_iter, '\0') + 1; - if (*dup_iter) { + iter = strchr(iter, '\0') + 1; + if (*iter) { /* * This is a regular layer so we require that * there are no data layers. */ - if ((ctx->nr_data + nr_data) > 0) { - pr_err("regular lower layers cannot follow data lower layers"); - goto out_put; + if (ctx->nr_data > 0) { + pr_err("regular lower layers cannot follow data lower layers\n"); + goto out_err; } data_layer = false; @@ -506,30 +578,11 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) /* This is a data lower layer. */ data_layer = true; - dup_iter++; + iter++; } - ctx->nr = nr_lower; - ctx->nr_data += nr_data; kfree(dup); return 0; -out_put: - /* - * We know nr >= ctx->nr < nr_lower. If we failed somewhere - * we want to undo until nr == ctx->nr. This is correct for - * both ctx->nr == 0 and ctx->nr > 0. - */ - for (; nr >= ctx->nr; nr--) { - l = &ctx->lower[nr]; - kfree(l->name); - l->name = NULL; - path_put(&l->path); - - /* don't overflow */ - if (nr == 0) - break; - } - out_err: kfree(dup); @@ -573,11 +626,11 @@ static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_lowerdir: err = ovl_parse_param_lowerdir(param->string, fc); break; + case Opt_lowerdir_add: + case Opt_datadir_add: case Opt_upperdir: - fallthrough; case Opt_workdir: - err = ovl_parse_param_upperdir(param->string, fc, - (Opt_workdir == opt)); + err = ovl_parse_layer(fc, param, opt); break; case Opt_default_permissions: config->default_permissions = true; @@ -618,6 +671,29 @@ static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_userxattr: config->userxattr = true; break; + case Opt_override_creds: { + const struct cred *cred = NULL; + + if (result.negated) { + swap(cred, ofs->creator_cred); + put_cred(cred); + break; + } + + if (!current_in_userns(fc->user_ns)) { + err = -EINVAL; + break; + } + + cred = prepare_creds(); + if (cred) + swap(cred, ofs->creator_cred); + else + err = -ENOMEM; + + put_cred(cred); + break; + } default: pr_err("unrecognized mount option \"%s\" or missing value\n", param->key); @@ -634,7 +710,7 @@ static int ovl_get_tree(struct fs_context *fc) static inline void ovl_fs_context_free(struct ovl_fs_context *ctx) { - ovl_parse_param_drop_lowerdir(ctx); + ovl_reset_lowerdirs(ctx); path_put(&ctx->upper); path_put(&ctx->work); kfree(ctx->lower); @@ -682,6 +758,7 @@ static int ovl_reconfigure(struct fs_context *fc) } static const struct fs_context_operations ovl_context_ops = { + .parse_monolithic = ovl_parse_monolithic, .parse_param = ovl_parse_param, .get_tree = ovl_get_tree, .reconfigure = ovl_reconfigure, @@ -727,6 +804,8 @@ int ovl_init_fs_context(struct fs_context *fc) fc->s_fs_info = ofs; fc->fs_private = ctx; fc->ops = &ovl_context_ops; + + mutex_init(&ofs->whiteout_lock); return 0; out_err: @@ -741,10 +820,8 @@ void ovl_free_fs(struct ovl_fs *ofs) unsigned i; iput(ofs->workbasedir_trap); - iput(ofs->indexdir_trap); iput(ofs->workdir_trap); dput(ofs->whiteout); - dput(ofs->indexdir); dput(ofs->workdir); if (ofs->workdir_locked) ovl_inuse_unlock(ofs->workbasedir); @@ -752,12 +829,12 @@ void ovl_free_fs(struct ovl_fs *ofs) if (ofs->upperdir_locked) ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root); - /* Hack! Reuse ofs->layers as a vfsmount array before freeing it */ - mounts = (struct vfsmount **) ofs->layers; + /* Reuse ofs->config.lowerdirs as a vfsmount array before freeing it */ + mounts = (struct vfsmount **) ofs->config.lowerdirs; for (i = 0; i < ofs->numlayer; i++) { iput(ofs->layers[i].trap); + kfree(ofs->config.lowerdirs[i]); mounts[i] = ofs->layers[i].mnt; - kfree(ofs->layers[i].name); } kern_unmount_array(mounts, ofs->numlayer); kfree(ofs->layers); @@ -765,6 +842,7 @@ void ovl_free_fs(struct ovl_fs *ofs) free_anon_bdev(ofs->fs[i].pseudo_dev); kfree(ofs->fs); + kfree(ofs->config.lowerdirs); kfree(ofs->config.upperdir); kfree(ofs->config.workdir); if (ofs->creator_cred) @@ -777,11 +855,6 @@ int ovl_fs_params_verify(const struct ovl_fs_context *ctx, { struct ovl_opt_set set = ctx->set; - if (ctx->nr_data > 0 && !config->metacopy) { - pr_err("lower data-only dirs require metacopy support.\n"); - return -EINVAL; - } - /* Workdir/index are useless in non-upper mount */ if (!config->upperdir) { if (config->workdir) { @@ -807,18 +880,6 @@ int ovl_fs_params_verify(const struct ovl_fs_context *ctx, config->uuid = OVL_UUID_NULL; } - /* Resolve verity -> metacopy dependency */ - if (config->verity_mode && !config->metacopy) { - /* Don't allow explicit specified conflicting combinations */ - if (set.metacopy) { - pr_err("conflicting options: metacopy=off,verity=%s\n", - ovl_verity_mode(config)); - return -EINVAL; - } - /* Otherwise automatically enable metacopy. */ - config->metacopy = true; - } - /* * This is to make the logic below simpler. It doesn't make any other * difference, since redirect_dir=on is only used for upper. @@ -826,18 +887,13 @@ int ovl_fs_params_verify(const struct ovl_fs_context *ctx, if (!config->upperdir && config->redirect_mode == OVL_REDIRECT_FOLLOW) config->redirect_mode = OVL_REDIRECT_ON; - /* Resolve verity -> metacopy -> redirect_dir dependency */ + /* metacopy -> redirect_dir dependency */ if (config->metacopy && config->redirect_mode != OVL_REDIRECT_ON) { if (set.metacopy && set.redirect) { pr_err("conflicting options: metacopy=on,redirect_dir=%s\n", ovl_redirect_mode(config)); return -EINVAL; } - if (config->verity_mode && set.redirect) { - pr_err("conflicting options: verity=%s,redirect_dir=%s\n", - ovl_verity_mode(config), ovl_redirect_mode(config)); - return -EINVAL; - } if (set.redirect) { /* * There was an explicit redirect_dir=... that resulted @@ -906,7 +962,7 @@ int ovl_fs_params_verify(const struct ovl_fs_context *ctx, } - /* Resolve userxattr -> !redirect && !metacopy && !verity dependency */ + /* Resolve userxattr -> !redirect && !metacopy dependency */ if (config->userxattr) { if (set.redirect && config->redirect_mode != OVL_REDIRECT_NOFOLLOW) { @@ -918,11 +974,6 @@ int ovl_fs_params_verify(const struct ovl_fs_context *ctx, pr_err("conflicting options: userxattr,metacopy=on\n"); return -EINVAL; } - if (config->verity_mode) { - pr_err("conflicting options: userxattr,verity=%s\n", - ovl_verity_mode(config)); - return -EINVAL; - } /* * Silently disable default setting of redirect and metacopy. * This shall be the default in the future as well: these @@ -933,6 +984,34 @@ int ovl_fs_params_verify(const struct ovl_fs_context *ctx, config->metacopy = false; } + /* + * Fail if we don't have trusted xattr capability and a feature was + * explicitly requested that requires them. + */ + if (!config->userxattr && !capable(CAP_SYS_ADMIN)) { + if (set.redirect && + config->redirect_mode != OVL_REDIRECT_NOFOLLOW) { + pr_err("redirect_dir requires permission to access trusted xattrs\n"); + return -EPERM; + } + if (config->metacopy && set.metacopy) { + pr_err("metacopy requires permission to access trusted xattrs\n"); + return -EPERM; + } + if (config->verity_mode) { + pr_err("verity requires permission to access trusted xattrs\n"); + return -EPERM; + } + if (ctx->nr_data > 0) { + pr_err("lower data-only dirs require permission to access trusted xattrs\n"); + return -EPERM; + } + /* + * Other xattr-dependent features should be disabled without + * great disturbance to the user in ovl_make_workdir(). + */ + } + return 0; } @@ -948,17 +1027,29 @@ int ovl_show_options(struct seq_file *m, struct dentry *dentry) { struct super_block *sb = dentry->d_sb; struct ovl_fs *ofs = OVL_FS(sb); - size_t nr, nr_merged_lower = ofs->numlayer - ofs->numdatalayer; - const struct ovl_layer *data_layers = &ofs->layers[nr_merged_lower]; - - /* ofs->layers[0] is the upper layer */ - seq_printf(m, ",lowerdir=%s", ofs->layers[1].name); - /* dump regular lower layers */ - for (nr = 2; nr < nr_merged_lower; nr++) - seq_printf(m, ":%s", ofs->layers[nr].name); - /* dump data lower layers */ - for (nr = 0; nr < ofs->numdatalayer; nr++) - seq_printf(m, "::%s", data_layers[nr].name); + size_t nr, nr_merged_lower, nr_lower = 0; + char **lowerdirs = ofs->config.lowerdirs; + + /* + * lowerdirs[0] holds the colon separated list that user provided + * with lowerdir mount option. + * lowerdirs[1..numlayer] hold the lowerdir paths that were added + * using the lowerdir+ and datadir+ mount options. + * For now, we do not allow mixing the legacy lowerdir mount option + * with the new lowerdir+ and datadir+ mount options. + */ + if (lowerdirs[0]) { + seq_show_option(m, "lowerdir", lowerdirs[0]); + } else { + nr_lower = ofs->numlayer; + nr_merged_lower = nr_lower - ofs->numdatalayer; + } + for (nr = 1; nr < nr_lower; nr++) { + if (nr < nr_merged_lower) + seq_show_option(m, "lowerdir+", lowerdirs[nr]); + else + seq_show_option(m, "datadir+", lowerdirs[nr]); + } if (ofs->config.upperdir) { seq_show_option(m, "upperdir", ofs->config.upperdir); seq_show_option(m, "workdir", ofs->config.workdir); @@ -969,17 +1060,16 @@ int ovl_show_options(struct seq_file *m, struct dentry *dentry) seq_printf(m, ",redirect_dir=%s", ovl_redirect_mode(&ofs->config)); if (ofs->config.index != ovl_index_def) - seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off"); + seq_printf(m, ",index=%s", str_on_off(ofs->config.index)); if (ofs->config.uuid != ovl_uuid_def()) seq_printf(m, ",uuid=%s", ovl_uuid_mode(&ofs->config)); if (ofs->config.nfs_export != ovl_nfs_export_def) - seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ? - "on" : "off"); + seq_printf(m, ",nfs_export=%s", + str_on_off(ofs->config.nfs_export)); if (ofs->config.xino != ovl_xino_def() && !ovl_same_fs(ofs)) seq_printf(m, ",xino=%s", ovl_xino_mode(&ofs->config)); if (ofs->config.metacopy != ovl_metacopy_def) - seq_printf(m, ",metacopy=%s", - ofs->config.metacopy ? "on" : "off"); + seq_printf(m, ",metacopy=%s", str_on_off(ofs->config.metacopy)); if (ofs->config.ovl_volatile) seq_puts(m, ",volatile"); if (ofs->config.userxattr) diff --git a/fs/overlayfs/params.h b/fs/overlayfs/params.h index 8750da68ab2a..ffd53cdd8482 100644 --- a/fs/overlayfs/params.h +++ b/fs/overlayfs/params.h @@ -32,6 +32,8 @@ struct ovl_fs_context { size_t nr_data; struct ovl_opt_set set; struct ovl_fs_context_layer *lower; + char *lowerdir_all; /* user provided lowerdir string */ + bool casefold_set; }; int ovl_init_fs_context(struct fs_context *fc); diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index de39e067ae65..160960bb0ad0 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -13,6 +13,7 @@ #include <linux/security.h> #include <linux/cred.h> #include <linux/ratelimit.h> +#include <linux/overflow.h> #include "overlayfs.h" struct ovl_cache_entry { @@ -25,6 +26,9 @@ struct ovl_cache_entry { struct ovl_cache_entry *next_maybe_whiteout; bool is_upper; bool is_whiteout; + bool check_xwhiteout; + const char *c_name; + int c_len; char name[]; }; @@ -43,10 +47,12 @@ struct ovl_readdir_data { struct list_head *list; struct list_head middle; struct ovl_cache_entry *first_maybe_whiteout; + struct unicode_map *map; int count; int err; bool is_upper; bool d_type_supported; + bool in_xwhiteouts_dir; }; struct ovl_dir_file { @@ -63,6 +69,31 @@ static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n) return rb_entry(n, struct ovl_cache_entry, node); } +static int ovl_casefold(struct ovl_readdir_data *rdd, const char *str, int len, + char **dst) +{ + const struct qstr qstr = { .name = str, .len = len }; + char *cf_name; + int cf_len; + + if (!IS_ENABLED(CONFIG_UNICODE) || !rdd->map || is_dot_dotdot(str, len)) + return 0; + + cf_name = kmalloc(NAME_MAX, GFP_KERNEL); + if (!cf_name) { + rdd->err = -ENOMEM; + return -ENOMEM; + } + + cf_len = utf8_casefold(rdd->map, &qstr, cf_name, NAME_MAX); + if (cf_len > 0) + *dst = cf_name; + else + kfree(cf_name); + + return cf_len; +} + static bool ovl_cache_entry_find_link(const char *name, int len, struct rb_node ***link, struct rb_node **parent) @@ -76,10 +107,10 @@ static bool ovl_cache_entry_find_link(const char *name, int len, *parent = *newp; tmp = ovl_cache_entry_from_node(*newp); - cmp = strncmp(name, tmp->name, len); + cmp = strncmp(name, tmp->c_name, len); if (cmp > 0) newp = &tmp->node.rb_right; - else if (cmp < 0 || len < tmp->len) + else if (cmp < 0 || len < tmp->c_len) newp = &tmp->node.rb_left; else found = true; @@ -98,10 +129,10 @@ static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root, while (node) { struct ovl_cache_entry *p = ovl_cache_entry_from_node(node); - cmp = strncmp(name, p->name, len); + cmp = strncmp(name, p->c_name, len); if (cmp > 0) node = p->node.rb_right; - else if (cmp < 0 || len < p->len) + else if (cmp < 0 || len < p->c_len) node = p->node.rb_left; else return p; @@ -142,12 +173,12 @@ static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd, static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd, const char *name, int len, + const char *c_name, int c_len, u64 ino, unsigned int d_type) { struct ovl_cache_entry *p; - size_t size = offsetof(struct ovl_cache_entry, name[len + 1]); - p = kmalloc(size, GFP_KERNEL); + p = kmalloc(struct_size(p, name, len + 1), GFP_KERNEL); if (!p) return NULL; @@ -162,6 +193,16 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd, p->ino = 0; p->is_upper = rdd->is_upper; p->is_whiteout = false; + /* Defer check for overlay.whiteout to ovl_iterate() */ + p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG; + + if (c_name && c_name != name) { + p->c_name = c_name; + p->c_len = c_len; + } else { + p->c_name = p->name; + p->c_len = len; + } if (d_type == DT_CHR) { p->next_maybe_whiteout = rdd->first_maybe_whiteout; @@ -170,48 +211,62 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd, return p; } -static bool ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, - const char *name, int len, u64 ino, +/* Return 0 for found, 1 for added, <0 for error */ +static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, + const char *name, int len, + const char *c_name, int c_len, + u64 ino, unsigned int d_type) { struct rb_node **newp = &rdd->root->rb_node; struct rb_node *parent = NULL; struct ovl_cache_entry *p; - if (ovl_cache_entry_find_link(name, len, &newp, &parent)) - return true; + if (ovl_cache_entry_find_link(c_name, c_len, &newp, &parent)) + return 0; - p = ovl_cache_entry_new(rdd, name, len, ino, d_type); + p = ovl_cache_entry_new(rdd, name, len, c_name, c_len, ino, d_type); if (p == NULL) { rdd->err = -ENOMEM; - return false; + return -ENOMEM; } list_add_tail(&p->l_node, rdd->list); rb_link_node(&p->node, parent, newp); rb_insert_color(&p->node, rdd->root); - return true; + return 1; } -static bool ovl_fill_lowest(struct ovl_readdir_data *rdd, +/* Return 0 for found, 1 for added, <0 for error */ +static int ovl_fill_lowest(struct ovl_readdir_data *rdd, const char *name, int namelen, + const char *c_name, int c_len, loff_t offset, u64 ino, unsigned int d_type) { struct ovl_cache_entry *p; - p = ovl_cache_entry_find(rdd->root, name, namelen); + p = ovl_cache_entry_find(rdd->root, c_name, c_len); if (p) { list_move_tail(&p->l_node, &rdd->middle); + return 0; } else { - p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type); + p = ovl_cache_entry_new(rdd, name, namelen, c_name, c_len, + ino, d_type); if (p == NULL) rdd->err = -ENOMEM; else list_add_tail(&p->l_node, &rdd->middle); } - return rdd->err == 0; + return rdd->err ?: 1; +} + +static void ovl_cache_entry_free(struct ovl_cache_entry *p) +{ + if (p->c_name != p->name) + kfree(p->c_name); + kfree(p); } void ovl_cache_free(struct list_head *list) @@ -220,7 +275,7 @@ void ovl_cache_free(struct list_head *list) struct ovl_cache_entry *n; list_for_each_entry_safe(p, n, list, l_node) - kfree(p); + ovl_cache_entry_free(p); INIT_LIST_HEAD(list); } @@ -256,39 +311,61 @@ static bool ovl_fill_merge(struct dir_context *ctx, const char *name, { struct ovl_readdir_data *rdd = container_of(ctx, struct ovl_readdir_data, ctx); + struct ovl_fs *ofs = OVL_FS(rdd->dentry->d_sb); + const char *c_name = NULL; + char *cf_name = NULL; + int c_len = 0, ret; + + if (ofs->casefold) + c_len = ovl_casefold(rdd, name, namelen, &cf_name); + + if (rdd->err) + return false; + + if (c_len <= 0) { + c_name = name; + c_len = namelen; + } else { + c_name = cf_name; + } rdd->count++; if (!rdd->is_lowest) - return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type); + ret = ovl_cache_entry_add_rb(rdd, name, namelen, c_name, c_len, ino, d_type); else - return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type); + ret = ovl_fill_lowest(rdd, name, namelen, c_name, c_len, offset, ino, d_type); + + /* + * If ret == 1, that means that c_name is being used as part of struct + * ovl_cache_entry and will be freed at ovl_cache_free(). Otherwise, + * c_name was found in the rb-tree so we can free it here. + */ + if (ret != 1 && c_name != name) + kfree(c_name); + + return ret >= 0; } static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd) { - int err; - struct ovl_cache_entry *p; struct dentry *dentry, *dir = path->dentry; - const struct cred *old_cred; - - old_cred = ovl_override_creds(rdd->dentry->d_sb); - - err = down_write_killable(&dir->d_inode->i_rwsem); - if (!err) { - while (rdd->first_maybe_whiteout) { - p = rdd->first_maybe_whiteout; - rdd->first_maybe_whiteout = p->next_maybe_whiteout; - dentry = lookup_one(mnt_idmap(path->mnt), p->name, dir, p->len); - if (!IS_ERR(dentry)) { - p->is_whiteout = ovl_is_whiteout(dentry); - dput(dentry); - } + + while (rdd->first_maybe_whiteout) { + struct ovl_cache_entry *p = + rdd->first_maybe_whiteout; + rdd->first_maybe_whiteout = p->next_maybe_whiteout; + dentry = lookup_one_positive_killable(mnt_idmap(path->mnt), + &QSTR_LEN(p->name, p->len), + dir); + if (!IS_ERR(dentry)) { + p->is_whiteout = ovl_is_whiteout(dentry); + dput(dentry); + } else if (PTR_ERR(dentry) == -EINTR) { + return -EINTR; } - inode_unlock(dir->d_inode); } - revert_creds(old_cred); - return err; + return 0; } static inline int ovl_dir_read(const struct path *realpath, @@ -347,16 +424,26 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list, struct path realpath; struct ovl_readdir_data rdd = { .ctx.actor = ovl_fill_merge, + .ctx.count = INT_MAX, .dentry = dentry, .list = list, .root = root, .is_lowest = false, + .map = NULL, }; int idx, next; + const struct ovl_layer *layer; + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); for (idx = 0; idx != -1; idx = next) { - next = ovl_path_next(idx, dentry, &realpath); + next = ovl_path_next(idx, dentry, &realpath, &layer); + + if (ofs->casefold) + rdd.map = sb_encoding(realpath.dentry->d_sb); + rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry; + rdd.in_xwhiteouts_dir = layer->has_xwhiteouts && + ovl_dentry_has_xwhiteouts(dentry); if (next != -1) { err = ovl_dir_read(&realpath, &rdd); @@ -447,7 +534,7 @@ static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid, } /* - * Set d_ino for upper entries. Non-upper entries should always report + * Set d_ino for upper entries if needed. Non-upper entries should always report * the uppermost real inode ino and should not call this function. * * When not all layer are on same fs, report real ino also for upper. @@ -455,8 +542,11 @@ static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid, * When all layers are on the same fs, and upper has a reference to * copy up origin, call vfs_getattr() on the overlay entry to make * sure that d_ino will be consistent with st_ino from stat(2). + * + * Also checks the overlay.whiteout xattr by doing a full lookup which will return + * negative in this case. */ -static int ovl_cache_update_ino(const struct path *path, struct ovl_cache_entry *p) +static int ovl_cache_update(const struct path *path, struct ovl_cache_entry *p, bool update_ino) { struct dentry *dir = path->dentry; @@ -467,7 +557,7 @@ static int ovl_cache_update_ino(const struct path *path, struct ovl_cache_entry int xinobits = ovl_xino_bits(ofs); int err = 0; - if (!ovl_same_dev(ofs)) + if (!ovl_same_dev(ofs) && !p->check_xwhiteout) goto out; if (p->name[0] == '.') { @@ -481,7 +571,8 @@ static int ovl_cache_update_ino(const struct path *path, struct ovl_cache_entry goto get; } } - this = lookup_one(mnt_idmap(path->mnt), p->name, dir, p->len); + /* This checks also for xwhiteouts */ + this = lookup_one(mnt_idmap(path->mnt), &QSTR_LEN(p->name, p->len), dir); if (IS_ERR_OR_NULL(this) || !this->d_inode) { /* Mark a stale entry */ p->is_whiteout = true; @@ -494,6 +585,9 @@ static int ovl_cache_update_ino(const struct path *path, struct ovl_cache_entry } get: + if (!ovl_same_dev(ofs) || !update_ino) + goto out; + type = ovl_path_type(this); if (OVL_TYPE_ORIGIN(type)) { struct kstat stat; @@ -539,7 +633,7 @@ static bool ovl_fill_plain(struct dir_context *ctx, const char *name, container_of(ctx, struct ovl_readdir_data, ctx); rdd->count++; - p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type); + p = ovl_cache_entry_new(rdd, name, namelen, NULL, 0, ino, d_type); if (p == NULL) { rdd->err = -ENOMEM; return false; @@ -557,6 +651,7 @@ static int ovl_dir_read_impure(const struct path *path, struct list_head *list, struct ovl_cache_entry *p, *n; struct ovl_readdir_data rdd = { .ctx.actor = ovl_fill_plain, + .ctx.count = INT_MAX, .list = list, .root = root, }; @@ -572,13 +667,13 @@ static int ovl_dir_read_impure(const struct path *path, struct list_head *list, list_for_each_entry_safe(p, n, list, l_node) { if (strcmp(p->name, ".") != 0 && strcmp(p->name, "..") != 0) { - err = ovl_cache_update_ino(path, p); + err = ovl_cache_update(path, p, true); if (err) return err; } if (p->ino == p->real_ino) { list_del(&p->l_node); - kfree(p); + ovl_cache_entry_free(p); } else { struct rb_node **newp = &root->rb_node; struct rb_node *parent = NULL; @@ -658,6 +753,7 @@ static bool ovl_fill_real(struct dir_context *ctx, const char *name, struct ovl_readdir_translate *rdt = container_of(ctx, struct ovl_readdir_translate, ctx); struct dir_context *orig_ctx = rdt->orig_ctx; + bool res; if (rdt->parent_ino && strcmp(name, "..") == 0) { ino = rdt->parent_ino; @@ -672,7 +768,10 @@ static bool ovl_fill_real(struct dir_context *ctx, const char *name, name, namelen, rdt->xinowarn); } - return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type); + res = orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type); + ctx->count = orig_ctx->count; + + return res; } static bool ovl_is_impure_dir(struct file *file) @@ -699,6 +798,7 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx) const struct ovl_layer *lower_layer = ovl_layer_lower(dir); struct ovl_readdir_translate rdt = { .ctx.actor = ovl_fill_real, + .ctx.count = ctx->count, .orig_ctx = ctx, .xinobits = ovl_xino_bits(ofs), .xinowarn = ovl_xino_warn(ofs), @@ -732,36 +832,12 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx) return err; } - -static int ovl_iterate(struct file *file, struct dir_context *ctx) +static int ovl_iterate_merged(struct file *file, struct dir_context *ctx) { struct ovl_dir_file *od = file->private_data; struct dentry *dentry = file->f_path.dentry; - struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct ovl_cache_entry *p; - const struct cred *old_cred; - int err; - - old_cred = ovl_override_creds(dentry->d_sb); - if (!ctx->pos) - ovl_dir_reset(file); - - if (od->is_real) { - /* - * If parent is merge, then need to adjust d_ino for '..', if - * dir is impure then need to adjust d_ino for copied up - * entries. - */ - if (ovl_xino_bits(ofs) || - (ovl_same_fs(ofs) && - (ovl_is_impure_dir(file) || - OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) { - err = ovl_iterate_real(file, ctx); - } else { - err = iterate_dir(od->realfile, ctx); - } - goto out; - } + int err = 0; if (!od->cache) { struct ovl_dir_cache *cache; @@ -769,7 +845,7 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) cache = ovl_cache_get(dentry); err = PTR_ERR(cache); if (IS_ERR(cache)) - goto out; + return err; od->cache = cache; ovl_seek_cursor(od, ctx->pos); @@ -778,13 +854,13 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) while (od->cursor != &od->cache->entries) { p = list_entry(od->cursor, struct ovl_cache_entry, l_node); if (!p->is_whiteout) { - if (!p->ino) { - err = ovl_cache_update_ino(&file->f_path, p); + if (!p->ino || p->check_xwhiteout) { + err = ovl_cache_update(&file->f_path, p, !p->ino); if (err) - goto out; + return err; } } - /* ovl_cache_update_ino() sets is_whiteout on stale entry */ + /* ovl_cache_update() sets is_whiteout on stale entry */ if (!p->is_whiteout) { if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) break; @@ -792,12 +868,50 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) od->cursor = p->l_node.next; ctx->pos++; } - err = 0; -out: - revert_creds(old_cred); return err; } +static bool ovl_need_adjust_d_ino(struct file *file) +{ + struct dentry *dentry = file->f_path.dentry; + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + + /* If parent is merge, then need to adjust d_ino for '..' */ + if (ovl_xino_bits(ofs)) + return true; + + /* Can't do consistent inode numbering */ + if (!ovl_same_fs(ofs)) + return false; + + /* If dir is impure then need to adjust d_ino for copied up entries */ + if (ovl_is_impure_dir(file) || + OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))) + return true; + + /* Pure: no need to adjust d_ino */ + return false; +} + + +static int ovl_iterate(struct file *file, struct dir_context *ctx) +{ + struct ovl_dir_file *od = file->private_data; + + if (!ctx->pos) + ovl_dir_reset(file); + + with_ovl_creds(file_dentry(file)->d_sb) { + if (!od->is_real) + return ovl_iterate_merged(file, ctx); + + if (ovl_need_adjust_d_ino(file)) + return ovl_iterate_real(file, ctx); + + return iterate_dir(od->realfile, ctx); + } +} + static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin) { loff_t res; @@ -841,14 +955,8 @@ out_unlock: static struct file *ovl_dir_open_realfile(const struct file *file, const struct path *realpath) { - struct file *res; - const struct cred *old_cred; - - old_cred = ovl_override_creds(file_inode(file)->i_sb); - res = ovl_path_open(realpath, O_RDONLY | (file->f_flags & O_LARGEFILE)); - revert_creds(old_cred); - - return res; + with_ovl_creds(file_inode(file)->i_sb) + return ovl_path_open(realpath, O_RDONLY | (file->f_flags & O_LARGEFILE)); } /* @@ -969,11 +1077,9 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) int err; struct ovl_cache_entry *p, *n; struct rb_root root = RB_ROOT; - const struct cred *old_cred; - old_cred = ovl_override_creds(dentry->d_sb); - err = ovl_dir_read_merged(dentry, list, &root); - revert_creds(old_cred); + with_ovl_creds(dentry->d_sb) + err = ovl_dir_read_merged(dentry, list, &root); if (err) return err; @@ -1001,7 +1107,7 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) del_entry: list_del(&p->l_node); - kfree(p); + ovl_cache_entry_free(p); } return err; @@ -1012,14 +1118,13 @@ void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper, { struct ovl_cache_entry *p; - inode_lock_nested(upper->d_inode, I_MUTEX_CHILD); list_for_each_entry(p, list, l_node) { struct dentry *dentry; if (WARN_ON(!p->is_whiteout || !p->is_upper)) continue; - dentry = ovl_lookup_upper(ofs, p->name, upper, p->len); + dentry = ovl_lookup_upper_unlocked(ofs, p->name, upper, p->len); if (IS_ERR(dentry)) { pr_err("lookup '%s/%.*s' failed (%i)\n", upper->d_name.name, p->len, p->name, @@ -1027,10 +1132,9 @@ void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper, continue; } if (dentry->d_inode) - ovl_cleanup(ofs, upper->d_inode, dentry); + ovl_cleanup(ofs, upper, dentry); dput(dentry); } - inode_unlock(upper->d_inode); } static bool ovl_check_d_type(struct dir_context *ctx, const char *name, @@ -1059,6 +1163,7 @@ int ovl_check_d_type_supported(const struct path *realpath) int err; struct ovl_readdir_data rdd = { .ctx.actor = ovl_check_d_type, + .ctx.count = INT_MAX, .d_type_supported = false, }; @@ -1075,11 +1180,11 @@ static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *pa int level) { int err; - struct inode *dir = path->dentry->d_inode; LIST_HEAD(list); struct ovl_cache_entry *p; struct ovl_readdir_data rdd = { .ctx.actor = ovl_fill_plain, + .ctx.count = INT_MAX, .list = &list, }; bool incompat = false; @@ -1100,7 +1205,6 @@ static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *pa if (err) goto out; - inode_lock_nested(dir, I_MUTEX_PARENT); list_for_each_entry(p, &list, l_node) { struct dentry *dentry; @@ -1115,39 +1219,40 @@ static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *pa err = -EINVAL; break; } - dentry = ovl_lookup_upper(ofs, p->name, path->dentry, p->len); + dentry = ovl_lookup_upper_unlocked(ofs, p->name, path->dentry, p->len); if (IS_ERR(dentry)) continue; if (dentry->d_inode) - err = ovl_workdir_cleanup(ofs, dir, path->mnt, dentry, level); + err = ovl_workdir_cleanup(ofs, path->dentry, path->mnt, + dentry, level); dput(dentry); if (err) break; } - inode_unlock(dir); out: ovl_cache_free(&list); return err; } -int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir, +int ovl_workdir_cleanup(struct ovl_fs *ofs, struct dentry *parent, struct vfsmount *mnt, struct dentry *dentry, int level) { int err; - if (!d_is_dir(dentry) || level > 1) { - return ovl_cleanup(ofs, dir, dentry); - } + if (!d_is_dir(dentry) || level > 1) + return ovl_cleanup(ofs, parent, dentry); - err = ovl_do_rmdir(ofs, dir, dentry); + dentry = start_removing_dentry(parent, dentry); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + err = ovl_do_rmdir(ofs, parent->d_inode, dentry); + end_removing(dentry); if (err) { struct path path = { .mnt = mnt, .dentry = dentry }; - inode_unlock(dir); err = ovl_workdir_cleanup_recurse(ofs, &path, level + 1); - inode_lock_nested(dir, I_MUTEX_PARENT); if (!err) - err = ovl_cleanup(ofs, dir, dentry); + err = ovl_cleanup(ofs, parent, dentry); } return err; @@ -1156,14 +1261,14 @@ int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir, int ovl_indexdir_cleanup(struct ovl_fs *ofs) { int err; - struct dentry *indexdir = ofs->indexdir; + struct dentry *indexdir = ofs->workdir; struct dentry *index = NULL; - struct inode *dir = indexdir->d_inode; struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir }; LIST_HEAD(list); struct ovl_cache_entry *p; struct ovl_readdir_data rdd = { .ctx.actor = ovl_fill_plain, + .ctx.count = INT_MAX, .list = &list, }; @@ -1171,7 +1276,6 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) if (err) goto out; - inode_lock_nested(dir, I_MUTEX_PARENT); list_for_each_entry(p, &list, l_node) { if (p->name[0] == '.') { if (p->len == 1) @@ -1179,7 +1283,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) if (p->len == 2 && p->name[1] == '.') continue; } - index = ovl_lookup_upper(ofs, p->name, indexdir, p->len); + index = ovl_lookup_upper_unlocked(ofs, p->name, indexdir, p->len); if (IS_ERR(index)) { err = PTR_ERR(index); index = NULL; @@ -1187,7 +1291,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) } /* Cleanup leftover from index create/cleanup attempt */ if (index->d_name.name[0] == '#') { - err = ovl_workdir_cleanup(ofs, dir, path.mnt, index, 1); + err = ovl_workdir_cleanup(ofs, indexdir, path.mnt, index, 1); if (err) break; goto next; @@ -1197,7 +1301,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) goto next; } else if (err == -ESTALE) { /* Cleanup stale index entries */ - err = ovl_cleanup(ofs, dir, index); + err = ovl_cleanup(ofs, indexdir, index); } else if (err != -ENOENT) { /* * Abort mount to avoid corrupting the index if @@ -1210,10 +1314,10 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) * Whiteout orphan index to block future open by * handle after overlay nlink dropped to zero. */ - err = ovl_cleanup_and_whiteout(ofs, dir, index); + err = ovl_cleanup_and_whiteout(ofs, indexdir, index); } else { /* Cleanup orphan index entries */ - err = ovl_cleanup(ofs, dir, index); + err = ovl_cleanup(ofs, indexdir, index); } if (err) @@ -1224,7 +1328,6 @@ next: index = NULL; } dput(index); - inode_unlock(dir); out: ovl_cache_free(&list); if (err) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index def266b5e2a3..ba9146f22a2c 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -28,33 +28,38 @@ MODULE_LICENSE("GPL"); struct ovl_dir_cache; -static struct dentry *ovl_d_real(struct dentry *dentry, - const struct inode *inode) +static struct dentry *ovl_d_real(struct dentry *dentry, enum d_real_type type) { - struct dentry *real = NULL, *lower; + struct dentry *upper, *lower; int err; - /* It's an overlay file */ - if (inode && d_inode(dentry) == inode) - return dentry; + switch (type) { + case D_REAL_DATA: + case D_REAL_METADATA: + break; + default: + goto bug; + } if (!d_is_reg(dentry)) { - if (!inode || inode == d_inode(dentry)) - return dentry; - goto bug; + /* d_real_inode() is only relevant for regular files */ + return dentry; } - real = ovl_dentry_upper(dentry); - if (real && (inode == d_inode(real))) - return real; + upper = ovl_dentry_upper(dentry); + if (upper && (type == D_REAL_METADATA || + ovl_has_upperdata(d_inode(dentry)))) + return upper; - if (real && !inode && ovl_has_upperdata(d_inode(dentry))) - return real; + if (type == D_REAL_METADATA) { + lower = ovl_dentry_lower(dentry); + goto real_lower; + } /* - * Best effort lazy lookup of lowerdata for !inode case to return + * Best effort lazy lookup of lowerdata for D_REAL_DATA case to return * the real lowerdata dentry. The only current caller of d_real() with - * NULL inode is d_real_inode() from trace_uprobe and this caller is + * D_REAL_DATA is d_real_inode() from trace_uprobe and this caller is * likely going to be followed reading from the file, before placing * uprobes on offset within the file, so lowerdata should be available * when setting the uprobe. @@ -65,18 +70,13 @@ static struct dentry *ovl_d_real(struct dentry *dentry, lower = ovl_dentry_lowerdata(dentry); if (!lower) goto bug; - real = lower; - /* Handle recursion */ - real = d_real(real, inode); +real_lower: + /* Handle recursion into stacked lower fs */ + return d_real(lower, type); - if (!inode || inode == d_inode(real)) - return real; bug: - WARN(1, "%s(%pd4, %s:%lu): real dentry (%p/%lu) not found\n", - __func__, dentry, inode ? inode->i_sb->s_id : "NULL", - inode ? inode->i_ino : 0, real, - real && d_inode(real) ? d_inode(real)->i_ino : 0); + WARN(1, "%s(%pd4, %d): real dentry not found\n", __func__, dentry, type); return dentry; } @@ -91,7 +91,24 @@ static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak) if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) ret = d->d_op->d_weak_revalidate(d, flags); } else if (d->d_flags & DCACHE_OP_REVALIDATE) { - ret = d->d_op->d_revalidate(d, flags); + struct dentry *parent; + struct inode *dir; + struct name_snapshot n; + + if (flags & LOOKUP_RCU) { + parent = READ_ONCE(d->d_parent); + dir = d_inode_rcu(parent); + if (!dir) + return -ECHILD; + } else { + parent = dget_parent(d); + dir = d_inode(parent); + } + take_dentry_name_snapshot(&n, d); + ret = d->d_op->d_revalidate(dir, &n.name, d, flags); + release_dentry_name_snapshot(&n); + if (!(flags & LOOKUP_RCU)) + dput(parent); if (!ret) { if (!(flags & LOOKUP_RCU)) d_invalidate(d); @@ -104,17 +121,27 @@ static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak) static int ovl_dentry_revalidate_common(struct dentry *dentry, unsigned int flags, bool weak) { - struct ovl_entry *oe = OVL_E(dentry); - struct ovl_path *lowerstack = ovl_lowerstack(oe); + struct ovl_entry *oe; + struct ovl_path *lowerstack; struct inode *inode = d_inode_rcu(dentry); struct dentry *upper; unsigned int i; int ret = 1; - /* Careful in RCU mode */ - if (!inode) + if (!inode) { + /* + * Lookup of negative dentries will call ovl_dentry_init_flags() + * with NULL upperdentry and NULL oe, resulting in the + * DCACHE_OP*_REVALIDATE flags being cleared. Hence the only + * way to get a negative inode is due to a race with dentry + * destruction. + */ + WARN_ON(!(flags & LOOKUP_RCU)); return -ECHILD; + } + oe = OVL_I_E(inode); + lowerstack = ovl_lowerstack(oe); upper = ovl_i_dentry_upper(inode); if (upper) ret = ovl_revalidate_real(upper, flags, weak); @@ -125,7 +152,8 @@ static int ovl_dentry_revalidate_common(struct dentry *dentry, return ret; } -static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) +static int ovl_dentry_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { return ovl_dentry_revalidate_common(dentry, flags, false); } @@ -141,6 +169,16 @@ static const struct dentry_operations ovl_dentry_operations = { .d_weak_revalidate = ovl_dentry_weak_revalidate, }; +#if IS_ENABLED(CONFIG_UNICODE) +static const struct dentry_operations ovl_dentry_ci_operations = { + .d_real = ovl_d_real, + .d_revalidate = ovl_dentry_revalidate, + .d_weak_revalidate = ovl_dentry_weak_revalidate, + .d_hash = generic_ci_d_hash, + .d_compare = generic_ci_d_compare, +}; +#endif + static struct kmem_cache *ovl_inode_cachep; static struct inode *ovl_alloc_inode(struct super_block *sb) @@ -167,6 +205,7 @@ static void ovl_free_inode(struct inode *inode) struct ovl_inode *oi = OVL_I(inode); kfree(oi->redirect); + kfree(oi->oe); mutex_destroy(&oi->lock); kmem_cache_free(ovl_inode_cachep, oi); } @@ -176,7 +215,7 @@ static void ovl_destroy_inode(struct inode *inode) struct ovl_inode *oi = OVL_I(inode); dput(oi->__upperdentry); - ovl_free_entry(oi->oe); + ovl_stack_put(ovl_lowerstack(oi->oe), ovl_numlower(oi->oe)); if (S_ISDIR(inode->i_mode)) ovl_dir_cache_free(inode); else @@ -199,15 +238,9 @@ static int ovl_sync_fs(struct super_block *sb, int wait) int ret; ret = ovl_sync_status(ofs); - /* - * We have to always set the err, because the return value isn't - * checked in syncfs, and instead indirectly return an error via - * the sb's writeback errseq, which VFS inspects after this call. - */ - if (ret < 0) { - errseq_set(&sb->s_wb_err, -EIO); + + if (ret < 0) return -EIO; - } if (!ret) return ret; @@ -265,7 +298,7 @@ static const struct super_operations ovl_super_operations = { .alloc_inode = ovl_alloc_inode, .free_inode = ovl_free_inode, .destroy_inode = ovl_destroy_inode, - .drop_inode = generic_delete_inode, + .drop_inode = inode_just_drop, .put_super = ovl_put_super, .sync_fs = ovl_sync_fs, .statfs = ovl_statfs, @@ -284,9 +317,8 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, int err; bool retried = false; - inode_lock_nested(dir, I_MUTEX_PARENT); retry: - work = ovl_lookup_upper(ofs, name, ofs->workbasedir, strlen(name)); + work = ovl_start_creating_upper(ofs, ofs->workbasedir, &QSTR(name)); if (!IS_ERR(work)) { struct iattr attr = { @@ -295,26 +327,26 @@ retry: }; if (work->d_inode) { + end_creating_keep(work); + if (persist) + return work; err = -EEXIST; if (retried) goto out_dput; - - if (persist) - goto out_unlock; - retried = true; - err = ovl_workdir_cleanup(ofs, dir, mnt, work, 0); + err = ovl_workdir_cleanup(ofs, ofs->workbasedir, mnt, work, 0); dput(work); - if (err == -EINVAL) { - work = ERR_PTR(err); - goto out_unlock; - } + if (err == -EINVAL) + return ERR_PTR(err); + goto retry; } - err = ovl_mkdir_real(ofs, dir, &work, attr.ia_mode); - if (err) - goto out_dput; + work = ovl_do_mkdir(ofs, dir, work, attr.ia_mode); + end_creating_keep(work); + err = PTR_ERR(work); + if (IS_ERR(work)) + goto out_err; /* Weird filesystem returning with hashed negative (kernfs)? */ err = -EINVAL; @@ -352,8 +384,6 @@ retry: err = PTR_ERR(work); goto out_err; } -out_unlock: - inode_unlock(dir); return work; out_dput: @@ -361,8 +391,7 @@ out_dput: out_err: pr_warn("failed to create directory %s/%s (errno: %i); mounting read-only\n", ofs->config.workdir, name, -err); - work = NULL; - goto out_unlock; + return NULL; } static int ovl_check_namelen(const struct path *path, struct ovl_fs *ofs, @@ -379,7 +408,7 @@ static int ovl_check_namelen(const struct path *path, struct ovl_fs *ofs, return err; } -static int ovl_lower_dir(const char *name, struct path *path, +static int ovl_lower_dir(const char *name, const struct path *path, struct ovl_fs *ofs, int *stack_depth) { int fh_type; @@ -428,74 +457,14 @@ static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) bool ok = false; if (workdir != upperdir) { - ok = (lock_rename(workdir, upperdir) == NULL); - unlock_rename(workdir, upperdir); + struct dentry *trap = lock_rename(workdir, upperdir); + if (!IS_ERR(trap)) + unlock_rename(workdir, upperdir); + ok = (trap == NULL); } return ok; } -static int ovl_own_xattr_get(const struct xattr_handler *handler, - struct dentry *dentry, struct inode *inode, - const char *name, void *buffer, size_t size) -{ - return -EOPNOTSUPP; -} - -static int ovl_own_xattr_set(const struct xattr_handler *handler, - struct mnt_idmap *idmap, - struct dentry *dentry, struct inode *inode, - const char *name, const void *value, - size_t size, int flags) -{ - return -EOPNOTSUPP; -} - -static int ovl_other_xattr_get(const struct xattr_handler *handler, - struct dentry *dentry, struct inode *inode, - const char *name, void *buffer, size_t size) -{ - return ovl_xattr_get(dentry, inode, name, buffer, size); -} - -static int ovl_other_xattr_set(const struct xattr_handler *handler, - struct mnt_idmap *idmap, - struct dentry *dentry, struct inode *inode, - const char *name, const void *value, - size_t size, int flags) -{ - return ovl_xattr_set(dentry, inode, name, value, size, flags); -} - -static const struct xattr_handler ovl_own_trusted_xattr_handler = { - .prefix = OVL_XATTR_TRUSTED_PREFIX, - .get = ovl_own_xattr_get, - .set = ovl_own_xattr_set, -}; - -static const struct xattr_handler ovl_own_user_xattr_handler = { - .prefix = OVL_XATTR_USER_PREFIX, - .get = ovl_own_xattr_get, - .set = ovl_own_xattr_set, -}; - -static const struct xattr_handler ovl_other_xattr_handler = { - .prefix = "", /* catch all */ - .get = ovl_other_xattr_get, - .set = ovl_other_xattr_set, -}; - -static const struct xattr_handler *ovl_trusted_xattr_handlers[] = { - &ovl_own_trusted_xattr_handler, - &ovl_other_xattr_handler, - NULL -}; - -static const struct xattr_handler *ovl_user_xattr_handlers[] = { - &ovl_own_user_xattr_handler, - &ovl_other_xattr_handler, - NULL -}; - static int ovl_setup_trap(struct super_block *sb, struct dentry *dir, struct inode **ptrap, const char *name) { @@ -569,11 +538,6 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs, upper_layer->idx = 0; upper_layer->fsid = 0; - err = -ENOMEM; - upper_layer->name = kstrdup(ofs->config.upperdir, GFP_KERNEL); - if (!upper_layer->name) - goto out; - /* * Inherit SB_NOSEC flag from upperdir. * @@ -606,56 +570,56 @@ out: static int ovl_check_rename_whiteout(struct ovl_fs *ofs) { struct dentry *workdir = ofs->workdir; - struct inode *dir = d_inode(workdir); struct dentry *temp; - struct dentry *dest; struct dentry *whiteout; struct name_snapshot name; + struct renamedata rd = {}; + char name2[OVL_TEMPNAME_SIZE]; int err; - inode_lock_nested(dir, I_MUTEX_PARENT); - temp = ovl_create_temp(ofs, workdir, OVL_CATTR(S_IFREG | 0)); err = PTR_ERR(temp); if (IS_ERR(temp)) - goto out_unlock; + return err; - dest = ovl_lookup_temp(ofs, workdir); - err = PTR_ERR(dest); - if (IS_ERR(dest)) { + rd.mnt_idmap = ovl_upper_mnt_idmap(ofs); + rd.old_parent = workdir; + rd.new_parent = workdir; + rd.flags = RENAME_WHITEOUT; + ovl_tempname(name2); + err = start_renaming_dentry(&rd, 0, temp, &QSTR(name2)); + if (err) { dput(temp); - goto out_unlock; + return err; } /* Name is inline and stable - using snapshot as a copy helper */ take_dentry_name_snapshot(&name, temp); - err = ovl_do_rename(ofs, dir, temp, dir, dest, RENAME_WHITEOUT); + err = ovl_do_rename_rd(&rd); + end_renaming(&rd); if (err) { if (err == -EINVAL) err = 0; goto cleanup_temp; } - whiteout = ovl_lookup_upper(ofs, name.name.name, workdir, name.name.len); + whiteout = ovl_lookup_upper_unlocked(ofs, name.name.name, + workdir, name.name.len); err = PTR_ERR(whiteout); if (IS_ERR(whiteout)) goto cleanup_temp; - err = ovl_is_whiteout(whiteout); + err = ovl_upper_is_whiteout(ofs, whiteout); /* Best effort cleanup of whiteout and temp file */ if (err) - ovl_cleanup(ofs, dir, whiteout); + ovl_cleanup(ofs, workdir, whiteout); dput(whiteout); cleanup_temp: - ovl_cleanup(ofs, dir, temp); + ovl_cleanup(ofs, workdir, temp); release_dentry_name_snapshot(&name); dput(temp); - dput(dest); - -out_unlock: - inode_unlock(dir); return err; } @@ -664,15 +628,15 @@ static struct dentry *ovl_lookup_or_create(struct ovl_fs *ofs, struct dentry *parent, const char *name, umode_t mode) { - size_t len = strlen(name); struct dentry *child; - inode_lock_nested(parent->d_inode, I_MUTEX_PARENT); - child = ovl_lookup_upper(ofs, name, parent, len); - if (!IS_ERR(child) && !child->d_inode) - child = ovl_create_real(ofs, parent->d_inode, child, - OVL_CATTR(mode)); - inode_unlock(parent->d_inode); + child = ovl_start_creating_upper(ofs, parent, &QSTR(name)); + if (!IS_ERR(child)) { + if (!child->d_inode) + child = ovl_create_real(ofs, parent, child, + OVL_CATTR(mode)); + end_creating_keep(child); + } dput(parent); return child; @@ -881,15 +845,20 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, { struct vfsmount *mnt = ovl_upper_mnt(ofs); struct dentry *indexdir; + struct dentry *origin = ovl_lowerstack(oe)->dentry; + const struct ovl_fh *fh; int err; + fh = ovl_get_origin_fh(ofs, origin); + if (IS_ERR(fh)) + return PTR_ERR(fh); + err = mnt_want_write(mnt); if (err) - return err; + goto out_free_fh; /* Verify lower root is upper root origin */ - err = ovl_verify_origin(ofs, upperpath->dentry, - ovl_lowerstack(oe)->dentry, true); + err = ovl_verify_origin_fh(ofs, upperpath->dentry, fh, true); if (err) { pr_err("failed to verify upper root origin\n"); goto out; @@ -904,10 +873,8 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, if (IS_ERR(indexdir)) { err = PTR_ERR(indexdir); } else if (indexdir) { - ofs->indexdir = indexdir; - ofs->workdir = dget(indexdir); - - err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap, + ofs->workdir = indexdir; + err = ovl_setup_trap(sb, indexdir, &ofs->workdir_trap, "indexdir"); if (err) goto out; @@ -920,15 +887,15 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, * ".overlay.upper" to indicate that index may have * directory entries. */ - if (ovl_check_origin_xattr(ofs, ofs->indexdir)) { - err = ovl_verify_set_fh(ofs, ofs->indexdir, - OVL_XATTR_ORIGIN, - upperpath->dentry, true, false); + if (ovl_check_origin_xattr(ofs, indexdir)) { + err = ovl_verify_origin_xattr(ofs, indexdir, + OVL_XATTR_ORIGIN, + upperpath->dentry, true, + false); if (err) pr_err("failed to verify index dir 'origin' xattr\n"); } - err = ovl_verify_upper(ofs, ofs->indexdir, upperpath->dentry, - true); + err = ovl_verify_upper(ofs, indexdir, upperpath->dentry, true); if (err) pr_err("failed to verify index dir 'upper' xattr\n"); @@ -936,11 +903,13 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, if (!err) err = ovl_indexdir_cleanup(ofs); } - if (err || !ofs->indexdir) + if (err || !indexdir) pr_warn("try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); out: mnt_drop_write(mnt); +out_free_fh: + kfree(fh); return err; } @@ -1035,6 +1004,25 @@ static int ovl_get_data_fsid(struct ovl_fs *ofs) return ofs->numfs; } +/* + * Set the ovl sb encoding as the same one used by the first layer + */ +static int ovl_set_encoding(struct super_block *sb, struct super_block *fs_sb) +{ + if (!sb_has_encoding(fs_sb)) + return 0; + +#if IS_ENABLED(CONFIG_UNICODE) + if (sb_has_strict_encoding(fs_sb)) { + pr_err("strict encoding not supported\n"); + return -EINVAL; + } + + sb->s_encoding = fs_sb->s_encoding; + sb->s_encoding_flags = fs_sb->s_encoding_flags; +#endif + return 0; +} static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, struct ovl_fs_context *ctx, struct ovl_layer *layers) @@ -1068,6 +1056,12 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, if (ovl_upper_mnt(ofs)) { ofs->fs[0].sb = ovl_upper_mnt(ofs)->mnt_sb; ofs->fs[0].is_lower = false; + + if (ofs->casefold) { + err = ovl_set_encoding(sb, ofs->fs[0].sb); + if (err) + return err; + } } nr_merged_lower = ctx->nr - ctx->nr_data; @@ -1122,10 +1116,24 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, layers[ofs->numlayer].idx = ofs->numlayer; layers[ofs->numlayer].fsid = fsid; layers[ofs->numlayer].fs = &ofs->fs[fsid]; - layers[ofs->numlayer].name = l->name; + /* Store for printing lowerdir=... in ovl_show_options() */ + ofs->config.lowerdirs[ofs->numlayer] = l->name; l->name = NULL; ofs->numlayer++; ofs->fs[fsid].is_lower = true; + + if (ofs->casefold) { + if (!ovl_upper_mnt(ofs) && !sb_has_encoding(sb)) { + err = ovl_set_encoding(sb, ofs->fs[fsid].sb); + if (err) + return err; + } + + if (!sb_same_encoding(sb, mnt->mnt_sb)) { + pr_err("all layers must have the same encoding\n"); + return -EINVAL; + } + } } /* @@ -1181,6 +1189,11 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, return ERR_PTR(-EINVAL); } + if (ctx->nr == ctx->nr_data) { + pr_err("at least one non-data lowerdir is required\n"); + return ERR_PTR(-EINVAL); + } + err = -EINVAL; for (i = 0; i < ctx->nr; i++) { l = &ctx->lower[i]; @@ -1297,6 +1310,7 @@ static struct dentry *ovl_get_root(struct super_block *sb, struct ovl_entry *oe) { struct dentry *root; + struct ovl_fs *ofs = OVL_FS(sb); struct ovl_path *lowerpath = ovl_lowerstack(oe); unsigned long ino = d_inode(lowerpath->dentry)->i_ino; int fsid = lowerpath->layer->fsid; @@ -1318,11 +1332,26 @@ static struct dentry *ovl_get_root(struct super_block *sb, ovl_set_flag(OVL_IMPURE, d_inode(root)); } + /* Look for xwhiteouts marker except in the lowermost layer */ + for (int i = 0; i < ovl_numlower(oe) - 1; i++, lowerpath++) { + struct path path = { + .mnt = lowerpath->layer->mnt, + .dentry = lowerpath->dentry, + }; + + /* overlay.opaque=x means xwhiteouts directory */ + if (ovl_get_opaquedir_val(ofs, &path) == 'x') { + ovl_layer_set_xwhiteouts(ofs, lowerpath->layer); + ovl_dentry_set_xwhiteouts(root); + } + } + /* Root is always merge -> can have whiteouts */ ovl_set_flag(OVL_WHITEOUTS, d_inode(root)); ovl_dentry_set_flag(OVL_E_CONNECTED, root); ovl_set_upperdata(d_inode(root)); ovl_inode_init(d_inode(root), &oip, ino, fsid); + WARN_ON(!!IS_CASEFOLDED(d_inode(root)) != ofs->casefold); ovl_dentry_init_flags(root, upperdentry, oe, DCACHE_OP_WEAK_REVALIDATE); /* root keeps a reference of upperdentry */ dget(upperdentry); @@ -1330,45 +1359,57 @@ static struct dentry *ovl_get_root(struct super_block *sb, return root; } -int ovl_fill_super(struct super_block *sb, struct fs_context *fc) +static void ovl_set_d_op(struct super_block *sb) { +#if IS_ENABLED(CONFIG_UNICODE) struct ovl_fs *ofs = sb->s_fs_info; + + if (ofs->casefold) { + set_default_d_op(sb, &ovl_dentry_ci_operations); + return; + } +#endif + set_default_d_op(sb, &ovl_dentry_operations); +} + +static int ovl_fill_super_creds(struct fs_context *fc, struct super_block *sb) +{ + struct ovl_fs *ofs = sb->s_fs_info; + struct cred *creator_cred = (struct cred *)ofs->creator_cred; struct ovl_fs_context *ctx = fc->fs_private; - struct dentry *root_dentry; - struct ovl_entry *oe; struct ovl_layer *layers; - struct cred *cred; + struct ovl_entry *oe = NULL; int err; - err = -EIO; - if (WARN_ON(fc->user_ns != current_user_ns())) - goto out_err; - - sb->s_d_op = &ovl_dentry_operations; - - err = -ENOMEM; - ofs->creator_cred = cred = prepare_creds(); - if (!cred) - goto out_err; - err = ovl_fs_params_verify(ctx, &ofs->config); if (err) - goto out_err; + return err; err = -EINVAL; if (ctx->nr == 0) { if (!(fc->sb_flags & SB_SILENT)) pr_err("missing 'lowerdir'\n"); - goto out_err; + return err; } err = -ENOMEM; layers = kcalloc(ctx->nr + 1, sizeof(struct ovl_layer), GFP_KERNEL); if (!layers) - goto out_err; + return err; + ofs->config.lowerdirs = kcalloc(ctx->nr + 1, sizeof(char *), GFP_KERNEL); + if (!ofs->config.lowerdirs) { + kfree(layers); + return err; + } ofs->layers = layers; - /* Layer 0 is reserved for upper even if there's no upper */ + /* + * Layer 0 is reserved for upper even if there's no upper. + * config.lowerdirs[0] is used for storing the user provided colon + * separated lowerdir string. + */ + ofs->config.lowerdirs[0] = ctx->lowerdir_all; + ctx->lowerdir_all = NULL; ofs->numlayer = 1; sb->s_stack_depth = 0; @@ -1392,12 +1433,12 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc) err = -EINVAL; if (!ofs->config.workdir) { pr_err("missing 'workdir'\n"); - goto out_err; + return err; } err = ovl_get_upper(sb, ofs, &layers[0], &ctx->upper); if (err) - goto out_err; + return err; upper_sb = ovl_upper_mnt(ofs)->mnt_sb; if (!ovl_should_sync(ofs)) { @@ -1405,13 +1446,13 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc) if (errseq_check(&upper_sb->s_wb_err, ofs->errseq)) { err = -EIO; pr_err("Cannot mount volatile when upperdir has an unseen error. Sync upperdir fs to clear state.\n"); - goto out_err; + return err; } } err = ovl_get_workdir(sb, ofs, &ctx->upper, &ctx->work); if (err) - goto out_err; + return err; if (!ofs->workdir) sb->s_flags |= SB_RDONLY; @@ -1422,7 +1463,7 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc) oe = ovl_get_lowerstack(sb, ctx, ofs, layers); err = PTR_ERR(oe); if (IS_ERR(oe)) - goto out_err; + return err; /* If the upper fs is nonexistent, we mark overlayfs r/o too */ if (!ovl_upper_mnt(ofs)) @@ -1442,7 +1483,7 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc) goto out_free_oe; /* Force r/o mount with no index dir */ - if (!ofs->indexdir) + if (!ofs->workdir) sb->s_flags |= SB_RDONLY; } @@ -1451,7 +1492,7 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc) goto out_free_oe; /* Show index=off in /proc/mounts for forced r/o mount */ - if (!ofs->indexdir) { + if (!ofs->workdir) { ofs->config.index = false; if (ovl_upper_mnt(ofs) && ofs->config.nfs_export) { pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n"); @@ -1475,29 +1516,62 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_export_op = &ovl_export_fid_operations; /* Never override disk quota limits or use reserved space */ - cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); + cap_lower(creator_cred->cap_effective, CAP_SYS_RESOURCE); sb->s_magic = OVERLAYFS_SUPER_MAGIC; - sb->s_xattr = ofs->config.userxattr ? ovl_user_xattr_handlers : - ovl_trusted_xattr_handlers; + sb->s_xattr = ovl_xattr_handlers(ofs); sb->s_fs_info = ofs; +#ifdef CONFIG_FS_POSIX_ACL sb->s_flags |= SB_POSIXACL; - sb->s_iflags |= SB_I_SKIP_SYNC | SB_I_IMA_UNVERIFIABLE_SIGNATURE; +#endif + sb->s_iflags |= SB_I_SKIP_SYNC; + /* + * Ensure that umask handling is done by the filesystems used + * for the the upper layer instead of overlayfs as that would + * lead to unexpected results. + */ + sb->s_iflags |= SB_I_NOUMASK; + sb->s_iflags |= SB_I_EVM_HMAC_UNSUPPORTED; err = -ENOMEM; - root_dentry = ovl_get_root(sb, ctx->upper.dentry, oe); - if (!root_dentry) + sb->s_root = ovl_get_root(sb, ctx->upper.dentry, oe); + if (!sb->s_root) goto out_free_oe; - sb->s_root = root_dentry; - return 0; out_free_oe: ovl_free_entry(oe); + return err; +} + +int ovl_fill_super(struct super_block *sb, struct fs_context *fc) +{ + struct ovl_fs *ofs = sb->s_fs_info; + int err; + + err = -EIO; + if (WARN_ON(fc->user_ns != current_user_ns())) + goto out_err; + + ovl_set_d_op(sb); + + if (!ofs->creator_cred) { + err = -ENOMEM; + ofs->creator_cred = prepare_creds(); + if (!ofs->creator_cred) + goto out_err; + } + + with_ovl_creds(sb) + err = ovl_fill_super_creds(fc, sb); + out_err: - ovl_free_fs(ofs); - sb->s_fs_info = NULL; + if (err) { + ovl_free_fs(ofs); + sb->s_fs_info = NULL; + } + return err; } @@ -1525,19 +1599,15 @@ static int __init ovl_init(void) ovl_inode_cachep = kmem_cache_create("ovl_inode", sizeof(struct ovl_inode), 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD|SLAB_ACCOUNT), + SLAB_ACCOUNT), ovl_inode_init_once); if (ovl_inode_cachep == NULL) return -ENOMEM; - err = ovl_aio_request_cache_init(); - if (!err) { - err = register_filesystem(&ovl_fs_type); - if (!err) - return 0; + err = register_filesystem(&ovl_fs_type); + if (!err) + return 0; - ovl_aio_request_cache_destroy(); - } kmem_cache_destroy(ovl_inode_cachep); return err; @@ -1553,7 +1623,6 @@ static void __exit ovl_exit(void) */ rcu_barrier(); kmem_cache_destroy(ovl_inode_cachep); - ovl_aio_request_cache_destroy(); } module_init(ovl_init); diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 89e0d60d35b6..94986d11a166 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -15,14 +15,41 @@ #include <linux/uuid.h> #include <linux/namei.h> #include <linux/ratelimit.h> +#include <linux/overflow.h> #include "overlayfs.h" +/* Get write access to upper mnt - may fail if upper sb was remounted ro */ +int ovl_get_write_access(struct dentry *dentry) +{ + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + return mnt_get_write_access(ovl_upper_mnt(ofs)); +} + +/* Get write access to upper sb - may block if upper sb is frozen */ +void ovl_start_write(struct dentry *dentry) +{ + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + sb_start_write(ovl_upper_mnt(ofs)->mnt_sb); +} + int ovl_want_write(struct dentry *dentry) { struct ovl_fs *ofs = OVL_FS(dentry->d_sb); return mnt_want_write(ovl_upper_mnt(ofs)); } +void ovl_put_write_access(struct dentry *dentry) +{ + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + mnt_put_write_access(ovl_upper_mnt(ofs)); +} + +void ovl_end_write(struct dentry *dentry) +{ + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + sb_end_write(ovl_upper_mnt(ofs)->mnt_sb); +} + void ovl_drop_write(struct dentry *dentry) { struct ovl_fs *ofs = OVL_FS(dentry->d_sb); @@ -55,7 +82,7 @@ int ovl_can_decode_fh(struct super_block *sb) if (!capable(CAP_DAC_READ_SEARCH)) return 0; - if (!sb->s_export_op || !sb->s_export_op->fh_to_dentry) + if (!exportfs_can_decode_fh(sb->s_export_op)) return 0; return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN; @@ -65,7 +92,7 @@ struct dentry *ovl_indexdir(struct super_block *sb) { struct ovl_fs *ofs = OVL_FS(sb); - return ofs->indexdir; + return ofs->config.index ? ofs->workdir : NULL; } /* Index all files on copy up. For now only enabled for NFS export */ @@ -114,9 +141,9 @@ void ovl_stack_free(struct ovl_path *stack, unsigned int n) struct ovl_entry *ovl_alloc_entry(unsigned int numlower) { - size_t size = offsetof(struct ovl_entry, __lowerstack[numlower]); - struct ovl_entry *oe = kzalloc(size, GFP_KERNEL); + struct ovl_entry *oe; + oe = kzalloc(struct_size(oe, __lowerstack, numlower), GFP_KERNEL); if (oe) oe->__numlower = numlower; @@ -171,10 +198,20 @@ void ovl_dentry_init_flags(struct dentry *dentry, struct dentry *upperdentry, bool ovl_dentry_weird(struct dentry *dentry) { - return dentry->d_flags & (DCACHE_NEED_AUTOMOUNT | - DCACHE_MANAGE_TRANSIT | - DCACHE_OP_HASH | - DCACHE_OP_COMPARE); + if (!d_can_lookup(dentry) && !d_is_file(dentry) && !d_is_symlink(dentry)) + return true; + + if (dentry->d_flags & (DCACHE_NEED_AUTOMOUNT | DCACHE_MANAGE_TRANSIT)) + return true; + + /* + * Exceptionally for layers with casefold, we accept that they have + * their own hash and compare operations + */ + if (sb_has_encoding(dentry->d_sb)) + return false; + + return dentry->d_flags & (DCACHE_OP_HASH | DCACHE_OP_COMPARE); } enum ovl_path_type ovl_path_type(struct dentry *dentry) @@ -271,7 +308,9 @@ enum ovl_path_type ovl_path_realdata(struct dentry *dentry, struct path *path) struct dentry *ovl_dentry_upper(struct dentry *dentry) { - return ovl_upperdentry_dereference(OVL_I(d_inode(dentry))); + struct inode *inode = d_inode(dentry); + + return inode ? ovl_upperdentry_dereference(OVL_I(inode)) : NULL; } struct dentry *ovl_dentry_lower(struct dentry *dentry) @@ -435,6 +474,33 @@ void ovl_dentry_set_opaque(struct dentry *dentry) ovl_dentry_set_flag(OVL_E_OPAQUE, dentry); } +bool ovl_dentry_has_xwhiteouts(struct dentry *dentry) +{ + return ovl_dentry_test_flag(OVL_E_XWHITEOUTS, dentry); +} + +void ovl_dentry_set_xwhiteouts(struct dentry *dentry) +{ + ovl_dentry_set_flag(OVL_E_XWHITEOUTS, dentry); +} + +/* + * ovl_layer_set_xwhiteouts() is called before adding the overlay dir + * dentry to dcache, while readdir of that same directory happens after + * the overlay dir dentry is in dcache, so if some cpu observes that + * ovl_dentry_is_xwhiteouts(), it will also observe layer->has_xwhiteouts + * for the layers where xwhiteouts marker was found in that merge dir. + */ +void ovl_layer_set_xwhiteouts(struct ovl_fs *ofs, + const struct ovl_layer *layer) +{ + if (layer->has_xwhiteouts) + return; + + /* Write once to read-mostly layer properties */ + ofs->layers[layer->idx].has_xwhiteouts = true; +} + /* * For hard links and decoded file handles, it's possible for ovl_dentry_upper() * to return positive, while there's no actual upper alias for the inode. @@ -575,6 +641,16 @@ bool ovl_is_whiteout(struct dentry *dentry) return inode && IS_WHITEOUT(inode); } +/* + * Use this over ovl_is_whiteout for upper and lower files, as it also + * handles overlay.whiteout xattr whiteout files. + */ +bool ovl_path_is_whiteout(struct ovl_fs *ofs, const struct path *path) +{ + return ovl_is_whiteout(path->dentry) || + ovl_path_check_xwhiteout_xattr(ofs, path); +} + struct file *ovl_path_open(const struct path *path, int flags) { struct inode *inode = d_inode(path->dentry); @@ -644,22 +720,36 @@ bool ovl_already_copied_up(struct dentry *dentry, int flags) return false; } +/* + * The copy up "transaction" keeps an elevated mnt write count on upper mnt, + * but leaves taking freeze protection on upper sb to lower level helpers. + */ int ovl_copy_up_start(struct dentry *dentry, int flags) { struct inode *inode = d_inode(dentry); int err; err = ovl_inode_lock_interruptible(inode); - if (!err && ovl_already_copied_up_locked(dentry, flags)) { + if (err) + return err; + + if (ovl_already_copied_up_locked(dentry, flags)) err = 1; /* Already copied up */ - ovl_inode_unlock(inode); - } + else + err = ovl_get_write_access(dentry); + if (err) + goto out_unlock; + + return 0; +out_unlock: + ovl_inode_unlock(inode); return err; } void ovl_copy_up_end(struct dentry *dentry) { + ovl_put_write_access(dentry); ovl_inode_unlock(d_inode(dentry)); } @@ -676,6 +766,19 @@ bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path) return false; } +bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path) +{ + struct dentry *dentry = path->dentry; + int res; + + /* xattr.whiteout must be a zero size regular file */ + if (!d_is_reg(dentry) || i_size_read(d_inode(dentry)) != 0) + return false; + + res = ovl_path_getxattr(ofs, path, OVL_XATTR_XWHITEOUT, NULL, 0); + return res >= 0; +} + /* * Load persistent uuid from xattr into s_uuid if found, or store a new * random generated value in s_uuid and in xattr. @@ -684,13 +787,14 @@ bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs, const struct path *upperpath) { bool set = false; + uuid_t uuid; int res; /* Try to load existing persistent uuid */ - res = ovl_path_getxattr(ofs, upperpath, OVL_XATTR_UUID, sb->s_uuid.b, + res = ovl_path_getxattr(ofs, upperpath, OVL_XATTR_UUID, uuid.b, UUID_SIZE); if (res == UUID_SIZE) - return true; + goto set_uuid; if (res != -ENODATA) goto fail; @@ -718,37 +822,37 @@ bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs, } /* Generate overlay instance uuid */ - uuid_gen(&sb->s_uuid); + uuid_gen(&uuid); /* Try to store persistent uuid */ set = true; - res = ovl_setxattr(ofs, upperpath->dentry, OVL_XATTR_UUID, sb->s_uuid.b, + res = ovl_setxattr(ofs, upperpath->dentry, OVL_XATTR_UUID, uuid.b, UUID_SIZE); - if (res == 0) - return true; + if (res) + goto fail; + +set_uuid: + super_set_uuid(sb, uuid.b, sizeof(uuid)); + return true; fail: - memset(sb->s_uuid.b, 0, UUID_SIZE); ofs->config.uuid = OVL_UUID_NULL; pr_warn("failed to %s uuid (%pd2, err=%i); falling back to uuid=null.\n", set ? "set" : "get", upperpath->dentry, res); return false; } -bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path, - enum ovl_xattr ox) +char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path *path, + enum ovl_xattr ox) { int res; char val; if (!d_is_dir(path->dentry)) - return false; + return 0; res = ovl_path_getxattr(ofs, path, ox, &val, 1); - if (res == 1 && val == 'y') - return true; - - return false; + return res == 1 ? val : 0; } #define OVL_XATTR_OPAQUE_POSTFIX "opaque" @@ -760,6 +864,7 @@ bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path, #define OVL_XATTR_UUID_POSTFIX "uuid" #define OVL_XATTR_METACOPY_POSTFIX "metacopy" #define OVL_XATTR_PROTATTR_POSTFIX "protattr" +#define OVL_XATTR_XWHITEOUT_POSTFIX "whiteout" #define OVL_XATTR_TAB_ENTRY(x) \ [x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \ @@ -775,6 +880,7 @@ const char *const ovl_xattr_table[][2] = { OVL_XATTR_TAB_ENTRY(OVL_XATTR_UUID), OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY), OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR), + OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUT), }; int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry, @@ -855,7 +961,7 @@ void ovl_check_protattr(struct inode *inode, struct dentry *upper) } int ovl_set_protattr(struct inode *inode, struct dentry *upper, - struct fileattr *fa) + struct file_kattr *fa) { struct ovl_fs *ofs = OVL_FS(inode->i_sb); char buf[OVL_PROTATTR_MAX]; @@ -898,7 +1004,7 @@ int ovl_set_protattr(struct inode *inode, struct dentry *upper, return 0; } -/** +/* * Caller must hold a reference to inode to prevent it from being freed while * it is marked inuse. */ @@ -908,8 +1014,8 @@ bool ovl_inuse_trylock(struct dentry *dentry) bool locked = false; spin_lock(&inode->i_lock); - if (!(inode->i_state & I_OVL_INUSE)) { - inode->i_state |= I_OVL_INUSE; + if (!(inode_state_read(inode) & I_OVL_INUSE)) { + inode_state_set(inode, I_OVL_INUSE); locked = true; } spin_unlock(&inode->i_lock); @@ -923,8 +1029,8 @@ void ovl_inuse_unlock(struct dentry *dentry) struct inode *inode = d_inode(dentry); spin_lock(&inode->i_lock); - WARN_ON(!(inode->i_state & I_OVL_INUSE)); - inode->i_state &= ~I_OVL_INUSE; + WARN_ON(!(inode_state_read(inode) & I_OVL_INUSE)); + inode_state_clear(inode, I_OVL_INUSE); spin_unlock(&inode->i_lock); } } @@ -935,7 +1041,7 @@ bool ovl_is_inuse(struct dentry *dentry) bool inuse; spin_lock(&inode->i_lock); - inuse = (inode->i_state & I_OVL_INUSE); + inuse = (inode_state_read(inode) & I_OVL_INUSE); spin_unlock(&inode->i_lock); return inuse; @@ -967,18 +1073,23 @@ static void ovl_cleanup_index(struct dentry *dentry) { struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *indexdir = ovl_indexdir(dentry->d_sb); - struct inode *dir = indexdir->d_inode; struct dentry *lowerdentry = ovl_dentry_lower(dentry); struct dentry *upperdentry = ovl_dentry_upper(dentry); struct dentry *index = NULL; struct inode *inode; struct qstr name = { }; + bool got_write = false; int err; err = ovl_get_index_name(ofs, lowerdentry, &name); if (err) goto fail; + err = ovl_want_write(dentry); + if (err) + goto fail; + + got_write = true; inode = d_inode(upperdentry); if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) { pr_warn_ratelimited("cleanup linked index (%pd2, ino=%lu, nlink=%u)\n", @@ -997,25 +1108,24 @@ static void ovl_cleanup_index(struct dentry *dentry) goto out; } - inode_lock_nested(dir, I_MUTEX_PARENT); - index = ovl_lookup_upper(ofs, name.name, indexdir, name.len); + index = ovl_lookup_upper_unlocked(ofs, name.name, indexdir, name.len); err = PTR_ERR(index); if (IS_ERR(index)) { index = NULL; } else if (ovl_index_all(dentry->d_sb)) { /* Whiteout orphan index to block future open by handle */ err = ovl_cleanup_and_whiteout(OVL_FS(dentry->d_sb), - dir, index); + indexdir, index); } else { /* Cleanup orphan index entries */ - err = ovl_cleanup(ofs, dir, index); + err = ovl_cleanup(ofs, indexdir, index); } - - inode_unlock(dir); if (err) goto fail; out: + if (got_write) + ovl_drop_write(dentry); kfree(name.name); dput(index); return; @@ -1032,7 +1142,6 @@ fail: int ovl_nlink_start(struct dentry *dentry) { struct inode *inode = d_inode(dentry); - const struct cred *old_cred; int err; if (WARN_ON(!inode)) @@ -1062,22 +1171,30 @@ int ovl_nlink_start(struct dentry *dentry) if (err) return err; + err = ovl_want_write(dentry); + if (err) + goto out_unlock; + if (d_is_dir(dentry) || !ovl_test_flag(OVL_INDEX, inode)) - goto out; + return 0; - old_cred = ovl_override_creds(dentry->d_sb); /* * The overlay inode nlink should be incremented/decremented IFF the * upper operation succeeds, along with nlink change of upper inode. * Therefore, before link/unlink/rename, we store the union nlink * value relative to the upper inode nlink in an upper inode xattr. */ - err = ovl_set_nlink_upper(dentry); - revert_creds(old_cred); - -out: + with_ovl_creds(dentry->d_sb) + err = ovl_set_nlink_upper(dentry); if (err) - ovl_inode_unlock(inode); + goto out_drop_write; + + return 0; + +out_drop_write: + ovl_drop_write(dentry); +out_unlock: + ovl_inode_unlock(inode); return err; } @@ -1086,25 +1203,30 @@ void ovl_nlink_end(struct dentry *dentry) { struct inode *inode = d_inode(dentry); - if (ovl_test_flag(OVL_INDEX, inode) && inode->i_nlink == 0) { - const struct cred *old_cred; + ovl_drop_write(dentry); - old_cred = ovl_override_creds(dentry->d_sb); - ovl_cleanup_index(dentry); - revert_creds(old_cred); + if (ovl_test_flag(OVL_INDEX, inode) && inode->i_nlink == 0) { + with_ovl_creds(dentry->d_sb) + ovl_cleanup_index(dentry); } ovl_inode_unlock(inode); } -int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir) +int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *work, + struct dentry *upperdir, struct dentry *upper) { - /* Workdir should not be the same as upperdir */ - if (workdir == upperdir) - goto err; + struct dentry *trap; /* Workdir should not be subdir of upperdir and vice versa */ - if (lock_rename(workdir, upperdir) != NULL) + trap = lock_rename(workdir, upperdir); + if (IS_ERR(trap)) + goto err; + if (trap) + goto err_unlock; + if (work && (work->d_parent != workdir || d_unhashed(work))) + goto err_unlock; + if (upper && (upper->d_parent != upperdir || d_unhashed(upper))) goto err_unlock; return 0; @@ -1249,7 +1371,7 @@ err_free: } /* Call with mounter creds as it may open the file */ -int ovl_ensure_verity_loaded(struct path *datapath) +int ovl_ensure_verity_loaded(const struct path *datapath) { struct inode *inode = d_inode(datapath->dentry); struct file *filp; @@ -1259,7 +1381,7 @@ int ovl_ensure_verity_loaded(struct path *datapath) * If this inode was not yet opened, the verity info hasn't been * loaded yet, so we need to do that here to force it into memory. */ - filp = kernel_file_open(datapath, O_RDONLY, inode, current_cred()); + filp = kernel_file_open(datapath, O_RDONLY, current_cred()); if (IS_ERR(filp)) return PTR_ERR(filp); fput(filp); @@ -1269,8 +1391,8 @@ int ovl_ensure_verity_loaded(struct path *datapath) } int ovl_validate_verity(struct ovl_fs *ofs, - struct path *metapath, - struct path *datapath) + const struct path *metapath, + const struct path *datapath) { struct ovl_metacopy metacopy_data; u8 actual_digest[FS_VERITY_MAX_DIGEST_SIZE]; @@ -1323,7 +1445,7 @@ int ovl_validate_verity(struct ovl_fs *ofs, return 0; } -int ovl_get_verity_digest(struct ovl_fs *ofs, struct path *src, +int ovl_get_verity_digest(struct ovl_fs *ofs, const struct path *src, struct ovl_metacopy *metacopy) { int err, digest_size; @@ -1403,14 +1525,16 @@ void ovl_copyattr(struct inode *inode) realinode = ovl_i_path_real(inode, &realpath); real_idmap = mnt_idmap(realpath.mnt); + spin_lock(&inode->i_lock); vfsuid = i_uid_into_vfsuid(real_idmap, realinode); vfsgid = i_gid_into_vfsgid(real_idmap, realinode); inode->i_uid = vfsuid_into_kuid(vfsuid); inode->i_gid = vfsgid_into_kgid(vfsgid); inode->i_mode = realinode->i_mode; - inode->i_atime = realinode->i_atime; - inode->i_mtime = realinode->i_mtime; + inode_set_atime_to_ts(inode, inode_get_atime(realinode)); + inode_set_mtime_to_ts(inode, inode_get_mtime(realinode)); inode_set_ctime_to_ts(inode, inode_get_ctime(realinode)); i_size_write(inode, i_size_read(realinode)); + spin_unlock(&inode->i_lock); } diff --git a/fs/overlayfs/xattrs.c b/fs/overlayfs/xattrs.c new file mode 100644 index 000000000000..aa95855c7023 --- /dev/null +++ b/fs/overlayfs/xattrs.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/fs.h> +#include <linux/xattr.h> +#include "overlayfs.h" + +static bool ovl_is_escaped_xattr(struct super_block *sb, const char *name) +{ + struct ovl_fs *ofs = sb->s_fs_info; + + if (ofs->config.userxattr) + return strncmp(name, OVL_XATTR_ESCAPE_USER_PREFIX, + OVL_XATTR_ESCAPE_USER_PREFIX_LEN) == 0; + else + return strncmp(name, OVL_XATTR_ESCAPE_TRUSTED_PREFIX, + OVL_XATTR_ESCAPE_TRUSTED_PREFIX_LEN - 1) == 0; +} + +static bool ovl_is_own_xattr(struct super_block *sb, const char *name) +{ + struct ovl_fs *ofs = OVL_FS(sb); + + if (ofs->config.userxattr) + return strncmp(name, OVL_XATTR_USER_PREFIX, + OVL_XATTR_USER_PREFIX_LEN) == 0; + else + return strncmp(name, OVL_XATTR_TRUSTED_PREFIX, + OVL_XATTR_TRUSTED_PREFIX_LEN) == 0; +} + +bool ovl_is_private_xattr(struct super_block *sb, const char *name) +{ + return ovl_is_own_xattr(sb, name) && !ovl_is_escaped_xattr(sb, name); +} + +static int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + int err; + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + struct dentry *upperdentry = ovl_i_dentry_upper(inode); + struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); + struct path realpath; + + if (!value && !upperdentry) { + ovl_path_lower(dentry, &realpath); + with_ovl_creds(dentry->d_sb) + err = vfs_getxattr(mnt_idmap(realpath.mnt), realdentry, name, NULL, 0); + if (err < 0) + goto out; + } + + if (!upperdentry) { + err = ovl_copy_up(dentry); + if (err) + goto out; + + realdentry = ovl_dentry_upper(dentry); + } + + err = ovl_want_write(dentry); + if (err) + goto out; + + with_ovl_creds(dentry->d_sb) { + if (value) { + err = ovl_do_setxattr(ofs, realdentry, name, value, size, flags); + } else { + WARN_ON(flags != XATTR_REPLACE); + err = ovl_do_removexattr(ofs, realdentry, name); + } + } + ovl_drop_write(dentry); + + /* copy c/mtime */ + ovl_copyattr(inode); +out: + return err; +} + +static int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, + void *value, size_t size) +{ + struct path realpath; + + ovl_i_path_real(inode, &realpath); + with_ovl_creds(dentry->d_sb) + return vfs_getxattr(mnt_idmap(realpath.mnt), realpath.dentry, name, value, size); +} + +static bool ovl_can_list(struct super_block *sb, const char *s) +{ + /* Never list private (.overlay) */ + if (ovl_is_private_xattr(sb, s)) + return false; + + /* List all non-trusted xattrs */ + if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) + return true; + + /* list other trusted for superuser only */ + return ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN); +} + +ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) +{ + struct dentry *realdentry = ovl_dentry_real(dentry); + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + ssize_t res; + size_t len; + char *s; + size_t prefix_len, name_len; + + with_ovl_creds(dentry->d_sb) + res = vfs_listxattr(realdentry, list, size); + if (res <= 0 || size == 0) + return res; + + prefix_len = ofs->config.userxattr ? + OVL_XATTR_USER_PREFIX_LEN : OVL_XATTR_TRUSTED_PREFIX_LEN; + + /* filter out private xattrs */ + for (s = list, len = res; len;) { + size_t slen = strnlen(s, len) + 1; + + /* underlying fs providing us with an broken xattr list? */ + if (WARN_ON(slen > len)) + return -EIO; + + len -= slen; + if (!ovl_can_list(dentry->d_sb, s)) { + res -= slen; + memmove(s, s + slen, len); + } else if (ovl_is_escaped_xattr(dentry->d_sb, s)) { + res -= OVL_XATTR_ESCAPE_PREFIX_LEN; + name_len = slen - prefix_len - OVL_XATTR_ESCAPE_PREFIX_LEN; + s += prefix_len; + memmove(s, s + OVL_XATTR_ESCAPE_PREFIX_LEN, name_len + len); + s += name_len; + } else { + s += slen; + } + } + + return res; +} + +static char *ovl_xattr_escape_name(const char *prefix, const char *name) +{ + size_t prefix_len = strlen(prefix); + size_t name_len = strlen(name); + size_t escaped_len; + char *escaped, *s; + + escaped_len = prefix_len + OVL_XATTR_ESCAPE_PREFIX_LEN + name_len; + if (escaped_len > XATTR_NAME_MAX) + return ERR_PTR(-EOPNOTSUPP); + + escaped = kmalloc(escaped_len + 1, GFP_KERNEL); + if (escaped == NULL) + return ERR_PTR(-ENOMEM); + + s = escaped; + memcpy(s, prefix, prefix_len); + s += prefix_len; + memcpy(s, OVL_XATTR_ESCAPE_PREFIX, OVL_XATTR_ESCAPE_PREFIX_LEN); + s += OVL_XATTR_ESCAPE_PREFIX_LEN; + memcpy(s, name, name_len + 1); + + return escaped; +} + +static int ovl_own_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) +{ + char *escaped; + int r; + + escaped = ovl_xattr_escape_name(handler->prefix, name); + if (IS_ERR(escaped)) + return PTR_ERR(escaped); + + r = ovl_xattr_get(dentry, inode, escaped, buffer, size); + + kfree(escaped); + + return r; +} + +static int ovl_own_xattr_set(const struct xattr_handler *handler, + struct mnt_idmap *idmap, + struct dentry *dentry, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) +{ + char *escaped; + int r; + + escaped = ovl_xattr_escape_name(handler->prefix, name); + if (IS_ERR(escaped)) + return PTR_ERR(escaped); + + r = ovl_xattr_set(dentry, inode, escaped, value, size, flags); + + kfree(escaped); + + return r; +} + +static int ovl_other_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) +{ + return ovl_xattr_get(dentry, inode, name, buffer, size); +} + +static int ovl_other_xattr_set(const struct xattr_handler *handler, + struct mnt_idmap *idmap, + struct dentry *dentry, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) +{ + return ovl_xattr_set(dentry, inode, name, value, size, flags); +} + +static const struct xattr_handler ovl_own_trusted_xattr_handler = { + .prefix = OVL_XATTR_TRUSTED_PREFIX, + .get = ovl_own_xattr_get, + .set = ovl_own_xattr_set, +}; + +static const struct xattr_handler ovl_own_user_xattr_handler = { + .prefix = OVL_XATTR_USER_PREFIX, + .get = ovl_own_xattr_get, + .set = ovl_own_xattr_set, +}; + +static const struct xattr_handler ovl_other_xattr_handler = { + .prefix = "", /* catch all */ + .get = ovl_other_xattr_get, + .set = ovl_other_xattr_set, +}; + +static const struct xattr_handler * const ovl_trusted_xattr_handlers[] = { + &ovl_own_trusted_xattr_handler, + &ovl_other_xattr_handler, + NULL +}; + +static const struct xattr_handler * const ovl_user_xattr_handlers[] = { + &ovl_own_user_xattr_handler, + &ovl_other_xattr_handler, + NULL +}; + +const struct xattr_handler * const *ovl_xattr_handlers(struct ovl_fs *ofs) +{ + return ofs->config.userxattr ? ovl_user_xattr_handlers : + ovl_trusted_xattr_handlers; +} |
