diff options
Diffstat (limited to 'fs/overlayfs/util.c')
| -rw-r--r-- | fs/overlayfs/util.c | 656 |
1 files changed, 537 insertions, 119 deletions
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index bde291623c8c..94986d11a166 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -10,33 +10,61 @@ #include <linux/cred.h> #include <linux/xattr.h> #include <linux/exportfs.h> +#include <linux/file.h> #include <linux/fileattr.h> #include <linux/uuid.h> #include <linux/namei.h> #include <linux/ratelimit.h> +#include <linux/overflow.h> #include "overlayfs.h" +/* Get write access to upper mnt - may fail if upper sb was remounted ro */ +int ovl_get_write_access(struct dentry *dentry) +{ + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + return mnt_get_write_access(ovl_upper_mnt(ofs)); +} + +/* Get write access to upper sb - may block if upper sb is frozen */ +void ovl_start_write(struct dentry *dentry) +{ + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + sb_start_write(ovl_upper_mnt(ofs)->mnt_sb); +} + int ovl_want_write(struct dentry *dentry) { - struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); return mnt_want_write(ovl_upper_mnt(ofs)); } +void ovl_put_write_access(struct dentry *dentry) +{ + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + mnt_put_write_access(ovl_upper_mnt(ofs)); +} + +void ovl_end_write(struct dentry *dentry) +{ + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + sb_end_write(ovl_upper_mnt(ofs)->mnt_sb); +} + void ovl_drop_write(struct dentry *dentry) { - struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); mnt_drop_write(ovl_upper_mnt(ofs)); } struct dentry *ovl_workdir(struct dentry *dentry) { - struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); return ofs->workdir; } const struct cred *ovl_override_creds(struct super_block *sb) { - struct ovl_fs *ofs = sb->s_fs_info; + struct ovl_fs *ofs = OVL_FS(sb); return override_creds(ofs->creator_cred); } @@ -54,7 +82,7 @@ int ovl_can_decode_fh(struct super_block *sb) if (!capable(CAP_DAC_READ_SEARCH)) return 0; - if (!sb->s_export_op || !sb->s_export_op->fh_to_dentry) + if (!exportfs_can_decode_fh(sb->s_export_op)) return 0; return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN; @@ -62,15 +90,15 @@ int ovl_can_decode_fh(struct super_block *sb) struct dentry *ovl_indexdir(struct super_block *sb) { - struct ovl_fs *ofs = sb->s_fs_info; + struct ovl_fs *ofs = OVL_FS(sb); - return ofs->indexdir; + return ofs->config.index ? ofs->workdir : NULL; } /* Index all files on copy up. For now only enabled for NFS export */ bool ovl_index_all(struct super_block *sb) { - struct ovl_fs *ofs = sb->s_fs_info; + struct ovl_fs *ofs = OVL_FS(sb); return ofs->config.nfs_export && ofs->config.index; } @@ -78,38 +106,89 @@ bool ovl_index_all(struct super_block *sb) /* Verify lower origin on lookup. For now only enabled for NFS export */ bool ovl_verify_lower(struct super_block *sb) { - struct ovl_fs *ofs = sb->s_fs_info; + struct ovl_fs *ofs = OVL_FS(sb); return ofs->config.nfs_export && ofs->config.index; } +struct ovl_path *ovl_stack_alloc(unsigned int n) +{ + return kcalloc(n, sizeof(struct ovl_path), GFP_KERNEL); +} + +void ovl_stack_cpy(struct ovl_path *dst, struct ovl_path *src, unsigned int n) +{ + unsigned int i; + + memcpy(dst, src, sizeof(struct ovl_path) * n); + for (i = 0; i < n; i++) + dget(src[i].dentry); +} + +void ovl_stack_put(struct ovl_path *stack, unsigned int n) +{ + unsigned int i; + + for (i = 0; stack && i < n; i++) + dput(stack[i].dentry); +} + +void ovl_stack_free(struct ovl_path *stack, unsigned int n) +{ + ovl_stack_put(stack, n); + kfree(stack); +} + struct ovl_entry *ovl_alloc_entry(unsigned int numlower) { - size_t size = offsetof(struct ovl_entry, lowerstack[numlower]); - struct ovl_entry *oe = kzalloc(size, GFP_KERNEL); + struct ovl_entry *oe; + oe = kzalloc(struct_size(oe, __lowerstack, numlower), GFP_KERNEL); if (oe) - oe->numlower = numlower; + oe->__numlower = numlower; return oe; } +void ovl_free_entry(struct ovl_entry *oe) +{ + ovl_stack_put(ovl_lowerstack(oe), ovl_numlower(oe)); + kfree(oe); +} + +#define OVL_D_REVALIDATE (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE) + bool ovl_dentry_remote(struct dentry *dentry) { - return dentry->d_flags & - (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE); + return dentry->d_flags & OVL_D_REVALIDATE; } -void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *upperdentry, - unsigned int mask) +void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *realdentry) { - struct ovl_entry *oe = OVL_E(dentry); + if (!ovl_dentry_remote(realdentry)) + return; + + spin_lock(&dentry->d_lock); + dentry->d_flags |= realdentry->d_flags & OVL_D_REVALIDATE; + spin_unlock(&dentry->d_lock); +} + +void ovl_dentry_init_reval(struct dentry *dentry, struct dentry *upperdentry, + struct ovl_entry *oe) +{ + return ovl_dentry_init_flags(dentry, upperdentry, oe, OVL_D_REVALIDATE); +} + +void ovl_dentry_init_flags(struct dentry *dentry, struct dentry *upperdentry, + struct ovl_entry *oe, unsigned int mask) +{ + struct ovl_path *lowerstack = ovl_lowerstack(oe); unsigned int i, flags = 0; if (upperdentry) flags |= upperdentry->d_flags; - for (i = 0; i < oe->numlower; i++) - flags |= oe->lowerstack[i].dentry->d_flags; + for (i = 0; i < ovl_numlower(oe) && lowerstack[i].dentry; i++) + flags |= lowerstack[i].dentry->d_flags; spin_lock(&dentry->d_lock); dentry->d_flags &= ~mask; @@ -119,15 +198,25 @@ void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *upperdentry, bool ovl_dentry_weird(struct dentry *dentry) { - return dentry->d_flags & (DCACHE_NEED_AUTOMOUNT | - DCACHE_MANAGE_TRANSIT | - DCACHE_OP_HASH | - DCACHE_OP_COMPARE); + if (!d_can_lookup(dentry) && !d_is_file(dentry) && !d_is_symlink(dentry)) + return true; + + if (dentry->d_flags & (DCACHE_NEED_AUTOMOUNT | DCACHE_MANAGE_TRANSIT)) + return true; + + /* + * Exceptionally for layers with casefold, we accept that they have + * their own hash and compare operations + */ + if (sb_has_encoding(dentry->d_sb)) + return false; + + return dentry->d_flags & (DCACHE_OP_HASH | DCACHE_OP_COMPARE); } enum ovl_path_type ovl_path_type(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; + struct ovl_entry *oe = OVL_E(dentry); enum ovl_path_type type = 0; if (ovl_dentry_upper(dentry)) { @@ -136,7 +225,7 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry) /* * Non-dir dentry can hold lower dentry of its copy up origin. */ - if (oe->numlower) { + if (ovl_numlower(oe)) { if (ovl_test_flag(OVL_CONST_INO, d_inode(dentry))) type |= __OVL_PATH_ORIGIN; if (d_is_dir(dentry) || @@ -144,7 +233,7 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry) type |= __OVL_PATH_MERGE; } } else { - if (oe->numlower > 1) + if (ovl_numlower(oe) > 1) type |= __OVL_PATH_MERGE; } return type; @@ -152,7 +241,7 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry) void ovl_path_upper(struct dentry *dentry, struct path *path) { - struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); path->mnt = ovl_upper_mnt(ofs); path->dentry = ovl_dentry_upper(dentry); @@ -160,11 +249,12 @@ void ovl_path_upper(struct dentry *dentry, struct path *path) void ovl_path_lower(struct dentry *dentry, struct path *path) { - struct ovl_entry *oe = dentry->d_fsdata; + struct ovl_entry *oe = OVL_E(dentry); + struct ovl_path *lowerpath = ovl_lowerstack(oe); - if (oe->numlower) { - path->mnt = oe->lowerstack[0].layer->mnt; - path->dentry = oe->lowerstack[0].dentry; + if (ovl_numlower(oe)) { + path->mnt = lowerpath->layer->mnt; + path->dentry = lowerpath->dentry; } else { *path = (struct path) { }; } @@ -172,11 +262,19 @@ void ovl_path_lower(struct dentry *dentry, struct path *path) void ovl_path_lowerdata(struct dentry *dentry, struct path *path) { - struct ovl_entry *oe = dentry->d_fsdata; + struct ovl_entry *oe = OVL_E(dentry); + struct ovl_path *lowerdata = ovl_lowerdata(oe); + struct dentry *lowerdata_dentry = ovl_lowerdata_dentry(oe); - if (oe->numlower) { - path->mnt = oe->lowerstack[oe->numlower - 1].layer->mnt; - path->dentry = oe->lowerstack[oe->numlower - 1].dentry; + if (lowerdata_dentry) { + path->dentry = lowerdata_dentry; + /* + * Pairs with smp_wmb() in ovl_dentry_set_lowerdata(). + * Make sure that if lowerdata->dentry is visible, then + * datapath->layer is visible as well. + */ + smp_rmb(); + path->mnt = READ_ONCE(lowerdata->layer)->mnt; } else { *path = (struct path) { }; } @@ -210,21 +308,23 @@ enum ovl_path_type ovl_path_realdata(struct dentry *dentry, struct path *path) struct dentry *ovl_dentry_upper(struct dentry *dentry) { - return ovl_upperdentry_dereference(OVL_I(d_inode(dentry))); + struct inode *inode = d_inode(dentry); + + return inode ? ovl_upperdentry_dereference(OVL_I(inode)) : NULL; } struct dentry *ovl_dentry_lower(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; + struct ovl_entry *oe = OVL_E(dentry); - return oe->numlower ? oe->lowerstack[0].dentry : NULL; + return ovl_numlower(oe) ? ovl_lowerstack(oe)->dentry : NULL; } const struct ovl_layer *ovl_layer_lower(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; + struct ovl_entry *oe = OVL_E(dentry); - return oe->numlower ? oe->lowerstack[0].layer : NULL; + return ovl_numlower(oe) ? ovl_lowerstack(oe)->layer : NULL; } /* @@ -235,9 +335,30 @@ const struct ovl_layer *ovl_layer_lower(struct dentry *dentry) */ struct dentry *ovl_dentry_lowerdata(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; + return ovl_lowerdata_dentry(OVL_E(dentry)); +} + +int ovl_dentry_set_lowerdata(struct dentry *dentry, struct ovl_path *datapath) +{ + struct ovl_entry *oe = OVL_E(dentry); + struct ovl_path *lowerdata = ovl_lowerdata(oe); + struct dentry *datadentry = datapath->dentry; - return oe->numlower ? oe->lowerstack[oe->numlower - 1].dentry : NULL; + if (WARN_ON_ONCE(ovl_numlower(oe) <= 1)) + return -EIO; + + WRITE_ONCE(lowerdata->layer, datapath->layer); + /* + * Pairs with smp_rmb() in ovl_path_lowerdata(). + * Make sure that if lowerdata->dentry is visible, then + * lowerdata->layer is visible as well. + */ + smp_wmb(); + WRITE_ONCE(lowerdata->dentry, dget(datadentry)); + + ovl_dentry_update_reval(dentry, datadentry); + + return 0; } struct dentry *ovl_dentry_real(struct dentry *dentry) @@ -250,15 +371,19 @@ struct dentry *ovl_i_dentry_upper(struct inode *inode) return ovl_upperdentry_dereference(OVL_I(inode)); } -void ovl_i_path_real(struct inode *inode, struct path *path) +struct inode *ovl_i_path_real(struct inode *inode, struct path *path) { + struct ovl_path *lowerpath = ovl_lowerpath(OVL_I_E(inode)); + path->dentry = ovl_i_dentry_upper(inode); if (!path->dentry) { - path->dentry = OVL_I(inode)->lowerpath.dentry; - path->mnt = OVL_I(inode)->lowerpath.layer->mnt; + path->dentry = lowerpath->dentry; + path->mnt = lowerpath->layer->mnt; } else { path->mnt = ovl_upper_mnt(OVL_FS(inode->i_sb)); } + + return path->dentry ? d_inode_rcu(path->dentry) : NULL; } struct inode *ovl_inode_upper(struct inode *inode) @@ -270,9 +395,9 @@ struct inode *ovl_inode_upper(struct inode *inode) struct inode *ovl_inode_lower(struct inode *inode) { - struct dentry *lowerdentry = OVL_I(inode)->lowerpath.dentry; + struct ovl_path *lowerpath = ovl_lowerpath(OVL_I_E(inode)); - return lowerdentry ? d_inode(lowerdentry) : NULL; + return lowerpath ? d_inode(lowerpath->dentry) : NULL; } struct inode *ovl_inode_real(struct inode *inode) @@ -283,10 +408,12 @@ struct inode *ovl_inode_real(struct inode *inode) /* Return inode which contains lower data. Do not return metacopy */ struct inode *ovl_inode_lowerdata(struct inode *inode) { + struct dentry *lowerdata = ovl_lowerdata_dentry(OVL_I_E(inode)); + if (WARN_ON(!S_ISREG(inode->i_mode))) return NULL; - return OVL_I(inode)->lowerdata ?: ovl_inode_lower(inode); + return lowerdata ? d_inode(lowerdata) : NULL; } /* Return real inode which contains data. Does not return metacopy inode */ @@ -301,9 +428,15 @@ struct inode *ovl_inode_realdata(struct inode *inode) return ovl_inode_lowerdata(inode); } +const char *ovl_lowerdata_redirect(struct inode *inode) +{ + return inode && S_ISREG(inode->i_mode) ? + OVL_I(inode)->lowerdata_redirect : NULL; +} + struct ovl_dir_cache *ovl_dir_cache(struct inode *inode) { - return OVL_I(inode)->cache; + return inode && S_ISDIR(inode->i_mode) ? OVL_I(inode)->cache : NULL; } void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache) @@ -313,17 +446,17 @@ void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache) void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry) { - set_bit(flag, &OVL_E(dentry)->flags); + set_bit(flag, OVL_E_FLAGS(dentry)); } void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry) { - clear_bit(flag, &OVL_E(dentry)->flags); + clear_bit(flag, OVL_E_FLAGS(dentry)); } bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry) { - return test_bit(flag, &OVL_E(dentry)->flags); + return test_bit(flag, OVL_E_FLAGS(dentry)); } bool ovl_dentry_is_opaque(struct dentry *dentry) @@ -341,6 +474,33 @@ void ovl_dentry_set_opaque(struct dentry *dentry) ovl_dentry_set_flag(OVL_E_OPAQUE, dentry); } +bool ovl_dentry_has_xwhiteouts(struct dentry *dentry) +{ + return ovl_dentry_test_flag(OVL_E_XWHITEOUTS, dentry); +} + +void ovl_dentry_set_xwhiteouts(struct dentry *dentry) +{ + ovl_dentry_set_flag(OVL_E_XWHITEOUTS, dentry); +} + +/* + * ovl_layer_set_xwhiteouts() is called before adding the overlay dir + * dentry to dcache, while readdir of that same directory happens after + * the overlay dir dentry is in dcache, so if some cpu observes that + * ovl_dentry_is_xwhiteouts(), it will also observe layer->has_xwhiteouts + * for the layers where xwhiteouts marker was found in that merge dir. + */ +void ovl_layer_set_xwhiteouts(struct ovl_fs *ofs, + const struct ovl_layer *layer) +{ + if (layer->has_xwhiteouts) + return; + + /* Write once to read-mostly layer properties */ + ofs->layers[layer->idx].has_xwhiteouts = true; +} + /* * For hard links and decoded file handles, it's possible for ovl_dentry_upper() * to return positive, while there's no actual upper alias for the inode. @@ -413,13 +573,6 @@ bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags) return !ovl_has_upperdata(d_inode(dentry)); } -bool ovl_redirect_dir(struct super_block *sb) -{ - struct ovl_fs *ofs = sb->s_fs_info; - - return ofs->config.redirect_dir && !ofs->noxattr; -} - const char *ovl_dentry_get_redirect(struct dentry *dentry) { return OVL_I(d_inode(dentry))->redirect; @@ -488,10 +641,20 @@ bool ovl_is_whiteout(struct dentry *dentry) return inode && IS_WHITEOUT(inode); } +/* + * Use this over ovl_is_whiteout for upper and lower files, as it also + * handles overlay.whiteout xattr whiteout files. + */ +bool ovl_path_is_whiteout(struct ovl_fs *ofs, const struct path *path) +{ + return ovl_is_whiteout(path->dentry) || + ovl_path_check_xwhiteout_xattr(ofs, path); +} + struct file *ovl_path_open(const struct path *path, int flags) { struct inode *inode = d_inode(path->dentry); - struct user_namespace *real_mnt_userns = mnt_user_ns(path->mnt); + struct mnt_idmap *real_idmap = mnt_idmap(path->mnt); int err, acc_mode; if (flags & ~(O_ACCMODE | O_LARGEFILE)) @@ -508,12 +671,12 @@ struct file *ovl_path_open(const struct path *path, int flags) BUG(); } - err = inode_permission(real_mnt_userns, inode, acc_mode | MAY_OPEN); + err = inode_permission(real_idmap, inode, acc_mode | MAY_OPEN); if (err) return ERR_PTR(err); /* O_NOATIME is an optimization, don't fail if not permitted */ - if (inode_owner_or_capable(real_mnt_userns, inode)) + if (inode_owner_or_capable(real_idmap, inode)) flags |= O_NOATIME; return dentry_open(path, flags, current_cred()); @@ -557,22 +720,36 @@ bool ovl_already_copied_up(struct dentry *dentry, int flags) return false; } +/* + * The copy up "transaction" keeps an elevated mnt write count on upper mnt, + * but leaves taking freeze protection on upper sb to lower level helpers. + */ int ovl_copy_up_start(struct dentry *dentry, int flags) { struct inode *inode = d_inode(dentry); int err; err = ovl_inode_lock_interruptible(inode); - if (!err && ovl_already_copied_up_locked(dentry, flags)) { + if (err) + return err; + + if (ovl_already_copied_up_locked(dentry, flags)) err = 1; /* Already copied up */ - ovl_inode_unlock(inode); - } + else + err = ovl_get_write_access(dentry); + if (err) + goto out_unlock; + + return 0; +out_unlock: + ovl_inode_unlock(inode); return err; } void ovl_copy_up_end(struct dentry *dentry) { + ovl_put_write_access(dentry); ovl_inode_unlock(d_inode(dentry)); } @@ -589,30 +766,105 @@ bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path) return false; } -bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path, - enum ovl_xattr ox) +bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path) { + struct dentry *dentry = path->dentry; int res; - char val; - if (!d_is_dir(path->dentry)) + /* xattr.whiteout must be a zero size regular file */ + if (!d_is_reg(dentry) || i_size_read(d_inode(dentry)) != 0) return false; - res = ovl_path_getxattr(ofs, path, ox, &val, 1); - if (res == 1 && val == 'y') - return true; + res = ovl_path_getxattr(ofs, path, OVL_XATTR_XWHITEOUT, NULL, 0); + return res >= 0; +} +/* + * Load persistent uuid from xattr into s_uuid if found, or store a new + * random generated value in s_uuid and in xattr. + */ +bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs, + const struct path *upperpath) +{ + bool set = false; + uuid_t uuid; + int res; + + /* Try to load existing persistent uuid */ + res = ovl_path_getxattr(ofs, upperpath, OVL_XATTR_UUID, uuid.b, + UUID_SIZE); + if (res == UUID_SIZE) + goto set_uuid; + + if (res != -ENODATA) + goto fail; + + /* + * With uuid=auto, if uuid xattr is found, it will be used. + * If uuid xattrs is not found, generate a persistent uuid only on mount + * of new overlays where upper root dir is not yet marked as impure. + * An upper dir is marked as impure on copy up or lookup of its subdirs. + */ + if (ofs->config.uuid == OVL_UUID_AUTO) { + res = ovl_path_getxattr(ofs, upperpath, OVL_XATTR_IMPURE, NULL, + 0); + if (res > 0) { + /* Any mount of old overlay - downgrade to uuid=null */ + ofs->config.uuid = OVL_UUID_NULL; + return true; + } else if (res == -ENODATA) { + /* First mount of new overlay - upgrade to uuid=on */ + ofs->config.uuid = OVL_UUID_ON; + } else if (res < 0) { + goto fail; + } + + } + + /* Generate overlay instance uuid */ + uuid_gen(&uuid); + + /* Try to store persistent uuid */ + set = true; + res = ovl_setxattr(ofs, upperpath->dentry, OVL_XATTR_UUID, uuid.b, + UUID_SIZE); + if (res) + goto fail; + +set_uuid: + super_set_uuid(sb, uuid.b, sizeof(uuid)); + return true; + +fail: + ofs->config.uuid = OVL_UUID_NULL; + pr_warn("failed to %s uuid (%pd2, err=%i); falling back to uuid=null.\n", + set ? "set" : "get", upperpath->dentry, res); return false; } +char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path *path, + enum ovl_xattr ox) +{ + int res; + char val; + + if (!d_is_dir(path->dentry)) + return 0; + + res = ovl_path_getxattr(ofs, path, ox, &val, 1); + return res == 1 ? val : 0; +} + #define OVL_XATTR_OPAQUE_POSTFIX "opaque" #define OVL_XATTR_REDIRECT_POSTFIX "redirect" #define OVL_XATTR_ORIGIN_POSTFIX "origin" #define OVL_XATTR_IMPURE_POSTFIX "impure" #define OVL_XATTR_NLINK_POSTFIX "nlink" #define OVL_XATTR_UPPER_POSTFIX "upper" +#define OVL_XATTR_UUID_POSTFIX "uuid" #define OVL_XATTR_METACOPY_POSTFIX "metacopy" #define OVL_XATTR_PROTATTR_POSTFIX "protattr" +#define OVL_XATTR_XWHITEOUT_POSTFIX "whiteout" #define OVL_XATTR_TAB_ENTRY(x) \ [x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \ @@ -625,8 +877,10 @@ const char *const ovl_xattr_table[][2] = { OVL_XATTR_TAB_ENTRY(OVL_XATTR_IMPURE), OVL_XATTR_TAB_ENTRY(OVL_XATTR_NLINK), OVL_XATTR_TAB_ENTRY(OVL_XATTR_UPPER), + OVL_XATTR_TAB_ENTRY(OVL_XATTR_UUID), OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY), OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR), + OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUT), }; int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry, @@ -707,7 +961,7 @@ void ovl_check_protattr(struct inode *inode, struct dentry *upper) } int ovl_set_protattr(struct inode *inode, struct dentry *upper, - struct fileattr *fa) + struct file_kattr *fa) { struct ovl_fs *ofs = OVL_FS(inode->i_sb); char buf[OVL_PROTATTR_MAX]; @@ -750,7 +1004,7 @@ int ovl_set_protattr(struct inode *inode, struct dentry *upper, return 0; } -/** +/* * Caller must hold a reference to inode to prevent it from being freed while * it is marked inuse. */ @@ -760,8 +1014,8 @@ bool ovl_inuse_trylock(struct dentry *dentry) bool locked = false; spin_lock(&inode->i_lock); - if (!(inode->i_state & I_OVL_INUSE)) { - inode->i_state |= I_OVL_INUSE; + if (!(inode_state_read(inode) & I_OVL_INUSE)) { + inode_state_set(inode, I_OVL_INUSE); locked = true; } spin_unlock(&inode->i_lock); @@ -775,8 +1029,8 @@ void ovl_inuse_unlock(struct dentry *dentry) struct inode *inode = d_inode(dentry); spin_lock(&inode->i_lock); - WARN_ON(!(inode->i_state & I_OVL_INUSE)); - inode->i_state &= ~I_OVL_INUSE; + WARN_ON(!(inode_state_read(inode) & I_OVL_INUSE)); + inode_state_clear(inode, I_OVL_INUSE); spin_unlock(&inode->i_lock); } } @@ -787,7 +1041,7 @@ bool ovl_is_inuse(struct dentry *dentry) bool inuse; spin_lock(&inode->i_lock); - inuse = (inode->i_state & I_OVL_INUSE); + inuse = (inode_state_read(inode) & I_OVL_INUSE); spin_unlock(&inode->i_lock); return inuse; @@ -819,18 +1073,23 @@ static void ovl_cleanup_index(struct dentry *dentry) { struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *indexdir = ovl_indexdir(dentry->d_sb); - struct inode *dir = indexdir->d_inode; struct dentry *lowerdentry = ovl_dentry_lower(dentry); struct dentry *upperdentry = ovl_dentry_upper(dentry); struct dentry *index = NULL; struct inode *inode; struct qstr name = { }; + bool got_write = false; int err; err = ovl_get_index_name(ofs, lowerdentry, &name); if (err) goto fail; + err = ovl_want_write(dentry); + if (err) + goto fail; + + got_write = true; inode = d_inode(upperdentry); if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) { pr_warn_ratelimited("cleanup linked index (%pd2, ino=%lu, nlink=%u)\n", @@ -849,25 +1108,24 @@ static void ovl_cleanup_index(struct dentry *dentry) goto out; } - inode_lock_nested(dir, I_MUTEX_PARENT); - index = ovl_lookup_upper(ofs, name.name, indexdir, name.len); + index = ovl_lookup_upper_unlocked(ofs, name.name, indexdir, name.len); err = PTR_ERR(index); if (IS_ERR(index)) { index = NULL; } else if (ovl_index_all(dentry->d_sb)) { /* Whiteout orphan index to block future open by handle */ err = ovl_cleanup_and_whiteout(OVL_FS(dentry->d_sb), - dir, index); + indexdir, index); } else { /* Cleanup orphan index entries */ - err = ovl_cleanup(ofs, dir, index); + err = ovl_cleanup(ofs, indexdir, index); } - - inode_unlock(dir); if (err) goto fail; out: + if (got_write) + ovl_drop_write(dentry); kfree(name.name); dput(index); return; @@ -884,7 +1142,6 @@ fail: int ovl_nlink_start(struct dentry *dentry) { struct inode *inode = d_inode(dentry); - const struct cred *old_cred; int err; if (WARN_ON(!inode)) @@ -914,22 +1171,30 @@ int ovl_nlink_start(struct dentry *dentry) if (err) return err; + err = ovl_want_write(dentry); + if (err) + goto out_unlock; + if (d_is_dir(dentry) || !ovl_test_flag(OVL_INDEX, inode)) - goto out; + return 0; - old_cred = ovl_override_creds(dentry->d_sb); /* * The overlay inode nlink should be incremented/decremented IFF the * upper operation succeeds, along with nlink change of upper inode. * Therefore, before link/unlink/rename, we store the union nlink * value relative to the upper inode nlink in an upper inode xattr. */ - err = ovl_set_nlink_upper(dentry); - revert_creds(old_cred); - -out: + with_ovl_creds(dentry->d_sb) + err = ovl_set_nlink_upper(dentry); if (err) - ovl_inode_unlock(inode); + goto out_drop_write; + + return 0; + +out_drop_write: + ovl_drop_write(dentry); +out_unlock: + ovl_inode_unlock(inode); return err; } @@ -938,25 +1203,30 @@ void ovl_nlink_end(struct dentry *dentry) { struct inode *inode = d_inode(dentry); - if (ovl_test_flag(OVL_INDEX, inode) && inode->i_nlink == 0) { - const struct cred *old_cred; + ovl_drop_write(dentry); - old_cred = ovl_override_creds(dentry->d_sb); - ovl_cleanup_index(dentry); - revert_creds(old_cred); + if (ovl_test_flag(OVL_INDEX, inode) && inode->i_nlink == 0) { + with_ovl_creds(dentry->d_sb) + ovl_cleanup_index(dentry); } ovl_inode_unlock(inode); } -int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir) +int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *work, + struct dentry *upperdir, struct dentry *upper) { - /* Workdir should not be the same as upperdir */ - if (workdir == upperdir) - goto err; + struct dentry *trap; /* Workdir should not be subdir of upperdir and vice versa */ - if (lock_rename(workdir, upperdir) != NULL) + trap = lock_rename(workdir, upperdir); + if (IS_ERR(trap)) + goto err; + if (trap) + goto err_unlock; + if (work && (work->d_parent != workdir || d_unhashed(work))) + goto err_unlock; + if (upper && (upper->d_parent != upperdir || d_unhashed(upper))) goto err_unlock; return 0; @@ -968,8 +1238,12 @@ err: return -EIO; } -/* err < 0, 0 if no metacopy xattr, 1 if metacopy xattr found */ -int ovl_check_metacopy_xattr(struct ovl_fs *ofs, const struct path *path) +/* + * err < 0, 0 if no metacopy xattr, metacopy data size if xattr found. + * an empty xattr returns OVL_METACOPY_MIN_SIZE to distinguish from no xattr value. + */ +int ovl_check_metacopy_xattr(struct ovl_fs *ofs, const struct path *path, + struct ovl_metacopy *data) { int res; @@ -977,7 +1251,8 @@ int ovl_check_metacopy_xattr(struct ovl_fs *ofs, const struct path *path) if (!S_ISREG(d_inode(path->dentry)->i_mode)) return 0; - res = ovl_path_getxattr(ofs, path, OVL_XATTR_METACOPY, NULL, 0); + res = ovl_path_getxattr(ofs, path, OVL_XATTR_METACOPY, + data, data ? OVL_METACOPY_MAX_SIZE : 0); if (res < 0) { if (res == -ENODATA || res == -EOPNOTSUPP) return 0; @@ -991,15 +1266,51 @@ int ovl_check_metacopy_xattr(struct ovl_fs *ofs, const struct path *path) goto out; } - return 1; + if (res == 0) { + /* Emulate empty data for zero size metacopy xattr */ + res = OVL_METACOPY_MIN_SIZE; + if (data) { + memset(data, 0, res); + data->len = res; + } + } else if (res < OVL_METACOPY_MIN_SIZE) { + pr_warn_ratelimited("metacopy file '%pd' has too small xattr\n", + path->dentry); + return -EIO; + } else if (data) { + if (data->version != 0) { + pr_warn_ratelimited("metacopy file '%pd' has unsupported version\n", + path->dentry); + return -EIO; + } + if (res != data->len) { + pr_warn_ratelimited("metacopy file '%pd' has invalid xattr size\n", + path->dentry); + return -EIO; + } + } + + return res; out: pr_warn_ratelimited("failed to get metacopy (%i)\n", res); return res; } +int ovl_set_metacopy_xattr(struct ovl_fs *ofs, struct dentry *d, struct ovl_metacopy *metacopy) +{ + size_t len = metacopy->len; + + /* If no flags or digest fall back to empty metacopy file */ + if (metacopy->version == 0 && metacopy->flags == 0 && metacopy->digest_algo == 0) + len = 0; + + return ovl_check_setxattr(ofs, d, OVL_XATTR_METACOPY, + metacopy, len, -EOPNOTSUPP); +} + bool ovl_is_metacopy_dentry(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; + struct ovl_entry *oe = OVL_E(dentry); if (!d_is_reg(dentry)) return false; @@ -1010,7 +1321,7 @@ bool ovl_is_metacopy_dentry(struct dentry *dentry) return false; } - return (oe->numlower > 1); + return (ovl_numlower(oe) > 1); } char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int padding) @@ -1059,6 +1370,112 @@ err_free: return ERR_PTR(res); } +/* Call with mounter creds as it may open the file */ +int ovl_ensure_verity_loaded(const struct path *datapath) +{ + struct inode *inode = d_inode(datapath->dentry); + struct file *filp; + + if (!fsverity_active(inode) && IS_VERITY(inode)) { + /* + * If this inode was not yet opened, the verity info hasn't been + * loaded yet, so we need to do that here to force it into memory. + */ + filp = kernel_file_open(datapath, O_RDONLY, current_cred()); + if (IS_ERR(filp)) + return PTR_ERR(filp); + fput(filp); + } + + return 0; +} + +int ovl_validate_verity(struct ovl_fs *ofs, + const struct path *metapath, + const struct path *datapath) +{ + struct ovl_metacopy metacopy_data; + u8 actual_digest[FS_VERITY_MAX_DIGEST_SIZE]; + int xattr_digest_size, digest_size; + int xattr_size, err; + u8 verity_algo; + + if (!ofs->config.verity_mode || + /* Verity only works on regular files */ + !S_ISREG(d_inode(metapath->dentry)->i_mode)) + return 0; + + xattr_size = ovl_check_metacopy_xattr(ofs, metapath, &metacopy_data); + if (xattr_size < 0) + return xattr_size; + + if (!xattr_size || !metacopy_data.digest_algo) { + if (ofs->config.verity_mode == OVL_VERITY_REQUIRE) { + pr_warn_ratelimited("metacopy file '%pd' has no digest specified\n", + metapath->dentry); + return -EIO; + } + return 0; + } + + xattr_digest_size = ovl_metadata_digest_size(&metacopy_data); + + err = ovl_ensure_verity_loaded(datapath); + if (err < 0) { + pr_warn_ratelimited("lower file '%pd' failed to load fs-verity info\n", + datapath->dentry); + return -EIO; + } + + digest_size = fsverity_get_digest(d_inode(datapath->dentry), actual_digest, + &verity_algo, NULL); + if (digest_size == 0) { + pr_warn_ratelimited("lower file '%pd' has no fs-verity digest\n", datapath->dentry); + return -EIO; + } + + if (xattr_digest_size != digest_size || + metacopy_data.digest_algo != verity_algo || + memcmp(metacopy_data.digest, actual_digest, xattr_digest_size) != 0) { + pr_warn_ratelimited("lower file '%pd' has the wrong fs-verity digest\n", + datapath->dentry); + return -EIO; + } + + return 0; +} + +int ovl_get_verity_digest(struct ovl_fs *ofs, const struct path *src, + struct ovl_metacopy *metacopy) +{ + int err, digest_size; + + if (!ofs->config.verity_mode || !S_ISREG(d_inode(src->dentry)->i_mode)) + return 0; + + err = ovl_ensure_verity_loaded(src); + if (err < 0) { + pr_warn_ratelimited("lower file '%pd' failed to load fs-verity info\n", + src->dentry); + return -EIO; + } + + digest_size = fsverity_get_digest(d_inode(src->dentry), + metacopy->digest, &metacopy->digest_algo, NULL); + if (digest_size == 0 || + WARN_ON_ONCE(digest_size > FS_VERITY_MAX_DIGEST_SIZE)) { + if (ofs->config.verity_mode == OVL_VERITY_REQUIRE) { + pr_warn_ratelimited("lower file '%pd' has no fs-verity digest\n", + src->dentry); + return -EIO; + } + return 0; + } + + metacopy->len += digest_size; + return 0; +} + /* * ovl_sync_status() - Check fs sync status for volatile mounts * @@ -1101,22 +1518,23 @@ void ovl_copyattr(struct inode *inode) { struct path realpath; struct inode *realinode; - struct user_namespace *real_mnt_userns; + struct mnt_idmap *real_idmap; vfsuid_t vfsuid; vfsgid_t vfsgid; - ovl_i_path_real(inode, &realpath); - realinode = d_inode(realpath.dentry); - real_mnt_userns = mnt_user_ns(realpath.mnt); + realinode = ovl_i_path_real(inode, &realpath); + real_idmap = mnt_idmap(realpath.mnt); - vfsuid = i_uid_into_vfsuid(real_mnt_userns, realinode); - vfsgid = i_gid_into_vfsgid(real_mnt_userns, realinode); + spin_lock(&inode->i_lock); + vfsuid = i_uid_into_vfsuid(real_idmap, realinode); + vfsgid = i_gid_into_vfsgid(real_idmap, realinode); inode->i_uid = vfsuid_into_kuid(vfsuid); inode->i_gid = vfsgid_into_kgid(vfsgid); inode->i_mode = realinode->i_mode; - inode->i_atime = realinode->i_atime; - inode->i_mtime = realinode->i_mtime; - inode->i_ctime = realinode->i_ctime; + inode_set_atime_to_ts(inode, inode_get_atime(realinode)); + inode_set_mtime_to_ts(inode, inode_get_mtime(realinode)); + inode_set_ctime_to_ts(inode, inode_get_ctime(realinode)); i_size_write(inode, i_size_read(realinode)); + spin_unlock(&inode->i_lock); } |
