diff options
Diffstat (limited to 'fs/exportfs/expfs.c')
| -rw-r--r-- | fs/exportfs/expfs.c | 468 |
1 files changed, 283 insertions, 185 deletions
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 293bc2e47a73..d3e55de4a2a2 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Neil Brown 2002 * Copyright (C) Christoph Hellwig 2007 @@ -6,7 +7,7 @@ * and for mapping back from file handles to dentries. * * For details on why we do all the strange and hairy things in here - * take a look at Documentation/filesystems/nfs/Exporting. + * take a look at Documentation/filesystems/nfs/exporting.rst. */ #include <linux/exportfs.h> #include <linux/fs.h> @@ -15,8 +16,9 @@ #include <linux/mount.h> #include <linux/namei.h> #include <linux/sched.h> +#include <linux/cred.h> -#define dprintk(fmt, args...) do{}while(0) +#define dprintk(fmt, args...) pr_debug(fmt, ##args) static int get_name(const struct path *path, char *name, struct dentry *child); @@ -50,7 +52,7 @@ find_acceptable_alias(struct dentry *result, inode = result->d_inode; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { dget(dentry); spin_unlock(&inode->i_lock); if (toput) @@ -69,145 +71,165 @@ find_acceptable_alias(struct dentry *result, return NULL; } -/* - * Find root of a disconnected subtree and return a reference to it. - */ -static struct dentry * -find_disconnected_root(struct dentry *dentry) +static bool dentry_connected(struct dentry *dentry) { dget(dentry); - while (!IS_ROOT(dentry)) { + while (dentry->d_flags & DCACHE_DISCONNECTED) { struct dentry *parent = dget_parent(dentry); - if (!(parent->d_flags & DCACHE_DISCONNECTED)) { + dput(dentry); + if (dentry == parent) { dput(parent); - break; + return false; } + dentry = parent; + } + dput(dentry); + return true; +} + +static void clear_disconnected(struct dentry *dentry) +{ + dget(dentry); + while (dentry->d_flags & DCACHE_DISCONNECTED) { + struct dentry *parent = dget_parent(dentry); + + WARN_ON_ONCE(IS_ROOT(dentry)); + + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~DCACHE_DISCONNECTED; + spin_unlock(&dentry->d_lock); dput(dentry); dentry = parent; } - return dentry; + dput(dentry); +} + +/* + * Reconnect a directory dentry with its parent. + * + * This can return a dentry, or NULL, or an error. + * + * In the first case the returned dentry is the parent of the given + * dentry, and may itself need to be reconnected to its parent. + * + * In the NULL case, a concurrent VFS operation has either renamed or + * removed this directory. The concurrent operation has reconnected our + * dentry, so we no longer need to. + */ +static struct dentry *reconnect_one(struct vfsmount *mnt, + struct dentry *dentry, char *nbuf) +{ + struct dentry *parent; + struct dentry *tmp; + int err; + + parent = ERR_PTR(-EACCES); + if (mnt->mnt_sb->s_export_op->get_parent) + parent = mnt->mnt_sb->s_export_op->get_parent(dentry); + + if (IS_ERR(parent)) { + dprintk("get_parent of %lu failed, err %ld\n", + dentry->d_inode->i_ino, PTR_ERR(parent)); + return parent; + } + + dprintk("%s: find name of %lu in %lu\n", __func__, + dentry->d_inode->i_ino, parent->d_inode->i_ino); + err = exportfs_get_name(mnt, parent, nbuf, dentry); + if (err == -ENOENT) + goto out_reconnected; + if (err) + goto out_err; + dprintk("%s: found name: %s\n", __func__, nbuf); + tmp = lookup_one_unlocked(mnt_idmap(mnt), &QSTR(nbuf), parent); + if (IS_ERR(tmp)) { + dprintk("lookup failed: %ld\n", PTR_ERR(tmp)); + err = PTR_ERR(tmp); + goto out_err; + } + if (tmp != dentry) { + /* + * Somebody has renamed it since exportfs_get_name(); + * great, since it could've only been renamed if it + * got looked up and thus connected, and it would + * remain connected afterwards. We are done. + */ + dput(tmp); + goto out_reconnected; + } + dput(tmp); + if (IS_ROOT(dentry)) { + err = -ESTALE; + goto out_err; + } + return parent; + +out_err: + dput(parent); + return ERR_PTR(err); +out_reconnected: + dput(parent); + /* + * Someone must have renamed our entry into another parent, in + * which case it has been reconnected by the rename. + * + * Or someone removed it entirely, in which case filehandle + * lookup will succeed but the directory is now IS_DEAD and + * subsequent operations on it will fail. + * + * Alternatively, maybe there was no race at all, and the + * filesystem is just corrupt and gave us a parent that doesn't + * actually contain any entry pointing to this inode. So, + * double check that this worked and return -ESTALE if not: + */ + if (!dentry_connected(dentry)) + return ERR_PTR(-ESTALE); + return NULL; } /* * Make sure target_dir is fully connected to the dentry tree. * - * It may already be, as the flag isn't always updated when connection happens. + * On successful return, DCACHE_DISCONNECTED will be cleared on + * target_dir, and target_dir->d_parent->...->d_parent will reach the + * root of the filesystem. + * + * Whenever DCACHE_DISCONNECTED is unset, target_dir is fully connected. + * But the converse is not true: target_dir may have DCACHE_DISCONNECTED + * set but already be connected. In that case we'll verify the + * connection to root and then clear the flag. + * + * Note that target_dir could be removed by a concurrent operation. In + * that case reconnect_path may still succeed with target_dir fully + * connected, but further operations using the filehandle will fail when + * necessary (due to S_DEAD being set on the directory). */ static int reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf) { - int noprogress = 0; - int err = -ESTALE; + struct dentry *dentry, *parent; - /* - * It is possible that a confused file system might not let us complete - * the path to the root. For example, if get_parent returns a directory - * in which we cannot find a name for the child. While this implies a - * very sick filesystem we don't want it to cause knfsd to spin. Hence - * the noprogress counter. If we go through the loop 10 times (2 is - * probably enough) without getting anywhere, we just give up - */ - while (target_dir->d_flags & DCACHE_DISCONNECTED && noprogress++ < 10) { - struct dentry *pd = find_disconnected_root(target_dir); - - if (!IS_ROOT(pd)) { - /* must have found a connected parent - great */ - spin_lock(&pd->d_lock); - pd->d_flags &= ~DCACHE_DISCONNECTED; - spin_unlock(&pd->d_lock); - noprogress = 0; - } else if (pd == mnt->mnt_sb->s_root) { - printk(KERN_ERR "export: Eeek filesystem root is not connected, impossible\n"); - spin_lock(&pd->d_lock); - pd->d_flags &= ~DCACHE_DISCONNECTED; - spin_unlock(&pd->d_lock); - noprogress = 0; - } else { - /* - * We have hit the top of a disconnected path, try to - * find parent and connect. - * - * Racing with some other process renaming a directory - * isn't much of a problem here. If someone renames - * the directory, it will end up properly connected, - * which is what we want - * - * Getting the parent can't be supported generically, - * the locking is too icky. - * - * Instead we just return EACCES. If server reboots - * or inodes get flushed, you lose - */ - struct dentry *ppd = ERR_PTR(-EACCES); - struct dentry *npd; - - mutex_lock(&pd->d_inode->i_mutex); - if (mnt->mnt_sb->s_export_op->get_parent) - ppd = mnt->mnt_sb->s_export_op->get_parent(pd); - mutex_unlock(&pd->d_inode->i_mutex); - - if (IS_ERR(ppd)) { - err = PTR_ERR(ppd); - dprintk("%s: get_parent of %ld failed, err %d\n", - __func__, pd->d_inode->i_ino, err); - dput(pd); - break; - } + dentry = dget(target_dir); - dprintk("%s: find name of %lu in %lu\n", __func__, - pd->d_inode->i_ino, ppd->d_inode->i_ino); - err = exportfs_get_name(mnt, ppd, nbuf, pd); - if (err) { - dput(ppd); - dput(pd); - if (err == -ENOENT) - /* some race between get_parent and - * get_name? just try again - */ - continue; - break; - } - dprintk("%s: found name: %s\n", __func__, nbuf); - mutex_lock(&ppd->d_inode->i_mutex); - npd = lookup_one_len(nbuf, ppd, strlen(nbuf)); - mutex_unlock(&ppd->d_inode->i_mutex); - if (IS_ERR(npd)) { - err = PTR_ERR(npd); - dprintk("%s: lookup failed: %d\n", - __func__, err); - dput(ppd); - dput(pd); - break; - } - /* we didn't really want npd, we really wanted - * a side-effect of the lookup. - * hopefully, npd == pd, though it isn't really - * a problem if it isn't - */ - if (npd == pd) - noprogress = 0; - else - printk("%s: npd != pd\n", __func__); - dput(npd); - dput(ppd); - if (IS_ROOT(pd)) { - /* something went wrong, we have to give up */ - dput(pd); - break; - } - } - dput(pd); - } + while (dentry->d_flags & DCACHE_DISCONNECTED) { + BUG_ON(dentry == mnt->mnt_sb->s_root); - if (target_dir->d_flags & DCACHE_DISCONNECTED) { - /* something went wrong - oh-well */ - if (!err) - err = -ESTALE; - return err; - } + if (IS_ROOT(dentry)) + parent = reconnect_one(mnt, dentry, nbuf); + else + parent = dget_parent(dentry); + if (!parent) + break; + dput(dentry); + if (IS_ERR(parent)) + return PTR_ERR(parent); + dentry = parent; + } + dput(dentry); + clear_disconnected(target_dir); return 0; } @@ -215,7 +237,7 @@ struct getdents_callback { struct dir_context ctx; char *name; /* name that was found. It already points to a buffer NAME_MAX+1 is size */ - unsigned long ino; /* the inum we are looking for */ + u64 ino; /* the inum we are looking for */ int found; /* inode matched? */ int sequence; /* sequence counter */ }; @@ -224,25 +246,25 @@ struct getdents_callback { * A rather strange filldir function to capture * the name matching the specified inode number. */ -static int filldir_one(void * __buf, const char * name, int len, +static bool filldir_one(struct dir_context *ctx, const char *name, int len, loff_t pos, u64 ino, unsigned int d_type) { - struct getdents_callback *buf = __buf; - int result = 0; + struct getdents_callback *buf = + container_of(ctx, struct getdents_callback, ctx); buf->sequence++; - if (buf->ino == ino) { + if (buf->ino == ino && len <= NAME_MAX && !is_dot_dotdot(name, len)) { memcpy(buf->name, name, len); buf->name[len] = '\0'; buf->found = 1; - result = -1; + return false; // no more } - return result; + return true; } /** * get_name - default export_operations->get_name function - * @dentry: the directory in which to find a name + * @path: the directory in which to find a name * @name: a pointer to a %NAME_MAX+1 char buffer to store the name * @child: the dentry for the child directory. * @@ -255,10 +277,15 @@ static int get_name(const struct path *path, char *name, struct dentry *child) struct inode *dir = path->dentry->d_inode; int error; struct file *file; + struct kstat stat; + struct path child_path = { + .mnt = path->mnt, + .dentry = child, + }; struct getdents_callback buffer = { .ctx.actor = filldir_one, + .ctx.count = INT_MAX, .name = name, - .ino = child->d_inode->i_ino }; error = -ENOTDIR; @@ -268,6 +295,17 @@ static int get_name(const struct path *path, char *name, struct dentry *child) if (!dir->i_fop) goto out; /* + * inode->i_ino is unsigned long, kstat->ino is u64, so the + * former would be insufficient on 32-bit hosts when the + * filesystem supports 64-bit inode numbers. So we need to + * actually call ->getattr, not just read i_ino: + */ + error = vfs_getattr_nosec(&child_path, &stat, + STATX_INO, AT_STATX_SYNC_AS_STAT); + if (error) + return error; + buffer.ino = stat.ino; + /* * Open the directory ... */ file = dentry_open(path, O_RDONLY, cred); @@ -276,7 +314,7 @@ static int get_name(const struct path *path, char *name, struct dentry *child) goto out; error = -EINVAL; - if (!file->f_op->iterate) + if (!file->f_op->iterate_shared) goto out_close; buffer.sequence = 0; @@ -303,65 +341,84 @@ out: return error; } +#define FILEID_INO64_GEN_LEN 3 + /** - * export_encode_fh - default export_operations->encode_fh function + * exportfs_encode_ino64_fid - encode non-decodeable 64bit ino file id * @inode: the object to encode - * @fh: where to store the file handle fragment - * @max_len: maximum length to store there - * @parent: parent directory inode, if wanted + * @fid: where to store the file handle fragment + * @max_len: maximum length to store there (in 4 byte units) * - * This default encode_fh function assumes that the 32 inode number - * is suitable for locating an inode, and that the generation number - * can be used to check that it is still valid. It places them in the - * filehandle fragment where export_decode_fh expects to find them. + * This generic function is used to encode a non-decodeable file id for + * fanotify for filesystems that do not support NFS export. */ -static int export_encode_fh(struct inode *inode, struct fid *fid, - int *max_len, struct inode *parent) +static int exportfs_encode_ino64_fid(struct inode *inode, struct fid *fid, + int *max_len) { - int len = *max_len; - int type = FILEID_INO32_GEN; - - if (parent && (len < 4)) { - *max_len = 4; - return FILEID_INVALID; - } else if (len < 2) { - *max_len = 2; + if (*max_len < FILEID_INO64_GEN_LEN) { + *max_len = FILEID_INO64_GEN_LEN; return FILEID_INVALID; } - len = 2; - fid->i32.ino = inode->i_ino; - fid->i32.gen = inode->i_generation; - if (parent) { - fid->i32.parent_ino = parent->i_ino; - fid->i32.parent_gen = parent->i_generation; - len = 4; - type = FILEID_INO32_GEN_PARENT; - } - *max_len = len; - return type; + fid->i64.ino = inode->i_ino; + fid->i64.gen = inode->i_generation; + *max_len = FILEID_INO64_GEN_LEN; + + return FILEID_INO64_GEN; } +/** + * exportfs_encode_inode_fh - encode a file handle from inode + * @inode: the object to encode + * @fid: where to store the file handle fragment + * @max_len: maximum length to store there + * @parent: parent directory inode, if wanted + * @flags: properties of the requested file handle + * + * Returns an enum fid_type or a negative errno. + */ int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, - int *max_len, struct inode *parent) + int *max_len, struct inode *parent, int flags) { const struct export_operations *nop = inode->i_sb->s_export_op; + enum fid_type type; + + if (!exportfs_can_encode_fh(nop, flags)) + return -EOPNOTSUPP; - if (nop && nop->encode_fh) - return nop->encode_fh(inode, fid->raw, max_len, parent); + if (!nop && (flags & EXPORT_FH_FID)) + type = exportfs_encode_ino64_fid(inode, fid, max_len); + else + type = nop->encode_fh(inode, fid->raw, max_len, parent); + + if (type > 0 && FILEID_USER_FLAGS(type)) { + pr_warn_once("%s: unexpected fh type value 0x%x from fstype %s.\n", + __func__, type, inode->i_sb->s_type->name); + return -EINVAL; + } + + return type; - return export_encode_fh(inode, fid, max_len, parent); } EXPORT_SYMBOL_GPL(exportfs_encode_inode_fh); +/** + * exportfs_encode_fh - encode a file handle from dentry + * @dentry: the object to encode + * @fid: where to store the file handle fragment + * @max_len: maximum length to store there + * @flags: properties of the requested file handle + * + * Returns an enum fid_type or a negative errno. + */ int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, - int connectable) + int flags) { int error; struct dentry *p = NULL; struct inode *inode = dentry->d_inode, *parent = NULL; - if (connectable && !S_ISDIR(inode->i_mode)) { + if ((flags & EXPORT_FH_CONNECTABLE) && !S_ISDIR(inode->i_mode)) { p = dget_parent(dentry); /* * note that while p might've ceased to be our parent already, @@ -370,34 +427,51 @@ int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, parent = p->d_inode; } - error = exportfs_encode_inode_fh(inode, fid, max_len, parent); + error = exportfs_encode_inode_fh(inode, fid, max_len, parent, flags); dput(p); return error; } EXPORT_SYMBOL_GPL(exportfs_encode_fh); -struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, - int fh_len, int fileid_type, - int (*acceptable)(void *, struct dentry *), void *context) +struct dentry * +exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, + int fileid_type, unsigned int flags, + int (*acceptable)(void *, struct dentry *), + void *context) { const struct export_operations *nop = mnt->mnt_sb->s_export_op; struct dentry *result, *alias; char nbuf[NAME_MAX+1]; int err; + if (fileid_type < 0 || FILEID_USER_FLAGS(fileid_type)) + return ERR_PTR(-EINVAL); + /* * Try to get any dentry for the given file handle from the filesystem. */ - if (!nop || !nop->fh_to_dentry) + if (!exportfs_can_decode_fh(nop)) return ERR_PTR(-ESTALE); result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); - if (!result) - result = ERR_PTR(-ESTALE); - if (IS_ERR(result)) + if (IS_ERR_OR_NULL(result)) return result; - if (S_ISDIR(result->d_inode->i_mode)) { + if ((flags & EXPORT_FH_DIR_ONLY) && !d_is_dir(result)) { + err = -ENOTDIR; + goto err_result; + } + + /* + * If no acceptance criteria was specified by caller, a disconnected + * dentry is also accepatable. Callers may use this mode to query if + * file handle is stale or to get a reference to an inode without + * risking the high overhead caused by directory reconnect. + */ + if (!acceptable) + return result; + + if (d_is_dir(result)) { /* * This request is for a directory. * @@ -470,26 +544,31 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, * inode is actually connected to the parent. */ err = exportfs_get_name(mnt, target_dir, nbuf, result); - if (!err) { - mutex_lock(&target_dir->d_inode->i_mutex); - nresult = lookup_one_len(nbuf, target_dir, - strlen(nbuf)); - mutex_unlock(&target_dir->d_inode->i_mutex); - if (!IS_ERR(nresult)) { - if (nresult->d_inode) { - dput(result); - result = nresult; - } else - dput(nresult); - } + if (err) { + dput(target_dir); + goto err_result; } + nresult = lookup_one_unlocked(mnt_idmap(mnt), &QSTR(nbuf), target_dir); + if (!IS_ERR(nresult)) { + if (unlikely(nresult->d_inode != result->d_inode)) { + dput(nresult); + nresult = ERR_PTR(-ESTALE); + } + } /* * At this point we are done with the parent, but it's pinned * by the child dentry anyway. */ dput(target_dir); + if (IS_ERR(nresult)) { + err = PTR_ERR(nresult); + goto err_result; + } + dput(result); + result = nresult; + /* * And finally make sure the dentry is actually acceptable * to NFSD. @@ -507,6 +586,25 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, dput(result); return ERR_PTR(err); } +EXPORT_SYMBOL_GPL(exportfs_decode_fh_raw); + +struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, + int fh_len, int fileid_type, + int (*acceptable)(void *, struct dentry *), + void *context) +{ + struct dentry *ret; + + ret = exportfs_decode_fh_raw(mnt, fid, fh_len, fileid_type, 0, + acceptable, context); + if (IS_ERR_OR_NULL(ret)) { + if (ret == ERR_PTR(-ENOMEM)) + return ret; + return ERR_PTR(-ESTALE); + } + return ret; +} EXPORT_SYMBOL_GPL(exportfs_decode_fh); +MODULE_DESCRIPTION("Code mapping from inodes to file handles"); MODULE_LICENSE("GPL"); |
