From 57e3715cfa3fb01581555934d7191f8eabf740f4 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Mon, 30 Nov 2015 11:11:59 -0500 Subject: typo in fs/namei.c comment Signed-off-by: Al Viro --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 0c3974cd3ecd..e818ed135df0 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -657,7 +657,7 @@ static bool legitimize_links(struct nameidata *nd) * Path walking has 2 modes, rcu-walk and ref-walk (see * Documentation/filesystems/path-lookup.txt). In situations when we can't * continue in RCU mode, we attempt to drop out of rcu-walk mode and grab - * normal reference counts on dentries and vfsmounts to transition to rcu-walk + * normal reference counts on dentries and vfsmounts to transition to ref-walk * mode. Refcounts are grabbed at the last known good point before rcu-walk * got stuck, so ref-walk may continue from there. If this is not successful * (eg. a seqcount has changed), then failure is returned and it's up to caller -- cgit From 9e6697e26f9888cdb6088664d31c3772b0dff0a4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 5 Dec 2015 20:07:21 -0500 Subject: namei.c: fold set_root_rcu() into set_root() Signed-off-by: Al Viro --- fs/namei.c | 44 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 24 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index e818ed135df0..f89fe5f7eac3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -806,20 +806,20 @@ static int complete_walk(struct nameidata *nd) } static void set_root(struct nameidata *nd) -{ - get_fs_root(current->fs, &nd->root); -} - -static void set_root_rcu(struct nameidata *nd) { struct fs_struct *fs = current->fs; - unsigned seq; - do { - seq = read_seqcount_begin(&fs->seq); - nd->root = fs->root; - nd->root_seq = __read_seqcount_begin(&nd->root.dentry->d_seq); - } while (read_seqcount_retry(&fs->seq, seq)); + if (nd->flags & LOOKUP_RCU) { + unsigned seq; + + do { + seq = read_seqcount_begin(&fs->seq); + nd->root = fs->root; + nd->root_seq = __read_seqcount_begin(&nd->root.dentry->d_seq); + } while (read_seqcount_retry(&fs->seq, seq)); + } else { + get_fs_root(fs, &nd->root); + } } static void path_put_conditional(struct path *path, struct nameidata *nd) @@ -1015,10 +1015,10 @@ const char *get_link(struct nameidata *nd) } } if (*res == '/') { + if (!nd->root.mnt) + set_root(nd); if (nd->flags & LOOKUP_RCU) { struct dentry *d; - if (!nd->root.mnt) - set_root_rcu(nd); nd->path = nd->root; d = nd->path.dentry; nd->inode = d->d_inode; @@ -1026,8 +1026,6 @@ const char *get_link(struct nameidata *nd) if (unlikely(read_seqcount_retry(&d->d_seq, nd->seq))) return ERR_PTR(-ECHILD); } else { - if (!nd->root.mnt) - set_root(nd); path_put(&nd->path); nd->path = nd->root; path_get(&nd->root); @@ -1294,8 +1292,6 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, static int follow_dotdot_rcu(struct nameidata *nd) { struct inode *inode = nd->inode; - if (!nd->root.mnt) - set_root_rcu(nd); while (1) { if (path_equal(&nd->path, &nd->root)) @@ -1415,9 +1411,6 @@ static void follow_mount(struct path *path) static int follow_dotdot(struct nameidata *nd) { - if (!nd->root.mnt) - set_root(nd); - while(1) { struct dentry *old = nd->path.dentry; @@ -1655,6 +1648,8 @@ static inline int may_lookup(struct nameidata *nd) static inline int handle_dots(struct nameidata *nd, int type) { if (type == LAST_DOTDOT) { + if (!nd->root.mnt) + set_root(nd); if (nd->flags & LOOKUP_RCU) { return follow_dotdot_rcu(nd); } else @@ -2023,15 +2018,16 @@ static const char *path_init(struct nameidata *nd, unsigned flags) nd->m_seq = read_seqbegin(&mount_lock); if (*s == '/') { - if (flags & LOOKUP_RCU) { + if (flags & LOOKUP_RCU) rcu_read_lock(); - set_root_rcu(nd); + set_root(nd); + if (flags & LOOKUP_RCU) { nd->seq = nd->root_seq; + nd->path = nd->root; } else { - set_root(nd); path_get(&nd->root); + nd->path = nd->root; } - nd->path = nd->root; } else if (nd->dfd == AT_FDCWD) { if (flags & LOOKUP_RCU) { struct fs_struct *fs = current->fs; -- cgit From ef55d91700d54f29b9ac301658b5b8f377ef3206 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 5 Dec 2015 20:25:06 -0500 Subject: path_init(): set nd->inode earlier in cwd-relative case that allows to kill the recheck of nd->seq on the way out in this case, and this check on the way out is left only for absolute pathnames. Signed-off-by: Al Viro --- fs/namei.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index f89fe5f7eac3..a08018b1485c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2028,6 +2028,15 @@ static const char *path_init(struct nameidata *nd, unsigned flags) path_get(&nd->root); nd->path = nd->root; } + nd->inode = nd->path.dentry->d_inode; + if (!(flags & LOOKUP_RCU)) + return s; + if (likely(!read_seqcount_retry(&nd->path.dentry->d_seq, nd->seq))) + return s; + if (!(nd->flags & LOOKUP_ROOT)) + nd->root.mnt = NULL; + rcu_read_unlock(); + return ERR_PTR(-ECHILD); } else if (nd->dfd == AT_FDCWD) { if (flags & LOOKUP_RCU) { struct fs_struct *fs = current->fs; @@ -2038,11 +2047,14 @@ static const char *path_init(struct nameidata *nd, unsigned flags) do { seq = read_seqcount_begin(&fs->seq); nd->path = fs->pwd; + nd->inode = nd->path.dentry->d_inode; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); } while (read_seqcount_retry(&fs->seq, seq)); } else { get_fs_pwd(current->fs, &nd->path); + nd->inode = nd->path.dentry->d_inode; } + return s; } else { /* Caller must check execute permissions on the starting path component */ struct fd f = fdget_raw(nd->dfd); @@ -2072,16 +2084,6 @@ static const char *path_init(struct nameidata *nd, unsigned flags) fdput(f); return s; } - - nd->inode = nd->path.dentry->d_inode; - if (!(flags & LOOKUP_RCU)) - return s; - if (likely(!read_seqcount_retry(&nd->path.dentry->d_seq, nd->seq))) - return s; - if (!(nd->flags & LOOKUP_ROOT)) - nd->root.mnt = NULL; - rcu_read_unlock(); - return ERR_PTR(-ECHILD); } static const char *trailing_symlink(struct nameidata *nd) -- cgit From 248fb5b9557aa117f0b8c68b8cf2ce436e4d839d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 5 Dec 2015 20:51:58 -0500 Subject: namei.c: take "jump to root" into a new helper ... and use it both in path_init() (for absolute pathnames) and get_link() (for absolute symlinks). Signed-off-by: Al Viro --- fs/namei.c | 54 ++++++++++++++++++++++++++---------------------------- 1 file changed, 26 insertions(+), 28 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index a08018b1485c..0baf64b116bd 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -841,6 +841,26 @@ static inline void path_to_nameidata(const struct path *path, nd->path.dentry = path->dentry; } +static int nd_jump_root(struct nameidata *nd) +{ + if (nd->flags & LOOKUP_RCU) { + struct dentry *d; + nd->path = nd->root; + d = nd->path.dentry; + nd->inode = d->d_inode; + nd->seq = nd->root_seq; + if (unlikely(read_seqcount_retry(&d->d_seq, nd->seq))) + return -ECHILD; + } else { + path_put(&nd->path); + nd->path = nd->root; + path_get(&nd->path); + nd->inode = nd->path.dentry->d_inode; + } + nd->flags |= LOOKUP_JUMPED; + return 0; +} + /* * Helper to directly jump to a known parsed path from ->follow_link, * caller must have taken a reference to path beforehand. @@ -1017,21 +1037,8 @@ const char *get_link(struct nameidata *nd) if (*res == '/') { if (!nd->root.mnt) set_root(nd); - if (nd->flags & LOOKUP_RCU) { - struct dentry *d; - nd->path = nd->root; - d = nd->path.dentry; - nd->inode = d->d_inode; - nd->seq = nd->root_seq; - if (unlikely(read_seqcount_retry(&d->d_seq, nd->seq))) - return ERR_PTR(-ECHILD); - } else { - path_put(&nd->path); - nd->path = nd->root; - path_get(&nd->root); - nd->inode = nd->path.dentry->d_inode; - } - nd->flags |= LOOKUP_JUMPED; + if (unlikely(nd_jump_root(nd))) + return ERR_PTR(-ECHILD); while (unlikely(*++res == '/')) ; } @@ -2015,26 +2022,17 @@ static const char *path_init(struct nameidata *nd, unsigned flags) } nd->root.mnt = NULL; + nd->path.mnt = NULL; + nd->path.dentry = NULL; nd->m_seq = read_seqbegin(&mount_lock); if (*s == '/') { if (flags & LOOKUP_RCU) rcu_read_lock(); set_root(nd); - if (flags & LOOKUP_RCU) { - nd->seq = nd->root_seq; - nd->path = nd->root; - } else { - path_get(&nd->root); - nd->path = nd->root; - } - nd->inode = nd->path.dentry->d_inode; - if (!(flags & LOOKUP_RCU)) - return s; - if (likely(!read_seqcount_retry(&nd->path.dentry->d_seq, nd->seq))) + if (likely(!nd_jump_root(nd))) return s; - if (!(nd->flags & LOOKUP_ROOT)) - nd->root.mnt = NULL; + nd->root.mnt = NULL; rcu_read_unlock(); return ERR_PTR(-ECHILD); } else if (nd->dfd == AT_FDCWD) { -- cgit From e1a63bbc40c00d5198b1c1d133b139e962f5e872 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 5 Dec 2015 21:06:33 -0500 Subject: restore_nameidata(): no need to clear now->stack microoptimization: in all callers *now is in the frame we are about to leave. Signed-off-by: Al Viro --- fs/namei.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 0baf64b116bd..9e102aca3480 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -534,10 +534,8 @@ static void restore_nameidata(void) current->nameidata = old; if (old) old->total_link_count = now->total_link_count; - if (now->stack != now->internal) { + if (now->stack != now->internal) kfree(now->stack); - now->stack = now->internal; - } } static int __nd_alloc_stack(struct nameidata *nd) -- cgit From 62fb4a155f745285d9b1640c3ef53bf90c12f17c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 26 Dec 2015 22:33:24 -0500 Subject: don't carry MAY_OPEN in op->acc_mode Signed-off-by: Al Viro --- fs/namei.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 9e102aca3480..45c702edce3c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2663,10 +2663,6 @@ static int may_open(struct path *path, int acc_mode, int flag) struct inode *inode = dentry->d_inode; int error; - /* O_PATH? */ - if (!acc_mode) - return 0; - if (!inode) return -ENOENT; @@ -2688,7 +2684,7 @@ static int may_open(struct path *path, int acc_mode, int flag) break; } - error = inode_permission(inode, acc_mode); + error = inode_permission(inode, MAY_OPEN | acc_mode); if (error) return error; @@ -2880,7 +2876,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, if (*opened & FILE_CREATED) { WARN_ON(!(open_flag & O_CREAT)); fsnotify_create(dir, dentry); - acc_mode = MAY_OPEN; + acc_mode = 0; } error = may_open(&file->f_path, acc_mode, open_flag); if (error) @@ -3093,7 +3089,7 @@ retry_lookup: /* Don't check for write permission, don't truncate */ open_flag &= ~O_TRUNC; will_truncate = false; - acc_mode = MAY_OPEN; + acc_mode = 0; path_to_nameidata(&path, nd); goto finish_open_created; } @@ -3177,10 +3173,11 @@ finish_open: got_write = true; } finish_open_created: - error = may_open(&nd->path, acc_mode, open_flag); - if (error) - goto out; - + if (likely(!(open_flag & O_PATH))) { + error = may_open(&nd->path, acc_mode, open_flag); + if (error) + goto out; + } BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ error = vfs_open(&nd->path, file, current_cred()); if (!error) { @@ -3267,7 +3264,7 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, goto out2; audit_inode(nd->name, child, 0); /* Don't check for other permissions, the inode was just created */ - error = may_open(&path, MAY_OPEN, op->open_flag); + error = may_open(&path, 0, op->open_flag); if (error) goto out2; file->f_path.mnt = path.mnt; -- cgit From bbddca8e8fac07ece3938e03526b5d00fa791a4c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jan 2016 16:08:20 -0500 Subject: nfsd: don't hold i_mutex over userspace upcalls We need information about exports when crossing mountpoints during lookup or NFSv4 readdir. If we don't already have that information cached, we may have to ask (and wait for) rpc.mountd. In both cases we currently hold the i_mutex on the parent of the directory we're asking rpc.mountd about. We've seen situations where rpc.mountd performs some operation on that directory that tries to take the i_mutex again, resulting in deadlock. With some care, we may be able to avoid that in rpc.mountd. But it seems better just to avoid holding a mutex while waiting on userspace. It appears that lookup_one_len is pretty much the only operation that needs the i_mutex. So we could just drop the i_mutex elsewhere and do something like mutex_lock() lookup_one_len() mutex_unlock() In many cases though the lookup would have been cached and not required the i_mutex, so it's more efficient to create a lookup_one_len() variant that only takes the i_mutex when necessary. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields Signed-off-by: Al Viro --- fs/namei.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 45c702edce3c..1067f7a0287a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2272,6 +2272,8 @@ EXPORT_SYMBOL(vfs_path_lookup); * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. + * + * The caller must hold base->i_mutex. */ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) { @@ -2315,6 +2317,75 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) } EXPORT_SYMBOL(lookup_one_len); +/** + * lookup_one_len_unlocked - filesystem helper to lookup single pathname component + * @name: pathname component to lookup + * @base: base directory to lookup from + * @len: maximum length @len should be interpreted to + * + * Note that this routine is purely a helper for filesystem usage and should + * not be called by generic code. + * + * Unlike lookup_one_len, it should be called without the parent + * i_mutex held, and will take the i_mutex itself if necessary. + */ +struct dentry *lookup_one_len_unlocked(const char *name, + struct dentry *base, int len) +{ + struct qstr this; + unsigned int c; + int err; + struct dentry *ret; + + this.name = name; + this.len = len; + this.hash = full_name_hash(name, len); + if (!len) + return ERR_PTR(-EACCES); + + if (unlikely(name[0] == '.')) { + if (len < 2 || (len == 2 && name[1] == '.')) + return ERR_PTR(-EACCES); + } + + while (len--) { + c = *(const unsigned char *)name++; + if (c == '/' || c == '\0') + return ERR_PTR(-EACCES); + } + /* + * See if the low-level filesystem might want + * to use its own hash.. + */ + if (base->d_flags & DCACHE_OP_HASH) { + int err = base->d_op->d_hash(base, &this); + if (err < 0) + return ERR_PTR(err); + } + + err = inode_permission(base->d_inode, MAY_EXEC); + if (err) + return ERR_PTR(err); + + /* + * __d_lookup() is used to try to get a quick answer and avoid the + * mutex. A false-negative does no harm. + */ + ret = __d_lookup(base, &this); + if (ret && unlikely(ret->d_flags & DCACHE_OP_REVALIDATE)) { + dput(ret); + ret = NULL; + } + if (ret) + return ret; + + mutex_lock(&base->d_inode->i_mutex); + ret = __lookup_hash(&this, base, 0); + mutex_unlock(&base->d_inode->i_mutex); + return ret; +} +EXPORT_SYMBOL(lookup_one_len_unlocked); + int user_path_at_empty(int dfd, const char __user *name, unsigned flags, struct path *path, int *empty) { -- cgit