From d41efb522e902364ab09c782d511c1bedc388ddd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 4 Nov 2019 22:30:52 -0500 Subject: fs/namei.c: pull positivity check into follow_managed() There are 4 callers; two proceed to check if result is positive and fail with ENOENT if it isn't; one (in handle_lookup_down()) is guaranteed to yield positive and one (in lookup_fast()) is _preceded_ by positivity check. However, follow_managed() on a negative dentry is a (fairly cheap) no-op on anything other than autofs. And negative autofs dentries are never hashed, so lookup_fast() is not going to run into one of those. Moreover, successful follow_managed() on a _positive_ dentry never yields a negative one (and we significantly rely upon that in callers of lookup_fast()). In other words, we can easily transpose the positivity check and the call of follow_managed() in lookup_fast(). And that allows to fold the positivity check *into* follow_managed(), simplifying life for the code downstream of its calls. Signed-off-by: Al Viro --- fs/namei.c | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 671c3c1a3425..ef55155d152f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1206,25 +1206,25 @@ static int follow_automount(struct path *path, struct nameidata *nd, * - Flagged as automount point * * This may only be called in refwalk mode. + * On success path->dentry is known positive. * * Serialization is taken care of in namespace.c */ static int follow_managed(struct path *path, struct nameidata *nd) { struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */ - unsigned managed; + unsigned flags; bool need_mntput = false; int ret = 0; /* Given that we're not holding a lock here, we retain the value in a * local variable for each dentry as we look at it so that we don't see * the components of that value change under us */ - while (managed = READ_ONCE(path->dentry->d_flags), - managed &= DCACHE_MANAGED_DENTRY, - unlikely(managed != 0)) { + while (flags = READ_ONCE(path->dentry->d_flags), + unlikely(flags & DCACHE_MANAGED_DENTRY)) { /* Allow the filesystem to manage the transit without i_mutex * being held. */ - if (managed & DCACHE_MANAGE_TRANSIT) { + if (flags & DCACHE_MANAGE_TRANSIT) { BUG_ON(!path->dentry->d_op); BUG_ON(!path->dentry->d_op->d_manage); ret = path->dentry->d_op->d_manage(path, false); @@ -1233,7 +1233,7 @@ static int follow_managed(struct path *path, struct nameidata *nd) } /* Transit to a mounted filesystem. */ - if (managed & DCACHE_MOUNTED) { + if (flags & DCACHE_MOUNTED) { struct vfsmount *mounted = lookup_mnt(path); if (mounted) { dput(path->dentry); @@ -1252,7 +1252,7 @@ static int follow_managed(struct path *path, struct nameidata *nd) } /* Handle an automount point */ - if (managed & DCACHE_NEED_AUTOMOUNT) { + if (flags & DCACHE_NEED_AUTOMOUNT) { ret = follow_automount(path, nd, &need_mntput); if (ret < 0) break; @@ -1265,10 +1265,12 @@ static int follow_managed(struct path *path, struct nameidata *nd) if (need_mntput && path->mnt == mnt) mntput(path->mnt); - if (ret == -EISDIR || !ret) - ret = 1; if (need_mntput) nd->flags |= LOOKUP_JUMPED; + if (ret == -EISDIR || !ret) + ret = 1; + if (ret > 0 && unlikely(d_flags_negative(flags))) + ret = -ENOENT; if (unlikely(ret < 0)) path_put_conditional(path, nd); return ret; @@ -1617,10 +1619,6 @@ static int lookup_fast(struct nameidata *nd, dput(dentry); return status; } - if (unlikely(d_is_negative(dentry))) { - dput(dentry); - return -ENOENT; - } path->mnt = mnt; path->dentry = dentry; @@ -1807,11 +1805,6 @@ static int walk_component(struct nameidata *nd, int flags) if (unlikely(err < 0)) return err; - if (unlikely(d_is_negative(path.dentry))) { - path_to_nameidata(&path, nd); - return -ENOENT; - } - seq = 0; /* we are already out of RCU mode */ inode = d_backing_inode(path.dentry); } @@ -3352,11 +3345,6 @@ static int do_last(struct nameidata *nd, if (unlikely(error < 0)) return error; - if (unlikely(d_is_negative(path.dentry))) { - path_to_nameidata(&path, nd); - return -ENOENT; - } - /* * create/update audit record if it already exists. */ -- cgit From 6c2d4798a8d16cf4f3a28c3cd4af4f1dcbbb4d04 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 31 Oct 2019 01:21:58 -0400 Subject: new helper: lookup_positive_unlocked() Most of the callers of lookup_one_len_unlocked() treat negatives are ERR_PTR(-ENOENT). Provide a helper that would do just that. Note that a pinned positive dentry remains positive - it's ->d_inode is stable, etc.; a pinned _negative_ dentry can become positive at any point as long as you are not holding its parent at least shared. So using lookup_one_len_unlocked() needs to be careful; lookup_positive_unlocked() is safer and that's what the callers end up open-coding anyway. Signed-off-by: Al Viro --- fs/namei.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index ef55155d152f..6f72fb7ef5ad 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2557,6 +2557,26 @@ struct dentry *lookup_one_len_unlocked(const char *name, } EXPORT_SYMBOL(lookup_one_len_unlocked); +/* + * Like lookup_one_len_unlocked(), except that it yields ERR_PTR(-ENOENT) + * on negatives. Returns known positive or ERR_PTR(); that's what + * most of the users want. Note that pinned negative with unlocked parent + * _can_ become positive at any time, so callers of lookup_one_len_unlocked() + * need to be very careful; pinned positives have ->d_inode stable, so + * this one avoids such problems. + */ +struct dentry *lookup_positive_unlocked(const char *name, + struct dentry *base, int len) +{ + struct dentry *ret = lookup_one_len_unlocked(name, base, len); + if (!IS_ERR(ret) && d_is_negative(ret)) { + dput(ret); + ret = ERR_PTR(-ENOENT); + } + return ret; +} +EXPORT_SYMBOL(lookup_positive_unlocked); + #ifdef CONFIG_UNIX98_PTYS int path_pts(struct path *path) { -- cgit From 2fa6b1e01a9b1a54769c394f06cd72c3d12a2d48 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 12 Nov 2019 16:13:06 -0500 Subject: fs/namei.c: fix missing barriers when checking positivity Pinned negative dentries can, generally, be made positive by another thread. Conditions that prevent that are * ->d_lock on dentry in question * parent directory held at least shared * nobody else could have observed the address of dentry Most of the places working with those fall into one of those categories; however, d_lookup() and friends need to be used with some care. Fortunately, there's not a lot of call sites, and with few exceptions all of those fall under one of the cases above. Exceptions are all in fs/namei.c - in lookup_fast(), lookup_dcache() and mountpoint_last(). Another one is lookup_slow() - there dcache lookup is done with parent held shared, but the result is used after we'd drop the lock. The same happens in do_last() - the lookup (in lookup_one()) is done with parent locked, but result is used after unlocking. lookup_fast(), do_last() and mountpoint_last() flat-out reject negatives. Most of lookup_dcache() calls are made with parent locked at least shared; the only exception is lookup_one_len_unlocked(). It might return pinned negative, needs serious care from callers. Fortunately, almost nobody calls it directly anymore; all but two callers have converted to lookup_positive_unlocked(), which rejects negatives. lookup_slow() is called by the same lookup_one_len_unlocked() (see above), mountpoint_last() and walk_component(). In those two negatives are rejected. In other words, there is a small set of places where we need to check carefully if a pinned potentially negative dentry is, in fact, positive. After that check we want to be sure that both ->d_inode and type bits in ->d_flags are stable and observed. The set consists of follow_managed() (where the rejection happens for lookup_fast(), walk_component() and do_last()), last_mountpoint() and lookup_positive_unlocked(). Solution: 1) transition from negative to positive (in __d_set_inode_and_type()) stores ->d_inode, then uses smp_store_release() to set ->d_flags type bits. 2) aforementioned 3 places in fs/namei.c fetch ->d_flags with smp_load_acquire() and bugger off if it type bits say "negative". That way anyone downstream of those checks has dentry know positive pinned, with ->d_inode and type bits of ->d_flags stable and observed. I considered splitting off d_lookup_positive(), so that the checks could be done right there, under ->d_lock. However, that leads to massive duplication of rather subtle code in fs/namei.c and fs/dcache.c. It's worse than it might seem, thanks to autofs ->d_manage() getting involved ;-/ No matter what, autofs_d_manage()/autofs_d_automount() must live with the possibility of pinned negative dentry passed their way, becoming positive under them - that's the intended behaviour when lookup comes in the middle of automount in progress, so we can't keep them out of the area that has to deal with those, more's the pity... Reported-by: Ritesh Harjani Signed-off-by: Al Viro --- fs/namei.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 6f72fb7ef5ad..117950657e63 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1220,7 +1220,7 @@ static int follow_managed(struct path *path, struct nameidata *nd) /* Given that we're not holding a lock here, we retain the value in a * local variable for each dentry as we look at it so that we don't see * the components of that value change under us */ - while (flags = READ_ONCE(path->dentry->d_flags), + while (flags = smp_load_acquire(&path->dentry->d_flags), unlikely(flags & DCACHE_MANAGED_DENTRY)) { /* Allow the filesystem to manage the transit without i_mutex * being held. */ @@ -2569,7 +2569,7 @@ struct dentry *lookup_positive_unlocked(const char *name, struct dentry *base, int len) { struct dentry *ret = lookup_one_len_unlocked(name, base, len); - if (!IS_ERR(ret) && d_is_negative(ret)) { + if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) { dput(ret); ret = ERR_PTR(-ENOENT); } @@ -2671,7 +2671,7 @@ mountpoint_last(struct nameidata *nd) return PTR_ERR(path.dentry); } } - if (d_is_negative(path.dentry)) { + if (d_flags_negative(smp_load_acquire(&path.dentry->d_flags))) { dput(path.dentry); return -ENOENT; } -- cgit