summaryrefslogtreecommitdiff
path: root/fs/namei.c
diff options
context:
space:
mode:
authorAl Viro <viro@zeniv.linux.org.uk>2022-07-04 18:12:39 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2022-07-06 13:14:42 -0400
commit03fa86e9f79d8b9a6aa28965829a4a8646139a0a (patch)
treed44fb5a33c51fc798cfb5f416b9639321f36fcaf /fs/namei.c
parent6e180327153071281dbbf6a16759e49862debdca (diff)
namei: stash the sampled ->d_seq into nameidata
New field: nd->next_seq. Set to 0 outside of RCU mode, holds the sampled value for the next dentry to be considered. Used instead of an arseload of local variables, arguments, etc. step_into() has lost seq argument; nd->next_seq is used, so dentry passed to it must be the one ->next_seq is about. There are two requirements for RCU pathwalk: 1) it should not give a hard failure (other than -ECHILD) unless non-RCU pathwalk might fail that way given suitable timings. 2) it should not succeed unless non-RCU pathwalk might succeed with the same end location given suitable timings. The use of seq numbers is the way we achieve that. Invariant we want to maintain is: if RCU pathwalk can reach the state with given nd->path, nd->inode and nd->seq after having traversed some part of pathname, it must be possible for non-RCU pathwalk to reach the same nd->path and nd->inode after having traversed the same part of pathname, and observe the nd->path.dentry->d_seq equal to what RCU pathwalk has in nd->seq For transition from parent to child, we sample child's ->d_seq and verify that parent's ->d_seq remains unchanged. Anything that disrupts parent-child relationship would've bumped ->d_seq on both. For transitions from child to parent we sample parent's ->d_seq and verify that child's ->d_seq has not changed. Same reasoning as for the previous case applies. For transition from mountpoint to root of mounted we sample the ->d_seq of root and verify that nobody has touched mount_lock since the beginning of pathwalk. That guarantees that mount we'd found had been there all along, with these mountpoint and root of the mounted. It would be possible for a non-RCU pathwalk to reach the previous state, find the same mount and observe its root at the moment we'd sampled ->d_seq of that For transitions from root of mounted to mountpoint we sample ->d_seq of mountpoint and verify that mount_lock had not been touched since the beginning of pathwalk. The same reasoning as in the previous case applies. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/namei.c')
-rw-r--r--fs/namei.c98
1 files changed, 48 insertions, 50 deletions
diff --git a/fs/namei.c b/fs/namei.c
index 4b7a2147c207..8dd7874816cc 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -567,7 +567,7 @@ struct nameidata {
struct path root;
struct inode *inode; /* path.dentry.d_inode */
unsigned int flags, state;
- unsigned seq, m_seq, r_seq;
+ unsigned seq, next_seq, m_seq, r_seq;
int last_type;
unsigned depth;
int total_link_count;
@@ -668,6 +668,7 @@ static void drop_links(struct nameidata *nd)
static void leave_rcu(struct nameidata *nd)
{
nd->flags &= ~LOOKUP_RCU;
+ nd->seq = nd->next_seq = 0;
rcu_read_unlock();
}
@@ -792,7 +793,6 @@ out:
* try_to_unlazy_next - try to switch to ref-walk mode.
* @nd: nameidata pathwalk data
* @dentry: next dentry to step into
- * @seq: seq number to check @dentry against
* Returns: true on success, false on failure
*
* Similar to try_to_unlazy(), but here we have the next dentry already
@@ -801,7 +801,7 @@ out:
* Nothing should touch nameidata between try_to_unlazy_next() failure and
* terminate_walk().
*/
-static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsigned seq)
+static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry)
{
int res;
BUG_ON(!(nd->flags & LOOKUP_RCU));
@@ -826,7 +826,7 @@ static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsi
*/
if (unlikely(!lockref_get_not_dead(&dentry->d_lockref)))
goto out;
- if (read_seqcount_retry(&dentry->d_seq, seq))
+ if (read_seqcount_retry(&dentry->d_seq, nd->next_seq))
goto out_dput;
/*
* Sequence counts matched. Now make sure that the root is
@@ -1475,7 +1475,7 @@ EXPORT_SYMBOL(follow_down);
* we meet a managed dentry that would need blocking.
*/
static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
- struct inode **inode, unsigned *seqp)
+ struct inode **inode)
{
struct dentry *dentry = path->dentry;
unsigned int flags = dentry->d_flags;
@@ -1504,7 +1504,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
path->mnt = &mounted->mnt;
dentry = path->dentry = mounted->mnt.mnt_root;
nd->state |= ND_JUMPED;
- *seqp = read_seqcount_begin(&dentry->d_seq);
+ nd->next_seq = read_seqcount_begin(&dentry->d_seq);
*inode = dentry->d_inode;
/*
* We don't need to re-check ->d_seq after this
@@ -1513,6 +1513,8 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
* becoming unpinned.
*/
flags = dentry->d_flags;
+ // makes sure that non-RCU pathwalk could reach
+ // this state.
if (read_seqretry(&mount_lock, nd->m_seq))
return false;
continue;
@@ -1525,8 +1527,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
}
static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
- struct path *path, struct inode **inode,
- unsigned int *seqp)
+ struct path *path, struct inode **inode)
{
bool jumped;
int ret;
@@ -1534,16 +1535,17 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
path->mnt = nd->path.mnt;
path->dentry = dentry;
if (nd->flags & LOOKUP_RCU) {
- unsigned int seq = *seqp;
+ unsigned int seq = nd->next_seq;
if (unlikely(!*inode))
return -ENOENT;
- if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
+ if (likely(__follow_mount_rcu(nd, path, inode)))
return 0;
- if (!try_to_unlazy_next(nd, dentry, seq))
- return -ECHILD;
- // *path might've been clobbered by __follow_mount_rcu()
+ // *path and nd->next_seq might've been clobbered
path->mnt = nd->path.mnt;
path->dentry = dentry;
+ nd->next_seq = seq;
+ if (!try_to_unlazy_next(nd, dentry))
+ return -ECHILD;
}
ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags);
if (jumped) {
@@ -1558,7 +1560,6 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
mntput(path->mnt);
} else {
*inode = d_backing_inode(path->dentry);
- *seqp = 0; /* out of RCU mode, so the value doesn't matter */
}
return ret;
}
@@ -1618,8 +1619,7 @@ static struct dentry *__lookup_hash(const struct qstr *name,
}
static struct dentry *lookup_fast(struct nameidata *nd,
- struct inode **inode,
- unsigned *seqp)
+ struct inode **inode)
{
struct dentry *dentry, *parent = nd->path.dentry;
int status = 1;
@@ -1630,8 +1630,7 @@ static struct dentry *lookup_fast(struct nameidata *nd,
* going to fall back to non-racy lookup.
*/
if (nd->flags & LOOKUP_RCU) {
- unsigned seq;
- dentry = __d_lookup_rcu(parent, &nd->last, &seq);
+ dentry = __d_lookup_rcu(parent, &nd->last, &nd->next_seq);
if (unlikely(!dentry)) {
if (!try_to_unlazy(nd))
return ERR_PTR(-ECHILD);
@@ -1643,7 +1642,7 @@ static struct dentry *lookup_fast(struct nameidata *nd,
* the dentry name information from lookup.
*/
*inode = d_backing_inode(dentry);
- if (read_seqcount_retry(&dentry->d_seq, seq))
+ if (read_seqcount_retry(&dentry->d_seq, nd->next_seq))
return ERR_PTR(-ECHILD);
/*
@@ -1656,11 +1655,10 @@ static struct dentry *lookup_fast(struct nameidata *nd,
if (__read_seqcount_retry(&parent->d_seq, nd->seq))
return ERR_PTR(-ECHILD);
- *seqp = seq;
status = d_revalidate(dentry, nd->flags);
if (likely(status > 0))
return dentry;
- if (!try_to_unlazy_next(nd, dentry, seq))
+ if (!try_to_unlazy_next(nd, dentry))
return ERR_PTR(-ECHILD);
if (status == -ECHILD)
/* we'd been told to redo it in non-rcu mode */
@@ -1741,7 +1739,7 @@ static inline int may_lookup(struct user_namespace *mnt_userns,
return inode_permission(mnt_userns, nd->inode, MAY_EXEC);
}
-static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
+static int reserve_stack(struct nameidata *nd, struct path *link)
{
if (unlikely(nd->total_link_count++ >= MAXSYMLINKS))
return -ELOOP;
@@ -1756,7 +1754,7 @@ static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
if (nd->flags & LOOKUP_RCU) {
// we need to grab link before we do unlazy. And we can't skip
// unlazy even if we fail to grab the link - cleanup needs it
- bool grabbed_link = legitimize_path(nd, link, seq);
+ bool grabbed_link = legitimize_path(nd, link, nd->next_seq);
if (!try_to_unlazy(nd) || !grabbed_link)
return -ECHILD;
@@ -1770,11 +1768,11 @@ static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
enum {WALK_TRAILING = 1, WALK_MORE = 2, WALK_NOFOLLOW = 4};
static const char *pick_link(struct nameidata *nd, struct path *link,
- struct inode *inode, unsigned seq, int flags)
+ struct inode *inode, int flags)
{
struct saved *last;
const char *res;
- int error = reserve_stack(nd, link, seq);
+ int error = reserve_stack(nd, link);
if (unlikely(error)) {
if (!(nd->flags & LOOKUP_RCU))
@@ -1784,7 +1782,7 @@ static const char *pick_link(struct nameidata *nd, struct path *link,
last = nd->stack + nd->depth++;
last->link = *link;
clear_delayed_call(&last->done);
- last->seq = seq;
+ last->seq = nd->next_seq;
if (flags & WALK_TRAILING) {
error = may_follow_link(nd, inode);
@@ -1846,12 +1844,14 @@ all_done: // pure jump
* to do this check without having to look at inode->i_op,
* so we keep a cache of "no, this doesn't need follow_link"
* for the common case.
+ *
+ * NOTE: dentry must be what nd->next_seq had been sampled from.
*/
static const char *step_into(struct nameidata *nd, int flags,
- struct dentry *dentry, struct inode *inode, unsigned seq)
+ struct dentry *dentry, struct inode *inode)
{
struct path path;
- int err = handle_mounts(nd, dentry, &path, &inode, &seq);
+ int err = handle_mounts(nd, dentry, &path, &inode);
if (err < 0)
return ERR_PTR(err);
@@ -1866,23 +1866,22 @@ static const char *step_into(struct nameidata *nd, int flags,
}
nd->path = path;
nd->inode = inode;
- nd->seq = seq;
+ nd->seq = nd->next_seq;
return NULL;
}
if (nd->flags & LOOKUP_RCU) {
/* make sure that d_is_symlink above matches inode */
- if (read_seqcount_retry(&path.dentry->d_seq, seq))
+ if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq))
return ERR_PTR(-ECHILD);
} else {
if (path.mnt == nd->path.mnt)
mntget(path.mnt);
}
- return pick_link(nd, &path, inode, seq, flags);
+ return pick_link(nd, &path, inode, flags);
}
static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
- struct inode **inodep,
- unsigned *seqp)
+ struct inode **inodep)
{
struct dentry *parent, *old;
@@ -1899,6 +1898,7 @@ static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
nd->path = path;
nd->inode = path.dentry->d_inode;
nd->seq = seq;
+ // makes sure that non-RCU pathwalk could reach this state
if (read_seqretry(&mount_lock, nd->m_seq))
return ERR_PTR(-ECHILD);
/* we know that mountpoint was pinned */
@@ -1906,7 +1906,8 @@ static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
old = nd->path.dentry;
parent = old->d_parent;
*inodep = parent->d_inode;
- *seqp = read_seqcount_begin(&parent->d_seq);
+ nd->next_seq = read_seqcount_begin(&parent->d_seq);
+ // makes sure that non-RCU pathwalk could reach this state
if (read_seqcount_retry(&old->d_seq, nd->seq))
return ERR_PTR(-ECHILD);
if (unlikely(!path_connected(nd->path.mnt, parent)))
@@ -1917,14 +1918,13 @@ in_root:
return ERR_PTR(-ECHILD);
if (unlikely(nd->flags & LOOKUP_BENEATH))
return ERR_PTR(-ECHILD);
- *seqp = nd->seq;
+ nd->next_seq = nd->seq;
*inodep = nd->path.dentry->d_inode;
return nd->path.dentry;
}
static struct dentry *follow_dotdot(struct nameidata *nd,
- struct inode **inodep,
- unsigned *seqp)
+ struct inode **inodep)
{
struct dentry *parent;
@@ -1948,14 +1948,12 @@ static struct dentry *follow_dotdot(struct nameidata *nd,
dput(parent);
return ERR_PTR(-ENOENT);
}
- *seqp = 0;
*inodep = parent->d_inode;
return parent;
in_root:
if (unlikely(nd->flags & LOOKUP_BENEATH))
return ERR_PTR(-EXDEV);
- *seqp = 0;
*inodep = nd->path.dentry->d_inode;
return dget(nd->path.dentry);
}
@@ -1966,7 +1964,6 @@ static const char *handle_dots(struct nameidata *nd, int type)
const char *error = NULL;
struct dentry *parent;
struct inode *inode;
- unsigned seq;
if (!nd->root.mnt) {
error = ERR_PTR(set_root(nd));
@@ -1974,12 +1971,12 @@ static const char *handle_dots(struct nameidata *nd, int type)
return error;
}
if (nd->flags & LOOKUP_RCU)
- parent = follow_dotdot_rcu(nd, &inode, &seq);
+ parent = follow_dotdot_rcu(nd, &inode);
else
- parent = follow_dotdot(nd, &inode, &seq);
+ parent = follow_dotdot(nd, &inode);
if (IS_ERR(parent))
return ERR_CAST(parent);
- error = step_into(nd, WALK_NOFOLLOW, parent, inode, seq);
+ error = step_into(nd, WALK_NOFOLLOW, parent, inode);
if (unlikely(error))
return error;
@@ -2004,7 +2001,6 @@ static const char *walk_component(struct nameidata *nd, int flags)
{
struct dentry *dentry;
struct inode *inode;
- unsigned seq;
/*
* "." and ".." are special - ".." especially so because it has
* to be able to know about the current root directory and
@@ -2015,7 +2011,7 @@ static const char *walk_component(struct nameidata *nd, int flags)
put_link(nd);
return handle_dots(nd, nd->last_type);
}
- dentry = lookup_fast(nd, &inode, &seq);
+ dentry = lookup_fast(nd, &inode);
if (IS_ERR(dentry))
return ERR_CAST(dentry);
if (unlikely(!dentry)) {
@@ -2025,7 +2021,7 @@ static const char *walk_component(struct nameidata *nd, int flags)
}
if (!(flags & WALK_MORE) && nd->depth)
put_link(nd);
- return step_into(nd, flags, dentry, inode, seq);
+ return step_into(nd, flags, dentry, inode);
}
/*
@@ -2380,6 +2376,8 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
flags &= ~LOOKUP_RCU;
if (flags & LOOKUP_RCU)
rcu_read_lock();
+ else
+ nd->seq = nd->next_seq = 0;
nd->flags = flags;
nd->state |= ND_JUMPED;
@@ -2481,8 +2479,9 @@ static int handle_lookup_down(struct nameidata *nd)
{
if (!(nd->flags & LOOKUP_RCU))
dget(nd->path.dentry);
+ nd->next_seq = nd->seq;
return PTR_ERR(step_into(nd, WALK_NOFOLLOW,
- nd->path.dentry, nd->inode, nd->seq));
+ nd->path.dentry, nd->inode));
}
/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
@@ -3401,7 +3400,6 @@ static const char *open_last_lookups(struct nameidata *nd,
struct dentry *dir = nd->path.dentry;
int open_flag = op->open_flag;
bool got_write = false;
- unsigned seq;
struct inode *inode;
struct dentry *dentry;
const char *res;
@@ -3418,7 +3416,7 @@ static const char *open_last_lookups(struct nameidata *nd,
if (nd->last.name[nd->last.len])
nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
/* we _can_ be in RCU mode here */
- dentry = lookup_fast(nd, &inode, &seq);
+ dentry = lookup_fast(nd, &inode);
if (IS_ERR(dentry))
return ERR_CAST(dentry);
if (likely(dentry))
@@ -3472,7 +3470,7 @@ static const char *open_last_lookups(struct nameidata *nd,
finish_lookup:
if (nd->depth)
put_link(nd);
- res = step_into(nd, WALK_TRAILING, dentry, inode, seq);
+ res = step_into(nd, WALK_TRAILING, dentry, inode);
if (unlikely(res))
nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
return res;