From 9d2a6211a7b972563d20edebccaae42994c429fb Mon Sep 17 00:00:00 2001
From: Mateusz Guzik
Date: Thu, 20 Nov 2025 01:38:02 +0100
Subject: fs: tidy up step_into() & friends before inlining

Symlink handling is already marked as unlikely, and pushing some of it
out into pick_link() reduces register spillage on entry to step_into()
with gcc 14.2.

The compiler needed additional convincing that handle_mounts() is
unlikely to fail.

At the same time, neither clang nor gcc could be convinced to tail-call
into pick_link(). While pick_link() takes the address of a stack-based
object as an argument (which definitely prevents the optimization),
splitting it into a separate tuple did not help. The issue persists
even when compiled without the stack protector. As such, nothing was
done about it for the time being, so as not to grow the diff.

Signed-off-by: Mateusz Guzik
Link: https://patch.msgid.link/20251120003803.2979978-1-mjguzik@gmail.com
Signed-off-by: Christian Brauner
---
 fs/namei.c | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index efa592a98155..5fee8afa510e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1672,13 +1672,15 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
 	path->dentry = dentry;
 	if (nd->flags & LOOKUP_RCU) {
 		unsigned int seq = nd->next_seq;
+		if (likely(!d_managed(dentry)))
+			return 0;
 		if (likely(__follow_mount_rcu(nd, path)))
 			return 0;
 		// *path and nd->next_seq might've been clobbered
 		path->mnt = nd->path.mnt;
 		path->dentry = dentry;
 		nd->next_seq = seq;
-		if (!try_to_unlazy_next(nd, dentry))
+		if (unlikely(!try_to_unlazy_next(nd, dentry)))
 			return -ECHILD;
 	}
 	ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags);
@@ -1941,13 +1943,23 @@ static int reserve_stack(struct nameidata *nd, struct path *link)
 
 enum {WALK_TRAILING = 1, WALK_MORE = 2, WALK_NOFOLLOW = 4};
 
-static const char *pick_link(struct nameidata *nd, struct path *link,
+static noinline const char *pick_link(struct nameidata *nd, struct path *link,
 		     struct inode *inode, int flags)
 {
 	struct saved *last;
 	const char *res;
-	int error = reserve_stack(nd, link);
+	int error;
+
+	if (nd->flags & LOOKUP_RCU) {
+		/* make sure that d_is_symlink from step_into() matches the inode */
+		if (read_seqcount_retry(&link->dentry->d_seq, nd->next_seq))
+			return ERR_PTR(-ECHILD);
+	} else {
+		if (link->mnt == nd->path.mnt)
+			mntget(link->mnt);
+	}
 
+	error = reserve_stack(nd, link);
 	if (unlikely(error)) {
 		if (!(nd->flags & LOOKUP_RCU))
 			path_put(link);
@@ -2026,9 +2038,10 @@ static const char *step_into(struct nameidata *nd, int flags,
 {
 	struct path path;
 	struct inode *inode;
-	int err = handle_mounts(nd, dentry, &path);
+	int err;
 
-	if (err < 0)
+	err = handle_mounts(nd, dentry, &path);
+	if (unlikely(err < 0))
 		return ERR_PTR(err);
 	inode = path.dentry->d_inode;
 	if (likely(!d_is_symlink(path.dentry)) ||
@@ -2050,14 +2063,6 @@ static const char *step_into(struct nameidata *nd, int flags,
 		nd->seq = nd->next_seq;
 		return NULL;
 	}
-	if (nd->flags & LOOKUP_RCU) {
-		/* make sure that d_is_symlink above matches inode */
-		if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq))
-			return ERR_PTR(-ECHILD);
-	} else {
-		if (path.mnt == nd->path.mnt)
-			mntget(path.mnt);
-	}
 	return pick_link(nd, &path, inode, flags);
 }
 
-- cgit
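The tail-call limitation noted in the patch above is easy to reproduce in
isolation. Below is a minimal standalone sketch, not kernel code; consume(),
caller() and struct pair are made-up names for illustration. Because the
callee receives the address of a caller-local object, the caller's stack
frame must outlive the call, which rules out a sibling/tail call; this is
the same situation as step_into() passing &path (a local struct path) to
pick_link().

/*
 * Standalone illustration (made-up names, not kernel code): &p escapes
 * into consume(), so the frame holding p must stay live across the
 * call. A tail call (plain jmp) would tear the frame down, hence the
 * compiler emits call + ret instead.
 */
struct pair {
	void *a;
	void *b;
};

const char *consume(struct pair *p);

const char *caller(void)
{
	struct pair p = { 0, 0 };

	return consume(&p);	/* not tail-called: p is address-taken */
}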
From 177fdbae39ecccb441d45e5e5ab146ea35b03d49 Mon Sep 17 00:00:00 2001
From: Mateusz Guzik
Date: Thu, 20 Nov 2025 01:38:03 +0100
Subject: fs: inline step_into() and walk_component()

The primary consumer is link_path_walk(), which calls walk_component()
for every path component, which in turn calls step_into(). Inlining
these saves the overhead of two function calls per path component and
lets the compiler do a better job optimizing them in place.

step_into() had absolutely atrocious assembly to facilitate the
slowpath. In order to lessen the burden at the callsite, all the hard
work is moved into step_into_slowpath() and an inline-able fastpath is
implemented for rcu-walk instead. The new fastpath is a stripped-down
version of step_into()'s RCU handling plus the d_managed() check from
handle_mounts().

Benchmarked as follows on Sapphire Rapids:
1. the "before" was a kernel with not-yet-merged optimizations (notably
   elision of calls to security_inode_permission() and marking ext4
   inodes as not having ACLs as applicable)
2. "after" is the same + the prep patch + this patch
3. the benchmark consists of issuing 205 calls to access(2) in a loop,
   with pathnames lifted from gcc and the linker building real code;
   most of them have several path components and 118 of them fail with
   -ENOENT

Result in terms of ops/s:

before:	21619
after:	22536 (+4%)

profile before:
  20.25%  [kernel]   [k] __d_lookup_rcu
  10.54%  [kernel]   [k] link_path_walk
  10.22%  [kernel]   [k] entry_SYSCALL_64
   6.50%  libc.so.6  [.] __GI___access
   6.35%  [kernel]   [k] strncpy_from_user
   4.87%  [kernel]   [k] step_into
   3.68%  [kernel]   [k] kmem_cache_alloc_noprof
   2.88%  [kernel]   [k] walk_component
   2.86%  [kernel]   [k] kmem_cache_free
   2.14%  [kernel]   [k] set_root
   2.08%  [kernel]   [k] lookup_fast

after:
  23.38%  [kernel]   [k] __d_lookup_rcu
  11.27%  [kernel]   [k] entry_SYSCALL_64
  10.89%  [kernel]   [k] link_path_walk
   7.00%  libc.so.6  [.] __GI___access
   6.88%  [kernel]   [k] strncpy_from_user
   3.50%  [kernel]   [k] kmem_cache_alloc_noprof
   2.01%  [kernel]   [k] kmem_cache_free
   2.00%  [kernel]   [k] set_root
   1.99%  [kernel]   [k] lookup_fast
   1.81%  [kernel]   [k] do_syscall_64
   1.69%  [kernel]   [k] entry_SYSCALL_64_safe_stack

While walk_component() and step_into() of course disappear from the
profile, link_path_walk() barely gains any overhead despite the
inlining, thanks to the added fast path, all while completing more
walks per second. I did not investigate why overhead grew so much in
__d_lookup_rcu().

Signed-off-by: Mateusz Guzik
Link: https://patch.msgid.link/20251120003803.2979978-2-mjguzik@gmail.com
Signed-off-by: Christian Brauner
---
 fs/namei.c | 31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 5fee8afa510e..8281dfe5047f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1951,7 +1951,7 @@ static noinline const char *pick_link(struct nameidata *nd, struct path *link,
 	int error;
 
 	if (nd->flags & LOOKUP_RCU) {
-		/* make sure that d_is_symlink from step_into() matches the inode */
+		/* make sure that d_is_symlink from step_into_slowpath() matches the inode */
 		if (read_seqcount_retry(&link->dentry->d_seq, nd->next_seq))
 			return ERR_PTR(-ECHILD);
 	} else {
@@ -2033,7 +2033,7 @@ all_done: // pure jump
  *
  * NOTE: dentry must be what nd->next_seq had been sampled from.
  */
-static const char *step_into(struct nameidata *nd, int flags,
+static noinline const char *step_into_slowpath(struct nameidata *nd, int flags,
 			     struct dentry *dentry)
 {
 	struct path path;
@@ -2066,6 +2066,31 @@ static const char *step_into(struct nameidata *nd, int flags,
 	return pick_link(nd, &path, inode, flags);
 }
 
+static __always_inline const char *step_into(struct nameidata *nd, int flags,
+					     struct dentry *dentry)
+{
+	/*
+	 * In the common case we are in rcu-walk and traversing over a non-mounted on
+	 * directory (as opposed to e.g., a symlink).
+	 *
+	 * We can handle that and negative entries with the checks below.
+	 */
+	if (likely((nd->flags & LOOKUP_RCU) &&
+		   !d_managed(dentry) && !d_is_symlink(dentry))) {
+		struct inode *inode = dentry->d_inode;
+		if (read_seqcount_retry(&dentry->d_seq, nd->next_seq))
+			return ERR_PTR(-ECHILD);
+		if (unlikely(!inode))
+			return ERR_PTR(-ENOENT);
+		nd->path.dentry = dentry;
+		/* nd->path.mnt is retained on purpose */
+		nd->inode = inode;
+		nd->seq = nd->next_seq;
+		return NULL;
+	}
+	return step_into_slowpath(nd, flags, dentry);
+}
+
 static struct dentry *follow_dotdot_rcu(struct nameidata *nd)
 {
 	struct dentry *parent, *old;
@@ -2176,7 +2201,7 @@ static const char *handle_dots(struct nameidata *nd, int type)
 	return NULL;
 }
 
-static const char *walk_component(struct nameidata *nd, int flags)
+static __always_inline const char *walk_component(struct nameidata *nd, int flags)
 {
 	struct dentry *dentry;
 	/*
-- cgit
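For context on the numbers above, here is a minimal userspace sketch of the
kind of benchmark the second commit message describes. This is a
reconstruction under stated assumptions, not the author's harness: the 205
pathnames are assumed to sit one per line in paths.txt, and one "op" is
taken to be a full pass over the list; the commit message specifies neither.

/*
 * Hedged sketch of the access(2) loop described above. The pathnames
 * (lifted from a gcc/linker build in the original; many several
 * components deep, many hitting -ENOENT) are assumed to live in
 * paths.txt, one per line. Error handling is minimal on purpose.
 */
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#define MAX_PATHS 205
#define ITERS     1000000

int main(void)
{
	static char paths[MAX_PATHS][4096];
	int npaths = 0;

	FILE *f = fopen("paths.txt", "r");
	if (!f)
		return 1;
	while (npaths < MAX_PATHS && fgets(paths[npaths], sizeof(paths[0]), f)) {
		paths[npaths][strcspn(paths[npaths], "\n")] = '\0';
		npaths++;
	}
	fclose(f);
	if (npaths == 0)
		return 1;

	struct timespec start, end;
	clock_gettime(CLOCK_MONOTONIC, &start);
	for (int i = 0; i < ITERS; i++)
		access(paths[i % npaths], F_OK);	/* many fail with -ENOENT */
	clock_gettime(CLOCK_MONOTONIC, &end);

	double secs = (end.tv_sec - start.tv_sec) +
		      (end.tv_nsec - start.tv_nsec) / 1e9;
	/* one "op" = one full pass over all pathnames (an assumption) */
	printf("%.0f ops/s\n", ITERS / (double)npaths / secs);
	return 0;
}

Built with e.g. cc -O2 bench.c -o bench, this prints a throughput figure
comparable in spirit, though not in absolute value, to the before/after
numbers quoted in the commit message.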