diff options
author | Christian Brauner <brauner@kernel.org> | 2025-02-17 09:26:01 +0100 |
---|---|---|
committer | Christian Brauner <brauner@kernel.org> | 2025-02-19 14:09:19 +0100 |
commit | 3789a0ab96af02c5604d1c29d89a43cb986b1222 (patch) | |
tree | af3811c151979d69bfb3999c069ef91167051f4d | |
parent | 2c3230fb8db9bf04d97a907f2fb86adb1e74e431 (diff) | |
parent | 204a575e91f3dace7589db5d1ff00067094c1e29 (diff) |
Merge patch series "VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry"
NeilBrown <neilb@suse.de> says:
I found these opportunities for simplification as part of my work to
enhance filesystem directory operations to not require an exclusive
lock on the directory.
There are quite a collection of users of these interfaces incluing NFS,
smb/server, bcachefs, devtmpfs, and audit. Hence the long Cc line.
* patches from https://lore.kernel.org/r/20250217003020.3170652-2-neilb@suse.de:
VFS: add common error checks to lookup_one_qstr_excl()
VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry
Link: https://lore.kernel.org/r/20250217003020.3170652-2-neilb@suse.de
Signed-off-by: Christian Brauner <brauner@kernel.org>
-rw-r--r-- | Documentation/filesystems/porting.rst | 21 | ||||
-rw-r--r-- | drivers/base/devtmpfs.c | 65 | ||||
-rw-r--r-- | fs/bcachefs/fs-ioctl.c | 4 | ||||
-rw-r--r-- | fs/namei.c | 57 | ||||
-rw-r--r-- | fs/nfs/dir.c | 3 | ||||
-rw-r--r-- | fs/smb/server/vfs.c | 26 | ||||
-rw-r--r-- | kernel/audit_watch.c | 12 |
7 files changed, 90 insertions, 98 deletions
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 1639e78e3146..3ed3f39ecf71 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1157,3 +1157,24 @@ in normal case it points into the pathname being looked up. NOTE: if you need something like full path from the root of filesystem, you are still on your own - this assists with simple cases, but it's not magic. + +--- + +** recommended** + +kern_path_locked() and user_path_locked() no longer return a negative +dentry so this doesn't need to be checked. If the name cannot be found, +ERR_PTR(-ENOENT) is returned. + +** recommend** + +lookup_one_qstr_excl() is changed to return errors in more cases, so +these conditions don't require explicit checks: + + - if LOOKUP_CREATE is NOT given, then the dentry won't be negative, + ERR_PTR(-ENOENT) is returned instead + - if LOOKUP_EXCL IS given, then the dentry won't be positive, + ERR_PTR(-EEXIST) is rreturned instread + +LOOKUP_EXCL now means "target must not exist". It can be combined with +LOOK_CREATE or LOOKUP_RENAME_TARGET. diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index b848764ef018..c9e34842139f 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -245,15 +245,12 @@ static int dev_rmdir(const char *name) dentry = kern_path_locked(name, &parent); if (IS_ERR(dentry)) return PTR_ERR(dentry); - if (d_really_is_positive(dentry)) { - if (d_inode(dentry)->i_private == &thread) - err = vfs_rmdir(&nop_mnt_idmap, d_inode(parent.dentry), - dentry); - else - err = -EPERM; - } else { - err = -ENOENT; - } + if (d_inode(dentry)->i_private == &thread) + err = vfs_rmdir(&nop_mnt_idmap, d_inode(parent.dentry), + dentry); + else + err = -EPERM; + dput(dentry); inode_unlock(d_inode(parent.dentry)); path_put(&parent); @@ -310,6 +307,8 @@ static int handle_remove(const char *nodename, struct device *dev) { struct path parent; struct dentry *dentry; + struct kstat stat; + struct path p; int deleted = 0; int err; @@ -317,32 +316,28 @@ static int handle_remove(const char *nodename, struct device *dev) if (IS_ERR(dentry)) return PTR_ERR(dentry); - if (d_really_is_positive(dentry)) { - struct kstat stat; - struct path p = {.mnt = parent.mnt, .dentry = dentry}; - err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE, - AT_STATX_SYNC_AS_STAT); - if (!err && dev_mynode(dev, d_inode(dentry), &stat)) { - struct iattr newattrs; - /* - * before unlinking this node, reset permissions - * of possible references like hardlinks - */ - newattrs.ia_uid = GLOBAL_ROOT_UID; - newattrs.ia_gid = GLOBAL_ROOT_GID; - newattrs.ia_mode = stat.mode & ~0777; - newattrs.ia_valid = - ATTR_UID|ATTR_GID|ATTR_MODE; - inode_lock(d_inode(dentry)); - notify_change(&nop_mnt_idmap, dentry, &newattrs, NULL); - inode_unlock(d_inode(dentry)); - err = vfs_unlink(&nop_mnt_idmap, d_inode(parent.dentry), - dentry, NULL); - if (!err || err == -ENOENT) - deleted = 1; - } - } else { - err = -ENOENT; + p.mnt = parent.mnt; + p.dentry = dentry; + err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE, + AT_STATX_SYNC_AS_STAT); + if (!err && dev_mynode(dev, d_inode(dentry), &stat)) { + struct iattr newattrs; + /* + * before unlinking this node, reset permissions + * of possible references like hardlinks + */ + newattrs.ia_uid = GLOBAL_ROOT_UID; + newattrs.ia_gid = GLOBAL_ROOT_GID; + newattrs.ia_mode = stat.mode & ~0777; + newattrs.ia_valid = + ATTR_UID|ATTR_GID|ATTR_MODE; + inode_lock(d_inode(dentry)); + notify_change(&nop_mnt_idmap, dentry, &newattrs, NULL); + inode_unlock(d_inode(dentry)); + err = vfs_unlink(&nop_mnt_idmap, d_inode(parent.dentry), + dentry, NULL); + if (!err || err == -ENOENT) + deleted = 1; } dput(dentry); inode_unlock(d_inode(parent.dentry)); diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 15725b4ce393..595b57fabc9a 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -511,10 +511,6 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp, ret = -EXDEV; goto err; } - if (!d_is_positive(victim)) { - ret = -ENOENT; - goto err; - } ret = __bch2_unlink(dir, victim, true); if (!ret) { fsnotify_rmdir(dir, victim); diff --git a/fs/namei.c b/fs/namei.c index 3ab9440c5b93..b7cdca902803 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1670,6 +1670,8 @@ static struct dentry *lookup_dcache(const struct qstr *name, * dentries - as the matter of fact, this only gets called * when directory is guaranteed to have no in-lookup children * at all. + * Will return -ENOENT if name isn't found and LOOKUP_CREATE wasn't passed. + * Will return -EEXIST if name is found and LOOKUP_EXCL was passed. */ struct dentry *lookup_one_qstr_excl(const struct qstr *name, struct dentry *base, @@ -1680,7 +1682,7 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name, struct inode *dir = base->d_inode; if (dentry) - return dentry; + goto found; /* Don't create child dentry for a dead directory. */ if (unlikely(IS_DEADDIR(dir))) @@ -1695,6 +1697,17 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name, dput(dentry); dentry = old; } +found: + if (IS_ERR(dentry)) + return dentry; + if (d_is_negative(dentry) && !(flags & LOOKUP_CREATE)) { + dput(dentry); + return ERR_PTR(-ENOENT); + } + if (d_is_positive(dentry) && (flags & LOOKUP_EXCL)) { + dput(dentry); + return ERR_PTR(-EEXIST); + } return dentry; } EXPORT_SYMBOL(lookup_one_qstr_excl); @@ -4078,27 +4091,13 @@ static struct dentry *filename_create(int dfd, struct filename *name, * '/', and a directory wasn't requested. */ if (last.name[last.len] && !want_dir) - create_flags = 0; + create_flags &= ~LOOKUP_CREATE; inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); dentry = lookup_one_qstr_excl(&last, path->dentry, reval_flag | create_flags); if (IS_ERR(dentry)) goto unlock; - error = -EEXIST; - if (d_is_positive(dentry)) - goto fail; - - /* - * Special case - lookup gave negative, but... we had foo/bar/ - * From the vfs_mknod() POV we just have a negative dentry - - * all is fine. Let's be bastards - you had / on the end, you've - * been asking for (non-existent) directory. -ENOENT for you. - */ - if (unlikely(!create_flags)) { - error = -ENOENT; - goto fail; - } if (unlikely(err2)) { error = err2; goto fail; @@ -4445,10 +4444,6 @@ retry: error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto exit3; - if (!dentry->d_inode) { - error = -ENOENT; - goto exit4; - } error = security_path_rmdir(&path, dentry); if (error) goto exit4; @@ -4579,7 +4574,7 @@ retry_deleg: if (!IS_ERR(dentry)) { /* Why not before? Because we want correct error value */ - if (last.name[last.len] || d_is_negative(dentry)) + if (last.name[last.len]) goto slashes; inode = dentry->d_inode; ihold(inode); @@ -4613,9 +4608,7 @@ exit1: return error; slashes: - if (d_is_negative(dentry)) - error = -ENOENT; - else if (d_is_dir(dentry)) + if (d_is_dir(dentry)) error = -EISDIR; else error = -ENOTDIR; @@ -5115,7 +5108,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd, struct qstr old_last, new_last; int old_type, new_type; struct inode *delegated_inode = NULL; - unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET; + unsigned int lookup_flags = 0, target_flags = + LOOKUP_RENAME_TARGET | LOOKUP_CREATE; bool should_retry = false; int error = -EINVAL; @@ -5128,6 +5122,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd, if (flags & RENAME_EXCHANGE) target_flags = 0; + if (flags & RENAME_NOREPLACE) + target_flags |= LOOKUP_EXCL; retry: error = filename_parentat(olddfd, from, lookup_flags, &old_path, @@ -5169,23 +5165,12 @@ retry_deleg: error = PTR_ERR(old_dentry); if (IS_ERR(old_dentry)) goto exit3; - /* source must exist */ - error = -ENOENT; - if (d_is_negative(old_dentry)) - goto exit4; new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry, lookup_flags | target_flags); error = PTR_ERR(new_dentry); if (IS_ERR(new_dentry)) goto exit4; - error = -EEXIST; - if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) - goto exit5; if (flags & RENAME_EXCHANGE) { - error = -ENOENT; - if (d_is_negative(new_dentry)) - goto exit5; - if (!d_is_dir(new_dentry)) { error = -ENOTDIR; if (new_last.name[new_last.len]) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 2b04038b0e40..56cf16a72334 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1532,7 +1532,8 @@ static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags) { if (NFS_PROTO(dir)->version == 2) return 0; - return flags & LOOKUP_EXCL; + return (flags & (LOOKUP_CREATE | LOOKUP_EXCL)) == + (LOOKUP_CREATE | LOOKUP_EXCL); } /* diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 6890016e1923..fe29acef5872 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -113,11 +113,6 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf, if (IS_ERR(d)) goto err_out; - if (d_is_negative(d)) { - dput(d); - goto err_out; - } - path->dentry = d; path->mnt = mntget(parent_path->mnt); @@ -693,6 +688,7 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path, struct ksmbd_file *parent_fp; int new_type; int err, lookup_flags = LOOKUP_NO_SYMLINKS; + int target_lookup_flags = LOOKUP_RENAME_TARGET; if (ksmbd_override_fsids(work)) return -ENOMEM; @@ -703,6 +699,14 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path, goto revert_fsids; } + /* + * explicitly handle file overwrite case, for compatibility with + * filesystems that may not support rename flags (e.g: fuse) + */ + if (flags & RENAME_NOREPLACE) + target_lookup_flags |= LOOKUP_EXCL; + flags &= ~(RENAME_NOREPLACE); + retry: err = vfs_path_parent_lookup(to, lookup_flags | LOOKUP_BENEATH, &new_path, &new_last, &new_type, @@ -743,7 +747,7 @@ retry: } new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry, - lookup_flags | LOOKUP_RENAME_TARGET); + lookup_flags | target_lookup_flags); if (IS_ERR(new_dentry)) { err = PTR_ERR(new_dentry); goto out3; @@ -754,16 +758,6 @@ retry: goto out4; } - /* - * explicitly handle file overwrite case, for compatibility with - * filesystems that may not support rename flags (e.g: fuse) - */ - if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) { - err = -EEXIST; - goto out4; - } - flags &= ~(RENAME_NOREPLACE); - if (old_child == trap) { err = -EINVAL; goto out4; diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 7f358740e958..367eaf2c78b7 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -350,11 +350,10 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent) struct dentry *d = kern_path_locked(watch->path, parent); if (IS_ERR(d)) return PTR_ERR(d); - if (d_is_positive(d)) { - /* update watch filter fields */ - watch->dev = d->d_sb->s_dev; - watch->ino = d_backing_inode(d)->i_ino; - } + /* update watch filter fields */ + watch->dev = d->d_sb->s_dev; + watch->ino = d_backing_inode(d)->i_ino; + inode_unlock(d_backing_inode(parent->dentry)); dput(d); return 0; @@ -419,10 +418,11 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list) /* caller expects mutex locked */ mutex_lock(&audit_filter_mutex); - if (ret) { + if (ret && ret != -ENOENT) { audit_put_watch(watch); return ret; } + ret = 0; /* either find an old parent or attach a new one */ parent = audit_find_parent(d_backing_inode(parent_path.dentry)); |