summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Brauner <brauner@kernel.org>2025-02-17 09:26:01 +0100
committerChristian Brauner <brauner@kernel.org>2025-02-19 14:09:19 +0100
commit3789a0ab96af02c5604d1c29d89a43cb986b1222 (patch)
treeaf3811c151979d69bfb3999c069ef91167051f4d
parent2c3230fb8db9bf04d97a907f2fb86adb1e74e431 (diff)
parent204a575e91f3dace7589db5d1ff00067094c1e29 (diff)
Merge patch series "VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry"
NeilBrown <neilb@suse.de> says: I found these opportunities for simplification as part of my work to enhance filesystem directory operations to not require an exclusive lock on the directory. There are quite a collection of users of these interfaces incluing NFS, smb/server, bcachefs, devtmpfs, and audit. Hence the long Cc line. * patches from https://lore.kernel.org/r/20250217003020.3170652-2-neilb@suse.de: VFS: add common error checks to lookup_one_qstr_excl() VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry Link: https://lore.kernel.org/r/20250217003020.3170652-2-neilb@suse.de Signed-off-by: Christian Brauner <brauner@kernel.org>
-rw-r--r--Documentation/filesystems/porting.rst21
-rw-r--r--drivers/base/devtmpfs.c65
-rw-r--r--fs/bcachefs/fs-ioctl.c4
-rw-r--r--fs/namei.c57
-rw-r--r--fs/nfs/dir.c3
-rw-r--r--fs/smb/server/vfs.c26
-rw-r--r--kernel/audit_watch.c12
7 files changed, 90 insertions, 98 deletions
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index 1639e78e3146..3ed3f39ecf71 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -1157,3 +1157,24 @@ in normal case it points into the pathname being looked up.
NOTE: if you need something like full path from the root of filesystem,
you are still on your own - this assists with simple cases, but it's not
magic.
+
+---
+
+** recommended**
+
+kern_path_locked() and user_path_locked() no longer return a negative
+dentry so this doesn't need to be checked. If the name cannot be found,
+ERR_PTR(-ENOENT) is returned.
+
+** recommend**
+
+lookup_one_qstr_excl() is changed to return errors in more cases, so
+these conditions don't require explicit checks:
+
+ - if LOOKUP_CREATE is NOT given, then the dentry won't be negative,
+ ERR_PTR(-ENOENT) is returned instead
+ - if LOOKUP_EXCL IS given, then the dentry won't be positive,
+ ERR_PTR(-EEXIST) is rreturned instread
+
+LOOKUP_EXCL now means "target must not exist". It can be combined with
+LOOK_CREATE or LOOKUP_RENAME_TARGET.
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index b848764ef018..c9e34842139f 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -245,15 +245,12 @@ static int dev_rmdir(const char *name)
dentry = kern_path_locked(name, &parent);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
- if (d_really_is_positive(dentry)) {
- if (d_inode(dentry)->i_private == &thread)
- err = vfs_rmdir(&nop_mnt_idmap, d_inode(parent.dentry),
- dentry);
- else
- err = -EPERM;
- } else {
- err = -ENOENT;
- }
+ if (d_inode(dentry)->i_private == &thread)
+ err = vfs_rmdir(&nop_mnt_idmap, d_inode(parent.dentry),
+ dentry);
+ else
+ err = -EPERM;
+
dput(dentry);
inode_unlock(d_inode(parent.dentry));
path_put(&parent);
@@ -310,6 +307,8 @@ static int handle_remove(const char *nodename, struct device *dev)
{
struct path parent;
struct dentry *dentry;
+ struct kstat stat;
+ struct path p;
int deleted = 0;
int err;
@@ -317,32 +316,28 @@ static int handle_remove(const char *nodename, struct device *dev)
if (IS_ERR(dentry))
return PTR_ERR(dentry);
- if (d_really_is_positive(dentry)) {
- struct kstat stat;
- struct path p = {.mnt = parent.mnt, .dentry = dentry};
- err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE,
- AT_STATX_SYNC_AS_STAT);
- if (!err && dev_mynode(dev, d_inode(dentry), &stat)) {
- struct iattr newattrs;
- /*
- * before unlinking this node, reset permissions
- * of possible references like hardlinks
- */
- newattrs.ia_uid = GLOBAL_ROOT_UID;
- newattrs.ia_gid = GLOBAL_ROOT_GID;
- newattrs.ia_mode = stat.mode & ~0777;
- newattrs.ia_valid =
- ATTR_UID|ATTR_GID|ATTR_MODE;
- inode_lock(d_inode(dentry));
- notify_change(&nop_mnt_idmap, dentry, &newattrs, NULL);
- inode_unlock(d_inode(dentry));
- err = vfs_unlink(&nop_mnt_idmap, d_inode(parent.dentry),
- dentry, NULL);
- if (!err || err == -ENOENT)
- deleted = 1;
- }
- } else {
- err = -ENOENT;
+ p.mnt = parent.mnt;
+ p.dentry = dentry;
+ err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE,
+ AT_STATX_SYNC_AS_STAT);
+ if (!err && dev_mynode(dev, d_inode(dentry), &stat)) {
+ struct iattr newattrs;
+ /*
+ * before unlinking this node, reset permissions
+ * of possible references like hardlinks
+ */
+ newattrs.ia_uid = GLOBAL_ROOT_UID;
+ newattrs.ia_gid = GLOBAL_ROOT_GID;
+ newattrs.ia_mode = stat.mode & ~0777;
+ newattrs.ia_valid =
+ ATTR_UID|ATTR_GID|ATTR_MODE;
+ inode_lock(d_inode(dentry));
+ notify_change(&nop_mnt_idmap, dentry, &newattrs, NULL);
+ inode_unlock(d_inode(dentry));
+ err = vfs_unlink(&nop_mnt_idmap, d_inode(parent.dentry),
+ dentry, NULL);
+ if (!err || err == -ENOENT)
+ deleted = 1;
}
dput(dentry);
inode_unlock(d_inode(parent.dentry));
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
index 15725b4ce393..595b57fabc9a 100644
--- a/fs/bcachefs/fs-ioctl.c
+++ b/fs/bcachefs/fs-ioctl.c
@@ -511,10 +511,6 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
ret = -EXDEV;
goto err;
}
- if (!d_is_positive(victim)) {
- ret = -ENOENT;
- goto err;
- }
ret = __bch2_unlink(dir, victim, true);
if (!ret) {
fsnotify_rmdir(dir, victim);
diff --git a/fs/namei.c b/fs/namei.c
index 3ab9440c5b93..b7cdca902803 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1670,6 +1670,8 @@ static struct dentry *lookup_dcache(const struct qstr *name,
* dentries - as the matter of fact, this only gets called
* when directory is guaranteed to have no in-lookup children
* at all.
+ * Will return -ENOENT if name isn't found and LOOKUP_CREATE wasn't passed.
+ * Will return -EEXIST if name is found and LOOKUP_EXCL was passed.
*/
struct dentry *lookup_one_qstr_excl(const struct qstr *name,
struct dentry *base,
@@ -1680,7 +1682,7 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name,
struct inode *dir = base->d_inode;
if (dentry)
- return dentry;
+ goto found;
/* Don't create child dentry for a dead directory. */
if (unlikely(IS_DEADDIR(dir)))
@@ -1695,6 +1697,17 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name,
dput(dentry);
dentry = old;
}
+found:
+ if (IS_ERR(dentry))
+ return dentry;
+ if (d_is_negative(dentry) && !(flags & LOOKUP_CREATE)) {
+ dput(dentry);
+ return ERR_PTR(-ENOENT);
+ }
+ if (d_is_positive(dentry) && (flags & LOOKUP_EXCL)) {
+ dput(dentry);
+ return ERR_PTR(-EEXIST);
+ }
return dentry;
}
EXPORT_SYMBOL(lookup_one_qstr_excl);
@@ -4078,27 +4091,13 @@ static struct dentry *filename_create(int dfd, struct filename *name,
* '/', and a directory wasn't requested.
*/
if (last.name[last.len] && !want_dir)
- create_flags = 0;
+ create_flags &= ~LOOKUP_CREATE;
inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
dentry = lookup_one_qstr_excl(&last, path->dentry,
reval_flag | create_flags);
if (IS_ERR(dentry))
goto unlock;
- error = -EEXIST;
- if (d_is_positive(dentry))
- goto fail;
-
- /*
- * Special case - lookup gave negative, but... we had foo/bar/
- * From the vfs_mknod() POV we just have a negative dentry -
- * all is fine. Let's be bastards - you had / on the end, you've
- * been asking for (non-existent) directory. -ENOENT for you.
- */
- if (unlikely(!create_flags)) {
- error = -ENOENT;
- goto fail;
- }
if (unlikely(err2)) {
error = err2;
goto fail;
@@ -4445,10 +4444,6 @@ retry:
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto exit3;
- if (!dentry->d_inode) {
- error = -ENOENT;
- goto exit4;
- }
error = security_path_rmdir(&path, dentry);
if (error)
goto exit4;
@@ -4579,7 +4574,7 @@ retry_deleg:
if (!IS_ERR(dentry)) {
/* Why not before? Because we want correct error value */
- if (last.name[last.len] || d_is_negative(dentry))
+ if (last.name[last.len])
goto slashes;
inode = dentry->d_inode;
ihold(inode);
@@ -4613,9 +4608,7 @@ exit1:
return error;
slashes:
- if (d_is_negative(dentry))
- error = -ENOENT;
- else if (d_is_dir(dentry))
+ if (d_is_dir(dentry))
error = -EISDIR;
else
error = -ENOTDIR;
@@ -5115,7 +5108,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
struct qstr old_last, new_last;
int old_type, new_type;
struct inode *delegated_inode = NULL;
- unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
+ unsigned int lookup_flags = 0, target_flags =
+ LOOKUP_RENAME_TARGET | LOOKUP_CREATE;
bool should_retry = false;
int error = -EINVAL;
@@ -5128,6 +5122,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
if (flags & RENAME_EXCHANGE)
target_flags = 0;
+ if (flags & RENAME_NOREPLACE)
+ target_flags |= LOOKUP_EXCL;
retry:
error = filename_parentat(olddfd, from, lookup_flags, &old_path,
@@ -5169,23 +5165,12 @@ retry_deleg:
error = PTR_ERR(old_dentry);
if (IS_ERR(old_dentry))
goto exit3;
- /* source must exist */
- error = -ENOENT;
- if (d_is_negative(old_dentry))
- goto exit4;
new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry,
lookup_flags | target_flags);
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry))
goto exit4;
- error = -EEXIST;
- if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry))
- goto exit5;
if (flags & RENAME_EXCHANGE) {
- error = -ENOENT;
- if (d_is_negative(new_dentry))
- goto exit5;
-
if (!d_is_dir(new_dentry)) {
error = -ENOTDIR;
if (new_last.name[new_last.len])
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2b04038b0e40..56cf16a72334 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1532,7 +1532,8 @@ static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags)
{
if (NFS_PROTO(dir)->version == 2)
return 0;
- return flags & LOOKUP_EXCL;
+ return (flags & (LOOKUP_CREATE | LOOKUP_EXCL)) ==
+ (LOOKUP_CREATE | LOOKUP_EXCL);
}
/*
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index 6890016e1923..fe29acef5872 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -113,11 +113,6 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
if (IS_ERR(d))
goto err_out;
- if (d_is_negative(d)) {
- dput(d);
- goto err_out;
- }
-
path->dentry = d;
path->mnt = mntget(parent_path->mnt);
@@ -693,6 +688,7 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
struct ksmbd_file *parent_fp;
int new_type;
int err, lookup_flags = LOOKUP_NO_SYMLINKS;
+ int target_lookup_flags = LOOKUP_RENAME_TARGET;
if (ksmbd_override_fsids(work))
return -ENOMEM;
@@ -703,6 +699,14 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
goto revert_fsids;
}
+ /*
+ * explicitly handle file overwrite case, for compatibility with
+ * filesystems that may not support rename flags (e.g: fuse)
+ */
+ if (flags & RENAME_NOREPLACE)
+ target_lookup_flags |= LOOKUP_EXCL;
+ flags &= ~(RENAME_NOREPLACE);
+
retry:
err = vfs_path_parent_lookup(to, lookup_flags | LOOKUP_BENEATH,
&new_path, &new_last, &new_type,
@@ -743,7 +747,7 @@ retry:
}
new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry,
- lookup_flags | LOOKUP_RENAME_TARGET);
+ lookup_flags | target_lookup_flags);
if (IS_ERR(new_dentry)) {
err = PTR_ERR(new_dentry);
goto out3;
@@ -754,16 +758,6 @@ retry:
goto out4;
}
- /*
- * explicitly handle file overwrite case, for compatibility with
- * filesystems that may not support rename flags (e.g: fuse)
- */
- if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) {
- err = -EEXIST;
- goto out4;
- }
- flags &= ~(RENAME_NOREPLACE);
-
if (old_child == trap) {
err = -EINVAL;
goto out4;
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 7f358740e958..367eaf2c78b7 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -350,11 +350,10 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent)
struct dentry *d = kern_path_locked(watch->path, parent);
if (IS_ERR(d))
return PTR_ERR(d);
- if (d_is_positive(d)) {
- /* update watch filter fields */
- watch->dev = d->d_sb->s_dev;
- watch->ino = d_backing_inode(d)->i_ino;
- }
+ /* update watch filter fields */
+ watch->dev = d->d_sb->s_dev;
+ watch->ino = d_backing_inode(d)->i_ino;
+
inode_unlock(d_backing_inode(parent->dentry));
dput(d);
return 0;
@@ -419,10 +418,11 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
/* caller expects mutex locked */
mutex_lock(&audit_filter_mutex);
- if (ret) {
+ if (ret && ret != -ENOENT) {
audit_put_watch(watch);
return ret;
}
+ ret = 0;
/* either find an old parent or attach a new one */
parent = audit_find_parent(d_backing_inode(parent_path.dentry));