summaryrefslogtreecommitdiff
path: root/fs/namei.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- fs/namei.c | 119
1 files changed, 75 insertions, 44 deletions
diff --git a/fs/namei.c b/fs/namei.c
index 71c13b2990b4..5c318d657503 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -289,7 +289,7 @@ EXPORT_SYMBOL(putname);
* the vfsmount must be passed through @idmap. This function will then take
* care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs @nop_mnt_idmap.
+ * raw inode simply pass @nop_mnt_idmap.
*/
static int check_acl(struct mnt_idmap *idmap,
struct inode *inode, int mask)
@@ -334,7 +334,7 @@ static int check_acl(struct mnt_idmap *idmap,
* the vfsmount must be passed through @idmap. This function will then take
* care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs @nop_mnt_idmap.
+ * raw inode simply pass @nop_mnt_idmap.
*/
static int acl_permission_check(struct mnt_idmap *idmap,
struct inode *inode, int mask)
@@ -395,7 +395,7 @@ static int acl_permission_check(struct mnt_idmap *idmap,
* the vfsmount must be passed through @idmap. This function will then take
* care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs @nop_mnt_idmap.
+ * raw inode simply pass @nop_mnt_idmap.
*/
int generic_permission(struct mnt_idmap *idmap, struct inode *inode,
int mask)
@@ -1071,7 +1071,6 @@ static struct ctl_table namei_sysctls[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
},
- { }
};
static int __init init_fs_namei_sysctls(void)
@@ -2467,7 +2466,7 @@ static int handle_lookup_down(struct nameidata *nd)
return PTR_ERR(step_into(nd, WALK_NOFOLLOW, nd->path.dentry));
}
-/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
+/* Returns 0 and nd will be valid on success; Returns error, otherwise. */
static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path)
{
const char *s = path_init(nd, flags);
@@ -2522,7 +2521,7 @@ int filename_lookup(int dfd, struct filename *name, unsigned flags,
return retval;
}
-/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
+/* Returns 0 and nd will be valid on success; Returns error, otherwise. */
static int path_parentat(struct nameidata *nd, unsigned flags,
struct path *parent)
{
@@ -3014,27 +3013,37 @@ static inline int may_create(struct mnt_idmap *idmap,
return inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC);
}
+// p1 != p2, both are on the same filesystem, ->s_vfs_rename_mutex is held
static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2)
{
- struct dentry *p;
+ struct dentry *p = p1, *q = p2, *r;
- p = d_ancestor(p2, p1);
- if (p) {
+ while ((r = p->d_parent) != p2 && r != p)
+ p = r;
+ if (r == p2) {
+ // p is a child of p2 and an ancestor of p1 or p1 itself
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
- inode_lock_nested(p1->d_inode, I_MUTEX_CHILD);
+ inode_lock_nested(p1->d_inode, I_MUTEX_PARENT2);
return p;
}
-
- p = d_ancestor(p1, p2);
- if (p) {
+ // p is the root of connected component that contains p1
+ // p2 does not occur on the path from p to p1
+ while ((r = q->d_parent) != p1 && r != p && r != q)
+ q = r;
+ if (r == p1) {
+ // q is a child of p1 and an ancestor of p2 or p2 itself
inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
- inode_lock_nested(p2->d_inode, I_MUTEX_CHILD);
- return p;
+ inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
+ return q;
+ } else if (likely(r == p)) {
+ // both p2 and p1 are descendents of p
+ inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
+ inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
+ return NULL;
+ } else { // no common ancestor at the time we'd been called
+ mutex_unlock(&p1->d_sb->s_vfs_rename_mutex);
+ return ERR_PTR(-EXDEV);
}
-
- lock_two_inodes(p1->d_inode, p2->d_inode,
- I_MUTEX_PARENT, I_MUTEX_PARENT2);
- return NULL;
}
/*
@@ -3158,7 +3167,7 @@ static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap,
* the vfsmount must be passed through @idmap. This function will then take
* care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs @nop_mnt_idmap.
+ * raw inode simply pass @nop_mnt_idmap.
*/
int vfs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool want_excl)
@@ -3646,7 +3655,7 @@ static int do_open(struct nameidata *nd,
* the vfsmount must be passed through @idmap. This function will then take
* care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs @nop_mnt_idmap.
+ * raw inode simply pass @nop_mnt_idmap.
*/
static int vfs_tmpfile(struct mnt_idmap *idmap,
const struct path *parentpath,
@@ -3785,10 +3794,7 @@ static struct file *path_openat(struct nameidata *nd,
WARN_ON(1);
error = -EINVAL;
}
- if (unlikely(file->f_mode & FMODE_OPENED))
- fput(file);
- else
- release_empty_file(file);
+ fput(file);
if (error == -EOPENSTALE) {
if (flags & LOOKUP_RCU)
error = -ECHILD;
@@ -3954,7 +3960,7 @@ EXPORT_SYMBOL(user_path_create);
* the vfsmount must be passed through @idmap. This function will then take
* care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs @nop_mnt_idmap.
+ * raw inode simply pass @nop_mnt_idmap.
*/
int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t dev)
@@ -4080,7 +4086,7 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d
* the vfsmount must be passed through @idmap. This function will then take
* care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs @nop_mnt_idmap.
+ * raw inode simply pass @nop_mnt_idmap.
*/
int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
@@ -4161,7 +4167,7 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
* the vfsmount must be passed through @idmap. This function will then take
* care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs @nop_mnt_idmap.
+ * raw inode simply pass @nop_mnt_idmap.
*/
int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry)
@@ -4290,7 +4296,7 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
* the vfsmount must be passed through @idmap. This function will then take
* care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs @nop_mnt_idmap.
+ * raw inode simply pass @nop_mnt_idmap.
*/
int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, struct inode **delegated_inode)
@@ -4443,7 +4449,7 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname)
* the vfsmount must be passed through @idmap. This function will then take
* care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs @nop_mnt_idmap.
+ * raw inode simply pass @nop_mnt_idmap.
*/
int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *oldname)
@@ -4535,7 +4541,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
* the vfsmount must be passed through @idmap. This function will then take
* care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs @nop_mnt_idmap.
+ * raw inode simply pass @nop_mnt_idmap.
*/
int vfs_link(struct dentry *old_dentry, struct mnt_idmap *idmap,
struct inode *dir, struct dentry *new_dentry,
@@ -4716,11 +4722,12 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
*
* a) we can get into loop creation.
* b) race potential - two innocent renames can create a loop together.
- * That's where 4.4 screws up. Current fix: serialization on
+ * That's where 4.4BSD screws up. Current fix: serialization on
* sb->s_vfs_rename_mutex. We might be more accurate, but that's another
* story.
- * c) we have to lock _four_ objects - parents and victim (if it exists),
- * and source.
+ * c) we may have to lock up to _four_ objects - parents and victim (if it exists),
+ * and source (if it's a non-directory or a subdirectory that moves to a
+ * different parent).
* And that - after we got ->i_mutex on parents (until then we don't know
* whether the target exists). Solution: try to be smart with locking
* order for inodes. We rely on the fact that tree topology may change
@@ -4752,6 +4759,7 @@ int vfs_rename(struct renamedata *rd)
bool new_is_dir = false;
unsigned max_links = new_dir->i_sb->s_max_links;
struct name_snapshot old_name;
+ bool lock_old_subdir, lock_new_subdir;
if (source == target)
return 0;
@@ -4805,15 +4813,32 @@ int vfs_rename(struct renamedata *rd)
take_dentry_name_snapshot(&old_name, old_dentry);
dget(new_dentry);
/*
- * Lock all moved children. Moved directories may need to change parent
- * pointer so they need the lock to prevent against concurrent
- * directory changes moving parent pointer. For regular files we've
- * historically always done this. The lockdep locking subclasses are
- * somewhat arbitrary but RENAME_EXCHANGE in particular can swap
- * regular files and directories so it's difficult to tell which
- * subclasses to use.
+ * Lock children.
+ * The source subdirectory needs to be locked on cross-directory
+ * rename or cross-directory exchange since its parent changes.
+ * The target subdirectory needs to be locked on cross-directory
+ * exchange due to parent change and on any rename due to becoming
+ * a victim.
+ * Non-directories need locking in all cases (for NFS reasons);
+ * they get locked after any subdirectories (in inode address order).
+ *
+ * NOTE: WE ONLY LOCK UNRELATED DIRECTORIES IN CROSS-DIRECTORY CASE.
+ * NEVER, EVER DO THAT WITHOUT ->s_vfs_rename_mutex.
*/
- lock_two_inodes(source, target, I_MUTEX_NORMAL, I_MUTEX_NONDIR2);
+ lock_old_subdir = new_dir != old_dir;
+ lock_new_subdir = new_dir != old_dir || !(flags & RENAME_EXCHANGE);
+ if (is_dir) {
+ if (lock_old_subdir)
+ inode_lock_nested(source, I_MUTEX_CHILD);
+ if (target && (!new_is_dir || lock_new_subdir))
+ inode_lock(target);
+ } else if (new_is_dir) {
+ if (lock_new_subdir)
+ inode_lock_nested(target, I_MUTEX_CHILD);
+ inode_lock(source);
+ } else {
+ lock_two_nondirectories(source, target);
+ }
error = -EPERM;
if (IS_SWAPFILE(source) || (target && IS_SWAPFILE(target)))
@@ -4861,8 +4886,9 @@ int vfs_rename(struct renamedata *rd)
d_exchange(old_dentry, new_dentry);
}
out:
- inode_unlock(source);
- if (target)
+ if (!is_dir || lock_old_subdir)
+ inode_unlock(source);
+ if (target && (!new_is_dir || lock_new_subdir))
inode_unlock(target);
dput(new_dentry);
if (!error) {
@@ -4933,6 +4959,10 @@ retry:
retry_deleg:
trap = lock_rename(new_path.dentry, old_path.dentry);
+ if (IS_ERR(trap)) {
+ error = PTR_ERR(trap);
+ goto exit_lock_rename;
+ }
old_dentry = lookup_one_qstr_excl(&old_last, old_path.dentry,
lookup_flags);
@@ -5000,6 +5030,7 @@ exit4:
dput(old_dentry);
exit3:
unlock_rename(new_path.dentry, old_path.dentry);
+exit_lock_rename:
if (delegated_inode) {
error = break_deleg_wait(&delegated_inode);
if (!error)