diff options
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- | fs/namei.c | 992 |
1 files changed, 637 insertions, 355 deletions
diff --git a/fs/namei.c b/fs/namei.c index c5b2a25be7d0..4bb889fc980b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -125,8 +125,15 @@ #define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname)) +static inline void initname(struct filename *name, const char __user *uptr) +{ + name->uptr = uptr; + name->aname = NULL; + atomic_set(&name->refcnt, 1); +} + struct filename * -getname_flags(const char __user *filename, int flags, int *empty) +getname_flags(const char __user *filename, int flags) { struct filename *result; char *kname; @@ -148,9 +155,20 @@ getname_flags(const char __user *filename, int flags, int *empty) result->name = kname; len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX); - if (unlikely(len < 0)) { - __putname(result); - return ERR_PTR(len); + /* + * Handle both empty path and copy failure in one go. + */ + if (unlikely(len <= 0)) { + if (unlikely(len < 0)) { + __putname(result); + return ERR_PTR(len); + } + + /* The empty path is special. */ + if (!(flags & LOOKUP_EMPTY)) { + __putname(result); + return ERR_PTR(-ENOENT); + } } /* @@ -180,46 +198,50 @@ getname_flags(const char __user *filename, int flags, int *empty) kfree(result); return ERR_PTR(len); } + /* The empty path is special. */ + if (unlikely(!len) && !(flags & LOOKUP_EMPTY)) { + __putname(kname); + kfree(result); + return ERR_PTR(-ENOENT); + } if (unlikely(len == PATH_MAX)) { __putname(kname); kfree(result); return ERR_PTR(-ENAMETOOLONG); } } - - atomic_set(&result->refcnt, 1); - /* The empty path is special. */ - if (unlikely(!len)) { - if (empty) - *empty = 1; - if (!(flags & LOOKUP_EMPTY)) { - putname(result); - return ERR_PTR(-ENOENT); - } - } - - result->uptr = filename; - result->aname = NULL; + initname(result, filename); audit_getname(result); return result; } -struct filename * -getname_uflags(const char __user *filename, int uflags) +struct filename *getname_uflags(const char __user *filename, int uflags) { int flags = (uflags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0; - return getname_flags(filename, flags, NULL); + return getname_flags(filename, flags); } -struct filename * -getname(const char __user * filename) +struct filename *__getname_maybe_null(const char __user *pathname) { - return getname_flags(filename, 0, NULL); + struct filename *name; + char c; + + /* try to save on allocations; loss on um, though */ + if (get_user(c, pathname)) + return ERR_PTR(-EFAULT); + if (!c) + return NULL; + + name = getname_flags(pathname, LOOKUP_EMPTY); + if (!IS_ERR(name) && !(name->name[0])) { + putname(name); + name = NULL; + } + return name; } -struct filename * -getname_kernel(const char * filename) +struct filename *getname_kernel(const char * filename) { struct filename *result; int len = strlen(filename) + 1; @@ -246,25 +268,27 @@ getname_kernel(const char * filename) return ERR_PTR(-ENAMETOOLONG); } memcpy((char *)result->name, filename, len); - result->uptr = NULL; - result->aname = NULL; - atomic_set(&result->refcnt, 1); + initname(result, NULL); audit_getname(result); - return result; } EXPORT_SYMBOL(getname_kernel); void putname(struct filename *name) { - if (IS_ERR(name)) - return; + int refcnt; - if (WARN_ON_ONCE(!atomic_read(&name->refcnt))) + if (IS_ERR_OR_NULL(name)) return; - if (!atomic_dec_and_test(&name->refcnt)) - return; + refcnt = atomic_read(&name->refcnt); + if (refcnt != 1) { + if (WARN_ON_ONCE(!refcnt)) + return; + + if (!atomic_dec_and_test(&name->refcnt)) + return; + } if (name->name != name->iname) { __putname(name->name); @@ -319,6 +343,25 @@ static int check_acl(struct mnt_idmap *idmap, return -EAGAIN; } +/* + * Very quick optimistic "we know we have no ACL's" check. + * + * Note that this is purely for ACL_TYPE_ACCESS, and purely + * for the "we have cached that there are no ACLs" case. + * + * If this returns true, we know there are no ACLs. But if + * it returns false, we might still not have ACLs (it could + * be the is_uncached_acl() case). + */ +static inline bool no_acl_inode(struct inode *inode) +{ +#ifdef CONFIG_FS_POSIX_ACL + return likely(!READ_ONCE(inode->i_acl)); +#else + return true; +#endif +} + /** * acl_permission_check - perform basic UNIX permission checking * @idmap: idmap of the mount the inode was found from @@ -341,6 +384,28 @@ static int acl_permission_check(struct mnt_idmap *idmap, unsigned int mode = inode->i_mode; vfsuid_t vfsuid; + /* + * Common cheap case: everybody has the requested + * rights, and there are no ACLs to check. No need + * to do any owner/group checks in that case. + * + * - 'mask&7' is the requested permission bit set + * - multiplying by 0111 spreads them out to all of ugo + * - '& ~mode' looks for missing inode permission bits + * - the '!' is for "no missing permissions" + * + * After that, we just need to check that there are no + * ACL's on the inode - do the 'IS_POSIXACL()' check last + * because it will dereference the ->i_sb pointer and we + * want to avoid that if at all possible. + */ + if (!((mask & 7) * 0111 & ~mode)) { + if (no_acl_inode(inode)) + return 0; + if (!IS_POSIXACL(inode)) + return 0; + } + /* Are we the owner? If so, ACL's don't matter */ vfsuid = i_uid_into_vfsuid(idmap, inode); if (likely(vfsuid_eq_kuid(vfsuid, current_fsuid()))) { @@ -506,14 +571,14 @@ int inode_permission(struct mnt_idmap *idmap, int retval; retval = sb_permission(inode->i_sb, inode, mask); - if (retval) + if (unlikely(retval)) return retval; if (unlikely(mask & MAY_WRITE)) { /* * Nobody gets write access to an immutable file. */ - if (IS_IMMUTABLE(inode)) + if (unlikely(IS_IMMUTABLE(inode))) return -EPERM; /* @@ -521,16 +586,16 @@ int inode_permission(struct mnt_idmap *idmap, * written back improperly if their true value is unknown * to the vfs. */ - if (HAS_UNMAPPED_ID(idmap, inode)) + if (unlikely(HAS_UNMAPPED_ID(idmap, inode))) return -EACCES; } retval = do_inode_permission(idmap, inode, mask); - if (retval) + if (unlikely(retval)) return retval; retval = devcgroup_inode_permission(inode, mask); - if (retval) + if (unlikely(retval)) return retval; return security_inode_permission(inode, mask); @@ -581,6 +646,7 @@ struct nameidata { unsigned seq; } *stack, internal[EMBEDDED_LEVELS]; struct filename *name; + const char *pathname; struct nameidata *saved; unsigned root_seq; int dfd; @@ -599,6 +665,7 @@ static void __set_nameidata(struct nameidata *p, int dfd, struct filename *name) p->depth = 0; p->dfd = dfd; p->name = name; + p->pathname = likely(name) ? name->name : ""; p->path.mnt = NULL; p->path.dentry = NULL; p->total_link_count = old ? old->total_link_count : 0; @@ -855,10 +922,11 @@ out_dput: return false; } -static inline int d_revalidate(struct dentry *dentry, unsigned int flags) +static inline int d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) - return dentry->d_op->d_revalidate(dentry, flags); + return dentry->d_op->d_revalidate(dir, name, dentry, flags); else return 1; } @@ -1033,7 +1101,7 @@ static int sysctl_protected_fifos __read_mostly; static int sysctl_protected_regular __read_mostly; #ifdef CONFIG_SYSCTL -static struct ctl_table namei_sysctls[] = { +static const struct ctl_table namei_sysctls[] = { { .procname = "protected_symlinks", .data = &sysctl_protected_symlinks, @@ -1233,29 +1301,48 @@ int may_linkat(struct mnt_idmap *idmap, const struct path *link) * * Returns 0 if the open is allowed, -ve on error. */ -static int may_create_in_sticky(struct mnt_idmap *idmap, - struct nameidata *nd, struct inode *const inode) +static int may_create_in_sticky(struct mnt_idmap *idmap, struct nameidata *nd, + struct inode *const inode) { umode_t dir_mode = nd->dir_mode; - vfsuid_t dir_vfsuid = nd->dir_vfsuid; + vfsuid_t dir_vfsuid = nd->dir_vfsuid, i_vfsuid; + + if (likely(!(dir_mode & S_ISVTX))) + return 0; + + if (S_ISREG(inode->i_mode) && !sysctl_protected_regular) + return 0; - if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) || - (!sysctl_protected_regular && S_ISREG(inode->i_mode)) || - likely(!(dir_mode & S_ISVTX)) || - vfsuid_eq(i_uid_into_vfsuid(idmap, inode), dir_vfsuid) || - vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid())) + if (S_ISFIFO(inode->i_mode) && !sysctl_protected_fifos) return 0; - if (likely(dir_mode & 0002) || - (dir_mode & 0020 && - ((sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) || - (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode))))) { - const char *operation = S_ISFIFO(inode->i_mode) ? - "sticky_create_fifo" : - "sticky_create_regular"; - audit_log_path_denied(AUDIT_ANOM_CREAT, operation); + i_vfsuid = i_uid_into_vfsuid(idmap, inode); + + if (vfsuid_eq(i_vfsuid, dir_vfsuid)) + return 0; + + if (vfsuid_eq_kuid(i_vfsuid, current_fsuid())) + return 0; + + if (likely(dir_mode & 0002)) { + audit_log_path_denied(AUDIT_ANOM_CREAT, "sticky_create"); return -EACCES; } + + if (dir_mode & 0020) { + if (sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) { + audit_log_path_denied(AUDIT_ANOM_CREAT, + "sticky_create_fifo"); + return -EACCES; + } + + if (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode)) { + audit_log_path_denied(AUDIT_ANOM_CREAT, + "sticky_create_regular"); + return -EACCES; + } + } + return 0; } @@ -1567,7 +1654,7 @@ static struct dentry *lookup_dcache(const struct qstr *name, { struct dentry *dentry = d_lookup(dir, name); if (dentry) { - int error = d_revalidate(dentry, flags); + int error = d_revalidate(dir->d_inode, name, dentry, flags); if (unlikely(error <= 0)) { if (!error) d_invalidate(dentry); @@ -1578,25 +1665,20 @@ static struct dentry *lookup_dcache(const struct qstr *name, return dentry; } -/* - * Parent directory has inode locked exclusive. This is one - * and only case when ->lookup() gets called on non in-lookup - * dentries - as the matter of fact, this only gets called - * when directory is guaranteed to have no in-lookup children - * at all. - */ -struct dentry *lookup_one_qstr_excl(const struct qstr *name, - struct dentry *base, - unsigned int flags) +static struct dentry *lookup_one_qstr_excl_raw(const struct qstr *name, + struct dentry *base, + unsigned int flags) { - struct dentry *dentry = lookup_dcache(name, base, flags); + struct dentry *dentry; struct dentry *old; - struct inode *dir = base->d_inode; + struct inode *dir; + dentry = lookup_dcache(name, base, flags); if (dentry) return dentry; /* Don't create child dentry for a dead directory. */ + dir = base->d_inode; if (unlikely(IS_DEADDIR(dir))) return ERR_PTR(-ENOENT); @@ -1611,8 +1693,50 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name, } return dentry; } + +/* + * Parent directory has inode locked exclusive. This is one + * and only case when ->lookup() gets called on non in-lookup + * dentries - as the matter of fact, this only gets called + * when directory is guaranteed to have no in-lookup children + * at all. + * Will return -ENOENT if name isn't found and LOOKUP_CREATE wasn't passed. + * Will return -EEXIST if name is found and LOOKUP_EXCL was passed. + */ +struct dentry *lookup_one_qstr_excl(const struct qstr *name, + struct dentry *base, unsigned int flags) +{ + struct dentry *dentry; + + dentry = lookup_one_qstr_excl_raw(name, base, flags); + if (IS_ERR(dentry)) + return dentry; + if (d_is_negative(dentry) && !(flags & LOOKUP_CREATE)) { + dput(dentry); + return ERR_PTR(-ENOENT); + } + if (d_is_positive(dentry) && (flags & LOOKUP_EXCL)) { + dput(dentry); + return ERR_PTR(-EEXIST); + } + return dentry; +} EXPORT_SYMBOL(lookup_one_qstr_excl); +/** + * lookup_fast - do fast lockless (but racy) lookup of a dentry + * @nd: current nameidata + * + * Do a fast, but racy lookup in the dcache for the given dentry, and + * revalidate it. Returns a valid dentry pointer or NULL if one wasn't + * found. On error, an ERR_PTR will be returned. + * + * If this function returns a valid dentry and the walk is no longer + * lazy, the dentry will carry a reference that must later be put. If + * RCU mode is still in force, then this is not the case and the dentry + * must be legitimized before use. If this returns NULL, then the walk + * will no longer be in RCU mode. + */ static struct dentry *lookup_fast(struct nameidata *nd) { struct dentry *dentry, *parent = nd->path.dentry; @@ -1638,19 +1762,20 @@ static struct dentry *lookup_fast(struct nameidata *nd) if (read_seqcount_retry(&parent->d_seq, nd->seq)) return ERR_PTR(-ECHILD); - status = d_revalidate(dentry, nd->flags); + status = d_revalidate(nd->inode, &nd->last, dentry, nd->flags); if (likely(status > 0)) return dentry; if (!try_to_unlazy_next(nd, dentry)) return ERR_PTR(-ECHILD); if (status == -ECHILD) /* we'd been told to redo it in non-rcu mode */ - status = d_revalidate(dentry, nd->flags); + status = d_revalidate(nd->inode, &nd->last, + dentry, nd->flags); } else { dentry = __d_lookup(parent, &nd->last); if (unlikely(!dentry)) return NULL; - status = d_revalidate(dentry, nd->flags); + status = d_revalidate(nd->inode, &nd->last, dentry, nd->flags); } if (unlikely(status <= 0)) { if (!status) @@ -1678,7 +1803,7 @@ again: if (IS_ERR(dentry)) return dentry; if (unlikely(!d_in_lookup(dentry))) { - int error = d_revalidate(dentry, flags); + int error = d_revalidate(inode, name, dentry, flags); if (unlikely(error <= 0)) { if (!error) { d_invalidate(dentry); @@ -1712,17 +1837,26 @@ static struct dentry *lookup_slow(const struct qstr *name, } static inline int may_lookup(struct mnt_idmap *idmap, - struct nameidata *nd) + struct nameidata *restrict nd) { - if (nd->flags & LOOKUP_RCU) { - int err = inode_permission(idmap, nd->inode, MAY_EXEC|MAY_NOT_BLOCK); - if (!err) // success, keep going - return 0; - if (!try_to_unlazy(nd)) - return -ECHILD; // redo it all non-lazy - if (err != -ECHILD) // hard error - return err; - } + int err, mask; + + mask = nd->flags & LOOKUP_RCU ? MAY_NOT_BLOCK : 0; + err = inode_permission(idmap, nd->inode, mask | MAY_EXEC); + if (likely(!err)) + return 0; + + // If we failed, and we weren't in LOOKUP_RCU, it's final + if (!(nd->flags & LOOKUP_RCU)) + return err; + + // Drop out of RCU mode to make sure it wasn't transient + if (!try_to_unlazy(nd)) + return -ECHILD; // redo it all non-lazy + + if (err != -ECHILD) // hard error + return err; + return inode_permission(idmap, nd->inode, MAY_EXEC); } @@ -1781,13 +1915,13 @@ static const char *pick_link(struct nameidata *nd, struct path *link, unlikely(link->mnt->mnt_flags & MNT_NOSYMFOLLOW)) return ERR_PTR(-ELOOP); - if (!(nd->flags & LOOKUP_RCU)) { + if (unlikely(atime_needs_update(&last->link, inode))) { + if (nd->flags & LOOKUP_RCU) { + if (!try_to_unlazy(nd)) + return ERR_PTR(-ECHILD); + } touch_atime(&last->link); cond_resched(); - } else if (atime_needs_update(&last->link, inode)) { - if (!try_to_unlazy(nd)) - return ERR_PTR(-ECHILD); - touch_atime(&last->link); } error = security_inode_follow_link(link->dentry, inode, @@ -2163,21 +2297,39 @@ EXPORT_SYMBOL(hashlen_string); /* * Calculate the length and hash of the path component, and - * return the "hash_len" as the result. + * return the length as the result. */ -static inline u64 hash_name(const void *salt, const char *name) +static inline const char *hash_name(struct nameidata *nd, + const char *name, + unsigned long *lastword) { - unsigned long a = 0, b, x = 0, y = (unsigned long)salt; + unsigned long a, b, x, y = (unsigned long)nd->path.dentry; unsigned long adata, bdata, mask, len; const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; - len = 0; - goto inside; + /* + * The first iteration is special, because it can result in + * '.' and '..' and has no mixing other than the final fold. + */ + a = load_unaligned_zeropad(name); + b = a ^ REPEAT_BYTE('/'); + if (has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants)) { + adata = prep_zero_mask(a, adata, &constants); + bdata = prep_zero_mask(b, bdata, &constants); + mask = create_zero_mask(adata | bdata); + a &= zero_bytemask(mask); + *lastword = a; + len = find_zero(mask); + nd->last.hash = fold_hash(a, y); + nd->last.len = len; + return name + len; + } + len = 0; + x = 0; do { HASH_MIX(x, y, a); len += sizeof(unsigned long); -inside: a = load_unaligned_zeropad(name+len); b = a ^ REPEAT_BYTE('/'); } while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants))); @@ -2185,11 +2337,25 @@ inside: adata = prep_zero_mask(a, adata, &constants); bdata = prep_zero_mask(b, bdata, &constants); mask = create_zero_mask(adata | bdata); - x ^= a & zero_bytemask(mask); + a &= zero_bytemask(mask); + x ^= a; + len += find_zero(mask); + *lastword = 0; // Multi-word components cannot be DOT or DOTDOT - return hashlen_create(fold_hash(x, y), len + find_zero(mask)); + nd->last.hash = fold_hash(x, y); + nd->last.len = len; + return name + len; } +/* + * Note that the 'last' word is always zero-masked, but + * was loaded as a possibly big-endian word. + */ +#ifdef __BIG_ENDIAN + #define LAST_WORD_IS_DOT (0x2eul << (BITS_PER_LONG-8)) + #define LAST_WORD_IS_DOTDOT (0x2e2eul << (BITS_PER_LONG-16)) +#endif + #else /* !CONFIG_DCACHE_WORD_ACCESS: Slow, byte-at-a-time version */ /* Return the hash of a string of known length */ @@ -2222,22 +2388,35 @@ EXPORT_SYMBOL(hashlen_string); * We know there's a real path component here of at least * one character. */ -static inline u64 hash_name(const void *salt, const char *name) +static inline const char *hash_name(struct nameidata *nd, const char *name, unsigned long *lastword) { - unsigned long hash = init_name_hash(salt); - unsigned long len = 0, c; + unsigned long hash = init_name_hash(nd->path.dentry); + unsigned long len = 0, c, last = 0; c = (unsigned char)*name; do { + last = (last << 8) + c; len++; hash = partial_name_hash(c, hash); c = (unsigned char)name[len]; } while (c && c != '/'); - return hashlen_create(end_name_hash(hash), len); + + // This is reliable for DOT or DOTDOT, since the component + // cannot contain NUL characters - top bits being zero means + // we cannot have had any other pathnames. + *lastword = last; + nd->last.hash = end_name_hash(hash); + nd->last.len = len; + return name + len; } #endif +#ifndef LAST_WORD_IS_DOT + #define LAST_WORD_IS_DOT 0x2e + #define LAST_WORD_IS_DOTDOT 0x2e2e +#endif + /* * Name resolution. * This is the basic name resolution function, turning a pathname into @@ -2255,9 +2434,12 @@ static int link_path_walk(const char *name, struct nameidata *nd) nd->flags |= LOOKUP_PARENT; if (IS_ERR(name)) return PTR_ERR(name); - while (*name=='/') - name++; - if (!*name) { + if (*name == '/') { + do { + name++; + } while (unlikely(*name == '/')); + } + if (unlikely(!*name)) { nd->dir_mode = 0; // short-circuit the 'hardening' idiocy return 0; } @@ -2266,45 +2448,38 @@ static int link_path_walk(const char *name, struct nameidata *nd) for(;;) { struct mnt_idmap *idmap; const char *link; - u64 hash_len; - int type; + unsigned long lastword; idmap = mnt_idmap(nd->path.mnt); err = may_lookup(idmap, nd); - if (err) + if (unlikely(err)) return err; - hash_len = hash_name(nd->path.dentry, name); + nd->last.name = name; + name = hash_name(nd, name, &lastword); - type = LAST_NORM; - if (name[0] == '.') switch (hashlen_len(hash_len)) { - case 2: - if (name[1] == '.') { - type = LAST_DOTDOT; - nd->state |= ND_JUMPED; - } - break; - case 1: - type = LAST_DOT; - } - if (likely(type == LAST_NORM)) { - struct dentry *parent = nd->path.dentry; + switch(lastword) { + case LAST_WORD_IS_DOTDOT: + nd->last_type = LAST_DOTDOT; + nd->state |= ND_JUMPED; + break; + + case LAST_WORD_IS_DOT: + nd->last_type = LAST_DOT; + break; + + default: + nd->last_type = LAST_NORM; nd->state &= ~ND_JUMPED; + + struct dentry *parent = nd->path.dentry; if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { - struct qstr this = { { .hash_len = hash_len }, .name = name }; - err = parent->d_op->d_hash(parent, &this); + err = parent->d_op->d_hash(parent, &nd->last); if (err < 0) return err; - hash_len = this.hash_len; - name = this.name; } } - nd->last.hash_len = hash_len; - nd->last.name = name; - nd->last_type = type; - - name += hashlen_len(hash_len); if (!*name) goto OK; /* @@ -2352,7 +2527,7 @@ OK: static const char *path_init(struct nameidata *nd, unsigned flags) { int error; - const char *s = nd->name->name; + const char *s = nd->pathname; /* LOOKUP_CACHED requires RCU, ask caller to retry */ if ((flags & (LOOKUP_RCU | LOOKUP_CACHED)) == LOOKUP_CACHED) @@ -2416,20 +2591,24 @@ static const char *path_init(struct nameidata *nd, unsigned flags) } } else { /* Caller must check execute permissions on the starting path component */ - struct fd f = fdget_raw(nd->dfd); + CLASS(fd_raw, f)(nd->dfd); struct dentry *dentry; - if (!f.file) + if (fd_empty(f)) return ERR_PTR(-EBADF); - dentry = f.file->f_path.dentry; + if (flags & LOOKUP_LINKAT_EMPTY) { + if (fd_file(f)->f_cred != current_cred() && + !ns_capable(fd_file(f)->f_cred->user_ns, CAP_DAC_READ_SEARCH)) + return ERR_PTR(-ENOENT); + } - if (*s && unlikely(!d_can_lookup(dentry))) { - fdput(f); + dentry = fd_file(f)->f_path.dentry; + + if (*s && unlikely(!d_can_lookup(dentry))) return ERR_PTR(-ENOTDIR); - } - nd->path = f.file->f_path; + nd->path = fd_file(f)->f_path; if (flags & LOOKUP_RCU) { nd->inode = nd->path.dentry->d_inode; nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); @@ -2437,7 +2616,6 @@ static const char *path_init(struct nameidata *nd, unsigned flags) path_get(&nd->path); nd->inode = nd->path.dentry->d_inode; } - fdput(f); } /* For scoped-lookups we need to set the root to the dirfd as well. */ @@ -2577,23 +2755,48 @@ static int filename_parentat(int dfd, struct filename *name, /* does lookup, returns the object with parent locked */ static struct dentry *__kern_path_locked(int dfd, struct filename *name, struct path *path) { + struct path parent_path __free(path_put) = {}; struct dentry *d; struct qstr last; int type, error; - error = filename_parentat(dfd, name, 0, path, &last, &type); + error = filename_parentat(dfd, name, 0, &parent_path, &last, &type); if (error) return ERR_PTR(error); - if (unlikely(type != LAST_NORM)) { - path_put(path); + if (unlikely(type != LAST_NORM)) return ERR_PTR(-EINVAL); + inode_lock_nested(parent_path.dentry->d_inode, I_MUTEX_PARENT); + d = lookup_one_qstr_excl(&last, parent_path.dentry, 0); + if (IS_ERR(d)) { + inode_unlock(parent_path.dentry->d_inode); + return d; } - inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); - d = lookup_one_qstr_excl(&last, path->dentry, 0); + path->dentry = no_free_ptr(parent_path.dentry); + path->mnt = no_free_ptr(parent_path.mnt); + return d; +} + +struct dentry *kern_path_locked_negative(const char *name, struct path *path) +{ + struct path parent_path __free(path_put) = {}; + struct filename *filename __free(putname) = getname_kernel(name); + struct dentry *d; + struct qstr last; + int type, error; + + error = filename_parentat(AT_FDCWD, filename, 0, &parent_path, &last, &type); + if (error) + return ERR_PTR(error); + if (unlikely(type != LAST_NORM)) + return ERR_PTR(-EINVAL); + inode_lock_nested(parent_path.dentry->d_inode, I_MUTEX_PARENT); + d = lookup_one_qstr_excl_raw(&last, parent_path.dentry, 0); if (IS_ERR(d)) { - inode_unlock(path->dentry->d_inode); - path_put(path); + inode_unlock(parent_path.dentry->d_inode); + return d; } + path->dentry = no_free_ptr(parent_path.dentry); + path->mnt = no_free_ptr(parent_path.mnt); return d; } @@ -2669,13 +2872,12 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, } EXPORT_SYMBOL(vfs_path_lookup); -static int lookup_one_common(struct mnt_idmap *idmap, - const char *name, struct dentry *base, int len, - struct qstr *this) +static int lookup_noperm_common(struct qstr *qname, struct dentry *base) { - this->name = name; - this->len = len; - this->hash = full_name_hash(base, name, len); + const char *name = qname->name; + u32 len = qname->len; + + qname->hash = full_name_hash(base, name, len); if (!len) return -EACCES; @@ -2692,140 +2894,135 @@ static int lookup_one_common(struct mnt_idmap *idmap, * to use its own hash.. */ if (base->d_flags & DCACHE_OP_HASH) { - int err = base->d_op->d_hash(base, this); + int err = base->d_op->d_hash(base, qname); if (err < 0) return err; } + return 0; +} +static int lookup_one_common(struct mnt_idmap *idmap, + struct qstr *qname, struct dentry *base) +{ + int err; + err = lookup_noperm_common(qname, base); + if (err < 0) + return err; return inode_permission(idmap, base->d_inode, MAY_EXEC); } /** - * try_lookup_one_len - filesystem helper to lookup single pathname component - * @name: pathname component to lookup + * try_lookup_noperm - filesystem helper to lookup single pathname component + * @name: qstr storing pathname component to lookup * @base: base directory to lookup from - * @len: maximum length @len should be interpreted to * * Look up a dentry by name in the dcache, returning NULL if it does not * currently exist. The function does not try to create a dentry. * * Note that this routine is purely a helper for filesystem usage and should - * not be called by generic code. + * not be called by generic code. It does no permission checking. + * + * No locks need be held - only a counted reference to @base is needed. * - * The caller must hold base->i_mutex. */ -struct dentry *try_lookup_one_len(const char *name, struct dentry *base, int len) +struct dentry *try_lookup_noperm(struct qstr *name, struct dentry *base) { - struct qstr this; int err; - WARN_ON_ONCE(!inode_is_locked(base->d_inode)); - - err = lookup_one_common(&nop_mnt_idmap, name, base, len, &this); + err = lookup_noperm_common(name, base); if (err) return ERR_PTR(err); - return lookup_dcache(&this, base, 0); + return lookup_dcache(name, base, 0); } -EXPORT_SYMBOL(try_lookup_one_len); +EXPORT_SYMBOL(try_lookup_noperm); /** - * lookup_one_len - filesystem helper to lookup single pathname component - * @name: pathname component to lookup + * lookup_noperm - filesystem helper to lookup single pathname component + * @name: qstr storing pathname component to lookup * @base: base directory to lookup from - * @len: maximum length @len should be interpreted to * * Note that this routine is purely a helper for filesystem usage and should - * not be called by generic code. + * not be called by generic code. It does no permission checking. * * The caller must hold base->i_mutex. */ -struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) +struct dentry *lookup_noperm(struct qstr *name, struct dentry *base) { struct dentry *dentry; - struct qstr this; int err; WARN_ON_ONCE(!inode_is_locked(base->d_inode)); - err = lookup_one_common(&nop_mnt_idmap, name, base, len, &this); + err = lookup_noperm_common(name, base); if (err) return ERR_PTR(err); - dentry = lookup_dcache(&this, base, 0); - return dentry ? dentry : __lookup_slow(&this, base, 0); + dentry = lookup_dcache(name, base, 0); + return dentry ? dentry : __lookup_slow(name, base, 0); } -EXPORT_SYMBOL(lookup_one_len); +EXPORT_SYMBOL(lookup_noperm); /** - * lookup_one - filesystem helper to lookup single pathname component + * lookup_one - lookup single pathname component * @idmap: idmap of the mount the lookup is performed from - * @name: pathname component to lookup + * @name: qstr holding pathname component to lookup * @base: base directory to lookup from - * @len: maximum length @len should be interpreted to * - * Note that this routine is purely a helper for filesystem usage and should - * not be called by generic code. + * This can be used for in-kernel filesystem clients such as file servers. * * The caller must hold base->i_mutex. */ -struct dentry *lookup_one(struct mnt_idmap *idmap, const char *name, - struct dentry *base, int len) +struct dentry *lookup_one(struct mnt_idmap *idmap, struct qstr *name, + struct dentry *base) { struct dentry *dentry; - struct qstr this; int err; WARN_ON_ONCE(!inode_is_locked(base->d_inode)); - err = lookup_one_common(idmap, name, base, len, &this); + err = lookup_one_common(idmap, name, base); if (err) return ERR_PTR(err); - dentry = lookup_dcache(&this, base, 0); - return dentry ? dentry : __lookup_slow(&this, base, 0); + dentry = lookup_dcache(name, base, 0); + return dentry ? dentry : __lookup_slow(name, base, 0); } EXPORT_SYMBOL(lookup_one); /** - * lookup_one_unlocked - filesystem helper to lookup single pathname component + * lookup_one_unlocked - lookup single pathname component * @idmap: idmap of the mount the lookup is performed from - * @name: pathname component to lookup + * @name: qstr olding pathname component to lookup * @base: base directory to lookup from - * @len: maximum length @len should be interpreted to * - * Note that this routine is purely a helper for filesystem usage and should - * not be called by generic code. + * This can be used for in-kernel filesystem clients such as file servers. * - * Unlike lookup_one_len, it should be called without the parent - * i_mutex held, and will take the i_mutex itself if necessary. + * Unlike lookup_one, it should be called without the parent + * i_rwsem held, and will take the i_rwsem itself if necessary. */ -struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap, - const char *name, struct dentry *base, - int len) +struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap, struct qstr *name, + struct dentry *base) { - struct qstr this; int err; struct dentry *ret; - err = lookup_one_common(idmap, name, base, len, &this); + err = lookup_one_common(idmap, name, base); if (err) return ERR_PTR(err); - ret = lookup_dcache(&this, base, 0); + ret = lookup_dcache(name, base, 0); if (!ret) - ret = lookup_slow(&this, base, 0); + ret = lookup_slow(name, base, 0); return ret; } EXPORT_SYMBOL(lookup_one_unlocked); /** - * lookup_one_positive_unlocked - filesystem helper to lookup single - * pathname component + * lookup_one_positive_unlocked - lookup single pathname component * @idmap: idmap of the mount the lookup is performed from - * @name: pathname component to lookup + * @name: qstr holding pathname component to lookup * @base: base directory to lookup from - * @len: maximum length @len should be interpreted to * * This helper will yield ERR_PTR(-ENOENT) on negatives. The helper returns * known positive or ERR_PTR(). This is what most of the users want. @@ -2834,16 +3031,15 @@ EXPORT_SYMBOL(lookup_one_unlocked); * time, so callers of lookup_one_unlocked() need to be very careful; pinned * positives have >d_inode stable, so this one avoids such problems. * - * Note that this routine is purely a helper for filesystem usage and should - * not be called by generic code. + * This can be used for in-kernel filesystem clients such as file servers. * - * The helper should be called without i_mutex held. + * The helper should be called without i_rwsem held. */ struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap, - const char *name, - struct dentry *base, int len) + struct qstr *name, + struct dentry *base) { - struct dentry *ret = lookup_one_unlocked(idmap, name, base, len); + struct dentry *ret = lookup_one_unlocked(idmap, name, base); if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) { dput(ret); @@ -2854,38 +3050,48 @@ struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap, EXPORT_SYMBOL(lookup_one_positive_unlocked); /** - * lookup_one_len_unlocked - filesystem helper to lookup single pathname component + * lookup_noperm_unlocked - filesystem helper to lookup single pathname component * @name: pathname component to lookup * @base: base directory to lookup from - * @len: maximum length @len should be interpreted to * * Note that this routine is purely a helper for filesystem usage and should - * not be called by generic code. + * not be called by generic code. It does no permission checking. * - * Unlike lookup_one_len, it should be called without the parent - * i_mutex held, and will take the i_mutex itself if necessary. + * Unlike lookup_noperm, it should be called without the parent + * i_rwsem held, and will take the i_rwsem itself if necessary. */ -struct dentry *lookup_one_len_unlocked(const char *name, - struct dentry *base, int len) +struct dentry *lookup_noperm_unlocked(struct qstr *name, struct dentry *base) { - return lookup_one_unlocked(&nop_mnt_idmap, name, base, len); + struct dentry *ret; + + ret = try_lookup_noperm(name, base); + if (!ret) + ret = lookup_slow(name, base, 0); + return ret; } -EXPORT_SYMBOL(lookup_one_len_unlocked); +EXPORT_SYMBOL(lookup_noperm_unlocked); /* - * Like lookup_one_len_unlocked(), except that it yields ERR_PTR(-ENOENT) + * Like lookup_noperm_unlocked(), except that it yields ERR_PTR(-ENOENT) * on negatives. Returns known positive or ERR_PTR(); that's what * most of the users want. Note that pinned negative with unlocked parent - * _can_ become positive at any time, so callers of lookup_one_len_unlocked() + * _can_ become positive at any time, so callers of lookup_noperm_unlocked() * need to be very careful; pinned positives have ->d_inode stable, so * this one avoids such problems. */ -struct dentry *lookup_positive_unlocked(const char *name, - struct dentry *base, int len) +struct dentry *lookup_noperm_positive_unlocked(struct qstr *name, + struct dentry *base) { - return lookup_one_positive_unlocked(&nop_mnt_idmap, name, base, len); + struct dentry *ret; + + ret = lookup_noperm_unlocked(name, base); + if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) { + dput(ret); + ret = ERR_PTR(-ENOENT); + } + return ret; } -EXPORT_SYMBOL(lookup_positive_unlocked); +EXPORT_SYMBOL(lookup_noperm_positive_unlocked); #ifdef CONFIG_UNIX98_PTYS int path_pts(struct path *path) @@ -2914,16 +3120,16 @@ int path_pts(struct path *path) } #endif -int user_path_at_empty(int dfd, const char __user *name, unsigned flags, - struct path *path, int *empty) +int user_path_at(int dfd, const char __user *name, unsigned flags, + struct path *path) { - struct filename *filename = getname_flags(name, flags, empty); + struct filename *filename = getname_flags(name, flags); int ret = filename_lookup(dfd, filename, flags, path, NULL); putname(filename); return ret; } -EXPORT_SYMBOL(user_path_at_empty); +EXPORT_SYMBOL(user_path_at); int __check_sticky(struct mnt_idmap *idmap, struct inode *dir, struct inode *inode) @@ -3167,9 +3373,9 @@ static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap, /** * vfs_create - create new file * @idmap: idmap of the mount the inode was found from - * @dir: inode of @dentry - * @dentry: pointer to dentry of the base directory - * @mode: mode of the new file + * @dir: inode of the parent directory + * @dentry: dentry of the child file + * @mode: mode of the child file * @want_excl: whether the file must not yet exist * * Create a new file. @@ -3264,6 +3470,8 @@ static int may_open(struct mnt_idmap *idmap, const struct path *path, if ((acc_mode & MAY_EXEC) && path_noexec(path)) return -EACCES; break; + default: + VFS_BUG_ON_INODE(1, inode); } error = inode_permission(idmap, inode, MAY_OPEN | acc_mode); @@ -3426,7 +3634,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, if (d_in_lookup(dentry)) break; - error = d_revalidate(dentry, nd->flags); + error = d_revalidate(dir_inode, &nd->last, dentry, nd->flags); if (likely(error > 0)) break; if (error) @@ -3440,6 +3648,9 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, return dentry; } + if (open_flag & O_CREAT) + audit_inode(nd->name, dir, AUDIT_INODE_PARENT); + /* * Checking write permission is tricky, bacuse we don't know if we are * going to actually need it: O_CREAT opens should work as long as the @@ -3510,6 +3721,42 @@ out_dput: return ERR_PTR(error); } +static inline bool trailing_slashes(struct nameidata *nd) +{ + return (bool)nd->last.name[nd->last.len]; +} + +static struct dentry *lookup_fast_for_open(struct nameidata *nd, int open_flag) +{ + struct dentry *dentry; + + if (open_flag & O_CREAT) { + if (trailing_slashes(nd)) + return ERR_PTR(-EISDIR); + + /* Don't bother on an O_EXCL create */ + if (open_flag & O_EXCL) + return NULL; + } + + if (trailing_slashes(nd)) + nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; + + dentry = lookup_fast(nd); + if (IS_ERR_OR_NULL(dentry)) + return dentry; + + if (open_flag & O_CREAT) { + /* Discard negative dentries. Need inode_lock to do the create */ + if (!dentry->d_inode) { + if (!(nd->flags & LOOKUP_RCU)) + dput(dentry); + dentry = NULL; + } + } + return dentry; +} + static const char *open_last_lookups(struct nameidata *nd, struct file *file, const struct open_flags *op) { @@ -3527,28 +3774,22 @@ static const char *open_last_lookups(struct nameidata *nd, return handle_dots(nd, nd->last_type); } - if (!(open_flag & O_CREAT)) { - if (nd->last.name[nd->last.len]) - nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; - /* we _can_ be in RCU mode here */ - dentry = lookup_fast(nd); - if (IS_ERR(dentry)) - return ERR_CAST(dentry); - if (likely(dentry)) - goto finish_lookup; + /* We _can_ be in RCU mode here */ + dentry = lookup_fast_for_open(nd, open_flag); + if (IS_ERR(dentry)) + return ERR_CAST(dentry); + + if (likely(dentry)) + goto finish_lookup; + if (!(open_flag & O_CREAT)) { if (WARN_ON_ONCE(nd->flags & LOOKUP_RCU)) return ERR_PTR(-ECHILD); } else { - /* create side of things */ if (nd->flags & LOOKUP_RCU) { if (!try_to_unlazy(nd)) return ERR_PTR(-ECHILD); } - audit_inode(nd->name, dir, AUDIT_INODE_PARENT); - /* trailing slashes? */ - if (unlikely(nd->last.name[nd->last.len])) - return ERR_PTR(-EISDIR); } if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { @@ -3564,8 +3805,12 @@ static const char *open_last_lookups(struct nameidata *nd, else inode_lock_shared(dir->d_inode); dentry = lookup_open(nd, file, op, got_write); - if (!IS_ERR(dentry) && (file->f_mode & FMODE_CREATED)) - fsnotify_create(dir->d_inode, dentry); + if (!IS_ERR(dentry)) { + if (file->f_mode & FMODE_CREATED) + fsnotify_create(dir->d_inode, dentry); + if (file->f_mode & FMODE_OPENED) + fsnotify_open(file); + } if (open_flag & O_CREAT) inode_unlock(dir->d_inode); else @@ -3668,9 +3913,9 @@ static int do_open(struct nameidata *nd, * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ -static int vfs_tmpfile(struct mnt_idmap *idmap, - const struct path *parentpath, - struct file *file, umode_t mode) +int vfs_tmpfile(struct mnt_idmap *idmap, + const struct path *parentpath, + struct file *file, umode_t mode) { struct dentry *child; struct inode *dir = d_inode(parentpath->dentry); @@ -3692,6 +3937,8 @@ static int vfs_tmpfile(struct mnt_idmap *idmap, mode = vfs_prepare_mode(idmap, dir, mode, mode, mode); error = dir->i_op->tmpfile(idmap, dir, file, mode); dput(child); + if (file->f_mode & FMODE_OPENED) + fsnotify_open(file); if (error) return error; /* Don't check for other permissions, the inode was just created */ @@ -3805,7 +4052,7 @@ static struct file *path_openat(struct nameidata *nd, WARN_ON(1); error = -EINVAL; } - fput(file); + fput_close(file); if (error == -EOPENSTALE) { if (flags & LOOKUP_RCU) error = -ECHILD; @@ -3888,27 +4135,13 @@ static struct dentry *filename_create(int dfd, struct filename *name, * '/', and a directory wasn't requested. */ if (last.name[last.len] && !want_dir) - create_flags = 0; + create_flags &= ~LOOKUP_CREATE; inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); dentry = lookup_one_qstr_excl(&last, path->dentry, reval_flag | create_flags); if (IS_ERR(dentry)) goto unlock; - error = -EEXIST; - if (d_is_positive(dentry)) - goto fail; - - /* - * Special case - lookup gave negative, but... we had foo/bar/ - * From the vfs_mknod() POV we just have a negative dentry - - * all is fine. Let's be bastards - you had / on the end, you've - * been asking for (non-existent) directory. -ENOENT for you. - */ - if (unlikely(!create_flags)) { - error = -ENOENT; - goto fail; - } if (unlikely(err2)) { error = err2; goto fail; @@ -3939,7 +4172,8 @@ EXPORT_SYMBOL(kern_path_create); void done_path_create(struct path *path, struct dentry *dentry) { - dput(dentry); + if (!IS_ERR(dentry)) + dput(dentry); inode_unlock(path->dentry->d_inode); mnt_drop_write(path->mnt); path_put(path); @@ -3960,9 +4194,9 @@ EXPORT_SYMBOL(user_path_create); /** * vfs_mknod - create device node or file * @idmap: idmap of the mount the inode was found from - * @dir: inode of @dentry - * @dentry: pointer to dentry of the base directory - * @mode: mode of the new device node or file + * @dir: inode of the parent directory + * @dentry: dentry of the child device node + * @mode: mode of the child device node * @dev: device number of device to create * * Create a device node or file. @@ -4085,11 +4319,11 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d } /** - * vfs_mkdir - create directory + * vfs_mkdir - create directory returning correct dentry if possible * @idmap: idmap of the mount the inode was found from - * @dir: inode of @dentry - * @dentry: pointer to dentry of the base directory - * @mode: mode of the new directory + * @dir: inode of the parent directory + * @dentry: dentry of the child directory + * @mode: mode of the child directory * * Create a directory. * @@ -4098,32 +4332,51 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. + * + * In the event that the filesystem does not use the *@dentry but leaves it + * negative or unhashes it and possibly splices a different one returning it, + * the original dentry is dput() and the alternate is returned. + * + * In case of an error the dentry is dput() and an ERR_PTR() is returned. */ -int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, umode_t mode) +struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, umode_t mode) { int error; unsigned max_links = dir->i_sb->s_max_links; + struct dentry *de; error = may_create(idmap, dir, dentry); if (error) - return error; + goto err; + error = -EPERM; if (!dir->i_op->mkdir) - return -EPERM; + goto err; mode = vfs_prepare_mode(idmap, dir, mode, S_IRWXUGO | S_ISVTX, 0); error = security_inode_mkdir(dir, dentry, mode); if (error) - return error; + goto err; + error = -EMLINK; if (max_links && dir->i_nlink >= max_links) - return -EMLINK; + goto err; - error = dir->i_op->mkdir(idmap, dir, dentry, mode); - if (!error) - fsnotify_mkdir(dir, dentry); - return error; + de = dir->i_op->mkdir(idmap, dir, dentry, mode); + error = PTR_ERR(de); + if (IS_ERR(de)) + goto err; + if (de) { + dput(dentry); + dentry = de; + } + fsnotify_mkdir(dir, dentry); + return dentry; + +err: + dput(dentry); + return ERR_PTR(error); } EXPORT_SYMBOL(vfs_mkdir); @@ -4143,8 +4396,10 @@ retry: error = security_path_mkdir(&path, dentry, mode_strip_umask(path.dentry->d_inode, mode)); if (!error) { - error = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode, + dentry = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode, dentry, mode); + if (IS_ERR(dentry)) + error = PTR_ERR(dentry); } done_path_create(&path, dentry); if (retry_estale(error, lookup_flags)) { @@ -4169,8 +4424,8 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode) /** * vfs_rmdir - remove directory * @idmap: idmap of the mount the inode was found from - * @dir: inode of @dentry - * @dentry: pointer to dentry of the base directory + * @dir: inode of the parent directory + * @dentry: dentry of the child directory * * Remove a directory. * @@ -4255,10 +4510,6 @@ retry: error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto exit3; - if (!dentry->d_inode) { - error = -ENOENT; - goto exit4; - } error = security_path_rmdir(&path, dentry); if (error) goto exit4; @@ -4389,7 +4640,7 @@ retry_deleg: if (!IS_ERR(dentry)) { /* Why not before? Because we want correct error value */ - if (last.name[last.len] || d_is_negative(dentry)) + if (last.name[last.len]) goto slashes; inode = dentry->d_inode; ihold(inode); @@ -4423,9 +4674,7 @@ exit1: return error; slashes: - if (d_is_negative(dentry)) - error = -ENOENT; - else if (d_is_dir(dentry)) + if (d_is_dir(dentry)) error = -EISDIR; else error = -ENOTDIR; @@ -4450,8 +4699,8 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname) /** * vfs_symlink - create symlink * @idmap: idmap of the mount the inode was found from - * @dir: inode of @dentry - * @dentry: pointer to dentry of the base directory + * @dir: inode of the parent directory + * @dentry: dentry of the child symlink file * @oldname: name of the file to link to * * Create a symlink. @@ -4641,14 +4890,13 @@ int do_linkat(int olddfd, struct filename *old, int newdfd, goto out_putnames; } /* - * To use null names we require CAP_DAC_READ_SEARCH + * To use null names we require CAP_DAC_READ_SEARCH or + * that the open-time creds of the dfd matches current. * This ensures that not everyone will be able to create - * handlink using the passed filedescriptor. + * a hardlink using the passed file descriptor. */ - if (flags & AT_EMPTY_PATH && !capable(CAP_DAC_READ_SEARCH)) { - error = -ENOENT; - goto out_putnames; - } + if (flags & AT_EMPTY_PATH) + how |= LOOKUP_LINKAT_EMPTY; if (flags & AT_SYMLINK_FOLLOW) how |= LOOKUP_FOLLOW; @@ -4926,7 +5174,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd, struct qstr old_last, new_last; int old_type, new_type; struct inode *delegated_inode = NULL; - unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET; + unsigned int lookup_flags = 0, target_flags = + LOOKUP_RENAME_TARGET | LOOKUP_CREATE; bool should_retry = false; int error = -EINVAL; @@ -4939,6 +5188,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd, if (flags & RENAME_EXCHANGE) target_flags = 0; + if (flags & RENAME_NOREPLACE) + target_flags |= LOOKUP_EXCL; retry: error = filename_parentat(olddfd, from, lookup_flags, &old_path, @@ -4980,23 +5231,12 @@ retry_deleg: error = PTR_ERR(old_dentry); if (IS_ERR(old_dentry)) goto exit3; - /* source must exist */ - error = -ENOENT; - if (d_is_negative(old_dentry)) - goto exit4; new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry, lookup_flags | target_flags); error = PTR_ERR(new_dentry); if (IS_ERR(new_dentry)) goto exit4; - error = -EEXIST; - if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) - goto exit5; if (flags & RENAME_EXCHANGE) { - error = -ENOENT; - if (d_is_negative(new_dentry)) - goto exit5; - if (!d_is_dir(new_dentry)) { error = -ENOTDIR; if (new_last.name[new_last.len]) @@ -5085,19 +5325,16 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna getname(newname), 0); } -int readlink_copy(char __user *buffer, int buflen, const char *link) +int readlink_copy(char __user *buffer, int buflen, const char *link, int linklen) { - int len = PTR_ERR(link); - if (IS_ERR(link)) - goto out; + int copylen; - len = strlen(link); - if (len > (unsigned) buflen) - len = buflen; - if (copy_to_user(buffer, link, len)) - len = -EFAULT; -out: - return len; + copylen = linklen; + if (unlikely(copylen > (unsigned) buflen)) + copylen = buflen; + if (copy_to_user(buffer, link, copylen)) + copylen = -EFAULT; + return copylen; } /** @@ -5117,6 +5354,9 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen) const char *link; int res; + if (inode->i_opflags & IOP_CACHED_LINK) + return readlink_copy(buffer, buflen, inode->i_link, inode->i_linklen); + if (unlikely(!(inode->i_opflags & IOP_DEFAULT_READLINK))) { if (unlikely(inode->i_op->readlink)) return inode->i_op->readlink(dentry, buffer, buflen); @@ -5135,7 +5375,7 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen) if (IS_ERR(link)) return PTR_ERR(link); } - res = readlink_copy(buffer, buflen, link); + res = readlink_copy(buffer, buflen, link, strlen(link)); do_delayed_call(&done); return res; } @@ -5167,47 +5407,89 @@ const char *vfs_get_link(struct dentry *dentry, struct delayed_call *done) EXPORT_SYMBOL(vfs_get_link); /* get the link contents into pagecache */ -const char *page_get_link(struct dentry *dentry, struct inode *inode, - struct delayed_call *callback) +static char *__page_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *callback) { - char *kaddr; - struct page *page; + struct folio *folio; struct address_space *mapping = inode->i_mapping; if (!dentry) { - page = find_get_page(mapping, 0); - if (!page) + folio = filemap_get_folio(mapping, 0); + if (IS_ERR(folio)) return ERR_PTR(-ECHILD); - if (!PageUptodate(page)) { - put_page(page); + if (!folio_test_uptodate(folio)) { + folio_put(folio); return ERR_PTR(-ECHILD); } } else { - page = read_mapping_page(mapping, 0, NULL); - if (IS_ERR(page)) - return (char*)page; + folio = read_mapping_folio(mapping, 0, NULL); + if (IS_ERR(folio)) + return ERR_CAST(folio); } - set_delayed_call(callback, page_put_link, page); + set_delayed_call(callback, page_put_link, folio); BUG_ON(mapping_gfp_mask(mapping) & __GFP_HIGHMEM); - kaddr = page_address(page); - nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1); - return kaddr; + return folio_address(folio); } +const char *page_get_link_raw(struct dentry *dentry, struct inode *inode, + struct delayed_call *callback) +{ + return __page_get_link(dentry, inode, callback); +} +EXPORT_SYMBOL_GPL(page_get_link_raw); + +/** + * page_get_link() - An implementation of the get_link inode_operation. + * @dentry: The directory entry which is the symlink. + * @inode: The inode for the symlink. + * @callback: Used to drop the reference to the symlink. + * + * Filesystems which store their symlinks in the page cache should use + * this to implement the get_link() member of their inode_operations. + * + * Return: A pointer to the NUL-terminated symlink. + */ +const char *page_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *callback) +{ + char *kaddr = __page_get_link(dentry, inode, callback); + + if (!IS_ERR(kaddr)) + nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1); + return kaddr; +} EXPORT_SYMBOL(page_get_link); +/** + * page_put_link() - Drop the reference to the symlink. + * @arg: The folio which contains the symlink. + * + * This is used internally by page_get_link(). It is exported for use + * by filesystems which need to implement a variant of page_get_link() + * themselves. Despite the apparent symmetry, filesystems which use + * page_get_link() do not need to call page_put_link(). + * + * The argument, while it has a void pointer type, must be a pointer to + * the folio which was retrieved from the page cache. The delayed_call + * infrastructure is used to drop the reference count once the caller + * is done with the symlink. + */ void page_put_link(void *arg) { - put_page(arg); + folio_put(arg); } EXPORT_SYMBOL(page_put_link); int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) { + const char *link; + int res; + DEFINE_DELAYED_CALL(done); - int res = readlink_copy(buffer, buflen, - page_get_link(dentry, d_inode(dentry), - &done)); + link = page_get_link(dentry, d_inode(dentry), &done); + res = PTR_ERR(link); + if (!IS_ERR(link)) + res = readlink_copy(buffer, buflen, link, strlen(link)); do_delayed_call(&done); return res; } @@ -5218,7 +5500,7 @@ int page_symlink(struct inode *inode, const char *symname, int len) struct address_space *mapping = inode->i_mapping; const struct address_space_operations *aops = mapping->a_ops; bool nofs = !mapping_gfp_constraint(mapping, __GFP_FS); - struct page *page; + struct folio *folio; void *fsdata = NULL; int err; unsigned int flags; @@ -5226,16 +5508,16 @@ int page_symlink(struct inode *inode, const char *symname, int len) retry: if (nofs) flags = memalloc_nofs_save(); - err = aops->write_begin(NULL, mapping, 0, len-1, &page, &fsdata); + err = aops->write_begin(NULL, mapping, 0, len-1, &folio, &fsdata); if (nofs) memalloc_nofs_restore(flags); if (err) goto fail; - memcpy(page_address(page), symname, len-1); + memcpy(folio_address(folio), symname, len - 1); - err = aops->write_end(NULL, mapping, 0, len-1, len-1, - page, fsdata); + err = aops->write_end(NULL, mapping, 0, len - 1, len - 1, + folio, fsdata); if (err < 0) goto fail; if (err < len-1) |