diff options
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- | fs/namei.c | 564 |
1 files changed, 384 insertions, 180 deletions
diff --git a/fs/namei.c b/fs/namei.c index 9342fa6a38c2..ecb7b95c2ca3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -17,8 +17,8 @@ #include <linux/init.h> #include <linux/export.h> -#include <linux/kernel.h> #include <linux/slab.h> +#include <linux/wordpart.h> #include <linux/fs.h> #include <linux/filelock.h> #include <linux/namei.h> @@ -27,7 +27,6 @@ #include <linux/fsnotify.h> #include <linux/personality.h> #include <linux/security.h> -#include <linux/ima.h> #include <linux/syscalls.h> #include <linux/mount.h> #include <linux/audit.h> @@ -127,7 +126,7 @@ #define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname)) struct filename * -getname_flags(const char __user *filename, int flags, int *empty) +getname_flags(const char __user *filename, int flags) { struct filename *result; char *kname; @@ -149,9 +148,20 @@ getname_flags(const char __user *filename, int flags, int *empty) result->name = kname; len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX); - if (unlikely(len < 0)) { - __putname(result); - return ERR_PTR(len); + /* + * Handle both empty path and copy failure in one go. + */ + if (unlikely(len <= 0)) { + if (unlikely(len < 0)) { + __putname(result); + return ERR_PTR(len); + } + + /* The empty path is special. */ + if (!(flags & LOOKUP_EMPTY)) { + __putname(result); + return ERR_PTR(-ENOENT); + } } /* @@ -181,6 +191,12 @@ getname_flags(const char __user *filename, int flags, int *empty) kfree(result); return ERR_PTR(len); } + /* The empty path is special. */ + if (unlikely(!len) && !(flags & LOOKUP_EMPTY)) { + __putname(kname); + kfree(result); + return ERR_PTR(-ENOENT); + } if (unlikely(len == PATH_MAX)) { __putname(kname); kfree(result); @@ -189,38 +205,44 @@ getname_flags(const char __user *filename, int flags, int *empty) } atomic_set(&result->refcnt, 1); - /* The empty path is special. */ - if (unlikely(!len)) { - if (empty) - *empty = 1; - if (!(flags & LOOKUP_EMPTY)) { - putname(result); - return ERR_PTR(-ENOENT); - } - } - result->uptr = filename; result->aname = NULL; audit_getname(result); return result; } -struct filename * -getname_uflags(const char __user *filename, int uflags) +struct filename *getname_uflags(const char __user *filename, int uflags) { int flags = (uflags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0; - return getname_flags(filename, flags, NULL); + return getname_flags(filename, flags); } -struct filename * -getname(const char __user * filename) +struct filename *getname(const char __user * filename) { - return getname_flags(filename, 0, NULL); + return getname_flags(filename, 0); } -struct filename * -getname_kernel(const char * filename) +struct filename *__getname_maybe_null(const char __user *pathname) +{ + struct filename *name; + char c; + + /* try to save on allocations; loss on um, though */ + if (get_user(c, pathname)) + return ERR_PTR(-EFAULT); + if (!c) + return NULL; + + name = getname_flags(pathname, LOOKUP_EMPTY); + if (!IS_ERR(name) && !(name->name[0])) { + putname(name); + name = NULL; + } + return name; +} + +struct filename *getname_kernel(const char * filename) { struct filename *result; int len = strlen(filename) + 1; @@ -258,7 +280,7 @@ EXPORT_SYMBOL(getname_kernel); void putname(struct filename *name) { - if (IS_ERR(name)) + if (IS_ERR_OR_NULL(name)) return; if (WARN_ON_ONCE(!atomic_read(&name->refcnt))) @@ -320,6 +342,25 @@ static int check_acl(struct mnt_idmap *idmap, return -EAGAIN; } +/* + * Very quick optimistic "we know we have no ACL's" check. + * + * Note that this is purely for ACL_TYPE_ACCESS, and purely + * for the "we have cached that there are no ACLs" case. + * + * If this returns true, we know there are no ACLs. But if + * it returns false, we might still not have ACLs (it could + * be the is_uncached_acl() case). + */ +static inline bool no_acl_inode(struct inode *inode) +{ +#ifdef CONFIG_FS_POSIX_ACL + return likely(!READ_ONCE(inode->i_acl)); +#else + return true; +#endif +} + /** * acl_permission_check - perform basic UNIX permission checking * @idmap: idmap of the mount the inode was found from @@ -342,6 +383,28 @@ static int acl_permission_check(struct mnt_idmap *idmap, unsigned int mode = inode->i_mode; vfsuid_t vfsuid; + /* + * Common cheap case: everybody has the requested + * rights, and there are no ACLs to check. No need + * to do any owner/group checks in that case. + * + * - 'mask&7' is the requested permission bit set + * - multiplying by 0111 spreads them out to all of ugo + * - '& ~mode' looks for missing inode permission bits + * - the '!' is for "no missing permissions" + * + * After that, we just need to check that there are no + * ACL's on the inode - do the 'IS_POSIXACL()' check last + * because it will dereference the ->i_sb pointer and we + * want to avoid that if at all possible. + */ + if (!((mask & 7) * 0111 & ~mode)) { + if (no_acl_inode(inode)) + return 0; + if (!IS_POSIXACL(inode)) + return 0; + } + /* Are we the owner? If so, ACL's don't matter */ vfsuid = i_uid_into_vfsuid(idmap, inode); if (likely(vfsuid_eq_kuid(vfsuid, current_fsuid()))) { @@ -582,6 +645,7 @@ struct nameidata { unsigned seq; } *stack, internal[EMBEDDED_LEVELS]; struct filename *name; + const char *pathname; struct nameidata *saved; unsigned root_seq; int dfd; @@ -600,6 +664,7 @@ static void __set_nameidata(struct nameidata *p, int dfd, struct filename *name) p->depth = 0; p->dfd = dfd; p->name = name; + p->pathname = likely(name) ? name->name : ""; p->path.mnt = NULL; p->path.dentry = NULL; p->total_link_count = old ? old->total_link_count : 0; @@ -856,10 +921,11 @@ out_dput: return false; } -static inline int d_revalidate(struct dentry *dentry, unsigned int flags) +static inline int d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) - return dentry->d_op->d_revalidate(dentry, flags); + return dentry->d_op->d_revalidate(dir, name, dentry, flags); else return 1; } @@ -1034,7 +1100,7 @@ static int sysctl_protected_fifos __read_mostly; static int sysctl_protected_regular __read_mostly; #ifdef CONFIG_SYSCTL -static struct ctl_table namei_sysctls[] = { +static const struct ctl_table namei_sysctls[] = { { .procname = "protected_symlinks", .data = &sysctl_protected_symlinks, @@ -1234,29 +1300,48 @@ int may_linkat(struct mnt_idmap *idmap, const struct path *link) * * Returns 0 if the open is allowed, -ve on error. */ -static int may_create_in_sticky(struct mnt_idmap *idmap, - struct nameidata *nd, struct inode *const inode) +static int may_create_in_sticky(struct mnt_idmap *idmap, struct nameidata *nd, + struct inode *const inode) { umode_t dir_mode = nd->dir_mode; - vfsuid_t dir_vfsuid = nd->dir_vfsuid; + vfsuid_t dir_vfsuid = nd->dir_vfsuid, i_vfsuid; + + if (likely(!(dir_mode & S_ISVTX))) + return 0; + + if (S_ISREG(inode->i_mode) && !sysctl_protected_regular) + return 0; + + if (S_ISFIFO(inode->i_mode) && !sysctl_protected_fifos) + return 0; + + i_vfsuid = i_uid_into_vfsuid(idmap, inode); - if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) || - (!sysctl_protected_regular && S_ISREG(inode->i_mode)) || - likely(!(dir_mode & S_ISVTX)) || - vfsuid_eq(i_uid_into_vfsuid(idmap, inode), dir_vfsuid) || - vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid())) + if (vfsuid_eq(i_vfsuid, dir_vfsuid)) return 0; - if (likely(dir_mode & 0002) || - (dir_mode & 0020 && - ((sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) || - (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode))))) { - const char *operation = S_ISFIFO(inode->i_mode) ? - "sticky_create_fifo" : - "sticky_create_regular"; - audit_log_path_denied(AUDIT_ANOM_CREAT, operation); + if (vfsuid_eq_kuid(i_vfsuid, current_fsuid())) + return 0; + + if (likely(dir_mode & 0002)) { + audit_log_path_denied(AUDIT_ANOM_CREAT, "sticky_create"); return -EACCES; } + + if (dir_mode & 0020) { + if (sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) { + audit_log_path_denied(AUDIT_ANOM_CREAT, + "sticky_create_fifo"); + return -EACCES; + } + + if (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode)) { + audit_log_path_denied(AUDIT_ANOM_CREAT, + "sticky_create_regular"); + return -EACCES; + } + } + return 0; } @@ -1568,7 +1653,7 @@ static struct dentry *lookup_dcache(const struct qstr *name, { struct dentry *dentry = d_lookup(dir, name); if (dentry) { - int error = d_revalidate(dentry, flags); + int error = d_revalidate(dir->d_inode, name, dentry, flags); if (unlikely(error <= 0)) { if (!error) d_invalidate(dentry); @@ -1614,6 +1699,20 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name, } EXPORT_SYMBOL(lookup_one_qstr_excl); +/** + * lookup_fast - do fast lockless (but racy) lookup of a dentry + * @nd: current nameidata + * + * Do a fast, but racy lookup in the dcache for the given dentry, and + * revalidate it. Returns a valid dentry pointer or NULL if one wasn't + * found. On error, an ERR_PTR will be returned. + * + * If this function returns a valid dentry and the walk is no longer + * lazy, the dentry will carry a reference that must later be put. If + * RCU mode is still in force, then this is not the case and the dentry + * must be legitimized before use. If this returns NULL, then the walk + * will no longer be in RCU mode. + */ static struct dentry *lookup_fast(struct nameidata *nd) { struct dentry *dentry, *parent = nd->path.dentry; @@ -1639,19 +1738,20 @@ static struct dentry *lookup_fast(struct nameidata *nd) if (read_seqcount_retry(&parent->d_seq, nd->seq)) return ERR_PTR(-ECHILD); - status = d_revalidate(dentry, nd->flags); + status = d_revalidate(nd->inode, &nd->last, dentry, nd->flags); if (likely(status > 0)) return dentry; if (!try_to_unlazy_next(nd, dentry)) return ERR_PTR(-ECHILD); if (status == -ECHILD) /* we'd been told to redo it in non-rcu mode */ - status = d_revalidate(dentry, nd->flags); + status = d_revalidate(nd->inode, &nd->last, + dentry, nd->flags); } else { dentry = __d_lookup(parent, &nd->last); if (unlikely(!dentry)) return NULL; - status = d_revalidate(dentry, nd->flags); + status = d_revalidate(nd->inode, &nd->last, dentry, nd->flags); } if (unlikely(status <= 0)) { if (!status) @@ -1679,7 +1779,7 @@ again: if (IS_ERR(dentry)) return dentry; if (unlikely(!d_in_lookup(dentry))) { - int error = d_revalidate(dentry, flags); + int error = d_revalidate(inode, name, dentry, flags); if (unlikely(error <= 0)) { if (!error) { d_invalidate(dentry); @@ -1713,17 +1813,26 @@ static struct dentry *lookup_slow(const struct qstr *name, } static inline int may_lookup(struct mnt_idmap *idmap, - struct nameidata *nd) + struct nameidata *restrict nd) { - if (nd->flags & LOOKUP_RCU) { - int err = inode_permission(idmap, nd->inode, MAY_EXEC|MAY_NOT_BLOCK); - if (!err) // success, keep going - return 0; - if (!try_to_unlazy(nd)) - return -ECHILD; // redo it all non-lazy - if (err != -ECHILD) // hard error - return err; - } + int err, mask; + + mask = nd->flags & LOOKUP_RCU ? MAY_NOT_BLOCK : 0; + err = inode_permission(idmap, nd->inode, mask | MAY_EXEC); + if (likely(!err)) + return 0; + + // If we failed, and we weren't in LOOKUP_RCU, it's final + if (!(nd->flags & LOOKUP_RCU)) + return err; + + // Drop out of RCU mode to make sure it wasn't transient + if (!try_to_unlazy(nd)) + return -ECHILD; // redo it all non-lazy + + if (err != -ECHILD) // hard error + return err; + return inode_permission(idmap, nd->inode, MAY_EXEC); } @@ -2164,21 +2273,39 @@ EXPORT_SYMBOL(hashlen_string); /* * Calculate the length and hash of the path component, and - * return the "hash_len" as the result. + * return the length as the result. */ -static inline u64 hash_name(const void *salt, const char *name) +static inline const char *hash_name(struct nameidata *nd, + const char *name, + unsigned long *lastword) { - unsigned long a = 0, b, x = 0, y = (unsigned long)salt; + unsigned long a, b, x, y = (unsigned long)nd->path.dentry; unsigned long adata, bdata, mask, len; const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; - len = 0; - goto inside; + /* + * The first iteration is special, because it can result in + * '.' and '..' and has no mixing other than the final fold. + */ + a = load_unaligned_zeropad(name); + b = a ^ REPEAT_BYTE('/'); + if (has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants)) { + adata = prep_zero_mask(a, adata, &constants); + bdata = prep_zero_mask(b, bdata, &constants); + mask = create_zero_mask(adata | bdata); + a &= zero_bytemask(mask); + *lastword = a; + len = find_zero(mask); + nd->last.hash = fold_hash(a, y); + nd->last.len = len; + return name + len; + } + len = 0; + x = 0; do { HASH_MIX(x, y, a); len += sizeof(unsigned long); -inside: a = load_unaligned_zeropad(name+len); b = a ^ REPEAT_BYTE('/'); } while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants))); @@ -2186,11 +2313,25 @@ inside: adata = prep_zero_mask(a, adata, &constants); bdata = prep_zero_mask(b, bdata, &constants); mask = create_zero_mask(adata | bdata); - x ^= a & zero_bytemask(mask); + a &= zero_bytemask(mask); + x ^= a; + len += find_zero(mask); + *lastword = 0; // Multi-word components cannot be DOT or DOTDOT - return hashlen_create(fold_hash(x, y), len + find_zero(mask)); + nd->last.hash = fold_hash(x, y); + nd->last.len = len; + return name + len; } +/* + * Note that the 'last' word is always zero-masked, but + * was loaded as a possibly big-endian word. + */ +#ifdef __BIG_ENDIAN + #define LAST_WORD_IS_DOT (0x2eul << (BITS_PER_LONG-8)) + #define LAST_WORD_IS_DOTDOT (0x2e2eul << (BITS_PER_LONG-16)) +#endif + #else /* !CONFIG_DCACHE_WORD_ACCESS: Slow, byte-at-a-time version */ /* Return the hash of a string of known length */ @@ -2223,22 +2364,35 @@ EXPORT_SYMBOL(hashlen_string); * We know there's a real path component here of at least * one character. */ -static inline u64 hash_name(const void *salt, const char *name) +static inline const char *hash_name(struct nameidata *nd, const char *name, unsigned long *lastword) { - unsigned long hash = init_name_hash(salt); - unsigned long len = 0, c; + unsigned long hash = init_name_hash(nd->path.dentry); + unsigned long len = 0, c, last = 0; c = (unsigned char)*name; do { + last = (last << 8) + c; len++; hash = partial_name_hash(c, hash); c = (unsigned char)name[len]; } while (c && c != '/'); - return hashlen_create(end_name_hash(hash), len); + + // This is reliable for DOT or DOTDOT, since the component + // cannot contain NUL characters - top bits being zero means + // we cannot have had any other pathnames. + *lastword = last; + nd->last.hash = end_name_hash(hash); + nd->last.len = len; + return name + len; } #endif +#ifndef LAST_WORD_IS_DOT + #define LAST_WORD_IS_DOT 0x2e + #define LAST_WORD_IS_DOTDOT 0x2e2e +#endif + /* * Name resolution. * This is the basic name resolution function, turning a pathname into @@ -2267,45 +2421,38 @@ static int link_path_walk(const char *name, struct nameidata *nd) for(;;) { struct mnt_idmap *idmap; const char *link; - u64 hash_len; - int type; + unsigned long lastword; idmap = mnt_idmap(nd->path.mnt); err = may_lookup(idmap, nd); if (err) return err; - hash_len = hash_name(nd->path.dentry, name); + nd->last.name = name; + name = hash_name(nd, name, &lastword); - type = LAST_NORM; - if (name[0] == '.') switch (hashlen_len(hash_len)) { - case 2: - if (name[1] == '.') { - type = LAST_DOTDOT; - nd->state |= ND_JUMPED; - } - break; - case 1: - type = LAST_DOT; - } - if (likely(type == LAST_NORM)) { - struct dentry *parent = nd->path.dentry; + switch(lastword) { + case LAST_WORD_IS_DOTDOT: + nd->last_type = LAST_DOTDOT; + nd->state |= ND_JUMPED; + break; + + case LAST_WORD_IS_DOT: + nd->last_type = LAST_DOT; + break; + + default: + nd->last_type = LAST_NORM; nd->state &= ~ND_JUMPED; + + struct dentry *parent = nd->path.dentry; if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { - struct qstr this = { { .hash_len = hash_len }, .name = name }; - err = parent->d_op->d_hash(parent, &this); + err = parent->d_op->d_hash(parent, &nd->last); if (err < 0) return err; - hash_len = this.hash_len; - name = this.name; } } - nd->last.hash_len = hash_len; - nd->last.name = name; - nd->last_type = type; - - name += hashlen_len(hash_len); if (!*name) goto OK; /* @@ -2353,7 +2500,7 @@ OK: static const char *path_init(struct nameidata *nd, unsigned flags) { int error; - const char *s = nd->name->name; + const char *s = nd->pathname; /* LOOKUP_CACHED requires RCU, ask caller to retry */ if ((flags & (LOOKUP_RCU | LOOKUP_CACHED)) == LOOKUP_CACHED) @@ -2417,20 +2564,24 @@ static const char *path_init(struct nameidata *nd, unsigned flags) } } else { /* Caller must check execute permissions on the starting path component */ - struct fd f = fdget_raw(nd->dfd); + CLASS(fd_raw, f)(nd->dfd); struct dentry *dentry; - if (!f.file) + if (fd_empty(f)) return ERR_PTR(-EBADF); - dentry = f.file->f_path.dentry; + if (flags & LOOKUP_LINKAT_EMPTY) { + if (fd_file(f)->f_cred != current_cred() && + !ns_capable(fd_file(f)->f_cred->user_ns, CAP_DAC_READ_SEARCH)) + return ERR_PTR(-ENOENT); + } + + dentry = fd_file(f)->f_path.dentry; - if (*s && unlikely(!d_can_lookup(dentry))) { - fdput(f); + if (*s && unlikely(!d_can_lookup(dentry))) return ERR_PTR(-ENOTDIR); - } - nd->path = f.file->f_path; + nd->path = fd_file(f)->f_path; if (flags & LOOKUP_RCU) { nd->inode = nd->path.dentry->d_inode; nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); @@ -2438,7 +2589,6 @@ static const char *path_init(struct nameidata *nd, unsigned flags) path_get(&nd->path); nd->inode = nd->path.dentry->d_inode; } - fdput(f); } /* For scoped-lookups we need to set the root to the dirfd as well. */ @@ -2680,10 +2830,8 @@ static int lookup_one_common(struct mnt_idmap *idmap, if (!len) return -EACCES; - if (unlikely(name[0] == '.')) { - if (len < 2 || (len == 2 && name[1] == '.')) - return -EACCES; - } + if (is_dot_dotdot(name, len)) + return -EACCES; while (len--) { unsigned int c = *(const unsigned char *)name++; @@ -2917,16 +3065,16 @@ int path_pts(struct path *path) } #endif -int user_path_at_empty(int dfd, const char __user *name, unsigned flags, - struct path *path, int *empty) +int user_path_at(int dfd, const char __user *name, unsigned flags, + struct path *path) { - struct filename *filename = getname_flags(name, flags, empty); + struct filename *filename = getname_flags(name, flags); int ret = filename_lookup(dfd, filename, flags, path, NULL); putname(filename); return ret; } -EXPORT_SYMBOL(user_path_at_empty); +EXPORT_SYMBOL(user_path_at); int __check_sticky(struct mnt_idmap *idmap, struct inode *dir, struct inode *inode) @@ -3170,9 +3318,9 @@ static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap, /** * vfs_create - create new file * @idmap: idmap of the mount the inode was found from - * @dir: inode of @dentry - * @dentry: pointer to dentry of the base directory - * @mode: mode of the new file + * @dir: inode of the parent directory + * @dentry: dentry of the child file + * @mode: mode of the child file * @want_excl: whether the file must not yet exist * * Create a new file. @@ -3429,7 +3577,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, if (d_in_lookup(dentry)) break; - error = d_revalidate(dentry, nd->flags); + error = d_revalidate(dir_inode, &nd->last, dentry, nd->flags); if (likely(error > 0)) break; if (error) @@ -3443,6 +3591,9 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, return dentry; } + if (open_flag & O_CREAT) + audit_inode(nd->name, dir, AUDIT_INODE_PARENT); + /* * Checking write permission is tricky, bacuse we don't know if we are * going to actually need it: O_CREAT opens should work as long as the @@ -3513,6 +3664,42 @@ out_dput: return ERR_PTR(error); } +static inline bool trailing_slashes(struct nameidata *nd) +{ + return (bool)nd->last.name[nd->last.len]; +} + +static struct dentry *lookup_fast_for_open(struct nameidata *nd, int open_flag) +{ + struct dentry *dentry; + + if (open_flag & O_CREAT) { + if (trailing_slashes(nd)) + return ERR_PTR(-EISDIR); + + /* Don't bother on an O_EXCL create */ + if (open_flag & O_EXCL) + return NULL; + } + + if (trailing_slashes(nd)) + nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; + + dentry = lookup_fast(nd); + if (IS_ERR_OR_NULL(dentry)) + return dentry; + + if (open_flag & O_CREAT) { + /* Discard negative dentries. Need inode_lock to do the create */ + if (!dentry->d_inode) { + if (!(nd->flags & LOOKUP_RCU)) + dput(dentry); + dentry = NULL; + } + } + return dentry; +} + static const char *open_last_lookups(struct nameidata *nd, struct file *file, const struct open_flags *op) { @@ -3530,28 +3717,22 @@ static const char *open_last_lookups(struct nameidata *nd, return handle_dots(nd, nd->last_type); } - if (!(open_flag & O_CREAT)) { - if (nd->last.name[nd->last.len]) - nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; - /* we _can_ be in RCU mode here */ - dentry = lookup_fast(nd); - if (IS_ERR(dentry)) - return ERR_CAST(dentry); - if (likely(dentry)) - goto finish_lookup; + /* We _can_ be in RCU mode here */ + dentry = lookup_fast_for_open(nd, open_flag); + if (IS_ERR(dentry)) + return ERR_CAST(dentry); + + if (likely(dentry)) + goto finish_lookup; + if (!(open_flag & O_CREAT)) { if (WARN_ON_ONCE(nd->flags & LOOKUP_RCU)) return ERR_PTR(-ECHILD); } else { - /* create side of things */ if (nd->flags & LOOKUP_RCU) { if (!try_to_unlazy(nd)) return ERR_PTR(-ECHILD); } - audit_inode(nd->name, dir, AUDIT_INODE_PARENT); - /* trailing slashes? */ - if (unlikely(nd->last.name[nd->last.len])) - return ERR_PTR(-EISDIR); } if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { @@ -3567,8 +3748,12 @@ static const char *open_last_lookups(struct nameidata *nd, else inode_lock_shared(dir->d_inode); dentry = lookup_open(nd, file, op, got_write); - if (!IS_ERR(dentry) && (file->f_mode & FMODE_CREATED)) - fsnotify_create(dir->d_inode, dentry); + if (!IS_ERR(dentry)) { + if (file->f_mode & FMODE_CREATED) + fsnotify_create(dir->d_inode, dentry); + if (file->f_mode & FMODE_OPENED) + fsnotify_open(file); + } if (open_flag & O_CREAT) inode_unlock(dir->d_inode); else @@ -3644,7 +3829,7 @@ static int do_open(struct nameidata *nd, if (!error && !(file->f_mode & FMODE_OPENED)) error = vfs_open(&nd->path, file); if (!error) - error = ima_file_check(file, op->acc_mode); + error = security_file_post_open(file, op->acc_mode); if (!error && do_truncate) error = handle_truncate(idmap, file); if (unlikely(error > 0)) { @@ -3671,9 +3856,9 @@ static int do_open(struct nameidata *nd, * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ -static int vfs_tmpfile(struct mnt_idmap *idmap, - const struct path *parentpath, - struct file *file, umode_t mode) +int vfs_tmpfile(struct mnt_idmap *idmap, + const struct path *parentpath, + struct file *file, umode_t mode) { struct dentry *child; struct inode *dir = d_inode(parentpath->dentry); @@ -3695,6 +3880,8 @@ static int vfs_tmpfile(struct mnt_idmap *idmap, mode = vfs_prepare_mode(idmap, dir, mode, mode, mode); error = dir->i_op->tmpfile(idmap, dir, file, mode); dput(child); + if (file->f_mode & FMODE_OPENED) + fsnotify_open(file); if (error) return error; /* Don't check for other permissions, the inode was just created */ @@ -3707,7 +3894,7 @@ static int vfs_tmpfile(struct mnt_idmap *idmap, inode->i_state |= I_LINKABLE; spin_unlock(&inode->i_lock); } - ima_post_create_tmpfile(idmap, inode); + security_inode_post_create_tmpfile(idmap, inode); return 0; } @@ -3963,9 +4150,9 @@ EXPORT_SYMBOL(user_path_create); /** * vfs_mknod - create device node or file * @idmap: idmap of the mount the inode was found from - * @dir: inode of @dentry - * @dentry: pointer to dentry of the base directory - * @mode: mode of the new device node or file + * @dir: inode of the parent directory + * @dentry: dentry of the child device node + * @mode: mode of the child device node * @dev: device number of device to create * * Create a device node or file. @@ -4054,7 +4241,7 @@ retry: error = vfs_create(idmap, path.dentry->d_inode, dentry, mode, true); if (!error) - ima_post_path_mknod(idmap, dentry); + security_path_post_mknod(idmap, dentry); break; case S_IFCHR: case S_IFBLK: error = vfs_mknod(idmap, path.dentry->d_inode, @@ -4090,9 +4277,9 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d /** * vfs_mkdir - create directory * @idmap: idmap of the mount the inode was found from - * @dir: inode of @dentry - * @dentry: pointer to dentry of the base directory - * @mode: mode of the new directory + * @dir: inode of the parent directory + * @dentry: dentry of the child directory + * @mode: mode of the child directory * * Create a directory. * @@ -4172,8 +4359,8 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode) /** * vfs_rmdir - remove directory * @idmap: idmap of the mount the inode was found from - * @dir: inode of @dentry - * @dentry: pointer to dentry of the base directory + * @dir: inode of the parent directory + * @dentry: dentry of the child directory * * Remove a directory. * @@ -4453,8 +4640,8 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname) /** * vfs_symlink - create symlink * @idmap: idmap of the mount the inode was found from - * @dir: inode of @dentry - * @dentry: pointer to dentry of the base directory + * @dir: inode of the parent directory + * @dentry: dentry of the child symlink file * @oldname: name of the file to link to * * Create a symlink. @@ -4644,14 +4831,13 @@ int do_linkat(int olddfd, struct filename *old, int newdfd, goto out_putnames; } /* - * To use null names we require CAP_DAC_READ_SEARCH + * To use null names we require CAP_DAC_READ_SEARCH or + * that the open-time creds of the dfd matches current. * This ensures that not everyone will be able to create - * handlink using the passed filedescriptor. + * a hardlink using the passed file descriptor. */ - if (flags & AT_EMPTY_PATH && !capable(CAP_DAC_READ_SEARCH)) { - error = -ENOENT; - goto out_putnames; - } + if (flags & AT_EMPTY_PATH) + how |= LOOKUP_LINKAT_EMPTY; if (flags & AT_SYMLINK_FOLLOW) how |= LOOKUP_FOLLOW; @@ -5088,19 +5274,16 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna getname(newname), 0); } -int readlink_copy(char __user *buffer, int buflen, const char *link) +int readlink_copy(char __user *buffer, int buflen, const char *link, int linklen) { - int len = PTR_ERR(link); - if (IS_ERR(link)) - goto out; + int copylen; - len = strlen(link); - if (len > (unsigned) buflen) - len = buflen; - if (copy_to_user(buffer, link, len)) - len = -EFAULT; -out: - return len; + copylen = linklen; + if (unlikely(copylen > (unsigned) buflen)) + copylen = buflen; + if (copy_to_user(buffer, link, copylen)) + copylen = -EFAULT; + return copylen; } /** @@ -5120,6 +5303,9 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen) const char *link; int res; + if (inode->i_opflags & IOP_CACHED_LINK) + return readlink_copy(buffer, buflen, inode->i_link, inode->i_linklen); + if (unlikely(!(inode->i_opflags & IOP_DEFAULT_READLINK))) { if (unlikely(inode->i_op->readlink)) return inode->i_op->readlink(dentry, buffer, buflen); @@ -5138,7 +5324,7 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen) if (IS_ERR(link)) return PTR_ERR(link); } - res = readlink_copy(buffer, buflen, link); + res = readlink_copy(buffer, buflen, link, strlen(link)); do_delayed_call(&done); return res; } @@ -5170,10 +5356,9 @@ const char *vfs_get_link(struct dentry *dentry, struct delayed_call *done) EXPORT_SYMBOL(vfs_get_link); /* get the link contents into pagecache */ -const char *page_get_link(struct dentry *dentry, struct inode *inode, - struct delayed_call *callback) +static char *__page_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *callback) { - char *kaddr; struct page *page; struct address_space *mapping = inode->i_mapping; @@ -5192,8 +5377,23 @@ const char *page_get_link(struct dentry *dentry, struct inode *inode, } set_delayed_call(callback, page_put_link, page); BUG_ON(mapping_gfp_mask(mapping) & __GFP_HIGHMEM); - kaddr = page_address(page); - nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1); + return page_address(page); +} + +const char *page_get_link_raw(struct dentry *dentry, struct inode *inode, + struct delayed_call *callback) +{ + return __page_get_link(dentry, inode, callback); +} +EXPORT_SYMBOL_GPL(page_get_link_raw); + +const char *page_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *callback) +{ + char *kaddr = __page_get_link(dentry, inode, callback); + + if (!IS_ERR(kaddr)) + nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1); return kaddr; } @@ -5207,10 +5407,14 @@ EXPORT_SYMBOL(page_put_link); int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) { + const char *link; + int res; + DEFINE_DELAYED_CALL(done); - int res = readlink_copy(buffer, buflen, - page_get_link(dentry, d_inode(dentry), - &done)); + link = page_get_link(dentry, d_inode(dentry), &done); + res = PTR_ERR(link); + if (!IS_ERR(link)) + res = readlink_copy(buffer, buflen, link, strlen(link)); do_delayed_call(&done); return res; } @@ -5221,7 +5425,7 @@ int page_symlink(struct inode *inode, const char *symname, int len) struct address_space *mapping = inode->i_mapping; const struct address_space_operations *aops = mapping->a_ops; bool nofs = !mapping_gfp_constraint(mapping, __GFP_FS); - struct page *page; + struct folio *folio; void *fsdata = NULL; int err; unsigned int flags; @@ -5229,16 +5433,16 @@ int page_symlink(struct inode *inode, const char *symname, int len) retry: if (nofs) flags = memalloc_nofs_save(); - err = aops->write_begin(NULL, mapping, 0, len-1, &page, &fsdata); + err = aops->write_begin(NULL, mapping, 0, len-1, &folio, &fsdata); if (nofs) memalloc_nofs_restore(flags); if (err) goto fail; - memcpy(page_address(page), symname, len-1); + memcpy(folio_address(folio), symname, len - 1); - err = aops->write_end(NULL, mapping, 0, len-1, len-1, - page, fsdata); + err = aops->write_end(NULL, mapping, 0, len - 1, len - 1, + folio, fsdata); if (err < 0) goto fail; if (err < len-1) |