diff options
Diffstat (limited to 'ipc')
| -rw-r--r-- | ipc/ipc_sysctl.c | 52 | ||||
| -rw-r--r-- | ipc/mq_sysctl.c | 44 | ||||
| -rw-r--r-- | ipc/mqueue.c | 261 | ||||
| -rw-r--r-- | ipc/msg.c | 2 | ||||
| -rw-r--r-- | ipc/msgutil.c | 20 | ||||
| -rw-r--r-- | ipc/namespace.c | 35 | ||||
| -rw-r--r-- | ipc/sem.c | 8 | ||||
| -rw-r--r-- | ipc/shm.c | 51 | ||||
| -rw-r--r-- | ipc/util.c | 11 | ||||
| -rw-r--r-- | ipc/util.h | 1 |
10 files changed, 245 insertions, 240 deletions
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index ef313ecfb53a..15b17e86e198 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -14,9 +14,10 @@ #include <linux/ipc_namespace.h> #include <linux/msg.h> #include <linux/slab.h> +#include <linux/cred.h> #include "util.h" -static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, +static int proc_ipc_dointvec_minmax_orphans(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ipc_namespace *ns = @@ -32,7 +33,7 @@ static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, return err; } -static int proc_ipc_auto_msgmni(struct ctl_table *table, int write, +static int proc_ipc_auto_msgmni(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; @@ -47,7 +48,7 @@ static int proc_ipc_auto_msgmni(struct ctl_table *table, int write, return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); } -static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, +static int proc_ipc_sem_dointvec(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ipc_namespace *ns = @@ -72,7 +73,7 @@ int ipc_mni = IPCMNI; int ipc_mni_shift = IPCMNI_SHIFT; int ipc_min_cycle = RADIX_TREE_MAP_SIZE; -static struct ctl_table ipc_sysctls[] = { +static const struct ctl_table ipc_sysctls[] = { { .procname = "shmmax", .data = &init_ipc_ns.shm_ctlmax, @@ -177,7 +178,6 @@ static struct ctl_table ipc_sysctls[] = { .extra2 = SYSCTL_INT_MAX, }, #endif - {} }; static struct ctl_table_set *set_lookup(struct ctl_table_root *root) @@ -190,25 +190,56 @@ static int set_is_seen(struct ctl_table_set *set) return ¤t->nsproxy->ipc_ns->ipc_set == set; } -static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *table) +static void ipc_set_ownership(struct ctl_table_header *head, + kuid_t *uid, kgid_t *gid) +{ + struct ipc_namespace *ns = + container_of(head->set, struct ipc_namespace, ipc_set); + + kuid_t ns_root_uid = make_kuid(ns->user_ns, 0); + kgid_t ns_root_gid = make_kgid(ns->user_ns, 0); + + *uid = uid_valid(ns_root_uid) ? ns_root_uid : GLOBAL_ROOT_UID; + *gid = gid_valid(ns_root_gid) ? ns_root_gid : GLOBAL_ROOT_GID; +} + +static int ipc_permissions(struct ctl_table_header *head, const struct ctl_table *table) { int mode = table->mode; #ifdef CONFIG_CHECKPOINT_RESTORE - struct ipc_namespace *ns = current->nsproxy->ipc_ns; + struct ipc_namespace *ns = + container_of(head->set, struct ipc_namespace, ipc_set); if (((table->data == &ns->ids[IPC_SEM_IDS].next_id) || (table->data == &ns->ids[IPC_MSG_IDS].next_id) || (table->data == &ns->ids[IPC_SHM_IDS].next_id)) && checkpoint_restore_ns_capable(ns->user_ns)) mode = 0666; + else #endif - return mode; + { + kuid_t ns_root_uid; + kgid_t ns_root_gid; + + ipc_set_ownership(head, &ns_root_uid, &ns_root_gid); + + if (uid_eq(current_euid(), ns_root_uid)) + mode >>= 6; + + else if (in_egroup_p(ns_root_gid)) + mode >>= 3; + } + + mode &= 7; + + return (mode << 6) | (mode << 3) | mode; } static struct ctl_table_root set_root = { .lookup = set_lookup, .permissions = ipc_permissions, + .set_ownership = ipc_set_ownership, }; bool setup_ipc_sysctls(struct ipc_namespace *ns) @@ -259,7 +290,8 @@ bool setup_ipc_sysctls(struct ipc_namespace *ns) tbl[i].data = NULL; } - ns->ipc_sysctls = __register_sysctl_table(&ns->ipc_set, "kernel", tbl); + ns->ipc_sysctls = __register_sysctl_table(&ns->ipc_set, "kernel", tbl, + ARRAY_SIZE(ipc_sysctls)); } if (!ns->ipc_sysctls) { kfree(tbl); @@ -272,7 +304,7 @@ bool setup_ipc_sysctls(struct ipc_namespace *ns) void retire_ipc_sysctls(struct ipc_namespace *ns) { - struct ctl_table *tbl; + const struct ctl_table *tbl; tbl = ns->ipc_sysctls->ctl_table_arg; unregister_sysctl_table(ns->ipc_sysctls); diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c index fbf6a8b93a26..0dd12e1c9f53 100644 --- a/ipc/mq_sysctl.c +++ b/ipc/mq_sysctl.c @@ -12,6 +12,7 @@ #include <linux/stat.h> #include <linux/capability.h> #include <linux/slab.h> +#include <linux/cred.h> static int msg_max_limit_min = MIN_MSGMAX; static int msg_max_limit_max = HARD_MSGMAX; @@ -19,7 +20,7 @@ static int msg_max_limit_max = HARD_MSGMAX; static int msg_maxsize_limit_min = MIN_MSGSIZEMAX; static int msg_maxsize_limit_max = HARD_MSGSIZEMAX; -static struct ctl_table mq_sysctls[] = { +static const struct ctl_table mq_sysctls[] = { { .procname = "queues_max", .data = &init_ipc_ns.mq_queues_max, @@ -63,7 +64,6 @@ static struct ctl_table mq_sysctls[] = { .extra1 = &msg_maxsize_limit_min, .extra2 = &msg_maxsize_limit_max, }, - {} }; static struct ctl_table_set *set_lookup(struct ctl_table_root *root) @@ -76,8 +76,42 @@ static int set_is_seen(struct ctl_table_set *set) return ¤t->nsproxy->ipc_ns->mq_set == set; } +static void mq_set_ownership(struct ctl_table_header *head, + kuid_t *uid, kgid_t *gid) +{ + struct ipc_namespace *ns = + container_of(head->set, struct ipc_namespace, mq_set); + + kuid_t ns_root_uid = make_kuid(ns->user_ns, 0); + kgid_t ns_root_gid = make_kgid(ns->user_ns, 0); + + *uid = uid_valid(ns_root_uid) ? ns_root_uid : GLOBAL_ROOT_UID; + *gid = gid_valid(ns_root_gid) ? ns_root_gid : GLOBAL_ROOT_GID; +} + +static int mq_permissions(struct ctl_table_header *head, const struct ctl_table *table) +{ + int mode = table->mode; + kuid_t ns_root_uid; + kgid_t ns_root_gid; + + mq_set_ownership(head, &ns_root_uid, &ns_root_gid); + + if (uid_eq(current_euid(), ns_root_uid)) + mode >>= 6; + + else if (in_egroup_p(ns_root_gid)) + mode >>= 3; + + mode &= 7; + + return (mode << 6) | (mode << 3) | mode; +} + static struct ctl_table_root set_root = { .lookup = set_lookup, + .permissions = mq_permissions, + .set_ownership = mq_set_ownership, }; bool setup_mq_sysctls(struct ipc_namespace *ns) @@ -109,7 +143,9 @@ bool setup_mq_sysctls(struct ipc_namespace *ns) tbl[i].data = NULL; } - ns->mq_sysctls = __register_sysctl_table(&ns->mq_set, "fs/mqueue", tbl); + ns->mq_sysctls = __register_sysctl_table(&ns->mq_set, + "fs/mqueue", tbl, + ARRAY_SIZE(mq_sysctls)); } if (!ns->mq_sysctls) { kfree(tbl); @@ -122,7 +158,7 @@ bool setup_mq_sysctls(struct ipc_namespace *ns) void retire_mq_sysctls(struct ipc_namespace *ns) { - struct ctl_table *tbl; + const struct ctl_table *tbl; tbl = ns->mq_sysctls->ctl_table_arg; unregister_sysctl_table(ns->mq_sysctls); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 71881bddad25..c4f6d65596cf 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -302,7 +302,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb, inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); - inode->i_mtime = inode->i_ctime = inode->i_atime = current_time(inode); + simple_inode_init_ts(inode); if (S_ISREG(mode)) { struct mqueue_inode_info *info; @@ -411,6 +411,7 @@ static int mqueue_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = MQUEUE_MAGIC; sb->s_op = &mqueue_super_ops; + sb->s_d_flags = DCACHE_DONTCACHE; inode = mqueue_get_inode(sb, ns, S_IFDIR | S_ISVTX | S_IRWXUGO, NULL); if (IS_ERR(inode)) @@ -482,7 +483,7 @@ static struct vfsmount *mq_create_mount(struct ipc_namespace *ns) put_user_ns(fc->user_ns); fc->user_ns = get_user_ns(ctx->ipc_ns->user_ns); - mnt = fc_mount(fc); + mnt = fc_mount_longterm(fc); put_fs_context(fc); return mnt; } @@ -596,10 +597,9 @@ static int mqueue_create_attr(struct dentry *dentry, umode_t mode, void *arg) put_ipc_ns(ipc_ns); dir->i_size += DIRENT_SIZE; - dir->i_ctime = dir->i_mtime = dir->i_atime = current_time(dir); + simple_inode_init_ts(dir); - d_instantiate(dentry, inode); - dget(dentry); + d_make_persistent(dentry, inode); return 0; out_unlock: spin_unlock(&mq_lock); @@ -616,13 +616,8 @@ static int mqueue_create(struct mnt_idmap *idmap, struct inode *dir, static int mqueue_unlink(struct inode *dir, struct dentry *dentry) { - struct inode *inode = d_inode(dentry); - - dir->i_ctime = dir->i_mtime = dir->i_atime = current_time(dir); dir->i_size -= DIRENT_SIZE; - drop_nlink(inode); - dput(dentry); - return 0; + return simple_unlink(dir, dentry); } /* @@ -635,7 +630,8 @@ static int mqueue_unlink(struct inode *dir, struct dentry *dentry) static ssize_t mqueue_read_file(struct file *filp, char __user *u_data, size_t count, loff_t *off) { - struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp)); + struct inode *inode = file_inode(filp); + struct mqueue_inode_info *info = MQUEUE_I(inode); char buffer[FILENT_SIZE]; ssize_t ret; @@ -656,7 +652,7 @@ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data, if (ret <= 0) return ret; - file_inode(filp)->i_atime = file_inode(filp)->i_ctime = current_time(file_inode(filp)); + inode_set_atime_to_ts(inode, inode_set_ctime_current(inode)); return ret; } @@ -890,52 +886,46 @@ static int prepare_open(struct dentry *dentry, int oflag, int ro, return inode_permission(&nop_mnt_idmap, d_inode(dentry), acc); } +static struct file *mqueue_file_open(struct filename *name, + struct vfsmount *mnt, int oflag, int ro, + umode_t mode, struct mq_attr *attr) +{ + struct dentry *dentry; + struct file *file; + int ret; + + dentry = start_creating_noperm(mnt->mnt_root, &QSTR(name->name)); + if (IS_ERR(dentry)) + return ERR_CAST(dentry); + + ret = prepare_open(dentry, oflag, ro, mode, name, attr); + file = ERR_PTR(ret); + if (!ret) { + const struct path path = { .mnt = mnt, .dentry = dentry }; + file = dentry_open(&path, oflag, current_cred()); + } + + end_creating(dentry); + return file; +} + static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, struct mq_attr *attr) { + struct filename *name __free(putname) = NULL;; struct vfsmount *mnt = current->nsproxy->ipc_ns->mq_mnt; - struct dentry *root = mnt->mnt_root; - struct filename *name; - struct path path; - int fd, error; - int ro; + int fd, ro; audit_mq_open(oflag, mode, attr); - if (IS_ERR(name = getname(u_name))) + name = getname(u_name); + if (IS_ERR(name)) return PTR_ERR(name); - fd = get_unused_fd_flags(O_CLOEXEC); - if (fd < 0) - goto out_putname; - ro = mnt_want_write(mnt); /* we'll drop it in any case */ - inode_lock(d_inode(root)); - path.dentry = lookup_one_len(name->name, root, strlen(name->name)); - if (IS_ERR(path.dentry)) { - error = PTR_ERR(path.dentry); - goto out_putfd; - } - path.mnt = mntget(mnt); - error = prepare_open(path.dentry, oflag, ro, mode, name, attr); - if (!error) { - struct file *file = dentry_open(&path, oflag, current_cred()); - if (!IS_ERR(file)) - fd_install(fd, file); - else - error = PTR_ERR(file); - } - path_put(&path); -out_putfd: - if (error) { - put_unused_fd(fd); - fd = error; - } - inode_unlock(d_inode(root)); + fd = FD_ADD(O_CLOEXEC, mqueue_file_open(name, mnt, oflag, ro, mode, attr)); if (!ro) mnt_drop_write(mnt); -out_putname: - putname(name); return fd; } @@ -954,7 +944,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) int err; struct filename *name; struct dentry *dentry; - struct inode *inode = NULL; + struct inode *inode; struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; struct vfsmount *mnt = ipc_ns->mq_mnt; @@ -966,27 +956,20 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) err = mnt_want_write(mnt); if (err) goto out_name; - inode_lock_nested(d_inode(mnt->mnt_root), I_MUTEX_PARENT); - dentry = lookup_one_len(name->name, mnt->mnt_root, - strlen(name->name)); + dentry = start_removing_noperm(mnt->mnt_root, &QSTR(name->name)); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); - goto out_unlock; + goto out_drop_write; } inode = d_inode(dentry); - if (!inode) { - err = -ENOENT; - } else { - ihold(inode); - err = vfs_unlink(&nop_mnt_idmap, d_inode(dentry->d_parent), - dentry, NULL); - } - dput(dentry); - -out_unlock: - inode_unlock(d_inode(mnt->mnt_root)); + ihold(inode); + err = vfs_unlink(&nop_mnt_idmap, d_inode(mnt->mnt_root), + dentry, NULL); + end_removing(dentry); iput(inode); + +out_drop_write: mnt_drop_write(mnt); out_name: putname(name); @@ -1061,7 +1044,6 @@ static int do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr, size_t msg_len, unsigned int msg_prio, struct timespec64 *ts) { - struct fd f; struct inode *inode; struct ext_wait_queue wait; struct ext_wait_queue *receiver; @@ -1082,37 +1064,27 @@ static int do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr, audit_mq_sendrecv(mqdes, msg_len, msg_prio, ts); - f = fdget(mqdes); - if (unlikely(!f.file)) { - ret = -EBADF; - goto out; - } + CLASS(fd, f)(mqdes); + if (fd_empty(f)) + return -EBADF; - inode = file_inode(f.file); - if (unlikely(f.file->f_op != &mqueue_file_operations)) { - ret = -EBADF; - goto out_fput; - } + inode = file_inode(fd_file(f)); + if (unlikely(fd_file(f)->f_op != &mqueue_file_operations)) + return -EBADF; info = MQUEUE_I(inode); - audit_file(f.file); + audit_file(fd_file(f)); - if (unlikely(!(f.file->f_mode & FMODE_WRITE))) { - ret = -EBADF; - goto out_fput; - } + if (unlikely(!(fd_file(f)->f_mode & FMODE_WRITE))) + return -EBADF; - if (unlikely(msg_len > info->attr.mq_msgsize)) { - ret = -EMSGSIZE; - goto out_fput; - } + if (unlikely(msg_len > info->attr.mq_msgsize)) + return -EMSGSIZE; /* First try to allocate memory, before doing anything with * existing queues. */ msg_ptr = load_msg(u_msg_ptr, msg_len); - if (IS_ERR(msg_ptr)) { - ret = PTR_ERR(msg_ptr); - goto out_fput; - } + if (IS_ERR(msg_ptr)) + return PTR_ERR(msg_ptr); msg_ptr->m_ts = msg_len; msg_ptr->m_type = msg_prio; @@ -1136,7 +1108,7 @@ static int do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr, } if (info->attr.mq_curmsgs == info->attr.mq_maxmsg) { - if (f.file->f_flags & O_NONBLOCK) { + if (fd_file(f)->f_flags & O_NONBLOCK) { ret = -EAGAIN; } else { wait.task = current; @@ -1162,8 +1134,7 @@ static int do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr, goto out_unlock; __do_notify(info); } - inode->i_atime = inode->i_mtime = inode->i_ctime = - current_time(inode); + simple_inode_init_ts(inode); } out_unlock: spin_unlock(&info->lock); @@ -1171,9 +1142,6 @@ out_unlock: out_free: if (ret) free_msg(msg_ptr); -out_fput: - fdput(f); -out: return ret; } @@ -1183,7 +1151,6 @@ static int do_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr, { ssize_t ret; struct msg_msg *msg_ptr; - struct fd f; struct inode *inode; struct mqueue_inode_info *info; struct ext_wait_queue wait; @@ -1197,30 +1164,22 @@ static int do_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr, audit_mq_sendrecv(mqdes, msg_len, 0, ts); - f = fdget(mqdes); - if (unlikely(!f.file)) { - ret = -EBADF; - goto out; - } + CLASS(fd, f)(mqdes); + if (fd_empty(f)) + return -EBADF; - inode = file_inode(f.file); - if (unlikely(f.file->f_op != &mqueue_file_operations)) { - ret = -EBADF; - goto out_fput; - } + inode = file_inode(fd_file(f)); + if (unlikely(fd_file(f)->f_op != &mqueue_file_operations)) + return -EBADF; info = MQUEUE_I(inode); - audit_file(f.file); + audit_file(fd_file(f)); - if (unlikely(!(f.file->f_mode & FMODE_READ))) { - ret = -EBADF; - goto out_fput; - } + if (unlikely(!(fd_file(f)->f_mode & FMODE_READ))) + return -EBADF; /* checks if buffer is big enough */ - if (unlikely(msg_len < info->attr.mq_msgsize)) { - ret = -EMSGSIZE; - goto out_fput; - } + if (unlikely(msg_len < info->attr.mq_msgsize)) + return -EMSGSIZE; /* * msg_insert really wants us to have a valid, spare node struct so @@ -1241,7 +1200,7 @@ static int do_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr, } if (info->attr.mq_curmsgs == 0) { - if (f.file->f_flags & O_NONBLOCK) { + if (fd_file(f)->f_flags & O_NONBLOCK) { spin_unlock(&info->lock); ret = -EAGAIN; } else { @@ -1257,8 +1216,7 @@ static int do_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr, msg_ptr = msg_get(info); - inode->i_atime = inode->i_mtime = inode->i_ctime = - current_time(inode); + simple_inode_init_ts(inode); /* There is now free space in queue. */ pipelined_receive(&wake_q, info); @@ -1275,9 +1233,6 @@ static int do_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr, } free_msg(msg_ptr); } -out_fput: - fdput(f); -out: return ret; } @@ -1317,7 +1272,6 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, static int do_mq_notify(mqd_t mqdes, const struct sigevent *notification) { int ret; - struct fd f; struct sock *sock; struct inode *inode; struct mqueue_inode_info *info; @@ -1347,47 +1301,39 @@ static int do_mq_notify(mqd_t mqdes, const struct sigevent *notification) if (copy_from_user(nc->data, notification->sigev_value.sival_ptr, NOTIFY_COOKIE_LEN)) { - ret = -EFAULT; - goto free_skb; + kfree_skb(nc); + return -EFAULT; } /* TODO: add a header? */ skb_put(nc, NOTIFY_COOKIE_LEN); /* and attach it to the socket */ retry: - f = fdget(notification->sigev_signo); - if (!f.file) { - ret = -EBADF; - goto out; - } - sock = netlink_getsockbyfilp(f.file); - fdput(f); + sock = netlink_getsockbyfd(notification->sigev_signo); if (IS_ERR(sock)) { - ret = PTR_ERR(sock); - goto free_skb; + kfree_skb(nc); + return PTR_ERR(sock); } timeo = MAX_SCHEDULE_TIMEOUT; ret = netlink_attachskb(sock, nc, &timeo, NULL); - if (ret == 1) { - sock = NULL; + if (ret == 1) goto retry; - } if (ret) return ret; } } - f = fdget(mqdes); - if (!f.file) { + CLASS(fd, f)(mqdes); + if (fd_empty(f)) { ret = -EBADF; goto out; } - inode = file_inode(f.file); - if (unlikely(f.file->f_op != &mqueue_file_operations)) { + inode = file_inode(fd_file(f)); + if (unlikely(fd_file(f)->f_op != &mqueue_file_operations)) { ret = -EBADF; - goto out_fput; + goto out; } info = MQUEUE_I(inode); @@ -1396,7 +1342,8 @@ retry: if (notification == NULL) { if (info->notify_owner == task_tgid(current)) { remove_notification(info); - inode->i_atime = inode->i_ctime = current_time(inode); + inode_set_atime_to_ts(inode, + inode_set_ctime_current(inode)); } } else if (info->notify_owner != NULL) { ret = -EBUSY; @@ -1422,18 +1369,12 @@ retry: info->notify_owner = get_pid(task_tgid(current)); info->notify_user_ns = get_user_ns(current_user_ns()); - inode->i_atime = inode->i_ctime = current_time(inode); + inode_set_atime_to_ts(inode, inode_set_ctime_current(inode)); } spin_unlock(&info->lock); -out_fput: - fdput(f); out: if (sock) netlink_detachskb(sock, nc); - else -free_skb: - dev_kfree_skb(nc); - return ret; } @@ -1451,45 +1392,41 @@ SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes, static int do_mq_getsetattr(int mqdes, struct mq_attr *new, struct mq_attr *old) { - struct fd f; struct inode *inode; struct mqueue_inode_info *info; if (new && (new->mq_flags & (~O_NONBLOCK))) return -EINVAL; - f = fdget(mqdes); - if (!f.file) + CLASS(fd, f)(mqdes); + if (fd_empty(f)) return -EBADF; - if (unlikely(f.file->f_op != &mqueue_file_operations)) { - fdput(f); + if (unlikely(fd_file(f)->f_op != &mqueue_file_operations)) return -EBADF; - } - inode = file_inode(f.file); + inode = file_inode(fd_file(f)); info = MQUEUE_I(inode); spin_lock(&info->lock); if (old) { *old = info->attr; - old->mq_flags = f.file->f_flags & O_NONBLOCK; + old->mq_flags = fd_file(f)->f_flags & O_NONBLOCK; } if (new) { audit_mq_getsetattr(mqdes, new); - spin_lock(&f.file->f_lock); + spin_lock(&fd_file(f)->f_lock); if (new->mq_flags & O_NONBLOCK) - f.file->f_flags |= O_NONBLOCK; + fd_file(f)->f_flags |= O_NONBLOCK; else - f.file->f_flags &= ~O_NONBLOCK; - spin_unlock(&f.file->f_lock); + fd_file(f)->f_flags &= ~O_NONBLOCK; + spin_unlock(&fd_file(f)->f_lock); - inode->i_atime = inode->i_ctime = current_time(inode); + inode_set_atime_to_ts(inode, inode_set_ctime_current(inode)); } spin_unlock(&info->lock); - fdput(f); return 0; } @@ -1682,7 +1619,7 @@ static const struct fs_context_operations mqueue_fs_context_ops = { static struct file_system_type mqueue_fs_type = { .name = "mqueue", .init_fs_context = mqueue_init_fs_context, - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, .fs_flags = FS_USERNS_MOUNT, }; diff --git a/ipc/msg.c b/ipc/msg.c index fd08b3cb36d7..ee6af4fe52bf 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -978,7 +978,7 @@ SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, struct compat_msgbuf { compat_long_t mtype; - char mtext[1]; + char mtext[]; }; long compat_ksys_msgsnd(int msqid, compat_uptr_t msgp, diff --git a/ipc/msgutil.c b/ipc/msgutil.c index d0a0e877cadd..e28f0cecb2ec 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -15,6 +15,7 @@ #include <linux/proc_ns.h> #include <linux/uaccess.h> #include <linux/sched.h> +#include <linux/nstree.h> #include "util.h" @@ -26,12 +27,8 @@ DEFINE_SPINLOCK(mq_lock); * and not CONFIG_IPC_NS. */ struct ipc_namespace init_ipc_ns = { - .ns.count = REFCOUNT_INIT(1), + .ns = NS_COMMON_INIT(init_ipc_ns), .user_ns = &init_user_ns, - .ns.inum = PROC_IPC_INIT_INO, -#ifdef CONFIG_IPC_NS - .ns.ops = &ipcns_operations, -#endif }; struct msg_msgseg { @@ -42,6 +39,17 @@ struct msg_msgseg { #define DATALEN_MSG ((size_t)PAGE_SIZE-sizeof(struct msg_msg)) #define DATALEN_SEG ((size_t)PAGE_SIZE-sizeof(struct msg_msgseg)) +static kmem_buckets *msg_buckets __ro_after_init; + +static int __init init_msg_buckets(void) +{ + msg_buckets = kmem_buckets_create("msg_msg", SLAB_ACCOUNT, + sizeof(struct msg_msg), + DATALEN_MSG, NULL); + + return 0; +} +subsys_initcall(init_msg_buckets); static struct msg_msg *alloc_msg(size_t len) { @@ -50,7 +58,7 @@ static struct msg_msg *alloc_msg(size_t len) size_t alen; alen = min(len, DATALEN_MSG); - msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL_ACCOUNT); + msg = kmem_buckets_alloc(msg_buckets, sizeof(*msg) + alen, GFP_KERNEL); if (msg == NULL) return NULL; diff --git a/ipc/namespace.c b/ipc/namespace.c index 6ecc30effd3e..535f16ea40e1 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -15,6 +15,7 @@ #include <linux/mount.h> #include <linux/user_namespace.h> #include <linux/proc_ns.h> +#include <linux/nstree.h> #include <linux/sched/task.h> #include "util.h" @@ -61,12 +62,11 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, if (ns == NULL) goto fail_dec; - err = ns_alloc_inum(&ns->ns); + err = ns_common_init(ns); if (err) goto fail_free; - ns->ns.ops = &ipcns_operations; - refcount_set(&ns->ns.count, 1); + ns_tree_gen_id(ns); ns->user_ns = get_user_ns(user_ns); ns->ucounts = ucounts; @@ -76,26 +76,30 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, err = -ENOMEM; if (!setup_mq_sysctls(ns)) - goto fail_put; + goto fail_mq_mount; if (!setup_ipc_sysctls(ns)) - goto fail_mq; + goto fail_mq_sysctls; err = msg_init_ns(ns); if (err) - goto fail_put; + goto fail_ipc; sem_init_ns(ns); shm_init_ns(ns); + ns_tree_add_raw(ns); return ns; -fail_mq: +fail_ipc: + retire_ipc_sysctls(ns); +fail_mq_sysctls: retire_mq_sysctls(ns); - +fail_mq_mount: + mntput(ns->mq_mnt); fail_put: put_user_ns(ns->user_ns); - ns_free_inum(&ns->ns); + ns_common_free(ns); fail_free: kfree(ns); fail_dec: @@ -104,7 +108,7 @@ fail: return ERR_PTR(err); } -struct ipc_namespace *copy_ipcs(unsigned long flags, +struct ipc_namespace *copy_ipcs(u64 flags, struct user_namespace *user_ns, struct ipc_namespace *ns) { if (!(flags & CLONE_NEWIPC)) @@ -159,7 +163,7 @@ static void free_ipc_ns(struct ipc_namespace *ns) dec_ipc_namespaces(ns->ucounts); put_user_ns(ns->user_ns); - ns_free_inum(&ns->ns); + ns_common_free(ns); kfree(ns); } @@ -197,20 +201,16 @@ static void free_ipc(struct work_struct *unused) */ void put_ipc_ns(struct ipc_namespace *ns) { - if (refcount_dec_and_lock(&ns->ns.count, &mq_lock)) { + if (ns_ref_put_and_lock(ns, &mq_lock)) { mq_clear_sbinfo(ns); spin_unlock(&mq_lock); + ns_tree_remove(ns); if (llist_add(&ns->mnt_llist, &free_ipc_list)) schedule_work(&free_ipc_work); } } -static inline struct ipc_namespace *to_ipc_ns(struct ns_common *ns) -{ - return container_of(ns, struct ipc_namespace, ns); -} - static struct ns_common *ipcns_get(struct task_struct *task) { struct ipc_namespace *ns = NULL; @@ -250,7 +250,6 @@ static struct user_namespace *ipcns_owner(struct ns_common *ns) const struct proc_ns_operations ipcns_operations = { .name = "ipc", - .type = CLONE_NEWIPC, .get = ipcns_get, .put = ipcns_put, .install = ipcns_install, diff --git a/ipc/sem.c b/ipc/sem.c index 00f88aa01ac5..0f06e4bd4673 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -152,7 +152,7 @@ struct sem_undo { struct list_head list_id; /* per semaphore array list: * all undos for one array */ int semid; /* semaphore set identifier */ - short *semadj; /* array of adjustments */ + short semadj[]; /* array of adjustments */ /* one per semaphore */ }; @@ -1938,8 +1938,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) rcu_read_unlock(); /* step 2: allocate new undo structure */ - new = kvzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, - GFP_KERNEL_ACCOUNT); + new = kvzalloc(struct_size(new, semadj, nsems), GFP_KERNEL_ACCOUNT); if (!new) { ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); return ERR_PTR(-ENOMEM); @@ -1967,7 +1966,6 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) goto success; } /* step 5: initialize & link new undo structure */ - new->semadj = (short *) &new[1]; new->ulp = ulp; new->semid = semid; assert_spin_locked(&ulp->lock); @@ -2305,7 +2303,7 @@ SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops, * parent and child tasks. */ -int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) +int copy_semundo(u64 clone_flags, struct task_struct *tsk) { struct sem_undo_list *undo_list; int error; diff --git a/ipc/shm.c b/ipc/shm.c index 60e45e7045d4..3db36773dd10 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -29,6 +29,7 @@ #include <linux/mm.h> #include <linux/hugetlb.h> #include <linux/shm.h> +#include <uapi/linux/shm.h> #include <linux/init.h> #include <linux/file.h> #include <linux/mman.h> @@ -44,6 +45,7 @@ #include <linux/mount.h> #include <linux/ipc_namespace.h> #include <linux/rhashtable.h> +#include <linux/nstree.h> #include <linux/uaccess.h> @@ -147,6 +149,7 @@ void shm_exit_ns(struct ipc_namespace *ns) static int __init ipc_ns_init(void) { shm_init_ns(&init_ipc_ns); + ns_tree_add(&init_ipc_ns); return 0; } @@ -430,8 +433,11 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data) void shm_destroy_orphaned(struct ipc_namespace *ns) { down_write(&shm_ids(ns).rwsem); - if (shm_ids(ns).in_use) + if (shm_ids(ns).in_use) { + rcu_read_lock(); idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); + rcu_read_unlock(); + } up_write(&shm_ids(ns).rwsem); } @@ -562,30 +568,25 @@ static unsigned long shm_pagesize(struct vm_area_struct *vma) } #ifdef CONFIG_NUMA -static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new) +static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol) { - struct file *file = vma->vm_file; - struct shm_file_data *sfd = shm_file_data(file); + struct shm_file_data *sfd = shm_file_data(vma->vm_file); int err = 0; if (sfd->vm_ops->set_policy) - err = sfd->vm_ops->set_policy(vma, new); + err = sfd->vm_ops->set_policy(vma, mpol); return err; } static struct mempolicy *shm_get_policy(struct vm_area_struct *vma, - unsigned long addr) + unsigned long addr, pgoff_t *ilx) { - struct file *file = vma->vm_file; - struct shm_file_data *sfd = shm_file_data(file); - struct mempolicy *pol = NULL; + struct shm_file_data *sfd = shm_file_data(vma->vm_file); + struct mempolicy *mpol = vma->vm_policy; if (sfd->vm_ops->get_policy) - pol = sfd->vm_ops->get_policy(vma, addr); - else if (vma->vm_policy) - pol = vma->vm_policy; - - return pol; + mpol = sfd->vm_ops->get_policy(vma, addr, ilx); + return mpol; } #endif @@ -603,7 +604,7 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma) if (ret) return ret; - ret = call_mmap(sfd->file, vma); + ret = vfs_mmap(sfd->file, vma); if (ret) { __shm_close(sfd); return ret; @@ -666,8 +667,8 @@ static const struct file_operations shm_file_operations = { }; /* - * shm_file_operations_huge is now identical to shm_file_operations, - * but we keep it distinct for the sake of is_file_shm_hugepages(). + * shm_file_operations_huge is now identical to shm_file_operations + * except for fop_flags */ static const struct file_operations shm_file_operations_huge = { .mmap = shm_mmap, @@ -676,13 +677,9 @@ static const struct file_operations shm_file_operations_huge = { .get_unmapped_area = shm_get_unmapped_area, .llseek = noop_llseek, .fallocate = shm_fallocate, + .fop_flags = FOP_HUGE_PAGES, }; -bool is_file_shm_hugepages(struct file *file) -{ - return file->f_op == &shm_file_operations_huge; -} - static const struct vm_operations_struct shm_vm_ops = { .open = shm_open, /* callback for a new vm-area open */ .close = shm_close, /* callback for when the vm-area is released */ @@ -1662,7 +1659,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, goto invalid; } - addr = do_mmap(file, addr, size, prot, flags, 0, &populate, NULL); + addr = do_mmap(file, addr, size, prot, flags, 0, 0, &populate, NULL); *raddr = addr; err = 0; if (IS_ERR_VALUE(addr)) @@ -1786,8 +1783,8 @@ long ksys_shmdt(char __user *shmaddr) */ file = vma->vm_file; size = i_size_read(file_inode(vma->vm_file)); - do_vma_munmap(&vmi, vma, vma->vm_start, vma->vm_end, - NULL, false); + do_vmi_align_munmap(&vmi, vma, mm, vma->vm_start, + vma->vm_end, NULL, false); /* * We discovered the size of the shm segment, so * break out of here and fall through to the next @@ -1811,8 +1808,8 @@ long ksys_shmdt(char __user *shmaddr) if ((vma->vm_ops == &shm_vm_ops) && ((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) && (vma->vm_file == file)) { - do_vma_munmap(&vmi, vma, vma->vm_start, vma->vm_end, - NULL, false); + do_vmi_align_munmap(&vmi, vma, mm, vma->vm_start, + vma->vm_end, NULL, false); } vma = vma_next(&vmi); diff --git a/ipc/util.c b/ipc/util.c index 05cb9de66735..cae60f11d9c2 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -615,12 +615,11 @@ void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out) } /** - * ipc_obtain_object_idr + * ipc_obtain_object_idr - Look for an id in the ipc ids idr and + * return associated ipc object. * @ids: ipc identifier set * @id: ipc id to look for * - * Look for an id in the ipc ids idr and return associated ipc object. - * * Call inside the RCU critical section. * The ipc object is *not* locked on exit. */ @@ -637,13 +636,11 @@ struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id) } /** - * ipc_obtain_object_check + * ipc_obtain_object_check - Similar to ipc_obtain_object_idr() but + * also checks the ipc object sequence number. * @ids: ipc identifier set * @id: ipc id to look for * - * Similar to ipc_obtain_object_idr() but also checks the ipc object - * sequence number. - * * Call inside the RCU critical section. * The ipc object is *not* locked on exit. */ diff --git a/ipc/util.h b/ipc/util.h index 67bdd2aa2c28..a55d6cebe6d3 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -14,6 +14,7 @@ #include <linux/unistd.h> #include <linux/err.h> #include <linux/ipc_namespace.h> +#include <linux/pid.h> /* * The IPC ID contains 2 separate numbers - index and sequence number. |
