diff options
Diffstat (limited to 'ipc/ipc_sysctl.c')
| -rw-r--r-- | ipc/ipc_sysctl.c | 396 |
1 files changed, 225 insertions, 171 deletions
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 130dfece27ac..15b17e86e198 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2007 * * Author: Eric Biederman <ebiederm@xmision.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation, version 2 of the - * License. */ #include <linux/module.h> @@ -14,46 +10,21 @@ #include <linux/nsproxy.h> #include <linux/sysctl.h> #include <linux/uaccess.h> +#include <linux/capability.h> #include <linux/ipc_namespace.h> #include <linux/msg.h> +#include <linux/slab.h> +#include <linux/cred.h> #include "util.h" -static void *get_ipc(ctl_table *table) -{ - char *which = table->data; - struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; - which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns; - return which; -} - -#ifdef CONFIG_PROC_SYSCTL -static int proc_ipc_dointvec(ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - struct ctl_table ipc_table; - - memcpy(&ipc_table, table, sizeof(ipc_table)); - ipc_table.data = get_ipc(table); - - return proc_dointvec(&ipc_table, write, buffer, lenp, ppos); -} - -static int proc_ipc_dointvec_minmax(ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +static int proc_ipc_dointvec_minmax_orphans(const struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) { - struct ctl_table ipc_table; + struct ipc_namespace *ns = + container_of(table->data, struct ipc_namespace, shm_rmid_forced); + int err; - memcpy(&ipc_table, table, sizeof(ipc_table)); - ipc_table.data = get_ipc(table); - - return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); -} - -static int proc_ipc_dointvec_minmax_orphans(ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - struct ipc_namespace *ns = current->nsproxy->ipc_ns; - int err = proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos); + err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (err < 0) return err; @@ -62,127 +33,69 @@ static int proc_ipc_dointvec_minmax_orphans(ctl_table *table, int write, return err; } -static int proc_ipc_callback_dointvec(ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +static int proc_ipc_auto_msgmni(const struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; - size_t lenp_bef = *lenp; - int rc; + int dummy = 0; memcpy(&ipc_table, table, sizeof(ipc_table)); - ipc_table.data = get_ipc(table); + ipc_table.data = &dummy; - rc = proc_dointvec(&ipc_table, write, buffer, lenp, ppos); + if (write) + pr_info_once("writing to auto_msgmni has no effect"); - if (write && !rc && lenp_bef == *lenp) - /* - * Tunable has successfully been changed by hand. Disable its - * automatic adjustment. This simply requires unregistering - * the notifiers that trigger recalculation. - */ - unregister_ipcns_notifier(current->nsproxy->ipc_ns); - - return rc; + return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); } -static int proc_ipc_doulongvec_minmax(ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +static int proc_ipc_sem_dointvec(const struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) { - struct ctl_table ipc_table; - memcpy(&ipc_table, table, sizeof(ipc_table)); - ipc_table.data = get_ipc(table); + struct ipc_namespace *ns = + container_of(table->data, struct ipc_namespace, sem_ctls); + int ret, semmni; - return proc_doulongvec_minmax(&ipc_table, write, buffer, - lenp, ppos); -} + semmni = ns->sem_ctls[3]; + ret = proc_dointvec(table, write, buffer, lenp, ppos); -/* - * Routine that is called when the file "auto_msgmni" has successfully been - * written. - * Two values are allowed: - * 0: unregister msgmni's callback routine from the ipc namespace notifier - * chain. This means that msgmni won't be recomputed anymore upon memory - * add/remove or ipc namespace creation/removal. - * 1: register back the callback routine. - */ -static void ipc_auto_callback(int val) -{ - if (!val) - unregister_ipcns_notifier(current->nsproxy->ipc_ns); - else { - /* - * Re-enable automatic recomputing only if not already - * enabled. - */ - recompute_msgmni(current->nsproxy->ipc_ns); - cond_register_ipcns_notifier(current->nsproxy->ipc_ns); - } -} - -static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - struct ctl_table ipc_table; - size_t lenp_bef = *lenp; - int oldval; - int rc; - - memcpy(&ipc_table, table, sizeof(ipc_table)); - ipc_table.data = get_ipc(table); - oldval = *((int *)(ipc_table.data)); - - rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); - - if (write && !rc && lenp_bef == *lenp) { - int newval = *((int *)(ipc_table.data)); - /* - * The file "auto_msgmni" has correctly been set. - * React by (un)registering the corresponding tunable, if the - * value has changed. - */ - if (newval != oldval) - ipc_auto_callback(newval); - } + if (!ret) + ret = sem_check_semmni(ns); - return rc; + /* + * Reset the semmni value if an error happens. + */ + if (ret) + ns->sem_ctls[3] = semmni; + return ret; } -#else -#define proc_ipc_doulongvec_minmax NULL -#define proc_ipc_dointvec NULL -#define proc_ipc_dointvec_minmax NULL -#define proc_ipc_dointvec_minmax_orphans NULL -#define proc_ipc_callback_dointvec NULL -#define proc_ipcauto_dointvec_minmax NULL -#endif - -static int zero; -static int one = 1; -#ifdef CONFIG_CHECKPOINT_RESTORE -static int int_max = INT_MAX; -#endif +int ipc_mni = IPCMNI; +int ipc_mni_shift = IPCMNI_SHIFT; +int ipc_min_cycle = RADIX_TREE_MAP_SIZE; -static struct ctl_table ipc_kern_table[] = { +static const struct ctl_table ipc_sysctls[] = { { .procname = "shmmax", .data = &init_ipc_ns.shm_ctlmax, - .maxlen = sizeof (init_ipc_ns.shm_ctlmax), + .maxlen = sizeof(init_ipc_ns.shm_ctlmax), .mode = 0644, - .proc_handler = proc_ipc_doulongvec_minmax, + .proc_handler = proc_doulongvec_minmax, }, { .procname = "shmall", .data = &init_ipc_ns.shm_ctlall, - .maxlen = sizeof (init_ipc_ns.shm_ctlall), + .maxlen = sizeof(init_ipc_ns.shm_ctlall), .mode = 0644, - .proc_handler = proc_ipc_doulongvec_minmax, + .proc_handler = proc_doulongvec_minmax, }, { .procname = "shmmni", .data = &init_ipc_ns.shm_ctlmni, - .maxlen = sizeof (init_ipc_ns.shm_ctlmni), + .maxlen = sizeof(init_ipc_ns.shm_ctlmni), .mode = 0644, - .proc_handler = proc_ipc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &ipc_mni, }, { .procname = "shm_rmid_forced", @@ -190,91 +103,232 @@ static struct ctl_table ipc_kern_table[] = { .maxlen = sizeof(init_ipc_ns.shm_rmid_forced), .mode = 0644, .proc_handler = proc_ipc_dointvec_minmax_orphans, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "msgmax", .data = &init_ipc_ns.msg_ctlmax, - .maxlen = sizeof (init_ipc_ns.msg_ctlmax), + .maxlen = sizeof(init_ipc_ns.msg_ctlmax), .mode = 0644, - .proc_handler = proc_ipc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "msgmni", .data = &init_ipc_ns.msg_ctlmni, - .maxlen = sizeof (init_ipc_ns.msg_ctlmni), + .maxlen = sizeof(init_ipc_ns.msg_ctlmni), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &ipc_mni, + }, + { + .procname = "auto_msgmni", + .data = NULL, + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_ipc_callback_dointvec, + .proc_handler = proc_ipc_auto_msgmni, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "msgmnb", .data = &init_ipc_ns.msg_ctlmnb, - .maxlen = sizeof (init_ipc_ns.msg_ctlmnb), + .maxlen = sizeof(init_ipc_ns.msg_ctlmnb), .mode = 0644, - .proc_handler = proc_ipc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "sem", .data = &init_ipc_ns.sem_ctls, - .maxlen = 4*sizeof (int), + .maxlen = 4*sizeof(int), .mode = 0644, - .proc_handler = proc_ipc_dointvec, - }, - { - .procname = "auto_msgmni", - .data = &init_ipc_ns.auto_msgmni, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_ipcauto_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .proc_handler = proc_ipc_sem_dointvec, }, #ifdef CONFIG_CHECKPOINT_RESTORE { .procname = "sem_next_id", .data = &init_ipc_ns.ids[IPC_SEM_IDS].next_id, .maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id), - .mode = 0644, - .proc_handler = proc_ipc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &int_max, + .mode = 0444, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "msg_next_id", .data = &init_ipc_ns.ids[IPC_MSG_IDS].next_id, .maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id), - .mode = 0644, - .proc_handler = proc_ipc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &int_max, + .mode = 0444, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "shm_next_id", .data = &init_ipc_ns.ids[IPC_SHM_IDS].next_id, .maxlen = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id), - .mode = 0644, - .proc_handler = proc_ipc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &int_max, + .mode = 0444, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, }, #endif - {} }; -static struct ctl_table ipc_root_table[] = { +static struct ctl_table_set *set_lookup(struct ctl_table_root *root) +{ + return ¤t->nsproxy->ipc_ns->ipc_set; +} + +static int set_is_seen(struct ctl_table_set *set) +{ + return ¤t->nsproxy->ipc_ns->ipc_set == set; +} + +static void ipc_set_ownership(struct ctl_table_header *head, + kuid_t *uid, kgid_t *gid) +{ + struct ipc_namespace *ns = + container_of(head->set, struct ipc_namespace, ipc_set); + + kuid_t ns_root_uid = make_kuid(ns->user_ns, 0); + kgid_t ns_root_gid = make_kgid(ns->user_ns, 0); + + *uid = uid_valid(ns_root_uid) ? ns_root_uid : GLOBAL_ROOT_UID; + *gid = gid_valid(ns_root_gid) ? ns_root_gid : GLOBAL_ROOT_GID; +} + +static int ipc_permissions(struct ctl_table_header *head, const struct ctl_table *table) +{ + int mode = table->mode; + +#ifdef CONFIG_CHECKPOINT_RESTORE + struct ipc_namespace *ns = + container_of(head->set, struct ipc_namespace, ipc_set); + + if (((table->data == &ns->ids[IPC_SEM_IDS].next_id) || + (table->data == &ns->ids[IPC_MSG_IDS].next_id) || + (table->data == &ns->ids[IPC_SHM_IDS].next_id)) && + checkpoint_restore_ns_capable(ns->user_ns)) + mode = 0666; + else +#endif { - .procname = "kernel", - .mode = 0555, - .child = ipc_kern_table, - }, - {} + kuid_t ns_root_uid; + kgid_t ns_root_gid; + + ipc_set_ownership(head, &ns_root_uid, &ns_root_gid); + + if (uid_eq(current_euid(), ns_root_uid)) + mode >>= 6; + + else if (in_egroup_p(ns_root_gid)) + mode >>= 3; + } + + mode &= 7; + + return (mode << 6) | (mode << 3) | mode; +} + +static struct ctl_table_root set_root = { + .lookup = set_lookup, + .permissions = ipc_permissions, + .set_ownership = ipc_set_ownership, }; +bool setup_ipc_sysctls(struct ipc_namespace *ns) +{ + struct ctl_table *tbl; + + setup_sysctl_set(&ns->ipc_set, &set_root, set_is_seen); + + tbl = kmemdup(ipc_sysctls, sizeof(ipc_sysctls), GFP_KERNEL); + if (tbl) { + int i; + + for (i = 0; i < ARRAY_SIZE(ipc_sysctls); i++) { + if (tbl[i].data == &init_ipc_ns.shm_ctlmax) + tbl[i].data = &ns->shm_ctlmax; + + else if (tbl[i].data == &init_ipc_ns.shm_ctlall) + tbl[i].data = &ns->shm_ctlall; + + else if (tbl[i].data == &init_ipc_ns.shm_ctlmni) + tbl[i].data = &ns->shm_ctlmni; + + else if (tbl[i].data == &init_ipc_ns.shm_rmid_forced) + tbl[i].data = &ns->shm_rmid_forced; + + else if (tbl[i].data == &init_ipc_ns.msg_ctlmax) + tbl[i].data = &ns->msg_ctlmax; + + else if (tbl[i].data == &init_ipc_ns.msg_ctlmni) + tbl[i].data = &ns->msg_ctlmni; + + else if (tbl[i].data == &init_ipc_ns.msg_ctlmnb) + tbl[i].data = &ns->msg_ctlmnb; + + else if (tbl[i].data == &init_ipc_ns.sem_ctls) + tbl[i].data = &ns->sem_ctls; +#ifdef CONFIG_CHECKPOINT_RESTORE + else if (tbl[i].data == &init_ipc_ns.ids[IPC_SEM_IDS].next_id) + tbl[i].data = &ns->ids[IPC_SEM_IDS].next_id; + + else if (tbl[i].data == &init_ipc_ns.ids[IPC_MSG_IDS].next_id) + tbl[i].data = &ns->ids[IPC_MSG_IDS].next_id; + + else if (tbl[i].data == &init_ipc_ns.ids[IPC_SHM_IDS].next_id) + tbl[i].data = &ns->ids[IPC_SHM_IDS].next_id; +#endif + else + tbl[i].data = NULL; + } + + ns->ipc_sysctls = __register_sysctl_table(&ns->ipc_set, "kernel", tbl, + ARRAY_SIZE(ipc_sysctls)); + } + if (!ns->ipc_sysctls) { + kfree(tbl); + retire_sysctl_set(&ns->ipc_set); + return false; + } + + return true; +} + +void retire_ipc_sysctls(struct ipc_namespace *ns) +{ + const struct ctl_table *tbl; + + tbl = ns->ipc_sysctls->ctl_table_arg; + unregister_sysctl_table(ns->ipc_sysctls); + retire_sysctl_set(&ns->ipc_set); + kfree(tbl); +} + static int __init ipc_sysctl_init(void) { - register_sysctl_table(ipc_root_table); + if (!setup_ipc_sysctls(&init_ipc_ns)) { + pr_warn("ipc sysctl registration failed\n"); + return -ENOMEM; + } return 0; } -__initcall(ipc_sysctl_init); +device_initcall(ipc_sysctl_init); + +static int __init ipc_mni_extend(char *str) +{ + ipc_mni = IPCMNI_EXTEND; + ipc_mni_shift = IPCMNI_EXTEND_SHIFT; + ipc_min_cycle = IPCMNI_EXTEND_MIN_CYCLE; + pr_info("IPCMNI extended to %d.\n", ipc_mni); + return 0; +} +early_param("ipcmni_extend", ipc_mni_extend); |
