diff options
Diffstat (limited to 'kernel/pid.c')
-rw-r--r-- | kernel/pid.c | 139 |
1 files changed, 125 insertions, 14 deletions
diff --git a/kernel/pid.c b/kernel/pid.c index 115448e89c3e..3a10a7b6fcf8 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -43,6 +43,7 @@ #include <linux/sched/task.h> #include <linux/idr.h> #include <linux/pidfs.h> +#include <linux/seqlock.h> #include <net/sock.h> #include <uapi/linux/pidfd.h> @@ -60,15 +61,8 @@ struct pid init_struct_pid = { }, } }; -int pid_max = PID_MAX_DEFAULT; - -int pid_max_min = RESERVED_PIDS + 1; -int pid_max_max = PID_MAX_LIMIT; -/* - * Pseudo filesystems start inode numbering after one. We use Reserved - * PIDs as a natural offset. - */ -static u64 pidfs_ino = RESERVED_PIDS; +static int pid_max_min = RESERVED_PIDS + 1; +static int pid_max_max = PID_MAX_LIMIT; /* * PID-map pages start out as NULL, they get allocated upon @@ -87,6 +81,7 @@ struct pid_namespace init_pid_ns = { #ifdef CONFIG_PID_NS .ns.ops = &pidns_operations, #endif + .pid_max = PID_MAX_DEFAULT, #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) .memfd_noexec_scope = MEMFD_NOEXEC_SCOPE_EXEC, #endif @@ -108,6 +103,7 @@ EXPORT_SYMBOL_GPL(init_pid_ns); */ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock); +seqcount_spinlock_t pidmap_lock_seq = SEQCNT_SPINLOCK_ZERO(pidmap_lock_seq, &pidmap_lock); void put_pid(struct pid *pid) { @@ -158,6 +154,7 @@ void free_pid(struct pid *pid) idr_remove(&ns->idr, upid->nr); } + pidfs_remove_pid(pid); spin_unlock_irqrestore(&pidmap_lock, flags); call_rcu(&pid->rcu, delayed_put_pid); @@ -193,6 +190,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, for (i = ns->level; i >= 0; i--) { int tid = 0; + int pid_max = READ_ONCE(tmp->pid_max); if (set_tid_size) { tid = set_tid[ns->level - i]; @@ -273,22 +271,24 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, INIT_HLIST_HEAD(&pid->inodes); upid = pid->numbers + ns->level; + idr_preload(GFP_KERNEL); spin_lock_irq(&pidmap_lock); if (!(ns->pid_allocated & PIDNS_ADDING)) goto out_unlock; - pid->stashed = NULL; - pid->ino = ++pidfs_ino; + pidfs_add_pid(pid); for ( ; upid >= pid->numbers; --upid) { /* Make the PID visible to find_pid_ns. */ idr_replace(&upid->ns->idr, pid, upid->nr); upid->ns->pid_allocated++; } spin_unlock_irq(&pidmap_lock); + idr_preload_end(); return pid; out_unlock: spin_unlock_irq(&pidmap_lock); + idr_preload_end(); put_pid_ns(ns); out_free: @@ -644,17 +644,118 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags) return fd; } +#ifdef CONFIG_SYSCTL +static struct ctl_table_set *pid_table_root_lookup(struct ctl_table_root *root) +{ + return &task_active_pid_ns(current)->set; +} + +static int set_is_seen(struct ctl_table_set *set) +{ + return &task_active_pid_ns(current)->set == set; +} + +static int pid_table_root_permissions(struct ctl_table_header *head, + const struct ctl_table *table) +{ + struct pid_namespace *pidns = + container_of(head->set, struct pid_namespace, set); + int mode = table->mode; + + if (ns_capable(pidns->user_ns, CAP_SYS_ADMIN) || + uid_eq(current_euid(), make_kuid(pidns->user_ns, 0))) + mode = (mode & S_IRWXU) >> 6; + else if (in_egroup_p(make_kgid(pidns->user_ns, 0))) + mode = (mode & S_IRWXG) >> 3; + else + mode = mode & S_IROTH; + return (mode << 6) | (mode << 3) | mode; +} + +static void pid_table_root_set_ownership(struct ctl_table_header *head, + kuid_t *uid, kgid_t *gid) +{ + struct pid_namespace *pidns = + container_of(head->set, struct pid_namespace, set); + kuid_t ns_root_uid; + kgid_t ns_root_gid; + + ns_root_uid = make_kuid(pidns->user_ns, 0); + if (uid_valid(ns_root_uid)) + *uid = ns_root_uid; + + ns_root_gid = make_kgid(pidns->user_ns, 0); + if (gid_valid(ns_root_gid)) + *gid = ns_root_gid; +} + +static struct ctl_table_root pid_table_root = { + .lookup = pid_table_root_lookup, + .permissions = pid_table_root_permissions, + .set_ownership = pid_table_root_set_ownership, +}; + +static struct ctl_table pid_table[] = { + { + .procname = "pid_max", + .data = &init_pid_ns.pid_max, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &pid_max_min, + .extra2 = &pid_max_max, + }, +}; +#endif + +int register_pidns_sysctls(struct pid_namespace *pidns) +{ +#ifdef CONFIG_SYSCTL + struct ctl_table *tbl; + + setup_sysctl_set(&pidns->set, &pid_table_root, set_is_seen); + + tbl = kmemdup(pid_table, sizeof(pid_table), GFP_KERNEL); + if (!tbl) + return -ENOMEM; + tbl->data = &pidns->pid_max; + pidns->pid_max = min(pid_max_max, max_t(int, pidns->pid_max, + PIDS_PER_CPU_DEFAULT * num_possible_cpus())); + + pidns->sysctls = __register_sysctl_table(&pidns->set, "kernel", tbl, + ARRAY_SIZE(pid_table)); + if (!pidns->sysctls) { + kfree(tbl); + retire_sysctl_set(&pidns->set); + return -ENOMEM; + } +#endif + return 0; +} + +void unregister_pidns_sysctls(struct pid_namespace *pidns) +{ +#ifdef CONFIG_SYSCTL + const struct ctl_table *tbl; + + tbl = pidns->sysctls->ctl_table_arg; + unregister_sysctl_table(pidns->sysctls); + retire_sysctl_set(&pidns->set); + kfree(tbl); +#endif +} + void __init pid_idr_init(void) { /* Verify no one has done anything silly: */ BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_ADDING); /* bump default and minimum pid_max based on number of cpus */ - pid_max = min(pid_max_max, max_t(int, pid_max, - PIDS_PER_CPU_DEFAULT * num_possible_cpus())); + init_pid_ns.pid_max = min(pid_max_max, max_t(int, init_pid_ns.pid_max, + PIDS_PER_CPU_DEFAULT * num_possible_cpus())); pid_max_min = max_t(int, pid_max_min, PIDS_PER_CPU_MIN * num_possible_cpus()); - pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min); + pr_info("pid_max: default: %u minimum: %u\n", init_pid_ns.pid_max, pid_max_min); idr_init(&init_pid_ns.idr); @@ -665,6 +766,16 @@ void __init pid_idr_init(void) NULL); } +static __init int pid_namespace_sysctl_init(void) +{ +#ifdef CONFIG_SYSCTL + /* "kernel" directory will have already been initialized. */ + BUG_ON(register_pidns_sysctls(&init_pid_ns)); +#endif + return 0; +} +subsys_initcall(pid_namespace_sysctl_init); + static struct file *__pidfd_fget(struct task_struct *task, int fd) { struct file *file; |