From b376c3e1b6770ddcb4f0782be16358095fcea0b6 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 8 Aug 2016 13:41:24 -0500 Subject: userns: Add a limit on the number of user namespaces Export the export the maximum number of user namespaces as /proc/sys/userns/max_user_namespaces. Acked-by: Kees Cook Signed-off-by: "Eric W. Biederman" --- include/linux/user_namespace.h | 6 +++++ kernel/fork.c | 2 ++ kernel/ucount.c | 53 ++++++++++++++++++++++++++++++++++++++++++ kernel/user_namespace.c | 31 +++++++++++++++--------- 4 files changed, 81 insertions(+), 11 deletions(-) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index e5697eaf6bf9..6421cca2daa9 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -44,9 +44,15 @@ struct user_namespace { struct ctl_table_set set; struct ctl_table_header *sysctls; #endif + int max_user_namespaces; + atomic_t user_namespaces; }; extern struct user_namespace init_user_ns; +extern bool setup_userns_sysctls(struct user_namespace *ns); +extern void retire_userns_sysctls(struct user_namespace *ns); +extern bool inc_user_namespaces(struct user_namespace *ns); +extern void dec_user_namespaces(struct user_namespace *ns); #ifdef CONFIG_USER_NS diff --git a/kernel/fork.c b/kernel/fork.c index 52e725d4a866..daa6a82b4900 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -321,6 +321,8 @@ void __init fork_init(void) init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; init_task.signal->rlim[RLIMIT_SIGPENDING] = init_task.signal->rlim[RLIMIT_NPROC]; + + init_user_ns.max_user_namespaces = max_threads; } int __weak arch_dup_task_struct(struct task_struct *dst, diff --git a/kernel/ucount.c b/kernel/ucount.c index cbde1dc87851..6c2205c0befd 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -43,7 +43,18 @@ static struct ctl_table_root set_root = { .permissions = set_permissions, }; +static int zero = 0; +static int int_max = INT_MAX; static struct ctl_table userns_table[] = { + { + .procname = "max_user_namespaces", + .data = &init_user_ns.max_user_namespaces, + .maxlen = sizeof(init_user_ns.max_user_namespaces), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &int_max, + }, { } }; #endif /* CONFIG_SYSCTL */ @@ -55,6 +66,8 @@ bool setup_userns_sysctls(struct user_namespace *ns) setup_sysctl_set(&ns->set, &set_root, set_is_seen); tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL); if (tbl) { + tbl[0].data = &ns->max_user_namespaces; + ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl); } if (!ns->sysctls) { @@ -78,6 +91,46 @@ void retire_userns_sysctls(struct user_namespace *ns) #endif } +static inline bool atomic_inc_below(atomic_t *v, int u) +{ + int c, old; + c = atomic_read(v); + for (;;) { + if (unlikely(c >= u)) + return false; + old = atomic_cmpxchg(v, c, c+1); + if (likely(old == c)) + return true; + c = old; + } +} + +bool inc_user_namespaces(struct user_namespace *ns) +{ + struct user_namespace *pos, *bad; + for (pos = ns; pos; pos = pos->parent) { + int max = READ_ONCE(pos->max_user_namespaces); + if (!atomic_inc_below(&pos->user_namespaces, max)) + goto fail; + } + return true; +fail: + bad = pos; + for (pos = ns; pos != bad; pos = pos->parent) + atomic_dec(&pos->user_namespaces); + + return false; +} + +void dec_user_namespaces(struct user_namespace *ns) +{ + struct user_namespace *pos; + for (pos = ns; pos; pos = pos->parent) { + int dec = atomic_dec_if_positive(&pos->user_namespaces); + WARN_ON_ONCE(dec < 0); + } +} + static __init int user_namespace_sysctl_init(void) { #ifdef CONFIG_SYSCTL diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index a63332253c7e..7d87017a0040 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -23,9 +23,6 @@ #include #include -extern bool setup_userns_sysctls(struct user_namespace *ns); -extern void retire_userns_sysctls(struct user_namespace *ns); - static struct kmem_cache *user_ns_cachep __read_mostly; static DEFINE_MUTEX(userns_state_mutex); @@ -34,6 +31,7 @@ static bool new_idmap_permitted(const struct file *file, struct uid_gid_map *map); static void free_user_ns(struct work_struct *work); + static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) { /* Start with the same capabilities as init but useless for doing @@ -68,8 +66,12 @@ int create_user_ns(struct cred *new) kgid_t group = new->egid; int ret; + ret = -EUSERS; if (parent_ns->level > 32) - return -EUSERS; + goto fail; + + if (!inc_user_namespaces(parent_ns)) + goto fail; /* * Verify that we can not violate the policy of which files @@ -77,26 +79,27 @@ int create_user_ns(struct cred *new) * by verifing that the root directory is at the root of the * mount namespace which allows all files to be accessed. */ + ret = -EPERM; if (current_chrooted()) - return -EPERM; + goto fail_dec; /* The creator needs a mapping in the parent user namespace * or else we won't be able to reasonably tell userspace who * created a user_namespace. */ + ret = -EPERM; if (!kuid_has_mapping(parent_ns, owner) || !kgid_has_mapping(parent_ns, group)) - return -EPERM; + goto fail_dec; + ret = -ENOMEM; ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL); if (!ns) - return -ENOMEM; + goto fail_dec; ret = ns_alloc_inum(&ns->ns); - if (ret) { - kmem_cache_free(user_ns_cachep, ns); - return ret; - } + if (ret) + goto fail_free; ns->ns.ops = &userns_operations; atomic_set(&ns->count, 1); @@ -106,6 +109,7 @@ int create_user_ns(struct cred *new) ns->owner = owner; ns->group = group; INIT_WORK(&ns->work, free_user_ns); + ns->max_user_namespaces = INT_MAX; /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ mutex_lock(&userns_state_mutex); @@ -126,7 +130,11 @@ fail_keyring: key_put(ns->persistent_keyring_register); #endif ns_free_inum(&ns->ns); +fail_free: kmem_cache_free(user_ns_cachep, ns); +fail_dec: + dec_user_namespaces(parent_ns); +fail: return ret; } @@ -163,6 +171,7 @@ static void free_user_ns(struct work_struct *work) #endif ns_free_inum(&ns->ns); kmem_cache_free(user_ns_cachep, ns); + dec_user_namespaces(parent); ns = parent; } while (atomic_dec_and_test(&parent->count)); } -- cgit