From 25be451aa4c0e9a96c59a626ab0e93d5cb7f6f48 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Sun, 19 Mar 2023 14:35:42 -0700 Subject: module: fold usermode helper kmod into modules directory The kernel/kmod.c is already only built if we enabled modules, so just stuff it under kernel/module/kmod.c and unify the MAINTAINERS file for it. Signed-off-by: Luis Chamberlain --- kernel/module/kmod.c | 177 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 kernel/module/kmod.c (limited to 'kernel/module/kmod.c') diff --git a/kernel/module/kmod.c b/kernel/module/kmod.c new file mode 100644 index 000000000000..b717134ebe17 --- /dev/null +++ b/kernel/module/kmod.c @@ -0,0 +1,177 @@ +/* + * kmod - the kernel module loader + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* + * Assuming: + * + * threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE, + * (u64) THREAD_SIZE * 8UL); + * + * If you need less than 50 threads would mean we're dealing with systems + * smaller than 3200 pages. This assumes you are capable of having ~13M memory, + * and this would only be an upper limit, after which the OOM killer would take + * effect. Systems like these are very unlikely if modules are enabled. + */ +#define MAX_KMOD_CONCURRENT 50 +static atomic_t kmod_concurrent_max = ATOMIC_INIT(MAX_KMOD_CONCURRENT); +static DECLARE_WAIT_QUEUE_HEAD(kmod_wq); + +/* + * This is a restriction on having *all* MAX_KMOD_CONCURRENT threads + * running at the same time without returning. When this happens we + * believe you've somehow ended up with a recursive module dependency + * creating a loop. + * + * We have no option but to fail. + * + * Userspace should proactively try to detect and prevent these. + */ +#define MAX_KMOD_ALL_BUSY_TIMEOUT 5 + +/* + modprobe_path is set via /proc/sys. +*/ +char modprobe_path[KMOD_PATH_LEN] = CONFIG_MODPROBE_PATH; + +static void free_modprobe_argv(struct subprocess_info *info) +{ + kfree(info->argv[3]); /* check call_modprobe() */ + kfree(info->argv); +} + +static int call_modprobe(char *module_name, int wait) +{ + struct subprocess_info *info; + static char *envp[] = { + "HOME=/", + "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + NULL + }; + + char **argv = kmalloc(sizeof(char *[5]), GFP_KERNEL); + if (!argv) + goto out; + + module_name = kstrdup(module_name, GFP_KERNEL); + if (!module_name) + goto free_argv; + + argv[0] = modprobe_path; + argv[1] = "-q"; + argv[2] = "--"; + argv[3] = module_name; /* check free_modprobe_argv() */ + argv[4] = NULL; + + info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL, + NULL, free_modprobe_argv, NULL); + if (!info) + goto free_module_name; + + return call_usermodehelper_exec(info, wait | UMH_KILLABLE); + +free_module_name: + kfree(module_name); +free_argv: + kfree(argv); +out: + return -ENOMEM; +} + +/** + * __request_module - try to load a kernel module + * @wait: wait (or not) for the operation to complete + * @fmt: printf style format string for the name of the module + * @...: arguments as specified in the format string + * + * Load a module using the user mode module loader. The function returns + * zero on success or a negative errno code or positive exit code from + * "modprobe" on failure. Note that a successful module load does not mean + * the module did not then unload and exit on an error of its own. Callers + * must check that the service they requested is now available not blindly + * invoke it. + * + * If module auto-loading support is disabled then this function + * simply returns -ENOENT. + */ +int __request_module(bool wait, const char *fmt, ...) +{ + va_list args; + char module_name[MODULE_NAME_LEN]; + int ret; + + /* + * We don't allow synchronous module loading from async. Module + * init may invoke async_synchronize_full() which will end up + * waiting for this task which already is waiting for the module + * loading to complete, leading to a deadlock. + */ + WARN_ON_ONCE(wait && current_is_async()); + + if (!modprobe_path[0]) + return -ENOENT; + + va_start(args, fmt); + ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); + va_end(args); + if (ret >= MODULE_NAME_LEN) + return -ENAMETOOLONG; + + ret = security_kernel_module_request(module_name); + if (ret) + return ret; + + if (atomic_dec_if_positive(&kmod_concurrent_max) < 0) { + pr_warn_ratelimited("request_module: kmod_concurrent_max (%u) close to 0 (max_modprobes: %u), for module %s, throttling...", + atomic_read(&kmod_concurrent_max), + MAX_KMOD_CONCURRENT, module_name); + ret = wait_event_killable_timeout(kmod_wq, + atomic_dec_if_positive(&kmod_concurrent_max) >= 0, + MAX_KMOD_ALL_BUSY_TIMEOUT * HZ); + if (!ret) { + pr_warn_ratelimited("request_module: modprobe %s cannot be processed, kmod busy with %d threads for more than %d seconds now", + module_name, MAX_KMOD_CONCURRENT, MAX_KMOD_ALL_BUSY_TIMEOUT); + return -ETIME; + } else if (ret == -ERESTARTSYS) { + pr_warn_ratelimited("request_module: sigkill sent for modprobe %s, giving up", module_name); + return ret; + } + } + + trace_module_request(module_name, wait, _RET_IP_); + + ret = call_modprobe(module_name, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC); + + atomic_inc(&kmod_concurrent_max); + wake_up(&kmod_wq); + + return ret; +} +EXPORT_SYMBOL(__request_module); -- cgit From 25a1b5b518f4336bff934ac8348da6c57158363a Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 24 Mar 2023 18:38:00 -0700 Subject: modules/kmod: replace implementation with a semaphore Simplify the concurrency delimiter we use for kmod with the semaphore. I had used the kmod strategy to try to implement a similar concurrency delimiter for the kernel_read*() calls from the finit_module() path so to reduce vmalloc() memory pressure. That effort didn't provide yet conclusive results, but one thing that became clear is we can use the suggested alternative solution with semaphores which Linus hinted at instead of using the atomic / wait strategy. I've stress tested this with kmod test 0008: time /data/linux-next/tools/testing/selftests/kmod/kmod.sh -t 0008 And I get only a *slight* delay. That delay however is small, a few seconds for a full test loop run that runs 150 times, for about ~30-40 seconds. The small delay is worth the simplfication IMHO. Reviewed-by: Davidlohr Bueso Reviewed-by: Miroslav Benes Reviewed-by: David Hildenbrand Signed-off-by: Luis Chamberlain --- kernel/module/kmod.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) (limited to 'kernel/module/kmod.c') diff --git a/kernel/module/kmod.c b/kernel/module/kmod.c index b717134ebe17..5899083436a3 100644 --- a/kernel/module/kmod.c +++ b/kernel/module/kmod.c @@ -40,8 +40,7 @@ * effect. Systems like these are very unlikely if modules are enabled. */ #define MAX_KMOD_CONCURRENT 50 -static atomic_t kmod_concurrent_max = ATOMIC_INIT(MAX_KMOD_CONCURRENT); -static DECLARE_WAIT_QUEUE_HEAD(kmod_wq); +static DEFINE_SEMAPHORE(kmod_concurrent_max, MAX_KMOD_CONCURRENT); /* * This is a restriction on having *all* MAX_KMOD_CONCURRENT threads @@ -148,29 +147,18 @@ int __request_module(bool wait, const char *fmt, ...) if (ret) return ret; - if (atomic_dec_if_positive(&kmod_concurrent_max) < 0) { - pr_warn_ratelimited("request_module: kmod_concurrent_max (%u) close to 0 (max_modprobes: %u), for module %s, throttling...", - atomic_read(&kmod_concurrent_max), - MAX_KMOD_CONCURRENT, module_name); - ret = wait_event_killable_timeout(kmod_wq, - atomic_dec_if_positive(&kmod_concurrent_max) >= 0, - MAX_KMOD_ALL_BUSY_TIMEOUT * HZ); - if (!ret) { - pr_warn_ratelimited("request_module: modprobe %s cannot be processed, kmod busy with %d threads for more than %d seconds now", - module_name, MAX_KMOD_CONCURRENT, MAX_KMOD_ALL_BUSY_TIMEOUT); - return -ETIME; - } else if (ret == -ERESTARTSYS) { - pr_warn_ratelimited("request_module: sigkill sent for modprobe %s, giving up", module_name); - return ret; - } + ret = down_timeout(&kmod_concurrent_max, MAX_KMOD_ALL_BUSY_TIMEOUT * HZ); + if (ret) { + pr_warn_ratelimited("request_module: modprobe %s cannot be processed, kmod busy with %d threads for more than %d seconds now", + module_name, MAX_KMOD_CONCURRENT, MAX_KMOD_ALL_BUSY_TIMEOUT); + return ret; } trace_module_request(module_name, wait, _RET_IP_); ret = call_modprobe(module_name, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC); - atomic_inc(&kmod_concurrent_max); - wake_up(&kmod_wq); + up(&kmod_concurrent_max); return ret; } -- cgit From 8660484ed1cf3261e89e0bad94c6395597e87599 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Thu, 13 Apr 2023 22:28:39 -0700 Subject: module: add debugging auto-load duplicate module support The finit_module() system call can in the worst case use up to more than twice of a module's size in virtual memory. Duplicate finit_module() system calls are non fatal, however they unnecessarily strain virtual memory during bootup and in the worst case can cause a system to fail to boot. This is only known to currently be an issue on systems with larger number of CPUs. To help debug this situation we need to consider the different sources for finit_module(). Requests from the kernel that rely on module auto-loading, ie, the kernel's *request_module() API, are one source of calls. Although modprobe checks to see if a module is already loaded prior to calling finit_module() there is a small race possible allowing userspace to trigger multiple modprobe calls racing against modprobe and this not seeing the module yet loaded. This adds debugging support to the kernel module auto-loader (*request_module() calls) to easily detect duplicate module requests. To aid with possible bootup failure issues incurred by this, it will converge duplicates requests to a single request. This avoids any possible strain on virtual memory during bootup which could be incurred by duplicate module autoloading requests. Folks debugging virtual memory abuse on bootup can and should enable this to see what pr_warn()s come on, to see if module auto-loading is to blame for their wores. If they see duplicates they can further debug this by enabling the module.enable_dups_trace kernel parameter or by enabling CONFIG_MODULE_DEBUG_AUTOLOAD_DUPS_TRACE. Current evidence seems to point to only a few duplicates for module auto-loading. And so the source for other duplicates creating heavy virtual memory pressure due to larger number of CPUs should becoming from another place (likely udev). Signed-off-by: Luis Chamberlain --- kernel/module/kmod.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'kernel/module/kmod.c') diff --git a/kernel/module/kmod.c b/kernel/module/kmod.c index 5899083436a3..0800d9891692 100644 --- a/kernel/module/kmod.c +++ b/kernel/module/kmod.c @@ -1,6 +1,9 @@ /* * kmod - the kernel module loader + * + * Copyright (C) 2023 Luis Chamberlain */ + #include #include #include @@ -27,6 +30,7 @@ #include #include +#include "internal.h" /* * Assuming: @@ -65,7 +69,7 @@ static void free_modprobe_argv(struct subprocess_info *info) kfree(info->argv); } -static int call_modprobe(char *module_name, int wait) +static int call_modprobe(char *orig_module_name, int wait) { struct subprocess_info *info; static char *envp[] = { @@ -74,12 +78,14 @@ static int call_modprobe(char *module_name, int wait) "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; + char *module_name; + int ret; char **argv = kmalloc(sizeof(char *[5]), GFP_KERNEL); if (!argv) goto out; - module_name = kstrdup(module_name, GFP_KERNEL); + module_name = kstrdup(orig_module_name, GFP_KERNEL); if (!module_name) goto free_argv; @@ -94,13 +100,16 @@ static int call_modprobe(char *module_name, int wait) if (!info) goto free_module_name; - return call_usermodehelper_exec(info, wait | UMH_KILLABLE); + ret = call_usermodehelper_exec(info, wait | UMH_KILLABLE); + kmod_dup_request_announce(orig_module_name, ret); + return ret; free_module_name: kfree(module_name); free_argv: kfree(argv); out: + kmod_dup_request_announce(orig_module_name, -ENOMEM); return -ENOMEM; } @@ -124,7 +133,7 @@ int __request_module(bool wait, const char *fmt, ...) { va_list args; char module_name[MODULE_NAME_LEN]; - int ret; + int ret, dup_ret; /* * We don't allow synchronous module loading from async. Module @@ -156,8 +165,14 @@ int __request_module(bool wait, const char *fmt, ...) trace_module_request(module_name, wait, _RET_IP_); + if (kmod_dup_request_exists_wait(module_name, wait, &dup_ret)) { + ret = dup_ret; + goto out; + } + ret = call_modprobe(module_name, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC); +out: up(&kmod_concurrent_max); return ret; -- cgit