From 25be451aa4c0e9a96c59a626ab0e93d5cb7f6f48 Mon Sep 17 00:00:00 2001
From: Luis Chamberlain <mcgrof@kernel.org>
Date: Sun, 19 Mar 2023 14:35:42 -0700
Subject: module: fold usermode helper kmod into modules directory

The kernel/kmod.c is already only built if we enabled modules, so
just stuff it under kernel/module/kmod.c and unify the MAINTAINERS
file for it.

Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
 kernel/module/kmod.c | 177 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 177 insertions(+)
 create mode 100644 kernel/module/kmod.c

(limited to 'kernel/module/kmod.c')

diff --git a/kernel/module/kmod.c b/kernel/module/kmod.c
new file mode 100644
index 000000000000..b717134ebe17
--- /dev/null
+++ b/kernel/module/kmod.c
@@ -0,0 +1,177 @@
+/*
+ * kmod - the kernel module loader
+ */
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/sched/task.h>
+#include <linux/binfmts.h>
+#include <linux/syscalls.h>
+#include <linux/unistd.h>
+#include <linux/kmod.h>
+#include <linux/slab.h>
+#include <linux/completion.h>
+#include <linux/cred.h>
+#include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/workqueue.h>
+#include <linux/security.h>
+#include <linux/mount.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/resource.h>
+#include <linux/notifier.h>
+#include <linux/suspend.h>
+#include <linux/rwsem.h>
+#include <linux/ptrace.h>
+#include <linux/async.h>
+#include <linux/uaccess.h>
+
+#include <trace/events/module.h>
+
+/*
+ * Assuming:
+ *
+ * threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE,
+ *		       (u64) THREAD_SIZE * 8UL);
+ *
+ * If you need less than 50 threads would mean we're dealing with systems
+ * smaller than 3200 pages. This assumes you are capable of having ~13M memory,
+ * and this would only be an upper limit, after which the OOM killer would take
+ * effect. Systems like these are very unlikely if modules are enabled.
+ */
+#define MAX_KMOD_CONCURRENT 50
+static atomic_t kmod_concurrent_max = ATOMIC_INIT(MAX_KMOD_CONCURRENT);
+static DECLARE_WAIT_QUEUE_HEAD(kmod_wq);
+
+/*
+ * This is a restriction on having *all* MAX_KMOD_CONCURRENT threads
+ * running at the same time without returning. When this happens we
+ * believe you've somehow ended up with a recursive module dependency
+ * creating a loop.
+ *
+ * We have no option but to fail.
+ *
+ * Userspace should proactively try to detect and prevent these.
+ */
+#define MAX_KMOD_ALL_BUSY_TIMEOUT 5
+
+/*
+	modprobe_path is set via /proc/sys.
+*/
+char modprobe_path[KMOD_PATH_LEN] = CONFIG_MODPROBE_PATH;
+
+static void free_modprobe_argv(struct subprocess_info *info)
+{
+	kfree(info->argv[3]); /* check call_modprobe() */
+	kfree(info->argv);
+}
+
+static int call_modprobe(char *module_name, int wait)
+{
+	struct subprocess_info *info;
+	static char *envp[] = {
+		"HOME=/",
+		"TERM=linux",
+		"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+		NULL
+	};
+
+	char **argv = kmalloc(sizeof(char *[5]), GFP_KERNEL);
+	if (!argv)
+		goto out;
+
+	module_name = kstrdup(module_name, GFP_KERNEL);
+	if (!module_name)
+		goto free_argv;
+
+	argv[0] = modprobe_path;
+	argv[1] = "-q";
+	argv[2] = "--";
+	argv[3] = module_name;	/* check free_modprobe_argv() */
+	argv[4] = NULL;
+
+	info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL,
+					 NULL, free_modprobe_argv, NULL);
+	if (!info)
+		goto free_module_name;
+
+	return call_usermodehelper_exec(info, wait | UMH_KILLABLE);
+
+free_module_name:
+	kfree(module_name);
+free_argv:
+	kfree(argv);
+out:
+	return -ENOMEM;
+}
+
+/**
+ * __request_module - try to load a kernel module
+ * @wait: wait (or not) for the operation to complete
+ * @fmt: printf style format string for the name of the module
+ * @...: arguments as specified in the format string
+ *
+ * Load a module using the user mode module loader. The function returns
+ * zero on success or a negative errno code or positive exit code from
+ * "modprobe" on failure. Note that a successful module load does not mean
+ * the module did not then unload and exit on an error of its own. Callers
+ * must check that the service they requested is now available not blindly
+ * invoke it.
+ *
+ * If module auto-loading support is disabled then this function
+ * simply returns -ENOENT.
+ */
+int __request_module(bool wait, const char *fmt, ...)
+{
+	va_list args;
+	char module_name[MODULE_NAME_LEN];
+	int ret;
+
+	/*
+	 * We don't allow synchronous module loading from async.  Module
+	 * init may invoke async_synchronize_full() which will end up
+	 * waiting for this task which already is waiting for the module
+	 * loading to complete, leading to a deadlock.
+	 */
+	WARN_ON_ONCE(wait && current_is_async());
+
+	if (!modprobe_path[0])
+		return -ENOENT;
+
+	va_start(args, fmt);
+	ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
+	va_end(args);
+	if (ret >= MODULE_NAME_LEN)
+		return -ENAMETOOLONG;
+
+	ret = security_kernel_module_request(module_name);
+	if (ret)
+		return ret;
+
+	if (atomic_dec_if_positive(&kmod_concurrent_max) < 0) {
+		pr_warn_ratelimited("request_module: kmod_concurrent_max (%u) close to 0 (max_modprobes: %u), for module %s, throttling...",
+				    atomic_read(&kmod_concurrent_max),
+				    MAX_KMOD_CONCURRENT, module_name);
+		ret = wait_event_killable_timeout(kmod_wq,
+						  atomic_dec_if_positive(&kmod_concurrent_max) >= 0,
+						  MAX_KMOD_ALL_BUSY_TIMEOUT * HZ);
+		if (!ret) {
+			pr_warn_ratelimited("request_module: modprobe %s cannot be processed, kmod busy with %d threads for more than %d seconds now",
+					    module_name, MAX_KMOD_CONCURRENT, MAX_KMOD_ALL_BUSY_TIMEOUT);
+			return -ETIME;
+		} else if (ret == -ERESTARTSYS) {
+			pr_warn_ratelimited("request_module: sigkill sent for modprobe %s, giving up", module_name);
+			return ret;
+		}
+	}
+
+	trace_module_request(module_name, wait, _RET_IP_);
+
+	ret = call_modprobe(module_name, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC);
+
+	atomic_inc(&kmod_concurrent_max);
+	wake_up(&kmod_wq);
+
+	return ret;
+}
+EXPORT_SYMBOL(__request_module);
-- 
cgit 


From 25a1b5b518f4336bff934ac8348da6c57158363a Mon Sep 17 00:00:00 2001
From: Luis Chamberlain <mcgrof@kernel.org>
Date: Fri, 24 Mar 2023 18:38:00 -0700
Subject: modules/kmod: replace implementation with a semaphore

Simplify the concurrency delimiter we use for kmod with the semaphore.
I had used the kmod strategy to try to implement a similar concurrency
delimiter for the kernel_read*() calls from the finit_module() path
so to reduce vmalloc() memory pressure. That effort didn't provide yet
conclusive results, but one thing that became clear is we can use
the suggested alternative solution with semaphores which Linus hinted
at instead of using the atomic / wait strategy.

I've stress tested this with kmod test 0008:

time /data/linux-next/tools/testing/selftests/kmod/kmod.sh -t 0008

And I get only a *slight* delay. That delay however is small, a few
seconds for a full test loop run that runs 150 times, for about ~30-40
seconds. The small delay is worth the simplfication IMHO.

Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
Reviewed-by: Miroslav Benes <mbenes@suse.cz>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
 kernel/module/kmod.c | 26 +++++++-------------------
 1 file changed, 7 insertions(+), 19 deletions(-)

(limited to 'kernel/module/kmod.c')

diff --git a/kernel/module/kmod.c b/kernel/module/kmod.c
index b717134ebe17..5899083436a3 100644
--- a/kernel/module/kmod.c
+++ b/kernel/module/kmod.c
@@ -40,8 +40,7 @@
  * effect. Systems like these are very unlikely if modules are enabled.
  */
 #define MAX_KMOD_CONCURRENT 50
-static atomic_t kmod_concurrent_max = ATOMIC_INIT(MAX_KMOD_CONCURRENT);
-static DECLARE_WAIT_QUEUE_HEAD(kmod_wq);
+static DEFINE_SEMAPHORE(kmod_concurrent_max, MAX_KMOD_CONCURRENT);
 
 /*
  * This is a restriction on having *all* MAX_KMOD_CONCURRENT threads
@@ -148,29 +147,18 @@ int __request_module(bool wait, const char *fmt, ...)
 	if (ret)
 		return ret;
 
-	if (atomic_dec_if_positive(&kmod_concurrent_max) < 0) {
-		pr_warn_ratelimited("request_module: kmod_concurrent_max (%u) close to 0 (max_modprobes: %u), for module %s, throttling...",
-				    atomic_read(&kmod_concurrent_max),
-				    MAX_KMOD_CONCURRENT, module_name);
-		ret = wait_event_killable_timeout(kmod_wq,
-						  atomic_dec_if_positive(&kmod_concurrent_max) >= 0,
-						  MAX_KMOD_ALL_BUSY_TIMEOUT * HZ);
-		if (!ret) {
-			pr_warn_ratelimited("request_module: modprobe %s cannot be processed, kmod busy with %d threads for more than %d seconds now",
-					    module_name, MAX_KMOD_CONCURRENT, MAX_KMOD_ALL_BUSY_TIMEOUT);
-			return -ETIME;
-		} else if (ret == -ERESTARTSYS) {
-			pr_warn_ratelimited("request_module: sigkill sent for modprobe %s, giving up", module_name);
-			return ret;
-		}
+	ret = down_timeout(&kmod_concurrent_max, MAX_KMOD_ALL_BUSY_TIMEOUT * HZ);
+	if (ret) {
+		pr_warn_ratelimited("request_module: modprobe %s cannot be processed, kmod busy with %d threads for more than %d seconds now",
+				    module_name, MAX_KMOD_CONCURRENT, MAX_KMOD_ALL_BUSY_TIMEOUT);
+		return ret;
 	}
 
 	trace_module_request(module_name, wait, _RET_IP_);
 
 	ret = call_modprobe(module_name, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC);
 
-	atomic_inc(&kmod_concurrent_max);
-	wake_up(&kmod_wq);
+	up(&kmod_concurrent_max);
 
 	return ret;
 }
-- 
cgit 


From 8660484ed1cf3261e89e0bad94c6395597e87599 Mon Sep 17 00:00:00 2001
From: Luis Chamberlain <mcgrof@kernel.org>
Date: Thu, 13 Apr 2023 22:28:39 -0700
Subject: module: add debugging auto-load duplicate module support

The finit_module() system call can in the worst case use up to more than
twice of a module's size in virtual memory. Duplicate finit_module()
system calls are non fatal, however they unnecessarily strain virtual
memory during bootup and in the worst case can cause a system to fail
to boot. This is only known to currently be an issue on systems with
larger number of CPUs.

To help debug this situation we need to consider the different sources for
finit_module(). Requests from the kernel that rely on module auto-loading,
ie, the kernel's *request_module() API, are one source of calls. Although
modprobe checks to see if a module is already loaded prior to calling
finit_module() there is a small race possible allowing userspace to
trigger multiple modprobe calls racing against modprobe and this not
seeing the module yet loaded.

This adds debugging support to the kernel module auto-loader (*request_module()
calls) to easily detect duplicate module requests. To aid with possible bootup
failure issues incurred by this, it will converge duplicates requests to a
single request. This avoids any possible strain on virtual memory during
bootup which could be incurred by duplicate module autoloading requests.

Folks debugging virtual memory abuse on bootup can and should enable
this to see what pr_warn()s come on, to see if module auto-loading is to
blame for their wores. If they see duplicates they can further debug this
by enabling the module.enable_dups_trace kernel parameter or by enabling
CONFIG_MODULE_DEBUG_AUTOLOAD_DUPS_TRACE.

Current evidence seems to point to only a few duplicates for module
auto-loading. And so the source for other duplicates creating heavy
virtual memory pressure due to larger number of CPUs should becoming
from another place (likely udev).

Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
 kernel/module/kmod.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

(limited to 'kernel/module/kmod.c')

diff --git a/kernel/module/kmod.c b/kernel/module/kmod.c
index 5899083436a3..0800d9891692 100644
--- a/kernel/module/kmod.c
+++ b/kernel/module/kmod.c
@@ -1,6 +1,9 @@
 /*
  * kmod - the kernel module loader
+ *
+ * Copyright (C) 2023 Luis Chamberlain <mcgrof@kernel.org>
  */
+
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/sched/task.h>
@@ -27,6 +30,7 @@
 #include <linux/uaccess.h>
 
 #include <trace/events/module.h>
+#include "internal.h"
 
 /*
  * Assuming:
@@ -65,7 +69,7 @@ static void free_modprobe_argv(struct subprocess_info *info)
 	kfree(info->argv);
 }
 
-static int call_modprobe(char *module_name, int wait)
+static int call_modprobe(char *orig_module_name, int wait)
 {
 	struct subprocess_info *info;
 	static char *envp[] = {
@@ -74,12 +78,14 @@ static int call_modprobe(char *module_name, int wait)
 		"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
 		NULL
 	};
+	char *module_name;
+	int ret;
 
 	char **argv = kmalloc(sizeof(char *[5]), GFP_KERNEL);
 	if (!argv)
 		goto out;
 
-	module_name = kstrdup(module_name, GFP_KERNEL);
+	module_name = kstrdup(orig_module_name, GFP_KERNEL);
 	if (!module_name)
 		goto free_argv;
 
@@ -94,13 +100,16 @@ static int call_modprobe(char *module_name, int wait)
 	if (!info)
 		goto free_module_name;
 
-	return call_usermodehelper_exec(info, wait | UMH_KILLABLE);
+	ret = call_usermodehelper_exec(info, wait | UMH_KILLABLE);
+	kmod_dup_request_announce(orig_module_name, ret);
+	return ret;
 
 free_module_name:
 	kfree(module_name);
 free_argv:
 	kfree(argv);
 out:
+	kmod_dup_request_announce(orig_module_name, -ENOMEM);
 	return -ENOMEM;
 }
 
@@ -124,7 +133,7 @@ int __request_module(bool wait, const char *fmt, ...)
 {
 	va_list args;
 	char module_name[MODULE_NAME_LEN];
-	int ret;
+	int ret, dup_ret;
 
 	/*
 	 * We don't allow synchronous module loading from async.  Module
@@ -156,8 +165,14 @@ int __request_module(bool wait, const char *fmt, ...)
 
 	trace_module_request(module_name, wait, _RET_IP_);
 
+	if (kmod_dup_request_exists_wait(module_name, wait, &dup_ret)) {
+		ret = dup_ret;
+		goto out;
+	}
+
 	ret = call_modprobe(module_name, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC);
 
+out:
 	up(&kmod_concurrent_max);
 
 	return ret;
-- 
cgit