From 49f0ce5f92321cdcf741e35f385669a421013cb7 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Tue, 21 Jan 2014 15:49:14 -0800 Subject: mm: add overcommit_kbytes sysctl variable Some applications that run on HPC clusters are designed around the availability of RAM and the overcommit ratio is fine tuned to get the maximum usage of memory without swapping. With growing memory, the 1%-of-all-RAM grain provided by overcommit_ratio has become too coarse for these workload (on a 2TB machine it represents no less than 20GB). This patch adds the new overcommit_kbytes sysctl variable that allow a much finer grain. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: fix nommu build] Signed-off-by: Jerome Marchand Cc: Dave Hansen Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c8da99f905cf..332cefcdb04b 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -95,8 +95,6 @@ #if defined(CONFIG_SYSCTL) /* External variables not in a header file. */ -extern int sysctl_overcommit_memory; -extern int sysctl_overcommit_ratio; extern int max_threads; extern int suid_dumpable; #ifdef CONFIG_COREDUMP @@ -1121,7 +1119,14 @@ static struct ctl_table vm_table[] = { .data = &sysctl_overcommit_ratio, .maxlen = sizeof(sysctl_overcommit_ratio), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = overcommit_ratio_handler, + }, + { + .procname = "overcommit_kbytes", + .data = &sysctl_overcommit_kbytes, + .maxlen = sizeof(sysctl_overcommit_kbytes), + .mode = 0644, + .proc_handler = overcommit_kbytes_handler, }, { .procname = "page-cluster", -- cgit From 54a43d54988a3731d644fdeb7a1d6f46b4ac64c7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 23 Jan 2014 15:53:13 -0800 Subject: numa: add a sysctl for numa_balancing Add a working sysctl to enable/disable automatic numa memory balancing at runtime. This allows us to track down performance problems with this feature and is generally a good idea. This was possible earlier through debugfs, but only with special debugging options set. Also fix the boot message. [akpm@linux-foundation.org: s/sched_numa_balancing/sysctl_numa_balancing/] Signed-off-by: Andi Kleen Acked-by: Mel Gorman Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 332cefcdb04b..693eac39c202 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -389,6 +389,15 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "numa_balancing", + .data = NULL, /* filled in by handler */ + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sysctl_numa_balancing, + .extra1 = &zero, + .extra2 = &one, + }, #endif /* CONFIG_NUMA_BALANCING */ #endif /* CONFIG_SCHED_DEBUG */ { -- cgit From 7984754b99b6c89054edc405e9d9d35810a91d36 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 23 Jan 2014 15:55:59 -0800 Subject: kexec: add sysctl to disable kexec_load For general-purpose (i.e. distro) kernel builds it makes sense to build with CONFIG_KEXEC to allow end users to choose what kind of things they want to do with kexec. However, in the face of trying to lock down a system with such a kernel, there needs to be a way to disable kexec_load (much like module loading can be disabled). Without this, it is too easy for the root user to modify kernel memory even when CONFIG_STRICT_DEVMEM and modules_disabled are set. With this change, it is still possible to load an image for use later, then disable kexec_load so the image (or lack of image) can't be altered. The intention is for using this in environments where "perfect" enforcement is hard. Without a verified boot, along with verified modules, and along with verified kexec, this is trying to give a system a better chance to defend itself (or at least grow the window of discoverability) against attack in the face of a privilege escalation. In my mind, I consider several boot scenarios: 1) Verified boot of read-only verified root fs loading fd-based verification of kexec images. 2) Secure boot of writable root fs loading signed kexec images. 3) Regular boot loading kexec (e.g. kcrash) image early and locking it. 4) Regular boot with no control of kexec image at all. 1 and 2 don't exist yet, but will soon once the verified kexec series has landed. 4 is the state of things now. The gap between 2 and 4 is too large, so this change creates scenario 3, a middle-ground above 4 when 2 and 1 are not possible for a system. Signed-off-by: Kees Cook Acked-by: Rik van Riel Cc: Vivek Goyal Cc: Eric Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 693eac39c202..096db7452cbd 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -614,6 +615,18 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif +#ifdef CONFIG_KEXEC + { + .procname = "kexec_load_disabled", + .data = &kexec_load_disabled, + .maxlen = sizeof(int), + .mode = 0644, + /* only handle a transition from default "0" to "1" */ + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &one, + }, +#endif #ifdef CONFIG_MODULES { .procname = "modprobe", -- cgit From 2397efb1bb17595b35f31abb40d95074ebc04f1b Mon Sep 17 00:00:00 2001 From: Aaron Tomlin Date: Mon, 20 Jan 2014 17:34:12 +0000 Subject: sysctl: Add neg_one as a standard constraint Add neg_one to the list of standard constraints - will be used by the next patch. Signed-off-by: Aaron Tomlin Acked-by: Rik van Riel Acked-by: David Rientjes Cc: oleg@redhat.com Link: http://lkml.kernel.org/r/1390239253-24030-2-git-send-email-atomlin@redhat.com Signed-off-by: Ingo Molnar --- kernel/sysctl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c8da99f905cf..c398a58673a7 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -122,6 +122,7 @@ extern int blk_iopoll_enabled; static int sixty = 60; #endif +static int neg_one = -1; static int zero; static int __maybe_unused one = 1; static int __maybe_unused two = 2; -- cgit From 270750dbc18a71b23d660df110e433ff9616a2d4 Mon Sep 17 00:00:00 2001 From: Aaron Tomlin Date: Mon, 20 Jan 2014 17:34:13 +0000 Subject: hung_task: Display every hung task warning When khungtaskd detects hung tasks, it prints out backtraces from a number of those tasks. Limiting the number of backtraces being printed out can result in the user not seeing the information necessary to debug the issue. The hung_task_warnings sysctl controls this feature. This patch makes it possible for hung_task_warnings to accept a special value to print an unlimited number of backtraces when khungtaskd detects hung tasks. The special value is -1. To use this value it is necessary to change types from ulong to int. Signed-off-by: Aaron Tomlin Reviewed-by: Rik van Riel Acked-by: David Rientjes Cc: oleg@redhat.com Link: http://lkml.kernel.org/r/1390239253-24030-3-git-send-email-atomlin@redhat.com [ Build warning fix. ] Signed-off-by: Ingo Molnar --- kernel/sysctl.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c398a58673a7..dd5b4496637e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -122,7 +122,8 @@ extern int blk_iopoll_enabled; static int sixty = 60; #endif -static int neg_one = -1; +static int __maybe_unused neg_one = -1; + static int zero; static int __maybe_unused one = 1; static int __maybe_unused two = 2; @@ -978,9 +979,10 @@ static struct ctl_table kern_table[] = { { .procname = "hung_task_warnings", .data = &sysctl_hung_task_warnings, - .maxlen = sizeof(unsigned long), + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_doulongvec_minmax, + .proc_handler = proc_dointvec_minmax, + .extra1 = &neg_one, }, #endif #ifdef CONFIG_COMPAT -- cgit