summary | refs | log | tree | commit | diff
path: root/init
diff options
context:
space:
mode:
Diffstat (limited to 'init')
-rw-r--r-- init/Kconfig 45
-rw-r--r-- init/initramfs.c 147
-rw-r--r-- init/main.c 42
3 files changed, 152 insertions, 82 deletions
diff --git a/init/Kconfig b/init/Kconfig
index 4592bf7997c0..8b9ffe236e4f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -562,7 +562,6 @@ config BUILD_BIN2C
config IKCONFIG
tristate "Kernel .config support"
- select BUILD_BIN2C
---help---
This option enables the complete Linux kernel ".config" file
contents to be saved in the kernel. It provides documentation
@@ -580,6 +579,16 @@ config IKCONFIG_PROC
This option enables access to the kernel configuration file
through /proc/config.gz.
+config IKHEADERS_PROC
+ tristate "Enable kernel header artifacts through /proc/kheaders.tar.xz"
+ depends on PROC_FS
+ help
+ This option enables access to the kernel header and other artifacts that
+ are generated during the build process. These can be used to build eBPF
+ tracing programs, or similar programs. If you build the headers as a
+ module, a module called kheaders.ko is built which can be loaded on-demand
+ to get access to the headers.
+
config LOG_BUF_SHIFT
int "Kernel log buffer size (16 => 64KB, 17 => 128KB)"
range 12 25
@@ -1171,9 +1180,6 @@ config LD_DEAD_CODE_DATA_ELIMINATION
config SYSCTL
bool
-config ANON_INODES
- bool
-
config HAVE_UID16
bool
@@ -1378,14 +1384,12 @@ config HAVE_FUTEX_CMPXCHG
config EPOLL
bool "Enable eventpoll support" if EXPERT
default y
- select ANON_INODES
help
Disabling this option will cause the kernel to be built without
support for epoll family of system calls.
config SIGNALFD
bool "Enable signalfd() system call" if EXPERT
- select ANON_INODES
default y
help
Enable the signalfd() system call that allows to receive signals
@@ -1395,7 +1399,6 @@ config SIGNALFD
config TIMERFD
bool "Enable timerfd() system call" if EXPERT
- select ANON_INODES
default y
help
Enable the timerfd() system call that allows to receive timer
@@ -1405,7 +1408,6 @@ config TIMERFD
config EVENTFD
bool "Enable eventfd() system call" if EXPERT
- select ANON_INODES
default y
help
Enable the eventfd() system call that allows to receive both
@@ -1516,7 +1518,6 @@ config KALLSYMS_BASE_RELATIVE
# syscall, maps, verifier
config BPF_SYSCALL
bool "Enable bpf() system call"
- select ANON_INODES
select BPF
select IRQ_WORK
default n
@@ -1533,7 +1534,6 @@ config BPF_JIT_ALWAYS_ON
config USERFAULTFD
bool "Enable userfaultfd() system call"
- select ANON_INODES
depends on MMU
help
Enable the userfaultfd() system call that allows to intercept and
@@ -1600,7 +1600,6 @@ config PERF_EVENTS
bool "Kernel performance events and counters"
default y if PROFILING
depends on HAVE_PERF_EVENTS
- select ANON_INODES
select IRQ_WORK
select SRCU
help
@@ -1753,6 +1752,30 @@ config SLAB_FREELIST_HARDENED
sacrifies to harden the kernel slab allocator against common
freelist exploit methods.
+config SHUFFLE_PAGE_ALLOCATOR
+ bool "Page allocator randomization"
+ default SLAB_FREELIST_RANDOM && ACPI_NUMA
+ help
+ Randomization of the page allocator improves the average
+ utilization of a direct-mapped memory-side-cache. See section
+ 5.2.27 Heterogeneous Memory Attribute Table (HMAT) in the ACPI
+ 6.2a specification for an example of how a platform advertises
+ the presence of a memory-side-cache. There are also incidental
+ security benefits as it reduces the predictability of page
+ allocations to complement SLAB_FREELIST_RANDOM, but the
+ default granularity of shuffling on the "MAX_ORDER - 1" i.e.,
+ 10th order of pages is selected based on cache utilization
+ benefits on x86.
+
+ While the randomization improves cache utilization it may
+ negatively impact workloads on platforms without a cache. For
+ this reason, by default, the randomization is enabled only
+ after runtime detection of a direct-mapped memory-side-cache.
+ Otherwise, the randomization may be force enabled with the
+ 'page_alloc.shuffle' kernel command line parameter.
+
+ Say Y if unsure.
+
config SLUB_CPU_PARTIAL
default y
depends on SLUB && SMP
diff --git a/init/initramfs.c b/init/initramfs.c
index 4749e1115eef..178130fd61c2 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -513,42 +513,55 @@ static int __init retain_initrd_param(char *str)
}
__setup("retain_initrd", retain_initrd_param);
+#ifdef CONFIG_ARCH_HAS_KEEPINITRD
+static int __init keepinitrd_setup(char *__unused)
+{
+ do_retain_initrd = 1;
+ return 1;
+}
+__setup("keepinitrd", keepinitrd_setup);
+#endif
+
extern char __initramfs_start[];
extern unsigned long __initramfs_size;
#include <linux/initrd.h>
#include <linux/kexec.h>
-static void __init free_initrd(void)
+void __weak free_initrd_mem(unsigned long start, unsigned long end)
{
+ free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
+ "initrd");
+}
+
#ifdef CONFIG_KEXEC_CORE
+static bool kexec_free_initrd(void)
+{
unsigned long crashk_start = (unsigned long)__va(crashk_res.start);
unsigned long crashk_end = (unsigned long)__va(crashk_res.end);
-#endif
- if (do_retain_initrd)
- goto skip;
-#ifdef CONFIG_KEXEC_CORE
/*
* If the initrd region is overlapped with crashkernel reserved region,
* free only memory that is not part of crashkernel region.
*/
- if (initrd_start < crashk_end && initrd_end > crashk_start) {
- /*
- * Initialize initrd memory region since the kexec boot does
- * not do.
- */
- memset((void *)initrd_start, 0, initrd_end - initrd_start);
- if (initrd_start < crashk_start)
- free_initrd_mem(initrd_start, crashk_start);
- if (initrd_end > crashk_end)
- free_initrd_mem(crashk_end, initrd_end);
- } else
-#endif
- free_initrd_mem(initrd_start, initrd_end);
-skip:
- initrd_start = 0;
- initrd_end = 0;
+ if (initrd_start >= crashk_end || initrd_end <= crashk_start)
+ return false;
+
+ /*
+ * Initialize the initrd memory region, since the kexec boot does not do so.
+ */
+ memset((void *)initrd_start, 0, initrd_end - initrd_start);
+ if (initrd_start < crashk_start)
+ free_initrd_mem(initrd_start, crashk_start);
+ if (initrd_end > crashk_end)
+ free_initrd_mem(crashk_end, initrd_end);
+ return true;
+}
+#else
+static inline bool kexec_free_initrd(void)
+{
+ return false;
}
+#endif /* CONFIG_KEXEC_CORE */
#ifdef CONFIG_BLK_DEV_RAM
#define BUF_SIZE 1024
@@ -597,7 +610,38 @@ static void __init clean_rootfs(void)
ksys_close(fd);
kfree(buf);
}
-#endif
+#else
+static inline void clean_rootfs(void)
+{
+}
+#endif /* CONFIG_BLK_DEV_RAM */
+
+#ifdef CONFIG_BLK_DEV_RAM
+static void populate_initrd_image(char *err)
+{
+ ssize_t written;
+ int fd;
+
+ unpack_to_rootfs(__initramfs_start, __initramfs_size);
+
+ printk(KERN_INFO "rootfs image is not initramfs (%s); looks like an initrd\n",
+ err);
+ fd = ksys_open("/initrd.image", O_WRONLY | O_CREAT, 0700);
+ if (fd < 0)
+ return;
+
+ written = xwrite(fd, (char *)initrd_start, initrd_end - initrd_start);
+ if (written != initrd_end - initrd_start)
+ pr_err("/initrd.image: incomplete write (%zd != %ld)\n",
+ written, initrd_end - initrd_start);
+ ksys_close(fd);
+}
+#else
+static void populate_initrd_image(char *err)
+{
+ printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err);
+}
+#endif /* CONFIG_BLK_DEV_RAM */
static int __init populate_rootfs(void)
{
@@ -605,46 +649,31 @@ static int __init populate_rootfs(void)
char *err = unpack_to_rootfs(__initramfs_start, __initramfs_size);
if (err)
panic("%s", err); /* Failed to decompress INTERNAL initramfs */
- /* If available load the bootloader supplied initrd */
- if (initrd_start && !IS_ENABLED(CONFIG_INITRAMFS_FORCE)) {
-#ifdef CONFIG_BLK_DEV_RAM
- int fd;
+
+ if (!initrd_start || IS_ENABLED(CONFIG_INITRAMFS_FORCE))
+ goto done;
+
+ if (IS_ENABLED(CONFIG_BLK_DEV_RAM))
printk(KERN_INFO "Trying to unpack rootfs image as initramfs...\n");
- err = unpack_to_rootfs((char *)initrd_start,
- initrd_end - initrd_start);
- if (!err) {
- free_initrd();
- goto done;
- } else {
- clean_rootfs();
- unpack_to_rootfs(__initramfs_start, __initramfs_size);
- }
- printk(KERN_INFO "rootfs image is not initramfs (%s)"
- "; looks like an initrd\n", err);
- fd = ksys_open("/initrd.image",
- O_WRONLY|O_CREAT, 0700);
- if (fd >= 0) {
- ssize_t written = xwrite(fd, (char *)initrd_start,
- initrd_end - initrd_start);
-
- if (written != initrd_end - initrd_start)
- pr_err("/initrd.image: incomplete write (%zd != %ld)\n",
- written, initrd_end - initrd_start);
-
- ksys_close(fd);
- free_initrd();
- }
- done:
- /* empty statement */;
-#else
+ else
printk(KERN_INFO "Unpacking initramfs...\n");
- err = unpack_to_rootfs((char *)initrd_start,
- initrd_end - initrd_start);
- if (err)
- printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err);
- free_initrd();
-#endif
+
+ err = unpack_to_rootfs((char *)initrd_start, initrd_end - initrd_start);
+ if (err) {
+ clean_rootfs();
+ populate_initrd_image(err);
}
+
+done:
+ /*
+ * If the initrd region overlaps the crashkernel reserved region,
+ * free only the memory that is not part of the crashkernel region.
+ */
+ if (!do_retain_initrd && initrd_start && !kexec_free_initrd())
+ free_initrd_mem(initrd_start, initrd_end);
+ initrd_start = 0;
+ initrd_end = 0;
+
flush_delayed_fput();
return 0;
}
diff --git a/init/main.c b/init/main.c
index 598e278b46f7..5a2c69b4d7b3 100644
--- a/init/main.c
+++ b/init/main.c
@@ -504,6 +504,10 @@ void __init __weak thread_stack_cache_init(void)
void __init __weak mem_encrypt_init(void) { }
+void __init __weak poking_init(void) { }
+
+void __init __weak pgd_cache_init(void) { }
+
bool initcall_debug;
core_param(initcall_debug, initcall_debug, bool, 0644);
@@ -535,6 +539,7 @@ static void __init mm_init(void)
init_espfix_bsp();
/* Should be run after espfix64 is set up. */
pti_init();
+ pgd_cache_init();
}
void __init __weak arch_call_rest_init(void)
@@ -564,13 +569,6 @@ asmlinkage __visible void __init start_kernel(void)
page_address_init();
pr_notice("%s", linux_banner);
setup_arch(&command_line);
- /*
- * Set up the the initial canary and entropy after arch
- * and after adding latent and command line entropy.
- */
- add_latent_entropy();
- add_device_randomness(command_line, strlen(command_line));
- boot_init_stack_canary();
mm_init_cpumask(&init_mm);
setup_command_line(command_line);
setup_nr_cpu_ids();
@@ -582,6 +580,8 @@ asmlinkage __visible void __init start_kernel(void)
page_alloc_init();
pr_notice("Kernel command line: %s\n", boot_command_line);
+ /* parameters may set static keys */
+ jump_label_init();
parse_early_param();
after_dashes = parse_args("Booting kernel",
static_command_line, __start___param,
@@ -591,8 +591,6 @@ asmlinkage __visible void __init start_kernel(void)
parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
NULL, set_init_arg);
- jump_label_init();
-
/*
* These use large bootmem allocations and must precede
* kmem_cache_init()
@@ -655,6 +653,20 @@ asmlinkage __visible void __init start_kernel(void)
hrtimers_init();
softirq_init();
timekeeping_init();
+
+ /*
+ * For best initial stack canary entropy, prepare it after:
+ * - setup_arch() for any UEFI RNG entropy and boot cmdline access
+ * - timekeeping_init() for ktime entropy used in rand_initialize()
+ * - rand_initialize() to get any arch-specific entropy like RDRAND
+ * - add_latent_entropy() to get any latent entropy
+ * - adding command line entropy
+ */
+ rand_initialize();
+ add_latent_entropy();
+ add_device_randomness(command_line, strlen(command_line));
+ boot_init_stack_canary();
+
time_init();
printk_safe_init();
perf_event_init();
@@ -737,6 +749,7 @@ asmlinkage __visible void __init start_kernel(void)
taskstats_init_early();
delayacct_init();
+ poking_init();
check_bugs();
acpi_subsystem_init();
@@ -840,7 +853,7 @@ trace_initcall_start_cb(void *data, initcall_t fn)
{
ktime_t *calltime = (ktime_t *)data;
- printk(KERN_DEBUG "calling %pF @ %i\n", fn, task_pid_nr(current));
+ printk(KERN_DEBUG "calling %pS @ %i\n", fn, task_pid_nr(current));
*calltime = ktime_get();
}
@@ -854,7 +867,7 @@ trace_initcall_finish_cb(void *data, initcall_t fn, int ret)
rettime = ktime_get();
delta = ktime_sub(rettime, *calltime);
duration = (unsigned long long) ktime_to_ns(delta) >> 10;
- printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\n",
+ printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n",
fn, ret, duration);
}
@@ -911,7 +924,7 @@ int __init_or_module do_one_initcall(initcall_t fn)
strlcat(msgbuf, "disabled interrupts ", sizeof(msgbuf));
local_irq_enable();
}
- WARN(msgbuf[0], "initcall %pF returned with %s\n", fn, msgbuf);
+ WARN(msgbuf[0], "initcall %pS returned with %s\n", fn, msgbuf);
add_latent_entropy();
return ret;
@@ -1061,6 +1074,11 @@ static inline void mark_readonly(void)
}
#endif
+void __weak free_initmem(void)
+{
+ free_initmem_default(POISON_FREE_INITMEM);
+}
+
static int __ref kernel_init(void *unused)
{
int ret;