diff options
Diffstat (limited to 'init')
| -rw-r--r-- | init/.kunitconfig | 3 | ||||
| -rw-r--r-- | init/Kconfig | 302 | ||||
| -rw-r--r-- | init/Makefile | 1 | ||||
| -rw-r--r-- | init/calibrate.c | 13 | ||||
| -rw-r--r-- | init/do_mounts.c | 5 | ||||
| -rw-r--r-- | init/do_mounts_initrd.c | 4 | ||||
| -rw-r--r-- | init/do_mounts_rd.c | 17 | ||||
| -rw-r--r-- | init/init_task.c | 33 | ||||
| -rw-r--r-- | init/initramfs.c | 72 | ||||
| -rw-r--r-- | init/initramfs_internal.h | 8 | ||||
| -rw-r--r-- | init/initramfs_test.c | 472 | ||||
| -rw-r--r-- | init/main.c | 167 | ||||
| -rw-r--r-- | init/version-timestamp.c | 6 |
13 files changed, 966 insertions, 137 deletions
diff --git a/init/.kunitconfig b/init/.kunitconfig new file mode 100644 index 000000000000..acb906b1a5f9 --- /dev/null +++ b/init/.kunitconfig @@ -0,0 +1,3 @@ +CONFIG_KUNIT=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_TEST=y diff --git a/init/Kconfig b/init/Kconfig index a20e6efd3f0f..fa79feb8fe57 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -82,15 +82,14 @@ config RUSTC_LLVM_VERSION int default $(rustc-llvm-version) -config CC_CAN_LINK +config ARCH_HAS_CC_CAN_LINK bool - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag)) if 64BIT - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag)) -config CC_CAN_LINK_STATIC +config CC_CAN_LINK bool - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag) -static) if 64BIT - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag) -static) + default ARCH_CC_CAN_LINK if ARCH_HAS_CC_CAN_LINK + default $(cc_can_link_user,$(m64-flag)) if 64BIT + default $(cc_can_link_user,$(m32-flag)) # Fixed in GCC 14, 13.3, 12.4 and 11.5 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 @@ -104,7 +103,10 @@ config GCC_ASM_GOTO_OUTPUT_BROKEN config CC_HAS_ASM_GOTO_OUTPUT def_bool y depends on !GCC_ASM_GOTO_OUTPUT_BROKEN + # Detect basic support depends on $(success,echo 'int foo(int x) { asm goto ("": "=r"(x) ::: bar); return x; bar: return 0; }' | $(CC) -x c - -c -o /dev/null) + # Detect clang (< v17) scoped label issues + depends on $(success,echo 'void b(void **);void* c(void);int f(void){{asm goto(""::::l0);return 0;l0:return 1;}void *x __attribute__((cleanup(b)))=c();{asm goto(""::::l1);return 2;l1:return 3;}}' | $(CC) -x c - -c -o /dev/null) config CC_HAS_ASM_GOTO_TIED_OUTPUT depends on CC_HAS_ASM_GOTO_OUTPUT @@ -117,17 +119,55 @@ config TOOLS_SUPPORT_RELR config CC_HAS_ASM_INLINE def_bool 
$(success,echo 'void foo(void) { asm inline (""); }' | $(CC) -x c - -c -o /dev/null) +config CC_HAS_ASSUME + bool + # clang needs to be at least 19.1.0 since the meaning of the assume + # attribute changed: + # https://github.com/llvm/llvm-project/commit/c44fa3e8a9a44c2e9a575768a3c185354b9f6c17 + default y if CC_IS_CLANG && CLANG_VERSION >= 190100 + # supported since gcc 13.1.0 + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106654 + default y if CC_IS_GCC && GCC_VERSION >= 130100 + config CC_HAS_NO_PROFILE_FN_ATTR def_bool $(success,echo '__attribute__((no_profile_instrument_function)) int x();' | $(CC) -x c - -c -o /dev/null -Werror) config CC_HAS_COUNTED_BY - # TODO: when gcc 15 is released remove the build test and add - # a gcc version check - def_bool $(success,echo 'struct flex { int count; int array[] __attribute__((__counted_by__(count))); };' | $(CC) $(CLANG_FLAGS) -x c - -c -o /dev/null -Werror) - # clang needs to be at least 19.1.3 to avoid __bdos miscalculations - # https://github.com/llvm/llvm-project/pull/110497 - # https://github.com/llvm/llvm-project/pull/112636 - depends on !(CC_IS_CLANG && CLANG_VERSION < 190103) + bool + # clang needs to be at least 20.1.0 to avoid potential crashes + # when building structures that contain __counted_by + # https://github.com/ClangBuiltLinux/linux/issues/2114 + # https://github.com/llvm/llvm-project/commit/160fb1121cdf703c3ef5e61fb26c5659eb581489 + default y if CC_IS_CLANG && CLANG_VERSION >= 200100 + # supported since gcc 15.1.0 + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896 + default y if CC_IS_GCC && GCC_VERSION >= 150100 + +config CC_HAS_MULTIDIMENSIONAL_NONSTRING + def_bool $(success,echo 'char tag[][4] __attribute__((__nonstring__)) = { };' | $(CC) $(CLANG_FLAGS) -x c - -c -o /dev/null -Werror) + +config LD_CAN_USE_KEEP_IN_OVERLAY + # ld.lld prior to 21.0.0 did not support KEEP within an overlay description + # https://github.com/llvm/llvm-project/pull/130661 + def_bool LD_IS_BFD || LLD_VERSION 
>= 210000 + +config RUSTC_HAS_SLICE_AS_FLATTENED + def_bool RUSTC_VERSION >= 108000 + +config RUSTC_HAS_COERCE_POINTEE + def_bool RUSTC_VERSION >= 108400 + +config RUSTC_HAS_SPAN_FILE + def_bool RUSTC_VERSION >= 108800 + +config RUSTC_HAS_UNNECESSARY_TRANSMUTES + def_bool RUSTC_VERSION >= 108800 + +config RUSTC_HAS_FILE_WITH_NUL + def_bool RUSTC_VERSION >= 108900 + +config RUSTC_HAS_FILE_AS_C_STR + def_bool RUSTC_VERSION >= 109100 config PAHOLE_VERSION int @@ -156,6 +196,10 @@ menu "General setup" config BROKEN bool + help + This option allows you to choose whether you want to try to + compile (and fix) old drivers that haven't been updated to + new infrastructure. config BROKEN_ON_SMP bool @@ -468,16 +512,6 @@ config CROSS_MEMORY_ATTACH to directly read from or write to another process' address space. See the man page for more details. -config USELIB - bool "uselib syscall (for libc5 and earlier)" - default ALPHA || M68K || SPARC - help - This option enables the uselib syscall, a system call used in the - dynamic linker from libc5 and earlier. glibc does not use this - system call. If you intend to run programs built on libc5 or - earlier, you may need to enable this syscall. Current systems - running glibc can safely disable this. - config AUDIT bool "Auditing support" depends on NET @@ -703,7 +737,7 @@ endmenu # "CPU/Task time and stats accounting" config CPU_ISOLATION bool "CPU isolation" - depends on SMP || COMPILE_TEST + depends on SMP default y help Make sure that CPUs running critical tasks are not disturbed by @@ -872,6 +906,18 @@ config UCLAMP_BUCKETS_COUNT If in doubt, use the default value. 
+config SCHED_PROXY_EXEC + bool "Proxy Execution" + # Avoid some build failures w/ PREEMPT_RT until it can be fixed + depends on !PREEMPT_RT + # Need to investigate how to inform sched_ext of split contexts + depends on !SCHED_CLASS_EXT + # Not particularly useful until we get to multi-rq proxying + depends on EXPERT + help + This option enables proxy execution, a mechanism for mutex-owning + tasks to inherit the scheduling context of higher priority waiters. + endmenu # @@ -989,9 +1035,24 @@ config MEMCG select PAGE_COUNTER select EVENTFD select SLAB_OBJ_EXT + select VM_EVENT_COUNTERS help Provides control over the memory footprint of tasks in a cgroup. +config MEMCG_NMI_UNSAFE + bool + depends on MEMCG + depends on HAVE_NMI + depends on !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && !ARCH_HAVE_NMI_SAFE_CMPXCHG + default y + +config MEMCG_NMI_SAFETY_REQUIRES_ATOMIC + bool + depends on MEMCG + depends on HAVE_NMI + depends on !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && ARCH_HAVE_NMI_SAFE_CMPXCHG + default y + config MEMCG_V1 bool "Legacy cgroup v1 memory controller" depends on MEMCG @@ -1048,6 +1109,9 @@ if CGROUP_SCHED config GROUP_SCHED_WEIGHT def_bool n +config GROUP_SCHED_BANDWIDTH + def_bool n + config FAIR_GROUP_SCHED bool "Group scheduling for SCHED_OTHER" depends on CGROUP_SCHED @@ -1057,6 +1121,7 @@ config FAIR_GROUP_SCHED config CFS_BANDWIDTH bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED" depends on FAIR_GROUP_SCHED + select GROUP_SCHED_BANDWIDTH default n help This option allows users to define CPU bandwidth rates (limits) for @@ -1076,10 +1141,22 @@ config RT_GROUP_SCHED realtime bandwidth for them. See Documentation/scheduler/sched-rt-group.rst for more information. +config RT_GROUP_SCHED_DEFAULT_DISABLED + bool "Require boot parameter to enable group scheduling for SCHED_RR/FIFO" + depends on RT_GROUP_SCHED + default n + help + When set, the RT group scheduling is disabled by default. 
The option + is in inverted form so that mere RT_GROUP_SCHED enables the group + scheduling. + + Say N if unsure. + config EXT_GROUP_SCHED bool depends on SCHED_CLASS_EXT && CGROUP_SCHED select GROUP_SCHED_WEIGHT + select GROUP_SCHED_BANDWIDTH default y endif #CGROUP_SCHED @@ -1136,6 +1213,16 @@ config CGROUP_RDMA Attaching processes with active RDMA resources to the cgroup hierarchy is allowed even if can cross the hierarchy's limit. +config CGROUP_DMEM + bool "Device memory controller (DMEM)" + select PAGE_COUNTER + help + The DMEM controller allows compatible devices to restrict device + memory usage based on the cgroup hierarchy. + + As an example, it allows you to restrict VRAM usage for applications + in the DRM subsystem. + config CGROUP_FREEZER bool "Freezer controller" help @@ -1182,7 +1269,8 @@ config CPUSETS_V1 help Legacy cgroup v1 cpusets controller which has been deprecated by cgroup v2 implementation. The v1 is there for legacy applications - which haven't migrated to the new cgroup v2 interface yet. If you + which haven't migrated to the new cgroup v2 interface yet. Legacy + interface includes cpuset filesystem and /proc/<pid>/cpuset. If you do not have any such application then you are completely fine leaving this option disabled. @@ -1190,7 +1278,7 @@ config CPUSETS_V1 config PROC_PID_CPUSET bool "Include legacy /proc/<pid>/cpuset file" - depends on CPUSETS + depends on CPUSETS_V1 default y config CGROUP_DEVICE @@ -1282,7 +1370,7 @@ config UTS_NS config TIME_NS bool "TIME namespace" - depends on GENERIC_VDSO_TIME_NS + depends on GENERIC_GETTIMEOFDAY default y help In this namespace boottime and monotonic clocks can be set. @@ -1431,8 +1519,27 @@ config BOOT_CONFIG_EMBED_FILE This bootconfig will be used if there is no initrd or no other bootconfig in the initrd. 
+config CMDLINE_LOG_WRAP_IDEAL_LEN + int "Length to try to wrap the cmdline when logged at boot" + default 1021 + range 0 1021 + help + At boot time, the kernel command line is logged to the console. + The log message will start with the prefix "Kernel command line: ". + The log message will attempt to be wrapped (split into multiple log + messages) at spaces based on CMDLINE_LOG_WRAP_IDEAL_LEN characters. + If wrapping happens, each log message will start with the prefix and + all but the last message will end with " \". Messages may exceed the + ideal length if a place to wrap isn't found before the specified + number of characters. + + A value of 0 disables wrapping, though be warned that the maximum + length of a log message (1021 characters) may cause the cmdline to + be truncated. + config INITRAMFS_PRESERVE_MTIME bool "Preserve cpio archive mtimes in initramfs" + depends on BLK_DEV_INITRD default y help Each entry in an initramfs cpio archive carries an mtime value. When @@ -1441,6 +1548,13 @@ config INITRAMFS_PRESERVE_MTIME If unsure, say Y. +config INITRAMFS_TEST + bool "Test initramfs cpio archive extraction" if !KUNIT_ALL_TESTS + depends on BLK_DEV_INITRD && KUNIT=y + default KUNIT_ALL_TESTS + help + Build KUnit tests for initramfs. See Documentation/dev-tools/kunit + choice prompt "Compiler optimization level" default CC_OPTIMIZE_FOR_PERFORMANCE @@ -1526,6 +1640,16 @@ config SYSCTL_ARCH_UNALIGN_ALLOW the unaligned access emulation. see arch/parisc/kernel/unaligned.c for reference +config SYSFS_SYSCALL + bool "Sysfs syscall support" + default n + help + sys_sysfs is an obsolete system call no longer supported in libc. + Note that disabling this option is more secure but might break + compatibility with some systems. + + If unsure say N here. + config HAVE_PCSPKR_PLATFORM bool @@ -1570,16 +1694,6 @@ config SGETMASK_SYSCALL If unsure, leave the default option here. 
-config SYSFS_SYSCALL - bool "Sysfs syscall support" if EXPERT - default y - help - sys_sysfs is an obsolete system call no longer supported in libc. - Note that disabling this option is more secure but might break - compatibility with some systems. - - If unsure say Y here. - config FHANDLE bool "open by fhandle syscalls" if EXPERT select EXPORTFS @@ -1621,6 +1735,18 @@ config PRINTK very difficult to diagnose system problems, saying N here is strongly discouraged. +config PRINTK_RINGBUFFER_KUNIT_TEST + tristate "KUnit Test for the printk ringbuffer" if !KUNIT_ALL_TESTS + depends on PRINTK && KUNIT + default KUNIT_ALL_TESTS + help + This builds the printk ringbuffer KUnit test suite. + + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation. + + If unsure, say N. + config BUG bool "BUG() support" if EXPERT default y @@ -1670,6 +1796,16 @@ config FUTEX_PI depends on FUTEX && RT_MUTEXES default y +config FUTEX_PRIVATE_HASH + bool + depends on FUTEX && !BASE_SMALL && MMU + default y + +config FUTEX_MPOL + bool + depends on FUTEX && NUMA + default y + config EPOLL bool "Enable eventpoll support" if EXPERT default y @@ -1734,7 +1870,7 @@ config IO_URING config GCOV_PROFILE_URING bool "Enable GCOV profiling on the io_uring subsystem" - depends on GCOV_KERNEL + depends on IO_URING && GCOV_KERNEL help Enable GCOV profiling on the io_uring subsystem, to facilitate code coverage testing. @@ -1745,6 +1881,17 @@ config GCOV_PROFILE_URING the io_uring subsystem, hence this should only be enabled for specific test purposes. +config IO_URING_MOCK_FILE + tristate "Enable io_uring mock files (Experimental)" if EXPERT + default n + depends on IO_URING + help + Enable mock files for io_uring subsystem testing. The ABI might + still change, so it's still experimental and should only be enabled + for specific test purposes. + + If unsure, say N. 
+ config ADVISE_SYSCALLS bool "Enable madvise/fadvise syscalls" if EXPERT default y @@ -1791,10 +1938,36 @@ config RSEQ If unsure, say Y. +config RSEQ_STATS + default n + bool "Enable lightweight statistics of restartable sequences" if EXPERT + depends on RSEQ && DEBUG_FS + help + Enable lightweight counters which expose information about the + frequency of RSEQ operations via debugfs. Mostly interesting for + kernel debugging or performance analysis. While lightweight it's + still adding code into the user/kernel mode transitions. + + If unsure, say N. + +config RSEQ_DEBUG_DEFAULT_ENABLE + default n + bool "Enable restartable sequences debug mode by default" if EXPERT + depends on RSEQ + help + This enables the static branch for debug mode of restartable + sequences. + + This also can be controlled on the kernel command line via the + command line parameter "rseq_debug=0/1" and through debugfs. + + If unsure, say N. + config DEBUG_RSEQ default n bool "Enable debugging of rseq() system call" if EXPERT - depends on RSEQ && DEBUG_KERNEL + depends on RSEQ && DEBUG_KERNEL && !GENERIC_ENTRY + select RSEQ_DEBUG_DEFAULT_ENABLE help Enable extra debugging checks for the rseq system call. @@ -1810,13 +1983,6 @@ config CACHESTAT_SYSCALL If unsure say Y here. -config PC104 - bool "PC/104 support" if EXPERT - help - Expose PC/104 form factor device drivers and options available for - selection and configuration. Enable this option if your target - machine has a PC/104 bus. - config KALLSYMS bool "Load all symbols for debugging/ksymoops" if EXPERT default y @@ -1856,11 +2022,6 @@ config KALLSYMS_ALL Say N unless you really need all symbols, or kernel live patching. 
-config KALLSYMS_ABSOLUTE_PERCPU - bool - depends on KALLSYMS - default X86_64 && SMP - # end of the "standard kernel features (expert users)" menu config ARCH_HAS_MEMBARRIER_CALLBACKS @@ -1869,6 +2030,28 @@ config ARCH_HAS_MEMBARRIER_CALLBACKS config ARCH_HAS_MEMBARRIER_SYNC_CORE bool +config ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS + bool + help + Control MSEAL_SYSTEM_MAPPINGS access based on architecture. + + A 64-bit kernel is required for the memory sealing feature. + No specific hardware features from the CPU are needed. + + To enable this feature, the architecture needs to update their + special mappings calls to include the sealing flag and confirm + that it doesn't unmap/remap system mappings during the + lifetime of the process. The existence of this flag for an architecture + implies that it does not require the remapping of the system + mappings during process lifetime, so sealing these mappings is safe + from a kernel perspective. + + After the architecture enables this, a distribution can set + CONFIG_MSEAL_SYSTEM_MAPPINGS to manage access to the feature. 
+ + For complete descriptions of memory sealing, please see + Documentation/userspace-api/mseal.rst + config HAVE_PERF_EVENTS bool help @@ -1956,12 +2139,13 @@ config RUST bool "Rust support" depends on HAVE_RUST depends on RUST_IS_AVAILABLE - depends on !MODVERSIONS + select EXTENDED_MODVERSIONS if MODVERSIONS + depends on !MODVERSIONS || GENDWARFKSYMS depends on !GCC_PLUGIN_RANDSTRUCT depends on !RANDSTRUCT - depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE - depends on !CFI_CLANG || HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC - select CFI_ICALL_NORMALIZE_INTEGERS if CFI_CLANG + depends on !DEBUG_INFO_BTF || (PAHOLE_HAS_LANG_EXCLUDE && !LTO) + depends on !CFI || HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC + select CFI_ICALL_NORMALIZE_INTEGERS if CFI depends on !CALL_PADDING || RUSTC_VERSION >= 108100 depends on !KASAN_SW_TAGS depends on !(MITIGATION_RETHUNK && KASAN) || RUSTC_VERSION >= 108300 @@ -1989,8 +2173,10 @@ config BINDGEN_VERSION_TEXT string depends on RUST # The dummy parameter `workaround-for-0.69.0` is required to support 0.69.0 - # (https://github.com/rust-lang/rust-bindgen/pull/2678). It can be removed when - # the minimum version is upgraded past that (0.69.1 already fixed the issue). + # (https://github.com/rust-lang/rust-bindgen/pull/2678) and 0.71.0 + # (https://github.com/rust-lang/rust-bindgen/pull/3040). It can be removed + # when the minimum version is upgraded past the latter (0.69.1 and 0.71.1 + # both fixed the issue). 
default "$(shell,$(BINDGEN) --version workaround-for-0.69.0 2>/dev/null)" # @@ -2003,6 +2189,8 @@ config TRACEPOINTS source "kernel/Kconfig.kexec" +source "kernel/liveupdate/Kconfig" + endmenu # General setup source "arch/Kconfig" diff --git a/init/Makefile b/init/Makefile index 10b652d33e87..d6f75d8907e0 100644 --- a/init/Makefile +++ b/init/Makefile @@ -12,6 +12,7 @@ else obj-$(CONFIG_BLK_DEV_INITRD) += initramfs.o endif obj-$(CONFIG_GENERIC_CALIBRATE_DELAY) += calibrate.o +obj-$(CONFIG_INITRAMFS_TEST) += initramfs_test.o obj-y += init_task.o diff --git a/init/calibrate.c b/init/calibrate.c index f3831272f113..63be4c65bc52 100644 --- a/init/calibrate.c +++ b/init/calibrate.c @@ -5,19 +5,22 @@ * Copyright (C) 1991, 1992 Linus Torvalds */ -#include <linux/jiffies.h> #include <linux/delay.h> #include <linux/init.h> -#include <linux/timex.h> -#include <linux/smp.h> +#include <linux/jiffies.h> +#include <linux/kstrtox.h> #include <linux/percpu.h> +#include <linux/printk.h> +#include <linux/smp.h> +#include <linux/stddef.h> +#include <linux/timex.h> unsigned long lpj_fine; unsigned long preset_lpj; + static int __init lpj_setup(char *str) { - preset_lpj = simple_strtoul(str,NULL,0); - return 1; + return kstrtoul(str, 0, &preset_lpj) == 0; } __setup("lpj=", lpj_setup); diff --git a/init/do_mounts.c b/init/do_mounts.c index 6af29da8889e..defbbf1d55f7 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -120,7 +120,8 @@ static int __init fs_names_setup(char *str) static unsigned int __initdata root_delay; static int __init root_delay_setup(char *str) { - root_delay = simple_strtoul(str, NULL, 0); + if (kstrtouint(str, 0, &root_delay)) + return 0; return 1; } @@ -507,7 +508,7 @@ static int rootfs_init_fs_context(struct fs_context *fc) struct file_system_type rootfs_fs_type = { .name = "rootfs", .init_fs_context = rootfs_init_fs_context, - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, }; void __init init_rootfs(void) diff --git a/init/do_mounts_initrd.c 
b/init/do_mounts_initrd.c index 22c7f41ff642..f6867bad0d78 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -21,7 +21,7 @@ phys_addr_t phys_initrd_start __initdata; unsigned long phys_initrd_size __initdata; #ifdef CONFIG_SYSCTL -static struct ctl_table kern_do_mounts_initrd_table[] = { +static const struct ctl_table kern_do_mounts_initrd_table[] = { { .procname = "real-root-dev", .data = &real_root_dev, @@ -89,7 +89,7 @@ static void __init handle_initrd(char *root_device_name) extern char *envp_init[]; int error; - pr_warn("using deprecated initrd support, will be removed in 2021.\n"); + pr_warn("using deprecated initrd support, will be removed soon.\n"); real_root_dev = new_encode_dev(ROOT_DEV); create_dev("/dev/root.old", Root_RAM0); diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index ac021ae6e6fa..eddbe5cb0413 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -7,6 +7,7 @@ #include <uapi/linux/cramfs_fs.h> #include <linux/initrd.h> #include <linux/string.h> +#include <linux/string_choices.h> #include <linux/slab.h> #include "do_mounts.h" @@ -28,8 +29,7 @@ int __initdata rd_image_start; /* starting block # of image */ static int __init ramdisk_start_setup(char *str) { - rd_image_start = simple_strtol(str,NULL,0); - return 1; + return kstrtoint(str, 0, &rd_image_start) == 0; } __setup("ramdisk_start=", ramdisk_start_setup); @@ -186,14 +186,12 @@ static unsigned long nr_blocks(struct file *file) int __init rd_load_image(char *from) { int res = 0; - unsigned long rd_blocks, devblocks; + unsigned long rd_blocks, devblocks, nr_disks; int nblocks, i; char *buf = NULL; unsigned short rotate = 0; decompress_fn decompressor = NULL; -#if !defined(CONFIG_S390) char rotator[4] = { '|' , '/' , '-' , '\\' }; -#endif out_file = filp_open("/dev/ram", O_RDWR, 0); if (IS_ERR(out_file)) @@ -244,8 +242,9 @@ int __init rd_load_image(char *from) goto done; } - printk(KERN_NOTICE "RAMDISK: Loading %dKiB [%ld disk%s] into ram disk... 
", - nblocks, ((nblocks-1)/devblocks)+1, nblocks>devblocks ? "s" : ""); + nr_disks = (nblocks - 1) / devblocks + 1; + pr_notice("RAMDISK: Loading %dKiB [%ld disk%s] into ram disk... ", + nblocks, nr_disks, str_plural(nr_disks)); for (i = 0; i < nblocks; i++) { if (i && (i % devblocks == 0)) { pr_cont("done disk #1.\n"); @@ -255,12 +254,10 @@ int __init rd_load_image(char *from) } kernel_read(in_file, buf, BLOCK_SIZE, &in_pos); kernel_write(out_file, buf, BLOCK_SIZE, &out_pos); -#if !defined(CONFIG_S390) - if (!(i % 16)) { + if (!IS_ENABLED(CONFIG_S390) && !(i % 16)) { pr_cont("%c\b", rotator[rotate & 0x3]); rotate++; } -#endif } pr_cont("done.\n"); diff --git a/init/init_task.c b/init/init_task.c index e557f622bd90..49b13d7c3985 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -27,6 +27,9 @@ static struct signal_struct init_signals = { }, .multiprocess = HLIST_HEAD_INIT, .rlim = INIT_RLIMITS, +#ifdef CONFIG_CGROUPS + .cgroup_threadgroup_rwsem = __RWSEM_INITIALIZER(init_signals.cgroup_threadgroup_rwsem), +#endif .cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex), .exec_update_lock = __RWSEM_INITIALIZER(init_signals.exec_update_lock), #ifdef CONFIG_POSIX_TIMERS @@ -59,6 +62,33 @@ unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] = { }; #endif +/* init to 2 - one for init_task, one to ensure it is never freed */ +static struct group_info init_groups = { .usage = REFCOUNT_INIT(2) }; + +/* + * The initial credentials for the initial task + */ +static struct cred init_cred = { + .usage = ATOMIC_INIT(4), + .uid = GLOBAL_ROOT_UID, + .gid = GLOBAL_ROOT_GID, + .suid = GLOBAL_ROOT_UID, + .sgid = GLOBAL_ROOT_GID, + .euid = GLOBAL_ROOT_UID, + .egid = GLOBAL_ROOT_GID, + .fsuid = GLOBAL_ROOT_UID, + .fsgid = GLOBAL_ROOT_GID, + .securebits = SECUREBITS_DEFAULT, + .cap_inheritable = CAP_EMPTY_SET, + .cap_permitted = CAP_FULL_SET, + .cap_effective = CAP_FULL_SET, + .cap_bset = CAP_FULL_SET, + .user = INIT_USER, + .user_ns = &init_user_ns, + 
.group_info = &init_groups, + .ucounts = &init_ucounts, +}; + /* * Set up the first task table, touch at your own risk!. Base=0, * limit=0x1fffff (=2MB) @@ -220,6 +250,9 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = { #ifdef CONFIG_SECCOMP_FILTER .seccomp = { .filter_count = ATOMIC_INIT(0) }, #endif +#ifdef CONFIG_SCHED_MM_CID + .mm_cid = { .cid = MM_CID_UNSET, }, +#endif }; EXPORT_SYMBOL(init_task); diff --git a/init/initramfs.c b/init/initramfs.c index b2f7583bb1f5..6ddbfb17fb8f 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/init.h> #include <linux/async.h> +#include <linux/export.h> #include <linux/fs.h> #include <linux/slab.h> #include <linux/types.h> @@ -18,8 +19,10 @@ #include <linux/init_syscalls.h> #include <linux/umh.h> #include <linux/security.h> +#include <linux/overflow.h> #include "do_mounts.h" +#include "initramfs_internal.h" static __initdata bool csum_present; static __initdata u32 io_csum; @@ -75,6 +78,7 @@ static __initdata struct hash { struct hash *next; char name[N_ALIGN(PATH_MAX)]; } *head[32]; +static __initdata bool hardlink_seen; static inline int hash(int major, int minor, int ino) { @@ -105,22 +109,24 @@ static char __init *find_link(int major, int minor, int ino, q->minor = minor; q->ino = ino; q->mode = mode; - strcpy(q->name, name); + strscpy(q->name, name); q->next = NULL; *p = q; + hardlink_seen = true; return NULL; } static void __init free_hash(void) { struct hash **p, *q; - for (p = head; p < head + 32; p++) { + for (p = head; hardlink_seen && p < head + 32; p++) { while (*p) { q = *p; *p = q->next; kfree(q); } } + hardlink_seen = false; } #ifdef CONFIG_INITRAMFS_PRESERVE_MTIME @@ -143,12 +149,11 @@ struct dir_entry { char name[]; }; -static void __init dir_add(const char *name, time64_t mtime) +static void __init dir_add(const char *name, size_t nlen, time64_t mtime) { - size_t nlen = strlen(name) + 1; struct dir_entry *de; - de = 
kmalloc(sizeof(struct dir_entry) + nlen, GFP_KERNEL); + de = kmalloc(struct_size(de, name, nlen), GFP_KERNEL); if (!de) panic_show_mem("can't allocate dir_entry buffer"); INIT_LIST_HEAD(&de->list); @@ -169,7 +174,7 @@ static void __init dir_utime(void) #else static void __init do_utime(char *filename, time64_t mtime) {} static void __init do_utime_path(const struct path *path, time64_t mtime) {} -static void __init dir_add(const char *name, time64_t mtime) {} +static void __init dir_add(const char *name, size_t nlen, time64_t mtime) {} static void __init dir_utime(void) {} #endif @@ -188,14 +193,11 @@ static __initdata u32 hdr_csum; static void __init parse_header(char *s) { unsigned long parsed[13]; - char buf[9]; int i; - buf[8] = '\0'; - for (i = 0, s += 6; i < 13; i++, s += 8) { - memcpy(buf, s, 8); - parsed[i] = simple_strtoul(buf, NULL, 16); - } + for (i = 0, s += 6; i < 13; i++, s += 8) + parsed[i] = simple_strntoul(s, NULL, 16, 8); + ino = parsed[0]; mode = parsed[1]; uid = parsed[2]; @@ -256,7 +258,7 @@ static __initdata char *header_buf, *symlink_buf, *name_buf; static int __init do_start(void) { - read_into(header_buf, 110, GotHeader); + read_into(header_buf, CPIO_HDRLEN, GotHeader); return 0; } @@ -396,7 +398,7 @@ static int __init do_name(void) init_mkdir(collected, mode); init_chown(collected, uid, gid, 0); init_chmod(collected, mode); - dir_add(collected, mtime); + dir_add(collected, name_len, mtime); } else if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) { if (maybe_link() == 0) { @@ -497,20 +499,33 @@ static unsigned long my_inptr __initdata; /* index of next byte to be processed #include <linux/decompress/generic.h> -static char * __init unpack_to_rootfs(char *buf, unsigned long len) +/** + * unpack_to_rootfs - decompress and extract an initramfs archive + * @buf: input initramfs archive to extract + * @len: length of initramfs data to process + * + * Returns: NULL for success or an error message string + * + * This symbol 
shouldn't be used externally. It's available for unit tests. + */ +char * __init unpack_to_rootfs(char *buf, unsigned long len) { long written; decompress_fn decompress; const char *compress_name; - static __initdata char msg_buf[64]; + struct { + char header[CPIO_HDRLEN]; + char symlink[PATH_MAX + N_ALIGN(PATH_MAX) + 1]; + char name[N_ALIGN(PATH_MAX)]; + } *bufs = kmalloc(sizeof(*bufs), GFP_KERNEL); - header_buf = kmalloc(110, GFP_KERNEL); - symlink_buf = kmalloc(PATH_MAX + N_ALIGN(PATH_MAX) + 1, GFP_KERNEL); - name_buf = kmalloc(N_ALIGN(PATH_MAX), GFP_KERNEL); - - if (!header_buf || !symlink_buf || !name_buf) + if (!bufs) panic_show_mem("can't allocate buffers"); + header_buf = bufs->header; + symlink_buf = bufs->symlink; + name_buf = bufs->name; + state = Start; this_header = 0; message = NULL; @@ -538,12 +553,9 @@ static char * __init unpack_to_rootfs(char *buf, unsigned long len) if (res) error("decompressor failed"); } else if (compress_name) { - if (!message) { - snprintf(msg_buf, sizeof msg_buf, - "compression method %s not configured", - compress_name); - message = msg_buf; - } + pr_err("compression method %s not configured\n", + compress_name); + error("decompressor failed"); } else error("invalid magic at start of compressed archive"); if (state != Reset) @@ -553,9 +565,9 @@ static char * __init unpack_to_rootfs(char *buf, unsigned long len) len -= my_inptr; } dir_utime(); - kfree(name_buf); - kfree(symlink_buf); - kfree(header_buf); + /* free any hardlink state collected without optional TRAILER!!! 
*/ + free_hash(); + kfree(bufs); return message; } diff --git a/init/initramfs_internal.h b/init/initramfs_internal.h new file mode 100644 index 000000000000..233dad16b0a0 --- /dev/null +++ b/init/initramfs_internal.h @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef __INITRAMFS_INTERNAL_H__ +#define __INITRAMFS_INTERNAL_H__ + +char *unpack_to_rootfs(char *buf, unsigned long len); +#define CPIO_HDRLEN 110 + +#endif diff --git a/init/initramfs_test.c b/init/initramfs_test.c new file mode 100644 index 000000000000..5d2db455e60c --- /dev/null +++ b/init/initramfs_test.c @@ -0,0 +1,472 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <kunit/test.h> +#include <linux/fcntl.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/init_syscalls.h> +#include <linux/stringify.h> +#include <linux/timekeeping.h> +#include "initramfs_internal.h" + +struct initramfs_test_cpio { + char *magic; + unsigned int ino; + unsigned int mode; + unsigned int uid; + unsigned int gid; + unsigned int nlink; + unsigned int mtime; + unsigned int filesize; + unsigned int devmajor; + unsigned int devminor; + unsigned int rdevmajor; + unsigned int rdevminor; + unsigned int namesize; + unsigned int csum; + char *fname; + char *data; +}; + +static size_t fill_cpio(struct initramfs_test_cpio *cs, size_t csz, char *out) +{ + int i; + size_t off = 0; + + for (i = 0; i < csz; i++) { + char *pos = &out[off]; + struct initramfs_test_cpio *c = &cs[i]; + size_t thislen; + + /* +1 to account for nulterm */ + thislen = sprintf(pos, "%s" + "%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x" + "%s", + c->magic, c->ino, c->mode, c->uid, c->gid, c->nlink, + c->mtime, c->filesize, c->devmajor, c->devminor, + c->rdevmajor, c->rdevminor, c->namesize, c->csum, + c->fname) + 1; + + pr_debug("packing (%zu): %.*s\n", thislen, (int)thislen, pos); + if (thislen != CPIO_HDRLEN + c->namesize) + pr_debug("padded to: %u\n", CPIO_HDRLEN + c->namesize); + off += CPIO_HDRLEN + c->namesize; + while (off 
& 3) + out[off++] = '\0'; + + memcpy(&out[off], c->data, c->filesize); + off += c->filesize; + while (off & 3) + out[off++] = '\0'; + } + + return off; +} + +static void __init initramfs_test_extract(struct kunit *test) +{ + char *err, *cpio_srcbuf; + size_t len; + struct timespec64 ts_before, ts_after; + struct kstat st = {}; + struct initramfs_test_cpio c[] = { { + .magic = "070701", + .ino = 1, + .mode = S_IFREG | 0777, + .uid = 12, + .gid = 34, + .nlink = 1, + .mtime = 56, + .filesize = 0, + .devmajor = 0, + .devminor = 1, + .rdevmajor = 0, + .rdevminor = 0, + .namesize = sizeof("initramfs_test_extract"), + .csum = 0, + .fname = "initramfs_test_extract", + }, { + .magic = "070701", + .ino = 2, + .mode = S_IFDIR | 0777, + .nlink = 1, + .mtime = 57, + .devminor = 1, + .namesize = sizeof("initramfs_test_extract_dir"), + .fname = "initramfs_test_extract_dir", + }, { + .magic = "070701", + .namesize = sizeof("TRAILER!!!"), + .fname = "TRAILER!!!", + } }; + + /* +3 to cater for any 4-byte end-alignment */ + cpio_srcbuf = kzalloc(ARRAY_SIZE(c) * (CPIO_HDRLEN + PATH_MAX + 3), + GFP_KERNEL); + len = fill_cpio(c, ARRAY_SIZE(c), cpio_srcbuf); + + ktime_get_real_ts64(&ts_before); + err = unpack_to_rootfs(cpio_srcbuf, len); + ktime_get_real_ts64(&ts_after); + if (err) { + KUNIT_FAIL(test, "unpack failed %s", err); + goto out; + } + + KUNIT_EXPECT_EQ(test, init_stat(c[0].fname, &st, 0), 0); + KUNIT_EXPECT_TRUE(test, S_ISREG(st.mode)); + KUNIT_EXPECT_TRUE(test, uid_eq(st.uid, KUIDT_INIT(c[0].uid))); + KUNIT_EXPECT_TRUE(test, gid_eq(st.gid, KGIDT_INIT(c[0].gid))); + KUNIT_EXPECT_EQ(test, st.nlink, 1); + if (IS_ENABLED(CONFIG_INITRAMFS_PRESERVE_MTIME)) { + KUNIT_EXPECT_EQ(test, st.mtime.tv_sec, c[0].mtime); + } else { + KUNIT_EXPECT_GE(test, st.mtime.tv_sec, ts_before.tv_sec); + KUNIT_EXPECT_LE(test, st.mtime.tv_sec, ts_after.tv_sec); + } + KUNIT_EXPECT_EQ(test, st.blocks, c[0].filesize); + + KUNIT_EXPECT_EQ(test, init_stat(c[1].fname, &st, 0), 0); + KUNIT_EXPECT_TRUE(test, 
S_ISDIR(st.mode)); + if (IS_ENABLED(CONFIG_INITRAMFS_PRESERVE_MTIME)) { + KUNIT_EXPECT_EQ(test, st.mtime.tv_sec, c[1].mtime); + } else { + KUNIT_EXPECT_GE(test, st.mtime.tv_sec, ts_before.tv_sec); + KUNIT_EXPECT_LE(test, st.mtime.tv_sec, ts_after.tv_sec); + } + + KUNIT_EXPECT_EQ(test, init_unlink(c[0].fname), 0); + KUNIT_EXPECT_EQ(test, init_rmdir(c[1].fname), 0); +out: + kfree(cpio_srcbuf); +} + +/* + * Don't terminate filename. Previously, the cpio filename field was passed + * directly to filp_open(collected, O_CREAT|..) without nulterm checks. See + * https://lore.kernel.org/linux-fsdevel/20241030035509.20194-2-ddiss@suse.de + */ +static void __init initramfs_test_fname_overrun(struct kunit *test) +{ + char *err, *cpio_srcbuf; + size_t len, suffix_off; + struct initramfs_test_cpio c[] = { { + .magic = "070701", + .ino = 1, + .mode = S_IFREG | 0777, + .uid = 0, + .gid = 0, + .nlink = 1, + .mtime = 1, + .filesize = 0, + .devmajor = 0, + .devminor = 1, + .rdevmajor = 0, + .rdevminor = 0, + .namesize = sizeof("initramfs_test_fname_overrun"), + .csum = 0, + .fname = "initramfs_test_fname_overrun", + } }; + + /* + * poison cpio source buffer, so we can detect overrun. source + * buffer is used by read_into() when hdr or fname + * are already available (e.g. no compression). 
+ */ + cpio_srcbuf = kmalloc(CPIO_HDRLEN + PATH_MAX + 3, GFP_KERNEL); + memset(cpio_srcbuf, 'B', CPIO_HDRLEN + PATH_MAX + 3); + /* limit overrun to avoid crashes / filp_open() ENAMETOOLONG */ + cpio_srcbuf[CPIO_HDRLEN + strlen(c[0].fname) + 20] = '\0'; + + len = fill_cpio(c, ARRAY_SIZE(c), cpio_srcbuf); + /* overwrite trailing fname terminator and padding */ + suffix_off = len - 1; + while (cpio_srcbuf[suffix_off] == '\0') { + cpio_srcbuf[suffix_off] = 'P'; + suffix_off--; + } + + err = unpack_to_rootfs(cpio_srcbuf, len); + KUNIT_EXPECT_NOT_NULL(test, err); + + kfree(cpio_srcbuf); +} + +static void __init initramfs_test_data(struct kunit *test) +{ + char *err, *cpio_srcbuf; + size_t len; + struct file *file; + struct initramfs_test_cpio c[] = { { + .magic = "070701", + .ino = 1, + .mode = S_IFREG | 0777, + .uid = 0, + .gid = 0, + .nlink = 1, + .mtime = 1, + .filesize = sizeof("ASDF") - 1, + .devmajor = 0, + .devminor = 1, + .rdevmajor = 0, + .rdevminor = 0, + .namesize = sizeof("initramfs_test_data"), + .csum = 0, + .fname = "initramfs_test_data", + .data = "ASDF", + } }; + + /* +6 for max name and data 4-byte padding */ + cpio_srcbuf = kmalloc(CPIO_HDRLEN + c[0].namesize + c[0].filesize + 6, + GFP_KERNEL); + + len = fill_cpio(c, ARRAY_SIZE(c), cpio_srcbuf); + + err = unpack_to_rootfs(cpio_srcbuf, len); + KUNIT_EXPECT_NULL(test, err); + + file = filp_open(c[0].fname, O_RDONLY, 0); + if (IS_ERR(file)) { + KUNIT_FAIL(test, "open failed"); + goto out; + } + + /* read back file contents into @cpio_srcbuf and confirm match */ + len = kernel_read(file, cpio_srcbuf, c[0].filesize, NULL); + KUNIT_EXPECT_EQ(test, len, c[0].filesize); + KUNIT_EXPECT_MEMEQ(test, cpio_srcbuf, c[0].data, len); + + fput(file); + KUNIT_EXPECT_EQ(test, init_unlink(c[0].fname), 0); +out: + kfree(cpio_srcbuf); +} + +static void __init initramfs_test_csum(struct kunit *test) +{ + char *err, *cpio_srcbuf; + size_t len; + struct initramfs_test_cpio c[] = { { + /* 070702 magic indicates a valid csum is 
present */ + .magic = "070702", + .ino = 1, + .mode = S_IFREG | 0777, + .nlink = 1, + .filesize = sizeof("ASDF") - 1, + .devminor = 1, + .namesize = sizeof("initramfs_test_csum"), + .csum = 'A' + 'S' + 'D' + 'F', + .fname = "initramfs_test_csum", + .data = "ASDF", + }, { + /* mix csum entry above with no-csum entry below */ + .magic = "070701", + .ino = 2, + .mode = S_IFREG | 0777, + .nlink = 1, + .filesize = sizeof("ASDF") - 1, + .devminor = 1, + .namesize = sizeof("initramfs_test_csum_not_here"), + /* csum ignored */ + .csum = 5555, + .fname = "initramfs_test_csum_not_here", + .data = "ASDF", + } }; + + cpio_srcbuf = kmalloc(8192, GFP_KERNEL); + + len = fill_cpio(c, ARRAY_SIZE(c), cpio_srcbuf); + + err = unpack_to_rootfs(cpio_srcbuf, len); + KUNIT_EXPECT_NULL(test, err); + + KUNIT_EXPECT_EQ(test, init_unlink(c[0].fname), 0); + KUNIT_EXPECT_EQ(test, init_unlink(c[1].fname), 0); + + /* mess up the csum and confirm that unpack fails */ + c[0].csum--; + len = fill_cpio(c, ARRAY_SIZE(c), cpio_srcbuf); + + err = unpack_to_rootfs(cpio_srcbuf, len); + KUNIT_EXPECT_NOT_NULL(test, err); + + /* + * file (with content) is still retained in case of bad-csum abort. + * Perhaps we should change this. 
+ */ + KUNIT_EXPECT_EQ(test, init_unlink(c[0].fname), 0); + KUNIT_EXPECT_EQ(test, init_unlink(c[1].fname), -ENOENT); + kfree(cpio_srcbuf); +} + +/* + * hardlink hashtable may leak when the archive omits a trailer: + * https://lore.kernel.org/r/20241107002044.16477-10-ddiss@suse.de/ + */ +static void __init initramfs_test_hardlink(struct kunit *test) +{ + char *err, *cpio_srcbuf; + size_t len; + struct kstat st0, st1; + struct initramfs_test_cpio c[] = { { + .magic = "070701", + .ino = 1, + .mode = S_IFREG | 0777, + .nlink = 2, + .devminor = 1, + .namesize = sizeof("initramfs_test_hardlink"), + .fname = "initramfs_test_hardlink", + }, { + /* hardlink data is present in last archive entry */ + .magic = "070701", + .ino = 1, + .mode = S_IFREG | 0777, + .nlink = 2, + .filesize = sizeof("ASDF") - 1, + .devminor = 1, + .namesize = sizeof("initramfs_test_hardlink_link"), + .fname = "initramfs_test_hardlink_link", + .data = "ASDF", + } }; + + cpio_srcbuf = kmalloc(8192, GFP_KERNEL); + + len = fill_cpio(c, ARRAY_SIZE(c), cpio_srcbuf); + + err = unpack_to_rootfs(cpio_srcbuf, len); + KUNIT_EXPECT_NULL(test, err); + + KUNIT_EXPECT_EQ(test, init_stat(c[0].fname, &st0, 0), 0); + KUNIT_EXPECT_EQ(test, init_stat(c[1].fname, &st1, 0), 0); + KUNIT_EXPECT_EQ(test, st0.ino, st1.ino); + KUNIT_EXPECT_EQ(test, st0.nlink, 2); + KUNIT_EXPECT_EQ(test, st1.nlink, 2); + + KUNIT_EXPECT_EQ(test, init_unlink(c[0].fname), 0); + KUNIT_EXPECT_EQ(test, init_unlink(c[1].fname), 0); + + kfree(cpio_srcbuf); +} + +#define INITRAMFS_TEST_MANY_LIMIT 1000 +#define INITRAMFS_TEST_MANY_PATH_MAX (sizeof("initramfs_test_many-") \ + + sizeof(__stringify(INITRAMFS_TEST_MANY_LIMIT))) +static void __init initramfs_test_many(struct kunit *test) +{ + char *err, *cpio_srcbuf, *p; + size_t len = INITRAMFS_TEST_MANY_LIMIT * + (CPIO_HDRLEN + INITRAMFS_TEST_MANY_PATH_MAX + 3); + char thispath[INITRAMFS_TEST_MANY_PATH_MAX]; + int i; + + p = cpio_srcbuf = kmalloc(len, GFP_KERNEL); + + for (i = 0; i < 
INITRAMFS_TEST_MANY_LIMIT; i++) { + struct initramfs_test_cpio c = { + .magic = "070701", + .ino = i, + .mode = S_IFREG | 0777, + .nlink = 1, + .devminor = 1, + .fname = thispath, + }; + + c.namesize = 1 + sprintf(thispath, "initramfs_test_many-%d", i); + p += fill_cpio(&c, 1, p); + } + + len = p - cpio_srcbuf; + err = unpack_to_rootfs(cpio_srcbuf, len); + KUNIT_EXPECT_NULL(test, err); + + for (i = 0; i < INITRAMFS_TEST_MANY_LIMIT; i++) { + sprintf(thispath, "initramfs_test_many-%d", i); + KUNIT_EXPECT_EQ(test, init_unlink(thispath), 0); + } + + kfree(cpio_srcbuf); +} + +/* + * An initramfs filename is namesize in length, including the zero-terminator. + * A filename can be zero-terminated prior to namesize, with the remainder used + * as padding. This can be useful for e.g. alignment of file data segments with + * a 4KB filesystem block, allowing for extent sharing (reflinks) between cpio + * source and destination. This hack works with both GNU cpio and initramfs, as + * long as PATH_MAX isn't exceeded. 
+ */ +static void __init initramfs_test_fname_pad(struct kunit *test) +{ + char *err; + size_t len; + struct file *file; + char fdata[] = "this file data is aligned at 4K in the archive"; + struct test_fname_pad { + char padded_fname[4096 - CPIO_HDRLEN]; + char cpio_srcbuf[CPIO_HDRLEN + PATH_MAX + 3 + sizeof(fdata)]; + } *tbufs = kzalloc(sizeof(struct test_fname_pad), GFP_KERNEL); + struct initramfs_test_cpio c[] = { { + .magic = "070701", + .ino = 1, + .mode = S_IFREG | 0777, + .uid = 0, + .gid = 0, + .nlink = 1, + .mtime = 1, + .filesize = sizeof(fdata), + .devmajor = 0, + .devminor = 1, + .rdevmajor = 0, + .rdevminor = 0, + /* align file data at 4K archive offset via padded fname */ + .namesize = 4096 - CPIO_HDRLEN, + .csum = 0, + .fname = tbufs->padded_fname, + .data = fdata, + } }; + + memcpy(tbufs->padded_fname, "padded_fname", sizeof("padded_fname")); + len = fill_cpio(c, ARRAY_SIZE(c), tbufs->cpio_srcbuf); + + err = unpack_to_rootfs(tbufs->cpio_srcbuf, len); + KUNIT_EXPECT_NULL(test, err); + + file = filp_open(c[0].fname, O_RDONLY, 0); + if (IS_ERR(file)) { + KUNIT_FAIL(test, "open failed"); + goto out; + } + + /* read back file contents into @cpio_srcbuf and confirm match */ + len = kernel_read(file, tbufs->cpio_srcbuf, c[0].filesize, NULL); + KUNIT_EXPECT_EQ(test, len, c[0].filesize); + KUNIT_EXPECT_MEMEQ(test, tbufs->cpio_srcbuf, c[0].data, len); + + fput(file); + KUNIT_EXPECT_EQ(test, init_unlink(c[0].fname), 0); +out: + kfree(tbufs); +} + +/* + * The kunit_case/_suite struct cannot be marked as __initdata as this will be + * used in debugfs to retrieve results after test has run. 
+ */ +static struct kunit_case __refdata initramfs_test_cases[] = { + KUNIT_CASE(initramfs_test_extract), + KUNIT_CASE(initramfs_test_fname_overrun), + KUNIT_CASE(initramfs_test_data), + KUNIT_CASE(initramfs_test_csum), + KUNIT_CASE(initramfs_test_hardlink), + KUNIT_CASE(initramfs_test_many), + KUNIT_CASE(initramfs_test_fname_pad), + {}, +}; + +static struct kunit_suite initramfs_test_suite = { + .name = "initramfs", + .test_cases = initramfs_test_cases, +}; +kunit_test_init_section_suites(&initramfs_test_suite); + +MODULE_DESCRIPTION("Initramfs KUnit test suite"); +MODULE_LICENSE("GPL v2"); diff --git a/init/main.c b/init/main.c index 00fac1170294..b84818ad9685 100644 --- a/init/main.c +++ b/init/main.c @@ -13,6 +13,7 @@ #define DEBUG /* Enable initcall_debug */ #include <linux/types.h> +#include <linux/export.h> #include <linux/extable.h> #include <linux/module.h> #include <linux/proc_fs.h> @@ -50,8 +51,8 @@ #include <linux/writeback.h> #include <linux/cpu.h> #include <linux/cpuset.h> +#include <linux/memcontrol.h> #include <linux/cgroup.h> -#include <linux/efi.h> #include <linux/tick.h> #include <linux/sched/isolation.h> #include <linux/interrupt.h> @@ -102,6 +103,7 @@ #include <linux/randomize_kstack.h> #include <linux/pidfs.h> #include <linux/ptdump.h> +#include <linux/time_namespace.h> #include <net/net_namespace.h> #include <asm/io.h> @@ -543,6 +545,12 @@ static int __init unknown_bootoption(char *param, char *val, const char *unused, void *arg) { size_t len = strlen(param); + /* + * Well-known bootloader identifiers: + * 1. LILO/Grub pass "BOOT_IMAGE=..."; + * 2. kexec/kdump (kexec-tools) pass "kexec". 
+ */ + const char *bootloader[] = { "BOOT_IMAGE=", "kexec", NULL }; /* Handle params aliased to sysctls */ if (sysctl_is_alias(param)) @@ -550,6 +558,12 @@ static int __init unknown_bootoption(char *param, char *val, repair_env_string(param, val); + /* Handle bootloader identifier */ + for (int i = 0; bootloader[i]; i++) { + if (strstarts(param, bootloader[i])) + return 0; + } + /* Handle obsolete-style parameters */ if (obsolete_checksetup(param)) return 0; @@ -640,15 +654,11 @@ static void __init setup_command_line(char *command_line) len = xlen + strlen(boot_command_line) + ilen + 1; - saved_command_line = memblock_alloc(len, SMP_CACHE_BYTES); - if (!saved_command_line) - panic("%s: Failed to allocate %zu bytes\n", __func__, len); + saved_command_line = memblock_alloc_or_panic(len, SMP_CACHE_BYTES); len = xlen + strlen(command_line) + 1; - static_command_line = memblock_alloc(len, SMP_CACHE_BYTES); - if (!static_command_line) - panic("%s: Failed to allocate %zu bytes\n", __func__, len); + static_command_line = memblock_alloc_or_panic(len, SMP_CACHE_BYTES); if (xlen) { /* @@ -896,6 +906,101 @@ static void __init early_numa_node_init(void) #endif } +#define KERNEL_CMDLINE_PREFIX "Kernel command line: " +#define KERNEL_CMDLINE_PREFIX_LEN (sizeof(KERNEL_CMDLINE_PREFIX) - 1) +#define KERNEL_CMDLINE_CONTINUATION " \\" +#define KERNEL_CMDLINE_CONTINUATION_LEN (sizeof(KERNEL_CMDLINE_CONTINUATION) - 1) + +#define MIN_CMDLINE_LOG_WRAP_IDEAL_LEN (KERNEL_CMDLINE_PREFIX_LEN + \ + KERNEL_CMDLINE_CONTINUATION_LEN) +#define CMDLINE_LOG_WRAP_IDEAL_LEN (CONFIG_CMDLINE_LOG_WRAP_IDEAL_LEN > \ + MIN_CMDLINE_LOG_WRAP_IDEAL_LEN ? \ + CONFIG_CMDLINE_LOG_WRAP_IDEAL_LEN : \ + MIN_CMDLINE_LOG_WRAP_IDEAL_LEN) + +#define IDEAL_CMDLINE_LEN (CMDLINE_LOG_WRAP_IDEAL_LEN - KERNEL_CMDLINE_PREFIX_LEN) +#define IDEAL_CMDLINE_SPLIT_LEN (IDEAL_CMDLINE_LEN - KERNEL_CMDLINE_CONTINUATION_LEN) + +/** + * print_kernel_cmdline() - Print the kernel cmdline with wrapping. + * @cmdline: The cmdline to print. 
+ * + * Print the kernel command line, trying to wrap based on the Kconfig knob + * CONFIG_CMDLINE_LOG_WRAP_IDEAL_LEN. + * + * Wrapping is based on spaces, ignoring quotes. All lines are prefixed + * with "Kernel command line: " and lines that are not the last line have + * a " \" suffix added to them. The prefix and suffix count towards the + * line length for wrapping purposes. The ideal length will be exceeded + * if no appropriate place to wrap is found. + * + * Example output if CONFIG_CMDLINE_LOG_WRAP_IDEAL_LEN is 40: + * Kernel command line: loglevel=7 \ + * Kernel command line: init=/sbin/init \ + * Kernel command line: root=PARTUUID=8c3efc1a-768b-6642-8d0c-89eb782f19f0/PARTNROFF=1 \ + * Kernel command line: rootwait ro \ + * Kernel command line: my_quoted_arg="The \ + * Kernel command line: quick brown fox \ + * Kernel command line: jumps over the \ + * Kernel command line: lazy dog." + */ +static void __init print_kernel_cmdline(const char *cmdline) +{ + size_t len; + + /* Config option of 0 or anything longer than the max disables wrapping */ + if (CONFIG_CMDLINE_LOG_WRAP_IDEAL_LEN == 0 || + IDEAL_CMDLINE_LEN >= COMMAND_LINE_SIZE - 1) { + pr_notice("%s%s\n", KERNEL_CMDLINE_PREFIX, cmdline); + return; + } + + len = strlen(cmdline); + while (len > IDEAL_CMDLINE_LEN) { + const char *first_space; + const char *prev_cutoff; + const char *cutoff; + int to_print; + size_t used; + + /* Find the last ' ' that wouldn't make the line too long */ + prev_cutoff = NULL; + cutoff = cmdline; + while (true) { + cutoff = strchr(cutoff + 1, ' '); + if (!cutoff || cutoff - cmdline > IDEAL_CMDLINE_SPLIT_LEN) + break; + prev_cutoff = cutoff; + } + if (prev_cutoff) + cutoff = prev_cutoff; + else if (!cutoff) + break; + + /* Find the beginning and end of the string of spaces */ + first_space = cutoff; + while (first_space > cmdline && first_space[-1] == ' ') + first_space--; + to_print = first_space - cmdline; + while (*cutoff == ' ') + cutoff++; + used = cutoff - cmdline; + + 
/* If the whole string is used, break and do the final printout */ + if (len == used) + break; + + if (to_print) + pr_notice("%s%.*s%s\n", KERNEL_CMDLINE_PREFIX, + to_print, cmdline, KERNEL_CMDLINE_CONTINUATION); + + len -= used; + cmdline += used; + } + if (len) + pr_notice("%s%s\n", KERNEL_CMDLINE_PREFIX, cmdline); +} + asmlinkage __visible __init __no_sanitize_address __noreturn __no_stack_protector void start_kernel(void) { @@ -932,7 +1037,7 @@ void start_kernel(void) early_numa_node_init(); boot_cpu_hotplug_init(); - pr_notice("Kernel command line: %s\n", saved_command_line); + print_kernel_cmdline(saved_command_line); /* parameters may set static keys */ parse_early_param(); after_dashes = parse_args("Booting kernel", @@ -959,6 +1064,7 @@ void start_kernel(void) sort_main_extable(); trap_init(); mm_core_init(); + maple_tree_init(); poking_init(); ftrace_init(); @@ -976,7 +1082,6 @@ void start_kernel(void) "Interrupts were enabled *very* early, fixing it\n")) local_irq_disable(); radix_tree_init(); - maple_tree_init(); /* * Set up housekeeping before setting up workqueues to allow the unbound @@ -992,6 +1097,7 @@ void start_kernel(void) workqueue_init_early(); rcu_init(); + kvfree_rcu_init(); /* Trace events are available after this */ trace_init(); @@ -1005,7 +1111,7 @@ void start_kernel(void) init_IRQ(); tick_init(); rcu_init_nohz(); - init_timers(); + timers_init(); srcu_init(); hrtimers_init(); softirq_init(); @@ -1069,15 +1175,12 @@ void start_kernel(void) pid_idr_init(); anon_vma_init(); -#ifdef CONFIG_X86 - if (efi_enabled(EFI_RUNTIME_SERVICES)) - efi_enter_virtual_mode(); -#endif thread_stack_cache_init(); cred_init(); fork_init(); proc_caches_init(); uts_ns_init(); + time_ns_init(); key_init(); security_init(); dbg_late_init(); @@ -1090,6 +1193,7 @@ void start_kernel(void) nsfs_init(); pidfs_init(); cpuset_init(); + mem_cgroup_init(); cgroup_init(); taskstats_init_early(); delayacct_init(); @@ -1145,16 +1249,10 @@ static int __init 
initcall_blacklist(char *str) str_entry = strsep(&str, ","); if (str_entry) { pr_debug("blacklisting initcall %s\n", str_entry); - entry = memblock_alloc(sizeof(*entry), + entry = memblock_alloc_or_panic(sizeof(*entry), SMP_CACHE_BYTES); - if (!entry) - panic("%s: Failed to allocate %zu bytes\n", - __func__, sizeof(*entry)); - entry->buf = memblock_alloc(strlen(str_entry) + 1, + entry->buf = memblock_alloc_or_panic(strlen(str_entry) + 1, SMP_CACHE_BYTES); - if (!entry->buf) - panic("%s: Failed to allocate %zu bytes\n", - __func__, strlen(str_entry) + 1); strcpy(entry->buf, str_entry); list_add(&entry->next, &blacklisted_initcalls); } @@ -1223,6 +1321,12 @@ trace_initcall_finish_cb(void *data, initcall_t fn, int ret) fn, ret, (unsigned long long)ktime_us_delta(rettime, *calltime)); } +static __init_or_module void +trace_initcall_level_cb(void *data, const char *level) +{ + printk(KERN_DEBUG "entering initcall level: %s\n", level); +} + static ktime_t initcall_calltime; #ifdef TRACEPOINTS_ENABLED @@ -1234,10 +1338,12 @@ static void __init initcall_debug_enable(void) &initcall_calltime); ret |= register_trace_initcall_finish(trace_initcall_finish_cb, &initcall_calltime); + ret |= register_trace_initcall_level(trace_initcall_level_cb, NULL); WARN(ret, "Failed to register initcall tracepoints\n"); } # define do_trace_initcall_start trace_initcall_start # define do_trace_initcall_finish trace_initcall_finish +# define do_trace_initcall_level trace_initcall_level #else static inline void do_trace_initcall_start(initcall_t fn) { @@ -1251,6 +1357,12 @@ static inline void do_trace_initcall_finish(initcall_t fn, int ret) return; trace_initcall_finish_cb(&initcall_calltime, fn, ret); } +static inline void do_trace_initcall_level(const char *level) +{ + if (!initcall_debug) + return; + trace_initcall_level_cb(NULL, level); +} #endif /* !TRACEPOINTS_ENABLED */ int __init_or_module do_one_initcall(initcall_t fn) @@ -1323,7 +1435,7 @@ static void __init do_initcall_level(int 
level, char *command_line) level, level, NULL, ignore_unknown_bootoption); - trace_initcall_level(initcall_level_names[level]); + do_trace_initcall_level(initcall_level_names[level]); for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++) do_one_initcall(initcall_from_entry(fn)); } @@ -1367,7 +1479,7 @@ static void __init do_pre_smp_initcalls(void) { initcall_entry_t *fn; - trace_initcall_level("early"); + do_trace_initcall_level("early"); for (fn = __initcall_start; fn < __initcall0_start; fn++) do_one_initcall(initcall_from_entry(fn)); } @@ -1562,7 +1674,6 @@ static noinline void __init kernel_init_freeable(void) init_mm_internals(); - rcu_init_tasks_generic(); do_pre_smp_initcalls(); lockup_detector_init(); @@ -1585,7 +1696,11 @@ static noinline void __init kernel_init_freeable(void) * check if there is an early userspace init. If yes, let it do all * the work */ - if (init_eaccess(ramdisk_execute_command) != 0) { + int ramdisk_command_access; + ramdisk_command_access = init_eaccess(ramdisk_execute_command); + if (ramdisk_command_access != 0) { + pr_warn("check access for rdinit=%s failed: %i, ignoring\n", + ramdisk_execute_command, ramdisk_command_access); ramdisk_execute_command = NULL; prepare_namespace(); } diff --git a/init/version-timestamp.c b/init/version-timestamp.c index 043cbf80a766..375726e05f69 100644 --- a/init/version-timestamp.c +++ b/init/version-timestamp.c @@ -8,7 +8,7 @@ #include <linux/utsname.h> struct uts_namespace init_uts_ns = { - .ns.count = REFCOUNT_INIT(2), + .ns = NS_COMMON_INIT(init_uts_ns), .name = { .sysname = UTS_SYSNAME, .nodename = UTS_NODENAME, @@ -18,10 +18,6 @@ struct uts_namespace init_uts_ns = { .domainname = UTS_DOMAINNAME, }, .user_ns = &init_user_ns, - .ns.inum = PROC_UTS_INIT_INO, -#ifdef CONFIG_UTS_NS - .ns.ops = &utsns_operations, -#endif }; /* FIXED STRINGS! Don't touch! */ |
