diff options
Diffstat (limited to 'init')
| -rw-r--r-- | init/.kunitconfig | 3 | ||||
| -rw-r--r-- | init/Kconfig | 624 | ||||
| -rw-r--r-- | init/Makefile | 4 | ||||
| -rwxr-xr-x | init/build-version | 10 | ||||
| -rw-r--r-- | init/calibrate.c | 13 | ||||
| -rw-r--r-- | init/do_mounts.c | 479 | ||||
| -rw-r--r-- | init/do_mounts.h | 23 | ||||
| -rw-r--r-- | init/do_mounts_initrd.c | 16 | ||||
| -rw-r--r-- | init/do_mounts_rd.c | 17 | ||||
| -rw-r--r-- | init/init_task.c | 67 | ||||
| -rw-r--r-- | init/initramfs.c | 129 | ||||
| -rw-r--r-- | init/initramfs_internal.h | 8 | ||||
| -rw-r--r-- | init/initramfs_test.c | 472 | ||||
| -rw-r--r-- | init/main.c | 396 | ||||
| -rw-r--r-- | init/version-timestamp.c | 6 | ||||
| -rw-r--r-- | init/version.c | 6 |
16 files changed, 1475 insertions, 798 deletions
diff --git a/init/.kunitconfig b/init/.kunitconfig new file mode 100644 index 000000000000..acb906b1a5f9 --- /dev/null +++ b/init/.kunitconfig @@ -0,0 +1,3 @@ +CONFIG_KUNIT=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_TEST=y diff --git a/init/Kconfig b/init/Kconfig index 44e90b28a30f..fa79feb8fe57 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -60,6 +60,13 @@ config LLD_VERSION default $(ld-version) if LD_IS_LLD default 0 +config RUSTC_VERSION + int + default $(rustc-version) + help + It does not depend on `RUST` since that one may need to use the version + in a `depends on`. + config RUST_IS_AVAILABLE def_bool $(success,$(srctree)/scripts/rust_is_available.sh) help @@ -71,18 +78,35 @@ config RUST_IS_AVAILABLE In particular, the Makefile target 'rustavailable' is useful to check why the Rust toolchain is not being detected. +config RUSTC_LLVM_VERSION + int + default $(rustc-llvm-version) + +config ARCH_HAS_CC_CAN_LINK + bool + config CC_CAN_LINK bool - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag)) if 64BIT - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag)) + default ARCH_CC_CAN_LINK if ARCH_HAS_CC_CAN_LINK + default $(cc_can_link_user,$(m64-flag)) if 64BIT + default $(cc_can_link_user,$(m32-flag)) -config CC_CAN_LINK_STATIC +# Fixed in GCC 14, 13.3, 12.4 and 11.5 +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 +config GCC_ASM_GOTO_OUTPUT_BROKEN bool - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag) -static) if 64BIT - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag) -static) + depends on CC_IS_GCC + default y if GCC_VERSION < 110500 + default y if GCC_VERSION >= 120000 && GCC_VERSION < 120400 + default y if GCC_VERSION >= 130000 && GCC_VERSION < 130300 config CC_HAS_ASM_GOTO_OUTPUT - def_bool 
$(success,echo 'int foo(int x) { asm goto ("": "=r"(x) ::: bar); return x; bar: return 0; }' | $(CC) -x c - -c -o /dev/null) + def_bool y + depends on !GCC_ASM_GOTO_OUTPUT_BROKEN + # Detect basic support + depends on $(success,echo 'int foo(int x) { asm goto ("": "=r"(x) ::: bar); return x; bar: return 0; }' | $(CC) -x c - -c -o /dev/null) + # Detect clang (< v17) scoped label issues + depends on $(success,echo 'void b(void **);void* c(void);int f(void){{asm goto(""::::l0);return 0;l0:return 1;}void *x __attribute__((cleanup(b)))=c();{asm goto(""::::l1);return 2;l1:return 3;}}' | $(CC) -x c - -c -o /dev/null) config CC_HAS_ASM_GOTO_TIED_OUTPUT depends on CC_HAS_ASM_GOTO_OUTPUT @@ -95,9 +119,56 @@ config TOOLS_SUPPORT_RELR config CC_HAS_ASM_INLINE def_bool $(success,echo 'void foo(void) { asm inline (""); }' | $(CC) -x c - -c -o /dev/null) +config CC_HAS_ASSUME + bool + # clang needs to be at least 19.1.0 since the meaning of the assume + # attribute changed: + # https://github.com/llvm/llvm-project/commit/c44fa3e8a9a44c2e9a575768a3c185354b9f6c17 + default y if CC_IS_CLANG && CLANG_VERSION >= 190100 + # supported since gcc 13.1.0 + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106654 + default y if CC_IS_GCC && GCC_VERSION >= 130100 + config CC_HAS_NO_PROFILE_FN_ATTR def_bool $(success,echo '__attribute__((no_profile_instrument_function)) int x();' | $(CC) -x c - -c -o /dev/null -Werror) +config CC_HAS_COUNTED_BY + bool + # clang needs to be at least 20.1.0 to avoid potential crashes + # when building structures that contain __counted_by + # https://github.com/ClangBuiltLinux/linux/issues/2114 + # https://github.com/llvm/llvm-project/commit/160fb1121cdf703c3ef5e61fb26c5659eb581489 + default y if CC_IS_CLANG && CLANG_VERSION >= 200100 + # supported since gcc 15.1.0 + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896 + default y if CC_IS_GCC && GCC_VERSION >= 150100 + +config CC_HAS_MULTIDIMENSIONAL_NONSTRING + def_bool $(success,echo 'char tag[][4] 
__attribute__((__nonstring__)) = { };' | $(CC) $(CLANG_FLAGS) -x c - -c -o /dev/null -Werror) + +config LD_CAN_USE_KEEP_IN_OVERLAY + # ld.lld prior to 21.0.0 did not support KEEP within an overlay description + # https://github.com/llvm/llvm-project/pull/130661 + def_bool LD_IS_BFD || LLD_VERSION >= 210000 + +config RUSTC_HAS_SLICE_AS_FLATTENED + def_bool RUSTC_VERSION >= 108000 + +config RUSTC_HAS_COERCE_POINTEE + def_bool RUSTC_VERSION >= 108400 + +config RUSTC_HAS_SPAN_FILE + def_bool RUSTC_VERSION >= 108800 + +config RUSTC_HAS_UNNECESSARY_TRANSMUTES + def_bool RUSTC_VERSION >= 108800 + +config RUSTC_HAS_FILE_WITH_NUL + def_bool RUSTC_VERSION >= 108900 + +config RUSTC_HAS_FILE_AS_C_STR + def_bool RUSTC_VERSION >= 109100 + config PAHOLE_VERSION int default $(shell,$(srctree)/scripts/pahole-version.sh $(PAHOLE)) @@ -106,7 +177,7 @@ config CONSTRUCTORS bool config IRQ_WORK - bool + def_bool y if SMP config BUILDTIME_TABLE_SORT bool @@ -125,6 +196,10 @@ menu "General setup" config BROKEN bool + help + This option allows you to choose whether you want to try to + compile (and fix) old drivers that haven't been updated to + new infrastructure. config BROKEN_ON_SMP bool @@ -299,8 +374,9 @@ config KERNEL_XZ BCJ filters which can improve compression ratio of executable code. The size of the kernel is about 30% smaller with XZ in comparison to gzip. On architectures for which there is a BCJ - filter (i386, x86_64, ARM, IA-64, PowerPC, and SPARC), XZ - will create a few percent smaller kernel than plain LZMA. + filter (i386, x86_64, ARM, ARM64, RISC-V, big endian PowerPC, + and SPARC), XZ will create a few percent smaller kernel than + plain LZMA. The speed is about the same as with LZMA: The decompression speed of XZ is better than that of bzip2 but worse than gzip @@ -436,16 +512,6 @@ config CROSS_MEMORY_ATTACH to directly read from or write to another process' address space. See the man page for more details. 
-config USELIB - bool "uselib syscall (for libc5 and earlier)" - default ALPHA || M68K || SPARC - help - This option enables the uselib syscall, a system call used in the - dynamic linker from libc5 and earlier. glibc does not use this - system call. If you intend to run programs built on libc5 or - earlier, you may need to enable this syscall. Current systems - running glibc can safely disable this. - config AUDIT bool "Auditing support" depends on NET @@ -538,24 +604,24 @@ config HAVE_SCHED_AVG_IRQ depends on IRQ_TIME_ACCOUNTING || PARAVIRT_TIME_ACCOUNTING depends on SMP -config SCHED_THERMAL_PRESSURE +config SCHED_HW_PRESSURE bool default y if ARM && ARM_CPU_TOPOLOGY default y if ARM64 depends on SMP depends on CPU_FREQ_THERMAL help - Select this option to enable thermal pressure accounting in the - scheduler. Thermal pressure is the value conveyed to the scheduler + Select this option to enable HW pressure accounting in the + scheduler. HW pressure is the value conveyed to the scheduler that reflects the reduction in CPU compute capacity resulted from - thermal throttling. Thermal throttling occurs when the performance of - a CPU is capped due to high operating temperatures. + HW throttling. HW throttling occurs when the performance of + a CPU is capped due to high operating temperatures as an example. If selected, the scheduler will be able to balance tasks accordingly, i.e. put less load on throttled CPUs than on non/less throttled ones. This requires the architecture to implement - arch_update_thermal_pressure() and arch_scale_thermal_pressure(). + arch_update_hw_pressure() and arch_scale_hw_pressure(). config BSD_PROCESS_ACCT bool "BSD Process Accounting" @@ -629,6 +695,7 @@ config TASK_IO_ACCOUNTING config PSI bool "Pressure stall information tracking" + select KERNFS help Collect metrics that indicate how overcommitted the CPU, memory, and IO capacity are in the system. 
@@ -670,7 +737,7 @@ endmenu # "CPU/Task time and stats accounting" config CPU_ISOLATION bool "CPU isolation" - depends on SMP || COMPILE_TEST + depends on SMP default y help Make sure that CPUs running critical tasks are not disturbed by @@ -682,10 +749,6 @@ config CPU_ISOLATION source "kernel/rcu/Kconfig" -config BUILD_BIN2C - bool - default n - config IKCONFIG tristate "Kernel .config support" help @@ -737,8 +800,8 @@ config LOG_CPU_MAX_BUF_SHIFT int "CPU kernel log buffer size contribution (13 => 8 KB, 17 => 128KB)" depends on SMP range 0 21 - default 12 if !BASE_SMALL default 0 if BASE_SMALL + default 12 depends on PRINTK help This option allows to increase the default ring buffer size @@ -769,30 +832,6 @@ config LOG_CPU_MAX_BUF_SHIFT 13 => 8 KB for each CPU 12 => 4 KB for each CPU -config PRINTK_SAFE_LOG_BUF_SHIFT - int "Temporary per-CPU printk log buffer size (12 => 4KB, 13 => 8KB)" - range 10 21 - default 13 - depends on PRINTK - help - Select the size of an alternate printk per-CPU buffer where messages - printed from unsafe contexts are temporary stored. One example would - be NMI messages, another one - printk recursion. The messages are - copied to the main log buffer in a safe context to avoid a deadlock. - The value defines the size as a power of 2. - - Those messages are rare and limited. The largest one is when - a backtrace is printed. It usually fits into 4KB. Select - 8KB if you want to be on the safe side. - - Examples: - 17 => 128 KB for each CPU - 16 => 64 KB for each CPU - 15 => 32 KB for each CPU - 14 => 16 KB for each CPU - 13 => 8 KB for each CPU - 12 => 4 KB for each CPU - config PRINTK_INDEX bool "Printk indexing debugfs interface" depends on PRINTK && DEBUG_FS @@ -867,6 +906,18 @@ config UCLAMP_BUCKETS_COUNT If in doubt, use the default value. 
+config SCHED_PROXY_EXEC + bool "Proxy Execution" + # Avoid some build failures w/ PREEMPT_RT until it can be fixed + depends on !PREEMPT_RT + # Need to investigate how to inform sched_ext of split contexts + depends on !SCHED_CLASS_EXT + # Not particularly useful until we get to multi-rq proxying + depends on EXPERT + help + This option enables proxy execution, a mechanism for mutex-owning + tasks to inherit the scheduling context of higher priority waiters. + endmenu # @@ -894,18 +945,26 @@ config CC_IMPLICIT_FALLTHROUGH default "-Wimplicit-fallthrough=5" if CC_IS_GCC && $(cc-option,-Wimplicit-fallthrough=5) default "-Wimplicit-fallthrough" if CC_IS_CLANG && $(cc-option,-Wunreachable-code-fallthrough) -# Currently, disable gcc-11,12 array-bounds globally. -# We may want to target only particular configurations some day. -config GCC11_NO_ARRAY_BOUNDS +# Currently, disable gcc-10+ array-bounds globally. +# It's still broken in gcc-13, so no upper bound yet. +config GCC10_NO_ARRAY_BOUNDS def_bool y -config GCC12_NO_ARRAY_BOUNDS +config CC_NO_ARRAY_BOUNDS + bool + default y if CC_IS_GCC && GCC_VERSION >= 90000 && GCC10_NO_ARRAY_BOUNDS + +# Currently, disable -Wstringop-overflow for GCC globally. +config GCC_NO_STRINGOP_OVERFLOW def_bool y -config CC_NO_ARRAY_BOUNDS +config CC_NO_STRINGOP_OVERFLOW + bool + default y if CC_IS_GCC && GCC_NO_STRINGOP_OVERFLOW + +config CC_STRINGOP_OVERFLOW bool - default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC_VERSION < 120000 && GCC11_NO_ARRAY_BOUNDS - default y if CC_IS_GCC && GCC_VERSION >= 120000 && GCC_VERSION < 130000 && GCC12_NO_ARRAY_BOUNDS + default y if CC_IS_GCC && !CC_NO_STRINGOP_OVERFLOW # # For architectures that know their GCC __int128 support is sound @@ -939,6 +998,9 @@ config NUMA_BALANCING_DEFAULT_ENABLED If set, automatic NUMA balancing will be enabled if running on a NUMA machine. 
+config SLAB_OBJ_EXT + bool + menuconfig CGROUPS bool "Control Group support" select KERNFS @@ -972,14 +1034,42 @@ config MEMCG bool "Memory controller" select PAGE_COUNTER select EVENTFD + select SLAB_OBJ_EXT + select VM_EVENT_COUNTERS help Provides control over the memory footprint of tasks in a cgroup. -config MEMCG_KMEM +config MEMCG_NMI_UNSAFE + bool + depends on MEMCG + depends on HAVE_NMI + depends on !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && !ARCH_HAVE_NMI_SAFE_CMPXCHG + default y + +config MEMCG_NMI_SAFETY_REQUIRES_ATOMIC bool - depends on MEMCG && !SLOB + depends on MEMCG + depends on HAVE_NMI + depends on !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && ARCH_HAVE_NMI_SAFE_CMPXCHG default y +config MEMCG_V1 + bool "Legacy cgroup v1 memory controller" + depends on MEMCG + default n + help + Legacy cgroup v1 memory controller which has been deprecated by + cgroup v2 implementation. The v1 is there for legacy applications + which haven't migrated to the new cgroup v2 interface yet. If you + do not have any such application then you are completely fine leaving + this option disabled. + + Please note that feature set of the legacy memory controller is likely + going to shrink due to deprecation process. New deployments with v1 + controller are highly discouraged. + + Say N if unsure. + config BLK_CGROUP bool "IO controller" depends on BLOCK @@ -1016,14 +1106,22 @@ menuconfig CGROUP_SCHED tasks. if CGROUP_SCHED +config GROUP_SCHED_WEIGHT + def_bool n + +config GROUP_SCHED_BANDWIDTH + def_bool n + config FAIR_GROUP_SCHED bool "Group scheduling for SCHED_OTHER" depends on CGROUP_SCHED + select GROUP_SCHED_WEIGHT default CGROUP_SCHED config CFS_BANDWIDTH bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED" depends on FAIR_GROUP_SCHED + select GROUP_SCHED_BANDWIDTH default n help This option allows users to define CPU bandwidth rates (limits) for @@ -1043,8 +1141,30 @@ config RT_GROUP_SCHED realtime bandwidth for them. 
See Documentation/scheduler/sched-rt-group.rst for more information. +config RT_GROUP_SCHED_DEFAULT_DISABLED + bool "Require boot parameter to enable group scheduling for SCHED_RR/FIFO" + depends on RT_GROUP_SCHED + default n + help + When set, the RT group scheduling is disabled by default. The option + is in inverted form so that mere RT_GROUP_SCHED enables the group + scheduling. + + Say N if unsure. + +config EXT_GROUP_SCHED + bool + depends on SCHED_CLASS_EXT && CGROUP_SCHED + select GROUP_SCHED_WEIGHT + select GROUP_SCHED_BANDWIDTH + default y + endif #CGROUP_SCHED +config SCHED_MM_CID + def_bool y + depends on SMP && RSEQ + config UCLAMP_TASK_GROUP bool "Utilization clamping per group of tasks" depends on CGROUP_SCHED @@ -1093,6 +1213,16 @@ config CGROUP_RDMA Attaching processes with active RDMA resources to the cgroup hierarchy is allowed even if can cross the hierarchy's limit. +config CGROUP_DMEM + bool "Device memory controller (DMEM)" + select PAGE_COUNTER + help + The DMEM controller allows compatible devices to restrict device + memory usage based on the cgroup hierarchy. + + As an example, it allows you to restrict VRAM usage for applications + in the DRM subsystem. + config CGROUP_FREEZER bool "Freezer controller" help @@ -1123,6 +1253,7 @@ config CGROUP_HUGETLB config CPUSETS bool "Cpuset controller" depends on SMP + select UNION_FIND help This option will let you create and manage CPUSETs which allow dynamically partitioning a system into sets of CPUs and @@ -1131,9 +1262,23 @@ config CPUSETS Say N if unsure. +config CPUSETS_V1 + bool "Legacy cgroup v1 cpusets controller" + depends on CPUSETS + default n + help + Legacy cgroup v1 cpusets controller which has been deprecated by + cgroup v2 implementation. The v1 is there for legacy applications + which haven't migrated to the new cgroup v2 interface yet. Legacy + interface includes cpuset filesystem and /proc/<pid>/cpuset. 
If you + do not have any such application then you are completely fine leaving + this option disabled. + + Say N if unsure. + config PROC_PID_CPUSET bool "Include legacy /proc/<pid>/cpuset file" - depends on CPUSETS + depends on CPUSETS_V1 default y config CGROUP_DEVICE @@ -1225,7 +1370,7 @@ config UTS_NS config TIME_NS bool "TIME namespace" - depends on GENERIC_VDSO_TIME_NS + depends on GENERIC_GETTIMEOFDAY default y help In this namespace boottime and monotonic clocks can be set. @@ -1297,44 +1442,6 @@ config SCHED_AUTOGROUP desktop applications. Task group autogeneration is currently based upon task session. -config SYSFS_DEPRECATED - bool "Enable deprecated sysfs features to support old userspace tools" - depends on SYSFS - default n - help - This option adds code that switches the layout of the "block" class - devices, to not show up in /sys/class/block/, but only in - /sys/block/. - - This switch is only active when the sysfs.deprecated=1 boot option is - passed or the SYSFS_DEPRECATED_V2 option is set. - - This option allows new kernels to run on old distributions and tools, - which might get confused by /sys/class/block/. Since 2007/2008 all - major distributions and tools handle this just fine. - - Recent distributions and userspace tools after 2009/2010 depend on - the existence of /sys/class/block/, and will not work with this - option enabled. - - Only if you are using a new kernel on an old distribution, you might - need to say Y here. - -config SYSFS_DEPRECATED_V2 - bool "Enable deprecated sysfs features by default" - default n - depends on SYSFS - depends on SYSFS_DEPRECATED - help - Enable deprecated sysfs by default. - - See the CONFIG_SYSFS_DEPRECATED option for more details about this - option. - - Only if you are using a new kernel on an old distribution, you might - need to say Y here. Even then, odds are you would not need it - enabled, you can always pass the boot option if absolutely necessary. 
- config RELAY bool "Kernel->user space relay support (formerly relayfs)" select IRQ_WORK @@ -1380,6 +1487,19 @@ config BOOT_CONFIG If unsure, say Y. +config BOOT_CONFIG_FORCE + bool "Force unconditional bootconfig processing" + depends on BOOT_CONFIG + default y if BOOT_CONFIG_EMBED + help + With this Kconfig option set, BOOT_CONFIG processing is carried + out even when the "bootconfig" kernel-boot parameter is omitted. + In fact, with this Kconfig option set, there is no way to + make the kernel ignore the BOOT_CONFIG-supplied kernel-boot + parameters. + + If unsure, say N. + config BOOT_CONFIG_EMBED bool "Embed bootconfig file in the kernel" depends on BOOT_CONFIG @@ -1399,8 +1519,27 @@ config BOOT_CONFIG_EMBED_FILE This bootconfig will be used if there is no initrd or no other bootconfig in the initrd. +config CMDLINE_LOG_WRAP_IDEAL_LEN + int "Length to try to wrap the cmdline when logged at boot" + default 1021 + range 0 1021 + help + At boot time, the kernel command line is logged to the console. + The log message will start with the prefix "Kernel command line: ". + The log message will attempt to be wrapped (split into multiple log + messages) at spaces based on CMDLINE_LOG_WRAP_IDEAL_LEN characters. + If wrapping happens, each log message will start with the prefix and + all but the last message will end with " \". Messages may exceed the + ideal length if a place to wrap isn't found before the specified + number of characters. + + A value of 0 disables wrapping, though be warned that the maximum + length of a log message (1021 characters) may cause the cmdline to + be truncated. + config INITRAMFS_PRESERVE_MTIME bool "Preserve cpio archive mtimes in initramfs" + depends on BLK_DEV_INITRD default y help Each entry in an initramfs cpio archive carries an mtime value. When @@ -1409,6 +1548,13 @@ config INITRAMFS_PRESERVE_MTIME If unsure, say Y. 
+config INITRAMFS_TEST + bool "Test initramfs cpio archive extraction" if !KUNIT_ALL_TESTS + depends on BLK_DEV_INITRD && KUNIT=y + default KUNIT_ALL_TESTS + help + Build KUnit tests for initramfs. See Documentation/dev-tools/kunit + choice prompt "Compiler optimization level" default CC_OPTIMIZE_FOR_PERFORMANCE @@ -1494,13 +1640,18 @@ config SYSCTL_ARCH_UNALIGN_ALLOW the unaligned access emulation. see arch/parisc/kernel/unaligned.c for reference -config HAVE_PCSPKR_PLATFORM - bool +config SYSFS_SYSCALL + bool "Sysfs syscall support" + default n + help + sys_sysfs is an obsolete system call no longer supported in libc. + Note that disabling this option is more secure but might break + compatibility with some systems. + + If unsure say N here. -# interpreter that classic socket filters depend on -config BPF +config HAVE_PCSPKR_PLATFORM bool - select CRYPTO_LIB_SHA1 menuconfig EXPERT bool "Configure standard kernel features (expert users)" @@ -1535,7 +1686,7 @@ config MULTIUSER config SGETMASK_SYSCALL bool "sgetmask/ssetmask syscalls support" if EXPERT - def_bool PARISC || M68K || PPC || MIPS || X86 || SPARC || MICROBLAZE || SUPERH + default PARISC || M68K || PPC || MIPS || X86 || SPARC || MICROBLAZE || SUPERH help sys_sgetmask and sys_ssetmask are obsolete system calls no longer supported in libc but still enabled by default in some @@ -1543,16 +1694,6 @@ config SGETMASK_SYSCALL If unsure, leave the default option here. -config SYSFS_SYSCALL - bool "Sysfs syscall support" if EXPERT - default y - help - sys_sysfs is an obsolete system call no longer supported in libc. - Note that disabling this option is more secure but might break - compatibility with some systems. - - If unsure say Y here. - config FHANDLE bool "open by fhandle syscalls" if EXPERT select EXPORTFS @@ -1594,6 +1735,18 @@ config PRINTK very difficult to diagnose system problems, saying N here is strongly discouraged. 
+config PRINTK_RINGBUFFER_KUNIT_TEST + tristate "KUnit Test for the printk ringbuffer" if !KUNIT_ALL_TESTS + depends on PRINTK && KUNIT + default KUNIT_ALL_TESTS + help + This builds the printk ringbuffer KUnit test suite. + + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation. + + If unsure, say N. + config BUG bool "BUG() support" if EXPERT default y @@ -1621,11 +1774,10 @@ config PCSPKR_PLATFORM This option allows to disable the internal PC-Speaker support, saving some memory. -config BASE_FULL - default y - bool "Enable full-sized data structures for core" if EXPERT +config BASE_SMALL + bool "Enable smaller-sized data structures for core" if EXPERT help - Disabling this option reduces the size of miscellaneous core + Enabling this option reduces the size of miscellaneous core kernel data structures. This saves memory on small machines, but may reduce performance. @@ -1644,6 +1796,16 @@ config FUTEX_PI depends on FUTEX && RT_MUTEXES default y +config FUTEX_PRIVATE_HASH + bool + depends on FUTEX && !BASE_SMALL && MMU + default y + +config FUTEX_MPOL + bool + depends on FUTEX && NUMA + default y + config EPOLL bool "Enable eventpoll support" if EXPERT default y @@ -1706,6 +1868,30 @@ config IO_URING applications to submit and complete IO through submission and completion rings that are shared between the kernel and application. +config GCOV_PROFILE_URING + bool "Enable GCOV profiling on the io_uring subsystem" + depends on IO_URING && GCOV_KERNEL + help + Enable GCOV profiling on the io_uring subsystem, to facilitate + code coverage testing. + + If unsure, say N. + + Note that this will have a negative impact on the performance of + the io_uring subsystem, hence this should only be enabled for + specific test purposes. + +config IO_URING_MOCK_FILE + tristate "Enable io_uring mock files (Experimental)" if EXPERT + default n + depends on IO_URING + help + Enable mock files for io_uring subsystem testing. 
The ABI might + still change, so it's still experimental and should only be enabled + for specific test purposes. + + If unsure, say N. + config ADVISE_SYSCALLS bool "Enable madvise/fadvise syscalls" if EXPERT default y @@ -1728,6 +1914,75 @@ config MEMBARRIER If unsure, say Y. +config KCMP + bool "Enable kcmp() system call" if EXPERT + help + Enable the kernel resource comparison system call. It provides + user-space with the ability to compare two processes to see if they + share a common resource, such as a file descriptor or even virtual + memory space. + + If unsure, say N. + +config RSEQ + bool "Enable rseq() system call" if EXPERT + default y + depends on HAVE_RSEQ + select MEMBARRIER + help + Enable the restartable sequences system call. It provides a + user-space cache for the current CPU number value, which + speeds up getting the current CPU number from user-space, + as well as an ABI to speed up user-space operations on + per-CPU data. + + If unsure, say Y. + +config RSEQ_STATS + default n + bool "Enable lightweight statistics of restartable sequences" if EXPERT + depends on RSEQ && DEBUG_FS + help + Enable lightweight counters which expose information about the + frequency of RSEQ operations via debugfs. Mostly interesting for + kernel debugging or performance analysis. While lightweight it's + still adding code into the user/kernel mode transitions. + + If unsure, say N. + +config RSEQ_DEBUG_DEFAULT_ENABLE + default n + bool "Enable restartable sequences debug mode by default" if EXPERT + depends on RSEQ + help + This enables the static branch for debug mode of restartable + sequences. + + This also can be controlled on the kernel command line via the + command line parameter "rseq_debug=0/1" and through debugfs. + + If unsure, say N. 
+ +config DEBUG_RSEQ + default n + bool "Enable debugging of rseq() system call" if EXPERT + depends on RSEQ && DEBUG_KERNEL && !GENERIC_ENTRY + select RSEQ_DEBUG_DEFAULT_ENABLE + help + Enable extra debugging checks for the rseq system call. + + If unsure, say N. + +config CACHESTAT_SYSCALL + bool "Enable cachestat() system call" if EXPERT + default y + help + Enable the cachestat system call, which queries the page cache + statistics of a file (number of cached pages, dirty pages, + pages marked for writeback, (recently) evicted pages). + + If unsure say Y here. + config KALLSYMS bool "Load all symbols for debugging/ksymoops" if EXPERT default y @@ -1767,79 +2022,35 @@ config KALLSYMS_ALL Say N unless you really need all symbols, or kernel live patching. -config KALLSYMS_ABSOLUTE_PERCPU - bool - depends on KALLSYMS - default X86_64 && SMP - -config KALLSYMS_BASE_RELATIVE - bool - depends on KALLSYMS - default !IA64 - help - Instead of emitting them as absolute values in the native word size, - emit the symbol references in the kallsyms table as 32-bit entries, - each containing a relative value in the range [base, base + U32_MAX] - or, when KALLSYMS_ABSOLUTE_PERCPU is in effect, each containing either - an absolute value in the range [0, S32_MAX] or a relative value in the - range [base, base + S32_MAX], where base is the lowest relative symbol - address encountered in the image. - - On 64-bit builds, this reduces the size of the address table by 50%, - but more importantly, it results in entries whose values are build - time constants, and no relocation pass is required at runtime to fix - up the entries based on the runtime load address of the kernel. - # end of the "standard kernel features (expert users)" menu -# syscall, maps, verifier - config ARCH_HAS_MEMBARRIER_CALLBACKS bool config ARCH_HAS_MEMBARRIER_SYNC_CORE bool -config KCMP - bool "Enable kcmp() system call" if EXPERT - help - Enable the kernel resource comparison system call. 
It provides - user-space with the ability to compare two processes to see if they - share a common resource, such as a file descriptor or even virtual - memory space. - - If unsure, say N. - -config RSEQ - bool "Enable rseq() system call" if EXPERT - default y - depends on HAVE_RSEQ - select MEMBARRIER +config ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS + bool help - Enable the restartable sequences system call. It provides a - user-space cache for the current CPU number value, which - speeds up getting the current CPU number from user-space, - as well as an ABI to speed up user-space operations on - per-CPU data. + Control MSEAL_SYSTEM_MAPPINGS access based on architecture. - If unsure, say Y. + A 64-bit kernel is required for the memory sealing feature. + No specific hardware features from the CPU are needed. -config DEBUG_RSEQ - default n - bool "Enabled debugging of rseq() system call" if EXPERT - depends on RSEQ && DEBUG_KERNEL - help - Enable extra debugging checks for the rseq system call. + To enable this feature, the architecture needs to update their + special mappings calls to include the sealing flag and confirm + that it doesn't unmap/remap system mappings during the life + time of the process. The existence of this flag for an architecture + implies that it does not require the remapping of the system + mappings during process lifetime, so sealing these mappings is safe + from a kernel perspective. - If unsure, say N. + After the architecture enables this, a distribution can set + CONFIG_MSEAL_SYSTEM_MAPPINGS to manage access to the feature. -config EMBEDDED - bool "Embedded system" - select EXPERT - help - This option should be enabled if compiling the kernel for - an embedded system so certain expert options are available - for configuration. 
+ For complete descriptions of memory sealing, please see + Documentation/userspace-api/mseal.rst config HAVE_PERF_EVENTS bool @@ -1855,13 +2066,6 @@ config PERF_USE_VMALLOC help See tools/perf/design.txt for details -config PC104 - bool "PC/104 support" if EXPERT - help - Expose PC/104 form factor device drivers and options available for - selection and configuration. Enable this option if your target - machine has a PC/104 bus. - menu "Kernel Performance Events And Counters" config PERF_EVENTS @@ -1869,7 +2073,6 @@ config PERF_EVENTS default y if PROFILING depends on HAVE_PERF_EVENTS select IRQ_WORK - select SRCU help Enable kernel support for various performance events provided by software and hardware. @@ -1936,11 +2139,16 @@ config RUST bool "Rust support" depends on HAVE_RUST depends on RUST_IS_AVAILABLE - depends on !MODVERSIONS - depends on !GCC_PLUGINS + select EXTENDED_MODVERSIONS if MODVERSIONS + depends on !MODVERSIONS || GENDWARFKSYMS + depends on !GCC_PLUGIN_RANDSTRUCT depends on !RANDSTRUCT - depends on !DEBUG_INFO_BTF - select CONSTRUCTORS + depends on !DEBUG_INFO_BTF || (PAHOLE_HAS_LANG_EXCLUDE && !LTO) + depends on !CFI || HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC + select CFI_ICALL_NORMALIZE_INTEGERS if CFI + depends on !CALL_PADDING || RUSTC_VERSION >= 108100 + depends on !KASAN_SW_TAGS + depends on !(MITIGATION_RETHUNK && KASAN) || RUSTC_VERSION >= 108300 help Enables Rust support in the kernel. @@ -1957,12 +2165,19 @@ config RUST config RUSTC_VERSION_TEXT string depends on RUST - default $(shell,command -v $(RUSTC) >/dev/null 2>&1 && $(RUSTC) --version || echo n) + default "$(RUSTC_VERSION_TEXT)" + help + See `CC_VERSION_TEXT`. 
config BINDGEN_VERSION_TEXT string depends on RUST - default $(shell,command -v $(BINDGEN) >/dev/null 2>&1 && $(BINDGEN) --version || echo n) + # The dummy parameter `workaround-for-0.69.0` is required to support 0.69.0 + # (https://github.com/rust-lang/rust-bindgen/pull/2678) and 0.71.0 + # (https://github.com/rust-lang/rust-bindgen/pull/3040). It can be removed + # when the minimum version is upgraded past the latter (0.69.1 and 0.71.1 + # both fixed the issue). + default "$(shell,$(BINDGEN) --version workaround-for-0.69.0 2>/dev/null)" # # Place an empty function call at each tracepoint site. Can be @@ -1970,6 +2185,11 @@ config BINDGEN_VERSION_TEXT # config TRACEPOINTS bool + select TASKS_TRACE_RCU + +source "kernel/Kconfig.kexec" + +source "kernel/liveupdate/Kconfig" endmenu # General setup @@ -1979,11 +2199,6 @@ config RT_MUTEXES bool default y if PREEMPT_RT -config BASE_SMALL - int - default 0 if BASE_FULL - default 1 if !BASE_FULL - config MODULE_SIG_FORMAT def_bool n select SYSTEM_DATA_VERIFICATION @@ -2021,6 +2236,9 @@ source "kernel/Kconfig.locks" config ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE bool +config ARCH_HAS_PREPARE_SYNC_CORE_CMD + bool + config ARCH_HAS_SYNC_CORE_BEFORE_USERMODE bool diff --git a/init/Makefile b/init/Makefile index 26de459006c4..d6f75d8907e0 100644 --- a/init/Makefile +++ b/init/Makefile @@ -12,6 +12,7 @@ else obj-$(CONFIG_BLK_DEV_INITRD) += initramfs.o endif obj-$(CONFIG_GENERIC_CALIBRATE_DELAY) += calibrate.o +obj-$(CONFIG_INITRAMFS_TEST) += initramfs_test.o obj-y += init_task.o @@ -52,11 +53,10 @@ CFLAGS_version.o := -include $(obj)/utsversion-tmp.h # Build version-timestamp.c with final UTS_VERSION # -include/generated/utsversion.h: build-version-auto = $(shell $(srctree)/$(src)/build-version) +include/generated/utsversion.h: build-version-auto = $(shell $(srctree)/scripts/build-version) include/generated/utsversion.h: build-timestamp-auto = $(shell LC_ALL=C date) include/generated/utsversion.h: FORCE $(call 
filechk,uts_version) $(obj)/version-timestamp.o: include/generated/utsversion.h CFLAGS_version-timestamp.o := -include include/generated/utsversion.h -KASAN_SANITIZE_version-timestamp.o := n diff --git a/init/build-version b/init/build-version deleted file mode 100755 index 537d45815083..000000000000 --- a/init/build-version +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0-only - -prev_ver=$(cat .version 2>/dev/null) && -ver=$(expr ${prev_ver} + 1 2>/dev/null) || -ver=1 - -echo ${ver} > .version - -echo ${ver} diff --git a/init/calibrate.c b/init/calibrate.c index f3831272f113..63be4c65bc52 100644 --- a/init/calibrate.c +++ b/init/calibrate.c @@ -5,19 +5,22 @@ * Copyright (C) 1991, 1992 Linus Torvalds */ -#include <linux/jiffies.h> #include <linux/delay.h> #include <linux/init.h> -#include <linux/timex.h> -#include <linux/smp.h> +#include <linux/jiffies.h> +#include <linux/kstrtox.h> #include <linux/percpu.h> +#include <linux/printk.h> +#include <linux/smp.h> +#include <linux/stddef.h> +#include <linux/timex.h> unsigned long lpj_fine; unsigned long preset_lpj; + static int __init lpj_setup(char *str) { - preset_lpj = simple_strtoul(str,NULL,0); - return 1; + return kstrtoul(str, 0, &preset_lpj) == 0; } __setup("lpj=", lpj_setup); diff --git a/init/do_mounts.c b/init/do_mounts.c index 811e94daf0a8..defbbf1d55f7 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -18,6 +18,7 @@ #include <linux/slab.h> #include <linux/ramfs.h> #include <linux/shmem_fs.h> +#include <linux/ktime.h> #include <linux/nfs_fs.h> #include <linux/nfs_fs_sb.h> @@ -28,7 +29,6 @@ #include "do_mounts.h" int root_mountflags = MS_RDONLY | MS_SILENT; -static char * __initdata root_device_name; static char __initdata saved_root_name[64]; static int root_wait; @@ -60,257 +60,48 @@ static int __init readwrite(char *str) __setup("ro", readonly); __setup("rw", readwrite); -#ifdef CONFIG_BLOCK -struct uuidcmp { - const char *uuid; - int len; -}; - -/** - * match_dev_by_uuid 
- callback for finding a partition using its uuid - * @dev: device passed in by the caller - * @data: opaque pointer to the desired struct uuidcmp to match - * - * Returns 1 if the device matches, and 0 otherwise. - */ -static int match_dev_by_uuid(struct device *dev, const void *data) +static int __init root_dev_setup(char *line) { - struct block_device *bdev = dev_to_bdev(dev); - const struct uuidcmp *cmp = data; - - if (!bdev->bd_meta_info || - strncasecmp(cmp->uuid, bdev->bd_meta_info->uuid, cmp->len)) - return 0; + strscpy(saved_root_name, line, sizeof(saved_root_name)); return 1; } -/** - * devt_from_partuuid - looks up the dev_t of a partition by its UUID - * @uuid_str: char array containing ascii UUID - * - * The function will return the first partition which contains a matching - * UUID value in its partition_meta_info struct. This does not search - * by filesystem UUIDs. - * - * If @uuid_str is followed by a "/PARTNROFF=%d", then the number will be - * extracted and used as an offset from the partition identified by the UUID. - * - * Returns the matching dev_t on success or 0 on failure. - */ -static dev_t devt_from_partuuid(const char *uuid_str) -{ - struct uuidcmp cmp; - struct device *dev = NULL; - dev_t devt = 0; - int offset = 0; - char *slash; - - cmp.uuid = uuid_str; - - slash = strchr(uuid_str, '/'); - /* Check for optional partition number offset attributes. */ - if (slash) { - char c = 0; - - /* Explicitly fail on poor PARTUUID syntax. */ - if (sscanf(slash + 1, "PARTNROFF=%d%c", &offset, &c) != 1) - goto clear_root_wait; - cmp.len = slash - uuid_str; - } else { - cmp.len = strlen(uuid_str); - } - - if (!cmp.len) - goto clear_root_wait; - - dev = class_find_device(&block_class, NULL, &cmp, &match_dev_by_uuid); - if (!dev) - return 0; - - if (offset) { - /* - * Attempt to find the requested partition by adding an offset - * to the partition number found by UUID. 
- */ - devt = part_devt(dev_to_disk(dev), - dev_to_bdev(dev)->bd_partno + offset); - } else { - devt = dev->devt; - } - - put_device(dev); - return devt; - -clear_root_wait: - pr_err("VFS: PARTUUID= is invalid.\n" - "Expected PARTUUID=<valid-uuid-id>[/PARTNROFF=%%d]\n"); - if (root_wait) - pr_err("Disabling rootwait; root= is invalid.\n"); - root_wait = 0; - return 0; -} +__setup("root=", root_dev_setup); -/** - * match_dev_by_label - callback for finding a partition using its label - * @dev: device passed in by the caller - * @data: opaque pointer to the label to match - * - * Returns 1 if the device matches, and 0 otherwise. - */ -static int match_dev_by_label(struct device *dev, const void *data) +static int __init rootwait_setup(char *str) { - struct block_device *bdev = dev_to_bdev(dev); - const char *label = data; - - if (!bdev->bd_meta_info || strcmp(label, bdev->bd_meta_info->volname)) + if (*str) return 0; + root_wait = -1; return 1; } -static dev_t devt_from_partlabel(const char *label) -{ - struct device *dev; - dev_t devt = 0; - - dev = class_find_device(&block_class, NULL, label, &match_dev_by_label); - if (dev) { - devt = dev->devt; - put_device(dev); - } - - return devt; -} +__setup("rootwait", rootwait_setup); -static dev_t devt_from_devname(const char *name) +static int __init rootwait_timeout_setup(char *str) { - dev_t devt = 0; - int part; - char s[32]; - char *p; + int sec; - if (strlen(name) > 31) - return 0; - strcpy(s, name); - for (p = s; *p; p++) { - if (*p == '/') - *p = '!'; + if (kstrtoint(str, 0, &sec) || sec < 0) { + pr_warn("ignoring invalid rootwait value\n"); + goto ignore; } - devt = blk_lookup_devt(s, 0); - if (devt) - return devt; - - /* - * Try non-existent, but valid partition, which may only exist after - * opening the device, like partitioned md devices. 
- */ - while (p > s && isdigit(p[-1])) - p--; - if (p == s || !*p || *p == '0') - return 0; - - /* try disk name without <part number> */ - part = simple_strtoul(p, NULL, 10); - *p = '\0'; - devt = blk_lookup_devt(s, part); - if (devt) - return devt; - - /* try disk name without p<part number> */ - if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p') - return 0; - p[-1] = '\0'; - return blk_lookup_devt(s, part); -} -#endif /* CONFIG_BLOCK */ - -static dev_t devt_from_devnum(const char *name) -{ - unsigned maj, min, offset; - dev_t devt = 0; - char *p, dummy; - - if (sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2 || - sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset, &dummy) == 3) { - devt = MKDEV(maj, min); - if (maj != MAJOR(devt) || min != MINOR(devt)) - return 0; - } else { - devt = new_decode_dev(simple_strtoul(name, &p, 16)); - if (*p) - return 0; + if (check_mul_overflow(sec, MSEC_PER_SEC, &root_wait)) { + pr_warn("ignoring excessive rootwait value\n"); + goto ignore; } - return devt; -} - -/* - * Convert a name into device number. We accept the following variants: - * - * 1) <hex_major><hex_minor> device number in hexadecimal represents itself - * no leading 0x, for example b302. - * 2) /dev/nfs represents Root_NFS (0xff) - * 3) /dev/<disk_name> represents the device number of disk - * 4) /dev/<disk_name><decimal> represents the device number - * of partition - device number of disk plus the partition number - * 5) /dev/<disk_name>p<decimal> - same as the above, that form is - * used when disk name of partitioned disk ends on a digit. - * 6) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the - * unique id of a partition if the partition table provides it. - * The UUID may be either an EFI/GPT UUID, or refer to an MSDOS - * partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero- - * filled hex representation of the 32-bit "NT disk signature", and PP - * is a zero-filled hex representation of the 1-based partition number. 
- * 7) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to - * a partition with a known unique id. - * 8) <major>:<minor> major and minor number of the device separated by - * a colon. - * 9) PARTLABEL=<name> with name being the GPT partition label. - * MSDOS partitions do not support labels! - * 10) /dev/cifs represents Root_CIFS (0xfe) - * - * If name doesn't have fall into the categories above, we return (0,0). - * block_class is used to check if something is a disk name. If the disk - * name contains slashes, the device name has them replaced with - * bangs. - */ -dev_t name_to_dev_t(const char *name) -{ - if (strcmp(name, "/dev/nfs") == 0) - return Root_NFS; - if (strcmp(name, "/dev/cifs") == 0) - return Root_CIFS; - if (strcmp(name, "/dev/ram") == 0) - return Root_RAM0; -#ifdef CONFIG_BLOCK - if (strncmp(name, "PARTUUID=", 9) == 0) - return devt_from_partuuid(name + 9); - if (strncmp(name, "PARTLABEL=", 10) == 0) - return devt_from_partlabel(name + 10); - if (strncmp(name, "/dev/", 5) == 0) - return devt_from_devname(name + 5); -#endif - return devt_from_devnum(name); -} -EXPORT_SYMBOL_GPL(name_to_dev_t); - -static int __init root_dev_setup(char *line) -{ - strscpy(saved_root_name, line, sizeof(saved_root_name)); return 1; -} -__setup("root=", root_dev_setup); +ignore: + /* Fallback to indefinite wait */ + root_wait = -1; -static int __init rootwait_setup(char *str) -{ - if (*str) - return 0; - root_wait = 1; return 1; } -__setup("rootwait", rootwait_setup); +__setup("rootwait=", rootwait_timeout_setup); static char * __initdata root_mount_data; static int __init root_data_setup(char *str) @@ -329,7 +120,8 @@ static int __init fs_names_setup(char *str) static unsigned int __initdata root_delay; static int __init root_delay_setup(char *str) { - root_delay = simple_strtoul(str, NULL, 0); + if (kstrtouint(str, 0, &root_delay)) + return 0; return 1; } @@ -338,7 +130,7 @@ __setup("rootfstype=", fs_names_setup); __setup("rootdelay=", 
root_delay_setup); /* This can return zero length strings. Caller should check */ -static int __init split_fs_names(char *page, size_t size, char *names) +static int __init split_fs_names(char *page, size_t size) { int count = 1; char *p = page; @@ -368,8 +160,7 @@ static int __init do_mount_root(const char *name, const char *fs, if (!p) return -ENOMEM; data_page = page_address(p); - /* zero-pad. init_mount() will make sure it's terminated */ - strncpy(data_page, data, PAGE_SIZE); + strscpy_pad(data_page, data, PAGE_SIZE); } ret = init_mount(name, "/root", fs, flags, data_page); @@ -391,7 +182,7 @@ out: return ret; } -void __init mount_block_root(char *name, int flags) +void __init mount_root_generic(char *name, char *pretty_name, int flags) { struct page *page = alloc_page(GFP_KERNEL); char *fs_names = page_address(page); @@ -402,7 +193,7 @@ void __init mount_block_root(char *name, int flags) scnprintf(b, BDEVNAME_SIZE, "unknown-block(%u,%u)", MAJOR(ROOT_DEV), MINOR(ROOT_DEV)); if (root_fs_names) - num_fs = split_fs_names(fs_names, PAGE_SIZE, root_fs_names); + num_fs = split_fs_names(fs_names, PAGE_SIZE); else num_fs = list_bdev_fs_names(fs_names, PAGE_SIZE); retry: @@ -417,6 +208,9 @@ retry: goto out; case -EACCES: case -EINVAL: +#ifdef CONFIG_BLOCK + init_flush_fput(); +#endif continue; } /* @@ -425,10 +219,21 @@ retry: * and give them a list of the available devices */ printk("VFS: Cannot open root device \"%s\" or %s: error %d\n", - root_device_name, b, err); + pretty_name, b, err); printk("Please append a correct \"root=\" boot option; here are the available partitions:\n"); - printk_all_partitions(); + + if (root_fs_names) + num_fs = list_bdev_fs_names(fs_names, PAGE_SIZE); + if (!num_fs) + pr_err("Can't find any bdev filesystem to be used for mount!\n"); + else { + pr_err("List of all bdev filesystems:\n"); + for (i = 0, p = fs_names; i < num_fs; i++, p += strlen(p)+1) + pr_err(" %s", p); + pr_err("\n"); + } + panic("VFS: Unable to mount root fs on %s", b); 
} if (!(flags & SB_RDONLY)) { @@ -442,7 +247,7 @@ retry: for (i = 0, p = fs_names; i < num_fs; i++, p += strlen(p)+1) printk(" %s", p); printk("\n"); - panic("VFS: Unable to mount root fs on %s", b); + panic("VFS: Unable to mount root fs on \"%s\" or %s", pretty_name, b); out: put_page(page); } @@ -453,15 +258,14 @@ out: #define NFSROOT_TIMEOUT_MAX 30 #define NFSROOT_RETRY_MAX 5 -static int __init mount_nfs_root(void) +static void __init mount_nfs_root(void) { char *root_dev, *root_data; unsigned int timeout; - int try, err; + int try; - err = nfs_root_data(&root_dev, &root_data); - if (err != 0) - return 0; + if (nfs_root_data(&root_dev, &root_data)) + goto fail; /* * The server or network may not be ready, so try several @@ -470,10 +274,8 @@ static int __init mount_nfs_root(void) */ timeout = NFSROOT_TIMEOUT_MIN; for (try = 1; ; try++) { - err = do_mount_root(root_dev, "nfs", - root_mountflags, root_data); - if (err == 0) - return 1; + if (!do_mount_root(root_dev, "nfs", root_mountflags, root_data)) + return; if (try > NFSROOT_RETRY_MAX) break; @@ -483,34 +285,35 @@ static int __init mount_nfs_root(void) if (timeout > NFSROOT_TIMEOUT_MAX) timeout = NFSROOT_TIMEOUT_MAX; } - return 0; +fail: + pr_err("VFS: Unable to mount root fs via NFS.\n"); } -#endif +#else +static inline void mount_nfs_root(void) +{ +} +#endif /* CONFIG_ROOT_NFS */ #ifdef CONFIG_CIFS_ROOT -extern int cifs_root_data(char **dev, char **opts); - #define CIFSROOT_TIMEOUT_MIN 5 #define CIFSROOT_TIMEOUT_MAX 30 #define CIFSROOT_RETRY_MAX 5 -static int __init mount_cifs_root(void) +static void __init mount_cifs_root(void) { char *root_dev, *root_data; unsigned int timeout; - int try, err; + int try; - err = cifs_root_data(&root_dev, &root_data); - if (err != 0) - return 0; + if (cifs_root_data(&root_dev, &root_data)) + goto fail; timeout = CIFSROOT_TIMEOUT_MIN; for (try = 1; ; try++) { - err = do_mount_root(root_dev, "cifs", root_mountflags, - root_data); - if (err == 0) - return 1; + if 
(!do_mount_root(root_dev, "cifs", root_mountflags, + root_data)) + return; if (try > CIFSROOT_RETRY_MAX) break; @@ -519,9 +322,14 @@ static int __init mount_cifs_root(void) if (timeout > CIFSROOT_TIMEOUT_MAX) timeout = CIFSROOT_TIMEOUT_MAX; } - return 0; +fail: + pr_err("VFS: Unable to mount root fs via SMB.\n"); } -#endif +#else +static inline void mount_cifs_root(void) +{ +} +#endif /* CONFIG_CIFS_ROOT */ static bool __init fs_is_nodev(char *fstype) { @@ -536,7 +344,7 @@ static bool __init fs_is_nodev(char *fstype) return ret; } -static int __init mount_nodev_root(void) +static int __init mount_nodev_root(char *root_device_name) { char *fs_names, *fstype; int err = -EINVAL; @@ -545,7 +353,7 @@ static int __init mount_nodev_root(void) fs_names = (void *)__get_free_page(GFP_KERNEL); if (!fs_names) return -EINVAL; - num_fs = split_fs_names(fs_names, PAGE_SIZE, root_fs_names); + num_fs = split_fs_names(fs_names, PAGE_SIZE); for (i = 0, fstype = fs_names; i < num_fs; i++, fstype += strlen(fstype) + 1) { @@ -563,35 +371,92 @@ static int __init mount_nodev_root(void) return err; } -void __init mount_root(void) +#ifdef CONFIG_BLOCK +static void __init mount_block_root(char *root_device_name) { -#ifdef CONFIG_ROOT_NFS - if (ROOT_DEV == Root_NFS) { - if (!mount_nfs_root()) - printk(KERN_ERR "VFS: Unable to mount root fs via NFS.\n"); - return; + int err = create_dev("/dev/root", ROOT_DEV); + + if (err < 0) + pr_emerg("Failed to create /dev/root: %d\n", err); + mount_root_generic("/dev/root", root_device_name, root_mountflags); +} +#else +static inline void mount_block_root(char *root_device_name) +{ +} +#endif /* CONFIG_BLOCK */ + +void __init mount_root(char *root_device_name) +{ + switch (ROOT_DEV) { + case Root_NFS: + mount_nfs_root(); + break; + case Root_CIFS: + mount_cifs_root(); + break; + case Root_Generic: + mount_root_generic(root_device_name, root_device_name, + root_mountflags); + break; + case 0: + if (root_device_name && root_fs_names && + 
mount_nodev_root(root_device_name) == 0) + break; + fallthrough; + default: + mount_block_root(root_device_name); + break; } -#endif -#ifdef CONFIG_CIFS_ROOT - if (ROOT_DEV == Root_CIFS) { - if (!mount_cifs_root()) - printk(KERN_ERR "VFS: Unable to mount root fs via SMB.\n"); +} + +/* wait for any asynchronous scanning to complete */ +static void __init wait_for_root(char *root_device_name) +{ + ktime_t end; + + if (ROOT_DEV != 0) return; + + pr_info("Waiting for root device %s...\n", root_device_name); + + end = ktime_add_ms(ktime_get_raw(), root_wait); + + while (!driver_probe_done() || + early_lookup_bdev(root_device_name, &ROOT_DEV) < 0) { + msleep(5); + if (root_wait > 0 && ktime_after(ktime_get_raw(), end)) + break; } -#endif - if (ROOT_DEV == 0 && root_device_name && root_fs_names) { - if (mount_nodev_root() == 0) - return; - } -#ifdef CONFIG_BLOCK - { - int err = create_dev("/dev/root", ROOT_DEV); - if (err < 0) - pr_emerg("Failed to create /dev/root: %d\n", err); - mount_block_root("/dev/root", root_mountflags); + async_synchronize_full(); + +} + +static dev_t __init parse_root_device(char *root_device_name) +{ + int error; + dev_t dev; + + if (!strncmp(root_device_name, "mtd", 3) || + !strncmp(root_device_name, "ubi", 3)) + return Root_Generic; + if (strcmp(root_device_name, "/dev/nfs") == 0) + return Root_NFS; + if (strcmp(root_device_name, "/dev/cifs") == 0) + return Root_CIFS; + if (strcmp(root_device_name, "/dev/ram") == 0) + return Root_RAM0; + + error = early_lookup_bdev(root_device_name, &dev); + if (error) { + if (error == -EINVAL && root_wait) { + pr_err("Disabling rootwait; root= is invalid.\n"); + root_wait = 0; + } + return 0; } -#endif + return dev; } /* @@ -616,32 +481,15 @@ void __init prepare_namespace(void) md_run_setup(); - if (saved_root_name[0]) { - root_device_name = saved_root_name; - if (!strncmp(root_device_name, "mtd", 3) || - !strncmp(root_device_name, "ubi", 3)) { - mount_block_root(root_device_name, root_mountflags); - goto 
out; - } - ROOT_DEV = name_to_dev_t(root_device_name); - if (strncmp(root_device_name, "/dev/", 5) == 0) - root_device_name += 5; - } + if (saved_root_name[0]) + ROOT_DEV = parse_root_device(saved_root_name); - if (initrd_load()) + if (initrd_load(saved_root_name)) goto out; - /* wait for any asynchronous scanning to complete */ - if ((ROOT_DEV == 0) && root_wait) { - printk(KERN_INFO "Waiting for root device %s...\n", - saved_root_name); - while (driver_probe_done() != 0 || - (ROOT_DEV = name_to_dev_t(saved_root_name)) == 0) - msleep(5); - async_synchronize_full(); - } - - mount_root(); + if (root_wait) + wait_for_root(saved_root_name); + mount_root(saved_root_name); out: devtmpfs_mount(); init_mount(".", "/", NULL, MS_MOVE, NULL); @@ -660,12 +508,15 @@ static int rootfs_init_fs_context(struct fs_context *fc) struct file_system_type rootfs_fs_type = { .name = "rootfs", .init_fs_context = rootfs_init_fs_context, - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, }; void __init init_rootfs(void) { - if (IS_ENABLED(CONFIG_TMPFS) && !saved_root_name[0] && - (!root_fs_names || strstr(root_fs_names, "tmpfs"))) - is_tmpfs = true; + if (IS_ENABLED(CONFIG_TMPFS)) { + if (!saved_root_name[0] && !root_fs_names) + is_tmpfs = true; + else if (root_fs_names && !!strstr(root_fs_names, "tmpfs")) + is_tmpfs = true; + } } diff --git a/init/do_mounts.h b/init/do_mounts.h index 7a29ac3e427b..6069ea3eb80d 100644 --- a/init/do_mounts.h +++ b/init/do_mounts.h @@ -9,9 +9,11 @@ #include <linux/major.h> #include <linux/root_dev.h> #include <linux/init_syscalls.h> +#include <linux/task_work.h> +#include <linux/file.h> -void mount_block_root(char *name, int flags); -void mount_root(void); +void mount_root_generic(char *name, char *pretty_name, int flags); +void mount_root(char *root_device_name); extern int root_mountflags; static inline __init int create_dev(char *name, dev_t dev) @@ -33,11 +35,18 @@ static inline int rd_load_image(char *from) { return 0; } #endif #ifdef 
CONFIG_BLK_DEV_INITRD - -bool __init initrd_load(void); - +bool __init initrd_load(char *root_device_name); #else - -static inline bool initrd_load(void) { return false; } +static inline bool initrd_load(char *root_device_name) +{ + return false; + } #endif + +/* Ensure that async file closing finished to prevent spurious errors. */ +static inline void init_flush_fput(void) +{ + flush_delayed_fput(); + task_work_run(); +} diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 34731241377d..f6867bad0d78 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -21,7 +21,7 @@ phys_addr_t phys_initrd_start __initdata; unsigned long phys_initrd_size __initdata; #ifdef CONFIG_SYSCTL -static struct ctl_table kern_do_mounts_initrd_table[] = { +static const struct ctl_table kern_do_mounts_initrd_table[] = { { .procname = "real-root-dev", .data = &real_root_dev, @@ -29,7 +29,6 @@ static struct ctl_table kern_do_mounts_initrd_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { } }; static __init int kernel_do_mounts_initrd_sysctls_init(void) @@ -83,19 +82,20 @@ static int __init init_linuxrc(struct subprocess_info *info, struct cred *new) return 0; } -static void __init handle_initrd(void) +static void __init handle_initrd(char *root_device_name) { struct subprocess_info *info; static char *argv[] = { "linuxrc", NULL, }; extern char *envp_init[]; int error; - pr_warn("using deprecated initrd support, will be removed in 2021.\n"); + pr_warn("using deprecated initrd support, will be removed soon.\n"); real_root_dev = new_encode_dev(ROOT_DEV); create_dev("/dev/root.old", Root_RAM0); /* mount initrd on rootfs' /root */ - mount_block_root("/dev/root.old", root_mountflags & ~MS_RDONLY); + mount_root_generic("/dev/root.old", root_device_name, + root_mountflags & ~MS_RDONLY); init_mkdir("/old", 0700); init_chdir("/old"); @@ -117,7 +117,7 @@ static void __init handle_initrd(void) init_chdir("/"); ROOT_DEV = new_decode_dev(real_root_dev); - 
mount_root(); + mount_root(root_device_name); printk(KERN_NOTICE "Trying to move old root to /initrd ... "); error = init_mount("/old", "/root/initrd", NULL, MS_MOVE, NULL); @@ -133,7 +133,7 @@ static void __init handle_initrd(void) } } -bool __init initrd_load(void) +bool __init initrd_load(char *root_device_name) { if (mount_initrd) { create_dev("/dev/ram", Root_RAM0); @@ -145,7 +145,7 @@ bool __init initrd_load(void) */ if (rd_load_image("/initrd.image") && ROOT_DEV != Root_RAM0) { init_unlink("/initrd.image"); - handle_initrd(); + handle_initrd(root_device_name); return true; } } diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index ac021ae6e6fa..eddbe5cb0413 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -7,6 +7,7 @@ #include <uapi/linux/cramfs_fs.h> #include <linux/initrd.h> #include <linux/string.h> +#include <linux/string_choices.h> #include <linux/slab.h> #include "do_mounts.h" @@ -28,8 +29,7 @@ int __initdata rd_image_start; /* starting block # of image */ static int __init ramdisk_start_setup(char *str) { - rd_image_start = simple_strtol(str,NULL,0); - return 1; + return kstrtoint(str, 0, &rd_image_start) == 0; } __setup("ramdisk_start=", ramdisk_start_setup); @@ -186,14 +186,12 @@ static unsigned long nr_blocks(struct file *file) int __init rd_load_image(char *from) { int res = 0; - unsigned long rd_blocks, devblocks; + unsigned long rd_blocks, devblocks, nr_disks; int nblocks, i; char *buf = NULL; unsigned short rotate = 0; decompress_fn decompressor = NULL; -#if !defined(CONFIG_S390) char rotator[4] = { '|' , '/' , '-' , '\\' }; -#endif out_file = filp_open("/dev/ram", O_RDWR, 0); if (IS_ERR(out_file)) @@ -244,8 +242,9 @@ int __init rd_load_image(char *from) goto done; } - printk(KERN_NOTICE "RAMDISK: Loading %dKiB [%ld disk%s] into ram disk... ", - nblocks, ((nblocks-1)/devblocks)+1, nblocks>devblocks ? "s" : ""); + nr_disks = (nblocks - 1) / devblocks + 1; + pr_notice("RAMDISK: Loading %dKiB [%ld disk%s] into ram disk... 
", + nblocks, nr_disks, str_plural(nr_disks)); for (i = 0; i < nblocks; i++) { if (i && (i % devblocks == 0)) { pr_cont("done disk #1.\n"); @@ -255,12 +254,10 @@ int __init rd_load_image(char *from) } kernel_read(in_file, buf, BLOCK_SIZE, &in_pos); kernel_write(out_file, buf, BLOCK_SIZE, &out_pos); -#if !defined(CONFIG_S390) - if (!(i % 16)) { + if (!IS_ENABLED(CONFIG_S390) && !(i % 16)) { pr_cont("%c\b", rotator[rotate & 0x3]); rotate++; } -#endif } pr_cont("done.\n"); diff --git a/init/init_task.c b/init/init_task.c index ff6c4b9bfe6b..49b13d7c3985 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -6,12 +6,14 @@ #include <linux/sched/sysctl.h> #include <linux/sched/rt.h> #include <linux/sched/task.h> +#include <linux/sched/ext.h> #include <linux/init.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/audit.h> #include <linux/numa.h> #include <linux/scs.h> +#include <linux/plist.h> #include <linux/uaccess.h> @@ -25,11 +27,15 @@ static struct signal_struct init_signals = { }, .multiprocess = HLIST_HEAD_INIT, .rlim = INIT_RLIMITS, +#ifdef CONFIG_CGROUPS + .cgroup_threadgroup_rwsem = __RWSEM_INITIALIZER(init_signals.cgroup_threadgroup_rwsem), +#endif .cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex), .exec_update_lock = __RWSEM_INITIALIZER(init_signals.exec_update_lock), #ifdef CONFIG_POSIX_TIMERS - .posix_timers = LIST_HEAD_INIT(init_signals.posix_timers), - .cputimer = { + .posix_timers = HLIST_HEAD_INIT, + .ignored_posix_timers = HLIST_HEAD_INIT, + .cputimer = { .cputime_atomic = INIT_CPUTIME_ATOMIC, }, #endif @@ -51,22 +57,43 @@ static struct sighand_struct init_sighand = { }; #ifdef CONFIG_SHADOW_CALL_STACK -unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] - __init_task_data = { +unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] = { [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC }; #endif +/* init to 2 - one for init_task, one to ensure it is never freed */ +static struct group_info init_groups = { 
.usage = REFCOUNT_INIT(2) }; + +/* + * The initial credentials for the initial task + */ +static struct cred init_cred = { + .usage = ATOMIC_INIT(4), + .uid = GLOBAL_ROOT_UID, + .gid = GLOBAL_ROOT_GID, + .suid = GLOBAL_ROOT_UID, + .sgid = GLOBAL_ROOT_GID, + .euid = GLOBAL_ROOT_UID, + .egid = GLOBAL_ROOT_GID, + .fsuid = GLOBAL_ROOT_UID, + .fsgid = GLOBAL_ROOT_GID, + .securebits = SECUREBITS_DEFAULT, + .cap_inheritable = CAP_EMPTY_SET, + .cap_permitted = CAP_FULL_SET, + .cap_effective = CAP_FULL_SET, + .cap_bset = CAP_FULL_SET, + .user = INIT_USER, + .user_ns = &init_user_ns, + .group_info = &init_groups, + .ucounts = &init_ucounts, +}; + /* * Set up the first task table, touch at your own risk!. Base=0, * limit=0x1fffff (=2MB) */ -struct task_struct init_task -#ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK - __init_task_data -#endif - __aligned(L1_CACHE_BYTES) -= { +struct task_struct init_task __aligned(L1_CACHE_BYTES) = { #ifdef CONFIG_THREAD_INFO_IN_TASK .thread_info = INIT_THREAD_INFO(init_task), .stack_refcount = REFCOUNT_INIT(1), @@ -82,9 +109,11 @@ struct task_struct init_task .cpus_ptr = &init_task.cpus_mask, .user_cpus_ptr = NULL, .cpus_mask = CPU_MASK_ALL, + .max_allowed_capacity = SCHED_CAPACITY_SCALE, .nr_cpus_allowed= NR_CPUS, .mm = NULL, .active_mm = &init_mm, + .faults_disabled_mapping = NULL, .restart_block = { .fn = do_no_restart_syscall, }, @@ -102,6 +131,17 @@ struct task_struct init_task #ifdef CONFIG_CGROUP_SCHED .sched_task_group = &root_task_group, #endif +#ifdef CONFIG_SCHED_CLASS_EXT + .scx = { + .dsq_list.node = LIST_HEAD_INIT(init_task.scx.dsq_list.node), + .sticky_cpu = -1, + .holding_cpu = -1, + .runnable_node = LIST_HEAD_INIT(init_task.scx.runnable_node), + .runnable_at = INITIAL_JIFFIES, + .ddsp_dsq_id = SCX_DSQ_INVALID, + .slice = SCX_SLICE_DFL, + }, +#endif .ptraced = LIST_HEAD_INIT(init_task.ptraced), .ptrace_entry = LIST_HEAD_INIT(init_task.ptrace_entry), .real_parent = &init_task, @@ -132,7 +172,6 @@ struct task_struct init_task .pi_lock 
= __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock), .timer_slack_ns = 50000, /* 50 usec default slack */ .thread_pid = &init_struct_pid, - .thread_group = LIST_HEAD_INIT(init_task.thread_group), .thread_node = LIST_HEAD_INIT(init_signals.thread_head), #ifdef CONFIG_AUDIT .loginuid = INVALID_UID, @@ -152,6 +191,7 @@ struct task_struct init_task .rcu_tasks_holdout = false, .rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list), .rcu_tasks_idle_cpu = -1, + .rcu_tasks_exit_list = LIST_HEAD_INIT(init_task.rcu_tasks_exit_list), #endif #ifdef CONFIG_TASKS_TRACE_RCU .trc_reader_nesting = 0, @@ -202,7 +242,7 @@ struct task_struct init_task .trace_recursion = 0, #endif #ifdef CONFIG_LIVEPATCH - .patch_state = KLP_UNDEFINED, + .patch_state = KLP_TRANSITION_IDLE, #endif #ifdef CONFIG_SECURITY .security = NULL, @@ -210,6 +250,9 @@ struct task_struct init_task #ifdef CONFIG_SECCOMP_FILTER .seccomp = { .filter_count = ATOMIC_INIT(0) }, #endif +#ifdef CONFIG_SCHED_MM_CID + .mm_cid = { .cid = MM_CID_UNSET, }, +#endif }; EXPORT_SYMBOL(init_task); diff --git a/init/initramfs.c b/init/initramfs.c index 62321883fe61..6ddbfb17fb8f 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/init.h> #include <linux/async.h> +#include <linux/export.h> #include <linux/fs.h> #include <linux/slab.h> #include <linux/types.h> @@ -11,12 +12,17 @@ #include <linux/syscalls.h> #include <linux/utime.h> #include <linux/file.h> +#include <linux/kstrtox.h> #include <linux/memblock.h> #include <linux/mm.h> #include <linux/namei.h> #include <linux/init_syscalls.h> -#include <linux/task_work.h> #include <linux/umh.h> +#include <linux/security.h> +#include <linux/overflow.h> + +#include "do_mounts.h" +#include "initramfs_internal.h" static __initdata bool csum_present; static __initdata u32 io_csum; @@ -59,15 +65,8 @@ static void __init error(char *x) message = x; } -static void panic_show_mem(const char *fmt, ...) 
-{ - va_list args; - - show_mem(0, NULL); - va_start(args, fmt); - panic(fmt, args); - va_end(args); -} +#define panic_show_mem(fmt, ...) \ + ({ show_mem(); panic(fmt, ##__VA_ARGS__); }) /* link hash */ @@ -79,6 +78,7 @@ static __initdata struct hash { struct hash *next; char name[N_ALIGN(PATH_MAX)]; } *head[32]; +static __initdata bool hardlink_seen; static inline int hash(int major, int minor, int ino) { @@ -109,22 +109,24 @@ static char __init *find_link(int major, int minor, int ino, q->minor = minor; q->ino = ino; q->mode = mode; - strcpy(q->name, name); + strscpy(q->name, name); q->next = NULL; *p = q; + hardlink_seen = true; return NULL; } static void __init free_hash(void) { struct hash **p, *q; - for (p = head; p < head + 32; p++) { + for (p = head; hardlink_seen && p < head + 32; p++) { while (*p) { q = *p; *p = q->next; kfree(q); } } + hardlink_seen = false; } #ifdef CONFIG_INITRAMFS_PRESERVE_MTIME @@ -147,12 +149,11 @@ struct dir_entry { char name[]; }; -static void __init dir_add(const char *name, time64_t mtime) +static void __init dir_add(const char *name, size_t nlen, time64_t mtime) { - size_t nlen = strlen(name) + 1; struct dir_entry *de; - de = kmalloc(sizeof(struct dir_entry) + nlen, GFP_KERNEL); + de = kmalloc(struct_size(de, name, nlen), GFP_KERNEL); if (!de) panic_show_mem("can't allocate dir_entry buffer"); INIT_LIST_HEAD(&de->list); @@ -173,7 +174,7 @@ static void __init dir_utime(void) #else static void __init do_utime(char *filename, time64_t mtime) {} static void __init do_utime_path(const struct path *path, time64_t mtime) {} -static void __init dir_add(const char *name, time64_t mtime) {} +static void __init dir_add(const char *name, size_t nlen, time64_t mtime) {} static void __init dir_utime(void) {} #endif @@ -192,14 +193,11 @@ static __initdata u32 hdr_csum; static void __init parse_header(char *s) { unsigned long parsed[13]; - char buf[9]; int i; - buf[8] = '\0'; - for (i = 0, s += 6; i < 13; i++, s += 8) { - memcpy(buf, s, 8); - 
parsed[i] = simple_strtoul(buf, NULL, 16); - } + for (i = 0, s += 6; i < 13; i++, s += 8) + parsed[i] = simple_strntoul(s, NULL, 16, 8); + ino = parsed[0]; mode = parsed[1]; uid = parsed[2]; @@ -260,7 +258,7 @@ static __initdata char *header_buf, *symlink_buf, *name_buf; static int __init do_start(void) { - read_into(header_buf, 110, GotHeader); + read_into(header_buf, CPIO_HDRLEN, GotHeader); return 0; } @@ -364,6 +362,15 @@ static int __init do_name(void) { state = SkipIt; next_state = Reset; + + /* name_len > 0 && name_len <= PATH_MAX checked in do_header */ + if (collected[name_len - 1] != '\0') { + pr_err("initramfs name without nulterm: %.*s\n", + (int)name_len, collected); + error("malformed archive"); + return 1; + } + if (strcmp(collected, "TRAILER!!!") == 0) { free_hash(); return 0; @@ -372,7 +379,7 @@ static int __init do_name(void) if (S_ISREG(mode)) { int ml = maybe_link(); if (ml >= 0) { - int openflags = O_WRONLY|O_CREAT; + int openflags = O_WRONLY|O_CREAT|O_LARGEFILE; if (ml != 1) openflags |= O_TRUNC; wfile = filp_open(collected, openflags, mode); @@ -391,7 +398,7 @@ static int __init do_name(void) init_mkdir(collected, mode); init_chown(collected, uid, gid, 0); init_chmod(collected, mode); - dir_add(collected, mtime); + dir_add(collected, name_len, mtime); } else if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) { if (maybe_link() == 0) { @@ -428,6 +435,12 @@ static int __init do_copy(void) static int __init do_symlink(void) { + if (collected[name_len - 1] != '\0') { + pr_err("initramfs symlink without nulterm: %.*s\n", + (int)name_len, collected); + error("malformed archive"); + return 1; + } collected[N_ALIGN(name_len) + body_len] = '\0'; clean_path(collected, 0); init_symlink(collected + N_ALIGN(name_len), collected); @@ -486,20 +499,33 @@ static unsigned long my_inptr __initdata; /* index of next byte to be processed #include <linux/decompress/generic.h> -static char * __init unpack_to_rootfs(char *buf, unsigned long len) 
+/** + * unpack_to_rootfs - decompress and extract an initramfs archive + * @buf: input initramfs archive to extract + * @len: length of initramfs data to process + * + * Returns: NULL for success or an error message string + * + * This symbol shouldn't be used externally. It's available for unit tests. + */ +char * __init unpack_to_rootfs(char *buf, unsigned long len) { long written; decompress_fn decompress; const char *compress_name; - static __initdata char msg_buf[64]; - - header_buf = kmalloc(110, GFP_KERNEL); - symlink_buf = kmalloc(PATH_MAX + N_ALIGN(PATH_MAX) + 1, GFP_KERNEL); - name_buf = kmalloc(N_ALIGN(PATH_MAX), GFP_KERNEL); + struct { + char header[CPIO_HDRLEN]; + char symlink[PATH_MAX + N_ALIGN(PATH_MAX) + 1]; + char name[N_ALIGN(PATH_MAX)]; + } *bufs = kmalloc(sizeof(*bufs), GFP_KERNEL); - if (!header_buf || !symlink_buf || !name_buf) + if (!bufs) panic_show_mem("can't allocate buffers"); + header_buf = bufs->header; + symlink_buf = bufs->symlink; + name_buf = bufs->name; + state = Start; this_header = 0; message = NULL; @@ -527,12 +553,9 @@ static char * __init unpack_to_rootfs(char *buf, unsigned long len) if (res) error("decompressor failed"); } else if (compress_name) { - if (!message) { - snprintf(msg_buf, sizeof msg_buf, - "compression method %s not configured", - compress_name); - message = msg_buf; - } + pr_err("compression method %s not configured\n", + compress_name); + error("decompressor failed"); } else error("invalid magic at start of compressed archive"); if (state != Reset) @@ -542,9 +565,9 @@ static char * __init unpack_to_rootfs(char *buf, unsigned long len) len -= my_inptr; } dir_utime(); - kfree(name_buf); - kfree(symlink_buf); - kfree(header_buf); + /* free any hardlink state collected without optional TRAILER!!! 
*/ + free_hash(); + kfree(bufs); return message; } @@ -571,8 +594,7 @@ __setup("keepinitrd", keepinitrd_setup); static bool __initdata initramfs_async = true; static int __init initramfs_async_setup(char *str) { - strtobool(str, &initramfs_async); - return 1; + return kstrtobool(str, &initramfs_async) == 0; } __setup("initramfs_async=", initramfs_async_setup); @@ -581,6 +603,8 @@ extern unsigned long __initramfs_size; #include <linux/initrd.h> #include <linux/kexec.h> +static BIN_ATTR(initrd, 0440, sysfs_bin_attr_simple_read, NULL, 0); + void __init reserve_initrd_mem(void) { phys_addr_t start; @@ -639,7 +663,7 @@ void __weak __init free_initrd_mem(unsigned long start, unsigned long end) "initrd"); } -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_CRASH_RESERVE static bool __init kexec_free_initrd(void) { unsigned long crashk_start = (unsigned long)__va(crashk_res.start); @@ -676,11 +700,9 @@ static void __init populate_initrd_image(char *err) struct file *file; loff_t pos = 0; - unpack_to_rootfs(__initramfs_start, __initramfs_size); - printk(KERN_INFO "rootfs image is not initramfs (%s); looks like an initrd\n", err); - file = filp_open("/initrd.image", O_WRONLY | O_CREAT, 0700); + file = filp_open("/initrd.image", O_WRONLY|O_CREAT|O_LARGEFILE, 0700); if (IS_ERR(file)) return; @@ -718,17 +740,24 @@ static void __init do_populate_rootfs(void *unused, async_cookie_t cookie) } done: + security_initramfs_populated(); + /* * If the initrd region is overlapped with crashkernel reserved region, * free only memory that is not part of crashkernel region. 
*/ - if (!do_retain_initrd && initrd_start && !kexec_free_initrd()) + if (!do_retain_initrd && initrd_start && !kexec_free_initrd()) { free_initrd_mem(initrd_start, initrd_end); + } else if (do_retain_initrd && initrd_start) { + bin_attr_initrd.size = initrd_end - initrd_start; + bin_attr_initrd.private = (void *)initrd_start; + if (sysfs_create_bin_file(firmware_kobj, &bin_attr_initrd)) + pr_err("Failed to create initrd sysfs file"); + } initrd_start = 0; initrd_end = 0; - flush_delayed_fput(); - task_work_run(); + init_flush_fput(); } static ASYNC_DOMAIN_EXCLUSIVE(initramfs_domain); diff --git a/init/initramfs_internal.h b/init/initramfs_internal.h new file mode 100644 index 000000000000..233dad16b0a0 --- /dev/null +++ b/init/initramfs_internal.h @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef __INITRAMFS_INTERNAL_H__ +#define __INITRAMFS_INTERNAL_H__ + +char *unpack_to_rootfs(char *buf, unsigned long len); +#define CPIO_HDRLEN 110 + +#endif diff --git a/init/initramfs_test.c b/init/initramfs_test.c new file mode 100644 index 000000000000..5d2db455e60c --- /dev/null +++ b/init/initramfs_test.c @@ -0,0 +1,472 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <kunit/test.h> +#include <linux/fcntl.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/init_syscalls.h> +#include <linux/stringify.h> +#include <linux/timekeeping.h> +#include "initramfs_internal.h" + +struct initramfs_test_cpio { + char *magic; + unsigned int ino; + unsigned int mode; + unsigned int uid; + unsigned int gid; + unsigned int nlink; + unsigned int mtime; + unsigned int filesize; + unsigned int devmajor; + unsigned int devminor; + unsigned int rdevmajor; + unsigned int rdevminor; + unsigned int namesize; + unsigned int csum; + char *fname; + char *data; +}; + +static size_t fill_cpio(struct initramfs_test_cpio *cs, size_t csz, char *out) +{ + int i; + size_t off = 0; + + for (i = 0; i < csz; i++) { + char *pos = &out[off]; + struct initramfs_test_cpio *c = &cs[i]; 
+ size_t thislen; + + /* +1 to account for nulterm */ + thislen = sprintf(pos, "%s" + "%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x" + "%s", + c->magic, c->ino, c->mode, c->uid, c->gid, c->nlink, + c->mtime, c->filesize, c->devmajor, c->devminor, + c->rdevmajor, c->rdevminor, c->namesize, c->csum, + c->fname) + 1; + + pr_debug("packing (%zu): %.*s\n", thislen, (int)thislen, pos); + if (thislen != CPIO_HDRLEN + c->namesize) + pr_debug("padded to: %u\n", CPIO_HDRLEN + c->namesize); + off += CPIO_HDRLEN + c->namesize; + while (off & 3) + out[off++] = '\0'; + + memcpy(&out[off], c->data, c->filesize); + off += c->filesize; + while (off & 3) + out[off++] = '\0'; + } + + return off; +} + +static void __init initramfs_test_extract(struct kunit *test) +{ + char *err, *cpio_srcbuf; + size_t len; + struct timespec64 ts_before, ts_after; + struct kstat st = {}; + struct initramfs_test_cpio c[] = { { + .magic = "070701", + .ino = 1, + .mode = S_IFREG | 0777, + .uid = 12, + .gid = 34, + .nlink = 1, + .mtime = 56, + .filesize = 0, + .devmajor = 0, + .devminor = 1, + .rdevmajor = 0, + .rdevminor = 0, + .namesize = sizeof("initramfs_test_extract"), + .csum = 0, + .fname = "initramfs_test_extract", + }, { + .magic = "070701", + .ino = 2, + .mode = S_IFDIR | 0777, + .nlink = 1, + .mtime = 57, + .devminor = 1, + .namesize = sizeof("initramfs_test_extract_dir"), + .fname = "initramfs_test_extract_dir", + }, { + .magic = "070701", + .namesize = sizeof("TRAILER!!!"), + .fname = "TRAILER!!!", + } }; + + /* +3 to cater for any 4-byte end-alignment */ + cpio_srcbuf = kzalloc(ARRAY_SIZE(c) * (CPIO_HDRLEN + PATH_MAX + 3), + GFP_KERNEL); + len = fill_cpio(c, ARRAY_SIZE(c), cpio_srcbuf); + + ktime_get_real_ts64(&ts_before); + err = unpack_to_rootfs(cpio_srcbuf, len); + ktime_get_real_ts64(&ts_after); + if (err) { + KUNIT_FAIL(test, "unpack failed %s", err); + goto out; + } + + KUNIT_EXPECT_EQ(test, init_stat(c[0].fname, &st, 0), 0); + KUNIT_EXPECT_TRUE(test, S_ISREG(st.mode)); + 
KUNIT_EXPECT_TRUE(test, uid_eq(st.uid, KUIDT_INIT(c[0].uid))); + KUNIT_EXPECT_TRUE(test, gid_eq(st.gid, KGIDT_INIT(c[0].gid))); + KUNIT_EXPECT_EQ(test, st.nlink, 1); + if (IS_ENABLED(CONFIG_INITRAMFS_PRESERVE_MTIME)) { + KUNIT_EXPECT_EQ(test, st.mtime.tv_sec, c[0].mtime); + } else { + KUNIT_EXPECT_GE(test, st.mtime.tv_sec, ts_before.tv_sec); + KUNIT_EXPECT_LE(test, st.mtime.tv_sec, ts_after.tv_sec); + } + KUNIT_EXPECT_EQ(test, st.blocks, c[0].filesize); + + KUNIT_EXPECT_EQ(test, init_stat(c[1].fname, &st, 0), 0); + KUNIT_EXPECT_TRUE(test, S_ISDIR(st.mode)); + if (IS_ENABLED(CONFIG_INITRAMFS_PRESERVE_MTIME)) { + KUNIT_EXPECT_EQ(test, st.mtime.tv_sec, c[1].mtime); + } else { + KUNIT_EXPECT_GE(test, st.mtime.tv_sec, ts_before.tv_sec); + KUNIT_EXPECT_LE(test, st.mtime.tv_sec, ts_after.tv_sec); + } + + KUNIT_EXPECT_EQ(test, init_unlink(c[0].fname), 0); + KUNIT_EXPECT_EQ(test, init_rmdir(c[1].fname), 0); +out: + kfree(cpio_srcbuf); +} + +/* + * Don't terminate filename. Previously, the cpio filename field was passed + * directly to filp_open(collected, O_CREAT|..) without nulterm checks. See + * https://lore.kernel.org/linux-fsdevel/20241030035509.20194-2-ddiss@suse.de + */ +static void __init initramfs_test_fname_overrun(struct kunit *test) +{ + char *err, *cpio_srcbuf; + size_t len, suffix_off; + struct initramfs_test_cpio c[] = { { + .magic = "070701", + .ino = 1, + .mode = S_IFREG | 0777, + .uid = 0, + .gid = 0, + .nlink = 1, + .mtime = 1, + .filesize = 0, + .devmajor = 0, + .devminor = 1, + .rdevmajor = 0, + .rdevminor = 0, + .namesize = sizeof("initramfs_test_fname_overrun"), + .csum = 0, + .fname = "initramfs_test_fname_overrun", + } }; + + /* + * poison cpio source buffer, so we can detect overrun. source + * buffer is used by read_into() when hdr or fname + * are already available (e.g. no compression). 
+ */ + cpio_srcbuf = kmalloc(CPIO_HDRLEN + PATH_MAX + 3, GFP_KERNEL); + memset(cpio_srcbuf, 'B', CPIO_HDRLEN + PATH_MAX + 3); + /* limit overrun to avoid crashes / filp_open() ENAMETOOLONG */ + cpio_srcbuf[CPIO_HDRLEN + strlen(c[0].fname) + 20] = '\0'; + + len = fill_cpio(c, ARRAY_SIZE(c), cpio_srcbuf); + /* overwrite trailing fname terminator and padding */ + suffix_off = len - 1; + while (cpio_srcbuf[suffix_off] == '\0') { + cpio_srcbuf[suffix_off] = 'P'; + suffix_off--; + } + + err = unpack_to_rootfs(cpio_srcbuf, len); + KUNIT_EXPECT_NOT_NULL(test, err); + + kfree(cpio_srcbuf); +} + +static void __init initramfs_test_data(struct kunit *test) +{ + char *err, *cpio_srcbuf; + size_t len; + struct file *file; + struct initramfs_test_cpio c[] = { { + .magic = "070701", + .ino = 1, + .mode = S_IFREG | 0777, + .uid = 0, + .gid = 0, + .nlink = 1, + .mtime = 1, + .filesize = sizeof("ASDF") - 1, + .devmajor = 0, + .devminor = 1, + .rdevmajor = 0, + .rdevminor = 0, + .namesize = sizeof("initramfs_test_data"), + .csum = 0, + .fname = "initramfs_test_data", + .data = "ASDF", + } }; + + /* +6 for max name and data 4-byte padding */ + cpio_srcbuf = kmalloc(CPIO_HDRLEN + c[0].namesize + c[0].filesize + 6, + GFP_KERNEL); + + len = fill_cpio(c, ARRAY_SIZE(c), cpio_srcbuf); + + err = unpack_to_rootfs(cpio_srcbuf, len); + KUNIT_EXPECT_NULL(test, err); + + file = filp_open(c[0].fname, O_RDONLY, 0); + if (IS_ERR(file)) { + KUNIT_FAIL(test, "open failed"); + goto out; + } + + /* read back file contents into @cpio_srcbuf and confirm match */ + len = kernel_read(file, cpio_srcbuf, c[0].filesize, NULL); + KUNIT_EXPECT_EQ(test, len, c[0].filesize); + KUNIT_EXPECT_MEMEQ(test, cpio_srcbuf, c[0].data, len); + + fput(file); + KUNIT_EXPECT_EQ(test, init_unlink(c[0].fname), 0); +out: + kfree(cpio_srcbuf); +} + +static void __init initramfs_test_csum(struct kunit *test) +{ + char *err, *cpio_srcbuf; + size_t len; + struct initramfs_test_cpio c[] = { { + /* 070702 magic indicates a valid csum is 
present */ + .magic = "070702", + .ino = 1, + .mode = S_IFREG | 0777, + .nlink = 1, + .filesize = sizeof("ASDF") - 1, + .devminor = 1, + .namesize = sizeof("initramfs_test_csum"), + .csum = 'A' + 'S' + 'D' + 'F', + .fname = "initramfs_test_csum", + .data = "ASDF", + }, { + /* mix csum entry above with no-csum entry below */ + .magic = "070701", + .ino = 2, + .mode = S_IFREG | 0777, + .nlink = 1, + .filesize = sizeof("ASDF") - 1, + .devminor = 1, + .namesize = sizeof("initramfs_test_csum_not_here"), + /* csum ignored */ + .csum = 5555, + .fname = "initramfs_test_csum_not_here", + .data = "ASDF", + } }; + + cpio_srcbuf = kmalloc(8192, GFP_KERNEL); + + len = fill_cpio(c, ARRAY_SIZE(c), cpio_srcbuf); + + err = unpack_to_rootfs(cpio_srcbuf, len); + KUNIT_EXPECT_NULL(test, err); + + KUNIT_EXPECT_EQ(test, init_unlink(c[0].fname), 0); + KUNIT_EXPECT_EQ(test, init_unlink(c[1].fname), 0); + + /* mess up the csum and confirm that unpack fails */ + c[0].csum--; + len = fill_cpio(c, ARRAY_SIZE(c), cpio_srcbuf); + + err = unpack_to_rootfs(cpio_srcbuf, len); + KUNIT_EXPECT_NOT_NULL(test, err); + + /* + * file (with content) is still retained in case of bad-csum abort. + * Perhaps we should change this. 
+ */ + KUNIT_EXPECT_EQ(test, init_unlink(c[0].fname), 0); + KUNIT_EXPECT_EQ(test, init_unlink(c[1].fname), -ENOENT); + kfree(cpio_srcbuf); +} + +/* + * hardlink hashtable may leak when the archive omits a trailer: + * https://lore.kernel.org/r/20241107002044.16477-10-ddiss@suse.de/ + */ +static void __init initramfs_test_hardlink(struct kunit *test) +{ + char *err, *cpio_srcbuf; + size_t len; + struct kstat st0, st1; + struct initramfs_test_cpio c[] = { { + .magic = "070701", + .ino = 1, + .mode = S_IFREG | 0777, + .nlink = 2, + .devminor = 1, + .namesize = sizeof("initramfs_test_hardlink"), + .fname = "initramfs_test_hardlink", + }, { + /* hardlink data is present in last archive entry */ + .magic = "070701", + .ino = 1, + .mode = S_IFREG | 0777, + .nlink = 2, + .filesize = sizeof("ASDF") - 1, + .devminor = 1, + .namesize = sizeof("initramfs_test_hardlink_link"), + .fname = "initramfs_test_hardlink_link", + .data = "ASDF", + } }; + + cpio_srcbuf = kmalloc(8192, GFP_KERNEL); + + len = fill_cpio(c, ARRAY_SIZE(c), cpio_srcbuf); + + err = unpack_to_rootfs(cpio_srcbuf, len); + KUNIT_EXPECT_NULL(test, err); + + KUNIT_EXPECT_EQ(test, init_stat(c[0].fname, &st0, 0), 0); + KUNIT_EXPECT_EQ(test, init_stat(c[1].fname, &st1, 0), 0); + KUNIT_EXPECT_EQ(test, st0.ino, st1.ino); + KUNIT_EXPECT_EQ(test, st0.nlink, 2); + KUNIT_EXPECT_EQ(test, st1.nlink, 2); + + KUNIT_EXPECT_EQ(test, init_unlink(c[0].fname), 0); + KUNIT_EXPECT_EQ(test, init_unlink(c[1].fname), 0); + + kfree(cpio_srcbuf); +} + +#define INITRAMFS_TEST_MANY_LIMIT 1000 +#define INITRAMFS_TEST_MANY_PATH_MAX (sizeof("initramfs_test_many-") \ + + sizeof(__stringify(INITRAMFS_TEST_MANY_LIMIT))) +static void __init initramfs_test_many(struct kunit *test) +{ + char *err, *cpio_srcbuf, *p; + size_t len = INITRAMFS_TEST_MANY_LIMIT * + (CPIO_HDRLEN + INITRAMFS_TEST_MANY_PATH_MAX + 3); + char thispath[INITRAMFS_TEST_MANY_PATH_MAX]; + int i; + + p = cpio_srcbuf = kmalloc(len, GFP_KERNEL); + + for (i = 0; i < 
INITRAMFS_TEST_MANY_LIMIT; i++) { + struct initramfs_test_cpio c = { + .magic = "070701", + .ino = i, + .mode = S_IFREG | 0777, + .nlink = 1, + .devminor = 1, + .fname = thispath, + }; + + c.namesize = 1 + sprintf(thispath, "initramfs_test_many-%d", i); + p += fill_cpio(&c, 1, p); + } + + len = p - cpio_srcbuf; + err = unpack_to_rootfs(cpio_srcbuf, len); + KUNIT_EXPECT_NULL(test, err); + + for (i = 0; i < INITRAMFS_TEST_MANY_LIMIT; i++) { + sprintf(thispath, "initramfs_test_many-%d", i); + KUNIT_EXPECT_EQ(test, init_unlink(thispath), 0); + } + + kfree(cpio_srcbuf); +} + +/* + * An initramfs filename is namesize in length, including the zero-terminator. + * A filename can be zero-terminated prior to namesize, with the remainder used + * as padding. This can be useful for e.g. alignment of file data segments with + * a 4KB filesystem block, allowing for extent sharing (reflinks) between cpio + * source and destination. This hack works with both GNU cpio and initramfs, as + * long as PATH_MAX isn't exceeded. 
+ */ +static void __init initramfs_test_fname_pad(struct kunit *test) +{ + char *err; + size_t len; + struct file *file; + char fdata[] = "this file data is aligned at 4K in the archive"; + struct test_fname_pad { + char padded_fname[4096 - CPIO_HDRLEN]; + char cpio_srcbuf[CPIO_HDRLEN + PATH_MAX + 3 + sizeof(fdata)]; + } *tbufs = kzalloc(sizeof(struct test_fname_pad), GFP_KERNEL); + struct initramfs_test_cpio c[] = { { + .magic = "070701", + .ino = 1, + .mode = S_IFREG | 0777, + .uid = 0, + .gid = 0, + .nlink = 1, + .mtime = 1, + .filesize = sizeof(fdata), + .devmajor = 0, + .devminor = 1, + .rdevmajor = 0, + .rdevminor = 0, + /* align file data at 4K archive offset via padded fname */ + .namesize = 4096 - CPIO_HDRLEN, + .csum = 0, + .fname = tbufs->padded_fname, + .data = fdata, + } }; + + memcpy(tbufs->padded_fname, "padded_fname", sizeof("padded_fname")); + len = fill_cpio(c, ARRAY_SIZE(c), tbufs->cpio_srcbuf); + + err = unpack_to_rootfs(tbufs->cpio_srcbuf, len); + KUNIT_EXPECT_NULL(test, err); + + file = filp_open(c[0].fname, O_RDONLY, 0); + if (IS_ERR(file)) { + KUNIT_FAIL(test, "open failed"); + goto out; + } + + /* read back file contents into @cpio_srcbuf and confirm match */ + len = kernel_read(file, tbufs->cpio_srcbuf, c[0].filesize, NULL); + KUNIT_EXPECT_EQ(test, len, c[0].filesize); + KUNIT_EXPECT_MEMEQ(test, tbufs->cpio_srcbuf, c[0].data, len); + + fput(file); + KUNIT_EXPECT_EQ(test, init_unlink(c[0].fname), 0); +out: + kfree(tbufs); +} + +/* + * The kunit_case/_suite struct cannot be marked as __initdata as this will be + * used in debugfs to retrieve results after test has run. 
+ */ +static struct kunit_case __refdata initramfs_test_cases[] = { + KUNIT_CASE(initramfs_test_extract), + KUNIT_CASE(initramfs_test_fname_overrun), + KUNIT_CASE(initramfs_test_data), + KUNIT_CASE(initramfs_test_csum), + KUNIT_CASE(initramfs_test_hardlink), + KUNIT_CASE(initramfs_test_many), + KUNIT_CASE(initramfs_test_fname_pad), + {}, +}; + +static struct kunit_suite initramfs_test_suite = { + .name = "initramfs", + .test_cases = initramfs_test_cases, +}; +kunit_test_init_section_suites(&initramfs_test_suite); + +MODULE_DESCRIPTION("Initramfs KUnit test suite"); +MODULE_LICENSE("GPL v2"); diff --git a/init/main.c b/init/main.c index e1c3911d7c70..b84818ad9685 100644 --- a/init/main.c +++ b/init/main.c @@ -13,6 +13,7 @@ #define DEBUG /* Enable initcall_debug */ #include <linux/types.h> +#include <linux/export.h> #include <linux/extable.h> #include <linux/module.h> #include <linux/proc_fs.h> @@ -50,8 +51,8 @@ #include <linux/writeback.h> #include <linux/cpu.h> #include <linux/cpuset.h> +#include <linux/memcontrol.h> #include <linux/cgroup.h> -#include <linux/efi.h> #include <linux/tick.h> #include <linux/sched/isolation.h> #include <linux/interrupt.h> @@ -62,7 +63,6 @@ #include <linux/rmap.h> #include <linux/mempolicy.h> #include <linux/key.h> -#include <linux/page_ext.h> #include <linux/debug_locks.h> #include <linux/debugobjects.h> #include <linux/lockdep.h> @@ -89,6 +89,7 @@ #include <linux/sched/task_stack.h> #include <linux/context_tracking.h> #include <linux/random.h> +#include <linux/moduleloader.h> #include <linux/list.h> #include <linux/integrity.h> #include <linux/proc_ns.h> @@ -96,15 +97,16 @@ #include <linux/cache.h> #include <linux/rodata_test.h> #include <linux/jump_label.h> -#include <linux/mem_encrypt.h> #include <linux/kcsan.h> #include <linux/init_syscalls.h> #include <linux/stackdepot.h> #include <linux/randomize_kstack.h> +#include <linux/pidfs.h> +#include <linux/ptdump.h> +#include <linux/time_namespace.h> #include <net/net_namespace.h> 
#include <asm/io.h> -#include <asm/bugs.h> #include <asm/setup.h> #include <asm/sections.h> #include <asm/cacheflush.h> @@ -116,10 +118,6 @@ static int kernel_init(void *); -extern void init_IRQ(void); -extern void radix_tree_init(void); -extern void maple_tree_init(void); - /* * Debug helper: via this flag we know that we are in 'early bootup code' * where only the boot processor is running with IRQ disabled. This means @@ -138,7 +136,6 @@ EXPORT_SYMBOL(system_state); #define MAX_INIT_ARGS CONFIG_INIT_ENV_ARG_LIMIT #define MAX_INIT_ENVS CONFIG_INIT_ENV_ARG_LIMIT -extern void time_init(void); /* Default late time init is NULL. archs can override this later. */ void (*__initdata late_time_init)(void); @@ -197,8 +194,6 @@ static const char *argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; const char *envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; static const char *panic_later, *panic_param; -extern const struct obs_kernel_param __setup_start[], __setup_end[]; - static bool __init obsolete_checksetup(char *line) { const struct obs_kernel_param *p; @@ -334,7 +329,7 @@ static int __init xbc_snprint_cmdline(char *buf, size_t size, { struct xbc_node *knode, *vnode; char *end = buf + size; - const char *val; + const char *val, *q; int ret; xbc_node_for_each_key_value(root, knode, val) { @@ -352,8 +347,14 @@ static int __init xbc_snprint_cmdline(char *buf, size_t size, continue; } xbc_array_for_each_value(vnode, val) { - ret = snprintf(buf, rest(buf, end), "%s=\"%s\" ", - xbc_namebuf, val); + /* + * For prettier and more readable /proc/cmdline, only + * quote the value when necessary, i.e. when it contains + * whitespace. + */ + q = strpbrk(val, " \t\r\n") ? 
"\"" : ""; + ret = snprintf(buf, rest(buf, end), "%s=%s%s%s ", + xbc_namebuf, q, val, q); if (ret < 0) return ret; buf += ret; @@ -429,7 +430,7 @@ static void __init setup_boot_config(void) err = parse_args("bootconfig", tmp_cmdline, NULL, 0, 0, 0, NULL, bootconfig_params); - if (IS_ERR(err) || !bootconfig_found) + if (IS_ERR(err) || !(bootconfig_found || IS_ENABLED(CONFIG_BOOT_CONFIG_FORCE))) return; /* parse_args() stops at the next param of '--' and returns an address */ @@ -437,7 +438,11 @@ static void __init setup_boot_config(void) initargs_offs = err - tmp_cmdline; if (!data) { - pr_err("'bootconfig' found on command line, but no bootconfig found\n"); + /* If user intended to use bootconfig, show an error level message */ + if (bootconfig_found) + pr_err("'bootconfig' found on command line, but no bootconfig found\n"); + else + pr_info("No bootconfig data provided, so skipping bootconfig"); return; } @@ -490,6 +495,11 @@ static int __init warn_bootconfig(char *str) early_param("bootconfig", warn_bootconfig); +bool __init cmdline_has_extra_options(void) +{ + return extra_command_line || extra_init_args; +} + /* Change NUL term back to "=", to make "param" the whole string. */ static void __init repair_env_string(char *param, char *val) { @@ -535,9 +545,25 @@ static int __init unknown_bootoption(char *param, char *val, const char *unused, void *arg) { size_t len = strlen(param); + /* + * Well-known bootloader identifiers: + * 1. LILO/Grub pass "BOOT_IMAGE=..."; + * 2. kexec/kdump (kexec-tools) pass "kexec". 
+ */ + const char *bootloader[] = { "BOOT_IMAGE=", "kexec", NULL }; + + /* Handle params aliased to sysctls */ + if (sysctl_is_alias(param)) + return 0; repair_env_string(param, val); + /* Handle bootloader identifier */ + for (int i = 0; bootloader[i]; i++) { + if (strstarts(param, bootloader[i])) + return 0; + } + /* Handle obsolete-style parameters */ if (obsolete_checksetup(param)) return 0; @@ -605,7 +631,6 @@ static int __init rdinit_setup(char *str) __setup("rdinit=", rdinit_setup); #ifndef CONFIG_SMP -static const unsigned int setup_max_cpus = NR_CPUS; static inline void setup_nr_cpu_ids(void) { } static inline void smp_prepare_cpus(unsigned int maxcpus) { } #endif @@ -622,18 +647,18 @@ static void __init setup_command_line(char *command_line) if (extra_command_line) xlen = strlen(extra_command_line); - if (extra_init_args) + if (extra_init_args) { + extra_init_args = strim(extra_init_args); /* remove trailing space */ ilen = strlen(extra_init_args) + 4; /* for " -- " */ + } - len = xlen + strlen(boot_command_line) + 1; + len = xlen + strlen(boot_command_line) + ilen + 1; - saved_command_line = memblock_alloc(len + ilen, SMP_CACHE_BYTES); - if (!saved_command_line) - panic("%s: Failed to allocate %zu bytes\n", __func__, len + ilen); + saved_command_line = memblock_alloc_or_panic(len, SMP_CACHE_BYTES); - static_command_line = memblock_alloc(len, SMP_CACHE_BYTES); - if (!static_command_line) - panic("%s: Failed to allocate %zu bytes\n", __func__, len); + len = xlen + strlen(command_line) + 1; + + static_command_line = memblock_alloc_or_panic(len, SMP_CACHE_BYTES); if (xlen) { /* @@ -683,7 +708,7 @@ static void __init setup_command_line(char *command_line) static __initdata DECLARE_COMPLETION(kthreadd_done); -noinline void __ref rest_init(void) +static noinline void __ref __noreturn rest_init(void) { struct task_struct *tsk; int pid; @@ -707,7 +732,7 @@ noinline void __ref rest_init(void) rcu_read_unlock(); numa_default_policy(); - pid = 
kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); + pid = kernel_thread(kthreadd, NULL, NULL, CLONE_FS | CLONE_FILES); rcu_read_lock(); kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns); rcu_read_unlock(); @@ -739,10 +764,7 @@ static int __init do_early_param(char *param, char *val, const struct obs_kernel_param *p; for (p = __setup_start; p < __setup_end; p++) { - if ((p->early && parameq(param, p->str)) || - (strcmp(param, "console") == 0 && - strcmp(p->str, "earlycon") == 0) - ) { + if (p->early && parameq(param, p->str)) { if (p->setup_func(val) != 0) pr_warn("Malformed early option '%s'\n", param); } @@ -778,14 +800,16 @@ void __init __weak smp_setup_processor_id(void) { } +void __init __weak smp_prepare_boot_cpu(void) +{ +} + # if THREAD_SIZE >= PAGE_SIZE void __init __weak thread_stack_cache_init(void) { } #endif -void __init __weak mem_encrypt_init(void) { } - void __init __weak poking_init(void) { } void __init __weak pgtable_cache_init(void) { } @@ -803,69 +827,6 @@ static inline void initcall_debug_enable(void) } #endif -/* Report memory auto-initialization states for this boot. */ -static void __init report_meminit(void) -{ - const char *stack; - - if (IS_ENABLED(CONFIG_INIT_STACK_ALL_PATTERN)) - stack = "all(pattern)"; - else if (IS_ENABLED(CONFIG_INIT_STACK_ALL_ZERO)) - stack = "all(zero)"; - else if (IS_ENABLED(CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL)) - stack = "byref_all(zero)"; - else if (IS_ENABLED(CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF)) - stack = "byref(zero)"; - else if (IS_ENABLED(CONFIG_GCC_PLUGIN_STRUCTLEAK_USER)) - stack = "__user(zero)"; - else - stack = "off"; - - pr_info("mem auto-init: stack:%s, heap alloc:%s, heap free:%s\n", - stack, want_init_on_alloc(GFP_KERNEL) ? "on" : "off", - want_init_on_free() ? 
"on" : "off"); - if (want_init_on_free()) - pr_info("mem auto-init: clearing system memory may take some time...\n"); -} - -/* - * Set up kernel memory allocators - */ -static void __init mm_init(void) -{ - /* - * page_ext requires contiguous pages, - * bigger than MAX_ORDER unless SPARSEMEM. - */ - page_ext_init_flatmem(); - init_mem_debugging_and_hardening(); - kfence_alloc_pool(); - report_meminit(); - kmsan_init_shadow(); - stack_depot_early_init(); - mem_init(); - mem_init_print_info(); - kmem_cache_init(); - /* - * page_owner must be initialized after buddy is ready, and also after - * slab is ready so that stack_depot_init() works properly - */ - page_ext_init_flatmem_late(); - kmemleak_init(); - pgtable_init(); - debug_objects_mem_init(); - vmalloc_init(); - /* Should be run after vmap initialization */ - if (early_page_ext_enabled()) - page_ext_init(); - /* Should be run before the first non-init thread is created */ - init_espfix_bsp(); - /* Should be run after espfix64 is set up. */ - pti_init(); - kmsan_init_runtime(); - mm_cache_init(); -} - #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, randomize_kstack_offset); @@ -889,11 +850,6 @@ static int __init early_randomize_kstack_offset(char *buf) early_param("randomize_kstack_offset", early_randomize_kstack_offset); #endif -void __init __weak arch_call_rest_init(void) -{ - rest_init(); -} - static void __init print_unknown_bootoptions(void) { char *unknown_options; @@ -937,7 +893,116 @@ static void __init print_unknown_bootoptions(void) memblock_free(unknown_options, len); } -asmlinkage __visible void __init __no_sanitize_address start_kernel(void) +static void __init early_numa_node_init(void) +{ +#ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID +#ifndef cpu_to_node + int cpu; + + /* The early_cpu_to_node() should be ready here. 
*/ + for_each_possible_cpu(cpu) + set_cpu_numa_node(cpu, early_cpu_to_node(cpu)); +#endif +#endif +} + +#define KERNEL_CMDLINE_PREFIX "Kernel command line: " +#define KERNEL_CMDLINE_PREFIX_LEN (sizeof(KERNEL_CMDLINE_PREFIX) - 1) +#define KERNEL_CMDLINE_CONTINUATION " \\" +#define KERNEL_CMDLINE_CONTINUATION_LEN (sizeof(KERNEL_CMDLINE_CONTINUATION) - 1) + +#define MIN_CMDLINE_LOG_WRAP_IDEAL_LEN (KERNEL_CMDLINE_PREFIX_LEN + \ + KERNEL_CMDLINE_CONTINUATION_LEN) +#define CMDLINE_LOG_WRAP_IDEAL_LEN (CONFIG_CMDLINE_LOG_WRAP_IDEAL_LEN > \ + MIN_CMDLINE_LOG_WRAP_IDEAL_LEN ? \ + CONFIG_CMDLINE_LOG_WRAP_IDEAL_LEN : \ + MIN_CMDLINE_LOG_WRAP_IDEAL_LEN) + +#define IDEAL_CMDLINE_LEN (CMDLINE_LOG_WRAP_IDEAL_LEN - KERNEL_CMDLINE_PREFIX_LEN) +#define IDEAL_CMDLINE_SPLIT_LEN (IDEAL_CMDLINE_LEN - KERNEL_CMDLINE_CONTINUATION_LEN) + +/** + * print_kernel_cmdline() - Print the kernel cmdline with wrapping. + * @cmdline: The cmdline to print. + * + * Print the kernel command line, trying to wrap based on the Kconfig knob + * CONFIG_CMDLINE_LOG_WRAP_IDEAL_LEN. + * + * Wrapping is based on spaces, ignoring quotes. All lines are prefixed + * with "Kernel command line: " and lines that are not the last line have + * a " \" suffix added to them. The prefix and suffix count towards the + * line length for wrapping purposes. The ideal length will be exceeded + * if no appropriate place to wrap is found. + * + * Example output if CONFIG_CMDLINE_LOG_WRAP_IDEAL_LEN is 40: + * Kernel command line: loglevel=7 \ + * Kernel command line: init=/sbin/init \ + * Kernel command line: root=PARTUUID=8c3efc1a-768b-6642-8d0c-89eb782f19f0/PARTNROFF=1 \ + * Kernel command line: rootwait ro \ + * Kernel command line: my_quoted_arg="The \ + * Kernel command line: quick brown fox \ + * Kernel command line: jumps over the \ + * Kernel command line: lazy dog." 
+ */ +static void __init print_kernel_cmdline(const char *cmdline) +{ + size_t len; + + /* Config option of 0 or anything longer than the max disables wrapping */ + if (CONFIG_CMDLINE_LOG_WRAP_IDEAL_LEN == 0 || + IDEAL_CMDLINE_LEN >= COMMAND_LINE_SIZE - 1) { + pr_notice("%s%s\n", KERNEL_CMDLINE_PREFIX, cmdline); + return; + } + + len = strlen(cmdline); + while (len > IDEAL_CMDLINE_LEN) { + const char *first_space; + const char *prev_cutoff; + const char *cutoff; + int to_print; + size_t used; + + /* Find the last ' ' that wouldn't make the line too long */ + prev_cutoff = NULL; + cutoff = cmdline; + while (true) { + cutoff = strchr(cutoff + 1, ' '); + if (!cutoff || cutoff - cmdline > IDEAL_CMDLINE_SPLIT_LEN) + break; + prev_cutoff = cutoff; + } + if (prev_cutoff) + cutoff = prev_cutoff; + else if (!cutoff) + break; + + /* Find the beginning and end of the string of spaces */ + first_space = cutoff; + while (first_space > cmdline && first_space[-1] == ' ') + first_space--; + to_print = first_space - cmdline; + while (*cutoff == ' ') + cutoff++; + used = cutoff - cmdline; + + /* If the whole string is used, break and do the final printout */ + if (len == used) + break; + + if (to_print) + pr_notice("%s%.*s%s\n", KERNEL_CMDLINE_PREFIX, + to_print, cmdline, KERNEL_CMDLINE_CONTINUATION); + + len -= used; + cmdline += used; + } + if (len) + pr_notice("%s%s\n", KERNEL_CMDLINE_PREFIX, cmdline); +} + +asmlinkage __visible __init __no_sanitize_address __noreturn __no_stack_protector +void start_kernel(void) { char *command_line; char *after_dashes; @@ -959,21 +1024,21 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) boot_cpu_init(); page_address_init(); pr_notice("%s", linux_banner); - early_security_init(); setup_arch(&command_line); + /* Static keys and static calls are needed by LSMs */ + jump_label_init(); + static_call_init(); + early_security_init(); setup_boot_config(); setup_command_line(command_line); setup_nr_cpu_ids(); 
setup_per_cpu_areas(); smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ + early_numa_node_init(); boot_cpu_hotplug_init(); - build_all_zonelists(NULL); - page_alloc_init(); - - pr_notice("Kernel command line: %s\n", saved_command_line); + print_kernel_cmdline(saved_command_line); /* parameters may set static keys */ - jump_label_init(); parse_early_param(); after_dashes = parse_args("Booting kernel", static_command_line, __start___param, @@ -992,13 +1057,14 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) /* * These use large bootmem allocations and must precede - * kmem_cache_init() + * initialization of page allocator */ setup_log_buf(0); vfs_caches_init_early(); sort_main_extable(); trap_init(); - mm_init(); + mm_core_init(); + maple_tree_init(); poking_init(); ftrace_init(); @@ -1016,7 +1082,6 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) "Interrupts were enabled *very* early, fixing it\n")) local_irq_disable(); radix_tree_init(); - maple_tree_init(); /* * Set up housekeeping before setting up workqueues to allow the unbound @@ -1032,6 +1097,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) workqueue_init_early(); rcu_init(); + kvfree_rcu_init(); /* Trace events are available after this */ trace_init(); @@ -1045,7 +1111,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) init_IRQ(); tick_init(); rcu_init_nohz(); - init_timers(); + timers_init(); srcu_init(); hrtimers_init(); softirq_init(); @@ -1088,14 +1154,6 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) */ locking_selftest(); - /* - * This needs to be called before any devices perform DMA - * operations that might use the SWIOTLB bounce buffers. It will - * mark the bounce buffers as decrypted so that their usage will - * not cause "plain-text" data to be decrypted when accessed.
- */ - mem_encrypt_init(); - #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start && !initrd_below_start_ok && page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) { @@ -1112,17 +1170,17 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) late_time_init(); sched_clock_init(); calibrate_delay(); + + arch_cpu_finalize_init(); + pid_idr_init(); anon_vma_init(); -#ifdef CONFIG_X86 - if (efi_enabled(EFI_RUNTIME_SERVICES)) - efi_enter_virtual_mode(); -#endif thread_stack_cache_init(); cred_init(); fork_init(); proc_caches_init(); uts_ns_init(); + time_ns_init(); key_init(); security_init(); dbg_late_init(); @@ -1133,21 +1191,27 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) seq_file_init(); proc_root_init(); nsfs_init(); + pidfs_init(); cpuset_init(); + mem_cgroup_init(); cgroup_init(); taskstats_init_early(); delayacct_init(); - check_bugs(); - acpi_subsystem_init(); arch_post_acpi_subsys_init(); kcsan_init(); /* Do the rest non-__init'ed, we're now alive */ - arch_call_rest_init(); + rest_init(); + /* + * Avoid stack canaries in callers of boot_init_stack_canary for gcc-10 + * and older. + */ +#if !__has_attribute(__no_stack_protector__) prevent_tail_call_optimization(); +#endif } /* Call all constructor functions linked into the kernel. 
*/ @@ -1185,16 +1249,10 @@ static int __init initcall_blacklist(char *str) str_entry = strsep(&str, ","); if (str_entry) { pr_debug("blacklisting initcall %s\n", str_entry); - entry = memblock_alloc(sizeof(*entry), + entry = memblock_alloc_or_panic(sizeof(*entry), SMP_CACHE_BYTES); - if (!entry) - panic("%s: Failed to allocate %zu bytes\n", - __func__, sizeof(*entry)); - entry->buf = memblock_alloc(strlen(str_entry) + 1, + entry->buf = memblock_alloc_or_panic(strlen(str_entry) + 1, SMP_CACHE_BYTES); - if (!entry->buf) - panic("%s: Failed to allocate %zu bytes\n", - __func__, strlen(str_entry) + 1); strcpy(entry->buf, str_entry); list_add(&entry->next, &blacklisted_initcalls); } @@ -1263,6 +1321,12 @@ trace_initcall_finish_cb(void *data, initcall_t fn, int ret) fn, ret, (unsigned long long)ktime_us_delta(rettime, *calltime)); } +static __init_or_module void +trace_initcall_level_cb(void *data, const char *level) +{ + printk(KERN_DEBUG "entering initcall level: %s\n", level); +} + static ktime_t initcall_calltime; #ifdef TRACEPOINTS_ENABLED @@ -1274,10 +1338,12 @@ static void __init initcall_debug_enable(void) &initcall_calltime); ret |= register_trace_initcall_finish(trace_initcall_finish_cb, &initcall_calltime); + ret |= register_trace_initcall_level(trace_initcall_level_cb, NULL); WARN(ret, "Failed to register initcall tracepoints\n"); } # define do_trace_initcall_start trace_initcall_start # define do_trace_initcall_finish trace_initcall_finish +# define do_trace_initcall_level trace_initcall_level #else static inline void do_trace_initcall_start(initcall_t fn) { @@ -1291,6 +1357,12 @@ static inline void do_trace_initcall_finish(initcall_t fn, int ret) return; trace_initcall_finish_cb(&initcall_calltime, fn, ret); } +static inline void do_trace_initcall_level(const char *level) +{ + if (!initcall_debug) + return; + trace_initcall_level_cb(NULL, level); +} #endif /* !TRACEPOINTS_ENABLED */ int __init_or_module do_one_initcall(initcall_t fn) @@ -1323,17 +1395,6 @@ 
int __init_or_module do_one_initcall(initcall_t fn) } -extern initcall_entry_t __initcall_start[]; -extern initcall_entry_t __initcall0_start[]; -extern initcall_entry_t __initcall1_start[]; -extern initcall_entry_t __initcall2_start[]; -extern initcall_entry_t __initcall3_start[]; -extern initcall_entry_t __initcall4_start[]; -extern initcall_entry_t __initcall5_start[]; -extern initcall_entry_t __initcall6_start[]; -extern initcall_entry_t __initcall7_start[]; -extern initcall_entry_t __initcall_end[]; - static initcall_entry_t *initcall_levels[] __initdata = { __initcall0_start, __initcall1_start, @@ -1374,7 +1435,7 @@ static void __init do_initcall_level(int level, char *command_line) level, level, NULL, ignore_unknown_bootoption); - trace_initcall_level(initcall_level_names[level]); + do_trace_initcall_level(initcall_level_names[level]); for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++) do_one_initcall(initcall_from_entry(fn)); } @@ -1418,7 +1479,7 @@ static void __init do_pre_smp_initcalls(void) { initcall_entry_t *fn; - trace_initcall_level("early"); + do_trace_initcall_level("early"); for (fn = __initcall_start; fn < __initcall0_start; fn++) do_one_initcall(initcall_from_entry(fn)); } @@ -1477,33 +1538,28 @@ static int __init set_debug_rodata(char *str) early_param("rodata", set_debug_rodata); #endif -#ifdef CONFIG_STRICT_KERNEL_RWX static void mark_readonly(void) { - if (rodata_enabled) { + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && rodata_enabled) { /* * load_module() results in W+X mappings, which are cleaned - * up with call_rcu(). Let's make sure that queued work is + * up with init_free_wq. Let's make sure that queued work is * flushed so that we don't hit false positives looking for * insecure pages which are W+X. 
*/ - rcu_barrier(); + flush_module_init_free_work(); + jump_label_init_ro(); mark_rodata_ro(); + debug_checkwx(); rodata_test(); - } else + } else if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) { pr_info("Kernel memory protection disabled.\n"); + } else if (IS_ENABLED(CONFIG_ARCH_HAS_STRICT_KERNEL_RWX)) { + pr_warn("Kernel memory protection not selected by kernel config.\n"); + } else { + pr_warn("This architecture does not have kernel memory protection.\n"); + } } -#elif defined(CONFIG_ARCH_HAS_STRICT_KERNEL_RWX) -static inline void mark_readonly(void) -{ - pr_warn("Kernel memory protection not selected by kernel config.\n"); -} -#else -static inline void mark_readonly(void) -{ - pr_warn("This architecture does not have kernel memory protection.\n"); -} -#endif void __weak free_initmem(void) { @@ -1618,18 +1674,16 @@ static noinline void __init kernel_init_freeable(void) init_mm_internals(); - rcu_init_tasks_generic(); do_pre_smp_initcalls(); lockup_detector_init(); smp_init(); sched_init_smp(); + workqueue_init_topology(); + async_init(); padata_init(); page_alloc_init_late(); - /* Initialize page ext after all struct pages are initialized. */ - if (!early_page_ext_enabled()) - page_ext_init(); do_basic_setup(); @@ -1642,7 +1696,11 @@ static noinline void __init kernel_init_freeable(void) * check if there is an early userspace init. 
If yes, let it do all * the work */ - if (init_eaccess(ramdisk_execute_command) != 0) { + int ramdisk_command_access; + ramdisk_command_access = init_eaccess(ramdisk_execute_command); + if (ramdisk_command_access != 0) { + pr_warn("check access for rdinit=%s failed: %i, ignoring\n", + ramdisk_execute_command, ramdisk_command_access); ramdisk_execute_command = NULL; prepare_namespace(); } diff --git a/init/version-timestamp.c b/init/version-timestamp.c index 043cbf80a766..375726e05f69 100644 --- a/init/version-timestamp.c +++ b/init/version-timestamp.c @@ -8,7 +8,7 @@ #include <linux/utsname.h> struct uts_namespace init_uts_ns = { - .ns.count = REFCOUNT_INIT(2), + .ns = NS_COMMON_INIT(init_uts_ns), .name = { .sysname = UTS_SYSNAME, .nodename = UTS_NODENAME, @@ -18,10 +18,6 @@ struct uts_namespace init_uts_ns = { .domainname = UTS_DOMAINNAME, }, .user_ns = &init_user_ns, - .ns.inum = PROC_UTS_INIT_INO, -#ifdef CONFIG_UTS_NS - .ns.ops = &utsns_operations, -#endif }; /* FIXED STRINGS! Don't touch! */ diff --git a/init/version.c b/init/version.c index f117921811b4..94c96f6fbfe6 100644 --- a/init/version.c +++ b/init/version.c @@ -21,10 +21,10 @@ static int __init early_hostname(char *arg) { size_t bufsize = sizeof(init_uts_ns.name.nodename); size_t maxlen = bufsize - 1; - size_t arglen; + ssize_t arglen; - arglen = strlcpy(init_uts_ns.name.nodename, arg, bufsize); - if (arglen > maxlen) { + arglen = strscpy(init_uts_ns.name.nodename, arg, bufsize); + if (arglen < 0) { pr_warn("hostname parameter exceeds %zd characters and will be truncated", maxlen); } |
