summary refs log tree commit diff
path: root/init
diff options
context:
space:
mode:
Diffstat (limited to 'init')
-rw-r--r-- init/.kunitconfig 3
-rw-r--r-- init/Kconfig 302
-rw-r--r-- init/Makefile 1
-rw-r--r-- init/calibrate.c 13
-rw-r--r-- init/do_mounts.c 5
-rw-r--r-- init/do_mounts_initrd.c 4
-rw-r--r-- init/do_mounts_rd.c 17
-rw-r--r-- init/init_task.c 33
-rw-r--r-- init/initramfs.c 72
-rw-r--r-- init/initramfs_internal.h 8
-rw-r--r-- init/initramfs_test.c 472
-rw-r--r-- init/main.c 167
-rw-r--r-- init/version-timestamp.c 6
13 files changed, 966 insertions, 137 deletions
diff --git a/init/.kunitconfig b/init/.kunitconfig
new file mode 100644
index 000000000000..acb906b1a5f9
--- /dev/null
+++ b/init/.kunitconfig
@@ -0,0 +1,3 @@
+CONFIG_KUNIT=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_TEST=y
diff --git a/init/Kconfig b/init/Kconfig
index a20e6efd3f0f..fa79feb8fe57 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -82,15 +82,14 @@ config RUSTC_LLVM_VERSION
int
default $(rustc-llvm-version)
-config CC_CAN_LINK
+config ARCH_HAS_CC_CAN_LINK
bool
- default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag)) if 64BIT
- default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag))
-config CC_CAN_LINK_STATIC
+config CC_CAN_LINK
bool
- default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag) -static) if 64BIT
- default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag) -static)
+ default ARCH_CC_CAN_LINK if ARCH_HAS_CC_CAN_LINK
+ default $(cc_can_link_user,$(m64-flag)) if 64BIT
+ default $(cc_can_link_user,$(m32-flag))
# Fixed in GCC 14, 13.3, 12.4 and 11.5
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921
@@ -104,7 +103,10 @@ config GCC_ASM_GOTO_OUTPUT_BROKEN
config CC_HAS_ASM_GOTO_OUTPUT
def_bool y
depends on !GCC_ASM_GOTO_OUTPUT_BROKEN
+ # Detect basic support
depends on $(success,echo 'int foo(int x) { asm goto ("": "=r"(x) ::: bar); return x; bar: return 0; }' | $(CC) -x c - -c -o /dev/null)
+ # Detect clang (< v17) scoped label issues
+ depends on $(success,echo 'void b(void **);void* c(void);int f(void){{asm goto(""::::l0);return 0;l0:return 1;}void *x __attribute__((cleanup(b)))=c();{asm goto(""::::l1);return 2;l1:return 3;}}' | $(CC) -x c - -c -o /dev/null)
config CC_HAS_ASM_GOTO_TIED_OUTPUT
depends on CC_HAS_ASM_GOTO_OUTPUT
@@ -117,17 +119,55 @@ config TOOLS_SUPPORT_RELR
config CC_HAS_ASM_INLINE
def_bool $(success,echo 'void foo(void) { asm inline (""); }' | $(CC) -x c - -c -o /dev/null)
+config CC_HAS_ASSUME
+ bool
+ # clang needs to be at least 19.1.0 since the meaning of the assume
+ # attribute changed:
+ # https://github.com/llvm/llvm-project/commit/c44fa3e8a9a44c2e9a575768a3c185354b9f6c17
+ default y if CC_IS_CLANG && CLANG_VERSION >= 190100
+ # supported since gcc 13.1.0
+ # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106654
+ default y if CC_IS_GCC && GCC_VERSION >= 130100
+
config CC_HAS_NO_PROFILE_FN_ATTR
def_bool $(success,echo '__attribute__((no_profile_instrument_function)) int x();' | $(CC) -x c - -c -o /dev/null -Werror)
config CC_HAS_COUNTED_BY
- # TODO: when gcc 15 is released remove the build test and add
- # a gcc version check
- def_bool $(success,echo 'struct flex { int count; int array[] __attribute__((__counted_by__(count))); };' | $(CC) $(CLANG_FLAGS) -x c - -c -o /dev/null -Werror)
- # clang needs to be at least 19.1.3 to avoid __bdos miscalculations
- # https://github.com/llvm/llvm-project/pull/110497
- # https://github.com/llvm/llvm-project/pull/112636
- depends on !(CC_IS_CLANG && CLANG_VERSION < 190103)
+ bool
+ # clang needs to be at least 20.1.0 to avoid potential crashes
+ # when building structures that contain __counted_by
+ # https://github.com/ClangBuiltLinux/linux/issues/2114
+ # https://github.com/llvm/llvm-project/commit/160fb1121cdf703c3ef5e61fb26c5659eb581489
+ default y if CC_IS_CLANG && CLANG_VERSION >= 200100
+ # supported since gcc 15.1.0
+ # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896
+ default y if CC_IS_GCC && GCC_VERSION >= 150100
+
+config CC_HAS_MULTIDIMENSIONAL_NONSTRING
+ def_bool $(success,echo 'char tag[][4] __attribute__((__nonstring__)) = { };' | $(CC) $(CLANG_FLAGS) -x c - -c -o /dev/null -Werror)
+
+config LD_CAN_USE_KEEP_IN_OVERLAY
+ # ld.lld prior to 21.0.0 did not support KEEP within an overlay description
+ # https://github.com/llvm/llvm-project/pull/130661
+ def_bool LD_IS_BFD || LLD_VERSION >= 210000
+
+config RUSTC_HAS_SLICE_AS_FLATTENED
+ def_bool RUSTC_VERSION >= 108000
+
+config RUSTC_HAS_COERCE_POINTEE
+ def_bool RUSTC_VERSION >= 108400
+
+config RUSTC_HAS_SPAN_FILE
+ def_bool RUSTC_VERSION >= 108800
+
+config RUSTC_HAS_UNNECESSARY_TRANSMUTES
+ def_bool RUSTC_VERSION >= 108800
+
+config RUSTC_HAS_FILE_WITH_NUL
+ def_bool RUSTC_VERSION >= 108900
+
+config RUSTC_HAS_FILE_AS_C_STR
+ def_bool RUSTC_VERSION >= 109100
config PAHOLE_VERSION
int
@@ -156,6 +196,10 @@ menu "General setup"
config BROKEN
bool
+ help
+ This option allows you to choose whether you want to try to
+ compile (and fix) old drivers that haven't been updated to
+ new infrastructure.
config BROKEN_ON_SMP
bool
@@ -468,16 +512,6 @@ config CROSS_MEMORY_ATTACH
to directly read from or write to another process' address space.
See the man page for more details.
-config USELIB
- bool "uselib syscall (for libc5 and earlier)"
- default ALPHA || M68K || SPARC
- help
- This option enables the uselib syscall, a system call used in the
- dynamic linker from libc5 and earlier. glibc does not use this
- system call. If you intend to run programs built on libc5 or
- earlier, you may need to enable this syscall. Current systems
- running glibc can safely disable this.
-
config AUDIT
bool "Auditing support"
depends on NET
@@ -703,7 +737,7 @@ endmenu # "CPU/Task time and stats accounting"
config CPU_ISOLATION
bool "CPU isolation"
- depends on SMP || COMPILE_TEST
+ depends on SMP
default y
help
Make sure that CPUs running critical tasks are not disturbed by
@@ -872,6 +906,18 @@ config UCLAMP_BUCKETS_COUNT
If in doubt, use the default value.
+config SCHED_PROXY_EXEC
+ bool "Proxy Execution"
+ # Avoid some build failures w/ PREEMPT_RT until it can be fixed
+ depends on !PREEMPT_RT
+ # Need to investigate how to inform sched_ext of split contexts
+ depends on !SCHED_CLASS_EXT
+ # Not particularly useful until we get to multi-rq proxying
+ depends on EXPERT
+ help
+ This option enables proxy execution, a mechanism for mutex-owning
+ tasks to inherit the scheduling context of higher priority waiters.
+
endmenu
#
@@ -989,9 +1035,24 @@ config MEMCG
select PAGE_COUNTER
select EVENTFD
select SLAB_OBJ_EXT
+ select VM_EVENT_COUNTERS
help
Provides control over the memory footprint of tasks in a cgroup.
+config MEMCG_NMI_UNSAFE
+ bool
+ depends on MEMCG
+ depends on HAVE_NMI
+ depends on !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && !ARCH_HAVE_NMI_SAFE_CMPXCHG
+ default y
+
+config MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
+ bool
+ depends on MEMCG
+ depends on HAVE_NMI
+ depends on !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && ARCH_HAVE_NMI_SAFE_CMPXCHG
+ default y
+
config MEMCG_V1
bool "Legacy cgroup v1 memory controller"
depends on MEMCG
@@ -1048,6 +1109,9 @@ if CGROUP_SCHED
config GROUP_SCHED_WEIGHT
def_bool n
+config GROUP_SCHED_BANDWIDTH
+ def_bool n
+
config FAIR_GROUP_SCHED
bool "Group scheduling for SCHED_OTHER"
depends on CGROUP_SCHED
@@ -1057,6 +1121,7 @@ config FAIR_GROUP_SCHED
config CFS_BANDWIDTH
bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED"
depends on FAIR_GROUP_SCHED
+ select GROUP_SCHED_BANDWIDTH
default n
help
This option allows users to define CPU bandwidth rates (limits) for
@@ -1076,10 +1141,22 @@ config RT_GROUP_SCHED
realtime bandwidth for them.
See Documentation/scheduler/sched-rt-group.rst for more information.
+config RT_GROUP_SCHED_DEFAULT_DISABLED
+ bool "Require boot parameter to enable group scheduling for SCHED_RR/FIFO"
+ depends on RT_GROUP_SCHED
+ default n
+ help
+ When set, the RT group scheduling is disabled by default. The option
+ is in inverted form so that mere RT_GROUP_SCHED enables the group
+ scheduling.
+
+ Say N if unsure.
+
config EXT_GROUP_SCHED
bool
depends on SCHED_CLASS_EXT && CGROUP_SCHED
select GROUP_SCHED_WEIGHT
+ select GROUP_SCHED_BANDWIDTH
default y
endif #CGROUP_SCHED
@@ -1136,6 +1213,16 @@ config CGROUP_RDMA
Attaching processes with active RDMA resources to the cgroup
hierarchy is allowed even if can cross the hierarchy's limit.
+config CGROUP_DMEM
+ bool "Device memory controller (DMEM)"
+ select PAGE_COUNTER
+ help
+ The DMEM controller allows compatible devices to restrict device
+ memory usage based on the cgroup hierarchy.
+
+ As an example, it allows you to restrict VRAM usage for applications
+ in the DRM subsystem.
+
config CGROUP_FREEZER
bool "Freezer controller"
help
@@ -1182,7 +1269,8 @@ config CPUSETS_V1
help
Legacy cgroup v1 cpusets controller which has been deprecated by
cgroup v2 implementation. The v1 is there for legacy applications
- which haven't migrated to the new cgroup v2 interface yet. If you
+ which haven't migrated to the new cgroup v2 interface yet. Legacy
+ interface includes cpuset filesystem and /proc/<pid>/cpuset. If you
do not have any such application then you are completely fine leaving
this option disabled.
@@ -1190,7 +1278,7 @@ config CPUSETS_V1
config PROC_PID_CPUSET
bool "Include legacy /proc/<pid>/cpuset file"
- depends on CPUSETS
+ depends on CPUSETS_V1
default y
config CGROUP_DEVICE
@@ -1282,7 +1370,7 @@ config UTS_NS
config TIME_NS
bool "TIME namespace"
- depends on GENERIC_VDSO_TIME_NS
+ depends on GENERIC_GETTIMEOFDAY
default y
help
In this namespace boottime and monotonic clocks can be set.
@@ -1431,8 +1519,27 @@ config BOOT_CONFIG_EMBED_FILE
This bootconfig will be used if there is no initrd or no other
bootconfig in the initrd.
+config CMDLINE_LOG_WRAP_IDEAL_LEN
+ int "Length to try to wrap the cmdline when logged at boot"
+ default 1021
+ range 0 1021
+ help
+ At boot time, the kernel command line is logged to the console.
+ The log message will start with the prefix "Kernel command line: ".
+ The log message will attempt to be wrapped (split into multiple log
+ messages) at spaces based on CMDLINE_LOG_WRAP_IDEAL_LEN characters.
+ If wrapping happens, each log message will start with the prefix and
+ all but the last message will end with " \". Messages may exceed the
+ ideal length if a place to wrap isn't found before the specified
+ number of characters.
+
+ A value of 0 disables wrapping, though be warned that the maximum
+ length of a log message (1021 characters) may cause the cmdline to
+ be truncated.
+
config INITRAMFS_PRESERVE_MTIME
bool "Preserve cpio archive mtimes in initramfs"
+ depends on BLK_DEV_INITRD
default y
help
Each entry in an initramfs cpio archive carries an mtime value. When
@@ -1441,6 +1548,13 @@ config INITRAMFS_PRESERVE_MTIME
If unsure, say Y.
+config INITRAMFS_TEST
+ bool "Test initramfs cpio archive extraction" if !KUNIT_ALL_TESTS
+ depends on BLK_DEV_INITRD && KUNIT=y
+ default KUNIT_ALL_TESTS
+ help
+ Build KUnit tests for initramfs. See Documentation/dev-tools/kunit
+
choice
prompt "Compiler optimization level"
default CC_OPTIMIZE_FOR_PERFORMANCE
@@ -1526,6 +1640,16 @@ config SYSCTL_ARCH_UNALIGN_ALLOW
the unaligned access emulation.
see arch/parisc/kernel/unaligned.c for reference
+config SYSFS_SYSCALL
+ bool "Sysfs syscall support"
+ default n
+ help
+ sys_sysfs is an obsolete system call no longer supported in libc.
+ Note that disabling this option is more secure but might break
+ compatibility with some systems.
+
+ If unsure say N here.
+
config HAVE_PCSPKR_PLATFORM
bool
@@ -1570,16 +1694,6 @@ config SGETMASK_SYSCALL
If unsure, leave the default option here.
-config SYSFS_SYSCALL
- bool "Sysfs syscall support" if EXPERT
- default y
- help
- sys_sysfs is an obsolete system call no longer supported in libc.
- Note that disabling this option is more secure but might break
- compatibility with some systems.
-
- If unsure say Y here.
-
config FHANDLE
bool "open by fhandle syscalls" if EXPERT
select EXPORTFS
@@ -1621,6 +1735,18 @@ config PRINTK
very difficult to diagnose system problems, saying N here is
strongly discouraged.
+config PRINTK_RINGBUFFER_KUNIT_TEST
+ tristate "KUnit Test for the printk ringbuffer" if !KUNIT_ALL_TESTS
+ depends on PRINTK && KUNIT
+ default KUNIT_ALL_TESTS
+ help
+ This builds the printk ringbuffer KUnit test suite.
+
+ For more information on KUnit and unit tests in general, please refer
+ to the KUnit documentation.
+
+ If unsure, say N.
+
config BUG
bool "BUG() support" if EXPERT
default y
@@ -1670,6 +1796,16 @@ config FUTEX_PI
depends on FUTEX && RT_MUTEXES
default y
+config FUTEX_PRIVATE_HASH
+ bool
+ depends on FUTEX && !BASE_SMALL && MMU
+ default y
+
+config FUTEX_MPOL
+ bool
+ depends on FUTEX && NUMA
+ default y
+
config EPOLL
bool "Enable eventpoll support" if EXPERT
default y
@@ -1734,7 +1870,7 @@ config IO_URING
config GCOV_PROFILE_URING
bool "Enable GCOV profiling on the io_uring subsystem"
- depends on GCOV_KERNEL
+ depends on IO_URING && GCOV_KERNEL
help
Enable GCOV profiling on the io_uring subsystem, to facilitate
code coverage testing.
@@ -1745,6 +1881,17 @@ config GCOV_PROFILE_URING
the io_uring subsystem, hence this should only be enabled for
specific test purposes.
+config IO_URING_MOCK_FILE
+ tristate "Enable io_uring mock files (Experimental)" if EXPERT
+ default n
+ depends on IO_URING
+ help
+ Enable mock files for io_uring subsystem testing. The ABI might
+ still change, so it's still experimental and should only be enabled
+ for specific test purposes.
+
+ If unsure, say N.
+
config ADVISE_SYSCALLS
bool "Enable madvise/fadvise syscalls" if EXPERT
default y
@@ -1791,10 +1938,36 @@ config RSEQ
If unsure, say Y.
+config RSEQ_STATS
+ default n
+ bool "Enable lightweight statistics of restartable sequences" if EXPERT
+ depends on RSEQ && DEBUG_FS
+ help
+ Enable lightweight counters which expose information about the
+ frequency of RSEQ operations via debugfs. Mostly interesting for
+ kernel debugging or performance analysis. While lightweight it's
+ still adding code into the user/kernel mode transitions.
+
+ If unsure, say N.
+
+config RSEQ_DEBUG_DEFAULT_ENABLE
+ default n
+ bool "Enable restartable sequences debug mode by default" if EXPERT
+ depends on RSEQ
+ help
+ This enables the static branch for debug mode of restartable
+ sequences.
+
+ This also can be controlled on the kernel command line via the
+ command line parameter "rseq_debug=0/1" and through debugfs.
+
+ If unsure, say N.
+
config DEBUG_RSEQ
default n
bool "Enable debugging of rseq() system call" if EXPERT
- depends on RSEQ && DEBUG_KERNEL
+ depends on RSEQ && DEBUG_KERNEL && !GENERIC_ENTRY
+ select RSEQ_DEBUG_DEFAULT_ENABLE
help
Enable extra debugging checks for the rseq system call.
@@ -1810,13 +1983,6 @@ config CACHESTAT_SYSCALL
If unsure say Y here.
-config PC104
- bool "PC/104 support" if EXPERT
- help
- Expose PC/104 form factor device drivers and options available for
- selection and configuration. Enable this option if your target
- machine has a PC/104 bus.
-
config KALLSYMS
bool "Load all symbols for debugging/ksymoops" if EXPERT
default y
@@ -1856,11 +2022,6 @@ config KALLSYMS_ALL
Say N unless you really need all symbols, or kernel live patching.
-config KALLSYMS_ABSOLUTE_PERCPU
- bool
- depends on KALLSYMS
- default X86_64 && SMP
-
# end of the "standard kernel features (expert users)" menu
config ARCH_HAS_MEMBARRIER_CALLBACKS
@@ -1869,6 +2030,28 @@ config ARCH_HAS_MEMBARRIER_CALLBACKS
config ARCH_HAS_MEMBARRIER_SYNC_CORE
bool
+config ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS
+ bool
+ help
+ Control MSEAL_SYSTEM_MAPPINGS access based on architecture.
+
+ A 64-bit kernel is required for the memory sealing feature.
+ No specific hardware features from the CPU are needed.
+
+ To enable this feature, the architecture needs to update its
+ special mapping calls to include the sealing flag and confirm
+ that it doesn't unmap/remap system mappings during the lifetime
+ of the process. The existence of this flag for an architecture
+ implies that it does not require the remapping of the system
+ mappings during process lifetime, so sealing these mappings is safe
+ from a kernel perspective.
+
+ After the architecture enables this, a distribution can set
+ CONFIG_MSEAL_SYSTEM_MAPPINGS to manage access to the feature.
+
+ For complete descriptions of memory sealing, please see
+ Documentation/userspace-api/mseal.rst
+
config HAVE_PERF_EVENTS
bool
help
@@ -1956,12 +2139,13 @@ config RUST
bool "Rust support"
depends on HAVE_RUST
depends on RUST_IS_AVAILABLE
- depends on !MODVERSIONS
+ select EXTENDED_MODVERSIONS if MODVERSIONS
+ depends on !MODVERSIONS || GENDWARFKSYMS
depends on !GCC_PLUGIN_RANDSTRUCT
depends on !RANDSTRUCT
- depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE
- depends on !CFI_CLANG || HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC
- select CFI_ICALL_NORMALIZE_INTEGERS if CFI_CLANG
+ depends on !DEBUG_INFO_BTF || (PAHOLE_HAS_LANG_EXCLUDE && !LTO)
+ depends on !CFI || HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC
+ select CFI_ICALL_NORMALIZE_INTEGERS if CFI
depends on !CALL_PADDING || RUSTC_VERSION >= 108100
depends on !KASAN_SW_TAGS
depends on !(MITIGATION_RETHUNK && KASAN) || RUSTC_VERSION >= 108300
@@ -1989,8 +2173,10 @@ config BINDGEN_VERSION_TEXT
string
depends on RUST
# The dummy parameter `workaround-for-0.69.0` is required to support 0.69.0
- # (https://github.com/rust-lang/rust-bindgen/pull/2678). It can be removed when
- # the minimum version is upgraded past that (0.69.1 already fixed the issue).
+ # (https://github.com/rust-lang/rust-bindgen/pull/2678) and 0.71.0
+ # (https://github.com/rust-lang/rust-bindgen/pull/3040). It can be removed
+ # when the minimum version is upgraded past the latter (0.69.1 and 0.71.1
+ # both fixed the issue).
default "$(shell,$(BINDGEN) --version workaround-for-0.69.0 2>/dev/null)"
#
@@ -2003,6 +2189,8 @@ config TRACEPOINTS
source "kernel/Kconfig.kexec"
+source "kernel/liveupdate/Kconfig"
+
endmenu # General setup
source "arch/Kconfig"
diff --git a/init/Makefile b/init/Makefile
index 10b652d33e87..d6f75d8907e0 100644
--- a/init/Makefile
+++ b/init/Makefile
@@ -12,6 +12,7 @@ else
obj-$(CONFIG_BLK_DEV_INITRD) += initramfs.o
endif
obj-$(CONFIG_GENERIC_CALIBRATE_DELAY) += calibrate.o
+obj-$(CONFIG_INITRAMFS_TEST) += initramfs_test.o
obj-y += init_task.o
diff --git a/init/calibrate.c b/init/calibrate.c
index f3831272f113..63be4c65bc52 100644
--- a/init/calibrate.c
+++ b/init/calibrate.c
@@ -5,19 +5,22 @@
* Copyright (C) 1991, 1992 Linus Torvalds
*/
-#include <linux/jiffies.h>
#include <linux/delay.h>
#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/smp.h>
+#include <linux/jiffies.h>
+#include <linux/kstrtox.h>
#include <linux/percpu.h>
+#include <linux/printk.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/timex.h>
unsigned long lpj_fine;
unsigned long preset_lpj;
+
static int __init lpj_setup(char *str)
{
- preset_lpj = simple_strtoul(str,NULL,0);
- return 1;
+ return kstrtoul(str, 0, &preset_lpj) == 0;
}
__setup("lpj=", lpj_setup);
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 6af29da8889e..defbbf1d55f7 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -120,7 +120,8 @@ static int __init fs_names_setup(char *str)
static unsigned int __initdata root_delay;
static int __init root_delay_setup(char *str)
{
- root_delay = simple_strtoul(str, NULL, 0);
+ if (kstrtouint(str, 0, &root_delay))
+ return 0;
return 1;
}
@@ -507,7 +508,7 @@ static int rootfs_init_fs_context(struct fs_context *fc)
struct file_system_type rootfs_fs_type = {
.name = "rootfs",
.init_fs_context = rootfs_init_fs_context,
- .kill_sb = kill_litter_super,
+ .kill_sb = kill_anon_super,
};
void __init init_rootfs(void)
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index 22c7f41ff642..f6867bad0d78 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -21,7 +21,7 @@ phys_addr_t phys_initrd_start __initdata;
unsigned long phys_initrd_size __initdata;
#ifdef CONFIG_SYSCTL
-static struct ctl_table kern_do_mounts_initrd_table[] = {
+static const struct ctl_table kern_do_mounts_initrd_table[] = {
{
.procname = "real-root-dev",
.data = &real_root_dev,
@@ -89,7 +89,7 @@ static void __init handle_initrd(char *root_device_name)
extern char *envp_init[];
int error;
- pr_warn("using deprecated initrd support, will be removed in 2021.\n");
+ pr_warn("using deprecated initrd support, will be removed soon.\n");
real_root_dev = new_encode_dev(ROOT_DEV);
create_dev("/dev/root.old", Root_RAM0);
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index ac021ae6e6fa..eddbe5cb0413 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -7,6 +7,7 @@
#include <uapi/linux/cramfs_fs.h>
#include <linux/initrd.h>
#include <linux/string.h>
+#include <linux/string_choices.h>
#include <linux/slab.h>
#include "do_mounts.h"
@@ -28,8 +29,7 @@ int __initdata rd_image_start; /* starting block # of image */
static int __init ramdisk_start_setup(char *str)
{
- rd_image_start = simple_strtol(str,NULL,0);
- return 1;
+ return kstrtoint(str, 0, &rd_image_start) == 0;
}
__setup("ramdisk_start=", ramdisk_start_setup);
@@ -186,14 +186,12 @@ static unsigned long nr_blocks(struct file *file)
int __init rd_load_image(char *from)
{
int res = 0;
- unsigned long rd_blocks, devblocks;
+ unsigned long rd_blocks, devblocks, nr_disks;
int nblocks, i;
char *buf = NULL;
unsigned short rotate = 0;
decompress_fn decompressor = NULL;
-#if !defined(CONFIG_S390)
char rotator[4] = { '|' , '/' , '-' , '\\' };
-#endif
out_file = filp_open("/dev/ram", O_RDWR, 0);
if (IS_ERR(out_file))
@@ -244,8 +242,9 @@ int __init rd_load_image(char *from)
goto done;
}
- printk(KERN_NOTICE "RAMDISK: Loading %dKiB [%ld disk%s] into ram disk... ",
- nblocks, ((nblocks-1)/devblocks)+1, nblocks>devblocks ? "s" : "");
+ nr_disks = (nblocks - 1) / devblocks + 1;
+ pr_notice("RAMDISK: Loading %dKiB [%ld disk%s] into ram disk... ",
+ nblocks, nr_disks, str_plural(nr_disks));
for (i = 0; i < nblocks; i++) {
if (i && (i % devblocks == 0)) {
pr_cont("done disk #1.\n");
@@ -255,12 +254,10 @@ int __init rd_load_image(char *from)
}
kernel_read(in_file, buf, BLOCK_SIZE, &in_pos);
kernel_write(out_file, buf, BLOCK_SIZE, &out_pos);
-#if !defined(CONFIG_S390)
- if (!(i % 16)) {
+ if (!IS_ENABLED(CONFIG_S390) && !(i % 16)) {
pr_cont("%c\b", rotator[rotate & 0x3]);
rotate++;
}
-#endif
}
pr_cont("done.\n");
diff --git a/init/init_task.c b/init/init_task.c
index e557f622bd90..49b13d7c3985 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -27,6 +27,9 @@ static struct signal_struct init_signals = {
},
.multiprocess = HLIST_HEAD_INIT,
.rlim = INIT_RLIMITS,
+#ifdef CONFIG_CGROUPS
+ .cgroup_threadgroup_rwsem = __RWSEM_INITIALIZER(init_signals.cgroup_threadgroup_rwsem),
+#endif
.cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex),
.exec_update_lock = __RWSEM_INITIALIZER(init_signals.exec_update_lock),
#ifdef CONFIG_POSIX_TIMERS
@@ -59,6 +62,33 @@ unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] = {
};
#endif
+/* init to 2 - one for init_task, one to ensure it is never freed */
+static struct group_info init_groups = { .usage = REFCOUNT_INIT(2) };
+
+/*
+ * The initial credentials for the initial task
+ */
+static struct cred init_cred = {
+ .usage = ATOMIC_INIT(4),
+ .uid = GLOBAL_ROOT_UID,
+ .gid = GLOBAL_ROOT_GID,
+ .suid = GLOBAL_ROOT_UID,
+ .sgid = GLOBAL_ROOT_GID,
+ .euid = GLOBAL_ROOT_UID,
+ .egid = GLOBAL_ROOT_GID,
+ .fsuid = GLOBAL_ROOT_UID,
+ .fsgid = GLOBAL_ROOT_GID,
+ .securebits = SECUREBITS_DEFAULT,
+ .cap_inheritable = CAP_EMPTY_SET,
+ .cap_permitted = CAP_FULL_SET,
+ .cap_effective = CAP_FULL_SET,
+ .cap_bset = CAP_FULL_SET,
+ .user = INIT_USER,
+ .user_ns = &init_user_ns,
+ .group_info = &init_groups,
+ .ucounts = &init_ucounts,
+};
+
/*
* Set up the first task table, touch at your own risk!. Base=0,
* limit=0x1fffff (=2MB)
@@ -220,6 +250,9 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
#ifdef CONFIG_SECCOMP_FILTER
.seccomp = { .filter_count = ATOMIC_INIT(0) },
#endif
+#ifdef CONFIG_SCHED_MM_CID
+ .mm_cid = { .cid = MM_CID_UNSET, },
+#endif
};
EXPORT_SYMBOL(init_task);
diff --git a/init/initramfs.c b/init/initramfs.c
index b2f7583bb1f5..6ddbfb17fb8f 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/init.h>
#include <linux/async.h>
+#include <linux/export.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/types.h>
@@ -18,8 +19,10 @@
#include <linux/init_syscalls.h>
#include <linux/umh.h>
#include <linux/security.h>
+#include <linux/overflow.h>
#include "do_mounts.h"
+#include "initramfs_internal.h"
static __initdata bool csum_present;
static __initdata u32 io_csum;
@@ -75,6 +78,7 @@ static __initdata struct hash {
struct hash *next;
char name[N_ALIGN(PATH_MAX)];
} *head[32];
+static __initdata bool hardlink_seen;
static inline int hash(int major, int minor, int ino)
{
@@ -105,22 +109,24 @@ static char __init *find_link(int major, int minor, int ino,
q->minor = minor;
q->ino = ino;
q->mode = mode;
- strcpy(q->name, name);
+ strscpy(q->name, name);
q->next = NULL;
*p = q;
+ hardlink_seen = true;
return NULL;
}
static void __init free_hash(void)
{
struct hash **p, *q;
- for (p = head; p < head + 32; p++) {
+ for (p = head; hardlink_seen && p < head + 32; p++) {
while (*p) {
q = *p;
*p = q->next;
kfree(q);
}
}
+ hardlink_seen = false;
}
#ifdef CONFIG_INITRAMFS_PRESERVE_MTIME
@@ -143,12 +149,11 @@ struct dir_entry {
char name[];
};
-static void __init dir_add(const char *name, time64_t mtime)
+static void __init dir_add(const char *name, size_t nlen, time64_t mtime)
{
- size_t nlen = strlen(name) + 1;
struct dir_entry *de;
- de = kmalloc(sizeof(struct dir_entry) + nlen, GFP_KERNEL);
+ de = kmalloc(struct_size(de, name, nlen), GFP_KERNEL);
if (!de)
panic_show_mem("can't allocate dir_entry buffer");
INIT_LIST_HEAD(&de->list);
@@ -169,7 +174,7 @@ static void __init dir_utime(void)
#else
static void __init do_utime(char *filename, time64_t mtime) {}
static void __init do_utime_path(const struct path *path, time64_t mtime) {}
-static void __init dir_add(const char *name, time64_t mtime) {}
+static void __init dir_add(const char *name, size_t nlen, time64_t mtime) {}
static void __init dir_utime(void) {}
#endif
@@ -188,14 +193,11 @@ static __initdata u32 hdr_csum;
static void __init parse_header(char *s)
{
unsigned long parsed[13];
- char buf[9];
int i;
- buf[8] = '\0';
- for (i = 0, s += 6; i < 13; i++, s += 8) {
- memcpy(buf, s, 8);
- parsed[i] = simple_strtoul(buf, NULL, 16);
- }
+ for (i = 0, s += 6; i < 13; i++, s += 8)
+ parsed[i] = simple_strntoul(s, NULL, 16, 8);
+
ino = parsed[0];
mode = parsed[1];
uid = parsed[2];
@@ -256,7 +258,7 @@ static __initdata char *header_buf, *symlink_buf, *name_buf;
static int __init do_start(void)
{
- read_into(header_buf, 110, GotHeader);
+ read_into(header_buf, CPIO_HDRLEN, GotHeader);
return 0;
}
@@ -396,7 +398,7 @@ static int __init do_name(void)
init_mkdir(collected, mode);
init_chown(collected, uid, gid, 0);
init_chmod(collected, mode);
- dir_add(collected, mtime);
+ dir_add(collected, name_len, mtime);
} else if (S_ISBLK(mode) || S_ISCHR(mode) ||
S_ISFIFO(mode) || S_ISSOCK(mode)) {
if (maybe_link() == 0) {
@@ -497,20 +499,33 @@ static unsigned long my_inptr __initdata; /* index of next byte to be processed
#include <linux/decompress/generic.h>
-static char * __init unpack_to_rootfs(char *buf, unsigned long len)
+/**
+ * unpack_to_rootfs - decompress and extract an initramfs archive
+ * @buf: input initramfs archive to extract
+ * @len: length of initramfs data to process
+ *
+ * Returns: NULL for success or an error message string
+ *
+ * This symbol shouldn't be used externally. It's available for unit tests.
+ */
+char * __init unpack_to_rootfs(char *buf, unsigned long len)
{
long written;
decompress_fn decompress;
const char *compress_name;
- static __initdata char msg_buf[64];
+ struct {
+ char header[CPIO_HDRLEN];
+ char symlink[PATH_MAX + N_ALIGN(PATH_MAX) + 1];
+ char name[N_ALIGN(PATH_MAX)];
+ } *bufs = kmalloc(sizeof(*bufs), GFP_KERNEL);
- header_buf = kmalloc(110, GFP_KERNEL);
- symlink_buf = kmalloc(PATH_MAX + N_ALIGN(PATH_MAX) + 1, GFP_KERNEL);
- name_buf = kmalloc(N_ALIGN(PATH_MAX), GFP_KERNEL);
-
- if (!header_buf || !symlink_buf || !name_buf)
+ if (!bufs)
panic_show_mem("can't allocate buffers");
+ header_buf = bufs->header;
+ symlink_buf = bufs->symlink;
+ name_buf = bufs->name;
+
state = Start;
this_header = 0;
message = NULL;
@@ -538,12 +553,9 @@ static char * __init unpack_to_rootfs(char *buf, unsigned long len)
if (res)
error("decompressor failed");
} else if (compress_name) {
- if (!message) {
- snprintf(msg_buf, sizeof msg_buf,
- "compression method %s not configured",
- compress_name);
- message = msg_buf;
- }
+ pr_err("compression method %s not configured\n",
+ compress_name);
+ error("decompressor failed");
} else
error("invalid magic at start of compressed archive");
if (state != Reset)
@@ -553,9 +565,9 @@ static char * __init unpack_to_rootfs(char *buf, unsigned long len)
len -= my_inptr;
}
dir_utime();
- kfree(name_buf);
- kfree(symlink_buf);
- kfree(header_buf);
+ /* free any hardlink state collected without optional TRAILER!!! */
+ free_hash();
+ kfree(bufs);
return message;
}
diff --git a/init/initramfs_internal.h b/init/initramfs_internal.h
new file mode 100644
index 000000000000..233dad16b0a0
--- /dev/null
+++ b/init/initramfs_internal.h
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef __INITRAMFS_INTERNAL_H__
+#define __INITRAMFS_INTERNAL_H__
+
+char *unpack_to_rootfs(char *buf, unsigned long len);
+#define CPIO_HDRLEN 110
+
+#endif
diff --git a/init/initramfs_test.c b/init/initramfs_test.c
new file mode 100644
index 000000000000..5d2db455e60c
--- /dev/null
+++ b/init/initramfs_test.c
@@ -0,0 +1,472 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <kunit/test.h>
+#include <linux/fcntl.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/init_syscalls.h>
+#include <linux/stringify.h>
+#include <linux/timekeeping.h>
+#include "initramfs_internal.h"
+
+/*
+ * One cpio archive entry, as consumed by fill_cpio(). The numeric members are
+ * declared in the order in which fill_cpio() emits them as 8-digit hex fields
+ * after @magic; @fname and then @data follow the header in the archive.
+ *
+ * The string members are only ever read and usually point at string literals,
+ * hence they are const-qualified.
+ */
+struct initramfs_test_cpio {
+	const char *magic;	/* 6-character magic, e.g. "070701" */
+	unsigned int ino;
+	unsigned int mode;
+	unsigned int uid;
+	unsigned int gid;
+	unsigned int nlink;
+	unsigned int mtime;
+	unsigned int filesize;	/* number of @data bytes to pack */
+	unsigned int devmajor;
+	unsigned int devminor;
+	unsigned int rdevmajor;
+	unsigned int rdevminor;
+	unsigned int namesize;	/* bytes reserved for @fname, incl. nulterm */
+	unsigned int csum;	/* only meaningful with "070702" magic */
+	const char *fname;
+	const char *data;	/* left NULL by entries with @filesize 0 */
+};
+
+/*
+ * Serialise @csz entries from @cs into @out and return the number of bytes
+ * used.
+ *
+ * Each entry is written as header + name, padded to a 4-byte boundary,
+ * followed by @filesize bytes of data, again padded to a 4-byte boundary.
+ * When @namesize is larger than the formatted name, the bytes in between are
+ * skipped rather than written, so they keep whatever @out already contained.
+ *
+ * @out is not bounds-checked: the caller must provide a buffer large enough
+ * for all entries including padding.
+ */
+static size_t fill_cpio(struct initramfs_test_cpio *cs, size_t csz, char *out)
+{
+	size_t i;	/* same type as @csz: no signed/unsigned comparison */
+	size_t off = 0;
+
+	for (i = 0; i < csz; i++) {
+		char *pos = &out[off];
+		struct initramfs_test_cpio *c = &cs[i];
+		size_t thislen;
+
+		/* +1 to account for nulterm */
+		thislen = sprintf(pos, "%s"
+			"%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x"
+			"%s",
+			c->magic, c->ino, c->mode, c->uid, c->gid, c->nlink,
+			c->mtime, c->filesize, c->devmajor, c->devminor,
+			c->rdevmajor, c->rdevminor, c->namesize, c->csum,
+			c->fname) + 1;
+
+		pr_debug("packing (%zu): %.*s\n", thislen, (int)thislen, pos);
+		if (thislen != CPIO_HDRLEN + c->namesize)
+			pr_debug("padded to: %u\n", CPIO_HDRLEN + c->namesize);
+		off += CPIO_HDRLEN + c->namesize;
+		while (off & 3)
+			out[off++] = '\0';
+
+		/* entries without data leave ->data NULL; don't hand that to memcpy() */
+		if (c->filesize) {
+			memcpy(&out[off], c->data, c->filesize);
+			off += c->filesize;
+		}
+		while (off & 3)
+			out[off++] = '\0';
+	}
+
+	return off;
+}
+
+/*
+ * Unpack an archive holding a regular file, a directory and a TRAILER!!!
+ * entry, then compare what init_stat() reports against the archive metadata.
+ * Both paths are removed again on success.
+ */
+static void __init initramfs_test_extract(struct kunit *test)
+{
+	char *err, *cpio_srcbuf;
+	size_t len;
+	struct timespec64 ts_before, ts_after;
+	struct kstat st = {};
+	struct initramfs_test_cpio c[] = { {
+		.magic = "070701",
+		.ino = 1,
+		.mode = S_IFREG | 0777,
+		.uid = 12,
+		.gid = 34,
+		.nlink = 1,
+		.mtime = 56,
+		.filesize = 0,
+		.devmajor = 0,
+		.devminor = 1,
+		.rdevmajor = 0,
+		.rdevminor = 0,
+		.namesize = sizeof("initramfs_test_extract"),
+		.csum = 0,
+		.fname = "initramfs_test_extract",
+	}, {
+		.magic = "070701",
+		.ino = 2,
+		.mode = S_IFDIR | 0777,
+		.nlink = 1,
+		.mtime = 57,
+		.devminor = 1,
+		.namesize = sizeof("initramfs_test_extract_dir"),
+		.fname = "initramfs_test_extract_dir",
+	}, {
+		.magic = "070701",
+		.namesize = sizeof("TRAILER!!!"),
+		.fname = "TRAILER!!!",
+	} };
+
+	/* +3 to cater for any 4-byte end-alignment */
+	cpio_srcbuf = kzalloc(ARRAY_SIZE(c) * (CPIO_HDRLEN + PATH_MAX + 3),
+			      GFP_KERNEL);
+	if (!cpio_srcbuf) {
+		/* fill_cpio() would write through the NULL pointer */
+		KUNIT_FAIL(test, "failed to allocate cpio buffer");
+		return;
+	}
+	len = fill_cpio(c, ARRAY_SIZE(c), cpio_srcbuf);
+
+	/* bracket the unpack so mtime can be range-checked when not preserved */
+	ktime_get_real_ts64(&ts_before);
+	err = unpack_to_rootfs(cpio_srcbuf, len);
+	ktime_get_real_ts64(&ts_after);
+	if (err) {
+		KUNIT_FAIL(test, "unpack failed %s", err);
+		goto out;
+	}
+
+	KUNIT_EXPECT_EQ(test, init_stat(c[0].fname, &st, 0), 0);
+	KUNIT_EXPECT_TRUE(test, S_ISREG(st.mode));
+	KUNIT_EXPECT_TRUE(test, uid_eq(st.uid, KUIDT_INIT(c[0].uid)));
+	KUNIT_EXPECT_TRUE(test, gid_eq(st.gid, KGIDT_INIT(c[0].gid)));
+	KUNIT_EXPECT_EQ(test, st.nlink, 1);
+	if (IS_ENABLED(CONFIG_INITRAMFS_PRESERVE_MTIME)) {
+		KUNIT_EXPECT_EQ(test, st.mtime.tv_sec, c[0].mtime);
+	} else {
+		KUNIT_EXPECT_GE(test, st.mtime.tv_sec, ts_before.tv_sec);
+		KUNIT_EXPECT_LE(test, st.mtime.tv_sec, ts_after.tv_sec);
+	}
+	KUNIT_EXPECT_EQ(test, st.blocks, c[0].filesize);
+
+	KUNIT_EXPECT_EQ(test, init_stat(c[1].fname, &st, 0), 0);
+	KUNIT_EXPECT_TRUE(test, S_ISDIR(st.mode));
+	if (IS_ENABLED(CONFIG_INITRAMFS_PRESERVE_MTIME)) {
+		KUNIT_EXPECT_EQ(test, st.mtime.tv_sec, c[1].mtime);
+	} else {
+		KUNIT_EXPECT_GE(test, st.mtime.tv_sec, ts_before.tv_sec);
+		KUNIT_EXPECT_LE(test, st.mtime.tv_sec, ts_after.tv_sec);
+	}
+
+	KUNIT_EXPECT_EQ(test, init_unlink(c[0].fname), 0);
+	KUNIT_EXPECT_EQ(test, init_rmdir(c[1].fname), 0);
+out:
+	kfree(cpio_srcbuf);
+}
+
+/*
+ * Don't terminate filename. Previously, the cpio filename field was passed
+ * directly to filp_open(collected, O_CREAT|..) without nulterm checks. See
+ * https://lore.kernel.org/linux-fsdevel/20241030035509.20194-2-ddiss@suse.de
+ */
+static void __init initramfs_test_fname_overrun(struct kunit *test)
+{
+	char *err, *cpio_srcbuf;
+	size_t len, suffix_off;
+	struct initramfs_test_cpio c[] = { {
+		.magic = "070701",
+		.ino = 1,
+		.mode = S_IFREG | 0777,
+		.uid = 0,
+		.gid = 0,
+		.nlink = 1,
+		.mtime = 1,
+		.filesize = 0,
+		.devmajor = 0,
+		.devminor = 1,
+		.rdevmajor = 0,
+		.rdevminor = 0,
+		.namesize = sizeof("initramfs_test_fname_overrun"),
+		.csum = 0,
+		.fname = "initramfs_test_fname_overrun",
+	} };
+
+	/*
+	 * poison cpio source buffer, so we can detect overrun. source
+	 * buffer is used by read_into() when hdr or fname
+	 * are already available (e.g. no compression).
+	 */
+	cpio_srcbuf = kmalloc(CPIO_HDRLEN + PATH_MAX + 3, GFP_KERNEL);
+	if (!cpio_srcbuf) {
+		/* the memset() below would write through the NULL pointer */
+		KUNIT_FAIL(test, "failed to allocate cpio buffer");
+		return;
+	}
+	memset(cpio_srcbuf, 'B', CPIO_HDRLEN + PATH_MAX + 3);
+	/* limit overrun to avoid crashes / filp_open() ENAMETOOLONG */
+	cpio_srcbuf[CPIO_HDRLEN + strlen(c[0].fname) + 20] = '\0';
+
+	len = fill_cpio(c, ARRAY_SIZE(c), cpio_srcbuf);
+	/* overwrite trailing fname terminator and padding */
+	suffix_off = len - 1;
+	while (cpio_srcbuf[suffix_off] == '\0') {
+		cpio_srcbuf[suffix_off] = 'P';
+		suffix_off--;
+	}
+
+	/* an unterminated filename must be rejected with an error message */
+	err = unpack_to_rootfs(cpio_srcbuf, len);
+	KUNIT_EXPECT_NOT_NULL(test, err);
+
+	kfree(cpio_srcbuf);
+}
+
+/*
+ * Unpack a single regular file carrying four bytes of data, read the file
+ * back and check that the contents match what was packed.
+ */
+static void __init initramfs_test_data(struct kunit *test)
+{
+	char *err, *cpio_srcbuf;
+	size_t len;
+	struct file *file;
+	struct initramfs_test_cpio c[] = { {
+		.magic = "070701",
+		.ino = 1,
+		.mode = S_IFREG | 0777,
+		.uid = 0,
+		.gid = 0,
+		.nlink = 1,
+		.mtime = 1,
+		.filesize = sizeof("ASDF") - 1,
+		.devmajor = 0,
+		.devminor = 1,
+		.rdevmajor = 0,
+		.rdevminor = 0,
+		.namesize = sizeof("initramfs_test_data"),
+		.csum = 0,
+		.fname = "initramfs_test_data",
+		.data = "ASDF",
+	} };
+
+	/* +6 for max name and data 4-byte padding */
+	cpio_srcbuf = kmalloc(CPIO_HDRLEN + c[0].namesize + c[0].filesize + 6,
+			      GFP_KERNEL);
+	if (!cpio_srcbuf) {
+		/* fill_cpio() would write through the NULL pointer */
+		KUNIT_FAIL(test, "failed to allocate cpio buffer");
+		return;
+	}
+
+	len = fill_cpio(c, ARRAY_SIZE(c), cpio_srcbuf);
+
+	err = unpack_to_rootfs(cpio_srcbuf, len);
+	KUNIT_EXPECT_NULL(test, err);
+
+	file = filp_open(c[0].fname, O_RDONLY, 0);
+	if (IS_ERR(file)) {
+		KUNIT_FAIL(test, "open failed");
+		goto out;
+	}
+
+	/* read back file contents into @cpio_srcbuf and confirm match */
+	len = kernel_read(file, cpio_srcbuf, c[0].filesize, NULL);
+	KUNIT_EXPECT_EQ(test, len, c[0].filesize);
+	/*
+	 * Compare the expected number of bytes rather than @len: a negative
+	 * kernel_read() return stored in the unsigned @len would otherwise
+	 * become an enormous compare length.
+	 */
+	KUNIT_EXPECT_MEMEQ(test, cpio_srcbuf, c[0].data, c[0].filesize);
+
+	fput(file);
+	KUNIT_EXPECT_EQ(test, init_unlink(c[0].fname), 0);
+out:
+	kfree(cpio_srcbuf);
+}
+
+/*
+ * Unpack an archive mixing a checksummed ("070702") entry with a plain
+ * ("070701") one, then corrupt the checksum and confirm that unpacking
+ * reports an error.
+ */
+static void __init initramfs_test_csum(struct kunit *test)
+{
+	char *err, *cpio_srcbuf;
+	size_t len;
+	struct initramfs_test_cpio c[] = { {
+		/* 070702 magic indicates a valid csum is present */
+		.magic = "070702",
+		.ino = 1,
+		.mode = S_IFREG | 0777,
+		.nlink = 1,
+		.filesize = sizeof("ASDF") - 1,
+		.devminor = 1,
+		.namesize = sizeof("initramfs_test_csum"),
+		.csum = 'A' + 'S' + 'D' + 'F',
+		.fname = "initramfs_test_csum",
+		.data = "ASDF",
+	}, {
+		/* mix csum entry above with no-csum entry below */
+		.magic = "070701",
+		.ino = 2,
+		.mode = S_IFREG | 0777,
+		.nlink = 1,
+		.filesize = sizeof("ASDF") - 1,
+		.devminor = 1,
+		.namesize = sizeof("initramfs_test_csum_not_here"),
+		/* csum ignored */
+		.csum = 5555,
+		.fname = "initramfs_test_csum_not_here",
+		.data = "ASDF",
+	} };
+
+	cpio_srcbuf = kmalloc(8192, GFP_KERNEL);
+	if (!cpio_srcbuf) {
+		/* fill_cpio() would write through the NULL pointer */
+		KUNIT_FAIL(test, "failed to allocate cpio buffer");
+		return;
+	}
+
+	len = fill_cpio(c, ARRAY_SIZE(c), cpio_srcbuf);
+
+	err = unpack_to_rootfs(cpio_srcbuf, len);
+	KUNIT_EXPECT_NULL(test, err);
+
+	KUNIT_EXPECT_EQ(test, init_unlink(c[0].fname), 0);
+	KUNIT_EXPECT_EQ(test, init_unlink(c[1].fname), 0);
+
+	/* mess up the csum and confirm that unpack fails */
+	c[0].csum--;
+	len = fill_cpio(c, ARRAY_SIZE(c), cpio_srcbuf);
+
+	err = unpack_to_rootfs(cpio_srcbuf, len);
+	KUNIT_EXPECT_NOT_NULL(test, err);
+
+	/*
+	 * file (with content) is still retained in case of bad-csum abort.
+	 * Perhaps we should change this.
+	 */
+	KUNIT_EXPECT_EQ(test, init_unlink(c[0].fname), 0);
+	KUNIT_EXPECT_EQ(test, init_unlink(c[1].fname), -ENOENT);
+	kfree(cpio_srcbuf);
+}
+
+/*
+ * hardlink hashtable may leak when the archive omits a trailer:
+ * https://lore.kernel.org/r/20241107002044.16477-10-ddiss@suse.de/
+ */
+static void __init initramfs_test_hardlink(struct kunit *test)
+{
+	char *err, *cpio_srcbuf;
+	size_t len;
+	/* zeroed so that a failed init_stat() never leaves garbage to compare */
+	struct kstat st0 = {}, st1 = {};
+	struct initramfs_test_cpio c[] = { {
+		.magic = "070701",
+		.ino = 1,
+		.mode = S_IFREG | 0777,
+		.nlink = 2,
+		.devminor = 1,
+		.namesize = sizeof("initramfs_test_hardlink"),
+		.fname = "initramfs_test_hardlink",
+	}, {
+		/* hardlink data is present in last archive entry */
+		.magic = "070701",
+		.ino = 1,
+		.mode = S_IFREG | 0777,
+		.nlink = 2,
+		.filesize = sizeof("ASDF") - 1,
+		.devminor = 1,
+		.namesize = sizeof("initramfs_test_hardlink_link"),
+		.fname = "initramfs_test_hardlink_link",
+		.data = "ASDF",
+	} };
+
+	cpio_srcbuf = kmalloc(8192, GFP_KERNEL);
+	if (!cpio_srcbuf) {
+		/* fill_cpio() would write through the NULL pointer */
+		KUNIT_FAIL(test, "failed to allocate cpio buffer");
+		return;
+	}
+
+	/* note: no TRAILER!!! entry is appended to this archive */
+	len = fill_cpio(c, ARRAY_SIZE(c), cpio_srcbuf);
+
+	err = unpack_to_rootfs(cpio_srcbuf, len);
+	KUNIT_EXPECT_NULL(test, err);
+
+	/* both names must resolve to the same inode with a link count of 2 */
+	KUNIT_EXPECT_EQ(test, init_stat(c[0].fname, &st0, 0), 0);
+	KUNIT_EXPECT_EQ(test, init_stat(c[1].fname, &st1, 0), 0);
+	KUNIT_EXPECT_EQ(test, st0.ino, st1.ino);
+	KUNIT_EXPECT_EQ(test, st0.nlink, 2);
+	KUNIT_EXPECT_EQ(test, st1.nlink, 2);
+
+	KUNIT_EXPECT_EQ(test, init_unlink(c[0].fname), 0);
+	KUNIT_EXPECT_EQ(test, init_unlink(c[1].fname), 0);
+
+	kfree(cpio_srcbuf);
+}
+
+/* number of archive entries packed by initramfs_test_many() */
+#define INITRAMFS_TEST_MANY_LIMIT 1000
+/* room for the name prefix plus the decimal digits of the limit */
+#define INITRAMFS_TEST_MANY_PATH_MAX (sizeof("initramfs_test_many-") \
+	+ sizeof(__stringify(INITRAMFS_TEST_MANY_LIMIT)))
+/*
+ * Pack INITRAMFS_TEST_MANY_LIMIT empty regular files into one archive,
+ * unpack it and check that every file can be unlinked afterwards.
+ */
+static void __init initramfs_test_many(struct kunit *test)
+{
+	char *err, *cpio_srcbuf, *p;
+	size_t len = INITRAMFS_TEST_MANY_LIMIT *
+		     (CPIO_HDRLEN + INITRAMFS_TEST_MANY_PATH_MAX + 3);
+	char thispath[INITRAMFS_TEST_MANY_PATH_MAX];
+	int i;
+
+	p = cpio_srcbuf = kmalloc(len, GFP_KERNEL);
+	if (!cpio_srcbuf) {
+		/* fill_cpio() would write through the NULL pointer */
+		KUNIT_FAIL(test, "failed to allocate cpio buffer");
+		return;
+	}
+
+	for (i = 0; i < INITRAMFS_TEST_MANY_LIMIT; i++) {
+		struct initramfs_test_cpio c = {
+			.magic = "070701",
+			.ino = i,
+			.mode = S_IFREG | 0777,
+			.nlink = 1,
+			.devminor = 1,
+			.fname = thispath,
+		};
+
+		/* +1: namesize includes the nulterm */
+		c.namesize = 1 + sprintf(thispath, "initramfs_test_many-%d", i);
+		p += fill_cpio(&c, 1, p);
+	}
+
+	len = p - cpio_srcbuf;
+	err = unpack_to_rootfs(cpio_srcbuf, len);
+	KUNIT_EXPECT_NULL(test, err);
+
+	for (i = 0; i < INITRAMFS_TEST_MANY_LIMIT; i++) {
+		sprintf(thispath, "initramfs_test_many-%d", i);
+		KUNIT_EXPECT_EQ(test, init_unlink(thispath), 0);
+	}
+
+	kfree(cpio_srcbuf);
+}
+
+/*
+ * An initramfs filename is namesize in length, including the zero-terminator.
+ * A filename can be zero-terminated prior to namesize, with the remainder used
+ * as padding. This can be useful for e.g. alignment of file data segments with
+ * a 4KB filesystem block, allowing for extent sharing (reflinks) between cpio
+ * source and destination. This hack works with both GNU cpio and initramfs, as
+ * long as PATH_MAX isn't exceeded.
+ */
+static void __init initramfs_test_fname_pad(struct kunit *test)
+{
+	char *err;
+	size_t len;
+	struct file *file;
+	char fdata[] = "this file data is aligned at 4K in the archive";
+	/* zero-allocated: fill_cpio() leaves the name padding bytes untouched */
+	struct test_fname_pad {
+		char padded_fname[4096 - CPIO_HDRLEN];
+		char cpio_srcbuf[CPIO_HDRLEN + PATH_MAX + 3 + sizeof(fdata)];
+	} *tbufs = kzalloc(sizeof(*tbufs), GFP_KERNEL);
+	struct initramfs_test_cpio c[] = { {
+		.magic = "070701",
+		.ino = 1,
+		.mode = S_IFREG | 0777,
+		.uid = 0,
+		.gid = 0,
+		.nlink = 1,
+		.mtime = 1,
+		.filesize = sizeof(fdata),
+		.devmajor = 0,
+		.devminor = 1,
+		.rdevmajor = 0,
+		.rdevminor = 0,
+		/* align file data at 4K archive offset via padded fname */
+		.namesize = 4096 - CPIO_HDRLEN,
+		.csum = 0,
+		/* .fname is set below, once tbufs is known to be valid */
+		.data = fdata,
+	} };
+
+	if (!tbufs) {
+		KUNIT_FAIL(test, "failed to allocate test buffers");
+		return;
+	}
+	c[0].fname = tbufs->padded_fname;
+
+	memcpy(tbufs->padded_fname, "padded_fname", sizeof("padded_fname"));
+	len = fill_cpio(c, ARRAY_SIZE(c), tbufs->cpio_srcbuf);
+
+	err = unpack_to_rootfs(tbufs->cpio_srcbuf, len);
+	KUNIT_EXPECT_NULL(test, err);
+
+	file = filp_open(c[0].fname, O_RDONLY, 0);
+	if (IS_ERR(file)) {
+		KUNIT_FAIL(test, "open failed");
+		goto out;
+	}
+
+	/* read back file contents into @cpio_srcbuf and confirm match */
+	len = kernel_read(file, tbufs->cpio_srcbuf, c[0].filesize, NULL);
+	KUNIT_EXPECT_EQ(test, len, c[0].filesize);
+	/*
+	 * Compare the expected number of bytes rather than @len: a negative
+	 * kernel_read() return stored in the unsigned @len would otherwise
+	 * become an enormous compare length.
+	 */
+	KUNIT_EXPECT_MEMEQ(test, tbufs->cpio_srcbuf, c[0].data, c[0].filesize);
+
+	fput(file);
+	KUNIT_EXPECT_EQ(test, init_unlink(c[0].fname), 0);
+out:
+	kfree(tbufs);
+}
+
+/*
+ * The kunit_case/_suite struct cannot be marked as __initdata as this will be
+ * used in debugfs to retrieve results after test has run.
+ */
+/* Every test function in this file, terminated by an empty entry. */
+static struct kunit_case __refdata initramfs_test_cases[] = {
+ KUNIT_CASE(initramfs_test_extract),
+ KUNIT_CASE(initramfs_test_fname_overrun),
+ KUNIT_CASE(initramfs_test_data),
+ KUNIT_CASE(initramfs_test_csum),
+ KUNIT_CASE(initramfs_test_hardlink),
+ KUNIT_CASE(initramfs_test_many),
+ KUNIT_CASE(initramfs_test_fname_pad),
+ {},
+};
+
+/* Suite named "initramfs" that groups the cases above. */
+static struct kunit_suite initramfs_test_suite = {
+ .name = "initramfs",
+ .test_cases = initramfs_test_cases,
+};
+/* NOTE(review): the init-section variant is presumably needed because the cases are __init - confirm */
+kunit_test_init_section_suites(&initramfs_test_suite);
+
+MODULE_DESCRIPTION("Initramfs KUnit test suite");
+MODULE_LICENSE("GPL v2");
diff --git a/init/main.c b/init/main.c
index 00fac1170294..b84818ad9685 100644
--- a/init/main.c
+++ b/init/main.c
@@ -13,6 +13,7 @@
#define DEBUG /* Enable initcall_debug */
#include <linux/types.h>
+#include <linux/export.h>
#include <linux/extable.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
@@ -50,8 +51,8 @@
#include <linux/writeback.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
+#include <linux/memcontrol.h>
#include <linux/cgroup.h>
-#include <linux/efi.h>
#include <linux/tick.h>
#include <linux/sched/isolation.h>
#include <linux/interrupt.h>
@@ -102,6 +103,7 @@
#include <linux/randomize_kstack.h>
#include <linux/pidfs.h>
#include <linux/ptdump.h>
+#include <linux/time_namespace.h>
#include <net/net_namespace.h>
#include <asm/io.h>
@@ -543,6 +545,12 @@ static int __init unknown_bootoption(char *param, char *val,
const char *unused, void *arg)
{
size_t len = strlen(param);
+ /*
+ * Well-known bootloader identifiers:
+ * 1. LILO/Grub pass "BOOT_IMAGE=...";
+ * 2. kexec/kdump (kexec-tools) pass "kexec".
+ */
+ const char *bootloader[] = { "BOOT_IMAGE=", "kexec", NULL };
/* Handle params aliased to sysctls */
if (sysctl_is_alias(param))
@@ -550,6 +558,12 @@ static int __init unknown_bootoption(char *param, char *val,
repair_env_string(param, val);
+ /* Handle bootloader identifier */
+ for (int i = 0; bootloader[i]; i++) {
+ if (strstarts(param, bootloader[i]))
+ return 0;
+ }
+
/* Handle obsolete-style parameters */
if (obsolete_checksetup(param))
return 0;
@@ -640,15 +654,11 @@ static void __init setup_command_line(char *command_line)
len = xlen + strlen(boot_command_line) + ilen + 1;
- saved_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
- if (!saved_command_line)
- panic("%s: Failed to allocate %zu bytes\n", __func__, len);
+ saved_command_line = memblock_alloc_or_panic(len, SMP_CACHE_BYTES);
len = xlen + strlen(command_line) + 1;
- static_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
- if (!static_command_line)
- panic("%s: Failed to allocate %zu bytes\n", __func__, len);
+ static_command_line = memblock_alloc_or_panic(len, SMP_CACHE_BYTES);
if (xlen) {
/*
@@ -896,6 +906,101 @@ static void __init early_numa_node_init(void)
#endif
}
+/* Text put in front of every printed line, and its length without the nulterm */
+#define KERNEL_CMDLINE_PREFIX "Kernel command line: "
+#define KERNEL_CMDLINE_PREFIX_LEN (sizeof(KERNEL_CMDLINE_PREFIX) - 1)
+/* Text appended to every line except the last one: a space and a backslash */
+#define KERNEL_CMDLINE_CONTINUATION " \\"
+#define KERNEL_CMDLINE_CONTINUATION_LEN (sizeof(KERNEL_CMDLINE_CONTINUATION) - 1)
+
+/*
+ * Clamp the configured wrap length so that it is never smaller than prefix
+ * plus continuation; this keeps IDEAL_CMDLINE_SPLIT_LEN below from going
+ * negative.
+ */
+#define MIN_CMDLINE_LOG_WRAP_IDEAL_LEN (KERNEL_CMDLINE_PREFIX_LEN + \
+ KERNEL_CMDLINE_CONTINUATION_LEN)
+#define CMDLINE_LOG_WRAP_IDEAL_LEN (CONFIG_CMDLINE_LOG_WRAP_IDEAL_LEN > \
+ MIN_CMDLINE_LOG_WRAP_IDEAL_LEN ? \
+ CONFIG_CMDLINE_LOG_WRAP_IDEAL_LEN : \
+ MIN_CMDLINE_LOG_WRAP_IDEAL_LEN)
+
+/*
+ * Command line characters that fit on one line after the prefix:
+ * IDEAL_CMDLINE_LEN without the continuation suffix (the last line),
+ * IDEAL_CMDLINE_SPLIT_LEN with room left for it (every other line).
+ */
+#define IDEAL_CMDLINE_LEN (CMDLINE_LOG_WRAP_IDEAL_LEN - KERNEL_CMDLINE_PREFIX_LEN)
+#define IDEAL_CMDLINE_SPLIT_LEN (IDEAL_CMDLINE_LEN - KERNEL_CMDLINE_CONTINUATION_LEN)
+
+/**
+ * print_kernel_cmdline() - Print the kernel cmdline with wrapping.
+ * @cmdline: The cmdline to print.
+ *
+ * Print the kernel command line, trying to wrap based on the Kconfig knob
+ * CONFIG_CMDLINE_LOG_WRAP_IDEAL_LEN.
+ *
+ * Wrapping is based on spaces, ignoring quotes. All lines are prefixed
+ * with "Kernel command line: " and lines that are not the last line have
+ * a " \" suffix added to them. The prefix and suffix count towards the
+ * line length for wrapping purposes. The ideal length will be exceeded
+ * if no appropriate place to wrap is found.
+ *
+ * Example output if CONFIG_CMDLINE_LOG_WRAP_IDEAL_LEN is 40:
+ * Kernel command line: loglevel=7 \
+ * Kernel command line: init=/sbin/init \
+ * Kernel command line: root=PARTUUID=8c3efc1a-768b-6642-8d0c-89eb782f19f0/PARTNROFF=1 \
+ * Kernel command line: rootwait ro \
+ * Kernel command line: my_quoted_arg="The \
+ * Kernel command line: quick brown fox \
+ * Kernel command line: jumps over the \
+ * Kernel command line: lazy dog."
+ */
+static void __init print_kernel_cmdline(const char *cmdline)
+{
+ size_t len;
+
+ /* Config option of 0 or anything longer than the max disables wrapping */
+ if (CONFIG_CMDLINE_LOG_WRAP_IDEAL_LEN == 0 ||
+ IDEAL_CMDLINE_LEN >= COMMAND_LINE_SIZE - 1) {
+ pr_notice("%s%s\n", KERNEL_CMDLINE_PREFIX, cmdline);
+ return;
+ }
+
+ /* Each iteration emits one wrapped line and advances @cmdline past it */
+ len = strlen(cmdline);
+ while (len > IDEAL_CMDLINE_LEN) {
+ const char *first_space;
+ const char *prev_cutoff;
+ const char *cutoff;
+ int to_print;
+ size_t used;
+
+ /* Find the last ' ' that wouldn't make the line too long */
+ prev_cutoff = NULL;
+ cutoff = cmdline;
+ while (true) {
+ /* searching from cutoff + 1 means a ' ' at offset 0 is never a candidate */
+ cutoff = strchr(cutoff + 1, ' ');
+ if (!cutoff || cutoff - cmdline > IDEAL_CMDLINE_SPLIT_LEN)
+ break;
+ prev_cutoff = cutoff;
+ }
+ /*
+ * Prefer the last space that fits. Failing that, fall back to the
+ * first space beyond the limit (the line overshoots the ideal
+ * length); with no space left at all, leave the loop and print the
+ * remainder as a single line.
+ */
+ if (prev_cutoff)
+ cutoff = prev_cutoff;
+ else if (!cutoff)
+ break;
+
+ /* Find the beginning and end of the string of spaces */
+ first_space = cutoff;
+ while (first_space > cmdline && first_space[-1] == ' ')
+ first_space--;
+ /* text before the run of spaces is printed; the run itself is dropped */
+ to_print = first_space - cmdline;
+ while (*cutoff == ' ')
+ cutoff++;
+ used = cutoff - cmdline;
+
+ /* If the whole string is used, break and do the final printout */
+ if (len == used)
+ break;
+
+ /* to_print is 0 when the run of spaces reaches back to the start: skip it silently */
+ if (to_print)
+ pr_notice("%s%.*s%s\n", KERNEL_CMDLINE_PREFIX,
+ to_print, cmdline, KERNEL_CMDLINE_CONTINUATION);
+
+ len -= used;
+ cmdline += used;
+ }
+ /* last (or only) line: printed without the continuation suffix */
+ if (len)
+ pr_notice("%s%s\n", KERNEL_CMDLINE_PREFIX, cmdline);
+}
+
asmlinkage __visible __init __no_sanitize_address __noreturn __no_stack_protector
void start_kernel(void)
{
@@ -932,7 +1037,7 @@ void start_kernel(void)
early_numa_node_init();
boot_cpu_hotplug_init();
- pr_notice("Kernel command line: %s\n", saved_command_line);
+ print_kernel_cmdline(saved_command_line);
/* parameters may set static keys */
parse_early_param();
after_dashes = parse_args("Booting kernel",
@@ -959,6 +1064,7 @@ void start_kernel(void)
sort_main_extable();
trap_init();
mm_core_init();
+ maple_tree_init();
poking_init();
ftrace_init();
@@ -976,7 +1082,6 @@ void start_kernel(void)
"Interrupts were enabled *very* early, fixing it\n"))
local_irq_disable();
radix_tree_init();
- maple_tree_init();
/*
* Set up housekeeping before setting up workqueues to allow the unbound
@@ -992,6 +1097,7 @@ void start_kernel(void)
workqueue_init_early();
rcu_init();
+ kvfree_rcu_init();
/* Trace events are available after this */
trace_init();
@@ -1005,7 +1111,7 @@ void start_kernel(void)
init_IRQ();
tick_init();
rcu_init_nohz();
- init_timers();
+ timers_init();
srcu_init();
hrtimers_init();
softirq_init();
@@ -1069,15 +1175,12 @@ void start_kernel(void)
pid_idr_init();
anon_vma_init();
-#ifdef CONFIG_X86
- if (efi_enabled(EFI_RUNTIME_SERVICES))
- efi_enter_virtual_mode();
-#endif
thread_stack_cache_init();
cred_init();
fork_init();
proc_caches_init();
uts_ns_init();
+ time_ns_init();
key_init();
security_init();
dbg_late_init();
@@ -1090,6 +1193,7 @@ void start_kernel(void)
nsfs_init();
pidfs_init();
cpuset_init();
+ mem_cgroup_init();
cgroup_init();
taskstats_init_early();
delayacct_init();
@@ -1145,16 +1249,10 @@ static int __init initcall_blacklist(char *str)
str_entry = strsep(&str, ",");
if (str_entry) {
pr_debug("blacklisting initcall %s\n", str_entry);
- entry = memblock_alloc(sizeof(*entry),
+ entry = memblock_alloc_or_panic(sizeof(*entry),
SMP_CACHE_BYTES);
- if (!entry)
- panic("%s: Failed to allocate %zu bytes\n",
- __func__, sizeof(*entry));
- entry->buf = memblock_alloc(strlen(str_entry) + 1,
+ entry->buf = memblock_alloc_or_panic(strlen(str_entry) + 1,
SMP_CACHE_BYTES);
- if (!entry->buf)
- panic("%s: Failed to allocate %zu bytes\n",
- __func__, strlen(str_entry) + 1);
strcpy(entry->buf, str_entry);
list_add(&entry->next, &blacklisted_initcalls);
}
@@ -1223,6 +1321,12 @@ trace_initcall_finish_cb(void *data, initcall_t fn, int ret)
fn, ret, (unsigned long long)ktime_us_delta(rettime, *calltime));
}
+/* Probe for the initcall_level tracepoint: log the level being entered. */
+static __init_or_module void trace_initcall_level_cb(void *data,
+						      const char *level)
+{
+	printk(KERN_DEBUG "entering initcall level: %s\n", level);
+}
+
static ktime_t initcall_calltime;
#ifdef TRACEPOINTS_ENABLED
@@ -1234,10 +1338,12 @@ static void __init initcall_debug_enable(void)
&initcall_calltime);
ret |= register_trace_initcall_finish(trace_initcall_finish_cb,
&initcall_calltime);
+ ret |= register_trace_initcall_level(trace_initcall_level_cb, NULL);
WARN(ret, "Failed to register initcall tracepoints\n");
}
# define do_trace_initcall_start trace_initcall_start
# define do_trace_initcall_finish trace_initcall_finish
+# define do_trace_initcall_level trace_initcall_level
#else
static inline void do_trace_initcall_start(initcall_t fn)
{
@@ -1251,6 +1357,12 @@ static inline void do_trace_initcall_finish(initcall_t fn, int ret)
return;
trace_initcall_finish_cb(&initcall_calltime, fn, ret);
}
+/*
+ * Used when TRACEPOINTS_ENABLED is not defined: invoke the level callback
+ * directly, but only while initcall_debug is set.
+ */
+static inline void do_trace_initcall_level(const char *level)
+{
+	if (initcall_debug)
+		trace_initcall_level_cb(NULL, level);
+}
#endif /* !TRACEPOINTS_ENABLED */
int __init_or_module do_one_initcall(initcall_t fn)
@@ -1323,7 +1435,7 @@ static void __init do_initcall_level(int level, char *command_line)
level, level,
NULL, ignore_unknown_bootoption);
- trace_initcall_level(initcall_level_names[level]);
+ do_trace_initcall_level(initcall_level_names[level]);
for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++)
do_one_initcall(initcall_from_entry(fn));
}
@@ -1367,7 +1479,7 @@ static void __init do_pre_smp_initcalls(void)
{
initcall_entry_t *fn;
- trace_initcall_level("early");
+ do_trace_initcall_level("early");
for (fn = __initcall_start; fn < __initcall0_start; fn++)
do_one_initcall(initcall_from_entry(fn));
}
@@ -1562,7 +1674,6 @@ static noinline void __init kernel_init_freeable(void)
init_mm_internals();
- rcu_init_tasks_generic();
do_pre_smp_initcalls();
lockup_detector_init();
@@ -1585,7 +1696,11 @@ static noinline void __init kernel_init_freeable(void)
* check if there is an early userspace init. If yes, let it do all
* the work
*/
- if (init_eaccess(ramdisk_execute_command) != 0) {
+ int ramdisk_command_access;
+ ramdisk_command_access = init_eaccess(ramdisk_execute_command);
+ if (ramdisk_command_access != 0) {
+ pr_warn("check access for rdinit=%s failed: %i, ignoring\n",
+ ramdisk_execute_command, ramdisk_command_access);
ramdisk_execute_command = NULL;
prepare_namespace();
}
diff --git a/init/version-timestamp.c b/init/version-timestamp.c
index 043cbf80a766..375726e05f69 100644
--- a/init/version-timestamp.c
+++ b/init/version-timestamp.c
@@ -8,7 +8,7 @@
#include <linux/utsname.h>
struct uts_namespace init_uts_ns = {
- .ns.count = REFCOUNT_INIT(2),
+ .ns = NS_COMMON_INIT(init_uts_ns),
.name = {
.sysname = UTS_SYSNAME,
.nodename = UTS_NODENAME,
@@ -18,10 +18,6 @@ struct uts_namespace init_uts_ns = {
.domainname = UTS_DOMAINNAME,
},
.user_ns = &init_user_ns,
- .ns.inum = PROC_UTS_INIT_INO,
-#ifdef CONFIG_UTS_NS
- .ns.ops = &utsns_operations,
-#endif
};
/* FIXED STRINGS! Don't touch! */