diff options
Diffstat (limited to 'init')
-rw-r--r-- | init/Kconfig | 202 | ||||
-rw-r--r-- | init/Makefile | 5 | ||||
-rwxr-xr-x | init/build-version | 10 | ||||
-rw-r--r-- | init/do_mounts.c | 6 | ||||
-rw-r--r-- | init/do_mounts.h | 9 | ||||
-rw-r--r-- | init/do_mounts_initrd.c | 5 | ||||
-rw-r--r-- | init/init_task.c | 21 | ||||
-rw-r--r-- | init/initramfs.c | 42 | ||||
-rw-r--r-- | init/main.c | 115 |
9 files changed, 264 insertions, 151 deletions
diff --git a/init/Kconfig b/init/Kconfig index bee58f7468c3..324c2886b2ea 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -60,6 +60,13 @@ config LLD_VERSION default $(ld-version) if LD_IS_LLD default 0 +config RUSTC_VERSION + int + default $(rustc-version) + help + It does not depend on `RUST` since that one may need to use the version + in a `depends on`. + config RUST_IS_AVAILABLE def_bool $(success,$(srctree)/scripts/rust_is_available.sh) help @@ -71,6 +78,10 @@ config RUST_IS_AVAILABLE In particular, the Makefile target 'rustavailable' is useful to check why the Rust toolchain is not being detected. +config RUSTC_LLVM_VERSION + int + default $(rustc-llvm-version) + config CC_CAN_LINK bool default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag)) if 64BIT @@ -81,23 +92,25 @@ config CC_CAN_LINK_STATIC default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag) -static) if 64BIT default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag) -static) +# Fixed in GCC 14, 13.3, 12.4 and 11.5 +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 +config GCC_ASM_GOTO_OUTPUT_BROKEN + bool + depends on CC_IS_GCC + default y if GCC_VERSION < 110500 + default y if GCC_VERSION >= 120000 && GCC_VERSION < 120400 + default y if GCC_VERSION >= 130000 && GCC_VERSION < 130300 + config CC_HAS_ASM_GOTO_OUTPUT - def_bool $(success,echo 'int foo(int x) { asm goto ("": "=r"(x) ::: bar); return x; bar: return 0; }' | $(CC) -x c - -c -o /dev/null) + def_bool y + depends on !GCC_ASM_GOTO_OUTPUT_BROKEN + depends on $(success,echo 'int foo(int x) { asm goto ("": "=r"(x) ::: bar); return x; bar: return 0; }' | $(CC) -x c - -c -o /dev/null) config CC_HAS_ASM_GOTO_TIED_OUTPUT depends on CC_HAS_ASM_GOTO_OUTPUT # Detect buggy gcc and clang, fixed in gcc-11 clang-14. def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null) -config GCC_ASM_GOTO_OUTPUT_WORKAROUND - bool - depends on CC_IS_GCC && CC_HAS_ASM_GOTO_OUTPUT - # Fixed in GCC 14, 13.3, 12.4 and 11.5 - # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 - default y if GCC_VERSION < 110500 - default y if GCC_VERSION >= 120000 && GCC_VERSION < 120400 - default y if GCC_VERSION >= 130000 && GCC_VERSION < 130300 - config TOOLS_SUPPORT_RELR def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh) @@ -107,6 +120,18 @@ config CC_HAS_ASM_INLINE config CC_HAS_NO_PROFILE_FN_ATTR def_bool $(success,echo '__attribute__((no_profile_instrument_function)) int x();' | $(CC) -x c - -c -o /dev/null -Werror) +config CC_HAS_COUNTED_BY + # TODO: when gcc 15 is released remove the build test and add + # a gcc version check + def_bool $(success,echo 'struct flex { int count; int array[] __attribute__((__counted_by__(count))); };' | $(CC) $(CLANG_FLAGS) -x c - -c -o /dev/null -Werror) + # clang needs to be at least 19.1.3 to avoid __bdos miscalculations + # https://github.com/llvm/llvm-project/pull/110497 + # https://github.com/llvm/llvm-project/pull/112636 + depends on !(CC_IS_CLANG && CLANG_VERSION < 190103) + +config RUSTC_HAS_COERCE_POINTEE + def_bool RUSTC_VERSION >= 108400 + config PAHOLE_VERSION int default $(shell,$(srctree)/scripts/pahole-version.sh $(PAHOLE)) @@ -115,7 +140,7 @@ config CONSTRUCTORS bool config IRQ_WORK - bool + def_bool y if SMP config BUILDTIME_TABLE_SORT bool @@ -308,8 +333,9 @@ config KERNEL_XZ BCJ filters which can improve compression ratio of executable code. The size of the kernel is about 30% smaller with XZ in comparison to gzip. On architectures for which there is a BCJ - filter (i386, x86_64, ARM, IA-64, PowerPC, and SPARC), XZ - will create a few percent smaller kernel than plain LZMA. + filter (i386, x86_64, ARM, ARM64, RISC-V, big endian PowerPC, + and SPARC), XZ will create a few percent smaller kernel than + plain LZMA. The speed is about the same as with LZMA: The decompression speed of XZ is better than that of bzip2 but worse than gzip @@ -547,24 +573,24 @@ config HAVE_SCHED_AVG_IRQ depends on IRQ_TIME_ACCOUNTING || PARAVIRT_TIME_ACCOUNTING depends on SMP -config SCHED_THERMAL_PRESSURE +config SCHED_HW_PRESSURE bool default y if ARM && ARM_CPU_TOPOLOGY default y if ARM64 depends on SMP depends on CPU_FREQ_THERMAL help - Select this option to enable thermal pressure accounting in the - scheduler. Thermal pressure is the value conveyed to the scheduler + Select this option to enable HW pressure accounting in the + scheduler. HW pressure is the value conveyed to the scheduler that reflects the reduction in CPU compute capacity resulted from - thermal throttling. Thermal throttling occurs when the performance of - a CPU is capped due to high operating temperatures. + HW throttling. HW throttling occurs when the performance of + a CPU is capped due to high operating temperatures as an example. If selected, the scheduler will be able to balance tasks accordingly, i.e. put less load on throttled CPUs than on non/less throttled ones. This requires the architecture to implement - arch_update_thermal_pressure() and arch_scale_thermal_pressure(). + arch_update_hw_pressure() and arch_scale_thermal_pressure(). config BSD_PROCESS_ACCT bool "BSD Process Accounting" @@ -743,8 +769,8 @@ config LOG_CPU_MAX_BUF_SHIFT int "CPU kernel log buffer size contribution (13 => 8 KB, 17 => 128KB)" depends on SMP range 0 21 - default 12 if !BASE_SMALL default 0 if BASE_SMALL + default 12 depends on PRINTK help This option allows to increase the default ring buffer size @@ -883,7 +909,7 @@ config GCC10_NO_ARRAY_BOUNDS config CC_NO_ARRAY_BOUNDS bool - default y if CC_IS_GCC && GCC_VERSION >= 100000 && GCC10_NO_ARRAY_BOUNDS + default y if CC_IS_GCC && GCC_VERSION >= 90000 && GCC10_NO_ARRAY_BOUNDS # Currently, disable -Wstringop-overflow for GCC globally. config GCC_NO_STRINGOP_OVERFLOW @@ -929,6 +955,9 @@ config NUMA_BALANCING_DEFAULT_ENABLED If set, automatic NUMA balancing will be enabled if running on a NUMA machine. +config SLAB_OBJ_EXT + bool + menuconfig CGROUPS bool "Control Group support" select KERNFS @@ -962,13 +991,26 @@ config MEMCG bool "Memory controller" select PAGE_COUNTER select EVENTFD + select SLAB_OBJ_EXT help Provides control over the memory footprint of tasks in a cgroup. -config MEMCG_KMEM - bool +config MEMCG_V1 + bool "Legacy cgroup v1 memory controller" depends on MEMCG - default y + default n + help + Legacy cgroup v1 memory controller which has been deprecated by + cgroup v2 implementation. The v1 is there for legacy applications + which haven't migrated to the new cgroup v2 interface yet. If you + do not have any such application then you are completely fine leaving + this option disabled. + + Please note that feature set of the legacy memory controller is likely + going to shrink due to deprecation process. New deployments with v1 + controller are highly discouraged. + + Say N if unsure. config BLK_CGROUP bool "IO controller" @@ -1006,9 +1048,13 @@ menuconfig CGROUP_SCHED tasks. if CGROUP_SCHED +config GROUP_SCHED_WEIGHT + def_bool n + config FAIR_GROUP_SCHED bool "Group scheduling for SCHED_OTHER" depends on CGROUP_SCHED + select GROUP_SCHED_WEIGHT default CGROUP_SCHED config CFS_BANDWIDTH @@ -1033,6 +1079,12 @@ config RT_GROUP_SCHED realtime bandwidth for them. See Documentation/scheduler/sched-rt-group.rst for more information. +config EXT_GROUP_SCHED + bool + depends on SCHED_CLASS_EXT && CGROUP_SCHED + select GROUP_SCHED_WEIGHT + default y + endif #CGROUP_SCHED config SCHED_MM_CID @@ -1087,6 +1139,16 @@ config CGROUP_RDMA Attaching processes with active RDMA resources to the cgroup hierarchy is allowed even if can cross the hierarchy's limit. +config CGROUP_DMEM + bool "Device memory controller (DMEM)" + select PAGE_COUNTER + help + The DMEM controller allows compatible devices to restrict device + memory usage based on the cgroup hierarchy. + + As an example, it allows you to restrict VRAM usage for applications + in the DRM subsystem. + config CGROUP_FREEZER bool "Freezer controller" help @@ -1117,6 +1179,7 @@ config CGROUP_HUGETLB config CPUSETS bool "Cpuset controller" depends on SMP + select UNION_FIND help This option will let you create and manage CPUSETs which allow dynamically partitioning a system into sets of CPUs and @@ -1125,6 +1188,19 @@ config CPUSETS Say N if unsure. +config CPUSETS_V1 + bool "Legacy cgroup v1 cpusets controller" + depends on CPUSETS + default n + help + Legacy cgroup v1 cpusets controller which has been deprecated by + cgroup v2 implementation. The v1 is there for legacy applications + which haven't migrated to the new cgroup v2 interface yet. If you + do not have any such application then you are completely fine leaving + this option disabled. + + Say N if unsure. + config PROC_PID_CPUSET bool "Include legacy /proc/<pid>/cpuset file" depends on CPUSETS @@ -1466,11 +1542,6 @@ config SYSCTL_ARCH_UNALIGN_ALLOW config HAVE_PCSPKR_PLATFORM bool -# interpreter that classic socket filters depend on -config BPF - bool - select CRYPTO_LIB_SHA1 - menuconfig EXPERT bool "Configure standard kernel features (expert users)" # Unhide debug options, to make the on-by-default options visible @@ -1504,7 +1575,7 @@ config MULTIUSER config SGETMASK_SYSCALL bool "sgetmask/ssetmask syscalls support" if EXPERT - def_bool PARISC || M68K || PPC || MIPS || X86 || SPARC || MICROBLAZE || SUPERH + default PARISC || M68K || PPC || MIPS || X86 || SPARC || MICROBLAZE || SUPERH help sys_sgetmask and sys_ssetmask are obsolete system calls no longer supported in libc but still enabled by default in some @@ -1590,11 +1661,10 @@ config PCSPKR_PLATFORM This option allows to disable the internal PC-Speaker support, saving some memory. -config BASE_FULL - default y - bool "Enable full-sized data structures for core" if EXPERT +config BASE_SMALL + bool "Enable smaller-sized data structures for core" if EXPERT help - Disabling this option reduces the size of miscellaneous core + Enabling this option reduces the size of miscellaneous core kernel data structures. This saves memory on small machines, but may reduce performance. @@ -1675,6 +1745,19 @@ config IO_URING applications to submit and complete IO through submission and completion rings that are shared between the kernel and application. +config GCOV_PROFILE_URING + bool "Enable GCOV profiling on the io_uring subsystem" + depends on GCOV_KERNEL + help + Enable GCOV profiling on the io_uring subsystem, to facilitate + code coverage testing. + + If unsure, say N. + + Note that this will have a negative impact on the performance of + the io_uring subsystem, hence this should only be enabled for + specific test purposes. + config ADVISE_SYSCALLS bool "Enable madvise/fadvise syscalls" if EXPERT default y @@ -1791,24 +1874,6 @@ config KALLSYMS_ABSOLUTE_PERCPU depends on KALLSYMS default X86_64 && SMP -config KALLSYMS_BASE_RELATIVE - bool - depends on KALLSYMS - default y - help - Instead of emitting them as absolute values in the native word size, - emit the symbol references in the kallsyms table as 32-bit entries, - each containing a relative value in the range [base, base + U32_MAX] - or, when KALLSYMS_ABSOLUTE_PERCPU is in effect, each containing either - an absolute value in the range [0, S32_MAX] or a relative value in the - range [base, base + S32_MAX], where base is the lowest relative symbol - address encountered in the image. - - On 64-bit builds, this reduces the size of the address table by 50%, - but more importantly, it results in entries whose values are build - time constants, and no relocation pass is required at runtime to fix - up the entries based on the runtime load address of the kernel. - # end of the "standard kernel features (expert users)" menu config ARCH_HAS_MEMBARRIER_CALLBACKS @@ -1904,11 +1969,16 @@ config RUST bool "Rust support" depends on HAVE_RUST depends on RUST_IS_AVAILABLE - depends on !MODVERSIONS - depends on !GCC_PLUGINS + select EXTENDED_MODVERSIONS if MODVERSIONS + depends on !MODVERSIONS || GENDWARFKSYMS + depends on !GCC_PLUGIN_RANDSTRUCT depends on !RANDSTRUCT - depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE - select CONSTRUCTORS + depends on !DEBUG_INFO_BTF || (PAHOLE_HAS_LANG_EXCLUDE && !LTO) + depends on !CFI_CLANG || HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC + select CFI_ICALL_NORMALIZE_INTEGERS if CFI_CLANG + depends on !CALL_PADDING || RUSTC_VERSION >= 108100 + depends on !KASAN_SW_TAGS + depends on !(MITIGATION_RETHUNK && KASAN) || RUSTC_VERSION >= 108300 help Enables Rust support in the kernel. @@ -1925,12 +1995,19 @@ config RUST config RUSTC_VERSION_TEXT string depends on RUST - default $(shell,command -v $(RUSTC) >/dev/null 2>&1 && $(RUSTC) --version || echo n) + default "$(RUSTC_VERSION_TEXT)" + help + See `CC_VERSION_TEXT`. config BINDGEN_VERSION_TEXT string depends on RUST - default $(shell,command -v $(BINDGEN) >/dev/null 2>&1 && $(BINDGEN) --version || echo n) + # The dummy parameter `workaround-for-0.69.0` is required to support 0.69.0 + # (https://github.com/rust-lang/rust-bindgen/pull/2678) and 0.71.0 + # (https://github.com/rust-lang/rust-bindgen/pull/3040). It can be removed + # when the minimum version is upgraded past the latter (0.69.1 and 0.71.1 + # both fixed the issue). + default "$(shell,$(BINDGEN) --version workaround-for-0.69.0 2>/dev/null)" # # Place an empty function call at each tracepoint site. Can be @@ -1938,6 +2015,7 @@ config BINDGEN_VERSION_TEXT # config TRACEPOINTS bool + select TASKS_TRACE_RCU source "kernel/Kconfig.kexec" @@ -1949,11 +2027,6 @@ config RT_MUTEXES bool default y if PREEMPT_RT -config BASE_SMALL - int - default 0 if BASE_FULL - default 1 if !BASE_FULL - config MODULE_SIG_FORMAT def_bool n select SYSTEM_DATA_VERIFICATION @@ -1991,6 +2064,9 @@ source "kernel/Kconfig.locks" config ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE bool +config ARCH_HAS_PREPARE_SYNC_CORE_CMD + bool + config ARCH_HAS_SYNC_CORE_BEFORE_USERMODE bool diff --git a/init/Makefile b/init/Makefile index cbac576c57d6..10b652d33e87 100644 --- a/init/Makefile +++ b/init/Makefile @@ -52,13 +52,10 @@ CFLAGS_version.o := -include $(obj)/utsversion-tmp.h # Build version-timestamp.c with final UTS_VERSION # -include/generated/utsversion.h: build-version-auto = $(shell $(srctree)/$(src)/build-version) +include/generated/utsversion.h: build-version-auto = $(shell $(srctree)/scripts/build-version) include/generated/utsversion.h: build-timestamp-auto = $(shell LC_ALL=C date) include/generated/utsversion.h: FORCE $(call filechk,uts_version) $(obj)/version-timestamp.o: include/generated/utsversion.h CFLAGS_version-timestamp.o := -include include/generated/utsversion.h -KASAN_SANITIZE_version-timestamp.o := n -KCSAN_SANITIZE_version-timestamp.o := n -GCOV_PROFILE_version-timestamp.o := n diff --git a/init/build-version b/init/build-version deleted file mode 100755 index 537d45815083..000000000000 --- a/init/build-version +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0-only - -prev_ver=$(cat .version 2>/dev/null) && -ver=$(expr ${prev_ver} + 1 2>/dev/null) || -ver=1 - -echo ${ver} > .version - -echo ${ver} diff --git a/init/do_mounts.c b/init/do_mounts.c index 279ad28bf4fb..6af29da8889e 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -159,8 +159,7 @@ static int __init do_mount_root(const char *name, const char *fs, if (!p) return -ENOMEM; data_page = page_address(p); - /* zero-pad. init_mount() will make sure it's terminated */ - strncpy(data_page, data, PAGE_SIZE); + strscpy_pad(data_page, data, PAGE_SIZE); } ret = init_mount(name, "/root", fs, flags, data_page); @@ -208,6 +207,9 @@ retry: goto out; case -EACCES: case -EINVAL: +#ifdef CONFIG_BLOCK + init_flush_fput(); +#endif continue; } /* diff --git a/init/do_mounts.h b/init/do_mounts.h index 15e372b00ce7..6069ea3eb80d 100644 --- a/init/do_mounts.h +++ b/init/do_mounts.h @@ -9,6 +9,8 @@ #include <linux/major.h> #include <linux/root_dev.h> #include <linux/init_syscalls.h> +#include <linux/task_work.h> +#include <linux/file.h> void mount_root_generic(char *name, char *pretty_name, int flags); void mount_root(char *root_device_name); @@ -41,3 +43,10 @@ static inline bool initrd_load(char *root_device_name) } #endif + +/* Ensure that async file closing finished to prevent spurious errors. */ +static inline void init_flush_fput(void) +{ + flush_delayed_fput(); + task_work_run(); +} diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 425f4bcf4b77..f6867bad0d78 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -21,7 +21,7 @@ phys_addr_t phys_initrd_start __initdata; unsigned long phys_initrd_size __initdata; #ifdef CONFIG_SYSCTL -static struct ctl_table kern_do_mounts_initrd_table[] = { +static const struct ctl_table kern_do_mounts_initrd_table[] = { { .procname = "real-root-dev", .data = &real_root_dev, @@ -29,7 +29,6 @@ static struct ctl_table kern_do_mounts_initrd_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { } }; static __init int kernel_do_mounts_initrd_sysctls_init(void) @@ -90,7 +89,7 @@ static void __init handle_initrd(char *root_device_name) extern char *envp_init[]; int error; - pr_warn("using deprecated initrd support, will be removed in 2021.\n"); + pr_warn("using deprecated initrd support, will be removed soon.\n"); real_root_dev = new_encode_dev(ROOT_DEV); create_dev("/dev/root.old", Root_RAM0); diff --git a/init/init_task.c b/init/init_task.c index 7ecb458eb3da..e557f622bd90 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -6,6 +6,7 @@ #include <linux/sched/sysctl.h> #include <linux/sched/rt.h> #include <linux/sched/task.h> +#include <linux/sched/ext.h> #include <linux/init.h> #include <linux/fs.h> #include <linux/mm.h> @@ -29,8 +30,9 @@ static struct signal_struct init_signals = { .cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex), .exec_update_lock = __RWSEM_INITIALIZER(init_signals.exec_update_lock), #ifdef CONFIG_POSIX_TIMERS - .posix_timers = LIST_HEAD_INIT(init_signals.posix_timers), - .cputimer = { + .posix_timers = HLIST_HEAD_INIT, + .ignored_posix_timers = HLIST_HEAD_INIT, + .cputimer = { .cputime_atomic = INIT_CPUTIME_ATOMIC, }, #endif @@ -77,6 +79,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = { .cpus_ptr = &init_task.cpus_mask, .user_cpus_ptr = NULL, .cpus_mask = CPU_MASK_ALL, + .max_allowed_capacity = SCHED_CAPACITY_SCALE, .nr_cpus_allowed= NR_CPUS, .mm = NULL, .active_mm = &init_mm, @@ -98,6 +101,17 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = { #ifdef CONFIG_CGROUP_SCHED .sched_task_group = &root_task_group, #endif +#ifdef CONFIG_SCHED_CLASS_EXT + .scx = { + .dsq_list.node = LIST_HEAD_INIT(init_task.scx.dsq_list.node), + .sticky_cpu = -1, + .holding_cpu = -1, + .runnable_node = LIST_HEAD_INIT(init_task.scx.runnable_node), + .runnable_at = INITIAL_JIFFIES, + .ddsp_dsq_id = SCX_DSQ_INVALID, + .slice = SCX_SLICE_DFL, + }, +#endif .ptraced = LIST_HEAD_INIT(init_task.ptraced), .ptrace_entry = LIST_HEAD_INIT(init_task.ptrace_entry), .real_parent = &init_task, @@ -147,6 +161,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = { .rcu_tasks_holdout = false, .rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list), .rcu_tasks_idle_cpu = -1, + .rcu_tasks_exit_list = LIST_HEAD_INIT(init_task.rcu_tasks_exit_list), #endif #ifdef CONFIG_TASKS_TRACE_RCU .trc_reader_nesting = 0, @@ -197,7 +212,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = { .trace_recursion = 0, #endif #ifdef CONFIG_LIVEPATCH - .patch_state = KLP_UNDEFINED, + .patch_state = KLP_TRANSITION_IDLE, #endif #ifdef CONFIG_SECURITY .security = NULL, diff --git a/init/initramfs.c b/init/initramfs.c index 76deb48c38cb..b2f7583bb1f5 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -16,8 +16,10 @@ #include <linux/mm.h> #include <linux/namei.h> #include <linux/init_syscalls.h> -#include <linux/task_work.h> #include <linux/umh.h> +#include <linux/security.h> + +#include "do_mounts.h" static __initdata bool csum_present; static __initdata u32 io_csum; @@ -358,6 +360,15 @@ static int __init do_name(void) { state = SkipIt; next_state = Reset; + + /* name_len > 0 && name_len <= PATH_MAX checked in do_header */ + if (collected[name_len - 1] != '\0') { + pr_err("initramfs name without nulterm: %.*s\n", + (int)name_len, collected); + error("malformed archive"); + return 1; + } + if (strcmp(collected, "TRAILER!!!") == 0) { free_hash(); return 0; @@ -366,7 +377,7 @@ static int __init do_name(void) if (S_ISREG(mode)) { int ml = maybe_link(); if (ml >= 0) { - int openflags = O_WRONLY|O_CREAT; + int openflags = O_WRONLY|O_CREAT|O_LARGEFILE; if (ml != 1) openflags |= O_TRUNC; wfile = filp_open(collected, openflags, mode); @@ -422,6 +433,12 @@ static int __init do_copy(void) static int __init do_symlink(void) { + if (collected[name_len - 1] != '\0') { + pr_err("initramfs symlink without nulterm: %.*s\n", + (int)name_len, collected); + error("malformed archive"); + return 1; + } collected[N_ALIGN(name_len) + body_len] = '\0'; clean_path(collected, 0); init_symlink(collected + N_ALIGN(name_len), collected); @@ -574,15 +591,7 @@ extern unsigned long __initramfs_size; #include <linux/initrd.h> #include <linux/kexec.h> -static ssize_t raw_read(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, char *buf, - loff_t pos, size_t count) -{ - memcpy(buf, attr->private + pos, count); - return count; -} - -static BIN_ATTR(initrd, 0440, raw_read, NULL, 0); +static BIN_ATTR(initrd, 0440, sysfs_bin_attr_simple_read, NULL, 0); void __init reserve_initrd_mem(void) { @@ -642,7 +651,7 @@ void __weak __init free_initrd_mem(unsigned long start, unsigned long end) "initrd"); } -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_CRASH_RESERVE static bool __init kexec_free_initrd(void) { unsigned long crashk_start = (unsigned long)__va(crashk_res.start); @@ -679,11 +688,9 @@ static void __init populate_initrd_image(char *err) struct file *file; loff_t pos = 0; - unpack_to_rootfs(__initramfs_start, __initramfs_size); - printk(KERN_INFO "rootfs image is not initramfs (%s); looks like an initrd\n", err); - file = filp_open("/initrd.image", O_WRONLY | O_CREAT, 0700); + file = filp_open("/initrd.image", O_WRONLY|O_CREAT|O_LARGEFILE, 0700); if (IS_ERR(file)) return; @@ -721,6 +728,8 @@ static void __init do_populate_rootfs(void *unused, async_cookie_t cookie) } done: + security_initramfs_populated(); + /* * If the initrd region is overlapped with crashkernel reserved region, * free only memory that is not part of crashkernel region. @@ -736,8 +745,7 @@ done: initrd_start = 0; initrd_end = 0; - flush_delayed_fput(); - task_work_run(); + init_flush_fput(); } static ASYNC_DOMAIN_EXCLUSIVE(initramfs_domain); diff --git a/init/main.c b/init/main.c index e24b0780fdff..2a1757826397 100644 --- a/init/main.c +++ b/init/main.c @@ -88,6 +88,7 @@ #include <linux/sched/task_stack.h> #include <linux/context_tracking.h> #include <linux/random.h> +#include <linux/moduleloader.h> #include <linux/list.h> #include <linux/integrity.h> #include <linux/proc_ns.h> @@ -99,6 +100,8 @@ #include <linux/init_syscalls.h> #include <linux/stackdepot.h> #include <linux/randomize_kstack.h> +#include <linux/pidfs.h> +#include <linux/ptdump.h> #include <net/net_namespace.h> #include <asm/io.h> @@ -324,7 +327,7 @@ static int __init xbc_snprint_cmdline(char *buf, size_t size, { struct xbc_node *knode, *vnode; char *end = buf + size; - const char *val; + const char *val, *q; int ret; xbc_node_for_each_key_value(root, knode, val) { @@ -342,8 +345,14 @@ static int __init xbc_snprint_cmdline(char *buf, size_t size, continue; } xbc_array_for_each_value(vnode, val) { - ret = snprintf(buf, rest(buf, end), "%s=\"%s\" ", - xbc_namebuf, val); + /* + * For prettier and more readable /proc/cmdline, only + * quote the value when necessary, i.e. when it contains + * whitespace. + */ + q = strpbrk(val, " \t\r\n") ? "\"" : ""; + ret = snprintf(buf, rest(buf, end), "%s=%s%s%s ", + xbc_namebuf, q, val, q); if (ret < 0) return ret; buf += ret; @@ -484,6 +493,11 @@ static int __init warn_bootconfig(char *str) early_param("bootconfig", warn_bootconfig); +bool __init cmdline_has_extra_options(void) +{ + return extra_command_line || extra_init_args; +} + /* Change NUL term back to "=", to make "param" the whole string. */ static void __init repair_env_string(char *param, char *val) { @@ -603,7 +617,6 @@ static int __init rdinit_setup(char *str) __setup("rdinit=", rdinit_setup); #ifndef CONFIG_SMP -static const unsigned int setup_max_cpus = NR_CPUS; static inline void setup_nr_cpu_ids(void) { } static inline void smp_prepare_cpus(unsigned int maxcpus) { } #endif @@ -620,18 +633,18 @@ static void __init setup_command_line(char *command_line) if (extra_command_line) xlen = strlen(extra_command_line); - if (extra_init_args) + if (extra_init_args) { + extra_init_args = strim(extra_init_args); /* remove trailing space */ ilen = strlen(extra_init_args) + 4; /* for " -- " */ + } - len = xlen + strlen(boot_command_line) + 1; + len = xlen + strlen(boot_command_line) + ilen + 1; - saved_command_line = memblock_alloc(len + ilen, SMP_CACHE_BYTES); - if (!saved_command_line) - panic("%s: Failed to allocate %zu bytes\n", __func__, len + ilen); + saved_command_line = memblock_alloc_or_panic(len, SMP_CACHE_BYTES); - static_command_line = memblock_alloc(len, SMP_CACHE_BYTES); - if (!static_command_line) - panic("%s: Failed to allocate %zu bytes\n", __func__, len); + len = xlen + strlen(command_line) + 1; + + static_command_line = memblock_alloc_or_panic(len, SMP_CACHE_BYTES); if (xlen) { /* @@ -681,7 +694,7 @@ static void __init setup_command_line(char *command_line) static __initdata DECLARE_COMPLETION(kthreadd_done); -noinline void __ref __noreturn rest_init(void) +static noinline void __ref __noreturn rest_init(void) { struct task_struct *tsk; int pid; @@ -737,10 +750,7 @@ static int __init do_early_param(char *param, char *val, const struct obs_kernel_param *p; for (p = __setup_start; p < __setup_end; p++) { - if ((p->early && parameq(param, p->str)) || - (strcmp(param, "console") == 0 && - strcmp(p->str, "earlycon") == 0) - ) { + if (p->early && parameq(param, p->str)) { if (p->setup_func(val) != 0) pr_warn("Malformed early option '%s'\n", param); } @@ -776,6 +786,10 @@ void __init __weak smp_setup_processor_id(void) { } +void __init __weak smp_prepare_boot_cpu(void) +{ +} + # if THREAD_SIZE >= PAGE_SIZE void __init __weak thread_stack_cache_init(void) { @@ -822,11 +836,6 @@ static int __init early_randomize_kstack_offset(char *buf) early_param("randomize_kstack_offset", early_randomize_kstack_offset); #endif -void __init __weak __noreturn arch_call_rest_init(void) -{ - rest_init(); -} - static void __init print_unknown_bootoptions(void) { char *unknown_options; @@ -870,6 +879,19 @@ static void __init print_unknown_bootoptions(void) memblock_free(unknown_options, len); } +static void __init early_numa_node_init(void) +{ +#ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID +#ifndef cpu_to_node + int cpu; + + /* The early_cpu_to_node() should be ready here. */ + for_each_possible_cpu(cpu) + set_cpu_numa_node(cpu, early_cpu_to_node(cpu)); +#endif +#endif +} + asmlinkage __visible __init __no_sanitize_address __noreturn __no_stack_protector void start_kernel(void) { @@ -893,18 +915,21 @@ void start_kernel(void) boot_cpu_init(); page_address_init(); pr_notice("%s", linux_banner); - early_security_init(); setup_arch(&command_line); + /* Static keys and static calls are needed by LSMs */ + jump_label_init(); + static_call_init(); + early_security_init(); setup_boot_config(); setup_command_line(command_line); setup_nr_cpu_ids(); setup_per_cpu_areas(); smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ + early_numa_node_init(); boot_cpu_hotplug_init(); pr_notice("Kernel command line: %s\n", saved_command_line); /* parameters may set static keys */ - jump_label_init(); parse_early_param(); after_dashes = parse_args("Booting kernel", static_command_line, __start___param, @@ -963,6 +988,7 @@ void start_kernel(void) workqueue_init_early(); rcu_init(); + kvfree_rcu_init(); /* Trace events are available after this */ trace_init(); @@ -1059,6 +1085,7 @@ void start_kernel(void) seq_file_init(); proc_root_init(); nsfs_init(); + pidfs_init(); cpuset_init(); cgroup_init(); taskstats_init_early(); @@ -1069,7 +1096,7 @@ void start_kernel(void) kcsan_init(); /* Do the rest non-__init'ed, we're now alive */ - arch_call_rest_init(); + rest_init(); /* * Avoid stack canaries in callers of boot_init_stack_canary for gcc-10 @@ -1115,16 +1142,10 @@ static int __init initcall_blacklist(char *str) str_entry = strsep(&str, ","); if (str_entry) { pr_debug("blacklisting initcall %s\n", str_entry); - entry = memblock_alloc(sizeof(*entry), + entry = memblock_alloc_or_panic(sizeof(*entry), SMP_CACHE_BYTES); - if (!entry) - panic("%s: Failed to allocate %zu bytes\n", - __func__, sizeof(*entry)); - entry->buf = memblock_alloc(strlen(str_entry) + 1, + entry->buf = memblock_alloc_or_panic(strlen(str_entry) + 1, SMP_CACHE_BYTES); - if (!entry->buf) - panic("%s: Failed to allocate %zu bytes\n", - __func__, strlen(str_entry) + 1); strcpy(entry->buf, str_entry); list_add(&entry->next, &blacklisted_initcalls); } @@ -1396,33 +1417,28 @@ static int __init set_debug_rodata(char *str) early_param("rodata", set_debug_rodata); #endif -#ifdef CONFIG_STRICT_KERNEL_RWX static void mark_readonly(void) { - if (rodata_enabled) { + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && rodata_enabled) { /* * load_module() results in W+X mappings, which are cleaned - * up with call_rcu(). Let's make sure that queued work is + * up with init_free_wq. Let's make sure that queued work is * flushed so that we don't hit false positives looking for * insecure pages which are W+X. */ - rcu_barrier(); + flush_module_init_free_work(); + jump_label_init_ro(); mark_rodata_ro(); + debug_checkwx(); rodata_test(); - } else + } else if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) { pr_info("Kernel memory protection disabled.\n"); + } else if (IS_ENABLED(CONFIG_ARCH_HAS_STRICT_KERNEL_RWX)) { + pr_warn("Kernel memory protection not selected by kernel config.\n"); + } else { + pr_warn("This architecture does not have kernel memory protection.\n"); + } } -#elif defined(CONFIG_ARCH_HAS_STRICT_KERNEL_RWX) -static inline void mark_readonly(void) -{ - pr_warn("Kernel memory protection not selected by kernel config.\n"); -} -#else -static inline void mark_readonly(void) -{ - pr_warn("This architecture does not have kernel memory protection.\n"); -} -#endif void __weak free_initmem(void) { @@ -1545,6 +1561,7 @@ static noinline void __init kernel_init_freeable(void) sched_init_smp(); workqueue_init_topology(); + async_init(); padata_init(); page_alloc_init_late(); |