summaryrefslogtreecommitdiff
path: root/init
diff options
context:
space:
mode:
Diffstat (limited to 'init')
-rw-r--r--init/Kconfig202
-rw-r--r--init/Makefile5
-rwxr-xr-xinit/build-version10
-rw-r--r--init/do_mounts.c6
-rw-r--r--init/do_mounts.h9
-rw-r--r--init/do_mounts_initrd.c5
-rw-r--r--init/init_task.c21
-rw-r--r--init/initramfs.c42
-rw-r--r--init/main.c115
9 files changed, 264 insertions, 151 deletions
diff --git a/init/Kconfig b/init/Kconfig
index bee58f7468c3..324c2886b2ea 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -60,6 +60,13 @@ config LLD_VERSION
default $(ld-version) if LD_IS_LLD
default 0
+config RUSTC_VERSION
+ int
+ default $(rustc-version)
+ help
+ It does not depend on `RUST` since that one may need to use the version
+ in a `depends on`.
+
config RUST_IS_AVAILABLE
def_bool $(success,$(srctree)/scripts/rust_is_available.sh)
help
@@ -71,6 +78,10 @@ config RUST_IS_AVAILABLE
In particular, the Makefile target 'rustavailable' is useful to check
why the Rust toolchain is not being detected.
+config RUSTC_LLVM_VERSION
+ int
+ default $(rustc-llvm-version)
+
config CC_CAN_LINK
bool
default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag)) if 64BIT
@@ -81,23 +92,25 @@ config CC_CAN_LINK_STATIC
default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag) -static) if 64BIT
default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag) -static)
+# Fixed in GCC 14, 13.3, 12.4 and 11.5
+# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921
+config GCC_ASM_GOTO_OUTPUT_BROKEN
+ bool
+ depends on CC_IS_GCC
+ default y if GCC_VERSION < 110500
+ default y if GCC_VERSION >= 120000 && GCC_VERSION < 120400
+ default y if GCC_VERSION >= 130000 && GCC_VERSION < 130300
+
config CC_HAS_ASM_GOTO_OUTPUT
- def_bool $(success,echo 'int foo(int x) { asm goto ("": "=r"(x) ::: bar); return x; bar: return 0; }' | $(CC) -x c - -c -o /dev/null)
+ def_bool y
+ depends on !GCC_ASM_GOTO_OUTPUT_BROKEN
+ depends on $(success,echo 'int foo(int x) { asm goto ("": "=r"(x) ::: bar); return x; bar: return 0; }' | $(CC) -x c - -c -o /dev/null)
config CC_HAS_ASM_GOTO_TIED_OUTPUT
depends on CC_HAS_ASM_GOTO_OUTPUT
# Detect buggy gcc and clang, fixed in gcc-11 clang-14.
def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null)
-config GCC_ASM_GOTO_OUTPUT_WORKAROUND
- bool
- depends on CC_IS_GCC && CC_HAS_ASM_GOTO_OUTPUT
- # Fixed in GCC 14, 13.3, 12.4 and 11.5
- # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921
- default y if GCC_VERSION < 110500
- default y if GCC_VERSION >= 120000 && GCC_VERSION < 120400
- default y if GCC_VERSION >= 130000 && GCC_VERSION < 130300
-
config TOOLS_SUPPORT_RELR
def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh)
@@ -107,6 +120,18 @@ config CC_HAS_ASM_INLINE
config CC_HAS_NO_PROFILE_FN_ATTR
def_bool $(success,echo '__attribute__((no_profile_instrument_function)) int x();' | $(CC) -x c - -c -o /dev/null -Werror)
+config CC_HAS_COUNTED_BY
+ # TODO: when gcc 15 is released remove the build test and add
+ # a gcc version check
+ def_bool $(success,echo 'struct flex { int count; int array[] __attribute__((__counted_by__(count))); };' | $(CC) $(CLANG_FLAGS) -x c - -c -o /dev/null -Werror)
+ # clang needs to be at least 19.1.3 to avoid __bdos miscalculations
+ # https://github.com/llvm/llvm-project/pull/110497
+ # https://github.com/llvm/llvm-project/pull/112636
+ depends on !(CC_IS_CLANG && CLANG_VERSION < 190103)
+
+config RUSTC_HAS_COERCE_POINTEE
+ def_bool RUSTC_VERSION >= 108400
+
config PAHOLE_VERSION
int
default $(shell,$(srctree)/scripts/pahole-version.sh $(PAHOLE))
@@ -115,7 +140,7 @@ config CONSTRUCTORS
bool
config IRQ_WORK
- bool
+ def_bool y if SMP
config BUILDTIME_TABLE_SORT
bool
@@ -308,8 +333,9 @@ config KERNEL_XZ
BCJ filters which can improve compression ratio of executable
code. The size of the kernel is about 30% smaller with XZ in
comparison to gzip. On architectures for which there is a BCJ
- filter (i386, x86_64, ARM, IA-64, PowerPC, and SPARC), XZ
- will create a few percent smaller kernel than plain LZMA.
+ filter (i386, x86_64, ARM, ARM64, RISC-V, big endian PowerPC,
+ and SPARC), XZ will create a few percent smaller kernel than
+ plain LZMA.
The speed is about the same as with LZMA: The decompression
speed of XZ is better than that of bzip2 but worse than gzip
@@ -547,24 +573,24 @@ config HAVE_SCHED_AVG_IRQ
depends on IRQ_TIME_ACCOUNTING || PARAVIRT_TIME_ACCOUNTING
depends on SMP
-config SCHED_THERMAL_PRESSURE
+config SCHED_HW_PRESSURE
bool
default y if ARM && ARM_CPU_TOPOLOGY
default y if ARM64
depends on SMP
depends on CPU_FREQ_THERMAL
help
- Select this option to enable thermal pressure accounting in the
- scheduler. Thermal pressure is the value conveyed to the scheduler
+ Select this option to enable HW pressure accounting in the
+ scheduler. HW pressure is the value conveyed to the scheduler
that reflects the reduction in CPU compute capacity resulted from
- thermal throttling. Thermal throttling occurs when the performance of
- a CPU is capped due to high operating temperatures.
+ HW throttling. HW throttling occurs when the performance of
+ a CPU is capped due to high operating temperatures as an example.
If selected, the scheduler will be able to balance tasks accordingly,
i.e. put less load on throttled CPUs than on non/less throttled ones.
This requires the architecture to implement
- arch_update_thermal_pressure() and arch_scale_thermal_pressure().
+ arch_update_hw_pressure() and arch_scale_thermal_pressure().
config BSD_PROCESS_ACCT
bool "BSD Process Accounting"
@@ -743,8 +769,8 @@ config LOG_CPU_MAX_BUF_SHIFT
int "CPU kernel log buffer size contribution (13 => 8 KB, 17 => 128KB)"
depends on SMP
range 0 21
- default 12 if !BASE_SMALL
default 0 if BASE_SMALL
+ default 12
depends on PRINTK
help
This option allows to increase the default ring buffer size
@@ -883,7 +909,7 @@ config GCC10_NO_ARRAY_BOUNDS
config CC_NO_ARRAY_BOUNDS
bool
- default y if CC_IS_GCC && GCC_VERSION >= 100000 && GCC10_NO_ARRAY_BOUNDS
+ default y if CC_IS_GCC && GCC_VERSION >= 90000 && GCC10_NO_ARRAY_BOUNDS
# Currently, disable -Wstringop-overflow for GCC globally.
config GCC_NO_STRINGOP_OVERFLOW
@@ -929,6 +955,9 @@ config NUMA_BALANCING_DEFAULT_ENABLED
If set, automatic NUMA balancing will be enabled if running on a NUMA
machine.
+config SLAB_OBJ_EXT
+ bool
+
menuconfig CGROUPS
bool "Control Group support"
select KERNFS
@@ -962,13 +991,26 @@ config MEMCG
bool "Memory controller"
select PAGE_COUNTER
select EVENTFD
+ select SLAB_OBJ_EXT
help
Provides control over the memory footprint of tasks in a cgroup.
-config MEMCG_KMEM
- bool
+config MEMCG_V1
+ bool "Legacy cgroup v1 memory controller"
depends on MEMCG
- default y
+ default n
+ help
+ Legacy cgroup v1 memory controller which has been deprecated by
+ cgroup v2 implementation. The v1 is there for legacy applications
+ which haven't migrated to the new cgroup v2 interface yet. If you
+ do not have any such application then you are completely fine leaving
+ this option disabled.
+
+ Please note that feature set of the legacy memory controller is likely
+ going to shrink due to deprecation process. New deployments with v1
+ controller are highly discouraged.
+
+ Say N if unsure.
config BLK_CGROUP
bool "IO controller"
@@ -1006,9 +1048,13 @@ menuconfig CGROUP_SCHED
tasks.
if CGROUP_SCHED
+config GROUP_SCHED_WEIGHT
+ def_bool n
+
config FAIR_GROUP_SCHED
bool "Group scheduling for SCHED_OTHER"
depends on CGROUP_SCHED
+ select GROUP_SCHED_WEIGHT
default CGROUP_SCHED
config CFS_BANDWIDTH
@@ -1033,6 +1079,12 @@ config RT_GROUP_SCHED
realtime bandwidth for them.
See Documentation/scheduler/sched-rt-group.rst for more information.
+config EXT_GROUP_SCHED
+ bool
+ depends on SCHED_CLASS_EXT && CGROUP_SCHED
+ select GROUP_SCHED_WEIGHT
+ default y
+
endif #CGROUP_SCHED
config SCHED_MM_CID
@@ -1087,6 +1139,16 @@ config CGROUP_RDMA
Attaching processes with active RDMA resources to the cgroup
hierarchy is allowed even if can cross the hierarchy's limit.
+config CGROUP_DMEM
+ bool "Device memory controller (DMEM)"
+ select PAGE_COUNTER
+ help
+ The DMEM controller allows compatible devices to restrict device
+ memory usage based on the cgroup hierarchy.
+
+ As an example, it allows you to restrict VRAM usage for applications
+ in the DRM subsystem.
+
config CGROUP_FREEZER
bool "Freezer controller"
help
@@ -1117,6 +1179,7 @@ config CGROUP_HUGETLB
config CPUSETS
bool "Cpuset controller"
depends on SMP
+ select UNION_FIND
help
This option will let you create and manage CPUSETs which
allow dynamically partitioning a system into sets of CPUs and
@@ -1125,6 +1188,19 @@ config CPUSETS
Say N if unsure.
+config CPUSETS_V1
+ bool "Legacy cgroup v1 cpusets controller"
+ depends on CPUSETS
+ default n
+ help
+ Legacy cgroup v1 cpusets controller which has been deprecated by
+ cgroup v2 implementation. The v1 is there for legacy applications
+ which haven't migrated to the new cgroup v2 interface yet. If you
+ do not have any such application then you are completely fine leaving
+ this option disabled.
+
+ Say N if unsure.
+
config PROC_PID_CPUSET
bool "Include legacy /proc/<pid>/cpuset file"
depends on CPUSETS
@@ -1466,11 +1542,6 @@ config SYSCTL_ARCH_UNALIGN_ALLOW
config HAVE_PCSPKR_PLATFORM
bool
-# interpreter that classic socket filters depend on
-config BPF
- bool
- select CRYPTO_LIB_SHA1
-
menuconfig EXPERT
bool "Configure standard kernel features (expert users)"
# Unhide debug options, to make the on-by-default options visible
@@ -1504,7 +1575,7 @@ config MULTIUSER
config SGETMASK_SYSCALL
bool "sgetmask/ssetmask syscalls support" if EXPERT
- def_bool PARISC || M68K || PPC || MIPS || X86 || SPARC || MICROBLAZE || SUPERH
+ default PARISC || M68K || PPC || MIPS || X86 || SPARC || MICROBLAZE || SUPERH
help
sys_sgetmask and sys_ssetmask are obsolete system calls
no longer supported in libc but still enabled by default in some
@@ -1590,11 +1661,10 @@ config PCSPKR_PLATFORM
This option allows to disable the internal PC-Speaker
support, saving some memory.
-config BASE_FULL
- default y
- bool "Enable full-sized data structures for core" if EXPERT
+config BASE_SMALL
+ bool "Enable smaller-sized data structures for core" if EXPERT
help
- Disabling this option reduces the size of miscellaneous core
+ Enabling this option reduces the size of miscellaneous core
kernel data structures. This saves memory on small machines,
but may reduce performance.
@@ -1675,6 +1745,19 @@ config IO_URING
applications to submit and complete IO through submission and
completion rings that are shared between the kernel and application.
+config GCOV_PROFILE_URING
+ bool "Enable GCOV profiling on the io_uring subsystem"
+ depends on GCOV_KERNEL
+ help
+ Enable GCOV profiling on the io_uring subsystem, to facilitate
+ code coverage testing.
+
+ If unsure, say N.
+
+ Note that this will have a negative impact on the performance of
+ the io_uring subsystem, hence this should only be enabled for
+ specific test purposes.
+
config ADVISE_SYSCALLS
bool "Enable madvise/fadvise syscalls" if EXPERT
default y
@@ -1791,24 +1874,6 @@ config KALLSYMS_ABSOLUTE_PERCPU
depends on KALLSYMS
default X86_64 && SMP
-config KALLSYMS_BASE_RELATIVE
- bool
- depends on KALLSYMS
- default y
- help
- Instead of emitting them as absolute values in the native word size,
- emit the symbol references in the kallsyms table as 32-bit entries,
- each containing a relative value in the range [base, base + U32_MAX]
- or, when KALLSYMS_ABSOLUTE_PERCPU is in effect, each containing either
- an absolute value in the range [0, S32_MAX] or a relative value in the
- range [base, base + S32_MAX], where base is the lowest relative symbol
- address encountered in the image.
-
- On 64-bit builds, this reduces the size of the address table by 50%,
- but more importantly, it results in entries whose values are build
- time constants, and no relocation pass is required at runtime to fix
- up the entries based on the runtime load address of the kernel.
-
# end of the "standard kernel features (expert users)" menu
config ARCH_HAS_MEMBARRIER_CALLBACKS
@@ -1904,11 +1969,16 @@ config RUST
bool "Rust support"
depends on HAVE_RUST
depends on RUST_IS_AVAILABLE
- depends on !MODVERSIONS
- depends on !GCC_PLUGINS
+ select EXTENDED_MODVERSIONS if MODVERSIONS
+ depends on !MODVERSIONS || GENDWARFKSYMS
+ depends on !GCC_PLUGIN_RANDSTRUCT
depends on !RANDSTRUCT
- depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE
- select CONSTRUCTORS
+ depends on !DEBUG_INFO_BTF || (PAHOLE_HAS_LANG_EXCLUDE && !LTO)
+ depends on !CFI_CLANG || HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC
+ select CFI_ICALL_NORMALIZE_INTEGERS if CFI_CLANG
+ depends on !CALL_PADDING || RUSTC_VERSION >= 108100
+ depends on !KASAN_SW_TAGS
+ depends on !(MITIGATION_RETHUNK && KASAN) || RUSTC_VERSION >= 108300
help
Enables Rust support in the kernel.
@@ -1925,12 +1995,19 @@ config RUST
config RUSTC_VERSION_TEXT
string
depends on RUST
- default $(shell,command -v $(RUSTC) >/dev/null 2>&1 && $(RUSTC) --version || echo n)
+ default "$(RUSTC_VERSION_TEXT)"
+ help
+ See `CC_VERSION_TEXT`.
config BINDGEN_VERSION_TEXT
string
depends on RUST
- default $(shell,command -v $(BINDGEN) >/dev/null 2>&1 && $(BINDGEN) --version || echo n)
+ # The dummy parameter `workaround-for-0.69.0` is required to support 0.69.0
+ # (https://github.com/rust-lang/rust-bindgen/pull/2678) and 0.71.0
+ # (https://github.com/rust-lang/rust-bindgen/pull/3040). It can be removed
+ # when the minimum version is upgraded past the latter (0.69.1 and 0.71.1
+ # both fixed the issue).
+ default "$(shell,$(BINDGEN) --version workaround-for-0.69.0 2>/dev/null)"
#
# Place an empty function call at each tracepoint site. Can be
@@ -1938,6 +2015,7 @@ config BINDGEN_VERSION_TEXT
#
config TRACEPOINTS
bool
+ select TASKS_TRACE_RCU
source "kernel/Kconfig.kexec"
@@ -1949,11 +2027,6 @@ config RT_MUTEXES
bool
default y if PREEMPT_RT
-config BASE_SMALL
- int
- default 0 if BASE_FULL
- default 1 if !BASE_FULL
-
config MODULE_SIG_FORMAT
def_bool n
select SYSTEM_DATA_VERIFICATION
@@ -1991,6 +2064,9 @@ source "kernel/Kconfig.locks"
config ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
bool
+config ARCH_HAS_PREPARE_SYNC_CORE_CMD
+ bool
+
config ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
bool
diff --git a/init/Makefile b/init/Makefile
index cbac576c57d6..10b652d33e87 100644
--- a/init/Makefile
+++ b/init/Makefile
@@ -52,13 +52,10 @@ CFLAGS_version.o := -include $(obj)/utsversion-tmp.h
# Build version-timestamp.c with final UTS_VERSION
#
-include/generated/utsversion.h: build-version-auto = $(shell $(srctree)/$(src)/build-version)
+include/generated/utsversion.h: build-version-auto = $(shell $(srctree)/scripts/build-version)
include/generated/utsversion.h: build-timestamp-auto = $(shell LC_ALL=C date)
include/generated/utsversion.h: FORCE
$(call filechk,uts_version)
$(obj)/version-timestamp.o: include/generated/utsversion.h
CFLAGS_version-timestamp.o := -include include/generated/utsversion.h
-KASAN_SANITIZE_version-timestamp.o := n
-KCSAN_SANITIZE_version-timestamp.o := n
-GCOV_PROFILE_version-timestamp.o := n
diff --git a/init/build-version b/init/build-version
deleted file mode 100755
index 537d45815083..000000000000
--- a/init/build-version
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0-only
-
-prev_ver=$(cat .version 2>/dev/null) &&
-ver=$(expr ${prev_ver} + 1 2>/dev/null) ||
-ver=1
-
-echo ${ver} > .version
-
-echo ${ver}
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 279ad28bf4fb..6af29da8889e 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -159,8 +159,7 @@ static int __init do_mount_root(const char *name, const char *fs,
if (!p)
return -ENOMEM;
data_page = page_address(p);
- /* zero-pad. init_mount() will make sure it's terminated */
- strncpy(data_page, data, PAGE_SIZE);
+ strscpy_pad(data_page, data, PAGE_SIZE);
}
ret = init_mount(name, "/root", fs, flags, data_page);
@@ -208,6 +207,9 @@ retry:
goto out;
case -EACCES:
case -EINVAL:
+#ifdef CONFIG_BLOCK
+ init_flush_fput();
+#endif
continue;
}
/*
diff --git a/init/do_mounts.h b/init/do_mounts.h
index 15e372b00ce7..6069ea3eb80d 100644
--- a/init/do_mounts.h
+++ b/init/do_mounts.h
@@ -9,6 +9,8 @@
#include <linux/major.h>
#include <linux/root_dev.h>
#include <linux/init_syscalls.h>
+#include <linux/task_work.h>
+#include <linux/file.h>
void mount_root_generic(char *name, char *pretty_name, int flags);
void mount_root(char *root_device_name);
@@ -41,3 +43,10 @@ static inline bool initrd_load(char *root_device_name)
}
#endif
+
+/* Ensure that async file closing finished to prevent spurious errors. */
+static inline void init_flush_fput(void)
+{
+ flush_delayed_fput();
+ task_work_run();
+}
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index 425f4bcf4b77..f6867bad0d78 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -21,7 +21,7 @@ phys_addr_t phys_initrd_start __initdata;
unsigned long phys_initrd_size __initdata;
#ifdef CONFIG_SYSCTL
-static struct ctl_table kern_do_mounts_initrd_table[] = {
+static const struct ctl_table kern_do_mounts_initrd_table[] = {
{
.procname = "real-root-dev",
.data = &real_root_dev,
@@ -29,7 +29,6 @@ static struct ctl_table kern_do_mounts_initrd_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
static __init int kernel_do_mounts_initrd_sysctls_init(void)
@@ -90,7 +89,7 @@ static void __init handle_initrd(char *root_device_name)
extern char *envp_init[];
int error;
- pr_warn("using deprecated initrd support, will be removed in 2021.\n");
+ pr_warn("using deprecated initrd support, will be removed soon.\n");
real_root_dev = new_encode_dev(ROOT_DEV);
create_dev("/dev/root.old", Root_RAM0);
diff --git a/init/init_task.c b/init/init_task.c
index 7ecb458eb3da..e557f622bd90 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -6,6 +6,7 @@
#include <linux/sched/sysctl.h>
#include <linux/sched/rt.h>
#include <linux/sched/task.h>
+#include <linux/sched/ext.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
@@ -29,8 +30,9 @@ static struct signal_struct init_signals = {
.cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex),
.exec_update_lock = __RWSEM_INITIALIZER(init_signals.exec_update_lock),
#ifdef CONFIG_POSIX_TIMERS
- .posix_timers = LIST_HEAD_INIT(init_signals.posix_timers),
- .cputimer = {
+ .posix_timers = HLIST_HEAD_INIT,
+ .ignored_posix_timers = HLIST_HEAD_INIT,
+ .cputimer = {
.cputime_atomic = INIT_CPUTIME_ATOMIC,
},
#endif
@@ -77,6 +79,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
.cpus_ptr = &init_task.cpus_mask,
.user_cpus_ptr = NULL,
.cpus_mask = CPU_MASK_ALL,
+ .max_allowed_capacity = SCHED_CAPACITY_SCALE,
.nr_cpus_allowed= NR_CPUS,
.mm = NULL,
.active_mm = &init_mm,
@@ -98,6 +101,17 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
#ifdef CONFIG_CGROUP_SCHED
.sched_task_group = &root_task_group,
#endif
+#ifdef CONFIG_SCHED_CLASS_EXT
+ .scx = {
+ .dsq_list.node = LIST_HEAD_INIT(init_task.scx.dsq_list.node),
+ .sticky_cpu = -1,
+ .holding_cpu = -1,
+ .runnable_node = LIST_HEAD_INIT(init_task.scx.runnable_node),
+ .runnable_at = INITIAL_JIFFIES,
+ .ddsp_dsq_id = SCX_DSQ_INVALID,
+ .slice = SCX_SLICE_DFL,
+ },
+#endif
.ptraced = LIST_HEAD_INIT(init_task.ptraced),
.ptrace_entry = LIST_HEAD_INIT(init_task.ptrace_entry),
.real_parent = &init_task,
@@ -147,6 +161,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
.rcu_tasks_holdout = false,
.rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list),
.rcu_tasks_idle_cpu = -1,
+ .rcu_tasks_exit_list = LIST_HEAD_INIT(init_task.rcu_tasks_exit_list),
#endif
#ifdef CONFIG_TASKS_TRACE_RCU
.trc_reader_nesting = 0,
@@ -197,7 +212,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
.trace_recursion = 0,
#endif
#ifdef CONFIG_LIVEPATCH
- .patch_state = KLP_UNDEFINED,
+ .patch_state = KLP_TRANSITION_IDLE,
#endif
#ifdef CONFIG_SECURITY
.security = NULL,
diff --git a/init/initramfs.c b/init/initramfs.c
index 76deb48c38cb..b2f7583bb1f5 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -16,8 +16,10 @@
#include <linux/mm.h>
#include <linux/namei.h>
#include <linux/init_syscalls.h>
-#include <linux/task_work.h>
#include <linux/umh.h>
+#include <linux/security.h>
+
+#include "do_mounts.h"
static __initdata bool csum_present;
static __initdata u32 io_csum;
@@ -358,6 +360,15 @@ static int __init do_name(void)
{
state = SkipIt;
next_state = Reset;
+
+ /* name_len > 0 && name_len <= PATH_MAX checked in do_header */
+ if (collected[name_len - 1] != '\0') {
+ pr_err("initramfs name without nulterm: %.*s\n",
+ (int)name_len, collected);
+ error("malformed archive");
+ return 1;
+ }
+
if (strcmp(collected, "TRAILER!!!") == 0) {
free_hash();
return 0;
@@ -366,7 +377,7 @@ static int __init do_name(void)
if (S_ISREG(mode)) {
int ml = maybe_link();
if (ml >= 0) {
- int openflags = O_WRONLY|O_CREAT;
+ int openflags = O_WRONLY|O_CREAT|O_LARGEFILE;
if (ml != 1)
openflags |= O_TRUNC;
wfile = filp_open(collected, openflags, mode);
@@ -422,6 +433,12 @@ static int __init do_copy(void)
static int __init do_symlink(void)
{
+ if (collected[name_len - 1] != '\0') {
+ pr_err("initramfs symlink without nulterm: %.*s\n",
+ (int)name_len, collected);
+ error("malformed archive");
+ return 1;
+ }
collected[N_ALIGN(name_len) + body_len] = '\0';
clean_path(collected, 0);
init_symlink(collected + N_ALIGN(name_len), collected);
@@ -574,15 +591,7 @@ extern unsigned long __initramfs_size;
#include <linux/initrd.h>
#include <linux/kexec.h>
-static ssize_t raw_read(struct file *file, struct kobject *kobj,
- struct bin_attribute *attr, char *buf,
- loff_t pos, size_t count)
-{
- memcpy(buf, attr->private + pos, count);
- return count;
-}
-
-static BIN_ATTR(initrd, 0440, raw_read, NULL, 0);
+static BIN_ATTR(initrd, 0440, sysfs_bin_attr_simple_read, NULL, 0);
void __init reserve_initrd_mem(void)
{
@@ -642,7 +651,7 @@ void __weak __init free_initrd_mem(unsigned long start, unsigned long end)
"initrd");
}
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_RESERVE
static bool __init kexec_free_initrd(void)
{
unsigned long crashk_start = (unsigned long)__va(crashk_res.start);
@@ -679,11 +688,9 @@ static void __init populate_initrd_image(char *err)
struct file *file;
loff_t pos = 0;
- unpack_to_rootfs(__initramfs_start, __initramfs_size);
-
printk(KERN_INFO "rootfs image is not initramfs (%s); looks like an initrd\n",
err);
- file = filp_open("/initrd.image", O_WRONLY | O_CREAT, 0700);
+ file = filp_open("/initrd.image", O_WRONLY|O_CREAT|O_LARGEFILE, 0700);
if (IS_ERR(file))
return;
@@ -721,6 +728,8 @@ static void __init do_populate_rootfs(void *unused, async_cookie_t cookie)
}
done:
+ security_initramfs_populated();
+
/*
* If the initrd region is overlapped with crashkernel reserved region,
* free only memory that is not part of crashkernel region.
@@ -736,8 +745,7 @@ done:
initrd_start = 0;
initrd_end = 0;
- flush_delayed_fput();
- task_work_run();
+ init_flush_fput();
}
static ASYNC_DOMAIN_EXCLUSIVE(initramfs_domain);
diff --git a/init/main.c b/init/main.c
index e24b0780fdff..2a1757826397 100644
--- a/init/main.c
+++ b/init/main.c
@@ -88,6 +88,7 @@
#include <linux/sched/task_stack.h>
#include <linux/context_tracking.h>
#include <linux/random.h>
+#include <linux/moduleloader.h>
#include <linux/list.h>
#include <linux/integrity.h>
#include <linux/proc_ns.h>
@@ -99,6 +100,8 @@
#include <linux/init_syscalls.h>
#include <linux/stackdepot.h>
#include <linux/randomize_kstack.h>
+#include <linux/pidfs.h>
+#include <linux/ptdump.h>
#include <net/net_namespace.h>
#include <asm/io.h>
@@ -324,7 +327,7 @@ static int __init xbc_snprint_cmdline(char *buf, size_t size,
{
struct xbc_node *knode, *vnode;
char *end = buf + size;
- const char *val;
+ const char *val, *q;
int ret;
xbc_node_for_each_key_value(root, knode, val) {
@@ -342,8 +345,14 @@ static int __init xbc_snprint_cmdline(char *buf, size_t size,
continue;
}
xbc_array_for_each_value(vnode, val) {
- ret = snprintf(buf, rest(buf, end), "%s=\"%s\" ",
- xbc_namebuf, val);
+ /*
+ * For prettier and more readable /proc/cmdline, only
+ * quote the value when necessary, i.e. when it contains
+ * whitespace.
+ */
+ q = strpbrk(val, " \t\r\n") ? "\"" : "";
+ ret = snprintf(buf, rest(buf, end), "%s=%s%s%s ",
+ xbc_namebuf, q, val, q);
if (ret < 0)
return ret;
buf += ret;
@@ -484,6 +493,11 @@ static int __init warn_bootconfig(char *str)
early_param("bootconfig", warn_bootconfig);
+bool __init cmdline_has_extra_options(void)
+{
+ return extra_command_line || extra_init_args;
+}
+
/* Change NUL term back to "=", to make "param" the whole string. */
static void __init repair_env_string(char *param, char *val)
{
@@ -603,7 +617,6 @@ static int __init rdinit_setup(char *str)
__setup("rdinit=", rdinit_setup);
#ifndef CONFIG_SMP
-static const unsigned int setup_max_cpus = NR_CPUS;
static inline void setup_nr_cpu_ids(void) { }
static inline void smp_prepare_cpus(unsigned int maxcpus) { }
#endif
@@ -620,18 +633,18 @@ static void __init setup_command_line(char *command_line)
if (extra_command_line)
xlen = strlen(extra_command_line);
- if (extra_init_args)
+ if (extra_init_args) {
+ extra_init_args = strim(extra_init_args); /* remove trailing space */
ilen = strlen(extra_init_args) + 4; /* for " -- " */
+ }
- len = xlen + strlen(boot_command_line) + 1;
+ len = xlen + strlen(boot_command_line) + ilen + 1;
- saved_command_line = memblock_alloc(len + ilen, SMP_CACHE_BYTES);
- if (!saved_command_line)
- panic("%s: Failed to allocate %zu bytes\n", __func__, len + ilen);
+ saved_command_line = memblock_alloc_or_panic(len, SMP_CACHE_BYTES);
- static_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
- if (!static_command_line)
- panic("%s: Failed to allocate %zu bytes\n", __func__, len);
+ len = xlen + strlen(command_line) + 1;
+
+ static_command_line = memblock_alloc_or_panic(len, SMP_CACHE_BYTES);
if (xlen) {
/*
@@ -681,7 +694,7 @@ static void __init setup_command_line(char *command_line)
static __initdata DECLARE_COMPLETION(kthreadd_done);
-noinline void __ref __noreturn rest_init(void)
+static noinline void __ref __noreturn rest_init(void)
{
struct task_struct *tsk;
int pid;
@@ -737,10 +750,7 @@ static int __init do_early_param(char *param, char *val,
const struct obs_kernel_param *p;
for (p = __setup_start; p < __setup_end; p++) {
- if ((p->early && parameq(param, p->str)) ||
- (strcmp(param, "console") == 0 &&
- strcmp(p->str, "earlycon") == 0)
- ) {
+ if (p->early && parameq(param, p->str)) {
if (p->setup_func(val) != 0)
pr_warn("Malformed early option '%s'\n", param);
}
@@ -776,6 +786,10 @@ void __init __weak smp_setup_processor_id(void)
{
}
+void __init __weak smp_prepare_boot_cpu(void)
+{
+}
+
# if THREAD_SIZE >= PAGE_SIZE
void __init __weak thread_stack_cache_init(void)
{
@@ -822,11 +836,6 @@ static int __init early_randomize_kstack_offset(char *buf)
early_param("randomize_kstack_offset", early_randomize_kstack_offset);
#endif
-void __init __weak __noreturn arch_call_rest_init(void)
-{
- rest_init();
-}
-
static void __init print_unknown_bootoptions(void)
{
char *unknown_options;
@@ -870,6 +879,19 @@ static void __init print_unknown_bootoptions(void)
memblock_free(unknown_options, len);
}
+static void __init early_numa_node_init(void)
+{
+#ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
+#ifndef cpu_to_node
+ int cpu;
+
+ /* The early_cpu_to_node() should be ready here. */
+ for_each_possible_cpu(cpu)
+ set_cpu_numa_node(cpu, early_cpu_to_node(cpu));
+#endif
+#endif
+}
+
asmlinkage __visible __init __no_sanitize_address __noreturn __no_stack_protector
void start_kernel(void)
{
@@ -893,18 +915,21 @@ void start_kernel(void)
boot_cpu_init();
page_address_init();
pr_notice("%s", linux_banner);
- early_security_init();
setup_arch(&command_line);
+ /* Static keys and static calls are needed by LSMs */
+ jump_label_init();
+ static_call_init();
+ early_security_init();
setup_boot_config();
setup_command_line(command_line);
setup_nr_cpu_ids();
setup_per_cpu_areas();
smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
+ early_numa_node_init();
boot_cpu_hotplug_init();
pr_notice("Kernel command line: %s\n", saved_command_line);
/* parameters may set static keys */
- jump_label_init();
parse_early_param();
after_dashes = parse_args("Booting kernel",
static_command_line, __start___param,
@@ -963,6 +988,7 @@ void start_kernel(void)
workqueue_init_early();
rcu_init();
+ kvfree_rcu_init();
/* Trace events are available after this */
trace_init();
@@ -1059,6 +1085,7 @@ void start_kernel(void)
seq_file_init();
proc_root_init();
nsfs_init();
+ pidfs_init();
cpuset_init();
cgroup_init();
taskstats_init_early();
@@ -1069,7 +1096,7 @@ void start_kernel(void)
kcsan_init();
/* Do the rest non-__init'ed, we're now alive */
- arch_call_rest_init();
+ rest_init();
/*
* Avoid stack canaries in callers of boot_init_stack_canary for gcc-10
@@ -1115,16 +1142,10 @@ static int __init initcall_blacklist(char *str)
str_entry = strsep(&str, ",");
if (str_entry) {
pr_debug("blacklisting initcall %s\n", str_entry);
- entry = memblock_alloc(sizeof(*entry),
+ entry = memblock_alloc_or_panic(sizeof(*entry),
SMP_CACHE_BYTES);
- if (!entry)
- panic("%s: Failed to allocate %zu bytes\n",
- __func__, sizeof(*entry));
- entry->buf = memblock_alloc(strlen(str_entry) + 1,
+ entry->buf = memblock_alloc_or_panic(strlen(str_entry) + 1,
SMP_CACHE_BYTES);
- if (!entry->buf)
- panic("%s: Failed to allocate %zu bytes\n",
- __func__, strlen(str_entry) + 1);
strcpy(entry->buf, str_entry);
list_add(&entry->next, &blacklisted_initcalls);
}
@@ -1396,33 +1417,28 @@ static int __init set_debug_rodata(char *str)
early_param("rodata", set_debug_rodata);
#endif
-#ifdef CONFIG_STRICT_KERNEL_RWX
static void mark_readonly(void)
{
- if (rodata_enabled) {
+ if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && rodata_enabled) {
/*
* load_module() results in W+X mappings, which are cleaned
- * up with call_rcu(). Let's make sure that queued work is
+ * up with init_free_wq. Let's make sure that queued work is
* flushed so that we don't hit false positives looking for
* insecure pages which are W+X.
*/
- rcu_barrier();
+ flush_module_init_free_work();
+ jump_label_init_ro();
mark_rodata_ro();
+ debug_checkwx();
rodata_test();
- } else
+ } else if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) {
pr_info("Kernel memory protection disabled.\n");
+ } else if (IS_ENABLED(CONFIG_ARCH_HAS_STRICT_KERNEL_RWX)) {
+ pr_warn("Kernel memory protection not selected by kernel config.\n");
+ } else {
+ pr_warn("This architecture does not have kernel memory protection.\n");
+ }
}
-#elif defined(CONFIG_ARCH_HAS_STRICT_KERNEL_RWX)
-static inline void mark_readonly(void)
-{
- pr_warn("Kernel memory protection not selected by kernel config.\n");
-}
-#else
-static inline void mark_readonly(void)
-{
- pr_warn("This architecture does not have kernel memory protection.\n");
-}
-#endif
void __weak free_initmem(void)
{
@@ -1545,6 +1561,7 @@ static noinline void __init kernel_init_freeable(void)
sched_init_smp();
workqueue_init_topology();
+ async_init();
padata_init();
page_alloc_init_late();