summary | refs | log | tree | commit | diff
path: root/init
diff options
context:
space:
mode:
Diffstat (limited to 'init')
-rw-r--r-- init/Kconfig | 63
-rw-r--r-- init/do_mounts.c | 32
-rw-r--r-- init/do_mounts_initrd.c | 24
-rw-r--r-- init/init_task.c | 11
-rw-r--r-- init/initramfs.c | 34
-rw-r--r-- init/main.c | 45
6 files changed, 143 insertions, 66 deletions
diff --git a/init/Kconfig b/init/Kconfig
index 317d5ccb5191..53b54214a36e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -23,6 +23,9 @@ config CLANG_VERSION
int
default $(shell,$(srctree)/scripts/clang-version.sh $(CC))
+config CC_HAS_ASM_GOTO
+ def_bool $(success,$(srctree)/scripts/gcc-goto.sh $(CC))
+
config CONSTRUCTORS
bool
depends on !UML
@@ -335,15 +338,6 @@ config HAVE_ARCH_AUDITSYSCALL
config AUDITSYSCALL
def_bool y
depends on AUDIT && HAVE_ARCH_AUDITSYSCALL
-
-config AUDIT_WATCH
- def_bool y
- depends on AUDITSYSCALL
- select FSNOTIFY
-
-config AUDIT_TREE
- def_bool y
- depends on AUDITSYSCALL
select FSNOTIFY
source "kernel/irq/Kconfig"
@@ -490,6 +484,45 @@ config TASK_IO_ACCOUNTING
Say N if unsure.
+config PSI
+ bool "Pressure stall information tracking"
+ help
+ Collect metrics that indicate how overcommitted the CPU, memory,
+ and IO capacity are in the system.
+
+ If you say Y here, the kernel will create /proc/pressure/ with the
+ pressure statistics files cpu, memory, and io. These will indicate
+ the share of walltime in which some or all tasks in the system are
+ delayed due to contention of the respective resource.
+
+ In kernels with cgroup support, cgroups (cgroup2 only) will
+ have cpu.pressure, memory.pressure, and io.pressure files,
+ which aggregate pressure stalls for the grouped tasks only.
+
+ For more details see Documentation/accounting/psi.txt.
+
+ Say N if unsure.
+
+config PSI_DEFAULT_DISABLED
+ bool "Require boot parameter to enable pressure stall information tracking"
+ default n
+ depends on PSI
+ help
+ If set, pressure stall information tracking will be disabled
+ by default but can be enabled by passing psi=1 on the
+ kernel command line during boot.
+
+ This feature adds some code to the task wakeup and sleep
+ paths of the scheduler. The overhead is too low to affect
+ common scheduling-intense workloads in practice (such as
+ webservers, memcache), but it does show up in artificial
+ scheduler stress tests, such as hackbench.
+
+ If you are paranoid and not sure what the kernel will be
+ used for, say Y.
+
+ Say N if unsure.
+
endmenu # "CPU/Task time and stats accounting"
config CPU_ISOLATION
@@ -803,7 +836,7 @@ config CGROUP_PIDS
PIDs controller is designed to stop this from happening.
It should be noted that organisational operations (such as attaching
- to a cgroup hierarchy will *not* be blocked by the PIDs controller),
+ to a cgroup hierarchy) will *not* be blocked by the PIDs controller,
since the PIDs limit only affects a process's ability to fork, not to
attach to a cgroup.
@@ -1102,6 +1135,7 @@ config LD_DEAD_CODE_DATA_ELIMINATION
bool "Dead code and data elimination (EXPERIMENTAL)"
depends on HAVE_LD_DEAD_CODE_DATA_ELIMINATION
depends on EXPERT
+ depends on !(FUNCTION_TRACER && CC_IS_GCC && GCC_VERSION < 40800)
depends on $(cc-option,-ffunction-sections -fdata-sections)
depends on $(ld-option,--gc-sections)
help
@@ -1380,6 +1414,15 @@ config AIO
by some high performance threaded applications. Disabling
this option saves about 7k.
+config IO_URING
+ bool "Enable IO uring support" if EXPERT
+ select ANON_INODES
+ default y
+ help
+ This option enables support for the io_uring interface, enabling
+ applications to submit and complete IO through submission and
+ completion rings that are shared between the kernel and application.
+
config ADVISE_SYSCALLS
bool "Enable madvise/fadvise syscalls" if EXPERT
default y
diff --git a/init/do_mounts.c b/init/do_mounts.c
index e1c9afa9d8c9..f8c230c77035 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -22,6 +22,7 @@
#include <linux/nfs_fs.h>
#include <linux/nfs_fs_sb.h>
#include <linux/nfs_mount.h>
+#include <uapi/linux/mount.h>
#include "do_mounts.h"
@@ -167,6 +168,24 @@ done:
}
return res;
}
+
+/**
+ * match_dev_by_label - callback for finding a partition using its label
+ * @dev: device passed in by the caller
+ * @data: opaque pointer to the label to match
+ *
+ * Returns 1 if the device matches, and 0 otherwise.
+ */
+static int match_dev_by_label(struct device *dev, const void *data)
+{
+ const char *label = data;
+ struct hd_struct *part = dev_to_part(dev);
+
+ if (part->info && !strcmp(label, part->info->volname))
+ return 1;
+
+ return 0;
+}
#endif
/*
@@ -190,6 +209,8 @@ done:
* a partition with a known unique id.
* 8) <major>:<minor> major and minor number of the device separated by
* a colon.
+ * 9) PARTLABEL=<name> with name being the GPT partition label.
+ * MSDOS partitions do not support labels!
*
* If name doesn't have fall into the categories above, we return (0,0).
* block_class is used to check if something is a disk name. If the disk
@@ -211,6 +232,17 @@ dev_t name_to_dev_t(const char *name)
if (!res)
goto fail;
goto done;
+ } else if (strncmp(name, "PARTLABEL=", 10) == 0) {
+ struct device *dev;
+
+ dev = class_find_device(&block_class, NULL, name + 10,
+ &match_dev_by_label);
+ if (!dev)
+ goto fail;
+
+ res = dev->devt;
+ put_device(dev);
+ goto done;
}
#endif
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index d1a5d885ce13..a9c6cc56f505 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -8,6 +8,7 @@
#include <linux/sched.h>
#include <linux/freezer.h>
#include <linux/kmod.h>
+#include <uapi/linux/mount.h>
#include "do_mounts.h"
@@ -16,6 +17,9 @@ int initrd_below_start_ok;
unsigned int real_root_dev; /* do_proc_dointvec cannot handle kdev_t */
static int __initdata mount_initrd = 1;
+phys_addr_t phys_initrd_start __initdata;
+unsigned long phys_initrd_size __initdata;
+
static int __init no_initrd(char *str)
{
mount_initrd = 0;
@@ -24,6 +28,23 @@ static int __init no_initrd(char *str)
__setup("noinitrd", no_initrd);
+static int __init early_initrd(char *p)
+{
+ phys_addr_t start;
+ unsigned long size;
+ char *endp;
+
+ start = memparse(p, &endp);
+ if (*endp == ',') {
+ size = memparse(endp + 1, NULL);
+
+ phys_initrd_start = start;
+ phys_initrd_size = size;
+ }
+ return 0;
+}
+early_param("initrd", early_initrd);
+
static int init_linuxrc(struct subprocess_info *info, struct cred *new)
{
ksys_unshare(CLONE_FS | CLONE_FILES);
@@ -53,9 +74,6 @@ static void __init handle_initrd(void)
ksys_mkdir("/old", 0700);
ksys_chdir("/old");
- /* try loading default modules from initrd */
- load_default_modules();
-
/*
* In case that a resume from disk is carried out by linuxrc or one of
* its children, we need to tell the freezer not to wait for us.
diff --git a/init/init_task.c b/init/init_task.c
index 5aebe3be4d7c..c70ef656d0f4 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -10,6 +10,7 @@
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/audit.h>
+#include <linux/numa.h>
#include <asm/pgtable.h>
#include <linux/uaccess.h>
@@ -44,7 +45,7 @@ static struct signal_struct init_signals = {
};
static struct sighand_struct init_sighand = {
- .count = ATOMIC_INIT(1),
+ .count = REFCOUNT_INIT(1),
.action = { { { .sa_handler = SIG_DFL, } }, },
.siglock = __SPIN_LOCK_UNLOCKED(init_sighand.siglock),
.signalfd_wqh = __WAIT_QUEUE_HEAD_INITIALIZER(init_sighand.signalfd_wqh),
@@ -61,11 +62,11 @@ struct task_struct init_task
= {
#ifdef CONFIG_THREAD_INFO_IN_TASK
.thread_info = INIT_THREAD_INFO(init_task),
- .stack_refcount = ATOMIC_INIT(1),
+ .stack_refcount = REFCOUNT_INIT(1),
#endif
.state = 0,
.stack = init_stack,
- .usage = ATOMIC_INIT(2),
+ .usage = REFCOUNT_INIT(2),
.flags = PF_KTHREAD,
.prio = MAX_PRIO - 20,
.static_prio = MAX_PRIO - 20,
@@ -121,7 +122,7 @@ struct task_struct init_task
.thread_pid = &init_struct_pid,
.thread_group = LIST_HEAD_INIT(init_task.thread_group),
.thread_node = LIST_HEAD_INIT(init_signals.thread_head),
-#ifdef CONFIG_AUDITSYSCALL
+#ifdef CONFIG_AUDIT
.loginuid = INVALID_UID,
.sessionid = AUDIT_SID_UNSET,
#endif
@@ -154,7 +155,7 @@ struct task_struct init_task
.vtime.state = VTIME_SYS,
#endif
#ifdef CONFIG_NUMA_BALANCING
- .numa_preferred_nid = -1,
+ .numa_preferred_nid = NUMA_NO_NODE,
.numa_group = NULL,
.numa_faults = NULL,
#endif
diff --git a/init/initramfs.c b/init/initramfs.c
index 640557788026..4749e1115eef 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -291,16 +291,6 @@ static int __init do_reset(void)
return 1;
}
-static int __init maybe_link(void)
-{
- if (nlink >= 2) {
- char *old = find_link(major, minor, ino, mode, collected);
- if (old)
- return (ksys_link(old, collected) < 0) ? -1 : 1;
- }
- return 0;
-}
-
static void __init clean_path(char *path, umode_t fmode)
{
struct kstat st;
@@ -313,6 +303,18 @@ static void __init clean_path(char *path, umode_t fmode)
}
}
+static int __init maybe_link(void)
+{
+ if (nlink >= 2) {
+ char *old = find_link(major, minor, ino, mode, collected);
+ if (old) {
+ clean_path(collected, 0);
+ return (ksys_link(old, collected) < 0) ? -1 : 1;
+ }
+ }
+ return 0;
+}
+
static __initdata int wfd;
static int __init do_name(void)
@@ -429,7 +431,7 @@ static long __init flush_buffer(void *bufv, unsigned long len)
len -= written;
state = Reset;
} else
- error("junk in compressed archive");
+ error("junk within compressed archive");
}
return origLen;
}
@@ -486,9 +488,9 @@ static char * __init unpack_to_rootfs(char *buf, unsigned long len)
message = msg_buf;
}
} else
- error("junk in compressed archive");
+ error("invalid magic at start of compressed archive");
if (state != Reset)
- error("junk in compressed archive");
+ error("junk at the end of compressed archive");
this_header = saved_offset + my_inptr;
buf += my_inptr;
len -= my_inptr;
@@ -644,12 +646,6 @@ static int __init populate_rootfs(void)
#endif
}
flush_delayed_fput();
- /*
- * Try loading default modules from initramfs. This gives
- * us a chance to load before device_initcalls.
- */
- load_default_modules();
-
return 0;
}
rootfs_initcall(populate_rootfs);
diff --git a/init/main.c b/init/main.c
index 1c3f90264280..c86a1c8f19f4 100644
--- a/init/main.c
+++ b/init/main.c
@@ -25,7 +25,7 @@
#include <linux/ioport.h>
#include <linux/init.h>
#include <linux/initrd.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/acpi.h>
#include <linux/console.h>
#include <linux/nmi.h>
@@ -105,7 +105,6 @@
static int kernel_init(void *);
extern void init_IRQ(void);
-extern void fork_init(void);
extern void radix_tree_init(void);
/*
@@ -375,10 +374,11 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) { }
static void __init setup_command_line(char *command_line)
{
saved_command_line =
- memblock_virt_alloc(strlen(boot_command_line) + 1, 0);
+ memblock_alloc(strlen(boot_command_line) + 1, SMP_CACHE_BYTES);
initcall_command_line =
- memblock_virt_alloc(strlen(boot_command_line) + 1, 0);
- static_command_line = memblock_virt_alloc(strlen(command_line) + 1, 0);
+ memblock_alloc(strlen(boot_command_line) + 1, SMP_CACHE_BYTES);
+ static_command_line = memblock_alloc(strlen(command_line) + 1,
+ SMP_CACHE_BYTES);
strcpy(saved_command_line, boot_command_line);
strcpy(static_command_line, command_line);
}
@@ -520,6 +520,7 @@ static void __init mm_init(void)
mem_init();
kmem_cache_init();
pgtable_init();
+ debug_objects_mem_init();
vmalloc_init();
ioremap_huge_init();
/* Should be run before the first non-init thread is created */
@@ -694,9 +695,7 @@ asmlinkage __visible void __init start_kernel(void)
initrd_start = 0;
}
#endif
- page_ext_init();
kmemleak_init();
- debug_objects_mem_init();
setup_per_cpu_pageset();
numa_policy_init();
acpi_early_init();
@@ -736,10 +735,6 @@ asmlinkage __visible void __init start_kernel(void)
arch_post_acpi_subsys_init();
sfi_init_late();
- if (efi_enabled(EFI_RUNTIME_SERVICES)) {
- efi_free_boot_services();
- }
-
/* Do the rest non-__init'ed, we're now alive */
arch_call_rest_init();
}
@@ -773,8 +768,10 @@ static int __init initcall_blacklist(char *str)
str_entry = strsep(&str, ",");
if (str_entry) {
pr_debug("blacklisting initcall %s\n", str_entry);
- entry = alloc_bootmem(sizeof(*entry));
- entry->buf = alloc_bootmem(strlen(str_entry) + 1);
+ entry = memblock_alloc(sizeof(*entry),
+ SMP_CACHE_BYTES);
+ entry->buf = memblock_alloc(strlen(str_entry) + 1,
+ SMP_CACHE_BYTES);
strcpy(entry->buf, str_entry);
list_add(&entry->next, &blacklisted_initcalls);
}
@@ -931,7 +928,7 @@ static initcall_entry_t *initcall_levels[] __initdata = {
};
/* Keep these in sync with initcalls in include/linux/init.h */
-static char *initcall_level_names[] __initdata = {
+static const char *initcall_level_names[] __initdata = {
"pure",
"core",
"postcore",
@@ -993,17 +990,6 @@ static void __init do_pre_smp_initcalls(void)
do_one_initcall(initcall_from_entry(fn));
}
-/*
- * This function requests modules which should be loaded by default and is
- * called twice right after initrd is mounted and right before init is
- * exec'd. If such modules are on either initrd or rootfs, they will be
- * loaded before control is passed to userland.
- */
-void __init load_default_modules(void)
-{
- load_default_elevator_module();
-}
-
static int run_init_process(const char *init_filename)
{
argv_init[0] = init_filename;
@@ -1043,12 +1029,12 @@ static void mark_readonly(void)
{
if (rodata_enabled) {
/*
- * load_module() results in W+X mappings, which are cleaned up
- * with call_rcu_sched(). Let's make sure that queued work is
+ * load_module() results in W+X mappings, which are cleaned
+ * up with call_rcu(). Let's make sure that queued work is
* flushed so that we don't hit false positives looking for
* insecure pages which are W+X.
*/
- rcu_barrier_sched();
+ rcu_barrier();
mark_rodata_ro();
rodata_test();
} else
@@ -1144,6 +1130,8 @@ static noinline void __init kernel_init_freeable(void)
sched_init_smp();
page_alloc_init_late();
+ /* Initialize page ext after all struct pages are initialized. */
+ page_ext_init();
do_basic_setup();
@@ -1177,5 +1165,4 @@ static noinline void __init kernel_init_freeable(void)
*/
integrity_load_keys();
- load_default_modules();
}