diff options
Diffstat (limited to 'init')
-rw-r--r-- | init/Kconfig | 63 | ||||
-rw-r--r-- | init/do_mounts.c | 32 | ||||
-rw-r--r-- | init/do_mounts_initrd.c | 24 | ||||
-rw-r--r-- | init/init_task.c | 11 | ||||
-rw-r--r-- | init/initramfs.c | 34 | ||||
-rw-r--r-- | init/main.c | 45 |
6 files changed, 143 insertions, 66 deletions
diff --git a/init/Kconfig b/init/Kconfig index 317d5ccb5191..53b54214a36e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -23,6 +23,9 @@ config CLANG_VERSION int default $(shell,$(srctree)/scripts/clang-version.sh $(CC)) +config CC_HAS_ASM_GOTO + def_bool $(success,$(srctree)/scripts/gcc-goto.sh $(CC)) + config CONSTRUCTORS bool depends on !UML @@ -335,15 +338,6 @@ config HAVE_ARCH_AUDITSYSCALL config AUDITSYSCALL def_bool y depends on AUDIT && HAVE_ARCH_AUDITSYSCALL - -config AUDIT_WATCH - def_bool y - depends on AUDITSYSCALL - select FSNOTIFY - -config AUDIT_TREE - def_bool y - depends on AUDITSYSCALL select FSNOTIFY source "kernel/irq/Kconfig" @@ -490,6 +484,45 @@ config TASK_IO_ACCOUNTING Say N if unsure. +config PSI + bool "Pressure stall information tracking" + help + Collect metrics that indicate how overcommitted the CPU, memory, + and IO capacity are in the system. + + If you say Y here, the kernel will create /proc/pressure/ with the + pressure statistics files cpu, memory, and io. These will indicate + the share of walltime in which some or all tasks in the system are + delayed due to contention of the respective resource. + + In kernels with cgroup support, cgroups (cgroup2 only) will + have cpu.pressure, memory.pressure, and io.pressure files, + which aggregate pressure stalls for the grouped tasks only. + + For more details see Documentation/accounting/psi.txt. + + Say N if unsure. + +config PSI_DEFAULT_DISABLED + bool "Require boot parameter to enable pressure stall information tracking" + default n + depends on PSI + help + If set, pressure stall information tracking will be disabled + per default but can be enabled through passing psi=1 on the + kernel commandline during boot. + + This feature adds some code to the task wakeup and sleep + paths of the scheduler. The overhead is too low to affect + common scheduling-intense workloads in practice (such as + webservers, memcache), but it does show up in artificial + scheduler stress tests, such as hackbench. + + If you are paranoid and not sure what the kernel will be + used for, say Y. + + Say N if unsure. + endmenu # "CPU/Task time and stats accounting" config CPU_ISOLATION @@ -803,7 +836,7 @@ config CGROUP_PIDS PIDs controller is designed to stop this from happening. It should be noted that organisational operations (such as attaching - to a cgroup hierarchy will *not* be blocked by the PIDs controller), + to a cgroup hierarchy) will *not* be blocked by the PIDs controller, since the PIDs limit only affects a process's ability to fork, not to attach to a cgroup. @@ -1102,6 +1135,7 @@ config LD_DEAD_CODE_DATA_ELIMINATION bool "Dead code and data elimination (EXPERIMENTAL)" depends on HAVE_LD_DEAD_CODE_DATA_ELIMINATION depends on EXPERT + depends on !(FUNCTION_TRACER && CC_IS_GCC && GCC_VERSION < 40800) depends on $(cc-option,-ffunction-sections -fdata-sections) depends on $(ld-option,--gc-sections) help @@ -1380,6 +1414,15 @@ config AIO by some high performance threaded applications. Disabling this option saves about 7k. +config IO_URING + bool "Enable IO uring support" if EXPERT + select ANON_INODES + default y + help + This option enables support for the io_uring interface, enabling + applications to submit and complete IO through submission and + completion rings that are shared between the kernel and application. + config ADVISE_SYSCALLS bool "Enable madvise/fadvise syscalls" if EXPERT default y diff --git a/init/do_mounts.c b/init/do_mounts.c index e1c9afa9d8c9..f8c230c77035 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -22,6 +22,7 @@ #include <linux/nfs_fs.h> #include <linux/nfs_fs_sb.h> #include <linux/nfs_mount.h> +#include <uapi/linux/mount.h> #include "do_mounts.h" @@ -167,6 +168,24 @@ done: } return res; } + +/** + * match_dev_by_label - callback for finding a partition using its label + * @dev: device passed in by the caller + * @data: opaque pointer to the label to match + * + * Returns 1 if the device matches, and 0 otherwise. + */ +static int match_dev_by_label(struct device *dev, const void *data) +{ + const char *label = data; + struct hd_struct *part = dev_to_part(dev); + + if (part->info && !strcmp(label, part->info->volname)) + return 1; + + return 0; +} #endif /* @@ -190,6 +209,8 @@ done: * a partition with a known unique id. * 8) <major>:<minor> major and minor number of the device separated by * a colon. + * 9) PARTLABEL=<name> with name being the GPT partition label. + * MSDOS partitions do not support labels! * * If name doesn't have fall into the categories above, we return (0,0). * block_class is used to check if something is a disk name. If the disk @@ -211,6 +232,17 @@ dev_t name_to_dev_t(const char *name) if (!res) goto fail; goto done; + } else if (strncmp(name, "PARTLABEL=", 10) == 0) { + struct device *dev; + + dev = class_find_device(&block_class, NULL, name + 10, + &match_dev_by_label); + if (!dev) + goto fail; + + res = dev->devt; + put_device(dev); + goto done; } #endif diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index d1a5d885ce13..a9c6cc56f505 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -8,6 +8,7 @@ #include <linux/sched.h> #include <linux/freezer.h> #include <linux/kmod.h> +#include <uapi/linux/mount.h> #include "do_mounts.h" @@ -16,6 +17,9 @@ int initrd_below_start_ok; unsigned int real_root_dev; /* do_proc_dointvec cannot handle kdev_t */ static int __initdata mount_initrd = 1; +phys_addr_t phys_initrd_start __initdata; +unsigned long phys_initrd_size __initdata; + static int __init no_initrd(char *str) { mount_initrd = 0; @@ -24,6 +28,23 @@ static int __init no_initrd(char *str) __setup("noinitrd", no_initrd); +static int __init early_initrd(char *p) +{ + phys_addr_t start; + unsigned long size; + char *endp; + + start = memparse(p, &endp); + if (*endp == ',') { + size = memparse(endp + 1, NULL); + + phys_initrd_start = start; + phys_initrd_size = size; + } + return 0; +} +early_param("initrd", early_initrd); + static int init_linuxrc(struct subprocess_info *info, struct cred *new) { ksys_unshare(CLONE_FS | CLONE_FILES); @@ -53,9 +74,6 @@ static void __init handle_initrd(void) ksys_mkdir("/old", 0700); ksys_chdir("/old"); - /* try loading default modules from initrd */ - load_default_modules(); - /* * In case that a resume from disk is carried out by linuxrc or one of * its children, we need to tell the freezer not to wait for us. diff --git a/init/init_task.c b/init/init_task.c index 5aebe3be4d7c..c70ef656d0f4 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -10,6 +10,7 @@ #include <linux/fs.h> #include <linux/mm.h> #include <linux/audit.h> +#include <linux/numa.h> #include <asm/pgtable.h> #include <linux/uaccess.h> @@ -44,7 +45,7 @@ static struct signal_struct init_signals = { }; static struct sighand_struct init_sighand = { - .count = ATOMIC_INIT(1), + .count = REFCOUNT_INIT(1), .action = { { { .sa_handler = SIG_DFL, } }, }, .siglock = __SPIN_LOCK_UNLOCKED(init_sighand.siglock), .signalfd_wqh = __WAIT_QUEUE_HEAD_INITIALIZER(init_sighand.signalfd_wqh), @@ -61,11 +62,11 @@ struct task_struct init_task = { #ifdef CONFIG_THREAD_INFO_IN_TASK .thread_info = INIT_THREAD_INFO(init_task), - .stack_refcount = ATOMIC_INIT(1), + .stack_refcount = REFCOUNT_INIT(1), #endif .state = 0, .stack = init_stack, - .usage = ATOMIC_INIT(2), + .usage = REFCOUNT_INIT(2), .flags = PF_KTHREAD, .prio = MAX_PRIO - 20, .static_prio = MAX_PRIO - 20, @@ -121,7 +122,7 @@ struct task_struct init_task .thread_pid = &init_struct_pid, .thread_group = LIST_HEAD_INIT(init_task.thread_group), .thread_node = LIST_HEAD_INIT(init_signals.thread_head), -#ifdef CONFIG_AUDITSYSCALL +#ifdef CONFIG_AUDIT .loginuid = INVALID_UID, .sessionid = AUDIT_SID_UNSET, #endif @@ -154,7 +155,7 @@ struct task_struct init_task .vtime.state = VTIME_SYS, #endif #ifdef CONFIG_NUMA_BALANCING - .numa_preferred_nid = -1, + .numa_preferred_nid = NUMA_NO_NODE, .numa_group = NULL, .numa_faults = NULL, #endif diff --git a/init/initramfs.c b/init/initramfs.c index 640557788026..4749e1115eef 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -291,16 +291,6 @@ static int __init do_reset(void) return 1; } -static int __init maybe_link(void) -{ - if (nlink >= 2) { - char *old = find_link(major, minor, ino, mode, collected); - if (old) - return (ksys_link(old, collected) < 0) ? -1 : 1; - } - return 0; -} - static void __init clean_path(char *path, umode_t fmode) { struct kstat st; @@ -313,6 +303,18 @@ static void __init clean_path(char *path, umode_t fmode) } } +static int __init maybe_link(void) +{ + if (nlink >= 2) { + char *old = find_link(major, minor, ino, mode, collected); + if (old) { + clean_path(collected, 0); + return (ksys_link(old, collected) < 0) ? -1 : 1; + } + } + return 0; +} + static __initdata int wfd; static int __init do_name(void) @@ -429,7 +431,7 @@ static long __init flush_buffer(void *bufv, unsigned long len) len -= written; state = Reset; } else - error("junk in compressed archive"); + error("junk within compressed archive"); } return origLen; } @@ -486,9 +488,9 @@ static char * __init unpack_to_rootfs(char *buf, unsigned long len) message = msg_buf; } } else - error("junk in compressed archive"); + error("invalid magic at start of compressed archive"); if (state != Reset) - error("junk in compressed archive"); + error("junk at the end of compressed archive"); this_header = saved_offset + my_inptr; buf += my_inptr; len -= my_inptr; @@ -644,12 +646,6 @@ static int __init populate_rootfs(void) #endif } flush_delayed_fput(); - /* - * Try loading default modules from initramfs. This gives - * us a chance to load before device_initcalls. - */ - load_default_modules(); - return 0; } rootfs_initcall(populate_rootfs); diff --git a/init/main.c b/init/main.c index 1c3f90264280..c86a1c8f19f4 100644 --- a/init/main.c +++ b/init/main.c @@ -25,7 +25,7 @@ #include <linux/ioport.h> #include <linux/init.h> #include <linux/initrd.h> -#include <linux/bootmem.h> +#include <linux/memblock.h> #include <linux/acpi.h> #include <linux/console.h> #include <linux/nmi.h> @@ -105,7 +105,6 @@ static int kernel_init(void *); extern void init_IRQ(void); -extern void fork_init(void); extern void radix_tree_init(void); /* @@ -375,10 +374,11 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) { } static void __init setup_command_line(char *command_line) { saved_command_line = - memblock_virt_alloc(strlen(boot_command_line) + 1, 0); + memblock_alloc(strlen(boot_command_line) + 1, SMP_CACHE_BYTES); initcall_command_line = - memblock_virt_alloc(strlen(boot_command_line) + 1, 0); - static_command_line = memblock_virt_alloc(strlen(command_line) + 1, 0); + memblock_alloc(strlen(boot_command_line) + 1, SMP_CACHE_BYTES); + static_command_line = memblock_alloc(strlen(command_line) + 1, + SMP_CACHE_BYTES); strcpy(saved_command_line, boot_command_line); strcpy(static_command_line, command_line); } @@ -520,6 +520,7 @@ static void __init mm_init(void) mem_init(); kmem_cache_init(); pgtable_init(); + debug_objects_mem_init(); vmalloc_init(); ioremap_huge_init(); /* Should be run before the first non-init thread is created */ @@ -694,9 +695,7 @@ asmlinkage __visible void __init start_kernel(void) initrd_start = 0; } #endif - page_ext_init(); kmemleak_init(); - debug_objects_mem_init(); setup_per_cpu_pageset(); numa_policy_init(); acpi_early_init(); @@ -736,10 +735,6 @@ asmlinkage __visible void __init start_kernel(void) arch_post_acpi_subsys_init(); sfi_init_late(); - if (efi_enabled(EFI_RUNTIME_SERVICES)) { - efi_free_boot_services(); - } - /* Do the rest non-__init'ed, we're now alive */ arch_call_rest_init(); } @@ -773,8 +768,10 @@ static int __init initcall_blacklist(char *str) str_entry = strsep(&str, ","); if (str_entry) { pr_debug("blacklisting initcall %s\n", str_entry); - entry = alloc_bootmem(sizeof(*entry)); - entry->buf = alloc_bootmem(strlen(str_entry) + 1); + entry = memblock_alloc(sizeof(*entry), + SMP_CACHE_BYTES); + entry->buf = memblock_alloc(strlen(str_entry) + 1, + SMP_CACHE_BYTES); strcpy(entry->buf, str_entry); list_add(&entry->next, &blacklisted_initcalls); } @@ -931,7 +928,7 @@ static initcall_entry_t *initcall_levels[] __initdata = { }; /* Keep these in sync with initcalls in include/linux/init.h */ -static char *initcall_level_names[] __initdata = { +static const char *initcall_level_names[] __initdata = { "pure", "core", "postcore", @@ -993,17 +990,6 @@ static void __init do_pre_smp_initcalls(void) do_one_initcall(initcall_from_entry(fn)); } -/* - * This function requests modules which should be loaded by default and is - * called twice right after initrd is mounted and right before init is - * exec'd. If such modules are on either initrd or rootfs, they will be - * loaded before control is passed to userland. - */ -void __init load_default_modules(void) -{ - load_default_elevator_module(); -} - static int run_init_process(const char *init_filename) { argv_init[0] = init_filename; @@ -1043,12 +1029,12 @@ static void mark_readonly(void) { if (rodata_enabled) { /* - * load_module() results in W+X mappings, which are cleaned up - * with call_rcu_sched(). Let's make sure that queued work is + * load_module() results in W+X mappings, which are cleaned + * up with call_rcu(). Let's make sure that queued work is * flushed so that we don't hit false positives looking for * insecure pages which are W+X. */ - rcu_barrier_sched(); + rcu_barrier(); mark_rodata_ro(); rodata_test(); } else @@ -1144,6 +1130,8 @@ static noinline void __init kernel_init_freeable(void) sched_init_smp(); page_alloc_init_late(); + /* Initialize page ext after all struct pages are initialized. */ + page_ext_init(); do_basic_setup(); @@ -1177,5 +1165,4 @@ static noinline void __init kernel_init_freeable(void) */ integrity_load_keys(); - load_default_modules(); } |