summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-10-12 11:00:22 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-10-12 11:00:22 -0700
commit676cb4957396411fdb7aba906d5f950fc3de7cc9 (patch)
treebbc747384f842ace55d4a4bf6a98f27a8304bb20 /kernel
parent95b8b5953a315081eadbadf49200e57d7e05aae7 (diff)
parent6a961bffd1c3505c13b4d33bbb8385fe08239cb8 (diff)
Merge tag 'mm-nonmm-stable-2022-10-11' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull non-MM updates from Andrew Morton: - hfs and hfsplus kmap API modernization (Fabio Francesco) - make crash-kexec work properly when invoked from an NMI-time panic (Valentin Schneider) - ntfs bugfixes (Hawkins Jiawei) - improve IPC msg scalability by replacing atomic_t's with percpu counters (Jiebin Sun) - nilfs2 cleanups (Minghao Chi) - lots of other single patches all over the tree! * tag 'mm-nonmm-stable-2022-10-11' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (71 commits) include/linux/entry-common.h: remove has_signal comment of arch_do_signal_or_restart() prototype proc: test how it holds up with mapping'less process mailmap: update Frank Rowand email address ia64: mca: use strscpy() is more robust and safer init/Kconfig: fix unmet direct dependencies ia64: update config files nilfs2: replace WARN_ONs by nilfs_error for checkpoint acquisition failure fork: remove duplicate included header files init/main.c: remove unnecessary (void*) conversions proc: mark more files as permanent nilfs2: remove the unneeded result variable nilfs2: delete unnecessary checks before brelse() checkpatch: warn for non-standard fixes tag style usr/gen_init_cpio.c: remove unnecessary -1 values from int file ipc/msg: mitigate the lock contention with percpu counter percpu: add percpu_counter_add_local and percpu_counter_sub_local fs/ocfs2: fix repeated words in comments relay: use kvcalloc to alloc page array in relay_alloc_page_array proc: make config PROC_CHILDREN depend on PROC_FS fs: uninline inode_maybe_inc_iversion() ...
Diffstat (limited to 'kernel')
-rw-r--r--kernel/exit.c4
-rw-r--r--kernel/fail_function.c26
-rw-r--r--kernel/fork.c1
-rw-r--r--kernel/kexec.c11
-rw-r--r--kernel/kexec_core.c36
-rw-r--r--kernel/kexec_file.c4
-rw-r--r--kernel/kexec_internal.h15
-rw-r--r--kernel/ksysfs.c7
-rw-r--r--kernel/latencytop.c4
-rw-r--r--kernel/profile.c32
-rw-r--r--kernel/relay.c5
-rw-r--r--kernel/smpboot.c15
-rw-r--r--kernel/task_work.c16
-rw-r--r--kernel/utsname_sysctl.c7
14 files changed, 101 insertions, 82 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 98c859ffa728..35e0a31a0315 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -184,6 +184,10 @@ void put_task_struct_rcu_user(struct task_struct *task)
call_rcu(&task->rcu, delayed_put_task_struct);
}
+void __weak release_thread(struct task_struct *dead_task)
+{
+}
+
void release_task(struct task_struct *p)
{
struct task_struct *leader;
diff --git a/kernel/fail_function.c b/kernel/fail_function.c
index 60dc825ecc2b..a7ccd2930c5f 100644
--- a/kernel/fail_function.c
+++ b/kernel/fail_function.c
@@ -247,15 +247,11 @@ static ssize_t fei_write(struct file *file, const char __user *buffer,
/* cut off if it is too long */
if (count > KSYM_NAME_LEN)
count = KSYM_NAME_LEN;
- buf = kmalloc(count + 1, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
- if (copy_from_user(buf, buffer, count)) {
- ret = -EFAULT;
- goto out_free;
- }
- buf[count] = '\0';
+ buf = memdup_user_nul(buffer, count);
+ if (IS_ERR(buf))
+ return PTR_ERR(buf);
+
sym = strstrip(buf);
mutex_lock(&fei_lock);
@@ -298,17 +294,15 @@ static ssize_t fei_write(struct file *file, const char __user *buffer,
}
ret = register_kprobe(&attr->kp);
- if (!ret)
- fei_debugfs_add_attr(attr);
- if (ret < 0)
- fei_attr_remove(attr);
- else {
- list_add_tail(&attr->list, &fei_attr_list);
- ret = count;
+ if (ret) {
+ fei_attr_free(attr);
+ goto out;
}
+ fei_debugfs_add_attr(attr);
+ list_add_tail(&attr->list, &fei_attr_list);
+ ret = count;
out:
mutex_unlock(&fei_lock);
-out_free:
kfree(buf);
return ret;
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 5f986a32d0e6..08969f5aa38d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -97,7 +97,6 @@
#include <linux/scs.h>
#include <linux/io_uring.h>
#include <linux/bpf.h>
-#include <linux/sched/mm.h>
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
diff --git a/kernel/kexec.c b/kernel/kexec.c
index b5e40f069768..cb8e6e6f983c 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -93,13 +93,10 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
/*
* Because we write directly to the reserved memory region when loading
- * crash kernels we need a mutex here to prevent multiple crash kernels
- * from attempting to load simultaneously, and to prevent a crash kernel
- * from loading over the top of a in use crash kernel.
- *
- * KISS: always take the mutex.
+ * crash kernels we need a serialization here to prevent multiple crash
+ * kernels from attempting to load simultaneously.
*/
- if (!mutex_trylock(&kexec_mutex))
+ if (!kexec_trylock())
return -EBUSY;
if (flags & KEXEC_ON_CRASH) {
@@ -165,7 +162,7 @@ out:
kimage_free(image);
out_unlock:
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
return ret;
}
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index acd029b307e4..ca2743f9c634 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -46,7 +46,7 @@
#include <crypto/hash.h>
#include "kexec_internal.h"
-DEFINE_MUTEX(kexec_mutex);
+atomic_t __kexec_lock = ATOMIC_INIT(0);
/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;
@@ -809,7 +809,7 @@ static int kimage_load_normal_segment(struct kimage *image,
if (result < 0)
goto out;
- ptr = kmap(page);
+ ptr = kmap_local_page(page);
/* Start with a clear page */
clear_page(ptr);
ptr += maddr & ~PAGE_MASK;
@@ -822,7 +822,7 @@ static int kimage_load_normal_segment(struct kimage *image,
memcpy(ptr, kbuf, uchunk);
else
result = copy_from_user(ptr, buf, uchunk);
- kunmap(page);
+ kunmap_local(ptr);
if (result) {
result = -EFAULT;
goto out;
@@ -873,7 +873,7 @@ static int kimage_load_crash_segment(struct kimage *image,
goto out;
}
arch_kexec_post_alloc_pages(page_address(page), 1, 0);
- ptr = kmap(page);
+ ptr = kmap_local_page(page);
ptr += maddr & ~PAGE_MASK;
mchunk = min_t(size_t, mbytes,
PAGE_SIZE - (maddr & ~PAGE_MASK));
@@ -889,7 +889,7 @@ static int kimage_load_crash_segment(struct kimage *image,
else
result = copy_from_user(ptr, buf, uchunk);
kexec_flush_icache_page(page);
- kunmap(page);
+ kunmap_local(ptr);
arch_kexec_pre_free_pages(page_address(page), 1);
if (result) {
result = -EFAULT;
@@ -959,7 +959,7 @@ late_initcall(kexec_core_sysctl_init);
*/
void __noclone __crash_kexec(struct pt_regs *regs)
{
- /* Take the kexec_mutex here to prevent sys_kexec_load
+ /* Take the kexec_lock here to prevent sys_kexec_load
* running on one cpu from replacing the crash kernel
* we are using after a panic on a different cpu.
*
@@ -967,7 +967,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
* of memory the xchg(&kexec_crash_image) would be
* sufficient. But since I reuse the memory...
*/
- if (mutex_trylock(&kexec_mutex)) {
+ if (kexec_trylock()) {
if (kexec_crash_image) {
struct pt_regs fixed_regs;
@@ -976,7 +976,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
machine_crash_shutdown(&fixed_regs);
machine_kexec(kexec_crash_image);
}
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
}
}
STACK_FRAME_NON_STANDARD(__crash_kexec);
@@ -1004,14 +1004,17 @@ void crash_kexec(struct pt_regs *regs)
}
}
-size_t crash_get_memory_size(void)
+ssize_t crash_get_memory_size(void)
{
- size_t size = 0;
+ ssize_t size = 0;
+
+ if (!kexec_trylock())
+ return -EBUSY;
- mutex_lock(&kexec_mutex);
if (crashk_res.end != crashk_res.start)
size = resource_size(&crashk_res);
- mutex_unlock(&kexec_mutex);
+
+ kexec_unlock();
return size;
}
@@ -1022,7 +1025,8 @@ int crash_shrink_memory(unsigned long new_size)
unsigned long old_size;
struct resource *ram_res;
- mutex_lock(&kexec_mutex);
+ if (!kexec_trylock())
+ return -EBUSY;
if (kexec_crash_image) {
ret = -ENOENT;
@@ -1060,7 +1064,7 @@ int crash_shrink_memory(unsigned long new_size)
insert_resource(&iomem_resource, ram_res);
unlock:
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
return ret;
}
@@ -1132,7 +1136,7 @@ int kernel_kexec(void)
{
int error = 0;
- if (!mutex_trylock(&kexec_mutex))
+ if (!kexec_trylock())
return -EBUSY;
if (!kexec_image) {
error = -EINVAL;
@@ -1208,6 +1212,6 @@ int kernel_kexec(void)
#endif
Unlock:
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
return error;
}
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 1d546dc97c50..45637511e0de 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -339,7 +339,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
image = NULL;
- if (!mutex_trylock(&kexec_mutex))
+ if (!kexec_trylock())
return -EBUSY;
dest_image = &kexec_image;
@@ -411,7 +411,7 @@ out:
if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image)
arch_kexec_protect_crashkres();
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
kimage_free(image);
return ret;
}
diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
index 48aaf2ac0d0d..74da1409cd14 100644
--- a/kernel/kexec_internal.h
+++ b/kernel/kexec_internal.h
@@ -13,7 +13,20 @@ void kimage_terminate(struct kimage *image);
int kimage_is_destination_range(struct kimage *image,
unsigned long start, unsigned long end);
-extern struct mutex kexec_mutex;
+/*
+ * Whatever is used to serialize accesses to the kexec_crash_image needs to be
+ * NMI safe, as __crash_kexec() can happen during nmi_panic(), so here we use a
+ * "simple" atomic variable that is acquired with a cmpxchg().
+ */
+extern atomic_t __kexec_lock;
+static inline bool kexec_trylock(void)
+{
+ return atomic_cmpxchg_acquire(&__kexec_lock, 0, 1) == 0;
+}
+static inline void kexec_unlock(void)
+{
+ atomic_set_release(&__kexec_lock, 0);
+}
#ifdef CONFIG_KEXEC_FILE
#include <linux/purgatory.h>
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index b1292a57c2a5..65dba9076f31 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -105,7 +105,12 @@ KERNEL_ATTR_RO(kexec_crash_loaded);
static ssize_t kexec_crash_size_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- return sprintf(buf, "%zu\n", crash_get_memory_size());
+ ssize_t size = crash_get_memory_size();
+
+ if (size < 0)
+ return size;
+
+ return sprintf(buf, "%zd\n", size);
}
static ssize_t kexec_crash_size_store(struct kobject *kobj,
struct kobj_attribute *attr,
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index 76166df011a4..781249098cb6 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -112,7 +112,7 @@ static void __sched
account_global_scheduler_latency(struct task_struct *tsk,
struct latency_record *lat)
{
- int firstnonnull = MAXLR + 1;
+ int firstnonnull = MAXLR;
int i;
/* skip kernel threads for now */
@@ -150,7 +150,7 @@ account_global_scheduler_latency(struct task_struct *tsk,
}
i = firstnonnull;
- if (i >= MAXLR - 1)
+ if (i >= MAXLR)
return;
/* Allocted a new one: */
diff --git a/kernel/profile.c b/kernel/profile.c
index 7ea01ba30e75..8a77769bc4b4 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -59,43 +59,39 @@ int profile_setup(char *str)
static const char schedstr[] = "schedule";
static const char sleepstr[] = "sleep";
static const char kvmstr[] = "kvm";
+ const char *select = NULL;
int par;
if (!strncmp(str, sleepstr, strlen(sleepstr))) {
#ifdef CONFIG_SCHEDSTATS
force_schedstat_enabled();
prof_on = SLEEP_PROFILING;
- if (str[strlen(sleepstr)] == ',')
- str += strlen(sleepstr) + 1;
- if (get_option(&str, &par))
- prof_shift = clamp(par, 0, BITS_PER_LONG - 1);
- pr_info("kernel sleep profiling enabled (shift: %u)\n",
- prof_shift);
+ select = sleepstr;
#else
pr_warn("kernel sleep profiling requires CONFIG_SCHEDSTATS\n");
#endif /* CONFIG_SCHEDSTATS */
} else if (!strncmp(str, schedstr, strlen(schedstr))) {
prof_on = SCHED_PROFILING;
- if (str[strlen(schedstr)] == ',')
- str += strlen(schedstr) + 1;
- if (get_option(&str, &par))
- prof_shift = clamp(par, 0, BITS_PER_LONG - 1);
- pr_info("kernel schedule profiling enabled (shift: %u)\n",
- prof_shift);
+ select = schedstr;
} else if (!strncmp(str, kvmstr, strlen(kvmstr))) {
prof_on = KVM_PROFILING;
- if (str[strlen(kvmstr)] == ',')
- str += strlen(kvmstr) + 1;
- if (get_option(&str, &par))
- prof_shift = clamp(par, 0, BITS_PER_LONG - 1);
- pr_info("kernel KVM profiling enabled (shift: %u)\n",
- prof_shift);
+ select = kvmstr;
} else if (get_option(&str, &par)) {
prof_shift = clamp(par, 0, BITS_PER_LONG - 1);
prof_on = CPU_PROFILING;
pr_info("kernel profiling enabled (shift: %u)\n",
prof_shift);
}
+
+ if (select) {
+ if (str[strlen(select)] == ',')
+ str += strlen(select) + 1;
+ if (get_option(&str, &par))
+ prof_shift = clamp(par, 0, BITS_PER_LONG - 1);
+ pr_info("kernel %s profiling enabled (shift: %u)\n",
+ select, prof_shift);
+ }
+
return 1;
}
__setup("profile=", profile_setup);
diff --git a/kernel/relay.c b/kernel/relay.c
index 6a611e779e95..d7edc934c56d 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -60,10 +60,7 @@ static const struct vm_operations_struct relay_file_mmap_ops = {
*/
static struct page **relay_alloc_page_array(unsigned int n_pages)
{
- const size_t pa_size = n_pages * sizeof(struct page *);
- if (pa_size > PAGE_SIZE)
- return vzalloc(pa_size);
- return kzalloc(pa_size, GFP_KERNEL);
+ return kvcalloc(n_pages, sizeof(struct page *), GFP_KERNEL);
}
/*
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index b9f54544e749..2c7396da470c 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -433,7 +433,7 @@ bool cpu_wait_death(unsigned int cpu, int seconds)
/* The outgoing CPU will normally get done quite quickly. */
if (atomic_read(&per_cpu(cpu_hotplug_state, cpu)) == CPU_DEAD)
- goto update_state;
+ goto update_state_early;
udelay(5);
/* But if the outgoing CPU dawdles, wait increasingly long times. */
@@ -444,16 +444,17 @@ bool cpu_wait_death(unsigned int cpu, int seconds)
break;
sleep_jf = DIV_ROUND_UP(sleep_jf * 11, 10);
}
-update_state:
+update_state_early:
oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu));
+update_state:
if (oldstate == CPU_DEAD) {
/* Outgoing CPU died normally, update state. */
smp_mb(); /* atomic_read() before update. */
atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_POST_DEAD);
} else {
/* Outgoing CPU still hasn't died, set state accordingly. */
- if (atomic_cmpxchg(&per_cpu(cpu_hotplug_state, cpu),
- oldstate, CPU_BROKEN) != oldstate)
+ if (!atomic_try_cmpxchg(&per_cpu(cpu_hotplug_state, cpu),
+ &oldstate, CPU_BROKEN))
goto update_state;
ret = false;
}
@@ -475,14 +476,14 @@ bool cpu_report_death(void)
int newstate;
int cpu = smp_processor_id();
+ oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu));
do {
- oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu));
if (oldstate != CPU_BROKEN)
newstate = CPU_DEAD;
else
newstate = CPU_DEAD_FROZEN;
- } while (atomic_cmpxchg(&per_cpu(cpu_hotplug_state, cpu),
- oldstate, newstate) != oldstate);
+ } while (!atomic_try_cmpxchg(&per_cpu(cpu_hotplug_state, cpu),
+ &oldstate, newstate));
return newstate == CPU_DEAD;
}
diff --git a/kernel/task_work.c b/kernel/task_work.c
index dff75bcde151..065e1ef8fc8d 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -47,12 +47,12 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
/* record the work call stack in order to print it in KASAN reports */
kasan_record_aux_stack(work);
+ head = READ_ONCE(task->task_works);
do {
- head = READ_ONCE(task->task_works);
if (unlikely(head == &work_exited))
return -ESRCH;
work->next = head;
- } while (cmpxchg(&task->task_works, head, work) != head);
+ } while (!try_cmpxchg(&task->task_works, &head, work));
switch (notify) {
case TWA_NONE:
@@ -100,10 +100,12 @@ task_work_cancel_match(struct task_struct *task,
* we raced with task_work_run(), *pprev == NULL/exited.
*/
raw_spin_lock_irqsave(&task->pi_lock, flags);
- while ((work = READ_ONCE(*pprev))) {
- if (!match(work, data))
+ work = READ_ONCE(*pprev);
+ while (work) {
+ if (!match(work, data)) {
pprev = &work->next;
- else if (cmpxchg(pprev, work, work->next) == work)
+ work = READ_ONCE(*pprev);
+ } else if (try_cmpxchg(pprev, &work, work->next))
break;
}
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
@@ -151,16 +153,16 @@ void task_work_run(void)
* work->func() can do task_work_add(), do not set
* work_exited unless the list is empty.
*/
+ work = READ_ONCE(task->task_works);
do {
head = NULL;
- work = READ_ONCE(task->task_works);
if (!work) {
if (task->flags & PF_EXITING)
head = &work_exited;
else
break;
}
- } while (cmpxchg(&task->task_works, work, head) != work);
+ } while (!try_cmpxchg(&task->task_works, &work, head));
if (!work)
break;
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index de16bcf14b03..064072c16e3d 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -76,6 +76,13 @@ static DEFINE_CTL_TABLE_POLL(domainname_poll);
static struct ctl_table uts_kern_table[] = {
{
+ .procname = "arch",
+ .data = init_uts_ns.name.machine,
+ .maxlen = sizeof(init_uts_ns.name.machine),
+ .mode = 0444,
+ .proc_handler = proc_do_uts_string,
+ },
+ {
.procname = "ostype",
.data = init_uts_ns.name.sysname,
.maxlen = sizeof(init_uts_ns.name.sysname),