From 31a5990ed253a66712d7ddc29c92d297a991fdf2 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Wed, 6 Mar 2024 17:12:59 +0800 Subject: um: Fix return value in ubd_init() When kmalloc_array() fails to allocate memory, the ubd_init() should return -ENOMEM instead of -1. So, fix it. Fixes: f88f0bdfc32f ("um: UBD Improvements") Signed-off-by: Duoming Zhou Reviewed-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/drivers/ubd_kern.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 63fc062add70..ef805eaa9e01 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1092,7 +1092,7 @@ static int __init ubd_init(void) if (irq_req_buffer == NULL) { printk(KERN_ERR "Failed to initialize ubd buffering\n"); - return -1; + return -ENOMEM; } io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE, sizeof(struct io_thread_req *), @@ -1103,7 +1103,7 @@ static int __init ubd_init(void) if (io_req_buffer == NULL) { printk(KERN_ERR "Failed to initialize ubd buffering\n"); - return -1; + return -ENOMEM; } platform_driver_register(&ubd_driver); mutex_lock(&ubd_lock); -- cgit From 53471c574974a3df4a5705b1db431d69058cc6d9 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 6 Mar 2024 18:19:17 +0800 Subject: um: Make local functions and variables static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will also fix the warnings like: warning: no previous prototype for ‘fork_handler’ [-Wmissing-prototypes] 140 | void fork_handler(void) | ^~~~~~~~~~~~ Signed-off-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/um/drivers/pcap_kern.c | 4 ++-- arch/um/drivers/ubd_user.c | 2 +- arch/um/kernel/kmsg_dump.c | 2 +- arch/um/kernel/process.c | 8 ++++---- arch/um/kernel/time.c | 6 +++--- arch/um/os-Linux/drivers/ethertap_kern.c | 2 +- arch/um/os-Linux/drivers/tuntap_kern.c | 2 +- arch/um/os-Linux/signal.c | 4 ++-- arch/x86/um/os-Linux/registers.c | 2 +- arch/x86/um/tls_32.c | 2 +- 10 files changed, 17 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/um/drivers/pcap_kern.c b/arch/um/drivers/pcap_kern.c index 25ee2c97ca21..d9bf95d7867b 100644 --- a/arch/um/drivers/pcap_kern.c +++ b/arch/um/drivers/pcap_kern.c @@ -15,7 +15,7 @@ struct pcap_init { char *filter; }; -void pcap_init_kern(struct net_device *dev, void *data) +static void pcap_init_kern(struct net_device *dev, void *data) { struct uml_net_private *pri; struct pcap_data *ppri; @@ -50,7 +50,7 @@ static const struct net_kern_info pcap_kern_info = { .write = pcap_write, }; -int pcap_setup(char *str, char **mac_out, void *data) +static int pcap_setup(char *str, char **mac_out, void *data) { struct pcap_init *init = data; char *remain, *host_if = NULL, *options[2] = { NULL, NULL }; diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c index a1afe414ce48..b4f8b8e60564 100644 --- a/arch/um/drivers/ubd_user.c +++ b/arch/um/drivers/ubd_user.c @@ -23,7 +23,7 @@ #include #include -struct pollfd kernel_pollfd; +static struct pollfd kernel_pollfd; int start_io_thread(unsigned long sp, int *fd_out) { diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c index 427dd5a61a38..4382cf02a6d1 100644 --- a/arch/um/kernel/kmsg_dump.c +++ b/arch/um/kernel/kmsg_dump.c @@ -57,7 +57,7 @@ static struct kmsg_dumper kmsg_dumper = { .dump = kmsg_dumper_stdout }; -int __init kmsg_dumper_stdout_init(void) +static int __init kmsg_dumper_stdout_init(void) { return kmsg_dump_register(&kmsg_dumper); } diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index ab95648e93e1..20f3813143d8 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -137,7 +137,7 @@ void new_thread_handler(void) } /* Called magically, see new_thread_handler above */ -void fork_handler(void) +static void fork_handler(void) { force_flush_all(); @@ -268,14 +268,14 @@ int clear_user_proc(void __user *buf, int size) static atomic_t using_sysemu = ATOMIC_INIT(0); int sysemu_supported; -void set_using_sysemu(int value) +static void set_using_sysemu(int value) { if (value > sysemu_supported) return; atomic_set(&using_sysemu, value); } -int get_using_sysemu(void) +static int get_using_sysemu(void) { return atomic_read(&using_sysemu); } @@ -313,7 +313,7 @@ static const struct proc_ops sysemu_proc_ops = { .proc_write = sysemu_proc_write, }; -int __init make_proc_sysemu(void) +static int __init make_proc_sysemu(void) { struct proc_dir_entry *ent; if (!sysemu_supported) diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 3e270da6b6f6..efa5b9c97992 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -319,7 +319,7 @@ void time_travel_add_event_rel(struct time_travel_event *e, time_travel_add_event(e, time_travel_time + delay_ns); } -void time_travel_periodic_timer(struct time_travel_event *e) +static void time_travel_periodic_timer(struct time_travel_event *e) { time_travel_add_event(&time_travel_timer_event, time_travel_time + time_travel_timer_interval); @@ -812,7 +812,7 @@ unsigned long calibrate_delay_is_known(void) return 0; } -int setup_time_travel(char *str) +static int setup_time_travel(char *str) { if (strcmp(str, "=inf-cpu") == 0) { time_travel_mode = TT_MODE_INFCPU; @@ -862,7 +862,7 @@ __uml_help(setup_time_travel, "devices using it, assuming the device has the right capabilities.\n" "The optional ID is a 64-bit integer that's sent to the central scheduler.\n"); -int setup_time_travel_start(char *str) +static int setup_time_travel_start(char *str) { int err; diff --git a/arch/um/os-Linux/drivers/ethertap_kern.c b/arch/um/os-Linux/drivers/ethertap_kern.c index 3182e759d8de..5e5ee40680ce 100644 --- a/arch/um/os-Linux/drivers/ethertap_kern.c +++ b/arch/um/os-Linux/drivers/ethertap_kern.c @@ -63,7 +63,7 @@ const struct net_kern_info ethertap_kern_info = { .write = etap_write, }; -int ethertap_setup(char *str, char **mac_out, void *data) +static int ethertap_setup(char *str, char **mac_out, void *data) { struct ethertap_init *init = data; diff --git a/arch/um/os-Linux/drivers/tuntap_kern.c b/arch/um/os-Linux/drivers/tuntap_kern.c index adcb6717be6f..ff022d9cf0dd 100644 --- a/arch/um/os-Linux/drivers/tuntap_kern.c +++ b/arch/um/os-Linux/drivers/tuntap_kern.c @@ -53,7 +53,7 @@ const struct net_kern_info tuntap_kern_info = { .write = tuntap_write, }; -int tuntap_setup(char *str, char **mac_out, void *data) +static int tuntap_setup(char *str, char **mac_out, void *data) { struct tuntap_init *init = data; diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 24a403a70a02..787cfb9a0308 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -72,7 +72,7 @@ static int signals_blocked; static unsigned int signals_pending; static unsigned int signals_active = 0; -void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) +static void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) { int enabled = signals_enabled; @@ -108,7 +108,7 @@ static void timer_real_alarm_handler(mcontext_t *mc) timer_handler(SIGALRM, NULL, ®s); } -void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc) +static void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc) { int enabled; diff --git a/arch/x86/um/os-Linux/registers.c b/arch/x86/um/os-Linux/registers.c index df8f4b4bf98b..f3638dd09cec 100644 --- a/arch/x86/um/os-Linux/registers.c +++ b/arch/x86/um/os-Linux/registers.c @@ -17,7 +17,7 @@ #include #include -int have_xstate_support; +static int have_xstate_support; int save_i387_registers(int pid, unsigned long *fp_regs) { diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c index 66162eafd8e8..ba40b1b8e179 100644 --- a/arch/x86/um/tls_32.c +++ b/arch/x86/um/tls_32.c @@ -20,7 +20,7 @@ static int host_supports_tls = -1; int host_gdt_entry_tls_min; -int do_set_thread_area(struct user_desc *info) +static int do_set_thread_area(struct user_desc *info) { int ret; u32 cpu; -- cgit From 0c2b208c8b790b52587df6c66bfb6d9e0d6c4cd9 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 6 Mar 2024 18:19:18 +0800 Subject: um: Fix the declaration of vfree The definition of vfree has changed since commit b3bdda02aa54 ("vmalloc: add const to void* parameters"). Update the declaration of vfree in um_malloc.h to match the latest definition. Signed-off-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/um/include/shared/um_malloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/um/include/shared/um_malloc.h b/arch/um/include/shared/um_malloc.h index 13da93284c2c..d25084447c69 100644 --- a/arch/um/include/shared/um_malloc.h +++ b/arch/um/include/shared/um_malloc.h @@ -12,7 +12,7 @@ extern void *uml_kmalloc(int size, int flags); extern void kfree(const void *ptr); extern void *vmalloc(unsigned long size); -extern void vfree(void *ptr); +extern void vfree(const void *ptr); #endif /* __UM_MALLOC_H__ */ -- cgit From b5e0950fd6cb8bd45ea0ac5378d10e1f9ede96c1 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 6 Mar 2024 18:19:19 +0800 Subject: um: Remove unused functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These functions are not used anymore. Removing them will also address below -Wmissing-prototypes warnings: arch/um/kernel/process.c:51:5: warning: no previous prototype for ‘pid_to_processor_id’ [-Wmissing-prototypes] arch/um/kernel/process.c:253:5: warning: no previous prototype for ‘copy_to_user_proc’ [-Wmissing-prototypes] arch/um/kernel/process.c:263:5: warning: no previous prototype for ‘clear_user_proc’ [-Wmissing-prototypes] arch/um/kernel/tlb.c:579:6: warning: no previous prototype for ‘flush_tlb_mm_range’ [-Wmissing-prototypes] Signed-off-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/um/kernel/process.c | 21 --------------------- arch/um/kernel/tlb.c | 6 ------ 2 files changed, 27 deletions(-) (limited to 'arch') diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 20f3813143d8..292c8014aaa6 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -48,17 +48,6 @@ static inline int external_pid(void) return userspace_pid[0]; } -int pid_to_processor_id(int pid) -{ - int i; - - for (i = 0; i < ncpus; i++) { - if (cpu_tasks[i].pid == pid) - return i; - } - return -1; -} - void free_stack(unsigned long stack, int order) { free_pages(stack, order); @@ -250,21 +239,11 @@ char *uml_strdup(const char *string) } EXPORT_SYMBOL(uml_strdup); -int copy_to_user_proc(void __user *to, void *from, int size) -{ - return copy_to_user(to, from, size); -} - int copy_from_user_proc(void *to, void __user *from, int size) { return copy_from_user(to, from, size); } -int clear_user_proc(void __user *buf, int size) -{ - return clear_user(buf, size); -} - static atomic_t using_sysemu = ATOMIC_INIT(0); int sysemu_supported; diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 7d050ab0f78a..70b5e47e9761 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -576,12 +576,6 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, } EXPORT_SYMBOL(flush_tlb_range); -void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, - unsigned long end) -{ - fix_range(mm, start, end, 0); -} - void flush_tlb_mm(struct mm_struct *mm) { struct vm_area_struct *vma; -- cgit From 179d83d89c584aa50f87ca3e44b1f1aff914303c Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 6 Mar 2024 18:19:20 +0800 Subject: um: Fix the return type of __switch_to Make it match the declaration in asm-generic/switch_to.h. And also include the header to allow the compiler to check it. Signed-off-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/um/kernel/process.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 292c8014aaa6..a7607ef1b02f 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -73,7 +74,7 @@ static inline void set_current(struct task_struct *task) extern void arch_switch_to(struct task_struct *to); -void *__switch_to(struct task_struct *from, struct task_struct *to) +struct task_struct *__switch_to(struct task_struct *from, struct task_struct *to) { to->thread.prev_sched = from; set_current(to); -- cgit From 9ffc6724a35c9404a99f430f3c35b0b6372390c1 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 6 Mar 2024 18:19:21 +0800 Subject: um: Add missing headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will address below -Wmissing-prototypes warnings: arch/um/kernel/mem.c:202:8: warning: no previous prototype for ‘pgd_alloc’ [-Wmissing-prototypes] arch/um/kernel/mem.c:215:7: warning: no previous prototype for ‘uml_kmalloc’ [-Wmissing-prototypes] arch/um/kernel/process.c:207:6: warning: no previous prototype for ‘arch_cpu_idle’ [-Wmissing-prototypes] arch/um/kernel/process.c:328:15: warning: no previous prototype for ‘arch_align_stack’ [-Wmissing-prototypes] arch/um/kernel/reboot.c:45:6: warning: no previous prototype for ‘machine_restart’ [-Wmissing-prototypes] arch/um/kernel/reboot.c:51:6: warning: no previous prototype for ‘machine_power_off’ [-Wmissing-prototypes] arch/um/kernel/reboot.c:57:6: warning: no previous prototype for ‘machine_halt’ [-Wmissing-prototypes] arch/um/kernel/skas/mmu.c:17:5: warning: no previous prototype for ‘init_new_context’ [-Wmissing-prototypes] arch/um/kernel/skas/mmu.c:60:6: warning: no previous prototype for ‘destroy_context’ [-Wmissing-prototypes] arch/um/kernel/skas/process.c:36:12: warning: no previous prototype for ‘start_uml’ [-Wmissing-prototypes] arch/um/kernel/time.c:807:15: warning: no previous prototype for ‘calibrate_delay_is_known’ [-Wmissing-prototypes] arch/um/kernel/tlb.c:594:6: warning: no previous prototype for ‘force_flush_all’ [-Wmissing-prototypes] arch/x86/um/bugs_32.c:22:6: warning: no previous prototype for ‘arch_check_bugs’ [-Wmissing-prototypes] arch/x86/um/bugs_32.c:44:6: warning: no previous prototype for ‘arch_examine_signal’ [-Wmissing-prototypes] arch/x86/um/bugs_64.c:9:6: warning: no previous prototype for ‘arch_check_bugs’ [-Wmissing-prototypes] arch/x86/um/bugs_64.c:13:6: warning: no previous prototype for ‘arch_examine_signal’ [-Wmissing-prototypes] arch/x86/um/elfcore.c:10:12: warning: no previous prototype for ‘elf_core_extra_phdrs’ [-Wmissing-prototypes] arch/x86/um/elfcore.c:15:5: warning: no previous prototype for ‘elf_core_write_extra_phdrs’ [-Wmissing-prototypes] arch/x86/um/elfcore.c:42:5: warning: no previous prototype for ‘elf_core_write_extra_data’ [-Wmissing-prototypes] arch/x86/um/elfcore.c:63:8: warning: no previous prototype for ‘elf_core_extra_data_size’ [-Wmissing-prototypes] arch/x86/um/fault.c:18:5: warning: no previous prototype for ‘arch_fixup’ [-Wmissing-prototypes] arch/x86/um/os-Linux/mcontext.c:7:6: warning: no previous prototype for ‘get_regs_from_mc’ [-Wmissing-prototypes] arch/x86/um/os-Linux/tls.c:22:6: warning: no previous prototype for ‘check_host_supports_tls’ [-Wmissing-prototypes] Signed-off-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/um/kernel/mem.c | 2 ++ arch/um/kernel/process.c | 2 ++ arch/um/kernel/reboot.c | 1 + arch/um/kernel/skas/mmu.c | 1 + arch/um/kernel/skas/process.c | 1 + arch/um/kernel/time.c | 1 + arch/um/kernel/tlb.c | 1 + arch/x86/um/bugs_32.c | 1 + arch/x86/um/bugs_64.c | 1 + arch/x86/um/elfcore.c | 1 + arch/x86/um/fault.c | 1 + arch/x86/um/os-Linux/mcontext.c | 1 + arch/x86/um/os-Linux/tls.c | 1 + 13 files changed, 15 insertions(+) (limited to 'arch') diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 38d5a71a579b..ca91accd64fc 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -12,12 +12,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #ifdef CONFIG_KASAN diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index a7607ef1b02f..4235e2ca2664 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -27,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index 48c0610d506e..25840eee1068 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 656fe16c9b63..aeed1c2aaf3c 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index f2ac134c9752..fdd5922f9222 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -12,6 +12,7 @@ #include #include #include +#include extern void start_kernel(void); diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index efa5b9c97992..a8bfe8be1526 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 70b5e47e9761..8784f03fa4a6 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/um/bugs_32.c b/arch/x86/um/bugs_32.c index 33daff4dade4..d29929efcc07 100644 --- a/arch/x86/um/bugs_32.c +++ b/arch/x86/um/bugs_32.c @@ -3,6 +3,7 @@ * Licensed under the GPL */ +#include #include #include #include diff --git a/arch/x86/um/bugs_64.c b/arch/x86/um/bugs_64.c index 8cc8256c698d..b01295e8a676 100644 --- a/arch/x86/um/bugs_64.c +++ b/arch/x86/um/bugs_64.c @@ -4,6 +4,7 @@ * Licensed under the GPL */ +#include #include void arch_check_bugs(void) diff --git a/arch/x86/um/elfcore.c b/arch/x86/um/elfcore.c index 650cdbbdaf45..ef50662fc40d 100644 --- a/arch/x86/um/elfcore.c +++ b/arch/x86/um/elfcore.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include #include diff --git a/arch/x86/um/fault.c b/arch/x86/um/fault.c index 84ac7f7b0257..0dde4d613a87 100644 --- a/arch/x86/um/fault.c +++ b/arch/x86/um/fault.c @@ -3,6 +3,7 @@ * Licensed under the GPL */ +#include #include /* These two are from asm-um/uaccess.h and linux/module.h, check them. */ diff --git a/arch/x86/um/os-Linux/mcontext.c b/arch/x86/um/os-Linux/mcontext.c index 49c3744cac37..e80ab7d28117 100644 --- a/arch/x86/um/os-Linux/mcontext.c +++ b/arch/x86/um/os-Linux/mcontext.c @@ -3,6 +3,7 @@ #define __FRAME_OFFSETS #include #include +#include void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc) { diff --git a/arch/x86/um/os-Linux/tls.c b/arch/x86/um/os-Linux/tls.c index 3e1b1bf6acbc..eed9efe29ade 100644 --- a/arch/x86/um/os-Linux/tls.c +++ b/arch/x86/um/os-Linux/tls.c @@ -6,6 +6,7 @@ #include #include +#include #include #ifndef PTRACE_GET_THREAD_AREA -- cgit From a4b4382f3e83bb4fa4a421e6cf5a5ef987658475 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 6 Mar 2024 18:19:23 +0800 Subject: um: Move declarations to proper headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will address below -Wmissing-prototypes warnings: arch/um/kernel/initrd.c:18:12: warning: no previous prototype for ‘read_initrd’ [-Wmissing-prototypes] arch/um/kernel/um_arch.c:408:19: warning: no previous prototype for ‘read_initrd’ [-Wmissing-prototypes] arch/um/os-Linux/start_up.c:301:12: warning: no previous prototype for ‘parse_iomem’ [-Wmissing-prototypes] arch/x86/um/ptrace_32.c:15:6: warning: no previous prototype for ‘arch_switch_to’ [-Wmissing-prototypes] arch/x86/um/ptrace_32.c:101:5: warning: no previous prototype for ‘poke_user’ [-Wmissing-prototypes] arch/x86/um/ptrace_32.c:153:5: warning: no previous prototype for ‘peek_user’ [-Wmissing-prototypes] arch/x86/um/ptrace_64.c:111:5: warning: no previous prototype for ‘poke_user’ [-Wmissing-prototypes] arch/x86/um/ptrace_64.c:171:5: warning: no previous prototype for ‘peek_user’ [-Wmissing-prototypes] arch/x86/um/syscalls_64.c:48:6: warning: no previous prototype for ‘arch_switch_to’ [-Wmissing-prototypes] arch/x86/um/tls_32.c:184:5: warning: no previous prototype for ‘arch_switch_tls’ [-Wmissing-prototypes] Signed-off-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/um/include/asm/ptrace-generic.h | 3 +++ arch/um/include/shared/kern_util.h | 1 + arch/um/kernel/physmem.c | 3 +-- arch/um/kernel/process.c | 2 -- arch/um/kernel/ptrace.c | 3 --- arch/um/kernel/um_arch.h | 2 ++ arch/um/os-Linux/start_up.c | 1 + arch/x86/um/asm/ptrace.h | 6 ++++++ arch/x86/um/ptrace_32.c | 2 -- 9 files changed, 14 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/um/include/asm/ptrace-generic.h b/arch/um/include/asm/ptrace-generic.h index adf91ef553ae..4696f24d1492 100644 --- a/arch/um/include/asm/ptrace-generic.h +++ b/arch/um/include/asm/ptrace-generic.h @@ -36,6 +36,9 @@ extern long subarch_ptrace(struct task_struct *child, long request, extern unsigned long getreg(struct task_struct *child, int regno); extern int putreg(struct task_struct *child, int regno, unsigned long value); +extern int poke_user(struct task_struct *child, long addr, long data); +extern int peek_user(struct task_struct *child, long addr, long data); + extern int arch_set_tls(struct task_struct *new, unsigned long tls); extern void clear_flushed_tls(struct task_struct *task); extern int syscall_trace_enter(struct pt_regs *regs); diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h index 789b83013f35..81bc38a2e3fc 100644 --- a/arch/um/include/shared/kern_util.h +++ b/arch/um/include/shared/kern_util.h @@ -41,6 +41,7 @@ extern void uml_pm_wake(void); extern int start_uml(void); extern void paging_init(void); +extern int parse_iomem(char *str, int *add); extern void uml_cleanup(void); extern void do_uml_exitcalls(void); diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index 91485119ae67..fb2adfb49945 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -161,8 +162,6 @@ __uml_setup("mem=", uml_mem_setup, " Example: mem=64M\n\n" ); -extern int __init parse_iomem(char *str, int *add); - __uml_setup("iomem=", parse_iomem, "iomem=,\n" " Configure as an IO memory region named .\n\n" diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 4235e2ca2664..1395ff3017f0 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -74,8 +74,6 @@ static inline void set_current(struct task_struct *task) { external_pid(), task }); } -extern void arch_switch_to(struct task_struct *to); - struct task_struct *__switch_to(struct task_struct *from, struct task_struct *to) { to->thread.prev_sched = from; diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index 6600a2782796..2124624b7817 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -35,9 +35,6 @@ void ptrace_disable(struct task_struct *child) user_disable_single_step(child); } -extern int peek_user(struct task_struct * child, long addr, long data); -extern int poke_user(struct task_struct * child, long addr, long data); - long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { diff --git a/arch/um/kernel/um_arch.h b/arch/um/kernel/um_arch.h index 1e07fb7ee35e..46e731ab9dfc 100644 --- a/arch/um/kernel/um_arch.h +++ b/arch/um/kernel/um_arch.h @@ -11,4 +11,6 @@ extern void __init uml_dtb_init(void); static inline void uml_dtb_init(void) { } #endif +extern int __init read_initrd(void); + #endif diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 8b0e98ab842c..6b21061c431c 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h index 83822fd42204..2fef3da55533 100644 --- a/arch/x86/um/asm/ptrace.h +++ b/arch/x86/um/asm/ptrace.h @@ -54,6 +54,8 @@ extern int ptrace_get_thread_area(struct task_struct *child, int idx, extern int ptrace_set_thread_area(struct task_struct *child, int idx, struct user_desc __user *user_desc); +extern int arch_switch_tls(struct task_struct *to); + #else #define PT_REGS_R8(r) UPT_R8(&(r)->regs) @@ -83,5 +85,9 @@ extern long arch_prctl(struct task_struct *task, int option, unsigned long __user *addr); #endif + #define user_stack_pointer(regs) PT_REGS_SP(regs) + +extern void arch_switch_to(struct task_struct *to); + #endif /* __UM_X86_PTRACE_H */ diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c index 7f1abde2c84b..b0a71c6cdc6e 100644 --- a/arch/x86/um/ptrace_32.c +++ b/arch/x86/um/ptrace_32.c @@ -10,8 +10,6 @@ #include #include -extern int arch_switch_tls(struct task_struct *to); - void arch_switch_to(struct task_struct *to) { int err = arch_switch_tls(to); -- cgit From 19cf79157309ea3834caf9ef442722f776b93814 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 6 Mar 2024 18:19:24 +0800 Subject: um: Fix -Wmissing-prototypes warnings for text_poke* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The prototypes for text_poke* are declared in asm/text-patching.h under arch/x86/include/. It's safe to include this header, as it's UML-aware (by checking CONFIG_UML_X86). This will address below -Wmissing-prototypes warnings: arch/um/kernel/um_arch.c:461:7: warning: no previous prototype for ‘text_poke’ [-Wmissing-prototypes] arch/um/kernel/um_arch.c:473:6: warning: no previous prototype for ‘text_poke_sync’ [-Wmissing-prototypes] Signed-off-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/um/kernel/um_arch.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 7a9820797eae..e95f805e5004 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include -- cgit From 49ff7d871242d7fd8adb8a2d8347c5d94dda808b Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 6 Mar 2024 18:19:25 +0800 Subject: um: Fix -Wmissing-prototypes warnings for __warp_* and foo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These functions are not called explicitly. Let's just workaround the -Wmissing-prototypes warnings by declaring them locally similar to what was done in arch/x86/kernel/asm-offsets_32.c. This will address below -Wmissing-prototypes warnings: ./arch/x86/um/shared/sysdep/kernel-offsets.h:9:6: warning: no previous prototype for ‘foo’ [-Wmissing-prototypes] arch/um/os-Linux/main.c:187:7: warning: no previous prototype for ‘__wrap_malloc’ [-Wmissing-prototypes] arch/um/os-Linux/main.c:208:7: warning: no previous prototype for ‘__wrap_calloc’ [-Wmissing-prototypes] arch/um/os-Linux/main.c:222:6: warning: no previous prototype for ‘__wrap_free’ [-Wmissing-prototypes] arch/x86/um/user-offsets.c:17:6: warning: no previous prototype for ‘foo’ [-Wmissing-prototypes] Signed-off-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/um/os-Linux/main.c | 5 +++++ arch/x86/um/shared/sysdep/kernel-offsets.h | 3 +++ arch/x86/um/user-offsets.c | 3 +++ 3 files changed, 11 insertions(+) (limited to 'arch') diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c index c8a42ecbd7a2..e82164f90288 100644 --- a/arch/um/os-Linux/main.c +++ b/arch/um/os-Linux/main.c @@ -184,6 +184,11 @@ int __init main(int argc, char **argv, char **envp) extern void *__real_malloc(int); +/* workaround for -Wmissing-prototypes warnings */ +void *__wrap_malloc(int size); +void *__wrap_calloc(int n, int size); +void __wrap_free(void *ptr); + void *__wrap_malloc(int size) { void *ret; diff --git a/arch/x86/um/shared/sysdep/kernel-offsets.h b/arch/x86/um/shared/sysdep/kernel-offsets.h index a004bffb7b8d..48de3a71f845 100644 --- a/arch/x86/um/shared/sysdep/kernel-offsets.h +++ b/arch/x86/um/shared/sysdep/kernel-offsets.h @@ -6,6 +6,9 @@ #include #include +/* workaround for a warning with -Wmissing-prototypes */ +void foo(void); + void foo(void) { #include diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c index e54a9814ccf1..1c77d9946199 100644 --- a/arch/x86/um/user-offsets.c +++ b/arch/x86/um/user-offsets.c @@ -14,6 +14,9 @@ COMMENT(#val " / sizeof(unsigned long)"); \ DEFINE(sym, val / sizeof(unsigned long)) +/* workaround for a warning with -Wmissing-prototypes */ +void foo(void); + void foo(void) { #ifdef __i386__ -- cgit From a0fbbd36c156b9f7b2276871d499c9943dfe5101 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Thu, 7 Mar 2024 11:49:26 +0100 Subject: um: Add winch to winch_handlers before registering winch IRQ Registering a winch IRQ is racy, an interrupt may occur before the winch is added to the winch_handlers list. If that happens, register_winch_irq() adds to that list a winch that is scheduled to be (or has already been) freed, causing a panic later in winch_cleanup(). Avoid the race by adding the winch to the winch_handlers list before registering the IRQ, and rolling back if um_request_irq() fails. Fixes: 42a359e31a0e ("uml: SIGIO support cleanup") Signed-off-by: Roberto Sassu Reviewed-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/drivers/line.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index ffc5cb92fa36..d82bc3fdb86e 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -676,24 +676,26 @@ void register_winch_irq(int fd, int tty_fd, int pid, struct tty_port *port, goto cleanup; } - *winch = ((struct winch) { .list = LIST_HEAD_INIT(winch->list), - .fd = fd, + *winch = ((struct winch) { .fd = fd, .tty_fd = tty_fd, .pid = pid, .port = port, .stack = stack }); + spin_lock(&winch_handler_lock); + list_add(&winch->list, &winch_handlers); + spin_unlock(&winch_handler_lock); + if (um_request_irq(WINCH_IRQ, fd, IRQ_READ, winch_interrupt, IRQF_SHARED, "winch", winch) < 0) { printk(KERN_ERR "register_winch_irq - failed to register " "IRQ\n"); + spin_lock(&winch_handler_lock); + list_del(&winch->list); + spin_unlock(&winch_handler_lock); goto out_free; } - spin_lock(&winch_handler_lock); - list_add(&winch->list, &winch_handlers); - spin_unlock(&winch_handler_lock); - return; out_free: -- cgit From 19ee69234a7281e4706d789c764f93be6fc7b5b2 Mon Sep 17 00:00:00 2001 From: Yueh-Shun Li Date: Sat, 23 Mar 2024 17:44:25 +0000 Subject: um: Makefile: use bash from the environment Set Makefile SHELL to bash instead of /bin/bash for better portability. Some systems do not install binaries to /bin, and therefore do not provide /bin/bash. This includes Linux distros which intentionally avoid implementing the Filesystem Hierarchy Standard (FHS), such as NixOS and Guix System. The recipies inside arch/um/Makefile don't require top-level Bash to build, and setting "SHELL" to "bash" makes Make pick the Bash executable from the environment, hence this patch. Changes since last roll: - Rebase onto a more recent commit on the master branch. - Remove a dangling in-text citation from the change log. - Reword the change log. Signed-off-by: Yueh-Shun Li Reviewed-by: Johannes Berg --- arch/um/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/um/Makefile b/arch/um/Makefile index 34957dcb88b9..00b63bac5eff 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -20,7 +20,7 @@ endif ARCH_DIR := arch/um # We require bash because the vmlinux link and loader script cpp use bash # features. -SHELL := /bin/bash +SHELL := bash MODE_INCLUDE += -I$(srctree)/$(ARCH_DIR)/include/shared/skas -- cgit From 158a6b914c5196cdce2923e642a6acf0ebba3d31 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Mar 2024 10:06:34 +0100 Subject: um: signal: move pid variable where needed We have W=1 warnings on 64-bit because the pid is only used in branches on 32-bit; move it inside to get rid of the warnings. Signed-off-by: Johannes Berg Reviewed-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/x86/um/signal.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index 263e1d08f216..16ff097e790d 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -155,7 +155,7 @@ static int copy_sc_from_user(struct pt_regs *regs, struct sigcontext __user *from) { struct sigcontext sc; - int err, pid; + int err; /* Always make any pending restarted system calls return -EINTR */ current->restart_block.fn = do_no_restart_syscall; @@ -201,10 +201,10 @@ static int copy_sc_from_user(struct pt_regs *regs, #undef GETREG - pid = userspace_pid[current_thread_info()->cpu]; #ifdef CONFIG_X86_32 if (have_fpx_regs) { struct user_fxsr_struct fpx; + int pid = userspace_pid[current_thread_info()->cpu]; err = copy_from_user(&fpx, &((struct _fpstate __user *)sc.fpstate)->_fxsr_env[0], @@ -240,7 +240,7 @@ static int copy_sc_to_user(struct sigcontext __user *to, { struct sigcontext sc; struct faultinfo * fi = ¤t->thread.arch.faultinfo; - int err, pid; + int err; memset(&sc, 0, sizeof(struct sigcontext)); #define PUTREG(regno, regname) sc.regname = regs->regs.gp[HOST_##regno] @@ -288,10 +288,9 @@ static int copy_sc_to_user(struct sigcontext __user *to, if (err) return 1; - pid = userspace_pid[current_thread_info()->cpu]; - #ifdef CONFIG_X86_32 if (have_fpx_regs) { + int pid = userspace_pid[current_thread_info()->cpu]; struct user_fxsr_struct fpx; err = save_fpx_registers(pid, (unsigned long *) &fpx); -- cgit From e3cce8d87d6407f83a5741c3c5d54bf1365c6ac6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Mar 2024 10:06:35 +0100 Subject: um: slirp: remove set but unused variable 'pid' The code doesn't use 'pid' here, just remove it. Signed-off-by: Johannes Berg Reviewed-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/um/drivers/slirp_user.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/um/drivers/slirp_user.c b/arch/um/drivers/slirp_user.c index 8f633e2e5f3d..97228aa080cb 100644 --- a/arch/um/drivers/slirp_user.c +++ b/arch/um/drivers/slirp_user.c @@ -49,7 +49,7 @@ static int slirp_tramp(char **argv, int fd) static int slirp_open(void *data) { struct slirp_data *pri = data; - int fds[2], pid, err; + int fds[2], err; err = os_pipe(fds, 1, 1); if (err) @@ -60,7 +60,6 @@ static int slirp_open(void *data) printk(UM_KERN_ERR "slirp_tramp failed - errno = %d\n", -err); goto out; } - pid = err; pri->slave = fds[1]; pri->slip.pos = 0; -- cgit From 584ed2f76ff5fe360d87a04d17b6520c7999e06b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Mar 2024 10:06:36 +0100 Subject: um: vector: fix bpfflash parameter evaluation With W=1 the build complains about a pointer compared to zero, clearly the result should've been compared. Fixes: 9807019a62dc ("um: Loadable BPF "Firmware" for vector drivers") Signed-off-by: Johannes Berg Reviewed-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/um/drivers/vector_kern.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index dc2feae789cb..63e5f108a6b9 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -141,7 +141,7 @@ static bool get_bpf_flash(struct arglist *def) if (allow != NULL) { if (kstrtoul(allow, 10, &result) == 0) - return (allow > 0); + return result > 0; } return false; } -- cgit From 2caa4982ea8ba601faf8313097720f87aafa7ea5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Mar 2024 10:06:37 +0100 Subject: um: vector: remove unused len variable/calculation The len variable is unused, so not needed, remove it. Signed-off-by: Johannes Berg Reviewed-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/um/drivers/vector_kern.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 63e5f108a6b9..4279793b11b7 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -712,11 +712,9 @@ static struct vector_device *find_device(int n) static int vector_parse(char *str, int *index_out, char **str_out, char **error_out) { - int n, len, err; + int n, err; char *start = str; - len = strlen(str); - while ((*str != ':') && (strlen(str) > 1)) str++; if (*str != ':') { -- cgit From dac847ae2b718d41b72bd68eb911ca2862ecfb38 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Mar 2024 10:06:38 +0100 Subject: um: process: remove unused 'n' variable The return value of fn() wasn't used for a long time, so no need to assign it to a variable, addressing a W=1 warning. This seems to be - with patches from others posted to the list before - the last W=1 warning in arch/um/. Fixes: 22e2430d60db ("x86, um: convert to saner kernel_execve() semantics") Signed-off-by: Johannes Berg Reviewed-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/um/kernel/process.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 1395ff3017f0..e5d7d24045a2 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -109,7 +109,7 @@ int get_current_pid(void) */ void new_thread_handler(void) { - int (*fn)(void *), n; + int (*fn)(void *); void *arg; if (current->thread.prev_sched != NULL) @@ -122,7 +122,7 @@ void new_thread_handler(void) /* * callback returns only if the kernel thread execs a process */ - n = fn(arg); + fn(arg); userspace(¤t->thread.regs.regs, current_thread_info()->aux_fp_regs); } -- cgit From f95bab86106ee5b180b197a68246282b56ef5d8a Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 6 Mar 2024 18:19:22 +0800 Subject: um: Stop tracking host PID in cpu_tasks The host PID tracked in 'cpu_tasks' is no longer used. Stopping tracking it will also save some cycles. Signed-off-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/um/include/shared/as-layout.h | 1 - arch/um/kernel/process.c | 12 ++---------- arch/um/kernel/skas/process.c | 4 ---- 3 files changed, 2 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/um/include/shared/as-layout.h b/arch/um/include/shared/as-layout.h index 9ec3015bc5e2..c22f46a757dc 100644 --- a/arch/um/include/shared/as-layout.h +++ b/arch/um/include/shared/as-layout.h @@ -31,7 +31,6 @@ #include struct cpu_task { - int pid; void *task; }; diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index e5d7d24045a2..d2134802f6a8 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -43,13 +43,7 @@ * cares about its entry, so it's OK if another processor is modifying its * entry. */ -struct cpu_task cpu_tasks[NR_CPUS] = { [0 ... NR_CPUS - 1] = { -1, NULL } }; - -static inline int external_pid(void) -{ - /* FIXME: Need to look up userspace_pid by cpu */ - return userspace_pid[0]; -} +struct cpu_task cpu_tasks[NR_CPUS] = { [0 ... NR_CPUS - 1] = { NULL } }; void free_stack(unsigned long stack, int order) { @@ -70,8 +64,7 @@ unsigned long alloc_stack(int order, int atomic) static inline void set_current(struct task_struct *task) { - cpu_tasks[task_thread_info(task)->cpu] = ((struct cpu_task) - { external_pid(), task }); + cpu_tasks[task_thread_info(task)->cpu] = ((struct cpu_task) { task }); } struct task_struct *__switch_to(struct task_struct *from, struct task_struct *to) @@ -206,7 +199,6 @@ void um_idle_sleep(void) void arch_cpu_idle(void) { - cpu_tasks[current_thread_info()->cpu].pid = os_getpid(); um_idle_sleep(); } diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index fdd5922f9222..99a5cbb36083 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -18,12 +18,8 @@ extern void start_kernel(void); static int __init start_kernel_proc(void *unused) { - int pid; - block_signals_trace(); - pid = os_getpid(); - cpu_tasks[0].pid = pid; cpu_tasks[0].task = current; start_kernel(); -- cgit From 323ced9669a8f70a9ae707cfe46f852ca23ed23e Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Tue, 23 Apr 2024 20:58:52 +0800 Subject: um: Fix -Wmissing-prototypes warnings for (rt_)sigreturn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use SYSCALL_DEFINE0 to define (rt_)sigreturn. This will address below -Wmissing-prototypes warnings: arch/x86/um/signal.c:453:6: warning: no previous prototype for ‘sys_sigreturn’ [-Wmissing-prototypes] arch/x86/um/signal.c:560:6: warning: no previous prototype for ‘sys_rt_sigreturn’ [-Wmissing-prototypes] Signed-off-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/x86/um/signal.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index 16ff097e790d..2cc8c2309022 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -449,7 +450,7 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig, return 0; } -long sys_sigreturn(void) +SYSCALL_DEFINE0(sigreturn) { unsigned long sp = PT_REGS_SP(¤t->thread.regs); struct sigframe __user *frame = (struct sigframe __user *)(sp - 8); @@ -556,7 +557,7 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig, } #endif -long sys_rt_sigreturn(void) +SYSCALL_DEFINE0(rt_sigreturn) { unsigned long sp = PT_REGS_SP(¤t->thread.regs); struct rt_sigframe __user *frame = -- cgit From 2cbade17b18c0f0fd9963f26c9fc9b057eb1cb3a Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Tue, 23 Apr 2024 20:58:53 +0800 Subject: um: Fix the -Wmissing-prototypes warning for __switch_mm The __switch_mm function is defined in the user code, and is called by the kernel code. It should be declared in a shared header. Fixes: 4dc706c2f292 ("um: take um_mmu.h to asm/mmu.h, clean asm/mmu_context.h a bit") Signed-off-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/um/include/asm/mmu.h | 2 -- arch/um/include/shared/skas/mm_id.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h index a7555e43ed14..f2923c767bb9 100644 --- a/arch/um/include/asm/mmu.h +++ b/arch/um/include/asm/mmu.h @@ -14,8 +14,6 @@ typedef struct mm_context { struct uml_arch_mm_context arch; } mm_context_t; -extern void __switch_mm(struct mm_id * mm_idp); - /* Avoid tangled inclusion with asm/ldt.h */ extern long init_new_ldt(struct mm_context *to_mm, struct mm_context *from_mm); extern void free_ldt(struct mm_context *mm); diff --git a/arch/um/include/shared/skas/mm_id.h b/arch/um/include/shared/skas/mm_id.h index e82e203f5f41..92dbf727e384 100644 --- a/arch/um/include/shared/skas/mm_id.h +++ b/arch/um/include/shared/skas/mm_id.h @@ -15,4 +15,6 @@ struct mm_id { int kill; }; +void __switch_mm(struct mm_id *mm_idp); + #endif -- cgit From 3144013e48f4f6e5127223c4ebc488016815dedb Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Tue, 23 Apr 2024 20:58:54 +0800 Subject: um: Fix the -Wmissing-prototypes warning for get_thread_reg The get_thread_reg function is defined in the user code, and is called by the kernel code. It should be declared in a shared header. Fixes: dbba7f704aa0 ("um: stop polluting the namespace with registers.h contents") Signed-off-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/um/include/asm/processor-generic.h | 1 - arch/x86/um/shared/sysdep/archsetjmp.h | 7 +++++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index 6c3779541845..5a7c05275aa7 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -94,7 +94,6 @@ extern struct cpuinfo_um boot_cpu_data; #define current_cpu_data boot_cpu_data #define cache_line_size() (boot_cpu_data.cache_alignment) -extern unsigned long get_thread_reg(int reg, jmp_buf *buf); #define KSTK_REG(tsk, reg) get_thread_reg(reg, &tsk->thread.switch_buf) extern unsigned long __get_wchan(struct task_struct *p); diff --git a/arch/x86/um/shared/sysdep/archsetjmp.h b/arch/x86/um/shared/sysdep/archsetjmp.h index 166cedbab926..8c81d1a604a9 100644 --- a/arch/x86/um/shared/sysdep/archsetjmp.h +++ b/arch/x86/um/shared/sysdep/archsetjmp.h @@ -1,6 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __X86_UM_SYSDEP_ARCHSETJMP_H +#define __X86_UM_SYSDEP_ARCHSETJMP_H + #ifdef __i386__ #include "archsetjmp_32.h" #else #include "archsetjmp_64.h" #endif + +unsigned long get_thread_reg(int reg, jmp_buf *buf); + +#endif /* __X86_UM_SYSDEP_ARCHSETJMP_H */ -- cgit From 6a85e34c4d07d2ec0c153067baff338ac0db55ca Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Tue, 23 Apr 2024 20:58:55 +0800 Subject: um: Fix the declaration of kasan_map_memory Make it match its definition (size_t vs unsigned long). And declare it in a shared header to fix the -Wmissing-prototypes warning, as it is defined in the user code and called in the kernel code. Fixes: 5b301409e8bc ("UML: add support for KASAN under x86_64") Signed-off-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/um/include/asm/kasan.h | 1 - arch/um/include/shared/kern_util.h | 2 ++ arch/um/os-Linux/mem.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/um/include/asm/kasan.h b/arch/um/include/asm/kasan.h index 0d6547f4ec85..f97bb1f7b851 100644 --- a/arch/um/include/asm/kasan.h +++ b/arch/um/include/asm/kasan.h @@ -24,7 +24,6 @@ #ifdef CONFIG_KASAN void kasan_init(void); -void kasan_map_memory(void *start, unsigned long len); extern int kasan_um_is_ready; #ifdef CONFIG_STATIC_LINK diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h index 81bc38a2e3fc..95521b1f5b20 100644 --- a/arch/um/include/shared/kern_util.h +++ b/arch/um/include/shared/kern_util.h @@ -67,4 +67,6 @@ extern void fatal_sigsegv(void) __attribute__ ((noreturn)); void um_idle_sleep(void); +void kasan_map_memory(void *start, size_t len); + #endif diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c index 8530b2e08604..c6c9495b1432 100644 --- a/arch/um/os-Linux/mem.c +++ b/arch/um/os-Linux/mem.c @@ -15,6 +15,7 @@ #include #include #include +#include #include /* -- cgit From 847d3abc6aeda1266192d4236e6a766cdf04eb0f Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Tue, 23 Apr 2024 20:58:56 +0800 Subject: um: Add an internal header shared among the user code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move relevant declarations to this header. This will address below -Wmissing-prototypes warnings: arch/um/os-Linux/elf_aux.c:26:13: warning: no previous prototype for ‘scan_elf_aux’ [-Wmissing-prototypes] arch/um/os-Linux/mem.c:213:13: warning: no previous prototype for ‘check_tmpexec’ [-Wmissing-prototypes] arch/um/os-Linux/skas/process.c:107:6: warning: no previous prototype for ‘wait_stub_done’ [-Wmissing-prototypes] Signed-off-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/um/os-Linux/elf_aux.c | 1 + arch/um/os-Linux/internal.h | 20 ++++++++++++++++++++ arch/um/os-Linux/main.c | 3 +-- arch/um/os-Linux/mem.c | 1 + arch/um/os-Linux/skas/mem.c | 3 +-- arch/um/os-Linux/skas/process.c | 1 + arch/um/os-Linux/start_up.c | 3 +-- 7 files changed, 26 insertions(+), 6 deletions(-) create mode 100644 arch/um/os-Linux/internal.h (limited to 'arch') diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c index 344ac403fb5d..0a0f91cf4d6d 100644 --- a/arch/um/os-Linux/elf_aux.c +++ b/arch/um/os-Linux/elf_aux.c @@ -13,6 +13,7 @@ #include #include #include +#include "internal.h" typedef Elf32_auxv_t elf_auxv_t; diff --git a/arch/um/os-Linux/internal.h b/arch/um/os-Linux/internal.h new file mode 100644 index 000000000000..317fca190c2b --- /dev/null +++ b/arch/um/os-Linux/internal.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_OS_LINUX_INTERNAL_H +#define __UM_OS_LINUX_INTERNAL_H + +/* + * elf_aux.c + */ +void scan_elf_aux(char **envp); + +/* + * mem.c + */ +void check_tmpexec(void); + +/* + * skas/process.c + */ +void wait_stub_done(int pid); + +#endif /* __UM_OS_LINUX_INTERNAL_H */ diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c index e82164f90288..f98ff79cdbf7 100644 --- a/arch/um/os-Linux/main.c +++ b/arch/um/os-Linux/main.c @@ -16,6 +16,7 @@ #include #include #include +#include "internal.h" #define PGD_BOUND (4 * 1024 * 1024) #define STACKSIZE (8 * 1024 * 1024) @@ -102,8 +103,6 @@ static void setup_env_path(void) } } -extern void scan_elf_aux( char **envp); - int __init main(int argc, char **argv, char **envp) { char **new_argv; diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c index c6c9495b1432..cf44d386f23c 100644 --- a/arch/um/os-Linux/mem.c +++ b/arch/um/os-Linux/mem.c @@ -17,6 +17,7 @@ #include #include #include +#include "internal.h" /* * kasan_map_memory - maps memory from @start with a size of @len. diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c index 953fb10f3f93..1f9c1bffc3a6 100644 --- a/arch/um/os-Linux/skas/mem.c +++ b/arch/um/os-Linux/skas/mem.c @@ -17,11 +17,10 @@ #include #include #include +#include "../internal.h" extern char batch_syscall_stub[], __syscall_stub_start[]; -extern void wait_stub_done(int pid); - static inline unsigned long *check_init_stack(struct mm_id * mm_idp, unsigned long *stack) { diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 1f5c3f2523d1..41a288dcfc34 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -23,6 +23,7 @@ #include #include #include +#include "../internal.h" int is_skas_winch(int pid, int fd, void *data) { diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 6b21061c431c..89ad9f4f865c 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -25,6 +25,7 @@ #include #include #include +#include "internal.h" static void ptrace_child(void) { @@ -222,8 +223,6 @@ static void __init check_ptrace(void) check_sysemu(); } -extern void check_tmpexec(void); - static void __init check_coredump_limit(void) { struct rlimit lim; -- cgit From 67c3c7de410c3f1e9f2cd87d7e6cec9e0c6a0e4b Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Tue, 23 Apr 2024 20:58:57 +0800 Subject: um: Fix -Wmissing-prototypes warnings for __vdso_* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VDSO functions are defined as globals and intended to be called from userspace. Let's just workaround the -Wmissing-prototypes warnings by declaring them locally. This will address below -Wmissing-prototypes warnings: arch/x86/um/vdso/um_vdso.c:16:5: warning: no previous prototype for ‘__vdso_clock_gettime’ [-Wmissing-prototypes] arch/x86/um/vdso/um_vdso.c:30:5: warning: no previous prototype for ‘__vdso_gettimeofday’ [-Wmissing-prototypes] arch/x86/um/vdso/um_vdso.c:44:21: warning: no previous prototype for ‘__vdso_time’ [-Wmissing-prototypes] arch/x86/um/vdso/um_vdso.c:57:1: warning: no previous prototype for ‘__vdso_getcpu’ [-Wmissing-prototypes] While at it, also fix the "WARNING: Prefer 'unsigned int *' to bare use of 'unsigned *'" checkpatch warning. Signed-off-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/x86/um/vdso/um_vdso.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/um/vdso/um_vdso.c b/arch/x86/um/vdso/um_vdso.c index ff0f3b4b6c45..cbae2584124f 100644 --- a/arch/x86/um/vdso/um_vdso.c +++ b/arch/x86/um/vdso/um_vdso.c @@ -13,6 +13,12 @@ #include #include +/* workaround for -Wmissing-prototypes warnings */ +int __vdso_clock_gettime(clockid_t clock, struct __kernel_old_timespec *ts); +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); +__kernel_old_time_t __vdso_time(__kernel_old_time_t *t); +long __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused); + int __vdso_clock_gettime(clockid_t clock, struct __kernel_old_timespec *ts) { long ret; @@ -54,7 +60,7 @@ __kernel_old_time_t __vdso_time(__kernel_old_time_t *t) __kernel_old_time_t time(__kernel_old_time_t *t) __attribute__((weak, alias("__vdso_time"))); long -__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) +__vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused) { /* * UML does not support SMP, we can cheat here. :) @@ -68,5 +74,5 @@ __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) return 0; } -long getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) +long getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *tcache) __attribute__((weak, alias("__vdso_getcpu"))); -- cgit From 470dbef50606a4a08dca97133b30cf106207d1ab Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Tue, 23 Apr 2024 20:58:58 +0800 Subject: um: Remove unused do_get_thread_area function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's not used since it was introduced by commit aa6758d4867c ("[PATCH] uml: implement {get,set}_thread_area for i386"). Now, it's causing a -Wmissing-prototypes warning: arch/x86/um/tls_32.c:39:5: warning: no previous prototype for ‘do_get_thread_area’ [-Wmissing-prototypes] 39 | int do_get_thread_area(struct user_desc *info) | ^~~~~~~~~~~~~~~~~~ The original author also had doubts about whether it should be used. Considering that 18 years have passed, let's just remove it. Signed-off-by: Tiwei Bie Signed-off-by: Richard Weinberger --- arch/x86/um/tls_32.c | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c index ba40b1b8e179..d301deee041f 100644 --- a/arch/x86/um/tls_32.c +++ b/arch/x86/um/tls_32.c @@ -36,22 +36,6 @@ static int do_set_thread_area(struct user_desc *info) return ret; } -int do_get_thread_area(struct user_desc *info) -{ - int ret; - u32 cpu; - - cpu = get_cpu(); - ret = os_get_thread_area(info, userspace_pid[cpu]); - put_cpu(); - - if (ret) - printk(KERN_ERR "PTRACE_GET_THREAD_AREA failed, err = %d, " - "index = %d\n", ret, info->entry_number); - - return ret; -} - /* * sys_get_thread_area: get a yet unused TLS descriptor index. * XXX: Consider leaving one free slot for glibc usage at first place. This must @@ -231,7 +215,6 @@ out: return ret; } -/* XXX: use do_get_thread_area to read the host value? I'm not at all sure! */ static int get_tls_entry(struct task_struct *task, struct user_desc *info, int idx) { -- cgit From 5aca3252ddb1e21a3d4281c72102e8da4957d41f Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Sun, 10 Mar 2024 08:52:27 +0100 Subject: um: rtc: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Reviewed-by: Geert Uytterhoeven Signed-off-by: Richard Weinberger --- arch/um/drivers/rtc_kern.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/um/drivers/rtc_kern.c b/arch/um/drivers/rtc_kern.c index 97ceb205cfe6..3a1582219c4b 100644 --- a/arch/um/drivers/rtc_kern.c +++ b/arch/um/drivers/rtc_kern.c @@ -168,16 +168,15 @@ cleanup: return err; } -static int uml_rtc_remove(struct platform_device *pdev) +static void uml_rtc_remove(struct platform_device *pdev) { device_init_wakeup(&pdev->dev, 0); uml_rtc_cleanup(); - return 0; } static struct platform_driver uml_rtc_driver = { .probe = uml_rtc_probe, - .remove = uml_rtc_remove, + .remove_new = uml_rtc_remove, .driver = { .name = "uml-rtc", }, -- cgit From 919e3ece7f5aaf7b5f3c54538d5303b6eeeb053b Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Sun, 10 Mar 2024 08:52:28 +0100 Subject: um: virtio_uml: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Reviewed-by: Geert Uytterhoeven Signed-off-by: Richard Weinberger --- arch/um/drivers/virtio_uml.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 8adca2000e51..77faa2cf3a13 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -1241,12 +1241,11 @@ error_free: return rc; } -static int virtio_uml_remove(struct platform_device *pdev) +static void virtio_uml_remove(struct platform_device *pdev) { struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev); unregister_virtio_device(&vu_dev->vdev); - return 0; } /* Command line device list */ @@ -1445,7 +1444,7 @@ static int virtio_uml_resume(struct platform_device *pdev) static struct platform_driver virtio_uml_driver = { .probe = virtio_uml_probe, - .remove = virtio_uml_remove, + .remove_new = virtio_uml_remove, .driver = { .name = "virtio-uml", .of_match_table = virtio_uml_match, -- cgit From 9ad6bb3298d1c008bb36b1f4fa9ea896d2904b5a Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 27 Mar 2024 07:38:12 -0700 Subject: riscv: Remove PGDIR_SIZE_L3 and TASK_SIZE_MIN TASK_SIZE_MIN is unused since commit 085e2ff9aeb0 ("efi: libstub: Drop randomization of runtime memory map"). PGDIR_SIZE_L3 is only used in the definition of TASK_SIZE_MIN. Signed-off-by: Samuel Holland Acked-by: Arnd Bergmann Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240327143858.711792-2-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgtable-64.h | 2 -- arch/riscv/include/asm/pgtable.h | 2 -- 2 files changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 221a5c1ee287..8c36a8818432 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -16,8 +16,6 @@ extern bool pgtable_l5_enabled; #define PGDIR_SHIFT_L3 30 #define PGDIR_SHIFT_L4 39 #define PGDIR_SHIFT_L5 48 -#define PGDIR_SIZE_L3 (_AC(1, UL) << PGDIR_SHIFT_L3) - #define PGDIR_SHIFT (pgtable_l5_enabled ? PGDIR_SHIFT_L5 : \ (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3)) /* Size of region mapped by a page global directory */ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 97fcde30e247..f5cc8bcc7f8d 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -870,7 +870,6 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) */ #ifdef CONFIG_64BIT #define TASK_SIZE_64 (PGDIR_SIZE * PTRS_PER_PGD / 2) -#define TASK_SIZE_MIN (PGDIR_SIZE_L3 * PTRS_PER_PGD / 2) #ifdef CONFIG_COMPAT #define TASK_SIZE_32 (_AC(0x80000000, UL) - PAGE_SIZE) @@ -882,7 +881,6 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) #else #define TASK_SIZE FIXADDR_START -#define TASK_SIZE_MIN TASK_SIZE #endif #else /* CONFIG_MMU */ -- cgit From ad5643cf2f699989daa85d909403febd6712fccb Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 27 Mar 2024 07:38:13 -0700 Subject: riscv: Define TASK_SIZE_MAX for __access_ok() TASK_SIZE_MAX should be set to a constant value, at least the largest valid userspace address under any runtime configuration. This optimizes the check in __access_ok(), which no longer needs to compute the runtime value of TASK_SIZE. The check does not need to be exact, as long as it accepts all valid userspace addresses and rejects all valid kernel addresses; well-behaved programs will never fail the access_ok() check. For RISC-V, which requires all virtual addresses to be sign extended, the optimal choice is LONG_MAX because it simplifies the limit comparison to a sign bit test. This removes about half of the references to pgtable_l[45]_enabled. Signed-off-by: Samuel Holland Reviewed-by: Arnd Bergmann Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240327143858.711792-3-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgtable.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index f5cc8bcc7f8d..762a85551764 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -870,6 +870,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) */ #ifdef CONFIG_64BIT #define TASK_SIZE_64 (PGDIR_SIZE * PTRS_PER_PGD / 2) +#define TASK_SIZE_MAX LONG_MAX #ifdef CONFIG_COMPAT #define TASK_SIZE_32 (_AC(0x80000000, UL) - PAGE_SIZE) -- cgit From cdf9df020130237edf8860187aca3636d679aa48 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Fri, 5 Apr 2024 10:32:53 +0200 Subject: locking/x86/xen: Use try_cmpxchg() in xen_alloc_p2m_entry() Use try_cmpxchg() instead of cmpxchg(*ptr, old, new) == old. The x86 CMPXCHG instruction returns success in the ZF flag, so this change saves a compare after CMPXCHG. Also, try_cmpxchg() implicitly assigns old *ptr value to "old" when CMPXCHG fails. There is no need to explicitly assign old *ptr value to the temporary, which can simplify the surrounding source code. No functional change intended. Signed-off-by: Uros Bizjak Reviewed-by: Juergen Gross Cc: Juergen Gross Cc: Boris Ostrovsky Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. Peter Anvin" Link: https://lore.kernel.org/r/20240405083335.507471-1-ubizjak@gmail.com Signed-off-by: Juergen Gross --- arch/x86/xen/p2m.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 9bdc3b656b2c..99918beccd80 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -555,7 +555,6 @@ int xen_alloc_p2m_entry(unsigned long pfn) /* Separately check the mid mfn level */ unsigned long missing_mfn; unsigned long mid_mfn_mfn; - unsigned long old_mfn; mid_mfn = alloc_p2m_page(); if (!mid_mfn) @@ -565,12 +564,12 @@ int xen_alloc_p2m_entry(unsigned long pfn) missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); mid_mfn_mfn = virt_to_mfn(mid_mfn); - old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn); - if (old_mfn != missing_mfn) { - free_p2m_page(mid_mfn); - mid_mfn = mfn_to_virt(old_mfn); - } else { + /* try_cmpxchg() updates missing_mfn on failure. */ + if (try_cmpxchg(top_mfn_p, &missing_mfn, mid_mfn_mfn)) { p2m_top_mfn_p[topidx] = mid_mfn; + } else { + free_p2m_page(mid_mfn); + mid_mfn = mfn_to_virt(missing_mfn); } } } else { -- cgit From a6aa4eb994ee9ced905743817c5de8451d26b911 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Mon, 29 Apr 2024 17:50:53 +0200 Subject: xen/x86: add extra pages to unpopulated-alloc if available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 262fc47ac174 ('xen/balloon: don't use PV mode extra memory for zone device allocations') removed the addition of the extra memory ranges to the unpopulated range allocator, using those only for the balloon driver. This forces the unpopulated allocator to attach hotplug ranges even when spare memory (as part of the extra memory ranges) is available. Furthermore, on PVH domains it defeats the purpose of commit 38620fc4e893 ('x86/xen: attempt to inflate the memory balloon on PVH'), as extra memory ranges would only be used to map foreign memory if the kernel is built without XEN_UNPOPULATED_ALLOC support. Fix this by adding a helpers that adds the extra memory ranges to the list of unpopulated pages, and zeroes the ranges so they are not also consumed by the balloon driver. This should have been part of 38620fc4e893, hence the fixes tag. Note the current logic relies on unpopulated_init() (and hence arch_xen_unpopulated_init()) always being called ahead of balloon_init(), so that the extra memory regions are consumed by arch_xen_unpopulated_init(). Fixes: 38620fc4e893 ('x86/xen: attempt to inflate the memory balloon on PVH') Signed-off-by: Roger Pau Monné Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20240429155053.72509-1-roger.pau@citrix.com Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a01ca255b0c6..b88722dfc4f8 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -382,3 +382,36 @@ void __init xen_add_extra_mem(unsigned long start_pfn, unsigned long n_pfns) memblock_reserve(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns)); } + +#ifdef CONFIG_XEN_UNPOPULATED_ALLOC +int __init arch_xen_unpopulated_init(struct resource **res) +{ + unsigned int i; + + if (!xen_domain()) + return -ENODEV; + + /* Must be set strictly before calling xen_free_unpopulated_pages(). */ + *res = &iomem_resource; + + /* + * Initialize with pages from the extra memory regions (see + * arch/x86/xen/setup.c). + */ + for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { + unsigned int j; + + for (j = 0; j < xen_extra_mem[i].n_pfns; j++) { + struct page *pg = + pfn_to_page(xen_extra_mem[i].start_pfn + j); + + xen_free_unpopulated_pages(1, &pg); + } + + /* Zero so region is not also added to the balloon driver. */ + xen_extra_mem[i].n_pfns = 0; + } + + return 0; +} +#endif -- cgit From 66ee3636eddcc82ab82b539d08b85fb5ac1dff9b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 4 Feb 2024 21:20:03 +0900 Subject: x86/kconfig: Select ARCH_WANT_FRAME_POINTERS again when UNWINDER_FRAME_POINTER=y It took me some time to understand the purpose of the tricky code at the end of arch/x86/Kconfig.debug. Without it, the following would be shown: WARNING: unmet direct dependencies detected for FRAME_POINTER because 81d387190039 ("x86/kconfig: Consolidate unwinders into multiple choice selection") removed 'select ARCH_WANT_FRAME_POINTERS'. The correct and more straightforward approach should have been to move it where 'select FRAME_POINTER' is located. Several architectures properly handle the conditional selection of ARCH_WANT_FRAME_POINTERS. For example, 'config UNWINDER_FRAME_POINTER' in arch/arm/Kconfig.debug. Fixes: 81d387190039 ("x86/kconfig: Consolidate unwinders into multiple choice selection") Signed-off-by: Masahiro Yamada Signed-off-by: Borislav Petkov (AMD) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20240204122003.53795-1-masahiroy@kernel.org --- arch/x86/Kconfig.debug | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index c5d614d28a75..74777a97e394 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -248,6 +248,7 @@ config UNWINDER_ORC config UNWINDER_FRAME_POINTER bool "Frame pointer unwinder" + select ARCH_WANT_FRAME_POINTERS select FRAME_POINTER help This option enables the frame pointer unwinder for unwinding kernel @@ -271,7 +272,3 @@ config UNWINDER_GUESS overhead. endchoice - -config FRAME_POINTER - depends on !UNWINDER_ORC && !UNWINDER_GUESS - bool -- cgit From 9d22c96316ac59ed38e80920c698fed38717b91b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 17 May 2024 16:40:36 +0200 Subject: x86/topology: Handle bogus ACPI tables correctly The ACPI specification clearly states how the processors should be enumerated in the MADT: "To ensure that the boot processor is supported post initialization, two guidelines should be followed. The first is that OSPM should initialize processors in the order that they appear in the MADT. The second is that platform firmware should list the boot processor as the first processor entry in the MADT. ... Failure of OSPM implementations and platform firmware to abide by these guidelines can result in both unpredictable and non optimal platform operation." The kernel relies on that ordering to detect the real BSP on crash kernels which is important to avoid sending a INIT IPI to it as that would cause a full machine reset. On a Dell XPS 16 9640 the BIOS ignores this rule and enumerates the CPUs in the wrong order. As a consequence the kernel falsely detects a crash kernel and disables the corresponding CPU. Prevent this by checking the IA32_APICBASE MSR for the BSP bit on the boot CPU. If that bit is set, then the MADT based BSP detection can be safely ignored. If the kernel detects a mismatch between the BSP bit and the first enumerated MADT entry then emit a firmware bug message. This obviously also has to be taken into account when the boot APIC ID and the first enumerated APIC ID match. If the boot CPU does not have the BSP bit set in the APICBASE MSR then there is no way for the boot CPU to determine which of the CPUs is the real BSP. Sending an INIT to the real BSP would reset the machine so the only sane way to deal with that is to limit the number of CPUs to one and emit a corresponding warning message. Fixes: 5c5682b9f87a ("x86/cpu: Detect real BSP on crash kernels") Reported-by: Carsten Tolkmit Signed-off-by: Thomas Gleixner Tested-by: Carsten Tolkmit Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/87le48jycb.ffs@tglx Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218837 --- arch/x86/kernel/cpu/topology.c | 53 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index d17c9b71eb4a..621a151ccf7d 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -128,6 +128,9 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id) static __init bool check_for_real_bsp(u32 apic_id) { + bool is_bsp = false, has_apic_base = boot_cpu_data.x86 >= 6; + u64 msr; + /* * There is no real good way to detect whether this a kdump() * kernel, but except on the Voyager SMP monstrosity which is not @@ -144,17 +147,61 @@ static __init bool check_for_real_bsp(u32 apic_id) if (topo_info.real_bsp_apic_id != BAD_APICID) return false; + /* + * Check whether the enumeration order is broken by evaluating the + * BSP bit in the APICBASE MSR. If the CPU does not have the + * APICBASE MSR then the BSP detection is not possible and the + * kernel must rely on the firmware enumeration order. + */ + if (has_apic_base) { + rdmsrl(MSR_IA32_APICBASE, msr); + is_bsp = !!(msr & MSR_IA32_APICBASE_BSP); + } + if (apic_id == topo_info.boot_cpu_apic_id) { - topo_info.real_bsp_apic_id = apic_id; - return false; + /* + * If the boot CPU has the APIC BSP bit set then the + * firmware enumeration is agreeing. If the CPU does not + * have the APICBASE MSR then the only choice is to trust + * the enumeration order. + */ + if (is_bsp || !has_apic_base) { + topo_info.real_bsp_apic_id = apic_id; + return false; + } + /* + * If the boot APIC is enumerated first, but the APICBASE + * MSR does not have the BSP bit set, then there is no way + * to discover the real BSP here. Assume a crash kernel and + * limit the number of CPUs to 1 as an INIT to the real BSP + * would reset the machine. + */ + pr_warn("Enumerated BSP APIC %x is not marked in APICBASE MSR\n", apic_id); + pr_warn("Assuming crash kernel. Limiting to one CPU to prevent machine INIT\n"); + set_nr_cpu_ids(1); + goto fwbug; } - pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x > %x\n", + pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x != %x\n", topo_info.boot_cpu_apic_id, apic_id); + + if (is_bsp) { + /* + * The boot CPU has the APIC BSP bit set. Use it and complain + * about the broken firmware enumeration. + */ + topo_info.real_bsp_apic_id = topo_info.boot_cpu_apic_id; + goto fwbug; + } + pr_warn("Crash kernel detected. Disabling real BSP to prevent machine INIT\n"); topo_info.real_bsp_apic_id = apic_id; return true; + +fwbug: + pr_warn(FW_BUG "APIC enumeration order not specification compliant\n"); + return false; } static unsigned int topo_unit_count(u32 lvlid, enum x86_topology_domains at_level, -- cgit From 6d85a058cf4941b5b2713b879ef41430e6aa74f3 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 20 May 2024 15:45:32 -0700 Subject: crypto: x86/aes-xts - switch to new Intel CPU model defines New CPU #defines encode vendor and family as well as model. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Eric Biggers Acked-by: Herbert Xu Link: https://lore.kernel.org/r/20240520224620.9480-2-tony.luck@intel.com --- arch/x86/crypto/aesni-intel_glue.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 5b25d2a58aeb..ef031655b2d3 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -1223,14 +1223,14 @@ DEFINE_XTS_ALG(vaes_avx10_512, "xts-aes-vaes-avx10_512", 800); * implementation with ymm registers (256-bit vectors) will be used instead. */ static const struct x86_cpu_id zmm_exclusion_list[] = { - { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_SKYLAKE_X }, - { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE_X }, - { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE_D }, - { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE }, - { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE_L }, - { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE_NNPI }, - { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_TIGERLAKE_L }, - { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_TIGERLAKE }, + X86_MATCH_VFM(INTEL_SKYLAKE_X, 0), + X86_MATCH_VFM(INTEL_ICELAKE_X, 0), + X86_MATCH_VFM(INTEL_ICELAKE_D, 0), + X86_MATCH_VFM(INTEL_ICELAKE, 0), + X86_MATCH_VFM(INTEL_ICELAKE_L, 0), + X86_MATCH_VFM(INTEL_ICELAKE_NNPI, 0), + X86_MATCH_VFM(INTEL_TIGERLAKE_L, 0), + X86_MATCH_VFM(INTEL_TIGERLAKE, 0), /* Allow Rocket Lake and later, and Sapphire Rapids and later. */ /* Also allow AMD CPUs (starting with Zen 4, the first with AVX-512). */ {}, -- cgit From 93022482b2948a9a7e9b5a2bb685f2e1cb4c3348 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 20 May 2024 15:45:33 -0700 Subject: x86/cpu: Fix x86_match_cpu() to match just X86_VENDOR_INTEL Code in v6.9 arch/x86/kernel/smpboot.c was changed by commit 4db64279bc2b ("x86/cpu: Switch to new Intel CPU model defines") from: static const struct x86_cpu_id intel_cod_cpu[] = { X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, 0), /* COD */ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, 0), /* COD */ X86_MATCH_INTEL_FAM6_MODEL(ANY, 1), /* SNC */ <--- 443 {} }; static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) { const struct x86_cpu_id *id = x86_match_cpu(intel_cod_cpu); to: static const struct x86_cpu_id intel_cod_cpu[] = { X86_MATCH_VFM(INTEL_HASWELL_X, 0), /* COD */ X86_MATCH_VFM(INTEL_BROADWELL_X, 0), /* COD */ X86_MATCH_VFM(INTEL_ANY, 1), /* SNC */ {} }; static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) { const struct x86_cpu_id *id = x86_match_cpu(intel_cod_cpu); On an Intel CPU with SNC enabled this code previously matched the rule on line 443 to avoid printing messages about insane cache configuration. The new code did not match any rules. Expanding the macros for the intel_cod_cpu[] array shows that the old is equivalent to: static const struct x86_cpu_id intel_cod_cpu[] = { [0] = { .vendor = 0, .family = 6, .model = 0x3F, .steppings = 0, .feature = 0, .driver_data = 0 }, [1] = { .vendor = 0, .family = 6, .model = 0x4F, .steppings = 0, .feature = 0, .driver_data = 0 }, [2] = { .vendor = 0, .family = 6, .model = 0x00, .steppings = 0, .feature = 0, .driver_data = 1 }, [3] = { .vendor = 0, .family = 0, .model = 0x00, .steppings = 0, .feature = 0, .driver_data = 0 } } while the new code expands to: static const struct x86_cpu_id intel_cod_cpu[] = { [0] = { .vendor = 0, .family = 6, .model = 0x3F, .steppings = 0, .feature = 0, .driver_data = 0 }, [1] = { .vendor = 0, .family = 6, .model = 0x4F, .steppings = 0, .feature = 0, .driver_data = 0 }, [2] = { .vendor = 0, .family = 0, .model = 0x00, .steppings = 0, .feature = 0, .driver_data = 1 }, [3] = { .vendor = 0, .family = 0, .model = 0x00, .steppings = 0, .feature = 0, .driver_data = 0 } } Looking at the code for x86_match_cpu(): const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match) { const struct x86_cpu_id *m; struct cpuinfo_x86 *c = &boot_cpu_data; for (m = match; m->vendor | m->family | m->model | m->steppings | m->feature; m++) { ... } return NULL; it is clear that there was no match because the ANY entry in the table (array index 2) is now the loop termination condition (all of vendor, family, model, steppings, and feature are zero). So this code was working before because the "ANY" check was looking for any Intel CPU in family 6. But fails now because the family is a wild card. So the root cause is that x86_match_cpu() has never been able to match on a rule with just X86_VENDOR_INTEL and all other fields set to wildcards. Add a new flags field to struct x86_cpu_id that has a bit set to indicate that this entry in the array is valid. Update X86_MATCH*() macros to set that bit. Change the end-marker check in x86_match_cpu() to just check the flags field for this bit. Backporter notes: The commit in Fixes is really the one that is broken: you can't have m->vendor as part of the loop termination conditional in x86_match_cpu() because it can happen - as it has happened above - that that whole conditional is 0 albeit vendor == 0 is a valid case - X86_VENDOR_INTEL is 0. However, the only case where the above happens is the SNC check added by 4db64279bc2b1 so you only need this fix if you have backported that other commit 4db64279bc2b ("x86/cpu: Switch to new Intel CPU model defines") Fixes: 644e9cbbe3fc ("Add driver auto probing for x86 features v4") Suggested-by: Thomas Gleixner Suggested-by: Borislav Petkov Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Cc: # see above Link: https://lore.kernel.org/r/20240517144312.GBZkdtAOuJZCvxhFbJ@fat_crate.local --- arch/x86/include/asm/cpu_device_id.h | 5 +++++ arch/x86/kernel/cpu/match.c | 4 +--- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h index 970a232009c3..b6325ee30871 100644 --- a/arch/x86/include/asm/cpu_device_id.h +++ b/arch/x86/include/asm/cpu_device_id.h @@ -53,6 +53,9 @@ #define X86_CENTAUR_FAM6_C7_D 0xd #define X86_CENTAUR_FAM6_NANO 0xf +/* x86_cpu_id::flags */ +#define X86_CPU_ID_FLAG_ENTRY_VALID BIT(0) + #define X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins) /** * X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE - Base macro for CPU matching @@ -79,6 +82,7 @@ .model = _model, \ .steppings = _steppings, \ .feature = _feature, \ + .flags = X86_CPU_ID_FLAG_ENTRY_VALID, \ .driver_data = (unsigned long) _data \ } @@ -89,6 +93,7 @@ .model = _model, \ .steppings = _steppings, \ .feature = _feature, \ + .flags = X86_CPU_ID_FLAG_ENTRY_VALID, \ .driver_data = (unsigned long) _data \ } diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index 8651643bddae..8e7de733320a 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c @@ -38,9 +38,7 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match) const struct x86_cpu_id *m; struct cpuinfo_x86 *c = &boot_cpu_data; - for (m = match; - m->vendor | m->family | m->model | m->steppings | m->feature; - m++) { + for (m = match; m->flags & X86_CPU_ID_FLAG_ENTRY_VALID; m++) { if (m->vendor != X86_VENDOR_ANY && c->x86_vendor != m->vendor) continue; if (m->family != X86_FAMILY_ANY && c->x86 != m->family) -- cgit From c67ddf59ac44adc60649730bf8347e37c516b001 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 15 May 2024 07:50:39 +0200 Subject: riscv: force PAGE_SIZE linear mapping if debug_pagealloc is enabled debug_pagealloc is a debug feature which clears the valid bit in page table entry for freed pages to detect illegal accesses to freed memory. For this feature to work, virtual mapping must have PAGE_SIZE resolution. (No, we cannot map with huge pages and split them only when needed; because pages can be allocated/freed in atomic context and page splitting cannot be done in atomic context) Force linear mapping to use small pages if debug_pagealloc is enabled. Note that it is not necessary to force the entire linear mapping, but only those that are given to memory allocator. Some parts of memory can keep using huge page mapping (for example, kernel's executable code). But these parts are minority, so keep it simple. This is just a debug feature, some extra overhead should be acceptable. Fixes: 5fde3db5eb02 ("riscv: add ARCH_SUPPORTS_DEBUG_PAGEALLOC support") Signed-off-by: Nam Cao Cc: stable@vger.kernel.org Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/2e391fa6c6f9b3fcf1b41cefbace02ee4ab4bf59.1715750938.git.namcao@linutronix.de Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index fe8e159394d8..9020844f5189 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -668,6 +668,9 @@ void __init create_pgd_mapping(pgd_t *pgdp, static uintptr_t __init best_map_size(phys_addr_t pa, uintptr_t va, phys_addr_t size) { + if (debug_pagealloc_enabled()) + return PAGE_SIZE; + if (pgtable_l5_enabled && !(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE) return P4D_SIZE; -- cgit From fb1cf0878328fe75d47f0aed0a65b30126fcefc4 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 15 May 2024 07:50:40 +0200 Subject: riscv: rewrite __kernel_map_pages() to fix sleeping in invalid context __kernel_map_pages() is a debug function which clears the valid bit in page table entry for deallocated pages to detect illegal memory accesses to freed pages. This function set/clear the valid bit using __set_memory(). __set_memory() acquires init_mm's semaphore, and this operation may sleep. This is problematic, because __kernel_map_pages() can be called in atomic context, and thus is illegal to sleep. An example warning that this causes: BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1578 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 2, name: kthreadd preempt_count: 2, expected: 0 CPU: 0 PID: 2 Comm: kthreadd Not tainted 6.9.0-g1d4c6d784ef6 #37 Hardware name: riscv-virtio,qemu (DT) Call Trace: [] dump_backtrace+0x1c/0x24 [] show_stack+0x2c/0x38 [] dump_stack_lvl+0x5a/0x72 [] dump_stack+0x14/0x1c [] __might_resched+0x104/0x10e [] __might_sleep+0x3e/0x62 [] down_write+0x20/0x72 [] __set_memory+0x82/0x2fa [] __kernel_map_pages+0x5a/0xd4 [] __alloc_pages_bulk+0x3b2/0x43a [] __vmalloc_node_range+0x196/0x6ba [] copy_process+0x72c/0x17ec [] kernel_clone+0x60/0x2fe [] kernel_thread+0x82/0xa0 [] kthreadd+0x14a/0x1be [] ret_from_fork+0xe/0x1c Rewrite this function with apply_to_existing_page_range(). It is fine to not have any locking, because __kernel_map_pages() works with pages being allocated/deallocated and those pages are not changed by anyone else in the meantime. Fixes: 5fde3db5eb02 ("riscv: add ARCH_SUPPORTS_DEBUG_PAGEALLOC support") Signed-off-by: Nam Cao Cc: stable@vger.kernel.org Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/1289ecba9606a19917bc12b6c27da8aa23e1e5ae.1715750938.git.namcao@linutronix.de Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/pageattr.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c index 410056a50aa9..271d01a5ba4d 100644 --- a/arch/riscv/mm/pageattr.c +++ b/arch/riscv/mm/pageattr.c @@ -387,17 +387,33 @@ int set_direct_map_default_noflush(struct page *page) } #ifdef CONFIG_DEBUG_PAGEALLOC +static int debug_pagealloc_set_page(pte_t *pte, unsigned long addr, void *data) +{ + int enable = *(int *)data; + + unsigned long val = pte_val(ptep_get(pte)); + + if (enable) + val |= _PAGE_PRESENT; + else + val &= ~_PAGE_PRESENT; + + set_pte(pte, __pte(val)); + + return 0; +} + void __kernel_map_pages(struct page *page, int numpages, int enable) { if (!debug_pagealloc_enabled()) return; - if (enable) - __set_memory((unsigned long)page_address(page), numpages, - __pgprot(_PAGE_PRESENT), __pgprot(0)); - else - __set_memory((unsigned long)page_address(page), numpages, - __pgprot(0), __pgprot(_PAGE_PRESENT)); + unsigned long start = (unsigned long)page_address(page); + unsigned long size = PAGE_SIZE * numpages; + + apply_to_existing_page_range(&init_mm, start, size, debug_pagealloc_set_page, &enable); + + flush_tlb_kernel_range(start, start + size); } #endif -- cgit From e482eab4d1eb31031eff2b6afb71776483101979 Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Thu, 2 May 2024 21:50:50 -0700 Subject: riscv: cpufeature: Fix thead vector hwcap removal The riscv_cpuinfo struct that contains mvendorid and marchid is not populated until all harts are booted which happens after the DT parsing. Use the mvendorid/marchid from the boot hart to determine if the DT contains an invalid V. Fixes: d82f32202e0d ("RISC-V: Ignore V from the riscv,isa DT property on older T-Head CPUs") Signed-off-by: Charlie Jenkins Reviewed-by: Conor Dooley Reviewed-by: Guo Ren Link: https://lore.kernel.org/r/20240502-cpufeature_fixes-v4-1-b3d1a088722d@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/sbi.h | 2 ++ arch/riscv/kernel/cpu.c | 40 ++++++++++++++++++++++++++++++++++++---- arch/riscv/kernel/cpufeature.c | 8 ++++++-- 3 files changed, 44 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 6e68f8dff76b..0fab508a65b3 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -370,6 +370,8 @@ static inline int sbi_remote_fence_i(const struct cpumask *cpu_mask) { return -1 static inline void sbi_init(void) {} #endif /* CONFIG_RISCV_SBI */ +unsigned long riscv_get_mvendorid(void); +unsigned long riscv_get_marchid(void); unsigned long riscv_cached_mvendorid(unsigned int cpu_id); unsigned long riscv_cached_marchid(unsigned int cpu_id); unsigned long riscv_cached_mimpid(unsigned int cpu_id); diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index d11d6320fb0d..c1f3655238fd 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -139,6 +139,34 @@ int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid) return -1; } +unsigned long __init riscv_get_marchid(void) +{ + struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo); + +#if IS_ENABLED(CONFIG_RISCV_SBI) + ci->marchid = sbi_spec_is_0_1() ? 0 : sbi_get_marchid(); +#elif IS_ENABLED(CONFIG_RISCV_M_MODE) + ci->marchid = csr_read(CSR_MARCHID); +#else + ci->marchid = 0; +#endif + return ci->marchid; +} + +unsigned long __init riscv_get_mvendorid(void) +{ + struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo); + +#if IS_ENABLED(CONFIG_RISCV_SBI) + ci->mvendorid = sbi_spec_is_0_1() ? 0 : sbi_get_mvendorid(); +#elif IS_ENABLED(CONFIG_RISCV_M_MODE) + ci->mvendorid = csr_read(CSR_MVENDORID); +#else + ci->mvendorid = 0; +#endif + return ci->mvendorid; +} + DEFINE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); unsigned long riscv_cached_mvendorid(unsigned int cpu_id) @@ -170,12 +198,16 @@ static int riscv_cpuinfo_starting(unsigned int cpu) struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo); #if IS_ENABLED(CONFIG_RISCV_SBI) - ci->mvendorid = sbi_spec_is_0_1() ? 0 : sbi_get_mvendorid(); - ci->marchid = sbi_spec_is_0_1() ? 0 : sbi_get_marchid(); + if (!ci->mvendorid) + ci->mvendorid = sbi_spec_is_0_1() ? 0 : sbi_get_mvendorid(); + if (!ci->marchid) + ci->marchid = sbi_spec_is_0_1() ? 0 : sbi_get_marchid(); ci->mimpid = sbi_spec_is_0_1() ? 0 : sbi_get_mimpid(); #elif IS_ENABLED(CONFIG_RISCV_M_MODE) - ci->mvendorid = csr_read(CSR_MVENDORID); - ci->marchid = csr_read(CSR_MARCHID); + if (!ci->mvendorid) + ci->mvendorid = csr_read(CSR_MVENDORID); + if (!ci->marchid) + ci->marchid = csr_read(CSR_MARCHID); ci->mimpid = csr_read(CSR_MIMPID); #else ci->mvendorid = 0; diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 3ed2359eae35..13d4fc0d1817 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -490,6 +490,8 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) struct acpi_table_header *rhct; acpi_status status; unsigned int cpu; + u64 boot_vendorid; + u64 boot_archid; if (!acpi_disabled) { status = acpi_get_table(ACPI_SIG_RHCT, 0, &rhct); @@ -497,6 +499,9 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) return; } + boot_vendorid = riscv_get_mvendorid(); + boot_archid = riscv_get_marchid(); + for_each_possible_cpu(cpu) { struct riscv_isainfo *isainfo = &hart_isa[cpu]; unsigned long this_hwcap = 0; @@ -544,8 +549,7 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) * CPU cores with the ratified spec will contain non-zero * marchid. */ - if (acpi_disabled && riscv_cached_mvendorid(cpu) == THEAD_VENDOR_ID && - riscv_cached_marchid(cpu) == 0x0) { + if (acpi_disabled && boot_vendorid == THEAD_VENDOR_ID && boot_archid == 0x0) { this_hwcap &= ~isa2hwcap[RISCV_ISA_EXT_v]; clear_bit(RISCV_ISA_EXT_v, isainfo->isa); } -- cgit From e67e98ee8952c7d5ce986d1dc6f8221ab8674afa Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Thu, 2 May 2024 21:50:51 -0700 Subject: riscv: cpufeature: Fix extension subset checking This loop is supposed to check if ext->subset_ext_ids[j] is valid, rather than if ext->subset_ext_ids[i] is valid, before setting the extension id ext->subset_ext_ids[j] in isainfo->isa. Signed-off-by: Charlie Jenkins Reviewed-by: Conor Dooley Reviewed-by: Alexandre Ghiti Reviewed-by: Andrew Jones Fixes: 0d8295ed975b ("riscv: add ISA extension parsing for scalar crypto") Link: https://lore.kernel.org/r/20240502-cpufeature_fixes-v4-2-b3d1a088722d@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpufeature.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 13d4fc0d1817..5ef48cb20ee1 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -603,7 +603,7 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) if (ext->subset_ext_size) { for (int j = 0; j < ext->subset_ext_size; j++) { - if (riscv_isa_extension_check(ext->subset_ext_ids[i])) + if (riscv_isa_extension_check(ext->subset_ext_ids[j])) set_bit(ext->subset_ext_ids[j], isainfo->isa); } } -- cgit From e79dfcbfb902a99268cc8022031461da7a8e2bc8 Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Sat, 4 May 2024 21:34:38 +0200 Subject: riscv: make image compression configurable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously the build process would always set KBUILD_IMAGE to the uncompressed Image file (unless XIP_KERNEL or EFI_ZBOOT was enabled) and unconditionally compress it into Image.gz. However there are already build targets for Image.bz2, Image.lz4, Image.lzma, Image.lzo and Image.zstd, so let's make use of those, make the compression method configurable and set KBUILD_IMAGE accordingly so that targets like 'make install' and 'make bindeb-pkg' will use the chosen image. Tested-by: Björn Töpel Signed-off-by: Emil Renner Berthing Reviewed-by: Nicolas Schier Reviewed-by: Masahiro Yamada Link: https://lore.kernel.org/r/20240504193446.196886-2-emil.renner.berthing@canonical.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 7 +++++++ arch/riscv/Makefile | 27 ++++++++++++++++++++------- arch/riscv/boot/install.sh | 9 ++++++--- 3 files changed, 33 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 0bdcc383644d..dc846689230a 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -140,6 +140,13 @@ config RISCV select HAVE_GCC_PLUGINS select HAVE_GENERIC_VDSO if MMU && 64BIT select HAVE_IRQ_TIME_ACCOUNTING + select HAVE_KERNEL_BZIP2 if !XIP_KERNEL && !EFI_ZBOOT + select HAVE_KERNEL_GZIP if !XIP_KERNEL && !EFI_ZBOOT + select HAVE_KERNEL_LZ4 if !XIP_KERNEL && !EFI_ZBOOT + select HAVE_KERNEL_LZMA if !XIP_KERNEL && !EFI_ZBOOT + select HAVE_KERNEL_LZO if !XIP_KERNEL && !EFI_ZBOOT + select HAVE_KERNEL_UNCOMPRESSED if !XIP_KERNEL && !EFI_ZBOOT + select HAVE_KERNEL_ZSTD if !XIP_KERNEL && !EFI_ZBOOT select HAVE_KPROBES if !XIP_KERNEL select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL select HAVE_KRETPROBES if !XIP_KERNEL diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index c537764ee3c9..6ebf505ddd32 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -151,6 +151,21 @@ endif endif endif +boot := arch/riscv/boot +boot-image-y := Image +boot-image-$(CONFIG_KERNEL_BZIP2) := Image.bz2 +boot-image-$(CONFIG_KERNEL_GZIP) := Image.gz +boot-image-$(CONFIG_KERNEL_LZ4) := Image.lz4 +boot-image-$(CONFIG_KERNEL_LZMA) := Image.lzma +boot-image-$(CONFIG_KERNEL_LZO) := Image.lzo +boot-image-$(CONFIG_KERNEL_ZSTD) := Image.zst +ifdef CONFIG_RISCV_M_MODE +boot-image-$(CONFIG_ARCH_CANAAN) := loader.bin +endif +boot-image-$(CONFIG_EFI_ZBOOT) := vmlinuz.efi +boot-image-$(CONFIG_XIP_KERNEL) := xipImage +KBUILD_IMAGE := $(boot)/$(boot-image-y) + libs-y += arch/riscv/lib/ libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a @@ -168,21 +183,19 @@ endif vdso-install-y += arch/riscv/kernel/vdso/vdso.so.dbg vdso-install-$(CONFIG_COMPAT) += arch/riscv/kernel/compat_vdso/compat_vdso.so.dbg -BOOT_TARGETS := Image Image.gz loader loader.bin xipImage vmlinuz.efi +BOOT_TARGETS := Image Image.gz Image.bz2 Image.lz4 Image.lzma Image.lzo Image.zst loader loader.bin xipImage vmlinuz.efi all: $(notdir $(KBUILD_IMAGE)) loader.bin: loader -Image.gz loader vmlinuz.efi: Image +Image.gz Image.bz2 Image.lz4 Image.lzma Image.lzo Image.zst loader xipImage vmlinuz.efi: Image + $(BOOT_TARGETS): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ @$(kecho) ' Kernel: $(boot)/$@ is ready' -Image.%: Image - $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ - -install: KBUILD_IMAGE := $(boot)/Image -zinstall: KBUILD_IMAGE := $(boot)/Image.gz +# the install target always installs KBUILD_IMAGE (which may be compressed) +# but keep the zinstall target for compatibility with older releases install zinstall: $(call cmd,install) diff --git a/arch/riscv/boot/install.sh b/arch/riscv/boot/install.sh index 4c63f3f0643d..a8df7591513a 100755 --- a/arch/riscv/boot/install.sh +++ b/arch/riscv/boot/install.sh @@ -17,15 +17,18 @@ # $3 - kernel map file # $4 - default install path (blank if root directory) -if [ "$(basename $2)" = "Image.gz" ]; then +case "${2##*/}" in # Compressed install +Image.*|vmlinuz.efi) echo "Installing compressed kernel" base=vmlinuz -else + ;; # Normal install +*) echo "Installing normal kernel" base=vmlinux -fi + ;; +esac if [ -f $4/$base-$1 ]; then mv $4/$base-$1 $4/$base-$1.old -- cgit From 07501c4907851fa2f2038ea602bc1fcfaebd3407 Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Sat, 4 May 2024 21:34:39 +0200 Subject: riscv: show help string for riscv-specific targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define the archhelp variable so that 'make ACRH=riscv help' will show the targets specific to building a RISC-V kernel like other architectures. Tested-by: Björn Töpel Signed-off-by: Emil Renner Berthing Reviewed-by: Masahiro Yamada Link: https://lore.kernel.org/r/20240504193446.196886-3-emil.renner.berthing@canonical.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Makefile | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'arch') diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 6ebf505ddd32..8a128def849f 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -216,3 +216,20 @@ rv32_defconfig: PHONY += rv32_nommu_virt_defconfig rv32_nommu_virt_defconfig: $(Q)$(MAKE) -f $(srctree)/Makefile nommu_virt_defconfig 32-bit.config + +define archhelp + echo ' Image - Uncompressed kernel image (arch/riscv/boot/Image)' + echo ' Image.gz - Compressed kernel image (arch/riscv/boot/Image.gz)' + echo ' Image.bz2 - Compressed kernel image (arch/riscv/boot/Image.bz2)' + echo ' Image.lz4 - Compressed kernel image (arch/riscv/boot/Image.lz4)' + echo ' Image.lzma - Compressed kernel image (arch/riscv/boot/Image.lzma)' + echo ' Image.lzo - Compressed kernel image (arch/riscv/boot/Image.lzo)' + echo ' Image.zst - Compressed kernel image (arch/riscv/boot/Image.zst)' + echo ' vmlinuz.efi - Compressed EFI kernel image (arch/riscv/boot/vmlinuz.efi)' + echo ' Default when CONFIG_EFI_ZBOOT=y' + echo ' xipImage - Execute-in-place kernel image (arch/riscv/boot/xipImage)' + echo ' Default when CONFIG_XIP_KERNEL=y' + echo ' install - Install kernel using (your) ~/bin/$(INSTALLKERNEL) or' + echo ' (distribution) /sbin/$(INSTALLKERNEL) or install to ' + echo ' $$(INSTALL_PATH)' +endef -- cgit From 2aff5f955bbae311ca4b66e1dbd934e8f346d1f1 Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Sat, 11 May 2024 09:57:25 +0800 Subject: riscv: do not select MODULE_SECTIONS by default Since commit aad15bc85c18 ("riscv: Change code model of module to medany to improve data accessing"), kernel modules have not been built with -fPIC, so they wouldn't have R_RISCV_GOT_HI20 or R_RISCV_CALL_PLT relocations, and handling of those relocations is unnecessary. If RELOCATABLE=y, kernel modules will be built with -fPIE, which would reintroduce said relocations, so only select MODULE_SECTIONS when RELOCATABLE. Signed-off-by: Qingfang Deng Reviewed-by: Charlie Jenkins Link: https://lore.kernel.org/r/20240511015725.1162-1-dqfext@gmail.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index dc846689230a..b219570d4ad7 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -176,7 +176,6 @@ config RISCV select LOCK_MM_AND_FIND_VMA select MMU_GATHER_RCU_TABLE_FREE if SMP && MMU select MODULES_USE_ELF_RELA if MODULES - select MODULE_SECTIONS if MODULES select OF select OF_EARLY_FLATTREE select OF_IRQ @@ -867,6 +866,7 @@ config PARAVIRT_TIME_ACCOUNTING config RELOCATABLE bool "Build a relocatable kernel" depends on MMU && 64BIT && !XIP_KERNEL + select MODULE_SECTIONS if MODULES help This builds a kernel as a Position Independent Executable (PIE), which retains all relocation metadata required to relocate the -- cgit From 7caa9765465f60b6d88e22264892cee12d971888 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Fri, 5 Apr 2024 14:24:53 +0000 Subject: ftrace: riscv: move from REGS to ARGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit replaces riscv's support for FTRACE_WITH_REGS with support for FTRACE_WITH_ARGS. This is required for the ongoing effort to stop relying on stop_machine() for RISCV's implementation of ftrace. The main relevant benefit that this change will bring for the above use-case is that now we don't have separate ftrace_caller and ftrace_regs_caller trampolines. This will allow the callsite to call ftrace_caller by modifying a single instruction. Now the callsite can do something similar to: When not tracing: | When tracing: func: func: auipc t0, ftrace_caller_top auipc t0, ftrace_caller_top nop <==================> jalr t0, ftrace_caller_bottom [...] [...] The above assumes that we are dropping the support of calling a direct trampoline from the callsite. We need to drop this as the callsite can't change the target address to call, it can only enable/disable a call to a preset target (ftrace_caller in the above diagram). We can later optimize this by calling an intermediate dispatcher trampoline before ftrace_caller. Currently, ftrace_regs_caller saves all CPU registers in the format of struct pt_regs and allows the tracer to modify them. We don't need to save all of the CPU registers because at function entry only a subset of pt_regs is live: |----------+----------+---------------------------------------------| | Register | ABI Name | Description | |----------+----------+---------------------------------------------| | x1 | ra | Return address for traced function | | x2 | sp | Stack pointer | | x5 | t0 | Return address for ftrace_caller trampoline | | x8 | s0/fp | Frame pointer | | x10-11 | a0-1 | Function arguments/return values | | x12-17 | a2-7 | Function arguments | |----------+----------+---------------------------------------------| See RISCV calling convention[1] for the above table. Saving just the live registers decreases the amount of stack space required from 288 Bytes to 112 Bytes. Basic testing was done with this on the VisionFive 2 development board. Note: - Moving from REGS to ARGS will mean that RISCV will stop supporting KPROBES_ON_FTRACE as it requires full pt_regs to be saved. - KPROBES_ON_FTRACE will be supplanted by FPROBES see [2]. [1] https://riscv.org/wp-content/uploads/2015/01/riscv-calling.pdf [2] https://lore.kernel.org/all/170887410337.564249.6360118840946697039.stgit@devnote2/ Signed-off-by: Puranjay Mohan Tested-by: Björn Töpel Reviewed-by: Björn Töpel Link: https://lore.kernel.org/r/20240405142453.4187-1-puranjay@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 +- arch/riscv/include/asm/ftrace.h | 76 ++++++++++++++++-- arch/riscv/kernel/asm-offsets.c | 18 +++++ arch/riscv/kernel/ftrace.c | 17 ++-- arch/riscv/kernel/mcount-dyn.S | 171 ++++++++++++---------------------------- 5 files changed, 143 insertions(+), 141 deletions(-) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index b219570d4ad7..9320a9d01596 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -128,7 +128,7 @@ config RISCV select HAVE_DMA_CONTIGUOUS if MMU select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE) select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS - select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_ARGS if HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index 1276d7d9ca8b..9eb31a7ea0aa 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -124,20 +124,82 @@ struct dyn_ftrace; int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); #define ftrace_init_nop ftrace_init_nop -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS +#define arch_ftrace_get_regs(regs) NULL struct ftrace_ops; -struct ftrace_regs; +struct ftrace_regs { + unsigned long epc; + unsigned long ra; + unsigned long sp; + unsigned long s0; + unsigned long t1; + union { + unsigned long args[8]; + struct { + unsigned long a0; + unsigned long a1; + unsigned long a2; + unsigned long a3; + unsigned long a4; + unsigned long a5; + unsigned long a6; + unsigned long a7; + }; + }; +}; + +static __always_inline unsigned long ftrace_regs_get_instruction_pointer(const struct ftrace_regs + *fregs) +{ + return fregs->epc; +} + +static __always_inline void ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, + unsigned long pc) +{ + fregs->epc = pc; +} + +static __always_inline unsigned long ftrace_regs_get_stack_pointer(const struct ftrace_regs *fregs) +{ + return fregs->sp; +} + +static __always_inline unsigned long ftrace_regs_get_argument(struct ftrace_regs *fregs, + unsigned int n) +{ + if (n < 8) + return fregs->args[n]; + return 0; +} + +static __always_inline unsigned long ftrace_regs_get_return_value(const struct ftrace_regs *fregs) +{ + return fregs->a0; +} + +static __always_inline void ftrace_regs_set_return_value(struct ftrace_regs *fregs, + unsigned long ret) +{ + fregs->a0 = ret; +} + +static __always_inline void ftrace_override_function_with_return(struct ftrace_regs *fregs) +{ + fregs->epc = fregs->ra; +} + +int ftrace_regs_query_register_offset(const char *name); + void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs); #define ftrace_graph_func ftrace_graph_func -static inline void __arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr) +static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr) { - regs->t1 = addr; + fregs->t1 = addr; } -#define arch_ftrace_set_direct_caller(fregs, addr) \ - __arch_ftrace_set_direct_caller(&(fregs)->regs, addr) -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ #endif /* __ASSEMBLY__ */ diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index a03129f40c46..b09ca5f944f7 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -488,4 +489,21 @@ void asm_offsets(void) DEFINE(STACKFRAME_SIZE_ON_STACK, ALIGN(sizeof(struct stackframe), STACK_ALIGN)); OFFSET(STACKFRAME_FP, stackframe, fp); OFFSET(STACKFRAME_RA, stackframe, ra); + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS + DEFINE(FREGS_SIZE_ON_STACK, ALIGN(sizeof(struct ftrace_regs), STACK_ALIGN)); + DEFINE(FREGS_EPC, offsetof(struct ftrace_regs, epc)); + DEFINE(FREGS_RA, offsetof(struct ftrace_regs, ra)); + DEFINE(FREGS_SP, offsetof(struct ftrace_regs, sp)); + DEFINE(FREGS_S0, offsetof(struct ftrace_regs, s0)); + DEFINE(FREGS_T1, offsetof(struct ftrace_regs, t1)); + DEFINE(FREGS_A0, offsetof(struct ftrace_regs, a0)); + DEFINE(FREGS_A1, offsetof(struct ftrace_regs, a1)); + DEFINE(FREGS_A2, offsetof(struct ftrace_regs, a2)); + DEFINE(FREGS_A3, offsetof(struct ftrace_regs, a3)); + DEFINE(FREGS_A4, offsetof(struct ftrace_regs, a4)); + DEFINE(FREGS_A5, offsetof(struct ftrace_regs, a5)); + DEFINE(FREGS_A6, offsetof(struct ftrace_regs, a6)); + DEFINE(FREGS_A7, offsetof(struct ftrace_regs, a7)); +#endif } diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 4f4987a6d83d..4d14eadcd765 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -127,10 +127,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func) { int ret = __ftrace_modify_call((unsigned long)&ftrace_call, (unsigned long)func, true, true); - if (!ret) { - ret = __ftrace_modify_call((unsigned long)&ftrace_regs_call, - (unsigned long)func, true, true); - } return ret; } @@ -172,7 +168,7 @@ void arch_ftrace_update_code(int command) } #endif -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { @@ -214,16 +210,13 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, } #ifdef CONFIG_DYNAMIC_FTRACE -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - struct pt_regs *regs = arch_ftrace_get_regs(fregs); - unsigned long *parent = (unsigned long *)®s->ra; - - prepare_ftrace_return(parent, ip, frame_pointer(regs)); + prepare_ftrace_return(&fregs->ra, ip, fregs->s0); } -#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ +#else /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ extern void ftrace_graph_call(void); int ftrace_enable_ftrace_graph_caller(void) { @@ -236,6 +229,6 @@ int ftrace_disable_ftrace_graph_caller(void) return __ftrace_modify_call((unsigned long)&ftrace_graph_call, (unsigned long)&prepare_ftrace_return, false, true); } -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index b7561288e8da..745dd4c4a69c 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -56,138 +56,77 @@ addi sp, sp, ABI_SIZE_ON_STACK .endm -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS /** -* SAVE_ABI_REGS - save regs against the pt_regs struct -* -* @all: tell if saving all the regs -* -* If all is set, all the regs will be saved, otherwise only ABI -* related regs (a0-a7,epc,ra and optional s0) will be saved. +* SAVE_ABI_REGS - save regs against the ftrace_regs struct * * After the stack is established, * * 0(sp) stores the PC of the traced function which can be accessed -* by &(fregs)->regs->epc in tracing function. Note that the real +* by &(fregs)->epc in tracing function. Note that the real * function entry address should be computed with -FENTRY_RA_OFFSET. * * 8(sp) stores the function return address (i.e. parent IP) that -* can be accessed by &(fregs)->regs->ra in tracing function. +* can be accessed by &(fregs)->ra in tracing function. * * The other regs are saved at the respective localtion and accessed -* by the respective pt_regs member. +* by the respective ftrace_regs member. * * Here is the layout of stack for your reference. * * PT_SIZE_ON_STACK -> +++++++++ * + ..... + -* + t3-t6 + -* + s2-s11+ * + a0-a7 + --++++-> ftrace_caller saved -* + s1 + + -* + s0 + --+ -* + t0-t2 + + -* + tp + + -* + gp + + +* + t1 + --++++-> direct tramp address +* + s0 + --+ // frame pointer * + sp + + * + ra + --+ // parent IP * sp -> + epc + --+ // PC * +++++++++ **/ - .macro SAVE_ABI_REGS, all=0 - addi sp, sp, -PT_SIZE_ON_STACK - - REG_S t0, PT_EPC(sp) - REG_S x1, PT_RA(sp) - - // save the ABI regs - - REG_S x10, PT_A0(sp) - REG_S x11, PT_A1(sp) - REG_S x12, PT_A2(sp) - REG_S x13, PT_A3(sp) - REG_S x14, PT_A4(sp) - REG_S x15, PT_A5(sp) - REG_S x16, PT_A6(sp) - REG_S x17, PT_A7(sp) - - // save the leftover regs + .macro SAVE_ABI_REGS + mv t4, sp // Save original SP in T4 + addi sp, sp, -FREGS_SIZE_ON_STACK - .if \all == 1 - REG_S x2, PT_SP(sp) - REG_S x3, PT_GP(sp) - REG_S x4, PT_TP(sp) - REG_S x5, PT_T0(sp) - REG_S x6, PT_T1(sp) - REG_S x7, PT_T2(sp) - REG_S x8, PT_S0(sp) - REG_S x9, PT_S1(sp) - REG_S x18, PT_S2(sp) - REG_S x19, PT_S3(sp) - REG_S x20, PT_S4(sp) - REG_S x21, PT_S5(sp) - REG_S x22, PT_S6(sp) - REG_S x23, PT_S7(sp) - REG_S x24, PT_S8(sp) - REG_S x25, PT_S9(sp) - REG_S x26, PT_S10(sp) - REG_S x27, PT_S11(sp) - REG_S x28, PT_T3(sp) - REG_S x29, PT_T4(sp) - REG_S x30, PT_T5(sp) - REG_S x31, PT_T6(sp) - - // save s0 if FP_TEST defined - - .else + REG_S t0, FREGS_EPC(sp) + REG_S x1, FREGS_RA(sp) + REG_S t4, FREGS_SP(sp) // Put original SP on stack #ifdef HAVE_FUNCTION_GRAPH_FP_TEST - REG_S x8, PT_S0(sp) + REG_S x8, FREGS_S0(sp) #endif - .endif + REG_S x6, FREGS_T1(sp) + + // save the arguments + REG_S x10, FREGS_A0(sp) + REG_S x11, FREGS_A1(sp) + REG_S x12, FREGS_A2(sp) + REG_S x13, FREGS_A3(sp) + REG_S x14, FREGS_A4(sp) + REG_S x15, FREGS_A5(sp) + REG_S x16, FREGS_A6(sp) + REG_S x17, FREGS_A7(sp) .endm .macro RESTORE_ABI_REGS, all=0 - REG_L t0, PT_EPC(sp) - REG_L x1, PT_RA(sp) - REG_L x10, PT_A0(sp) - REG_L x11, PT_A1(sp) - REG_L x12, PT_A2(sp) - REG_L x13, PT_A3(sp) - REG_L x14, PT_A4(sp) - REG_L x15, PT_A5(sp) - REG_L x16, PT_A6(sp) - REG_L x17, PT_A7(sp) - - .if \all == 1 - REG_L x2, PT_SP(sp) - REG_L x3, PT_GP(sp) - REG_L x4, PT_TP(sp) - REG_L x6, PT_T1(sp) - REG_L x7, PT_T2(sp) - REG_L x8, PT_S0(sp) - REG_L x9, PT_S1(sp) - REG_L x18, PT_S2(sp) - REG_L x19, PT_S3(sp) - REG_L x20, PT_S4(sp) - REG_L x21, PT_S5(sp) - REG_L x22, PT_S6(sp) - REG_L x23, PT_S7(sp) - REG_L x24, PT_S8(sp) - REG_L x25, PT_S9(sp) - REG_L x26, PT_S10(sp) - REG_L x27, PT_S11(sp) - REG_L x28, PT_T3(sp) - REG_L x29, PT_T4(sp) - REG_L x30, PT_T5(sp) - REG_L x31, PT_T6(sp) - - .else + REG_L t0, FREGS_EPC(sp) + REG_L x1, FREGS_RA(sp) #ifdef HAVE_FUNCTION_GRAPH_FP_TEST - REG_L x8, PT_S0(sp) + REG_L x8, FREGS_S0(sp) #endif - .endif - addi sp, sp, PT_SIZE_ON_STACK + REG_L x6, FREGS_T1(sp) + + // restore the arguments + REG_L x10, FREGS_A0(sp) + REG_L x11, FREGS_A1(sp) + REG_L x12, FREGS_A2(sp) + REG_L x13, FREGS_A3(sp) + REG_L x14, FREGS_A4(sp) + REG_L x15, FREGS_A5(sp) + REG_L x16, FREGS_A6(sp) + REG_L x17, FREGS_A7(sp) + + addi sp, sp, FREGS_SIZE_ON_STACK .endm .macro PREPARE_ARGS @@ -198,9 +137,9 @@ mv a3, sp .endm -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ -#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS SYM_FUNC_START(ftrace_caller) SAVE_ABI @@ -227,33 +166,23 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) jr t0 SYM_FUNC_END(ftrace_caller) -#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ -SYM_FUNC_START(ftrace_regs_caller) +#else /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ +SYM_FUNC_START(ftrace_caller) mv t1, zero - SAVE_ABI_REGS 1 + SAVE_ABI_REGS PREPARE_ARGS -SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) +SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) call ftrace_stub - RESTORE_ABI_REGS 1 + RESTORE_ABI_REGS bnez t1, .Ldirect jr t0 .Ldirect: jr t1 -SYM_FUNC_END(ftrace_regs_caller) - -SYM_FUNC_START(ftrace_caller) - SAVE_ABI_REGS 0 - PREPARE_ARGS - -SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) - call ftrace_stub - - RESTORE_ABI_REGS 0 - jr t0 SYM_FUNC_END(ftrace_caller) -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ + +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS SYM_CODE_START(ftrace_stub_direct_tramp) -- cgit From a2a4d4a6a0bf5eba66f8b0b32502cc20d82715a0 Mon Sep 17 00:00:00 2001 From: Matthew Bystrin Date: Tue, 21 May 2024 22:13:13 +0300 Subject: riscv: stacktrace: fixed walk_stackframe() If the load access fault occures in a leaf function (with CONFIG_FRAME_POINTER=y), when wrong stack trace will be displayed: [] regmap_mmio_read32le+0xe/0x1c ---[ end trace 0000000000000000 ]--- Registers dump: ra 0xffffffff80485758 sp 0xffffffc80200b9a0 fp 0xffffffc80200b9b0 pc 0xffffffff804853ba Stack dump: 0xffffffc80200b9a0: 0xffffffc80200b9e0 0xffffffc80200b9e0 0xffffffc80200b9b0: 0xffffffff8116d7e8 0x0000000000000100 0xffffffc80200b9c0: 0xffffffd8055b9400 0xffffffd8055b9400 0xffffffc80200b9d0: 0xffffffc80200b9f0 0xffffffff8047c526 0xffffffc80200b9e0: 0xffffffc80200ba30 0xffffffff8047fe9a The assembler dump of the function preambula: add sp,sp,-16 sd s0,8(sp) add s0,sp,16 In the fist stack frame, where ra is not stored on the stack we can observe: 0(sp) 8(sp) .---------------------------------------------. sp->| frame->fp | frame->ra (saved fp) | |---------------------------------------------| fp->| .... | .... | |---------------------------------------------| | | | and in the code check is performed: if (regs && (regs->epc == pc) && (frame->fp & 0x7)) I see no reason to check frame->fp value at all, because it is can be uninitialized value on the stack. A better way is to check frame->ra to be an address on the stack. After the stacktrace shows as expect: [] regmap_mmio_read32le+0xe/0x1c [] regmap_mmio_read+0x24/0x52 [] _regmap_bus_reg_read+0x1a/0x22 [] _regmap_read+0x5c/0xea [] _regmap_update_bits+0x76/0xc0 ... ---[ end trace 0000000000000000 ]--- As pointed by Samuel Holland it is incorrect to remove check of the stackframe entirely. Changes since v2 [2]: - Add accidentally forgotten curly brace Changes since v1 [1]: - Instead of just dropping frame->fp check, replace it with validation of frame->ra, which should be a stack address. - Move frame pointer validation into the separate function. [1] https://lore.kernel.org/linux-riscv/20240426072701.6463-1-dev.mbstr@gmail.com/ [2] https://lore.kernel.org/linux-riscv/20240521131314.48895-1-dev.mbstr@gmail.com/ Fixes: f766f77a74f5 ("riscv/stacktrace: Fix stack output without ra on the stack top") Signed-off-by: Matthew Bystrin Reviewed-by: Samuel Holland Link: https://lore.kernel.org/r/20240521191727.62012-1-dev.mbstr@gmail.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/stacktrace.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 64a9c093aef9..528ec7cc9a62 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -18,6 +18,16 @@ extern asmlinkage void ret_from_exception(void); +static inline int fp_is_valid(unsigned long fp, unsigned long sp) +{ + unsigned long low, high; + + low = sp + sizeof(struct stackframe); + high = ALIGN(sp, THREAD_SIZE); + + return !(fp < low || fp > high || fp & 0x07); +} + void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg) { @@ -41,21 +51,19 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, } for (;;) { - unsigned long low, high; struct stackframe *frame; if (unlikely(!__kernel_text_address(pc) || (level++ >= 0 && !fn(arg, pc)))) break; - /* Validate frame pointer */ - low = sp + sizeof(struct stackframe); - high = ALIGN(sp, THREAD_SIZE); - if (unlikely(fp < low || fp > high || fp & 0x7)) + if (unlikely(!fp_is_valid(fp, sp))) break; + /* Unwind stack frame */ frame = (struct stackframe *)fp - 1; sp = fp; - if (regs && (regs->epc == pc) && (frame->fp & 0x7)) { + if (regs && (regs->epc == pc) && fp_is_valid(frame->ra, sp)) { + /* We hit function where ra is not saved on the stack */ fp = frame->ra; pc = regs->ra; } else { -- cgit From 10378a39ed76b8ee915edaff0939c62d5e3ddb54 Mon Sep 17 00:00:00 2001 From: Zhao Ke Date: Mon, 18 Mar 2024 14:54:04 +0800 Subject: Use bool value in set_cpu_online() The declaration of set_cpu_online() takes a bool value. So replace int here to make it consistent with the declaration. Signed-off-by: Zhao Ke Reviewed-by: Charlie Jenkins Link: https://lore.kernel.org/r/20240318065404.123668-1-ke.zhao@shingroup.cn Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/smpboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 4b3c50da48ba..1319b29ce3b5 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -224,7 +224,7 @@ asmlinkage __visible void smp_callin(void) riscv_ipi_enable(); numa_add_cpu(curr_cpuid); - set_cpu_online(curr_cpuid, 1); + set_cpu_online(curr_cpuid, true); if (has_vector()) { if (riscv_v_setup_vsize()) -- cgit From 7e6eae24daf6bdb812c14d40b76c23de1371149d Mon Sep 17 00:00:00 2001 From: Xingyou Chen Date: Sun, 17 Mar 2024 13:55:56 +0800 Subject: riscv: typo in comment for get_f64_reg Signed-off-by: Xingyou Chen Reviewed-by: Randy Dunlap Link: https://lore.kernel.org/r/20240317055556.9449-1-rockrush@rockwork.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/fpu.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/kernel/fpu.S b/arch/riscv/kernel/fpu.S index 2c543f130f93..327cf527dd7e 100644 --- a/arch/riscv/kernel/fpu.S +++ b/arch/riscv/kernel/fpu.S @@ -211,7 +211,7 @@ SYM_FUNC_START(put_f64_reg) SYM_FUNC_END(put_f64_reg) /* - * put_f64_reg - Get a 64 bits FP register value and returned it or store it to + * get_f64_reg - Get a 64 bits FP register value and returned it or store it to * a pointer. * a0 = FP register index to be retrieved * a1 = If xlen == 32, pointer which should be loaded with the FP register value -- cgit From f1905946bed052522522303f1d144f506ef5d9f9 Mon Sep 17 00:00:00 2001 From: Xiao Wang Date: Wed, 13 Mar 2024 18:33:34 +0800 Subject: riscv: uaccess: Allow the last potential unrolled copy When the dst buffer pointer points to the last accessible aligned addr, we could still run another iteration of unrolled copy. Signed-off-by: Xiao Wang Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240313103334.4036554-1-xiao.w.wang@intel.com Signed-off-by: Palmer Dabbelt --- arch/riscv/lib/uaccess.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index bc22c078aba8..64792a7ae72e 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -103,7 +103,7 @@ SYM_FUNC_START(fallback_scalar_usercopy) fixup REG_S t4, 7*SZREG(a0), 10f addi a0, a0, 8*SZREG addi a1, a1, 8*SZREG - bltu a0, t0, 2b + bleu a0, t0, 2b addi t0, t0, 8*SZREG /* revert to original value */ j .Lbyte_copy_tail -- cgit From 9850e73e82972f518b75dd0d94d2322f44d9191d Mon Sep 17 00:00:00 2001 From: Xiao Wang Date: Wed, 13 Mar 2024 17:19:29 +0800 Subject: riscv: uaccess: Relax the threshold for fast path The bytes copy for unaligned head would cover at most SZREG-1 bytes, so it's better to set the threshold as >= (SZREG-1 + word_copy stride size) which equals to 9*SZREG-1. Signed-off-by: Xiao Wang Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240313091929.4029960-1-xiao.w.wang@intel.com Signed-off-by: Palmer Dabbelt --- arch/riscv/lib/uaccess.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index 64792a7ae72e..1399d797d81b 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -44,7 +44,7 @@ SYM_FUNC_START(fallback_scalar_usercopy) * Use byte copy only if too small. * SZREG holds 4 for RV32 and 8 for RV64 */ - li a3, 9*SZREG /* size must be larger than size in word_copy */ + li a3, 9*SZREG-1 /* size must >= (word_copy stride + SZREG-1) */ bltu a2, a3, .Lbyte_copy_tail /* -- cgit From 4c6c0020427a4547845a83f7e4d6085e16c3e24f Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 3 Apr 2024 16:38:03 +0800 Subject: riscv: mm: accelerate pagefault when badaccess The access_error() of vma already checked under per-VMA lock, if it is a bad access, directly handle error, no need to retry with mmap_lock again. Since the page faut is handled under per-VMA lock, count it as a vma lock event with VMA_LOCK_SUCCESS. Reviewed-by: Suren Baghdasaryan Signed-off-by: Kefeng Wang Reviewed-by: Alexandre Ghiti Tested-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240403083805.1818160-6-wangkefeng.wang@huawei.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/fault.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 5224f3733802..b3fcf7d67efb 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -293,8 +293,8 @@ void handle_page_fault(struct pt_regs *regs) if (unlikely(access_error(cause, vma))) { vma_end_read(vma); count_vm_vma_lock_event(VMA_LOCK_SUCCESS); - tsk->thread.bad_cause = cause; - bad_area_nosemaphore(regs, SEGV_ACCERR, addr); + tsk->thread.bad_cause = SEGV_ACCERR; + bad_area_nosemaphore(regs, code, addr); return; } -- cgit From 6ca445d8af0ed5950ebf899415fd6bfcd7d9d7a3 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Thu, 23 May 2024 13:51:34 +0200 Subject: riscv: Fix early ftrace nop patching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit c97bf629963e ("riscv: Fix text patching when IPI are used") converted ftrace_make_nop() to use patch_insn_write() which does not emit any icache flush relying entirely on __ftrace_modify_code() to do that. But we missed that ftrace_make_nop() was called very early directly when converting mcount calls into nops (actually on riscv it converts 2B nops emitted by the compiler into 4B nops). This caused crashes on multiple HW as reported by Conor and Björn since the booting core could have half-patched instructions in its icache which would trigger an illegal instruction trap: fix this by emitting a local flush icache when early patching nops. Fixes: c97bf629963e ("riscv: Fix text patching when IPI are used") Signed-off-by: Alexandre Ghiti Reported-by: Conor Dooley Tested-by: Conor Dooley Reviewed-by: Björn Töpel Tested-by: Björn Töpel Link: https://lore.kernel.org/r/20240523115134.70380-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cacheflush.h | 6 ++++++ arch/riscv/kernel/ftrace.c | 3 +++ 2 files changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index dd8d07146116..ce79c558a4c8 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -13,6 +13,12 @@ static inline void local_flush_icache_all(void) asm volatile ("fence.i" ::: "memory"); } +static inline void local_flush_icache_range(unsigned long start, + unsigned long end) +{ + local_flush_icache_all(); +} + #define PG_dcache_clean PG_arch_1 static inline void flush_dcache_folio(struct folio *folio) diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 4d14eadcd765..87cbd86576b2 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -120,6 +120,9 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) out = ftrace_make_nop(mod, rec, MCOUNT_ADDR); mutex_unlock(&text_mutex); + if (!mod) + local_flush_icache_range(rec->ip, rec->ip + MCOUNT_INSN_SIZE); + return out; } -- cgit From a6c11c0a5235fb144a65e0cb2ffd360ddc1f6c32 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Wed, 22 May 2024 15:02:18 -0700 Subject: genirq/cpuhotplug, x86/vector: Prevent vector leak during CPU offline The absence of IRQD_MOVE_PCNTXT prevents immediate effectiveness of interrupt affinity reconfiguration via procfs. Instead, the change is deferred until the next instance of the interrupt being triggered on the original CPU. When the interrupt next triggers on the original CPU, the new affinity is enforced within __irq_move_irq(). A vector is allocated from the new CPU, but the old vector on the original CPU remains and is not immediately reclaimed. Instead, apicd->move_in_progress is flagged, and the reclaiming process is delayed until the next trigger of the interrupt on the new CPU. Upon the subsequent triggering of the interrupt on the new CPU, irq_complete_move() adds a task to the old CPU's vector_cleanup list if it remains online. Subsequently, the timer on the old CPU iterates over its vector_cleanup list, reclaiming old vectors. However, a rare scenario arises if the old CPU is outgoing before the interrupt triggers again on the new CPU. In that case irq_force_complete_move() is not invoked on the outgoing CPU to reclaim the old apicd->prev_vector because the interrupt isn't currently affine to the outgoing CPU, and irq_needs_fixup() returns false. Even though __vector_schedule_cleanup() is later called on the new CPU, it doesn't reclaim apicd->prev_vector; instead, it simply resets both apicd->move_in_progress and apicd->prev_vector to 0. As a result, the vector remains unreclaimed in vector_matrix, leading to a CPU vector leak. To address this issue, move the invocation of irq_force_complete_move() before the irq_needs_fixup() call to reclaim apicd->prev_vector, if the interrupt is currently or used to be affine to the outgoing CPU. Additionally, reclaim the vector in __vector_schedule_cleanup() as well, following a warning message, although theoretically it should never see apicd->move_in_progress with apicd->prev_cpu pointing to an offline CPU. Fixes: f0383c24b485 ("genirq/cpuhotplug: Add support for cleaning up move in progress") Signed-off-by: Dongli Zhang Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240522220218.162423-1-dongli.zhang@oracle.com --- arch/x86/kernel/apic/vector.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 9eec52925fa3..557318145038 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -1035,7 +1035,8 @@ static void __vector_schedule_cleanup(struct apic_chip_data *apicd) add_timer_on(&cl->timer, cpu); } } else { - apicd->prev_vector = 0; + pr_warn("IRQ %u schedule cleanup for offline CPU %u\n", apicd->irq, cpu); + free_moved_vector(apicd); } raw_spin_unlock(&vector_lock); } @@ -1072,6 +1073,7 @@ void irq_complete_move(struct irq_cfg *cfg) */ void irq_force_complete_move(struct irq_desc *desc) { + unsigned int cpu = smp_processor_id(); struct apic_chip_data *apicd; struct irq_data *irqd; unsigned int vector; @@ -1096,10 +1098,11 @@ void irq_force_complete_move(struct irq_desc *desc) goto unlock; /* - * If prev_vector is empty, no action required. + * If prev_vector is empty or the descriptor is neither currently + * nor previously on the outgoing CPU no action required. */ vector = apicd->prev_vector; - if (!vector) + if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu)) goto unlock; /* -- cgit From ff388fe5c481d39cc0a5940d1ad46f7920f1d646 Mon Sep 17 00:00:00 2001 From: Jeff Xu Date: Mon, 15 Apr 2024 16:35:20 +0000 Subject: mseal: wire up mseal syscall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "Introduce mseal", v10. This patchset proposes a new mseal() syscall for the Linux kernel. In a nutshell, mseal() protects the VMAs of a given virtual memory range against modifications, such as changes to their permission bits. Modern CPUs support memory permissions, such as the read/write (RW) and no-execute (NX) bits. Linux has supported NX since the release of kernel version 2.6.8 in August 2004 [1]. The memory permission feature improves the security stance on memory corruption bugs, as an attacker cannot simply write to arbitrary memory and point the code to it. The memory must be marked with the X bit, or else an exception will occur. Internally, the kernel maintains the memory permissions in a data structure called VMA (vm_area_struct). mseal() additionally protects the VMA itself against modifications of the selected seal type. Memory sealing is useful to mitigate memory corruption issues where a corrupted pointer is passed to a memory management system. For example, such an attacker primitive can break control-flow integrity guarantees since read-only memory that is supposed to be trusted can become writable or .text pages can get remapped. Memory sealing can automatically be applied by the runtime loader to seal .text and .rodata pages and applications can additionally seal security critical data at runtime. A similar feature already exists in the XNU kernel with the VM_FLAGS_PERMANENT [3] flag and on OpenBSD with the mimmutable syscall [4]. Also, Chrome wants to adopt this feature for their CFI work [2] and this patchset has been designed to be compatible with the Chrome use case. Two system calls are involved in sealing the map: mmap() and mseal(). The new mseal() is an syscall on 64 bit CPU, and with following signature: int mseal(void addr, size_t len, unsigned long flags) addr/len: memory range. flags: reserved. mseal() blocks following operations for the given memory range. 1> Unmapping, moving to another location, and shrinking the size, via munmap() and mremap(), can leave an empty space, therefore can be replaced with a VMA with a new set of attributes. 2> Moving or expanding a different VMA into the current location, via mremap(). 3> Modifying a VMA via mmap(MAP_FIXED). 4> Size expansion, via mremap(), does not appear to pose any specific risks to sealed VMAs. It is included anyway because the use case is unclear. In any case, users can rely on merging to expand a sealed VMA. 5> mprotect() and pkey_mprotect(). 6> Some destructive madvice() behaviors (e.g. MADV_DONTNEED) for anonymous memory, when users don't have write permission to the memory. Those behaviors can alter region contents by discarding pages, effectively a memset(0) for anonymous memory. The idea that inspired this patch comes from Stephen Röttger’s work in V8 CFI [5]. Chrome browser in ChromeOS will be the first user of this API. Indeed, the Chrome browser has very specific requirements for sealing, which are distinct from those of most applications. For example, in the case of libc, sealing is only applied to read-only (RO) or read-execute (RX) memory segments (such as .text and .RELRO) to prevent them from becoming writable, the lifetime of those mappings are tied to the lifetime of the process. Chrome wants to seal two large address space reservations that are managed by different allocators. The memory is mapped RW- and RWX respectively but write access to it is restricted using pkeys (or in the future ARM permission overlay extensions). The lifetime of those mappings are not tied to the lifetime of the process, therefore, while the memory is sealed, the allocators still need to free or discard the unused memory. For example, with madvise(DONTNEED). However, always allowing madvise(DONTNEED) on this range poses a security risk. For example if a jump instruction crosses a page boundary and the second page gets discarded, it will overwrite the target bytes with zeros and change the control flow. Checking write-permission before the discard operation allows us to control when the operation is valid. In this case, the madvise will only succeed if the executing thread has PKEY write permissions and PKRU changes are protected in software by control-flow integrity. Although the initial version of this patch series is targeting the Chrome browser as its first user, it became evident during upstream discussions that we would also want to ensure that the patch set eventually is a complete solution for memory sealing and compatible with other use cases. The specific scenario currently in mind is glibc's use case of loading and sealing ELF executables. To this end, Stephen is working on a change to glibc to add sealing support to the dynamic linker, which will seal all non-writable segments at startup. Once this work is completed, all applications will be able to automatically benefit from these new protections. In closing, I would like to formally acknowledge the valuable contributions received during the RFC process, which were instrumental in shaping this patch: Jann Horn: raising awareness and providing valuable insights on the destructive madvise operations. Liam R. Howlett: perf optimization. Linus Torvalds: assisting in defining system call signature and scope. Theo de Raadt: sharing the experiences and insight gained from implementing mimmutable() in OpenBSD. MM perf benchmarks ================== This patch adds a loop in the mprotect/munmap/madvise(DONTNEED) to check the VMAs’ sealing flag, so that no partial update can be made, when any segment within the given memory range is sealed. To measure the performance impact of this loop, two tests are developed. [8] The first is measuring the time taken for a particular system call, by using clock_gettime(CLOCK_MONOTONIC). The second is using PERF_COUNT_HW_REF_CPU_CYCLES (exclude user space). Both tests have similar results. The tests have roughly below sequence: for (i = 0; i < 1000, i++) create 1000 mappings (1 page per VMA) start the sampling for (j = 0; j < 1000, j++) mprotect one mapping stop and save the sample delete 1000 mappings calculates all samples. Below tests are performed on Intel(R) Pentium(R) Gold 7505 @ 2.00GHz, 4G memory, Chromebook. Based on the latest upstream code: The first test (measuring time) syscall__ vmas t t_mseal delta_ns per_vma % munmap__ 1 909 944 35 35 104% munmap__ 2 1398 1502 104 52 107% munmap__ 4 2444 2594 149 37 106% munmap__ 8 4029 4323 293 37 107% munmap__ 16 6647 6935 288 18 104% munmap__ 32 11811 12398 587 18 105% mprotect 1 439 465 26 26 106% mprotect 2 1659 1745 86 43 105% mprotect 4 3747 3889 142 36 104% mprotect 8 6755 6969 215 27 103% mprotect 16 13748 14144 396 25 103% mprotect 32 27827 28969 1142 36 104% madvise_ 1 240 262 22 22 109% madvise_ 2 366 442 76 38 121% madvise_ 4 623 751 128 32 121% madvise_ 8 1110 1324 215 27 119% madvise_ 16 2127 2451 324 20 115% madvise_ 32 4109 4642 534 17 113% The second test (measuring cpu cycle) syscall__ vmas cpu cmseal delta_cpu per_vma % munmap__ 1 1790 1890 100 100 106% munmap__ 2 2819 3033 214 107 108% munmap__ 4 4959 5271 312 78 106% munmap__ 8 8262 8745 483 60 106% munmap__ 16 13099 14116 1017 64 108% munmap__ 32 23221 24785 1565 49 107% mprotect 1 906 967 62 62 107% mprotect 2 3019 3203 184 92 106% mprotect 4 6149 6569 420 105 107% mprotect 8 9978 10524 545 68 105% mprotect 16 20448 21427 979 61 105% mprotect 32 40972 42935 1963 61 105% madvise_ 1 434 497 63 63 115% madvise_ 2 752 899 147 74 120% madvise_ 4 1313 1513 200 50 115% madvise_ 8 2271 2627 356 44 116% madvise_ 16 4312 4883 571 36 113% madvise_ 32 8376 9319 943 29 111% Based on the result, for 6.8 kernel, sealing check adds 20-40 nano seconds, or around 50-100 CPU cycles, per VMA. In addition, I applied the sealing to 5.10 kernel: The first test (measuring time) syscall__ vmas t tmseal delta_ns per_vma % munmap__ 1 357 390 33 33 109% munmap__ 2 442 463 21 11 105% munmap__ 4 614 634 20 5 103% munmap__ 8 1017 1137 120 15 112% munmap__ 16 1889 2153 263 16 114% munmap__ 32 4109 4088 -21 -1 99% mprotect 1 235 227 -7 -7 97% mprotect 2 495 464 -30 -15 94% mprotect 4 741 764 24 6 103% mprotect 8 1434 1437 2 0 100% mprotect 16 2958 2991 33 2 101% mprotect 32 6431 6608 177 6 103% madvise_ 1 191 208 16 16 109% madvise_ 2 300 324 24 12 108% madvise_ 4 450 473 23 6 105% madvise_ 8 753 806 53 7 107% madvise_ 16 1467 1592 125 8 108% madvise_ 32 2795 3405 610 19 122% The second test (measuring cpu cycle) syscall__ nbr_vma cpu cmseal delta_cpu per_vma % munmap__ 1 684 715 31 31 105% munmap__ 2 861 898 38 19 104% munmap__ 4 1183 1235 51 13 104% munmap__ 8 1999 2045 46 6 102% munmap__ 16 3839 3816 -23 -1 99% munmap__ 32 7672 7887 216 7 103% mprotect 1 397 443 46 46 112% mprotect 2 738 788 50 25 107% mprotect 4 1221 1256 35 9 103% mprotect 8 2356 2429 72 9 103% mprotect 16 4961 4935 -26 -2 99% mprotect 32 9882 10172 291 9 103% madvise_ 1 351 380 29 29 108% madvise_ 2 565 615 49 25 109% madvise_ 4 872 933 61 15 107% madvise_ 8 1508 1640 132 16 109% madvise_ 16 3078 3323 245 15 108% madvise_ 32 5893 6704 811 25 114% For 5.10 kernel, sealing check adds 0-15 ns in time, or 10-30 CPU cycles, there is even decrease in some cases. It might be interesting to compare 5.10 and 6.8 kernel The first test (measuring time) syscall__ vmas t_5_10 t_6_8 delta_ns per_vma % munmap__ 1 357 909 552 552 254% munmap__ 2 442 1398 956 478 316% munmap__ 4 614 2444 1830 458 398% munmap__ 8 1017 4029 3012 377 396% munmap__ 16 1889 6647 4758 297 352% munmap__ 32 4109 11811 7702 241 287% mprotect 1 235 439 204 204 187% mprotect 2 495 1659 1164 582 335% mprotect 4 741 3747 3006 752 506% mprotect 8 1434 6755 5320 665 471% mprotect 16 2958 13748 10790 674 465% mprotect 32 6431 27827 21397 669 433% madvise_ 1 191 240 49 49 125% madvise_ 2 300 366 67 33 122% madvise_ 4 450 623 173 43 138% madvise_ 8 753 1110 357 45 147% madvise_ 16 1467 2127 660 41 145% madvise_ 32 2795 4109 1314 41 147% The second test (measuring cpu cycle) syscall__ vmas cpu_5_10 c_6_8 delta_cpu per_vma % munmap__ 1 684 1790 1106 1106 262% munmap__ 2 861 2819 1958 979 327% munmap__ 4 1183 4959 3776 944 419% munmap__ 8 1999 8262 6263 783 413% munmap__ 16 3839 13099 9260 579 341% munmap__ 32 7672 23221 15549 486 303% mprotect 1 397 906 509 509 228% mprotect 2 738 3019 2281 1140 409% mprotect 4 1221 6149 4929 1232 504% mprotect 8 2356 9978 7622 953 423% mprotect 16 4961 20448 15487 968 412% mprotect 32 9882 40972 31091 972 415% madvise_ 1 351 434 82 82 123% madvise_ 2 565 752 186 93 133% madvise_ 4 872 1313 442 110 151% madvise_ 8 1508 2271 763 95 151% madvise_ 16 3078 4312 1234 77 140% madvise_ 32 5893 8376 2483 78 142% From 5.10 to 6.8 munmap: added 250-550 ns in time, or 500-1100 in cpu cycle, per vma. mprotect: added 200-750 ns in time, or 500-1200 in cpu cycle, per vma. madvise: added 33-50 ns in time, or 70-110 in cpu cycle, per vma. In comparison to mseal, which adds 20-40 ns or 50-100 CPU cycles, the increase from 5.10 to 6.8 is significantly larger, approximately ten times greater for munmap and mprotect. When I discuss the mm performance with Brian Makin, an engineer who worked on performance, it was brought to my attention that such performance benchmarks, which measuring millions of mm syscall in a tight loop, may not accurately reflect real-world scenarios, such as that of a database service. Also this is tested using a single HW and ChromeOS, the data from another HW or distribution might be different. It might be best to take this data with a grain of salt. This patch (of 5): Wire up mseal syscall for all architectures. Link: https://lkml.kernel.org/r/20240415163527.626541-1-jeffxu@chromium.org Link: https://lkml.kernel.org/r/20240415163527.626541-2-jeffxu@chromium.org Signed-off-by: Jeff Xu Reviewed-by: Kees Cook Reviewed-by: Liam R. Howlett Cc: Dave Hansen Cc: Greg Kroah-Hartman Cc: Guenter Roeck Cc: Jann Horn [Bug #2] Cc: Jeff Xu Cc: Jonathan Corbet Cc: Jorge Lucangeli Obes Cc: Linus Torvalds Cc: Matthew Wilcox (Oracle) Cc: Muhammad Usama Anjum Cc: Pedro Falcato Cc: Stephen Röttger Cc: Suren Baghdasaryan Cc: Amer Al Shanawany Cc: Javier Carrasco Cc: Shuah Khan Signed-off-by: Andrew Morton --- arch/alpha/kernel/syscalls/syscall.tbl | 1 + arch/arm/tools/syscall.tbl | 1 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 2 ++ arch/m68k/kernel/syscalls/syscall.tbl | 1 + arch/microblaze/kernel/syscalls/syscall.tbl | 1 + arch/mips/kernel/syscalls/syscall_n32.tbl | 1 + arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 1 + arch/parisc/kernel/syscalls/syscall.tbl | 1 + arch/powerpc/kernel/syscalls/syscall.tbl | 1 + arch/s390/kernel/syscalls/syscall.tbl | 1 + arch/sh/kernel/syscalls/syscall.tbl | 1 + arch/sparc/kernel/syscalls/syscall.tbl | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 1 + 17 files changed, 18 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 26cce7e7f70b..74720667fe09 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -501,3 +501,4 @@ 569 common lsm_get_self_attr sys_lsm_get_self_attr 570 common lsm_set_self_attr sys_lsm_set_self_attr 571 common lsm_list_modules sys_lsm_list_modules +572 common mseal sys_mseal diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index b6c9e01e14f5..2ed7d229c8f9 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -475,3 +475,4 @@ 459 common lsm_get_self_attr sys_lsm_get_self_attr 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules +462 common mseal sys_mseal diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 491b2b9bd553..1346579f802f 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -39,7 +39,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 462 +#define __NR_compat_syscalls 463 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 7118282d1c79..266b96acc014 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -929,6 +929,8 @@ __SYSCALL(__NR_lsm_get_self_attr, sys_lsm_get_self_attr) __SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr) #define __NR_lsm_list_modules 461 __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) +#define __NR_mseal 462 +__SYSCALL(__NR_mseal, sys_mseal) /* * Please add new compat syscalls above this comment and update diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 7fd43fd4c9f2..22a3cbd4c602 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -461,3 +461,4 @@ 459 common lsm_get_self_attr sys_lsm_get_self_attr 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules +462 common mseal sys_mseal diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index b00ab2cabab9..2b81a6bd78b2 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -467,3 +467,4 @@ 459 common lsm_get_self_attr sys_lsm_get_self_attr 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules +462 common mseal sys_mseal diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 83cfc9eb6b88..cc869f5d5693 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -400,3 +400,4 @@ 459 n32 lsm_get_self_attr sys_lsm_get_self_attr 460 n32 lsm_set_self_attr sys_lsm_set_self_attr 461 n32 lsm_list_modules sys_lsm_list_modules +462 n32 mseal sys_mseal diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 532b855df589..1464c6be6eb3 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -376,3 +376,4 @@ 459 n64 lsm_get_self_attr sys_lsm_get_self_attr 460 n64 lsm_set_self_attr sys_lsm_set_self_attr 461 n64 lsm_list_modules sys_lsm_list_modules +462 n64 mseal sys_mseal diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index f45c9530ea93..008ebe60263e 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -449,3 +449,4 @@ 459 o32 lsm_get_self_attr sys_lsm_get_self_attr 460 o32 lsm_set_self_attr sys_lsm_set_self_attr 461 o32 lsm_list_modules sys_lsm_list_modules +462 o32 mseal sys_mseal diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index b236a84c4e12..b13c21373974 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -460,3 +460,4 @@ 459 common lsm_get_self_attr sys_lsm_get_self_attr 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules +462 common mseal sys_mseal diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 17173b82ca21..3656f1ca7a21 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -548,3 +548,4 @@ 459 common lsm_get_self_attr sys_lsm_get_self_attr 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules +462 common mseal sys_mseal diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 095bb86339a7..bd0fee24ad10 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -464,3 +464,4 @@ 459 common lsm_get_self_attr sys_lsm_get_self_attr sys_lsm_get_self_attr 460 common lsm_set_self_attr sys_lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules sys_lsm_list_modules +462 common mseal sys_mseal sys_mseal diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 86fe269f0220..bbf83a2db986 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -464,3 +464,4 @@ 459 common lsm_get_self_attr sys_lsm_get_self_attr 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules +462 common mseal sys_mseal diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index b23d59313589..ac6c281ccfe0 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -507,3 +507,4 @@ 459 common lsm_get_self_attr sys_lsm_get_self_attr 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules +462 common mseal sys_mseal diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 5f8591ce7f25..7fd1f57ad3d3 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -466,3 +466,4 @@ 459 i386 lsm_get_self_attr sys_lsm_get_self_attr 460 i386 lsm_set_self_attr sys_lsm_set_self_attr 461 i386 lsm_list_modules sys_lsm_list_modules +462 i386 mseal sys_mseal diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index cc78226ffc35..a396f6e6ab5b 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -383,6 +383,7 @@ 459 common lsm_get_self_attr sys_lsm_get_self_attr 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules +462 common mseal sys_mseal # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index dd116598fb25..67083fc1b2f5 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -432,3 +432,4 @@ 459 common lsm_get_self_attr sys_lsm_get_self_attr 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules +462 common mseal sys_mseal -- cgit From b1480ed230acf4f7f069a7f5e3ddda62bbf4ba97 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 22 May 2024 00:38:13 +0300 Subject: arm64: patching: fix handling of execmem addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Klara Modin reported warnings for a kernel configured with BPF_JIT but without MODULES: [ 44.131296] Trying to vfree() bad address (000000004a17c299) [ 44.138024] WARNING: CPU: 1 PID: 193 at mm/vmalloc.c:3189 remove_vm_area (mm/vmalloc.c:3189 (discriminator 1)) [ 44.146675] CPU: 1 PID: 193 Comm: kworker/1:2 Tainted: G D W 6.9.0-01786-g2c9e5d4a0082 #25 [ 44.158229] Hardware name: Raspberry Pi 3 Model B (DT) [ 44.164433] Workqueue: events bpf_prog_free_deferred [ 44.170492] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 44.178601] pc : remove_vm_area (mm/vmalloc.c:3189 (discriminator 1)) [ 44.183705] lr : remove_vm_area (mm/vmalloc.c:3189 (discriminator 1)) [ 44.188772] sp : ffff800082a13c70 [ 44.193112] x29: ffff800082a13c70 x28: 0000000000000000 x27: 0000000000000000 [ 44.201384] x26: 0000000000000000 x25: ffff00003a44efa0 x24: 00000000d4202000 [ 44.209658] x23: ffff800081223dd0 x22: ffff00003a198a40 x21: ffff8000814dd880 [ 44.217924] x20: 00000000d4202000 x19: ffff8000814dd880 x18: 0000000000000006 [ 44.226206] x17: 0000000000000000 x16: 0000000000000020 x15: 0000000000000002 [ 44.234460] x14: ffff8000811a6370 x13: 0000000020000000 x12: 0000000000000000 [ 44.242710] x11: ffff8000811a6370 x10: 0000000000000144 x9 : ffff8000811fe370 [ 44.250959] x8 : 0000000000017fe8 x7 : 00000000fffff000 x6 : ffff8000811fe370 [ 44.259206] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000 [ 44.267457] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000002203240 [ 44.275703] Call trace: [ 44.279158] remove_vm_area (mm/vmalloc.c:3189 (discriminator 1)) [ 44.283858] vfree (mm/vmalloc.c:3322) [ 44.287835] execmem_free (mm/execmem.c:70) [ 44.292347] bpf_jit_free_exec+0x10/0x1c [ 44.297283] bpf_prog_pack_free (kernel/bpf/core.c:1006) [ 44.302457] bpf_jit_binary_pack_free (kernel/bpf/core.c:1195) [ 44.307951] bpf_jit_free (include/linux/filter.h:1083 arch/arm64/net/bpf_jit_comp.c:2474) [ 44.312342] bpf_prog_free_deferred (kernel/bpf/core.c:2785) [ 44.317785] process_one_work (kernel/workqueue.c:3273) [ 44.322684] worker_thread (kernel/workqueue.c:3342 (discriminator 2) kernel/workqueue.c:3429 (discriminator 2)) [ 44.327292] kthread (kernel/kthread.c:388) [ 44.331342] ret_from_fork (arch/arm64/kernel/entry.S:861) The problem is because bpf_arch_text_copy() silently fails to write to the read-only area as a result of patch_map() faulting and the resulting -EFAULT being chucked away. Update patch_map() to use CONFIG_EXECMEM instead of CONFIG_STRICT_MODULE_RWX to check for vmalloc addresses. Link: https://lkml.kernel.org/r/20240521213813.703309-1-rppt@kernel.org Fixes: 2c9e5d4a0082 ("bpf: remove CONFIG_BPF_JIT dependency on CONFIG_MODULES of") Signed-off-by: Will Deacon Signed-off-by: Mike Rapoport (IBM) Reported-by: Klara Modin Closes: https://lore.kernel.org/all/7983fbbf-0127-457c-9394-8d6e4299c685@gmail.com Tested-by: Klara Modin Cc: Björn Töpel Cc: Luis Chamberlain Signed-off-by: Andrew Morton --- arch/arm64/kernel/patching.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c index 255534930368..945df74005c7 100644 --- a/arch/arm64/kernel/patching.c +++ b/arch/arm64/kernel/patching.c @@ -36,7 +36,7 @@ static void __kprobes *patch_map(void *addr, int fixmap) if (image) page = phys_to_page(__pa_symbol(addr)); - else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) + else if (IS_ENABLED(CONFIG_EXECMEM)) page = vmalloc_to_page(addr); else return addr; -- cgit From dd6a403795f0c7b5c566f86f2ee6b687278d3c1c Mon Sep 17 00:00:00 2001 From: Shahab Vahedi Date: Sat, 25 May 2024 05:56:28 +0200 Subject: ARC, bpf: Fix issues reported by the static analyzers Also updated couple of comments along the way. One of the issues reported was indeed a bug in the code: memset(ctx, 0, sizeof(ctx)) // original line memset(ctx, 0, sizeof(*ctx)) // fixed line That was a nice catch. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202405222314.UG5F2NHn-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202405232036.Xqoc3b0J-lkp@intel.com/ Signed-off-by: Shahab Vahedi Link: https://lore.kernel.org/r/20240525035628.1026-1-list+bpf@vahedi.org Signed-off-by: Alexei Starovoitov --- arch/arc/net/bpf_jit.h | 2 +- arch/arc/net/bpf_jit_arcv2.c | 10 ++++++---- arch/arc/net/bpf_jit_core.c | 22 +++++++++++----------- 3 files changed, 18 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/arc/net/bpf_jit.h b/arch/arc/net/bpf_jit.h index ec44873c42d1..495f3023e4c1 100644 --- a/arch/arc/net/bpf_jit.h +++ b/arch/arc/net/bpf_jit.h @@ -39,7 +39,7 @@ /************** Functions that the back-end must provide **************/ /* Extension for 32-bit operations. */ -inline u8 zext(u8 *buf, u8 rd); +u8 zext(u8 *buf, u8 rd); /***** Moves *****/ u8 mov_r32(u8 *buf, u8 rd, u8 rs, u8 sign_ext); u8 mov_r32_i32(u8 *buf, u8 reg, s32 imm); diff --git a/arch/arc/net/bpf_jit_arcv2.c b/arch/arc/net/bpf_jit_arcv2.c index 31bfb6e9ce00..4458e409ca0a 100644 --- a/arch/arc/net/bpf_jit_arcv2.c +++ b/arch/arc/net/bpf_jit_arcv2.c @@ -62,7 +62,7 @@ enum { * If/when we decide to add ARCv2 instructions that do use register pairs, * the mapping, hopefully, doesn't need to be revisited. */ -const u8 bpf2arc[][2] = { +static const u8 bpf2arc[][2] = { /* Return value from in-kernel function, and exit value from eBPF */ [BPF_REG_0] = {ARC_R_8, ARC_R_9}, /* Arguments from eBPF program to in-kernel function */ @@ -1302,7 +1302,7 @@ static u8 arc_b(u8 *buf, s32 offset) /************* Packers (Deal with BPF_REGs) **************/ -inline u8 zext(u8 *buf, u8 rd) +u8 zext(u8 *buf, u8 rd) { if (rd != BPF_REG_FP) return arc_movi_r(buf, REG_HI(rd), 0); @@ -2235,6 +2235,7 @@ u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force, bool do_zext) break; default: /* The caller must have handled this. */ + break; } } else { /* @@ -2253,6 +2254,7 @@ u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force, bool do_zext) break; default: /* The caller must have handled this. */ + break; } } @@ -2517,7 +2519,7 @@ u8 arc_epilogue(u8 *buf, u32 usage, u16 frame_size) #define JCC64_NR_OF_JMPS 3 /* Number of jumps in jcc64 template. */ #define JCC64_INSNS_TO_END 3 /* Number of insn. inclusive the 2nd jmp to end. */ #define JCC64_SKIP_JMP 1 /* Index of the "skip" jump to "end". */ -const struct { +static const struct { /* * "jit_off" is common between all "jmp[]" and is coupled with * "cond" of each "jmp[]" instance. e.g.: @@ -2883,7 +2885,7 @@ u8 gen_jmp_64(u8 *buf, u8 rd, u8 rs, u8 cond, u32 curr_off, u32 targ_off) * The "ARC_CC_SET" becomes "CC_unequal" because of the "tst" * instruction that precedes the conditional branch. */ -const u8 arcv2_32_jmps[ARC_CC_LAST] = { +static const u8 arcv2_32_jmps[ARC_CC_LAST] = { [ARC_CC_UGT] = CC_great_u, [ARC_CC_UGE] = CC_great_eq_u, [ARC_CC_ULT] = CC_less_u, diff --git a/arch/arc/net/bpf_jit_core.c b/arch/arc/net/bpf_jit_core.c index 6f6b4ffccf2c..e3628922c24a 100644 --- a/arch/arc/net/bpf_jit_core.c +++ b/arch/arc/net/bpf_jit_core.c @@ -159,7 +159,7 @@ static void jit_dump(const struct jit_context *ctx) /* Initialise the context so there's no garbage. */ static int jit_ctx_init(struct jit_context *ctx, struct bpf_prog *prog) { - memset(ctx, 0, sizeof(ctx)); + memset(ctx, 0, sizeof(*ctx)); ctx->orig_prog = prog; @@ -167,7 +167,7 @@ static int jit_ctx_init(struct jit_context *ctx, struct bpf_prog *prog) ctx->prog = bpf_jit_blind_constants(prog); if (IS_ERR(ctx->prog)) return PTR_ERR(ctx->prog); - ctx->blinded = (ctx->prog == ctx->orig_prog ? false : true); + ctx->blinded = (ctx->prog != ctx->orig_prog); /* If the verifier doesn't zero-extend, then we have to do it. */ ctx->do_zext = !ctx->prog->aux->verifier_zext; @@ -1182,12 +1182,12 @@ static int jit_prepare(struct jit_context *ctx) } /* - * All the "handle_*()" functions have been called before by the - * "jit_prepare()". If there was an error, we would know by now. - * Therefore, no extra error checking at this point, other than - * a sanity check at the end that expects the calculated length - * (jit.len) to be equal to the length of generated instructions - * (jit.index). + * jit_compile() is the real compilation phase. jit_prepare() is + * invoked before jit_compile() as a dry-run to make sure everything + * will go OK and allocate the necessary memory. + * + * In the end, jit_compile() checks if it has produced the same number + * of instructions as jit_prepare() would. */ static int jit_compile(struct jit_context *ctx) { @@ -1407,9 +1407,9 @@ static struct bpf_prog *do_extra_pass(struct bpf_prog *prog) /* * This function may be invoked twice for the same stream of BPF - * instructions. The "extra pass" happens, when there are "call"s - * involved that their addresses are not known during the first - * invocation. + * instructions. The "extra pass" happens, when there are + * (re)locations involved that their addresses are not known + * during the first run. */ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { -- cgit