From 5c251e9dc0e127bac6fc5b8e6696363d2e35f515 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 26 Oct 2020 14:32:27 -0600 Subject: signal: Add task_sigpending() helper This is in preparation for maintaining signal_pending() as the decider of whether or not a schedule() loop should be broken, or continue sleeping. This is different than the core signal use cases, which really need to know whether an actual signal is pending or not. task_sigpending() returns non-zero if TIF_SIGPENDING is set. Only core kernel use cases should care about the distinction between the two, make sure those use the task_sigpending() helper. Signed-off-by: Jens Axboe Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Reviewed-by: Oleg Nesterov Link: https://lore.kernel.org/r/20201026203230.386348-2-axboe@kernel.dk --- include/linux/sched/signal.h | 9 +++++++-- kernel/events/uprobes.c | 2 +- kernel/signal.c | 8 ++++---- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 1bad18a1d8ba..404145dc536e 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -353,11 +353,16 @@ static inline int restart_syscall(void) return -ERESTARTNOINTR; } -static inline int signal_pending(struct task_struct *p) +static inline int task_sigpending(struct task_struct *p) { return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); } +static inline int signal_pending(struct task_struct *p) +{ + return task_sigpending(p); +} + static inline int __fatal_signal_pending(struct task_struct *p) { return unlikely(sigismember(&p->pending.signal, SIGKILL)); @@ -365,7 +370,7 @@ static inline int __fatal_signal_pending(struct task_struct *p) static inline int fatal_signal_pending(struct task_struct *p) { - return signal_pending(p) && __fatal_signal_pending(p); + return task_sigpending(p) && __fatal_signal_pending(p); } static inline int signal_pending_state(long state, struct task_struct *p) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 649fd53dc9ad..edd0c985a939 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1973,7 +1973,7 @@ bool uprobe_deny_signal(void) WARN_ON_ONCE(utask->state != UTASK_SSTEP); - if (signal_pending(t)) { + if (task_sigpending(t)) { spin_lock_irq(&t->sighand->siglock); clear_tsk_thread_flag(t, TIF_SIGPENDING); spin_unlock_irq(&t->sighand->siglock); diff --git a/kernel/signal.c b/kernel/signal.c index 42b67d2cea37..b179eccc86d0 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -983,7 +983,7 @@ static inline bool wants_signal(int sig, struct task_struct *p) if (task_is_stopped_or_traced(p)) return false; - return task_curr(p) || !signal_pending(p); + return task_curr(p) || !task_sigpending(p); } static void complete_signal(int sig, struct task_struct *p, enum pid_type type) @@ -2822,7 +2822,7 @@ static void retarget_shared_pending(struct task_struct *tsk, sigset_t *which) /* Remove the signals this thread can handle. */ sigandsets(&retarget, &retarget, &t->blocked); - if (!signal_pending(t)) + if (!task_sigpending(t)) signal_wake_up(t, 0); if (sigisemptyset(&retarget)) @@ -2856,7 +2856,7 @@ void exit_signals(struct task_struct *tsk) cgroup_threadgroup_change_end(tsk); - if (!signal_pending(tsk)) + if (!task_sigpending(tsk)) goto out; unblocked = tsk->blocked; @@ -2900,7 +2900,7 @@ long do_no_restart_syscall(struct restart_block *param) static void __set_task_blocked(struct task_struct *tsk, const sigset_t *newset) { - if (signal_pending(tsk) && !thread_group_empty(tsk)) { + if (task_sigpending(tsk) && !thread_group_empty(tsk)) { sigset_t newblocked; /* A set of now blocked but previously unblocked signals. */ sigandnsets(&newblocked, newset, ¤t->blocked); -- cgit From 12db8b690010ccfadf9d0b49a1e1798e47dbbe1a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 26 Oct 2020 14:32:28 -0600 Subject: entry: Add support for TIF_NOTIFY_SIGNAL Add TIF_NOTIFY_SIGNAL handling in the generic entry code, which if set, will return true if signal_pending() is used in a wait loop. That causes an exit of the loop so that notify_signal tracehooks can be run. If the wait loop is currently inside a system call, the system call is restarted once task_work has been processed. In preparation for only having arch_do_signal() handle syscall restarts if _TIF_SIGPENDING isn't set, rename it to arch_do_signal_or_restart(). Pass in a boolean that tells the architecture specific signal handler if it should attempt to get a signal, or just process a potential syscall restart. For !CONFIG_GENERIC_ENTRY archs, add the TIF_NOTIFY_SIGNAL handling to get_signal(). This is done to minimize the needed architecture changes to support this feature. Signed-off-by: Jens Axboe Signed-off-by: Thomas Gleixner Reviewed-by: Oleg Nesterov Link: https://lore.kernel.org/r/20201026203230.386348-3-axboe@kernel.dk --- arch/x86/kernel/signal.c | 4 ++-- include/linux/entry-common.h | 11 ++++++++--- include/linux/entry-kvm.h | 4 ++-- include/linux/sched/signal.h | 11 ++++++++++- include/linux/tracehook.h | 27 +++++++++++++++++++++++++++ kernel/entry/common.c | 14 +++++++++++--- kernel/entry/kvm.c | 3 +++ kernel/signal.c | 14 ++++++++++++++ 8 files changed, 77 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index d5fa494c2304..ec3b9c6e5a4c 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -804,11 +804,11 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) * want to handle. Thus you cannot kill init even with a SIGKILL even by * mistake. */ -void arch_do_signal(struct pt_regs *regs) +void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) { struct ksignal ksig; - if (get_signal(&ksig)) { + if (has_signal && get_signal(&ksig)) { /* Whee! Actually deliver the signal. */ handle_signal(&ksig, regs); return; diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index efebbffcd5cc..c7bfac45f951 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -37,6 +37,10 @@ # define _TIF_UPROBE (0) #endif +#ifndef _TIF_NOTIFY_SIGNAL +# define _TIF_NOTIFY_SIGNAL (0) +#endif + /* * TIF flags handled in syscall_enter_from_usermode() */ @@ -69,7 +73,7 @@ #define EXIT_TO_USER_MODE_WORK \ (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ - _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | \ + _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ ARCH_EXIT_TO_USER_MODE_WORK) /** @@ -226,12 +230,13 @@ static __always_inline void arch_exit_to_user_mode(void) { } #endif /** - * arch_do_signal - Architecture specific signal delivery function + * arch_do_signal_or_restart - Architecture specific signal delivery function * @regs: Pointer to currents pt_regs + * @has_signal: actual signal to handle * * Invoked from exit_to_user_mode_loop(). */ -void arch_do_signal(struct pt_regs *regs); +void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal); /** * arch_syscall_exit_tracehook - Wrapper around tracehook_report_syscall_exit() diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h index 0cef17afb41a..9b93f8584ff7 100644 --- a/include/linux/entry-kvm.h +++ b/include/linux/entry-kvm.h @@ -11,8 +11,8 @@ # define ARCH_XFER_TO_GUEST_MODE_WORK (0) #endif -#define XFER_TO_GUEST_MODE_WORK \ - (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ +#define XFER_TO_GUEST_MODE_WORK \ + (_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL | \ _TIF_NOTIFY_RESUME | ARCH_XFER_TO_GUEST_MODE_WORK) struct kvm_vcpu; diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 404145dc536e..bd5afa076189 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -360,6 +360,15 @@ static inline int task_sigpending(struct task_struct *p) static inline int signal_pending(struct task_struct *p) { +#if defined(TIF_NOTIFY_SIGNAL) + /* + * TIF_NOTIFY_SIGNAL isn't really a signal, but it requires the same + * behavior in terms of ensuring that we break out of wait loops + * so that notify signal callbacks can be processed. + */ + if (unlikely(test_tsk_thread_flag(p, TIF_NOTIFY_SIGNAL))) + return 1; +#endif return task_sigpending(p); } @@ -507,7 +516,7 @@ extern int set_user_sigmask(const sigset_t __user *umask, size_t sigsetsize); static inline void restore_saved_sigmask_unless(bool interrupted) { if (interrupted) - WARN_ON(!test_thread_flag(TIF_SIGPENDING)); + WARN_ON(!signal_pending(current)); else restore_saved_sigmask(); } diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 36fb3bbed6b2..1e8caca92e1f 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -198,4 +198,31 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) blkcg_maybe_throttle_current(); } +/* + * called by exit_to_user_mode_loop() if ti_work & _TIF_NOTIFY_SIGNAL. This + * is currently used by TWA_SIGNAL based task_work, which requires breaking + * wait loops to ensure that task_work is noticed and run. + */ +static inline void tracehook_notify_signal(void) +{ +#if defined(TIF_NOTIFY_SIGNAL) + clear_thread_flag(TIF_NOTIFY_SIGNAL); + smp_mb__after_atomic(); + if (current->task_works) + task_work_run(); +#endif +} + +/* + * Called when we have work to process from exit_to_user_mode_loop() + */ +static inline void set_notify_signal(struct task_struct *task) +{ +#if defined(TIF_NOTIFY_SIGNAL) + if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) && + !wake_up_state(task, TASK_INTERRUPTIBLE)) + kick_process(task); +#endif +} + #endif /* */ diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 9852e0d62d95..42eff115c426 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -109,7 +109,15 @@ static __always_inline void exit_to_user_mode(void) } /* Workaround to allow gradual conversion of architecture code */ -void __weak arch_do_signal(struct pt_regs *regs) { } +void __weak arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) { } + +static void handle_signal_work(struct pt_regs *regs, unsigned long ti_work) +{ + if (ti_work & _TIF_NOTIFY_SIGNAL) + tracehook_notify_signal(); + + arch_do_signal_or_restart(regs, ti_work & _TIF_SIGPENDING); +} static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, unsigned long ti_work) @@ -131,8 +139,8 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, if (ti_work & _TIF_PATCH_PENDING) klp_update_patch_state(current); - if (ti_work & _TIF_SIGPENDING) - arch_do_signal(regs); + if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) + handle_signal_work(regs, ti_work); if (ti_work & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c index eb1a8a4c867c..b828a3ddebf1 100644 --- a/kernel/entry/kvm.c +++ b/kernel/entry/kvm.c @@ -8,6 +8,9 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) do { int ret; + if (ti_work & _TIF_NOTIFY_SIGNAL) + tracehook_notify_signal(); + if (ti_work & _TIF_SIGPENDING) { kvm_handle_signal_exit(vcpu); return -EINTR; diff --git a/kernel/signal.c b/kernel/signal.c index b179eccc86d0..61b377e65c46 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2529,6 +2529,20 @@ bool get_signal(struct ksignal *ksig) struct signal_struct *signal = current->signal; int signr; + /* + * For non-generic architectures, check for TIF_NOTIFY_SIGNAL so + * that the arch handlers don't all have to do it. If we get here + * without TIF_SIGPENDING, just exit after running signal work. + */ +#ifdef TIF_NOTIFY_SIGNAL + if (!IS_ENABLED(CONFIG_GENERIC_ENTRY)) { + if (test_thread_flag(TIF_NOTIFY_SIGNAL)) + tracehook_notify_signal(); + if (!task_sigpending(current)) + return false; + } +#endif + if (unlikely(uprobe_deny_signal())) return false; -- cgit From 114518eb6430b832d2f9f5a008043b913ccf0e24 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 26 Oct 2020 14:32:30 -0600 Subject: task_work: Use TIF_NOTIFY_SIGNAL if available If the arch supports TIF_NOTIFY_SIGNAL, then use that for TWA_SIGNAL as it's more efficient than using the signal delivery method. This is especially true on threaded applications, where ->sighand is shared across threads, but it's also lighter weight on non-shared cases. io_uring is a heavy consumer of TWA_SIGNAL based task_work. A test with threads shows a nice improvement running an io_uring based echo server. stock kernel: 0.01% <= 0.1 milliseconds 95.86% <= 0.2 milliseconds 98.27% <= 0.3 milliseconds 99.71% <= 0.4 milliseconds 100.00% <= 0.5 milliseconds 100.00% <= 0.6 milliseconds 100.00% <= 0.7 milliseconds 100.00% <= 0.8 milliseconds 100.00% <= 0.9 milliseconds 100.00% <= 1.0 milliseconds 100.00% <= 1.1 milliseconds 100.00% <= 2 milliseconds 100.00% <= 3 milliseconds 100.00% <= 3 milliseconds 1378930.00 requests per second ~1600% CPU 1.38M requests/second, and all 16 CPUs are maxed out. patched kernel: 0.01% <= 0.1 milliseconds 98.24% <= 0.2 milliseconds 99.47% <= 0.3 milliseconds 99.99% <= 0.4 milliseconds 100.00% <= 0.5 milliseconds 100.00% <= 0.6 milliseconds 100.00% <= 0.7 milliseconds 100.00% <= 0.8 milliseconds 100.00% <= 0.9 milliseconds 100.00% <= 1.2 milliseconds 1666111.38 requests per second ~1450% CPU 1.67M requests/second, and we're no longer just hammering on the sighand lock. The original reporter states: "For 5.7.15 my benchmark achieves 1.6M qps and system cpu is at ~80%. for 5.7.16 or later it achieves only 1M qps and the system cpu is is at ~100%" with the only difference there being that TWA_SIGNAL is used unconditionally in 5.7.16, since it's required to be able to handle the inability to run task_work if the application is waiting in the kernel already on an event that needs task_work run to be satisfied. Also see commit 0ba9c9edcd15. Reported-by: Roman Gershman Signed-off-by: Jens Axboe Signed-off-by: Thomas Gleixner Reviewed-by: Oleg Nesterov Link: https://lore.kernel.org/r/20201026203230.386348-5-axboe@kernel.dk --- kernel/task_work.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/kernel/task_work.c b/kernel/task_work.c index 613b2d634af8..ae058893913c 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -5,6 +5,34 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */ +/* + * TWA_SIGNAL signaling - use TIF_NOTIFY_SIGNAL, if available, as it's faster + * than TIF_SIGPENDING as there's no dependency on ->sighand. The latter is + * shared for threads, and can cause contention on sighand->lock. Even for + * the non-threaded case TIF_NOTIFY_SIGNAL is more efficient, as no locking + * or IRQ disabling is involved for notification (or running) purposes. + */ +static void task_work_notify_signal(struct task_struct *task) +{ +#if defined(TIF_NOTIFY_SIGNAL) + set_notify_signal(task); +#else + unsigned long flags; + + /* + * Only grab the sighand lock if we don't already have some + * task_work pending. This pairs with the smp_store_mb() + * in get_signal(), see comment there. + */ + if (!(READ_ONCE(task->jobctl) & JOBCTL_TASK_WORK) && + lock_task_sighand(task, &flags)) { + task->jobctl |= JOBCTL_TASK_WORK; + signal_wake_up(task, 0); + unlock_task_sighand(task, &flags); + } +#endif +} + /** * task_work_add - ask the @task to execute @work->func() * @task: the task which should run the callback @@ -28,7 +56,6 @@ int task_work_add(struct task_struct *task, struct callback_head *work, int notify) { struct callback_head *head; - unsigned long flags; do { head = READ_ONCE(task->task_works); @@ -42,17 +69,7 @@ task_work_add(struct task_struct *task, struct callback_head *work, int notify) set_notify_resume(task); break; case TWA_SIGNAL: - /* - * Only grab the sighand lock if we don't already have some - * task_work pending. This pairs with the smp_store_mb() - * in get_signal(), see comment there. - */ - if (!(READ_ONCE(task->jobctl) & JOBCTL_TASK_WORK) && - lock_task_sighand(task, &flags)) { - task->jobctl |= JOBCTL_TASK_WORK; - signal_wake_up(task, 0); - unlock_task_sighand(task, &flags); - } + task_work_notify_signal(task); break; } -- cgit