Diffstat (limited to 'include/linux/rseq.h')
| -rw-r--r-- | include/linux/rseq.h | 166 |
1 file changed, 166 insertions, 0 deletions
diff --git a/include/linux/rseq.h b/include/linux/rseq.h
new file mode 100644
index 000000000000..2266f4dc77b6
--- /dev/null
+++ b/include/linux/rseq.h
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+#ifndef _LINUX_RSEQ_H
+#define _LINUX_RSEQ_H
+
+#ifdef CONFIG_RSEQ
+#include <linux/sched.h>
+
+#include <uapi/linux/rseq.h>
+
+void __rseq_handle_slowpath(struct pt_regs *regs);
+
+/* Invoked from resume_user_mode_work() */
+static inline void rseq_handle_slowpath(struct pt_regs *regs)
+{
+	if (IS_ENABLED(CONFIG_GENERIC_ENTRY)) {
+		if (current->rseq.event.slowpath)
+			__rseq_handle_slowpath(regs);
+	} else {
+		/* '&' is intentional to spare one conditional branch */
+		if (current->rseq.event.sched_switch & current->rseq.event.has_rseq)
+			__rseq_handle_slowpath(regs);
+	}
+}
+
+void __rseq_signal_deliver(int sig, struct pt_regs *regs);
+
+/*
+ * Invoked from signal delivery to fixup based on the register context before
+ * switching to the signal delivery context.
+ */
+static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs)
+{
+	if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
+		/* '&' is intentional to spare one conditional branch */
+		if (current->rseq.event.has_rseq & current->rseq.event.user_irq)
+			__rseq_signal_deliver(ksig->sig, regs);
+	} else {
+		if (current->rseq.event.has_rseq)
+			__rseq_signal_deliver(ksig->sig, regs);
+	}
+}
+
+static inline void rseq_raise_notify_resume(struct task_struct *t)
+{
+	set_tsk_thread_flag(t, TIF_RSEQ);
+}
+
+/* Invoked from context switch to force evaluation on exit to user */
+static __always_inline void rseq_sched_switch_event(struct task_struct *t)
+{
+	struct rseq_event *ev = &t->rseq.event;
+
+	if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
+		/*
+		 * Avoid a boat load of conditionals by using simple logic
+		 * to determine whether NOTIFY_RESUME needs to be raised.
+		 *
+		 * It's required when the CPU or MM CID has changed or
+		 * the entry was from user space.
+		 */
+		bool raise = (ev->user_irq | ev->ids_changed) & ev->has_rseq;
+
+		if (raise) {
+			ev->sched_switch = true;
+			rseq_raise_notify_resume(t);
+		}
+	} else {
+		if (ev->has_rseq) {
+			t->rseq.event.sched_switch = true;
+			rseq_raise_notify_resume(t);
+		}
+	}
+}
+
+/*
+ * Invoked from __set_task_cpu() when a task migrates or from
+ * mm_cid_schedin() when the CID changes to enforce an IDs update.
+ *
+ * This does not raise TIF_NOTIFY_RESUME as that happens in
+ * rseq_sched_switch_event().
+ */
+static __always_inline void rseq_sched_set_ids_changed(struct task_struct *t)
+{
+	t->rseq.event.ids_changed = true;
+}
+
+/* Enforce a full update after RSEQ registration and when execve() failed */
+static inline void rseq_force_update(void)
+{
+	if (current->rseq.event.has_rseq) {
+		current->rseq.event.ids_changed = true;
+		current->rseq.event.sched_switch = true;
+		rseq_raise_notify_resume(current);
+	}
+}
+
+/*
+ * KVM/HYPERV invoke resume_user_mode_work() before entering guest mode,
+ * which clears TIF_NOTIFY_RESUME on architectures that don't use the
+ * generic TIF bits and therefore can't provide a separate TIF_RSEQ flag.
+ *
+ * To avoid updating user space RSEQ in that case just to do it eventually
+ * again before returning to user space, __rseq_handle_slowpath() does
+ * nothing when invoked with NULL register state.
+ *
+ * After returning from guest mode, before exiting to user space, hypervisors
+ * must invoke this function to re-raise TIF_NOTIFY_RESUME if necessary.
+ */
+static inline void rseq_virt_userspace_exit(void)
+{
+	/*
+	 * The generic optimization for deferring RSEQ updates until the next
+	 * exit relies on having a dedicated TIF_RSEQ.
+	 */
+	if (!IS_ENABLED(CONFIG_HAVE_GENERIC_TIF_BITS) &&
+	    current->rseq.event.sched_switch)
+		rseq_raise_notify_resume(current);
+}
+
+static inline void rseq_reset(struct task_struct *t)
+{
+	memset(&t->rseq, 0, sizeof(t->rseq));
+	t->rseq.ids.cpu_id = RSEQ_CPU_ID_UNINITIALIZED;
+}
+
+static inline void rseq_execve(struct task_struct *t)
+{
+	rseq_reset(t);
+}
+
+/*
+ * If the parent process has a registered restartable sequences area, the
+ * child inherits it. Unregister rseq for a clone with CLONE_VM set.
+ *
+ * On fork, keep the IDs (CPU, MM CID) of the parent, which avoids a fault
+ * on the COW page on exit to user space when the child stays on the same
+ * CPU as the parent. That's obviously not guaranteed, but in overcommit
+ * scenarios it is more likely and optimizes for the fork/exec case without
+ * taking the fault.
+ */
+static inline void rseq_fork(struct task_struct *t, u64 clone_flags)
+{
+	if (clone_flags & CLONE_VM)
+		rseq_reset(t);
+	else
+		t->rseq = current->rseq;
+}
+
+#else /* CONFIG_RSEQ */
+static inline void rseq_handle_slowpath(struct pt_regs *regs) { }
+static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
+static inline void rseq_sched_switch_event(struct task_struct *t) { }
+static inline void rseq_sched_set_ids_changed(struct task_struct *t) { }
+static inline void rseq_force_update(void) { }
+static inline void rseq_virt_userspace_exit(void) { }
+static inline void rseq_fork(struct task_struct *t, u64 clone_flags) { }
+static inline void rseq_execve(struct task_struct *t) { }
+#endif /* !CONFIG_RSEQ */
+
+#ifdef CONFIG_DEBUG_RSEQ
+void rseq_syscall(struct pt_regs *regs);
+#else /* CONFIG_DEBUG_RSEQ */
+static inline void rseq_syscall(struct pt_regs *regs) { }
+#endif /* !CONFIG_DEBUG_RSEQ */
+
+#endif /* _LINUX_RSEQ_H */
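A note on the '&' in place of '&&' in rseq_handle_slowpath() and rseq_signal_deliver() above: bitwise AND evaluates both boolean flags unconditionally, so the compiler can combine them into a single test, whereas short-circuiting '&&' often compiles to two conditional branches. A minimal stand-alone sketch of the idiom; the struct and function names here are illustrative, not part of the kernel code:

#include <stdbool.h>
#include <stdio.h>

struct event_flags {
	bool sched_switch;
	bool has_rseq;
};

/*
 * With '&&' the language mandates short-circuit evaluation, which
 * typically costs an extra conditional branch. With '&' both flags are
 * loaded unconditionally and folded into one test-and-branch.
 */
static bool needs_slowpath(const struct event_flags *ev)
{
	return ev->sched_switch & ev->has_rseq;
}

int main(void)
{
	struct event_flags ev = { .sched_switch = true, .has_rseq = true };

	printf("slowpath needed: %d\n", needs_slowpath(&ev));
	return 0;
}

This only pays off because both operands are cheap side-effect-free flag loads; with '&&' the compiler may not be able to prove that evaluating the second operand early is safe.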
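For context on the has_rseq state checked throughout the header: it reflects a user space registration of an rseq area via the rseq() syscall. Below is a hedged sketch of a raw registration, assuming a libc that has not already registered rseq on its own (glibc 2.35 and later registers it per thread, in which case this call fails with EBUSY); RSEQ_SIG is a caller-chosen signature constant, not something defined by the kernel UAPI:

#define _GNU_SOURCE
#include <linux/rseq.h>		/* UAPI struct rseq */
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Caller-chosen signature; must match on a later unregistration */
#define RSEQ_SIG	0x53053053

/* The UAPI type already carries the required 32-byte alignment */
static __thread struct rseq rseq_area;

int main(void)
{
	/* rseq(rseq, rseq_len, flags, sig) has no libc wrapper */
	if (syscall(__NR_rseq, &rseq_area, sizeof(rseq_area), 0, RSEQ_SIG)) {
		perror("rseq registration");
		return 1;
	}
	/* The kernel now keeps cpu_id up to date for this thread */
	printf("running on CPU %u\n", rseq_area.cpu_id);
	return 0;
}

The value 0x53053053 is the signature conventionally used by the kernel's rseq selftests; any constant works as long as registration and unregistration agree on it.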
