summaryrefslogtreecommitdiff
path: root/include/linux/rseq.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/rseq.h')
-rw-r--r--include/linux/rseq.h166
1 files changed, 166 insertions, 0 deletions
diff --git a/include/linux/rseq.h b/include/linux/rseq.h
new file mode 100644
index 000000000000..2266f4dc77b6
--- /dev/null
+++ b/include/linux/rseq.h
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+#ifndef _LINUX_RSEQ_H
+#define _LINUX_RSEQ_H
+
+#ifdef CONFIG_RSEQ
+#include <linux/sched.h>
+
+#include <uapi/linux/rseq.h>
+
+void __rseq_handle_slowpath(struct pt_regs *regs);
+
+/* Invoked from resume_user_mode_work() */
+static inline void rseq_handle_slowpath(struct pt_regs *regs)
+{
+ if (IS_ENABLED(CONFIG_GENERIC_ENTRY)) {
+ if (current->rseq.event.slowpath)
+ __rseq_handle_slowpath(regs);
+ } else {
+ /* '&' is intentional to spare one conditional branch */
+ if (current->rseq.event.sched_switch & current->rseq.event.has_rseq)
+ __rseq_handle_slowpath(regs);
+ }
+}
+
+void __rseq_signal_deliver(int sig, struct pt_regs *regs);
+
+/*
+ * Invoked from signal delivery to fixup based on the register context before
+ * switching to the signal delivery context.
+ */
+static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs)
+{
+ if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
+ /* '&' is intentional to spare one conditional branch */
+ if (current->rseq.event.has_rseq & current->rseq.event.user_irq)
+ __rseq_signal_deliver(ksig->sig, regs);
+ } else {
+ if (current->rseq.event.has_rseq)
+ __rseq_signal_deliver(ksig->sig, regs);
+ }
+}
+
+static inline void rseq_raise_notify_resume(struct task_struct *t)
+{
+ set_tsk_thread_flag(t, TIF_RSEQ);
+}
+
+/* Invoked from context switch to force evaluation on exit to user */
+static __always_inline void rseq_sched_switch_event(struct task_struct *t)
+{
+ struct rseq_event *ev = &t->rseq.event;
+
+ if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
+ /*
+ * Avoid a boat load of conditionals by using simple logic
+ * to determine whether NOTIFY_RESUME needs to be raised.
+ *
+ * It's required when the CPU or MM CID has changed or
+ * the entry was from user space.
+ */
+ bool raise = (ev->user_irq | ev->ids_changed) & ev->has_rseq;
+
+ if (raise) {
+ ev->sched_switch = true;
+ rseq_raise_notify_resume(t);
+ }
+ } else {
+ if (ev->has_rseq) {
+ t->rseq.event.sched_switch = true;
+ rseq_raise_notify_resume(t);
+ }
+ }
+}
+
+/*
+ * Invoked from __set_task_cpu() when a task migrates or from
+ * mm_cid_schedin() when the CID changes to enforce an IDs update.
+ *
+ * This does not raise TIF_NOTIFY_RESUME as that happens in
+ * rseq_sched_switch_event().
+ */
+static __always_inline void rseq_sched_set_ids_changed(struct task_struct *t)
+{
+ t->rseq.event.ids_changed = true;
+}
+
+/* Enforce a full update after RSEQ registration and when execve() failed */
+static inline void rseq_force_update(void)
+{
+ if (current->rseq.event.has_rseq) {
+ current->rseq.event.ids_changed = true;
+ current->rseq.event.sched_switch = true;
+ rseq_raise_notify_resume(current);
+ }
+}
+
+/*
+ * KVM/HYPERV invoke resume_user_mode_work() before entering guest mode,
+ * which clears TIF_NOTIFY_RESUME on architectures that don't use the
+ * generic TIF bits and therefore can't provide a separate TIF_RSEQ flag.
+ *
+ * To avoid updating user space RSEQ in that case just to do it eventually
+ * again before returning to user space, because __rseq_handle_slowpath()
+ * does nothing when invoked with NULL register state.
+ *
+ * After returning from guest mode, before exiting to userspace, hypervisors
+ * must invoke this function to re-raise TIF_NOTIFY_RESUME if necessary.
+ */
+static inline void rseq_virt_userspace_exit(void)
+{
+ /*
+ * The generic optimization for deferring RSEQ updates until the next
+ * exit relies on having a dedicated TIF_RSEQ.
+ */
+ if (!IS_ENABLED(CONFIG_HAVE_GENERIC_TIF_BITS) &&
+ current->rseq.event.sched_switch)
+ rseq_raise_notify_resume(current);
+}
+
+static inline void rseq_reset(struct task_struct *t)
+{
+ memset(&t->rseq, 0, sizeof(t->rseq));
+ t->rseq.ids.cpu_id = RSEQ_CPU_ID_UNINITIALIZED;
+}
+
+static inline void rseq_execve(struct task_struct *t)
+{
+ rseq_reset(t);
+}
+
+/*
+ * If parent process has a registered restartable sequences area, the
+ * child inherits. Unregister rseq for a clone with CLONE_VM set.
+ *
+ * On fork, keep the IDs (CPU, MMCID) of the parent, which avoids a fault
+ * on the COW page on exit to user space, when the child stays on the same
+ * CPU as the parent. That's obviously not guaranteed, but in overcommit
+ * scenarios it is more likely and optimizes for the fork/exec case without
+ * taking the fault.
+ */
+static inline void rseq_fork(struct task_struct *t, u64 clone_flags)
+{
+ if (clone_flags & CLONE_VM)
+ rseq_reset(t);
+ else
+ t->rseq = current->rseq;
+}
+
+#else /* CONFIG_RSEQ */
+static inline void rseq_handle_slowpath(struct pt_regs *regs) { }
+static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
+static inline void rseq_sched_switch_event(struct task_struct *t) { }
+static inline void rseq_sched_set_ids_changed(struct task_struct *t) { }
+static inline void rseq_force_update(void) { }
+static inline void rseq_virt_userspace_exit(void) { }
+static inline void rseq_fork(struct task_struct *t, u64 clone_flags) { }
+static inline void rseq_execve(struct task_struct *t) { }
+#endif /* !CONFIG_RSEQ */
+
+#ifdef CONFIG_DEBUG_RSEQ
+void rseq_syscall(struct pt_regs *regs);
+#else /* CONFIG_DEBUG_RSEQ */
+static inline void rseq_syscall(struct pt_regs *regs) { }
+#endif /* !CONFIG_DEBUG_RSEQ */
+
+#endif /* _LINUX_RSEQ_H */