summaryrefslogtreecommitdiff
path: root/kernel/livepatch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-04-28 14:53:30 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-04-28 14:53:30 -0700
commit586b222d748e91c619d68e9239654ebc7fed9b0c (patch)
tree433154fb388d301fe94831f5a5223545d20fb7f3 /kernel/livepatch
parent7c339778f908875772c17f2e04ed731aac772881 (diff)
parentf31dcb152a3d0816e2f1deab4e64572336da197d (diff)
Merge tag 'sched-core-2023-04-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar: - Allow unprivileged PSI poll()ing - Fix performance regression introduced by mm_cid - Improve livepatch stalls by adding livepatch task switching to cond_resched(). This resolves livepatching busy-loop stalls with certain CPU-bound kthreads - Improve sched_move_task() performance on autogroup configs - On core-scheduling CPUs, avoid selecting throttled tasks to run - Misc cleanups, fixes and improvements * tag 'sched-core-2023-04-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: sched/clock: Fix local_clock() before sched_clock_init() sched/rt: Fix bad task migration for rt tasks sched: Fix performance regression introduced by mm_cid sched/core: Make sched_dynamic_mutex static sched/psi: Allow unprivileged polling of N*2s period sched/psi: Extract update_triggers side effect sched/psi: Rename existing poll members in preparation sched/psi: Rearrange polling code in preparation sched/fair: Fix inaccurate tally of ttwu_move_affine vhost: Fix livepatch timeouts in vhost_worker() livepatch,sched: Add livepatch task switching to cond_resched() livepatch: Skip task_call_func() for current task livepatch: Convert stack entries array to percpu sched: Interleave cfs bandwidth timers for improved single thread performance at low utilization sched/core: Reduce cost of sched_move_task when config autogroup sched/core: Avoid selecting the task that is throttled to run when core-sched enable sched/topology: Make sched_energy_mutex,update static
Diffstat (limited to 'kernel/livepatch')
-rw-r--r--kernel/livepatch/core.c1
-rw-r--r--kernel/livepatch/transition.c122
2 files changed, 105 insertions, 18 deletions
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index fc851455740c..61328328c474 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -33,6 +33,7 @@
*
* - klp_ftrace_handler()
* - klp_update_patch_state()
+ * - __klp_sched_try_switch()
*/
DEFINE_MUTEX(klp_mutex);
diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
index f1b25ec581e0..e9fd83a02228 100644
--- a/kernel/livepatch/transition.c
+++ b/kernel/livepatch/transition.c
@@ -9,11 +9,14 @@
#include <linux/cpu.h>
#include <linux/stacktrace.h>
+#include <linux/static_call.h>
#include "core.h"
#include "patch.h"
#include "transition.h"
#define MAX_STACK_ENTRIES 100
+DEFINE_PER_CPU(unsigned long[MAX_STACK_ENTRIES], klp_stack_entries);
+
#define STACK_ERR_BUF_SIZE 128
#define SIGNALS_TIMEOUT 15
@@ -25,6 +28,25 @@ static int klp_target_state = KLP_UNDEFINED;
static unsigned int klp_signals_cnt;
/*
+ * When a livepatch is in progress, enable klp stack checking in
+ * cond_resched(). This helps CPU-bound kthreads get patched.
+ */
+#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
+
+#define klp_cond_resched_enable() sched_dynamic_klp_enable()
+#define klp_cond_resched_disable() sched_dynamic_klp_disable()
+
+#else /* !CONFIG_PREEMPT_DYNAMIC || !CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
+
+DEFINE_STATIC_KEY_FALSE(klp_sched_try_switch_key);
+EXPORT_SYMBOL(klp_sched_try_switch_key);
+
+#define klp_cond_resched_enable() static_branch_enable(&klp_sched_try_switch_key)
+#define klp_cond_resched_disable() static_branch_disable(&klp_sched_try_switch_key)
+
+#endif /* CONFIG_PREEMPT_DYNAMIC && CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
+
+/*
* This work can be performed periodically to finish patching or unpatching any
* "straggler" tasks which failed to transition in the first attempt.
*/
@@ -172,8 +194,8 @@ void klp_update_patch_state(struct task_struct *task)
* barrier (smp_rmb) for two cases:
*
* 1) Enforce the order of the TIF_PATCH_PENDING read and the
- * klp_target_state read. The corresponding write barrier is in
- * klp_init_transition().
+ * klp_target_state read. The corresponding write barriers are in
+ * klp_init_transition() and klp_reverse_transition().
*
* 2) Enforce the order of the TIF_PATCH_PENDING read and a future read
* of func->transition, if klp_ftrace_handler() is called later on
@@ -240,12 +262,15 @@ static int klp_check_stack_func(struct klp_func *func, unsigned long *entries,
*/
static int klp_check_stack(struct task_struct *task, const char **oldname)
{
- static unsigned long entries[MAX_STACK_ENTRIES];
+ unsigned long *entries = this_cpu_ptr(klp_stack_entries);
struct klp_object *obj;
struct klp_func *func;
int ret, nr_entries;
- ret = stack_trace_save_tsk_reliable(task, entries, ARRAY_SIZE(entries));
+ /* Protect 'klp_stack_entries' */
+ lockdep_assert_preemption_disabled();
+
+ ret = stack_trace_save_tsk_reliable(task, entries, MAX_STACK_ENTRIES);
if (ret < 0)
return -EINVAL;
nr_entries = ret;
@@ -307,7 +332,11 @@ static bool klp_try_switch_task(struct task_struct *task)
* functions. If all goes well, switch the task to the target patch
* state.
*/
- ret = task_call_func(task, klp_check_and_switch_task, &old_name);
+ if (task == current)
+ ret = klp_check_and_switch_task(current, &old_name);
+ else
+ ret = task_call_func(task, klp_check_and_switch_task, &old_name);
+
switch (ret) {
case 0: /* success */
break;
@@ -334,6 +363,44 @@ static bool klp_try_switch_task(struct task_struct *task)
return !ret;
}
+void __klp_sched_try_switch(void)
+{
+ if (likely(!klp_patch_pending(current)))
+ return;
+
+ /*
+ * This function is called from cond_resched() which is called in many
+ * places throughout the kernel. Using the klp_mutex here might
+ * deadlock.
+ *
+ * Instead, disable preemption to prevent racing with other callers of
+ * klp_try_switch_task(). Thanks to task_call_func() they won't be
+ * able to switch this task while it's running.
+ */
+ preempt_disable();
+
+ /*
+ * Make sure current didn't get patched between the above check and
+ * preempt_disable().
+ */
+ if (unlikely(!klp_patch_pending(current)))
+ goto out;
+
+ /*
+ * Enforce the order of the TIF_PATCH_PENDING read above and the
+ * klp_target_state read in klp_try_switch_task(). The corresponding
+ * write barriers are in klp_init_transition() and
+ * klp_reverse_transition().
+ */
+ smp_rmb();
+
+ klp_try_switch_task(current);
+
+out:
+ preempt_enable();
+}
+EXPORT_SYMBOL(__klp_sched_try_switch);
+
/*
* Sends a fake signal to all non-kthread tasks with TIF_PATCH_PENDING set.
* Kthreads with TIF_PATCH_PENDING set are woken up.
@@ -440,7 +507,8 @@ void klp_try_complete_transition(void)
return;
}
- /* we're done, now cleanup the data structures */
+ /* Done! Now cleanup the data structures. */
+ klp_cond_resched_disable();
patch = klp_transition_patch;
klp_complete_transition();
@@ -492,6 +560,8 @@ void klp_start_transition(void)
set_tsk_thread_flag(task, TIF_PATCH_PENDING);
}
+ klp_cond_resched_enable();
+
klp_signals_cnt = 0;
}
@@ -547,8 +617,9 @@ void klp_init_transition(struct klp_patch *patch, int state)
* see a func in transition with a task->patch_state of KLP_UNDEFINED.
*
* Also enforce the order of the klp_target_state write and future
- * TIF_PATCH_PENDING writes to ensure klp_update_patch_state() doesn't
- * set a task->patch_state to KLP_UNDEFINED.
+ * TIF_PATCH_PENDING writes to ensure klp_update_patch_state() and
+ * __klp_sched_try_switch() don't set a task->patch_state to
+ * KLP_UNDEFINED.
*/
smp_wmb();
@@ -584,14 +655,10 @@ void klp_reverse_transition(void)
klp_target_state == KLP_PATCHED ? "patching to unpatching" :
"unpatching to patching");
- klp_transition_patch->enabled = !klp_transition_patch->enabled;
-
- klp_target_state = !klp_target_state;
-
/*
* Clear all TIF_PATCH_PENDING flags to prevent races caused by
- * klp_update_patch_state() running in parallel with
- * klp_start_transition().
+ * klp_update_patch_state() or __klp_sched_try_switch() running in
+ * parallel with the reverse transition.
*/
read_lock(&tasklist_lock);
for_each_process_thread(g, task)
@@ -601,9 +668,28 @@ void klp_reverse_transition(void)
for_each_possible_cpu(cpu)
clear_tsk_thread_flag(idle_task(cpu), TIF_PATCH_PENDING);
- /* Let any remaining calls to klp_update_patch_state() complete */
+ /*
+ * Make sure all existing invocations of klp_update_patch_state() and
+ * __klp_sched_try_switch() see the cleared TIF_PATCH_PENDING before
+ * starting the reverse transition.
+ */
klp_synchronize_transition();
+ /*
+ * All patching has stopped, now re-initialize the global variables to
+ * prepare for the reverse transition.
+ */
+ klp_transition_patch->enabled = !klp_transition_patch->enabled;
+ klp_target_state = !klp_target_state;
+
+ /*
+ * Enforce the order of the klp_target_state write and the
+ * TIF_PATCH_PENDING writes in klp_start_transition() to ensure
+ * klp_update_patch_state() and __klp_sched_try_switch() don't set
+ * task->patch_state to the wrong value.
+ */
+ smp_wmb();
+
klp_start_transition();
}
@@ -617,9 +703,9 @@ void klp_copy_process(struct task_struct *child)
* the task flag up to date with the parent here.
*
* The operation is serialized against all klp_*_transition()
- * operations by the tasklist_lock. The only exception is
- * klp_update_patch_state(current), but we cannot race with
- * that because we are current.
+ * operations by the tasklist_lock. The only exceptions are
+ * klp_update_patch_state(current) and __klp_sched_try_switch(), but we
+ * cannot race with them because we are current.
*/
if (test_tsk_thread_flag(current, TIF_PATCH_PENDING))
set_tsk_thread_flag(child, TIF_PATCH_PENDING);