From 87ff19cb2f1aa55a5d8b691e6690cc059a59d2ec Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 2 Dec 2018 21:31:30 -0800 Subject: sched/wake_q: Add branch prediction hint to wake_q_add() cmpxchg The cmpxchg() will fail when the task is already in the process of waking up, and as such is an extremely rare occurrence. Micro-optimize the call and put an unlikely() around it. To no surprise, when using CONFIG_PROFILE_ANNOTATED_BRANCHES under a number of workloads the incorrect rate was a mere 1-2%. Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: Yongji Xie Cc: andrea.parri@amarulasolutions.com Cc: lilin24@baidu.com Cc: liuqi16@baidu.com Cc: nixun@baidu.com Cc: xieyongji@baidu.com Cc: yuanlinsi01@baidu.com Cc: zhangyu31@baidu.com Link: https://lkml.kernel.org/r/20181203053130.gwkw6kg72azt2npb@linux-r8p5 Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d8d76a65cfdd..b05eef7d7a1f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -421,7 +421,7 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task) * state, even in the failed case, an explicit smp_mb() must be used. */ smp_mb__before_atomic(); - if (cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL)) + if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) return; get_task_struct(task); -- cgit From b5a4e2bb0f4c86bfeb38df3e1d5b2f1272f0e673 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Wed, 19 Dec 2018 18:23:16 +0000 Subject: Revert "sched/core: Take the hotplug lock in sched_init_smp()" This reverts commit 40fa3780bac2b654edf23f6b13f4e2dd550aea10. Now that we have a system-wide muting of hotplug lockdep during init, this is no longer needed. 
Signed-off-by: Valentin Schneider Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: cai@gmx.us Cc: daniel.lezcano@linaro.org Cc: dietmar.eggemann@arm.com Cc: linux-arm-kernel@lists.infradead.org Cc: longman@redhat.com Cc: marc.zyngier@arm.com Cc: mark.rutland@arm.com Link: https://lkml.kernel.org/r/1545243796-23224-3-git-send-email-valentin.schneider@arm.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'kernel/sched') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b05eef7d7a1f..3c8b4dba3d2d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5867,14 +5867,11 @@ void __init sched_init_smp(void) /* * There's no userspace yet to cause hotplug operations; hence all the * CPU masks are stable and all blatant races in the below code cannot - * happen. The hotplug lock is nevertheless taken to satisfy lockdep, - * but there won't be any contention on it. + * happen. */ - cpus_read_lock(); mutex_lock(&sched_domains_mutex); sched_init_domains(cpu_active_mask); mutex_unlock(&sched_domains_mutex); - cpus_read_unlock(); /* Move init over to a non-isolated CPU */ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) -- cgit From 07879c6a3740fbbf3c8891a0ab484c20a12794d8 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 18 Dec 2018 11:53:52 -0800 Subject: sched/wake_q: Reduce reference counting for special users Some users, specifically futexes and rwsems, required fixes that allowed the callers to be safe when wakeups occur before they are expected by wake_up_q(). Such scenarios also play games and rely on reference counting, and until now were pivoting on wake_q doing it. With the wake_q_add() call being moved down, this can no longer be the case. 
As such we end up with a double task refcounting overhead; and these callers care enough about this (being rather core-ish). This patch introduces a wake_q_add_safe() call that serves for callers that have already done refcounting and therefore the task is 'safe' from wake_q point of view (in that it requires reference throughout the entire queue->wakeup cycle). In the one case it has internal reference counting, in the other case it consumes the reference counting. Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman Long Cc: Will Deacon Cc: Xie Yongji Cc: Yongji Xie Cc: andrea.parri@amarulasolutions.com Cc: lilin24@baidu.com Cc: liuqi16@baidu.com Cc: nixun@baidu.com Cc: yuanlinsi01@baidu.com Cc: zhangyu31@baidu.com Link: https://lkml.kernel.org/r/20181218195352.7orq3upiwfdbrdne@linux-r8p5 Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 60 +++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 16 deletions(-) (limited to 'kernel/sched') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3c8b4dba3d2d..64ceaa5158c5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -396,19 +396,7 @@ static bool set_nr_if_polling(struct task_struct *p) #endif #endif -/** - * wake_q_add() - queue a wakeup for 'later' waking. - * @head: the wake_q_head to add @task to - * @task: the task to queue for 'later' wakeup - * - * Queue a task for later wakeup, most likely by the wake_up_q() call in the - * same context, _HOWEVER_ this is not guaranteed, the wakeup can come - * instantly. - * - * This function must be used as-if it were wake_up_process(); IOW the task - * must be ready to be woken at this location. 
- */ -void wake_q_add(struct wake_q_head *head, struct task_struct *task) +static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) { struct wake_q_node *node = &task->wake_q; @@ -422,15 +410,55 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task) */ smp_mb__before_atomic(); if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) - return; - - get_task_struct(task); + return false; /* * The head is context local, there can be no concurrency. */ *head->lastp = node; head->lastp = &node->next; + return true; +} + +/** + * wake_q_add() - queue a wakeup for 'later' waking. + * @head: the wake_q_head to add @task to + * @task: the task to queue for 'later' wakeup + * + * Queue a task for later wakeup, most likely by the wake_up_q() call in the + * same context, _HOWEVER_ this is not guaranteed, the wakeup can come + * instantly. + * + * This function must be used as-if it were wake_up_process(); IOW the task + * must be ready to be woken at this location. + */ +void wake_q_add(struct wake_q_head *head, struct task_struct *task) +{ + if (__wake_q_add(head, task)) + get_task_struct(task); +} + +/** + * wake_q_add_safe() - safely queue a wakeup for 'later' waking. + * @head: the wake_q_head to add @task to + * @task: the task to queue for 'later' wakeup + * + * Queue a task for later wakeup, most likely by the wake_up_q() call in the + * same context, _HOWEVER_ this is not guaranteed, the wakeup can come + * instantly. + * + * This function must be used as-if it were wake_up_process(); IOW the task + * must be ready to be woken at this location. + * + * This function is essentially a task-safe equivalent to wake_q_add(). Callers + * that already hold reference to @task can call the 'safe' version and trust + * wake_q to do the right thing depending whether or not the @task is already + * queued for wakeup. 
+ */ +void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) +{ + if (!__wake_q_add(head, task)) + put_task_struct(task); } void wake_up_q(struct wake_q_head *head) -- cgit