From 3ccfebedd8cf54e291c809c838d8ad5cc00f5688 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Mon, 29 Jan 2018 15:20:11 -0500
Subject: powerpc, membarrier: Skip memory barrier in switch_mm()

Allow PowerPC to skip the full memory barrier in switch_mm(), and only
issue the barrier when scheduling into a task belonging to a process
that has registered to use private expedited membarrier.

Threads targeting the same VM but which belong to different thread
groups are a tricky case. It has a few consequences:

It turns out that we cannot rely on get_nr_threads(p) to count the
number of threads using a VM. We can use
(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1) instead to
skip the synchronize_sched() for cases where the VM only has a single
user, and that user only has a single thread.

It also turns out that we cannot use for_each_thread() to set thread
flags in all threads using a VM, as it only iterates on the thread
group.

Therefore, test the membarrier state variable directly rather than
relying on thread flags. This means
membarrier_register_private_expedited() needs to set the
MEMBARRIER_STATE_PRIVATE_EXPEDITED flag, issue synchronize_sched(), and
only then set MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY, which allows
private expedited membarrier commands to succeed.
membarrier_arch_switch_mm() now tests for the
MEMBARRIER_STATE_PRIVATE_EXPEDITED flag.

Signed-off-by: Mathieu Desnoyers
Acked-by: Thomas Gleixner
Acked-by: Peter Zijlstra (Intel)
Cc: Alan Stern
Cc: Alexander Viro
Cc: Andrea Parri
Cc: Andrew Hunter
Cc: Andy Lutomirski
Cc: Avi Kivity
Cc: Benjamin Herrenschmidt
Cc: Boqun Feng
Cc: Dave Watson
Cc: David Sehr
Cc: Greg Hackmann
Cc: Linus Torvalds
Cc: Maged Michael
Cc: Michael Ellerman
Cc: Nicholas Piggin
Cc: Paul E. McKenney
Cc: Paul Mackerras
Cc: Russell King
Cc: Will Deacon
Cc: linux-api@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/20180129202020.8515-3-mathieu.desnoyers@efficios.com
Signed-off-by: Ingo Molnar
---
 include/linux/sched/mm.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux/sched/mm.h')

diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 3d49b91b674d..26307cdc3969 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -215,14 +215,25 @@ static inline void memalloc_noreclaim_restore(unsigned int flags)
 #ifdef CONFIG_MEMBARRIER
 enum {
 	MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY	= (1U << 0),
-	MEMBARRIER_STATE_SWITCH_MM			= (1U << 1),
+	MEMBARRIER_STATE_PRIVATE_EXPEDITED		= (1U << 1),
 };
 
+#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
+#include <asm/membarrier.h>
+#endif
+
 static inline void membarrier_execve(struct task_struct *t)
 {
 	atomic_set(&t->mm->membarrier_state, 0);
 }
 #else
+#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
+static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
+					     struct mm_struct *next,
+					     struct task_struct *tsk)
+{
+}
+#endif
 static inline void membarrier_execve(struct task_struct *t)
 {
 }
-- cgit
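For reference, the architecture hook that this patch makes PowerPC
provide looks roughly as follows. This is a sketch modeled on the
arch/powerpc/include/asm/membarrier.h implementation added by the full
series; it is illustrative only and not part of the diff shown above.

/*
 * Sketch of an arch-provided membarrier_arch_switch_mm(): only issue
 * the full barrier when switching between user processes, and only
 * when the incoming mm has registered for private expedited
 * membarrier (tested via mm->membarrier_state, not thread flags).
 */
static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
					     struct mm_struct *next,
					     struct task_struct *tsk)
{
	if (likely(!(atomic_read(&next->membarrier_state) &
		     MEMBARRIER_STATE_PRIVATE_EXPEDITED) || !prev))
		return;

	/*
	 * The membarrier system call requires a full memory barrier
	 * after storing to rq->curr, before going back to user-space.
	 */
	smp_mb();
}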
From 306e060435d7a3aef8f6f033e43b0f581638adce Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Mon, 29 Jan 2018 15:20:12 -0500
Subject: membarrier: Document scheduler barrier requirements

Document the membarrier requirement on having a full memory barrier in
__schedule() after coming from user-space, before storing to rq->curr.
It is provided by smp_mb__after_spinlock() in __schedule().

Document that membarrier requires a full barrier on transition from
kernel thread to userspace thread. We currently have an implicit
barrier from atomic_dec_and_test() in mmdrop() that ensures this.

The x86 switch_mm_irqs_off() full barrier is currently provided by many
cpumask update operations as well as write_cr3(). Document that
write_cr3() provides this barrier.

Signed-off-by: Mathieu Desnoyers
Acked-by: Thomas Gleixner
Acked-by: Peter Zijlstra (Intel)
Cc: Andrea Parri
Cc: Andrew Hunter
Cc: Andy Lutomirski
Cc: Avi Kivity
Cc: Benjamin Herrenschmidt
Cc: Boqun Feng
Cc: Dave Watson
Cc: David Sehr
Cc: Greg Hackmann
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Maged Michael
Cc: Michael Ellerman
Cc: Paul E. McKenney
Cc: Paul Mackerras
Cc: Russell King
Cc: Will Deacon
Cc: linux-api@vger.kernel.org
Link: http://lkml.kernel.org/r/20180129202020.8515-4-mathieu.desnoyers@efficios.com
Signed-off-by: Ingo Molnar
---
 include/linux/sched/mm.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux/sched/mm.h')

diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 26307cdc3969..b84e0fde1d72 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -39,6 +39,11 @@ static inline void mmgrab(struct mm_struct *mm)
 extern void __mmdrop(struct mm_struct *);
 static inline void mmdrop(struct mm_struct *mm)
 {
+	/*
+	 * The implicit full barrier implied by atomic_dec_and_test() is
+	 * required by the membarrier system call before returning to
+	 * user-space, after storing to rq->curr.
+	 */
 	if (unlikely(atomic_dec_and_test(&mm->mm_count)))
 		__mmdrop(mm);
 }
-- cgit
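The scheduler barriers documented above back the ordering guarantee
that membarrier(2) gives user-space. A minimal user-space sketch of the
private expedited usage follows, assuming a kernel with this series
applied; membarrier() has no glibc wrapper, so syscall(2) is used
directly.

#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

static int membarrier(int cmd, int flags)
{
	return syscall(__NR_membarrier, cmd, flags);
}

int main(void)
{
	/* Register once before using the private expedited command. */
	if (membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0))
		perror("MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED");

	/*
	 * Acts as a full memory barrier with respect to every other
	 * thread of this process; threads being switched in or out are
	 * covered by the scheduler barriers documented in this patch.
	 */
	if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0))
		perror("MEMBARRIER_CMD_PRIVATE_EXPEDITED");
	return 0;
}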
From c5f58bd58f432be5d92df33c5458e0bcbee3aadf Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Mon, 29 Jan 2018 15:20:13 -0500
Subject: membarrier: Provide GLOBAL_EXPEDITED command

Allow expedited membarrier to be used for data shared between processes
through shared memory. Processes wishing to receive the membarriers
register with MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED. Those which
want to issue a membarrier invoke MEMBARRIER_CMD_GLOBAL_EXPEDITED.

This allows an extremely simple kernel-level implementation: we have
almost everything we need with the PRIVATE_EXPEDITED barrier code. All
we need to do is add a flag in the mm_struct that will be used to check
whether we need to send the IPI to the current thread of each CPU.

There is a slight downside to this approach compared to targeting
specific shared memory users: when performing a membarrier operation,
all registered "global" receivers will get the barrier, even if they
don't share a memory mapping with the sender issuing
MEMBARRIER_CMD_GLOBAL_EXPEDITED.

This registration approach seems to fit the requirement of not
disturbing processes that deeply care about real-time: they simply
should not register with MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED.

In order to align the membarrier command names, the
"MEMBARRIER_CMD_SHARED" command is renamed to "MEMBARRIER_CMD_GLOBAL",
keeping an alias of MEMBARRIER_CMD_SHARED to MEMBARRIER_CMD_GLOBAL for
UAPI header backward compatibility.

Signed-off-by: Mathieu Desnoyers
Acked-by: Thomas Gleixner
Acked-by: Peter Zijlstra (Intel)
Cc: Andrea Parri
Cc: Andrew Hunter
Cc: Andy Lutomirski
Cc: Avi Kivity
Cc: Benjamin Herrenschmidt
Cc: Boqun Feng
Cc: Dave Watson
Cc: David Sehr
Cc: Greg Hackmann
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Maged Michael
Cc: Michael Ellerman
Cc: Paul E. McKenney
Cc: Paul Mackerras
Cc: Russell King
Cc: Will Deacon
Cc: linux-api@vger.kernel.org
Link: http://lkml.kernel.org/r/20180129202020.8515-5-mathieu.desnoyers@efficios.com
Signed-off-by: Ingo Molnar
---
 include/linux/sched/mm.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux/sched/mm.h')

diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index b84e0fde1d72..1c4e40c5efaf 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -219,8 +219,10 @@ static inline void memalloc_noreclaim_restore(unsigned int flags)
 
 #ifdef CONFIG_MEMBARRIER
 enum {
-	MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY	= (1U << 0),
-	MEMBARRIER_STATE_PRIVATE_EXPEDITED		= (1U << 1),
+	MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY	= (1U << 0),
+	MEMBARRIER_STATE_PRIVATE_EXPEDITED		= (1U << 1),
+	MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY		= (1U << 2),
+	MEMBARRIER_STATE_GLOBAL_EXPEDITED		= (1U << 3),
 };
 
 #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
-- cgit
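From user-space, the new command pair might be used as follows. This is
a sketch assuming <linux/membarrier.h> from a kernel with this patch;
receiver and sender would typically be distinct processes communicating
through shared memory.

#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Receiver process: opt in once, e.g. at startup. */
static void receiver_setup(void)
{
	syscall(__NR_membarrier, MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED, 0);
}

/*
 * Sender process: orders its memory accesses against all threads of
 * every registered receiver, even without sharing a memory mapping
 * with them.
 */
static void sender_barrier(void)
{
	syscall(__NR_membarrier, MEMBARRIER_CMD_GLOBAL_EXPEDITED, 0);
}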
From 70216e18e519a54a2f13569e8caff99a092a92d6 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Mon, 29 Jan 2018 15:20:17 -0500
Subject: membarrier: Provide core serializing command, *_SYNC_CORE

Provide a core serializing membarrier command to support memory reclaim
by JIT.

Each architecture needs to explicitly opt into that support by
documenting in their architecture code how they provide the core
serializing instructions required when returning from the membarrier
IPI, and after the scheduler has updated the curr->mm pointer (before
going back to user-space). They should then select
ARCH_HAS_MEMBARRIER_SYNC_CORE to enable support for that command on
their architecture.

Architectures selecting this feature need to either document that they
issue core serializing instructions when returning to user-space, or
implement their architecture-specific sync_core_before_usermode().

Signed-off-by: Mathieu Desnoyers
Acked-by: Thomas Gleixner
Acked-by: Peter Zijlstra (Intel)
Cc: Andrea Parri
Cc: Andrew Hunter
Cc: Andy Lutomirski
Cc: Avi Kivity
Cc: Benjamin Herrenschmidt
Cc: Boqun Feng
Cc: Dave Watson
Cc: David Sehr
Cc: Greg Hackmann
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Maged Michael
Cc: Michael Ellerman
Cc: Paul E. McKenney
Cc: Paul Mackerras
Cc: Russell King
Cc: Will Deacon
Cc: linux-api@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Link: http://lkml.kernel.org/r/20180129202020.8515-9-mathieu.desnoyers@efficios.com
Signed-off-by: Ingo Molnar
---
 include/linux/sched/mm.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/linux/sched/mm.h')

diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 1c4e40c5efaf..03a169087a18 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -7,6 +7,7 @@
 #include <linux/sched.h>
 #include <linux/mm_types.h>
 #include <linux/gfp.h>
+#include <linux/sync_core.h>
 
 /*
  * Routines for handling mm_structs
@@ -223,12 +224,26 @@ enum {
 	MEMBARRIER_STATE_PRIVATE_EXPEDITED			= (1U << 1),
 	MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY			= (1U << 2),
 	MEMBARRIER_STATE_GLOBAL_EXPEDITED			= (1U << 3),
+	MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY	= (1U << 4),
+	MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE		= (1U << 5),
+};
+
+enum {
+	MEMBARRIER_FLAG_SYNC_CORE	= (1U << 0),
 };
 
 #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
 #include <asm/membarrier.h>
 #endif
 
+static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
+{
+	if (likely(!(atomic_read(&mm->membarrier_state) &
+		     MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE)))
+		return;
+	sync_core_before_usermode();
+}
+
 static inline void membarrier_execve(struct task_struct *t)
 {
 	atomic_set(&t->mm->membarrier_state, 0);
@@ -244,6 +259,9 @@ static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
 static inline void membarrier_execve(struct task_struct *t)
 {
 }
+static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
+{
+}
 #endif
 
 #endif /* _LINUX_SCHED_MM_H */
-- cgit
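A JIT wanting to reclaim code memory would use the core serializing
command pair along these lines. This is a hypothetical sketch; the
unpublish/reuse bookkeeping (steps 1 and 3) belongs to the JIT and is
only outlined in comments.

#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Once, before the first core serializing membarrier: */
static void jit_setup(void)
{
	syscall(__NR_membarrier,
		MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 0);
}

/* When reclaiming a code region: */
static void jit_reclaim_region(void)
{
	/* 1. Unpublish the region so no thread can branch into it. */

	/*
	 * 2. Guarantee that every thread of this process executes a
	 *    core serializing instruction -- in the membarrier IPI or
	 *    on its return to user-space -- before the memory is
	 *    reused.
	 */
	syscall(__NR_membarrier,
		MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0);

	/* 3. The region can now safely be reused for new code. */
}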