From ca528cc501896a808dc79c3c0544369d23b331c8 Mon Sep 17 00:00:00 2001
From: Ricardo Neri
Date: Thu, 6 Apr 2023 13:31:45 -0700
Subject: sched/topology: Remove SHARED_CHILD from ASYM_PACKING

Only x86 and Power7 use ASYM_PACKING. They use it differently.

Power7 has cores of equal priority, but the SMT siblings of a core have
different priorities. Parent scheduling domains do not need (nor have) the
ASYM_PACKING flag. SHARED_CHILD is not needed. Using SHARED_PARENT would
cause the topology debug code to complain.

X86 has cores of different priority, but all the SMT siblings of the core
have equal priority. It needs ASYM_PACKING at the MC level, but not at the
SMT level (it also needs it at upper levels if they have scheduling groups
of different priority). Removing ASYM_PACKING from the SMT domain causes
the topology debug code to complain.

Remove SHARED_CHILD for now. We still need a topology check that satisfies
both architectures.

Suggested-by: Valentin Schneider
Signed-off-by: Ricardo Neri
Signed-off-by: Peter Zijlstra (Intel)
Tested-by: Zhang Rui
Link: https://lore.kernel.org/r/20230406203148.19182-10-ricardo.neri-calderon@linux.intel.com
---
 include/linux/sched/sd_flags.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h
index 57bde66d95f7..fad77b5172e2 100644
--- a/include/linux/sched/sd_flags.h
+++ b/include/linux/sched/sd_flags.h
@@ -132,12 +132,9 @@ SD_FLAG(SD_SERIALIZE, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS)
 /*
  * Place busy tasks earlier in the domain
  *
- * SHARED_CHILD: Usually set on the SMT level. Technically could be set further
- *               up, but currently assumed to be set from the base domain
- *               upwards (see update_top_cache_domain()).
  * NEEDS_GROUPS: Load balancing flag.
  */
-SD_FLAG(SD_ASYM_PACKING, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS)
+SD_FLAG(SD_ASYM_PACKING, SDF_NEEDS_GROUPS)
 
 /*
  * Prefer to place tasks in a sibling domain

-- cgit

From d5e1586617be7093ea3419e3fa9387ed833cdbb1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Fri, 2 Jun 2023 10:42:53 +0200
Subject: sched: Unconditionally use full-fat wait_task_inactive()

While modifying wait_task_inactive() for PREEMPT_RT, the build robot
noted that UP got broken. This led to an audit and consideration of the
UP implementation of wait_task_inactive().

It looks like the UP implementation is also broken for PREEMPT; consider
task_current_syscall() getting preempted between the two calls to
wait_task_inactive().

Therefore move the wait_task_inactive() implementation out of CONFIG_SMP
and unconditionally use it.

Signed-off-by: Peter Zijlstra (Intel)
Link: https://lkml.kernel.org/r/20230602103731.GA630648%40hirez.programming.kicks-ass.net
---
 include/linux/sched.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index eed5d65b8d1f..1292d38d66cc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2006,15 +2006,12 @@ static __always_inline void scheduler_ipi(void)
 	 */
 	preempt_fold_need_resched();
 }
-extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state);
 #else
 static inline void scheduler_ipi(void) { }
-static inline unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state)
-{
-	return 1;
-}
 #endif
 
+extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state);
+
 /*
  * Set thread flags in other task's structures.
  * See asm/thread_info.h for TIF_xxxx flags available:

-- cgit
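
A usage sketch of the double-call pattern that the old UP stub could not
honour (the caller and the collect_state() helper are invented for
illustration; only wait_task_inactive() itself is from the kernel):

	static int sample_remote_state(struct task_struct *target,
				       unsigned int state)
	{
		unsigned long ncsw;

		/* Wait until @target is off the CPU; note its switch count. */
		ncsw = wait_task_inactive(target, state);
		if (!ncsw)
			return -EAGAIN;

		collect_state(target);		/* hypothetical snapshot helper */

		/* If @target ran in between, the snapshot is stale. */
		if (wait_task_inactive(target, state) != ncsw)
			return -EAGAIN;

		return 0;
	}

A stub that unconditionally returns 1 makes the second check meaningless,
which is the breakage described in the changelog above.
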
From d16317de9b412aa7bd3598c607112298e36b4352 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Fri, 19 May 2023 12:20:59 +0200
Subject: seqlock/latch: Provide raw_read_seqcount_latch_retry()

The read side of seqcount_latch consists of:

  do {
    seq = raw_read_seqcount_latch(&latch->seq);
    ...
  } while (read_seqcount_latch_retry(&latch->seq, seq));

which is asymmetric in the raw_ department, and sure enough,
read_seqcount_latch_retry() includes (explicit) instrumentation where
raw_read_seqcount_latch() does not.

This inconsistency becomes a problem when trying to use it from noinstr
code. As such, fix it by renaming and re-implementing
raw_read_seqcount_latch_retry() without the instrumentation.

Specifically, the instrumentation in question is kcsan_atomic_next(0) in
do___read_seqcount_retry(). Losing this annotation is not a problem
because raw_read_seqcount_latch() does not pass through
kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX).

Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Thomas Gleixner
Reviewed-by: Petr Mladek
Tested-by: Michael Kelley  # Hyper-V
Link: https://lore.kernel.org/r/20230519102715.233598176@infradead.org
---
 include/linux/rbtree_latch.h |  2 +-
 include/linux/seqlock.h      | 15 ++++++++-------
 2 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h
index 3d1a9e716b80..6a0999c26c7c 100644
--- a/include/linux/rbtree_latch.h
+++ b/include/linux/rbtree_latch.h
@@ -206,7 +206,7 @@ latch_tree_find(void *key, struct latch_tree_root *root,
 	do {
 		seq = raw_read_seqcount_latch(&root->seq);
 		node = __lt_find(key, root, seq & 1, ops->comp);
-	} while (read_seqcount_latch_retry(&root->seq, seq));
+	} while (raw_read_seqcount_latch_retry(&root->seq, seq));
 
 	return node;
 }
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 3926e9027947..987a59d977c5 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -671,9 +671,9 @@ typedef struct {
  *
  * Return: sequence counter raw value. Use the lowest bit as an index for
  * picking which data copy to read. The full counter must then be checked
- * with read_seqcount_latch_retry().
+ * with raw_read_seqcount_latch_retry().
  */
-static inline unsigned raw_read_seqcount_latch(const seqcount_latch_t *s)
+static __always_inline unsigned raw_read_seqcount_latch(const seqcount_latch_t *s)
 {
 	/*
 	 * Pairs with the first smp_wmb() in raw_write_seqcount_latch().
@@ -683,16 +683,17 @@ static inline unsigned raw_read_seqcount_latch(const seqcount_latch_t *s)
 }
 
 /**
- * read_seqcount_latch_retry() - end a seqcount_latch_t read section
+ * raw_read_seqcount_latch_retry() - end a seqcount_latch_t read section
  * @s: Pointer to seqcount_latch_t
  * @start: count, from raw_read_seqcount_latch()
  *
  * Return: true if a read section retry is required, else false
  */
-static inline int
-read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
+static __always_inline int
+raw_read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
 {
-	return read_seqcount_retry(&s->seqcount, start);
+	smp_rmb();
+	return unlikely(READ_ONCE(s->seqcount.sequence) != start);
 }
 
 /**
@@ -752,7 +753,7 @@ read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
  *		entry = data_query(latch->data[idx], ...);
  *
  *		// This includes needed smp_rmb()
- *	} while (read_seqcount_latch_retry(&latch->seq, seq));
+ *	} while (raw_read_seqcount_latch_retry(&latch->seq, seq));
  *
  *	return entry;
  * }

-- cgit
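
For illustration, a minimal latch user written against the renamed helper;
the latched_value structure and function names are made up for the example,
and the update/read protocol follows the documentation quoted in the last
hunk:

	struct latched_value {
		seqcount_latch_t	seq;
		u64			val[2];
	};

	static void latched_value_update(struct latched_value *lv, u64 val)
	{
		raw_write_seqcount_latch(&lv->seq);	/* readers switch to val[1] */
		lv->val[0] = val;
		raw_write_seqcount_latch(&lv->seq);	/* readers switch to val[0] */
		lv->val[1] = val;
	}

	/* No instrumentation on the read side, so this is noinstr-safe. */
	static noinstr u64 latched_value_read(struct latched_value *lv)
	{
		unsigned int seq;
		u64 val;

		do {
			seq = raw_read_seqcount_latch(&lv->seq);
			val = lv->val[seq & 1];
		} while (raw_read_seqcount_latch_retry(&lv->seq, seq));

		return val;
	}
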
From fc4a0db4149afcdae2527f0d8c376accca34adc9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Fri, 19 May 2023 12:21:05 +0200
Subject: math64: Always inline u128 version of mul_u64_u64_shr()

In order to prevent the following complaint from happening, always inline
the u128 variant of mul_u64_u64_shr() -- which is what x86_64 will use.

  vmlinux.o: warning: objtool: read_hv_sched_clock_tsc+0x5a: call to mul_u64_u64_shr.constprop.0() leaves .noinstr.text section

It should compile into something like:

  asm("mul %[mul];"
      "shrd %rdx, %rax, %cl"
      : "+&a" (a)
      : "c" shift, [mul] "r" (mul)
      : "d");

Which is silly not to inline, but it happens.

Signed-off-by: Peter Zijlstra (Intel)
Tested-by: Michael Kelley  # Hyper-V
Link: https://lore.kernel.org/r/20230519102715.637420396@infradead.org
---
 include/linux/math64.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/math64.h b/include/linux/math64.h
index 8b9191a2849e..bf74478926d4 100644
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -168,7 +168,7 @@ static __always_inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift)
 #endif /* mul_u64_u32_shr */
 
 #ifndef mul_u64_u64_shr
-static inline u64 mul_u64_u64_shr(u64 a, u64 mul, unsigned int shift)
+static __always_inline u64 mul_u64_u64_shr(u64 a, u64 mul, unsigned int shift)
 {
 	return (u64)(((unsigned __int128)a * mul) >> shift);
 }

-- cgit

From fb7d4948c4da2dbd26da4b7ec76bbd2f19ff862a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Fri, 19 May 2023 12:21:10 +0200
Subject: sched/clock: Provide local_clock_noinstr()

Now that all ARCH_WANTS_NO_INSTR architectures (arm64, loongarch, s390,
x86) provide sched_clock_noinstr(), use this to provide
local_clock_noinstr().

This local_clock_noinstr() will be safe to use from noinstr code with the
assumption that any such noinstr code is non-preemptible (it had better
be, entry code will have IRQs disabled while __cpuidle must have
preemption disabled).

Specifically, preempt_enable_notrace(), a common part of many a
sched_clock() implementation, calls out to schedule() -- even though, per
the above, it will never trigger -- which frustrates noinstr validation.

  vmlinux.o: warning: objtool: local_clock+0xb5: call to preempt_schedule_notrace_thunk() leaves .noinstr.text section

Signed-off-by: Peter Zijlstra (Intel)
Tested-by: Michael Kelley  # Hyper-V
Link: https://lore.kernel.org/r/20230519102715.978624636@infradead.org
---
 include/linux/sched/clock.h | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h
index ca008f7d3615..196f0ca351a2 100644
--- a/include/linux/sched/clock.h
+++ b/include/linux/sched/clock.h
@@ -12,7 +12,16 @@
  *
  * Please use one of the three interfaces below.
  */
-extern unsigned long long notrace sched_clock(void);
+extern u64 sched_clock(void);
+
+#if defined(CONFIG_ARCH_WANTS_NO_INSTR) || defined(CONFIG_GENERIC_SCHED_CLOCK)
+extern u64 sched_clock_noinstr(void);
+#else
+static __always_inline u64 sched_clock_noinstr(void)
+{
+	return sched_clock();
+}
+#endif
 
 /*
  * See the comment in kernel/sched/clock.c
@@ -45,6 +54,11 @@ static inline u64 cpu_clock(int cpu)
 	return sched_clock();
 }
 
+static __always_inline u64 local_clock_noinstr(void)
+{
+	return sched_clock_noinstr();
+}
+
 static __always_inline u64 local_clock(void)
 {
 	return sched_clock();
@@ -79,6 +93,7 @@ static inline u64 cpu_clock(int cpu)
 	return sched_clock_cpu(cpu);
 }
 
+extern u64 local_clock_noinstr(void);
 extern u64 local_clock(void);
 
 #endif

-- cgit
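
A sketch of the intended consumer (the function and the uninstrumented
callee are invented; the non-preemptible-caller constraint is the one
stated in the changelog above):

	static noinstr u64 measure_blackout(void)
	{
		u64 t0, t1;

		/* Runs with IRQs or preemption off, per the noinstr rules. */
		t0 = local_clock_noinstr();
		do_something_uninstrumented();	/* hypothetical */
		t1 = local_clock_noinstr();

		return t1 - t0;
	}
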
From 0cce0fde499a92c726cd2e24f7763644f7c9f971 Mon Sep 17 00:00:00 2001
From: Miaohe Lin
Date: Sat, 3 Jun 2023 15:36:45 +0800
Subject: sched/topology: Mark set_sched_topology() __init

All callers of set_sched_topology() are within __init section. Mark it
__init too.

Signed-off-by: Miaohe Lin
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Valentin Schneider
Link: https://lore.kernel.org/r/20230603073645.1173332-1-linmiaohe@huawei.com
---
 include/linux/sched/topology.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 816df6cc444e..67b573d5bf28 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -203,7 +203,7 @@ struct sched_domain_topology_level {
 #endif
 };
 
-extern void set_sched_topology(struct sched_domain_topology_level *tl);
+extern void __init set_sched_topology(struct sched_domain_topology_level *tl);
 
 #ifdef CONFIG_SCHED_DEBUG
 # define SD_INIT_NAME(type)		.name = #type

-- cgit
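
For reference, the only kind of caller this affects is early arch setup
code. A sketch modeled loosely on the default topology table; the table
contents, the initcall, and CONFIG_SCHED_SMT being enabled are all
assumptions of the example:

	static struct sched_domain_topology_level my_topology[] = {
		{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
		{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
		{ NULL, },
	};

	static int __init my_arch_topology_init(void)
	{
		/* Runs before sched_init_smp() builds the domains. */
		set_sched_topology(my_topology);
		return 0;
	}
	early_initcall(my_arch_topology_init);
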
From ef73d6a4ef0b35524125c3cfc6deafc26a0c966a Mon Sep 17 00:00:00 2001
From: Arve Hjønnevåg
Date: Fri, 2 Jun 2023 21:23:46 +0000
Subject: sched/wait: Fix a kthread_park race with wait_woken()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

kthread_park() and wait_woken() have a race similar to the one that
kthread_stop() and wait_woken() had before it was fixed in commit
cb6538e740d7 ("sched/wait: Fix a kthread race with wait_woken()").
Extend that fix to also cover kthread_park().

[jstultz: Made changes suggested by Peter to optimize memory loads]
Signed-off-by: Arve Hjønnevåg
Signed-off-by: John Stultz
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Valentin Schneider
Link: https://lore.kernel.org/r/20230602212350.535358-1-jstultz@google.com
---
 include/linux/kthread.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 30e5bec81d2b..f1f95a71a4bc 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -89,6 +89,7 @@ int kthread_stop(struct task_struct *k);
 bool kthread_should_stop(void);
 bool kthread_should_park(void);
 bool __kthread_should_park(struct task_struct *k);
+bool kthread_should_stop_or_park(void);
 bool kthread_freezable_should_stop(bool *was_frozen);
 void *kthread_func(struct task_struct *k);
 void *kthread_data(struct task_struct *k);

-- cgit
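
The affected pattern is a parkable kthread that sleeps via wait_woken(); a
sketch of such a loop follows (the queue structure and drain helper are
invented, while the wait/wake and kthread calls are real kernel APIs):

	struct my_queue {			/* invented for the example */
		wait_queue_head_t	waitq;
		/* ... */
	};

	static int my_worker_fn(void *data)
	{
		DEFINE_WAIT_FUNC(wait, woken_wake_function);
		struct my_queue *q = data;

		add_wait_queue(&q->waitq, &wait);
		while (!kthread_should_stop()) {
			if (kthread_should_park()) {
				kthread_parkme();
				continue;
			}

			if (!my_queue_drain(q)) {	/* hypothetical */
				/*
				 * A kthread_park() issued right here is the
				 * race: the fix extends wait_woken()'s
				 * existing should-stop re-check to also cover
				 * park requests, so they are not slept
				 * through.
				 */
				wait_woken(&wait, TASK_INTERRUPTIBLE,
					   MAX_SCHEDULE_TIMEOUT);
			}
		}
		remove_wait_queue(&q->waitq, &wait);

		return 0;
	}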