diff options
Diffstat (limited to 'kernel/locking')
-rw-r--r-- | kernel/locking/lock_events.h | 4 | ||||
-rw-r--r-- | kernel/locking/lockdep.c | 204 | ||||
-rw-r--r-- | kernel/locking/lockdep_internals.h | 3 | ||||
-rw-r--r-- | kernel/locking/lockdep_proc.c | 2 | ||||
-rw-r--r-- | kernel/locking/locktorture.c | 7 | ||||
-rw-r--r-- | kernel/locking/mutex-debug.c | 12 | ||||
-rw-r--r-- | kernel/locking/mutex.c | 53 | ||||
-rw-r--r-- | kernel/locking/mutex.h | 27 | ||||
-rw-r--r-- | kernel/locking/osq_lock.c | 3 | ||||
-rw-r--r-- | kernel/locking/percpu-rwsem.c | 11 | ||||
-rw-r--r-- | kernel/locking/qspinlock.c | 15 | ||||
-rw-r--r-- | kernel/locking/qspinlock_paravirt.h | 81 | ||||
-rw-r--r-- | kernel/locking/rtmutex.c | 78 | ||||
-rw-r--r-- | kernel/locking/rtmutex_api.c | 22 | ||||
-rw-r--r-- | kernel/locking/rtmutex_common.h | 7 | ||||
-rw-r--r-- | kernel/locking/rwbase_rt.c | 8 | ||||
-rw-r--r-- | kernel/locking/rwsem.c | 42 | ||||
-rw-r--r-- | kernel/locking/semaphore.c | 13 | ||||
-rw-r--r-- | kernel/locking/spinlock.c | 16 | ||||
-rw-r--r-- | kernel/locking/spinlock_rt.c | 19 | ||||
-rw-r--r-- | kernel/locking/test-ww_mutex.c | 18 | ||||
-rw-r--r-- | kernel/locking/ww_mutex.h | 53 |
22 files changed, 449 insertions, 249 deletions
diff --git a/kernel/locking/lock_events.h b/kernel/locking/lock_events.h index a6016b91803d..d2345e9c0190 100644 --- a/kernel/locking/lock_events.h +++ b/kernel/locking/lock_events.h @@ -53,8 +53,8 @@ static inline void __lockevent_add(enum lock_events event, int inc) #else /* CONFIG_LOCK_EVENT_COUNTS */ #define lockevent_inc(ev) -#define lockevent_add(ev, c) -#define lockevent_cond_inc(ev, c) +#define lockevent_add(ev, c) do { (void)(c); } while (0) +#define lockevent_cond_inc(ev, c) do { (void)(c); } while (0) #endif /* CONFIG_LOCK_EVENT_COUNTS */ diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 151bd3de5936..4470680f0226 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -56,6 +56,7 @@ #include <linux/kprobes.h> #include <linux/lockdep.h> #include <linux/context_tracking.h> +#include <linux/console.h> #include <asm/sections.h> @@ -78,7 +79,7 @@ module_param(lock_stat, int, 0644); #endif #ifdef CONFIG_SYSCTL -static struct ctl_table kern_lockdep_table[] = { +static const struct ctl_table kern_lockdep_table[] = { #ifdef CONFIG_PROVE_LOCKING { .procname = "prove_locking", @@ -97,7 +98,6 @@ static struct ctl_table kern_lockdep_table[] = { .proc_handler = proc_dointvec, }, #endif /* CONFIG_LOCK_STAT */ - { } }; static __init int kernel_lockdep_sysctls_init(void) @@ -157,10 +157,12 @@ static inline void lockdep_unlock(void) __this_cpu_dec(lockdep_recursion); } +#ifdef CONFIG_PROVE_LOCKING static inline bool lockdep_assert_locked(void) { return DEBUG_LOCKS_WARN_ON(__owner != current); } +#endif static struct task_struct *lockdep_selftest_task_struct; @@ -430,7 +432,7 @@ static inline u16 hlock_id(struct held_lock *hlock) return (hlock->class_idx | (hlock->read << MAX_LOCKDEP_KEYS_BITS)); } -static inline unsigned int chain_hlock_class_idx(u16 hlock_id) +static inline __maybe_unused unsigned int chain_hlock_class_idx(u16 hlock_id) { return hlock_id & (MAX_LOCKDEP_KEYS - 1); } @@ -574,8 +576,10 @@ static struct lock_trace *save_trace(void) if (!debug_locks_off_graph_unlock()) return NULL; + nbcon_cpu_emergency_enter(); print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!"); dump_stack(); + nbcon_cpu_emergency_exit(); return NULL; } @@ -786,7 +790,7 @@ static void lockdep_print_held_locks(struct task_struct *p) printk("no locks held by %s/%d.\n", p->comm, task_pid_nr(p)); else printk("%d lock%s held by %s/%d:\n", depth, - depth > 1 ? "s" : "", p->comm, task_pid_nr(p)); + str_plural(depth), p->comm, task_pid_nr(p)); /* * It's not reliable to print a task's held locks if it's not sleeping * and it's not the current task. @@ -888,11 +892,13 @@ look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass) if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { instrumentation_begin(); debug_locks_off(); + nbcon_cpu_emergency_enter(); printk(KERN_ERR "BUG: looking up invalid subclass: %u\n", subclass); printk(KERN_ERR "turning off the locking correctness validator.\n"); dump_stack(); + nbcon_cpu_emergency_exit(); instrumentation_end(); return NULL; } @@ -969,11 +975,13 @@ static bool assign_lock_key(struct lockdep_map *lock) else { /* Debug-check: all keys must be persistent! */ debug_locks_off(); + nbcon_cpu_emergency_enter(); pr_err("INFO: trying to register non-static key.\n"); pr_err("The code is fine but needs lockdep annotation, or maybe\n"); pr_err("you didn't initialize this object before use?\n"); pr_err("turning off the locking correctness validator.\n"); dump_stack(); + nbcon_cpu_emergency_exit(); return false; } @@ -1317,8 +1325,10 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) return NULL; } + nbcon_cpu_emergency_enter(); print_lockdep_off("BUG: MAX_LOCKDEP_KEYS too low!"); dump_stack(); + nbcon_cpu_emergency_exit(); return NULL; } nr_lock_classes++; @@ -1350,11 +1360,13 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) if (verbose(class)) { graph_unlock(); + nbcon_cpu_emergency_enter(); printk("\nnew class %px: %s", class->key, class->name); if (class->name_version > 1) printk(KERN_CONT "#%d", class->name_version); printk(KERN_CONT "\n"); dump_stack(); + nbcon_cpu_emergency_exit(); if (!graph_lock()) { return NULL; @@ -1393,8 +1405,10 @@ static struct lock_list *alloc_list_entry(void) if (!debug_locks_off_graph_unlock()) return NULL; + nbcon_cpu_emergency_enter(); print_lockdep_off("BUG: MAX_LOCKDEP_ENTRIES too low!"); dump_stack(); + nbcon_cpu_emergency_exit(); return NULL; } nr_list_entries++; @@ -2040,6 +2054,8 @@ static noinline void print_circular_bug(struct lock_list *this, depth = get_lock_depth(target); + nbcon_cpu_emergency_enter(); + print_circular_bug_header(target, depth, check_src, check_tgt); parent = get_lock_parent(target); @@ -2058,6 +2074,8 @@ static noinline void print_circular_bug(struct lock_list *this, printk("\nstack backtrace:\n"); dump_stack(); + + nbcon_cpu_emergency_exit(); } static noinline void print_bfs_bug(int ret) @@ -2068,6 +2086,9 @@ static noinline void print_bfs_bug(int ret) /* * Breadth-first-search failed, graph got corrupted? */ + if (ret == BFS_EQUEUEFULL) + pr_warn("Increase LOCKDEP_CIRCULAR_QUEUE_BITS to avoid this warning:\n"); + WARN(1, "lockdep bfs error:%d\n", ret); } @@ -2570,6 +2591,8 @@ print_bad_irq_dependency(struct task_struct *curr, if (!debug_locks_off_graph_unlock() || debug_locks_silent) return; + nbcon_cpu_emergency_enter(); + pr_warn("\n"); pr_warn("=====================================================\n"); pr_warn("WARNING: %s-safe -> %s-unsafe lock order detected\n", @@ -2619,11 +2642,13 @@ print_bad_irq_dependency(struct task_struct *curr, pr_warn(" and %s-irq-unsafe lock:\n", irqclass); next_root->trace = save_trace(); if (!next_root->trace) - return; + goto out; print_shortest_lock_dependencies(forwards_entry, next_root); pr_warn("\nstack backtrace:\n"); dump_stack(); +out: + nbcon_cpu_emergency_exit(); } static const char *state_names[] = { @@ -2988,6 +3013,8 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, if (!debug_locks_off_graph_unlock() || debug_locks_silent) return; + nbcon_cpu_emergency_enter(); + pr_warn("\n"); pr_warn("============================================\n"); pr_warn("WARNING: possible recursive locking detected\n"); @@ -3010,6 +3037,8 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, pr_warn("\nstack backtrace:\n"); dump_stack(); + + nbcon_cpu_emergency_exit(); } /* @@ -3607,6 +3636,8 @@ static void print_collision(struct task_struct *curr, struct held_lock *hlock_next, struct lock_chain *chain) { + nbcon_cpu_emergency_enter(); + pr_warn("\n"); pr_warn("============================\n"); pr_warn("WARNING: chain_key collision\n"); @@ -3623,6 +3654,8 @@ static void print_collision(struct task_struct *curr, pr_warn("\nstack backtrace:\n"); dump_stack(); + + nbcon_cpu_emergency_exit(); } #endif @@ -3713,8 +3746,10 @@ static inline int add_chain_cache(struct task_struct *curr, if (!debug_locks_off_graph_unlock()) return 0; + nbcon_cpu_emergency_enter(); print_lockdep_off("BUG: MAX_LOCKDEP_CHAINS too low!"); dump_stack(); + nbcon_cpu_emergency_exit(); return 0; } chain->chain_key = chain_key; @@ -3731,8 +3766,10 @@ static inline int add_chain_cache(struct task_struct *curr, if (!debug_locks_off_graph_unlock()) return 0; + nbcon_cpu_emergency_enter(); print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!"); dump_stack(); + nbcon_cpu_emergency_exit(); return 0; } @@ -3971,6 +4008,8 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, if (!debug_locks_off() || debug_locks_silent) return; + nbcon_cpu_emergency_enter(); + pr_warn("\n"); pr_warn("================================\n"); pr_warn("WARNING: inconsistent lock state\n"); @@ -3999,6 +4038,8 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, pr_warn("\nstack backtrace:\n"); dump_stack(); + + nbcon_cpu_emergency_exit(); } /* @@ -4033,6 +4074,8 @@ print_irq_inversion_bug(struct task_struct *curr, if (!debug_locks_off_graph_unlock() || debug_locks_silent) return; + nbcon_cpu_emergency_enter(); + pr_warn("\n"); pr_warn("========================================================\n"); pr_warn("WARNING: possible irq lock inversion dependency detected\n"); @@ -4073,11 +4116,13 @@ print_irq_inversion_bug(struct task_struct *curr, pr_warn("\nthe shortest dependencies between 2nd lock and 1st lock:\n"); root->trace = save_trace(); if (!root->trace) - return; + goto out; print_shortest_lock_dependencies(other, root); pr_warn("\nstack backtrace:\n"); dump_stack(); +out: + nbcon_cpu_emergency_exit(); } /* @@ -4154,6 +4199,8 @@ void print_irqtrace_events(struct task_struct *curr) { const struct irqtrace_events *trace = &curr->irqtrace; + nbcon_cpu_emergency_enter(); + printk("irq event stamp: %u\n", trace->irq_events); printk("hardirqs last enabled at (%u): [<%px>] %pS\n", trace->hardirq_enable_event, (void *)trace->hardirq_enable_ip, @@ -4167,6 +4214,8 @@ void print_irqtrace_events(struct task_struct *curr) printk("softirqs last disabled at (%u): [<%px>] %pS\n", trace->softirq_disable_event, (void *)trace->softirq_disable_ip, (void *)trace->softirq_disable_ip); + + nbcon_cpu_emergency_exit(); } static int HARDIRQ_verbose(struct lock_class *class) @@ -4539,6 +4588,30 @@ void lockdep_softirqs_off(unsigned long ip) debug_atomic_inc(redundant_softirqs_off); } +/** + * lockdep_cleanup_dead_cpu - Ensure CPU lockdep state is cleanly stopped + * + * @cpu: index of offlined CPU + * @idle: task pointer for offlined CPU's idle thread + * + * Invoked after the CPU is dead. Ensures that the tracing infrastructure + * is left in a suitable state for the CPU to be subsequently brought + * online again. + */ +void lockdep_cleanup_dead_cpu(unsigned int cpu, struct task_struct *idle) +{ + if (unlikely(!debug_locks)) + return; + + if (unlikely(per_cpu(hardirqs_enabled, cpu))) { + pr_warn("CPU %u left hardirqs enabled!", cpu); + if (idle) + print_irqtrace_events(idle); + /* Clean it up for when the CPU comes online again. */ + per_cpu(hardirqs_enabled, cpu) = 0; + } +} + static int mark_usage(struct task_struct *curr, struct held_lock *hlock, int check) { @@ -4687,10 +4760,12 @@ unlock: * We must printk outside of the graph_lock: */ if (ret == 2) { + nbcon_cpu_emergency_enter(); printk("\nmarked lock as {%s}:\n", usage_str[new_bit]); print_lock(this); print_irqtrace_events(curr); dump_stack(); + nbcon_cpu_emergency_exit(); } return ret; @@ -4731,6 +4806,8 @@ print_lock_invalid_wait_context(struct task_struct *curr, if (debug_locks_silent) return 0; + nbcon_cpu_emergency_enter(); + pr_warn("\n"); pr_warn("=============================\n"); pr_warn("[ BUG: Invalid wait context ]\n"); @@ -4750,6 +4827,8 @@ print_lock_invalid_wait_context(struct task_struct *curr, pr_warn("stack backtrace:\n"); dump_stack(); + nbcon_cpu_emergency_exit(); + return 0; } @@ -4918,6 +4997,9 @@ EXPORT_SYMBOL_GPL(lockdep_init_map_type); struct lock_class_key __lockdep_no_validate__; EXPORT_SYMBOL_GPL(__lockdep_no_validate__); +struct lock_class_key __lockdep_no_track__; +EXPORT_SYMBOL_GPL(__lockdep_no_track__); + #ifdef CONFIG_PROVE_LOCKING void lockdep_set_lock_cmp_fn(struct lockdep_map *lock, lock_cmp_fn cmp_fn, lock_print_fn print_fn) @@ -4954,6 +5036,8 @@ print_lock_nested_lock_not_held(struct task_struct *curr, if (debug_locks_silent) return; + nbcon_cpu_emergency_enter(); + pr_warn("\n"); pr_warn("==================================\n"); pr_warn("WARNING: Nested lock was not taken\n"); @@ -4974,6 +5058,8 @@ print_lock_nested_lock_not_held(struct task_struct *curr, pr_warn("\nstack backtrace:\n"); dump_stack(); + + nbcon_cpu_emergency_exit(); } static int __lock_is_held(const struct lockdep_map *lock, int read); @@ -5002,6 +5088,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, if (unlikely(!debug_locks)) return 0; + if (unlikely(lock->key == &__lockdep_no_track__)) + return 0; + if (!prove_locking || lock->key == &__lockdep_no_validate__) check = 0; @@ -5019,11 +5108,13 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, debug_class_ops_inc(class); if (very_verbose(class)) { + nbcon_cpu_emergency_enter(); printk("\nacquire class [%px] %s", class->key, class->name); if (class->name_version > 1) printk(KERN_CONT "#%d", class->name_version); printk(KERN_CONT "\n"); dump_stack(); + nbcon_cpu_emergency_exit(); } /* @@ -5150,6 +5241,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, #endif if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) { debug_locks_off(); + nbcon_cpu_emergency_enter(); print_lockdep_off("BUG: MAX_LOCK_DEPTH too low!"); printk(KERN_DEBUG "depth: %i max: %lu!\n", curr->lockdep_depth, MAX_LOCK_DEPTH); @@ -5157,6 +5249,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, lockdep_print_held_locks(current); debug_show_all_locks(); dump_stack(); + nbcon_cpu_emergency_exit(); return 0; } @@ -5176,6 +5269,8 @@ static void print_unlock_imbalance_bug(struct task_struct *curr, if (debug_locks_silent) return; + nbcon_cpu_emergency_enter(); + pr_warn("\n"); pr_warn("=====================================\n"); pr_warn("WARNING: bad unlock balance detected!\n"); @@ -5192,6 +5287,8 @@ static void print_unlock_imbalance_bug(struct task_struct *curr, pr_warn("\nstack backtrace:\n"); dump_stack(); + + nbcon_cpu_emergency_exit(); } static noinstr int match_held_lock(const struct held_lock *hlock, @@ -5764,7 +5861,8 @@ void lock_release(struct lockdep_map *lock, unsigned long ip) trace_lock_release(lock, ip); - if (unlikely(!lockdep_enabled())) + if (unlikely(!lockdep_enabled() || + lock->key == &__lockdep_no_track__)) return; raw_local_irq_save(flags); @@ -5895,6 +5993,8 @@ static void print_lock_contention_bug(struct task_struct *curr, if (debug_locks_silent) return; + nbcon_cpu_emergency_enter(); + pr_warn("\n"); pr_warn("=================================\n"); pr_warn("WARNING: bad contention detected!\n"); @@ -5911,6 +6011,8 @@ static void print_lock_contention_bug(struct task_struct *curr, pr_warn("\nstack backtrace:\n"); dump_stack(); + + nbcon_cpu_emergency_exit(); } static void @@ -5930,6 +6032,9 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) if (DEBUG_LOCKS_WARN_ON(!depth)) return; + if (unlikely(lock->key == &__lockdep_no_track__)) + return; + hlock = find_held_lock(curr, lock, depth, &i); if (!hlock) { print_lock_contention_bug(curr, lock, ip); @@ -5972,6 +6077,9 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip) if (DEBUG_LOCKS_WARN_ON(!depth)) return; + if (unlikely(lock->key == &__lockdep_no_track__)) + return; + hlock = find_held_lock(curr, lock, depth, &i); if (!hlock) { print_lock_contention_bug(curr, lock, _RET_IP_); @@ -6184,25 +6292,27 @@ static struct pending_free *get_pending_free(void) static void free_zapped_rcu(struct rcu_head *cb); /* - * Schedule an RCU callback if no RCU callback is pending. Must be called with - * the graph lock held. - */ -static void call_rcu_zapped(struct pending_free *pf) +* See if we need to queue an RCU callback, must called with +* the lockdep lock held, returns false if either we don't have +* any pending free or the callback is already scheduled. +* Otherwise, a call_rcu() must follow this function call. +*/ +static bool prepare_call_rcu_zapped(struct pending_free *pf) { WARN_ON_ONCE(inside_selftest()); if (list_empty(&pf->zapped)) - return; + return false; if (delayed_free.scheduled) - return; + return false; delayed_free.scheduled = true; WARN_ON_ONCE(delayed_free.pf + delayed_free.index != pf); delayed_free.index ^= 1; - call_rcu(&delayed_free.rcu_head, free_zapped_rcu); + return true; } /* The caller must hold the graph lock. May be called from RCU context. */ @@ -6228,6 +6338,7 @@ static void free_zapped_rcu(struct rcu_head *ch) { struct pending_free *pf; unsigned long flags; + bool need_callback; if (WARN_ON_ONCE(ch != &delayed_free.rcu_head)) return; @@ -6239,14 +6350,18 @@ static void free_zapped_rcu(struct rcu_head *ch) pf = delayed_free.pf + (delayed_free.index ^ 1); __free_zapped_classes(pf); delayed_free.scheduled = false; + need_callback = + prepare_call_rcu_zapped(delayed_free.pf + delayed_free.index); + lockdep_unlock(); + raw_local_irq_restore(flags); /* - * If there's anything on the open list, close and start a new callback. - */ - call_rcu_zapped(delayed_free.pf + delayed_free.index); + * If there's pending free and its callback has not been scheduled, + * queue an RCU callback. + */ + if (need_callback) + call_rcu(&delayed_free.rcu_head, free_zapped_rcu); - lockdep_unlock(); - raw_local_irq_restore(flags); } /* @@ -6286,6 +6401,7 @@ static void lockdep_free_key_range_reg(void *start, unsigned long size) { struct pending_free *pf; unsigned long flags; + bool need_callback; init_data_structures_once(); @@ -6293,10 +6409,11 @@ static void lockdep_free_key_range_reg(void *start, unsigned long size) lockdep_lock(); pf = get_pending_free(); __lockdep_free_key_range(pf, start, size); - call_rcu_zapped(pf); + need_callback = prepare_call_rcu_zapped(pf); lockdep_unlock(); raw_local_irq_restore(flags); - + if (need_callback) + call_rcu(&delayed_free.rcu_head, free_zapped_rcu); /* * Wait for any possible iterators from look_up_lock_class() to pass * before continuing to free the memory they refer to. @@ -6390,6 +6507,7 @@ static void lockdep_reset_lock_reg(struct lockdep_map *lock) struct pending_free *pf; unsigned long flags; int locked; + bool need_callback = false; raw_local_irq_save(flags); locked = graph_lock(); @@ -6398,11 +6516,13 @@ static void lockdep_reset_lock_reg(struct lockdep_map *lock) pf = get_pending_free(); __lockdep_reset_lock(pf, lock); - call_rcu_zapped(pf); + need_callback = prepare_call_rcu_zapped(pf); graph_unlock(); out_irq: raw_local_irq_restore(flags); + if (need_callback) + call_rcu(&delayed_free.rcu_head, free_zapped_rcu); } /* @@ -6446,6 +6566,7 @@ void lockdep_unregister_key(struct lock_class_key *key) struct pending_free *pf; unsigned long flags; bool found = false; + bool need_callback = false; might_sleep(); @@ -6466,11 +6587,14 @@ void lockdep_unregister_key(struct lock_class_key *key) if (found) { pf = get_pending_free(); __lockdep_free_key_range(pf, key, 1); - call_rcu_zapped(pf); + need_callback = prepare_call_rcu_zapped(pf); } lockdep_unlock(); raw_local_irq_restore(flags); + if (need_callback) + call_rcu(&delayed_free.rcu_head, free_zapped_rcu); + /* Wait until is_dynamic_key() has finished accessing k->hash_entry. */ synchronize_rcu(); } @@ -6478,17 +6602,17 @@ EXPORT_SYMBOL_GPL(lockdep_unregister_key); void __init lockdep_init(void) { - printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n"); + pr_info("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n"); - printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES); - printk("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH); - printk("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS); - printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE); - printk("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES); - printk("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS); - printk("... CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE); + pr_info("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES); + pr_info("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH); + pr_info("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS); + pr_info("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE); + pr_info("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES); + pr_info("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS); + pr_info("... CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE); - printk(" memory used by lock dependency info: %zu kB\n", + pr_info(" memory used by lock dependency info: %zu kB\n", (sizeof(lock_classes) + sizeof(lock_classes_in_use) + sizeof(classhash_table) + @@ -6506,12 +6630,12 @@ void __init lockdep_init(void) ); #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) - printk(" memory used for stack traces: %zu kB\n", + pr_info(" memory used for stack traces: %zu kB\n", (sizeof(stack_trace) + sizeof(stack_trace_hash)) / 1024 ); #endif - printk(" per task-struct memory footprint: %zu bytes\n", + pr_info(" per task-struct memory footprint: %zu bytes\n", sizeof(((struct task_struct *)NULL)->held_locks)); } @@ -6524,6 +6648,8 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from, if (debug_locks_silent) return; + nbcon_cpu_emergency_enter(); + pr_warn("\n"); pr_warn("=========================\n"); pr_warn("WARNING: held lock freed!\n"); @@ -6536,6 +6662,8 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from, pr_warn("\nstack backtrace:\n"); dump_stack(); + + nbcon_cpu_emergency_exit(); } static inline int not_in_range(const void* mem_from, unsigned long mem_len, @@ -6582,6 +6710,8 @@ static void print_held_locks_bug(void) if (debug_locks_silent) return; + nbcon_cpu_emergency_enter(); + pr_warn("\n"); pr_warn("====================================\n"); pr_warn("WARNING: %s/%d still has locks held!\n", @@ -6591,6 +6721,8 @@ static void print_held_locks_bug(void) lockdep_print_held_locks(current); pr_warn("\nstack backtrace:\n"); dump_stack(); + + nbcon_cpu_emergency_exit(); } void debug_check_no_locks_held(void) @@ -6648,6 +6780,7 @@ asmlinkage __visible void lockdep_sys_exit(void) if (unlikely(curr->lockdep_depth)) { if (!debug_locks_off()) return; + nbcon_cpu_emergency_enter(); pr_warn("\n"); pr_warn("================================================\n"); pr_warn("WARNING: lock held when returning to user space!\n"); @@ -6656,6 +6789,7 @@ asmlinkage __visible void lockdep_sys_exit(void) pr_warn("%s/%d is leaving the kernel with locks still held!\n", curr->comm, curr->pid); lockdep_print_held_locks(curr); + nbcon_cpu_emergency_exit(); } /* @@ -6672,6 +6806,7 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) bool rcu = warn_rcu_enter(); /* Note: the following can be executed concurrently, so be careful. */ + nbcon_cpu_emergency_enter(); pr_warn("\n"); pr_warn("=============================\n"); pr_warn("WARNING: suspicious RCU usage\n"); @@ -6710,6 +6845,7 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) lockdep_print_held_locks(curr); pr_warn("\nstack backtrace:\n"); dump_stack(); + nbcon_cpu_emergency_exit(); warn_rcu_exit(rcu); } EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index bbe9000260d0..20f9ef58d3d0 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -119,7 +119,8 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ = #define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) -#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) +#define AVG_LOCKDEP_CHAIN_DEPTH 5 +#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS * AVG_LOCKDEP_CHAIN_DEPTH) extern struct lock_chain lock_chains[]; diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c index e2bfb1db589d..6db0f43fc4df 100644 --- a/kernel/locking/lockdep_proc.c +++ b/kernel/locking/lockdep_proc.c @@ -424,7 +424,7 @@ static void seq_line(struct seq_file *m, char c, int offset, int length) for (i = 0; i < offset; i++) seq_puts(m, " "); for (i = 0; i < length; i++) - seq_printf(m, "%c", c); + seq_putc(m, c); seq_puts(m, "\n"); } diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 415d81e6ce70..cc33470f4de9 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -30,6 +30,7 @@ #include <linux/torture.h> #include <linux/reboot.h> +MODULE_DESCRIPTION("torture test facility for locking"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>"); @@ -105,7 +106,7 @@ static const struct kernel_param_ops lt_bind_ops = { module_param_cb(bind_readers, <_bind_ops, &bind_readers, 0644); module_param_cb(bind_writers, <_bind_ops, &bind_writers, 0644); -long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask); +long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask, bool dowarn); static struct task_struct *stats_task; static struct task_struct **writer_tasks; @@ -1357,7 +1358,7 @@ static int __init lock_torture_init(void) if (torture_init_error(firsterr)) goto unwind; if (cpumask_nonempty(bind_writers)) - torture_sched_setaffinity(writer_tasks[i]->pid, bind_writers); + torture_sched_setaffinity(writer_tasks[i]->pid, bind_writers, true); create_reader: if (cxt.cur_ops->readlock == NULL || (j >= cxt.nrealreaders_stress)) @@ -1368,7 +1369,7 @@ static int __init lock_torture_init(void) if (torture_init_error(firsterr)) goto unwind; if (cpumask_nonempty(bind_readers)) - torture_sched_setaffinity(reader_tasks[j]->pid, bind_readers); + torture_sched_setaffinity(reader_tasks[j]->pid, bind_readers, true); } if (stat_interval > 0) { firsterr = torture_create_kthread(lock_torture_stats, NULL, diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index bc8abb8549d2..6e6f6071cfa2 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -12,6 +12,7 @@ */ #include <linux/mutex.h> #include <linux/delay.h> +#include <linux/device.h> #include <linux/export.h> #include <linux/poison.h> #include <linux/sched.h> @@ -89,6 +90,17 @@ void debug_mutex_init(struct mutex *lock, const char *name, lock->magic = lock; } +static void devm_mutex_release(void *res) +{ + mutex_destroy(res); +} + +int __devm_mutex_init(struct device *dev, struct mutex *lock) +{ + return devm_add_action_or_reset(dev, devm_mutex_release, lock); +} +EXPORT_SYMBOL_GPL(__devm_mutex_init); + /*** * mutex_destroy - mark a mutex unusable * @lock: the mutex to be destroyed diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index cbae8c0b89ab..b36f23de48f1 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -56,31 +56,6 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) } EXPORT_SYMBOL(__mutex_init); -/* - * @owner: contains: 'struct task_struct *' to the current lock owner, - * NULL means not owned. Since task_struct pointers are aligned at - * at least L1_CACHE_BYTES, we have low bits to store extra state. - * - * Bit0 indicates a non-empty waiter list; unlock must issue a wakeup. - * Bit1 indicates unlock needs to hand the lock to the top-waiter - * Bit2 indicates handoff has been done and we're waiting for pickup. - */ -#define MUTEX_FLAG_WAITERS 0x01 -#define MUTEX_FLAG_HANDOFF 0x02 -#define MUTEX_FLAG_PICKUP 0x04 - -#define MUTEX_FLAGS 0x07 - -/* - * Internal helper function; C doesn't allow us to hide it :/ - * - * DO NOT USE (outside of mutex code). - */ -static inline struct task_struct *__mutex_owner(struct mutex *lock) -{ - return (struct task_struct *)(atomic_long_read(&lock->owner) & ~MUTEX_FLAGS); -} - static inline struct task_struct *__owner_task(unsigned long owner) { return (struct task_struct *)(owner & ~MUTEX_FLAGS); @@ -575,8 +550,10 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas struct lockdep_map *nest_lock, unsigned long ip, struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) { + DEFINE_WAKE_Q(wake_q); struct mutex_waiter waiter; struct ww_mutex *ww; + unsigned long flags; int ret; if (!use_ww_ctx) @@ -619,13 +596,13 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas return 0; } - raw_spin_lock(&lock->wait_lock); + raw_spin_lock_irqsave(&lock->wait_lock, flags); /* * After waiting to acquire the wait_lock, try again. */ if (__mutex_trylock(lock)) { if (ww_ctx) - __ww_mutex_check_waiters(lock, ww_ctx); + __ww_mutex_check_waiters(lock, ww_ctx, &wake_q); goto skip_wait; } @@ -645,7 +622,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas * Add in stamp order, waking up waiters that must kill * themselves. */ - ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx); + ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx, &wake_q); if (ret) goto err_early_kill; } @@ -680,7 +657,8 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas goto err; } - raw_spin_unlock(&lock->wait_lock); + raw_spin_unlock_irqrestore_wake(&lock->wait_lock, flags, &wake_q); + schedule_preempt_disabled(); first = __mutex_waiter_is_first(lock, &waiter); @@ -701,9 +679,9 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas trace_contention_begin(lock, LCB_F_MUTEX); } - raw_spin_lock(&lock->wait_lock); + raw_spin_lock_irqsave(&lock->wait_lock, flags); } - raw_spin_lock(&lock->wait_lock); + raw_spin_lock_irqsave(&lock->wait_lock, flags); acquired: __set_current_state(TASK_RUNNING); @@ -714,7 +692,7 @@ acquired: */ if (!ww_ctx->is_wait_die && !__mutex_waiter_is_first(lock, &waiter)) - __ww_mutex_check_waiters(lock, ww_ctx); + __ww_mutex_check_waiters(lock, ww_ctx, &wake_q); } __mutex_remove_waiter(lock, &waiter); @@ -729,7 +707,7 @@ skip_wait: if (ww_ctx) ww_mutex_lock_acquired(ww, ww_ctx); - raw_spin_unlock(&lock->wait_lock); + raw_spin_unlock_irqrestore_wake(&lock->wait_lock, flags, &wake_q); preempt_enable(); return 0; @@ -738,7 +716,7 @@ err: __mutex_remove_waiter(lock, &waiter); err_early_kill: trace_contention_end(lock, ret); - raw_spin_unlock(&lock->wait_lock); + raw_spin_unlock_irqrestore_wake(&lock->wait_lock, flags, &wake_q); debug_mutex_free_waiter(&waiter); mutex_release(&lock->dep_map, ip); preempt_enable(); @@ -908,6 +886,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne struct task_struct *next = NULL; DEFINE_WAKE_Q(wake_q); unsigned long owner; + unsigned long flags; mutex_release(&lock->dep_map, ip); @@ -934,7 +913,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne } } - raw_spin_lock(&lock->wait_lock); + raw_spin_lock_irqsave(&lock->wait_lock, flags); debug_mutex_unlock(lock); if (!list_empty(&lock->wait_list)) { /* get the first entry from the wait-list: */ @@ -951,9 +930,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne if (owner & MUTEX_FLAG_HANDOFF) __mutex_handoff(lock, next); - raw_spin_unlock(&lock->wait_lock); - - wake_up_q(&wake_q); + raw_spin_unlock_irqrestore_wake(&lock->wait_lock, flags, &wake_q); } #ifndef CONFIG_DEBUG_LOCK_ALLOC diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h index 0b2a79c4013b..cbff35b9b7ae 100644 --- a/kernel/locking/mutex.h +++ b/kernel/locking/mutex.h @@ -20,6 +20,33 @@ struct mutex_waiter { #endif }; +/* + * @owner: contains: 'struct task_struct *' to the current lock owner, + * NULL means not owned. Since task_struct pointers are aligned at + * at least L1_CACHE_BYTES, we have low bits to store extra state. + * + * Bit0 indicates a non-empty waiter list; unlock must issue a wakeup. + * Bit1 indicates unlock needs to hand the lock to the top-waiter + * Bit2 indicates handoff has been done and we're waiting for pickup. + */ +#define MUTEX_FLAG_WAITERS 0x01 +#define MUTEX_FLAG_HANDOFF 0x02 +#define MUTEX_FLAG_PICKUP 0x04 + +#define MUTEX_FLAGS 0x07 + +/* + * Internal helper function; C doesn't allow us to hide it :/ + * + * DO NOT USE (outside of mutex & scheduler code). + */ +static inline struct task_struct *__mutex_owner(struct mutex *lock) +{ + if (!lock) + return NULL; + return (struct task_struct *)(atomic_long_read(&lock->owner) & ~MUTEX_FLAGS); +} + #ifdef CONFIG_DEBUG_MUTEXES extern void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter); diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c index 75a6f6133866..b4233dc2c2b0 100644 --- a/kernel/locking/osq_lock.c +++ b/kernel/locking/osq_lock.c @@ -215,8 +215,7 @@ void osq_unlock(struct optimistic_spin_queue *lock) /* * Fast path for the uncontended case. */ - if (likely(atomic_cmpxchg_release(&lock->tail, curr, - OSQ_UNLOCKED_VAL) == curr)) + if (atomic_try_cmpxchg_release(&lock->tail, &curr, OSQ_UNLOCKED_VAL)) return; /* diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index 185bd1c906b0..6083883c4fe0 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -223,9 +223,10 @@ static bool readers_active_check(struct percpu_rw_semaphore *sem) void __sched percpu_down_write(struct percpu_rw_semaphore *sem) { + bool contended = false; + might_sleep(); rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); - trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_WRITE); /* Notify readers to take the slow path. */ rcu_sync_enter(&sem->rss); @@ -234,8 +235,11 @@ void __sched percpu_down_write(struct percpu_rw_semaphore *sem) * Try set sem->block; this provides writer-writer exclusion. * Having sem->block set makes new readers block. */ - if (!__percpu_down_write_trylock(sem)) + if (!__percpu_down_write_trylock(sem)) { + trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_WRITE); percpu_rwsem_wait(sem, /* .reader = */ false); + contended = true; + } /* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */ @@ -247,7 +251,8 @@ void __sched percpu_down_write(struct percpu_rw_semaphore *sem) /* Wait for all active readers to complete. */ rcuwait_wait_event(&sem->writer, readers_active_check(sem), TASK_UNINTERRUPTIBLE); - trace_contention_end(sem, 0); + if (contended) + trace_contention_end(sem, 0); } EXPORT_SYMBOL_GPL(percpu_down_write); diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index ebe6b8ec7cb3..7d96bed718e4 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -220,21 +220,18 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock) */ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) { - u32 old, new, val = atomic_read(&lock->val); + u32 old, new; - for (;;) { - new = (val & _Q_LOCKED_PENDING_MASK) | tail; + old = atomic_read(&lock->val); + do { + new = (old & _Q_LOCKED_PENDING_MASK) | tail; /* * We can use relaxed semantics since the caller ensures that * the MCS node is properly initialized before updating the * tail. */ - old = atomic_cmpxchg_relaxed(&lock->val, val, new); - if (old == val) - break; + } while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new)); - val = old; - } return old; } #endif /* _Q_PENDING_BITS == 8 */ @@ -586,7 +583,7 @@ EXPORT_SYMBOL(queued_spin_lock_slowpath); #include "qspinlock_paravirt.h" #include "qspinlock.c" -bool nopvspin __initdata; +bool nopvspin; static __init int parse_nopvspin(char *arg) { nopvspin = true; diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 6a0184e9c234..dc1cb90e3644 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -38,13 +38,13 @@ #define PV_PREV_CHECK_MASK 0xff /* - * Queue node uses: vcpu_running & vcpu_halted. - * Queue head uses: vcpu_running & vcpu_hashed. + * Queue node uses: VCPU_RUNNING & VCPU_HALTED. + * Queue head uses: VCPU_RUNNING & VCPU_HASHED. */ enum vcpu_state { - vcpu_running = 0, - vcpu_halted, /* Used only in pv_wait_node */ - vcpu_hashed, /* = pv_hash'ed + vcpu_halted */ + VCPU_RUNNING = 0, + VCPU_HALTED, /* Used only in pv_wait_node */ + VCPU_HASHED, /* = pv_hash'ed + VCPU_HALTED */ }; struct pv_node { @@ -86,9 +86,10 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock) */ for (;;) { int val = atomic_read(&lock->val); + u8 old = 0; if (!(val & _Q_LOCKED_PENDING_MASK) && - (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) { + try_cmpxchg_acquire(&lock->locked, &old, _Q_LOCKED_VAL)) { lockevent_inc(pv_lock_stealing); return true; } @@ -116,11 +117,12 @@ static __always_inline void set_pending(struct qspinlock *lock) * barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the * lock just to be sure that it will get it. */ -static __always_inline int trylock_clear_pending(struct qspinlock *lock) +static __always_inline bool trylock_clear_pending(struct qspinlock *lock) { + u16 old = _Q_PENDING_VAL; + return !READ_ONCE(lock->locked) && - (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL, - _Q_LOCKED_VAL) == _Q_PENDING_VAL); + try_cmpxchg_acquire(&lock->locked_pending, &old, _Q_LOCKED_VAL); } #else /* _Q_PENDING_BITS == 8 */ static __always_inline void set_pending(struct qspinlock *lock) @@ -128,27 +130,21 @@ static __always_inline void set_pending(struct qspinlock *lock) atomic_or(_Q_PENDING_VAL, &lock->val); } -static __always_inline int trylock_clear_pending(struct qspinlock *lock) +static __always_inline bool trylock_clear_pending(struct qspinlock *lock) { - int val = atomic_read(&lock->val); - - for (;;) { - int old, new; - - if (val & _Q_LOCKED_MASK) - break; + int old, new; + old = atomic_read(&lock->val); + do { + if (old & _Q_LOCKED_MASK) + return false; /* * Try to clear pending bit & set locked bit */ - old = val; - new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL; - val = atomic_cmpxchg_acquire(&lock->val, old, new); + new = (old & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL; + } while (!atomic_try_cmpxchg_acquire (&lock->val, &old, new)); - if (val == old) - return 1; - } - return 0; + return true; } #endif /* _Q_PENDING_BITS == 8 */ @@ -216,8 +212,9 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node) int hopcnt = 0; for_each_hash_entry(he, offset, hash) { + struct qspinlock *old = NULL; hopcnt++; - if (!cmpxchg(&he->lock, NULL, lock)) { + if (try_cmpxchg(&he->lock, &old, lock)) { WRITE_ONCE(he->node, node); lockevent_pv_hop(hopcnt); return &he->lock; @@ -269,7 +266,7 @@ pv_wait_early(struct pv_node *prev, int loop) if ((loop & PV_PREV_CHECK_MASK) != 0) return false; - return READ_ONCE(prev->state) != vcpu_running; + return READ_ONCE(prev->state) != VCPU_RUNNING; } /* @@ -282,7 +279,7 @@ static void pv_init_node(struct mcs_spinlock *node) BUILD_BUG_ON(sizeof(struct pv_node) > sizeof(struct qnode)); pn->cpu = smp_processor_id(); - pn->state = vcpu_running; + pn->state = VCPU_RUNNING; } /* @@ -294,8 +291,8 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev) { struct pv_node *pn = (struct pv_node *)node; struct pv_node *pp = (struct pv_node *)prev; - int loop; bool wait_early; + int loop; for (;;) { for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) { @@ -311,26 +308,26 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev) /* * Order pn->state vs pn->locked thusly: * - * [S] pn->state = vcpu_halted [S] next->locked = 1 + * [S] pn->state = VCPU_HALTED [S] next->locked = 1 * MB MB - * [L] pn->locked [RmW] pn->state = vcpu_hashed + * [L] pn->locked [RmW] pn->state = VCPU_HASHED * * Matches the cmpxchg() from pv_kick_node(). */ - smp_store_mb(pn->state, vcpu_halted); + smp_store_mb(pn->state, VCPU_HALTED); if (!READ_ONCE(node->locked)) { lockevent_inc(pv_wait_node); lockevent_cond_inc(pv_wait_early, wait_early); - pv_wait(&pn->state, vcpu_halted); + pv_wait(&pn->state, VCPU_HALTED); } /* - * If pv_kick_node() changed us to vcpu_hashed, retain that + * If pv_kick_node() changed us to VCPU_HASHED, retain that * value so that pv_wait_head_or_lock() knows to not also try * to hash this lock. */ - cmpxchg(&pn->state, vcpu_halted, vcpu_running); + cmpxchg(&pn->state, VCPU_HALTED, VCPU_RUNNING); /* * If the locked flag is still not set after wakeup, it is a @@ -360,7 +357,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev) static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) { struct pv_node *pn = (struct pv_node *)node; - + u8 old = VCPU_HALTED; /* * If the vCPU is indeed halted, advance its state to match that of * pv_wait_node(). If OTOH this fails, the vCPU was running and will @@ -377,8 +374,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) * subsequent writes. */ smp_mb__before_atomic(); - if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed) - != vcpu_halted) + if (!try_cmpxchg_relaxed(&pn->state, &old, VCPU_HASHED)) return; /* @@ -411,7 +407,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) * If pv_kick_node() already advanced our state, we don't need to * insert ourselves into the hash table anymore. */ - if (READ_ONCE(pn->state) == vcpu_hashed) + if (READ_ONCE(pn->state) == VCPU_HASHED) lp = (struct qspinlock **)1; /* @@ -424,7 +420,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) * Set correct vCPU state to be used by queue node wait-early * mechanism. */ - WRITE_ONCE(pn->state, vcpu_running); + WRITE_ONCE(pn->state, VCPU_RUNNING); /* * Set the pending bit in the active lock spinning loop to @@ -464,7 +460,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) goto gotlock; } } - WRITE_ONCE(pn->state, vcpu_hashed); + WRITE_ONCE(pn->state, VCPU_HASHED); lockevent_inc(pv_wait_head); lockevent_cond_inc(pv_wait_again, waitcnt); pv_wait(&lock->locked, _Q_SLOW_VAL); @@ -546,15 +542,14 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked) #ifndef __pv_queued_spin_unlock __visible __lockfunc void __pv_queued_spin_unlock(struct qspinlock *lock) { - u8 locked; + u8 locked = _Q_LOCKED_VAL; /* * We must not unlock if SLOW, because in that case we must first * unhash. Otherwise it would be possible to have multiple @lock * entries, which would be BAD. */ - locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0); - if (likely(locked == _Q_LOCKED_VAL)) + if (try_cmpxchg_release(&lock->locked, &locked, 0)) return; __pv_queued_spin_unlock_slowpath(lock, locked); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 4a10e8c16fd2..4a8df1800cbb 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -34,13 +34,15 @@ static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter, struct rt_mutex *lock, - struct ww_acquire_ctx *ww_ctx) + struct ww_acquire_ctx *ww_ctx, + struct wake_q_head *wake_q) { return 0; } static inline void __ww_mutex_check_waiters(struct rt_mutex *lock, - struct ww_acquire_ctx *ww_ctx) + struct ww_acquire_ctx *ww_ctx, + struct wake_q_head *wake_q) { } @@ -237,12 +239,13 @@ static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock, */ static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock) { - unsigned long owner, *p = (unsigned long *) &lock->owner; + unsigned long *p = (unsigned long *) &lock->owner; + unsigned long owner, new; + owner = READ_ONCE(*p); do { - owner = *p; - } while (cmpxchg_relaxed(p, owner, - owner | RT_MUTEX_HAS_WAITERS) != owner); + new = owner | RT_MUTEX_HAS_WAITERS; + } while (!try_cmpxchg_relaxed(p, &owner, new)); /* * The cmpxchg loop above is relaxed to avoid back-to-back ACQUIRE @@ -346,7 +349,7 @@ static __always_inline int __waiter_prio(struct task_struct *task) { int prio = task->prio; - if (!rt_prio(prio)) + if (!rt_or_dl_prio(prio)) return DEFAULT_PRIO; return prio; @@ -434,7 +437,7 @@ static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter, * Note that RT tasks are excluded from same priority (lateral) * steals to prevent the introduction of an unbounded latency. */ - if (rt_prio(waiter->tree.prio) || dl_prio(waiter->tree.prio)) + if (rt_or_dl_prio(waiter->tree.prio)) return false; return rt_waiter_node_equal(&waiter->tree, &top_waiter->tree); @@ -1200,7 +1203,8 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter, struct task_struct *task, struct ww_acquire_ctx *ww_ctx, - enum rtmutex_chainwalk chwalk) + enum rtmutex_chainwalk chwalk, + struct wake_q_head *wake_q) { struct task_struct *owner = rt_mutex_owner(lock); struct rt_mutex_waiter *top_waiter = waiter; @@ -1244,7 +1248,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock, /* Check whether the waiter should back out immediately */ rtm = container_of(lock, struct rt_mutex, rtmutex); - res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx); + res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx, wake_q); if (res) { raw_spin_lock(&task->pi_lock); rt_mutex_dequeue(lock, waiter); @@ -1288,7 +1292,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock, */ get_task_struct(owner); - raw_spin_unlock_irq(&lock->wait_lock); + raw_spin_unlock_irq_wake(&lock->wait_lock, wake_q); res = rt_mutex_adjust_prio_chain(owner, chwalk, lock, next_lock, waiter, task); @@ -1592,6 +1596,7 @@ static void __sched remove_waiter(struct rt_mutex_base *lock, * or TASK_UNINTERRUPTIBLE) * @timeout: the pre-initialized and started timer, or NULL for none * @waiter: the pre-initialized rt_mutex_waiter + * @wake_q: wake_q of tasks to wake when we drop the lock->wait_lock * * Must be called with lock->wait_lock held and interrupts disabled */ @@ -1599,7 +1604,9 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock, struct ww_acquire_ctx *ww_ctx, unsigned int state, struct hrtimer_sleeper *timeout, - struct rt_mutex_waiter *waiter) + struct rt_mutex_waiter *waiter, + struct wake_q_head *wake_q) + __releases(&lock->wait_lock) __acquires(&lock->wait_lock) { struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex); struct task_struct *owner; @@ -1629,7 +1636,7 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock, owner = rt_mutex_owner(lock); else owner = NULL; - raw_spin_unlock_irq(&lock->wait_lock); + raw_spin_unlock_irq_wake(&lock->wait_lock, wake_q); if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner)) rt_mutex_schedule(); @@ -1643,6 +1650,7 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock, } static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock, + struct rt_mutex_base *lock, struct rt_mutex_waiter *w) { /* @@ -1655,10 +1663,10 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock, if (build_ww_mutex() && w->ww_ctx) return; - /* - * Yell loudly and stop the task right here. - */ + raw_spin_unlock_irq(&lock->wait_lock); + WARN(1, "rtmutex deadlock detected\n"); + while (1) { set_current_state(TASK_INTERRUPTIBLE); rt_mutex_schedule(); @@ -1672,12 +1680,14 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock, * @state: The task state for sleeping * @chwalk: Indicator whether full or partial chainwalk is requested * @waiter: Initializer waiter for blocking + * @wake_q: The wake_q to wake tasks after we release the wait_lock */ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock, struct ww_acquire_ctx *ww_ctx, unsigned int state, enum rtmutex_chainwalk chwalk, - struct rt_mutex_waiter *waiter) + struct rt_mutex_waiter *waiter, + struct wake_q_head *wake_q) { struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex); struct ww_mutex *ww = ww_container_of(rtm); @@ -1688,7 +1698,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock, /* Try to acquire the lock again: */ if (try_to_take_rt_mutex(lock, current, NULL)) { if (build_ww_mutex() && ww_ctx) { - __ww_mutex_check_waiters(rtm, ww_ctx); + __ww_mutex_check_waiters(rtm, ww_ctx, wake_q); ww_mutex_lock_acquired(ww, ww_ctx); } return 0; @@ -1698,21 +1708,21 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock, trace_contention_begin(lock, LCB_F_RT); - ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk); + ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk, wake_q); if (likely(!ret)) - ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter); + ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter, wake_q); if (likely(!ret)) { /* acquired the lock */ if (build_ww_mutex() && ww_ctx) { if (!ww_ctx->is_wait_die) - __ww_mutex_check_waiters(rtm, ww_ctx); + __ww_mutex_check_waiters(rtm, ww_ctx, wake_q); ww_mutex_lock_acquired(ww, ww_ctx); } } else { __set_current_state(TASK_RUNNING); remove_waiter(lock, waiter); - rt_mutex_handle_deadlock(ret, chwalk, waiter); + rt_mutex_handle_deadlock(ret, chwalk, lock, waiter); } /* @@ -1728,7 +1738,8 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock, static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock, struct ww_acquire_ctx *ww_ctx, - unsigned int state) + unsigned int state, + struct wake_q_head *wake_q) { struct rt_mutex_waiter waiter; int ret; @@ -1737,7 +1748,7 @@ static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock, waiter.ww_ctx = ww_ctx; ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK, - &waiter); + &waiter, wake_q); debug_rt_mutex_free_waiter(&waiter); return ret; @@ -1753,6 +1764,7 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, struct ww_acquire_ctx *ww_ctx, unsigned int state) { + DEFINE_WAKE_Q(wake_q); unsigned long flags; int ret; @@ -1774,8 +1786,8 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, * irqsave/restore variants. */ raw_spin_lock_irqsave(&lock->wait_lock, flags); - ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state); - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state, &wake_q); + raw_spin_unlock_irqrestore_wake(&lock->wait_lock, flags, &wake_q); rt_mutex_post_schedule(); return ret; @@ -1801,8 +1813,11 @@ static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock, /** * rtlock_slowlock_locked - Slow path lock acquisition for RT locks * @lock: The underlying RT mutex + * @wake_q: The wake_q to wake tasks after we release the wait_lock */ -static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock) +static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock, + struct wake_q_head *wake_q) + __releases(&lock->wait_lock) __acquires(&lock->wait_lock) { struct rt_mutex_waiter waiter; struct task_struct *owner; @@ -1819,7 +1834,7 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock) trace_contention_begin(lock, LCB_F_RT); - task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK); + task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK, wake_q); for (;;) { /* Try to acquire the lock again */ @@ -1830,7 +1845,7 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock) owner = rt_mutex_owner(lock); else owner = NULL; - raw_spin_unlock_irq(&lock->wait_lock); + raw_spin_unlock_irq_wake(&lock->wait_lock, wake_q); if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner)) schedule_rtlock(); @@ -1855,10 +1870,11 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock) static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock) { unsigned long flags; + DEFINE_WAKE_Q(wake_q); raw_spin_lock_irqsave(&lock->wait_lock, flags); - rtlock_slowlock_locked(lock); - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + rtlock_slowlock_locked(lock, &wake_q); + raw_spin_unlock_irqrestore_wake(&lock->wait_lock, flags, &wake_q); } #endif /* RT_MUTEX_BUILD_SPINLOCKS */ diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c index a6974d044593..191e4720e546 100644 --- a/kernel/locking/rtmutex_api.c +++ b/kernel/locking/rtmutex_api.c @@ -175,10 +175,10 @@ bool __sched __rt_mutex_futex_unlock(struct rt_mutex_base *lock, } /* - * We've already deboosted, mark_wakeup_next_waiter() will - * retain preempt_disabled when we drop the wait_lock, to - * avoid inversion prior to the wakeup. preempt_disable() - * therein pairs with rt_mutex_postunlock(). + * mark_wakeup_next_waiter() deboosts and retains preemption + * disabled when dropping the wait_lock, to avoid inversion prior + * to the wakeup. preempt_disable() therein pairs with the + * preempt_enable() in rt_mutex_postunlock(). */ mark_wakeup_next_waiter(wqh, lock); @@ -275,6 +275,7 @@ void __sched rt_mutex_proxy_unlock(struct rt_mutex_base *lock) * @lock: the rt_mutex to take * @waiter: the pre-initialized rt_mutex_waiter * @task: the task to prepare + * @wake_q: the wake_q to wake tasks after we release the wait_lock * * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that. @@ -291,7 +292,8 @@ void __sched rt_mutex_proxy_unlock(struct rt_mutex_base *lock) */ int __sched __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter, - struct task_struct *task) + struct task_struct *task, + struct wake_q_head *wake_q) { int ret; @@ -302,7 +304,7 @@ int __sched __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock, /* We enforce deadlock detection for futexes */ ret = task_blocks_on_rt_mutex(lock, waiter, task, NULL, - RT_MUTEX_FULL_CHAINWALK); + RT_MUTEX_FULL_CHAINWALK, wake_q); if (ret && !rt_mutex_owner(lock)) { /* @@ -341,12 +343,16 @@ int __sched rt_mutex_start_proxy_lock(struct rt_mutex_base *lock, struct task_struct *task) { int ret; + DEFINE_WAKE_Q(wake_q); raw_spin_lock_irq(&lock->wait_lock); - ret = __rt_mutex_start_proxy_lock(lock, waiter, task); + ret = __rt_mutex_start_proxy_lock(lock, waiter, task, &wake_q); if (unlikely(ret)) remove_waiter(lock, waiter); + preempt_disable(); raw_spin_unlock_irq(&lock->wait_lock); + wake_up_q(&wake_q); + preempt_enable(); return ret; } @@ -377,7 +383,7 @@ int __sched rt_mutex_wait_proxy_lock(struct rt_mutex_base *lock, raw_spin_lock_irq(&lock->wait_lock); /* sleep on the mutex */ set_current_state(TASK_INTERRUPTIBLE); - ret = rt_mutex_slowlock_block(lock, NULL, TASK_INTERRUPTIBLE, to, waiter); + ret = rt_mutex_slowlock_block(lock, NULL, TASK_INTERRUPTIBLE, to, waiter, NULL); /* * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might * have to fix that up. diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index 1162e07cdaea..78dd3d8c6554 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -59,8 +59,8 @@ struct rt_mutex_waiter { }; /** - * rt_wake_q_head - Wrapper around regular wake_q_head to support - * "sleeping" spinlocks on RT + * struct rt_wake_q_head - Wrapper around regular wake_q_head to support + * "sleeping" spinlocks on RT * @head: The regular wake_q_head for sleeping lock variants * @rtlock_task: Task pointer for RT lock (spin/rwlock) wakeups */ @@ -83,7 +83,8 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex_base *lock, extern void rt_mutex_proxy_unlock(struct rt_mutex_base *lock); extern int __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter, - struct task_struct *task); + struct task_struct *task, + struct wake_q_head *); extern int rt_mutex_start_proxy_lock(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter, struct task_struct *task); diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c index 34a59569db6b..9f4322c07486 100644 --- a/kernel/locking/rwbase_rt.c +++ b/kernel/locking/rwbase_rt.c @@ -69,6 +69,7 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, unsigned int state) { struct rt_mutex_base *rtm = &rwb->rtmutex; + DEFINE_WAKE_Q(wake_q); int ret; rwbase_pre_schedule(); @@ -110,7 +111,7 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, * For rwlocks this returns 0 unconditionally, so the below * !ret conditionals are optimized out. */ - ret = rwbase_rtmutex_slowlock_locked(rtm, state); + ret = rwbase_rtmutex_slowlock_locked(rtm, state, &wake_q); /* * On success the rtmutex is held, so there can't be a writer @@ -121,7 +122,12 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, */ if (!ret) atomic_inc(&rwb->readers); + + preempt_disable(); raw_spin_unlock_irq(&rtm->wait_lock); + wake_up_q(&wake_q); + preempt_enable(); + if (!ret) rwbase_rtmutex_unlock(rtm); diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 2340b6d90ec6..2ddb827e3bea 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -35,7 +35,7 @@ /* * The least significant 2 bits of the owner value has the following * meanings when set. - * - Bit 0: RWSEM_READER_OWNED - The rwsem is owned by readers + * - Bit 0: RWSEM_READER_OWNED - rwsem may be owned by readers (just a hint) * - Bit 1: RWSEM_NONSPINNABLE - Cannot spin on a reader-owned lock * * When the rwsem is reader-owned and a spinning writer has timed out, @@ -181,12 +181,21 @@ static inline void rwsem_set_reader_owned(struct rw_semaphore *sem) __rwsem_set_reader_owned(sem, current); } +#ifdef CONFIG_DEBUG_RWSEMS +/* + * Return just the real task structure pointer of the owner + */ +static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem) +{ + return (struct task_struct *) + (atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK); +} + /* * Return true if the rwsem is owned by a reader. */ static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem) { -#ifdef CONFIG_DEBUG_RWSEMS /* * Check the count to see if it is write-locked. */ @@ -194,11 +203,9 @@ static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem) if (count & RWSEM_WRITER_MASK) return false; -#endif return rwsem_test_oflags(sem, RWSEM_READER_OWNED); } -#ifdef CONFIG_DEBUG_RWSEMS /* * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there * is a task pointer in owner of a reader-owned rwsem, it will be the @@ -266,15 +273,6 @@ static inline bool rwsem_write_trylock(struct rw_semaphore *sem) } /* - * Return just the real task structure pointer of the owner - */ -static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem) -{ - return (struct task_struct *) - (atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK); -} - -/* * Return the real task structure pointer of the owner and the embedded * flags in the owner. pflags must be non-NULL. */ @@ -631,7 +629,7 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem, * if it is an RT task or wait in the wait queue * for too long. */ - if (has_handoff || (!rt_task(waiter->task) && + if (has_handoff || (!rt_or_dl_task(waiter->task) && !time_after(jiffies, waiter->timeout))) return false; @@ -914,7 +912,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem) if (owner_state != OWNER_WRITER) { if (need_resched()) break; - if (rt_task(current) && + if (rt_or_dl_task(current) && (prev_owner_state != OWNER_WRITER)) break; } @@ -1002,8 +1000,8 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat /* * To prevent a constant stream of readers from starving a sleeping - * waiter, don't attempt optimistic lock stealing if the lock is - * currently owned by readers. + * writer, don't attempt optimistic lock stealing if the lock is + * very likely owned by readers. */ if ((atomic_long_read(&sem->owner) & RWSEM_READER_OWNED) && (rcnt > 1) && !(count & RWSEM_WRITER_LOCKED)) @@ -1297,7 +1295,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) /* * lock for writing */ -static inline int __down_write_common(struct rw_semaphore *sem, int state) +static __always_inline int __down_write_common(struct rw_semaphore *sem, int state) { int ret = 0; @@ -1310,12 +1308,12 @@ static inline int __down_write_common(struct rw_semaphore *sem, int state) return ret; } -static inline void __down_write(struct rw_semaphore *sem) +static __always_inline void __down_write(struct rw_semaphore *sem) { __down_write_common(sem, TASK_UNINTERRUPTIBLE); } -static inline int __down_write_killable(struct rw_semaphore *sem) +static __always_inline int __down_write_killable(struct rw_semaphore *sem) { return __down_write_common(sem, TASK_KILLABLE); } @@ -1415,8 +1413,8 @@ static inline void __downgrade_write(struct rw_semaphore *sem) #define rwbase_rtmutex_lock_state(rtm, state) \ __rt_mutex_lock(rtm, state) -#define rwbase_rtmutex_slowlock_locked(rtm, state) \ - __rt_mutex_slowlock_locked(rtm, NULL, state) +#define rwbase_rtmutex_slowlock_locked(rtm, state, wq) \ + __rt_mutex_slowlock_locked(rtm, NULL, state, wq) #define rwbase_rtmutex_unlock(rtm) \ __rt_mutex_unlock(rtm) diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c index 34bfae72f295..de9117c0e671 100644 --- a/kernel/locking/semaphore.c +++ b/kernel/locking/semaphore.c @@ -29,6 +29,7 @@ #include <linux/export.h> #include <linux/sched.h> #include <linux/sched/debug.h> +#include <linux/sched/wake_q.h> #include <linux/semaphore.h> #include <linux/spinlock.h> #include <linux/ftrace.h> @@ -38,7 +39,7 @@ static noinline void __down(struct semaphore *sem); static noinline int __down_interruptible(struct semaphore *sem); static noinline int __down_killable(struct semaphore *sem); static noinline int __down_timeout(struct semaphore *sem, long timeout); -static noinline void __up(struct semaphore *sem); +static noinline void __up(struct semaphore *sem, struct wake_q_head *wake_q); /** * down - acquire the semaphore @@ -183,13 +184,16 @@ EXPORT_SYMBOL(down_timeout); void __sched up(struct semaphore *sem) { unsigned long flags; + DEFINE_WAKE_Q(wake_q); raw_spin_lock_irqsave(&sem->lock, flags); if (likely(list_empty(&sem->wait_list))) sem->count++; else - __up(sem); + __up(sem, &wake_q); raw_spin_unlock_irqrestore(&sem->lock, flags); + if (!wake_q_empty(&wake_q)) + wake_up_q(&wake_q); } EXPORT_SYMBOL(up); @@ -269,11 +273,12 @@ static noinline int __sched __down_timeout(struct semaphore *sem, long timeout) return __down_common(sem, TASK_UNINTERRUPTIBLE, timeout); } -static noinline void __sched __up(struct semaphore *sem) +static noinline void __sched __up(struct semaphore *sem, + struct wake_q_head *wake_q) { struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list, struct semaphore_waiter, list); list_del(&waiter->list); waiter->up = true; - wake_up_process(waiter->task); + wake_q_add(wake_q, waiter->task); } diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c index 8475a0794f8c..7685defd7c52 100644 --- a/kernel/locking/spinlock.c +++ b/kernel/locking/spinlock.c @@ -65,7 +65,7 @@ EXPORT_PER_CPU_SYMBOL(__mmiowb_state); * towards that other CPU that it should break the lock ASAP. */ #define BUILD_LOCK_OPS(op, locktype) \ -void __lockfunc __raw_##op##_lock(locktype##_t *lock) \ +static void __lockfunc __raw_##op##_lock(locktype##_t *lock) \ { \ for (;;) { \ preempt_disable(); \ @@ -77,7 +77,7 @@ void __lockfunc __raw_##op##_lock(locktype##_t *lock) \ } \ } \ \ -unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \ +static unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \ { \ unsigned long flags; \ \ @@ -95,12 +95,12 @@ unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \ return flags; \ } \ \ -void __lockfunc __raw_##op##_lock_irq(locktype##_t *lock) \ +static void __lockfunc __raw_##op##_lock_irq(locktype##_t *lock) \ { \ _raw_##op##_lock_irqsave(lock); \ } \ \ -void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \ +static void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \ { \ unsigned long flags; \ \ @@ -413,3 +413,11 @@ notrace int in_lock_functions(unsigned long addr) && addr < (unsigned long)__lock_text_end; } EXPORT_SYMBOL(in_lock_functions); + +#if defined(CONFIG_PROVE_LOCKING) && defined(CONFIG_PREEMPT_RT) +void notrace lockdep_assert_in_softirq_func(void) +{ + lockdep_assert_in_softirq(); +} +EXPORT_SYMBOL(lockdep_assert_in_softirq_func); +#endif diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c index 38e292454fcc..db1e11b45de6 100644 --- a/kernel/locking/spinlock_rt.c +++ b/kernel/locking/spinlock_rt.c @@ -51,7 +51,7 @@ static __always_inline void __rt_spin_lock(spinlock_t *lock) migrate_disable(); } -void __sched rt_spin_lock(spinlock_t *lock) +void __sched rt_spin_lock(spinlock_t *lock) __acquires(RCU) { spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); __rt_spin_lock(lock); @@ -75,7 +75,7 @@ void __sched rt_spin_lock_nest_lock(spinlock_t *lock, EXPORT_SYMBOL(rt_spin_lock_nest_lock); #endif -void __sched rt_spin_unlock(spinlock_t *lock) +void __sched rt_spin_unlock(spinlock_t *lock) __releases(RCU) { spin_release(&lock->dep_map, _RET_IP_); migrate_enable(); @@ -162,9 +162,10 @@ rwbase_rtmutex_lock_state(struct rt_mutex_base *rtm, unsigned int state) } static __always_inline int -rwbase_rtmutex_slowlock_locked(struct rt_mutex_base *rtm, unsigned int state) +rwbase_rtmutex_slowlock_locked(struct rt_mutex_base *rtm, unsigned int state, + struct wake_q_head *wake_q) { - rtlock_slowlock_locked(rtm); + rtlock_slowlock_locked(rtm, wake_q); return 0; } @@ -225,7 +226,7 @@ int __sched rt_write_trylock(rwlock_t *rwlock) } EXPORT_SYMBOL(rt_write_trylock); -void __sched rt_read_lock(rwlock_t *rwlock) +void __sched rt_read_lock(rwlock_t *rwlock) __acquires(RCU) { rtlock_might_resched(); rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_); @@ -235,7 +236,7 @@ void __sched rt_read_lock(rwlock_t *rwlock) } EXPORT_SYMBOL(rt_read_lock); -void __sched rt_write_lock(rwlock_t *rwlock) +void __sched rt_write_lock(rwlock_t *rwlock) __acquires(RCU) { rtlock_might_resched(); rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); @@ -246,7 +247,7 @@ void __sched rt_write_lock(rwlock_t *rwlock) EXPORT_SYMBOL(rt_write_lock); #ifdef CONFIG_DEBUG_LOCK_ALLOC -void __sched rt_write_lock_nested(rwlock_t *rwlock, int subclass) +void __sched rt_write_lock_nested(rwlock_t *rwlock, int subclass) __acquires(RCU) { rtlock_might_resched(); rwlock_acquire(&rwlock->dep_map, subclass, 0, _RET_IP_); @@ -257,7 +258,7 @@ void __sched rt_write_lock_nested(rwlock_t *rwlock, int subclass) EXPORT_SYMBOL(rt_write_lock_nested); #endif -void __sched rt_read_unlock(rwlock_t *rwlock) +void __sched rt_read_unlock(rwlock_t *rwlock) __releases(RCU) { rwlock_release(&rwlock->dep_map, _RET_IP_); migrate_enable(); @@ -266,7 +267,7 @@ void __sched rt_read_unlock(rwlock_t *rwlock) } EXPORT_SYMBOL(rt_read_unlock); -void __sched rt_write_unlock(rwlock_t *rwlock) +void __sched rt_write_unlock(rwlock_t *rwlock) __releases(RCU) { rwlock_release(&rwlock->dep_map, _RET_IP_); rcu_read_unlock(); diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 78719e1ef1b1..bcb1b9fea588 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -62,7 +62,8 @@ static int __test_mutex(unsigned int flags) int ret; ww_mutex_init(&mtx.mutex, &ww_class); - ww_acquire_init(&ctx, &ww_class); + if (flags & TEST_MTX_CTX) + ww_acquire_init(&ctx, &ww_class); INIT_WORK_ONSTACK(&mtx.work, test_mutex_work); init_completion(&mtx.ready); @@ -90,7 +91,8 @@ static int __test_mutex(unsigned int flags) ret = wait_for_completion_timeout(&mtx.done, TIMEOUT); } ww_mutex_unlock(&mtx.mutex); - ww_acquire_fini(&ctx); + if (flags & TEST_MTX_CTX) + ww_acquire_fini(&ctx); if (ret) { pr_err("%s(flags=%x): mutual exclusion failure\n", @@ -402,7 +404,7 @@ static inline u32 prandom_u32_below(u32 ceil) static int *get_random_order(int count) { int *order; - int n, r, tmp; + int n, r; order = kmalloc_array(count, sizeof(*order), GFP_KERNEL); if (!order) @@ -413,11 +415,8 @@ static int *get_random_order(int count) for (n = count - 1; n > 1; n--) { r = prandom_u32_below(n + 1); - if (r != n) { - tmp = order[n]; - order[n] = order[r]; - order[r] = tmp; - } + if (r != n) + swap(order[n], order[r]); } return order; @@ -679,7 +678,7 @@ static int __init test_ww_mutex_init(void) if (ret) return ret; - ret = stress(2047, hweight32(STRESS_ALL)*ncpus, STRESS_ALL); + ret = stress(2046, hweight32(STRESS_ALL)*ncpus, STRESS_ALL); if (ret) return ret; @@ -697,3 +696,4 @@ module_exit(test_ww_mutex_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("API test facility for ww_mutexes"); diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h index 3ad2cc4823e5..37f025a096c9 100644 --- a/kernel/locking/ww_mutex.h +++ b/kernel/locking/ww_mutex.h @@ -70,14 +70,14 @@ __ww_mutex_has_waiters(struct mutex *lock) return atomic_long_read(&lock->owner) & MUTEX_FLAG_WAITERS; } -static inline void lock_wait_lock(struct mutex *lock) +static inline void lock_wait_lock(struct mutex *lock, unsigned long *flags) { - raw_spin_lock(&lock->wait_lock); + raw_spin_lock_irqsave(&lock->wait_lock, *flags); } -static inline void unlock_wait_lock(struct mutex *lock) +static inline void unlock_wait_lock(struct mutex *lock, unsigned long *flags) { - raw_spin_unlock(&lock->wait_lock); + raw_spin_unlock_irqrestore(&lock->wait_lock, *flags); } static inline void lockdep_assert_wait_lock_held(struct mutex *lock) @@ -144,14 +144,14 @@ __ww_mutex_has_waiters(struct rt_mutex *lock) return rt_mutex_has_waiters(&lock->rtmutex); } -static inline void lock_wait_lock(struct rt_mutex *lock) +static inline void lock_wait_lock(struct rt_mutex *lock, unsigned long *flags) { - raw_spin_lock(&lock->rtmutex.wait_lock); + raw_spin_lock_irqsave(&lock->rtmutex.wait_lock, *flags); } -static inline void unlock_wait_lock(struct rt_mutex *lock) +static inline void unlock_wait_lock(struct rt_mutex *lock, unsigned long *flags) { - raw_spin_unlock(&lock->rtmutex.wait_lock); + raw_spin_unlock_irqrestore(&lock->rtmutex.wait_lock, *flags); } static inline void lockdep_assert_wait_lock_held(struct rt_mutex *lock) @@ -237,7 +237,7 @@ __ww_ctx_less(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b) int a_prio = a->task->prio; int b_prio = b->task->prio; - if (rt_prio(a_prio) || rt_prio(b_prio)) { + if (rt_or_dl_prio(a_prio) || rt_or_dl_prio(b_prio)) { if (a_prio > b_prio) return true; @@ -275,7 +275,7 @@ __ww_ctx_less(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b) */ static bool __ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter, - struct ww_acquire_ctx *ww_ctx) + struct ww_acquire_ctx *ww_ctx, struct wake_q_head *wake_q) { if (!ww_ctx->is_wait_die) return false; @@ -284,7 +284,7 @@ __ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter, #ifndef WW_RT debug_mutex_wake_waiter(lock, waiter); #endif - wake_up_process(waiter->task); + wake_q_add(wake_q, waiter->task); } return true; @@ -299,7 +299,8 @@ __ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter, */ static bool __ww_mutex_wound(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx, - struct ww_acquire_ctx *hold_ctx) + struct ww_acquire_ctx *hold_ctx, + struct wake_q_head *wake_q) { struct task_struct *owner = __ww_mutex_owner(lock); @@ -331,7 +332,7 @@ static bool __ww_mutex_wound(struct MUTEX *lock, * wakeup pending to re-read the wounded state. */ if (owner != current) - wake_up_process(owner); + wake_q_add(wake_q, owner); return true; } @@ -352,7 +353,8 @@ static bool __ww_mutex_wound(struct MUTEX *lock, * The current task must not be on the wait list. */ static void -__ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx) +__ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx, + struct wake_q_head *wake_q) { struct MUTEX_WAITER *cur; @@ -364,8 +366,8 @@ __ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx) if (!cur->ww_ctx) continue; - if (__ww_mutex_die(lock, cur, ww_ctx) || - __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx)) + if (__ww_mutex_die(lock, cur, ww_ctx, wake_q) || + __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx, wake_q)) break; } } @@ -377,6 +379,9 @@ __ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx) static __always_inline void ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { + DEFINE_WAKE_Q(wake_q); + unsigned long flags; + ww_mutex_lock_acquired(lock, ctx); /* @@ -404,9 +409,12 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) * Uh oh, we raced in fastpath, check if any of the waiters need to * die or wound us. */ - lock_wait_lock(&lock->base); - __ww_mutex_check_waiters(&lock->base, ctx); - unlock_wait_lock(&lock->base); + lock_wait_lock(&lock->base, &flags); + __ww_mutex_check_waiters(&lock->base, ctx, &wake_q); + preempt_disable(); + unlock_wait_lock(&lock->base, &flags); + wake_up_q(&wake_q); + preempt_enable(); } static __always_inline int @@ -488,7 +496,8 @@ __ww_mutex_check_kill(struct MUTEX *lock, struct MUTEX_WAITER *waiter, static inline int __ww_mutex_add_waiter(struct MUTEX_WAITER *waiter, struct MUTEX *lock, - struct ww_acquire_ctx *ww_ctx) + struct ww_acquire_ctx *ww_ctx, + struct wake_q_head *wake_q) { struct MUTEX_WAITER *cur, *pos = NULL; bool is_wait_die; @@ -532,7 +541,7 @@ __ww_mutex_add_waiter(struct MUTEX_WAITER *waiter, pos = cur; /* Wait-Die: ensure younger waiters die. */ - __ww_mutex_die(lock, cur, ww_ctx); + __ww_mutex_die(lock, cur, ww_ctx, wake_q); } __ww_waiter_add(lock, waiter, pos); @@ -550,7 +559,7 @@ __ww_mutex_add_waiter(struct MUTEX_WAITER *waiter, * such that either we or the fastpath will wound @ww->ctx. */ smp_mb(); - __ww_mutex_wound(lock, ww_ctx, ww->ctx); + __ww_mutex_wound(lock, ww_ctx, ww->ctx, wake_q); } return 0; |