From bc2eecd7ecce40af43b6eb3d256b6076257df846 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 1 Aug 2017 00:31:32 -0400 Subject: futex: Allow for compiling out PI support This makes it possible to preserve basic futex support and compile out the PI support when RT mutexes are not available. Signed-off-by: Nicolas Pitre Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Darren Hart Link: http://lkml.kernel.org/r/alpine.LFD.2.20.1708010024190.5981@knanqh.ubzr --- kernel/locking/rtmutex_common.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'kernel/locking') diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index 72ad45a9a794..8d039b928d61 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -40,6 +40,9 @@ struct rt_mutex_waiter { /* * Various helpers to access the waiters-tree: */ + +#ifdef CONFIG_RT_MUTEXES + static inline int rt_mutex_has_waiters(struct rt_mutex *lock) { return !RB_EMPTY_ROOT(&lock->waiters); @@ -69,6 +72,32 @@ task_top_pi_waiter(struct task_struct *p) pi_tree_entry); } +#else + +static inline int rt_mutex_has_waiters(struct rt_mutex *lock) +{ + return false; +} + +static inline struct rt_mutex_waiter * +rt_mutex_top_waiter(struct rt_mutex *lock) +{ + return NULL; +} + +static inline int task_has_pi_waiters(struct task_struct *p) +{ + return false; +} + +static inline struct rt_mutex_waiter * +task_top_pi_waiter(struct task_struct *p) +{ + return NULL; +} + +#endif + /* * lock->owner state tracking: */ -- cgit From 50972fe78f24f1cd0b9d7bbf1f87d2be9e4f412e Mon Sep 17 00:00:00 2001 From: Prateek Sood Date: Fri, 14 Jul 2017 19:17:56 +0530 Subject: locking/osq_lock: Fix osq_lock queue corruption Fix ordering of link creation between node->prev and prev->next in osq_lock(). A case in which the status of optimistic spin queue is CPU6->CPU2 in which CPU6 has acquired the lock. tail v ,-. <- ,-. |6| |2| `-' -> `-' At this point if CPU0 comes in to acquire osq_lock, it will update the tail count. CPU2 CPU0 ---------------------------------- tail v ,-. <- ,-. ,-. |6| |2| |0| `-' -> `-' `-' After tail count update if CPU2 starts to unqueue itself from optimistic spin queue, it will find an updated tail count with CPU0 and update CPU2 node->next to NULL in osq_wait_next(). unqueue-A tail v ,-. <- ,-. ,-. |6| |2| |0| `-' `-' `-' unqueue-B ->tail != curr && !node->next If reordering of following stores happen then prev->next where prev being CPU2 would be updated to point to CPU0 node: tail v ,-. <- ,-. ,-. |6| |2| |0| `-' `-' -> `-' osq_wait_next() node->next <- 0 xchg(node->next, NULL) tail v ,-. <- ,-. ,-. |6| |2| |0| `-' `-' `-' unqueue-C At this point if next instruction WRITE_ONCE(next->prev, prev); in CPU2 path is committed before the update of CPU0 node->prev = prev then CPU0 node->prev will point to CPU6 node. tail v----------. v ,-. <- ,-. ,-. |6| |2| |0| `-' `-' `-' `----------^ At this point if CPU0 path's node->prev = prev is committed resulting in change of CPU0 prev back to CPU2 node. CPU2 node->next is NULL currently, tail v ,-. <- ,-. <- ,-. |6| |2| |0| `-' `-' `-' `----------^ so if CPU0 gets into unqueue path of osq_lock it will keep spinning in infinite loop as condition prev->next == node will never be true. Signed-off-by: Prateek Sood [ Added pictures, rewrote comments. ] Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: sramana@codeaurora.org Link: http://lkml.kernel.org/r/1500040076-27626-1-git-send-email-prsood@codeaurora.org Signed-off-by: Ingo Molnar --- kernel/locking/osq_lock.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'kernel/locking') diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c index a3167941093b..a74ee6abd039 100644 --- a/kernel/locking/osq_lock.c +++ b/kernel/locking/osq_lock.c @@ -109,6 +109,19 @@ bool osq_lock(struct optimistic_spin_queue *lock) prev = decode_cpu(old); node->prev = prev; + + /* + * osq_lock() unqueue + * + * node->prev = prev osq_wait_next() + * WMB MB + * prev->next = node next->prev = prev // unqueue-C + * + * Here 'node->prev' and 'next->prev' are the same variable and we need + * to ensure these stores happen in-order to avoid corrupting the list. + */ + smp_wmb(); + WRITE_ONCE(prev->next, node); /* -- cgit From 0aa1125fa8bc5e5f98317156728fa4d0293561a5 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 19 Jun 2017 21:02:12 +0300 Subject: locking/rwsem-spinlock: Add killable versions of __down_read() Rename __down_read() in __down_read_common() and teach it to abort waiting in case of pending signals and killable state argument passed. Note, that we shouldn't wake anybody up in EINTR path, as: We check for signal_pending_state() after (!waiter.task) test and under spinlock. So, current task wasn't able to be woken up. It may be in two cases: a writer is owner of the sem, or a writer is a first waiter of the sem. If a writer is owner of the sem, no one else may work with it in parallel. It will wake somebody, when it call up_write() or downgrade_write(). If a writer is the first waiter, it will be woken up, when the last active reader releases the sem, and sem->count became 0. Also note, that set_current_state() may be moved down to schedule() (after !waiter.task check), as all assignments in this type of semaphore (including wake_up), occur under spinlock, so we can't miss anything. Signed-off-by: Kirill Tkhai Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: avagin@virtuozzo.com Cc: davem@davemloft.net Cc: fenghua.yu@intel.com Cc: gorcunov@virtuozzo.com Cc: heiko.carstens@de.ibm.com Cc: hpa@zytor.com Cc: ink@jurassic.park.msu.ru Cc: mattst88@gmail.com Cc: rth@twiddle.net Cc: schwidefsky@de.ibm.com Cc: tony.luck@intel.com Link: http://lkml.kernel.org/r/149789533283.9059.9829416940494747182.stgit@localhost.localdomain Signed-off-by: Ingo Molnar --- kernel/locking/rwsem-spinlock.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 20819df98125..0848634c5512 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -126,7 +126,7 @@ __rwsem_wake_one_writer(struct rw_semaphore *sem) /* * get a read lock on the semaphore */ -void __sched __down_read(struct rw_semaphore *sem) +int __sched __down_read_common(struct rw_semaphore *sem, int state) { struct rwsem_waiter waiter; unsigned long flags; @@ -140,8 +140,6 @@ void __sched __down_read(struct rw_semaphore *sem) goto out; } - set_current_state(TASK_UNINTERRUPTIBLE); - /* set up my own style of waitqueue */ waiter.task = current; waiter.type = RWSEM_WAITING_FOR_READ; @@ -149,20 +147,41 @@ void __sched __down_read(struct rw_semaphore *sem) list_add_tail(&waiter.list, &sem->wait_list); - /* we don't need to touch the semaphore struct anymore */ - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - /* wait to be given the lock */ for (;;) { if (!waiter.task) break; + if (signal_pending_state(state, current)) + goto out_nolock; + set_current_state(state); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); schedule(); - set_current_state(TASK_UNINTERRUPTIBLE); + raw_spin_lock_irqsave(&sem->wait_lock, flags); } - __set_current_state(TASK_RUNNING); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); out: - ; + return 0; + +out_nolock: + /* + * We didn't take the lock, so that there is a writer, which + * is owner or the first waiter of the sem. If it's a waiter, + * it will be woken by current owner. Not need to wake anybody. + */ + list_del(&waiter.list); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + return -EINTR; +} + +void __sched __down_read(struct rw_semaphore *sem) +{ + __down_read_common(sem, TASK_UNINTERRUPTIBLE); +} + +int __sched __down_read_killable(struct rw_semaphore *sem) +{ + return __down_read_common(sem, TASK_KILLABLE); } /* -- cgit From 83ced169d9a01f22eb39f1fcc1f89ad9d223238f Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 19 Jun 2017 21:02:26 +0300 Subject: locking/rwsem-xadd: Add killable versions of rwsem_down_read_failed() Rename rwsem_down_read_failed() in __rwsem_down_read_failed_common() and teach it to abort waiting in case of pending signals and killable state argument passed. Note, that we shouldn't wake anybody up in EINTR path, as: We check for (waiter.task) under spinlock before we go to out_nolock path. Current task wasn't able to be woken up, so there are a writer, owning the sem, or a writer, which is the first waiter. In the both cases we shouldn't wake anybody. If there is a writer, owning the sem, and we were the only waiter, remove RWSEM_WAITING_BIAS, as there are no waiters anymore. Signed-off-by: Kirill Tkhai Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: avagin@virtuozzo.com Cc: davem@davemloft.net Cc: fenghua.yu@intel.com Cc: gorcunov@virtuozzo.com Cc: heiko.carstens@de.ibm.com Cc: hpa@zytor.com Cc: ink@jurassic.park.msu.ru Cc: mattst88@gmail.com Cc: rth@twiddle.net Cc: schwidefsky@de.ibm.com Cc: tony.luck@intel.com Link: http://lkml.kernel.org/r/149789534632.9059.2901382369609922565.stgit@localhost.localdomain Signed-off-by: Ingo Molnar --- kernel/locking/rwsem-xadd.c | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 34e727f18e49..02f660666ab8 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -221,8 +221,8 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, /* * Wait for the read lock to be granted */ -__visible -struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) +static inline struct rw_semaphore __sched * +__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state) { long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; struct rwsem_waiter waiter; @@ -255,17 +255,44 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) /* wait to be given the lock */ while (true) { - set_current_state(TASK_UNINTERRUPTIBLE); + set_current_state(state); if (!waiter.task) break; + if (signal_pending_state(state, current)) { + raw_spin_lock_irq(&sem->wait_lock); + if (waiter.task) + goto out_nolock; + raw_spin_unlock_irq(&sem->wait_lock); + break; + } schedule(); } __set_current_state(TASK_RUNNING); return sem; +out_nolock: + list_del(&waiter.list); + if (list_empty(&sem->wait_list)) + atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count); + raw_spin_unlock_irq(&sem->wait_lock); + __set_current_state(TASK_RUNNING); + return ERR_PTR(-EINTR); +} + +__visible struct rw_semaphore * __sched +rwsem_down_read_failed(struct rw_semaphore *sem) +{ + return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(rwsem_down_read_failed); +__visible struct rw_semaphore * __sched +rwsem_down_read_failed_killable(struct rw_semaphore *sem) +{ + return __rwsem_down_read_failed_common(sem, TASK_KILLABLE); +} +EXPORT_SYMBOL(rwsem_down_read_failed_killable); + /* * This function must be called with the sem->wait_lock held to prevent * race conditions between checking the rwsem wait list and setting the -- cgit From d92a8cfcb37ecd1315269dab741f073b63b3a8b6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 3 Mar 2017 10:13:38 +0100 Subject: locking/lockdep: Rework FS_RECLAIM annotation A while ago someone, and I cannot find the email just now, asked if we could not implement the RECLAIM_FS inversion stuff with a 'fake' lock like we use for other things like workqueues etc. I think this should be possible which allows reducing the 'irq' states and will reduce the amount of __bfs() lookups we do. Removing the 1 IRQ state results in 4 less __bfs() walks per dependency, improving lockdep performance. And by moving this annotation out of the lockdep code it becomes easier for the mm people to extend. Signed-off-by: Peter Zijlstra (Intel) Cc: Byungchul Park Cc: Linus Torvalds Cc: Mel Gorman Cc: Michal Hocko Cc: Nikolay Borisov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: boqun.feng@gmail.com Cc: iamjoonsoo.kim@lge.com Cc: kernel-team@lge.com Cc: kirill@shutemov.name Cc: npiggin@gmail.com Cc: walken@google.com Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 95 +---------------------------------------- kernel/locking/lockdep_states.h | 1 - 2 files changed, 1 insertion(+), 95 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 7d2499bec5fe..986f2fa79dbb 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -344,14 +344,12 @@ EXPORT_SYMBOL(lockdep_on); #if VERBOSE # define HARDIRQ_VERBOSE 1 # define SOFTIRQ_VERBOSE 1 -# define RECLAIM_VERBOSE 1 #else # define HARDIRQ_VERBOSE 0 # define SOFTIRQ_VERBOSE 0 -# define RECLAIM_VERBOSE 0 #endif -#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE || RECLAIM_VERBOSE +#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE /* * Quick filtering for interesting events: */ @@ -2567,14 +2565,6 @@ static int SOFTIRQ_verbose(struct lock_class *class) return 0; } -static int RECLAIM_FS_verbose(struct lock_class *class) -{ -#if RECLAIM_VERBOSE - return class_filter(class); -#endif - return 0; -} - #define STRICT_READ_CHECKS 1 static int (*state_verbose_f[])(struct lock_class *class) = { @@ -2870,57 +2860,6 @@ void trace_softirqs_off(unsigned long ip) debug_atomic_inc(redundant_softirqs_off); } -static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags) -{ - struct task_struct *curr = current; - - if (unlikely(!debug_locks)) - return; - - gfp_mask = current_gfp_context(gfp_mask); - - /* no reclaim without waiting on it */ - if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) - return; - - /* this guy won't enter reclaim */ - if ((curr->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC)) - return; - - /* We're only interested __GFP_FS allocations for now */ - if (!(gfp_mask & __GFP_FS) || (curr->flags & PF_MEMALLOC_NOFS)) - return; - - /* - * Oi! Can't be having __GFP_FS allocations with IRQs disabled. - */ - if (DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))) - return; - - /* Disable lockdep if explicitly requested */ - if (gfp_mask & __GFP_NOLOCKDEP) - return; - - mark_held_locks(curr, RECLAIM_FS); -} - -static void check_flags(unsigned long flags); - -void lockdep_trace_alloc(gfp_t gfp_mask) -{ - unsigned long flags; - - if (unlikely(current->lockdep_recursion)) - return; - - raw_local_irq_save(flags); - check_flags(flags); - current->lockdep_recursion = 1; - __lockdep_trace_alloc(gfp_mask, flags); - current->lockdep_recursion = 0; - raw_local_irq_restore(flags); -} - static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) { /* @@ -2966,22 +2905,6 @@ static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) } } - /* - * We reuse the irq context infrastructure more broadly as a general - * context checking code. This tests GFP_FS recursion (a lock taken - * during reclaim for a GFP_FS allocation is held over a GFP_FS - * allocation). - */ - if (!hlock->trylock && (curr->lockdep_reclaim_gfp & __GFP_FS)) { - if (hlock->read) { - if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS_READ)) - return 0; - } else { - if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS)) - return 0; - } - } - return 1; } @@ -3040,10 +2963,6 @@ static inline int separate_irq_context(struct task_struct *curr, return 0; } -void lockdep_trace_alloc(gfp_t gfp_mask) -{ -} - #endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */ /* @@ -3952,18 +3871,6 @@ void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie) } EXPORT_SYMBOL_GPL(lock_unpin_lock); -void lockdep_set_current_reclaim_state(gfp_t gfp_mask) -{ - current->lockdep_reclaim_gfp = current_gfp_context(gfp_mask); -} -EXPORT_SYMBOL_GPL(lockdep_set_current_reclaim_state); - -void lockdep_clear_current_reclaim_state(void) -{ - current->lockdep_reclaim_gfp = 0; -} -EXPORT_SYMBOL_GPL(lockdep_clear_current_reclaim_state); - #ifdef CONFIG_LOCK_STAT static int print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, diff --git a/kernel/locking/lockdep_states.h b/kernel/locking/lockdep_states.h index 995b0cc2b84c..35ca09f2ed0b 100644 --- a/kernel/locking/lockdep_states.h +++ b/kernel/locking/lockdep_states.h @@ -6,4 +6,3 @@ */ LOCKDEP_STATE(HARDIRQ) LOCKDEP_STATE(SOFTIRQ) -LOCKDEP_STATE(RECLAIM_FS) -- cgit From ae813308f4630642d2c1c87553929ce95f29f9ef Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 3 Mar 2017 10:13:38 +0100 Subject: locking/lockdep: Avoid creating redundant links Two boots + a make defconfig, the first didn't have the redundant bit in, the second did: lock-classes: 1168 1169 [max: 8191] direct dependencies: 7688 5812 [max: 32768] indirect dependencies: 25492 25937 all direct dependencies: 220113 217512 dependency chains: 9005 9008 [max: 65536] dependency chain hlocks: 34450 34366 [max: 327680] in-hardirq chains: 55 51 in-softirq chains: 371 378 in-process chains: 8579 8579 stack-trace entries: 108073 88474 [max: 524288] combined max dependencies: 178738560 169094640 max locking depth: 15 15 max bfs queue depth: 320 329 cyclic checks: 9123 9190 redundant checks: 5046 redundant links: 1828 find-mask forwards checks: 2564 2599 find-mask backwards checks: 39521 39789 So it saves nearly 2k links and a fair chunk of stack-trace entries, but as expected, makes no real difference on the indirect dependencies. At the same time, you see the max BFS depth increase, which is also expected, although it could easily be boot variance -- these numbers are not entirely stable between boots. The down side is that the cycles in the graph become larger and thus the reports harder to read. XXX: do we want this as a CONFIG variable, implied by LOCKDEP_SMALL? Signed-off-by: Peter Zijlstra (Intel) Cc: Byungchul Park Cc: Linus Torvalds Cc: Mel Gorman Cc: Michal Hocko Cc: Nikolay Borisov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: boqun.feng@gmail.com Cc: iamjoonsoo.kim@lge.com Cc: kernel-team@lge.com Cc: kirill@shutemov.name Cc: npiggin@gmail.com Cc: walken@google.com Link: http://lkml.kernel.org/r/20170303091338.GH6536@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 27 +++++++++++++++++++++++++++ kernel/locking/lockdep_internals.h | 2 ++ kernel/locking/lockdep_proc.c | 4 ++++ 3 files changed, 33 insertions(+) (limited to 'kernel/locking') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 986f2fa79dbb..b2dd313951ce 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1307,6 +1307,19 @@ check_noncircular(struct lock_list *root, struct lock_class *target, return result; } +static noinline int +check_redundant(struct lock_list *root, struct lock_class *target, + struct lock_list **target_entry) +{ + int result; + + debug_atomic_inc(nr_redundant_checks); + + result = __bfs_forwards(root, target, class_equal, target_entry); + + return result; +} + #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) /* * Forwards and backwards subgraph searching, for the purposes of @@ -1872,6 +1885,20 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, } } + /* + * Is the -> link redundant? + */ + this.class = hlock_class(prev); + this.parent = NULL; + ret = check_redundant(&this, hlock_class(next), &target_entry); + if (!ret) { + debug_atomic_inc(nr_redundant); + return 2; + } + if (ret < 0) + return print_bfs_bug(ret); + + if (!*stack_saved) { if (!save_trace(&trace)) return 0; diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index c08fbd2f5ba9..1da4669d57a7 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -143,6 +143,8 @@ struct lockdep_stats { int redundant_softirqs_on; int redundant_softirqs_off; int nr_unused_locks; + int nr_redundant_checks; + int nr_redundant; int nr_cyclic_checks; int nr_cyclic_check_recursions; int nr_find_usage_forwards_checks; diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c index 6d1fcc786081..68d9e267ccd4 100644 --- a/kernel/locking/lockdep_proc.c +++ b/kernel/locking/lockdep_proc.c @@ -201,6 +201,10 @@ static void lockdep_stats_debug_show(struct seq_file *m) debug_atomic_read(chain_lookup_hits)); seq_printf(m, " cyclic checks: %11llu\n", debug_atomic_read(nr_cyclic_checks)); + seq_printf(m, " redundant checks: %11llu\n", + debug_atomic_read(nr_redundant_checks)); + seq_printf(m, " redundant links: %11llu\n", + debug_atomic_read(nr_redundant)); seq_printf(m, " find-mask forwards checks: %11llu\n", debug_atomic_read(nr_find_usage_forwards_checks)); seq_printf(m, " find-mask backwards checks: %11llu\n", -- cgit From 545c23f2e954eb3365629b20ceeef4eadb1ff97f Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Mon, 7 Aug 2017 16:12:48 +0900 Subject: locking/lockdep: Refactor lookup_chain_cache() Currently, lookup_chain_cache() provides both 'lookup' and 'add' functionalities in a function. However, each is useful. So this patch makes lookup_chain_cache() only do 'lookup' functionality and makes add_chain_cahce() only do 'add' functionality. And it's more readable than before. Signed-off-by: Byungchul Park Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: boqun.feng@gmail.com Cc: kernel-team@lge.com Cc: kirill@shutemov.name Cc: npiggin@gmail.com Cc: walken@google.com Cc: willy@infradead.org Link: http://lkml.kernel.org/r/1502089981-21272-2-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 141 +++++++++++++++++++++++++++++++---------------- 1 file changed, 93 insertions(+), 48 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index b2dd313951ce..e029f2f3b8dc 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2151,20 +2151,26 @@ static int check_no_collision(struct task_struct *curr, } /* - * Look up a dependency chain. If the key is not present yet then - * add it and return 1 - in this case the new dependency chain is - * validated. If the key is already hashed, return 0. - * (On return with 1 graph_lock is held.) + * Adds a dependency chain into chain hashtable. And must be called with + * graph_lock held. + * + * Return 0 if fail, and graph_lock is released. + * Return 1 if succeed, with graph_lock held. */ -static inline int lookup_chain_cache(struct task_struct *curr, - struct held_lock *hlock, - u64 chain_key) +static inline int add_chain_cache(struct task_struct *curr, + struct held_lock *hlock, + u64 chain_key) { struct lock_class *class = hlock_class(hlock); struct hlist_head *hash_head = chainhashentry(chain_key); struct lock_chain *chain; int i, j; + /* + * Allocate a new chain entry from the static array, and add + * it to the hash: + */ + /* * We might need to take the graph lock, ensure we've got IRQs * disabled to make this an IRQ-safe lock.. for recursion reasons @@ -2172,43 +2178,7 @@ static inline int lookup_chain_cache(struct task_struct *curr, */ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) return 0; - /* - * We can walk it lock-free, because entries only get added - * to the hash: - */ - hlist_for_each_entry_rcu(chain, hash_head, entry) { - if (chain->chain_key == chain_key) { -cache_hit: - debug_atomic_inc(chain_lookup_hits); - if (!check_no_collision(curr, hlock, chain)) - return 0; - if (very_verbose(class)) - printk("\nhash chain already cached, key: " - "%016Lx tail class: [%p] %s\n", - (unsigned long long)chain_key, - class->key, class->name); - return 0; - } - } - if (very_verbose(class)) - printk("\nnew hash chain, key: %016Lx tail class: [%p] %s\n", - (unsigned long long)chain_key, class->key, class->name); - /* - * Allocate a new chain entry from the static array, and add - * it to the hash: - */ - if (!graph_lock()) - return 0; - /* - * We have to walk the chain again locked - to avoid duplicates: - */ - hlist_for_each_entry(chain, hash_head, entry) { - if (chain->chain_key == chain_key) { - graph_unlock(); - goto cache_hit; - } - } if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) { if (!debug_locks_off_graph_unlock()) return 0; @@ -2260,6 +2230,78 @@ cache_hit: return 1; } +/* + * Look up a dependency chain. + */ +static inline struct lock_chain *lookup_chain_cache(u64 chain_key) +{ + struct hlist_head *hash_head = chainhashentry(chain_key); + struct lock_chain *chain; + + /* + * We can walk it lock-free, because entries only get added + * to the hash: + */ + hlist_for_each_entry_rcu(chain, hash_head, entry) { + if (chain->chain_key == chain_key) { + debug_atomic_inc(chain_lookup_hits); + return chain; + } + } + return NULL; +} + +/* + * If the key is not present yet in dependency chain cache then + * add it and return 1 - in this case the new dependency chain is + * validated. If the key is already hashed, return 0. + * (On return with 1 graph_lock is held.) + */ +static inline int lookup_chain_cache_add(struct task_struct *curr, + struct held_lock *hlock, + u64 chain_key) +{ + struct lock_class *class = hlock_class(hlock); + struct lock_chain *chain = lookup_chain_cache(chain_key); + + if (chain) { +cache_hit: + if (!check_no_collision(curr, hlock, chain)) + return 0; + + if (very_verbose(class)) { + printk("\nhash chain already cached, key: " + "%016Lx tail class: [%p] %s\n", + (unsigned long long)chain_key, + class->key, class->name); + } + + return 0; + } + + if (very_verbose(class)) { + printk("\nnew hash chain, key: %016Lx tail class: [%p] %s\n", + (unsigned long long)chain_key, class->key, class->name); + } + + if (!graph_lock()) + return 0; + + /* + * We have to walk the chain again locked - to avoid duplicates: + */ + chain = lookup_chain_cache(chain_key); + if (chain) { + graph_unlock(); + goto cache_hit; + } + + if (!add_chain_cache(curr, hlock, chain_key)) + return 0; + + return 1; +} + static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, struct held_lock *hlock, int chain_head, u64 chain_key) { @@ -2270,11 +2312,11 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, * * We look up the chain_key and do the O(N^2) check and update of * the dependencies only if this is a new dependency chain. - * (If lookup_chain_cache() returns with 1 it acquires + * (If lookup_chain_cache_add() return with 1 it acquires * graph_lock for us) */ if (!hlock->trylock && hlock->check && - lookup_chain_cache(curr, hlock, chain_key)) { + lookup_chain_cache_add(curr, hlock, chain_key)) { /* * Check whether last held lock: * @@ -2302,14 +2344,17 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, * Add dependency only if this lock is not the head * of the chain, and if it's not a secondary read-lock: */ - if (!chain_head && ret != 2) + if (!chain_head && ret != 2) { if (!check_prevs_add(curr, hlock)) return 0; + } + graph_unlock(); - } else - /* after lookup_chain_cache(): */ + } else { + /* after lookup_chain_cache_add(): */ if (unlikely(!debug_locks)) return 0; + } return 1; } -- cgit From 49347a986ab45eb1dafbf25170647c890f8ff192 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Mon, 7 Aug 2017 16:12:49 +0900 Subject: locking/lockdep: Add a function building a chain between two classes Crossrelease needs to build a chain between two classes regardless of their contexts. However, add_chain_cache() cannot be used for that purpose since it assumes that it's called in the acquisition context of the hlock. So this patch introduces a new function doing it. Signed-off-by: Byungchul Park Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: boqun.feng@gmail.com Cc: kernel-team@lge.com Cc: kirill@shutemov.name Cc: npiggin@gmail.com Cc: walken@google.com Cc: willy@infradead.org Link: http://lkml.kernel.org/r/1502089981-21272-3-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'kernel/locking') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index e029f2f3b8dc..bdf6b31f702b 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2150,6 +2150,76 @@ static int check_no_collision(struct task_struct *curr, return 1; } +/* + * This is for building a chain between just two different classes, + * instead of adding a new hlock upon current, which is done by + * add_chain_cache(). + * + * This can be called in any context with two classes, while + * add_chain_cache() must be done within the lock owener's context + * since it uses hlock which might be racy in another context. + */ +static inline int add_chain_cache_classes(unsigned int prev, + unsigned int next, + unsigned int irq_context, + u64 chain_key) +{ + struct hlist_head *hash_head = chainhashentry(chain_key); + struct lock_chain *chain; + + /* + * Allocate a new chain entry from the static array, and add + * it to the hash: + */ + + /* + * We might need to take the graph lock, ensure we've got IRQs + * disabled to make this an IRQ-safe lock.. for recursion reasons + * lockdep won't complain about its own locking errors. + */ + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return 0; + + if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) { + if (!debug_locks_off_graph_unlock()) + return 0; + + print_lockdep_off("BUG: MAX_LOCKDEP_CHAINS too low!"); + dump_stack(); + return 0; + } + + chain = lock_chains + nr_lock_chains++; + chain->chain_key = chain_key; + chain->irq_context = irq_context; + chain->depth = 2; + if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) { + chain->base = nr_chain_hlocks; + nr_chain_hlocks += chain->depth; + chain_hlocks[chain->base] = prev - 1; + chain_hlocks[chain->base + 1] = next -1; + } +#ifdef CONFIG_DEBUG_LOCKDEP + /* + * Important for check_no_collision(). + */ + else { + if (!debug_locks_off_graph_unlock()) + return 0; + + print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!"); + dump_stack(); + return 0; + } +#endif + + hlist_add_head_rcu(&chain->entry, hash_head); + debug_atomic_inc(chain_lookup_misses); + inc_chains(); + + return 1; +} + /* * Adds a dependency chain into chain hashtable. And must be called with * graph_lock held. -- cgit From 70911fdc9576f4eeb3986689a1c9a778a4a4aacb Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Mon, 7 Aug 2017 16:12:50 +0900 Subject: locking/lockdep: Change the meaning of check_prev_add()'s return value Firstly, return 1 instead of 2 when 'prev -> next' dependency already exists. Since the value 2 is not referenced anywhere, just return 1 indicating success in this case. Secondly, return 2 instead of 1 when successfully added a lock_list entry with saving stack_trace. With that, a caller can decide whether to avoid redundant save_trace() on the caller site. Signed-off-by: Byungchul Park Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: boqun.feng@gmail.com Cc: kernel-team@lge.com Cc: kirill@shutemov.name Cc: npiggin@gmail.com Cc: walken@google.com Cc: willy@infradead.org Link: http://lkml.kernel.org/r/1502089981-21272-4-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index bdf6b31f702b..7cf02fab1725 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1881,7 +1881,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, if (entry->class == hlock_class(next)) { if (distance == 1) entry->distance = 1; - return 2; + return 1; } } @@ -1935,9 +1935,10 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, print_lock_name(hlock_class(next)); printk(KERN_CONT "\n"); dump_stack(); - return graph_lock(); + if (!graph_lock()) + return 0; } - return 1; + return 2; } /* -- cgit From ce07a9415f266e181a0a33033a5f7138760240a4 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Mon, 7 Aug 2017 16:12:51 +0900 Subject: locking/lockdep: Make check_prev_add() able to handle external stack_trace Currently, a space for stack_trace is pinned in check_prev_add(), that makes us not able to use external stack_trace. The simplest way to achieve it is to pass an external stack_trace as an argument. A more suitable solution is to pass a callback additionally along with a stack_trace so that callers can decide the way to save or whether to save. Actually crossrelease needs to do other than saving a stack_trace. So pass a stack_trace and callback to handle it, to check_prev_add(). Signed-off-by: Byungchul Park Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: boqun.feng@gmail.com Cc: kernel-team@lge.com Cc: kirill@shutemov.name Cc: npiggin@gmail.com Cc: walken@google.com Cc: willy@infradead.org Link: http://lkml.kernel.org/r/1502089981-21272-5-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 7cf02fab1725..841828ba35b9 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1824,20 +1824,13 @@ check_deadlock(struct task_struct *curr, struct held_lock *next, */ static int check_prev_add(struct task_struct *curr, struct held_lock *prev, - struct held_lock *next, int distance, int *stack_saved) + struct held_lock *next, int distance, struct stack_trace *trace, + int (*save)(struct stack_trace *trace)) { struct lock_list *entry; int ret; struct lock_list this; struct lock_list *uninitialized_var(target_entry); - /* - * Static variable, serialized by the graph_lock(). - * - * We use this static variable to save the stack trace in case - * we call into this function multiple times due to encountering - * trylocks in the held lock stack. - */ - static struct stack_trace trace; /* * Prove that the new -> dependency would not @@ -1899,11 +1892,8 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, return print_bfs_bug(ret); - if (!*stack_saved) { - if (!save_trace(&trace)) - return 0; - *stack_saved = 1; - } + if (save && !save(trace)) + return 0; /* * Ok, all validations passed, add the new lock @@ -1911,14 +1901,14 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, */ ret = add_lock_to_list(hlock_class(next), &hlock_class(prev)->locks_after, - next->acquire_ip, distance, &trace); + next->acquire_ip, distance, trace); if (!ret) return 0; ret = add_lock_to_list(hlock_class(prev), &hlock_class(next)->locks_before, - next->acquire_ip, distance, &trace); + next->acquire_ip, distance, trace); if (!ret) return 0; @@ -1926,8 +1916,6 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, * Debugging printouts: */ if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) { - /* We drop graph lock, so another thread can overwrite trace. */ - *stack_saved = 0; graph_unlock(); printk("\n new dependency: "); print_lock_name(hlock_class(prev)); @@ -1951,8 +1939,9 @@ static int check_prevs_add(struct task_struct *curr, struct held_lock *next) { int depth = curr->lockdep_depth; - int stack_saved = 0; struct held_lock *hlock; + struct stack_trace trace; + int (*save)(struct stack_trace *trace) = save_trace; /* * Debugging checks. @@ -1977,9 +1966,18 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) * added: */ if (hlock->read != 2 && hlock->check) { - if (!check_prev_add(curr, hlock, next, - distance, &stack_saved)) + int ret = check_prev_add(curr, hlock, next, + distance, &trace, save); + if (!ret) return 0; + + /* + * Stop saving stack_trace if save_trace() was + * called at least once: + */ + if (save && ret == 2) + save = NULL; + /* * Stop after the first non-trylock entry, * as non-trylock entries have added their -- cgit From b09be676e0ff25bd6d2e7637e26d349f9109ad75 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Mon, 7 Aug 2017 16:12:52 +0900 Subject: locking/lockdep: Implement the 'crossrelease' feature Lockdep is a runtime locking correctness validator that detects and reports a deadlock or its possibility by checking dependencies between locks. It's useful since it does not report just an actual deadlock but also the possibility of a deadlock that has not actually happened yet. That enables problems to be fixed before they affect real systems. However, this facility is only applicable to typical locks, such as spinlocks and mutexes, which are normally released within the context in which they were acquired. However, synchronization primitives like page locks or completions, which are allowed to be released in any context, also create dependencies and can cause a deadlock. So lockdep should track these locks to do a better job. The 'crossrelease' implementation makes these primitives also be tracked. Signed-off-by: Byungchul Park Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: boqun.feng@gmail.com Cc: kernel-team@lge.com Cc: kirill@shutemov.name Cc: npiggin@gmail.com Cc: walken@google.com Cc: willy@infradead.org Link: http://lkml.kernel.org/r/1502089981-21272-6-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 508 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 485 insertions(+), 23 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 841828ba35b9..56f69cc53ddc 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -58,6 +58,10 @@ #define CREATE_TRACE_POINTS #include +#ifdef CONFIG_LOCKDEP_CROSSRELEASE +#include +#endif + #ifdef CONFIG_PROVE_LOCKING int prove_locking = 1; module_param(prove_locking, int, 0644); @@ -724,6 +728,18 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL); } +#ifdef CONFIG_LOCKDEP_CROSSRELEASE +static void cross_init(struct lockdep_map *lock, int cross); +static int cross_lock(struct lockdep_map *lock); +static int lock_acquire_crosslock(struct held_lock *hlock); +static int lock_release_crosslock(struct lockdep_map *lock); +#else +static inline void cross_init(struct lockdep_map *lock, int cross) {} +static inline int cross_lock(struct lockdep_map *lock) { return 0; } +static inline int lock_acquire_crosslock(struct held_lock *hlock) { return 2; } +static inline int lock_release_crosslock(struct lockdep_map *lock) { return 2; } +#endif + /* * Register a lock's class in the hash-table, if the class is not present * yet. Otherwise we look it up. We cache the result in the lock object @@ -1795,6 +1811,9 @@ check_deadlock(struct task_struct *curr, struct held_lock *next, if (nest) return 2; + if (cross_lock(prev->instance)) + continue; + return print_deadlock_bug(curr, prev, next); } return 1; @@ -1962,30 +1981,36 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) int distance = curr->lockdep_depth - depth + 1; hlock = curr->held_locks + depth - 1; /* - * Only non-recursive-read entries get new dependencies - * added: + * Only non-crosslock entries get new dependencies added. + * Crosslock entries will be added by commit later: */ - if (hlock->read != 2 && hlock->check) { - int ret = check_prev_add(curr, hlock, next, - distance, &trace, save); - if (!ret) - return 0; - + if (!cross_lock(hlock->instance)) { /* - * Stop saving stack_trace if save_trace() was - * called at least once: + * Only non-recursive-read entries get new dependencies + * added: */ - if (save && ret == 2) - save = NULL; + if (hlock->read != 2 && hlock->check) { + int ret = check_prev_add(curr, hlock, next, + distance, &trace, save); + if (!ret) + return 0; - /* - * Stop after the first non-trylock entry, - * as non-trylock entries have added their - * own direct dependencies already, so this - * lock is connected to them indirectly: - */ - if (!hlock->trylock) - break; + /* + * Stop saving stack_trace if save_trace() was + * called at least once: + */ + if (save && ret == 2) + save = NULL; + + /* + * Stop after the first non-trylock entry, + * as non-trylock entries have added their + * own direct dependencies already, so this + * lock is connected to them indirectly: + */ + if (!hlock->trylock) + break; + } } depth--; /* @@ -3176,7 +3201,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, /* * Initialize a lock instance's lock-class mapping info: */ -void lockdep_init_map(struct lockdep_map *lock, const char *name, +static void __lockdep_init_map(struct lockdep_map *lock, const char *name, struct lock_class_key *key, int subclass) { int i; @@ -3234,8 +3259,25 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name, raw_local_irq_restore(flags); } } + +void lockdep_init_map(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, int subclass) +{ + cross_init(lock, 0); + __lockdep_init_map(lock, name, key, subclass); +} EXPORT_SYMBOL_GPL(lockdep_init_map); +#ifdef CONFIG_LOCKDEP_CROSSRELEASE +void lockdep_init_map_crosslock(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, int subclass) +{ + cross_init(lock, 1); + __lockdep_init_map(lock, name, key, subclass); +} +EXPORT_SYMBOL_GPL(lockdep_init_map_crosslock); +#endif + struct lock_class_key __lockdep_no_validate__; EXPORT_SYMBOL_GPL(__lockdep_no_validate__); @@ -3291,6 +3333,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, int chain_head = 0; int class_idx; u64 chain_key; + int ret; if (unlikely(!debug_locks)) return 0; @@ -3339,7 +3382,8 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, class_idx = class - lock_classes + 1; - if (depth) { + /* TODO: nest_lock is not implemented for crosslock yet. */ + if (depth && !cross_lock(lock)) { hlock = curr->held_locks + depth - 1; if (hlock->class_idx == class_idx && nest_lock) { if (hlock->references) { @@ -3427,6 +3471,14 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, if (!validate_chain(curr, lock, hlock, chain_head, chain_key)) return 0; + ret = lock_acquire_crosslock(hlock); + /* + * 2 means normal acquire operations are needed. Otherwise, it's + * ok just to return with '0:fail, 1:success'. + */ + if (ret != 2) + return ret; + curr->curr_chain_key = chain_key; curr->lockdep_depth++; check_chain_key(curr); @@ -3664,11 +3716,19 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip) struct task_struct *curr = current; struct held_lock *hlock; unsigned int depth; - int i; + int ret, i; if (unlikely(!debug_locks)) return 0; + ret = lock_release_crosslock(lock); + /* + * 2 means normal release operations are needed. Otherwise, it's + * ok just to return with '0:fail, 1:success'. + */ + if (ret != 2) + return ret; + depth = curr->lockdep_depth; /* * So we're all set to release this lock.. wait what lock? We don't @@ -4532,6 +4592,13 @@ asmlinkage __visible void lockdep_sys_exit(void) curr->comm, curr->pid); lockdep_print_held_locks(curr); } + + /* + * The lock history for each syscall should be independent. So wipe the + * slate clean on return to userspace. + */ + crossrelease_hist_end(XHLOCK_PROC); + crossrelease_hist_start(XHLOCK_PROC); } void lockdep_rcu_suspicious(const char *file, const int line, const char *s) @@ -4580,3 +4647,398 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) dump_stack(); } EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); + +#ifdef CONFIG_LOCKDEP_CROSSRELEASE + +/* + * Crossrelease works by recording a lock history for each thread and + * connecting those historic locks that were taken after the + * wait_for_completion() in the complete() context. + * + * Task-A Task-B + * + * mutex_lock(&A); + * mutex_unlock(&A); + * + * wait_for_completion(&C); + * lock_acquire_crosslock(); + * atomic_inc_return(&cross_gen_id); + * | + * | mutex_lock(&B); + * | mutex_unlock(&B); + * | + * | complete(&C); + * `-- lock_commit_crosslock(); + * + * Which will then add a dependency between B and C. + */ + +#define xhlock(i) (current->xhlocks[(i) % MAX_XHLOCKS_NR]) + +/* + * Whenever a crosslock is held, cross_gen_id will be increased. + */ +static atomic_t cross_gen_id; /* Can be wrapped */ + +/* + * Lock history stacks; we have 3 nested lock history stacks: + * + * Hard IRQ + * Soft IRQ + * History / Task + * + * The thing is that once we complete a (Hard/Soft) IRQ the future task locks + * should not depend on any of the locks observed while running the IRQ. + * + * So what we do is rewind the history buffer and erase all our knowledge of + * that temporal event. + */ + +/* + * We need this to annotate lock history boundaries. Take for instance + * workqueues; each work is independent of the last. The completion of a future + * work does not depend on the completion of a past work (in general). + * Therefore we must not carry that (lock) dependency across works. + * + * This is true for many things; pretty much all kthreads fall into this + * pattern, where they have an 'idle' state and future completions do not + * depend on past completions. Its just that since they all have the 'same' + * form -- the kthread does the same over and over -- it doesn't typically + * matter. + * + * The same is true for system-calls, once a system call is completed (we've + * returned to userspace) the next system call does not depend on the lock + * history of the previous system call. + */ +void crossrelease_hist_start(enum xhlock_context_t c) +{ + if (current->xhlocks) + current->xhlock_idx_hist[c] = current->xhlock_idx; +} + +void crossrelease_hist_end(enum xhlock_context_t c) +{ + if (current->xhlocks) + current->xhlock_idx = current->xhlock_idx_hist[c]; +} + +static int cross_lock(struct lockdep_map *lock) +{ + return lock ? lock->cross : 0; +} + +/* + * This is needed to decide the relationship between wrapable variables. + */ +static inline int before(unsigned int a, unsigned int b) +{ + return (int)(a - b) < 0; +} + +static inline struct lock_class *xhlock_class(struct hist_lock *xhlock) +{ + return hlock_class(&xhlock->hlock); +} + +static inline struct lock_class *xlock_class(struct cross_lock *xlock) +{ + return hlock_class(&xlock->hlock); +} + +/* + * Should we check a dependency with previous one? + */ +static inline int depend_before(struct held_lock *hlock) +{ + return hlock->read != 2 && hlock->check && !hlock->trylock; +} + +/* + * Should we check a dependency with next one? + */ +static inline int depend_after(struct held_lock *hlock) +{ + return hlock->read != 2 && hlock->check; +} + +/* + * Check if the xhlock is valid, which would be false if, + * + * 1. Has not used after initializaion yet. + * + * Remind hist_lock is implemented as a ring buffer. + */ +static inline int xhlock_valid(struct hist_lock *xhlock) +{ + /* + * xhlock->hlock.instance must be !NULL. + */ + return !!xhlock->hlock.instance; +} + +/* + * Record a hist_lock entry. + * + * Irq disable is only required. + */ +static void add_xhlock(struct held_lock *hlock) +{ + unsigned int idx = ++current->xhlock_idx; + struct hist_lock *xhlock = &xhlock(idx); + +#ifdef CONFIG_DEBUG_LOCKDEP + /* + * This can be done locklessly because they are all task-local + * state, we must however ensure IRQs are disabled. + */ + WARN_ON_ONCE(!irqs_disabled()); +#endif + + /* Initialize hist_lock's members */ + xhlock->hlock = *hlock; + + xhlock->trace.nr_entries = 0; + xhlock->trace.max_entries = MAX_XHLOCK_TRACE_ENTRIES; + xhlock->trace.entries = xhlock->trace_entries; + xhlock->trace.skip = 3; + save_stack_trace(&xhlock->trace); +} + +static inline int same_context_xhlock(struct hist_lock *xhlock) +{ + return xhlock->hlock.irq_context == task_irq_context(current); +} + +/* + * This should be lockless as far as possible because this would be + * called very frequently. + */ +static void check_add_xhlock(struct held_lock *hlock) +{ + /* + * Record a hist_lock, only in case that acquisitions ahead + * could depend on the held_lock. For example, if the held_lock + * is trylock then acquisitions ahead never depends on that. + * In that case, we don't need to record it. Just return. + */ + if (!current->xhlocks || !depend_before(hlock)) + return; + + add_xhlock(hlock); +} + +/* + * For crosslock. + */ +static int add_xlock(struct held_lock *hlock) +{ + struct cross_lock *xlock; + unsigned int gen_id; + + if (!graph_lock()) + return 0; + + xlock = &((struct lockdep_map_cross *)hlock->instance)->xlock; + + gen_id = (unsigned int)atomic_inc_return(&cross_gen_id); + xlock->hlock = *hlock; + xlock->hlock.gen_id = gen_id; + graph_unlock(); + + return 1; +} + +/* + * Called for both normal and crosslock acquires. Normal locks will be + * pushed on the hist_lock queue. Cross locks will record state and + * stop regular lock_acquire() to avoid being placed on the held_lock + * stack. + * + * Return: 0 - failure; + * 1 - crosslock, done; + * 2 - normal lock, continue to held_lock[] ops. + */ +static int lock_acquire_crosslock(struct held_lock *hlock) +{ + /* + * CONTEXT 1 CONTEXT 2 + * --------- --------- + * lock A (cross) + * X = atomic_inc_return(&cross_gen_id) + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * Y = atomic_read_acquire(&cross_gen_id) + * lock B + * + * atomic_read_acquire() is for ordering between A and B, + * IOW, A happens before B, when CONTEXT 2 see Y >= X. + * + * Pairs with atomic_inc_return() in add_xlock(). + */ + hlock->gen_id = (unsigned int)atomic_read_acquire(&cross_gen_id); + + if (cross_lock(hlock->instance)) + return add_xlock(hlock); + + check_add_xhlock(hlock); + return 2; +} + +static int copy_trace(struct stack_trace *trace) +{ + unsigned long *buf = stack_trace + nr_stack_trace_entries; + unsigned int max_nr = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries; + unsigned int nr = min(max_nr, trace->nr_entries); + + trace->nr_entries = nr; + memcpy(buf, trace->entries, nr * sizeof(trace->entries[0])); + trace->entries = buf; + nr_stack_trace_entries += nr; + + if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES-1) { + if (!debug_locks_off_graph_unlock()) + return 0; + + print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!"); + dump_stack(); + + return 0; + } + + return 1; +} + +static int commit_xhlock(struct cross_lock *xlock, struct hist_lock *xhlock) +{ + unsigned int xid, pid; + u64 chain_key; + + xid = xlock_class(xlock) - lock_classes; + chain_key = iterate_chain_key((u64)0, xid); + pid = xhlock_class(xhlock) - lock_classes; + chain_key = iterate_chain_key(chain_key, pid); + + if (lookup_chain_cache(chain_key)) + return 1; + + if (!add_chain_cache_classes(xid, pid, xhlock->hlock.irq_context, + chain_key)) + return 0; + + if (!check_prev_add(current, &xlock->hlock, &xhlock->hlock, 1, + &xhlock->trace, copy_trace)) + return 0; + + return 1; +} + +static void commit_xhlocks(struct cross_lock *xlock) +{ + unsigned int cur = current->xhlock_idx; + unsigned int i; + + if (!graph_lock()) + return; + + for (i = 0; i < MAX_XHLOCKS_NR; i++) { + struct hist_lock *xhlock = &xhlock(cur - i); + + if (!xhlock_valid(xhlock)) + break; + + if (before(xhlock->hlock.gen_id, xlock->hlock.gen_id)) + break; + + if (!same_context_xhlock(xhlock)) + break; + + /* + * commit_xhlock() returns 0 with graph_lock already + * released if fail. + */ + if (!commit_xhlock(xlock, xhlock)) + return; + } + + graph_unlock(); +} + +void lock_commit_crosslock(struct lockdep_map *lock) +{ + struct cross_lock *xlock; + unsigned long flags; + + if (unlikely(!debug_locks || current->lockdep_recursion)) + return; + + if (!current->xhlocks) + return; + + /* + * Do commit hist_locks with the cross_lock, only in case that + * the cross_lock could depend on acquisitions after that. + * + * For example, if the cross_lock does not have the 'check' flag + * then we don't need to check dependencies and commit for that. + * Just skip it. In that case, of course, the cross_lock does + * not depend on acquisitions ahead, either. + * + * WARNING: Don't do that in add_xlock() in advance. When an + * acquisition context is different from the commit context, + * invalid(skipped) cross_lock might be accessed. + */ + if (!depend_after(&((struct lockdep_map_cross *)lock)->xlock.hlock)) + return; + + raw_local_irq_save(flags); + check_flags(flags); + current->lockdep_recursion = 1; + xlock = &((struct lockdep_map_cross *)lock)->xlock; + commit_xhlocks(xlock); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(lock_commit_crosslock); + +/* + * Return: 1 - crosslock, done; + * 2 - normal lock, continue to held_lock[] ops. + */ +static int lock_release_crosslock(struct lockdep_map *lock) +{ + return cross_lock(lock) ? 1 : 2; +} + +static void cross_init(struct lockdep_map *lock, int cross) +{ + lock->cross = cross; + + /* + * Crossrelease assumes that the ring buffer size of xhlocks + * is aligned with power of 2. So force it on build. + */ + BUILD_BUG_ON(MAX_XHLOCKS_NR & (MAX_XHLOCKS_NR - 1)); +} + +void lockdep_init_task(struct task_struct *task) +{ + int i; + + task->xhlock_idx = UINT_MAX; + + for (i = 0; i < XHLOCK_CTX_NR; i++) + task->xhlock_idx_hist[i] = UINT_MAX; + + task->xhlocks = kzalloc(sizeof(struct hist_lock) * MAX_XHLOCKS_NR, + GFP_KERNEL); +} + +void lockdep_free_task(struct task_struct *task) +{ + if (task->xhlocks) { + void *tmp = task->xhlocks; + /* Diable crossrelease for current */ + task->xhlocks = NULL; + kfree(tmp); + } +} +#endif -- cgit From 23f873d8f9526ed7e49a1a02a45f8afb9ae5fb84 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Mon, 7 Aug 2017 16:12:53 +0900 Subject: locking/lockdep: Detect and handle hist_lock ring buffer overwrite The ring buffer can be overwritten by hardirq/softirq/work contexts. That cases must be considered on rollback or commit. For example, |<------ hist_lock ring buffer size ----->| ppppppppppppiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii wrapped > iiiiiiiiiiiiiiiiiiiiiii.................... where 'p' represents an acquisition in process context, 'i' represents an acquisition in irq context. On irq exit, crossrelease tries to rollback idx to original position, but it should not because the entry already has been invalid by overwriting 'i'. Avoid rollback or commit for entries overwritten. Signed-off-by: Byungchul Park Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: boqun.feng@gmail.com Cc: kernel-team@lge.com Cc: kirill@shutemov.name Cc: npiggin@gmail.com Cc: walken@google.com Cc: willy@infradead.org Link: http://lkml.kernel.org/r/1502089981-21272-7-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 52 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 5 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 56f69cc53ddc..eda8114ef793 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4680,6 +4680,17 @@ EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); */ static atomic_t cross_gen_id; /* Can be wrapped */ +/* + * Make an entry of the ring buffer invalid. + */ +static inline void invalidate_xhlock(struct hist_lock *xhlock) +{ + /* + * Normally, xhlock->hlock.instance must be !NULL. + */ + xhlock->hlock.instance = NULL; +} + /* * Lock history stacks; we have 3 nested lock history stacks: * @@ -4712,14 +4723,28 @@ static atomic_t cross_gen_id; /* Can be wrapped */ */ void crossrelease_hist_start(enum xhlock_context_t c) { - if (current->xhlocks) - current->xhlock_idx_hist[c] = current->xhlock_idx; + struct task_struct *cur = current; + + if (cur->xhlocks) { + cur->xhlock_idx_hist[c] = cur->xhlock_idx; + cur->hist_id_save[c] = cur->hist_id; + } } void crossrelease_hist_end(enum xhlock_context_t c) { - if (current->xhlocks) - current->xhlock_idx = current->xhlock_idx_hist[c]; + struct task_struct *cur = current; + + if (cur->xhlocks) { + unsigned int idx = cur->xhlock_idx_hist[c]; + struct hist_lock *h = &xhlock(idx); + + cur->xhlock_idx = idx; + + /* Check if the ring was overwritten. */ + if (h->hist_id != cur->hist_id_save[c]) + invalidate_xhlock(h); + } } static int cross_lock(struct lockdep_map *lock) @@ -4765,6 +4790,7 @@ static inline int depend_after(struct held_lock *hlock) * Check if the xhlock is valid, which would be false if, * * 1. Has not used after initializaion yet. + * 2. Got invalidated. * * Remind hist_lock is implemented as a ring buffer. */ @@ -4796,6 +4822,7 @@ static void add_xhlock(struct held_lock *hlock) /* Initialize hist_lock's members */ xhlock->hlock = *hlock; + xhlock->hist_id = current->hist_id++; xhlock->trace.nr_entries = 0; xhlock->trace.max_entries = MAX_XHLOCK_TRACE_ENTRIES; @@ -4934,6 +4961,7 @@ static int commit_xhlock(struct cross_lock *xlock, struct hist_lock *xhlock) static void commit_xhlocks(struct cross_lock *xlock) { unsigned int cur = current->xhlock_idx; + unsigned int prev_hist_id = xhlock(cur).hist_id; unsigned int i; if (!graph_lock()) @@ -4951,6 +4979,17 @@ static void commit_xhlocks(struct cross_lock *xlock) if (!same_context_xhlock(xhlock)) break; + /* + * Filter out the cases that the ring buffer was + * overwritten and the previous entry has a bigger + * hist_id than the following one, which is impossible + * otherwise. + */ + if (unlikely(before(xhlock->hist_id, prev_hist_id))) + break; + + prev_hist_id = xhlock->hist_id; + /* * commit_xhlock() returns 0 with graph_lock already * released if fail. @@ -5024,9 +5063,12 @@ void lockdep_init_task(struct task_struct *task) int i; task->xhlock_idx = UINT_MAX; + task->hist_id = 0; - for (i = 0; i < XHLOCK_CTX_NR; i++) + for (i = 0; i < XHLOCK_CTX_NR; i++) { task->xhlock_idx_hist[i] = UINT_MAX; + task->hist_id_save[i] = 0; + } task->xhlocks = kzalloc(sizeof(struct hist_lock) * MAX_XHLOCKS_NR, GFP_KERNEL); -- cgit From 28a903f63ec0811ead70ad0f8665e838d207a25e Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Mon, 7 Aug 2017 16:12:54 +0900 Subject: locking/lockdep: Handle non(or multi)-acquisition of a crosslock No acquisition might be in progress on commit of a crosslock. Completion operations enabling crossrelease are the case like: CONTEXT X CONTEXT Y --------- --------- trigger completion context complete AX commit AX wait_for_complete AX acquire AX wait where AX is a crosslock. When no acquisition is in progress, we should not perform commit because the lock does not exist, which might cause incorrect memory access. So we have to track the number of acquisitions of a crosslock to handle it. Moreover, in case that more than one acquisition of a crosslock are overlapped like: CONTEXT W CONTEXT X CONTEXT Y CONTEXT Z --------- --------- --------- --------- acquire AX (gen_id: 1) acquire A acquire AX (gen_id: 10) acquire B commit AX acquire C commit AX where A, B and C are typical locks and AX is a crosslock. Current crossrelease code performs commits in Y and Z with gen_id = 10. However, we can use gen_id = 1 to do it, since not only 'acquire AX in X' but 'acquire AX in W' also depends on each acquisition in Y and Z until their commits. So make it use gen_id = 1 instead of 10 on their commits, which adds an additional dependency 'AX -> A' in the example above. Signed-off-by: Byungchul Park Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: boqun.feng@gmail.com Cc: kernel-team@lge.com Cc: kirill@shutemov.name Cc: npiggin@gmail.com Cc: walken@google.com Cc: willy@infradead.org Link: http://lkml.kernel.org/r/1502089981-21272-8-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 82 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 26 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index eda8114ef793..7f97871d48d5 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4867,11 +4867,28 @@ static int add_xlock(struct held_lock *hlock) xlock = &((struct lockdep_map_cross *)hlock->instance)->xlock; + /* + * When acquisitions for a crosslock are overlapped, we use + * nr_acquire to perform commit for them, based on cross_gen_id + * of the first acquisition, which allows to add additional + * dependencies. + * + * Moreover, when no acquisition of a crosslock is in progress, + * we should not perform commit because the lock might not exist + * any more, which might cause incorrect memory access. So we + * have to track the number of acquisitions of a crosslock. + * + * depend_after() is necessary to initialize only the first + * valid xlock so that the xlock can be used on its commit. + */ + if (xlock->nr_acquire++ && depend_after(&xlock->hlock)) + goto unlock; + gen_id = (unsigned int)atomic_inc_return(&cross_gen_id); xlock->hlock = *hlock; xlock->hlock.gen_id = gen_id; +unlock: graph_unlock(); - return 1; } @@ -4967,35 +4984,37 @@ static void commit_xhlocks(struct cross_lock *xlock) if (!graph_lock()) return; - for (i = 0; i < MAX_XHLOCKS_NR; i++) { - struct hist_lock *xhlock = &xhlock(cur - i); + if (xlock->nr_acquire) { + for (i = 0; i < MAX_XHLOCKS_NR; i++) { + struct hist_lock *xhlock = &xhlock(cur - i); - if (!xhlock_valid(xhlock)) - break; + if (!xhlock_valid(xhlock)) + break; - if (before(xhlock->hlock.gen_id, xlock->hlock.gen_id)) - break; + if (before(xhlock->hlock.gen_id, xlock->hlock.gen_id)) + break; - if (!same_context_xhlock(xhlock)) - break; + if (!same_context_xhlock(xhlock)) + break; - /* - * Filter out the cases that the ring buffer was - * overwritten and the previous entry has a bigger - * hist_id than the following one, which is impossible - * otherwise. - */ - if (unlikely(before(xhlock->hist_id, prev_hist_id))) - break; + /* + * Filter out the cases that the ring buffer was + * overwritten and the previous entry has a bigger + * hist_id than the following one, which is impossible + * otherwise. + */ + if (unlikely(before(xhlock->hist_id, prev_hist_id))) + break; - prev_hist_id = xhlock->hist_id; + prev_hist_id = xhlock->hist_id; - /* - * commit_xhlock() returns 0 with graph_lock already - * released if fail. - */ - if (!commit_xhlock(xlock, xhlock)) - return; + /* + * commit_xhlock() returns 0 with graph_lock already + * released if fail. + */ + if (!commit_xhlock(xlock, xhlock)) + return; + } } graph_unlock(); @@ -5039,16 +5058,27 @@ void lock_commit_crosslock(struct lockdep_map *lock) EXPORT_SYMBOL_GPL(lock_commit_crosslock); /* - * Return: 1 - crosslock, done; + * Return: 0 - failure; + * 1 - crosslock, done; * 2 - normal lock, continue to held_lock[] ops. */ static int lock_release_crosslock(struct lockdep_map *lock) { - return cross_lock(lock) ? 1 : 2; + if (cross_lock(lock)) { + if (!graph_lock()) + return 0; + ((struct lockdep_map_cross *)lock)->xlock.nr_acquire--; + graph_unlock(); + return 1; + } + return 2; } static void cross_init(struct lockdep_map *lock, int cross) { + if (cross) + ((struct lockdep_map_cross *)lock)->xlock.nr_acquire = 0; + lock->cross = cross; /* -- cgit From 383a4bc88849b804385162e81bf704f8f9690a87 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Mon, 7 Aug 2017 16:12:55 +0900 Subject: locking/lockdep: Make print_circular_bug() aware of crossrelease print_circular_bug() reporting circular bug assumes that target hlock is owned by the current. However, in crossrelease, target hlock can be owned by other than the current. So the report format needs to be changed to reflect the change. Signed-off-by: Byungchul Park Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: boqun.feng@gmail.com Cc: kernel-team@lge.com Cc: kirill@shutemov.name Cc: npiggin@gmail.com Cc: walken@google.com Cc: willy@infradead.org Link: http://lkml.kernel.org/r/1502089981-21272-9-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 67 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 20 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 7f97871d48d5..1114dc42c27f 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1139,22 +1139,41 @@ print_circular_lock_scenario(struct held_lock *src, printk(KERN_CONT "\n\n"); } - printk(" Possible unsafe locking scenario:\n\n"); - printk(" CPU0 CPU1\n"); - printk(" ---- ----\n"); - printk(" lock("); - __print_lock_name(target); - printk(KERN_CONT ");\n"); - printk(" lock("); - __print_lock_name(parent); - printk(KERN_CONT ");\n"); - printk(" lock("); - __print_lock_name(target); - printk(KERN_CONT ");\n"); - printk(" lock("); - __print_lock_name(source); - printk(KERN_CONT ");\n"); - printk("\n *** DEADLOCK ***\n\n"); + if (cross_lock(tgt->instance)) { + printk(" Possible unsafe locking scenario by crosslock:\n\n"); + printk(" CPU0 CPU1\n"); + printk(" ---- ----\n"); + printk(" lock("); + __print_lock_name(parent); + printk(KERN_CONT ");\n"); + printk(" lock("); + __print_lock_name(target); + printk(KERN_CONT ");\n"); + printk(" lock("); + __print_lock_name(source); + printk(KERN_CONT ");\n"); + printk(" unlock("); + __print_lock_name(target); + printk(KERN_CONT ");\n"); + printk("\n *** DEADLOCK ***\n\n"); + } else { + printk(" Possible unsafe locking scenario:\n\n"); + printk(" CPU0 CPU1\n"); + printk(" ---- ----\n"); + printk(" lock("); + __print_lock_name(target); + printk(KERN_CONT ");\n"); + printk(" lock("); + __print_lock_name(parent); + printk(KERN_CONT ");\n"); + printk(" lock("); + __print_lock_name(target); + printk(KERN_CONT ");\n"); + printk(" lock("); + __print_lock_name(source); + printk(KERN_CONT ");\n"); + printk("\n *** DEADLOCK ***\n\n"); + } } /* @@ -1179,7 +1198,12 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth, pr_warn("%s/%d is trying to acquire lock:\n", curr->comm, task_pid_nr(curr)); print_lock(check_src); - pr_warn("\nbut task is already holding lock:\n"); + + if (cross_lock(check_tgt->instance)) + pr_warn("\nbut now in release context of a crosslock acquired at the following:\n"); + else + pr_warn("\nbut task is already holding lock:\n"); + print_lock(check_tgt); pr_warn("\nwhich lock already depends on the new lock.\n\n"); pr_warn("\nthe existing dependency chain (in reverse order) is:\n"); @@ -1197,7 +1221,8 @@ static inline int class_equal(struct lock_list *entry, void *data) static noinline int print_circular_bug(struct lock_list *this, struct lock_list *target, struct held_lock *check_src, - struct held_lock *check_tgt) + struct held_lock *check_tgt, + struct stack_trace *trace) { struct task_struct *curr = current; struct lock_list *parent; @@ -1207,7 +1232,9 @@ static noinline int print_circular_bug(struct lock_list *this, if (!debug_locks_off_graph_unlock() || debug_locks_silent) return 0; - if (!save_trace(&this->trace)) + if (cross_lock(check_tgt->instance)) + this->trace = *trace; + else if (!save_trace(&this->trace)) return 0; depth = get_lock_depth(target); @@ -1864,7 +1891,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, this.parent = NULL; ret = check_noncircular(&this, hlock_class(prev), &target_entry); if (unlikely(!ret)) - return print_circular_bug(&this, target_entry, next, prev); + return print_circular_bug(&this, target_entry, next, prev, trace); else if (unlikely(ret < 0)) return print_bfs_bug(ret); -- cgit From a10b5c564741cd3b6708f085a1fa892b63c2063d Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Mon, 14 Aug 2017 16:00:51 +0900 Subject: locking/lockdep: Add a comment about crossrelease_hist_end() in lockdep_sys_exit() In lockdep_sys_exit(), crossrelease_hist_end() is called unconditionally even when getting here without having started e.g. just after forking. But it's no problem since it would roll back to an invalid entry anyway. Add a comment to explain this. Signed-off-by: Byungchul Park Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: boqun.feng@gmail.com Cc: kernel-team@lge.com Cc: kirill@shutemov.name Cc: linux-mm@kvack.org Cc: npiggin@gmail.com Cc: walken@google.com Cc: willy@infradead.org Link: http://lkml.kernel.org/r/1502694052-16085-2-git-send-email-byungchul.park@lge.com [ Improved the description and the comments. ] Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/locking') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 1114dc42c27f..257931e2fbbe 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4623,6 +4623,10 @@ asmlinkage __visible void lockdep_sys_exit(void) /* * The lock history for each syscall should be independent. So wipe the * slate clean on return to userspace. + * + * crossrelease_hist_end() works well here even when getting here + * without starting (i.e. just after forking), because it rolls back + * the index to point to the last entry, which is already invalid. */ crossrelease_hist_end(XHLOCK_PROC); crossrelease_hist_start(XHLOCK_PROC); -- cgit From 907dc16d7e23ec81a126c9585435494fa1b3a4b7 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Mon, 14 Aug 2017 16:00:52 +0900 Subject: locking/lockdep: Fix the rollback and overwrite detection logic in crossrelease As Boqun Feng pointed out, current->hist_id should be aligned with the latest valid xhlock->hist_id so that hist_id_save[] storing current->hist_id can be comparable with xhlock->hist_id. Fix it. Additionally, the condition for overwrite-detection should be the opposite. Fix the code and the comments as well. <- direction to visit hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh (h: history) ^^ ^ || start from here |previous entry current entry Reported-by: Boqun Feng Signed-off-by: Byungchul Park Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: kernel-team@lge.com Cc: kirill@shutemov.name Cc: linux-mm@kvack.org Cc: npiggin@gmail.com Cc: walken@google.com Cc: willy@infradead.org Link: http://lkml.kernel.org/r/1502694052-16085-3-git-send-email-byungchul.park@lge.com [ Improve the comments some more. ] Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 257931e2fbbe..66011c9f5df3 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4853,7 +4853,7 @@ static void add_xhlock(struct held_lock *hlock) /* Initialize hist_lock's members */ xhlock->hlock = *hlock; - xhlock->hist_id = current->hist_id++; + xhlock->hist_id = ++current->hist_id; xhlock->trace.nr_entries = 0; xhlock->trace.max_entries = MAX_XHLOCK_TRACE_ENTRIES; @@ -5029,12 +5029,12 @@ static void commit_xhlocks(struct cross_lock *xlock) break; /* - * Filter out the cases that the ring buffer was - * overwritten and the previous entry has a bigger - * hist_id than the following one, which is impossible - * otherwise. + * Filter out the cases where the ring buffer was + * overwritten and the current entry has a bigger + * hist_id than the previous one, which is impossible + * otherwise: */ - if (unlikely(before(xhlock->hist_id, prev_hist_id))) + if (unlikely(before(prev_hist_id, xhlock->hist_id))) break; prev_hist_id = xhlock->hist_id; -- cgit From e6f3faa734a00c606b7b06c6b9f15e5627d3245b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 23 Aug 2017 13:23:30 +0200 Subject: locking/lockdep: Fix workqueue crossrelease annotation The new completion/crossrelease annotations interact unfavourable with the extant flush_work()/flush_workqueue() annotations. The problem is that when a single work class does: wait_for_completion(&C) and complete(&C) in different executions, we'll build dependencies like: lock_map_acquire(W) complete_acquire(C) and lock_map_acquire(W) complete_release(C) which results in the dependency chain: W->C->W, which lockdep thinks spells deadlock, even though there is no deadlock potential since works are ran concurrently. One possibility would be to change the work 'lock' to recursive-read, but that would mean hitting a lockdep limitation on recursive locks. Also, unconditinoally switching to recursive-read here would fail to detect the actual deadlock on single-threaded workqueues, which do have a problem with this. For now, forcefully disregard these locks for crossrelease. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Tejun Heo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: boqun.feng@gmail.com Cc: byungchul.park@lge.com Cc: david@fromorbit.com Cc: johannes@sipsolutions.net Cc: oleg@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 56 +++++++++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 20 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 66011c9f5df3..f73ca595b81e 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4629,7 +4629,7 @@ asmlinkage __visible void lockdep_sys_exit(void) * the index to point to the last entry, which is already invalid. */ crossrelease_hist_end(XHLOCK_PROC); - crossrelease_hist_start(XHLOCK_PROC); + crossrelease_hist_start(XHLOCK_PROC, false); } void lockdep_rcu_suspicious(const char *file, const int line, const char *s) @@ -4725,25 +4725,25 @@ static inline void invalidate_xhlock(struct hist_lock *xhlock) /* * Lock history stacks; we have 3 nested lock history stacks: * - * Hard IRQ - * Soft IRQ - * History / Task + * HARD(IRQ) + * SOFT(IRQ) + * PROC(ess) * - * The thing is that once we complete a (Hard/Soft) IRQ the future task locks - * should not depend on any of the locks observed while running the IRQ. + * The thing is that once we complete a HARD/SOFT IRQ the future task locks + * should not depend on any of the locks observed while running the IRQ. So + * what we do is rewind the history buffer and erase all our knowledge of that + * temporal event. * - * So what we do is rewind the history buffer and erase all our knowledge of - * that temporal event. - */ - -/* - * We need this to annotate lock history boundaries. Take for instance - * workqueues; each work is independent of the last. The completion of a future - * work does not depend on the completion of a past work (in general). - * Therefore we must not carry that (lock) dependency across works. + * The PROCess one is special though; it is used to annotate independence + * inside a task. + * + * Take for instance workqueues; each work is independent of the last. The + * completion of a future work does not depend on the completion of a past work + * (in general). Therefore we must not carry that (lock) dependency across + * works. * * This is true for many things; pretty much all kthreads fall into this - * pattern, where they have an 'idle' state and future completions do not + * pattern, where they have an invariant state and future completions do not * depend on past completions. Its just that since they all have the 'same' * form -- the kthread does the same over and over -- it doesn't typically * matter. @@ -4751,15 +4751,31 @@ static inline void invalidate_xhlock(struct hist_lock *xhlock) * The same is true for system-calls, once a system call is completed (we've * returned to userspace) the next system call does not depend on the lock * history of the previous system call. + * + * They key property for independence, this invariant state, is that it must be + * a point where we hold no locks and have no history. Because if we were to + * hold locks, the restore at _end() would not necessarily recover it's history + * entry. Similarly, independence per-definition means it does not depend on + * prior state. */ -void crossrelease_hist_start(enum xhlock_context_t c) +void crossrelease_hist_start(enum xhlock_context_t c, bool force) { struct task_struct *cur = current; - if (cur->xhlocks) { - cur->xhlock_idx_hist[c] = cur->xhlock_idx; - cur->hist_id_save[c] = cur->hist_id; + if (!cur->xhlocks) + return; + + /* + * We call this at an invariant point, no current state, no history. + */ + if (c == XHLOCK_PROC) { + /* verified the former, ensure the latter */ + WARN_ON_ONCE(!force && cur->lockdep_depth); + invalidate_xhlock(&xhlock(cur->xhlock_idx)); } + + cur->xhlock_idx_hist[c] = cur->xhlock_idx; + cur->hist_id_save[c] = cur->hist_id; } void crossrelease_hist_end(enum xhlock_context_t c) -- cgit From f52be5708076b75a045ac52c6fef3fffb8300525 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 29 Aug 2017 10:59:39 +0200 Subject: locking/lockdep: Untangle xhlock history save/restore from task independence Where XHLOCK_{SOFT,HARD} are save/restore points in the xhlocks[] to ensure the temporal IRQ events don't interact with task state, the XHLOCK_PROC is a fundament different beast that just happens to share the interface. The purpose of XHLOCK_PROC is to annotate independent execution inside one task. For example workqueues, each work should appear to run in its own 'pristine' 'task'. Remove XHLOCK_PROC in favour of its own interface to avoid confusion. Signed-off-by: Peter Zijlstra (Intel) Cc: Byungchul Park Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: boqun.feng@gmail.com Cc: david@fromorbit.com Cc: johannes@sipsolutions.net Cc: kernel-team@lge.com Cc: oleg@redhat.com Cc: tj@kernel.org Link: http://lkml.kernel.org/r/20170829085939.ggmb6xiohw67micb@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 79 +++++++++++++++++++++++------------------------- 1 file changed, 38 insertions(+), 41 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index f73ca595b81e..44c8d0d17170 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4623,13 +4623,8 @@ asmlinkage __visible void lockdep_sys_exit(void) /* * The lock history for each syscall should be independent. So wipe the * slate clean on return to userspace. - * - * crossrelease_hist_end() works well here even when getting here - * without starting (i.e. just after forking), because it rolls back - * the index to point to the last entry, which is already invalid. */ - crossrelease_hist_end(XHLOCK_PROC); - crossrelease_hist_start(XHLOCK_PROC, false); + lockdep_invariant_state(false); } void lockdep_rcu_suspicious(const char *file, const int line, const char *s) @@ -4723,19 +4718,47 @@ static inline void invalidate_xhlock(struct hist_lock *xhlock) } /* - * Lock history stacks; we have 3 nested lock history stacks: + * Lock history stacks; we have 2 nested lock history stacks: * * HARD(IRQ) * SOFT(IRQ) - * PROC(ess) * * The thing is that once we complete a HARD/SOFT IRQ the future task locks * should not depend on any of the locks observed while running the IRQ. So * what we do is rewind the history buffer and erase all our knowledge of that * temporal event. - * - * The PROCess one is special though; it is used to annotate independence - * inside a task. + */ + +void crossrelease_hist_start(enum xhlock_context_t c) +{ + struct task_struct *cur = current; + + if (!cur->xhlocks) + return; + + cur->xhlock_idx_hist[c] = cur->xhlock_idx; + cur->hist_id_save[c] = cur->hist_id; +} + +void crossrelease_hist_end(enum xhlock_context_t c) +{ + struct task_struct *cur = current; + + if (cur->xhlocks) { + unsigned int idx = cur->xhlock_idx_hist[c]; + struct hist_lock *h = &xhlock(idx); + + cur->xhlock_idx = idx; + + /* Check if the ring was overwritten. */ + if (h->hist_id != cur->hist_id_save[c]) + invalidate_xhlock(h); + } +} + +/* + * lockdep_invariant_state() is used to annotate independence inside a task, to + * make one task look like multiple independent 'tasks'. * * Take for instance workqueues; each work is independent of the last. The * completion of a future work does not depend on the completion of a past work @@ -4758,40 +4781,14 @@ static inline void invalidate_xhlock(struct hist_lock *xhlock) * entry. Similarly, independence per-definition means it does not depend on * prior state. */ -void crossrelease_hist_start(enum xhlock_context_t c, bool force) +void lockdep_invariant_state(bool force) { - struct task_struct *cur = current; - - if (!cur->xhlocks) - return; - /* * We call this at an invariant point, no current state, no history. + * Verify the former, enforce the latter. */ - if (c == XHLOCK_PROC) { - /* verified the former, ensure the latter */ - WARN_ON_ONCE(!force && cur->lockdep_depth); - invalidate_xhlock(&xhlock(cur->xhlock_idx)); - } - - cur->xhlock_idx_hist[c] = cur->xhlock_idx; - cur->hist_id_save[c] = cur->hist_id; -} - -void crossrelease_hist_end(enum xhlock_context_t c) -{ - struct task_struct *cur = current; - - if (cur->xhlocks) { - unsigned int idx = cur->xhlock_idx_hist[c]; - struct hist_lock *h = &xhlock(idx); - - cur->xhlock_idx = idx; - - /* Check if the ring was overwritten. */ - if (h->hist_id != cur->hist_id_save[c]) - invalidate_xhlock(h); - } + WARN_ON_ONCE(!force && current->lockdep_depth); + invalidate_xhlock(&xhlock(current->xhlock_idx)); } static int cross_lock(struct lockdep_map *lock) -- cgit From 34d54f3d6917f519693dbe873ee59cd06fb515ed Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 14 Aug 2017 16:07:02 -0400 Subject: locking/pvqspinlock: Relax cmpxchg's to improve performance on some architectures All the locking related cmpxchg's in the following functions are replaced with the _acquire variants: - pv_queued_spin_steal_lock() - trylock_clear_pending() This change should help performance on architectures that use LL/SC. The cmpxchg in pv_kick_node() is replaced with a relaxed version with explicit memory barrier to make sure that it is fully ordered in the writing of next->lock and the reading of pn->state whether the cmpxchg is a success or failure without affecting performance in non-LL/SC architectures. On a 2-socket 12-core 96-thread Power8 system with pvqspinlock explicitly enabled, the performance of a locking microbenchmark with and without this patch on a 4.13-rc4 kernel with Xinhui's PPC qspinlock patch were as follows: # of thread w/o patch with patch % Change ----------- --------- ---------- -------- 8 5054.8 Mop/s 5209.4 Mop/s +3.1% 16 3985.0 Mop/s 4015.0 Mop/s +0.8% 32 2378.2 Mop/s 2396.0 Mop/s +0.7% Suggested-by: Peter Zijlstra Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Andrea Parri Cc: Boqun Feng Cc: Linus Torvalds Cc: Pan Xinhui Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/1502741222-24360-1-git-send-email-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/qspinlock_paravirt.h | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 4ccfcaae5b89..43555681c40b 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -72,7 +72,7 @@ static inline bool pv_queued_spin_steal_lock(struct qspinlock *lock) struct __qspinlock *l = (void *)lock; if (!(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) && - (cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0)) { + (cmpxchg_acquire(&l->locked, 0, _Q_LOCKED_VAL) == 0)) { qstat_inc(qstat_pv_lock_stealing, true); return true; } @@ -101,16 +101,16 @@ static __always_inline void clear_pending(struct qspinlock *lock) /* * The pending bit check in pv_queued_spin_steal_lock() isn't a memory - * barrier. Therefore, an atomic cmpxchg() is used to acquire the lock - * just to be sure that it will get it. + * barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the + * lock just to be sure that it will get it. */ static __always_inline int trylock_clear_pending(struct qspinlock *lock) { struct __qspinlock *l = (void *)lock; return !READ_ONCE(l->locked) && - (cmpxchg(&l->locked_pending, _Q_PENDING_VAL, _Q_LOCKED_VAL) - == _Q_PENDING_VAL); + (cmpxchg_acquire(&l->locked_pending, _Q_PENDING_VAL, + _Q_LOCKED_VAL) == _Q_PENDING_VAL); } #else /* _Q_PENDING_BITS == 8 */ static __always_inline void set_pending(struct qspinlock *lock) @@ -138,7 +138,7 @@ static __always_inline int trylock_clear_pending(struct qspinlock *lock) */ old = val; new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL; - val = atomic_cmpxchg(&lock->val, old, new); + val = atomic_cmpxchg_acquire(&lock->val, old, new); if (val == old) return 1; @@ -362,8 +362,18 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) * observe its next->locked value and advance itself. * * Matches with smp_store_mb() and cmpxchg() in pv_wait_node() + * + * The write to next->locked in arch_mcs_spin_unlock_contended() + * must be ordered before the read of pn->state in the cmpxchg() + * below for the code to work correctly. To guarantee full ordering + * irrespective of the success or failure of the cmpxchg(), + * a relaxed version with explicit barrier is used. The control + * dependency will order the reading of pn->state before any + * subsequent writes. */ - if (cmpxchg(&pn->state, vcpu_halted, vcpu_hashed) != vcpu_halted) + smp_mb__before_atomic(); + if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed) + != vcpu_halted) return; /* -- cgit