Diffstat (limited to 'arch/powerpc/lib/qspinlock.c')
-rw-r--r-- | arch/powerpc/lib/qspinlock.c | 159
1 file changed, 80 insertions, 79 deletions
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index e4bd145255d0..95ab4cdf582e 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -9,6 +9,7 @@
 #include <linux/sched/clock.h>
 #include <asm/qspinlock.h>
 #include <asm/paravirt.h>
+#include <trace/events/lock.h>
 
 #define MAX_NODES	4
 
@@ -16,7 +17,8 @@ struct qnode {
 	struct qnode	*next;
 	struct qspinlock *lock;
 	int		cpu;
-	int		yield_cpu;
+	u8		sleepy; /* 1 if the previous vCPU was preempted or
+				 * if the previous node was sleepy */
 	u8		locked; /* 1 if lock acquired */
 };
 
@@ -43,7 +45,7 @@ static bool pv_sleepy_lock_sticky __read_mostly = false;
 static u64 pv_sleepy_lock_interval_ns __read_mostly = 0;
 static int pv_sleepy_lock_factor __read_mostly = 256;
 static bool pv_yield_prev __read_mostly = true;
-static bool pv_yield_propagate_owner __read_mostly = true;
+static bool pv_yield_sleepy_owner __read_mostly = true;
 static bool pv_prod_head __read_mostly = false;
 
 static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
@@ -161,6 +163,8 @@ static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
 {
 	u32 prev, tmp;
 
+	kcsan_release();
+
 	asm volatile(
 "\t"	PPC_RELEASE_BARRIER "					\n"
 "1:	lwarx	%0,0,%2		# publish_tail_cpu		\n"
@@ -245,22 +249,18 @@ static __always_inline void seen_sleepy_lock(void)
 		this_cpu_write(sleepy_lock_seen_clock, sched_clock());
 }
 
-static __always_inline void seen_sleepy_node(struct qspinlock *lock, u32 val)
+static __always_inline void seen_sleepy_node(void)
 {
 	if (pv_sleepy_lock) {
 		if (pv_sleepy_lock_interval_ns)
 			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
-		if (val & _Q_LOCKED_VAL) {
-			if (!(val & _Q_SLEEPY_VAL))
-				try_set_sleepy(lock, val);
-		}
+		/* Don't set sleepy because we likely have a stale val */
 	}
 }
 
-static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
+static struct qnode *get_tail_qnode(struct qspinlock *lock, int prev_cpu)
 {
-	int cpu = decode_tail_cpu(val);
-	struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu);
+	struct qnodes *qnodesp = per_cpu_ptr(&qnodes, prev_cpu);
 	int idx;
 
 	/*
@@ -351,74 +351,66 @@ static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u
 	return __yield_to_locked_owner(lock, val, paravirt, mustq);
 }
 
-static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt)
+static __always_inline void propagate_sleepy(struct qnode *node, u32 val, bool paravirt)
 {
 	struct qnode *next;
 	int owner;
 
 	if (!paravirt)
 		return;
-	if (!pv_yield_propagate_owner)
-		return;
-
-	owner = get_owner_cpu(val);
-	if (*set_yield_cpu == owner)
+	if (!pv_yield_sleepy_owner)
 		return;
 
 	next = READ_ONCE(node->next);
 	if (!next)
 		return;
 
-	if (vcpu_is_preempted(owner)) {
-		next->yield_cpu = owner;
-		*set_yield_cpu = owner;
-	} else if (*set_yield_cpu != -1) {
-		next->yield_cpu = owner;
-		*set_yield_cpu = owner;
-	}
+	if (next->sleepy)
+		return;
+
+	owner = get_owner_cpu(val);
+	if (vcpu_is_preempted(owner))
+		next->sleepy = 1;
 }
 
 /* Called inside spin_begin() */
-static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
+static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)
 {
-	int prev_cpu = decode_tail_cpu(val);
 	u32 yield_count;
-	int yield_cpu;
 	bool preempted = false;
 
 	if (!paravirt)
 		goto relax;
 
-	if (!pv_yield_propagate_owner)
+	if (!pv_yield_sleepy_owner)
 		goto yield_prev;
 
-	yield_cpu = READ_ONCE(node->yield_cpu);
-	if (yield_cpu == -1) {
-		/* Propagate back the -1 CPU */
-		if (node->next && node->next->yield_cpu != -1)
-			node->next->yield_cpu = yield_cpu;
-		goto yield_prev;
-	}
-
-	yield_count = yield_count_of(yield_cpu);
-	if ((yield_count & 1) == 0)
-		goto yield_prev; /* owner vcpu is running */
-
-	spin_end();
-
-	preempted = true;
-	seen_sleepy_node(lock, val);
+	/*
+	 * If the previous waiter was preempted it might not be able to
+	 * propagate sleepy to us, so check the lock in that case too.
+	 */
+	if (node->sleepy || vcpu_is_preempted(prev_cpu)) {
+		u32 val = READ_ONCE(lock->val);
 
-	smp_rmb();
+		if (val & _Q_LOCKED_VAL) {
+			if (node->next && !node->next->sleepy) {
+				/*
+				 * Propagate sleepy to next waiter. Only if
+				 * owner is preempted, which allows the queue
+				 * to become "non-sleepy" if vCPU preemption
+				 * ceases to occur, even if the lock remains
+				 * highly contended.
+				 */
+				if (vcpu_is_preempted(get_owner_cpu(val)))
+					node->next->sleepy = 1;
			}
 
-	if (yield_cpu == node->yield_cpu) {
-		if (node->next && node->next->yield_cpu != yield_cpu)
-			node->next->yield_cpu = yield_cpu;
-		yield_to_preempted(yield_cpu, yield_count);
-		spin_begin();
-		return preempted;
+			preempted = yield_to_locked_owner(lock, val, paravirt);
+			if (preempted)
+				return preempted;
+		}
+		node->sleepy = false;
 	}
-	spin_begin();
 
 yield_prev:
 	if (!pv_yield_prev)
@@ -431,11 +423,11 @@ yield_prev:
 	spin_end();
 
 	preempted = true;
-	seen_sleepy_node(lock, val);
+	seen_sleepy_node();
 
 	smp_rmb(); /* See __yield_to_locked_owner comment */
 
-	if (!node->locked) {
+	if (!READ_ONCE(node->locked)) {
 		yield_to_preempted(prev_cpu, yield_count);
 		spin_begin();
 		return preempted;
@@ -541,7 +533,6 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 	bool sleepy = false;
 	bool mustq = false;
 	int idx;
-	int set_yield_cpu = -1;
 	int iters = 0;
 
 	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
@@ -565,11 +556,16 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 	node->next = NULL;
 	node->lock = lock;
 	node->cpu = smp_processor_id();
-	node->yield_cpu = -1;
+	node->sleepy = 0;
 	node->locked = 0;
 
 	tail = encode_tail_cpu(node->cpu);
 
+	/*
+	 * Assign all attributes of a node before it can be published.
+	 * Issues an lwsync, serving as a release barrier, as well as a
+	 * compiler barrier.
+	 */
 	old = publish_tail_cpu(lock, tail);
 
 	/*
@@ -577,26 +573,23 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 	 * head of the waitqueue.
 	 */
 	if (old & _Q_TAIL_CPU_MASK) {
-		struct qnode *prev = get_tail_qnode(lock, old);
+		int prev_cpu = decode_tail_cpu(old);
+		struct qnode *prev = get_tail_qnode(lock, prev_cpu);
 
 		/* Link @node into the waitqueue. */
 		WRITE_ONCE(prev->next, node);
 
 		/* Wait for mcs node lock to be released */
 		spin_begin();
-		while (!node->locked) {
+		while (!READ_ONCE(node->locked)) {
 			spec_barrier();
 
-			if (yield_to_prev(lock, node, old, paravirt))
+			if (yield_to_prev(lock, node, prev_cpu, paravirt))
 				seen_preempted = true;
 		}
 		spec_barrier();
 		spin_end();
 
-		/* Clear out stale propagated yield_cpu */
-		if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1)
-			node->yield_cpu = -1;
-
 		smp_rmb(); /* acquire barrier for the mcs lock */
 
 		/*
@@ -638,7 +631,7 @@ again:
 			}
 		}
 
-		propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
+		propagate_sleepy(node, val, paravirt);
 		preempted = yield_head_to_locked_owner(lock, val, paravirt);
 		if (!maybe_stealers)
 			continue;
@@ -705,29 +698,37 @@ again:
 	}
 
 release:
-	qnodesp->count--; /* release the node */
+	/*
+	 * Clear the lock before releasing the node, as another CPU might see stale
+	 * values if an interrupt occurs after we increment qnodesp->count
+	 * but before node->lock is initialized. The barrier ensures that
+	 * there are no further stores to the node after it has been released.
+	 */
+	node->lock = NULL;
+	barrier();
+	qnodesp->count--;
 }
 
-void queued_spin_lock_slowpath(struct qspinlock *lock)
+void __lockfunc queued_spin_lock_slowpath(struct qspinlock *lock)
 {
+	trace_contention_begin(lock, LCB_F_SPIN);
 	/*
 	 * This looks funny, but it induces the compiler to inline both
 	 * sides of the branch rather than share code as when the condition
 	 * is passed as the paravirt argument to the functions.
 	 */
 	if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
-		if (try_to_steal_lock(lock, true)) {
+		if (try_to_steal_lock(lock, true))
 			spec_barrier();
-			return;
-		}
-		queued_spin_lock_mcs_queue(lock, true);
+		else
+			queued_spin_lock_mcs_queue(lock, true);
 	} else {
-		if (try_to_steal_lock(lock, false)) {
+		if (try_to_steal_lock(lock, false))
 			spec_barrier();
-			return;
-		}
-		queued_spin_lock_mcs_queue(lock, false);
+		else
+			queued_spin_lock_mcs_queue(lock, false);
 	}
+	trace_contention_end(lock, 0);
 }
 EXPORT_SYMBOL(queued_spin_lock_slowpath);
 
@@ -942,21 +943,21 @@ static int pv_yield_prev_get(void *data, u64 *val)
 
 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");
 
-static int pv_yield_propagate_owner_set(void *data, u64 val)
+static int pv_yield_sleepy_owner_set(void *data, u64 val)
 {
-	pv_yield_propagate_owner = !!val;
+	pv_yield_sleepy_owner = !!val;
 
 	return 0;
 }
 
-static int pv_yield_propagate_owner_get(void *data, u64 *val)
+static int pv_yield_sleepy_owner_get(void *data, u64 *val)
 {
-	*val = pv_yield_propagate_owner;
+	*val = pv_yield_sleepy_owner;
 
 	return 0;
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n");
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_sleepy_owner, pv_yield_sleepy_owner_get, pv_yield_sleepy_owner_set, "%llu\n");
 
 static int pv_prod_head_set(void *data, u64 val)
 {
@@ -988,7 +989,7 @@ static __init int spinlock_debugfs_init(void)
 		debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns);
 		debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor);
 		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
-		debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner);
+		debugfs_create_file("qspl_pv_yield_sleepy_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_sleepy_owner);
		debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head);
	}
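
The new comment above publish_tail_cpu() spells out the ordering rule the queue relies on: fully initialise the qnode first, then publish the tail, so the release barrier orders those stores before the node becomes reachable, paired with an acquire on the reader side. Below is a minimal userspace sketch of that pattern using C11 atomics. The struct, the tail variable and the function names are invented for illustration only; the kernel itself uses lwsync, larx/stcx. and smp_rmb() rather than these primitives.

	/* Illustrative analogue of the "initialise, then publish" ordering; not kernel code. */
	#include <stdatomic.h>
	#include <stddef.h>

	struct qnode_model {
		struct qnode_model *next;
		int cpu;
		_Atomic int locked;
	};

	static _Atomic(struct qnode_model *) tail_model;

	/* "Assign all attributes of a node before it can be published": write every
	 * field, then make the node reachable with a release operation. */
	static struct qnode_model *publish_node(struct qnode_model *node, int cpu)
	{
		node->next = NULL;
		node->cpu = cpu;
		atomic_store_explicit(&node->locked, 0, memory_order_relaxed);

		/* Pairs with any acquire that later observes this node via the tail. */
		return atomic_exchange_explicit(&tail_model, node, memory_order_release);
	}

	/* Waiter: spin until the MCS lock is handed over. The acquire load stands in
	 * for the smp_rmb() "acquire barrier for the mcs lock" in the queue code. */
	static void wait_for_handover(struct qnode_model *node)
	{
		while (!atomic_load_explicit(&node->locked, memory_order_acquire))
			;
	}

	/* Previous owner: hand the MCS lock to the next waiter with release semantics. */
	static void hand_over(struct qnode_model *next)
	{
		atomic_store_explicit(&next->locked, 1, memory_order_release);
	}

	int main(void)
	{
		struct qnode_model n = { 0 };

		publish_node(&n, 0);
		hand_over(&n);		/* normally done by the previous lock holder */
		wait_for_handover(&n);	/* returns immediately in this single-threaded demo */
		return 0;
	}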

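The rename from pv_yield_propagate_owner to pv_yield_sleepy_owner also renames the runtime knob to qspl_pv_yield_sleepy_owner. The sketch below reads and toggles it from userspace; it assumes debugfs is mounted at /sys/kernel/debug and that arch_debugfs_dir maps to the powerpc/ subdirectory, so the path may differ on a given system.

	/* Sketch: query and flip qspl_pv_yield_sleepy_owner (path is an assumption). */
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	#define TUNABLE "/sys/kernel/debug/powerpc/qspl_pv_yield_sleepy_owner"

	static int read_tunable(void)
	{
		char buf[32] = { 0 };
		int fd = open(TUNABLE, O_RDONLY);

		if (fd < 0)
			return -1;
		if (read(fd, buf, sizeof(buf) - 1) > 0)
			printf("qspl_pv_yield_sleepy_owner = %s", buf); /* fops print "%llu\n" */
		close(fd);
		return 0;
	}

	static int write_tunable(const char *val)
	{
		int fd = open(TUNABLE, O_WRONLY);
		int ret = 0;

		if (fd < 0)
			return -1;
		if (write(fd, val, 1) != 1)
			ret = -1;
		close(fd);
		return ret;
	}

	int main(void)
	{
		if (read_tunable() < 0 || write_tunable("0") < 0) {
			perror(TUNABLE);
			return 1;
		}
		return read_tunable() < 0;	/* should now report 0 */
	}

Writing 0 disables the sleepy-owner propagation heuristic at runtime; writing 1 restores the default, since pv_yield_sleepy_owner defaults to true.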