summaryrefslogtreecommitdiff
path: root/arch/sparc/kernel/smp_64.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/sparc/kernel/smp_64.c')
-rw-r--r--arch/sparc/kernel/smp_64.c622
1 files changed, 329 insertions, 293 deletions
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 77539eda928c..5cbd6ed5ef6f 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* smp.c: Sparc64 SMP support.
*
* Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net)
@@ -5,7 +6,8 @@
#include <linux/export.h>
#include <linux/kernel.h>
-#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/hotplug.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/threads.h>
@@ -20,11 +22,12 @@
#include <linux/cache.h>
#include <linux/jiffies.h>
#include <linux/profile.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/vmalloc.h>
#include <linux/ftrace.h>
#include <linux/cpu.h>
#include <linux/slab.h>
+#include <linux/kgdb.h>
#include <asm/head.h>
#include <asm/ptrace.h>
@@ -35,15 +38,16 @@
#include <asm/hvtramp.h>
#include <asm/io.h>
#include <asm/timer.h>
+#include <asm/setup.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/oplib.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/starfire.h>
#include <asm/tlb.h>
+#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/prom.h>
#include <asm/mdesc.h>
@@ -52,18 +56,28 @@
#include <asm/pcr.h>
#include "cpumap.h"
-
-int sparc64_multi_core __read_mostly;
+#include "kernel.h"
DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
{ [0 ... NR_CPUS-1] = CPU_MASK_NONE };
+cpumask_t cpu_core_sib_map[NR_CPUS] __read_mostly = {
+ [0 ... NR_CPUS-1] = CPU_MASK_NONE };
+
+cpumask_t cpu_core_sib_cache_map[NR_CPUS] __read_mostly = {
+ [0 ... NR_CPUS - 1] = CPU_MASK_NONE };
+
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_SYMBOL(cpu_core_map);
+EXPORT_SYMBOL(cpu_core_sib_map);
+EXPORT_SYMBOL(cpu_core_sib_cache_map);
static cpumask_t smp_commenced_mask;
+static DEFINE_PER_CPU(bool, poke);
+static bool cpu_poke;
+
void smp_info(struct seq_file *m)
{
int i;
@@ -87,7 +101,7 @@ extern void setup_sparc64_timer(void);
static volatile unsigned long callin_flag = 0;
-void __cpuinit smp_callin(void)
+void smp_callin(void)
{
int cpuid = hard_smp_processor_id();
@@ -113,7 +127,7 @@ void __cpuinit smp_callin(void)
current_thread_info()->new_child = 0;
/* Attach to the address space of init_task. */
- atomic_inc(&init_mm.mm_count);
+ mmgrab(&init_mm);
current->active_mm = &init_mm;
/* inform the notifiers about the new cpu */
@@ -123,12 +137,10 @@ void __cpuinit smp_callin(void)
rmb();
set_cpu_online(cpuid, true);
- local_irq_enable();
- /* idle thread is expected to have preempt disabled */
- preempt_disable();
+ local_irq_enable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
void cpu_panic(void)
@@ -150,7 +162,7 @@ void cpu_panic(void)
#define NUM_ROUNDS 64 /* magic value */
#define NUM_ITERS 5 /* likewise */
-static DEFINE_SPINLOCK(itc_sync_lock);
+static DEFINE_RAW_SPINLOCK(itc_sync_lock);
static unsigned long go[SLAVE + 1];
#define DEBUG_TICK_SYNC 0
@@ -258,7 +270,7 @@ static void smp_synchronize_one_tick(int cpu)
go[MASTER] = 0;
membar_safe("#StoreLoad");
- spin_lock_irqsave(&itc_sync_lock, flags);
+ raw_spin_lock_irqsave(&itc_sync_lock, flags);
{
for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) {
while (!go[MASTER])
@@ -269,19 +281,12 @@ static void smp_synchronize_one_tick(int cpu)
membar_safe("#StoreLoad");
}
}
- spin_unlock_irqrestore(&itc_sync_lock, flags);
+ raw_spin_unlock_irqrestore(&itc_sync_lock, flags);
}
#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
-/* XXX Put this in some common place. XXX */
-static unsigned long kimage_addr_to_ra(void *p)
-{
- unsigned long val = (unsigned long) p;
-
- return kern_base + (val - KERNBASE);
-}
-
-static void __cpuinit ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg, void **descrp)
+static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg,
+ void **descrp)
{
extern unsigned long sparc64_ttable_tl0;
extern unsigned long kern_locked_tte_data;
@@ -292,9 +297,7 @@ static void __cpuinit ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread
unsigned long hv_err;
int i;
- hdesc = kzalloc(sizeof(*hdesc) +
- (sizeof(struct hvtramp_mapping) *
- num_kernel_image_mappings - 1),
+ hdesc = kzalloc(struct_size(hdesc, maps, num_kernel_image_mappings),
GFP_KERNEL);
if (!hdesc) {
printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate "
@@ -342,7 +345,7 @@ extern unsigned long sparc64_cpu_startup;
*/
static struct thread_info *cpu_new_thread = NULL;
-static int __cpuinit smp_boot_one_cpu(unsigned int cpu, struct task_struct *idle)
+static int smp_boot_one_cpu(unsigned int cpu, struct task_struct *idle)
{
unsigned long entry =
(unsigned long)(&sparc64_cpu_startup);
@@ -618,22 +621,48 @@ retry:
}
}
-/* Multi-cpu list version. */
+#define CPU_MONDO_COUNTER(cpuid) (cpu_mondo_counter[cpuid])
+#define MONDO_USEC_WAIT_MIN 2
+#define MONDO_USEC_WAIT_MAX 100
+#define MONDO_RETRY_LIMIT 500000
+
+/* Multi-cpu list version.
+ *
+ * Deliver xcalls to 'cnt' number of cpus in 'cpu_list'.
+ * Sometimes not all cpus receive the mondo, requiring us to re-send
+ * the mondo until all cpus have received, or cpus are truly stuck
+ * unable to receive mondo, and we timeout.
+ * Occasionally a target cpu strand is borrowed briefly by hypervisor to
+ * perform guest service, such as PCIe error handling. Consider the
+ * service time, 1 second overall wait is reasonable for 1 cpu.
+ * Here two in-between mondo check wait time are defined: 2 usec for
+ * single cpu quick turn around and up to 100usec for large cpu count.
+ * Deliver mondo to large number of cpus could take longer, we adjusts
+ * the retry count as long as target cpus are making forward progress.
+ */
static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{
- int retries, this_cpu, prev_sent, i, saw_cpu_error;
+ int this_cpu, tot_cpus, prev_sent, i, rem;
+ int usec_wait, retries, tot_retries;
+ u16 first_cpu = 0xffff;
+ unsigned long xc_rcvd = 0;
unsigned long status;
+ int ecpuerror_id = 0;
+ int enocpu_id = 0;
u16 *cpu_list;
+ u16 cpu;
this_cpu = smp_processor_id();
-
cpu_list = __va(tb->cpu_list_pa);
-
- saw_cpu_error = 0;
- retries = 0;
+ usec_wait = cnt * MONDO_USEC_WAIT_MIN;
+ if (usec_wait > MONDO_USEC_WAIT_MAX)
+ usec_wait = MONDO_USEC_WAIT_MAX;
+ retries = tot_retries = 0;
+ tot_cpus = cnt;
prev_sent = 0;
+
do {
- int forward_progress, n_sent;
+ int n_sent, mondo_delivered, target_cpu_busy;
status = sun4v_cpu_mondo_send(cnt,
tb->cpu_list_pa,
@@ -641,94 +670,113 @@ static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
/* HV_EOK means all cpus received the xcall, we're done. */
if (likely(status == HV_EOK))
- break;
+ goto xcall_done;
+
+ /* If not these non-fatal errors, panic */
+ if (unlikely((status != HV_EWOULDBLOCK) &&
+ (status != HV_ECPUERROR) &&
+ (status != HV_ENOCPU)))
+ goto fatal_errors;
/* First, see if we made any forward progress.
*
+ * Go through the cpu_list, count the target cpus that have
+ * received our mondo (n_sent), and those that did not (rem).
+ * Re-pack cpu_list with the cpus remain to be retried in the
+ * front - this simplifies tracking the truly stalled cpus.
+ *
* The hypervisor indicates successful sends by setting
* cpu list entries to the value 0xffff.
+ *
+ * EWOULDBLOCK means some target cpus did not receive the
+ * mondo and retry usually helps.
+ *
+ * ECPUERROR means at least one target cpu is in error state,
+ * it's usually safe to skip the faulty cpu and retry.
+ *
+ * ENOCPU means one of the target cpu doesn't belong to the
+ * domain, perhaps offlined which is unexpected, but not
+ * fatal and it's okay to skip the offlined cpu.
*/
+ rem = 0;
n_sent = 0;
for (i = 0; i < cnt; i++) {
- if (likely(cpu_list[i] == 0xffff))
+ cpu = cpu_list[i];
+ if (likely(cpu == 0xffff)) {
n_sent++;
+ } else if ((status == HV_ECPUERROR) &&
+ (sun4v_cpu_state(cpu) == HV_CPU_STATE_ERROR)) {
+ ecpuerror_id = cpu + 1;
+ } else if (status == HV_ENOCPU && !cpu_online(cpu)) {
+ enocpu_id = cpu + 1;
+ } else {
+ cpu_list[rem++] = cpu;
+ }
}
- forward_progress = 0;
- if (n_sent > prev_sent)
- forward_progress = 1;
+ /* No cpu remained, we're done. */
+ if (rem == 0)
+ break;
- prev_sent = n_sent;
+ /* Otherwise, update the cpu count for retry. */
+ cnt = rem;
- /* If we get a HV_ECPUERROR, then one or more of the cpus
- * in the list are in error state. Use the cpu_state()
- * hypervisor call to find out which cpus are in error state.
+ /* Record the overall number of mondos received by the
+ * first of the remaining cpus.
*/
- if (unlikely(status == HV_ECPUERROR)) {
- for (i = 0; i < cnt; i++) {
- long err;
- u16 cpu;
+ if (first_cpu != cpu_list[0]) {
+ first_cpu = cpu_list[0];
+ xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
+ }
- cpu = cpu_list[i];
- if (cpu == 0xffff)
- continue;
+ /* Was any mondo delivered successfully? */
+ mondo_delivered = (n_sent > prev_sent);
+ prev_sent = n_sent;
- err = sun4v_cpu_state(cpu);
- if (err == HV_CPU_STATE_ERROR) {
- saw_cpu_error = (cpu + 1);
- cpu_list[i] = 0xffff;
- }
- }
- } else if (unlikely(status != HV_EWOULDBLOCK))
- goto fatal_mondo_error;
+ /* or, was any target cpu busy processing other mondos? */
+ target_cpu_busy = (xc_rcvd < CPU_MONDO_COUNTER(first_cpu));
+ xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
- /* Don't bother rewriting the CPU list, just leave the
- * 0xffff and non-0xffff entries in there and the
- * hypervisor will do the right thing.
- *
- * Only advance timeout state if we didn't make any
- * forward progress.
+ /* Retry count is for no progress. If we're making progress,
+ * reset the retry count.
*/
- if (unlikely(!forward_progress)) {
- if (unlikely(++retries > 10000))
- goto fatal_mondo_timeout;
-
- /* Delay a little bit to let other cpus catch up
- * on their cpu mondo queue work.
- */
- udelay(2 * cnt);
+ if (likely(mondo_delivered || target_cpu_busy)) {
+ tot_retries += retries;
+ retries = 0;
+ } else if (unlikely(retries > MONDO_RETRY_LIMIT)) {
+ goto fatal_mondo_timeout;
}
- } while (1);
- if (unlikely(saw_cpu_error))
- goto fatal_mondo_cpu_error;
+ /* Delay a little bit to let other cpus catch up on
+ * their cpu mondo queue work.
+ */
+ if (!mondo_delivered)
+ udelay(usec_wait);
- return;
+ retries++;
+ } while (1);
-fatal_mondo_cpu_error:
- printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus "
- "(including %d) were in error state\n",
- this_cpu, saw_cpu_error - 1);
+xcall_done:
+ if (unlikely(ecpuerror_id > 0)) {
+ pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) was in error state\n",
+ this_cpu, ecpuerror_id - 1);
+ } else if (unlikely(enocpu_id > 0)) {
+ pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) does not belong to the domain\n",
+ this_cpu, enocpu_id - 1);
+ }
return;
+fatal_errors:
+ /* fatal errors include bad alignment, etc */
+ pr_crit("CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) mondo_block_pa(%lx)\n",
+ this_cpu, tot_cpus, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
+ panic("Unexpected SUN4V mondo error %lu\n", status);
+
fatal_mondo_timeout:
- printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
- " progress after %d retries.\n",
- this_cpu, retries);
- goto dump_cpu_list_and_out;
-
-fatal_mondo_error:
- printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
- this_cpu, status);
- printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
- "mondo_block_pa(%lx)\n",
- this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
-
-dump_cpu_list_and_out:
- printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu);
- for (i = 0; i < cnt; i++)
- printk("%u ", cpu_list[i]);
- printk("]\n");
+ /* some cpus being non-responsive to the cpu mondo */
+ pr_crit("CPU[%d]: SUN4V mondo timeout, cpu(%d) made no forward progress after %d retries. Total target cpus(%d).\n",
+ this_cpu, first_cpu, (tot_retries + retries), tot_cpus);
+ panic("SUN4V mondo timeout panic\n");
}
static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);
@@ -821,13 +869,17 @@ void arch_send_call_function_single_ipi(int cpu)
void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs)
{
clear_softint(1 << irq);
+ irq_enter();
generic_smp_call_function_interrupt();
+ irq_exit();
}
void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs)
{
clear_softint(1 << irq);
+ irq_enter();
generic_smp_call_function_single_interrupt();
+ irq_exit();
}
static void tsb_sync(void *info)
@@ -867,25 +919,26 @@ extern unsigned long xcall_flush_dcache_page_cheetah;
#endif
extern unsigned long xcall_flush_dcache_page_spitfire;
-#ifdef CONFIG_DEBUG_DCFLUSH
-extern atomic_t dcpage_flushes;
-extern atomic_t dcpage_flushes_xcall;
-#endif
-
-static inline void __local_flush_dcache_page(struct page *page)
+static inline void __local_flush_dcache_folio(struct folio *folio)
{
+ unsigned int i, nr = folio_nr_pages(folio);
+
#ifdef DCACHE_ALIASING_POSSIBLE
- __flush_dcache_page(page_address(page),
+ for (i = 0; i < nr; i++)
+ __flush_dcache_page(folio_address(folio) + i * PAGE_SIZE,
((tlb_type == spitfire) &&
- page_mapping(page) != NULL));
+ folio_flush_mapping(folio) != NULL));
#else
- if (page_mapping(page) != NULL &&
- tlb_type == spitfire)
- __flush_icache_page(__pa(page_address(page)));
+ if (folio_flush_mapping(folio) != NULL &&
+ tlb_type == spitfire) {
+ unsigned long pfn = folio_pfn(folio)
+ for (i = 0; i < nr; i++)
+ __flush_icache_page((pfn + i) * PAGE_SIZE);
+ }
#endif
}
-void smp_flush_dcache_page_impl(struct page *page, int cpu)
+void smp_flush_dcache_folio_impl(struct folio *folio, int cpu)
{
int this_cpu;
@@ -899,14 +952,14 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu)
this_cpu = get_cpu();
if (cpu == this_cpu) {
- __local_flush_dcache_page(page);
+ __local_flush_dcache_folio(folio);
} else if (cpu_online(cpu)) {
- void *pg_addr = page_address(page);
+ void *pg_addr = folio_address(folio);
u64 data0 = 0;
if (tlb_type == spitfire) {
data0 = ((u64)&xcall_flush_dcache_page_spitfire);
- if (page_mapping(page) != NULL)
+ if (folio_flush_mapping(folio) != NULL)
data0 |= ((u64)1 << 32);
} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
@@ -914,18 +967,23 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu)
#endif
}
if (data0) {
- xcall_deliver(data0, __pa(pg_addr),
- (u64) pg_addr, cpumask_of(cpu));
+ unsigned int i, nr = folio_nr_pages(folio);
+
+ for (i = 0; i < nr; i++) {
+ xcall_deliver(data0, __pa(pg_addr),
+ (u64) pg_addr, cpumask_of(cpu));
#ifdef CONFIG_DEBUG_DCFLUSH
- atomic_inc(&dcpage_flushes_xcall);
+ atomic_inc(&dcpage_flushes_xcall);
#endif
+ pg_addr += PAGE_SIZE;
+ }
}
}
put_cpu();
}
-void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
+void flush_dcache_folio_all(struct mm_struct *mm, struct folio *folio)
{
void *pg_addr;
u64 data0;
@@ -939,10 +997,10 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
atomic_inc(&dcpage_flushes);
#endif
data0 = 0;
- pg_addr = page_address(page);
+ pg_addr = folio_address(folio);
if (tlb_type == spitfire) {
data0 = ((u64)&xcall_flush_dcache_page_spitfire);
- if (page_mapping(page) != NULL)
+ if (folio_flush_mapping(folio) != NULL)
data0 |= ((u64)1 << 32);
} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
@@ -950,50 +1008,24 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
#endif
}
if (data0) {
- xcall_deliver(data0, __pa(pg_addr),
- (u64) pg_addr, cpu_online_mask);
+ unsigned int i, nr = folio_nr_pages(folio);
+
+ for (i = 0; i < nr; i++) {
+ xcall_deliver(data0, __pa(pg_addr),
+ (u64) pg_addr, cpu_online_mask);
#ifdef CONFIG_DEBUG_DCFLUSH
- atomic_inc(&dcpage_flushes_xcall);
+ atomic_inc(&dcpage_flushes_xcall);
#endif
+ pg_addr += PAGE_SIZE;
+ }
}
- __local_flush_dcache_page(page);
+ __local_flush_dcache_folio(folio);
preempt_enable();
}
-void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
-{
- struct mm_struct *mm;
- unsigned long flags;
-
- clear_softint(1 << irq);
-
- /* See if we need to allocate a new TLB context because
- * the version of the one we are using is now out of date.
- */
- mm = current->active_mm;
- if (unlikely(!mm || (mm == &init_mm)))
- return;
-
- spin_lock_irqsave(&mm->context.lock, flags);
-
- if (unlikely(!CTX_VALID(mm->context)))
- get_new_mmu_context(mm);
-
- spin_unlock_irqrestore(&mm->context.lock, flags);
-
- load_secondary_context(mm);
- __flush_tlb_mm(CTX_HWBITS(mm->context),
- SECONDARY_CONTEXT);
-}
-
-void smp_new_mmu_context_version(void)
-{
- smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0);
-}
-
#ifdef CONFIG_KGDB
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
{
smp_cross_call(&xcall_kgdb_capture, 0, 0, 0);
}
@@ -1018,38 +1050,9 @@ void smp_fetch_global_pmu(void)
* are flush_tlb_*() routines, and these run after flush_cache_*()
* which performs the flushw.
*
- * The SMP TLB coherency scheme we use works as follows:
- *
- * 1) mm->cpu_vm_mask is a bit mask of which cpus an address
- * space has (potentially) executed on, this is the heuristic
- * we use to avoid doing cross calls.
- *
- * Also, for flushing from kswapd and also for clones, we
- * use cpu_vm_mask as the list of cpus to make run the TLB.
- *
- * 2) TLB context numbers are shared globally across all processors
- * in the system, this allows us to play several games to avoid
- * cross calls.
- *
- * One invariant is that when a cpu switches to a process, and
- * that processes tsk->active_mm->cpu_vm_mask does not have the
- * current cpu's bit set, that tlb context is flushed locally.
- *
- * If the address space is non-shared (ie. mm->count == 1) we avoid
- * cross calls when we want to flush the currently running process's
- * tlb state. This is done by clearing all cpu bits except the current
- * processor's in current->mm->cpu_vm_mask and performing the
- * flush locally only. This will force any subsequent cpus which run
- * this task to flush the context from the local tlb if the process
- * migrates to another cpu (again).
- *
- * 3) For shared address spaces (threads) and swapping we bite the
- * bullet for most cases and perform the cross call (but only to
- * the cpus listed in cpu_vm_mask).
- *
- * The performance gain from "optimizing" away the cross call for threads is
- * questionable (in theory the big win for threads is the massive sharing of
- * address space state across processors).
+ * mm->cpu_vm_mask is a bit mask of which cpus an address
+ * space has (potentially) executed on, this is the heuristic
+ * we use to limit cross calls.
*/
/* This currently is only used by the hugetlb arch pre-fault
@@ -1059,18 +1062,13 @@ void smp_fetch_global_pmu(void)
void smp_flush_tlb_mm(struct mm_struct *mm)
{
u32 ctx = CTX_HWBITS(mm->context);
- int cpu = get_cpu();
- if (atomic_read(&mm->mm_users) == 1) {
- cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
- goto local_flush_and_out;
- }
+ get_cpu();
smp_cross_call_masked(&xcall_flush_tlb_mm,
ctx, 0, 0,
mm_cpumask(mm));
-local_flush_and_out:
__flush_tlb_mm(ctx, SECONDARY_CONTEXT);
put_cpu();
@@ -1093,17 +1091,15 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long
{
u32 ctx = CTX_HWBITS(mm->context);
struct tlb_pending_info info;
- int cpu = get_cpu();
+
+ get_cpu();
info.ctx = ctx;
info.nr = nr;
info.vaddrs = vaddrs;
- if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
- cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
- else
- smp_call_function_many(mm_cpumask(mm), tlb_pending_func,
- &info, 1);
+ smp_call_function_many(mm_cpumask(mm), tlb_pending_func,
+ &info, 1);
__flush_tlb_pending(ctx, nr, vaddrs);
@@ -1113,14 +1109,13 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long
void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr)
{
unsigned long context = CTX_HWBITS(mm->context);
- int cpu = get_cpu();
- if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
- cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
- else
- smp_cross_call_masked(&xcall_flush_tlb_page,
- context, vaddr, 0,
- mm_cpumask(mm));
+ get_cpu();
+
+ smp_cross_call_masked(&xcall_flush_tlb_page,
+ context, vaddr, 0,
+ mm_cpumask(mm));
+
__flush_tlb_page(context, vaddr);
put_cpu();
@@ -1148,7 +1143,7 @@ static unsigned long penguins_are_doing_time;
void smp_capture(void)
{
- int result = atomic_add_ret(1, &smp_capture_depth);
+ int result = atomic_add_return(1, &smp_capture_depth);
if (result == 1) {
int ncpus = num_online_cpus();
@@ -1205,20 +1200,10 @@ void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs)
preempt_enable();
}
-/* /proc/profile writes can call this, don't __init it please. */
-int setup_profiling_timer(unsigned int multiplier)
-{
- return -EINVAL;
-}
-
void __init smp_prepare_cpus(unsigned int max_cpus)
{
}
-void smp_prepare_boot_cpu(void)
-{
-}
-
void __init smp_setup_processor_id(void)
{
if (tlb_type == spitfire)
@@ -1249,6 +1234,19 @@ void smp_fill_in_sib_core_maps(void)
}
}
+ for_each_present_cpu(i) {
+ unsigned int j;
+
+ for_each_present_cpu(j) {
+ if (cpu_data(i).max_cache_id ==
+ cpu_data(j).max_cache_id)
+ cpumask_set_cpu(j, &cpu_core_sib_cache_map[i]);
+
+ if (cpu_data(i).sock_id == cpu_data(j).sock_id)
+ cpumask_set_cpu(j, &cpu_core_sib_map[i]);
+ }
+ }
+
for_each_present_cpu(i) {
unsigned int j;
@@ -1266,7 +1264,7 @@ void smp_fill_in_sib_core_maps(void)
}
}
-int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle)
+int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
int ret = smp_boot_one_cpu(cpu, tidle);
@@ -1393,13 +1391,88 @@ void __cpu_die(unsigned int cpu)
void __init smp_cpus_done(unsigned int max_cpus)
{
- pcr_arch_init();
}
-void smp_send_reschedule(int cpu)
+static void send_cpu_ipi(int cpu)
{
- xcall_deliver((u64) &xcall_receive_signal, 0, 0,
- cpumask_of(cpu));
+ xcall_deliver((u64) &xcall_receive_signal,
+ 0, 0, cpumask_of(cpu));
+}
+
+void scheduler_poke(void)
+{
+ if (!cpu_poke)
+ return;
+
+ if (!__this_cpu_read(poke))
+ return;
+
+ __this_cpu_write(poke, false);
+ set_softint(1 << PIL_SMP_RECEIVE_SIGNAL);
+}
+
+static unsigned long send_cpu_poke(int cpu)
+{
+ unsigned long hv_err;
+
+ per_cpu(poke, cpu) = true;
+ hv_err = sun4v_cpu_poke(cpu);
+ if (hv_err != HV_EOK) {
+ per_cpu(poke, cpu) = false;
+ pr_err_ratelimited("%s: sun4v_cpu_poke() fails err=%lu\n",
+ __func__, hv_err);
+ }
+
+ return hv_err;
+}
+
+void arch_smp_send_reschedule(int cpu)
+{
+ if (cpu == smp_processor_id()) {
+ WARN_ON_ONCE(preemptible());
+ set_softint(1 << PIL_SMP_RECEIVE_SIGNAL);
+ return;
+ }
+
+ /* Use cpu poke to resume idle cpu if supported. */
+ if (cpu_poke && idle_cpu(cpu)) {
+ unsigned long ret;
+
+ ret = send_cpu_poke(cpu);
+ if (ret == HV_EOK)
+ return;
+ }
+
+ /* Use IPI in following cases:
+ * - cpu poke not supported
+ * - cpu not idle
+ * - send_cpu_poke() returns with error
+ */
+ send_cpu_ipi(cpu);
+}
+
+void smp_init_cpu_poke(void)
+{
+ unsigned long major;
+ unsigned long minor;
+ int ret;
+
+ if (tlb_type != hypervisor)
+ return;
+
+ ret = sun4v_hvapi_get(HV_GRP_CORE, &major, &minor);
+ if (ret) {
+ pr_debug("HV_GRP_CORE is not registered\n");
+ return;
+ }
+
+ if (major == 1 && minor >= 6) {
+ /* CPU POKE is registered. */
+ cpu_poke = true;
+ return;
+ }
+
+ pr_debug("CPU_POKE not supported\n");
}
void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
@@ -1408,55 +1481,39 @@ void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
scheduler_ipi();
}
-/* This is a nop because we capture all other cpus
- * anyways when making the PROM active.
- */
-void smp_send_stop(void)
+static void stop_this_cpu(void *dummy)
{
+ set_cpu_online(smp_processor_id(), false);
+ prom_stopself();
}
-/**
- * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
- * @cpu: cpu to allocate for
- * @size: size allocation in bytes
- * @align: alignment
- *
- * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper
- * does the right thing for NUMA regardless of the current
- * configuration.
- *
- * RETURNS:
- * Pointer to the allocated area on success, NULL on failure.
- */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
- size_t align)
+void smp_send_stop(void)
{
- const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NEED_MULTIPLE_NODES
- int node = cpu_to_node(cpu);
- void *ptr;
-
- if (!node_online(node) || !NODE_DATA(node)) {
- ptr = __alloc_bootmem(size, align, goal);
- pr_info("cpu %d has no node %d or node-local memory\n",
- cpu, node);
- pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
- cpu, size, __pa(ptr));
- } else {
- ptr = __alloc_bootmem_node(NODE_DATA(node),
- size, align, goal);
- pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
- "%016lx\n", cpu, size, node, __pa(ptr));
- }
- return ptr;
-#else
- return __alloc_bootmem(size, align, goal);
+ int cpu;
+
+ if (tlb_type == hypervisor) {
+ int this_cpu = smp_processor_id();
+#ifdef CONFIG_SERIAL_SUNHV
+ sunhv_migrate_hvcons_irq(this_cpu);
#endif
-}
+ for_each_online_cpu(cpu) {
+ if (cpu == this_cpu)
+ continue;
-static void __init pcpu_free_bootmem(void *ptr, size_t size)
-{
- free_bootmem(__pa(ptr), size);
+ set_cpu_online(cpu, false);
+#ifdef CONFIG_SUN_LDOMS
+ if (ldom_domaining_enabled) {
+ unsigned long hv_err;
+ hv_err = sun4v_cpu_stop(cpu);
+ if (hv_err)
+ printk(KERN_ERR "sun4v_cpu_stop() "
+ "failed err=%lu\n", hv_err);
+ } else
+#endif
+ prom_stopcpu_cpuid(cpu);
+ }
+ } else
+ smp_call_function(stop_this_cpu, NULL, 0);
}
static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
@@ -1467,27 +1524,9 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
return REMOTE_DISTANCE;
}
-static void __init pcpu_populate_pte(unsigned long addr)
+static int __init pcpu_cpu_to_node(int cpu)
{
- pgd_t *pgd = pgd_offset_k(addr);
- pud_t *pud;
- pmd_t *pmd;
-
- pud = pud_offset(pgd, addr);
- if (pud_none(*pud)) {
- pmd_t *new;
-
- new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
- pud_populate(&init_mm, pud, new);
- }
-
- pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd)) {
- pte_t *new;
-
- new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
- pmd_populate_kernel(&init_mm, pmd, new);
- }
+ return cpu_to_node(cpu);
}
void __init setup_per_cpu_areas(void)
@@ -1500,18 +1539,15 @@ void __init setup_per_cpu_areas(void)
rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
PERCPU_DYNAMIC_RESERVE, 4 << 20,
pcpu_cpu_distance,
- pcpu_alloc_bootmem,
- pcpu_free_bootmem);
+ pcpu_cpu_to_node);
if (rc)
- pr_warning("PERCPU: %s allocator failed (%d), "
- "falling back to page size\n",
- pcpu_fc_names[pcpu_chosen_fc], rc);
+ pr_warn("PERCPU: %s allocator failed (%d), "
+ "falling back to page size\n",
+ pcpu_fc_names[pcpu_chosen_fc], rc);
}
if (rc < 0)
rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
- pcpu_alloc_bootmem,
- pcpu_free_bootmem,
- pcpu_populate_pte);
+ pcpu_cpu_to_node);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);