diff options
Diffstat (limited to 'arch/sparc/kernel/smp_64.c')
| -rw-r--r-- | arch/sparc/kernel/smp_64.c | 622 |
1 files changed, 329 insertions, 293 deletions
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 77539eda928c..5cbd6ed5ef6f 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* smp.c: Sparc64 SMP support. * * Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net) @@ -5,7 +6,8 @@ #include <linux/export.h> #include <linux/kernel.h> -#include <linux/sched.h> +#include <linux/sched/mm.h> +#include <linux/sched/hotplug.h> #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/threads.h> @@ -20,11 +22,12 @@ #include <linux/cache.h> #include <linux/jiffies.h> #include <linux/profile.h> -#include <linux/bootmem.h> +#include <linux/memblock.h> #include <linux/vmalloc.h> #include <linux/ftrace.h> #include <linux/cpu.h> #include <linux/slab.h> +#include <linux/kgdb.h> #include <asm/head.h> #include <asm/ptrace.h> @@ -35,15 +38,16 @@ #include <asm/hvtramp.h> #include <asm/io.h> #include <asm/timer.h> +#include <asm/setup.h> #include <asm/irq.h> #include <asm/irq_regs.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/oplib.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/starfire.h> #include <asm/tlb.h> +#include <asm/pgalloc.h> #include <asm/sections.h> #include <asm/prom.h> #include <asm/mdesc.h> @@ -52,18 +56,28 @@ #include <asm/pcr.h> #include "cpumap.h" - -int sparc64_multi_core __read_mostly; +#include "kernel.h" DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; cpumask_t cpu_core_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; +cpumask_t cpu_core_sib_map[NR_CPUS] __read_mostly = { + [0 ... NR_CPUS-1] = CPU_MASK_NONE }; + +cpumask_t cpu_core_sib_cache_map[NR_CPUS] __read_mostly = { + [0 ... NR_CPUS - 1] = CPU_MASK_NONE }; + EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_SYMBOL(cpu_core_map); +EXPORT_SYMBOL(cpu_core_sib_map); +EXPORT_SYMBOL(cpu_core_sib_cache_map); static cpumask_t smp_commenced_mask; +static DEFINE_PER_CPU(bool, poke); +static bool cpu_poke; + void smp_info(struct seq_file *m) { int i; @@ -87,7 +101,7 @@ extern void setup_sparc64_timer(void); static volatile unsigned long callin_flag = 0; -void __cpuinit smp_callin(void) +void smp_callin(void) { int cpuid = hard_smp_processor_id(); @@ -113,7 +127,7 @@ void __cpuinit smp_callin(void) current_thread_info()->new_child = 0; /* Attach to the address space of init_task. */ - atomic_inc(&init_mm.mm_count); + mmgrab(&init_mm); current->active_mm = &init_mm; /* inform the notifiers about the new cpu */ @@ -123,12 +137,10 @@ void __cpuinit smp_callin(void) rmb(); set_cpu_online(cpuid, true); - local_irq_enable(); - /* idle thread is expected to have preempt disabled */ - preempt_disable(); + local_irq_enable(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } void cpu_panic(void) @@ -150,7 +162,7 @@ void cpu_panic(void) #define NUM_ROUNDS 64 /* magic value */ #define NUM_ITERS 5 /* likewise */ -static DEFINE_SPINLOCK(itc_sync_lock); +static DEFINE_RAW_SPINLOCK(itc_sync_lock); static unsigned long go[SLAVE + 1]; #define DEBUG_TICK_SYNC 0 @@ -258,7 +270,7 @@ static void smp_synchronize_one_tick(int cpu) go[MASTER] = 0; membar_safe("#StoreLoad"); - spin_lock_irqsave(&itc_sync_lock, flags); + raw_spin_lock_irqsave(&itc_sync_lock, flags); { for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) { while (!go[MASTER]) @@ -269,19 +281,12 @@ static void smp_synchronize_one_tick(int cpu) membar_safe("#StoreLoad"); } } - spin_unlock_irqrestore(&itc_sync_lock, flags); + raw_spin_unlock_irqrestore(&itc_sync_lock, flags); } #if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU) -/* XXX Put this in some common place. XXX */ -static unsigned long kimage_addr_to_ra(void *p) -{ - unsigned long val = (unsigned long) p; - - return kern_base + (val - KERNBASE); -} - -static void __cpuinit ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg, void **descrp) +static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg, + void **descrp) { extern unsigned long sparc64_ttable_tl0; extern unsigned long kern_locked_tte_data; @@ -292,9 +297,7 @@ static void __cpuinit ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread unsigned long hv_err; int i; - hdesc = kzalloc(sizeof(*hdesc) + - (sizeof(struct hvtramp_mapping) * - num_kernel_image_mappings - 1), + hdesc = kzalloc(struct_size(hdesc, maps, num_kernel_image_mappings), GFP_KERNEL); if (!hdesc) { printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate " @@ -342,7 +345,7 @@ extern unsigned long sparc64_cpu_startup; */ static struct thread_info *cpu_new_thread = NULL; -static int __cpuinit smp_boot_one_cpu(unsigned int cpu, struct task_struct *idle) +static int smp_boot_one_cpu(unsigned int cpu, struct task_struct *idle) { unsigned long entry = (unsigned long)(&sparc64_cpu_startup); @@ -618,22 +621,48 @@ retry: } } -/* Multi-cpu list version. */ +#define CPU_MONDO_COUNTER(cpuid) (cpu_mondo_counter[cpuid]) +#define MONDO_USEC_WAIT_MIN 2 +#define MONDO_USEC_WAIT_MAX 100 +#define MONDO_RETRY_LIMIT 500000 + +/* Multi-cpu list version. + * + * Deliver xcalls to 'cnt' number of cpus in 'cpu_list'. + * Sometimes not all cpus receive the mondo, requiring us to re-send + * the mondo until all cpus have received, or cpus are truly stuck + * unable to receive mondo, and we timeout. + * Occasionally a target cpu strand is borrowed briefly by hypervisor to + * perform guest service, such as PCIe error handling. Consider the + * service time, 1 second overall wait is reasonable for 1 cpu. + * Here two in-between mondo check wait time are defined: 2 usec for + * single cpu quick turn around and up to 100usec for large cpu count. + * Deliver mondo to large number of cpus could take longer, we adjusts + * the retry count as long as target cpus are making forward progress. + */ static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt) { - int retries, this_cpu, prev_sent, i, saw_cpu_error; + int this_cpu, tot_cpus, prev_sent, i, rem; + int usec_wait, retries, tot_retries; + u16 first_cpu = 0xffff; + unsigned long xc_rcvd = 0; unsigned long status; + int ecpuerror_id = 0; + int enocpu_id = 0; u16 *cpu_list; + u16 cpu; this_cpu = smp_processor_id(); - cpu_list = __va(tb->cpu_list_pa); - - saw_cpu_error = 0; - retries = 0; + usec_wait = cnt * MONDO_USEC_WAIT_MIN; + if (usec_wait > MONDO_USEC_WAIT_MAX) + usec_wait = MONDO_USEC_WAIT_MAX; + retries = tot_retries = 0; + tot_cpus = cnt; prev_sent = 0; + do { - int forward_progress, n_sent; + int n_sent, mondo_delivered, target_cpu_busy; status = sun4v_cpu_mondo_send(cnt, tb->cpu_list_pa, @@ -641,94 +670,113 @@ static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt) /* HV_EOK means all cpus received the xcall, we're done. */ if (likely(status == HV_EOK)) - break; + goto xcall_done; + + /* If not these non-fatal errors, panic */ + if (unlikely((status != HV_EWOULDBLOCK) && + (status != HV_ECPUERROR) && + (status != HV_ENOCPU))) + goto fatal_errors; /* First, see if we made any forward progress. * + * Go through the cpu_list, count the target cpus that have + * received our mondo (n_sent), and those that did not (rem). + * Re-pack cpu_list with the cpus remain to be retried in the + * front - this simplifies tracking the truly stalled cpus. + * * The hypervisor indicates successful sends by setting * cpu list entries to the value 0xffff. + * + * EWOULDBLOCK means some target cpus did not receive the + * mondo and retry usually helps. + * + * ECPUERROR means at least one target cpu is in error state, + * it's usually safe to skip the faulty cpu and retry. + * + * ENOCPU means one of the target cpu doesn't belong to the + * domain, perhaps offlined which is unexpected, but not + * fatal and it's okay to skip the offlined cpu. */ + rem = 0; n_sent = 0; for (i = 0; i < cnt; i++) { - if (likely(cpu_list[i] == 0xffff)) + cpu = cpu_list[i]; + if (likely(cpu == 0xffff)) { n_sent++; + } else if ((status == HV_ECPUERROR) && + (sun4v_cpu_state(cpu) == HV_CPU_STATE_ERROR)) { + ecpuerror_id = cpu + 1; + } else if (status == HV_ENOCPU && !cpu_online(cpu)) { + enocpu_id = cpu + 1; + } else { + cpu_list[rem++] = cpu; + } } - forward_progress = 0; - if (n_sent > prev_sent) - forward_progress = 1; + /* No cpu remained, we're done. */ + if (rem == 0) + break; - prev_sent = n_sent; + /* Otherwise, update the cpu count for retry. */ + cnt = rem; - /* If we get a HV_ECPUERROR, then one or more of the cpus - * in the list are in error state. Use the cpu_state() - * hypervisor call to find out which cpus are in error state. + /* Record the overall number of mondos received by the + * first of the remaining cpus. */ - if (unlikely(status == HV_ECPUERROR)) { - for (i = 0; i < cnt; i++) { - long err; - u16 cpu; + if (first_cpu != cpu_list[0]) { + first_cpu = cpu_list[0]; + xc_rcvd = CPU_MONDO_COUNTER(first_cpu); + } - cpu = cpu_list[i]; - if (cpu == 0xffff) - continue; + /* Was any mondo delivered successfully? */ + mondo_delivered = (n_sent > prev_sent); + prev_sent = n_sent; - err = sun4v_cpu_state(cpu); - if (err == HV_CPU_STATE_ERROR) { - saw_cpu_error = (cpu + 1); - cpu_list[i] = 0xffff; - } - } - } else if (unlikely(status != HV_EWOULDBLOCK)) - goto fatal_mondo_error; + /* or, was any target cpu busy processing other mondos? */ + target_cpu_busy = (xc_rcvd < CPU_MONDO_COUNTER(first_cpu)); + xc_rcvd = CPU_MONDO_COUNTER(first_cpu); - /* Don't bother rewriting the CPU list, just leave the - * 0xffff and non-0xffff entries in there and the - * hypervisor will do the right thing. - * - * Only advance timeout state if we didn't make any - * forward progress. + /* Retry count is for no progress. If we're making progress, + * reset the retry count. */ - if (unlikely(!forward_progress)) { - if (unlikely(++retries > 10000)) - goto fatal_mondo_timeout; - - /* Delay a little bit to let other cpus catch up - * on their cpu mondo queue work. - */ - udelay(2 * cnt); + if (likely(mondo_delivered || target_cpu_busy)) { + tot_retries += retries; + retries = 0; + } else if (unlikely(retries > MONDO_RETRY_LIMIT)) { + goto fatal_mondo_timeout; } - } while (1); - if (unlikely(saw_cpu_error)) - goto fatal_mondo_cpu_error; + /* Delay a little bit to let other cpus catch up on + * their cpu mondo queue work. + */ + if (!mondo_delivered) + udelay(usec_wait); - return; + retries++; + } while (1); -fatal_mondo_cpu_error: - printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus " - "(including %d) were in error state\n", - this_cpu, saw_cpu_error - 1); +xcall_done: + if (unlikely(ecpuerror_id > 0)) { + pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) was in error state\n", + this_cpu, ecpuerror_id - 1); + } else if (unlikely(enocpu_id > 0)) { + pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) does not belong to the domain\n", + this_cpu, enocpu_id - 1); + } return; +fatal_errors: + /* fatal errors include bad alignment, etc */ + pr_crit("CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) mondo_block_pa(%lx)\n", + this_cpu, tot_cpus, tb->cpu_list_pa, tb->cpu_mondo_block_pa); + panic("Unexpected SUN4V mondo error %lu\n", status); + fatal_mondo_timeout: - printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward " - " progress after %d retries.\n", - this_cpu, retries); - goto dump_cpu_list_and_out; - -fatal_mondo_error: - printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n", - this_cpu, status); - printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) " - "mondo_block_pa(%lx)\n", - this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa); - -dump_cpu_list_and_out: - printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu); - for (i = 0; i < cnt; i++) - printk("%u ", cpu_list[i]); - printk("]\n"); + /* some cpus being non-responsive to the cpu mondo */ + pr_crit("CPU[%d]: SUN4V mondo timeout, cpu(%d) made no forward progress after %d retries. Total target cpus(%d).\n", + this_cpu, first_cpu, (tot_retries + retries), tot_cpus); + panic("SUN4V mondo timeout panic\n"); } static void (*xcall_deliver_impl)(struct trap_per_cpu *, int); @@ -821,13 +869,17 @@ void arch_send_call_function_single_ipi(int cpu) void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs) { clear_softint(1 << irq); + irq_enter(); generic_smp_call_function_interrupt(); + irq_exit(); } void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs) { clear_softint(1 << irq); + irq_enter(); generic_smp_call_function_single_interrupt(); + irq_exit(); } static void tsb_sync(void *info) @@ -867,25 +919,26 @@ extern unsigned long xcall_flush_dcache_page_cheetah; #endif extern unsigned long xcall_flush_dcache_page_spitfire; -#ifdef CONFIG_DEBUG_DCFLUSH -extern atomic_t dcpage_flushes; -extern atomic_t dcpage_flushes_xcall; -#endif - -static inline void __local_flush_dcache_page(struct page *page) +static inline void __local_flush_dcache_folio(struct folio *folio) { + unsigned int i, nr = folio_nr_pages(folio); + #ifdef DCACHE_ALIASING_POSSIBLE - __flush_dcache_page(page_address(page), + for (i = 0; i < nr; i++) + __flush_dcache_page(folio_address(folio) + i * PAGE_SIZE, ((tlb_type == spitfire) && - page_mapping(page) != NULL)); + folio_flush_mapping(folio) != NULL)); #else - if (page_mapping(page) != NULL && - tlb_type == spitfire) - __flush_icache_page(__pa(page_address(page))); + if (folio_flush_mapping(folio) != NULL && + tlb_type == spitfire) { + unsigned long pfn = folio_pfn(folio) + for (i = 0; i < nr; i++) + __flush_icache_page((pfn + i) * PAGE_SIZE); + } #endif } -void smp_flush_dcache_page_impl(struct page *page, int cpu) +void smp_flush_dcache_folio_impl(struct folio *folio, int cpu) { int this_cpu; @@ -899,14 +952,14 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu) this_cpu = get_cpu(); if (cpu == this_cpu) { - __local_flush_dcache_page(page); + __local_flush_dcache_folio(folio); } else if (cpu_online(cpu)) { - void *pg_addr = page_address(page); + void *pg_addr = folio_address(folio); u64 data0 = 0; if (tlb_type == spitfire) { data0 = ((u64)&xcall_flush_dcache_page_spitfire); - if (page_mapping(page) != NULL) + if (folio_flush_mapping(folio) != NULL) data0 |= ((u64)1 << 32); } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { #ifdef DCACHE_ALIASING_POSSIBLE @@ -914,18 +967,23 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu) #endif } if (data0) { - xcall_deliver(data0, __pa(pg_addr), - (u64) pg_addr, cpumask_of(cpu)); + unsigned int i, nr = folio_nr_pages(folio); + + for (i = 0; i < nr; i++) { + xcall_deliver(data0, __pa(pg_addr), + (u64) pg_addr, cpumask_of(cpu)); #ifdef CONFIG_DEBUG_DCFLUSH - atomic_inc(&dcpage_flushes_xcall); + atomic_inc(&dcpage_flushes_xcall); #endif + pg_addr += PAGE_SIZE; + } } } put_cpu(); } -void flush_dcache_page_all(struct mm_struct *mm, struct page *page) +void flush_dcache_folio_all(struct mm_struct *mm, struct folio *folio) { void *pg_addr; u64 data0; @@ -939,10 +997,10 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page) atomic_inc(&dcpage_flushes); #endif data0 = 0; - pg_addr = page_address(page); + pg_addr = folio_address(folio); if (tlb_type == spitfire) { data0 = ((u64)&xcall_flush_dcache_page_spitfire); - if (page_mapping(page) != NULL) + if (folio_flush_mapping(folio) != NULL) data0 |= ((u64)1 << 32); } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { #ifdef DCACHE_ALIASING_POSSIBLE @@ -950,50 +1008,24 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page) #endif } if (data0) { - xcall_deliver(data0, __pa(pg_addr), - (u64) pg_addr, cpu_online_mask); + unsigned int i, nr = folio_nr_pages(folio); + + for (i = 0; i < nr; i++) { + xcall_deliver(data0, __pa(pg_addr), + (u64) pg_addr, cpu_online_mask); #ifdef CONFIG_DEBUG_DCFLUSH - atomic_inc(&dcpage_flushes_xcall); + atomic_inc(&dcpage_flushes_xcall); #endif + pg_addr += PAGE_SIZE; + } } - __local_flush_dcache_page(page); + __local_flush_dcache_folio(folio); preempt_enable(); } -void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs) -{ - struct mm_struct *mm; - unsigned long flags; - - clear_softint(1 << irq); - - /* See if we need to allocate a new TLB context because - * the version of the one we are using is now out of date. - */ - mm = current->active_mm; - if (unlikely(!mm || (mm == &init_mm))) - return; - - spin_lock_irqsave(&mm->context.lock, flags); - - if (unlikely(!CTX_VALID(mm->context))) - get_new_mmu_context(mm); - - spin_unlock_irqrestore(&mm->context.lock, flags); - - load_secondary_context(mm); - __flush_tlb_mm(CTX_HWBITS(mm->context), - SECONDARY_CONTEXT); -} - -void smp_new_mmu_context_version(void) -{ - smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0); -} - #ifdef CONFIG_KGDB -void kgdb_roundup_cpus(unsigned long flags) +void kgdb_roundup_cpus(void) { smp_cross_call(&xcall_kgdb_capture, 0, 0, 0); } @@ -1018,38 +1050,9 @@ void smp_fetch_global_pmu(void) * are flush_tlb_*() routines, and these run after flush_cache_*() * which performs the flushw. * - * The SMP TLB coherency scheme we use works as follows: - * - * 1) mm->cpu_vm_mask is a bit mask of which cpus an address - * space has (potentially) executed on, this is the heuristic - * we use to avoid doing cross calls. - * - * Also, for flushing from kswapd and also for clones, we - * use cpu_vm_mask as the list of cpus to make run the TLB. - * - * 2) TLB context numbers are shared globally across all processors - * in the system, this allows us to play several games to avoid - * cross calls. - * - * One invariant is that when a cpu switches to a process, and - * that processes tsk->active_mm->cpu_vm_mask does not have the - * current cpu's bit set, that tlb context is flushed locally. - * - * If the address space is non-shared (ie. mm->count == 1) we avoid - * cross calls when we want to flush the currently running process's - * tlb state. This is done by clearing all cpu bits except the current - * processor's in current->mm->cpu_vm_mask and performing the - * flush locally only. This will force any subsequent cpus which run - * this task to flush the context from the local tlb if the process - * migrates to another cpu (again). - * - * 3) For shared address spaces (threads) and swapping we bite the - * bullet for most cases and perform the cross call (but only to - * the cpus listed in cpu_vm_mask). - * - * The performance gain from "optimizing" away the cross call for threads is - * questionable (in theory the big win for threads is the massive sharing of - * address space state across processors). + * mm->cpu_vm_mask is a bit mask of which cpus an address + * space has (potentially) executed on, this is the heuristic + * we use to limit cross calls. */ /* This currently is only used by the hugetlb arch pre-fault @@ -1059,18 +1062,13 @@ void smp_fetch_global_pmu(void) void smp_flush_tlb_mm(struct mm_struct *mm) { u32 ctx = CTX_HWBITS(mm->context); - int cpu = get_cpu(); - if (atomic_read(&mm->mm_users) == 1) { - cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); - goto local_flush_and_out; - } + get_cpu(); smp_cross_call_masked(&xcall_flush_tlb_mm, ctx, 0, 0, mm_cpumask(mm)); -local_flush_and_out: __flush_tlb_mm(ctx, SECONDARY_CONTEXT); put_cpu(); @@ -1093,17 +1091,15 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long { u32 ctx = CTX_HWBITS(mm->context); struct tlb_pending_info info; - int cpu = get_cpu(); + + get_cpu(); info.ctx = ctx; info.nr = nr; info.vaddrs = vaddrs; - if (mm == current->mm && atomic_read(&mm->mm_users) == 1) - cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); - else - smp_call_function_many(mm_cpumask(mm), tlb_pending_func, - &info, 1); + smp_call_function_many(mm_cpumask(mm), tlb_pending_func, + &info, 1); __flush_tlb_pending(ctx, nr, vaddrs); @@ -1113,14 +1109,13 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr) { unsigned long context = CTX_HWBITS(mm->context); - int cpu = get_cpu(); - if (mm == current->mm && atomic_read(&mm->mm_users) == 1) - cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); - else - smp_cross_call_masked(&xcall_flush_tlb_page, - context, vaddr, 0, - mm_cpumask(mm)); + get_cpu(); + + smp_cross_call_masked(&xcall_flush_tlb_page, + context, vaddr, 0, + mm_cpumask(mm)); + __flush_tlb_page(context, vaddr); put_cpu(); @@ -1148,7 +1143,7 @@ static unsigned long penguins_are_doing_time; void smp_capture(void) { - int result = atomic_add_ret(1, &smp_capture_depth); + int result = atomic_add_return(1, &smp_capture_depth); if (result == 1) { int ncpus = num_online_cpus(); @@ -1205,20 +1200,10 @@ void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs) preempt_enable(); } -/* /proc/profile writes can call this, don't __init it please. */ -int setup_profiling_timer(unsigned int multiplier) -{ - return -EINVAL; -} - void __init smp_prepare_cpus(unsigned int max_cpus) { } -void smp_prepare_boot_cpu(void) -{ -} - void __init smp_setup_processor_id(void) { if (tlb_type == spitfire) @@ -1249,6 +1234,19 @@ void smp_fill_in_sib_core_maps(void) } } + for_each_present_cpu(i) { + unsigned int j; + + for_each_present_cpu(j) { + if (cpu_data(i).max_cache_id == + cpu_data(j).max_cache_id) + cpumask_set_cpu(j, &cpu_core_sib_cache_map[i]); + + if (cpu_data(i).sock_id == cpu_data(j).sock_id) + cpumask_set_cpu(j, &cpu_core_sib_map[i]); + } + } + for_each_present_cpu(i) { unsigned int j; @@ -1266,7 +1264,7 @@ void smp_fill_in_sib_core_maps(void) } } -int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle) +int __cpu_up(unsigned int cpu, struct task_struct *tidle) { int ret = smp_boot_one_cpu(cpu, tidle); @@ -1393,13 +1391,88 @@ void __cpu_die(unsigned int cpu) void __init smp_cpus_done(unsigned int max_cpus) { - pcr_arch_init(); } -void smp_send_reschedule(int cpu) +static void send_cpu_ipi(int cpu) { - xcall_deliver((u64) &xcall_receive_signal, 0, 0, - cpumask_of(cpu)); + xcall_deliver((u64) &xcall_receive_signal, + 0, 0, cpumask_of(cpu)); +} + +void scheduler_poke(void) +{ + if (!cpu_poke) + return; + + if (!__this_cpu_read(poke)) + return; + + __this_cpu_write(poke, false); + set_softint(1 << PIL_SMP_RECEIVE_SIGNAL); +} + +static unsigned long send_cpu_poke(int cpu) +{ + unsigned long hv_err; + + per_cpu(poke, cpu) = true; + hv_err = sun4v_cpu_poke(cpu); + if (hv_err != HV_EOK) { + per_cpu(poke, cpu) = false; + pr_err_ratelimited("%s: sun4v_cpu_poke() fails err=%lu\n", + __func__, hv_err); + } + + return hv_err; +} + +void arch_smp_send_reschedule(int cpu) +{ + if (cpu == smp_processor_id()) { + WARN_ON_ONCE(preemptible()); + set_softint(1 << PIL_SMP_RECEIVE_SIGNAL); + return; + } + + /* Use cpu poke to resume idle cpu if supported. */ + if (cpu_poke && idle_cpu(cpu)) { + unsigned long ret; + + ret = send_cpu_poke(cpu); + if (ret == HV_EOK) + return; + } + + /* Use IPI in following cases: + * - cpu poke not supported + * - cpu not idle + * - send_cpu_poke() returns with error + */ + send_cpu_ipi(cpu); +} + +void smp_init_cpu_poke(void) +{ + unsigned long major; + unsigned long minor; + int ret; + + if (tlb_type != hypervisor) + return; + + ret = sun4v_hvapi_get(HV_GRP_CORE, &major, &minor); + if (ret) { + pr_debug("HV_GRP_CORE is not registered\n"); + return; + } + + if (major == 1 && minor >= 6) { + /* CPU POKE is registered. */ + cpu_poke = true; + return; + } + + pr_debug("CPU_POKE not supported\n"); } void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs) @@ -1408,55 +1481,39 @@ void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs) scheduler_ipi(); } -/* This is a nop because we capture all other cpus - * anyways when making the PROM active. - */ -void smp_send_stop(void) +static void stop_this_cpu(void *dummy) { + set_cpu_online(smp_processor_id(), false); + prom_stopself(); } -/** - * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu - * @cpu: cpu to allocate for - * @size: size allocation in bytes - * @align: alignment - * - * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper - * does the right thing for NUMA regardless of the current - * configuration. - * - * RETURNS: - * Pointer to the allocated area on success, NULL on failure. - */ -static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, - size_t align) +void smp_send_stop(void) { - const unsigned long goal = __pa(MAX_DMA_ADDRESS); -#ifdef CONFIG_NEED_MULTIPLE_NODES - int node = cpu_to_node(cpu); - void *ptr; - - if (!node_online(node) || !NODE_DATA(node)) { - ptr = __alloc_bootmem(size, align, goal); - pr_info("cpu %d has no node %d or node-local memory\n", - cpu, node); - pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n", - cpu, size, __pa(ptr)); - } else { - ptr = __alloc_bootmem_node(NODE_DATA(node), - size, align, goal); - pr_debug("per cpu data for cpu%d %lu bytes on node%d at " - "%016lx\n", cpu, size, node, __pa(ptr)); - } - return ptr; -#else - return __alloc_bootmem(size, align, goal); + int cpu; + + if (tlb_type == hypervisor) { + int this_cpu = smp_processor_id(); +#ifdef CONFIG_SERIAL_SUNHV + sunhv_migrate_hvcons_irq(this_cpu); #endif -} + for_each_online_cpu(cpu) { + if (cpu == this_cpu) + continue; -static void __init pcpu_free_bootmem(void *ptr, size_t size) -{ - free_bootmem(__pa(ptr), size); + set_cpu_online(cpu, false); +#ifdef CONFIG_SUN_LDOMS + if (ldom_domaining_enabled) { + unsigned long hv_err; + hv_err = sun4v_cpu_stop(cpu); + if (hv_err) + printk(KERN_ERR "sun4v_cpu_stop() " + "failed err=%lu\n", hv_err); + } else +#endif + prom_stopcpu_cpuid(cpu); + } + } else + smp_call_function(stop_this_cpu, NULL, 0); } static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) @@ -1467,27 +1524,9 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) return REMOTE_DISTANCE; } -static void __init pcpu_populate_pte(unsigned long addr) +static int __init pcpu_cpu_to_node(int cpu) { - pgd_t *pgd = pgd_offset_k(addr); - pud_t *pud; - pmd_t *pmd; - - pud = pud_offset(pgd, addr); - if (pud_none(*pud)) { - pmd_t *new; - - new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); - pud_populate(&init_mm, pud, new); - } - - pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) { - pte_t *new; - - new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); - pmd_populate_kernel(&init_mm, pmd, new); - } + return cpu_to_node(cpu); } void __init setup_per_cpu_areas(void) @@ -1500,18 +1539,15 @@ void __init setup_per_cpu_areas(void) rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, PERCPU_DYNAMIC_RESERVE, 4 << 20, pcpu_cpu_distance, - pcpu_alloc_bootmem, - pcpu_free_bootmem); + pcpu_cpu_to_node); if (rc) - pr_warning("PERCPU: %s allocator failed (%d), " - "falling back to page size\n", - pcpu_fc_names[pcpu_chosen_fc], rc); + pr_warn("PERCPU: %s allocator failed (%d), " + "falling back to page size\n", + pcpu_fc_names[pcpu_chosen_fc], rc); } if (rc < 0) rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, - pcpu_alloc_bootmem, - pcpu_free_bootmem, - pcpu_populate_pte); + pcpu_cpu_to_node); if (rc < 0) panic("cannot initialize percpu area (err=%d)", rc); |
