Diffstat (limited to 'arch/sparc/kernel/smp_64.c')
-rw-r--r--   arch/sparc/kernel/smp_64.c   | 327
1 file changed, 140 insertions, 187 deletions
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 3218bc43302e..5cbd6ed5ef6f 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* smp.c: Sparc64 SMP support.
  *
  * Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net)
@@ -21,7 +22,7 @@
 #include <linux/cache.h>
 #include <linux/jiffies.h>
 #include <linux/profile.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/vmalloc.h>
 #include <linux/ftrace.h>
 #include <linux/cpu.h>
@@ -42,11 +43,11 @@
 #include <asm/irq.h>
 #include <asm/irq_regs.h>
 #include <asm/page.h>
-#include <asm/pgtable.h>
 #include <asm/oplib.h>
 #include <linux/uaccess.h>
 #include <asm/starfire.h>
 #include <asm/tlb.h>
+#include <asm/pgalloc.h>
 #include <asm/sections.h>
 #include <asm/prom.h>
 #include <asm/mdesc.h>
@@ -74,6 +75,9 @@ EXPORT_SYMBOL(cpu_core_sib_cache_map);
 
 static cpumask_t smp_commenced_mask;
 
+static DEFINE_PER_CPU(bool, poke);
+static bool cpu_poke;
+
 void smp_info(struct seq_file *m)
 {
 	int i;
@@ -134,9 +138,6 @@ void smp_callin(void)
 
 	set_cpu_online(cpuid, true);
 
-	/* idle thread is expected to have preempt disabled */
-	preempt_disable();
-
 	local_irq_enable();
 
 	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
@@ -296,9 +297,7 @@ static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg,
 	unsigned long hv_err;
 	int i;
 
-	hdesc = kzalloc(sizeof(*hdesc) +
-			(sizeof(struct hvtramp_mapping) *
-			 num_kernel_image_mappings - 1),
+	hdesc = kzalloc(struct_size(hdesc, maps, num_kernel_image_mappings),
 			GFP_KERNEL);
 	if (!hdesc) {
 		printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate "
@@ -920,20 +919,26 @@ extern unsigned long xcall_flush_dcache_page_cheetah;
 #endif
 extern unsigned long xcall_flush_dcache_page_spitfire;
 
-static inline void __local_flush_dcache_page(struct page *page)
+static inline void __local_flush_dcache_folio(struct folio *folio)
 {
+	unsigned int i, nr = folio_nr_pages(folio);
+
 #ifdef DCACHE_ALIASING_POSSIBLE
-	__flush_dcache_page(page_address(page),
+	for (i = 0; i < nr; i++)
+		__flush_dcache_page(folio_address(folio) + i * PAGE_SIZE,
 			    ((tlb_type == spitfire) &&
-			     page_mapping(page) != NULL));
+			     folio_flush_mapping(folio) != NULL));
 #else
-	if (page_mapping(page) != NULL &&
-	    tlb_type == spitfire)
-		__flush_icache_page(__pa(page_address(page)));
+	if (folio_flush_mapping(folio) != NULL &&
+	    tlb_type == spitfire) {
+		unsigned long pfn = folio_pfn(folio)
+		for (i = 0; i < nr; i++)
+			__flush_icache_page((pfn + i) * PAGE_SIZE);
+	}
 #endif
 }
 
-void smp_flush_dcache_page_impl(struct page *page, int cpu)
+void smp_flush_dcache_folio_impl(struct folio *folio, int cpu)
 {
 	int this_cpu;
 
@@ -947,14 +952,14 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu)
 	this_cpu = get_cpu();
 
 	if (cpu == this_cpu) {
-		__local_flush_dcache_page(page);
+		__local_flush_dcache_folio(folio);
 	} else if (cpu_online(cpu)) {
-		void *pg_addr = page_address(page);
+		void *pg_addr = folio_address(folio);
 		u64 data0 = 0;
 
 		if (tlb_type == spitfire) {
 			data0 = ((u64)&xcall_flush_dcache_page_spitfire);
-			if (page_mapping(page) != NULL)
+			if (folio_flush_mapping(folio) != NULL)
 				data0 |= ((u64)1 << 32);
 		} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
 #ifdef DCACHE_ALIASING_POSSIBLE
@@ -962,18 +967,23 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu)
 #endif
 		}
 		if (data0) {
-			xcall_deliver(data0, __pa(pg_addr),
-				      (u64) pg_addr, cpumask_of(cpu));
+			unsigned int i, nr = folio_nr_pages(folio);
+
+			for (i = 0; i < nr; i++) {
+				xcall_deliver(data0, __pa(pg_addr),
+					      (u64) pg_addr, cpumask_of(cpu));
 #ifdef CONFIG_DEBUG_DCFLUSH
-			atomic_inc(&dcpage_flushes_xcall);
+				atomic_inc(&dcpage_flushes_xcall);
 #endif
+				pg_addr += PAGE_SIZE;
+			}
 		}
 	}
 
 	put_cpu();
 }
 
-void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
+void flush_dcache_folio_all(struct mm_struct *mm, struct folio *folio)
 {
 	void *pg_addr;
 	u64 data0;
@@ -987,10 +997,10 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
 	atomic_inc(&dcpage_flushes);
 #endif
 	data0 = 0;
-	pg_addr = page_address(page);
+	pg_addr = folio_address(folio);
 	if (tlb_type == spitfire) {
 		data0 = ((u64)&xcall_flush_dcache_page_spitfire);
-		if (page_mapping(page) != NULL)
+		if (folio_flush_mapping(folio) != NULL)
 			data0 |= ((u64)1 << 32);
 	} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
 #ifdef DCACHE_ALIASING_POSSIBLE
@@ -998,19 +1008,24 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
 #endif
 	}
 	if (data0) {
-		xcall_deliver(data0, __pa(pg_addr),
-			      (u64) pg_addr, cpu_online_mask);
+		unsigned int i, nr = folio_nr_pages(folio);
+
+		for (i = 0; i < nr; i++) {
+			xcall_deliver(data0, __pa(pg_addr),
+				      (u64) pg_addr, cpu_online_mask);
 #ifdef CONFIG_DEBUG_DCFLUSH
-		atomic_inc(&dcpage_flushes_xcall);
+			atomic_inc(&dcpage_flushes_xcall);
 #endif
+			pg_addr += PAGE_SIZE;
+		}
 	}
-	__local_flush_dcache_page(page);
+	__local_flush_dcache_folio(folio);
 
 	preempt_enable();
 }
 
 #ifdef CONFIG_KGDB
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	smp_cross_call(&xcall_kgdb_capture, 0, 0, 0);
 }
@@ -1035,38 +1050,9 @@ void smp_fetch_global_pmu(void)
  * are flush_tlb_*() routines, and these run after flush_cache_*()
  * which performs the flushw.
  *
- * The SMP TLB coherency scheme we use works as follows:
- *
- * 1) mm->cpu_vm_mask is a bit mask of which cpus an address
- *    space has (potentially) executed on, this is the heuristic
- *    we use to avoid doing cross calls.
- *
- *    Also, for flushing from kswapd and also for clones, we
- *    use cpu_vm_mask as the list of cpus to make run the TLB.
- *
- * 2) TLB context numbers are shared globally across all processors
- *    in the system, this allows us to play several games to avoid
- *    cross calls.
- *
- *    One invariant is that when a cpu switches to a process, and
- *    that processes tsk->active_mm->cpu_vm_mask does not have the
- *    current cpu's bit set, that tlb context is flushed locally.
- *
- *    If the address space is non-shared (ie. mm->count == 1) we avoid
- *    cross calls when we want to flush the currently running process's
- *    tlb state.  This is done by clearing all cpu bits except the current
- *    processor's in current->mm->cpu_vm_mask and performing the
- *    flush locally only.  This will force any subsequent cpus which run
- *    this task to flush the context from the local tlb if the process
- *    migrates to another cpu (again).
- *
- * 3) For shared address spaces (threads) and swapping we bite the
- *    bullet for most cases and perform the cross call (but only to
- *    the cpus listed in cpu_vm_mask).
- *
- * The performance gain from "optimizing" away the cross call for threads is
- * questionable (in theory the big win for threads is the massive sharing of
- * address space state across processors).
+ * mm->cpu_vm_mask is a bit mask of which cpus an address
+ * space has (potentially) executed on, this is the heuristic
+ * we use to limit cross calls.
  */
 
 /* This currently is only used by the hugetlb arch pre-fault
@@ -1076,18 +1062,13 @@ void smp_fetch_global_pmu(void)
 void smp_flush_tlb_mm(struct mm_struct *mm)
 {
 	u32 ctx = CTX_HWBITS(mm->context);
-	int cpu = get_cpu();
 
-	if (atomic_read(&mm->mm_users) == 1) {
-		cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
-		goto local_flush_and_out;
-	}
+	get_cpu();
 
 	smp_cross_call_masked(&xcall_flush_tlb_mm,
 			      ctx, 0, 0,
 			      mm_cpumask(mm));
 
-local_flush_and_out:
 	__flush_tlb_mm(ctx, SECONDARY_CONTEXT);
 
 	put_cpu();
@@ -1110,17 +1091,15 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long
 {
 	u32 ctx = CTX_HWBITS(mm->context);
 	struct tlb_pending_info info;
-	int cpu = get_cpu();
+
+	get_cpu();
 
 	info.ctx = ctx;
 	info.nr = nr;
 	info.vaddrs = vaddrs;
 
-	if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
-		cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
-	else
-		smp_call_function_many(mm_cpumask(mm), tlb_pending_func,
-				       &info, 1);
+	smp_call_function_many(mm_cpumask(mm), tlb_pending_func,
			       &info, 1);
 
 	__flush_tlb_pending(ctx, nr, vaddrs);
 
@@ -1130,14 +1109,13 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long
 void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr)
 {
 	unsigned long context = CTX_HWBITS(mm->context);
-	int cpu = get_cpu();
 
-	if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
-		cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
-	else
-		smp_cross_call_masked(&xcall_flush_tlb_page,
-				      context, vaddr, 0,
-				      mm_cpumask(mm));
+	get_cpu();
+
+	smp_cross_call_masked(&xcall_flush_tlb_page,
+			      context, vaddr, 0,
+			      mm_cpumask(mm));
+
 	__flush_tlb_page(context, vaddr);
 
 	put_cpu();
@@ -1222,20 +1200,10 @@ void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs)
 	preempt_enable();
 }
 
-/* /proc/profile writes can call this, don't __init it please. */
-int setup_profiling_timer(unsigned int multiplier)
-{
-	return -EINVAL;
-}
-
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
 }
 
-void smp_prepare_boot_cpu(void)
-{
-}
-
 void __init smp_setup_processor_id(void)
 {
 	if (tlb_type == spitfire)
@@ -1246,20 +1214,6 @@ void __init smp_setup_processor_id(void)
 		xcall_deliver_impl = hypervisor_xcall_deliver;
 }
 
-void __init smp_fill_in_cpu_possible_map(void)
-{
-	int possible_cpus = num_possible_cpus();
-	int i;
-
-	if (possible_cpus > nr_cpu_ids)
-		possible_cpus = nr_cpu_ids;
-
-	for (i = 0; i < possible_cpus; i++)
-		set_cpu_possible(i, true);
-	for (; i < NR_CPUS; i++)
-		set_cpu_possible(i, false);
-}
-
 void smp_fill_in_sib_core_maps(void)
 {
 	unsigned int i;
@@ -1439,15 +1393,86 @@ void __init smp_cpus_done(unsigned int max_cpus)
 {
 }
 
-void smp_send_reschedule(int cpu)
+static void send_cpu_ipi(int cpu)
+{
+	xcall_deliver((u64) &xcall_receive_signal,
+			0, 0, cpumask_of(cpu));
+}
+
+void scheduler_poke(void)
+{
+	if (!cpu_poke)
+		return;
+
+	if (!__this_cpu_read(poke))
+		return;
+
+	__this_cpu_write(poke, false);
+	set_softint(1 << PIL_SMP_RECEIVE_SIGNAL);
+}
+
+static unsigned long send_cpu_poke(int cpu)
+{
+	unsigned long hv_err;
+
+	per_cpu(poke, cpu) = true;
+	hv_err = sun4v_cpu_poke(cpu);
+	if (hv_err != HV_EOK) {
+		per_cpu(poke, cpu) = false;
+		pr_err_ratelimited("%s: sun4v_cpu_poke() fails err=%lu\n",
+				   __func__, hv_err);
+	}
+
+	return hv_err;
+}
+
+void arch_smp_send_reschedule(int cpu)
 {
 	if (cpu == smp_processor_id()) {
 		WARN_ON_ONCE(preemptible());
 		set_softint(1 << PIL_SMP_RECEIVE_SIGNAL);
-	} else {
-		xcall_deliver((u64) &xcall_receive_signal,
-			      0, 0, cpumask_of(cpu));
+		return;
+	}
+
+	/* Use cpu poke to resume idle cpu if supported. */
+	if (cpu_poke && idle_cpu(cpu)) {
+		unsigned long ret;
+
+		ret = send_cpu_poke(cpu);
+		if (ret == HV_EOK)
+			return;
 	}
+
+	/* Use IPI in following cases:
+	 * - cpu poke not supported
+	 * - cpu not idle
+	 * - send_cpu_poke() returns with error
+	 */
+	send_cpu_ipi(cpu);
+}
+
+void smp_init_cpu_poke(void)
+{
+	unsigned long major;
+	unsigned long minor;
+	int ret;
+
+	if (tlb_type != hypervisor)
+		return;
+
+	ret = sun4v_hvapi_get(HV_GRP_CORE, &major, &minor);
+	if (ret) {
+		pr_debug("HV_GRP_CORE is not registered\n");
+		return;
+	}
+
+	if (major == 1 && minor >= 6) {
+		/* CPU POKE is registered. */
+		cpu_poke = true;
+		return;
+	}
+
+	pr_debug("CPU_POKE not supported\n");
 }
 
 void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
@@ -1491,50 +1516,6 @@ void smp_send_stop(void)
 	smp_call_function(stop_this_cpu, NULL, 0);
 }
 
-/**
- * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
- * @cpu: cpu to allocate for
- * @size: size allocation in bytes
- * @align: alignment
- *
- * Allocate @size bytes aligned at @align for cpu @cpu.  This wrapper
- * does the right thing for NUMA regardless of the current
- * configuration.
- *
- * RETURNS:
- * Pointer to the allocated area on success, NULL on failure.
- */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
-					size_t align)
-{
-	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NEED_MULTIPLE_NODES
-	int node = cpu_to_node(cpu);
-	void *ptr;
-
-	if (!node_online(node) || !NODE_DATA(node)) {
-		ptr = __alloc_bootmem(size, align, goal);
-		pr_info("cpu %d has no node %d or node-local memory\n",
-			cpu, node);
-		pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
-			 cpu, size, __pa(ptr));
-	} else {
-		ptr = __alloc_bootmem_node(NODE_DATA(node),
-					   size, align, goal);
-		pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
-			 "%016lx\n", cpu, size, node, __pa(ptr));
-	}
-	return ptr;
-#else
-	return __alloc_bootmem(size, align, goal);
-#endif
-}
-
-static void __init pcpu_free_bootmem(void *ptr, size_t size)
-{
-	free_bootmem(__pa(ptr), size);
-}
-
 static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
 {
 	if (cpu_to_node(from) == cpu_to_node(to))
@@ -1543,34 +1524,9 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
 		return REMOTE_DISTANCE;
 }
 
-static void __init pcpu_populate_pte(unsigned long addr)
+static int __init pcpu_cpu_to_node(int cpu)
 {
-	pgd_t *pgd = pgd_offset_k(addr);
-	pud_t *pud;
-	pmd_t *pmd;
-
-	if (pgd_none(*pgd)) {
-		pud_t *new;
-
-		new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-		pgd_populate(&init_mm, pgd, new);
-	}
-
-	pud = pud_offset(pgd, addr);
-	if (pud_none(*pud)) {
-		pmd_t *new;
-
-		new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-		pud_populate(&init_mm, pud, new);
-	}
-
-	pmd = pmd_offset(pud, addr);
-	if (!pmd_present(*pmd)) {
-		pte_t *new;
-
-		new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-		pmd_populate_kernel(&init_mm, pmd, new);
-	}
+	return cpu_to_node(cpu);
 }
 
 void __init setup_per_cpu_areas(void)
@@ -1583,18 +1539,15 @@
 		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
 					    PERCPU_DYNAMIC_RESERVE, 4 << 20,
 					    pcpu_cpu_distance,
-					    pcpu_alloc_bootmem,
-					    pcpu_free_bootmem);
+					    pcpu_cpu_to_node);
 		if (rc)
-			pr_warning("PERCPU: %s allocator failed (%d), "
-				   "falling back to page size\n",
-				   pcpu_fc_names[pcpu_chosen_fc], rc);
+			pr_warn("PERCPU: %s allocator failed (%d), "
+				"falling back to page size\n",
+				pcpu_fc_names[pcpu_chosen_fc], rc);
 	}
 	if (rc < 0)
 		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
-					   pcpu_alloc_bootmem,
-					   pcpu_free_bootmem,
-					   pcpu_populate_pte);
+					   pcpu_cpu_to_node);
 	if (rc < 0)
 		panic("cannot initialize percpu area (err=%d)", rc);
 
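A note on the ldom_startcpu_cpuid() hunk above: the open-coded allocation size built from sizeof(*hdesc) and sizeof(struct hvtramp_mapping) is replaced by struct_size(hdesc, maps, num_kernel_image_mappings). The kernel helper (from <linux/overflow.h>) computes the size of a structure that ends in a flexible array member plus a given number of trailing elements, saturating to SIZE_MAX if the arithmetic overflows. The standalone C sketch below only models that size arithmetic in userspace; the struct layouts and the descr_size() helper are simplified stand-ins for illustration, not the kernel's definitions from asm/hvtramp.h.

#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-ins for the kernel types; the real hvtramp_descr and
 * hvtramp_mapping have different members.
 */
struct hvtramp_mapping {
	unsigned long vaddr;
	unsigned long tte;
};

struct hvtramp_descr {
	unsigned long cpu;
	unsigned long num_mappings;
	struct hvtramp_mapping maps[];	/* flexible array member */
};

/* Models what struct_size(hdesc, maps, n) evaluates to: the fixed header
 * plus n trailing array elements.  The kernel macro additionally saturates
 * the arithmetic on overflow, which this plain expression does not do.
 */
static size_t descr_size(size_t n)
{
	return sizeof(struct hvtramp_descr) +
	       n * sizeof(struct hvtramp_mapping);
}

int main(void)
{
	size_t n = 4;	/* stand-in for num_kernel_image_mappings */
	struct hvtramp_descr *hdesc = calloc(1, descr_size(n));

	if (!hdesc)
		return 1;

	hdesc->num_mappings = n;
	printf("header %zu + %zu elements of %zu bytes = %zu bytes total\n",
	       sizeof(struct hvtramp_descr), n,
	       sizeof(struct hvtramp_mapping), descr_size(n));

	free(hdesc);
	return 0;
}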
