Diffstat (limited to 'arch/sparc/kernel/smp_64.c')
-rw-r--r--  arch/sparc/kernel/smp_64.c  226
1 file changed, 56 insertions(+), 170 deletions(-)
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 9b4506373353..f3969a3600db 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -43,11 +43,11 @@
#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/oplib.h>
#include <linux/uaccess.h>
#include <asm/starfire.h>
#include <asm/tlb.h>
+#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/prom.h>
#include <asm/mdesc.h>
@@ -138,9 +138,6 @@ void smp_callin(void)
set_cpu_online(cpuid, true);
- /* idle thread is expected to have preempt disabled */
- preempt_disable();
-
local_irq_enable();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
@@ -924,20 +921,26 @@ extern unsigned long xcall_flush_dcache_page_cheetah;
#endif
extern unsigned long xcall_flush_dcache_page_spitfire;
-static inline void __local_flush_dcache_page(struct page *page)
+static inline void __local_flush_dcache_folio(struct folio *folio)
{
+ unsigned int i, nr = folio_nr_pages(folio);
+
#ifdef DCACHE_ALIASING_POSSIBLE
- __flush_dcache_page(page_address(page),
+ for (i = 0; i < nr; i++)
+ __flush_dcache_page(folio_address(folio) + i * PAGE_SIZE,
((tlb_type == spitfire) &&
- page_mapping_file(page) != NULL));
+ folio_flush_mapping(folio) != NULL));
#else
- if (page_mapping_file(page) != NULL &&
- tlb_type == spitfire)
- __flush_icache_page(__pa(page_address(page)));
+ if (folio_flush_mapping(folio) != NULL &&
+ tlb_type == spitfire) {
+ unsigned long pfn = folio_pfn(folio);
+ for (i = 0; i < nr; i++)
+ __flush_icache_page((pfn + i) * PAGE_SIZE);
+ }
#endif
}
-void smp_flush_dcache_page_impl(struct page *page, int cpu)
+void smp_flush_dcache_folio_impl(struct folio *folio, int cpu)
{
int this_cpu;
@@ -951,14 +954,14 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu)
this_cpu = get_cpu();
if (cpu == this_cpu) {
- __local_flush_dcache_page(page);
+ __local_flush_dcache_folio(folio);
} else if (cpu_online(cpu)) {
- void *pg_addr = page_address(page);
+ void *pg_addr = folio_address(folio);
u64 data0 = 0;
if (tlb_type == spitfire) {
data0 = ((u64)&xcall_flush_dcache_page_spitfire);
- if (page_mapping_file(page) != NULL)
+ if (folio_flush_mapping(folio) != NULL)
data0 |= ((u64)1 << 32);
} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
@@ -966,18 +969,23 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu)
#endif
}
if (data0) {
- xcall_deliver(data0, __pa(pg_addr),
- (u64) pg_addr, cpumask_of(cpu));
+ unsigned int i, nr = folio_nr_pages(folio);
+
+ for (i = 0; i < nr; i++) {
+ xcall_deliver(data0, __pa(pg_addr),
+ (u64) pg_addr, cpumask_of(cpu));
#ifdef CONFIG_DEBUG_DCFLUSH
- atomic_inc(&dcpage_flushes_xcall);
+ atomic_inc(&dcpage_flushes_xcall);
#endif
+ pg_addr += PAGE_SIZE;
+ }
}
}
put_cpu();
}
-void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
+void flush_dcache_folio_all(struct mm_struct *mm, struct folio *folio)
{
void *pg_addr;
u64 data0;
@@ -991,10 +999,10 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
atomic_inc(&dcpage_flushes);
#endif
data0 = 0;
- pg_addr = page_address(page);
+ pg_addr = folio_address(folio);
if (tlb_type == spitfire) {
data0 = ((u64)&xcall_flush_dcache_page_spitfire);
- if (page_mapping_file(page) != NULL)
+ if (folio_flush_mapping(folio) != NULL)
data0 |= ((u64)1 << 32);
} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
@@ -1002,13 +1010,18 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
#endif
}
if (data0) {
- xcall_deliver(data0, __pa(pg_addr),
- (u64) pg_addr, cpu_online_mask);
+ unsigned int i, nr = folio_nr_pages(folio);
+
+ for (i = 0; i < nr; i++) {
+ xcall_deliver(data0, __pa(pg_addr),
+ (u64) pg_addr, cpu_online_mask);
#ifdef CONFIG_DEBUG_DCFLUSH
- atomic_inc(&dcpage_flushes_xcall);
+ atomic_inc(&dcpage_flushes_xcall);
#endif
+ pg_addr += PAGE_SIZE;
+ }
}
- __local_flush_dcache_page(page);
+ __local_flush_dcache_folio(folio);
preempt_enable();
}
@@ -1039,38 +1052,9 @@ void smp_fetch_global_pmu(void)
* are flush_tlb_*() routines, and these run after flush_cache_*()
* which performs the flushw.
*
- * The SMP TLB coherency scheme we use works as follows:
- *
- * 1) mm->cpu_vm_mask is a bit mask of which cpus an address
- * space has (potentially) executed on, this is the heuristic
- * we use to avoid doing cross calls.
- *
- * Also, for flushing from kswapd and also for clones, we
- * use cpu_vm_mask as the list of cpus to make run the TLB.
- *
- * 2) TLB context numbers are shared globally across all processors
- * in the system, this allows us to play several games to avoid
- * cross calls.
- *
- * One invariant is that when a cpu switches to a process, and
- * that processes tsk->active_mm->cpu_vm_mask does not have the
- * current cpu's bit set, that tlb context is flushed locally.
- *
- * If the address space is non-shared (ie. mm->count == 1) we avoid
- * cross calls when we want to flush the currently running process's
- * tlb state. This is done by clearing all cpu bits except the current
- * processor's in current->mm->cpu_vm_mask and performing the
- * flush locally only. This will force any subsequent cpus which run
- * this task to flush the context from the local tlb if the process
- * migrates to another cpu (again).
- *
- * 3) For shared address spaces (threads) and swapping we bite the
- * bullet for most cases and perform the cross call (but only to
- * the cpus listed in cpu_vm_mask).
- *
- * The performance gain from "optimizing" away the cross call for threads is
- * questionable (in theory the big win for threads is the massive sharing of
- * address space state across processors).
+ * mm->cpu_vm_mask is a bit mask of which cpus an address
+ * space has (potentially) executed on, this is the heuristic
+ * we use to limit cross calls.
*/
/* This currently is only used by the hugetlb arch pre-fault
@@ -1080,18 +1064,13 @@ void smp_fetch_global_pmu(void)
void smp_flush_tlb_mm(struct mm_struct *mm)
{
u32 ctx = CTX_HWBITS(mm->context);
- int cpu = get_cpu();
- if (atomic_read(&mm->mm_users) == 1) {
- cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
- goto local_flush_and_out;
- }
+ get_cpu();
smp_cross_call_masked(&xcall_flush_tlb_mm,
ctx, 0, 0,
mm_cpumask(mm));
-local_flush_and_out:
__flush_tlb_mm(ctx, SECONDARY_CONTEXT);
put_cpu();
@@ -1114,17 +1093,15 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long
{
u32 ctx = CTX_HWBITS(mm->context);
struct tlb_pending_info info;
- int cpu = get_cpu();
+
+ get_cpu();
info.ctx = ctx;
info.nr = nr;
info.vaddrs = vaddrs;
- if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
- cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
- else
- smp_call_function_many(mm_cpumask(mm), tlb_pending_func,
- &info, 1);
+ smp_call_function_many(mm_cpumask(mm), tlb_pending_func,
+ &info, 1);
__flush_tlb_pending(ctx, nr, vaddrs);
@@ -1134,14 +1111,13 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long
void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr)
{
unsigned long context = CTX_HWBITS(mm->context);
- int cpu = get_cpu();
- if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
- cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
- else
- smp_cross_call_masked(&xcall_flush_tlb_page,
- context, vaddr, 0,
- mm_cpumask(mm));
+ get_cpu();
+
+ smp_cross_call_masked(&xcall_flush_tlb_page,
+ context, vaddr, 0,
+ mm_cpumask(mm));
+
__flush_tlb_page(context, vaddr);
put_cpu();
@@ -1226,12 +1202,6 @@ void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs)
preempt_enable();
}
-/* /proc/profile writes can call this, don't __init it please. */
-int setup_profiling_timer(unsigned int multiplier)
-{
- return -EINVAL;
-}
-
void __init smp_prepare_cpus(unsigned int max_cpus)
{
}
@@ -1476,7 +1446,7 @@ static unsigned long send_cpu_poke(int cpu)
return hv_err;
}
-void smp_send_reschedule(int cpu)
+void arch_smp_send_reschedule(int cpu)
{
if (cpu == smp_processor_id()) {
WARN_ON_ONCE(preemptible());
@@ -1566,50 +1536,6 @@ void smp_send_stop(void)
smp_call_function(stop_this_cpu, NULL, 0);
}
-/**
- * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
- * @cpu: cpu to allocate for
- * @size: size allocation in bytes
- * @align: alignment
- *
- * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper
- * does the right thing for NUMA regardless of the current
- * configuration.
- *
- * RETURNS:
- * Pointer to the allocated area on success, NULL on failure.
- */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
- size_t align)
-{
- const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NEED_MULTIPLE_NODES
- int node = cpu_to_node(cpu);
- void *ptr;
-
- if (!node_online(node) || !NODE_DATA(node)) {
- ptr = memblock_alloc_from(size, align, goal);
- pr_info("cpu %d has no node %d or node-local memory\n",
- cpu, node);
- pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
- cpu, size, __pa(ptr));
- } else {
- ptr = memblock_alloc_try_nid(size, align, goal,
- MEMBLOCK_ALLOC_ACCESSIBLE, node);
- pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
- "%016lx\n", cpu, size, node, __pa(ptr));
- }
- return ptr;
-#else
- return memblock_alloc_from(size, align, goal);
-#endif
-}
-
-static void __init pcpu_free_bootmem(void *ptr, size_t size)
-{
- memblock_free(__pa(ptr), size);
-}
-
static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
if (cpu_to_node(from) == cpu_to_node(to))
@@ -1618,46 +1544,9 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
return REMOTE_DISTANCE;
}
-static void __init pcpu_populate_pte(unsigned long addr)
+static int __init pcpu_cpu_to_node(int cpu)
{
- pgd_t *pgd = pgd_offset_k(addr);
- pud_t *pud;
- pmd_t *pmd;
-
- if (pgd_none(*pgd)) {
- pud_t *new;
-
- new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
- if (!new)
- goto err_alloc;
- pgd_populate(&init_mm, pgd, new);
- }
-
- pud = pud_offset(pgd, addr);
- if (pud_none(*pud)) {
- pmd_t *new;
-
- new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
- if (!new)
- goto err_alloc;
- pud_populate(&init_mm, pud, new);
- }
-
- pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd)) {
- pte_t *new;
-
- new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
- if (!new)
- goto err_alloc;
- pmd_populate_kernel(&init_mm, pmd, new);
- }
-
- return;
-
-err_alloc:
- panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
- __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+ return cpu_to_node(cpu);
}
void __init setup_per_cpu_areas(void)
@@ -1670,8 +1559,7 @@ void __init setup_per_cpu_areas(void)
rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
PERCPU_DYNAMIC_RESERVE, 4 << 20,
pcpu_cpu_distance,
- pcpu_alloc_bootmem,
- pcpu_free_bootmem);
+ pcpu_cpu_to_node);
if (rc)
pr_warn("PERCPU: %s allocator failed (%d), "
"falling back to page size\n",
@@ -1679,9 +1567,7 @@ void __init setup_per_cpu_areas(void)
}
if (rc < 0)
rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
- pcpu_alloc_bootmem,
- pcpu_free_bootmem,
- pcpu_populate_pte);
+ pcpu_cpu_to_node);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);