diff options
Diffstat (limited to 'arch/mips/mm')
42 files changed, 4238 insertions, 4000 deletions
diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile index 7f4f93ab22b7..304692391519 100644 --- a/arch/mips/mm/Makefile +++ b/arch/mips/mm/Makefile @@ -1,26 +1,47 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the Linux/MIPS-specific parts of the memory manager. # -obj-y += cache.o dma-default.o extable.o fault.o \ - gup.o init.o mmap.o page.o page-funcs.o \ - tlbex.o tlbex-fault.o tlb-funcs.o uasm-mips.o +obj-y += cache.o +obj-y += context.o +obj-y += extable.o +obj-y += fault.o +obj-y += init.o +obj-y += mmap.o +obj-y += page.o +obj-y += page-funcs.o +obj-y += pgtable.o +obj-y += tlbex.o +obj-y += tlbex-fault.o +obj-y += tlb-funcs.o + +ifdef CONFIG_CPU_MICROMIPS +obj-y += uasm-micromips.o +else +obj-y += uasm-mips.o +endif + +ifndef CONFIG_EVA +obj-y += maccess.o +endif obj-$(CONFIG_32BIT) += ioremap.o pgtable-32.o -obj-$(CONFIG_64BIT) += pgtable-64.o +obj-$(CONFIG_64BIT) += ioremap64.o pgtable-64.o obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_DMA_NONCOHERENT) += dma-noncoherent.o +obj-$(CONFIG_CPU_R3K_TLB) += tlb-r3k.o obj-$(CONFIG_CPU_R4K_CACHE_TLB) += c-r4k.o cex-gen.o tlb-r4k.o -obj-$(CONFIG_CPU_R3000) += c-r3k.o tlb-r3k.o -obj-$(CONFIG_CPU_R8000) += c-r4k.o cex-gen.o tlb-r8k.o +obj-$(CONFIG_CPU_R3000) += c-r3k.o obj-$(CONFIG_CPU_SB1) += c-r4k.o cerr-sb1.o cex-sb1.o tlb-r4k.o -obj-$(CONFIG_CPU_TX39XX) += c-tx39.o tlb-r3k.o obj-$(CONFIG_CPU_CAVIUM_OCTEON) += c-octeon.o cex-oct.o tlb-r4k.o obj-$(CONFIG_IP22_CPU_SCACHE) += sc-ip22.o obj-$(CONFIG_R5000_CPU_SCACHE) += sc-r5k.o obj-$(CONFIG_RM7000_CPU_SCACHE) += sc-rm7k.o obj-$(CONFIG_MIPS_CPU_SCACHE) += sc-mips.o +obj-$(CONFIG_SCACHE_DEBUGFS) += sc-debugfs.o -obj-$(CONFIG_SYS_SUPPORTS_MICROMIPS) += uasm-micromips.o +obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c index 8557fb552863..b7393b61cfa7 100644 --- a/arch/mips/mm/c-octeon.c +++ b/arch/mips/mm/c-octeon.c @@ -6,7 +6,6 @@ * Copyright (C) 2005-2007 Cavium Networks */ #include <linux/export.h> -#include <linux/init.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/smp.h> @@ -19,19 +18,18 @@ #include <asm/bootinfo.h> #include <asm/cacheops.h> #include <asm/cpu-features.h> +#include <asm/cpu-type.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/r4kcache.h> #include <asm/traps.h> #include <asm/mmu_context.h> -#include <asm/war.h> #include <asm/octeon/octeon.h> unsigned long long cache_err_dcache[NR_CPUS]; EXPORT_SYMBOL_GPL(cache_err_dcache); -/** +/* * Octeon automatically flushes the dcache on tlb changes, so * from Linux's viewpoint it acts much like a physically * tagged cache. No flushing is needed @@ -57,8 +55,8 @@ static void local_octeon_flush_icache_range(unsigned long start, } /** - * Flush caches as necessary for all cores affected by a - * vma. If no vma is supplied, all cores are flushed. + * octeon_flush_icache_all_cores - Flush caches as necessary for all cores + * affected by a vma. If no vma is supplied, all cores are flushed. * * @vma: VMA to flush or NULL to flush all icaches. */ @@ -85,15 +83,20 @@ static void octeon_flush_icache_all_cores(struct vm_area_struct *vma) else mask = *cpu_online_mask; cpumask_clear_cpu(cpu, &mask); +#ifdef CONFIG_CAVIUM_OCTEON_SOC for_each_cpu(cpu, &mask) octeon_send_ipi_single(cpu, SMP_ICACHE_FLUSH); +#else + smp_call_function_many(&mask, (smp_call_func_t)octeon_local_flush_icache, + NULL, 1); +#endif preempt_enable(); #endif } -/** +/* * Called to flush the icache on all cores */ static void octeon_flush_icache_all(void) @@ -103,8 +106,7 @@ static void octeon_flush_icache_all(void) /** - * Called to flush all memory associated with a memory - * context. + * octeon_flush_cache_mm - flush all memory associated with a memory context. * * @mm: Memory context to flush */ @@ -117,7 +119,7 @@ static void octeon_flush_cache_mm(struct mm_struct *mm) } -/** +/* * Flush a range of kernel addresses out of the icache * */ @@ -128,26 +130,11 @@ static void octeon_flush_icache_range(unsigned long start, unsigned long end) /** - * Flush the icache for a trampoline. These are used for interrupt - * and exception hooking. - * - * @addr: Address to flush - */ -static void octeon_flush_cache_sigtramp(unsigned long addr) -{ - struct vm_area_struct *vma; - - vma = find_vma(current->mm, addr); - octeon_flush_icache_all_cores(vma); -} - - -/** - * Flush a range out of a vma + * octeon_flush_cache_range - Flush a range out of a vma * * @vma: VMA to flush - * @start: - * @end: + * @start: beginning address for flush + * @end: ending address for flush */ static void octeon_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) @@ -158,11 +145,11 @@ static void octeon_flush_cache_range(struct vm_area_struct *vma, /** - * Flush a specific page of a vma + * octeon_flush_cache_page - Flush a specific page of a vma * * @vma: VMA to flush page for * @page: Page to flush - * @pfn: + * @pfn: Page frame number */ static void octeon_flush_cache_page(struct vm_area_struct *vma, unsigned long page, unsigned long pfn) @@ -176,19 +163,20 @@ static void octeon_flush_kernel_vmap_range(unsigned long vaddr, int size) BUG(); } -/** +/* * Probe Octeon's caches * */ -static void __cpuinit probe_octeon(void) +static void probe_octeon(void) { unsigned long icache_size; unsigned long dcache_size; unsigned int config1; struct cpuinfo_mips *c = ¤t_cpu_data; + int cputype = current_cpu_type(); config1 = read_c0_config1(); - switch (c->cputype) { + switch (cputype) { case CPU_CAVIUM_OCTEON: case CPU_CAVIUM_OCTEON_PLUS: c->icache.linesz = 2 << ((config1 >> 19) & 7); @@ -199,7 +187,7 @@ static void __cpuinit probe_octeon(void) c->icache.sets * c->icache.ways * c->icache.linesz; c->icache.waybit = ffs(icache_size / c->icache.ways) - 1; c->dcache.linesz = 128; - if (c->cputype == CPU_CAVIUM_OCTEON_PLUS) + if (cputype == CPU_CAVIUM_OCTEON_PLUS) c->dcache.sets = 2; /* CN5XXX has two Dcache sets */ else c->dcache.sets = 1; /* CN3XXX has one Dcache set */ @@ -224,6 +212,20 @@ static void __cpuinit probe_octeon(void) c->options |= MIPS_CPU_PREFETCH; break; + case CPU_CAVIUM_OCTEON3: + c->icache.linesz = 128; + c->icache.sets = 16; + c->icache.ways = 39; + c->icache.flags |= MIPS_CACHE_VTAG; + icache_size = c->icache.sets * c->icache.ways * c->icache.linesz; + + c->dcache.linesz = 128; + c->dcache.ways = 32; + c->dcache.sets = 8; + dcache_size = c->dcache.sets * c->dcache.ways * c->dcache.linesz; + c->options |= MIPS_CPU_PREFETCH; + break; + default: panic("Unsupported Cavium Networks CPU type"); break; @@ -237,31 +239,31 @@ static void __cpuinit probe_octeon(void) c->dcache.sets = dcache_size / (c->dcache.linesz * c->dcache.ways); if (smp_processor_id() == 0) { - pr_notice("Primary instruction cache %ldkB, %s, %d way, " - "%d sets, linesize %d bytes.\n", - icache_size >> 10, - cpu_has_vtag_icache ? + pr_info("Primary instruction cache %ldkB, %s, %d way, " + "%d sets, linesize %d bytes.\n", + icache_size >> 10, + cpu_has_vtag_icache ? "virtually tagged" : "physically tagged", - c->icache.ways, c->icache.sets, c->icache.linesz); + c->icache.ways, c->icache.sets, c->icache.linesz); - pr_notice("Primary data cache %ldkB, %d-way, %d sets, " - "linesize %d bytes.\n", - dcache_size >> 10, c->dcache.ways, - c->dcache.sets, c->dcache.linesz); + pr_info("Primary data cache %ldkB, %d-way, %d sets, " + "linesize %d bytes.\n", + dcache_size >> 10, c->dcache.ways, + c->dcache.sets, c->dcache.linesz); } } -static void __cpuinit octeon_cache_error_setup(void) +static void octeon_cache_error_setup(void) { extern char except_vec2_octeon; set_handler(0x100, &except_vec2_octeon, 0x80); } -/** +/* * Setup the Octeon cache flush routines * */ -void __cpuinit octeon_cache_init(void) +void octeon_cache_init(void) { probe_octeon(); @@ -272,11 +274,12 @@ void __cpuinit octeon_cache_init(void) flush_cache_mm = octeon_flush_cache_mm; flush_cache_page = octeon_flush_cache_page; flush_cache_range = octeon_flush_cache_range; - flush_cache_sigtramp = octeon_flush_cache_sigtramp; flush_icache_all = octeon_flush_icache_all; flush_data_cache_page = octeon_flush_data_cache_page; flush_icache_range = octeon_flush_icache_range; local_flush_icache_range = local_octeon_flush_icache_range; + __flush_icache_user_range = octeon_flush_icache_range; + __local_flush_icache_user_range = local_octeon_flush_icache_range; __flush_kernel_vmap_range = octeon_flush_kernel_vmap_range; @@ -333,7 +336,7 @@ static void co_cache_error_call_notifiers(unsigned long val) } /* - * Called when the the exception is recoverable + * Called when the exception is recoverable */ asmlinkage void cache_parity_error_octeon_recoverable(void) @@ -341,8 +344,8 @@ asmlinkage void cache_parity_error_octeon_recoverable(void) co_cache_error_call_notifiers(0); } -/** - * Called when the the exception is not recoverable +/* + * Called when the exception is not recoverable */ asmlinkage void cache_parity_error_octeon_non_recoverable(void) diff --git a/arch/mips/mm/c-r3k.c b/arch/mips/mm/c-r3k.c index 704dc735a59d..5869df848fab 100644 --- a/arch/mips/mm/c-r3k.c +++ b/arch/mips/mm/c-r3k.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * r2300.c: R2000 and R3000 specific mmu/cache code. * @@ -9,14 +10,12 @@ * Copyright (C) 1998 Gleb Raiko & Vladimir Roganov * Copyright (C) 2001, 2004, 2007 Maciej W. Rozycki */ -#include <linux/init.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/smp.h> #include <linux/mm.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/mmu_context.h> #include <asm/isadep.h> #include <asm/io.h> @@ -26,7 +25,7 @@ static unsigned long icache_size, dcache_size; /* Size in bytes */ static unsigned long icache_lsize, dcache_lsize; /* Size in bytes */ -unsigned long __cpuinit r3k_cache_size(unsigned long ca_flags) +unsigned long r3k_cache_size(unsigned long ca_flags) { unsigned long flags, status, dummy, size; volatile unsigned long *p; @@ -61,7 +60,7 @@ unsigned long __cpuinit r3k_cache_size(unsigned long ca_flags) return size * sizeof(*p); } -unsigned long __cpuinit r3k_cache_lsize(unsigned long ca_flags) +unsigned long r3k_cache_lsize(unsigned long ca_flags) { unsigned long flags, status, lsize, i; volatile unsigned long *p; @@ -90,7 +89,7 @@ unsigned long __cpuinit r3k_cache_lsize(unsigned long ca_flags) return lsize * sizeof(*p); } -static void __cpuinit r3k_probe_cache(void) +static void r3k_probe_cache(void) { dcache_size = r3k_cache_size(ST0_ISC); if (dcache_size) @@ -240,22 +239,18 @@ static void r3k_flush_cache_page(struct vm_area_struct *vma, unsigned long kaddr = KSEG0ADDR(pfn << PAGE_SHIFT); int exec = vma->vm_flags & VM_EXEC; struct mm_struct *mm = vma->vm_mm; - pgd_t *pgdp; - pud_t *pudp; pmd_t *pmdp; pte_t *ptep; - pr_debug("cpage[%08lx,%08lx]\n", + pr_debug("cpage[%08llx,%08lx]\n", cpu_context(smp_processor_id(), mm), addr); /* No ASID => no such page in the cache. */ if (cpu_context(smp_processor_id(), mm) == 0) return; - pgdp = pgd_offset(mm, addr); - pudp = pud_offset(pgdp, addr); - pmdp = pmd_offset(pudp, addr); - ptep = pte_offset(pmdp, addr); + pmdp = pmd_off(mm, addr); + ptep = pte_offset_kernel(pmdp, addr); /* Invalid => no such page in the cache. */ if (!(pte_val(*ptep) & _PAGE_PRESENT)) @@ -266,38 +261,10 @@ static void r3k_flush_cache_page(struct vm_area_struct *vma, r3k_flush_icache_range(kaddr, kaddr + PAGE_SIZE); } -static void local_r3k_flush_data_cache_page(void *addr) -{ -} - static void r3k_flush_data_cache_page(unsigned long addr) { } -static void r3k_flush_cache_sigtramp(unsigned long addr) -{ - unsigned long flags; - - pr_debug("csigtramp[%08lx]\n", addr); - - flags = read_c0_status(); - - write_c0_status(flags&~ST0_IEC); - - /* Fill the TLB to avoid an exception with caches isolated. */ - asm( "lw\t$0, 0x000(%0)\n\t" - "lw\t$0, 0x004(%0)\n\t" - : : "r" (addr) ); - - write_c0_status((ST0_ISC|ST0_SWC|flags)&~ST0_IEC); - - asm( "sb\t$0, 0x000(%0)\n\t" - "sb\t$0, 0x004(%0)\n\t" - : : "r" (addr) ); - - write_c0_status(flags); -} - static void r3k_flush_kernel_vmap_range(unsigned long vaddr, int size) { BUG(); @@ -312,7 +279,7 @@ static void r3k_dma_cache_wback_inv(unsigned long start, unsigned long size) r3k_flush_dcache_range(start, start + size); } -void __cpuinit r3k_cache_init(void) +void r3k_cache_init(void) { extern void build_clear_page(void); extern void build_copy_page(void); @@ -326,20 +293,20 @@ void __cpuinit r3k_cache_init(void) flush_cache_page = r3k_flush_cache_page; flush_icache_range = r3k_flush_icache_range; local_flush_icache_range = r3k_flush_icache_range; + __flush_icache_user_range = r3k_flush_icache_range; + __local_flush_icache_user_range = r3k_flush_icache_range; __flush_kernel_vmap_range = r3k_flush_kernel_vmap_range; - flush_cache_sigtramp = r3k_flush_cache_sigtramp; - local_flush_data_cache_page = local_r3k_flush_data_cache_page; flush_data_cache_page = r3k_flush_data_cache_page; _dma_cache_wback_inv = r3k_dma_cache_wback_inv; _dma_cache_wback = r3k_dma_cache_wback_inv; _dma_cache_inv = r3k_dma_cache_wback_inv; - printk("Primary instruction cache %ldkB, linesize %ld bytes.\n", + pr_info("Primary instruction cache %ldkB, linesize %ld bytes.\n", icache_size >> 10, icache_lsize); - printk("Primary data cache %ldkB, linesize %ld bytes.\n", + pr_info("Primary data cache %ldkB, linesize %ld bytes.\n", dcache_size >> 10, dcache_lsize); build_clear_page(); diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 21813beec7a5..10413b6f6662 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -7,16 +7,19 @@ * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 Ralf Baechle (ralf@gnu.org) * Copyright (C) 1999, 2000 Silicon Graphics, Inc. */ +#include <linux/cpu_pm.h> #include <linux/hardirq.h> #include <linux/init.h> #include <linux/highmem.h> #include <linux/kernel.h> #include <linux/linkage.h> +#include <linux/preempt.h> #include <linux/sched.h> #include <linux/smp.h> #include <linux/mm.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/bitops.h> +#include <linux/dma-map-ops.h> /* for dma_default_coherent */ #include <asm/bcache.h> #include <asm/bootinfo.h> @@ -24,16 +27,60 @@ #include <asm/cacheops.h> #include <asm/cpu.h> #include <asm/cpu-features.h> +#include <asm/cpu-type.h> #include <asm/io.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/r4kcache.h> #include <asm/sections.h> #include <asm/mmu_context.h> -#include <asm/war.h> #include <asm/cacheflush.h> /* for run_uncached() */ #include <asm/traps.h> -#include <asm/dma-coherence.h> +#include <asm/mips-cps.h> + +/* + * Bits describing what cache ops an SMP callback function may perform. + * + * R4K_HIT - Virtual user or kernel address based cache operations. The + * active_mm must be checked before using user addresses, falling + * back to kmap. + * R4K_INDEX - Index based cache operations. + */ + +#define R4K_HIT BIT(0) +#define R4K_INDEX BIT(1) + +/** + * r4k_op_needs_ipi() - Decide if a cache op needs to be done on every core. + * @type: Type of cache operations (R4K_HIT or R4K_INDEX). + * + * Decides whether a cache op needs to be performed on every core in the system. + * This may change depending on the @type of cache operation, as well as the set + * of online CPUs, so preemption should be disabled by the caller to prevent CPU + * hotplug from changing the result. + * + * Returns: 1 if the cache operation @type should be done on every core in + * the system. + * 0 if the cache operation @type is globalized and only needs to + * be performed on a simple CPU. + */ +static inline bool r4k_op_needs_ipi(unsigned int type) +{ + /* The MIPS Coherence Manager (CM) globalizes address-based cache ops */ + if (type == R4K_HIT && mips_cm_present()) + return false; + + /* + * Hardware doesn't globalize the required cache ops, so SMP calls may + * be needed, but only if there are foreign CPUs (non-siblings with + * separate caches). + */ + /* cpu_foreign_map[] undeclared when !CONFIG_SMP */ +#ifdef CONFIG_SMP + return !cpumask_empty(&cpu_foreign_map[0]); +#else + return false; +#endif +} /* * Special Variant of smp_call_function for use by cache functions: @@ -44,52 +91,34 @@ * primary cache. * o doesn't disable interrupts on the local CPU */ -static inline void r4k_on_each_cpu(void (*func) (void *info), void *info) +static inline void r4k_on_each_cpu(unsigned int type, + void (*func)(void *info), void *info) { preempt_disable(); - -#if !defined(CONFIG_MIPS_MT_SMP) && !defined(CONFIG_MIPS_MT_SMTC) - smp_call_function(func, info, 1); -#endif + if (r4k_op_needs_ipi(type)) + smp_call_function_many(&cpu_foreign_map[smp_processor_id()], + func, info, 1); func(info); preempt_enable(); } -#if defined(CONFIG_MIPS_CMP) -#define cpu_has_safe_index_cacheops 0 -#else -#define cpu_has_safe_index_cacheops 1 -#endif - /* * Must die. */ static unsigned long icache_size __read_mostly; static unsigned long dcache_size __read_mostly; +static unsigned long vcache_size __read_mostly; static unsigned long scache_size __read_mostly; -/* - * Dummy cache handling routines for machines without boardcaches - */ -static void cache_noop(void) {} - -static struct bcache_ops no_sc_ops = { - .bc_enable = (void *)cache_noop, - .bc_disable = (void *)cache_noop, - .bc_wback_inv = (void *)cache_noop, - .bc_inv = (void *)cache_noop -}; - -struct bcache_ops *bcops = &no_sc_ops; - #define cpu_is_r4600_v1_x() ((read_c0_prid() & 0xfffffff0) == 0x00002010) #define cpu_is_r4600_v2_x() ((read_c0_prid() & 0xfffffff0) == 0x00002020) #define R4600_HIT_CACHEOP_WAR_IMPL \ do { \ - if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) \ + if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && \ + cpu_is_r4600_v2_x()) \ *(volatile unsigned long *)CKSEG1; \ - if (R4600_V1_HIT_CACHEOP_WAR) \ + if (IS_ENABLED(CONFIG_WAR_R4600_V1_HIT_CACHEOP)) \ __asm__ __volatile__("nop;nop;nop;nop"); \ } while (0) @@ -103,44 +132,65 @@ static inline void r4k_blast_dcache_page_dc32(unsigned long addr) static inline void r4k_blast_dcache_page_dc64(unsigned long addr) { - R4600_HIT_CACHEOP_WAR_IMPL; blast_dcache64_page(addr); } -static void __cpuinit r4k_blast_dcache_page_setup(void) +static inline void r4k_blast_dcache_page_dc128(unsigned long addr) +{ + blast_dcache128_page(addr); +} + +static void r4k_blast_dcache_page_setup(void) { unsigned long dc_lsize = cpu_dcache_line_size(); - if (dc_lsize == 0) + switch (dc_lsize) { + case 0: r4k_blast_dcache_page = (void *)cache_noop; - else if (dc_lsize == 16) + break; + case 16: r4k_blast_dcache_page = blast_dcache16_page; - else if (dc_lsize == 32) + break; + case 32: r4k_blast_dcache_page = r4k_blast_dcache_page_dc32; - else if (dc_lsize == 64) + break; + case 64: r4k_blast_dcache_page = r4k_blast_dcache_page_dc64; + break; + case 128: + r4k_blast_dcache_page = r4k_blast_dcache_page_dc128; + break; + default: + break; + } } -static void (* r4k_blast_dcache_page_indexed)(unsigned long addr); +#ifndef CONFIG_EVA +#define r4k_blast_dcache_user_page r4k_blast_dcache_page +#else -static void __cpuinit r4k_blast_dcache_page_indexed_setup(void) +static void (*r4k_blast_dcache_user_page)(unsigned long addr); + +static void r4k_blast_dcache_user_page_setup(void) { - unsigned long dc_lsize = cpu_dcache_line_size(); + unsigned long dc_lsize = cpu_dcache_line_size(); if (dc_lsize == 0) - r4k_blast_dcache_page_indexed = (void *)cache_noop; + r4k_blast_dcache_user_page = (void *)cache_noop; else if (dc_lsize == 16) - r4k_blast_dcache_page_indexed = blast_dcache16_page_indexed; + r4k_blast_dcache_user_page = blast_dcache16_user_page; else if (dc_lsize == 32) - r4k_blast_dcache_page_indexed = blast_dcache32_page_indexed; + r4k_blast_dcache_user_page = blast_dcache32_user_page; else if (dc_lsize == 64) - r4k_blast_dcache_page_indexed = blast_dcache64_page_indexed; + r4k_blast_dcache_user_page = blast_dcache64_user_page; } +#endif + void (* r4k_blast_dcache)(void); EXPORT_SYMBOL(r4k_blast_dcache); -static void __cpuinit r4k_blast_dcache_setup(void) +static void r4k_blast_dcache_setup(void) { unsigned long dc_lsize = cpu_dcache_line_size(); @@ -152,9 +202,11 @@ static void __cpuinit r4k_blast_dcache_setup(void) r4k_blast_dcache = blast_dcache32; else if (dc_lsize == 64) r4k_blast_dcache = blast_dcache64; + else if (dc_lsize == 128) + r4k_blast_dcache = blast_dcache128; } -/* force code alignment (used for TX49XX_ICACHE_INDEX_INV_WAR) */ +/* force code alignment (used for CONFIG_WAR_TX49XX_ICACHE_INDEX_INV) */ #define JUMP_TO_ALIGN(order) \ __asm__ __volatile__( \ "b\t1f\n\t" \ @@ -186,48 +238,19 @@ static inline void tx49_blast_icache32(void) /* I'm in even chunk. blast odd chunks */ for (ws = 0; ws < ws_end; ws += ws_inc) for (addr = start + 0x400; addr < end; addr += 0x400 * 2) - cache32_unroll32(addr|ws, Index_Invalidate_I); + cache_unroll(32, kernel_cache, Index_Invalidate_I, + addr | ws, 32); CACHE32_UNROLL32_ALIGN; /* I'm in odd chunk. blast even chunks */ for (ws = 0; ws < ws_end; ws += ws_inc) for (addr = start; addr < end; addr += 0x400 * 2) - cache32_unroll32(addr|ws, Index_Invalidate_I); -} - -static inline void blast_icache32_r4600_v1_page_indexed(unsigned long page) -{ - unsigned long flags; - - local_irq_save(flags); - blast_icache32_page_indexed(page); - local_irq_restore(flags); -} - -static inline void tx49_blast_icache32_page_indexed(unsigned long page) -{ - unsigned long indexmask = current_cpu_data.icache.waysize - 1; - unsigned long start = INDEX_BASE + (page & indexmask); - unsigned long end = start + PAGE_SIZE; - unsigned long ws_inc = 1UL << current_cpu_data.icache.waybit; - unsigned long ws_end = current_cpu_data.icache.ways << - current_cpu_data.icache.waybit; - unsigned long ws, addr; - - CACHE32_UNROLL32_ALIGN2; - /* I'm in even chunk. blast odd chunks */ - for (ws = 0; ws < ws_end; ws += ws_inc) - for (addr = start + 0x400; addr < end; addr += 0x400 * 2) - cache32_unroll32(addr|ws, Index_Invalidate_I); - CACHE32_UNROLL32_ALIGN; - /* I'm in odd chunk. blast even chunks */ - for (ws = 0; ws < ws_end; ws += ws_inc) - for (addr = start; addr < end; addr += 0x400 * 2) - cache32_unroll32(addr|ws, Index_Invalidate_I); + cache_unroll(32, kernel_cache, Index_Invalidate_I, + addr | ws, 32); } static void (* r4k_blast_icache_page)(unsigned long addr); -static void __cpuinit r4k_blast_icache_page_setup(void) +static void r4k_blast_icache_page_setup(void) { unsigned long ic_lsize = cpu_icache_line_size(); @@ -235,41 +258,42 @@ static void __cpuinit r4k_blast_icache_page_setup(void) r4k_blast_icache_page = (void *)cache_noop; else if (ic_lsize == 16) r4k_blast_icache_page = blast_icache16_page; + else if (ic_lsize == 32 && current_cpu_type() == CPU_LOONGSON2EF) + r4k_blast_icache_page = loongson2_blast_icache32_page; else if (ic_lsize == 32) r4k_blast_icache_page = blast_icache32_page; else if (ic_lsize == 64) r4k_blast_icache_page = blast_icache64_page; + else if (ic_lsize == 128) + r4k_blast_icache_page = blast_icache128_page; } +#ifndef CONFIG_EVA +#define r4k_blast_icache_user_page r4k_blast_icache_page +#else -static void (* r4k_blast_icache_page_indexed)(unsigned long addr); +static void (*r4k_blast_icache_user_page)(unsigned long addr); -static void __cpuinit r4k_blast_icache_page_indexed_setup(void) +static void r4k_blast_icache_user_page_setup(void) { unsigned long ic_lsize = cpu_icache_line_size(); if (ic_lsize == 0) - r4k_blast_icache_page_indexed = (void *)cache_noop; + r4k_blast_icache_user_page = (void *)cache_noop; else if (ic_lsize == 16) - r4k_blast_icache_page_indexed = blast_icache16_page_indexed; - else if (ic_lsize == 32) { - if (R4600_V1_INDEX_ICACHEOP_WAR && cpu_is_r4600_v1_x()) - r4k_blast_icache_page_indexed = - blast_icache32_r4600_v1_page_indexed; - else if (TX49XX_ICACHE_INDEX_INV_WAR) - r4k_blast_icache_page_indexed = - tx49_blast_icache32_page_indexed; - else - r4k_blast_icache_page_indexed = - blast_icache32_page_indexed; - } else if (ic_lsize == 64) - r4k_blast_icache_page_indexed = blast_icache64_page_indexed; + r4k_blast_icache_user_page = blast_icache16_user_page; + else if (ic_lsize == 32) + r4k_blast_icache_user_page = blast_icache32_user_page; + else if (ic_lsize == 64) + r4k_blast_icache_user_page = blast_icache64_user_page; } +#endif + void (* r4k_blast_icache)(void); EXPORT_SYMBOL(r4k_blast_icache); -static void __cpuinit r4k_blast_icache_setup(void) +static void r4k_blast_icache_setup(void) { unsigned long ic_lsize = cpu_icache_line_size(); @@ -278,19 +302,24 @@ static void __cpuinit r4k_blast_icache_setup(void) else if (ic_lsize == 16) r4k_blast_icache = blast_icache16; else if (ic_lsize == 32) { - if (R4600_V1_INDEX_ICACHEOP_WAR && cpu_is_r4600_v1_x()) + if (IS_ENABLED(CONFIG_WAR_R4600_V1_INDEX_ICACHEOP) && + cpu_is_r4600_v1_x()) r4k_blast_icache = blast_r4600_v1_icache32; - else if (TX49XX_ICACHE_INDEX_INV_WAR) + else if (IS_ENABLED(CONFIG_WAR_TX49XX_ICACHE_INDEX_INV)) r4k_blast_icache = tx49_blast_icache32; + else if (current_cpu_type() == CPU_LOONGSON2EF) + r4k_blast_icache = loongson2_blast_icache32; else r4k_blast_icache = blast_icache32; } else if (ic_lsize == 64) r4k_blast_icache = blast_icache64; + else if (ic_lsize == 128) + r4k_blast_icache = blast_icache128; } static void (* r4k_blast_scache_page)(unsigned long addr); -static void __cpuinit r4k_blast_scache_page_setup(void) +static void r4k_blast_scache_page_setup(void) { unsigned long sc_lsize = cpu_scache_line_size(); @@ -306,52 +335,46 @@ static void __cpuinit r4k_blast_scache_page_setup(void) r4k_blast_scache_page = blast_scache128_page; } -static void (* r4k_blast_scache_page_indexed)(unsigned long addr); +static void (* r4k_blast_scache)(void); -static void __cpuinit r4k_blast_scache_page_indexed_setup(void) +static void r4k_blast_scache_setup(void) { unsigned long sc_lsize = cpu_scache_line_size(); if (scache_size == 0) - r4k_blast_scache_page_indexed = (void *)cache_noop; + r4k_blast_scache = (void *)cache_noop; else if (sc_lsize == 16) - r4k_blast_scache_page_indexed = blast_scache16_page_indexed; + r4k_blast_scache = blast_scache16; else if (sc_lsize == 32) - r4k_blast_scache_page_indexed = blast_scache32_page_indexed; + r4k_blast_scache = blast_scache32; else if (sc_lsize == 64) - r4k_blast_scache_page_indexed = blast_scache64_page_indexed; + r4k_blast_scache = blast_scache64; else if (sc_lsize == 128) - r4k_blast_scache_page_indexed = blast_scache128_page_indexed; + r4k_blast_scache = blast_scache128; } -static void (* r4k_blast_scache)(void); +static void (*r4k_blast_scache_node)(long node); -static void __cpuinit r4k_blast_scache_setup(void) +static void r4k_blast_scache_node_setup(void) { unsigned long sc_lsize = cpu_scache_line_size(); - if (scache_size == 0) - r4k_blast_scache = (void *)cache_noop; + if (current_cpu_type() != CPU_LOONGSON64) + r4k_blast_scache_node = (void *)cache_noop; else if (sc_lsize == 16) - r4k_blast_scache = blast_scache16; + r4k_blast_scache_node = blast_scache16_node; else if (sc_lsize == 32) - r4k_blast_scache = blast_scache32; + r4k_blast_scache_node = blast_scache32_node; else if (sc_lsize == 64) - r4k_blast_scache = blast_scache64; + r4k_blast_scache_node = blast_scache64_node; else if (sc_lsize == 128) - r4k_blast_scache = blast_scache128; + r4k_blast_scache_node = blast_scache128_node; } static inline void local_r4k___flush_cache_all(void * args) { -#if defined(CONFIG_CPU_LOONGSON2) - r4k_blast_scache(); - return; -#endif - r4k_blast_dcache(); - r4k_blast_icache(); - switch (current_cpu_type()) { + case CPU_LOONGSON2EF: case CPU_R4000SC: case CPU_R4000MC: case CPU_R4400SC: @@ -359,28 +382,75 @@ static inline void local_r4k___flush_cache_all(void * args) case CPU_R10000: case CPU_R12000: case CPU_R14000: + case CPU_R16000: + /* + * These caches are inclusive caches, that is, if something + * is not cached in the S-cache, we know it also won't be + * in one of the primary caches. + */ r4k_blast_scache(); + break; + + case CPU_LOONGSON64: + /* Use get_ebase_cpunum() for both NUMA=y/n */ + r4k_blast_scache_node(get_ebase_cpunum() >> 2); + break; + + case CPU_BMIPS5000: + r4k_blast_scache(); + __sync(); + break; + + default: + r4k_blast_dcache(); + r4k_blast_icache(); + break; } } static void r4k___flush_cache_all(void) { - r4k_on_each_cpu(local_r4k___flush_cache_all, NULL); + r4k_on_each_cpu(R4K_INDEX, local_r4k___flush_cache_all, NULL); } -static inline int has_valid_asid(const struct mm_struct *mm) +/** + * has_valid_asid() - Determine if an mm already has an ASID. + * @mm: Memory map. + * @type: R4K_HIT or R4K_INDEX, type of cache op. + * + * Determines whether @mm already has an ASID on any of the CPUs which cache ops + * of type @type within an r4k_on_each_cpu() call will affect. If + * r4k_on_each_cpu() does an SMP call to a single VPE in each core, then the + * scope of the operation is confined to sibling CPUs, otherwise all online CPUs + * will need to be checked. + * + * Must be called in non-preemptive context. + * + * Returns: 1 if the CPUs affected by @type cache ops have an ASID for @mm. + * 0 otherwise. + */ +static inline int has_valid_asid(const struct mm_struct *mm, unsigned int type) { -#if defined(CONFIG_MIPS_MT_SMP) || defined(CONFIG_MIPS_MT_SMTC) - int i; + unsigned int i; + const cpumask_t *mask = cpu_present_mask; + + if (cpu_has_mmid) + return cpu_context(0, mm) != 0; - for_each_online_cpu(i) + /* cpu_sibling_map[] undeclared when !CONFIG_SMP */ +#ifdef CONFIG_SMP + /* + * If r4k_on_each_cpu does SMP calls, it does them to a single VPE in + * each foreign core, so we only need to worry about siblings. + * Otherwise we need to worry about all present CPUs. + */ + if (r4k_op_needs_ipi(type)) + mask = &cpu_sibling_map[smp_processor_id()]; +#endif + for_each_cpu(i, mask) if (cpu_context(i, mm)) return 1; - return 0; -#else - return cpu_context(smp_processor_id(), mm); -#endif } static void r4k__flush_cache_vmap(void) @@ -393,15 +463,26 @@ static void r4k__flush_cache_vunmap(void) r4k_blast_dcache(); } +/* + * Note: flush_tlb_range() assumes flush_cache_range() sufficiently flushes + * whole caches when vma is executable. + */ static inline void local_r4k_flush_cache_range(void * args) { struct vm_area_struct *vma = args; int exec = vma->vm_flags & VM_EXEC; - if (!(has_valid_asid(vma->vm_mm))) + if (!has_valid_asid(vma->vm_mm, R4K_INDEX)) return; - r4k_blast_dcache(); + /* + * If dcache can alias, we must blast it since mapping is changing. + * If executable, we must ensure any dirty lines are written back far + * enough to be visible to icache. + */ + if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc)) + r4k_blast_dcache(); + /* If executable, blast stale lines from icache */ if (exec) r4k_blast_icache(); } @@ -411,20 +492,20 @@ static void r4k_flush_cache_range(struct vm_area_struct *vma, { int exec = vma->vm_flags & VM_EXEC; - if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc)) - r4k_on_each_cpu(local_r4k_flush_cache_range, vma); + if (cpu_has_dc_aliases || exec) + r4k_on_each_cpu(R4K_INDEX, local_r4k_flush_cache_range, vma); } static inline void local_r4k_flush_cache_mm(void * args) { struct mm_struct *mm = args; - if (!has_valid_asid(mm)) + if (!has_valid_asid(mm, R4K_INDEX)) return; /* * Kludge alert. For obscure reasons R4000SC and R4400SC go nuts if we - * only flush the primary caches but R10000 and R12000 behave sane ... + * only flush the primary caches but R1x000 behave sane ... * R4000SC and R4400SC indexed S-cache ops also invalidate primary * caches, so we can bail out early. */ @@ -444,7 +525,7 @@ static void r4k_flush_cache_mm(struct mm_struct *mm) if (!cpu_has_dc_aliases) return; - r4k_on_each_cpu(local_r4k_flush_cache_mm, mm); + r4k_on_each_cpu(R4K_INDEX, local_r4k_flush_cache_mm, mm); } struct flush_cache_page_args { @@ -462,24 +543,20 @@ static inline void local_r4k_flush_cache_page(void *args) int exec = vma->vm_flags & VM_EXEC; struct mm_struct *mm = vma->vm_mm; int map_coherent = 0; - pgd_t *pgdp; - pud_t *pudp; pmd_t *pmdp; pte_t *ptep; void *vaddr; /* - * If ownes no valid ASID yet, cannot possibly have gotten + * If owns no valid ASID yet, cannot possibly have gotten * this page into the cache. */ - if (!has_valid_asid(mm)) + if (!has_valid_asid(mm, R4K_HIT)) return; addr &= PAGE_MASK; - pgdp = pgd_offset(mm, addr); - pudp = pud_offset(pgdp, addr); - pmdp = pmd_offset(pudp, addr); - ptep = pte_offset(pmdp, addr); + pmdp = pmd_off(mm, addr); + ptep = pte_offset_kernel(pmdp, addr); /* * If the page isn't marked valid, the page cannot possibly be @@ -491,12 +568,14 @@ static inline void local_r4k_flush_cache_page(void *args) if ((mm == current->active_mm) && (pte_val(*ptep) & _PAGE_VALID)) vaddr = NULL; else { + struct folio *folio = page_folio(page); /* * Use kmap_coherent or kmap_atomic to do flushes for * another ASID than the current one. */ map_coherent = (cpu_has_dc_aliases && - page_mapped(page) && !Page_dcache_dirty(page)); + folio_mapped(folio) && + !folio_test_dcache_dirty(folio)); if (map_coherent) vaddr = kmap_coherent(page, addr); else @@ -505,18 +584,17 @@ static inline void local_r4k_flush_cache_page(void *args) } if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc)) { - r4k_blast_dcache_page(addr); + vaddr ? r4k_blast_dcache_page(addr) : + r4k_blast_dcache_user_page(addr); if (exec && !cpu_icache_snoops_remote_store) r4k_blast_scache_page(addr); } if (exec) { if (vaddr && cpu_has_vtag_icache && mm == current->active_mm) { - int cpu = smp_processor_id(); - - if (cpu_context(cpu, mm) != 0) - drop_mmu_context(mm, cpu); + drop_mmu_context(mm); } else - r4k_blast_icache_page(addr); + vaddr ? r4k_blast_icache_page(addr) : + r4k_blast_icache_user_page(addr); } if (vaddr) { @@ -536,7 +614,7 @@ static void r4k_flush_cache_page(struct vm_area_struct *vma, args.addr = addr; args.pfn = pfn; - r4k_on_each_cpu(local_r4k_flush_cache_page, &args); + r4k_on_each_cpu(R4K_HIT, local_r4k_flush_cache_page, &args); } static inline void local_r4k_flush_data_cache_page(void * addr) @@ -549,29 +627,64 @@ static void r4k_flush_data_cache_page(unsigned long addr) if (in_atomic()) local_r4k_flush_data_cache_page((void *)addr); else - r4k_on_each_cpu(local_r4k_flush_data_cache_page, (void *) addr); + r4k_on_each_cpu(R4K_HIT, local_r4k_flush_data_cache_page, + (void *) addr); } struct flush_icache_range_args { unsigned long start; unsigned long end; + unsigned int type; + bool user; }; -static inline void local_r4k_flush_icache_range(unsigned long start, unsigned long end) +static inline void __local_r4k_flush_icache_range(unsigned long start, + unsigned long end, + unsigned int type, + bool user) { if (!cpu_has_ic_fills_f_dc) { - if (end - start >= dcache_size) { + if (type == R4K_INDEX || + (type & R4K_INDEX && end - start >= dcache_size)) { r4k_blast_dcache(); } else { R4600_HIT_CACHEOP_WAR_IMPL; - protected_blast_dcache_range(start, end); + if (user) + protected_blast_dcache_range(start, end); + else + blast_dcache_range(start, end); } } - if (end - start > icache_size) + if (type == R4K_INDEX || + (type & R4K_INDEX && end - start > icache_size)) r4k_blast_icache(); - else - protected_blast_icache_range(start, end); + else { + switch (boot_cpu_type()) { + case CPU_LOONGSON2EF: + protected_loongson2_blast_icache_range(start, end); + break; + + default: + if (user) + protected_blast_icache_range(start, end); + else + blast_icache_range(start, end); + break; + } + } +} + +static inline void local_r4k_flush_icache_range(unsigned long start, + unsigned long end) +{ + __local_r4k_flush_icache_range(start, end, R4K_HIT | R4K_INDEX, false); +} + +static inline void local_r4k_flush_icache_user_range(unsigned long start, + unsigned long end) +{ + __local_r4k_flush_icache_range(start, end, R4K_HIT | R4K_INDEX, true); } static inline void local_r4k_flush_icache_range_ipi(void *args) @@ -579,33 +692,76 @@ static inline void local_r4k_flush_icache_range_ipi(void *args) struct flush_icache_range_args *fir_args = args; unsigned long start = fir_args->start; unsigned long end = fir_args->end; + unsigned int type = fir_args->type; + bool user = fir_args->user; - local_r4k_flush_icache_range(start, end); + __local_r4k_flush_icache_range(start, end, type, user); } -static void r4k_flush_icache_range(unsigned long start, unsigned long end) +static void __r4k_flush_icache_range(unsigned long start, unsigned long end, + bool user) { struct flush_icache_range_args args; + unsigned long size, cache_size; args.start = start; args.end = end; + args.type = R4K_HIT | R4K_INDEX; + args.user = user; - r4k_on_each_cpu(local_r4k_flush_icache_range_ipi, &args); + /* + * Indexed cache ops require an SMP call. + * Consider if that can or should be avoided. + */ + preempt_disable(); + if (r4k_op_needs_ipi(R4K_INDEX) && !r4k_op_needs_ipi(R4K_HIT)) { + /* + * If address-based cache ops don't require an SMP call, then + * use them exclusively for small flushes. + */ + size = end - start; + cache_size = icache_size; + if (!cpu_has_ic_fills_f_dc) { + size *= 2; + cache_size += dcache_size; + } + if (size <= cache_size) + args.type &= ~R4K_INDEX; + } + r4k_on_each_cpu(args.type, local_r4k_flush_icache_range_ipi, &args); + preempt_enable(); instruction_hazard(); } +static void r4k_flush_icache_range(unsigned long start, unsigned long end) +{ + return __r4k_flush_icache_range(start, end, false); +} + +static void r4k_flush_icache_user_range(unsigned long start, unsigned long end) +{ + return __r4k_flush_icache_range(start, end, true); +} + #ifdef CONFIG_DMA_NONCOHERENT static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) { /* Catch bad driver code */ - BUG_ON(size == 0); + if (WARN_ON(size == 0)) + return; + preempt_disable(); if (cpu_has_inclusive_pcaches) { - if (size >= scache_size) - r4k_blast_scache(); - else + if (size >= scache_size) { + if (current_cpu_type() != CPU_LOONGSON64) + r4k_blast_scache(); + else + r4k_blast_scache_node(pa_to_nid(addr)); + } else { blast_scache_range(addr, addr + size); + } + preempt_enable(); __sync(); return; } @@ -613,28 +769,66 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) /* * Either no secondary cache or the available caches don't have the * subset property so we have to flush the primary caches - * explicitly + * explicitly. + * If we would need IPI to perform an INDEX-type operation, then + * we have to use the HIT-type alternative as IPI cannot be used + * here due to interrupts possibly being disabled. */ - if (cpu_has_safe_index_cacheops && size >= dcache_size) { + if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size) { r4k_blast_dcache(); } else { R4600_HIT_CACHEOP_WAR_IMPL; blast_dcache_range(addr, addr + size); } + preempt_enable(); bc_wback_inv(addr, size); __sync(); } +static void prefetch_cache_inv(unsigned long addr, unsigned long size) +{ + unsigned int linesz = cpu_scache_line_size(); + unsigned long addr0 = addr, addr1; + + addr0 &= ~(linesz - 1); + addr1 = (addr0 + size - 1) & ~(linesz - 1); + + protected_writeback_scache_line(addr0); + if (likely(addr1 != addr0)) + protected_writeback_scache_line(addr1); + else + return; + + addr0 += linesz; + if (likely(addr1 != addr0)) + protected_writeback_scache_line(addr0); + else + return; + + addr1 -= linesz; + if (likely(addr1 > addr0)) + protected_writeback_scache_line(addr0); +} + static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) { /* Catch bad driver code */ - BUG_ON(size == 0); + if (WARN_ON(size == 0)) + return; + + preempt_disable(); + + if (current_cpu_type() == CPU_BMIPS5000) + prefetch_cache_inv(addr, size); if (cpu_has_inclusive_pcaches) { - if (size >= scache_size) - r4k_blast_scache(); - else { + if (size >= scache_size) { + if (current_cpu_type() != CPU_LOONGSON64) + r4k_blast_scache(); + else + r4k_blast_scache_node(pa_to_nid(addr)); + } else { /* * There is no clearly documented alignment requirement * for the cache instruction on MIPS processors and @@ -645,68 +839,24 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) */ blast_inv_scache_range(addr, addr + size); } + preempt_enable(); __sync(); return; } - if (cpu_has_safe_index_cacheops && size >= dcache_size) { + if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size) { r4k_blast_dcache(); } else { R4600_HIT_CACHEOP_WAR_IMPL; blast_inv_dcache_range(addr, addr + size); } + preempt_enable(); bc_inv(addr, size); __sync(); } #endif /* CONFIG_DMA_NONCOHERENT */ -/* - * While we're protected against bad userland addresses we don't care - * very much about what happens in that case. Usually a segmentation - * fault will dump the process later on anyway ... - */ -static void local_r4k_flush_cache_sigtramp(void * arg) -{ - unsigned long ic_lsize = cpu_icache_line_size(); - unsigned long dc_lsize = cpu_dcache_line_size(); - unsigned long sc_lsize = cpu_scache_line_size(); - unsigned long addr = (unsigned long) arg; - - R4600_HIT_CACHEOP_WAR_IMPL; - if (dc_lsize) - protected_writeback_dcache_line(addr & ~(dc_lsize - 1)); - if (!cpu_icache_snoops_remote_store && scache_size) - protected_writeback_scache_line(addr & ~(sc_lsize - 1)); - if (ic_lsize) - protected_flush_icache_line(addr & ~(ic_lsize - 1)); - if (MIPS4K_ICACHE_REFILL_WAR) { - __asm__ __volatile__ ( - ".set push\n\t" - ".set noat\n\t" - ".set mips3\n\t" -#ifdef CONFIG_32BIT - "la $at,1f\n\t" -#endif -#ifdef CONFIG_64BIT - "dla $at,1f\n\t" -#endif - "cache %0,($at)\n\t" - "nop; nop; nop\n" - "1:\n\t" - ".set pop" - : - : "i" (Hit_Invalidate_I)); - } - if (MIPS_CACHE_SYNC_WAR) - __asm__ __volatile__ ("sync"); -} - -static void r4k_flush_cache_sigtramp(unsigned long addr) -{ - r4k_on_each_cpu(local_r4k_flush_cache_sigtramp, (void *) addr); -} - static void r4k_flush_icache_all(void) { if (cpu_has_vtag_icache) @@ -718,6 +868,15 @@ struct flush_kernel_vmap_range_args { int size; }; +static inline void local_r4k_flush_kernel_vmap_range_index(void *args) +{ + /* + * Aliases only affect the primary caches so don't bother with + * S-caches or T-caches. + */ + r4k_blast_dcache(); +} + static inline void local_r4k_flush_kernel_vmap_range(void *args) { struct flush_kernel_vmap_range_args *vmra = args; @@ -728,12 +887,8 @@ static inline void local_r4k_flush_kernel_vmap_range(void *args) * Aliases only affect the primary caches so don't bother with * S-caches or T-caches. */ - if (cpu_has_safe_index_cacheops && size >= dcache_size) - r4k_blast_dcache(); - else { - R4600_HIT_CACHEOP_WAR_IMPL; - blast_dcache_range(vaddr, vaddr + size); - } + R4600_HIT_CACHEOP_WAR_IMPL; + blast_dcache_range(vaddr, vaddr + size); } static void r4k_flush_kernel_vmap_range(unsigned long vaddr, int size) @@ -743,7 +898,12 @@ static void r4k_flush_kernel_vmap_range(unsigned long vaddr, int size) args.vaddr = (unsigned long) vaddr; args.size = size; - r4k_on_each_cpu(local_r4k_flush_kernel_vmap_range, &args); + if (size >= dcache_size) + r4k_on_each_cpu(R4K_INDEX, + local_r4k_flush_kernel_vmap_range_index, NULL); + else + r4k_on_each_cpu(R4K_HIT, local_r4k_flush_kernel_vmap_range, + &args); } static inline void rm7k_erratum31(void) @@ -774,42 +934,73 @@ static inline void rm7k_erratum31(void) "cache\t%1, 0x3000(%0)\n\t" ".set pop\n" : - : "r" (addr), "i" (Index_Store_Tag_I), "i" (Fill)); + : "r" (addr), "i" (Index_Store_Tag_I), "i" (Fill_I)); } } -static inline void alias_74k_erratum(struct cpuinfo_mips *c) +static inline int alias_74k_erratum(struct cpuinfo_mips *c) { + unsigned int imp = c->processor_id & PRID_IMP_MASK; + unsigned int rev = c->processor_id & PRID_REV_MASK; + int present = 0; + /* * Early versions of the 74K do not update the cache tags on a * vtag miss/ptag hit which can occur in the case of KSEG0/KUSEG - * aliases. In this case it is better to treat the cache as always - * having aliases. + * aliases. In this case it is better to treat the cache as always + * having aliases. Also disable the synonym tag update feature + * where available. In this case no opportunistic tag update will + * happen where a load causes a virtual address miss but a physical + * address hit during a D-cache look-up. */ - if ((c->processor_id & 0xff) <= PRID_REV_ENCODE_332(2, 4, 0)) - c->dcache.flags |= MIPS_CACHE_VTAG; - if ((c->processor_id & 0xff) == PRID_REV_ENCODE_332(2, 4, 0)) - write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); - if (((c->processor_id & 0xff00) == PRID_IMP_1074K) && - ((c->processor_id & 0xff) <= PRID_REV_ENCODE_332(1, 1, 0))) { - c->dcache.flags |= MIPS_CACHE_VTAG; - write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); + switch (imp) { + case PRID_IMP_74K: + if (rev <= PRID_REV_ENCODE_332(2, 4, 0)) + present = 1; + if (rev == PRID_REV_ENCODE_332(2, 4, 0)) + write_c0_config6(read_c0_config6() | MTI_CONF6_SYND); + break; + case PRID_IMP_1074K: + if (rev <= PRID_REV_ENCODE_332(1, 1, 0)) { + present = 1; + write_c0_config6(read_c0_config6() | MTI_CONF6_SYND); + } + break; + default: + BUG(); } + + return present; +} + +static void b5k_instruction_hazard(void) +{ + __sync(); + __sync(); + __asm__ __volatile__( + " nop; nop; nop; nop; nop; nop; nop; nop\n" + " nop; nop; nop; nop; nop; nop; nop; nop\n" + " nop; nop; nop; nop; nop; nop; nop; nop\n" + " nop; nop; nop; nop; nop; nop; nop; nop\n" + : : : "memory"); } -static char *way_string[] __cpuinitdata = { NULL, "direct mapped", "2-way", - "3-way", "4-way", "5-way", "6-way", "7-way", "8-way" +static char *way_string[] = { NULL, "direct mapped", "2-way", + "3-way", "4-way", "5-way", "6-way", "7-way", "8-way", + "9-way", "10-way", "11-way", "12-way", + "13-way", "14-way", "15-way", "16-way", }; -static void __cpuinit probe_pcache(void) +static void probe_pcache(void) { struct cpuinfo_mips *c = ¤t_cpu_data; unsigned int config = read_c0_config(); unsigned int prid = read_c0_prid(); + int has_74k_erratum = 0; unsigned long config1; unsigned int lsize; - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_R4600: /* QED style two way caches? */ case CPU_R4700: case CPU_R5000: @@ -827,7 +1018,6 @@ static void __cpuinit probe_pcache(void) c->options |= MIPS_CPU_CACHE_CDEX_P; break; - case CPU_R5432: case CPU_R5500: icache_size = 1 << (12 + ((config & CONF_IC) >> 9)); c->icache.linesz = 16 << ((config & CONF_IB) >> 5); @@ -880,6 +1070,7 @@ static void __cpuinit probe_pcache(void) case CPU_R10000: case CPU_R12000: case CPU_R14000: + case CPU_R16000: icache_size = 1 << (12 + ((config & R10K_CONF_IC) >> 29)); c->icache.linesz = 64; c->icache.ways = 2; @@ -893,49 +1084,6 @@ static void __cpuinit probe_pcache(void) c->options |= MIPS_CPU_PREFETCH; break; - case CPU_VR4133: - write_c0_config(config & ~VR41_CONF_P4K); - case CPU_VR4131: - /* Workaround for cache instruction bug of VR4131 */ - if (c->processor_id == 0x0c80U || c->processor_id == 0x0c81U || - c->processor_id == 0x0c82U) { - config |= 0x00400000U; - if (c->processor_id == 0x0c80U) - config |= VR41_CONF_BP; - write_c0_config(config); - } else - c->options |= MIPS_CPU_CACHE_CDEX_P; - - icache_size = 1 << (10 + ((config & CONF_IC) >> 9)); - c->icache.linesz = 16 << ((config & CONF_IB) >> 5); - c->icache.ways = 2; - c->icache.waybit = __ffs(icache_size/2); - - dcache_size = 1 << (10 + ((config & CONF_DC) >> 6)); - c->dcache.linesz = 16 << ((config & CONF_DB) >> 4); - c->dcache.ways = 2; - c->dcache.waybit = __ffs(dcache_size/2); - break; - - case CPU_VR41XX: - case CPU_VR4111: - case CPU_VR4121: - case CPU_VR4122: - case CPU_VR4181: - case CPU_VR4181A: - icache_size = 1 << (10 + ((config & CONF_IC) >> 9)); - c->icache.linesz = 16 << ((config & CONF_IB) >> 5); - c->icache.ways = 1; - c->icache.waybit = 0; /* doesn't matter */ - - dcache_size = 1 << (10 + ((config & CONF_DC) >> 6)); - c->dcache.linesz = 16 << ((config & CONF_DB) >> 4); - c->dcache.ways = 1; - c->dcache.waybit = 0; /* does not matter */ - - c->options |= MIPS_CPU_CACHE_CDEX_P; - break; - case CPU_RM7000: rm7k_erratum31(); @@ -953,7 +1101,7 @@ static void __cpuinit probe_pcache(void) c->options |= MIPS_CPU_PREFETCH; break; - case CPU_LOONGSON2: + case CPU_LOONGSON2EF: icache_size = 1 << (12 + ((config & CONF_IC) >> 9)); c->icache.linesz = 16 << ((config & CONF_IB) >> 5); if (prid & 0x3) @@ -971,6 +1119,52 @@ static void __cpuinit probe_pcache(void) c->dcache.waybit = 0; break; + case CPU_LOONGSON64: + config1 = read_c0_config1(); + lsize = (config1 >> 19) & 7; + if (lsize) + c->icache.linesz = 2 << lsize; + else + c->icache.linesz = 0; + c->icache.sets = 64 << ((config1 >> 22) & 7); + c->icache.ways = 1 + ((config1 >> 16) & 7); + icache_size = c->icache.sets * + c->icache.ways * + c->icache.linesz; + c->icache.waybit = 0; + + lsize = (config1 >> 10) & 7; + if (lsize) + c->dcache.linesz = 2 << lsize; + else + c->dcache.linesz = 0; + c->dcache.sets = 64 << ((config1 >> 13) & 7); + c->dcache.ways = 1 + ((config1 >> 7) & 7); + dcache_size = c->dcache.sets * + c->dcache.ways * + c->dcache.linesz; + c->dcache.waybit = 0; + if ((c->processor_id & (PRID_IMP_MASK | PRID_REV_MASK)) >= + (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0) || + (c->processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64R) + c->options |= MIPS_CPU_PREFETCH; + break; + + case CPU_CAVIUM_OCTEON3: + /* For now lie about the number of ways. */ + c->icache.linesz = 128; + c->icache.sets = 16; + c->icache.ways = 8; + c->icache.flags |= MIPS_CACHE_VTAG; + icache_size = c->icache.sets * c->icache.ways * c->icache.linesz; + + c->dcache.linesz = 128; + c->dcache.ways = 8; + c->dcache.sets = 8; + dcache_size = c->dcache.sets * c->dcache.ways * c->dcache.linesz; + c->options |= MIPS_CPU_PREFETCH; + break; + default: if (!(config & MIPS_CONF_M)) panic("Don't know how to probe P-caches on this cpu."); @@ -981,10 +1175,14 @@ static void __cpuinit probe_pcache(void) */ config1 = read_c0_config1(); - if ((lsize = ((config1 >> 19) & 7))) - c->icache.linesz = 2 << lsize; - else - c->icache.linesz = lsize; + lsize = (config1 >> 19) & 7; + + /* IL == 7 is reserved */ + if (lsize == 7) + panic("Invalid icache line size"); + + c->icache.linesz = lsize ? 2 << lsize : 0; + c->icache.sets = 32 << (((config1 >> 22) + 1) & 7); c->icache.ways = 1 + ((config1 >> 16) & 7); @@ -993,7 +1191,7 @@ static void __cpuinit probe_pcache(void) c->icache.linesz; c->icache.waybit = __ffs(icache_size/c->icache.ways); - if (config & 0x8) /* VI bit */ + if (config & MIPS_CONF_VI) c->icache.flags |= MIPS_CACHE_VTAG; /* @@ -1001,10 +1199,14 @@ static void __cpuinit probe_pcache(void) */ c->dcache.flags = 0; - if ((lsize = ((config1 >> 10) & 7))) - c->dcache.linesz = 2 << lsize; - else - c->dcache.linesz= lsize; + lsize = (config1 >> 10) & 7; + + /* DL == 7 is reserved */ + if (lsize == 7) + panic("Invalid dcache line size"); + + c->dcache.linesz = lsize ? 2 << lsize : 0; + c->dcache.sets = 32 << (((config1 >> 13) + 1) & 7); c->dcache.ways = 1 + ((config1 >> 7) & 7); @@ -1025,7 +1227,8 @@ static void __cpuinit probe_pcache(void) * presumably no vendor is shipping his hardware in the "bad" * configuration. */ - if ((prid & 0xff00) == PRID_IMP_R4000 && (prid & 0xff) < 0x40 && + if ((prid & PRID_IMP_MASK) == PRID_IMP_R4000 && + (prid & PRID_REV_MASK) < PRID_REV_R4400 && !(config & CONF_SC) && c->icache.linesz != 16 && PAGE_SIZE <= 0x8000) panic("Improper R4000SC processor configuration detected"); @@ -1040,45 +1243,73 @@ static void __cpuinit probe_pcache(void) dcache_size / (c->dcache.linesz * c->dcache.ways) : 0; /* - * R10000 and R12000 P-caches are odd in a positive way. They're 32kB - * 2-way virtually indexed so normally would suffer from aliases. So + * R1x000 P-caches are odd in a positive way. They're 32kB 2-way + * virtually indexed so normally would suffer from aliases. So * normally they'd suffer from aliases but magic in the hardware deals * with that for us so we don't need to take care ourselves. */ - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_20KC: case CPU_25KF: + case CPU_I6400: + case CPU_I6500: case CPU_SB1: case CPU_SB1A: - case CPU_XLR: c->dcache.flags |= MIPS_CACHE_PINDEX; break; case CPU_R10000: case CPU_R12000: case CPU_R14000: + case CPU_R16000: break; + case CPU_74K: + case CPU_1074K: + has_74k_erratum = alias_74k_erratum(c); + fallthrough; case CPU_M14KC: case CPU_M14KEC: case CPU_24K: case CPU_34K: - case CPU_74K: case CPU_1004K: - if (c->cputype == CPU_74K) - alias_74k_erratum(c); - if ((read_c0_config7() & (1 << 16))) { - /* effectively physically indexed dcache, - thus no virtual aliases. */ + case CPU_INTERAPTIV: + case CPU_P5600: + case CPU_PROAPTIV: + case CPU_M5150: + case CPU_QEMU_GENERIC: + case CPU_P6600: + case CPU_M6250: + if (!(read_c0_config7() & MIPS_CONF7_IAR) && + (c->icache.waysize > PAGE_SIZE)) + c->icache.flags |= MIPS_CACHE_ALIASES; + if (!has_74k_erratum && (read_c0_config7() & MIPS_CONF7_AR)) { + /* + * Effectively physically indexed dcache, + * thus no virtual aliases. + */ c->dcache.flags |= MIPS_CACHE_PINDEX; break; } + fallthrough; default: - if (c->dcache.waysize > PAGE_SIZE) + if (has_74k_erratum || c->dcache.waysize > PAGE_SIZE) c->dcache.flags |= MIPS_CACHE_ALIASES; } - switch (c->cputype) { + /* Physically indexed caches don't suffer from virtual aliasing */ + if (c->dcache.flags & MIPS_CACHE_PINDEX) + c->dcache.flags &= ~MIPS_CACHE_ALIASES; + + /* + * In systems with CM the icache fills from L2 or closer caches, and + * thus sees remote stores without needing to write them back any + * further than that. + */ + if (mips_cm_present()) + c->icache.flags |= MIPS_IC_SNOOPS_REMOTE; + + switch (current_cpu_type()) { case CPU_20KC: /* * Some older 20Kc chips doesn't have the 'VI' bit in @@ -1088,29 +1319,62 @@ static void __cpuinit probe_pcache(void) break; case CPU_ALCHEMY: + case CPU_I6400: + case CPU_I6500: c->icache.flags |= MIPS_CACHE_IC_F_DC; break; - } -#ifdef CONFIG_CPU_LOONGSON2 - /* - * LOONGSON2 has 4 way icache, but when using indexed cache op, - * one op will act on all 4 ways - */ - c->icache.ways = 1; -#endif + case CPU_BMIPS5000: + c->icache.flags |= MIPS_CACHE_IC_F_DC; + /* Cache aliases are handled in hardware; allow HIGHMEM */ + c->dcache.flags &= ~MIPS_CACHE_ALIASES; + break; + + case CPU_LOONGSON2EF: + /* + * LOONGSON2 has 4 way icache, but when using indexed cache op, + * one op will act on all 4 ways + */ + c->icache.ways = 1; + } - printk("Primary instruction cache %ldkB, %s, %s, linesize %d bytes.\n", - icache_size >> 10, - c->icache.flags & MIPS_CACHE_VTAG ? "VIVT" : "VIPT", - way_string[c->icache.ways], c->icache.linesz); + pr_info("Primary instruction cache %ldkB, %s, %s, linesize %d bytes.\n", + icache_size >> 10, + c->icache.flags & MIPS_CACHE_VTAG ? "VIVT" : "VIPT", + way_string[c->icache.ways], c->icache.linesz); - printk("Primary data cache %ldkB, %s, %s, %s, linesize %d bytes\n", - dcache_size >> 10, way_string[c->dcache.ways], - (c->dcache.flags & MIPS_CACHE_PINDEX) ? "PIPT" : "VIPT", - (c->dcache.flags & MIPS_CACHE_ALIASES) ? + pr_info("Primary data cache %ldkB, %s, %s, %s, linesize %d bytes\n", + dcache_size >> 10, way_string[c->dcache.ways], + (c->dcache.flags & MIPS_CACHE_PINDEX) ? "PIPT" : "VIPT", + (c->dcache.flags & MIPS_CACHE_ALIASES) ? "cache aliases" : "no aliases", - c->dcache.linesz); + c->dcache.linesz); +} + +static void probe_vcache(void) +{ + struct cpuinfo_mips *c = ¤t_cpu_data; + unsigned int config2, lsize; + + if (current_cpu_type() != CPU_LOONGSON64) + return; + + config2 = read_c0_config2(); + if ((lsize = ((config2 >> 20) & 15))) + c->vcache.linesz = 2 << lsize; + else + c->vcache.linesz = lsize; + + c->vcache.sets = 64 << ((config2 >> 24) & 15); + c->vcache.ways = 1 + ((config2 >> 16) & 15); + + vcache_size = c->vcache.sets * c->vcache.ways * c->vcache.linesz; + + c->vcache.waybit = 0; + c->vcache.waysize = vcache_size / c->vcache.ways; + + pr_info("Unified victim cache %ldkB %s, linesize %d bytes.\n", + vcache_size >> 10, way_string[c->vcache.ways], c->vcache.linesz); } /* @@ -1119,7 +1383,7 @@ static void __cpuinit probe_pcache(void) * executes in KSEG1 space or else you will crash and burn badly. You have * been warned. */ -static int __cpuinit probe_scache(void) +static int probe_scache(void) { unsigned long flags, addr, begin, end, pow2; unsigned int config = read_c0_config(); @@ -1169,13 +1433,12 @@ static int __cpuinit probe_scache(void) scache_size = addr; c->scache.linesz = 16 << ((config & R4K_CONF_SB) >> 22); c->scache.ways = 1; - c->dcache.waybit = 0; /* does not matter */ + c->scache.waybit = 0; /* does not matter */ return 1; } -#if defined(CONFIG_CPU_LOONGSON2) -static void __init loongson2_sc_init(void) +static void loongson2_sc_init(void) { struct cpuinfo_mips *c = ¤t_cpu_data; @@ -1190,13 +1453,39 @@ static void __init loongson2_sc_init(void) c->options |= MIPS_CPU_INCLUSIVE_CACHES; } -#endif -extern int r5k_sc_init(void); -extern int rm7k_sc_init(void); -extern int mips_sc_init(void); +static void loongson3_sc_init(void) +{ + struct cpuinfo_mips *c = ¤t_cpu_data; + unsigned int config2, lsize; + + config2 = read_c0_config2(); + lsize = (config2 >> 4) & 15; + if (lsize) + c->scache.linesz = 2 << lsize; + else + c->scache.linesz = 0; + c->scache.sets = 64 << ((config2 >> 8) & 15); + c->scache.ways = 1 + (config2 & 15); + + /* Loongson-3 has 4-Scache banks, while Loongson-2K have only 2 banks */ + if ((c->processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64R) + c->scache.sets *= 2; + else + c->scache.sets *= 4; + + scache_size = c->scache.sets * c->scache.ways * c->scache.linesz; -static void __cpuinit setup_scache(void) + c->scache.waybit = 0; + c->scache.waysize = scache_size / c->scache.ways; + pr_info("Unified secondary cache %ldkB %s, linesize %d bytes.\n", + scache_size >> 10, way_string[c->scache.ways], c->scache.linesz); + if (scache_size) + c->options |= MIPS_CPU_INCLUSIVE_CACHES; + return; +} + +static void setup_scache(void) { struct cpuinfo_mips *c = ¤t_cpu_data; unsigned int config = read_c0_config(); @@ -1207,7 +1496,7 @@ static void __cpuinit setup_scache(void) * processors don't have a S-cache that would be relevant to the * Linux memory management. */ - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_R4000SC: case CPU_R4000MC: case CPU_R4400SC: @@ -1220,6 +1509,7 @@ static void __cpuinit setup_scache(void) case CPU_R10000: case CPU_R12000: case CPU_R14000: + case CPU_R16000: scache_size = 0x80000 << ((config & R10K_CONF_SS) >> 16); c->scache.linesz = 64 << ((config >> 13) & 1); c->scache.ways = 2; @@ -1240,25 +1530,34 @@ static void __cpuinit setup_scache(void) #endif return; -#if defined(CONFIG_CPU_LOONGSON2) - case CPU_LOONGSON2: + case CPU_LOONGSON2EF: loongson2_sc_init(); return; -#endif - case CPU_XLP: + + case CPU_LOONGSON64: + loongson3_sc_init(); + return; + + case CPU_CAVIUM_OCTEON3: /* don't need to worry about L2, fully coherent */ return; default: - if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2 | - MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M64R2)) { + if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 | + MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 | + MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 | + MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) { #ifdef CONFIG_MIPS_CPU_SCACHE if (mips_sc_init ()) { scache_size = c->scache.ways * c->scache.sets * c->scache.linesz; printk("MIPS secondary cache %ldkB, %s, linesize %d bytes.\n", scache_size >> 10, way_string[c->scache.ways], c->scache.linesz); + + if (current_cpu_type() == CPU_BMIPS5000) + c->options |= MIPS_CPU_INCLUSIVE_CACHES; } + #else if (!(c->scache.flags & MIPS_CACHE_NOT_PRESENT)) panic("Dunno how to handle MIPS32 / MIPS64 second level cache"); @@ -1329,7 +1628,7 @@ static void nxp_pr4450_fixup_config(void) NXP_BARRIER(); } -static int __cpuinitdata cca = -1; +static int cca = -1; static int __init cca_setup(char *str) { @@ -1340,7 +1639,7 @@ static int __init cca_setup(char *str) early_param("cca", cca_setup); -static void __cpuinit coherency_setup(void) +static void coherency_setup(void) { if (cca < 0 || cca > 7) cca = read_c0_config() & CONF_CM_CMASK; @@ -1351,7 +1650,7 @@ static void __cpuinit coherency_setup(void) /* * c0_status.cu=0 specifies that updates by the sc instruction use - * the coherency mode specified by the TLB; 1 means cachable + * the coherency mode specified by the TLB; 1 means cacheable * coherent update on write will be used. Not all processors have * this bit and; some wire it to zero, others like Toshiba had the * silly idea of putting something else there ... @@ -1380,13 +1679,12 @@ static void __cpuinit coherency_setup(void) } } -static void __cpuinit r4k_cache_error_setup(void) +static void r4k_cache_error_setup(void) { extern char __weak except_vec2_generic; extern char __weak except_vec2_sb1; - struct cpuinfo_mips *c = ¤t_cpu_data; - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_SB1: case CPU_SB1A: set_uncached_handler(0x100, &except_vec2_sb1, 0x80); @@ -1398,31 +1696,34 @@ static void __cpuinit r4k_cache_error_setup(void) } } -void __cpuinit r4k_cache_init(void) +void r4k_cache_init(void) { extern void build_clear_page(void); extern void build_copy_page(void); struct cpuinfo_mips *c = ¤t_cpu_data; probe_pcache(); + probe_vcache(); setup_scache(); r4k_blast_dcache_page_setup(); - r4k_blast_dcache_page_indexed_setup(); r4k_blast_dcache_setup(); r4k_blast_icache_page_setup(); - r4k_blast_icache_page_indexed_setup(); r4k_blast_icache_setup(); r4k_blast_scache_page_setup(); - r4k_blast_scache_page_indexed_setup(); r4k_blast_scache_setup(); + r4k_blast_scache_node_setup(); +#ifdef CONFIG_EVA + r4k_blast_dcache_user_page_setup(); + r4k_blast_icache_user_page_setup(); +#endif /* * Some MIPS32 and MIPS64 processors have physically indexed caches. * This code supports virtually indexed processors and will be * unnecessarily inefficient on physically indexed processors. */ - if (c->dcache.linesz) + if (c->dcache.linesz && cpu_has_dc_aliases) shm_align_mask = max_t( unsigned long, c->dcache.sets * c->dcache.linesz - 1, PAGE_SIZE - 1); @@ -1440,24 +1741,18 @@ void __cpuinit r4k_cache_init(void) __flush_kernel_vmap_range = r4k_flush_kernel_vmap_range; - flush_cache_sigtramp = r4k_flush_cache_sigtramp; flush_icache_all = r4k_flush_icache_all; - local_flush_data_cache_page = local_r4k_flush_data_cache_page; flush_data_cache_page = r4k_flush_data_cache_page; flush_icache_range = r4k_flush_icache_range; local_flush_icache_range = local_r4k_flush_icache_range; + __flush_icache_user_range = r4k_flush_icache_user_range; + __local_flush_icache_user_range = local_r4k_flush_icache_user_range; -#if defined(CONFIG_DMA_NONCOHERENT) - if (coherentio) { - _dma_cache_wback_inv = (void *)cache_noop; - _dma_cache_wback = (void *)cache_noop; - _dma_cache_inv = (void *)cache_noop; - } else { - _dma_cache_wback_inv = r4k_dma_cache_wback_inv; - _dma_cache_wback = r4k_dma_cache_wback_inv; - _dma_cache_inv = r4k_dma_cache_inv; - } -#endif +#ifdef CONFIG_DMA_NONCOHERENT + _dma_cache_wback_inv = r4k_dma_cache_wback_inv; + _dma_cache_wback = r4k_dma_cache_wback_inv; + _dma_cache_inv = r4k_dma_cache_inv; +#endif /* CONFIG_DMA_NONCOHERENT */ build_clear_page(); build_copy_page(); @@ -1471,4 +1766,66 @@ void __cpuinit r4k_cache_init(void) coherency_setup(); board_cache_error_setup = r4k_cache_error_setup; + + /* + * Per-CPU overrides + */ + switch (current_cpu_type()) { + case CPU_BMIPS4350: + case CPU_BMIPS4380: + /* No IPI is needed because all CPUs share the same D$ */ + flush_data_cache_page = r4k_blast_dcache_page; + break; + case CPU_BMIPS5000: + /* We lose our superpowers if L2 is disabled */ + if (c->scache.flags & MIPS_CACHE_NOT_PRESENT) + break; + + /* I$ fills from D$ just by emptying the write buffers */ + flush_cache_page = (void *)b5k_instruction_hazard; + flush_cache_range = (void *)b5k_instruction_hazard; + flush_data_cache_page = (void *)b5k_instruction_hazard; + flush_icache_range = (void *)b5k_instruction_hazard; + local_flush_icache_range = (void *)b5k_instruction_hazard; + + + /* Optimization: an L2 flush implicitly flushes the L1 */ + current_cpu_data.options |= MIPS_CPU_INCLUSIVE_CACHES; + break; + case CPU_LOONGSON64: + /* Loongson-3 maintains cache coherency by hardware */ + __flush_cache_all = cache_noop; + __flush_cache_vmap = cache_noop; + __flush_cache_vunmap = cache_noop; + __flush_kernel_vmap_range = (void *)cache_noop; + flush_cache_mm = (void *)cache_noop; + flush_cache_page = (void *)cache_noop; + flush_cache_range = (void *)cache_noop; + flush_icache_all = (void *)cache_noop; + flush_data_cache_page = (void *)cache_noop; + break; + } +} + +static int r4k_cache_pm_notifier(struct notifier_block *self, unsigned long cmd, + void *v) +{ + switch (cmd) { + case CPU_PM_ENTER_FAILED: + case CPU_PM_EXIT: + coherency_setup(); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block r4k_cache_pm_notifier_block = { + .notifier_call = r4k_cache_pm_notifier, +}; + +static int __init r4k_cache_init_pm(void) +{ + return cpu_pm_register_notifier(&r4k_cache_pm_notifier_block); } +arch_initcall(r4k_cache_init_pm); diff --git a/arch/mips/mm/c-tx39.c b/arch/mips/mm/c-tx39.c deleted file mode 100644 index ba9da270289f..000000000000 --- a/arch/mips/mm/c-tx39.c +++ /dev/null @@ -1,440 +0,0 @@ -/* - * r2300.c: R2000 and R3000 specific mmu/cache code. - * - * Copyright (C) 1996 David S. Miller (davem@davemloft.net) - * - * with a lot of changes to make this thing work for R3000s - * Tx39XX R4k style caches added. HK - * Copyright (C) 1998, 1999, 2000 Harald Koerfgen - * Copyright (C) 1998 Gleb Raiko & Vladimir Roganov - */ -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/smp.h> -#include <linux/mm.h> - -#include <asm/cacheops.h> -#include <asm/page.h> -#include <asm/pgtable.h> -#include <asm/mmu_context.h> -#include <asm/isadep.h> -#include <asm/io.h> -#include <asm/bootinfo.h> -#include <asm/cpu.h> - -/* For R3000 cores with R4000 style caches */ -static unsigned long icache_size, dcache_size; /* Size in bytes */ - -#include <asm/r4kcache.h> - -extern int r3k_have_wired_reg; /* in r3k-tlb.c */ - -/* This sequence is required to ensure icache is disabled immediately */ -#define TX39_STOP_STREAMING() \ -__asm__ __volatile__( \ - ".set push\n\t" \ - ".set noreorder\n\t" \ - "b 1f\n\t" \ - "nop\n\t" \ - "1:\n\t" \ - ".set pop" \ - ) - -/* TX39H-style cache flush routines. */ -static void tx39h_flush_icache_all(void) -{ - unsigned long flags, config; - - /* disable icache (set ICE#) */ - local_irq_save(flags); - config = read_c0_conf(); - write_c0_conf(config & ~TX39_CONF_ICE); - TX39_STOP_STREAMING(); - blast_icache16(); - write_c0_conf(config); - local_irq_restore(flags); -} - -static void tx39h_dma_cache_wback_inv(unsigned long addr, unsigned long size) -{ - /* Catch bad driver code */ - BUG_ON(size == 0); - - iob(); - blast_inv_dcache_range(addr, addr + size); -} - - -/* TX39H2,TX39H3 */ -static inline void tx39_blast_dcache_page(unsigned long addr) -{ - if (current_cpu_type() != CPU_TX3912) - blast_dcache16_page(addr); -} - -static inline void tx39_blast_dcache_page_indexed(unsigned long addr) -{ - blast_dcache16_page_indexed(addr); -} - -static inline void tx39_blast_dcache(void) -{ - blast_dcache16(); -} - -static inline void tx39_blast_icache_page(unsigned long addr) -{ - unsigned long flags, config; - /* disable icache (set ICE#) */ - local_irq_save(flags); - config = read_c0_conf(); - write_c0_conf(config & ~TX39_CONF_ICE); - TX39_STOP_STREAMING(); - blast_icache16_page(addr); - write_c0_conf(config); - local_irq_restore(flags); -} - -static inline void tx39_blast_icache_page_indexed(unsigned long addr) -{ - unsigned long flags, config; - /* disable icache (set ICE#) */ - local_irq_save(flags); - config = read_c0_conf(); - write_c0_conf(config & ~TX39_CONF_ICE); - TX39_STOP_STREAMING(); - blast_icache16_page_indexed(addr); - write_c0_conf(config); - local_irq_restore(flags); -} - -static inline void tx39_blast_icache(void) -{ - unsigned long flags, config; - /* disable icache (set ICE#) */ - local_irq_save(flags); - config = read_c0_conf(); - write_c0_conf(config & ~TX39_CONF_ICE); - TX39_STOP_STREAMING(); - blast_icache16(); - write_c0_conf(config); - local_irq_restore(flags); -} - -static void tx39__flush_cache_vmap(void) -{ - tx39_blast_dcache(); -} - -static void tx39__flush_cache_vunmap(void) -{ - tx39_blast_dcache(); -} - -static inline void tx39_flush_cache_all(void) -{ - if (!cpu_has_dc_aliases) - return; - - tx39_blast_dcache(); -} - -static inline void tx39___flush_cache_all(void) -{ - tx39_blast_dcache(); - tx39_blast_icache(); -} - -static void tx39_flush_cache_mm(struct mm_struct *mm) -{ - if (!cpu_has_dc_aliases) - return; - - if (cpu_context(smp_processor_id(), mm) != 0) - tx39_blast_dcache(); -} - -static void tx39_flush_cache_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - if (!cpu_has_dc_aliases) - return; - if (!(cpu_context(smp_processor_id(), vma->vm_mm))) - return; - - tx39_blast_dcache(); -} - -static void tx39_flush_cache_page(struct vm_area_struct *vma, unsigned long page, unsigned long pfn) -{ - int exec = vma->vm_flags & VM_EXEC; - struct mm_struct *mm = vma->vm_mm; - pgd_t *pgdp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - - /* - * If ownes no valid ASID yet, cannot possibly have gotten - * this page into the cache. - */ - if (cpu_context(smp_processor_id(), mm) == 0) - return; - - page &= PAGE_MASK; - pgdp = pgd_offset(mm, page); - pudp = pud_offset(pgdp, page); - pmdp = pmd_offset(pudp, page); - ptep = pte_offset(pmdp, page); - - /* - * If the page isn't marked valid, the page cannot possibly be - * in the cache. - */ - if (!(pte_val(*ptep) & _PAGE_PRESENT)) - return; - - /* - * Doing flushes for another ASID than the current one is - * too difficult since stupid R4k caches do a TLB translation - * for every cache flush operation. So we do indexed flushes - * in that case, which doesn't overly flush the cache too much. - */ - if ((mm == current->active_mm) && (pte_val(*ptep) & _PAGE_VALID)) { - if (cpu_has_dc_aliases || exec) - tx39_blast_dcache_page(page); - if (exec) - tx39_blast_icache_page(page); - - return; - } - - /* - * Do indexed flush, too much work to get the (possible) TLB refills - * to work correctly. - */ - if (cpu_has_dc_aliases || exec) - tx39_blast_dcache_page_indexed(page); - if (exec) - tx39_blast_icache_page_indexed(page); -} - -static void local_tx39_flush_data_cache_page(void * addr) -{ - tx39_blast_dcache_page((unsigned long)addr); -} - -static void tx39_flush_data_cache_page(unsigned long addr) -{ - tx39_blast_dcache_page(addr); -} - -static void tx39_flush_icache_range(unsigned long start, unsigned long end) -{ - if (end - start > dcache_size) - tx39_blast_dcache(); - else - protected_blast_dcache_range(start, end); - - if (end - start > icache_size) - tx39_blast_icache(); - else { - unsigned long flags, config; - /* disable icache (set ICE#) */ - local_irq_save(flags); - config = read_c0_conf(); - write_c0_conf(config & ~TX39_CONF_ICE); - TX39_STOP_STREAMING(); - protected_blast_icache_range(start, end); - write_c0_conf(config); - local_irq_restore(flags); - } -} - -static void tx39_flush_kernel_vmap_range(unsigned long vaddr, int size) -{ - BUG(); -} - -static void tx39_dma_cache_wback_inv(unsigned long addr, unsigned long size) -{ - unsigned long end; - - if (((size | addr) & (PAGE_SIZE - 1)) == 0) { - end = addr + size; - do { - tx39_blast_dcache_page(addr); - addr += PAGE_SIZE; - } while(addr != end); - } else if (size > dcache_size) { - tx39_blast_dcache(); - } else { - blast_dcache_range(addr, addr + size); - } -} - -static void tx39_dma_cache_inv(unsigned long addr, unsigned long size) -{ - unsigned long end; - - if (((size | addr) & (PAGE_SIZE - 1)) == 0) { - end = addr + size; - do { - tx39_blast_dcache_page(addr); - addr += PAGE_SIZE; - } while(addr != end); - } else if (size > dcache_size) { - tx39_blast_dcache(); - } else { - blast_inv_dcache_range(addr, addr + size); - } -} - -static void tx39_flush_cache_sigtramp(unsigned long addr) -{ - unsigned long ic_lsize = current_cpu_data.icache.linesz; - unsigned long dc_lsize = current_cpu_data.dcache.linesz; - unsigned long config; - unsigned long flags; - - protected_writeback_dcache_line(addr & ~(dc_lsize - 1)); - - /* disable icache (set ICE#) */ - local_irq_save(flags); - config = read_c0_conf(); - write_c0_conf(config & ~TX39_CONF_ICE); - TX39_STOP_STREAMING(); - protected_flush_icache_line(addr & ~(ic_lsize - 1)); - write_c0_conf(config); - local_irq_restore(flags); -} - -static __init void tx39_probe_cache(void) -{ - unsigned long config; - - config = read_c0_conf(); - - icache_size = 1 << (10 + ((config & TX39_CONF_ICS_MASK) >> - TX39_CONF_ICS_SHIFT)); - dcache_size = 1 << (10 + ((config & TX39_CONF_DCS_MASK) >> - TX39_CONF_DCS_SHIFT)); - - current_cpu_data.icache.linesz = 16; - switch (current_cpu_type()) { - case CPU_TX3912: - current_cpu_data.icache.ways = 1; - current_cpu_data.dcache.ways = 1; - current_cpu_data.dcache.linesz = 4; - break; - - case CPU_TX3927: - current_cpu_data.icache.ways = 2; - current_cpu_data.dcache.ways = 2; - current_cpu_data.dcache.linesz = 16; - break; - - case CPU_TX3922: - default: - current_cpu_data.icache.ways = 1; - current_cpu_data.dcache.ways = 1; - current_cpu_data.dcache.linesz = 16; - break; - } -} - -void __cpuinit tx39_cache_init(void) -{ - extern void build_clear_page(void); - extern void build_copy_page(void); - unsigned long config; - - config = read_c0_conf(); - config &= ~TX39_CONF_WBON; - write_c0_conf(config); - - tx39_probe_cache(); - - switch (current_cpu_type()) { - case CPU_TX3912: - /* TX39/H core (writethru direct-map cache) */ - __flush_cache_vmap = tx39__flush_cache_vmap; - __flush_cache_vunmap = tx39__flush_cache_vunmap; - flush_cache_all = tx39h_flush_icache_all; - __flush_cache_all = tx39h_flush_icache_all; - flush_cache_mm = (void *) tx39h_flush_icache_all; - flush_cache_range = (void *) tx39h_flush_icache_all; - flush_cache_page = (void *) tx39h_flush_icache_all; - flush_icache_range = (void *) tx39h_flush_icache_all; - local_flush_icache_range = (void *) tx39h_flush_icache_all; - - flush_cache_sigtramp = (void *) tx39h_flush_icache_all; - local_flush_data_cache_page = (void *) tx39h_flush_icache_all; - flush_data_cache_page = (void *) tx39h_flush_icache_all; - - _dma_cache_wback_inv = tx39h_dma_cache_wback_inv; - - shm_align_mask = PAGE_SIZE - 1; - - break; - - case CPU_TX3922: - case CPU_TX3927: - default: - /* TX39/H2,H3 core (writeback 2way-set-associative cache) */ - r3k_have_wired_reg = 1; - write_c0_wired(0); /* set 8 on reset... */ - /* board-dependent init code may set WBON */ - - __flush_cache_vmap = tx39__flush_cache_vmap; - __flush_cache_vunmap = tx39__flush_cache_vunmap; - - flush_cache_all = tx39_flush_cache_all; - __flush_cache_all = tx39___flush_cache_all; - flush_cache_mm = tx39_flush_cache_mm; - flush_cache_range = tx39_flush_cache_range; - flush_cache_page = tx39_flush_cache_page; - flush_icache_range = tx39_flush_icache_range; - local_flush_icache_range = tx39_flush_icache_range; - - __flush_kernel_vmap_range = tx39_flush_kernel_vmap_range; - - flush_cache_sigtramp = tx39_flush_cache_sigtramp; - local_flush_data_cache_page = local_tx39_flush_data_cache_page; - flush_data_cache_page = tx39_flush_data_cache_page; - - _dma_cache_wback_inv = tx39_dma_cache_wback_inv; - _dma_cache_wback = tx39_dma_cache_wback_inv; - _dma_cache_inv = tx39_dma_cache_inv; - - shm_align_mask = max_t(unsigned long, - (dcache_size / current_cpu_data.dcache.ways) - 1, - PAGE_SIZE - 1); - - break; - } - - current_cpu_data.icache.waysize = icache_size / current_cpu_data.icache.ways; - current_cpu_data.dcache.waysize = dcache_size / current_cpu_data.dcache.ways; - - current_cpu_data.icache.sets = - current_cpu_data.icache.waysize / current_cpu_data.icache.linesz; - current_cpu_data.dcache.sets = - current_cpu_data.dcache.waysize / current_cpu_data.dcache.linesz; - - if (current_cpu_data.dcache.waysize > PAGE_SIZE) - current_cpu_data.dcache.flags |= MIPS_CACHE_ALIASES; - - current_cpu_data.icache.waybit = 0; - current_cpu_data.dcache.waybit = 0; - - printk("Primary instruction cache %ldkB, linesize %d bytes\n", - icache_size >> 10, current_cpu_data.icache.linesz); - printk("Primary data cache %ldkB, linesize %d bytes\n", - dcache_size >> 10, current_cpu_data.dcache.linesz); - - build_clear_page(); - build_copy_page(); - tx39h_flush_icache_all(); -} diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index 5aeb3eb0b72f..e3b4224c9a40 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -8,48 +8,71 @@ */ #include <linux/fs.h> #include <linux/fcntl.h> -#include <linux/init.h> #include <linux/kernel.h> #include <linux/linkage.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/sched.h> #include <linux/syscalls.h> #include <linux/mm.h> +#include <linux/highmem.h> +#include <linux/pagemap.h> +#include <asm/bcache.h> #include <asm/cacheflush.h> #include <asm/processor.h> #include <asm/cpu.h> #include <asm/cpu-features.h> +#include <asm/setup.h> +#include <asm/pgtable.h> /* Cache operations. */ void (*flush_cache_all)(void); void (*__flush_cache_all)(void); +EXPORT_SYMBOL_GPL(__flush_cache_all); void (*flush_cache_mm)(struct mm_struct *mm); void (*flush_cache_range)(struct vm_area_struct *vma, unsigned long start, unsigned long end); void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page, unsigned long pfn); void (*flush_icache_range)(unsigned long start, unsigned long end); +EXPORT_SYMBOL_GPL(flush_icache_range); void (*local_flush_icache_range)(unsigned long start, unsigned long end); +EXPORT_SYMBOL_GPL(local_flush_icache_range); +void (*__flush_icache_user_range)(unsigned long start, unsigned long end); +void (*__local_flush_icache_user_range)(unsigned long start, unsigned long end); +EXPORT_SYMBOL_GPL(__local_flush_icache_user_range); void (*__flush_cache_vmap)(void); void (*__flush_cache_vunmap)(void); void (*__flush_kernel_vmap_range)(unsigned long vaddr, int size); -void (*__invalidate_kernel_vmap_range)(unsigned long vaddr, int size); - EXPORT_SYMBOL_GPL(__flush_kernel_vmap_range); /* MIPS specific cache operations */ -void (*flush_cache_sigtramp)(unsigned long addr); -void (*local_flush_data_cache_page)(void * addr); void (*flush_data_cache_page)(unsigned long addr); void (*flush_icache_all)(void); -EXPORT_SYMBOL_GPL(local_flush_data_cache_page); EXPORT_SYMBOL(flush_data_cache_page); EXPORT_SYMBOL(flush_icache_all); +/* + * Dummy cache handling routine + */ + +void cache_noop(void) {} + +#ifdef CONFIG_BOARD_SCACHE + +static struct bcache_ops no_sc_ops = { + .bc_enable = (void *)cache_noop, + .bc_disable = (void *)cache_noop, + .bc_wback_inv = (void *)cache_noop, + .bc_inv = (void *)cache_noop +}; + +struct bcache_ops *bcops = &no_sc_ops; +#endif + #ifdef CONFIG_DMA_NONCOHERENT /* DMA cache operations. */ @@ -57,8 +80,6 @@ void (*_dma_cache_wback_inv)(unsigned long start, unsigned long size); void (*_dma_cache_wback)(unsigned long start, unsigned long size); void (*_dma_cache_inv)(unsigned long start, unsigned long size); -EXPORT_SYMBOL(_dma_cache_wback_inv); - #endif /* CONFIG_DMA_NONCOHERENT */ /* @@ -70,43 +91,46 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes, { if (bytes == 0) return 0; - if (!access_ok(VERIFY_WRITE, (void __user *) addr, bytes)) + if (!access_ok((void __user *) addr, bytes)) return -EFAULT; - flush_icache_range(addr, addr + bytes); + __flush_icache_user_range(addr, addr + bytes); return 0; } -void __flush_dcache_page(struct page *page) +void __flush_dcache_folio_pages(struct folio *folio, struct page *page, + unsigned int nr) { - struct address_space *mapping = page_mapping(page); + struct address_space *mapping = folio_flush_mapping(folio); unsigned long addr; + unsigned int i; - if (PageHighMem(page)) - return; if (mapping && !mapping_mapped(mapping)) { - SetPageDcacheDirty(page); + folio_set_dcache_dirty(folio); return; } /* - * We could delay the flush for the !page_mapping case too. But that + * We could delay the flush for the !folio_mapping case too. But that * case is for exec env/arg pages and those are %99 certainly going to * get faulted into the tlb (and thus flushed) anyways. */ - addr = (unsigned long) page_address(page); - flush_data_cache_page(addr); + for (i = 0; i < nr; i++) { + addr = (unsigned long)kmap_local_page(page + i); + flush_data_cache_page(addr); + kunmap_local((void *)addr); + } } - -EXPORT_SYMBOL(__flush_dcache_page); +EXPORT_SYMBOL(__flush_dcache_folio_pages); void __flush_anon_page(struct page *page, unsigned long vmaddr) { unsigned long addr = (unsigned long) page_address(page); + struct folio *folio = page_folio(page); if (pages_do_alias(addr, vmaddr)) { - if (page_mapped(page) && !Page_dcache_dirty(page)) { + if (folio_mapped(folio) && !folio_test_dcache_dirty(folio)) { void *kaddr; kaddr = kmap_coherent(page, vmaddr); @@ -119,110 +143,75 @@ void __flush_anon_page(struct page *page, unsigned long vmaddr) EXPORT_SYMBOL(__flush_anon_page); -void __update_cache(struct vm_area_struct *vma, unsigned long address, - pte_t pte) +void __update_cache(unsigned long address, pte_t pte) { - struct page *page; + struct folio *folio; unsigned long pfn, addr; - int exec = (vma->vm_flags & VM_EXEC) && !cpu_has_ic_fills_f_dc; + int exec = !pte_no_exec(pte) && !cpu_has_ic_fills_f_dc; + unsigned int i; pfn = pte_pfn(pte); if (unlikely(!pfn_valid(pfn))) return; - page = pfn_to_page(pfn); - if (page_mapping(page) && Page_dcache_dirty(page)) { - addr = (unsigned long) page_address(page); - if (exec || pages_do_alias(addr, address & PAGE_MASK)) - flush_data_cache_page(addr); - ClearPageDcacheDirty(page); + + folio = page_folio(pfn_to_page(pfn)); + address &= PAGE_MASK; + address -= offset_in_folio(folio, pfn << PAGE_SHIFT); + + if (folio_test_dcache_dirty(folio)) { + for (i = 0; i < folio_nr_pages(folio); i++) { + addr = (unsigned long)kmap_local_folio(folio, i); + + if (exec || pages_do_alias(addr, address)) + flush_data_cache_page(addr); + kunmap_local((void *)addr); + address += PAGE_SIZE; + } + folio_clear_dcache_dirty(folio); } } unsigned long _page_cachable_default; EXPORT_SYMBOL(_page_cachable_default); +#define PM(p) __pgprot(_page_cachable_default | (p)) + +static pgprot_t protection_map[16] __ro_after_init; +DECLARE_VM_GET_PAGE_PROT + static inline void setup_protection_map(void) { - if (cpu_has_rixi) { - protection_map[0] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); - protection_map[1] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC); - protection_map[2] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); - protection_map[3] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC); - protection_map[4] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_READ); - protection_map[5] = __pgprot(_page_cachable_default | _PAGE_PRESENT); - protection_map[6] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_READ); - protection_map[7] = __pgprot(_page_cachable_default | _PAGE_PRESENT); - - protection_map[8] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); - protection_map[9] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC); - protection_map[10] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE | _PAGE_NO_READ); - protection_map[11] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE); - protection_map[12] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_READ); - protection_map[13] = __pgprot(_page_cachable_default | _PAGE_PRESENT); - protection_map[14] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_WRITE | _PAGE_NO_READ); - protection_map[15] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_WRITE); - - } else { - protection_map[0] = PAGE_NONE; - protection_map[1] = PAGE_READONLY; - protection_map[2] = PAGE_COPY; - protection_map[3] = PAGE_COPY; - protection_map[4] = PAGE_READONLY; - protection_map[5] = PAGE_READONLY; - protection_map[6] = PAGE_COPY; - protection_map[7] = PAGE_COPY; - protection_map[8] = PAGE_NONE; - protection_map[9] = PAGE_READONLY; - protection_map[10] = PAGE_SHARED; - protection_map[11] = PAGE_SHARED; - protection_map[12] = PAGE_READONLY; - protection_map[13] = PAGE_READONLY; - protection_map[14] = PAGE_SHARED; - protection_map[15] = PAGE_SHARED; - } + protection_map[0] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); + protection_map[1] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[2] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); + protection_map[3] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[4] = PM(_PAGE_PRESENT); + protection_map[5] = PM(_PAGE_PRESENT); + protection_map[6] = PM(_PAGE_PRESENT); + protection_map[7] = PM(_PAGE_PRESENT); + + protection_map[8] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); + protection_map[9] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[10] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE | + _PAGE_NO_READ); + protection_map[11] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE); + protection_map[12] = PM(_PAGE_PRESENT); + protection_map[13] = PM(_PAGE_PRESENT); + protection_map[14] = PM(_PAGE_PRESENT | _PAGE_WRITE); + protection_map[15] = PM(_PAGE_PRESENT | _PAGE_WRITE); } -void __cpuinit cpu_cache_init(void) -{ - if (cpu_has_3k_cache) { - extern void __weak r3k_cache_init(void); +#undef PM +void cpu_cache_init(void) +{ + if (IS_ENABLED(CONFIG_CPU_R3000) && cpu_has_3k_cache) r3k_cache_init(); - } - if (cpu_has_6k_cache) { - extern void __weak r6k_cache_init(void); - - r6k_cache_init(); - } - if (cpu_has_4k_cache) { - extern void __weak r4k_cache_init(void); - + if (IS_ENABLED(CONFIG_CPU_R4K_CACHE_TLB) && cpu_has_4k_cache) r4k_cache_init(); - } - if (cpu_has_8k_cache) { - extern void __weak r8k_cache_init(void); - - r8k_cache_init(); - } - if (cpu_has_tx39_cache) { - extern void __weak tx39_cache_init(void); - - tx39_cache_init(); - } - - if (cpu_has_octeon_cache) { - extern void __weak octeon_cache_init(void); + if (IS_ENABLED(CONFIG_CPU_CAVIUM_OCTEON) && cpu_has_octeon_cache) octeon_cache_init(); - } setup_protection_map(); } - -int __weak __uncached_access(struct file *file, unsigned long addr) -{ - if (file->f_flags & O_DSYNC) - return 1; - - return addr >= __pa(high_memory); -} diff --git a/arch/mips/mm/cerr-sb1.c b/arch/mips/mm/cerr-sb1.c index ee5c1ff861ae..a3c02df19f6f 100644 --- a/arch/mips/mm/cerr-sb1.c +++ b/arch/mips/mm/cerr-sb1.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2001,2002,2003 Broadcom Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include <linux/sched.h> #include <asm/mipsregs.h> diff --git a/arch/mips/mm/cex-gen.S b/arch/mips/mm/cex-gen.S index 45dff5cd4b8e..e528583d1331 100644 --- a/arch/mips/mm/cex-gen.S +++ b/arch/mips/mm/cex-gen.S @@ -25,7 +25,7 @@ * This is a very bad place to be. Our cache error * detection has triggered. If we have write-back data * in the cache, we may not be able to recover. As a - * first-order desperate measure, turn off KSEG0 cacheing. + * first-order desperate measure, turn off KSEG0 caching. */ mfc0 k0,CP0_CONFIG li k1,~CONF_CM_CMASK diff --git a/arch/mips/mm/cex-sb1.S b/arch/mips/mm/cex-sb1.S index fe1d887e8d70..85c6e6a40b5f 100644 --- a/arch/mips/mm/cex-sb1.S +++ b/arch/mips/mm/cex-sb1.S @@ -1,21 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) 2001,2002,2003 Broadcom Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <linux/init.h> #include <asm/asm.h> #include <asm/regdef.h> @@ -49,8 +35,6 @@ * (0x170-0x17f) are used to preserve k0, k1, and ra. */ - __CPUINIT - LEAF(except_vec2_sb1) /* * If this error is recoverable, we need to exit the handler @@ -142,8 +126,6 @@ unrecoverable: END(except_vec2_sb1) - __FINIT - LEAF(handle_vec2_sb1) mfc0 k0,CP0_CONFIG li k1,~CONF_CM_CMASK diff --git a/arch/mips/mm/context.c b/arch/mips/mm/context.c new file mode 100644 index 000000000000..966f40066f03 --- /dev/null +++ b/arch/mips/mm/context.c @@ -0,0 +1,290 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/atomic.h> +#include <linux/mmu_context.h> +#include <linux/percpu.h> +#include <linux/spinlock.h> + +static DEFINE_RAW_SPINLOCK(cpu_mmid_lock); + +static atomic64_t mmid_version; +static unsigned int num_mmids; +static unsigned long *mmid_map; + +static DEFINE_PER_CPU(u64, reserved_mmids); +static cpumask_t tlb_flush_pending; + +static bool asid_versions_eq(int cpu, u64 a, u64 b) +{ + return ((a ^ b) & asid_version_mask(cpu)) == 0; +} + +void get_new_mmu_context(struct mm_struct *mm) +{ + unsigned int cpu; + u64 asid; + + /* + * This function is specific to ASIDs, and should not be called when + * MMIDs are in use. + */ + if (WARN_ON(IS_ENABLED(CONFIG_DEBUG_VM) && cpu_has_mmid)) + return; + + cpu = smp_processor_id(); + asid = asid_cache(cpu); + + if (!((asid += cpu_asid_inc()) & cpu_asid_mask(&cpu_data[cpu]))) { + if (cpu_has_vtag_icache) + flush_icache_all(); + local_flush_tlb_all(); /* start new asid cycle */ + } + + set_cpu_context(cpu, mm, asid); + asid_cache(cpu) = asid; +} +EXPORT_SYMBOL_GPL(get_new_mmu_context); + +void check_mmu_context(struct mm_struct *mm) +{ + unsigned int cpu = smp_processor_id(); + + /* + * This function is specific to ASIDs, and should not be called when + * MMIDs are in use. + */ + if (WARN_ON(IS_ENABLED(CONFIG_DEBUG_VM) && cpu_has_mmid)) + return; + + /* Check if our ASID is of an older version and thus invalid */ + if (!asid_versions_eq(cpu, cpu_context(cpu, mm), asid_cache(cpu))) + get_new_mmu_context(mm); +} +EXPORT_SYMBOL_GPL(check_mmu_context); + +static void flush_context(void) +{ + u64 mmid; + int cpu; + + /* Update the list of reserved MMIDs and the MMID bitmap */ + bitmap_zero(mmid_map, num_mmids); + + /* Reserve an MMID for kmap/wired entries */ + __set_bit(MMID_KERNEL_WIRED, mmid_map); + + for_each_possible_cpu(cpu) { + mmid = xchg_relaxed(&cpu_data[cpu].asid_cache, 0); + + /* + * If this CPU has already been through a + * rollover, but hasn't run another task in + * the meantime, we must preserve its reserved + * MMID, as this is the only trace we have of + * the process it is still running. + */ + if (mmid == 0) + mmid = per_cpu(reserved_mmids, cpu); + + __set_bit(mmid & cpu_asid_mask(&cpu_data[cpu]), mmid_map); + per_cpu(reserved_mmids, cpu) = mmid; + } + + /* + * Queue a TLB invalidation for each CPU to perform on next + * context-switch + */ + cpumask_setall(&tlb_flush_pending); +} + +static bool check_update_reserved_mmid(u64 mmid, u64 newmmid) +{ + bool hit; + int cpu; + + /* + * Iterate over the set of reserved MMIDs looking for a match. + * If we find one, then we can update our mm to use newmmid + * (i.e. the same MMID in the current generation) but we can't + * exit the loop early, since we need to ensure that all copies + * of the old MMID are updated to reflect the mm. Failure to do + * so could result in us missing the reserved MMID in a future + * generation. + */ + hit = false; + for_each_possible_cpu(cpu) { + if (per_cpu(reserved_mmids, cpu) == mmid) { + hit = true; + per_cpu(reserved_mmids, cpu) = newmmid; + } + } + + return hit; +} + +static u64 get_new_mmid(struct mm_struct *mm) +{ + static u32 cur_idx = MMID_KERNEL_WIRED + 1; + u64 mmid, version, mmid_mask; + + mmid = cpu_context(0, mm); + version = atomic64_read(&mmid_version); + mmid_mask = cpu_asid_mask(&boot_cpu_data); + + if (!asid_versions_eq(0, mmid, 0)) { + u64 newmmid = version | (mmid & mmid_mask); + + /* + * If our current MMID was active during a rollover, we + * can continue to use it and this was just a false alarm. + */ + if (check_update_reserved_mmid(mmid, newmmid)) { + mmid = newmmid; + goto set_context; + } + + /* + * We had a valid MMID in a previous life, so try to re-use + * it if possible. + */ + if (!__test_and_set_bit(mmid & mmid_mask, mmid_map)) { + mmid = newmmid; + goto set_context; + } + } + + /* Allocate a free MMID */ + mmid = find_next_zero_bit(mmid_map, num_mmids, cur_idx); + if (mmid != num_mmids) + goto reserve_mmid; + + /* We're out of MMIDs, so increment the global version */ + version = atomic64_add_return_relaxed(asid_first_version(0), + &mmid_version); + + /* Note currently active MMIDs & mark TLBs as requiring flushes */ + flush_context(); + + /* We have more MMIDs than CPUs, so this will always succeed */ + mmid = find_first_zero_bit(mmid_map, num_mmids); + +reserve_mmid: + __set_bit(mmid, mmid_map); + cur_idx = mmid; + mmid |= version; +set_context: + set_cpu_context(0, mm, mmid); + return mmid; +} + +void check_switch_mmu_context(struct mm_struct *mm) +{ + unsigned int cpu = smp_processor_id(); + u64 ctx, old_active_mmid; + unsigned long flags; + + if (!cpu_has_mmid) { + check_mmu_context(mm); + write_c0_entryhi(cpu_asid(cpu, mm)); + goto setup_pgd; + } + + /* + * MMID switch fast-path, to avoid acquiring cpu_mmid_lock when it's + * unnecessary. + * + * The memory ordering here is subtle. If our active_mmids is non-zero + * and the MMID matches the current version, then we update the CPU's + * asid_cache with a relaxed cmpxchg. Racing with a concurrent rollover + * means that either: + * + * - We get a zero back from the cmpxchg and end up waiting on + * cpu_mmid_lock in check_mmu_context(). Taking the lock synchronises + * with the rollover and so we are forced to see the updated + * generation. + * + * - We get a valid MMID back from the cmpxchg, which means the + * relaxed xchg in flush_context will treat us as reserved + * because atomic RmWs are totally ordered for a given location. + */ + ctx = cpu_context(cpu, mm); + old_active_mmid = READ_ONCE(cpu_data[cpu].asid_cache); + if (!old_active_mmid || + !asid_versions_eq(cpu, ctx, atomic64_read(&mmid_version)) || + !cmpxchg_relaxed(&cpu_data[cpu].asid_cache, old_active_mmid, ctx)) { + raw_spin_lock_irqsave(&cpu_mmid_lock, flags); + + ctx = cpu_context(cpu, mm); + if (!asid_versions_eq(cpu, ctx, atomic64_read(&mmid_version))) + ctx = get_new_mmid(mm); + + WRITE_ONCE(cpu_data[cpu].asid_cache, ctx); + raw_spin_unlock_irqrestore(&cpu_mmid_lock, flags); + } + + /* + * Invalidate the local TLB if needed. Note that we must only clear our + * bit in tlb_flush_pending after this is complete, so that the + * cpu_has_shared_ftlb_entries case below isn't misled. + */ + if (cpumask_test_cpu(cpu, &tlb_flush_pending)) { + if (cpu_has_vtag_icache) + flush_icache_all(); + local_flush_tlb_all(); + cpumask_clear_cpu(cpu, &tlb_flush_pending); + } + + write_c0_memorymapid(ctx & cpu_asid_mask(&boot_cpu_data)); + + /* + * If this CPU shares FTLB entries with its siblings and one or more of + * those siblings hasn't yet invalidated its TLB following a version + * increase then we need to invalidate any TLB entries for our MMID + * that we might otherwise pick up from a sibling. + * + * We ifdef on CONFIG_SMP because cpu_sibling_map isn't defined in + * CONFIG_SMP=n kernels. + */ +#ifdef CONFIG_SMP + if (cpu_has_shared_ftlb_entries && + cpumask_intersects(&tlb_flush_pending, &cpu_sibling_map[cpu])) { + /* Ensure we operate on the new MMID */ + mtc0_tlbw_hazard(); + + /* + * Invalidate all TLB entries associated with the new + * MMID, and wait for the invalidation to complete. + */ + ginvt_mmid(); + sync_ginv(); + } +#endif + +setup_pgd: + TLBMISS_HANDLER_SETUP_PGD(mm->pgd); +} +EXPORT_SYMBOL_GPL(check_switch_mmu_context); + +static int mmid_init(void) +{ + if (!cpu_has_mmid) + return 0; + + /* + * Expect allocation after rollover to fail if we don't have at least + * one more MMID than CPUs. + */ + num_mmids = asid_first_version(0); + WARN_ON(num_mmids <= num_possible_cpus()); + + atomic64_set(&mmid_version, asid_first_version(0)); + mmid_map = bitmap_zalloc(num_mmids, GFP_KERNEL); + if (!mmid_map) + panic("Failed to allocate bitmap for %u MMIDs\n", num_mmids); + + /* Reserve an MMID for kmap/wired entries */ + __set_bit(MMID_KERNEL_WIRED, mmid_map); + + pr_info("MMID allocator initialised with %u entries\n", num_mmids); + return 0; +} +early_initcall(mmid_init); diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c deleted file mode 100644 index aaccf1c10699..000000000000 --- a/arch/mips/mm/dma-default.c +++ /dev/null @@ -1,375 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2000 Ani Joshi <ajoshi@unixbox.com> - * Copyright (C) 2000, 2001, 06 Ralf Baechle <ralf@linux-mips.org> - * swiped from i386, and cloned for MIPS by Geert, polished by Ralf. - */ - -#include <linux/types.h> -#include <linux/dma-mapping.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/scatterlist.h> -#include <linux/string.h> -#include <linux/gfp.h> -#include <linux/highmem.h> - -#include <asm/cache.h> -#include <asm/io.h> - -#include <dma-coherence.h> - -int coherentio = 0; /* User defined DMA coherency from command line. */ -EXPORT_SYMBOL_GPL(coherentio); -int hw_coherentio = 0; /* Actual hardware supported DMA coherency setting. */ - -static int __init setcoherentio(char *str) -{ - coherentio = 1; - pr_info("Hardware DMA cache coherency (command line)\n"); - return 0; -} -early_param("coherentio", setcoherentio); - -static int __init setnocoherentio(char *str) -{ - coherentio = 0; - pr_info("Software DMA cache coherency (command line)\n"); - return 0; -} -early_param("nocoherentio", setnocoherentio); - -static inline struct page *dma_addr_to_page(struct device *dev, - dma_addr_t dma_addr) -{ - return pfn_to_page( - plat_dma_addr_to_phys(dev, dma_addr) >> PAGE_SHIFT); -} - -/* - * Warning on the terminology - Linux calls an uncached area coherent; - * MIPS terminology calls memory areas with hardware maintained coherency - * coherent. - */ - -static inline int cpu_is_noncoherent_r10000(struct device *dev) -{ - return !plat_device_is_coherent(dev) && - (current_cpu_type() == CPU_R10000 || - current_cpu_type() == CPU_R12000); -} - -static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp) -{ - gfp_t dma_flag; - - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); - -#ifdef CONFIG_ISA - if (dev == NULL) - dma_flag = __GFP_DMA; - else -#endif -#if defined(CONFIG_ZONE_DMA32) && defined(CONFIG_ZONE_DMA) - if (dev->coherent_dma_mask < DMA_BIT_MASK(32)) - dma_flag = __GFP_DMA; - else if (dev->coherent_dma_mask < DMA_BIT_MASK(64)) - dma_flag = __GFP_DMA32; - else -#endif -#if defined(CONFIG_ZONE_DMA32) && !defined(CONFIG_ZONE_DMA) - if (dev->coherent_dma_mask < DMA_BIT_MASK(64)) - dma_flag = __GFP_DMA32; - else -#endif -#if defined(CONFIG_ZONE_DMA) && !defined(CONFIG_ZONE_DMA32) - if (dev->coherent_dma_mask < DMA_BIT_MASK(64)) - dma_flag = __GFP_DMA; - else -#endif - dma_flag = 0; - - /* Don't invoke OOM killer */ - gfp |= __GFP_NORETRY; - - return gfp | dma_flag; -} - -void *dma_alloc_noncoherent(struct device *dev, size_t size, - dma_addr_t * dma_handle, gfp_t gfp) -{ - void *ret; - - gfp = massage_gfp_flags(dev, gfp); - - ret = (void *) __get_free_pages(gfp, get_order(size)); - - if (ret != NULL) { - memset(ret, 0, size); - *dma_handle = plat_map_dma_mem(dev, ret, size); - } - - return ret; -} -EXPORT_SYMBOL(dma_alloc_noncoherent); - -static void *mips_dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t * dma_handle, gfp_t gfp, struct dma_attrs *attrs) -{ - void *ret; - - if (dma_alloc_from_coherent(dev, size, dma_handle, &ret)) - return ret; - - gfp = massage_gfp_flags(dev, gfp); - - ret = (void *) __get_free_pages(gfp, get_order(size)); - - if (ret) { - memset(ret, 0, size); - *dma_handle = plat_map_dma_mem(dev, ret, size); - - if (!plat_device_is_coherent(dev)) { - dma_cache_wback_inv((unsigned long) ret, size); - if (!hw_coherentio) - ret = UNCAC_ADDR(ret); - } - } - - return ret; -} - - -void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle) -{ - plat_unmap_dma_mem(dev, dma_handle, size, DMA_BIDIRECTIONAL); - free_pages((unsigned long) vaddr, get_order(size)); -} -EXPORT_SYMBOL(dma_free_noncoherent); - -static void mips_dma_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs) -{ - unsigned long addr = (unsigned long) vaddr; - int order = get_order(size); - - if (dma_release_from_coherent(dev, order, vaddr)) - return; - - plat_unmap_dma_mem(dev, dma_handle, size, DMA_BIDIRECTIONAL); - - if (!plat_device_is_coherent(dev) && !hw_coherentio) - addr = CAC_ADDR(addr); - - free_pages(addr, get_order(size)); -} - -static inline void __dma_sync_virtual(void *addr, size_t size, - enum dma_data_direction direction) -{ - switch (direction) { - case DMA_TO_DEVICE: - dma_cache_wback((unsigned long)addr, size); - break; - - case DMA_FROM_DEVICE: - dma_cache_inv((unsigned long)addr, size); - break; - - case DMA_BIDIRECTIONAL: - dma_cache_wback_inv((unsigned long)addr, size); - break; - - default: - BUG(); - } -} - -/* - * A single sg entry may refer to multiple physically contiguous - * pages. But we still need to process highmem pages individually. - * If highmem is not configured then the bulk of this loop gets - * optimized out. - */ -static inline void __dma_sync(struct page *page, - unsigned long offset, size_t size, enum dma_data_direction direction) -{ - size_t left = size; - - do { - size_t len = left; - - if (PageHighMem(page)) { - void *addr; - - if (offset + len > PAGE_SIZE) { - if (offset >= PAGE_SIZE) { - page += offset >> PAGE_SHIFT; - offset &= ~PAGE_MASK; - } - len = PAGE_SIZE - offset; - } - - addr = kmap_atomic(page); - __dma_sync_virtual(addr + offset, len, direction); - kunmap_atomic(addr); - } else - __dma_sync_virtual(page_address(page) + offset, - size, direction); - offset = 0; - page++; - left -= len; - } while (left); -} - -static void mips_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, - size_t size, enum dma_data_direction direction, struct dma_attrs *attrs) -{ - if (cpu_is_noncoherent_r10000(dev)) - __dma_sync(dma_addr_to_page(dev, dma_addr), - dma_addr & ~PAGE_MASK, size, direction); - - plat_unmap_dma_mem(dev, dma_addr, size, direction); -} - -static int mips_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction direction, struct dma_attrs *attrs) -{ - int i; - - for (i = 0; i < nents; i++, sg++) { - if (!plat_device_is_coherent(dev)) - __dma_sync(sg_page(sg), sg->offset, sg->length, - direction); -#ifdef CONFIG_NEED_SG_DMA_LENGTH - sg->dma_length = sg->length; -#endif - sg->dma_address = plat_map_dma_mem_page(dev, sg_page(sg)) + - sg->offset; - } - - return nents; -} - -static dma_addr_t mips_dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) -{ - if (!plat_device_is_coherent(dev)) - __dma_sync(page, offset, size, direction); - - return plat_map_dma_mem_page(dev, page) + offset; -} - -static void mips_dma_unmap_sg(struct device *dev, struct scatterlist *sg, - int nhwentries, enum dma_data_direction direction, - struct dma_attrs *attrs) -{ - int i; - - for (i = 0; i < nhwentries; i++, sg++) { - if (!plat_device_is_coherent(dev) && - direction != DMA_TO_DEVICE) - __dma_sync(sg_page(sg), sg->offset, sg->length, - direction); - plat_unmap_dma_mem(dev, sg->dma_address, sg->length, direction); - } -} - -static void mips_dma_sync_single_for_cpu(struct device *dev, - dma_addr_t dma_handle, size_t size, enum dma_data_direction direction) -{ - if (cpu_is_noncoherent_r10000(dev)) - __dma_sync(dma_addr_to_page(dev, dma_handle), - dma_handle & ~PAGE_MASK, size, direction); -} - -static void mips_dma_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, size_t size, enum dma_data_direction direction) -{ - plat_extra_sync_for_device(dev); - if (!plat_device_is_coherent(dev)) - __dma_sync(dma_addr_to_page(dev, dma_handle), - dma_handle & ~PAGE_MASK, size, direction); -} - -static void mips_dma_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sg, int nelems, enum dma_data_direction direction) -{ - int i; - - /* Make sure that gcc doesn't leave the empty loop body. */ - for (i = 0; i < nelems; i++, sg++) { - if (cpu_is_noncoherent_r10000(dev)) - __dma_sync(sg_page(sg), sg->offset, sg->length, - direction); - } -} - -static void mips_dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sg, int nelems, enum dma_data_direction direction) -{ - int i; - - /* Make sure that gcc doesn't leave the empty loop body. */ - for (i = 0; i < nelems; i++, sg++) { - if (!plat_device_is_coherent(dev)) - __dma_sync(sg_page(sg), sg->offset, sg->length, - direction); - } -} - -int mips_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return plat_dma_mapping_error(dev, dma_addr); -} - -int mips_dma_supported(struct device *dev, u64 mask) -{ - return plat_dma_supported(dev, mask); -} - -void dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); - - plat_extra_sync_for_device(dev); - if (!plat_device_is_coherent(dev)) - __dma_sync_virtual(vaddr, size, direction); -} - -EXPORT_SYMBOL(dma_cache_sync); - -static struct dma_map_ops mips_default_dma_map_ops = { - .alloc = mips_dma_alloc_coherent, - .free = mips_dma_free_coherent, - .map_page = mips_dma_map_page, - .unmap_page = mips_dma_unmap_page, - .map_sg = mips_dma_map_sg, - .unmap_sg = mips_dma_unmap_sg, - .sync_single_for_cpu = mips_dma_sync_single_for_cpu, - .sync_single_for_device = mips_dma_sync_single_for_device, - .sync_sg_for_cpu = mips_dma_sync_sg_for_cpu, - .sync_sg_for_device = mips_dma_sync_sg_for_device, - .mapping_error = mips_dma_mapping_error, - .dma_supported = mips_dma_supported -}; - -struct dma_map_ops *mips_dma_map_ops = &mips_default_dma_map_ops; -EXPORT_SYMBOL(mips_dma_map_ops); - -#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) - -static int __init mips_dma_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - - return 0; -} -fs_initcall(mips_dma_init); diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c new file mode 100644 index 000000000000..ab4f2a75a7d0 --- /dev/null +++ b/arch/mips/mm/dma-noncoherent.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2000 Ani Joshi <ajoshi@unixbox.com> + * Copyright (C) 2000, 2001, 06 Ralf Baechle <ralf@linux-mips.org> + * swiped from i386, and cloned for MIPS by Geert, polished by Ralf. + */ +#include <linux/dma-direct.h> +#include <linux/dma-map-ops.h> +#include <linux/highmem.h> + +#include <asm/cache.h> +#include <asm/cpu-type.h> +#include <asm/io.h> + +/* + * The affected CPUs below in 'cpu_needs_post_dma_flush()' can speculatively + * fill random cachelines with stale data at any time, requiring an extra + * flush post-DMA. + * + * Warning on the terminology - Linux calls an uncached area coherent; MIPS + * terminology calls memory areas with hardware maintained coherency coherent. + * + * Note that the R14000 and R16000 should also be checked for in this condition. + * However this function is only called on non-I/O-coherent systems and only the + * R10000 and R12000 are used in such systems, the SGI IP28 Indigo² rsp. + * SGI IP32 aka O2. + */ +static inline bool cpu_needs_post_dma_flush(void) +{ + switch (boot_cpu_type()) { + case CPU_R10000: + case CPU_R12000: + case CPU_BMIPS5000: + case CPU_LOONGSON2EF: + case CPU_XBURST: + return true; + default: + /* + * Presence of MAARs suggests that the CPU supports + * speculatively prefetching data, and therefore requires + * the post-DMA flush/invalidate. + */ + return cpu_has_maar; + } +} + +void arch_dma_prep_coherent(struct page *page, size_t size) +{ + dma_cache_wback_inv((unsigned long)page_address(page), size); +} + +void *arch_dma_set_uncached(void *addr, size_t size) +{ + return (void *)(__pa(addr) + UNCAC_BASE); +} + +static inline void dma_sync_virt_for_device(void *addr, size_t size, + enum dma_data_direction dir) +{ + switch (dir) { + case DMA_TO_DEVICE: + dma_cache_wback((unsigned long)addr, size); + break; + case DMA_FROM_DEVICE: + dma_cache_inv((unsigned long)addr, size); + break; + case DMA_BIDIRECTIONAL: + dma_cache_wback_inv((unsigned long)addr, size); + break; + default: + BUG(); + } +} + +static inline void dma_sync_virt_for_cpu(void *addr, size_t size, + enum dma_data_direction dir) +{ + switch (dir) { + case DMA_TO_DEVICE: + break; + case DMA_FROM_DEVICE: + case DMA_BIDIRECTIONAL: + dma_cache_inv((unsigned long)addr, size); + break; + default: + BUG(); + } +} + +/* + * A single sg entry may refer to multiple physically contiguous pages. But + * we still need to process highmem pages individually. If highmem is not + * configured then the bulk of this loop gets optimized out. + */ +static inline void dma_sync_phys(phys_addr_t paddr, size_t size, + enum dma_data_direction dir, bool for_device) +{ + struct page *page = pfn_to_page(paddr >> PAGE_SHIFT); + unsigned long offset = paddr & ~PAGE_MASK; + size_t left = size; + + do { + size_t len = left; + void *addr; + + if (PageHighMem(page)) { + if (offset + len > PAGE_SIZE) + len = PAGE_SIZE - offset; + } + + addr = kmap_atomic(page); + if (for_device) + dma_sync_virt_for_device(addr + offset, len, dir); + else + dma_sync_virt_for_cpu(addr + offset, len, dir); + kunmap_atomic(addr); + + offset = 0; + page++; + left -= len; + } while (left); +} + +void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) +{ + dma_sync_phys(paddr, size, dir, true); +} + +#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU +void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) +{ + if (cpu_needs_post_dma_flush()) + dma_sync_phys(paddr, size, dir, false); +} +#endif + +#ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS +void arch_setup_dma_ops(struct device *dev, bool coherent) +{ + dev->dma_coherent = coherent; +} +#endif diff --git a/arch/mips/mm/extable.c b/arch/mips/mm/extable.c index 9d25d2ba4b9e..81bc8a34a83f 100644 --- a/arch/mips/mm/extable.c +++ b/arch/mips/mm/extable.c @@ -5,10 +5,10 @@ * * Copyright (C) 1997, 99, 2001 - 2004 Ralf Baechle <ralf@linux-mips.org> */ -#include <linux/module.h> +#include <linux/extable.h> #include <linux/spinlock.h> #include <asm/branch.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> int fixup_exception(struct pt_regs *regs) { diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 85df1cd8d446..37fedeaca2e9 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -14,36 +14,40 @@ #include <linux/string.h> #include <linux/types.h> #include <linux/ptrace.h> +#include <linux/ratelimit.h> #include <linux/mman.h> #include <linux/mm.h> #include <linux/smp.h> -#include <linux/module.h> #include <linux/kprobes.h> #include <linux/perf_event.h> +#include <linux/uaccess.h> #include <asm/branch.h> #include <asm/mmu_context.h> -#include <asm/uaccess.h> #include <asm/ptrace.h> #include <asm/highmem.h> /* For VMALLOC_END */ +#include <asm/traps.h> #include <linux/kdebug.h> +int show_unhandled_signals = 1; + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate * routines. */ -static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, +static void __do_page_fault(struct pt_regs *regs, unsigned long write, unsigned long address) { struct vm_area_struct * vma = NULL; struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; const int field = sizeof(unsigned long) * 2; - siginfo_t info; - int fault; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? FAULT_FLAG_WRITE : 0); + int si_code; + vm_fault_t fault; + unsigned int flags = FAULT_FLAG_DEFAULT; + + static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); #if 0 printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(), @@ -53,16 +57,14 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, #ifdef CONFIG_KPROBES /* - * This is to notify the fault handler of the kprobes. The - * exception code is redundant as it is also carried in REGS, - * but we pass it anyhow. + * This is to notify the fault handler of the kprobes. */ if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1, - (regs->cp0_cause >> 2) & 0x1f, SIGSEGV) == NOTIFY_STOP) + current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) return; #endif - info.si_code = SEGV_MAPERR; + si_code = SEGV_MAPERR; /* * We fault-in kernel-space virtual memory on-demand. The @@ -81,8 +83,8 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END)) goto VMALLOC_FAULT_TARGET; -#ifdef MODULE_START - if (unlikely(address >= MODULE_START && address < MODULE_END)) +#ifdef MODULES_VADDR + if (unlikely(address >= MODULES_VADDR && address < MODULES_END)) goto VMALLOC_FAULT_TARGET; #endif @@ -90,30 +92,27 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto bad_area_nosemaphore; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: - down_read(&mm->mmap_sem); - vma = find_vma(mm, address); + vma = lock_mm_and_find_vma(mm, address, regs); if (!vma) - goto bad_area; - if (vma->vm_start <= address) - goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; - if (expand_stack(vma, address)) - goto bad_area; + goto bad_area_nosemaphore; /* * Ok, we have a good vm_area for this memory access, so * we can handle it.. */ -good_area: - info.si_code = SEGV_ACCERR; + si_code = SEGV_ACCERR; if (write) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; } else { if (cpu_has_rixi) { if (address == regs->cp0_epc && !(vma->vm_flags & VM_EXEC)) { @@ -126,7 +125,8 @@ good_area: #endif goto bad_area; } - if (!(vma->vm_flags & VM_READ)) { + if (!(vma->vm_flags & VM_READ) && + exception_epc(regs) != address) { #if 0 pr_notice("Cpu%d[%s:%d:%0*lx:%ld:%0*lx] RI violation\n", raw_smp_processor_id(), @@ -137,7 +137,7 @@ good_area: goto bad_area; } } else { - if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) + if (unlikely(!vma_is_accessible(vma))) goto bad_area; } } @@ -147,44 +147,41 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); - if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) + if (fault_signal_pending(fault, regs)) { + if (!user_mode(regs)) + goto no_context; + return; + } + + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) return; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); } - if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); - tsk->maj_flt++; - } else { - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); - tsk->min_flt++; - } - if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; - flags |= FAULT_FLAG_TRIED; - /* - * No need to up_read(&mm->mmap_sem) as we would - * have already released it in __lock_page_or_retry - * in mm/filemap.c. - */ + if (fault & VM_FAULT_RETRY) { + flags |= FAULT_FLAG_TRIED; - goto retry; - } + /* + * No need to mmap_read_unlock(mm) as we would + * have already released it in __lock_page_or_retry + * in mm/filemap.c. + */ + + goto retry; } - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); return; /* @@ -192,27 +189,31 @@ good_area: * Fix it, but check if it's kernel or user first.. */ bad_area: - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); bad_area_nosemaphore: /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { tsk->thread.cp0_badvaddr = address; tsk->thread.error_code = write; -#if 0 - printk("do_page_fault() #2: sending SIGSEGV to %s for " - "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n", - tsk->comm, - write ? "write access to" : "read access from", - field, address, - field, (unsigned long) regs->cp0_epc, - field, (unsigned long) regs->regs[31]); -#endif - info.si_signo = SIGSEGV; - info.si_errno = 0; - /* info.si_code has been set above */ - info.si_addr = (void __user *) address; - force_sig_info(SIGSEGV, &info, tsk); + if (show_unhandled_signals && + unhandled_signal(tsk, SIGSEGV) && + __ratelimit(&ratelimit_state)) { + pr_info("do_page_fault(): sending SIGSEGV to %s for invalid %s %0*lx\n", + tsk->comm, + write ? "write access to" : "read access from", + field, address); + pr_info("epc = %0*lx in", field, + (unsigned long) regs->cp0_epc); + print_vma_addr(KERN_CONT " ", regs->cp0_epc); + pr_cont("\n"); + pr_info("ra = %0*lx in", field, + (unsigned long) regs->regs[31]); + print_vma_addr(KERN_CONT " ", regs->regs[31]); + pr_cont("\n"); + } + current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f; + force_sig_fault(SIGSEGV, si_code, (void __user *)address); return; } @@ -240,36 +241,35 @@ out_of_memory: * We ran out of memory, call the OOM killer, and return the userspace * (which will retry the fault, or kill us if we got oom-killed). */ - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); + if (!user_mode(regs)) + goto no_context; pagefault_out_of_memory(); return; do_sigbus: - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) goto no_context; - else + /* * Send a sigbus, regardless of whether we were in kernel * or user mode. */ #if 0 - printk("do_page_fault() #3: sending SIGBUS to %s for " - "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n", - tsk->comm, - write ? "write access to" : "read access from", - field, address, - field, (unsigned long) regs->cp0_epc, - field, (unsigned long) regs->regs[31]); + printk("do_page_fault() #3: sending SIGBUS to %s for " + "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n", + tsk->comm, + write ? "write access to" : "read access from", + field, address, + field, (unsigned long) regs->cp0_epc, + field, (unsigned long) regs->regs[31]); #endif + current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f; tsk->thread.cp0_badvaddr = address; - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void __user *) address; - force_sig_info(SIGBUS, &info, tsk); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); return; #ifndef CONFIG_64BIT @@ -282,8 +282,9 @@ vmalloc_fault: * Do _not_ use "tsk" here. We might be inside * an interrupt in the middle of a task switch.. */ - int offset = __pgd_offset(address); + int offset = pgd_index(address); pgd_t *pgd, *pgd_k; + p4d_t *p4d, *p4d_k; pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; pte_t *pte_k; @@ -295,8 +296,13 @@ vmalloc_fault: goto no_context; set_pgd(pgd, *pgd_k); - pud = pud_offset(pgd, address); - pud_k = pud_offset(pgd_k, address); + p4d = p4d_offset(pgd, address); + p4d_k = p4d_offset(pgd_k, address); + if (!p4d_present(*p4d_k)) + goto no_context; + + pud = pud_offset(p4d, address); + pud_k = pud_offset(p4d_k, address); if (!pud_present(*pud_k)) goto no_context; @@ -313,8 +319,9 @@ vmalloc_fault: } #endif } +NOKPROBE_SYMBOL(__do_page_fault); -asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, +asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, unsigned long address) { enum ctx_state prev_state; @@ -323,3 +330,4 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, __do_page_fault(regs, write, address); exception_exit(prev_state); } +NOKPROBE_SYMBOL(do_page_fault); diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c deleted file mode 100644 index d4ea5c9c4a93..000000000000 --- a/arch/mips/mm/gup.c +++ /dev/null @@ -1,317 +0,0 @@ -/* - * Lockless get_user_pages_fast for MIPS - * - * Copyright (C) 2008 Nick Piggin - * Copyright (C) 2008 Novell Inc. - * Copyright (C) 2011 Ralf Baechle - */ -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/vmstat.h> -#include <linux/highmem.h> -#include <linux/swap.h> -#include <linux/hugetlb.h> - -#include <asm/pgtable.h> - -static inline pte_t gup_get_pte(pte_t *ptep) -{ -#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32) - pte_t pte; - -retry: - pte.pte_low = ptep->pte_low; - smp_rmb(); - pte.pte_high = ptep->pte_high; - smp_rmb(); - if (unlikely(pte.pte_low != ptep->pte_low)) - goto retry; - - return pte; -#else - return ACCESS_ONCE(*ptep); -#endif -} - -static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - pte_t *ptep = pte_offset_map(&pmd, addr); - do { - pte_t pte = gup_get_pte(ptep); - struct page *page; - - if (!pte_present(pte) || - pte_special(pte) || (write && !pte_write(pte))) { - pte_unmap(ptep); - return 0; - } - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - page = pte_page(pte); - get_page(page); - SetPageReferenced(page); - pages[*nr] = page; - (*nr)++; - - } while (ptep++, addr += PAGE_SIZE, addr != end); - - pte_unmap(ptep - 1); - return 1; -} - -static inline void get_head_page_multiple(struct page *page, int nr) -{ - VM_BUG_ON(page != compound_head(page)); - VM_BUG_ON(page_count(page) == 0); - atomic_add(nr, &page->_count); - SetPageReferenced(page); -} - -static int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - pte_t pte = *(pte_t *)&pmd; - struct page *head, *page; - int refs; - - if (write && !pte_write(pte)) - return 0; - /* hugepages are never "special" */ - VM_BUG_ON(pte_special(pte)); - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - - refs = 0; - head = pte_page(pte); - page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); - do { - VM_BUG_ON(compound_head(page) != head); - pages[*nr] = page; - if (PageTail(page)) - get_huge_page_tail(page); - (*nr)++; - page++; - refs++; - } while (addr += PAGE_SIZE, addr != end); - - get_head_page_multiple(head, refs); - return 1; -} - -static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pmd_t *pmdp; - - pmdp = pmd_offset(&pud, addr); - do { - pmd_t pmd = *pmdp; - - next = pmd_addr_end(addr, end); - /* - * The pmd_trans_splitting() check below explains why - * pmdp_splitting_flush has to flush the tlb, to stop - * this gup-fast code from running while we set the - * splitting bit in the pmd. Returning zero will take - * the slow path that will call wait_split_huge_page() - * if the pmd is still in splitting state. gup-fast - * can't because it has irq disabled and - * wait_split_huge_page() would never return as the - * tlb flush IPI wouldn't run. - */ - if (pmd_none(pmd) || pmd_trans_splitting(pmd)) - return 0; - if (unlikely(pmd_huge(pmd))) { - if (!gup_huge_pmd(pmd, addr, next, write, pages,nr)) - return 0; - } else { - if (!gup_pte_range(pmd, addr, next, write, pages,nr)) - return 0; - } - } while (pmdp++, addr = next, addr != end); - - return 1; -} - -static int gup_huge_pud(pud_t pud, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - pte_t pte = *(pte_t *)&pud; - struct page *head, *page; - int refs; - - if (write && !pte_write(pte)) - return 0; - /* hugepages are never "special" */ - VM_BUG_ON(pte_special(pte)); - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - - refs = 0; - head = pte_page(pte); - page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); - do { - VM_BUG_ON(compound_head(page) != head); - pages[*nr] = page; - if (PageTail(page)) - get_huge_page_tail(page); - (*nr)++; - page++; - refs++; - } while (addr += PAGE_SIZE, addr != end); - - get_head_page_multiple(head, refs); - return 1; -} - -static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pud_t *pudp; - - pudp = pud_offset(&pgd, addr); - do { - pud_t pud = *pudp; - - next = pud_addr_end(addr, end); - if (pud_none(pud)) - return 0; - if (unlikely(pud_huge(pud))) { - if (!gup_huge_pud(pud, addr, next, write, pages,nr)) - return 0; - } else { - if (!gup_pmd_range(pud, addr, next, write, pages,nr)) - return 0; - } - } while (pudp++, addr = next, addr != end); - - return 1; -} - -/* - * Like get_user_pages_fast() except its IRQ-safe in that it won't fall - * back to the regular GUP. - */ -int __get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next; - unsigned long flags; - pgd_t *pgdp; - int nr = 0; - - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, - (void __user *)start, len))) - return 0; - - /* - * XXX: batch / limit 'nr', to avoid large irq off latency - * needs some instrumenting to determine the common sizes used by - * important workloads (eg. DB2), and whether limiting the batch - * size will decrease performance. - * - * It seems like we're in the clear for the moment. Direct-IO is - * the main guy that batches up lots of get_user_pages, and even - * they are limited to 64-at-a-time which is not so many. - */ - /* - * This doesn't prevent pagetable teardown, but does prevent - * the pagetables and pages from being freed. - * - * So long as we atomically load page table pointers versus teardown, - * we can follow the address down to the page and take a ref on it. - */ - local_irq_save(flags); - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = *pgdp; - - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - break; - if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) - break; - } while (pgdp++, addr = next, addr != end); - local_irq_restore(flags); - - return nr; -} - -/** - * get_user_pages_fast() - pin user pages in memory - * @start: starting user address - * @nr_pages: number of pages from start to pin - * @write: whether pages will be written to - * @pages: array that receives pointers to the pages pinned. - * Should be at least nr_pages long. - * - * Attempt to pin user pages in memory without taking mm->mmap_sem. - * If not successful, it will fall back to taking the lock and - * calling get_user_pages(). - * - * Returns number of pages pinned. This may be fewer than the number - * requested. If nr_pages is 0 or negative, returns 0. If no pages - * were pinned, returns -errno. - */ -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next; - pgd_t *pgdp; - int ret, nr = 0; - - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - - end = start + len; - if (end < start) - goto slow_irqon; - - /* XXX: batch / limit 'nr' */ - local_irq_disable(); - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = *pgdp; - - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - goto slow; - if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) - goto slow; - } while (pgdp++, addr = next, addr != end); - local_irq_enable(); - - VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); - return nr; -slow: - local_irq_enable(); - -slow_irqon: - /* Try to get the remaining pages with get_user_pages */ - start += nr << PAGE_SHIFT; - pages += nr; - - down_read(&mm->mmap_sem); - ret = get_user_pages(current, mm, start, - (end - start) >> PAGE_SHIFT, - write, 0, pages, NULL); - up_read(&mm->mmap_sem); - - /* Have to be a bit careful with return values */ - if (nr > 0) { - if (ret < 0) - ret = nr; - else - ret += nr; - } - return ret; -} diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c index da815d295239..57e2f08f00d0 100644 --- a/arch/mips/mm/highmem.c +++ b/arch/mips/mm/highmem.c @@ -1,138 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/compiler.h> -#include <linux/module.h> +#include <linux/init.h> +#include <linux/export.h> #include <linux/highmem.h> #include <linux/sched.h> #include <linux/smp.h> #include <asm/fixmap.h> #include <asm/tlbflush.h> -static pte_t *kmap_pte; - unsigned long highstart_pfn, highend_pfn; -void *kmap(struct page *page) -{ - void *addr; - - might_sleep(); - if (!PageHighMem(page)) - return page_address(page); - addr = kmap_high(page); - flush_tlb_one((unsigned long)addr); - - return addr; -} -EXPORT_SYMBOL(kmap); - -void kunmap(struct page *page) -{ - BUG_ON(in_interrupt()); - if (!PageHighMem(page)) - return; - kunmap_high(page); -} -EXPORT_SYMBOL(kunmap); - -/* - * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because - * no global lock is needed and because the kmap code must perform a global TLB - * invalidation when the kmap pool wraps. - * - * However when holding an atomic kmap is is not legal to sleep, so atomic - * kmaps are appropriate for short, tight code paths only. - */ - -void *kmap_atomic(struct page *page) +void kmap_flush_tlb(unsigned long addr) { - unsigned long vaddr; - int idx, type; - - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ - pagefault_disable(); - if (!PageHighMem(page)) - return page_address(page); - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); -#ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(!pte_none(*(kmap_pte - idx))); -#endif - set_pte(kmap_pte-idx, mk_pte(page, PAGE_KERNEL)); - local_flush_tlb_one((unsigned long)vaddr); - - return (void*) vaddr; -} -EXPORT_SYMBOL(kmap_atomic); - -void __kunmap_atomic(void *kvaddr) -{ - unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - int type __maybe_unused; - - if (vaddr < FIXADDR_START) { // FIXME - pagefault_enable(); - return; - } - - type = kmap_atomic_idx(); -#ifdef CONFIG_DEBUG_HIGHMEM - { - int idx = type + KM_TYPE_NR * smp_processor_id(); - - BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); - - /* - * force other mappings to Oops if they'll try to access - * this pte without first remap it - */ - pte_clear(&init_mm, vaddr, kmap_pte-idx); - local_flush_tlb_one(vaddr); - } -#endif - kmap_atomic_idx_pop(); - pagefault_enable(); -} -EXPORT_SYMBOL(__kunmap_atomic); - -/* - * This is the same as kmap_atomic() but can map memory that doesn't - * have a struct page associated with it. - */ -void *kmap_atomic_pfn(unsigned long pfn) -{ - unsigned long vaddr; - int idx, type; - - pagefault_disable(); - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte-idx, pfn_pte(pfn, PAGE_KERNEL)); - flush_tlb_one(vaddr); - - return (void*) vaddr; -} - -struct page *kmap_atomic_to_page(void *ptr) -{ - unsigned long idx, vaddr = (unsigned long)ptr; - pte_t *pte; - - if (vaddr < FIXADDR_START) - return virt_to_page(ptr); - - idx = virt_to_fix(vaddr); - pte = kmap_pte - (idx - FIX_KMAP_BEGIN); - return pte_page(*pte); -} - -void __init kmap_init(void) -{ - unsigned long kmap_vstart; - - /* cache the first kmap pte */ - kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); - kmap_pte = kmap_get_fixmap_pte(kmap_vstart); + flush_tlb_one(addr); } +EXPORT_SYMBOL(kmap_flush_tlb); diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c index a7fee0dfb7a9..0b9e15555b59 100644 --- a/arch/mips/mm/hugetlbpage.c +++ b/arch/mips/mm/hugetlbpage.c @@ -11,7 +11,6 @@ * Copyright (C) 2008, 2009 Cavium Networks, Inc. */ -#include <linux/init.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/hugetlb.h> @@ -22,77 +21,39 @@ #include <asm/tlb.h> #include <asm/tlbflush.h> -pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, - unsigned long sz) +pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, unsigned long sz) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pte_t *pte = NULL; pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); + p4d = p4d_alloc(mm, pgd, addr); + pud = pud_alloc(mm, p4d, addr); if (pud) pte = (pte_t *)pmd_alloc(mm, pud, addr); return pte; } -pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, + unsigned long sz) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd = NULL; pgd = pgd_offset(mm, addr); if (pgd_present(*pgd)) { - pud = pud_offset(pgd, addr); - if (pud_present(*pud)) - pmd = pmd_offset(pud, addr); + p4d = p4d_offset(pgd, addr); + if (p4d_present(*p4d)) { + pud = pud_offset(p4d, addr); + if (pud_present(*pud)) + pmd = pmd_offset(pud, addr); + } } return (pte_t *) pmd; } - -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) -{ - return 0; -} - -/* - * This function checks for proper alignment of input addr and len parameters. - */ -int is_aligned_hugepage_range(unsigned long addr, unsigned long len) -{ - if (len & ~HPAGE_MASK) - return -EINVAL; - if (addr & ~HPAGE_MASK) - return -EINVAL; - return 0; -} - -struct page * -follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) -{ - return ERR_PTR(-EINVAL); -} - -int pmd_huge(pmd_t pmd) -{ - return (pmd_val(pmd) & _PAGE_HUGE) != 0; -} - -int pud_huge(pud_t pud) -{ - return (pud_val(pud) & _PAGE_HUGE) != 0; -} - -struct page * -follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) -{ - struct page *page; - - page = pte_page(*(pte_t *)pmd); - if (page) - page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); - return page; -} diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 4e73f10a7519..a673d3d68254 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -10,7 +10,7 @@ */ #include <linux/bug.h> #include <linux/init.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/smp.h> @@ -22,7 +22,7 @@ #include <linux/ptrace.h> #include <linux/mman.h> #include <linux/mm.h> -#include <linux/bootmem.h> +#include <linux/memblock.h> #include <linux/highmem.h> #include <linux/swap.h> #include <linux/proc_fs.h> @@ -30,41 +30,21 @@ #include <linux/hardirq.h> #include <linux/gfp.h> #include <linux/kcore.h> +#include <linux/initrd.h> +#include <linux/execmem.h> -#include <asm/asm-offsets.h> #include <asm/bootinfo.h> #include <asm/cachectl.h> #include <asm/cpu.h> #include <asm/dma.h> -#include <asm/kmap_types.h> +#include <asm/maar.h> #include <asm/mmu_context.h> +#include <asm/mmzone.h> #include <asm/sections.h> -#include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/tlb.h> #include <asm/fixmap.h> -/* Atomicity and interruptability */ -#ifdef CONFIG_MIPS_MT_SMTC - -#include <asm/mipsmtregs.h> - -#define ENTER_CRITICAL(flags) \ - { \ - unsigned int mvpflags; \ - local_irq_save(flags);\ - mvpflags = dvpe() -#define EXIT_CRITICAL(flags) \ - evpe(mvpflags); \ - local_irq_restore(flags); \ - } -#else - -#define ENTER_CRITICAL(flags) local_irq_save(flags) -#define EXIT_CRITICAL(flags) local_irq_restore(flags) - -#endif /* CONFIG_MIPS_MT_SMTC */ - /* * We have up to 8 empty zeroed pages so we can map one of the right colour * when needed. This is necessary only on R4000 / R4400 SC and MC versions @@ -74,114 +54,98 @@ */ unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL_GPL(empty_zero_page); +EXPORT_SYMBOL(zero_page_mask); /* * Not static inline because used by IP27 special magic initialization code */ -void setup_zero_pages(void) +static void __init setup_zero_pages(void) { - unsigned int order, i; - struct page *page; + unsigned int order; if (cpu_has_vce) order = 3; else order = 0; - empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - if (!empty_zero_page) - panic("Oh boy, that early out of memory?"); - - page = virt_to_page((void *)empty_zero_page); - split_page(page, order); - for (i = 0; i < (1 << order); i++, page++) - mark_page_reserved(page); + empty_zero_page = (unsigned long)memblock_alloc_or_panic(PAGE_SIZE << order, PAGE_SIZE); zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK; } -#ifdef CONFIG_MIPS_MT_SMTC -static pte_t *kmap_coherent_pte; -static void __init kmap_coherent_init(void) -{ - unsigned long vaddr; - - /* cache the first coherent kmap pte */ - vaddr = __fix_to_virt(FIX_CMAP_BEGIN); - kmap_coherent_pte = kmap_get_fixmap_pte(vaddr); -} -#else -static inline void kmap_coherent_init(void) {} -#endif - -void *kmap_coherent(struct page *page, unsigned long addr) +static void *__kmap_pgprot(struct page *page, unsigned long addr, pgprot_t prot) { enum fixed_addresses idx; + unsigned int old_mmid; unsigned long vaddr, flags, entrylo; unsigned long old_ctx; pte_t pte; int tlbidx; - BUG_ON(Page_dcache_dirty(page)); + BUG_ON(folio_test_dcache_dirty(page_folio(page))); - inc_preempt_count(); + preempt_disable(); + pagefault_disable(); idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1); -#ifdef CONFIG_MIPS_MT_SMTC - idx += FIX_N_COLOURS * smp_processor_id() + - (in_interrupt() ? (FIX_N_COLOURS * NR_CPUS) : 0); -#else idx += in_interrupt() ? FIX_N_COLOURS : 0; -#endif vaddr = __fix_to_virt(FIX_CMAP_END - idx); - pte = mk_pte(page, PAGE_KERNEL); -#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32) + pte = mk_pte(page, prot); +#if defined(CONFIG_XPA) + entrylo = pte_to_entrylo(pte.pte_high); +#elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) entrylo = pte.pte_high; #else entrylo = pte_to_entrylo(pte_val(pte)); #endif - ENTER_CRITICAL(flags); + local_irq_save(flags); old_ctx = read_c0_entryhi(); write_c0_entryhi(vaddr & (PAGE_MASK << 1)); write_c0_entrylo0(entrylo); write_c0_entrylo1(entrylo); -#ifdef CONFIG_MIPS_MT_SMTC - set_pte(kmap_coherent_pte - (FIX_CMAP_END - idx), pte); - /* preload TLB instead of local_flush_tlb_one() */ - mtc0_tlbw_hazard(); - tlb_probe(); - tlb_probe_hazard(); - tlbidx = read_c0_index(); - mtc0_tlbw_hazard(); - if (tlbidx < 0) - tlb_write_random(); - else - tlb_write_indexed(); -#else - tlbidx = read_c0_wired(); + if (cpu_has_mmid) { + old_mmid = read_c0_memorymapid(); + write_c0_memorymapid(MMID_KERNEL_WIRED); + } +#ifdef CONFIG_XPA + if (cpu_has_xpa) { + entrylo = (pte.pte_low & _PFNX_MASK); + writex_c0_entrylo0(entrylo); + writex_c0_entrylo1(entrylo); + } +#endif + tlbidx = num_wired_entries(); write_c0_wired(tlbidx + 1); write_c0_index(tlbidx); mtc0_tlbw_hazard(); tlb_write_indexed(); -#endif tlbw_use_hazard(); write_c0_entryhi(old_ctx); - EXIT_CRITICAL(flags); + if (cpu_has_mmid) + write_c0_memorymapid(old_mmid); + local_irq_restore(flags); return (void*) vaddr; } -#define UNIQUE_ENTRYHI(idx) (CKSEG0 + ((idx) << (PAGE_SHIFT + 1))) +void *kmap_coherent(struct page *page, unsigned long addr) +{ + return __kmap_pgprot(page, addr, PAGE_KERNEL); +} + +void *kmap_noncoherent(struct page *page, unsigned long addr) +{ + return __kmap_pgprot(page, addr, PAGE_KERNEL_NC); +} void kunmap_coherent(void) { -#ifndef CONFIG_MIPS_MT_SMTC unsigned int wired; unsigned long flags, old_ctx; - ENTER_CRITICAL(flags); + local_irq_save(flags); old_ctx = read_c0_entryhi(); - wired = read_c0_wired() - 1; + wired = num_wired_entries() - 1; write_c0_wired(wired); write_c0_index(wired); write_c0_entryhi(UNIQUE_ENTRYHI(wired)); @@ -191,20 +155,20 @@ void kunmap_coherent(void) tlb_write_indexed(); tlbw_use_hazard(); write_c0_entryhi(old_ctx); - EXIT_CRITICAL(flags); -#endif - dec_preempt_count(); - preempt_check_resched(); + local_irq_restore(flags); + pagefault_enable(); + preempt_enable(); } void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma) { + struct folio *src = page_folio(from); void *vfrom, *vto; vto = kmap_atomic(to); if (cpu_has_dc_aliases && - page_mapped(from) && !Page_dcache_dirty(from)) { + folio_mapped(src) && !folio_test_dcache_dirty(src)) { vfrom = kmap_coherent(from, vaddr); copy_page(vto, vfrom); kunmap_coherent(); @@ -225,17 +189,19 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, const void *src, unsigned long len) { + struct folio *folio = page_folio(page); + if (cpu_has_dc_aliases && - page_mapped(page) && !Page_dcache_dirty(page)) { + folio_mapped(folio) && !folio_test_dcache_dirty(folio)) { void *vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK); memcpy(vto, src, len); kunmap_coherent(); } else { memcpy(dst, src, len); if (cpu_has_dc_aliases) - SetPageDcacheDirty(page); + folio_set_dcache_dirty(folio); } - if ((vma->vm_flags & VM_EXEC) && !cpu_has_ic_fills_f_dc) + if (vma->vm_flags & VM_EXEC) flush_cache_page(vma, vaddr, page_to_pfn(page)); } @@ -243,22 +209,25 @@ void copy_from_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, const void *src, unsigned long len) { + struct folio *folio = page_folio(page); + if (cpu_has_dc_aliases && - page_mapped(page) && !Page_dcache_dirty(page)) { + folio_mapped(folio) && !folio_test_dcache_dirty(folio)) { void *vfrom = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK); memcpy(dst, vfrom, len); kunmap_coherent(); } else { memcpy(dst, src, len); if (cpu_has_dc_aliases) - SetPageDcacheDirty(page); + folio_set_dcache_dirty(folio); } } +EXPORT_SYMBOL_GPL(copy_from_user_page); void __init fixrange_init(unsigned long start, unsigned long end, pgd_t *pgd_base) { -#if defined(CONFIG_HIGHMEM) || defined(CONFIG_MIPS_MT_SMTC) +#ifdef CONFIG_HIGHMEM pgd_t *pgd; pud_t *pud; pmd_t *pmd; @@ -267,9 +236,9 @@ void __init fixrange_init(unsigned long start, unsigned long end, unsigned long vaddr; vaddr = start; - i = __pgd_offset(vaddr); - j = __pud_offset(vaddr); - k = __pmd_offset(vaddr); + i = pgd_index(vaddr); + j = pud_index(vaddr); + k = pmd_index(vaddr); pgd = pgd_base + i; for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) { @@ -278,7 +247,13 @@ void __init fixrange_init(unsigned long start, unsigned long end, pmd = (pmd_t *)pud; for (; (k < PTRS_PER_PMD) && (vaddr < end); pmd++, k++) { if (pmd_none(*pmd)) { - pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + pte = (pte_t *) memblock_alloc_low(PAGE_SIZE, + PAGE_SIZE); + if (!pte) + panic("%s: Failed to allocate %lu bytes align=%lx\n", + __func__, PAGE_SIZE, + PAGE_SIZE); + set_pmd(pmd, __pmd((unsigned long)pte)); BUG_ON(pte != pte_offset_kernel(pmd, 0)); } @@ -291,46 +266,140 @@ void __init fixrange_init(unsigned long start, unsigned long end, #endif } -#ifndef CONFIG_NEED_MULTIPLE_NODES -int page_is_ram(unsigned long pagenr) +struct maar_walk_info { + struct maar_config cfg[16]; + unsigned int num_cfg; +}; + +static int maar_res_walk(unsigned long start_pfn, unsigned long nr_pages, + void *data) +{ + struct maar_walk_info *wi = data; + struct maar_config *cfg = &wi->cfg[wi->num_cfg]; + unsigned int maar_align; + + /* MAAR registers hold physical addresses right shifted by 4 bits */ + maar_align = BIT(MIPS_MAAR_ADDR_SHIFT + 4); + + /* Fill in the MAAR config entry */ + cfg->lower = ALIGN(PFN_PHYS(start_pfn), maar_align); + cfg->upper = ALIGN_DOWN(PFN_PHYS(start_pfn + nr_pages), maar_align) - 1; + cfg->attrs = MIPS_MAAR_S; + + /* Ensure we don't overflow the cfg array */ + if (!WARN_ON(wi->num_cfg >= ARRAY_SIZE(wi->cfg))) + wi->num_cfg++; + + return 0; +} + + +unsigned __weak platform_maar_init(unsigned num_pairs) { - int i; + unsigned int num_configured; + struct maar_walk_info wi; - for (i = 0; i < boot_mem_map.nr_map; i++) { - unsigned long addr, end; + wi.num_cfg = 0; + walk_system_ram_range(0, max_pfn, &wi, maar_res_walk); - switch (boot_mem_map.map[i].type) { - case BOOT_MEM_RAM: - case BOOT_MEM_INIT_RAM: - break; - default: - /* not usable memory */ + num_configured = maar_config(wi.cfg, wi.num_cfg, num_pairs); + if (num_configured < wi.num_cfg) + pr_warn("Not enough MAAR pairs (%u) for all memory regions (%u)\n", + num_pairs, wi.num_cfg); + + return num_configured; +} + +void maar_init(void) +{ + unsigned num_maars, used, i; + phys_addr_t lower, upper, attr; + static struct { + struct maar_config cfgs[3]; + unsigned used; + } recorded = { { { 0 } }, 0 }; + + if (!cpu_has_maar) + return; + + /* Detect the number of MAARs */ + write_c0_maari(~0); + back_to_back_c0_hazard(); + num_maars = read_c0_maari() + 1; + + /* MAARs should be in pairs */ + WARN_ON(num_maars % 2); + + /* Set MAARs using values we recorded already */ + if (recorded.used) { + used = maar_config(recorded.cfgs, recorded.used, num_maars / 2); + BUG_ON(used != recorded.used); + } else { + /* Configure the required MAARs */ + used = platform_maar_init(num_maars / 2); + } + + /* Disable any further MAARs */ + for (i = (used * 2); i < num_maars; i++) { + write_c0_maari(i); + back_to_back_c0_hazard(); + write_c0_maar(0); + back_to_back_c0_hazard(); + } + + if (recorded.used) + return; + + pr_info("MAAR configuration:\n"); + for (i = 0; i < num_maars; i += 2) { + write_c0_maari(i); + back_to_back_c0_hazard(); + upper = read_c0_maar(); +#ifdef CONFIG_XPA + upper |= (phys_addr_t)readx_c0_maar() << MIPS_MAARX_ADDR_SHIFT; +#endif + + write_c0_maari(i + 1); + back_to_back_c0_hazard(); + lower = read_c0_maar(); +#ifdef CONFIG_XPA + lower |= (phys_addr_t)readx_c0_maar() << MIPS_MAARX_ADDR_SHIFT; +#endif + + attr = lower & upper; + lower = (lower & MIPS_MAAR_ADDR) << 4; + upper = ((upper & MIPS_MAAR_ADDR) << 4) | 0xffff; + + pr_info(" [%d]: ", i / 2); + if ((attr & MIPS_MAAR_V) != MIPS_MAAR_V) { + pr_cont("disabled\n"); continue; } - addr = PFN_UP(boot_mem_map.map[i].addr); - end = PFN_DOWN(boot_mem_map.map[i].addr + - boot_mem_map.map[i].size); + pr_cont("%pa-%pa", &lower, &upper); - if (pagenr >= addr && pagenr < end) - return 1; - } + if (attr & MIPS_MAAR_S) + pr_cont(" speculate"); - return 0; + pr_cont("\n"); + + /* Record the setup for use on secondary CPUs */ + if (used <= ARRAY_SIZE(recorded.cfgs)) { + recorded.cfgs[recorded.used].lower = lower; + recorded.cfgs[recorded.used].upper = upper; + recorded.cfgs[recorded.used].attrs = attr; + recorded.used++; + } + } } +#ifndef CONFIG_NUMA void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; - unsigned long lastpfn __maybe_unused; pagetable_init(); -#ifdef CONFIG_HIGHMEM - kmap_init(); -#endif - kmap_coherent_init(); - #ifdef CONFIG_ZONE_DMA max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; #endif @@ -338,59 +407,34 @@ void __init paging_init(void) max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; #endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; - lastpfn = max_low_pfn; #ifdef CONFIG_HIGHMEM max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; - lastpfn = highend_pfn; if (cpu_has_dc_aliases && max_low_pfn != highend_pfn) { printk(KERN_WARNING "This processor doesn't support highmem." " %ldk highmem ignored\n", (highend_pfn - max_low_pfn) << (PAGE_SHIFT - 10)); max_zone_pfns[ZONE_HIGHMEM] = max_low_pfn; - lastpfn = max_low_pfn; } #endif - free_area_init_nodes(max_zone_pfns); + free_area_init(max_zone_pfns); } #ifdef CONFIG_64BIT static struct kcore_list kcore_kseg0; #endif -static inline void mem_init_free_highmem(void) +void __init arch_mm_preinit(void) { -#ifdef CONFIG_HIGHMEM - unsigned long tmp; - - for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { - struct page *page = pfn_to_page(tmp); - - if (!page_is_ram(tmp)) - SetPageReserved(page); - else - free_highmem_page(page); - } -#endif -} - -void __init mem_init(void) -{ -#ifdef CONFIG_HIGHMEM -#ifdef CONFIG_DISCONTIGMEM -#error "CONFIG_HIGHMEM and CONFIG_DISCONTIGMEM dont work together yet" -#endif - max_mapnr = highend_pfn ? highend_pfn : max_low_pfn; -#else - max_mapnr = max_low_pfn; -#endif - high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); + /* + * When PFN_PTE_SHIFT is greater than PAGE_SHIFT we won't have enough PTE + * bits to hold a full 32b physical address on MIPS32 systems. + */ + BUILD_BUG_ON(IS_ENABLED(CONFIG_32BIT) && (PFN_PTE_SHIFT > PAGE_SHIFT)); - free_all_bootmem(); + maar_init(); setup_zero_pages(); /* Setup zeroed pages. */ - mem_init_free_highmem(); - mem_init_print_info(NULL); #ifdef CONFIG_64BIT if ((unsigned long) &_text > (unsigned long) CKSEG0) @@ -400,7 +444,12 @@ void __init mem_init(void) 0x80000000 - 4, KCORE_TEXT); #endif } -#endif /* !CONFIG_NEED_MULTIPLE_NODES */ +#else /* CONFIG_NUMA */ +void __init arch_mm_preinit(void) +{ + setup_zero_pages(); /* This comes from node 0 */ +} +#endif /* !CONFIG_NUMA */ void free_init_pages(const char *what, unsigned long begin, unsigned long end) { @@ -416,37 +465,104 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end) printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); } -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) +void (*free_init_pages_eva)(void *begin, void *end) = NULL; + +void __weak __init prom_free_prom_memory(void) { - free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, - "initrd"); + /* nothing to do */ } -#endif -void __init_refok free_initmem(void) +void __ref free_initmem(void) { prom_free_prom_memory(); - free_initmem_default(POISON_FREE_INITMEM); + /* + * Let the platform define a specific function to free the + * init section since EVA may have used any possible mapping + * between virtual and physical addresses. + */ + if (free_init_pages_eva) + free_init_pages_eva((void *)&__init_begin, (void *)&__init_end); + else + free_initmem_default(POISON_FREE_INITMEM); +} + +#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); + +static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) +{ + return node_distance(cpu_to_node(from), cpu_to_node(to)); +} + +static int __init pcpu_cpu_to_node(int cpu) +{ + return cpu_to_node(cpu); } +void __init setup_per_cpu_areas(void) +{ + unsigned long delta; + unsigned int cpu; + int rc; + + /* + * Always reserve area for module percpu variables. That's + * what the legacy allocator did. + */ + rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, + PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, + pcpu_cpu_distance, + pcpu_cpu_to_node); + if (rc < 0) + panic("Failed to initialize percpu areas."); + + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; + for_each_possible_cpu(cpu) + __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; +} +#endif + #ifndef CONFIG_MIPS_PGD_C0_CONTEXT unsigned long pgd_current[NR_CPUS]; #endif /* - * gcc 3.3 and older have trouble determining that PTRS_PER_PGD and PGD_ORDER - * are constants. So we use the variants from asm-offset.h until that gcc - * will officially be retired. - * * Align swapper_pg_dir in to 64K, allows its address to be loaded * with a single LUI instruction in the TLB handlers. If we used * __aligned(64K), its size would get rounded up to the alignment * size, and waste space. So we place it in its own section and align * it in the linker script. */ -pgd_t swapper_pg_dir[_PTRS_PER_PGD] __section(.bss..swapper_pg_dir); +pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir"); +#ifndef __PAGETABLE_PUD_FOLDED +pud_t invalid_pud_table[PTRS_PER_PUD] __page_aligned_bss; +#endif #ifndef __PAGETABLE_PMD_FOLDED pmd_t invalid_pmd_table[PTRS_PER_PMD] __page_aligned_bss; +EXPORT_SYMBOL_GPL(invalid_pmd_table); #endif pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss; +EXPORT_SYMBOL(invalid_pte_table); + +#ifdef CONFIG_EXECMEM +#ifdef MODULES_VADDR +static struct execmem_info execmem_info __ro_after_init; + +struct execmem_info __init *execmem_arch_setup(void) +{ + execmem_info = (struct execmem_info){ + .ranges = { + [EXECMEM_DEFAULT] = { + .start = MODULES_VADDR, + .end = MODULES_END, + .pgprot = PAGE_KERNEL, + .alignment = 1, + }, + }, + }; + + return &execmem_info; +} +#endif +#endif /* CONFIG_EXECMEM */ diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c index 7f840bc08abf..c6c4576cd4a8 100644 --- a/arch/mips/mm/ioremap.c +++ b/arch/mips/mm/ioremap.c @@ -6,119 +6,56 @@ * (C) Copyright 1995 1996 Linus Torvalds * (C) Copyright 2001, 2002 Ralf Baechle */ -#include <linux/module.h> +#include <linux/export.h> #include <asm/addrspace.h> #include <asm/byteorder.h> +#include <linux/ioport.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/mm_types.h> +#include <linux/io.h> #include <asm/cacheflush.h> -#include <asm/io.h> #include <asm/tlbflush.h> +#include <ioremap.h> -static inline void remap_area_pte(pte_t * pte, unsigned long address, - phys_t size, phys_t phys_addr, unsigned long flags) -{ - phys_t end; - unsigned long pfn; - pgprot_t pgprot = __pgprot(_PAGE_GLOBAL | _PAGE_PRESENT | __READABLE - | __WRITEABLE | flags); - - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - BUG_ON(address >= end); - pfn = phys_addr >> PAGE_SHIFT; - do { - if (!pte_none(*pte)) { - printk("remap_area_pte: page already exists\n"); - BUG(); - } - set_pte(pte, pfn_pte(pfn, pgprot)); - address += PAGE_SIZE; - pfn++; - pte++; - } while (address && (address < end)); -} +#define IS_LOW512(addr) (!((phys_addr_t)(addr) & (phys_addr_t) ~0x1fffffffULL)) +#define IS_KSEG1(addr) (((unsigned long)(addr) & ~0x1fffffffUL) == CKSEG1) -static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, - phys_t size, phys_t phys_addr, unsigned long flags) +static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages, + void *arg) { - phys_t end; - - address &= ~PGDIR_MASK; - end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; - phys_addr -= address; - BUG_ON(address >= end); - do { - pte_t * pte = pte_alloc_kernel(pmd, address); - if (!pte) - return -ENOMEM; - remap_area_pte(pte, address, end - address, address + phys_addr, flags); - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address && (address < end)); - return 0; -} + unsigned long i; -static int remap_area_pages(unsigned long address, phys_t phys_addr, - phys_t size, unsigned long flags) -{ - int error; - pgd_t * dir; - unsigned long end = address + size; - - phys_addr -= address; - dir = pgd_offset(&init_mm, address); - flush_cache_all(); - BUG_ON(address >= end); - do { - pud_t *pud; - pmd_t *pmd; - - error = -ENOMEM; - pud = pud_alloc(&init_mm, dir, address); - if (!pud) - break; - pmd = pmd_alloc(&init_mm, pud, address); - if (!pmd) - break; - if (remap_area_pmd(pmd, address, end - address, - phys_addr + address, flags)) - break; - error = 0; - address = (address + PGDIR_SIZE) & PGDIR_MASK; - dir++; - } while (address && (address < end)); - flush_tlb_all(); - return error; -} + for (i = 0; i < nr_pages; i++) { + if (pfn_valid(start_pfn + i) && + !PageReserved(pfn_to_page(start_pfn + i))) + return 1; + } -/* - * Generic mapping function (not visible outside): - */ + return 0; +} /* - * Remap an arbitrary physical address space into the kernel virtual - * address space. Needed when the kernel wants to access high addresses - * directly. + * ioremap_prot - map bus memory into CPU space + * @phys_addr: bus address of the memory + * @size: size of the resource to map * - * NOTE! We need to allow non-page-aligned mappings too: we will obviously - * have to convert them into an offset in a page-aligned mapping, but the - * caller shouldn't need to know that small detail. + * ioremap_prot gives the caller control over cache coherency attributes (CCA) */ - -#define IS_LOW512(addr) (!((phys_t)(addr) & (phys_t) ~0x1fffffffULL)) - -void __iomem * __ioremap(phys_t phys_addr, phys_t size, unsigned long flags) +void __iomem *ioremap_prot(phys_addr_t phys_addr, unsigned long size, + pgprot_t prot) { - struct vm_struct * area; - unsigned long offset; - phys_t last_addr; - void * addr; + unsigned long flags = pgprot_val(prot) & _CACHE_MASK; + unsigned long offset, pfn, last_pfn; + struct vm_struct *area; + phys_addr_t last_addr; + unsigned long vaddr; + void __iomem *cpu_addr; + + cpu_addr = plat_ioremap(phys_addr, size, flags); + if (cpu_addr) + return cpu_addr; phys_addr = fixup_bigphys_addr(phys_addr, size); @@ -135,19 +72,21 @@ void __iomem * __ioremap(phys_t phys_addr, phys_t size, unsigned long flags) flags == _CACHE_UNCACHED) return (void __iomem *) CKSEG1ADDR(phys_addr); + /* Early remaps should use the unmapped regions til' VM is available */ + if (WARN_ON_ONCE(!slab_is_available())) + return NULL; + /* - * Don't allow anybody to remap normal RAM that we're using.. + * Don't allow anybody to remap RAM that may be allocated by the page + * allocator, since that could lead to races & data clobbering. */ - if (phys_addr < virt_to_phys(high_memory)) { - char *t_addr, *t_end; - struct page *page; - - t_addr = __va(phys_addr); - t_end = t_addr + (size - 1); - - for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++) - if(!PageReserved(page)) - return NULL; + pfn = PFN_DOWN(phys_addr); + last_pfn = PFN_DOWN(last_addr); + if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL, + __ioremap_check_ram) == 1) { + WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n", + &phys_addr, &last_addr); + return NULL; } /* @@ -163,30 +102,22 @@ void __iomem * __ioremap(phys_t phys_addr, phys_t size, unsigned long flags) area = get_vm_area(size, VM_IOREMAP); if (!area) return NULL; - addr = area->addr; - if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) { - vunmap(addr); + vaddr = (unsigned long)area->addr; + + flags |= _PAGE_GLOBAL | _PAGE_PRESENT | __READABLE | __WRITEABLE; + if (ioremap_page_range(vaddr, vaddr + size, phys_addr, + __pgprot(flags))) { + free_vm_area(area); return NULL; } - return (void __iomem *) (offset + (char *)addr); + return (void __iomem *)(vaddr + offset); } +EXPORT_SYMBOL(ioremap_prot); -#define IS_KSEG1(addr) (((unsigned long)(addr) & ~0x1fffffffUL) == CKSEG1) - -void __iounmap(const volatile void __iomem *addr) +void iounmap(const volatile void __iomem *addr) { - struct vm_struct *p; - - if (IS_KSEG1(addr)) - return; - - p = remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr)); - if (!p) - printk(KERN_ERR "iounmap: bad address %p\n", addr); - - kfree(p); + if (!plat_iounmap(addr) && !IS_KSEG1(addr)) + vunmap((void *)((unsigned long)addr & PAGE_MASK)); } - -EXPORT_SYMBOL(__ioremap); -EXPORT_SYMBOL(__iounmap); +EXPORT_SYMBOL(iounmap); diff --git a/arch/mips/mm/ioremap64.c b/arch/mips/mm/ioremap64.c new file mode 100644 index 000000000000..acc03ba20098 --- /dev/null +++ b/arch/mips/mm/ioremap64.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/io.h> +#include <ioremap.h> + +void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, + pgprot_t prot) +{ + unsigned long flags = pgprot_val(prot) & _CACHE_MASK; + u64 base = (flags == _CACHE_UNCACHED ? IO_BASE : UNCAC_BASE); + void __iomem *addr; + + addr = plat_ioremap(offset, size, flags); + if (!addr) + addr = (void __iomem *)(unsigned long)(base + offset); + return addr; +} +EXPORT_SYMBOL(ioremap_prot); + +void iounmap(const volatile void __iomem *addr) +{ + plat_iounmap(addr); +} +EXPORT_SYMBOL(iounmap); diff --git a/arch/mips/mm/maccess.c b/arch/mips/mm/maccess.c new file mode 100644 index 000000000000..58173842c6be --- /dev/null +++ b/arch/mips/mm/maccess.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/uaccess.h> +#include <linux/kernel.h> + +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) +{ + /* highest bit set means kernel space */ + return (unsigned long)unsafe_src >> (BITS_PER_LONG - 1); +} diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index f1baadd56e82..5d2a1225785b 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -7,44 +7,19 @@ * written by Ralf Baechle <ralf@linux-mips.org> */ #include <linux/compiler.h> +#include <linux/elf-randomize.h> #include <linux/errno.h> #include <linux/mm.h> #include <linux/mman.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/personality.h> #include <linux/random.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/sched/mm.h> unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ EXPORT_SYMBOL(shm_align_mask); -/* gap between mmap and stack */ -#define MIN_GAP (128*1024*1024UL) -#define MAX_GAP ((TASK_SIZE)/6*5) - -static int mmap_is_legacy(void) -{ - if (current->personality & ADDR_COMPAT_LAYOUT) - return 1; - - if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) - return 1; - - return sysctl_legacy_va_layout; -} - -static unsigned long mmap_base(unsigned long rnd) -{ - unsigned long gap = rlimit(RLIMIT_STACK); - - if (gap < MIN_GAP) - gap = MIN_GAP; - else if (gap > MAX_GAP) - gap = MAX_GAP; - - return PAGE_ALIGN(TASK_SIZE - gap - rnd); -} - #define COLOUR_ALIGN(addr, pgoff) \ ((((addr) + shm_align_mask) & ~shm_align_mask) + \ (((pgoff) << PAGE_SHIFT) & shm_align_mask)) @@ -59,7 +34,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, struct vm_area_struct *vma; unsigned long addr = addr0; int do_color_align; - struct vm_unmapped_area_info info; + struct vm_unmapped_area_info info = {}; if (unlikely(len > TASK_SIZE)) return -ENOMEM; @@ -92,7 +67,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } @@ -117,14 +92,14 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, */ } - info.flags = 0; info.low_limit = mm->mmap_base; info.high_limit = TASK_SIZE; return vm_unmapped_area(&info); } unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr0, - unsigned long len, unsigned long pgoff, unsigned long flags) + unsigned long len, unsigned long pgoff, unsigned long flags, + vm_flags_t vm_flags) { return arch_get_unmapped_area_common(filp, addr0, len, pgoff, flags, UP); @@ -136,63 +111,19 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr0, */ unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr0, unsigned long len, unsigned long pgoff, - unsigned long flags) + unsigned long flags, vm_flags_t vm_flags) { return arch_get_unmapped_area_common(filp, addr0, len, pgoff, flags, DOWN); } -void arch_pick_mmap_layout(struct mm_struct *mm) -{ - unsigned long random_factor = 0UL; - - if (current->flags & PF_RANDOMIZE) { - random_factor = get_random_int(); - random_factor = random_factor << PAGE_SHIFT; - if (TASK_IS_32BIT_ADDR) - random_factor &= 0xfffffful; - else - random_factor &= 0xffffffful; - } - - if (mmap_is_legacy()) { - mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; - mm->get_unmapped_area = arch_get_unmapped_area; - } else { - mm->mmap_base = mmap_base(random_factor); - mm->get_unmapped_area = arch_get_unmapped_area_topdown; - } -} - -static inline unsigned long brk_rnd(void) -{ - unsigned long rnd = get_random_int(); - - rnd = rnd << PAGE_SHIFT; - /* 8MB for 32bit, 256MB for 64bit */ - if (TASK_IS_32BIT_ADDR) - rnd = rnd & 0x7ffffful; - else - rnd = rnd & 0xffffffful; - - return rnd; -} - -unsigned long arch_randomize_brk(struct mm_struct *mm) +bool __virt_addr_valid(const volatile void *kaddr) { - unsigned long base = mm->brk; - unsigned long ret; - - ret = PAGE_ALIGN(base + brk_rnd()); + unsigned long vaddr = (unsigned long)kaddr; - if (ret < mm->brk) - return mm->brk; + if ((vaddr < PAGE_OFFSET) || (vaddr >= MAP_BASE)) + return false; - return ret; -} - -int __virt_addr_valid(const volatile void *kaddr) -{ return pfn_valid(PFN_DOWN(virt_to_phys(kaddr))); } EXPORT_SYMBOL_GPL(__virt_addr_valid); diff --git a/arch/mips/mm/page-funcs.S b/arch/mips/mm/page-funcs.S index 48a6b38ff13e..42d0516ca18a 100644 --- a/arch/mips/mm/page-funcs.S +++ b/arch/mips/mm/page-funcs.S @@ -8,6 +8,7 @@ * Copyright (C) 2012 MIPS Technologies, Inc. * Copyright (C) 2012 Ralf Baechle <ralf@linux-mips.org> */ +#include <linux/export.h> #include <asm/asm.h> #include <asm/regdef.h> @@ -29,6 +30,7 @@ */ EXPORT(__clear_page_start) LEAF(cpu_clear_page_function_name) +EXPORT_SYMBOL(cpu_clear_page_function_name) 1: j 1b /* Dummy, will be replaced. */ .space 288 END(cpu_clear_page_function_name) @@ -44,6 +46,7 @@ EXPORT(__clear_page_end) */ EXPORT(__copy_page_start) LEAF(cpu_copy_page_function_name) +EXPORT_SYMBOL(cpu_copy_page_function_name) 1: j 1b /* Dummy, will be replaced. */ .space 1344 END(cpu_copy_page_function_name) diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c index 2c0bd580b9da..1df237bd4a72 100644 --- a/arch/mips/mm/page.c +++ b/arch/mips/mm/page.c @@ -8,26 +8,24 @@ * Copyright (C) 2008 Thiemo Seufer * Copyright (C) 2012 MIPS Technologies, Inc. */ -#include <linux/init.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/smp.h> #include <linux/mm.h> -#include <linux/module.h> #include <linux/proc_fs.h> #include <asm/bugs.h> #include <asm/cacheops.h> +#include <asm/cpu-type.h> #include <asm/inst.h> #include <asm/io.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/prefetch.h> #include <asm/bootinfo.h> #include <asm/mipsregs.h> #include <asm/mmu_context.h> +#include <asm/regdef.h> #include <asm/cpu.h> -#include <asm/war.h> #ifdef CONFIG_SIBYTE_DMA_PAGEOPS #include <asm/sibyte/sb1250.h> @@ -37,19 +35,6 @@ #include <asm/uasm.h> -/* Registers used in the assembled routines. */ -#define ZERO 0 -#define AT 2 -#define A0 4 -#define A1 5 -#define A2 6 -#define T0 8 -#define T1 9 -#define T2 10 -#define T3 11 -#define T9 25 -#define RA 31 - /* Handle labels (which must be positive integers). */ enum label_id { label_clear_nopref = 1, @@ -66,49 +51,65 @@ UASM_L_LA(_copy_pref_both) UASM_L_LA(_copy_pref_store) /* We need one branch and therefore one relocation per target label. */ -static struct uasm_label __cpuinitdata labels[5]; -static struct uasm_reloc __cpuinitdata relocs[5]; +static struct uasm_label labels[5]; +static struct uasm_reloc relocs[5]; #define cpu_is_r4600_v1_x() ((read_c0_prid() & 0xfffffff0) == 0x00002010) #define cpu_is_r4600_v2_x() ((read_c0_prid() & 0xfffffff0) == 0x00002020) -static int pref_bias_clear_store __cpuinitdata; -static int pref_bias_copy_load __cpuinitdata; -static int pref_bias_copy_store __cpuinitdata; - -static u32 pref_src_mode __cpuinitdata; -static u32 pref_dst_mode __cpuinitdata; - -static int clear_word_size __cpuinitdata; -static int copy_word_size __cpuinitdata; - -static int half_clear_loop_size __cpuinitdata; -static int half_copy_loop_size __cpuinitdata; - -static int cache_line_size __cpuinitdata; +/* + * R6 has a limited offset of the pref instruction. + * Skip it if the offset is more than 9 bits. + */ +#define _uasm_i_pref(a, b, c, d) \ +do { \ + if (cpu_has_mips_r6) { \ + if (c <= 0xff && c >= -0x100) \ + uasm_i_pref(a, b, c, d);\ + } else { \ + uasm_i_pref(a, b, c, d); \ + } \ +} while(0) + +static int pref_bias_clear_store; +static int pref_bias_copy_load; +static int pref_bias_copy_store; + +static u32 pref_src_mode; +static u32 pref_dst_mode; + +static int clear_word_size; +static int copy_word_size; + +static int half_clear_loop_size; +static int half_copy_loop_size; + +static int cache_line_size; #define cache_line_mask() (cache_line_size - 1) -static inline void __cpuinit +static inline void pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off) { - if (cpu_has_64bit_gp_regs && DADDI_WAR && r4k_daddiu_bug()) { + if (cpu_has_64bit_gp_regs && + IS_ENABLED(CONFIG_CPU_DADDI_WORKAROUNDS) && + r4k_daddiu_bug()) { if (off > 0x7fff) { - uasm_i_lui(buf, T9, uasm_rel_hi(off)); - uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off)); + uasm_i_lui(buf, GPR_T9, uasm_rel_hi(off)); + uasm_i_addiu(buf, GPR_T9, GPR_T9, uasm_rel_lo(off)); } else - uasm_i_addiu(buf, T9, ZERO, off); - uasm_i_daddu(buf, reg1, reg2, T9); + uasm_i_addiu(buf, GPR_T9, GPR_ZERO, off); + uasm_i_daddu(buf, reg1, reg2, GPR_T9); } else { if (off > 0x7fff) { - uasm_i_lui(buf, T9, uasm_rel_hi(off)); - uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off)); - UASM_i_ADDU(buf, reg1, reg2, T9); + uasm_i_lui(buf, GPR_T9, uasm_rel_hi(off)); + uasm_i_addiu(buf, GPR_T9, GPR_T9, uasm_rel_lo(off)); + UASM_i_ADDU(buf, reg1, reg2, GPR_T9); } else UASM_i_ADDIU(buf, reg1, reg2, off); } } -static void __cpuinit set_prefetch_parameters(void) +static void set_prefetch_parameters(void) { if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) clear_word_size = 8; @@ -143,6 +144,7 @@ static void __cpuinit set_prefetch_parameters(void) case CPU_R10000: case CPU_R12000: case CPU_R14000: + case CPU_R16000: /* * Those values have been experimentally tuned for an * Origin 200. @@ -173,12 +175,29 @@ static void __cpuinit set_prefetch_parameters(void) } break; + case CPU_LOONGSON64: + /* Loongson-3 only support the Pref_Load/Pref_Store. */ + pref_bias_clear_store = 128; + pref_bias_copy_load = 128; + pref_bias_copy_store = 128; + pref_src_mode = Pref_Load; + pref_dst_mode = Pref_Store; + break; + default: pref_bias_clear_store = 128; pref_bias_copy_load = 256; pref_bias_copy_store = 128; pref_src_mode = Pref_LoadStreamed; - pref_dst_mode = Pref_PrepareForStore; + if (cpu_has_mips_r6) + /* + * Bit 30 (Pref_PrepareForStore) has been + * removed from MIPS R6. Use bit 5 + * (Pref_StoreStreamed). + */ + pref_dst_mode = Pref_StoreStreamed; + else + pref_dst_mode = Pref_PrepareForStore; break; } } else { @@ -199,38 +218,40 @@ static void __cpuinit set_prefetch_parameters(void) 4 * copy_word_size)); } -static void __cpuinit build_clear_store(u32 **buf, int off) +static void build_clear_store(u32 **buf, int off) { if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) { - uasm_i_sd(buf, ZERO, off, A0); + uasm_i_sd(buf, GPR_ZERO, off, GPR_A0); } else { - uasm_i_sw(buf, ZERO, off, A0); + uasm_i_sw(buf, GPR_ZERO, off, GPR_A0); } } -static inline void __cpuinit build_clear_pref(u32 **buf, int off) +static inline void build_clear_pref(u32 **buf, int off) { if (off & cache_line_mask()) return; if (pref_bias_clear_store) { - uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off, - A0); + _uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off, + GPR_A0); } else if (cache_line_size == (half_clear_loop_size << 1)) { if (cpu_has_cache_cdex_s) { - uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0); + uasm_i_cache(buf, Create_Dirty_Excl_SD, off, GPR_A0); } else if (cpu_has_cache_cdex_p) { - if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) { + if (IS_ENABLED(CONFIG_WAR_R4600_V1_HIT_CACHEOP) && + cpu_is_r4600_v1_x()) { uasm_i_nop(buf); uasm_i_nop(buf); uasm_i_nop(buf); uasm_i_nop(buf); } - if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) - uasm_i_lw(buf, ZERO, ZERO, AT); + if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && + cpu_is_r4600_v2_x()) + uasm_i_lw(buf, GPR_ZERO, GPR_ZERO, GPR_AT); - uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0); + uasm_i_cache(buf, Create_Dirty_Excl_D, off, GPR_A0); } } } @@ -240,7 +261,7 @@ extern u32 __clear_page_end; extern u32 __copy_page_start; extern u32 __copy_page_end; -void __cpuinit build_clear_page(void) +void build_clear_page(void) { int off; u32 *buf = &__clear_page_start; @@ -268,12 +289,12 @@ void __cpuinit build_clear_page(void) off = PAGE_SIZE - pref_bias_clear_store; if (off > 0xffff || !pref_bias_clear_store) - pg_addiu(&buf, A2, A0, off); + pg_addiu(&buf, GPR_A2, GPR_A0, off); else - uasm_i_ori(&buf, A2, A0, off); + uasm_i_ori(&buf, GPR_A2, GPR_A0, off); - if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) - uasm_i_lui(&buf, AT, 0xa000); + if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && cpu_is_r4600_v2_x()) + uasm_i_lui(&buf, GPR_AT, uasm_rel_hi(0xa0000000)); off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size) * cache_line_size : 0; @@ -287,36 +308,36 @@ void __cpuinit build_clear_page(void) build_clear_store(&buf, off); off += clear_word_size; } while (off < half_clear_loop_size); - pg_addiu(&buf, A0, A0, 2 * off); + pg_addiu(&buf, GPR_A0, GPR_A0, 2 * off); off = -off; do { build_clear_pref(&buf, off); if (off == -clear_word_size) - uasm_il_bne(&buf, &r, A0, A2, label_clear_pref); + uasm_il_bne(&buf, &r, GPR_A0, GPR_A2, label_clear_pref); build_clear_store(&buf, off); off += clear_word_size; } while (off < 0); if (pref_bias_clear_store) { - pg_addiu(&buf, A2, A0, pref_bias_clear_store); + pg_addiu(&buf, GPR_A2, GPR_A0, pref_bias_clear_store); uasm_l_clear_nopref(&l, buf); off = 0; do { build_clear_store(&buf, off); off += clear_word_size; } while (off < half_clear_loop_size); - pg_addiu(&buf, A0, A0, 2 * off); + pg_addiu(&buf, GPR_A0, GPR_A0, 2 * off); off = -off; do { if (off == -clear_word_size) - uasm_il_bne(&buf, &r, A0, A2, + uasm_il_bne(&buf, &r, GPR_A0, GPR_A2, label_clear_nopref); build_clear_store(&buf, off); off += clear_word_size; } while (off < 0); } - uasm_i_jr(&buf, RA); + uasm_i_jr(&buf, GPR_RA); uasm_i_nop(&buf); BUG_ON(buf > &__clear_page_end); @@ -333,21 +354,21 @@ void __cpuinit build_clear_page(void) pr_debug("\t.set pop\n"); } -static void __cpuinit build_copy_load(u32 **buf, int reg, int off) +static void build_copy_load(u32 **buf, int reg, int off) { if (cpu_has_64bit_gp_regs) { - uasm_i_ld(buf, reg, off, A1); + uasm_i_ld(buf, reg, off, GPR_A1); } else { - uasm_i_lw(buf, reg, off, A1); + uasm_i_lw(buf, reg, off, GPR_A1); } } -static void __cpuinit build_copy_store(u32 **buf, int reg, int off) +static void build_copy_store(u32 **buf, int reg, int off) { if (cpu_has_64bit_gp_regs) { - uasm_i_sd(buf, reg, off, A0); + uasm_i_sd(buf, reg, off, GPR_A0); } else { - uasm_i_sw(buf, reg, off, A0); + uasm_i_sw(buf, reg, off, GPR_A0); } } @@ -357,7 +378,7 @@ static inline void build_copy_load_pref(u32 **buf, int off) return; if (pref_bias_copy_load) - uasm_i_pref(buf, pref_src_mode, pref_bias_copy_load + off, A1); + _uasm_i_pref(buf, pref_src_mode, pref_bias_copy_load + off, GPR_A1); } static inline void build_copy_store_pref(u32 **buf, int off) @@ -366,28 +387,30 @@ static inline void build_copy_store_pref(u32 **buf, int off) return; if (pref_bias_copy_store) { - uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off, - A0); + _uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off, + GPR_A0); } else if (cache_line_size == (half_copy_loop_size << 1)) { if (cpu_has_cache_cdex_s) { - uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0); + uasm_i_cache(buf, Create_Dirty_Excl_SD, off, GPR_A0); } else if (cpu_has_cache_cdex_p) { - if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) { + if (IS_ENABLED(CONFIG_WAR_R4600_V1_HIT_CACHEOP) && + cpu_is_r4600_v1_x()) { uasm_i_nop(buf); uasm_i_nop(buf); uasm_i_nop(buf); uasm_i_nop(buf); } - if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) - uasm_i_lw(buf, ZERO, ZERO, AT); + if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && + cpu_is_r4600_v2_x()) + uasm_i_lw(buf, GPR_ZERO, GPR_ZERO, GPR_AT); - uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0); + uasm_i_cache(buf, Create_Dirty_Excl_D, off, GPR_A0); } } } -void __cpuinit build_copy_page(void) +void build_copy_page(void) { int off; u32 *buf = &__copy_page_start; @@ -419,12 +442,12 @@ void __cpuinit build_copy_page(void) off = PAGE_SIZE - pref_bias_copy_load; if (off > 0xffff || !pref_bias_copy_load) - pg_addiu(&buf, A2, A0, off); + pg_addiu(&buf, GPR_A2, GPR_A0, off); else - uasm_i_ori(&buf, A2, A0, off); + uasm_i_ori(&buf, GPR_A2, GPR_A0, off); - if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) - uasm_i_lui(&buf, AT, 0xa000); + if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && cpu_is_r4600_v2_x()) + uasm_i_lui(&buf, GPR_AT, uasm_rel_hi(0xa0000000)); off = cache_line_size ? min(8, pref_bias_copy_load / cache_line_size) * cache_line_size : 0; @@ -441,126 +464,126 @@ void __cpuinit build_copy_page(void) uasm_l_copy_pref_both(&l, buf); do { build_copy_load_pref(&buf, off); - build_copy_load(&buf, T0, off); + build_copy_load(&buf, GPR_T0, off); build_copy_load_pref(&buf, off + copy_word_size); - build_copy_load(&buf, T1, off + copy_word_size); + build_copy_load(&buf, GPR_T1, off + copy_word_size); build_copy_load_pref(&buf, off + 2 * copy_word_size); - build_copy_load(&buf, T2, off + 2 * copy_word_size); + build_copy_load(&buf, GPR_T2, off + 2 * copy_word_size); build_copy_load_pref(&buf, off + 3 * copy_word_size); - build_copy_load(&buf, T3, off + 3 * copy_word_size); + build_copy_load(&buf, GPR_T3, off + 3 * copy_word_size); build_copy_store_pref(&buf, off); - build_copy_store(&buf, T0, off); + build_copy_store(&buf, GPR_T0, off); build_copy_store_pref(&buf, off + copy_word_size); - build_copy_store(&buf, T1, off + copy_word_size); + build_copy_store(&buf, GPR_T1, off + copy_word_size); build_copy_store_pref(&buf, off + 2 * copy_word_size); - build_copy_store(&buf, T2, off + 2 * copy_word_size); + build_copy_store(&buf, GPR_T2, off + 2 * copy_word_size); build_copy_store_pref(&buf, off + 3 * copy_word_size); - build_copy_store(&buf, T3, off + 3 * copy_word_size); + build_copy_store(&buf, GPR_T3, off + 3 * copy_word_size); off += 4 * copy_word_size; } while (off < half_copy_loop_size); - pg_addiu(&buf, A1, A1, 2 * off); - pg_addiu(&buf, A0, A0, 2 * off); + pg_addiu(&buf, GPR_A1, GPR_A1, 2 * off); + pg_addiu(&buf, GPR_A0, GPR_A0, 2 * off); off = -off; do { build_copy_load_pref(&buf, off); - build_copy_load(&buf, T0, off); + build_copy_load(&buf, GPR_T0, off); build_copy_load_pref(&buf, off + copy_word_size); - build_copy_load(&buf, T1, off + copy_word_size); + build_copy_load(&buf, GPR_T1, off + copy_word_size); build_copy_load_pref(&buf, off + 2 * copy_word_size); - build_copy_load(&buf, T2, off + 2 * copy_word_size); + build_copy_load(&buf, GPR_T2, off + 2 * copy_word_size); build_copy_load_pref(&buf, off + 3 * copy_word_size); - build_copy_load(&buf, T3, off + 3 * copy_word_size); + build_copy_load(&buf, GPR_T3, off + 3 * copy_word_size); build_copy_store_pref(&buf, off); - build_copy_store(&buf, T0, off); + build_copy_store(&buf, GPR_T0, off); build_copy_store_pref(&buf, off + copy_word_size); - build_copy_store(&buf, T1, off + copy_word_size); + build_copy_store(&buf, GPR_T1, off + copy_word_size); build_copy_store_pref(&buf, off + 2 * copy_word_size); - build_copy_store(&buf, T2, off + 2 * copy_word_size); + build_copy_store(&buf, GPR_T2, off + 2 * copy_word_size); build_copy_store_pref(&buf, off + 3 * copy_word_size); if (off == -(4 * copy_word_size)) - uasm_il_bne(&buf, &r, A2, A0, label_copy_pref_both); - build_copy_store(&buf, T3, off + 3 * copy_word_size); + uasm_il_bne(&buf, &r, GPR_A2, GPR_A0, label_copy_pref_both); + build_copy_store(&buf, GPR_T3, off + 3 * copy_word_size); off += 4 * copy_word_size; } while (off < 0); if (pref_bias_copy_load - pref_bias_copy_store) { - pg_addiu(&buf, A2, A0, + pg_addiu(&buf, GPR_A2, GPR_A0, pref_bias_copy_load - pref_bias_copy_store); uasm_l_copy_pref_store(&l, buf); off = 0; do { - build_copy_load(&buf, T0, off); - build_copy_load(&buf, T1, off + copy_word_size); - build_copy_load(&buf, T2, off + 2 * copy_word_size); - build_copy_load(&buf, T3, off + 3 * copy_word_size); + build_copy_load(&buf, GPR_T0, off); + build_copy_load(&buf, GPR_T1, off + copy_word_size); + build_copy_load(&buf, GPR_T2, off + 2 * copy_word_size); + build_copy_load(&buf, GPR_T3, off + 3 * copy_word_size); build_copy_store_pref(&buf, off); - build_copy_store(&buf, T0, off); + build_copy_store(&buf, GPR_T0, off); build_copy_store_pref(&buf, off + copy_word_size); - build_copy_store(&buf, T1, off + copy_word_size); + build_copy_store(&buf, GPR_T1, off + copy_word_size); build_copy_store_pref(&buf, off + 2 * copy_word_size); - build_copy_store(&buf, T2, off + 2 * copy_word_size); + build_copy_store(&buf, GPR_T2, off + 2 * copy_word_size); build_copy_store_pref(&buf, off + 3 * copy_word_size); - build_copy_store(&buf, T3, off + 3 * copy_word_size); + build_copy_store(&buf, GPR_T3, off + 3 * copy_word_size); off += 4 * copy_word_size; } while (off < half_copy_loop_size); - pg_addiu(&buf, A1, A1, 2 * off); - pg_addiu(&buf, A0, A0, 2 * off); + pg_addiu(&buf, GPR_A1, GPR_A1, 2 * off); + pg_addiu(&buf, GPR_A0, GPR_A0, 2 * off); off = -off; do { - build_copy_load(&buf, T0, off); - build_copy_load(&buf, T1, off + copy_word_size); - build_copy_load(&buf, T2, off + 2 * copy_word_size); - build_copy_load(&buf, T3, off + 3 * copy_word_size); + build_copy_load(&buf, GPR_T0, off); + build_copy_load(&buf, GPR_T1, off + copy_word_size); + build_copy_load(&buf, GPR_T2, off + 2 * copy_word_size); + build_copy_load(&buf, GPR_T3, off + 3 * copy_word_size); build_copy_store_pref(&buf, off); - build_copy_store(&buf, T0, off); + build_copy_store(&buf, GPR_T0, off); build_copy_store_pref(&buf, off + copy_word_size); - build_copy_store(&buf, T1, off + copy_word_size); + build_copy_store(&buf, GPR_T1, off + copy_word_size); build_copy_store_pref(&buf, off + 2 * copy_word_size); - build_copy_store(&buf, T2, off + 2 * copy_word_size); + build_copy_store(&buf, GPR_T2, off + 2 * copy_word_size); build_copy_store_pref(&buf, off + 3 * copy_word_size); if (off == -(4 * copy_word_size)) - uasm_il_bne(&buf, &r, A2, A0, + uasm_il_bne(&buf, &r, GPR_A2, GPR_A0, label_copy_pref_store); - build_copy_store(&buf, T3, off + 3 * copy_word_size); + build_copy_store(&buf, GPR_T3, off + 3 * copy_word_size); off += 4 * copy_word_size; } while (off < 0); } if (pref_bias_copy_store) { - pg_addiu(&buf, A2, A0, pref_bias_copy_store); + pg_addiu(&buf, GPR_A2, GPR_A0, pref_bias_copy_store); uasm_l_copy_nopref(&l, buf); off = 0; do { - build_copy_load(&buf, T0, off); - build_copy_load(&buf, T1, off + copy_word_size); - build_copy_load(&buf, T2, off + 2 * copy_word_size); - build_copy_load(&buf, T3, off + 3 * copy_word_size); - build_copy_store(&buf, T0, off); - build_copy_store(&buf, T1, off + copy_word_size); - build_copy_store(&buf, T2, off + 2 * copy_word_size); - build_copy_store(&buf, T3, off + 3 * copy_word_size); + build_copy_load(&buf, GPR_T0, off); + build_copy_load(&buf, GPR_T1, off + copy_word_size); + build_copy_load(&buf, GPR_T2, off + 2 * copy_word_size); + build_copy_load(&buf, GPR_T3, off + 3 * copy_word_size); + build_copy_store(&buf, GPR_T0, off); + build_copy_store(&buf, GPR_T1, off + copy_word_size); + build_copy_store(&buf, GPR_T2, off + 2 * copy_word_size); + build_copy_store(&buf, GPR_T3, off + 3 * copy_word_size); off += 4 * copy_word_size; } while (off < half_copy_loop_size); - pg_addiu(&buf, A1, A1, 2 * off); - pg_addiu(&buf, A0, A0, 2 * off); + pg_addiu(&buf, GPR_A1, GPR_A1, 2 * off); + pg_addiu(&buf, GPR_A0, GPR_A0, 2 * off); off = -off; do { - build_copy_load(&buf, T0, off); - build_copy_load(&buf, T1, off + copy_word_size); - build_copy_load(&buf, T2, off + 2 * copy_word_size); - build_copy_load(&buf, T3, off + 3 * copy_word_size); - build_copy_store(&buf, T0, off); - build_copy_store(&buf, T1, off + copy_word_size); - build_copy_store(&buf, T2, off + 2 * copy_word_size); + build_copy_load(&buf, GPR_T0, off); + build_copy_load(&buf, GPR_T1, off + copy_word_size); + build_copy_load(&buf, GPR_T2, off + 2 * copy_word_size); + build_copy_load(&buf, GPR_T3, off + 3 * copy_word_size); + build_copy_store(&buf, GPR_T0, off); + build_copy_store(&buf, GPR_T1, off + copy_word_size); + build_copy_store(&buf, GPR_T2, off + 2 * copy_word_size); if (off == -(4 * copy_word_size)) - uasm_il_bne(&buf, &r, A2, A0, + uasm_il_bne(&buf, &r, GPR_A2, GPR_A0, label_copy_nopref); - build_copy_store(&buf, T3, off + 3 * copy_word_size); + build_copy_store(&buf, GPR_T3, off + 3 * copy_word_size); off += 4 * copy_word_size; } while (off < 0); } - uasm_i_jr(&buf, RA); + uasm_i_jr(&buf, GPR_RA); uasm_i_nop(&buf); BUG_ON(buf > &__copy_page_end); @@ -592,21 +615,6 @@ struct dmadscr { u64 pad_b; } ____cacheline_aligned_in_smp page_descr[DM_NUM_CHANNELS]; -void sb1_dma_init(void) -{ - int i; - - for (i = 0; i < DM_NUM_CHANNELS; i++) { - const u64 base_val = CPHYSADDR((unsigned long)&page_descr[i]) | - V_DM_DSCR_BASE_RINGSZ(1); - void *base_reg = IOADDR(A_DM_REGISTER(i, R_DM_DSCR_BASE)); - - __raw_writeq(base_val, base_reg); - __raw_writeq(base_val | M_DM_DSCR_BASE_RESET, base_reg); - __raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, base_reg); - } -} - void clear_page(void *page) { u64 to_phys = CPHYSADDR((unsigned long)page); @@ -630,6 +638,7 @@ void clear_page(void *page) ; __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); } +EXPORT_SYMBOL(clear_page); void copy_page(void *to, void *from) { @@ -656,5 +665,6 @@ void copy_page(void *to, void *from) ; __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); } +EXPORT_SYMBOL(copy_page); #endif /* CONFIG_SIBYTE_DMA_PAGEOPS */ diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c index adc6911ba748..e2cf2166d5cb 100644 --- a/arch/mips/mm/pgtable-32.c +++ b/arch/mips/mm/pgtable-32.c @@ -7,15 +7,15 @@ */ #include <linux/init.h> #include <linux/mm.h> -#include <linux/bootmem.h> +#include <linux/memblock.h> #include <linux/highmem.h> #include <asm/fixmap.h> -#include <asm/pgtable.h> #include <asm/pgalloc.h> +#include <asm/tlbflush.h> -void pgd_init(unsigned long page) +void pgd_init(void *addr) { - unsigned long *p = (unsigned long *) page; + unsigned long *p = (unsigned long *)addr; int i; for (i = 0; i < USER_PTRS_PER_PGD; i+=8) { @@ -30,39 +30,48 @@ void pgd_init(unsigned long page) } } +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) +void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + *pmdp = pmd; +} +#endif /* defined(CONFIG_TRANSPARENT_HUGEPAGE) */ + void __init pagetable_init(void) { unsigned long vaddr; pgd_t *pgd_base; #ifdef CONFIG_HIGHMEM pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; #endif /* Initialize the entire pgd. */ - pgd_init((unsigned long)swapper_pg_dir); - pgd_init((unsigned long)swapper_pg_dir - + sizeof(pgd_t) * USER_PTRS_PER_PGD); + pgd_init(swapper_pg_dir); + pgd_init(&swapper_pg_dir[USER_PTRS_PER_PGD]); pgd_base = swapper_pg_dir; /* * Fixed mappings: */ - vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - fixrange_init(vaddr, vaddr + FIXADDR_SIZE, pgd_base); + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1); + fixrange_init(vaddr & PMD_MASK, vaddr + FIXADDR_SIZE, pgd_base); #ifdef CONFIG_HIGHMEM /* * Permanent kmaps: */ vaddr = PKMAP_BASE; - fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); + fixrange_init(vaddr & PMD_MASK, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); - pgd = swapper_pg_dir + __pgd_offset(vaddr); - pud = pud_offset(pgd, vaddr); + pgd = swapper_pg_dir + pgd_index(vaddr); + p4d = p4d_offset(pgd, vaddr); + pud = pud_offset(p4d, vaddr); pmd = pmd_offset(pud, vaddr); pte = pte_offset_kernel(pmd, vaddr); pkmap_page_table = pte; diff --git a/arch/mips/mm/pgtable-64.c b/arch/mips/mm/pgtable-64.c index e8adc0069d66..b24f865de357 100644 --- a/arch/mips/mm/pgtable-64.c +++ b/arch/mips/mm/pgtable-64.c @@ -6,25 +6,27 @@ * Copyright (C) 1999, 2000 by Silicon Graphics * Copyright (C) 2003 by Ralf Baechle */ +#include <linux/export.h> #include <linux/init.h> #include <linux/mm.h> #include <asm/fixmap.h> -#include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/tlbflush.h> -void pgd_init(unsigned long page) +void pgd_init(void *addr) { unsigned long *p, *end; unsigned long entry; -#ifdef __PAGETABLE_PMD_FOLDED - entry = (unsigned long)invalid_pte_table; -#else +#if !defined(__PAGETABLE_PUD_FOLDED) + entry = (unsigned long)invalid_pud_table; +#elif !defined(__PAGETABLE_PMD_FOLDED) entry = (unsigned long)invalid_pmd_table; +#else + entry = (unsigned long)invalid_pte_table; #endif - p = (unsigned long *) page; + p = (unsigned long *) addr; end = p + PTRS_PER_PGD; do { @@ -41,11 +43,12 @@ void pgd_init(unsigned long page) } #ifndef __PAGETABLE_PMD_FOLDED -void pmd_init(unsigned long addr, unsigned long pagetable) +void pmd_init(void *addr) { unsigned long *p, *end; + unsigned long pagetable = (unsigned long)invalid_pte_table; - p = (unsigned long *) addr; + p = (unsigned long *)addr; end = p + PTRS_PER_PMD; do { @@ -60,37 +63,39 @@ void pmd_init(unsigned long addr, unsigned long pagetable) p[-1] = pagetable; } while (p != end); } +EXPORT_SYMBOL_GPL(pmd_init); #endif -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - -void pmdp_splitting_flush(struct vm_area_struct *vma, - unsigned long address, - pmd_t *pmdp) +#ifndef __PAGETABLE_PUD_FOLDED +void pud_init(void *addr) { - if (!pmd_trans_splitting(*pmdp)) { - pmd_t pmd = pmd_mksplitting(*pmdp); - set_pmd_at(vma->vm_mm, address, pmdp, pmd); - } -} - -#endif - -pmd_t mk_pmd(struct page *page, pgprot_t prot) -{ - pmd_t pmd; + unsigned long *p, *end; + unsigned long pagetable = (unsigned long)invalid_pmd_table; - pmd_val(pmd) = (page_to_pfn(page) << _PFN_SHIFT) | pgprot_val(prot); + p = (unsigned long *)addr; + end = p + PTRS_PER_PUD; - return pmd; + do { + p[0] = pagetable; + p[1] = pagetable; + p[2] = pagetable; + p[3] = pagetable; + p[4] = pagetable; + p += 8; + p[-3] = pagetable; + p[-2] = pagetable; + p[-1] = pagetable; + } while (p != end); } +#endif +#ifdef CONFIG_TRANSPARENT_HUGEPAGE void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { *pmdp = pmd; - flush_tlb_all(); } +#endif void __init pagetable_init(void) { @@ -98,9 +103,12 @@ void __init pagetable_init(void) pgd_t *pgd_base; /* Initialize the entire pgd. */ - pgd_init((unsigned long)swapper_pg_dir); + pgd_init(swapper_pg_dir); +#ifndef __PAGETABLE_PUD_FOLDED + pud_init(invalid_pud_table); +#endif #ifndef __PAGETABLE_PMD_FOLDED - pmd_init((unsigned long)invalid_pmd_table, (unsigned long)invalid_pte_table); + pmd_init(invalid_pmd_table); #endif pgd_base = swapper_pg_dir; /* diff --git a/arch/mips/mm/pgtable.c b/arch/mips/mm/pgtable.c new file mode 100644 index 000000000000..10835414819f --- /dev/null +++ b/arch/mips/mm/pgtable.c @@ -0,0 +1,25 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/export.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <asm/pgalloc.h> + +pgd_t *pgd_alloc(struct mm_struct *mm) +{ + pgd_t *init, *ret; + + ret = __pgd_alloc(mm, PGD_TABLE_ORDER); + if (ret) { + init = pgd_offset(&init_mm, 0UL); + pgd_init(ret); + memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + } + + return ret; +} +EXPORT_SYMBOL_GPL(pgd_alloc); diff --git a/arch/mips/mm/physaddr.c b/arch/mips/mm/physaddr.c new file mode 100644 index 000000000000..a6b1bf82057a --- /dev/null +++ b/arch/mips/mm/physaddr.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bug.h> +#include <linux/export.h> +#include <linux/types.h> +#include <linux/mmdebug.h> +#include <linux/mm.h> + +#include <asm/addrspace.h> +#include <asm/sections.h> +#include <asm/io.h> +#include <asm/page.h> +#include <asm/dma.h> + +static inline bool __debug_virt_addr_valid(unsigned long x) +{ + /* + * MAX_DMA_ADDRESS is a virtual address that may not correspond to an + * actual physical address. Enough code relies on + * virt_to_phys(MAX_DMA_ADDRESS) that we just need to work around it + * and always return true. + */ + if (x == MAX_DMA_ADDRESS) + return true; + + return x >= PAGE_OFFSET && (KSEGX(x) < KSEG2 || + IS_ENABLED(CONFIG_EVA) || + !IS_ENABLED(CONFIG_HIGHMEM)); +} + +phys_addr_t __virt_to_phys(volatile const void *x) +{ + WARN(!__debug_virt_addr_valid((unsigned long)x), + "virt_to_phys used for non-linear address: %p (%pS)\n", + x, x); + + return __virt_to_phys_nodebug(x); +} +EXPORT_SYMBOL(__virt_to_phys); + +phys_addr_t __phys_addr_symbol(unsigned long x) +{ + /* This is bounds checking against the kernel image only. + * __pa_symbol should only be used on kernel symbol addresses. + */ + VIRTUAL_BUG_ON(x < (unsigned long)_text || + x > (unsigned long)_end); + + return __pa_symbol_nodebug(x); +} +EXPORT_SYMBOL(__phys_addr_symbol); diff --git a/arch/mips/mm/sc-debugfs.c b/arch/mips/mm/sc-debugfs.c new file mode 100644 index 000000000000..80ff3947157d --- /dev/null +++ b/arch/mips/mm/sc-debugfs.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2015 Imagination Technologies + * Author: Paul Burton <paul.burton@mips.com> + */ + +#include <asm/bcache.h> +#include <asm/debug.h> +#include <linux/uaccess.h> +#include <linux/debugfs.h> +#include <linux/init.h> + +static ssize_t sc_prefetch_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + bool enabled = bc_prefetch_is_enabled(); + char buf[3]; + + buf[0] = enabled ? 'Y' : 'N'; + buf[1] = '\n'; + buf[2] = 0; + + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t sc_prefetch_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + bool enabled; + int err; + + err = kstrtobool_from_user(user_buf, count, &enabled); + if (err) + return err; + + if (enabled) + bc_prefetch_enable(); + else + bc_prefetch_disable(); + + return count; +} + +static const struct file_operations sc_prefetch_fops = { + .open = simple_open, + .llseek = default_llseek, + .read = sc_prefetch_read, + .write = sc_prefetch_write, +}; + +static int __init sc_debugfs_init(void) +{ + struct dentry *dir; + + dir = debugfs_create_dir("l2cache", mips_debugfs_dir); + debugfs_create_file("prefetch", S_IRUGO | S_IWUSR, dir, NULL, + &sc_prefetch_fops); + return 0; +} +late_initcall(sc_debugfs_init); diff --git a/arch/mips/mm/sc-ip22.c b/arch/mips/mm/sc-ip22.c index c6aaed934d53..d7238687d790 100644 --- a/arch/mips/mm/sc-ip22.c +++ b/arch/mips/mm/sc-ip22.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * sc-ip22.c: Indy cache management functions. * @@ -11,7 +12,6 @@ #include <asm/bcache.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/bootinfo.h> #include <asm/sgi/ip22.h> #include <asm/sgi/mc.h> @@ -31,26 +31,40 @@ static inline void indy_sc_wipe(unsigned long first, unsigned long last) unsigned long tmp; __asm__ __volatile__( - ".set\tpush\t\t\t# indy_sc_wipe\n\t" - ".set\tnoreorder\n\t" - ".set\tmips3\n\t" - ".set\tnoat\n\t" - "mfc0\t%2, $12\n\t" - "li\t$1, 0x80\t\t\t# Go 64 bit\n\t" - "mtc0\t$1, $12\n\t" - - "dli\t$1, 0x9000000080000000\n\t" - "or\t%0, $1\t\t\t# first line to flush\n\t" - "or\t%1, $1\t\t\t# last line to flush\n\t" - ".set\tat\n\t" - - "1:\tsw\t$0, 0(%0)\n\t" - "bne\t%0, %1, 1b\n\t" - " daddu\t%0, 32\n\t" - - "mtc0\t%2, $12\t\t\t# Back to 32 bit\n\t" - "nop; nop; nop; nop;\n\t" - ".set\tpop" + " .set push # indy_sc_wipe \n" + " .set noreorder \n" + " .set mips3 \n" + " .set noat \n" + " mfc0 %2, $12 \n" + " li $1, 0x80 # Go 64 bit \n" + " mtc0 $1, $12 \n" + " \n" + " # \n" + " # Open code a dli $1, 0x9000000080000000 \n" + " # \n" + " # Required because binutils 2.25 will happily accept \n" + " # 64 bit instructions in .set mips3 mode but puke on \n" + " # 64 bit constants when generating 32 bit ELF \n" + " # \n" + " lui $1,0x9000 \n" + " dsll $1,$1,0x10 \n" + " ori $1,$1,0x8000 \n" + " dsll $1,$1,0x10 \n" + " \n" + " or %0, $1 # first line to flush \n" + " or %1, $1 # last line to flush \n" + " .set at \n" + " \n" + "1: sw $0, 0(%0) \n" + " bne %0, %1, 1b \n" + " daddu %0, 32 \n" + " \n" + " mtc0 %2, $12 # Back to 32 bit \n" + " nop # pipeline hazard \n" + " nop \n" + " nop \n" + " nop \n" + " .set pop \n" : "=r" (first), "=r" (last), "=&r" (tmp) : "0" (first), "1" (last)); } @@ -158,7 +172,7 @@ static inline int __init indy_sc_probe(void) return 1; } -/* XXX Check with wje if the Indy caches can differenciate between +/* XXX Check with wje if the Indy caches can differentiate between writeback + invalidate and just invalidate. */ static struct bcache_ops indy_sc_ops = { .bc_enable = indy_sc_enable, @@ -167,7 +181,7 @@ static struct bcache_ops indy_sc_ops = { .bc_inv = indy_sc_wback_invalidate }; -void __cpuinit indy_sc_init(void) +void indy_sc_init(void) { if (indy_sc_probe()) { indy_sc_enable(); diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c index df96da7e939b..06ec304ad4d1 100644 --- a/arch/mips/mm/sc-mips.c +++ b/arch/mips/mm/sc-mips.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2006 Chris Dearman (chris@mips.com), */ @@ -6,13 +7,15 @@ #include <linux/sched.h> #include <linux/mm.h> +#include <asm/cpu-type.h> #include <asm/mipsregs.h> #include <asm/bcache.h> #include <asm/cacheops.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/mmu_context.h> #include <asm/r4kcache.h> +#include <asm/mips-cps.h> +#include <asm/bootinfo.h> /* * MIPS32/MIPS64 L2 cache handling @@ -49,11 +52,60 @@ static void mips_sc_disable(void) /* L2 cache is permanently enabled */ } +static void mips_sc_prefetch_enable(void) +{ + unsigned long pftctl; + + if (mips_cm_revision() < CM_REV_CM2_5) + return; + + /* + * If there is one or more L2 prefetch unit present then enable + * prefetching for both code & data, for all ports. + */ + pftctl = read_gcr_l2_pft_control(); + if (pftctl & CM_GCR_L2_PFT_CONTROL_NPFT) { + pftctl &= ~CM_GCR_L2_PFT_CONTROL_PAGEMASK; + pftctl |= PAGE_MASK & CM_GCR_L2_PFT_CONTROL_PAGEMASK; + pftctl |= CM_GCR_L2_PFT_CONTROL_PFTEN; + write_gcr_l2_pft_control(pftctl); + + set_gcr_l2_pft_control_b(CM_GCR_L2_PFT_CONTROL_B_PORTID | + CM_GCR_L2_PFT_CONTROL_B_CEN); + } +} + +static void mips_sc_prefetch_disable(void) +{ + if (mips_cm_revision() < CM_REV_CM2_5) + return; + + clear_gcr_l2_pft_control(CM_GCR_L2_PFT_CONTROL_PFTEN); + clear_gcr_l2_pft_control_b(CM_GCR_L2_PFT_CONTROL_B_PORTID | + CM_GCR_L2_PFT_CONTROL_B_CEN); +} + +static bool mips_sc_prefetch_is_enabled(void) +{ + unsigned long pftctl; + + if (mips_cm_revision() < CM_REV_CM2_5) + return false; + + pftctl = read_gcr_l2_pft_control(); + if (!(pftctl & CM_GCR_L2_PFT_CONTROL_NPFT)) + return false; + return !!(pftctl & CM_GCR_L2_PFT_CONTROL_PFTEN); +} + static struct bcache_ops mips_sc_ops = { .bc_enable = mips_sc_enable, .bc_disable = mips_sc_disable, .bc_wback_inv = mips_sc_wback_inv, - .bc_inv = mips_sc_inv + .bc_inv = mips_sc_inv, + .bc_prefetch_enable = mips_sc_prefetch_enable, + .bc_prefetch_disable = mips_sc_prefetch_disable, + .bc_prefetch_is_enabled = mips_sc_prefetch_is_enabled, }; /* @@ -71,11 +123,17 @@ static inline int mips_sc_is_activated(struct cpuinfo_mips *c) unsigned int tmp; /* Check the bypass bit (L2B) */ - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_34K: case CPU_74K: case CPU_1004K: + case CPU_1074K: + case CPU_INTERAPTIV: + case CPU_PROAPTIV: + case CPU_P5600: case CPU_BMIPS5000: + case CPU_QEMU_GENERIC: + case CPU_P6600: if (config2 & (1 << 12)) return 0; } @@ -88,7 +146,41 @@ static inline int mips_sc_is_activated(struct cpuinfo_mips *c) return 1; } -static inline int __init mips_sc_probe(void) +static int mips_sc_probe_cm3(void) +{ + struct cpuinfo_mips *c = ¤t_cpu_data; + unsigned long cfg = read_gcr_l2_config(); + unsigned long sets, line_sz, assoc; + + if (cfg & CM_GCR_L2_CONFIG_BYPASS) + return 0; + + sets = cfg & CM_GCR_L2_CONFIG_SET_SIZE; + sets >>= __ffs(CM_GCR_L2_CONFIG_SET_SIZE); + if (sets) + c->scache.sets = 64 << sets; + + line_sz = cfg & CM_GCR_L2_CONFIG_LINE_SIZE; + line_sz >>= __ffs(CM_GCR_L2_CONFIG_LINE_SIZE); + if (line_sz) + c->scache.linesz = 2 << line_sz; + + assoc = cfg & CM_GCR_L2_CONFIG_ASSOC; + assoc >>= __ffs(CM_GCR_L2_CONFIG_ASSOC); + c->scache.ways = assoc + 1; + c->scache.waysize = c->scache.sets * c->scache.linesz; + c->scache.waybit = __ffs(c->scache.waysize); + + if (c->scache.linesz) { + c->scache.flags &= ~MIPS_CACHE_NOT_PRESENT; + c->options |= MIPS_CPU_INCLUSIVE_CACHES; + return 1; + } + + return 0; +} + +static inline int mips_sc_probe(void) { struct cpuinfo_mips *c = ¤t_cpu_data; unsigned int config1, config2; @@ -97,9 +189,14 @@ static inline int __init mips_sc_probe(void) /* Mark as not present until probe completed */ c->scache.flags |= MIPS_CACHE_NOT_PRESENT; + if (mips_cm_revision() >= CM_REV_CM3) + return mips_sc_probe_cm3(); + /* Ignore anything but MIPSxx processors */ - if (!(c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2 | - MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M64R2))) + if (!(c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 | + MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 | + MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 | + MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6))) return 0; /* Does this MIPS32/MIPS64 CPU have a config2 register? */ @@ -113,17 +210,40 @@ static inline int __init mips_sc_probe(void) return 0; tmp = (config2 >> 8) & 0x0f; - if (0 <= tmp && tmp <= 7) + if (tmp <= 7) c->scache.sets = 64 << tmp; else return 0; tmp = (config2 >> 0) & 0x0f; - if (0 <= tmp && tmp <= 7) + if (tmp <= 7) c->scache.ways = tmp + 1; else return 0; + if (current_cpu_type() == CPU_XBURST) { + switch (mips_machtype) { + /* + * According to config2 it would be 5-ways, but that is + * contradicted by all documentation. + */ + case MACH_INGENIC_JZ4770: + case MACH_INGENIC_JZ4775: + c->scache.ways = 4; + break; + + /* + * According to config2 it would be 5-ways and 512-sets, + * but that is contradicted by all documentation. + */ + case MACH_INGENIC_X1000: + case MACH_INGENIC_X1000E: + c->scache.sets = 256; + c->scache.ways = 4; + break; + } + } + c->scache.waysize = c->scache.sets * c->scache.linesz; c->scache.waybit = __ffs(c->scache.waysize); @@ -132,11 +252,12 @@ static inline int __init mips_sc_probe(void) return 1; } -int __cpuinit mips_sc_init(void) +int mips_sc_init(void) { int found = mips_sc_probe(); if (found) { mips_sc_enable(); + mips_sc_prefetch_enable(); bcops = &mips_sc_ops; } return found; diff --git a/arch/mips/mm/sc-r5k.c b/arch/mips/mm/sc-r5k.c index 8bc67720e145..736615d68f7a 100644 --- a/arch/mips/mm/sc-r5k.c +++ b/arch/mips/mm/sc-r5k.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org), * derived from r4xx0.c by David S. Miller (davem@davemloft.net). @@ -11,7 +12,6 @@ #include <asm/bcache.h> #include <asm/cacheops.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/mmu_context.h> #include <asm/r4kcache.h> @@ -81,7 +81,7 @@ static inline int __init r5k_sc_probe(void) unsigned long config = read_c0_config(); if (config & CONF_SC) - return(0); + return 0; scache_size = (512 * 1024) << ((config & R5K_CONF_SS) >> 20); @@ -98,7 +98,7 @@ static struct bcache_ops r5k_sc_ops = { .bc_inv = r5k_dma_cache_inv_sc }; -void __cpuinit r5k_sc_init(void) +void r5k_sc_init(void) { if (r5k_sc_probe()) { r5k_sc_enable(); diff --git a/arch/mips/mm/sc-rm7k.c b/arch/mips/mm/sc-rm7k.c index 274af3be1442..e9e3777a7848 100644 --- a/arch/mips/mm/sc-rm7k.c +++ b/arch/mips/mm/sc-rm7k.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * sc-rm7k.c: RM7000 cache management functions. * @@ -6,7 +7,6 @@ #undef DEBUG -#include <linux/init.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/bitops.h> @@ -104,7 +104,7 @@ static void blast_rm7k_tcache(void) /* * This function is executed in uncached address space. */ -static __cpuinit void __rm7k_tc_enable(void) +static void __rm7k_tc_enable(void) { int i; @@ -117,7 +117,7 @@ static __cpuinit void __rm7k_tc_enable(void) cache_op(Index_Store_Tag_T, CKSEG0ADDR(i)); } -static __cpuinit void rm7k_tc_enable(void) +static void rm7k_tc_enable(void) { if (read_c0_config() & RM7K_CONF_TE) return; @@ -130,7 +130,7 @@ static __cpuinit void rm7k_tc_enable(void) /* * This function is executed in uncached address space. */ -static __cpuinit void __rm7k_sc_enable(void) +static void __rm7k_sc_enable(void) { int i; @@ -143,7 +143,7 @@ static __cpuinit void __rm7k_sc_enable(void) cache_op(Index_Store_Tag_SD, CKSEG0ADDR(i)); } -static __cpuinit void rm7k_sc_enable(void) +static void rm7k_sc_enable(void) { if (read_c0_config() & RM7K_CONF_SE) return; @@ -162,7 +162,7 @@ static void rm7k_tc_disable(void) local_irq_save(flags); blast_rm7k_tcache(); clear_c0_config(RM7K_CONF_TE); - local_irq_save(flags); + local_irq_restore(flags); } static void rm7k_sc_disable(void) @@ -184,7 +184,7 @@ static struct bcache_ops rm7k_sc_ops = { * This is a probing function like the one found in c-r4k.c, we look for the * wrap around point with different addresses. */ -static __cpuinit void __probe_tcache(void) +static void __probe_tcache(void) { unsigned long flags, addr, begin, end, pow2; @@ -226,7 +226,7 @@ static __cpuinit void __probe_tcache(void) local_irq_restore(flags); } -void __cpuinit rm7k_sc_init(void) +void rm7k_sc_init(void) { struct cpuinfo_mips *c = ¤t_cpu_data; unsigned int config = read_c0_config(); diff --git a/arch/mips/mm/tlb-funcs.S b/arch/mips/mm/tlb-funcs.S index 30a494db99c2..2705d7dcb33e 100644 --- a/arch/mips/mm/tlb-funcs.S +++ b/arch/mips/mm/tlb-funcs.S @@ -11,15 +11,18 @@ * Copyright (C) 2012 MIPS Technologies, Inc. * Copyright (C) 2012 Ralf Baechle <ralf@linux-mips.org> */ +#include <linux/export.h> #include <asm/asm.h> #include <asm/regdef.h> #define FASTPATH_SIZE 128 LEAF(tlbmiss_handler_setup_pgd) - .space 16 * 4 +1: j 1b /* Dummy, will be replaced. */ + .space 64 END(tlbmiss_handler_setup_pgd) EXPORT(tlbmiss_handler_setup_pgd_end) +EXPORT_SYMBOL_GPL(tlbmiss_handler_setup_pgd) LEAF(handle_tlbm) .space FASTPATH_SIZE * 4 diff --git a/arch/mips/mm/tlb-r3k.c b/arch/mips/mm/tlb-r3k.c index a63d1ed0827f..173f7b36033b 100644 --- a/arch/mips/mm/tlb-r3k.c +++ b/arch/mips/mm/tlb-r3k.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * r2300.c: R2000 and R3000 specific mmu/cache code. * @@ -10,25 +11,23 @@ * Copyright (C) 2002 Ralf Baechle * Copyright (C) 2002 Maciej W. Rozycki */ -#include <linux/init.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/smp.h> #include <linux/mm.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/mmu_context.h> #include <asm/tlbmisc.h> #include <asm/isadep.h> #include <asm/io.h> #include <asm/bootinfo.h> #include <asm/cpu.h> +#include <asm/setup.h> +#include <asm/tlbex.h> #undef DEBUG_TLB -extern void build_tlb_refill_handler(void); - /* CP0 hazard avoidance. */ #define BARRIER \ __asm__ __volatile__( \ @@ -37,48 +36,38 @@ extern void build_tlb_refill_handler(void); "nop\n\t" \ ".set pop\n\t") -int r3k_have_wired_reg; /* should be in cpu_data? */ - /* TLB operations. */ -void local_flush_tlb_all(void) +static void local_flush_tlb_from(int entry) { - unsigned long flags; unsigned long old_ctx; - int entry; -#ifdef DEBUG_TLB - printk("[tlball]"); -#endif - - local_irq_save(flags); - old_ctx = read_c0_entryhi() & ASID_MASK; + old_ctx = read_c0_entryhi() & cpu_asid_mask(¤t_cpu_data); write_c0_entrylo0(0); - entry = r3k_have_wired_reg ? read_c0_wired() : 8; - for (; entry < current_cpu_data.tlbsize; entry++) { + while (entry < current_cpu_data.tlbsize) { write_c0_index(entry << 8); write_c0_entryhi((entry | 0x80000) << 12); - BARRIER; + entry++; /* BARRIER */ tlb_write_indexed(); } write_c0_entryhi(old_ctx); - local_irq_restore(flags); } -void local_flush_tlb_mm(struct mm_struct *mm) +void local_flush_tlb_all(void) { - int cpu = smp_processor_id(); + unsigned long flags; - if (cpu_context(cpu, mm) != 0) { #ifdef DEBUG_TLB - printk("[tlbmm<%lu>]", (unsigned long)cpu_context(cpu, mm)); + printk("[tlball]"); #endif - drop_mmu_context(mm, cpu); - } + local_irq_save(flags); + local_flush_tlb_from(8); + local_irq_restore(flags); } void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { + unsigned long asid_mask = cpu_asid_mask(¤t_cpu_data); struct mm_struct *mm = vma->vm_mm; int cpu = smp_processor_id(); @@ -87,13 +76,13 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, #ifdef DEBUG_TLB printk("[tlbrange<%lu,0x%08lx,0x%08lx>]", - cpu_context(cpu, mm) & ASID_MASK, start, end); + cpu_context(cpu, mm) & asid_mask, start, end); #endif local_irq_save(flags); size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; if (size <= current_cpu_data.tlbsize) { - int oldpid = read_c0_entryhi() & ASID_MASK; - int newpid = cpu_context(cpu, mm) & ASID_MASK; + int oldpid = read_c0_entryhi() & asid_mask; + int newpid = cpu_context(cpu, mm) & asid_mask; start &= PAGE_MASK; end += PAGE_SIZE - 1; @@ -113,7 +102,7 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, } write_c0_entryhi(oldpid); } else { - drop_mmu_context(mm, cpu); + drop_mmu_context(mm); } local_irq_restore(flags); } @@ -157,19 +146,20 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) { + unsigned long asid_mask = cpu_asid_mask(¤t_cpu_data); int cpu = smp_processor_id(); - if (!vma || cpu_context(cpu, vma->vm_mm) != 0) { + if (cpu_context(cpu, vma->vm_mm) != 0) { unsigned long flags; int oldpid, newpid, idx; #ifdef DEBUG_TLB printk("[tlbpage<%lu,0x%08lx>]", cpu_context(cpu, vma->vm_mm), page); #endif - newpid = cpu_context(cpu, vma->vm_mm) & ASID_MASK; + newpid = cpu_context(cpu, vma->vm_mm) & asid_mask; page &= PAGE_MASK; local_irq_save(flags); - oldpid = read_c0_entryhi() & ASID_MASK; + oldpid = read_c0_entryhi() & asid_mask; write_c0_entryhi(page | newpid); BARRIER; tlb_probe(); @@ -188,19 +178,20 @@ finish: void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte) { + unsigned long asid_mask = cpu_asid_mask(¤t_cpu_data); unsigned long flags; int idx, pid; /* - * Handle debugger faulting in for debugee. + * Handle debugger faulting in for debuggee. */ if (current->active_mm != vma->vm_mm) return; - pid = read_c0_entryhi() & ASID_MASK; + pid = read_c0_entryhi() & asid_mask; #ifdef DEBUG_TLB - if ((pid != (cpu_context(cpu, vma->vm_mm) & ASID_MASK)) || (cpu_context(cpu, vma->vm_mm) == 0)) { + if ((pid != (cpu_context(cpu, vma->vm_mm) & asid_mask)) || (cpu_context(cpu, vma->vm_mm) == 0)) { printk("update_mmu_cache: Wheee, bogus tlbpid mmpid=%lu tlbpid=%d\n", (cpu_context(cpu, vma->vm_mm)), pid); } @@ -226,45 +217,19 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte) void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, unsigned long entryhi, unsigned long pagemask) { + unsigned long asid_mask = cpu_asid_mask(¤t_cpu_data); unsigned long flags; unsigned long old_ctx; static unsigned long wired = 0; - if (r3k_have_wired_reg) { /* TX39XX */ - unsigned long old_pagemask; - unsigned long w; - -#ifdef DEBUG_TLB - printk("[tlbwired<entry lo0 %8x, hi %8x\n, pagemask %8x>]\n", - entrylo0, entryhi, pagemask); -#endif - - local_irq_save(flags); - /* Save old context and create impossible VPN2 value */ - old_ctx = read_c0_entryhi() & ASID_MASK; - old_pagemask = read_c0_pagemask(); - w = read_c0_wired(); - write_c0_wired(w + 1); - write_c0_index(w << 8); - write_c0_pagemask(pagemask); - write_c0_entryhi(entryhi); - write_c0_entrylo0(entrylo0); - BARRIER; - tlb_write_indexed(); - - write_c0_entryhi(old_ctx); - write_c0_pagemask(old_pagemask); - local_flush_tlb_all(); - local_irq_restore(flags); - - } else if (wired < 8) { + if (wired < 8) { #ifdef DEBUG_TLB printk("[tlbwired<entry lo0 %8x, hi %8x\n>]\n", entrylo0, entryhi); #endif local_irq_save(flags); - old_ctx = read_c0_entryhi() & ASID_MASK; + old_ctx = read_c0_entryhi() & asid_mask; write_c0_entrylo0(entrylo0); write_c0_entryhi(entryhi); write_c0_index(wired); @@ -276,9 +241,8 @@ void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, } } -void __cpuinit tlb_init(void) +void tlb_init(void) { - local_flush_tlb_all(); - + local_flush_tlb_from(0); build_tlb_refill_handler(); } diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index c643de4c473a..44a662536148 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -8,112 +8,103 @@ * Carsten Langgaard, carstenl@mips.com * Copyright (C) 2002 MIPS Technologies, Inc. All rights reserved. */ +#include <linux/cpu_pm.h> #include <linux/init.h> #include <linux/sched.h> #include <linux/smp.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <linux/hugetlb.h> -#include <linux/module.h> +#include <linux/export.h> +#include <linux/sort.h> #include <asm/cpu.h> +#include <asm/cpu-type.h> #include <asm/bootinfo.h> +#include <asm/hazards.h> #include <asm/mmu_context.h> -#include <asm/pgtable.h> +#include <asm/tlb.h> +#include <asm/tlbex.h> #include <asm/tlbmisc.h> - -extern void build_tlb_refill_handler(void); +#include <asm/setup.h> /* - * Make sure all entries differ. If they're not different - * MIPS32 will take revenge ... + * LOONGSON-2 has a 4 entry itlb which is a subset of jtlb, LOONGSON-3 has + * a 4 entry itlb and a 4 entry dtlb which are subsets of jtlb. Unfortunately, + * itlb/dtlb are not totally transparent to software. */ -#define UNIQUE_ENTRYHI(idx) (CKSEG0 + ((idx) << (PAGE_SHIFT + 1))) - -/* Atomicity and interruptability */ -#ifdef CONFIG_MIPS_MT_SMTC - -#include <asm/smtc.h> -#include <asm/mipsmtregs.h> - -#define ENTER_CRITICAL(flags) \ - { \ - unsigned int mvpflags; \ - local_irq_save(flags);\ - mvpflags = dvpe() -#define EXIT_CRITICAL(flags) \ - evpe(mvpflags); \ - local_irq_restore(flags); \ +static inline void flush_micro_tlb(void) +{ + switch (current_cpu_type()) { + case CPU_LOONGSON2EF: + write_c0_diag(LOONGSON_DIAG_ITLB); + break; + case CPU_LOONGSON64: + write_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB); + break; + default: + break; } -#else - -#define ENTER_CRITICAL(flags) local_irq_save(flags) -#define EXIT_CRITICAL(flags) local_irq_restore(flags) - -#endif /* CONFIG_MIPS_MT_SMTC */ - -#if defined(CONFIG_CPU_LOONGSON2) -/* - * LOONGSON2 has a 4 entry itlb which is a subset of dtlb, - * unfortrunately, itlb is not totally transparent to software. - */ -#define FLUSH_ITLB write_c0_diag(4); - -#define FLUSH_ITLB_VM(vma) { if ((vma)->vm_flags & VM_EXEC) write_c0_diag(4); } - -#else - -#define FLUSH_ITLB -#define FLUSH_ITLB_VM(vma) +} -#endif +static inline void flush_micro_tlb_vm(struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_EXEC) + flush_micro_tlb(); +} void local_flush_tlb_all(void) { unsigned long flags; unsigned long old_ctx; - int entry; + int entry, ftlbhighset; - ENTER_CRITICAL(flags); + local_irq_save(flags); /* Save old context and create impossible VPN2 value */ old_ctx = read_c0_entryhi(); + htw_stop(); write_c0_entrylo0(0); write_c0_entrylo1(0); - entry = read_c0_wired(); + entry = num_wired_entries(); - /* Blast 'em all away. */ - while (entry < current_cpu_data.tlbsize) { - /* Make sure all entries differ. */ - write_c0_entryhi(UNIQUE_ENTRYHI(entry)); - write_c0_index(entry); - mtc0_tlbw_hazard(); - tlb_write_indexed(); - entry++; + /* + * Blast 'em all away. + * If there are any wired entries, fall back to iterating + */ + if (cpu_has_tlbinv && !entry) { + if (current_cpu_data.tlbsizevtlb) { + write_c0_index(0); + mtc0_tlbw_hazard(); + tlbinvf(); /* invalidate VTLB */ + } + ftlbhighset = current_cpu_data.tlbsizevtlb + + current_cpu_data.tlbsizeftlbsets; + for (entry = current_cpu_data.tlbsizevtlb; + entry < ftlbhighset; + entry++) { + write_c0_index(entry); + mtc0_tlbw_hazard(); + tlbinvf(); /* invalidate one FTLB set */ + } + } else { + while (entry < current_cpu_data.tlbsize) { + /* Make sure all entries differ. */ + write_c0_entryhi(UNIQUE_ENTRYHI(entry)); + write_c0_index(entry); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + entry++; + } } tlbw_use_hazard(); write_c0_entryhi(old_ctx); - FLUSH_ITLB; - EXIT_CRITICAL(flags); + htw_start(); + flush_micro_tlb(); + local_irq_restore(flags); } EXPORT_SYMBOL(local_flush_tlb_all); -/* All entries common to a mm share an asid. To effectively flush - these entries, we just bump the asid. */ -void local_flush_tlb_mm(struct mm_struct *mm) -{ - int cpu; - - preempt_disable(); - - cpu = smp_processor_id(); - - if (cpu_context(cpu, mm) != 0) { - drop_mmu_context(mm, cpu); - } - - preempt_enable(); -} - void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { @@ -123,18 +114,30 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, if (cpu_context(cpu, mm) != 0) { unsigned long size, flags; - ENTER_CRITICAL(flags); + local_irq_save(flags); start = round_down(start, PAGE_SIZE << 1); end = round_up(end, PAGE_SIZE << 1); size = (end - start) >> (PAGE_SHIFT + 1); - if (size <= current_cpu_data.tlbsize/2) { - int oldpid = read_c0_entryhi(); + if (size <= (current_cpu_data.tlbsizeftlbsets ? + current_cpu_data.tlbsize / 8 : + current_cpu_data.tlbsize / 2)) { + unsigned long old_entryhi, old_mmid; int newpid = cpu_asid(cpu, mm); + old_entryhi = read_c0_entryhi(); + if (cpu_has_mmid) { + old_mmid = read_c0_memorymapid(); + write_c0_memorymapid(newpid); + } + + htw_stop(); while (start < end) { int idx; - write_c0_entryhi(start | newpid); + if (cpu_has_mmid) + write_c0_entryhi(start); + else + write_c0_entryhi(start | newpid); start += (PAGE_SIZE << 1); mtc0_tlbw_hazard(); tlb_probe(); @@ -150,12 +153,15 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, tlb_write_indexed(); } tlbw_use_hazard(); - write_c0_entryhi(oldpid); + write_c0_entryhi(old_entryhi); + if (cpu_has_mmid) + write_c0_memorymapid(old_mmid); + htw_start(); } else { - drop_mmu_context(mm, cpu); + drop_mmu_context(mm); } - FLUSH_ITLB; - EXIT_CRITICAL(flags); + flush_micro_tlb(); + local_irq_restore(flags); } } @@ -163,15 +169,18 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) { unsigned long size, flags; - ENTER_CRITICAL(flags); + local_irq_save(flags); size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; size = (size + 1) >> 1; - if (size <= current_cpu_data.tlbsize / 2) { + if (size <= (current_cpu_data.tlbsizeftlbsets ? + current_cpu_data.tlbsize / 8 : + current_cpu_data.tlbsize / 2)) { int pid = read_c0_entryhi(); start &= (PAGE_MASK << 1); end += ((PAGE_SIZE << 1) - 1); end &= (PAGE_MASK << 1); + htw_stop(); while (start < end) { int idx; @@ -193,11 +202,12 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) } tlbw_use_hazard(); write_c0_entryhi(pid); + htw_start(); } else { local_flush_tlb_all(); } - FLUSH_ITLB; - EXIT_CRITICAL(flags); + flush_micro_tlb(); + local_irq_restore(flags); } void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) @@ -205,14 +215,21 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) int cpu = smp_processor_id(); if (cpu_context(cpu, vma->vm_mm) != 0) { - unsigned long flags; - int oldpid, newpid, idx; + unsigned long old_mmid; + unsigned long flags, old_entryhi; + int idx; - newpid = cpu_asid(cpu, vma->vm_mm); page &= (PAGE_MASK << 1); - ENTER_CRITICAL(flags); - oldpid = read_c0_entryhi(); - write_c0_entryhi(page | newpid); + local_irq_save(flags); + old_entryhi = read_c0_entryhi(); + htw_stop(); + if (cpu_has_mmid) { + old_mmid = read_c0_memorymapid(); + write_c0_entryhi(page); + write_c0_memorymapid(cpu_asid(cpu, vma->vm_mm)); + } else { + write_c0_entryhi(page | cpu_asid(cpu, vma->vm_mm)); + } mtc0_tlbw_hazard(); tlb_probe(); tlb_probe_hazard(); @@ -228,9 +245,12 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) tlbw_use_hazard(); finish: - write_c0_entryhi(oldpid); - FLUSH_ITLB_VM(vma); - EXIT_CRITICAL(flags); + write_c0_entryhi(old_entryhi); + if (cpu_has_mmid) + write_c0_memorymapid(old_mmid); + htw_start(); + flush_micro_tlb_vm(vma); + local_irq_restore(flags); } } @@ -243,8 +263,9 @@ void local_flush_tlb_one(unsigned long page) unsigned long flags; int oldpid, idx; - ENTER_CRITICAL(flags); + local_irq_save(flags); oldpid = read_c0_entryhi(); + htw_stop(); page &= (PAGE_MASK << 1); write_c0_entryhi(page); mtc0_tlbw_hazard(); @@ -261,8 +282,9 @@ void local_flush_tlb_one(unsigned long page) tlbw_use_hazard(); } write_c0_entryhi(oldpid); - FLUSH_ITLB; - EXIT_CRITICAL(flags); + htw_start(); + flush_micro_tlb(); + local_irq_restore(flags); } /* @@ -274,32 +296,39 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) { unsigned long flags; pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; - pte_t *ptep; + pte_t *ptep, *ptemap = NULL; int idx, pid; /* - * Handle debugger faulting in for debugee. + * Handle debugger faulting in for debuggee. */ if (current->active_mm != vma->vm_mm) return; - ENTER_CRITICAL(flags); + local_irq_save(flags); - pid = read_c0_entryhi() & ASID_MASK; + htw_stop(); address &= (PAGE_MASK << 1); - write_c0_entryhi(address | pid); + if (cpu_has_mmid) { + write_c0_entryhi(address); + } else { + pid = read_c0_entryhi() & cpu_asid_mask(¤t_cpu_data); + write_c0_entryhi(address | pid); + } pgdp = pgd_offset(vma->vm_mm, address); mtc0_tlbw_hazard(); tlb_probe(); tlb_probe_hazard(); - pudp = pud_offset(pgdp, address); + p4dp = p4d_offset(pgdp, address); + pudp = pud_offset(p4dp, address); pmdp = pmd_offset(pudp, address); idx = read_c0_index(); #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT /* this could be a huge page */ - if (pmd_huge(*pmdp)) { + if (pmd_leaf(*pmdp)) { unsigned long lo; write_c0_pagemask(PM_HUGE_MASK); ptep = (pte_t *)pmdp; @@ -317,12 +346,27 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) } else #endif { - ptep = pte_offset_map(pmdp, address); + ptemap = ptep = pte_offset_map(pmdp, address); + /* + * update_mmu_cache() is called between pte_offset_map_lock() + * and pte_unmap_unlock(), so we can assume that ptep is not + * NULL here: and what should be done below if it were NULL? + */ -#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32) +#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) +#ifdef CONFIG_XPA + write_c0_entrylo0(pte_to_entrylo(ptep->pte_high)); + if (cpu_has_xpa) + writex_c0_entrylo0(ptep->pte_low & _PFNX_MASK); + ptep++; + write_c0_entrylo1(pte_to_entrylo(ptep->pte_high)); + if (cpu_has_xpa) + writex_c0_entrylo1(ptep->pte_low & _PFNX_MASK); +#else write_c0_entrylo0(ptep->pte_high); ptep++; write_c0_entrylo1(ptep->pte_high); +#endif #else write_c0_entrylo0(pte_to_entrylo(pte_val(*ptep++))); write_c0_entrylo1(pte_to_entrylo(pte_val(*ptep))); @@ -334,23 +378,36 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) tlb_write_indexed(); } tlbw_use_hazard(); - FLUSH_ITLB_VM(vma); - EXIT_CRITICAL(flags); + htw_start(); + flush_micro_tlb_vm(vma); + + if (ptemap) + pte_unmap(ptemap); + local_irq_restore(flags); } void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, unsigned long entryhi, unsigned long pagemask) { +#ifdef CONFIG_XPA + panic("Broken for XPA kernels"); +#else + unsigned int old_mmid; unsigned long flags; unsigned long wired; unsigned long old_pagemask; unsigned long old_ctx; - ENTER_CRITICAL(flags); + local_irq_save(flags); + if (cpu_has_mmid) { + old_mmid = read_c0_memorymapid(); + write_c0_memorymapid(MMID_KERNEL_WIRED); + } /* Save old context and create impossible VPN2 value */ old_ctx = read_c0_entryhi(); + htw_stop(); old_pagemask = read_c0_pagemask(); - wired = read_c0_wired(); + wired = num_wired_entries(); write_c0_wired(wired + 1); write_c0_index(wired); tlbw_use_hazard(); /* What is the hazard here? */ @@ -363,33 +420,88 @@ void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, tlbw_use_hazard(); write_c0_entryhi(old_ctx); + if (cpu_has_mmid) + write_c0_memorymapid(old_mmid); tlbw_use_hazard(); /* What is the hazard here? */ + htw_start(); write_c0_pagemask(old_pagemask); local_flush_tlb_all(); - EXIT_CRITICAL(flags); + local_irq_restore(flags); +#endif } #ifdef CONFIG_TRANSPARENT_HUGEPAGE -int __init has_transparent_hugepage(void) +int has_transparent_hugepage(void) { - unsigned int mask; - unsigned long flags; + static unsigned int mask = -1; - ENTER_CRITICAL(flags); - write_c0_pagemask(PM_HUGE_MASK); - back_to_back_c0_hazard(); - mask = read_c0_pagemask(); - write_c0_pagemask(PM_DEFAULT_MASK); - - EXIT_CRITICAL(flags); + if (mask == -1) { /* first call comes during __init */ + unsigned long flags; + local_irq_save(flags); + write_c0_pagemask(PM_HUGE_MASK); + back_to_back_c0_hazard(); + mask = read_c0_pagemask(); + write_c0_pagemask(PM_DEFAULT_MASK); + local_irq_restore(flags); + } return mask == PM_HUGE_MASK; } +EXPORT_SYMBOL(has_transparent_hugepage); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -static int __cpuinitdata ntlb; +/* + * Used for loading TLB entries before trap_init() has started, when we + * don't actually want to add a wired entry which remains throughout the + * lifetime of the system + */ + +int temp_tlb_entry; + +#ifndef CONFIG_64BIT +__init int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1, + unsigned long entryhi, unsigned long pagemask) +{ + int ret = 0; + unsigned long flags; + unsigned long wired; + unsigned long old_pagemask; + unsigned long old_ctx; + + local_irq_save(flags); + /* Save old context and create impossible VPN2 value */ + htw_stop(); + old_ctx = read_c0_entryhi(); + old_pagemask = read_c0_pagemask(); + wired = num_wired_entries(); + if (--temp_tlb_entry < wired) { + printk(KERN_WARNING + "No TLB space left for add_temporary_entry\n"); + ret = -ENOSPC; + goto out; + } + + write_c0_index(temp_tlb_entry); + write_c0_pagemask(pagemask); + write_c0_entryhi(entryhi); + write_c0_entrylo0(entrylo0); + write_c0_entrylo1(entrylo1); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + tlbw_use_hazard(); + + write_c0_entryhi(old_ctx); + write_c0_pagemask(old_pagemask); + htw_start(); +out: + local_irq_restore(flags); + return ret; +} +#endif + +static int ntlb; static int __init set_ntlb(char *str) { get_option(&str, &ntlb); @@ -398,7 +510,101 @@ static int __init set_ntlb(char *str) __setup("ntlb=", set_ntlb); -void __cpuinit tlb_init(void) + +/* Comparison function for EntryHi VPN fields. */ +static int r4k_vpn_cmp(const void *a, const void *b) +{ + long v = *(unsigned long *)a - *(unsigned long *)b; + int s = sizeof(long) > sizeof(int) ? sizeof(long) * 8 - 1: 0; + return s ? (v != 0) | v >> s : v; +} + +/* + * Initialise all TLB entries with unique values that do not clash with + * what we have been handed over and what we'll be using ourselves. + */ +static void __ref r4k_tlb_uniquify(void) +{ + int tlbsize = current_cpu_data.tlbsize; + bool use_slab = slab_is_available(); + int start = num_wired_entries(); + phys_addr_t tlb_vpn_size; + unsigned long *tlb_vpns; + unsigned long vpn_mask; + int cnt, ent, idx, i; + + vpn_mask = GENMASK(cpu_vmbits - 1, 13); + vpn_mask |= IS_ENABLED(CONFIG_64BIT) ? 3ULL << 62 : 1 << 31; + + tlb_vpn_size = tlbsize * sizeof(*tlb_vpns); + tlb_vpns = (use_slab ? + kmalloc(tlb_vpn_size, GFP_KERNEL) : + memblock_alloc_raw(tlb_vpn_size, sizeof(*tlb_vpns))); + if (WARN_ON(!tlb_vpns)) + return; /* Pray local_flush_tlb_all() is good enough. */ + + htw_stop(); + + for (i = start, cnt = 0; i < tlbsize; i++, cnt++) { + unsigned long vpn; + + write_c0_index(i); + mtc0_tlbr_hazard(); + tlb_read(); + tlb_read_hazard(); + vpn = read_c0_entryhi(); + vpn &= vpn_mask & PAGE_MASK; + tlb_vpns[cnt] = vpn; + + /* Prevent any large pages from overlapping regular ones. */ + write_c0_pagemask(read_c0_pagemask() & PM_DEFAULT_MASK); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + tlbw_use_hazard(); + } + + sort(tlb_vpns, cnt, sizeof(tlb_vpns[0]), r4k_vpn_cmp, NULL); + + write_c0_pagemask(PM_DEFAULT_MASK); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + + idx = 0; + ent = tlbsize; + for (i = start; i < tlbsize; i++) + while (1) { + unsigned long entryhi, vpn; + + entryhi = UNIQUE_ENTRYHI(ent); + vpn = entryhi & vpn_mask & PAGE_MASK; + + if (idx >= cnt || vpn < tlb_vpns[idx]) { + write_c0_entryhi(entryhi); + write_c0_index(i); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + ent++; + break; + } else if (vpn == tlb_vpns[idx]) { + ent++; + } else { + idx++; + } + } + + tlbw_use_hazard(); + htw_start(); + flush_micro_tlb(); + if (use_slab) + kfree(tlb_vpns); + else + memblock_free(tlb_vpns, tlb_vpn_size); +} + +/* + * Configure TLB (for init or after a CPU has been powered off). + */ +static void r4k_tlb_configure(void) { /* * You should never change this register: @@ -408,28 +614,41 @@ void __cpuinit tlb_init(void) * be set to fixed-size pages. */ write_c0_pagemask(PM_DEFAULT_MASK); + back_to_back_c0_hazard(); + if (read_c0_pagemask() != PM_DEFAULT_MASK) + panic("MMU doesn't support PAGE_SIZE=0x%lx", PAGE_SIZE); + write_c0_wired(0); if (current_cpu_type() == CPU_R10000 || current_cpu_type() == CPU_R12000 || - current_cpu_type() == CPU_R14000) + current_cpu_type() == CPU_R14000 || + current_cpu_type() == CPU_R16000) write_c0_framemask(0); if (cpu_has_rixi) { /* - * Enable the no read, no exec bits, and enable large virtual + * Enable the no read, no exec bits, and enable large physical * address. */ - u32 pg = PG_RIE | PG_XIE; #ifdef CONFIG_64BIT - pg |= PG_ELPA; + set_c0_pagegrain(PG_RIE | PG_XIE | PG_ELPA); +#else + set_c0_pagegrain(PG_RIE | PG_XIE); #endif - write_c0_pagegrain(pg); } + temp_tlb_entry = current_cpu_data.tlbsize - 1; + /* From this point on the ARC firmware is dead. */ + r4k_tlb_uniquify(); local_flush_tlb_all(); /* Did I tell you that ARC SUCKS? */ +} + +void tlb_init(void) +{ + r4k_tlb_configure(); if (ntlb) { if (ntlb > 1 && ntlb <= current_cpu_data.tlbsize) { @@ -443,3 +662,26 @@ void __cpuinit tlb_init(void) build_tlb_refill_handler(); } + +static int r4k_tlb_pm_notifier(struct notifier_block *self, unsigned long cmd, + void *v) +{ + switch (cmd) { + case CPU_PM_ENTER_FAILED: + case CPU_PM_EXIT: + r4k_tlb_configure(); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block r4k_tlb_pm_notifier_block = { + .notifier_call = r4k_tlb_pm_notifier, +}; + +static int __init r4k_tlb_init_pm(void) +{ + return cpu_pm_register_notifier(&r4k_tlb_pm_notifier_block); +} +arch_initcall(r4k_tlb_init_pm); diff --git a/arch/mips/mm/tlb-r8k.c b/arch/mips/mm/tlb-r8k.c deleted file mode 100644 index 91c2499f806a..000000000000 --- a/arch/mips/mm/tlb-r8k.c +++ /dev/null @@ -1,248 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1996 David S. Miller (davem@davemloft.net) - * Copyright (C) 1997, 1998, 1999, 2000 Ralf Baechle ralf@gnu.org - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 2002 MIPS Technologies, Inc. All rights reserved. - */ -#include <linux/init.h> -#include <linux/sched.h> -#include <linux/smp.h> -#include <linux/mm.h> - -#include <asm/cpu.h> -#include <asm/bootinfo.h> -#include <asm/mmu_context.h> -#include <asm/pgtable.h> - -extern void build_tlb_refill_handler(void); - -#define TFP_TLB_SIZE 384 -#define TFP_TLB_SET_SHIFT 7 - -/* CP0 hazard avoidance. */ -#define BARRIER __asm__ __volatile__(".set noreorder\n\t" \ - "nop; nop; nop; nop; nop; nop;\n\t" \ - ".set reorder\n\t") - -void local_flush_tlb_all(void) -{ - unsigned long flags; - unsigned long old_ctx; - int entry; - - local_irq_save(flags); - /* Save old context and create impossible VPN2 value */ - old_ctx = read_c0_entryhi(); - write_c0_entrylo(0); - - for (entry = 0; entry < TFP_TLB_SIZE; entry++) { - write_c0_tlbset(entry >> TFP_TLB_SET_SHIFT); - write_c0_vaddr(entry << PAGE_SHIFT); - write_c0_entryhi(CKSEG0 + (entry << (PAGE_SHIFT + 1))); - mtc0_tlbw_hazard(); - tlb_write(); - } - tlbw_use_hazard(); - write_c0_entryhi(old_ctx); - local_irq_restore(flags); -} - -void local_flush_tlb_mm(struct mm_struct *mm) -{ - int cpu = smp_processor_id(); - - if (cpu_context(cpu, mm) != 0) - drop_mmu_context(mm, cpu); -} - -void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - struct mm_struct *mm = vma->vm_mm; - int cpu = smp_processor_id(); - unsigned long flags; - int oldpid, newpid, size; - - if (!cpu_context(cpu, mm)) - return; - - size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; - size = (size + 1) >> 1; - - local_irq_save(flags); - - if (size > TFP_TLB_SIZE / 2) { - drop_mmu_context(mm, cpu); - goto out_restore; - } - - oldpid = read_c0_entryhi(); - newpid = cpu_asid(cpu, mm); - - write_c0_entrylo(0); - - start &= PAGE_MASK; - end += (PAGE_SIZE - 1); - end &= PAGE_MASK; - while (start < end) { - signed long idx; - - write_c0_vaddr(start); - write_c0_entryhi(start); - start += PAGE_SIZE; - tlb_probe(); - idx = read_c0_tlbset(); - if (idx < 0) - continue; - - write_c0_entryhi(CKSEG0 + (idx << (PAGE_SHIFT + 1))); - tlb_write(); - } - write_c0_entryhi(oldpid); - -out_restore: - local_irq_restore(flags); -} - -/* Usable for KV1 addresses only! */ -void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - unsigned long size, flags; - - size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; - size = (size + 1) >> 1; - - if (size > TFP_TLB_SIZE / 2) { - local_flush_tlb_all(); - return; - } - - local_irq_save(flags); - - write_c0_entrylo(0); - - start &= PAGE_MASK; - end += (PAGE_SIZE - 1); - end &= PAGE_MASK; - while (start < end) { - signed long idx; - - write_c0_vaddr(start); - write_c0_entryhi(start); - start += PAGE_SIZE; - tlb_probe(); - idx = read_c0_tlbset(); - if (idx < 0) - continue; - - write_c0_entryhi(CKSEG0 + (idx << (PAGE_SHIFT + 1))); - tlb_write(); - } - - local_irq_restore(flags); -} - -void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) -{ - int cpu = smp_processor_id(); - unsigned long flags; - int oldpid, newpid; - signed long idx; - - if (!cpu_context(cpu, vma->vm_mm)) - return; - - newpid = cpu_asid(cpu, vma->vm_mm); - page &= PAGE_MASK; - local_irq_save(flags); - oldpid = read_c0_entryhi(); - write_c0_vaddr(page); - write_c0_entryhi(newpid); - tlb_probe(); - idx = read_c0_tlbset(); - if (idx < 0) - goto finish; - - write_c0_entrylo(0); - write_c0_entryhi(CKSEG0 + (idx << (PAGE_SHIFT + 1))); - tlb_write(); - -finish: - write_c0_entryhi(oldpid); - local_irq_restore(flags); -} - -/* - * We will need multiple versions of update_mmu_cache(), one that just - * updates the TLB with the new pte(s), and another which also checks - * for the R4k "end of page" hardware bug and does the needy. - */ -void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) -{ - unsigned long flags; - pgd_t *pgdp; - pmd_t *pmdp; - pte_t *ptep; - int pid; - - /* - * Handle debugger faulting in for debugee. - */ - if (current->active_mm != vma->vm_mm) - return; - - pid = read_c0_entryhi() & ASID_MASK; - - local_irq_save(flags); - address &= PAGE_MASK; - write_c0_vaddr(address); - write_c0_entryhi(pid); - pgdp = pgd_offset(vma->vm_mm, address); - pmdp = pmd_offset(pgdp, address); - ptep = pte_offset_map(pmdp, address); - tlb_probe(); - - write_c0_entrylo(pte_val(*ptep++) >> 6); - tlb_write(); - - write_c0_entryhi(pid); - local_irq_restore(flags); -} - -static void __cpuinit probe_tlb(unsigned long config) -{ - struct cpuinfo_mips *c = ¤t_cpu_data; - - c->tlbsize = 3 * 128; /* 3 sets each 128 entries */ -} - -void __cpuinit tlb_init(void) -{ - unsigned int config = read_c0_config(); - unsigned long status; - - probe_tlb(config); - - status = read_c0_status(); - status &= ~(ST0_UPS | ST0_KPS); -#ifdef CONFIG_PAGE_SIZE_4KB - status |= (TFP_PAGESIZE_4K << 32) | (TFP_PAGESIZE_4K << 36); -#elif defined(CONFIG_PAGE_SIZE_8KB) - status |= (TFP_PAGESIZE_8K << 32) | (TFP_PAGESIZE_8K << 36); -#elif defined(CONFIG_PAGE_SIZE_16KB) - status |= (TFP_PAGESIZE_16K << 32) | (TFP_PAGESIZE_16K << 36); -#elif defined(CONFIG_PAGE_SIZE_64KB) - status |= (TFP_PAGESIZE_64K << 32) | (TFP_PAGESIZE_64K << 36); -#endif - write_c0_status(status); - - write_c0_wired(0); - - local_flush_tlb_all(); - - build_tlb_refill_handler(); -} diff --git a/arch/mips/mm/tlbex-fault.S b/arch/mips/mm/tlbex-fault.S index 318855eb5f80..77db401fc620 100644 --- a/arch/mips/mm/tlbex-fault.S +++ b/arch/mips/mm/tlbex-fault.S @@ -12,14 +12,15 @@ .macro tlb_do_page_fault, write NESTED(tlb_do_page_fault_\write, PT_SIZE, sp) - SAVE_ALL + .cfi_signal_frame + SAVE_ALL docfi=1 MFC0 a2, CP0_BADVADDR KMODE move a0, sp REG_S a2, PT_BVADDR(sp) li a1, \write - PTR_LA ra, ret_from_exception - j do_page_fault + jal do_page_fault + j ret_from_exception END(tlb_do_page_fault_\write) .endm diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 9ab0f907a52c..69ea54bdc0c3 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -12,7 +12,7 @@ * Copyright (C) 2011 MIPS Technologies, Inc. * * ... and the days got worse and worse and now you see - * I've gone completly out of my mind. + * I've gone completely out of my mind. * * They're coming to take me a away haha * they're coming to take me a away hoho hihi haha @@ -22,18 +22,33 @@ */ #include <linux/bug.h> +#include <linux/export.h> #include <linux/kernel.h> #include <linux/types.h> #include <linux/smp.h> #include <linux/string.h> -#include <linux/init.h> #include <linux/cache.h> +#include <linux/pgtable.h> #include <asm/cacheflush.h> -#include <asm/pgtable.h> -#include <asm/war.h> +#include <asm/cpu-type.h> +#include <asm/mipsregs.h> +#include <asm/mmu_context.h> +#include <asm/regdef.h> #include <asm/uasm.h> #include <asm/setup.h> +#include <asm/tlbex.h> + +static int mips_xpa_disabled; + +static int __init xpa_disable(char *s) +{ + mips_xpa_disabled = 1; + + return 1; +} + +__setup("noxpa", xpa_disable); /* * TLB load/store/modify handlers. @@ -69,14 +84,18 @@ static inline int r4k_250MHZhwbug(void) return 0; } +extern int sb1250_m3_workaround_needed(void); + static inline int __maybe_unused bcm1250_m3_war(void) { - return BCM1250_M3_WAR; + if (IS_ENABLED(CONFIG_SB1_PASS_2_WORKAROUNDS)) + return sb1250_m3_workaround_needed(); + return 0; } static inline int __maybe_unused r10000_llsc_war(void) { - return R10000_LLSC_WAR; + return IS_ENABLED(CONFIG_WAR_R10000_LLSC); } static int use_bbit_insns(void) @@ -85,6 +104,7 @@ static int use_bbit_insns(void) case CPU_CAVIUM_OCTEON: case CPU_CAVIUM_OCTEON_PLUS: case CPU_CAVIUM_OCTEON2: + case CPU_CAVIUM_OCTEON3: return 1; default: return 0; @@ -95,6 +115,7 @@ static int use_lwx_insns(void) { switch (current_cpu_type()) { case CPU_CAVIUM_OCTEON2: + case CPU_CAVIUM_OCTEON3: return 1; default: return 0; @@ -136,10 +157,9 @@ static int scratchpad_offset(int i) * why; it's not an issue caused by the core RTL. * */ -static int __cpuinit m4kc_tlbp_war(void) +static int m4kc_tlbp_war(void) { - return (current_cpu_data.processor_id & 0xffff00) == - (PRID_COMP_MIPS | PRID_IMP_4KC); + return current_cpu_type() == CPU_4KC; } /* Handle labels (which must be positive integers). */ @@ -181,11 +201,9 @@ UASM_L_LA(_large_segbits_fault) UASM_L_LA(_tlb_huge_update) #endif -static int __cpuinitdata hazard_instance; +static int hazard_instance; -static void __cpuinit uasm_bgezl_hazard(u32 **p, - struct uasm_reloc **r, - int instance) +static void uasm_bgezl_hazard(u32 **p, struct uasm_reloc **r, int instance) { switch (instance) { case 0 ... 7: @@ -196,9 +214,7 @@ static void __cpuinit uasm_bgezl_hazard(u32 **p, } } -static void __cpuinit uasm_bgezl_label(struct uasm_label **l, - u32 **p, - int instance) +static void uasm_bgezl_label(struct uasm_label **l, u32 **p, int instance) { switch (instance) { case 0 ... 7: @@ -225,31 +241,28 @@ static void output_pgtable_bits_defines(void) pr_debug("\n"); pr_define("_PAGE_PRESENT_SHIFT %d\n", _PAGE_PRESENT_SHIFT); - pr_define("_PAGE_READ_SHIFT %d\n", _PAGE_READ_SHIFT); + pr_define("_PAGE_NO_READ_SHIFT %d\n", _PAGE_NO_READ_SHIFT); pr_define("_PAGE_WRITE_SHIFT %d\n", _PAGE_WRITE_SHIFT); pr_define("_PAGE_ACCESSED_SHIFT %d\n", _PAGE_ACCESSED_SHIFT); pr_define("_PAGE_MODIFIED_SHIFT %d\n", _PAGE_MODIFIED_SHIFT); #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT pr_define("_PAGE_HUGE_SHIFT %d\n", _PAGE_HUGE_SHIFT); - pr_define("_PAGE_SPLITTING_SHIFT %d\n", _PAGE_SPLITTING_SHIFT); #endif - if (cpu_has_rixi) { #ifdef _PAGE_NO_EXEC_SHIFT + if (cpu_has_rixi) pr_define("_PAGE_NO_EXEC_SHIFT %d\n", _PAGE_NO_EXEC_SHIFT); #endif -#ifdef _PAGE_NO_READ_SHIFT - pr_define("_PAGE_NO_READ_SHIFT %d\n", _PAGE_NO_READ_SHIFT); -#endif - } pr_define("_PAGE_GLOBAL_SHIFT %d\n", _PAGE_GLOBAL_SHIFT); pr_define("_PAGE_VALID_SHIFT %d\n", _PAGE_VALID_SHIFT); pr_define("_PAGE_DIRTY_SHIFT %d\n", _PAGE_DIRTY_SHIFT); - pr_define("_PFN_SHIFT %d\n", _PFN_SHIFT); + pr_define("PFN_PTE_SHIFT %d\n", PFN_PTE_SHIFT); pr_debug("\n"); } -static inline void dump_handler(const char *symbol, const u32 *handler, int count) +static inline void dump_handler(const char *symbol, const void *start, const void *end) { + unsigned int count = (end - start) / sizeof(u32); + const u32 *handler = start; int i; pr_debug("LEAF(%s)\n", symbol); @@ -265,22 +278,6 @@ static inline void dump_handler(const char *symbol, const u32 *handler, int coun pr_debug("\tEND(%s)\n", symbol); } -/* The only general purpose registers allowed in TLB handlers. */ -#define K0 26 -#define K1 27 - -/* Some CP0 registers */ -#define C0_INDEX 0, 0 -#define C0_ENTRYLO0 2, 0 -#define C0_TCBIND 2, 2 -#define C0_ENTRYLO1 3, 0 -#define C0_CONTEXT 4, 0 -#define C0_PAGEMASK 5, 0 -#define C0_BADVADDR 8, 0 -#define C0_ENTRYHI 10, 0 -#define C0_EPC 14, 0 -#define C0_XCONTEXT 20, 0 - #ifdef CONFIG_64BIT # define GET_CONTEXT(buf, reg) UASM_i_MFC0(buf, reg, C0_XCONTEXT) #else @@ -295,28 +292,23 @@ static inline void dump_handler(const char *symbol, const u32 *handler, int coun * We deliberately chose a buffer size of 128, so we won't scribble * over anything important on overflow before we panic. */ -static u32 tlb_handler[128] __cpuinitdata; +static u32 tlb_handler[128]; /* simply assume worst case size for labels and relocs */ -static struct uasm_label labels[128] __cpuinitdata; -static struct uasm_reloc relocs[128] __cpuinitdata; +static struct uasm_label labels[128]; +static struct uasm_reloc relocs[128]; -static int check_for_high_segbits __cpuinitdata; +static int check_for_high_segbits; +static bool fill_includes_sw_bits; -static unsigned int kscratch_used_mask __cpuinitdata; +static unsigned int kscratch_used_mask; static inline int __maybe_unused c0_kscratch(void) { - switch (current_cpu_type()) { - case CPU_XLP: - case CPU_XLR: - return 22; - default: - return 31; - } + return 31; } -static int __cpuinit allocate_kscratch(void) +static int allocate_kscratch(void) { int r; unsigned int a = cpu_data[0].kscratch_mask & ~kscratch_used_mask; @@ -333,75 +325,57 @@ static int __cpuinit allocate_kscratch(void) return r; } -static int scratch_reg __cpuinitdata; -static int pgd_reg __cpuinitdata; +static int scratch_reg; +int pgd_reg; +EXPORT_SYMBOL_GPL(pgd_reg); enum vmalloc64_mode {not_refill, refill_scratch, refill_noscratch}; -static struct work_registers __cpuinit build_get_work_registers(u32 **p) +static struct work_registers build_get_work_registers(u32 **p) { struct work_registers r; - int smp_processor_id_reg; - int smp_processor_id_sel; - int smp_processor_id_shift; - if (scratch_reg >= 0) { /* Save in CPU local C0_KScratch? */ UASM_i_MTC0(p, 1, c0_kscratch(), scratch_reg); - r.r1 = K0; - r.r2 = K1; - r.r3 = 1; + r.r1 = GPR_K0; + r.r2 = GPR_K1; + r.r3 = GPR_AT; return r; } if (num_possible_cpus() > 1) { -#ifdef CONFIG_MIPS_PGD_C0_CONTEXT - smp_processor_id_shift = 51; - smp_processor_id_reg = 20; /* XContext */ - smp_processor_id_sel = 0; -#else -# ifdef CONFIG_32BIT - smp_processor_id_shift = 25; - smp_processor_id_reg = 4; /* Context */ - smp_processor_id_sel = 0; -# endif -# ifdef CONFIG_64BIT - smp_processor_id_shift = 26; - smp_processor_id_reg = 4; /* Context */ - smp_processor_id_sel = 0; -# endif -#endif /* Get smp_processor_id */ - UASM_i_MFC0(p, K0, smp_processor_id_reg, smp_processor_id_sel); - UASM_i_SRL_SAFE(p, K0, K0, smp_processor_id_shift); + UASM_i_CPUID_MFC0(p, GPR_K0, SMP_CPUID_REG); + UASM_i_SRL_SAFE(p, GPR_K0, GPR_K0, SMP_CPUID_REGSHIFT); - /* handler_reg_save index in K0 */ - UASM_i_SLL(p, K0, K0, ilog2(sizeof(struct tlb_reg_save))); + /* handler_reg_save index in GPR_K0 */ + UASM_i_SLL(p, GPR_K0, GPR_K0, ilog2(sizeof(struct tlb_reg_save))); - UASM_i_LA(p, K1, (long)&handler_reg_save); - UASM_i_ADDU(p, K0, K0, K1); + UASM_i_LA(p, GPR_K1, (long)&handler_reg_save); + UASM_i_ADDU(p, GPR_K0, GPR_K0, GPR_K1); } else { - UASM_i_LA(p, K0, (long)&handler_reg_save); + UASM_i_LA(p, GPR_K0, (long)&handler_reg_save); } - /* K0 now points to save area, save $1 and $2 */ - UASM_i_SW(p, 1, offsetof(struct tlb_reg_save, a), K0); - UASM_i_SW(p, 2, offsetof(struct tlb_reg_save, b), K0); + /* GPR_K0 now points to save area, save $1 and $2 */ + UASM_i_SW(p, 1, offsetof(struct tlb_reg_save, a), GPR_K0); + UASM_i_SW(p, 2, offsetof(struct tlb_reg_save, b), GPR_K0); - r.r1 = K1; + r.r1 = GPR_K1; r.r2 = 1; r.r3 = 2; return r; } -static void __cpuinit build_restore_work_registers(u32 **p) +static void build_restore_work_registers(u32 **p) { if (scratch_reg >= 0) { + uasm_i_ehb(p); UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); return; } - /* K0 already points to save area, restore $1 and $2 */ - UASM_i_LW(p, 1, offsetof(struct tlb_reg_save, a), K0); - UASM_i_LW(p, 2, offsetof(struct tlb_reg_save, b), K0); + /* GPR_K0 already points to save area, restore $1 and $2 */ + UASM_i_LW(p, 1, offsetof(struct tlb_reg_save, a), GPR_K0); + UASM_i_LW(p, 2, offsetof(struct tlb_reg_save, b), GPR_K0); } #ifndef CONFIG_MIPS_PGD_C0_CONTEXT @@ -410,15 +384,9 @@ static void __cpuinit build_restore_work_registers(u32 **p) * CONFIG_MIPS_PGD_C0_CONTEXT implies 64 bit and lack of pgd_current, * we cannot do r3000 under these circumstances. * - * Declare pgd_current here instead of including mmu_context.h to avoid type - * conflicts for tlbmiss_handler_setup_pgd - */ -extern unsigned long pgd_current[]; - -/* * The R3000 TLB handler is simple. */ -static void __cpuinit build_r3000_tlb_refill_handler(void) +static void build_r3000_tlb_refill_handler(void) { long pgdc = (long)pgd_current; u32 *p; @@ -426,22 +394,22 @@ static void __cpuinit build_r3000_tlb_refill_handler(void) memset(tlb_handler, 0, sizeof(tlb_handler)); p = tlb_handler; - uasm_i_mfc0(&p, K0, C0_BADVADDR); - uasm_i_lui(&p, K1, uasm_rel_hi(pgdc)); /* cp0 delay */ - uasm_i_lw(&p, K1, uasm_rel_lo(pgdc), K1); - uasm_i_srl(&p, K0, K0, 22); /* load delay */ - uasm_i_sll(&p, K0, K0, 2); - uasm_i_addu(&p, K1, K1, K0); - uasm_i_mfc0(&p, K0, C0_CONTEXT); - uasm_i_lw(&p, K1, 0, K1); /* cp0 delay */ - uasm_i_andi(&p, K0, K0, 0xffc); /* load delay */ - uasm_i_addu(&p, K1, K1, K0); - uasm_i_lw(&p, K0, 0, K1); + uasm_i_mfc0(&p, GPR_K0, C0_BADVADDR); + uasm_i_lui(&p, GPR_K1, uasm_rel_hi(pgdc)); /* cp0 delay */ + uasm_i_lw(&p, GPR_K1, uasm_rel_lo(pgdc), GPR_K1); + uasm_i_srl(&p, GPR_K0, GPR_K0, 22); /* load delay */ + uasm_i_sll(&p, GPR_K0, GPR_K0, 2); + uasm_i_addu(&p, GPR_K1, GPR_K1, GPR_K0); + uasm_i_mfc0(&p, GPR_K0, C0_CONTEXT); + uasm_i_lw(&p, GPR_K1, 0, GPR_K1); /* cp0 delay */ + uasm_i_andi(&p, GPR_K0, GPR_K0, 0xffc); /* load delay */ + uasm_i_addu(&p, GPR_K1, GPR_K1, GPR_K0); + uasm_i_lw(&p, GPR_K0, 0, GPR_K1); uasm_i_nop(&p); /* load delay */ - uasm_i_mtc0(&p, K0, C0_ENTRYLO0); - uasm_i_mfc0(&p, K1, C0_EPC); /* cp0 delay */ + uasm_i_mtc0(&p, GPR_K0, C0_ENTRYLO0); + uasm_i_mfc0(&p, GPR_K1, C0_EPC); /* cp0 delay */ uasm_i_tlbwr(&p); /* cp0 delay */ - uasm_i_jr(&p, K1); + uasm_i_jr(&p, GPR_K1); uasm_i_rfe(&p); /* branch delay */ if (p > tlb_handler + 32) @@ -451,8 +419,8 @@ static void __cpuinit build_r3000_tlb_refill_handler(void) (unsigned int)(p - tlb_handler)); memcpy((void *)ebase, tlb_handler, 0x80); - - dump_handler("r3000_tlb_refill", (u32 *)ebase, 32); + local_flush_icache_range(ebase, ebase + 0x80); + dump_handler("r3000_tlb_refill", (u32 *)ebase, (u32 *)(ebase + 0x80)); } #endif /* CONFIG_MIPS_PGD_C0_CONTEXT */ @@ -463,7 +431,7 @@ static void __cpuinit build_r3000_tlb_refill_handler(void) * other one.To keep things simple, we first assume linear space, * then we relocate it to the final handler layout as needed. */ -static u32 final_handler[64] __cpuinitdata; +static u32 final_handler[64]; /* * Hazards @@ -487,7 +455,7 @@ static u32 final_handler[64] __cpuinitdata; * * As if we MIPS hackers wouldn't know how to nop pipelines happy ... */ -static void __cpuinit __maybe_unused build_tlb_probe_entry(u32 **p) +static void __maybe_unused build_tlb_probe_entry(u32 **p) { switch (current_cpu_type()) { /* Found by experiment: R4600 v2.0/R4700 needs this, too. */ @@ -505,15 +473,9 @@ static void __cpuinit __maybe_unused build_tlb_probe_entry(u32 **p) } } -/* - * Write random or indexed TLB entry, and care about the hazards from - * the preceding mtc0 and for the following eret. - */ -enum tlb_write_entry { tlb_random, tlb_indexed }; - -static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l, - struct uasm_reloc **r, - enum tlb_write_entry wmode) +void build_tlb_write_entry(u32 **p, struct uasm_label **l, + struct uasm_reloc **r, + enum tlb_write_entry wmode) { void(*tlbw)(u32 **) = NULL; @@ -522,21 +484,9 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l, case tlb_indexed: tlbw = uasm_i_tlbwi; break; } - if (cpu_has_mips_r2) { - /* - * The architecture spec says an ehb is required here, - * but a number of cores do not have the hazard and - * using an ehb causes an expensive pipeline stall. - */ - switch (current_cpu_type()) { - case CPU_M14KC: - case CPU_74K: - break; - - default: + if (cpu_has_mips_r2_r6) { + if (cpu_has_mips_r2_exec_hazard) uasm_i_ehb(p); - break; - } tlbw(p); return; } @@ -577,7 +527,6 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l, case CPU_5KC: case CPU_TX49XX: case CPU_PR4450: - case CPU_XLR: uasm_i_nop(p); tlbw(p); break; @@ -585,6 +534,7 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l, case CPU_R10000: case CPU_R12000: case CPU_R14000: + case CPU_R16000: case CPU_4KC: case CPU_4KEC: case CPU_M14KC: @@ -599,10 +549,12 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l, case CPU_BMIPS4350: case CPU_BMIPS4380: case CPU_BMIPS5000: - case CPU_LOONGSON2: + case CPU_LOONGSON2EF: + case CPU_LOONGSON64: case CPU_R5500: if (m4kc_tlbp_war()) uasm_i_nop(p); + fallthrough; case CPU_ALCHEMY: tlbw(p); break; @@ -615,45 +567,37 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l, tlbw(p); break; - case CPU_VR4111: - case CPU_VR4121: - case CPU_VR4122: - case CPU_VR4181: - case CPU_VR4181A: - uasm_i_nop(p); - uasm_i_nop(p); - tlbw(p); - uasm_i_nop(p); - uasm_i_nop(p); - break; - - case CPU_VR4131: - case CPU_VR4133: - case CPU_R5432: - uasm_i_nop(p); - uasm_i_nop(p); - tlbw(p); - break; - - case CPU_JZRISC: + case CPU_XBURST: tlbw(p); uasm_i_nop(p); break; default: panic("No TLB refill handler yet (CPU type: %d)", - current_cpu_data.cputype); + current_cpu_type()); break; } } +EXPORT_SYMBOL_GPL(build_tlb_write_entry); -static __cpuinit __maybe_unused void build_convert_pte_to_entrylo(u32 **p, - unsigned int reg) +static __maybe_unused void build_convert_pte_to_entrylo(u32 **p, + unsigned int reg) { - if (cpu_has_rixi) { - UASM_i_ROTR(p, reg, reg, ilog2(_PAGE_GLOBAL)); + if (_PAGE_GLOBAL_SHIFT == 0) { + /* pte_t is already in EntryLo format */ + return; + } + + if (cpu_has_rixi && _PAGE_NO_EXEC != 0) { + if (fill_includes_sw_bits) { + UASM_i_ROTR(p, reg, reg, ilog2(_PAGE_GLOBAL)); + } else { + UASM_i_SRL(p, reg, reg, ilog2(_PAGE_NO_EXEC)); + UASM_i_ROTR(p, reg, reg, + ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC)); + } } else { -#ifdef CONFIG_64BIT_PHYS_ADDR +#ifdef CONFIG_PHYS_ADDR_T_64BIT uasm_i_dsrl_safe(p, reg, reg, ilog2(_PAGE_GLOBAL)); #else UASM_i_SRL(p, reg, reg, ilog2(_PAGE_GLOBAL)); @@ -663,13 +607,18 @@ static __cpuinit __maybe_unused void build_convert_pte_to_entrylo(u32 **p, #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT -static __cpuinit void build_restore_pagemask(u32 **p, - struct uasm_reloc **r, - unsigned int tmp, - enum label_id lid, - int restore_scratch) +static void build_restore_pagemask(u32 **p, struct uasm_reloc **r, + unsigned int tmp, enum label_id lid, + int restore_scratch) { if (restore_scratch) { + /* + * Ensure the MFC0 below observes the value written to the + * KScratch register by the prior MTC0. + */ + if (scratch_reg >= 0) + uasm_i_ehb(p); + /* Reset default page size */ if (PM_DEFAULT_MASK >> 16) { uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16); @@ -706,12 +655,11 @@ static __cpuinit void build_restore_pagemask(u32 **p, } } -static __cpuinit void build_huge_tlb_write_entry(u32 **p, - struct uasm_label **l, - struct uasm_reloc **r, - unsigned int tmp, - enum tlb_write_entry wmode, - int restore_scratch) +static void build_huge_tlb_write_entry(u32 **p, struct uasm_label **l, + struct uasm_reloc **r, + unsigned int tmp, + enum tlb_write_entry wmode, + int restore_scratch) { /* Set huge page tlb entry size */ uasm_i_lui(p, tmp, PM_HUGE_MASK >> 16); @@ -726,9 +674,9 @@ static __cpuinit void build_huge_tlb_write_entry(u32 **p, /* * Check if Huge PTE is present, if so then jump to LABEL. */ -static void __cpuinit +static void build_is_huge_pte(u32 **p, struct uasm_reloc **r, unsigned int tmp, - unsigned int pmd, int lid) + unsigned int pmd, int lid) { UASM_i_LW(p, tmp, 0, pmd); if (use_bbit_insns()) { @@ -739,9 +687,8 @@ build_is_huge_pte(u32 **p, struct uasm_reloc **r, unsigned int tmp, } } -static __cpuinit void build_huge_update_entries(u32 **p, - unsigned int pte, - unsigned int tmp) +static void build_huge_update_entries(u32 **p, unsigned int pte, + unsigned int tmp) { int small_sequence; @@ -771,11 +718,11 @@ static __cpuinit void build_huge_update_entries(u32 **p, UASM_i_MTC0(p, pte, C0_ENTRYLO1); /* load it */ } -static __cpuinit void build_huge_handler_tail(u32 **p, - struct uasm_reloc **r, - struct uasm_label **l, - unsigned int pte, - unsigned int ptr) +static void build_huge_handler_tail(u32 **p, struct uasm_reloc **r, + struct uasm_label **l, + unsigned int pte, + unsigned int ptr, + unsigned int flush) { #ifdef CONFIG_SMP UASM_i_SC(p, pte, 0, ptr); @@ -784,6 +731,22 @@ static __cpuinit void build_huge_handler_tail(u32 **p, #else UASM_i_SW(p, pte, 0, ptr); #endif + if (cpu_has_ftlb && flush) { + BUG_ON(!cpu_has_tlbinv); + + UASM_i_MFC0(p, ptr, C0_ENTRYHI); + uasm_i_ori(p, ptr, ptr, MIPS_ENTRYHI_EHINV); + UASM_i_MTC0(p, ptr, C0_ENTRYHI); + build_tlb_write_entry(p, l, r, tlb_indexed); + + uasm_i_xori(p, ptr, ptr, MIPS_ENTRYHI_EHINV); + UASM_i_MTC0(p, ptr, C0_ENTRYHI); + build_huge_update_entries(p, pte, ptr); + build_huge_tlb_write_entry(p, l, r, pte, tlb_random, 0); + + return; + } + build_huge_update_entries(p, pte, ptr); build_huge_tlb_write_entry(p, l, r, pte, tlb_indexed, 0); } @@ -794,9 +757,8 @@ static __cpuinit void build_huge_handler_tail(u32 **p, * TMP and PTR are scratch. * TMP will be clobbered, PTR will hold the pmd entry. */ -static void __cpuinit -build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, - unsigned int tmp, unsigned int ptr) +void build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, + unsigned int tmp, unsigned int ptr) { #ifndef CONFIG_MIPS_PGD_C0_CONTEXT long pgdc = (long)pgd_current; @@ -808,7 +770,7 @@ build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, if (check_for_high_segbits) { /* - * The kernel currently implicitely assumes that the + * The kernel currently implicitly assumes that the * MIPS SEGBITS parameter for the processor is * (PGDIR_SHIFT+PGDIR_BITS) or less, and will never * allocate virtual addresses outside the maximum @@ -818,18 +780,21 @@ build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, * everything but the lower xuseg addresses goes down * the module_alloc/vmalloc path. */ - uasm_i_dsrl_safe(p, ptr, tmp, PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3); + uasm_i_dsrl_safe(p, ptr, tmp, PGDIR_SHIFT + PGD_TABLE_ORDER + PAGE_SHIFT - 3); uasm_il_bnez(p, r, ptr, label_vmalloc); } else { uasm_il_bltz(p, r, tmp, label_vmalloc); } /* No uasm_i_nop needed here, since the next insn doesn't touch TMP. */ -#ifdef CONFIG_MIPS_PGD_C0_CONTEXT if (pgd_reg != -1) { /* pgd is in pgd_reg */ - UASM_i_MFC0(p, ptr, c0_kscratch(), pgd_reg); + if (cpu_has_ldpte) + UASM_i_MFC0(p, ptr, C0_PWBASE); + else + UASM_i_MFC0(p, ptr, c0_kscratch(), pgd_reg); } else { +#if defined(CONFIG_MIPS_PGD_C0_CONTEXT) /* * &pgd << 11 stored in CONTEXT [23..63]. */ @@ -838,33 +803,21 @@ build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, /* Clear lower 23 bits of context. */ uasm_i_dins(p, ptr, 0, 0, 23); - /* 1 0 1 0 1 << 6 xkphys cached */ - uasm_i_ori(p, ptr, ptr, 0x540); + /* insert bit[63:59] of CAC_BASE into bit[11:6] of ptr */ + uasm_i_ori(p, ptr, ptr, ((u64)(CAC_BASE) >> 53)); uasm_i_drotr(p, ptr, ptr, 11); - } #elif defined(CONFIG_SMP) -# ifdef CONFIG_MIPS_MT_SMTC - /* - * SMTC uses TCBind value as "CPU" index - */ - uasm_i_mfc0(p, ptr, C0_TCBIND); - uasm_i_dsrl_safe(p, ptr, ptr, 19); -# else - /* - * 64 bit SMP running in XKPHYS has smp_processor_id() << 3 - * stored in CONTEXT. - */ - uasm_i_dmfc0(p, ptr, C0_CONTEXT); - uasm_i_dsrl_safe(p, ptr, ptr, 23); -# endif - UASM_i_LA_mostly(p, tmp, pgdc); - uasm_i_daddu(p, ptr, ptr, tmp); - uasm_i_dmfc0(p, tmp, C0_BADVADDR); - uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr); + UASM_i_CPUID_MFC0(p, ptr, SMP_CPUID_REG); + uasm_i_dsrl_safe(p, ptr, ptr, SMP_CPUID_PTRSHIFT); + UASM_i_LA_mostly(p, tmp, pgdc); + uasm_i_daddu(p, ptr, ptr, tmp); + uasm_i_dmfc0(p, tmp, C0_BADVADDR); + uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr); #else - UASM_i_LA_mostly(p, ptr, pgdc); - uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr); + UASM_i_LA_mostly(p, ptr, pgdc); + uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr); #endif + } uasm_l_vmalloc_done(l, *p); @@ -873,6 +826,13 @@ build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, uasm_i_andi(p, tmp, tmp, (PTRS_PER_PGD - 1)<<3); uasm_i_daddu(p, ptr, ptr, tmp); /* add in pgd offset */ +#ifndef __PAGETABLE_PUD_FOLDED + uasm_i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */ + uasm_i_ld(p, ptr, 0, ptr); /* get pud pointer */ + uasm_i_dsrl_safe(p, tmp, tmp, PUD_SHIFT - 3); /* get pud offset in bytes */ + uasm_i_andi(p, tmp, tmp, (PTRS_PER_PUD - 1) << 3); + uasm_i_daddu(p, ptr, ptr, tmp); /* add in pud offset */ +#endif #ifndef __PAGETABLE_PMD_FOLDED uasm_i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */ uasm_i_ld(p, ptr, 0, ptr); /* get pmd pointer */ @@ -881,12 +841,13 @@ build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, uasm_i_daddu(p, ptr, ptr, tmp); /* add in pmd offset */ #endif } +EXPORT_SYMBOL_GPL(build_get_pmde64); /* * BVADDR is the faulting address, PTR is scratch. * PTR will hold the pgd for vmalloc. */ -static void __cpuinit +static void build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, unsigned int bvaddr, unsigned int ptr, enum vmalloc64_mode mode) @@ -910,7 +871,7 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, } } if (!did_vmalloc_branch) { - if (uasm_in_compat_space_p(swpd) && !uasm_rel_lo(swpd)) { + if (single_insn_swpd) { uasm_il_b(p, r, label_vmalloc_done); uasm_i_lui(p, ptr, uasm_rel_hi(swpd)); } else { @@ -924,6 +885,10 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, } if (mode != not_refill && check_for_high_segbits) { uasm_l_large_segbits_fault(l, *p); + + if (mode == refill_scratch && scratch_reg >= 0) + uasm_i_ehb(p); + /* * We get here if we are an xsseg address, or if we are * an xuseg address above (PGDIR_SHIFT+PGDIR_BITS) boundary. @@ -936,6 +901,8 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, * to mimic that here by taking a load/istream page * fault. */ + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(p, 0); UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0); uasm_i_jr(p, ptr); @@ -956,68 +923,46 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, * TMP and PTR are scratch. * TMP will be clobbered, PTR will hold the pgd entry. */ -static void __cpuinit __maybe_unused -build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr) +void build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr) { - long pgdc = (long)pgd_current; + if (pgd_reg != -1) { + /* pgd is in pgd_reg */ + uasm_i_mfc0(p, ptr, c0_kscratch(), pgd_reg); + uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */ + } else { + long pgdc = (long)pgd_current; - /* 32 bit SMP has smp_processor_id() stored in CONTEXT. */ + /* 32 bit SMP has smp_processor_id() stored in CONTEXT. */ #ifdef CONFIG_SMP -#ifdef CONFIG_MIPS_MT_SMTC - /* - * SMTC uses TCBind value as "CPU" index - */ - uasm_i_mfc0(p, ptr, C0_TCBIND); - UASM_i_LA_mostly(p, tmp, pgdc); - uasm_i_srl(p, ptr, ptr, 19); + uasm_i_mfc0(p, ptr, SMP_CPUID_REG); + UASM_i_LA_mostly(p, tmp, pgdc); + uasm_i_srl(p, ptr, ptr, SMP_CPUID_PTRSHIFT); + uasm_i_addu(p, ptr, tmp, ptr); #else - /* - * smp_processor_id() << 2 is stored in CONTEXT. - */ - uasm_i_mfc0(p, ptr, C0_CONTEXT); - UASM_i_LA_mostly(p, tmp, pgdc); - uasm_i_srl(p, ptr, ptr, 23); -#endif - uasm_i_addu(p, ptr, tmp, ptr); -#else - UASM_i_LA_mostly(p, ptr, pgdc); + UASM_i_LA_mostly(p, ptr, pgdc); #endif - uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */ - uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr); + uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */ + uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr); + } uasm_i_srl(p, tmp, tmp, PGDIR_SHIFT); /* get pgd only bits */ uasm_i_sll(p, tmp, tmp, PGD_T_LOG2); uasm_i_addu(p, ptr, ptr, tmp); /* add in pgd offset */ } +EXPORT_SYMBOL_GPL(build_get_pgde32); #endif /* !CONFIG_64BIT */ -static void __cpuinit build_adjust_context(u32 **p, unsigned int ctx) +static void build_adjust_context(u32 **p, unsigned int ctx) { unsigned int shift = 4 - (PTE_T_LOG2 + 1) + PAGE_SHIFT - 12; unsigned int mask = (PTRS_PER_PTE / 2 - 1) << (PTE_T_LOG2 + 1); - switch (current_cpu_type()) { - case CPU_VR41XX: - case CPU_VR4111: - case CPU_VR4121: - case CPU_VR4122: - case CPU_VR4131: - case CPU_VR4181: - case CPU_VR4181A: - case CPU_VR4133: - shift += 2; - break; - - default: - break; - } - if (shift) UASM_i_SRL(p, ctx, ctx, shift); uasm_i_andi(p, ctx, ctx, mask); } -static void __cpuinit build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr) +void build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr) { /* * Bug workaround for the Nevada. It seems as if under certain @@ -1041,70 +986,66 @@ static void __cpuinit build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr build_adjust_context(p, tmp); UASM_i_ADDU(p, ptr, ptr, tmp); /* add in offset */ } +EXPORT_SYMBOL_GPL(build_get_ptep); -static void __cpuinit build_update_entries(u32 **p, unsigned int tmp, - unsigned int ptep) +void build_update_entries(u32 **p, unsigned int tmp, unsigned int ptep) { - /* - * 64bit address support (36bit on a 32bit CPU) in a 32bit - * Kernel is a special case. Only a few CPUs use it. - */ -#ifdef CONFIG_64BIT_PHYS_ADDR - if (cpu_has_64bits) { - uasm_i_ld(p, tmp, 0, ptep); /* get even pte */ - uasm_i_ld(p, ptep, sizeof(pte_t), ptep); /* get odd pte */ - if (cpu_has_rixi) { - UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); - UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ - UASM_i_ROTR(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); - } else { - uasm_i_dsrl_safe(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); /* convert to entrylo0 */ - UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ - uasm_i_dsrl_safe(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); /* convert to entrylo1 */ + int pte_off_even = 0; + int pte_off_odd = sizeof(pte_t); + +#if defined(CONFIG_CPU_MIPS32) && defined(CONFIG_PHYS_ADDR_T_64BIT) + /* The low 32 bits of EntryLo is stored in pte_high */ + pte_off_even += offsetof(pte_t, pte_high); + pte_off_odd += offsetof(pte_t, pte_high); +#endif + + if (IS_ENABLED(CONFIG_XPA)) { + uasm_i_lw(p, tmp, pte_off_even, ptep); /* even pte */ + UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); + UASM_i_MTC0(p, tmp, C0_ENTRYLO0); + + if (cpu_has_xpa && !mips_xpa_disabled) { + uasm_i_lw(p, tmp, 0, ptep); + uasm_i_ext(p, tmp, tmp, 0, 24); + uasm_i_mthc0(p, tmp, C0_ENTRYLO0); } - UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */ - } else { - int pte_off_even = sizeof(pte_t) / 2; - int pte_off_odd = pte_off_even + sizeof(pte_t); - - /* The pte entries are pre-shifted */ - uasm_i_lw(p, tmp, pte_off_even, ptep); /* get even pte */ - UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ - uasm_i_lw(p, ptep, pte_off_odd, ptep); /* get odd pte */ - UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */ + + uasm_i_lw(p, tmp, pte_off_odd, ptep); /* odd pte */ + UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); + UASM_i_MTC0(p, tmp, C0_ENTRYLO1); + + if (cpu_has_xpa && !mips_xpa_disabled) { + uasm_i_lw(p, tmp, sizeof(pte_t), ptep); + uasm_i_ext(p, tmp, tmp, 0, 24); + uasm_i_mthc0(p, tmp, C0_ENTRYLO1); + } + return; } -#else - UASM_i_LW(p, tmp, 0, ptep); /* get even pte */ - UASM_i_LW(p, ptep, sizeof(pte_t), ptep); /* get odd pte */ + + UASM_i_LW(p, tmp, pte_off_even, ptep); /* get even pte */ + UASM_i_LW(p, ptep, pte_off_odd, ptep); /* get odd pte */ if (r45k_bvahwbug()) build_tlb_probe_entry(p); - if (cpu_has_rixi) { - UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); - if (r4k_250MHZhwbug()) - UASM_i_MTC0(p, 0, C0_ENTRYLO0); - UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ - UASM_i_ROTR(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); - } else { - UASM_i_SRL(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); /* convert to entrylo0 */ - if (r4k_250MHZhwbug()) - UASM_i_MTC0(p, 0, C0_ENTRYLO0); - UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ - UASM_i_SRL(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); /* convert to entrylo1 */ - if (r45k_bvahwbug()) - uasm_i_mfc0(p, tmp, C0_INDEX); - } + build_convert_pte_to_entrylo(p, tmp); + if (r4k_250MHZhwbug()) + UASM_i_MTC0(p, 0, C0_ENTRYLO0); + UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ + build_convert_pte_to_entrylo(p, ptep); + if (r45k_bvahwbug()) + uasm_i_mfc0(p, tmp, C0_INDEX); if (r4k_250MHZhwbug()) UASM_i_MTC0(p, 0, C0_ENTRYLO1); UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */ -#endif } +EXPORT_SYMBOL_GPL(build_update_entries); struct mips_huge_tlb_info { int huge_pte; int restore_scratch; + bool need_reload_pte; }; -static struct mips_huge_tlb_info __cpuinit +static struct mips_huge_tlb_info build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l, struct uasm_reloc **r, unsigned int tmp, unsigned int ptr, int c0_scratch_reg) @@ -1116,6 +1057,7 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l, rv.huge_pte = scratch; rv.restore_scratch = 0; + rv.need_reload_pte = false; if (check_for_high_segbits) { UASM_i_MFC0(p, tmp, C0_BADVADDR); @@ -1131,7 +1073,7 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l, UASM_i_SW(p, scratch, scratchpad_offset(0), 0); uasm_i_dsrl_safe(p, scratch, tmp, - PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3); + PGDIR_SHIFT + PGD_TABLE_ORDER + PAGE_SHIFT - 3); uasm_il_bnez(p, r, scratch, label_vmalloc); if (pgd_reg == -1) { @@ -1161,8 +1103,9 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l, if (pgd_reg == -1) { vmalloc_branch_delay_filled = 1; - /* 1 0 1 0 1 << 6 xkphys cached */ - uasm_i_ori(p, ptr, ptr, 0x540); + /* insert bit[63:59] of CAC_BASE into bit[11:6] of ptr */ + uasm_i_ori(p, ptr, ptr, ((u64)(CAC_BASE) >> 53)); + uasm_i_drotr(p, ptr, ptr, 11); } @@ -1200,6 +1143,21 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l, uasm_i_ld(p, LOC_PTEP, 0, ptr); /* get pmd pointer */ } +#ifndef __PAGETABLE_PUD_FOLDED + /* get pud offset in bytes */ + uasm_i_dsrl_safe(p, scratch, tmp, PUD_SHIFT - 3); + uasm_i_andi(p, scratch, scratch, (PTRS_PER_PUD - 1) << 3); + + if (use_lwx_insns()) { + UASM_i_LWX(p, ptr, scratch, ptr); + } else { + uasm_i_daddu(p, ptr, ptr, scratch); /* add in pmd offset */ + UASM_i_LW(p, ptr, 0, ptr); + } + /* ptr contains a pointer to PMD entry */ + /* tmp contains the address */ +#endif + #ifndef __PAGETABLE_PMD_FOLDED /* get pmd offset in bytes */ uasm_i_dsrl_safe(p, scratch, tmp, PMD_SHIFT - 3); @@ -1254,6 +1212,7 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l, UASM_i_MTC0(p, odd, C0_ENTRYLO1); /* load it */ if (c0_scratch_reg >= 0) { + uasm_i_ehb(p); UASM_i_MFC0(p, scratch, c0_kscratch(), c0_scratch_reg); build_tlb_write_entry(p, l, r, tlb_random); uasm_l_leave(l, *p); @@ -1282,7 +1241,7 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l, */ #define MIPS64_REFILL_INSNS 32 -static void __cpuinit build_r4000_tlb_refill_handler(void) +static void build_r4000_tlb_refill_handler(void) { u32 *p = tlb_handler; struct uasm_label *l = labels; @@ -1297,13 +1256,14 @@ static void __cpuinit build_r4000_tlb_refill_handler(void) memset(relocs, 0, sizeof(relocs)); memset(final_handler, 0, sizeof(final_handler)); - if ((scratch_reg >= 0 || scratchpad_available()) && use_bbit_insns()) { - htlb_info = build_fast_tlb_refill_handler(&p, &l, &r, K0, K1, + if (IS_ENABLED(CONFIG_64BIT) && (scratch_reg >= 0 || scratchpad_available()) && use_bbit_insns()) { + htlb_info = build_fast_tlb_refill_handler(&p, &l, &r, GPR_K0, GPR_K1, scratch_reg); vmalloc_mode = refill_scratch; } else { - htlb_info.huge_pte = K0; + htlb_info.huge_pte = GPR_K0; htlb_info.restore_scratch = 0; + htlb_info.need_reload_pte = true; vmalloc_mode = refill_noscratch; /* * create the plain linear handler @@ -1311,42 +1271,44 @@ static void __cpuinit build_r4000_tlb_refill_handler(void) if (bcm1250_m3_war()) { unsigned int segbits = 44; - uasm_i_dmfc0(&p, K0, C0_BADVADDR); - uasm_i_dmfc0(&p, K1, C0_ENTRYHI); - uasm_i_xor(&p, K0, K0, K1); - uasm_i_dsrl_safe(&p, K1, K0, 62); - uasm_i_dsrl_safe(&p, K0, K0, 12 + 1); - uasm_i_dsll_safe(&p, K0, K0, 64 + 12 + 1 - segbits); - uasm_i_or(&p, K0, K0, K1); - uasm_il_bnez(&p, &r, K0, label_leave); + uasm_i_dmfc0(&p, GPR_K0, C0_BADVADDR); + uasm_i_dmfc0(&p, GPR_K1, C0_ENTRYHI); + uasm_i_xor(&p, GPR_K0, GPR_K0, GPR_K1); + uasm_i_dsrl_safe(&p, GPR_K1, GPR_K0, 62); + uasm_i_dsrl_safe(&p, GPR_K0, GPR_K0, 12 + 1); + uasm_i_dsll_safe(&p, GPR_K0, GPR_K0, 64 + 12 + 1 - segbits); + uasm_i_or(&p, GPR_K0, GPR_K0, GPR_K1); + uasm_il_bnez(&p, &r, GPR_K0, label_leave); /* No need for uasm_i_nop */ } #ifdef CONFIG_64BIT - build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd in K1 */ + build_get_pmde64(&p, &l, &r, GPR_K0, GPR_K1); /* get pmd in GPR_K1 */ #else - build_get_pgde32(&p, K0, K1); /* get pgd in K1 */ + build_get_pgde32(&p, GPR_K0, GPR_K1); /* get pgd in GPR_K1 */ #endif #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT - build_is_huge_pte(&p, &r, K0, K1, label_tlb_huge_update); + build_is_huge_pte(&p, &r, GPR_K0, GPR_K1, label_tlb_huge_update); #endif - build_get_ptep(&p, K0, K1); - build_update_entries(&p, K0, K1); + build_get_ptep(&p, GPR_K0, GPR_K1); + build_update_entries(&p, GPR_K0, GPR_K1); build_tlb_write_entry(&p, &l, &r, tlb_random); uasm_l_leave(&l, p); uasm_i_eret(&p); /* return from trap */ } #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT uasm_l_tlb_huge_update(&l, p); - build_huge_update_entries(&p, htlb_info.huge_pte, K1); - build_huge_tlb_write_entry(&p, &l, &r, K0, tlb_random, + if (htlb_info.need_reload_pte) + UASM_i_LW(&p, htlb_info.huge_pte, 0, GPR_K1); + build_huge_update_entries(&p, htlb_info.huge_pte, GPR_K1); + build_huge_tlb_write_entry(&p, &l, &r, GPR_K0, tlb_random, htlb_info.restore_scratch); #endif #ifdef CONFIG_64BIT - build_get_pgd_vmalloc64(&p, &l, &r, K0, K1, vmalloc_mode); + build_get_pgd_vmalloc64(&p, &l, &r, GPR_K0, GPR_K1, vmalloc_mode); #endif /* @@ -1356,130 +1318,237 @@ static void __cpuinit build_r4000_tlb_refill_handler(void) * need three, with the second nop'ed and the third being * unused. */ - /* Loongson2 ebase is different than r4k, we have more space */ -#if defined(CONFIG_32BIT) || defined(CONFIG_CPU_LOONGSON2) - if ((p - tlb_handler) > 64) - panic("TLB refill handler space exceeded"); -#else - if (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 1) - || (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 3) - && uasm_insn_has_bdelay(relocs, - tlb_handler + MIPS64_REFILL_INSNS - 3))) - panic("TLB refill handler space exceeded"); -#endif - - /* - * Now fold the handler in the TLB refill handler space. - */ -#if defined(CONFIG_32BIT) || defined(CONFIG_CPU_LOONGSON2) - f = final_handler; - /* Simplest case, just copy the handler. */ - uasm_copy_handler(relocs, labels, tlb_handler, p, f); - final_len = p - tlb_handler; -#else /* CONFIG_64BIT */ - f = final_handler + MIPS64_REFILL_INSNS; - if ((p - tlb_handler) <= MIPS64_REFILL_INSNS) { - /* Just copy the handler. */ - uasm_copy_handler(relocs, labels, tlb_handler, p, f); - final_len = p - tlb_handler; - } else { -#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT - const enum label_id ls = label_tlb_huge_update; -#else - const enum label_id ls = label_vmalloc; -#endif - u32 *split; - int ov = 0; - int i; - - for (i = 0; i < ARRAY_SIZE(labels) && labels[i].lab != ls; i++) - ; - BUG_ON(i == ARRAY_SIZE(labels)); - split = labels[i].addr; - - /* - * See if we have overflown one way or the other. - */ - if (split > tlb_handler + MIPS64_REFILL_INSNS || - split < p - MIPS64_REFILL_INSNS) - ov = 1; - - if (ov) { + switch (boot_cpu_type()) { + default: + if (sizeof(long) == 4) { + fallthrough; + case CPU_LOONGSON2EF: + /* Loongson2 ebase is different than r4k, we have more space */ + if ((p - tlb_handler) > 64) + panic("TLB refill handler space exceeded"); /* - * Split two instructions before the end. One - * for the branch and one for the instruction - * in the delay slot. + * Now fold the handler in the TLB refill handler space. */ - split = tlb_handler + MIPS64_REFILL_INSNS - 2; - + f = final_handler; + /* Simplest case, just copy the handler. */ + uasm_copy_handler(relocs, labels, tlb_handler, p, f); + final_len = p - tlb_handler; + break; + } else { + if (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 1) + || (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 3) + && uasm_insn_has_bdelay(relocs, + tlb_handler + MIPS64_REFILL_INSNS - 3))) + panic("TLB refill handler space exceeded"); /* - * If the branch would fall in a delay slot, - * we must back up an additional instruction - * so that it is no longer in a delay slot. + * Now fold the handler in the TLB refill handler space. */ - if (uasm_insn_has_bdelay(relocs, split - 1)) - split--; - } - /* Copy first part of the handler. */ - uasm_copy_handler(relocs, labels, tlb_handler, split, f); - f += split - tlb_handler; - - if (ov) { - /* Insert branch. */ - uasm_l_split(&l, final_handler); - uasm_il_b(&f, &r, label_split); - if (uasm_insn_has_bdelay(relocs, split)) - uasm_i_nop(&f); - else { - uasm_copy_handler(relocs, labels, - split, split + 1, f); - uasm_move_labels(labels, f, f + 1, -1); - f++; - split++; + f = final_handler + MIPS64_REFILL_INSNS; + if ((p - tlb_handler) <= MIPS64_REFILL_INSNS) { + /* Just copy the handler. */ + uasm_copy_handler(relocs, labels, tlb_handler, p, f); + final_len = p - tlb_handler; + } else { +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + const enum label_id ls = label_tlb_huge_update; +#else + const enum label_id ls = label_vmalloc; +#endif + u32 *split; + int ov = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(labels) && labels[i].lab != ls; i++) + ; + BUG_ON(i == ARRAY_SIZE(labels)); + split = labels[i].addr; + + /* + * See if we have overflown one way or the other. + */ + if (split > tlb_handler + MIPS64_REFILL_INSNS || + split < p - MIPS64_REFILL_INSNS) + ov = 1; + + if (ov) { + /* + * Split two instructions before the end. One + * for the branch and one for the instruction + * in the delay slot. + */ + split = tlb_handler + MIPS64_REFILL_INSNS - 2; + + /* + * If the branch would fall in a delay slot, + * we must back up an additional instruction + * so that it is no longer in a delay slot. + */ + if (uasm_insn_has_bdelay(relocs, split - 1)) + split--; + } + /* Copy first part of the handler. */ + uasm_copy_handler(relocs, labels, tlb_handler, split, f); + f += split - tlb_handler; + + if (ov) { + /* Insert branch. */ + uasm_l_split(&l, final_handler); + uasm_il_b(&f, &r, label_split); + if (uasm_insn_has_bdelay(relocs, split)) + uasm_i_nop(&f); + else { + uasm_copy_handler(relocs, labels, + split, split + 1, f); + uasm_move_labels(labels, f, f + 1, -1); + f++; + split++; + } + } + + /* Copy the rest of the handler. */ + uasm_copy_handler(relocs, labels, split, p, final_handler); + final_len = (f - (final_handler + MIPS64_REFILL_INSNS)) + + (p - split); } } - - /* Copy the rest of the handler. */ - uasm_copy_handler(relocs, labels, split, p, final_handler); - final_len = (f - (final_handler + MIPS64_REFILL_INSNS)) + - (p - split); + break; } -#endif /* CONFIG_64BIT */ uasm_resolve_relocs(relocs, labels); pr_debug("Wrote TLB refill handler (%u instructions).\n", final_len); memcpy((void *)ebase, final_handler, 0x100); - - dump_handler("r4000_tlb_refill", (u32 *)ebase, 64); + local_flush_icache_range(ebase, ebase + 0x100); + dump_handler("r4000_tlb_refill", (u32 *)ebase, (u32 *)(ebase + 0x100)); } -extern u32 handle_tlbl[], handle_tlbl_end[]; -extern u32 handle_tlbs[], handle_tlbs_end[]; -extern u32 handle_tlbm[], handle_tlbm_end[]; +static void setup_pw(void) +{ + unsigned int pwctl; + unsigned long pgd_i, pgd_w; +#ifndef __PAGETABLE_PMD_FOLDED + unsigned long pmd_i, pmd_w; +#endif + unsigned long pt_i, pt_w; + unsigned long pte_i, pte_w; +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + unsigned long psn; -#ifdef CONFIG_MIPS_PGD_C0_CONTEXT -extern u32 tlbmiss_handler_setup_pgd[], tlbmiss_handler_setup_pgd_end[]; + psn = ilog2(_PAGE_HUGE); /* bit used to indicate huge page */ +#endif + pgd_i = PGDIR_SHIFT; /* 1st level PGD */ +#ifndef __PAGETABLE_PMD_FOLDED + pgd_w = PGDIR_SHIFT - PMD_SHIFT + PGD_TABLE_ORDER; + + pmd_i = PMD_SHIFT; /* 2nd level PMD */ + pmd_w = PMD_SHIFT - PAGE_SHIFT; +#else + pgd_w = PGDIR_SHIFT - PAGE_SHIFT + PGD_TABLE_ORDER; +#endif + + pt_i = PAGE_SHIFT; /* 3rd level PTE */ + pt_w = PAGE_SHIFT - 3; + + pte_i = ilog2(_PAGE_GLOBAL); + pte_w = 0; + pwctl = 1 << 30; /* Set PWDirExt */ + +#ifndef __PAGETABLE_PMD_FOLDED + write_c0_pwfield(pgd_i << 24 | pmd_i << 12 | pt_i << 6 | pte_i); + write_c0_pwsize(1 << 30 | pgd_w << 24 | pmd_w << 12 | pt_w << 6 | pte_w); +#else + write_c0_pwfield(pgd_i << 24 | pt_i << 6 | pte_i); + write_c0_pwsize(1 << 30 | pgd_w << 24 | pt_w << 6 | pte_w); +#endif -static void __cpuinit build_r4000_setup_pgd(void) +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + pwctl |= (1 << 6 | psn); +#endif + write_c0_pwctl(pwctl); + write_c0_kpgd((long)swapper_pg_dir); + kscratch_used_mask |= (1 << 7); /* KScratch6 is used for KPGD */ +} + +static void build_loongson3_tlb_refill_handler(void) { - const int a0 = 4; - const int a1 = 5; - u32 *p = tlbmiss_handler_setup_pgd_array; - const int tlbmiss_handler_setup_pgd_size = - tlbmiss_handler_setup_pgd_end - tlbmiss_handler_setup_pgd; + u32 *p = tlb_handler; struct uasm_label *l = labels; struct uasm_reloc *r = relocs; - memset(tlbmiss_handler_setup_pgd, 0, tlbmiss_handler_setup_pgd_size * - sizeof(tlbmiss_handler_setup_pgd[0])); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); + memset(tlb_handler, 0, sizeof(tlb_handler)); - pgd_reg = allocate_kscratch(); + if (check_for_high_segbits) { + uasm_i_dmfc0(&p, GPR_K0, C0_BADVADDR); + uasm_i_dsrl_safe(&p, GPR_K1, GPR_K0, + PGDIR_SHIFT + PGD_TABLE_ORDER + PAGE_SHIFT - 3); + uasm_il_beqz(&p, &r, GPR_K1, label_vmalloc); + uasm_i_nop(&p); + uasm_il_bgez(&p, &r, GPR_K0, label_large_segbits_fault); + uasm_i_nop(&p); + uasm_l_vmalloc(&l, p); + } + + uasm_i_dmfc0(&p, GPR_K1, C0_PGD); + + uasm_i_lddir(&p, GPR_K0, GPR_K1, 3); /* global page dir */ +#ifndef __PAGETABLE_PMD_FOLDED + uasm_i_lddir(&p, GPR_K1, GPR_K0, 1); /* middle page dir */ +#endif + uasm_i_ldpte(&p, GPR_K1, 0); /* even */ + uasm_i_ldpte(&p, GPR_K1, 1); /* odd */ + uasm_i_tlbwr(&p); + + /* restore page mask */ + if (PM_DEFAULT_MASK >> 16) { + uasm_i_lui(&p, GPR_K0, PM_DEFAULT_MASK >> 16); + uasm_i_ori(&p, GPR_K0, GPR_K0, PM_DEFAULT_MASK & 0xffff); + uasm_i_mtc0(&p, GPR_K0, C0_PAGEMASK); + } else if (PM_DEFAULT_MASK) { + uasm_i_ori(&p, GPR_K0, 0, PM_DEFAULT_MASK); + uasm_i_mtc0(&p, GPR_K0, C0_PAGEMASK); + } else { + uasm_i_mtc0(&p, 0, C0_PAGEMASK); + } + + uasm_i_eret(&p); + + if (check_for_high_segbits) { + uasm_l_large_segbits_fault(&l, p); + UASM_i_LA(&p, GPR_K1, (unsigned long)tlb_do_page_fault_0); + uasm_i_jr(&p, GPR_K1); + uasm_i_nop(&p); + } + + uasm_resolve_relocs(relocs, labels); + memcpy((void *)(ebase + 0x80), tlb_handler, 0x80); + local_flush_icache_range(ebase + 0x80, ebase + 0x100); + dump_handler("loongson3_tlb_refill", + (u32 *)(ebase + 0x80), (u32 *)(ebase + 0x100)); +} + +static void build_setup_pgd(void) +{ + const int a0 = 4; + const int __maybe_unused a1 = 5; + const int __maybe_unused a2 = 6; + u32 *p = (u32 *)msk_isa16_mode((ulong)tlbmiss_handler_setup_pgd); +#ifndef CONFIG_MIPS_PGD_C0_CONTEXT + long pgdc = (long)pgd_current; +#endif + + memset(p, 0, tlbmiss_handler_setup_pgd_end - (char *)p); + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + pgd_reg = allocate_kscratch(); +#ifdef CONFIG_MIPS_PGD_C0_CONTEXT if (pgd_reg == -1) { + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + /* PGD << 11 in c0_Context */ /* * If it is a ckseg0 address, convert to a physical @@ -1494,37 +1563,66 @@ static void __cpuinit build_r4000_setup_pgd(void) uasm_i_dinsm(&p, a0, 0, 29, 64 - 29); uasm_l_tlbl_goaround1(&l, p); UASM_i_SLL(&p, a0, a0, 11); - uasm_i_jr(&p, 31); UASM_i_MTC0(&p, a0, C0_CONTEXT); + uasm_i_jr(&p, 31); + uasm_i_ehb(&p); } else { /* PGD in c0_KScratch */ + if (cpu_has_ldpte) + UASM_i_MTC0(&p, a0, C0_PWBASE); + else + UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg); uasm_i_jr(&p, 31); + uasm_i_ehb(&p); + } +#else +#ifdef CONFIG_SMP + /* Save PGD to pgd_current[smp_processor_id()] */ + UASM_i_CPUID_MFC0(&p, a1, SMP_CPUID_REG); + UASM_i_SRL_SAFE(&p, a1, a1, SMP_CPUID_PTRSHIFT); + UASM_i_LA_mostly(&p, a2, pgdc); + UASM_i_ADDU(&p, a2, a2, a1); + UASM_i_SW(&p, a0, uasm_rel_lo(pgdc), a2); +#else + UASM_i_LA_mostly(&p, a2, pgdc); + UASM_i_SW(&p, a0, uasm_rel_lo(pgdc), a2); +#endif /* SMP */ + + /* if pgd_reg is allocated, save PGD also to scratch register */ + if (pgd_reg != -1) { UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg); + uasm_i_jr(&p, 31); + uasm_i_ehb(&p); + } else { + uasm_i_jr(&p, 31); + uasm_i_nop(&p); } - if (p >= tlbmiss_handler_setup_pgd_end) +#endif + if (p >= (u32 *)tlbmiss_handler_setup_pgd_end) panic("tlbmiss_handler_setup_pgd space exceeded"); uasm_resolve_relocs(relocs, labels); pr_debug("Wrote tlbmiss_handler_setup_pgd (%u instructions).\n", - (unsigned int)(p - tlbmiss_handler_setup_pgd)); + (unsigned int)(p - (u32 *)tlbmiss_handler_setup_pgd)); dump_handler("tlbmiss_handler", tlbmiss_handler_setup_pgd, - tlbmiss_handler_setup_pgd_size); + tlbmiss_handler_setup_pgd_end); } -#endif -static void __cpuinit +static void iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr) { #ifdef CONFIG_SMP -# ifdef CONFIG_64BIT_PHYS_ADDR + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(p, 0); +# ifdef CONFIG_PHYS_ADDR_T_64BIT if (cpu_has_64bits) uasm_i_lld(p, pte, 0, ptr); else # endif UASM_i_LL(p, pte, 0, ptr); #else -# ifdef CONFIG_64BIT_PHYS_ADDR +# ifdef CONFIG_PHYS_ADDR_T_64BIT if (cpu_has_64bits) uasm_i_ld(p, pte, 0, ptr); else @@ -1533,17 +1631,23 @@ iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr) #endif } -static void __cpuinit +static void iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr, - unsigned int mode) + unsigned int mode, unsigned int scratch) { -#ifdef CONFIG_64BIT_PHYS_ADDR unsigned int hwmode = mode & (_PAGE_VALID | _PAGE_DIRTY); -#endif + unsigned int swmode = mode & ~hwmode; + + if (IS_ENABLED(CONFIG_XPA) && !cpu_has_64bits) { + uasm_i_lui(p, scratch, swmode >> 16); + uasm_i_or(p, pte, pte, scratch); + BUG_ON(swmode & 0xffff); + } else { + uasm_i_ori(p, pte, pte, mode); + } - uasm_i_ori(p, pte, pte, mode); #ifdef CONFIG_SMP -# ifdef CONFIG_64BIT_PHYS_ADDR +# ifdef CONFIG_PHYS_ADDR_T_64BIT if (cpu_has_64bits) uasm_i_scd(p, pte, 0, ptr); else @@ -1555,11 +1659,12 @@ iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr, else uasm_il_beqz(p, r, pte, label_smp_pgtable_change); -# ifdef CONFIG_64BIT_PHYS_ADDR +# ifdef CONFIG_PHYS_ADDR_T_64BIT if (!cpu_has_64bits) { /* no uasm_i_nop needed */ uasm_i_ll(p, pte, sizeof(pte_t) / 2, ptr); uasm_i_ori(p, pte, pte, hwmode); + BUG_ON(hwmode & ~0xffff); uasm_i_sc(p, pte, sizeof(pte_t) / 2, ptr); uasm_il_beqz(p, r, pte, label_smp_pgtable_change); /* no uasm_i_nop needed */ @@ -1570,17 +1675,18 @@ iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr, uasm_i_nop(p); # endif #else -# ifdef CONFIG_64BIT_PHYS_ADDR +# ifdef CONFIG_PHYS_ADDR_T_64BIT if (cpu_has_64bits) uasm_i_sd(p, pte, 0, ptr); else # endif UASM_i_SW(p, pte, 0, ptr); -# ifdef CONFIG_64BIT_PHYS_ADDR +# ifdef CONFIG_PHYS_ADDR_T_64BIT if (!cpu_has_64bits) { uasm_i_lw(p, pte, sizeof(pte_t) / 2, ptr); uasm_i_ori(p, pte, pte, hwmode); + BUG_ON(hwmode & ~0xffff); uasm_i_sw(p, pte, sizeof(pte_t) / 2, ptr); uasm_i_lw(p, pte, 0, ptr); } @@ -1591,28 +1697,38 @@ iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr, /* * Check if PTE is present, if not then jump to LABEL. PTR points to * the page table where this PTE is located, PTE will be re-loaded - * with it's original value. + * with its original value. */ -static void __cpuinit +static void build_pte_present(u32 **p, struct uasm_reloc **r, int pte, int ptr, int scratch, enum label_id lid) { int t = scratch >= 0 ? scratch : pte; + int cur = pte; if (cpu_has_rixi) { if (use_bbit_insns()) { uasm_il_bbit0(p, r, pte, ilog2(_PAGE_PRESENT), lid); uasm_i_nop(p); } else { - uasm_i_andi(p, t, pte, _PAGE_PRESENT); + if (_PAGE_PRESENT_SHIFT) { + uasm_i_srl(p, t, cur, _PAGE_PRESENT_SHIFT); + cur = t; + } + uasm_i_andi(p, t, cur, 1); uasm_il_beqz(p, r, t, lid); if (pte == t) /* You lose the SMP race :-(*/ iPTE_LW(p, pte, ptr); } } else { - uasm_i_andi(p, t, pte, _PAGE_PRESENT | _PAGE_READ); - uasm_i_xori(p, t, t, _PAGE_PRESENT | _PAGE_READ); + if (_PAGE_PRESENT_SHIFT) { + uasm_i_srl(p, t, cur, _PAGE_PRESENT_SHIFT); + cur = t; + } + uasm_i_andi(p, t, cur, + (_PAGE_PRESENT | _PAGE_NO_READ) >> _PAGE_PRESENT_SHIFT); + uasm_i_xori(p, t, t, _PAGE_PRESENT >> _PAGE_PRESENT_SHIFT); uasm_il_bnez(p, r, t, lid); if (pte == t) /* You lose the SMP race :-(*/ @@ -1621,28 +1737,35 @@ build_pte_present(u32 **p, struct uasm_reloc **r, } /* Make PTE valid, store result in PTR. */ -static void __cpuinit +static void build_make_valid(u32 **p, struct uasm_reloc **r, unsigned int pte, - unsigned int ptr) + unsigned int ptr, unsigned int scratch) { unsigned int mode = _PAGE_VALID | _PAGE_ACCESSED; - iPTE_SW(p, r, pte, ptr, mode); + iPTE_SW(p, r, pte, ptr, mode, scratch); } /* * Check if PTE can be written to, if not branch to LABEL. Regardless * restore PTE with value from PTR when done. */ -static void __cpuinit +static void build_pte_writable(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr, int scratch, enum label_id lid) { int t = scratch >= 0 ? scratch : pte; + int cur = pte; - uasm_i_andi(p, t, pte, _PAGE_PRESENT | _PAGE_WRITE); - uasm_i_xori(p, t, t, _PAGE_PRESENT | _PAGE_WRITE); + if (_PAGE_PRESENT_SHIFT) { + uasm_i_srl(p, t, cur, _PAGE_PRESENT_SHIFT); + cur = t; + } + uasm_i_andi(p, t, cur, + (_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT); + uasm_i_xori(p, t, t, + (_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT); uasm_il_bnez(p, r, t, lid); if (pte == t) /* You lose the SMP race :-(*/ @@ -1654,21 +1777,21 @@ build_pte_writable(u32 **p, struct uasm_reloc **r, /* Make PTE writable, update software status bits as well, then store * at PTR. */ -static void __cpuinit +static void build_make_write(u32 **p, struct uasm_reloc **r, unsigned int pte, - unsigned int ptr) + unsigned int ptr, unsigned int scratch) { unsigned int mode = (_PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY); - iPTE_SW(p, r, pte, ptr, mode); + iPTE_SW(p, r, pte, ptr, mode, scratch); } /* * Check if PTE can be modified, if not branch to LABEL. Regardless * restore PTE with value from PTR when done. */ -static void __cpuinit +static void build_pte_modifiable(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr, int scratch, enum label_id lid) @@ -1678,7 +1801,8 @@ build_pte_modifiable(u32 **p, struct uasm_reloc **r, uasm_i_nop(p); } else { int t = scratch >= 0 ? scratch : pte; - uasm_i_andi(p, t, pte, _PAGE_WRITE); + uasm_i_srl(p, t, pte, _PAGE_WRITE_SHIFT); + uasm_i_andi(p, t, t, 1); uasm_il_beqz(p, r, t, lid); if (pte == t) /* You lose the SMP race :-(*/ @@ -1697,7 +1821,7 @@ build_pte_modifiable(u32 **p, struct uasm_reloc **r, * This places the pte into ENTRYLO0 and writes it with tlbwi. * Then it returns. */ -static void __cpuinit +static void build_r3000_pte_reload_tlbwi(u32 **p, unsigned int pte, unsigned int tmp) { uasm_i_mtc0(p, pte, C0_ENTRYLO0); /* cp0 delay */ @@ -1713,7 +1837,7 @@ build_r3000_pte_reload_tlbwi(u32 **p, unsigned int pte, unsigned int tmp) * may have the probe fail bit set as a result of a trap on a * kseg2 access, i.e. without refill. Then it returns. */ -static void __cpuinit +static void build_r3000_tlb_reload_write(u32 **p, struct uasm_label **l, struct uasm_reloc **r, unsigned int pte, unsigned int tmp) @@ -1731,7 +1855,7 @@ build_r3000_tlb_reload_write(u32 **p, struct uasm_label **l, uasm_i_rfe(p); /* branch delay */ } -static void __cpuinit +static void build_r3000_tlbchange_handler_head(u32 **p, unsigned int pte, unsigned int ptr) { @@ -1751,104 +1875,121 @@ build_r3000_tlbchange_handler_head(u32 **p, unsigned int pte, uasm_i_tlbp(p); /* load delay */ } -static void __cpuinit build_r3000_tlb_load_handler(void) +static void build_r3000_tlb_load_handler(void) { - u32 *p = handle_tlbl; - const int handle_tlbl_size = handle_tlbl_end - handle_tlbl; + u32 *p = (u32 *)handle_tlbl; struct uasm_label *l = labels; struct uasm_reloc *r = relocs; - memset(handle_tlbl, 0, handle_tlbl_size * sizeof(handle_tlbl[0])); + memset(p, 0, handle_tlbl_end - (char *)p); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); - build_r3000_tlbchange_handler_head(&p, K0, K1); - build_pte_present(&p, &r, K0, K1, -1, label_nopage_tlbl); + build_r3000_tlbchange_handler_head(&p, GPR_K0, GPR_K1); + build_pte_present(&p, &r, GPR_K0, GPR_K1, -1, label_nopage_tlbl); uasm_i_nop(&p); /* load delay */ - build_make_valid(&p, &r, K0, K1); - build_r3000_tlb_reload_write(&p, &l, &r, K0, K1); + build_make_valid(&p, &r, GPR_K0, GPR_K1, -1); + build_r3000_tlb_reload_write(&p, &l, &r, GPR_K0, GPR_K1); uasm_l_nopage_tlbl(&l, p); uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff); uasm_i_nop(&p); - if (p >= handle_tlbl_end) + if (p >= (u32 *)handle_tlbl_end) panic("TLB load handler fastpath space exceeded"); uasm_resolve_relocs(relocs, labels); pr_debug("Wrote TLB load handler fastpath (%u instructions).\n", - (unsigned int)(p - handle_tlbl)); + (unsigned int)(p - (u32 *)handle_tlbl)); - dump_handler("r3000_tlb_load", handle_tlbl, handle_tlbl_size); + dump_handler("r3000_tlb_load", handle_tlbl, handle_tlbl_end); } -static void __cpuinit build_r3000_tlb_store_handler(void) +static void build_r3000_tlb_store_handler(void) { - u32 *p = handle_tlbs; - const int handle_tlbs_size = handle_tlbs_end - handle_tlbs; + u32 *p = (u32 *)handle_tlbs; struct uasm_label *l = labels; struct uasm_reloc *r = relocs; - memset(handle_tlbs, 0, handle_tlbs_size * sizeof(handle_tlbs[0])); + memset(p, 0, handle_tlbs_end - (char *)p); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); - build_r3000_tlbchange_handler_head(&p, K0, K1); - build_pte_writable(&p, &r, K0, K1, -1, label_nopage_tlbs); + build_r3000_tlbchange_handler_head(&p, GPR_K0, GPR_K1); + build_pte_writable(&p, &r, GPR_K0, GPR_K1, -1, label_nopage_tlbs); uasm_i_nop(&p); /* load delay */ - build_make_write(&p, &r, K0, K1); - build_r3000_tlb_reload_write(&p, &l, &r, K0, K1); + build_make_write(&p, &r, GPR_K0, GPR_K1, -1); + build_r3000_tlb_reload_write(&p, &l, &r, GPR_K0, GPR_K1); uasm_l_nopage_tlbs(&l, p); uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); uasm_i_nop(&p); - if (p >= handle_tlbs) + if (p >= (u32 *)handle_tlbs_end) panic("TLB store handler fastpath space exceeded"); uasm_resolve_relocs(relocs, labels); pr_debug("Wrote TLB store handler fastpath (%u instructions).\n", - (unsigned int)(p - handle_tlbs)); + (unsigned int)(p - (u32 *)handle_tlbs)); - dump_handler("r3000_tlb_store", handle_tlbs, handle_tlbs_size); + dump_handler("r3000_tlb_store", handle_tlbs, handle_tlbs_end); } -static void __cpuinit build_r3000_tlb_modify_handler(void) +static void build_r3000_tlb_modify_handler(void) { - u32 *p = handle_tlbm; - const int handle_tlbm_size = handle_tlbm_end - handle_tlbm; + u32 *p = (u32 *)handle_tlbm; struct uasm_label *l = labels; struct uasm_reloc *r = relocs; - memset(handle_tlbm, 0, handle_tlbm_size * sizeof(handle_tlbm[0])); + memset(p, 0, handle_tlbm_end - (char *)p); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); - build_r3000_tlbchange_handler_head(&p, K0, K1); - build_pte_modifiable(&p, &r, K0, K1, -1, label_nopage_tlbm); + build_r3000_tlbchange_handler_head(&p, GPR_K0, GPR_K1); + build_pte_modifiable(&p, &r, GPR_K0, GPR_K1, -1, label_nopage_tlbm); uasm_i_nop(&p); /* load delay */ - build_make_write(&p, &r, K0, K1); - build_r3000_pte_reload_tlbwi(&p, K0, K1); + build_make_write(&p, &r, GPR_K0, GPR_K1, -1); + build_r3000_pte_reload_tlbwi(&p, GPR_K0, GPR_K1); uasm_l_nopage_tlbm(&l, p); uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); uasm_i_nop(&p); - if (p >= handle_tlbm_end) + if (p >= (u32 *)handle_tlbm_end) panic("TLB modify handler fastpath space exceeded"); uasm_resolve_relocs(relocs, labels); pr_debug("Wrote TLB modify handler fastpath (%u instructions).\n", - (unsigned int)(p - handle_tlbm)); + (unsigned int)(p - (u32 *)handle_tlbm)); - dump_handler("r3000_tlb_modify", handle_tlbm, handle_tlbm_size); + dump_handler("r3000_tlb_modify", handle_tlbm, handle_tlbm_end); } #endif /* CONFIG_MIPS_PGD_C0_CONTEXT */ +static bool cpu_has_tlbex_tlbp_race(void) +{ + /* + * When a Hardware Table Walker is running it can replace TLB entries + * at any time, leading to a race between it & the CPU. + */ + if (cpu_has_htw) + return true; + + /* + * If the CPU shares FTLB RAM with its siblings then our entry may be + * replaced at any time by a sibling performing a write to the FTLB. + */ + if (cpu_has_shared_ftlb_ram) + return true; + + /* In all other cases there ought to be no race condition to handle */ + return false; +} + /* * R4000 style TLB load/store/modify handlers. */ -static struct work_registers __cpuinit +static struct work_registers build_r4000_tlbchange_handler_head(u32 **p, struct uasm_label **l, struct uasm_reloc **r) { @@ -1871,7 +2012,7 @@ build_r4000_tlbchange_handler_head(u32 **p, struct uasm_label **l, UASM_i_MFC0(p, wr.r1, C0_BADVADDR); UASM_i_LW(p, wr.r2, 0, wr.r2); - UASM_i_SRL(p, wr.r1, wr.r1, PAGE_SHIFT + PTE_ORDER - PTE_T_LOG2); + UASM_i_SRL(p, wr.r1, wr.r1, PAGE_SHIFT - PTE_T_LOG2); uasm_i_andi(p, wr.r1, wr.r1, (PTRS_PER_PTE - 1) << PTE_T_LOG2); UASM_i_ADDU(p, wr.r2, wr.r2, wr.r1); @@ -1879,12 +2020,20 @@ build_r4000_tlbchange_handler_head(u32 **p, struct uasm_label **l, uasm_l_smp_pgtable_change(l, *p); #endif iPTE_LW(p, wr.r1, wr.r2); /* get even pte */ - if (!m4kc_tlbp_war()) + if (!m4kc_tlbp_war()) { build_tlb_probe_entry(p); + if (cpu_has_tlbex_tlbp_race()) { + /* race condition happens, leaving */ + uasm_i_ehb(p); + uasm_i_mfc0(p, wr.r3, C0_INDEX); + uasm_il_bltz(p, r, wr.r3, label_leave); + uasm_i_nop(p); + } + } return wr; } -static void __cpuinit +static void build_r4000_tlbchange_handler_tail(u32 **p, struct uasm_label **l, struct uasm_reloc **r, unsigned int tmp, unsigned int ptr) @@ -1902,29 +2051,28 @@ build_r4000_tlbchange_handler_tail(u32 **p, struct uasm_label **l, #endif } -static void __cpuinit build_r4000_tlb_load_handler(void) +static void build_r4000_tlb_load_handler(void) { - u32 *p = handle_tlbl; - const int handle_tlbl_size = handle_tlbl_end - handle_tlbl; + u32 *p = (u32 *)msk_isa16_mode((ulong)handle_tlbl); struct uasm_label *l = labels; struct uasm_reloc *r = relocs; struct work_registers wr; - memset(handle_tlbl, 0, handle_tlbl_size * sizeof(handle_tlbl[0])); + memset(p, 0, handle_tlbl_end - (char *)p); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); if (bcm1250_m3_war()) { unsigned int segbits = 44; - uasm_i_dmfc0(&p, K0, C0_BADVADDR); - uasm_i_dmfc0(&p, K1, C0_ENTRYHI); - uasm_i_xor(&p, K0, K0, K1); - uasm_i_dsrl_safe(&p, K1, K0, 62); - uasm_i_dsrl_safe(&p, K0, K0, 12 + 1); - uasm_i_dsll_safe(&p, K0, K0, 64 + 12 + 1 - segbits); - uasm_i_or(&p, K0, K0, K1); - uasm_il_bnez(&p, &r, K0, label_leave); + uasm_i_dmfc0(&p, GPR_K0, C0_BADVADDR); + uasm_i_dmfc0(&p, GPR_K1, C0_ENTRYHI); + uasm_i_xor(&p, GPR_K0, GPR_K0, GPR_K1); + uasm_i_dsrl_safe(&p, GPR_K1, GPR_K0, 62); + uasm_i_dsrl_safe(&p, GPR_K0, GPR_K0, 12 + 1); + uasm_i_dsll_safe(&p, GPR_K0, GPR_K0, 64 + 12 + 1 - segbits); + uasm_i_or(&p, GPR_K0, GPR_K0, GPR_K1); + uasm_il_bnez(&p, &r, GPR_K0, label_leave); /* No need for uasm_i_nop */ } @@ -1933,7 +2081,7 @@ static void __cpuinit build_r4000_tlb_load_handler(void) if (m4kc_tlbp_war()) build_tlb_probe_entry(&p); - if (cpu_has_rixi) { + if (cpu_has_rixi && !cpu_has_rixiex) { /* * If the page is not _PAGE_VALID, RI or XI could not * have triggered it. Skip the expensive test.. @@ -1947,19 +2095,18 @@ static void __cpuinit build_r4000_tlb_load_handler(void) } uasm_i_nop(&p); - uasm_i_tlbr(&p); + /* + * Warn if something may race with us & replace the TLB entry + * before we read it here. Everything with such races should + * also have dedicated RiXi exception handlers, so this + * shouldn't be hit. + */ + WARN(cpu_has_tlbex_tlbp_race(), "Unhandled race in RiXi path"); - switch (current_cpu_type()) { - default: - if (cpu_has_mips_r2) { - uasm_i_ehb(&p); + uasm_i_tlbr(&p); - case CPU_CAVIUM_OCTEON: - case CPU_CAVIUM_OCTEON_PLUS: - case CPU_CAVIUM_OCTEON2: - break; - } - } + if (cpu_has_mips_r2_exec_hazard) + uasm_i_ehb(&p); /* Examine entrylo 0 or 1 based on ptr. */ if (use_bbit_insns()) { @@ -1987,7 +2134,7 @@ static void __cpuinit build_r4000_tlb_load_handler(void) } uasm_l_tlbl_goaround1(&l, p); } - build_make_valid(&p, &r, wr.r1, wr.r2); + build_make_valid(&p, &r, wr.r1, wr.r2, wr.r3); build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2); #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT @@ -2000,7 +2147,7 @@ static void __cpuinit build_r4000_tlb_load_handler(void) build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl); build_tlb_probe_entry(&p); - if (cpu_has_rixi) { + if (cpu_has_rixi && !cpu_has_rixiex) { /* * If the page is not _PAGE_VALID, RI or XI could not * have triggered it. Skip the expensive test.. @@ -2014,19 +2161,18 @@ static void __cpuinit build_r4000_tlb_load_handler(void) } uasm_i_nop(&p); - uasm_i_tlbr(&p); + /* + * Warn if something may race with us & replace the TLB entry + * before we read it here. Everything with such races should + * also have dedicated RiXi exception handlers, so this + * shouldn't be hit. + */ + WARN(cpu_has_tlbex_tlbp_race(), "Unhandled race in RiXi path"); - switch (current_cpu_type()) { - default: - if (cpu_has_mips_r2) { - uasm_i_ehb(&p); + uasm_i_tlbr(&p); - case CPU_CAVIUM_OCTEON: - case CPU_CAVIUM_OCTEON_PLUS: - case CPU_CAVIUM_OCTEON2: - break; - } - } + if (cpu_has_mips_r2_exec_hazard) + uasm_i_ehb(&p); /* Examine entrylo 0 or 1 based on ptr. */ if (use_bbit_insns()) { @@ -2060,40 +2206,41 @@ static void __cpuinit build_r4000_tlb_load_handler(void) uasm_l_tlbl_goaround2(&l, p); } uasm_i_ori(&p, wr.r1, wr.r1, (_PAGE_ACCESSED | _PAGE_VALID)); - build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2); + build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2, 1); #endif uasm_l_nopage_tlbl(&l, p); + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(&p, 0); build_restore_work_registers(&p); #ifdef CONFIG_CPU_MICROMIPS if ((unsigned long)tlb_do_page_fault_0 & 1) { - uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_0)); - uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_0)); - uasm_i_jr(&p, K0); + uasm_i_lui(&p, GPR_K0, uasm_rel_hi((long)tlb_do_page_fault_0)); + uasm_i_addiu(&p, GPR_K0, GPR_K0, uasm_rel_lo((long)tlb_do_page_fault_0)); + uasm_i_jr(&p, GPR_K0); } else #endif uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff); uasm_i_nop(&p); - if (p >= handle_tlbl_end) + if (p >= (u32 *)handle_tlbl_end) panic("TLB load handler fastpath space exceeded"); uasm_resolve_relocs(relocs, labels); pr_debug("Wrote TLB load handler fastpath (%u instructions).\n", - (unsigned int)(p - handle_tlbl)); + (unsigned int)(p - (u32 *)handle_tlbl)); - dump_handler("r4000_tlb_load", handle_tlbl, handle_tlbl_size); + dump_handler("r4000_tlb_load", handle_tlbl, handle_tlbl_end); } -static void __cpuinit build_r4000_tlb_store_handler(void) +static void build_r4000_tlb_store_handler(void) { - u32 *p = handle_tlbs; - const int handle_tlbs_size = handle_tlbs_end - handle_tlbs; + u32 *p = (u32 *)msk_isa16_mode((ulong)handle_tlbs); struct uasm_label *l = labels; struct uasm_reloc *r = relocs; struct work_registers wr; - memset(handle_tlbs, 0, handle_tlbs_size * sizeof(handle_tlbs[0])); + memset(p, 0, handle_tlbs_end - (char *)p); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); @@ -2101,7 +2248,7 @@ static void __cpuinit build_r4000_tlb_store_handler(void) build_pte_writable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbs); if (m4kc_tlbp_war()) build_tlb_probe_entry(&p); - build_make_write(&p, &r, wr.r1, wr.r2); + build_make_write(&p, &r, wr.r1, wr.r2, wr.r3); build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2); #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT @@ -2115,40 +2262,41 @@ static void __cpuinit build_r4000_tlb_store_handler(void) build_tlb_probe_entry(&p); uasm_i_ori(&p, wr.r1, wr.r1, _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY); - build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2); + build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2, 1); #endif uasm_l_nopage_tlbs(&l, p); + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(&p, 0); build_restore_work_registers(&p); #ifdef CONFIG_CPU_MICROMIPS if ((unsigned long)tlb_do_page_fault_1 & 1) { - uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_1)); - uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_1)); - uasm_i_jr(&p, K0); + uasm_i_lui(&p, GPR_K0, uasm_rel_hi((long)tlb_do_page_fault_1)); + uasm_i_addiu(&p, GPR_K0, GPR_K0, uasm_rel_lo((long)tlb_do_page_fault_1)); + uasm_i_jr(&p, GPR_K0); } else #endif uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); uasm_i_nop(&p); - if (p >= handle_tlbs_end) + if (p >= (u32 *)handle_tlbs_end) panic("TLB store handler fastpath space exceeded"); uasm_resolve_relocs(relocs, labels); pr_debug("Wrote TLB store handler fastpath (%u instructions).\n", - (unsigned int)(p - handle_tlbs)); + (unsigned int)(p - (u32 *)handle_tlbs)); - dump_handler("r4000_tlb_store", handle_tlbs, handle_tlbs_size); + dump_handler("r4000_tlb_store", handle_tlbs, handle_tlbs_end); } -static void __cpuinit build_r4000_tlb_modify_handler(void) +static void build_r4000_tlb_modify_handler(void) { - u32 *p = handle_tlbm; - const int handle_tlbm_size = handle_tlbm_end - handle_tlbm; + u32 *p = (u32 *)msk_isa16_mode((ulong)handle_tlbm); struct uasm_label *l = labels; struct uasm_reloc *r = relocs; struct work_registers wr; - memset(handle_tlbm, 0, handle_tlbm_size * sizeof(handle_tlbm[0])); + memset(p, 0, handle_tlbm_end - (char *)p); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); @@ -2157,7 +2305,7 @@ static void __cpuinit build_r4000_tlb_modify_handler(void) if (m4kc_tlbp_war()) build_tlb_probe_entry(&p); /* Present and writable bits set, set accessed and dirty bits. */ - build_make_write(&p, &r, wr.r1, wr.r2); + build_make_write(&p, &r, wr.r1, wr.r2, wr.r3); build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2); #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT @@ -2171,32 +2319,34 @@ static void __cpuinit build_r4000_tlb_modify_handler(void) build_tlb_probe_entry(&p); uasm_i_ori(&p, wr.r1, wr.r1, _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY); - build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2); + build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2, 0); #endif uasm_l_nopage_tlbm(&l, p); + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(&p, 0); build_restore_work_registers(&p); #ifdef CONFIG_CPU_MICROMIPS if ((unsigned long)tlb_do_page_fault_1 & 1) { - uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_1)); - uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_1)); - uasm_i_jr(&p, K0); + uasm_i_lui(&p, GPR_K0, uasm_rel_hi((long)tlb_do_page_fault_1)); + uasm_i_addiu(&p, GPR_K0, GPR_K0, uasm_rel_lo((long)tlb_do_page_fault_1)); + uasm_i_jr(&p, GPR_K0); } else #endif uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); uasm_i_nop(&p); - if (p >= handle_tlbm_end) + if (p >= (u32 *)handle_tlbm_end) panic("TLB modify handler fastpath space exceeded"); uasm_resolve_relocs(relocs, labels); pr_debug("Wrote TLB modify handler fastpath (%u instructions).\n", - (unsigned int)(p - handle_tlbm)); + (unsigned int)(p - (u32 *)handle_tlbm)); - dump_handler("r4000_tlb_modify", handle_tlbm, handle_tlbm_size); + dump_handler("r4000_tlb_modify", handle_tlbm, handle_tlbm_end); } -static void __cpuinit flush_tlb_handlers(void) +static void flush_tlb_handlers(void) { local_flush_icache_range((unsigned long)handle_tlbl, (unsigned long)handle_tlbl_end); @@ -2204,13 +2354,179 @@ static void __cpuinit flush_tlb_handlers(void) (unsigned long)handle_tlbs_end); local_flush_icache_range((unsigned long)handle_tlbm, (unsigned long)handle_tlbm_end); -#ifdef CONFIG_MIPS_PGD_C0_CONTEXT local_flush_icache_range((unsigned long)tlbmiss_handler_setup_pgd, (unsigned long)tlbmiss_handler_setup_pgd_end); +} + +static void print_htw_config(void) +{ + unsigned long config; + unsigned int pwctl; + const int field = 2 * sizeof(unsigned long); + + config = read_c0_pwfield(); + pr_debug("PWField (0x%0*lx): GDI: 0x%02lx UDI: 0x%02lx MDI: 0x%02lx PTI: 0x%02lx PTEI: 0x%02lx\n", + field, config, + (config & MIPS_PWFIELD_GDI_MASK) >> MIPS_PWFIELD_GDI_SHIFT, + (config & MIPS_PWFIELD_UDI_MASK) >> MIPS_PWFIELD_UDI_SHIFT, + (config & MIPS_PWFIELD_MDI_MASK) >> MIPS_PWFIELD_MDI_SHIFT, + (config & MIPS_PWFIELD_PTI_MASK) >> MIPS_PWFIELD_PTI_SHIFT, + (config & MIPS_PWFIELD_PTEI_MASK) >> MIPS_PWFIELD_PTEI_SHIFT); + + config = read_c0_pwsize(); + pr_debug("PWSize (0x%0*lx): PS: 0x%lx GDW: 0x%02lx UDW: 0x%02lx MDW: 0x%02lx PTW: 0x%02lx PTEW: 0x%02lx\n", + field, config, + (config & MIPS_PWSIZE_PS_MASK) >> MIPS_PWSIZE_PS_SHIFT, + (config & MIPS_PWSIZE_GDW_MASK) >> MIPS_PWSIZE_GDW_SHIFT, + (config & MIPS_PWSIZE_UDW_MASK) >> MIPS_PWSIZE_UDW_SHIFT, + (config & MIPS_PWSIZE_MDW_MASK) >> MIPS_PWSIZE_MDW_SHIFT, + (config & MIPS_PWSIZE_PTW_MASK) >> MIPS_PWSIZE_PTW_SHIFT, + (config & MIPS_PWSIZE_PTEW_MASK) >> MIPS_PWSIZE_PTEW_SHIFT); + + pwctl = read_c0_pwctl(); + pr_debug("PWCtl (0x%x): PWEn: 0x%x XK: 0x%x XS: 0x%x XU: 0x%x DPH: 0x%x HugePg: 0x%x Psn: 0x%x\n", + pwctl, + (pwctl & MIPS_PWCTL_PWEN_MASK) >> MIPS_PWCTL_PWEN_SHIFT, + (pwctl & MIPS_PWCTL_XK_MASK) >> MIPS_PWCTL_XK_SHIFT, + (pwctl & MIPS_PWCTL_XS_MASK) >> MIPS_PWCTL_XS_SHIFT, + (pwctl & MIPS_PWCTL_XU_MASK) >> MIPS_PWCTL_XU_SHIFT, + (pwctl & MIPS_PWCTL_DPH_MASK) >> MIPS_PWCTL_DPH_SHIFT, + (pwctl & MIPS_PWCTL_HUGEPG_MASK) >> MIPS_PWCTL_HUGEPG_SHIFT, + (pwctl & MIPS_PWCTL_PSN_MASK) >> MIPS_PWCTL_PSN_SHIFT); +} + +static void config_htw_params(void) +{ + unsigned long pwfield, pwsize, ptei; + unsigned int config; + + /* + * We are using 2-level page tables, so we only need to + * setup GDW and PTW appropriately. UDW and MDW will remain 0. + * The default value of GDI/UDI/MDI/PTI is 0xc. It is illegal to + * write values less than 0xc in these fields because the entire + * write will be dropped. As a result of which, we must preserve + * the original reset values and overwrite only what we really want. + */ + + pwfield = read_c0_pwfield(); + /* re-initialize the GDI field */ + pwfield &= ~MIPS_PWFIELD_GDI_MASK; + pwfield |= PGDIR_SHIFT << MIPS_PWFIELD_GDI_SHIFT; + /* re-initialize the PTI field including the even/odd bit */ + pwfield &= ~MIPS_PWFIELD_PTI_MASK; + pwfield |= PAGE_SHIFT << MIPS_PWFIELD_PTI_SHIFT; + if (CONFIG_PGTABLE_LEVELS >= 3) { + pwfield &= ~MIPS_PWFIELD_MDI_MASK; + pwfield |= PMD_SHIFT << MIPS_PWFIELD_MDI_SHIFT; + } + /* Set the PTEI right shift */ + ptei = _PAGE_GLOBAL_SHIFT << MIPS_PWFIELD_PTEI_SHIFT; + pwfield |= ptei; + write_c0_pwfield(pwfield); + /* Check whether the PTEI value is supported */ + back_to_back_c0_hazard(); + pwfield = read_c0_pwfield(); + if (((pwfield & MIPS_PWFIELD_PTEI_MASK) << MIPS_PWFIELD_PTEI_SHIFT) + != ptei) { + pr_warn("Unsupported PTEI field value: 0x%lx. HTW will not be enabled", + ptei); + /* + * Drop option to avoid HTW being enabled via another path + * (eg htw_reset()) + */ + current_cpu_data.options &= ~MIPS_CPU_HTW; + return; + } + + pwsize = ilog2(PTRS_PER_PGD) << MIPS_PWSIZE_GDW_SHIFT; + pwsize |= ilog2(PTRS_PER_PTE) << MIPS_PWSIZE_PTW_SHIFT; + if (CONFIG_PGTABLE_LEVELS >= 3) + pwsize |= ilog2(PTRS_PER_PMD) << MIPS_PWSIZE_MDW_SHIFT; + + /* Set pointer size to size of directory pointers */ + if (IS_ENABLED(CONFIG_64BIT)) + pwsize |= MIPS_PWSIZE_PS_MASK; + /* PTEs may be multiple pointers long (e.g. with XPA) */ + pwsize |= ((PTE_T_LOG2 - PGD_T_LOG2) << MIPS_PWSIZE_PTEW_SHIFT) + & MIPS_PWSIZE_PTEW_MASK; + + write_c0_pwsize(pwsize); + + /* Make sure everything is set before we enable the HTW */ + back_to_back_c0_hazard(); + + /* + * Enable HTW (and only for XUSeg on 64-bit), and disable the rest of + * the pwctl fields. + */ + config = 1 << MIPS_PWCTL_PWEN_SHIFT; + if (IS_ENABLED(CONFIG_64BIT)) + config |= MIPS_PWCTL_XU_MASK; + write_c0_pwctl(config); + pr_info("Hardware Page Table Walker enabled\n"); + + print_htw_config(); +} + +static void config_xpa_params(void) +{ +#ifdef CONFIG_XPA + unsigned int pagegrain; + + if (mips_xpa_disabled) { + pr_info("Extended Physical Addressing (XPA) disabled\n"); + return; + } + + pagegrain = read_c0_pagegrain(); + write_c0_pagegrain(pagegrain | PG_ELPA); + back_to_back_c0_hazard(); + pagegrain = read_c0_pagegrain(); + + if (pagegrain & PG_ELPA) + pr_info("Extended Physical Addressing (XPA) enabled\n"); + else + panic("Extended Physical Addressing (XPA) disabled"); #endif } -void __cpuinit build_tlb_refill_handler(void) +static void check_pabits(void) +{ + unsigned long entry; + unsigned pabits, fillbits; + + if (!cpu_has_rixi || _PAGE_NO_EXEC == 0) { + /* + * We'll only be making use of the fact that we can rotate bits + * into the fill if the CPU supports RIXI, so don't bother + * probing this for CPUs which don't. + */ + return; + } + + write_c0_entrylo0(~0ul); + back_to_back_c0_hazard(); + entry = read_c0_entrylo0(); + + /* clear all non-PFN bits */ + entry &= ~((1 << MIPS_ENTRYLO_PFN_SHIFT) - 1); + entry &= ~(MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI); + + /* find a lower bound on PABITS, and upper bound on fill bits */ + pabits = fls_long(entry) + 6; + fillbits = max_t(int, (int)BITS_PER_LONG - pabits, 0); + + /* minus the RI & XI bits */ + fillbits -= min_t(unsigned, fillbits, 2); + + if (fillbits >= ilog2(_PAGE_NO_EXEC)) + fill_includes_sw_bits = true; + + pr_debug("Entry* registers contain %u fill bits\n", fillbits); +} + +void build_tlb_refill_handler(void) { /* * The refill handler is generated per-CPU, multi-node systems @@ -2219,26 +2535,21 @@ void __cpuinit build_tlb_refill_handler(void) */ static int run_once = 0; + if (IS_ENABLED(CONFIG_XPA) && !cpu_has_rixi) + panic("Kernels supporting XPA currently require CPUs with RIXI"); + output_pgtable_bits_defines(); + check_pabits(); #ifdef CONFIG_64BIT - check_for_high_segbits = current_cpu_data.vmbits > (PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3); + check_for_high_segbits = current_cpu_data.vmbits > (PGDIR_SHIFT + PGD_TABLE_ORDER + PAGE_SHIFT - 3); #endif - switch (current_cpu_type()) { - case CPU_R2000: - case CPU_R3000: - case CPU_R3000A: - case CPU_R3081E: - case CPU_TX3912: - case CPU_TX3922: - case CPU_TX3927: + if (cpu_has_3kex) { #ifndef CONFIG_MIPS_PGD_C0_CONTEXT - if (cpu_has_local_ebase) - build_r3000_tlb_refill_handler(); if (!run_once) { - if (!cpu_has_local_ebase) - build_r3000_tlb_refill_handler(); + build_setup_pgd(); + build_r3000_tlb_refill_handler(); build_r3000_tlb_load_handler(); build_r3000_tlb_store_handler(); build_r3000_tlb_modify_handler(); @@ -2248,32 +2559,27 @@ void __cpuinit build_tlb_refill_handler(void) #else panic("No R3000 TLB refill handler"); #endif - break; - - case CPU_R6000: - case CPU_R6000A: - panic("No R6000 TLB refill handler yet"); - break; - - case CPU_R8000: - panic("No R8000 TLB refill handler yet"); - break; + return; + } - default: - if (!run_once) { - scratch_reg = allocate_kscratch(); -#ifdef CONFIG_MIPS_PGD_C0_CONTEXT - build_r4000_setup_pgd(); -#endif - build_r4000_tlb_load_handler(); - build_r4000_tlb_store_handler(); - build_r4000_tlb_modify_handler(); - if (!cpu_has_local_ebase) - build_r4000_tlb_refill_handler(); - flush_tlb_handlers(); - run_once++; - } - if (cpu_has_local_ebase) + if (cpu_has_ldpte) + setup_pw(); + + if (!run_once) { + scratch_reg = allocate_kscratch(); + build_setup_pgd(); + build_r4000_tlb_load_handler(); + build_r4000_tlb_store_handler(); + build_r4000_tlb_modify_handler(); + if (cpu_has_ldpte) + build_loongson3_tlb_refill_handler(); + else build_r4000_tlb_refill_handler(); + flush_tlb_handlers(); + run_once++; } + if (cpu_has_xpa) + config_xpa_params(); + if (cpu_has_htw) + config_htw_params(); } diff --git a/arch/mips/mm/uasm-micromips.c b/arch/mips/mm/uasm-micromips.c index 162ee6d62788..75ef90486fe6 100644 --- a/arch/mips/mm/uasm-micromips.c +++ b/arch/mips/mm/uasm-micromips.c @@ -15,12 +15,10 @@ #include <linux/kernel.h> #include <linux/types.h> -#include <linux/init.h> #include <asm/inst.h> #include <asm/elf.h> #include <asm/bugs.h> -#define UASM_ISA _UASM_ISA_MICROMIPS #include <asm/uasm.h> #define RS_MASK 0x1f @@ -39,86 +37,100 @@ | (e) << RE_SH \ | (f) << FUNC_SH) -/* Define these when we are not the ISA the kernel is being compiled with. */ -#ifndef CONFIG_CPU_MICROMIPS -#define MM_uasm_i_b(buf, off) ISAOPC(_beq)(buf, 0, 0, off) -#define MM_uasm_i_beqz(buf, rs, off) ISAOPC(_beq)(buf, rs, 0, off) -#define MM_uasm_i_beqzl(buf, rs, off) ISAOPC(_beql)(buf, rs, 0, off) -#define MM_uasm_i_bnez(buf, rs, off) ISAOPC(_bne)(buf, rs, 0, off) -#endif - #include "uasm.c" -static struct insn insn_table_MM[] __uasminitdata = { - { insn_addu, M(mm_pool32a_op, 0, 0, 0, 0, mm_addu32_op), RT | RS | RD }, - { insn_addiu, M(mm_addiu32_op, 0, 0, 0, 0, 0), RT | RS | SIMM }, - { insn_and, M(mm_pool32a_op, 0, 0, 0, 0, mm_and_op), RT | RS | RD }, - { insn_andi, M(mm_andi32_op, 0, 0, 0, 0, 0), RT | RS | UIMM }, - { insn_beq, M(mm_beq32_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, - { insn_beql, 0, 0 }, - { insn_bgez, M(mm_pool32i_op, mm_bgez_op, 0, 0, 0, 0), RS | BIMM }, - { insn_bgezl, 0, 0 }, - { insn_bltz, M(mm_pool32i_op, mm_bltz_op, 0, 0, 0, 0), RS | BIMM }, - { insn_bltzl, 0, 0 }, - { insn_bne, M(mm_bne32_op, 0, 0, 0, 0, 0), RT | RS | BIMM }, - { insn_cache, M(mm_pool32b_op, 0, 0, mm_cache_func, 0, 0), RT | RS | SIMM }, - { insn_daddu, 0, 0 }, - { insn_daddiu, 0, 0 }, - { insn_dmfc0, 0, 0 }, - { insn_dmtc0, 0, 0 }, - { insn_dsll, 0, 0 }, - { insn_dsll32, 0, 0 }, - { insn_dsra, 0, 0 }, - { insn_dsrl, 0, 0 }, - { insn_dsrl32, 0, 0 }, - { insn_drotr, 0, 0 }, - { insn_drotr32, 0, 0 }, - { insn_dsubu, 0, 0 }, - { insn_eret, M(mm_pool32a_op, 0, 0, 0, mm_eret_op, mm_pool32axf_op), 0 }, - { insn_ins, M(mm_pool32a_op, 0, 0, 0, 0, mm_ins_op), RT | RS | RD | RE }, - { insn_ext, M(mm_pool32a_op, 0, 0, 0, 0, mm_ext_op), RT | RS | RD | RE }, - { insn_j, M(mm_j32_op, 0, 0, 0, 0, 0), JIMM }, - { insn_jal, M(mm_jal32_op, 0, 0, 0, 0, 0), JIMM }, - { insn_jr, M(mm_pool32a_op, 0, 0, 0, mm_jalr_op, mm_pool32axf_op), RS }, - { insn_ld, 0, 0 }, - { insn_ll, M(mm_pool32c_op, 0, 0, (mm_ll_func << 1), 0, 0), RS | RT | SIMM }, - { insn_lld, 0, 0 }, - { insn_lui, M(mm_pool32i_op, mm_lui_op, 0, 0, 0, 0), RS | SIMM }, - { insn_lw, M(mm_lw32_op, 0, 0, 0, 0, 0), RT | RS | SIMM }, - { insn_mfc0, M(mm_pool32a_op, 0, 0, 0, mm_mfc0_op, mm_pool32axf_op), RT | RS | RD }, - { insn_mtc0, M(mm_pool32a_op, 0, 0, 0, mm_mtc0_op, mm_pool32axf_op), RT | RS | RD }, - { insn_or, M(mm_pool32a_op, 0, 0, 0, 0, mm_or32_op), RT | RS | RD }, - { insn_ori, M(mm_ori32_op, 0, 0, 0, 0, 0), RT | RS | UIMM }, - { insn_pref, M(mm_pool32c_op, 0, 0, (mm_pref_func << 1), 0, 0), RT | RS | SIMM }, - { insn_rfe, 0, 0 }, - { insn_sc, M(mm_pool32c_op, 0, 0, (mm_sc_func << 1), 0, 0), RT | RS | SIMM }, - { insn_scd, 0, 0 }, - { insn_sd, 0, 0 }, - { insn_sll, M(mm_pool32a_op, 0, 0, 0, 0, mm_sll32_op), RT | RS | RD }, - { insn_sra, M(mm_pool32a_op, 0, 0, 0, 0, mm_sra_op), RT | RS | RD }, - { insn_srl, M(mm_pool32a_op, 0, 0, 0, 0, mm_srl32_op), RT | RS | RD }, - { insn_rotr, M(mm_pool32a_op, 0, 0, 0, 0, mm_rotr_op), RT | RS | RD }, - { insn_subu, M(mm_pool32a_op, 0, 0, 0, 0, mm_subu32_op), RT | RS | RD }, - { insn_sw, M(mm_sw32_op, 0, 0, 0, 0, 0), RT | RS | SIMM }, - { insn_tlbp, M(mm_pool32a_op, 0, 0, 0, mm_tlbp_op, mm_pool32axf_op), 0 }, - { insn_tlbr, M(mm_pool32a_op, 0, 0, 0, mm_tlbr_op, mm_pool32axf_op), 0 }, - { insn_tlbwi, M(mm_pool32a_op, 0, 0, 0, mm_tlbwi_op, mm_pool32axf_op), 0 }, - { insn_tlbwr, M(mm_pool32a_op, 0, 0, 0, mm_tlbwr_op, mm_pool32axf_op), 0 }, - { insn_xor, M(mm_pool32a_op, 0, 0, 0, 0, mm_xor32_op), RT | RS | RD }, - { insn_xori, M(mm_xori32_op, 0, 0, 0, 0, 0), RT | RS | UIMM }, - { insn_dins, 0, 0 }, - { insn_dinsm, 0, 0 }, - { insn_syscall, M(mm_pool32a_op, 0, 0, 0, mm_syscall_op, mm_pool32axf_op), SCIMM}, - { insn_bbit0, 0, 0 }, - { insn_bbit1, 0, 0 }, - { insn_lwx, 0, 0 }, - { insn_ldx, 0, 0 }, - { insn_invalid, 0, 0 } +static const struct insn insn_table_MM[insn_invalid] = { + [insn_addu] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_addu32_op), RT | RS | RD}, + [insn_addiu] = {M(mm_addiu32_op, 0, 0, 0, 0, 0), RT | RS | SIMM}, + [insn_and] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_and_op), RT | RS | RD}, + [insn_andi] = {M(mm_andi32_op, 0, 0, 0, 0, 0), RT | RS | UIMM}, + [insn_beq] = {M(mm_beq32_op, 0, 0, 0, 0, 0), RS | RT | BIMM}, + [insn_beql] = {0, 0}, + [insn_bgez] = {M(mm_pool32i_op, mm_bgez_op, 0, 0, 0, 0), RS | BIMM}, + [insn_bgezl] = {0, 0}, + [insn_bltz] = {M(mm_pool32i_op, mm_bltz_op, 0, 0, 0, 0), RS | BIMM}, + [insn_bltzl] = {0, 0}, + [insn_bne] = {M(mm_bne32_op, 0, 0, 0, 0, 0), RT | RS | BIMM}, + [insn_cache] = {M(mm_pool32b_op, 0, 0, mm_cache_func, 0, 0), RT | RS | SIMM}, + [insn_cfc1] = {M(mm_pool32f_op, 0, 0, 0, mm_cfc1_op, mm_32f_73_op), RT | RS}, + [insn_cfcmsa] = {M(mm_pool32s_op, 0, msa_cfc_op, 0, 0, mm_32s_elm_op), RD | RE}, + [insn_ctc1] = {M(mm_pool32f_op, 0, 0, 0, mm_ctc1_op, mm_32f_73_op), RT | RS}, + [insn_ctcmsa] = {M(mm_pool32s_op, 0, msa_ctc_op, 0, 0, mm_32s_elm_op), RD | RE}, + [insn_daddu] = {0, 0}, + [insn_daddiu] = {0, 0}, + [insn_di] = {M(mm_pool32a_op, 0, 0, 0, mm_di_op, mm_pool32axf_op), RS}, + [insn_divu] = {M(mm_pool32a_op, 0, 0, 0, mm_divu_op, mm_pool32axf_op), RT | RS}, + [insn_dmfc0] = {0, 0}, + [insn_dmtc0] = {0, 0}, + [insn_dsll] = {0, 0}, + [insn_dsll32] = {0, 0}, + [insn_dsra] = {0, 0}, + [insn_dsrl] = {0, 0}, + [insn_dsrl32] = {0, 0}, + [insn_drotr] = {0, 0}, + [insn_drotr32] = {0, 0}, + [insn_dsubu] = {0, 0}, + [insn_eret] = {M(mm_pool32a_op, 0, 0, 0, mm_eret_op, mm_pool32axf_op), 0}, + [insn_ins] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_ins_op), RT | RS | RD | RE}, + [insn_ext] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_ext_op), RT | RS | RD | RE}, + [insn_j] = {M(mm_j32_op, 0, 0, 0, 0, 0), JIMM}, + [insn_jal] = {M(mm_jal32_op, 0, 0, 0, 0, 0), JIMM}, + [insn_jalr] = {M(mm_pool32a_op, 0, 0, 0, mm_jalr_op, mm_pool32axf_op), RT | RS}, + [insn_jr] = {M(mm_pool32a_op, 0, 0, 0, mm_jalr_op, mm_pool32axf_op), RS}, + [insn_lb] = {M(mm_lb32_op, 0, 0, 0, 0, 0), RT | RS | SIMM}, + [insn_ld] = {0, 0}, + [insn_lh] = {M(mm_lh32_op, 0, 0, 0, 0, 0), RT | RS | SIMM}, + [insn_ll] = {M(mm_pool32c_op, 0, 0, (mm_ll_func << 1), 0, 0), RS | RT | SIMM}, + [insn_lld] = {0, 0}, + [insn_lui] = {M(mm_pool32i_op, mm_lui_op, 0, 0, 0, 0), RS | SIMM}, + [insn_lw] = {M(mm_lw32_op, 0, 0, 0, 0, 0), RT | RS | SIMM}, + [insn_mfc0] = {M(mm_pool32a_op, 0, 0, 0, mm_mfc0_op, mm_pool32axf_op), RT | RS | RD}, + [insn_mfhi] = {M(mm_pool32a_op, 0, 0, 0, mm_mfhi32_op, mm_pool32axf_op), RS}, + [insn_mflo] = {M(mm_pool32a_op, 0, 0, 0, mm_mflo32_op, mm_pool32axf_op), RS}, + [insn_mtc0] = {M(mm_pool32a_op, 0, 0, 0, mm_mtc0_op, mm_pool32axf_op), RT | RS | RD}, + [insn_mthi] = {M(mm_pool32a_op, 0, 0, 0, mm_mthi32_op, mm_pool32axf_op), RS}, + [insn_mtlo] = {M(mm_pool32a_op, 0, 0, 0, mm_mtlo32_op, mm_pool32axf_op), RS}, + [insn_mul] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_mul_op), RT | RS | RD}, + [insn_or] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_or32_op), RT | RS | RD}, + [insn_ori] = {M(mm_ori32_op, 0, 0, 0, 0, 0), RT | RS | UIMM}, + [insn_pref] = {M(mm_pool32c_op, 0, 0, (mm_pref_func << 1), 0, 0), RT | RS | SIMM}, + [insn_rfe] = {0, 0}, + [insn_sc] = {M(mm_pool32c_op, 0, 0, (mm_sc_func << 1), 0, 0), RT | RS | SIMM}, + [insn_scd] = {0, 0}, + [insn_sd] = {0, 0}, + [insn_sll] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_sll32_op), RT | RS | RD}, + [insn_sllv] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_sllv32_op), RT | RS | RD}, + [insn_slt] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_slt_op), RT | RS | RD}, + [insn_sltiu] = {M(mm_sltiu32_op, 0, 0, 0, 0, 0), RT | RS | SIMM}, + [insn_sltu] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_sltu_op), RT | RS | RD}, + [insn_sra] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_sra_op), RT | RS | RD}, + [insn_srav] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_srav_op), RT | RS | RD}, + [insn_srl] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_srl32_op), RT | RS | RD}, + [insn_srlv] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_srlv32_op), RT | RS | RD}, + [insn_rotr] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_rotr_op), RT | RS | RD}, + [insn_subu] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_subu32_op), RT | RS | RD}, + [insn_sw] = {M(mm_sw32_op, 0, 0, 0, 0, 0), RT | RS | SIMM}, + [insn_sync] = {M(mm_pool32a_op, 0, 0, 0, mm_sync_op, mm_pool32axf_op), RS}, + [insn_tlbp] = {M(mm_pool32a_op, 0, 0, 0, mm_tlbp_op, mm_pool32axf_op), 0}, + [insn_tlbr] = {M(mm_pool32a_op, 0, 0, 0, mm_tlbr_op, mm_pool32axf_op), 0}, + [insn_tlbwi] = {M(mm_pool32a_op, 0, 0, 0, mm_tlbwi_op, mm_pool32axf_op), 0}, + [insn_tlbwr] = {M(mm_pool32a_op, 0, 0, 0, mm_tlbwr_op, mm_pool32axf_op), 0}, + [insn_wait] = {M(mm_pool32a_op, 0, 0, 0, mm_wait_op, mm_pool32axf_op), SCIMM}, + [insn_wsbh] = {M(mm_pool32a_op, 0, 0, 0, mm_wsbh_op, mm_pool32axf_op), RT | RS}, + [insn_xor] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_xor32_op), RT | RS | RD}, + [insn_xori] = {M(mm_xori32_op, 0, 0, 0, 0, 0), RT | RS | UIMM}, + [insn_dins] = {0, 0}, + [insn_dinsm] = {0, 0}, + [insn_syscall] = {M(mm_pool32a_op, 0, 0, 0, mm_syscall_op, mm_pool32axf_op), SCIMM}, + [insn_bbit0] = {0, 0}, + [insn_bbit1] = {0, 0}, + [insn_lwx] = {0, 0}, + [insn_ldx] = {0, 0}, }; #undef M -static inline __uasminit u32 build_bimm(s32 arg) +static inline u32 build_bimm(s32 arg) { WARN(arg > 0xffff || arg < -0x10000, KERN_WARNING "Micro-assembler field overflow\n"); @@ -128,7 +140,7 @@ static inline __uasminit u32 build_bimm(s32 arg) return ((arg < 0) ? (1 << 15) : 0) | ((arg >> 1) & 0x7fff); } -static inline __uasminit u32 build_jimm(u32 arg) +static inline u32 build_jimm(u32 arg) { WARN(arg & ~((JIMM_MASK << 2) | 1), @@ -141,32 +153,31 @@ static inline __uasminit u32 build_jimm(u32 arg) * The order of opcode arguments is implicitly left to right, * starting with RS and ending with FUNC or IMM. */ -static void __uasminit build_insn(u32 **buf, enum opcode opc, ...) +static void build_insn(u32 **buf, enum opcode opc, ...) { - struct insn *ip = NULL; - unsigned int i; + const struct insn *ip; va_list ap; u32 op; - for (i = 0; insn_table_MM[i].opcode != insn_invalid; i++) - if (insn_table_MM[i].opcode == opc) { - ip = &insn_table_MM[i]; - break; - } - - if (!ip || (opc == insn_daddiu && r4k_daddiu_bug())) + if (opc < 0 || opc >= insn_invalid || + (opc == insn_daddiu && r4k_daddiu_bug()) || + (insn_table_MM[opc].match == 0 && insn_table_MM[opc].fields == 0)) panic("Unsupported Micro-assembler instruction %d", opc); + ip = &insn_table_MM[opc]; + op = ip->match; va_start(ap, opc); if (ip->fields & RS) { - if (opc == insn_mfc0 || opc == insn_mtc0) + if (opc == insn_mfc0 || opc == insn_mtc0 || + opc == insn_cfc1 || opc == insn_ctc1) op |= build_rt(va_arg(ap, u32)); else op |= build_rs(va_arg(ap, u32)); } if (ip->fields & RT) { - if (opc == insn_mfc0 || opc == insn_mtc0) + if (opc == insn_mfc0 || opc == insn_mtc0 || + opc == insn_cfc1 || opc == insn_ctc1) op |= build_rs(va_arg(ap, u32)); else op |= build_rt(va_arg(ap, u32)); @@ -199,7 +210,7 @@ static void __uasminit build_insn(u32 **buf, enum opcode opc, ...) (*buf)++; } -static inline void __uasminit +static inline void __resolve_relocs(struct uasm_reloc *rel, struct uasm_label *lab) { long laddr = (long)lab->addr; diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c index 5fcdd8fe3e83..e15c6700cd08 100644 --- a/arch/mips/mm/uasm-mips.c +++ b/arch/mips/mm/uasm-mips.c @@ -15,12 +15,10 @@ #include <linux/kernel.h> #include <linux/types.h> -#include <linux/init.h> #include <asm/inst.h> #include <asm/elf.h> #include <asm/bugs.h> -#define UASM_ISA _UASM_ISA_CLASSIC #include <asm/uasm.h> #define RS_MASK 0x1f @@ -39,87 +37,177 @@ | (e) << RE_SH \ | (f) << FUNC_SH) -/* Define these when we are not the ISA the kernel is being compiled with. */ -#ifdef CONFIG_CPU_MICROMIPS -#define CL_uasm_i_b(buf, off) ISAOPC(_beq)(buf, 0, 0, off) -#define CL_uasm_i_beqz(buf, rs, off) ISAOPC(_beq)(buf, rs, 0, off) -#define CL_uasm_i_beqzl(buf, rs, off) ISAOPC(_beql)(buf, rs, 0, off) -#define CL_uasm_i_bnez(buf, rs, off) ISAOPC(_bne)(buf, rs, 0, off) -#endif +/* This macro sets the non-variable bits of an R6 instruction. */ +#define M6(a, b, c, d, e) \ + ((a) << OP_SH \ + | (b) << RS_SH \ + | (c) << RT_SH \ + | (d) << SIMM9_SH \ + | (e) << FUNC_SH) #include "uasm.c" -static struct insn insn_table[] __uasminitdata = { - { insn_addiu, M(addiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, - { insn_addu, M(spec_op, 0, 0, 0, 0, addu_op), RS | RT | RD }, - { insn_andi, M(andi_op, 0, 0, 0, 0, 0), RS | RT | UIMM }, - { insn_and, M(spec_op, 0, 0, 0, 0, and_op), RS | RT | RD }, - { insn_bbit0, M(lwc2_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, - { insn_bbit1, M(swc2_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, - { insn_beql, M(beql_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, - { insn_beq, M(beq_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, - { insn_bgezl, M(bcond_op, 0, bgezl_op, 0, 0, 0), RS | BIMM }, - { insn_bgez, M(bcond_op, 0, bgez_op, 0, 0, 0), RS | BIMM }, - { insn_bltzl, M(bcond_op, 0, bltzl_op, 0, 0, 0), RS | BIMM }, - { insn_bltz, M(bcond_op, 0, bltz_op, 0, 0, 0), RS | BIMM }, - { insn_bne, M(bne_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, - { insn_cache, M(cache_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, - { insn_daddiu, M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, - { insn_daddu, M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD }, - { insn_dinsm, M(spec3_op, 0, 0, 0, 0, dinsm_op), RS | RT | RD | RE }, - { insn_dins, M(spec3_op, 0, 0, 0, 0, dins_op), RS | RT | RD | RE }, - { insn_dmfc0, M(cop0_op, dmfc_op, 0, 0, 0, 0), RT | RD | SET}, - { insn_dmtc0, M(cop0_op, dmtc_op, 0, 0, 0, 0), RT | RD | SET}, - { insn_drotr32, M(spec_op, 1, 0, 0, 0, dsrl32_op), RT | RD | RE }, - { insn_drotr, M(spec_op, 1, 0, 0, 0, dsrl_op), RT | RD | RE }, - { insn_dsll32, M(spec_op, 0, 0, 0, 0, dsll32_op), RT | RD | RE }, - { insn_dsll, M(spec_op, 0, 0, 0, 0, dsll_op), RT | RD | RE }, - { insn_dsra, M(spec_op, 0, 0, 0, 0, dsra_op), RT | RD | RE }, - { insn_dsrl32, M(spec_op, 0, 0, 0, 0, dsrl32_op), RT | RD | RE }, - { insn_dsrl, M(spec_op, 0, 0, 0, 0, dsrl_op), RT | RD | RE }, - { insn_dsubu, M(spec_op, 0, 0, 0, 0, dsubu_op), RS | RT | RD }, - { insn_eret, M(cop0_op, cop_op, 0, 0, 0, eret_op), 0 }, - { insn_ext, M(spec3_op, 0, 0, 0, 0, ext_op), RS | RT | RD | RE }, - { insn_ins, M(spec3_op, 0, 0, 0, 0, ins_op), RS | RT | RD | RE }, - { insn_j, M(j_op, 0, 0, 0, 0, 0), JIMM }, - { insn_jal, M(jal_op, 0, 0, 0, 0, 0), JIMM }, - { insn_j, M(j_op, 0, 0, 0, 0, 0), JIMM }, - { insn_jr, M(spec_op, 0, 0, 0, 0, jr_op), RS }, - { insn_ld, M(ld_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, - { insn_ldx, M(spec3_op, 0, 0, 0, ldx_op, lx_op), RS | RT | RD }, - { insn_lld, M(lld_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, - { insn_ll, M(ll_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, - { insn_lui, M(lui_op, 0, 0, 0, 0, 0), RT | SIMM }, - { insn_lw, M(lw_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, - { insn_lwx, M(spec3_op, 0, 0, 0, lwx_op, lx_op), RS | RT | RD }, - { insn_mfc0, M(cop0_op, mfc_op, 0, 0, 0, 0), RT | RD | SET}, - { insn_mtc0, M(cop0_op, mtc_op, 0, 0, 0, 0), RT | RD | SET}, - { insn_ori, M(ori_op, 0, 0, 0, 0, 0), RS | RT | UIMM }, - { insn_or, M(spec_op, 0, 0, 0, 0, or_op), RS | RT | RD }, - { insn_pref, M(pref_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, - { insn_rfe, M(cop0_op, cop_op, 0, 0, 0, rfe_op), 0 }, - { insn_rotr, M(spec_op, 1, 0, 0, 0, srl_op), RT | RD | RE }, - { insn_scd, M(scd_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, - { insn_sc, M(sc_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, - { insn_sd, M(sd_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, - { insn_sll, M(spec_op, 0, 0, 0, 0, sll_op), RT | RD | RE }, - { insn_sra, M(spec_op, 0, 0, 0, 0, sra_op), RT | RD | RE }, - { insn_srl, M(spec_op, 0, 0, 0, 0, srl_op), RT | RD | RE }, - { insn_subu, M(spec_op, 0, 0, 0, 0, subu_op), RS | RT | RD }, - { insn_sw, M(sw_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, - { insn_syscall, M(spec_op, 0, 0, 0, 0, syscall_op), SCIMM}, - { insn_tlbp, M(cop0_op, cop_op, 0, 0, 0, tlbp_op), 0 }, - { insn_tlbr, M(cop0_op, cop_op, 0, 0, 0, tlbr_op), 0 }, - { insn_tlbwi, M(cop0_op, cop_op, 0, 0, 0, tlbwi_op), 0 }, - { insn_tlbwr, M(cop0_op, cop_op, 0, 0, 0, tlbwr_op), 0 }, - { insn_xori, M(xori_op, 0, 0, 0, 0, 0), RS | RT | UIMM }, - { insn_xor, M(spec_op, 0, 0, 0, 0, xor_op), RS | RT | RD }, - { insn_invalid, 0, 0 } +static const struct insn insn_table[insn_invalid] = { + [insn_addiu] = {M(addiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_addu] = {M(spec_op, 0, 0, 0, 0, addu_op), RS | RT | RD}, + [insn_and] = {M(spec_op, 0, 0, 0, 0, and_op), RS | RT | RD}, + [insn_andi] = {M(andi_op, 0, 0, 0, 0, 0), RS | RT | UIMM}, + [insn_bbit0] = {M(lwc2_op, 0, 0, 0, 0, 0), RS | RT | BIMM}, + [insn_bbit1] = {M(swc2_op, 0, 0, 0, 0, 0), RS | RT | BIMM}, + [insn_beq] = {M(beq_op, 0, 0, 0, 0, 0), RS | RT | BIMM}, + [insn_beql] = {M(beql_op, 0, 0, 0, 0, 0), RS | RT | BIMM}, + [insn_bgez] = {M(bcond_op, 0, bgez_op, 0, 0, 0), RS | BIMM}, + [insn_bgezl] = {M(bcond_op, 0, bgezl_op, 0, 0, 0), RS | BIMM}, + [insn_bgtz] = {M(bgtz_op, 0, 0, 0, 0, 0), RS | BIMM}, + [insn_blez] = {M(blez_op, 0, 0, 0, 0, 0), RS | BIMM}, + [insn_bltz] = {M(bcond_op, 0, bltz_op, 0, 0, 0), RS | BIMM}, + [insn_bltzl] = {M(bcond_op, 0, bltzl_op, 0, 0, 0), RS | BIMM}, + [insn_bne] = {M(bne_op, 0, 0, 0, 0, 0), RS | RT | BIMM}, + [insn_break] = {M(spec_op, 0, 0, 0, 0, break_op), SCIMM}, +#ifndef CONFIG_CPU_MIPSR6 + [insn_cache] = {M(cache_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, +#else + [insn_cache] = {M6(spec3_op, 0, 0, 0, cache6_op), RS | RT | SIMM9}, +#endif + [insn_cfc1] = {M(cop1_op, cfc_op, 0, 0, 0, 0), RT | RD}, + [insn_cfcmsa] = {M(msa_op, 0, msa_cfc_op, 0, 0, msa_elm_op), RD | RE}, + [insn_ctc1] = {M(cop1_op, ctc_op, 0, 0, 0, 0), RT | RD}, + [insn_ctcmsa] = {M(msa_op, 0, msa_ctc_op, 0, 0, msa_elm_op), RD | RE}, + [insn_daddiu] = {M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_daddu] = {M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD}, + [insn_ddivu] = {M(spec_op, 0, 0, 0, 0, ddivu_op), RS | RT}, + [insn_ddivu_r6] = {M(spec_op, 0, 0, 0, ddivu_ddivu6_op, ddivu_op), + RS | RT | RD}, + [insn_di] = {M(cop0_op, mfmc0_op, 0, 12, 0, 0), RT}, + [insn_dins] = {M(spec3_op, 0, 0, 0, 0, dins_op), RS | RT | RD | RE}, + [insn_dinsm] = {M(spec3_op, 0, 0, 0, 0, dinsm_op), RS | RT | RD | RE}, + [insn_dinsu] = {M(spec3_op, 0, 0, 0, 0, dinsu_op), RS | RT | RD | RE}, + [insn_divu] = {M(spec_op, 0, 0, 0, 0, divu_op), RS | RT}, + [insn_divu_r6] = {M(spec_op, 0, 0, 0, divu_divu6_op, divu_op), + RS | RT | RD}, + [insn_dmfc0] = {M(cop0_op, dmfc_op, 0, 0, 0, 0), RT | RD | SET}, + [insn_dmodu] = {M(spec_op, 0, 0, 0, ddivu_dmodu_op, ddivu_op), + RS | RT | RD}, + [insn_dmtc0] = {M(cop0_op, dmtc_op, 0, 0, 0, 0), RT | RD | SET}, + [insn_dmultu] = {M(spec_op, 0, 0, 0, 0, dmultu_op), RS | RT}, + [insn_dmulu] = {M(spec_op, 0, 0, 0, dmultu_dmulu_op, dmultu_op), + RS | RT | RD}, + [insn_drotr] = {M(spec_op, 1, 0, 0, 0, dsrl_op), RT | RD | RE}, + [insn_drotr32] = {M(spec_op, 1, 0, 0, 0, dsrl32_op), RT | RD | RE}, + [insn_dsbh] = {M(spec3_op, 0, 0, 0, dsbh_op, dbshfl_op), RT | RD}, + [insn_dshd] = {M(spec3_op, 0, 0, 0, dshd_op, dbshfl_op), RT | RD}, + [insn_dsll] = {M(spec_op, 0, 0, 0, 0, dsll_op), RT | RD | RE}, + [insn_dsll32] = {M(spec_op, 0, 0, 0, 0, dsll32_op), RT | RD | RE}, + [insn_dsllv] = {M(spec_op, 0, 0, 0, 0, dsllv_op), RS | RT | RD}, + [insn_dsra] = {M(spec_op, 0, 0, 0, 0, dsra_op), RT | RD | RE}, + [insn_dsra32] = {M(spec_op, 0, 0, 0, 0, dsra32_op), RT | RD | RE}, + [insn_dsrav] = {M(spec_op, 0, 0, 0, 0, dsrav_op), RS | RT | RD}, + [insn_dsrl] = {M(spec_op, 0, 0, 0, 0, dsrl_op), RT | RD | RE}, + [insn_dsrl32] = {M(spec_op, 0, 0, 0, 0, dsrl32_op), RT | RD | RE}, + [insn_dsrlv] = {M(spec_op, 0, 0, 0, 0, dsrlv_op), RS | RT | RD}, + [insn_dsubu] = {M(spec_op, 0, 0, 0, 0, dsubu_op), RS | RT | RD}, + [insn_eret] = {M(cop0_op, cop_op, 0, 0, 0, eret_op), 0}, + [insn_ext] = {M(spec3_op, 0, 0, 0, 0, ext_op), RS | RT | RD | RE}, + [insn_ins] = {M(spec3_op, 0, 0, 0, 0, ins_op), RS | RT | RD | RE}, + [insn_j] = {M(j_op, 0, 0, 0, 0, 0), JIMM}, + [insn_jal] = {M(jal_op, 0, 0, 0, 0, 0), JIMM}, + [insn_jalr] = {M(spec_op, 0, 0, 0, 0, jalr_op), RS | RD}, +#ifndef CONFIG_CPU_MIPSR6 + [insn_jr] = {M(spec_op, 0, 0, 0, 0, jr_op), RS}, +#else + [insn_jr] = {M(spec_op, 0, 0, 0, 0, jalr_op), RS}, +#endif + [insn_lb] = {M(lb_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_lbu] = {M(lbu_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_ld] = {M(ld_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_lddir] = {M(lwc2_op, 0, 0, 0, lddir_op, mult_op), RS | RT | RD}, + [insn_ldpte] = {M(lwc2_op, 0, 0, 0, ldpte_op, mult_op), RS | RD}, + [insn_ldx] = {M(spec3_op, 0, 0, 0, ldx_op, lx_op), RS | RT | RD}, + [insn_lh] = {M(lh_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_lhu] = {M(lhu_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, +#ifndef CONFIG_CPU_MIPSR6 + [insn_ll] = {M(ll_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_lld] = {M(lld_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, +#else + [insn_ll] = {M6(spec3_op, 0, 0, 0, ll6_op), RS | RT | SIMM9}, + [insn_lld] = {M6(spec3_op, 0, 0, 0, lld6_op), RS | RT | SIMM9}, +#endif + [insn_lui] = {M(lui_op, 0, 0, 0, 0, 0), RT | SIMM}, + [insn_lw] = {M(lw_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_lwu] = {M(lwu_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_lwx] = {M(spec3_op, 0, 0, 0, lwx_op, lx_op), RS | RT | RD}, + [insn_mfc0] = {M(cop0_op, mfc_op, 0, 0, 0, 0), RT | RD | SET}, + [insn_mfhc0] = {M(cop0_op, mfhc0_op, 0, 0, 0, 0), RT | RD | SET}, + [insn_mfhi] = {M(spec_op, 0, 0, 0, 0, mfhi_op), RD}, + [insn_mflo] = {M(spec_op, 0, 0, 0, 0, mflo_op), RD}, + [insn_modu] = {M(spec_op, 0, 0, 0, divu_modu_op, divu_op), + RS | RT | RD}, + [insn_movn] = {M(spec_op, 0, 0, 0, 0, movn_op), RS | RT | RD}, + [insn_movz] = {M(spec_op, 0, 0, 0, 0, movz_op), RS | RT | RD}, + [insn_mtc0] = {M(cop0_op, mtc_op, 0, 0, 0, 0), RT | RD | SET}, + [insn_mthc0] = {M(cop0_op, mthc0_op, 0, 0, 0, 0), RT | RD | SET}, + [insn_mthi] = {M(spec_op, 0, 0, 0, 0, mthi_op), RS}, + [insn_mtlo] = {M(spec_op, 0, 0, 0, 0, mtlo_op), RS}, + [insn_mulu] = {M(spec_op, 0, 0, 0, multu_mulu_op, multu_op), + RS | RT | RD}, + [insn_muhu] = {M(spec_op, 0, 0, 0, multu_muhu_op, multu_op), + RS | RT | RD}, +#ifndef CONFIG_CPU_MIPSR6 + [insn_mul] = {M(spec2_op, 0, 0, 0, 0, mul_op), RS | RT | RD}, +#else + [insn_mul] = {M(spec_op, 0, 0, 0, mult_mul_op, mult_op), RS | RT | RD}, +#endif + [insn_multu] = {M(spec_op, 0, 0, 0, 0, multu_op), RS | RT}, + [insn_nor] = {M(spec_op, 0, 0, 0, 0, nor_op), RS | RT | RD}, + [insn_or] = {M(spec_op, 0, 0, 0, 0, or_op), RS | RT | RD}, + [insn_ori] = {M(ori_op, 0, 0, 0, 0, 0), RS | RT | UIMM}, +#ifndef CONFIG_CPU_MIPSR6 + [insn_pref] = {M(pref_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, +#else + [insn_pref] = {M6(spec3_op, 0, 0, 0, pref6_op), RS | RT | SIMM9}, +#endif + [insn_rfe] = {M(cop0_op, cop_op, 0, 0, 0, rfe_op), 0}, + [insn_rotr] = {M(spec_op, 1, 0, 0, 0, srl_op), RT | RD | RE}, + [insn_sb] = {M(sb_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, +#ifndef CONFIG_CPU_MIPSR6 + [insn_sc] = {M(sc_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_scd] = {M(scd_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, +#else + [insn_sc] = {M6(spec3_op, 0, 0, 0, sc6_op), RS | RT | SIMM9}, + [insn_scd] = {M6(spec3_op, 0, 0, 0, scd6_op), RS | RT | SIMM9}, +#endif + [insn_sd] = {M(sd_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_seleqz] = {M(spec_op, 0, 0, 0, 0, seleqz_op), RS | RT | RD}, + [insn_selnez] = {M(spec_op, 0, 0, 0, 0, selnez_op), RS | RT | RD}, + [insn_sh] = {M(sh_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_sll] = {M(spec_op, 0, 0, 0, 0, sll_op), RT | RD | RE}, + [insn_sllv] = {M(spec_op, 0, 0, 0, 0, sllv_op), RS | RT | RD}, + [insn_slt] = {M(spec_op, 0, 0, 0, 0, slt_op), RS | RT | RD}, + [insn_slti] = {M(slti_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_sltiu] = {M(sltiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_sltu] = {M(spec_op, 0, 0, 0, 0, sltu_op), RS | RT | RD}, + [insn_sra] = {M(spec_op, 0, 0, 0, 0, sra_op), RT | RD | RE}, + [insn_srav] = {M(spec_op, 0, 0, 0, 0, srav_op), RS | RT | RD}, + [insn_srl] = {M(spec_op, 0, 0, 0, 0, srl_op), RT | RD | RE}, + [insn_srlv] = {M(spec_op, 0, 0, 0, 0, srlv_op), RS | RT | RD}, + [insn_subu] = {M(spec_op, 0, 0, 0, 0, subu_op), RS | RT | RD}, + [insn_sw] = {M(sw_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_sync] = {M(spec_op, 0, 0, 0, 0, sync_op), RE}, + [insn_syscall] = {M(spec_op, 0, 0, 0, 0, syscall_op), SCIMM}, + [insn_tlbp] = {M(cop0_op, cop_op, 0, 0, 0, tlbp_op), 0}, + [insn_tlbr] = {M(cop0_op, cop_op, 0, 0, 0, tlbr_op), 0}, + [insn_tlbwi] = {M(cop0_op, cop_op, 0, 0, 0, tlbwi_op), 0}, + [insn_tlbwr] = {M(cop0_op, cop_op, 0, 0, 0, tlbwr_op), 0}, + [insn_wait] = {M(cop0_op, cop_op, 0, 0, 0, wait_op), SCIMM}, + [insn_wsbh] = {M(spec3_op, 0, 0, 0, wsbh_op, bshfl_op), RT | RD}, + [insn_xor] = {M(spec_op, 0, 0, 0, 0, xor_op), RS | RT | RD}, + [insn_xori] = {M(xori_op, 0, 0, 0, 0, 0), RS | RT | UIMM}, + [insn_yield] = {M(spec3_op, 0, 0, 0, 0, yield_op), RS | RD}, }; #undef M -static inline __uasminit u32 build_bimm(s32 arg) +static inline u32 build_bimm(s32 arg) { WARN(arg > 0x1ffff || arg < -0x20000, KERN_WARNING "Micro-assembler field overflow\n"); @@ -129,7 +217,7 @@ static inline __uasminit u32 build_bimm(s32 arg) return ((arg < 0) ? (1 << 15) : 0) | ((arg >> 2) & 0x7fff); } -static inline __uasminit u32 build_jimm(u32 arg) +static inline u32 build_jimm(u32 arg) { WARN(arg & ~(JIMM_MASK << 2), KERN_WARNING "Micro-assembler field overflow\n"); @@ -141,22 +229,19 @@ static inline __uasminit u32 build_jimm(u32 arg) * The order of opcode arguments is implicitly left to right, * starting with RS and ending with FUNC or IMM. */ -static void __uasminit build_insn(u32 **buf, enum opcode opc, ...) +static void build_insn(u32 **buf, enum opcode opc, ...) { - struct insn *ip = NULL; - unsigned int i; + const struct insn *ip; va_list ap; u32 op; - for (i = 0; insn_table[i].opcode != insn_invalid; i++) - if (insn_table[i].opcode == opc) { - ip = &insn_table[i]; - break; - } - - if (!ip || (opc == insn_daddiu && r4k_daddiu_bug())) + if (opc < 0 || opc >= insn_invalid || + (opc == insn_daddiu && r4k_daddiu_bug()) || + (insn_table[opc].match == 0 && insn_table[opc].fields == 0)) panic("Unsupported Micro-assembler instruction %d", opc); + ip = &insn_table[opc]; + op = ip->match; va_start(ap, opc); if (ip->fields & RS) @@ -181,13 +266,15 @@ static void __uasminit build_insn(u32 **buf, enum opcode opc, ...) op |= build_set(va_arg(ap, u32)); if (ip->fields & SCIMM) op |= build_scimm(va_arg(ap, u32)); + if (ip->fields & SIMM9) + op |= build_scimm9(va_arg(ap, u32)); va_end(ap); **buf = op; (*buf)++; } -static inline void __uasminit +static inline void __resolve_relocs(struct uasm_reloc *rel, struct uasm_label *lab) { long laddr = (long)lab->addr; diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c index 7eb5e4355d25..125140979d62 100644 --- a/arch/mips/mm/uasm.c +++ b/arch/mips/mm/uasm.c @@ -24,7 +24,8 @@ enum fields { JIMM = 0x080, FUNC = 0x100, SET = 0x200, - SCIMM = 0x400 + SCIMM = 0x400, + SIMM9 = 0x800, }; #define OP_MASK 0x3f @@ -41,57 +42,67 @@ enum fields { #define FUNC_SH 0 #define SET_MASK 0x7 #define SET_SH 0 +#define SIMM9_SH 7 +#define SIMM9_MASK 0x1ff enum opcode { - insn_invalid, insn_addiu, insn_addu, insn_and, insn_andi, insn_bbit0, insn_bbit1, - insn_beq, insn_beql, insn_bgez, insn_bgezl, insn_bltz, insn_bltzl, - insn_bne, insn_cache, insn_daddiu, insn_daddu, insn_dins, insn_dinsm, - insn_dmfc0, insn_dmtc0, insn_drotr, insn_drotr32, insn_dsll, - insn_dsll32, insn_dsra, insn_dsrl, insn_dsrl32, insn_dsubu, insn_eret, - insn_ext, insn_ins, insn_j, insn_jal, insn_jr, insn_ld, insn_ldx, - insn_ll, insn_lld, insn_lui, insn_lw, insn_lwx, insn_mfc0, insn_mtc0, - insn_or, insn_ori, insn_pref, insn_rfe, insn_rotr, insn_sc, insn_scd, - insn_sd, insn_sll, insn_sra, insn_srl, insn_subu, insn_sw, - insn_syscall, insn_tlbp, insn_tlbr, insn_tlbwi, insn_tlbwr, insn_xor, - insn_xori, + insn_beq, insn_beql, insn_bgez, insn_bgezl, insn_bgtz, insn_blez, + insn_bltz, insn_bltzl, insn_bne, insn_break, insn_cache, insn_cfc1, + insn_cfcmsa, insn_ctc1, insn_ctcmsa, insn_daddiu, insn_daddu, insn_ddivu, + insn_ddivu_r6, insn_di, insn_dins, insn_dinsm, insn_dinsu, insn_divu, + insn_divu_r6, insn_dmfc0, insn_dmodu, insn_dmtc0, insn_dmultu, + insn_dmulu, insn_drotr, insn_drotr32, insn_dsbh, insn_dshd, insn_dsll, + insn_dsll32, insn_dsllv, insn_dsra, insn_dsra32, insn_dsrav, insn_dsrl, + insn_dsrl32, insn_dsrlv, insn_dsubu, insn_eret, insn_ext, insn_ins, + insn_j, insn_jal, insn_jalr, insn_jr, insn_lb, insn_lbu, insn_ld, + insn_lddir, insn_ldpte, insn_ldx, insn_lh, insn_lhu, insn_ll, insn_lld, + insn_lui, insn_lw, insn_lwu, insn_lwx, insn_mfc0, insn_mfhc0, insn_mfhi, + insn_mflo, insn_modu, insn_movn, insn_movz, insn_mtc0, insn_mthc0, + insn_mthi, insn_mtlo, insn_mul, insn_multu, insn_mulu, insn_muhu, insn_nor, + insn_or, insn_ori, insn_pref, insn_rfe, insn_rotr, insn_sb, insn_sc, + insn_scd, insn_seleqz, insn_selnez, insn_sd, insn_sh, insn_sll, + insn_sllv, insn_slt, insn_slti, insn_sltiu, insn_sltu, insn_sra, + insn_srav, insn_srl, insn_srlv, insn_subu, insn_sw, insn_sync, + insn_syscall, insn_tlbp, insn_tlbr, insn_tlbwi, insn_tlbwr, insn_wait, + insn_wsbh, insn_xor, insn_xori, insn_yield, + insn_invalid /* insn_invalid must be last */ }; struct insn { - enum opcode opcode; u32 match; enum fields fields; }; -static inline __uasminit u32 build_rs(u32 arg) +static inline u32 build_rs(u32 arg) { WARN(arg & ~RS_MASK, KERN_WARNING "Micro-assembler field overflow\n"); return (arg & RS_MASK) << RS_SH; } -static inline __uasminit u32 build_rt(u32 arg) +static inline u32 build_rt(u32 arg) { WARN(arg & ~RT_MASK, KERN_WARNING "Micro-assembler field overflow\n"); return (arg & RT_MASK) << RT_SH; } -static inline __uasminit u32 build_rd(u32 arg) +static inline u32 build_rd(u32 arg) { WARN(arg & ~RD_MASK, KERN_WARNING "Micro-assembler field overflow\n"); return (arg & RD_MASK) << RD_SH; } -static inline __uasminit u32 build_re(u32 arg) +static inline u32 build_re(u32 arg) { WARN(arg & ~RE_MASK, KERN_WARNING "Micro-assembler field overflow\n"); return (arg & RE_MASK) << RE_SH; } -static inline __uasminit u32 build_simm(s32 arg) +static inline u32 build_simm(s32 arg) { WARN(arg > 0x7fff || arg < -0x8000, KERN_WARNING "Micro-assembler field overflow\n"); @@ -99,14 +110,14 @@ static inline __uasminit u32 build_simm(s32 arg) return arg & 0xffff; } -static inline __uasminit u32 build_uimm(u32 arg) +static inline u32 build_uimm(u32 arg) { WARN(arg & ~IMM_MASK, KERN_WARNING "Micro-assembler field overflow\n"); return arg & IMM_MASK; } -static inline __uasminit u32 build_scimm(u32 arg) +static inline u32 build_scimm(u32 arg) { WARN(arg & ~SCIMM_MASK, KERN_WARNING "Micro-assembler field overflow\n"); @@ -114,21 +125,29 @@ static inline __uasminit u32 build_scimm(u32 arg) return (arg & SCIMM_MASK) << SCIMM_SH; } -static inline __uasminit u32 build_func(u32 arg) +static inline u32 build_scimm9(s32 arg) +{ + WARN((arg > 0xff || arg < -0x100), + KERN_WARNING "Micro-assembler field overflow\n"); + + return (arg & SIMM9_MASK) << SIMM9_SH; +} + +static inline u32 build_func(u32 arg) { WARN(arg & ~FUNC_MASK, KERN_WARNING "Micro-assembler field overflow\n"); return arg & FUNC_MASK; } -static inline __uasminit u32 build_set(u32 arg) +static inline u32 build_set(u32 arg) { WARN(arg & ~SET_MASK, KERN_WARNING "Micro-assembler field overflow\n"); return arg & SET_MASK; } -static void __uasminit build_insn(u32 **buf, enum opcode opc, ...); +static void build_insn(u32 **buf, enum opcode opc, ...); #define I_u1u2u3(op) \ Ip_u1u2u3(op) \ @@ -137,6 +156,13 @@ Ip_u1u2u3(op) \ } \ UASM_EXPORT_SYMBOL(uasm_i##op); +#define I_s3s1s2(op) \ +Ip_s3s1s2(op) \ +{ \ + build_insn(buf, insn##op, b, c, a); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + #define I_u2u1u3(op) \ Ip_u2u1u3(op) \ { \ @@ -144,6 +170,13 @@ Ip_u2u1u3(op) \ } \ UASM_EXPORT_SYMBOL(uasm_i##op); +#define I_u3u2u1(op) \ +Ip_u3u2u1(op) \ +{ \ + build_insn(buf, insn##op, c, b, a); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + #define I_u3u1u2(op) \ Ip_u3u1u2(op) \ { \ @@ -186,6 +219,13 @@ Ip_u2u1msbu3(op) \ } \ UASM_EXPORT_SYMBOL(uasm_i##op); +#define I_u2u1msb32msb3(op) \ +Ip_u2u1msbu3(op) \ +{ \ + build_insn(buf, insn##op, b, a, c+d-33, c-32); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + #define I_u2u1msbdu3(op) \ Ip_u2u1msbu3(op) \ { \ @@ -200,6 +240,13 @@ Ip_u1u2(op) \ } \ UASM_EXPORT_SYMBOL(uasm_i##op); +#define I_u2u1(op) \ +Ip_u1u2(op) \ +{ \ + build_insn(buf, insn##op, b, a); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + #define I_u1s2(op) \ Ip_u1s2(op) \ { \ @@ -229,19 +276,40 @@ I_u1u2s3(_beq) I_u1u2s3(_beql) I_u1s2(_bgez) I_u1s2(_bgezl) +I_u1s2(_bgtz) +I_u1s2(_blez) I_u1s2(_bltz) I_u1s2(_bltzl) I_u1u2s3(_bne) +I_u1(_break) I_u2s3u1(_cache) +I_u1u2(_cfc1) +I_u2u1(_cfcmsa) +I_u1u2(_ctc1) +I_u2u1(_ctcmsa) +I_u1u2(_ddivu) +I_u3u1u2(_ddivu_r6) I_u1u2u3(_dmfc0) +I_u3u1u2(_dmodu) I_u1u2u3(_dmtc0) +I_u1u2(_dmultu) +I_u3u1u2(_dmulu) I_u2u1s3(_daddiu) I_u3u1u2(_daddu) +I_u1(_di); +I_u1u2(_divu) +I_u3u1u2(_divu_r6) +I_u2u1(_dsbh); +I_u2u1(_dshd); I_u2u1u3(_dsll) I_u2u1u3(_dsll32) +I_u3u2u1(_dsllv) I_u2u1u3(_dsra) +I_u2u1u3(_dsra32) +I_u3u2u1(_dsrav) I_u2u1u3(_dsrl) I_u2u1u3(_dsrl32) +I_u3u2u1(_dsrlv) I_u2u1u3(_drotr) I_u2u1u3(_drotr32) I_u3u1u2(_dsubu) @@ -250,46 +318,84 @@ I_u2u1msbdu3(_ext) I_u2u1msbu3(_ins) I_u1(_j) I_u1(_jal) +I_u2u1(_jalr) I_u1(_jr) +I_u2s3u1(_lb) +I_u2s3u1(_lbu) I_u2s3u1(_ld) +I_u2s3u1(_lh) +I_u2s3u1(_lhu) I_u2s3u1(_ll) I_u2s3u1(_lld) I_u1s2(_lui) I_u2s3u1(_lw) +I_u2s3u1(_lwu) I_u1u2u3(_mfc0) +I_u1u2u3(_mfhc0) +I_u3u1u2(_modu) +I_u3u1u2(_movn) +I_u3u1u2(_movz) +I_u1(_mfhi) +I_u1(_mflo) I_u1u2u3(_mtc0) -I_u2u1u3(_ori) +I_u1u2u3(_mthc0) +I_u1(_mthi) +I_u1(_mtlo) +I_u3u1u2(_mul) +I_u1u2(_multu) +I_u3u1u2(_mulu) +I_u3u1u2(_muhu) +I_u3u1u2(_nor) I_u3u1u2(_or) +I_u2u1u3(_ori) I_0(_rfe) +I_u2s3u1(_sb) I_u2s3u1(_sc) I_u2s3u1(_scd) I_u2s3u1(_sd) +I_u3u1u2(_seleqz) +I_u3u1u2(_selnez) +I_u2s3u1(_sh) I_u2u1u3(_sll) +I_u3u2u1(_sllv) +I_s3s1s2(_slt) +I_u2u1s3(_slti) +I_u2u1s3(_sltiu) +I_u3u1u2(_sltu) I_u2u1u3(_sra) +I_u3u2u1(_srav) I_u2u1u3(_srl) +I_u3u2u1(_srlv) I_u2u1u3(_rotr) I_u3u1u2(_subu) I_u2s3u1(_sw) +I_u1(_sync) I_0(_tlbp) I_0(_tlbr) I_0(_tlbwi) I_0(_tlbwr) +I_u1(_wait); +I_u2u1(_wsbh) I_u3u1u2(_xor) I_u2u1u3(_xori) +I_u2u1(_yield) I_u2u1msbu3(_dins); I_u2u1msb32u3(_dinsm); +I_u2u1msb32msb3(_dinsu); I_u1(_syscall); I_u1u2s3(_bbit0); I_u1u2s3(_bbit1); I_u3u1u2(_lwx) I_u3u1u2(_ldx) +I_u1u2(_ldpte) +I_u2u1u3(_lddir) #ifdef CONFIG_CPU_CAVIUM_OCTEON #include <asm/octeon/octeon.h> -void __uasminit ISAFUNC(uasm_i_pref)(u32 **buf, unsigned int a, signed int b, +void uasm_i_pref(u32 **buf, unsigned int a, signed int b, unsigned int c) { - if (OCTEON_IS_MODEL(OCTEON_CN63XX_PASS1_X) && a <= 24 && a != 5) + if (OCTEON_IS_MODEL(OCTEON_CN6XXX) && a <= 24 && a != 5) /* * As per erratum Core-14449, replace prefetches 0-4, * 6-24 with 'pref 28'. @@ -298,32 +404,28 @@ void __uasminit ISAFUNC(uasm_i_pref)(u32 **buf, unsigned int a, signed int b, else build_insn(buf, insn_pref, c, a, b); } -UASM_EXPORT_SYMBOL(ISAFUNC(uasm_i_pref)); +UASM_EXPORT_SYMBOL(uasm_i_pref); #else I_u2s3u1(_pref) #endif /* Handle labels. */ -void __uasminit ISAFUNC(uasm_build_label)(struct uasm_label **lab, u32 *addr, int lid) +void uasm_build_label(struct uasm_label **lab, u32 *addr, int lid) { (*lab)->addr = addr; (*lab)->lab = lid; (*lab)++; } -UASM_EXPORT_SYMBOL(ISAFUNC(uasm_build_label)); +UASM_EXPORT_SYMBOL(uasm_build_label); -int __uasminit ISAFUNC(uasm_in_compat_space_p)(long addr) +int uasm_in_compat_space_p(long addr) { /* Is this address in 32bit compat space? */ -#ifdef CONFIG_64BIT - return (((addr) & 0xffffffff00000000L) == 0xffffffff00000000L); -#else - return 1; -#endif + return addr == (int)addr; } -UASM_EXPORT_SYMBOL(ISAFUNC(uasm_in_compat_space_p)); +UASM_EXPORT_SYMBOL(uasm_in_compat_space_p); -static int __uasminit uasm_rel_highest(long val) +static int uasm_rel_highest(long val) { #ifdef CONFIG_64BIT return ((((val + 0x800080008000L) >> 48) & 0xffff) ^ 0x8000) - 0x8000; @@ -332,7 +434,7 @@ static int __uasminit uasm_rel_highest(long val) #endif } -static int __uasminit uasm_rel_higher(long val) +static int uasm_rel_higher(long val) { #ifdef CONFIG_64BIT return ((((val + 0x80008000L) >> 32) & 0xffff) ^ 0x8000) - 0x8000; @@ -341,66 +443,65 @@ static int __uasminit uasm_rel_higher(long val) #endif } -int __uasminit ISAFUNC(uasm_rel_hi)(long val) +int uasm_rel_hi(long val) { return ((((val + 0x8000L) >> 16) & 0xffff) ^ 0x8000) - 0x8000; } -UASM_EXPORT_SYMBOL(ISAFUNC(uasm_rel_hi)); +UASM_EXPORT_SYMBOL(uasm_rel_hi); -int __uasminit ISAFUNC(uasm_rel_lo)(long val) +int uasm_rel_lo(long val) { return ((val & 0xffff) ^ 0x8000) - 0x8000; } -UASM_EXPORT_SYMBOL(ISAFUNC(uasm_rel_lo)); +UASM_EXPORT_SYMBOL(uasm_rel_lo); -void __uasminit ISAFUNC(UASM_i_LA_mostly)(u32 **buf, unsigned int rs, long addr) +void UASM_i_LA_mostly(u32 **buf, unsigned int rs, long addr) { - if (!ISAFUNC(uasm_in_compat_space_p)(addr)) { - ISAFUNC(uasm_i_lui)(buf, rs, uasm_rel_highest(addr)); + if (!uasm_in_compat_space_p(addr)) { + uasm_i_lui(buf, rs, uasm_rel_highest(addr)); if (uasm_rel_higher(addr)) - ISAFUNC(uasm_i_daddiu)(buf, rs, rs, uasm_rel_higher(addr)); - if (ISAFUNC(uasm_rel_hi(addr))) { - ISAFUNC(uasm_i_dsll)(buf, rs, rs, 16); - ISAFUNC(uasm_i_daddiu)(buf, rs, rs, - ISAFUNC(uasm_rel_hi)(addr)); - ISAFUNC(uasm_i_dsll)(buf, rs, rs, 16); + uasm_i_daddiu(buf, rs, rs, uasm_rel_higher(addr)); + if (uasm_rel_hi(addr)) { + uasm_i_dsll(buf, rs, rs, 16); + uasm_i_daddiu(buf, rs, rs, + uasm_rel_hi(addr)); + uasm_i_dsll(buf, rs, rs, 16); } else - ISAFUNC(uasm_i_dsll32)(buf, rs, rs, 0); + uasm_i_dsll32(buf, rs, rs, 0); } else - ISAFUNC(uasm_i_lui)(buf, rs, ISAFUNC(uasm_rel_hi(addr))); + uasm_i_lui(buf, rs, uasm_rel_hi(addr)); } -UASM_EXPORT_SYMBOL(ISAFUNC(UASM_i_LA_mostly)); +UASM_EXPORT_SYMBOL(UASM_i_LA_mostly); -void __uasminit ISAFUNC(UASM_i_LA)(u32 **buf, unsigned int rs, long addr) +void UASM_i_LA(u32 **buf, unsigned int rs, long addr) { - ISAFUNC(UASM_i_LA_mostly)(buf, rs, addr); - if (ISAFUNC(uasm_rel_lo(addr))) { - if (!ISAFUNC(uasm_in_compat_space_p)(addr)) - ISAFUNC(uasm_i_daddiu)(buf, rs, rs, - ISAFUNC(uasm_rel_lo(addr))); + UASM_i_LA_mostly(buf, rs, addr); + if (uasm_rel_lo(addr)) { + if (!uasm_in_compat_space_p(addr)) + uasm_i_daddiu(buf, rs, rs, + uasm_rel_lo(addr)); else - ISAFUNC(uasm_i_addiu)(buf, rs, rs, - ISAFUNC(uasm_rel_lo(addr))); + uasm_i_addiu(buf, rs, rs, + uasm_rel_lo(addr)); } } -UASM_EXPORT_SYMBOL(ISAFUNC(UASM_i_LA)); +UASM_EXPORT_SYMBOL(UASM_i_LA); /* Handle relocations. */ -void __uasminit -ISAFUNC(uasm_r_mips_pc16)(struct uasm_reloc **rel, u32 *addr, int lid) +void uasm_r_mips_pc16(struct uasm_reloc **rel, u32 *addr, int lid) { (*rel)->addr = addr; (*rel)->type = R_MIPS_PC16; (*rel)->lab = lid; (*rel)++; } -UASM_EXPORT_SYMBOL(ISAFUNC(uasm_r_mips_pc16)); +UASM_EXPORT_SYMBOL(uasm_r_mips_pc16); -static inline void __uasminit -__resolve_relocs(struct uasm_reloc *rel, struct uasm_label *lab); +static inline void __resolve_relocs(struct uasm_reloc *rel, + struct uasm_label *lab); -void __uasminit -ISAFUNC(uasm_resolve_relocs)(struct uasm_reloc *rel, struct uasm_label *lab) +void uasm_resolve_relocs(struct uasm_reloc *rel, + struct uasm_label *lab) { struct uasm_label *l; @@ -409,40 +510,39 @@ ISAFUNC(uasm_resolve_relocs)(struct uasm_reloc *rel, struct uasm_label *lab) if (rel->lab == l->lab) __resolve_relocs(rel, l); } -UASM_EXPORT_SYMBOL(ISAFUNC(uasm_resolve_relocs)); +UASM_EXPORT_SYMBOL(uasm_resolve_relocs); -void __uasminit -ISAFUNC(uasm_move_relocs)(struct uasm_reloc *rel, u32 *first, u32 *end, long off) +void uasm_move_relocs(struct uasm_reloc *rel, u32 *first, u32 *end, + long off) { for (; rel->lab != UASM_LABEL_INVALID; rel++) if (rel->addr >= first && rel->addr < end) rel->addr += off; } -UASM_EXPORT_SYMBOL(ISAFUNC(uasm_move_relocs)); +UASM_EXPORT_SYMBOL(uasm_move_relocs); -void __uasminit -ISAFUNC(uasm_move_labels)(struct uasm_label *lab, u32 *first, u32 *end, long off) +void uasm_move_labels(struct uasm_label *lab, u32 *first, u32 *end, + long off) { for (; lab->lab != UASM_LABEL_INVALID; lab++) if (lab->addr >= first && lab->addr < end) lab->addr += off; } -UASM_EXPORT_SYMBOL(ISAFUNC(uasm_move_labels)); +UASM_EXPORT_SYMBOL(uasm_move_labels); -void __uasminit -ISAFUNC(uasm_copy_handler)(struct uasm_reloc *rel, struct uasm_label *lab, u32 *first, - u32 *end, u32 *target) +void uasm_copy_handler(struct uasm_reloc *rel, struct uasm_label *lab, + u32 *first, u32 *end, u32 *target) { long off = (long)(target - first); memcpy(target, first, (end - first) * sizeof(u32)); - ISAFUNC(uasm_move_relocs(rel, first, end, off)); - ISAFUNC(uasm_move_labels(lab, first, end, off)); + uasm_move_relocs(rel, first, end, off); + uasm_move_labels(lab, first, end, off); } -UASM_EXPORT_SYMBOL(ISAFUNC(uasm_copy_handler)); +UASM_EXPORT_SYMBOL(uasm_copy_handler); -int __uasminit ISAFUNC(uasm_insn_has_bdelay)(struct uasm_reloc *rel, u32 *addr) +int uasm_insn_has_bdelay(struct uasm_reloc *rel, u32 *addr) { for (; rel->lab != UASM_LABEL_INVALID; rel++) { if (rel->addr == addr @@ -453,88 +553,92 @@ int __uasminit ISAFUNC(uasm_insn_has_bdelay)(struct uasm_reloc *rel, u32 *addr) return 0; } -UASM_EXPORT_SYMBOL(ISAFUNC(uasm_insn_has_bdelay)); +UASM_EXPORT_SYMBOL(uasm_insn_has_bdelay); /* Convenience functions for labeled branches. */ -void __uasminit -ISAFUNC(uasm_il_bltz)(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid) +void uasm_il_bltz(u32 **p, struct uasm_reloc **r, unsigned int reg, + int lid) +{ + uasm_r_mips_pc16(r, *p, lid); + uasm_i_bltz(p, reg, 0); +} +UASM_EXPORT_SYMBOL(uasm_il_bltz); + +void uasm_il_b(u32 **p, struct uasm_reloc **r, int lid) { uasm_r_mips_pc16(r, *p, lid); - ISAFUNC(uasm_i_bltz)(p, reg, 0); + uasm_i_b(p, 0); } -UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bltz)); +UASM_EXPORT_SYMBOL(uasm_il_b); -void __uasminit -ISAFUNC(uasm_il_b)(u32 **p, struct uasm_reloc **r, int lid) +void uasm_il_beq(u32 **p, struct uasm_reloc **r, unsigned int r1, + unsigned int r2, int lid) { uasm_r_mips_pc16(r, *p, lid); - ISAFUNC(uasm_i_b)(p, 0); + uasm_i_beq(p, r1, r2, 0); } -UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_b)); +UASM_EXPORT_SYMBOL(uasm_il_beq); -void __uasminit -ISAFUNC(uasm_il_beqz)(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid) +void uasm_il_beqz(u32 **p, struct uasm_reloc **r, unsigned int reg, + int lid) { uasm_r_mips_pc16(r, *p, lid); - ISAFUNC(uasm_i_beqz)(p, reg, 0); + uasm_i_beqz(p, reg, 0); } -UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_beqz)); +UASM_EXPORT_SYMBOL(uasm_il_beqz); -void __uasminit -ISAFUNC(uasm_il_beqzl)(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid) +void uasm_il_beqzl(u32 **p, struct uasm_reloc **r, unsigned int reg, + int lid) { uasm_r_mips_pc16(r, *p, lid); - ISAFUNC(uasm_i_beqzl)(p, reg, 0); + uasm_i_beqzl(p, reg, 0); } -UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_beqzl)); +UASM_EXPORT_SYMBOL(uasm_il_beqzl); -void __uasminit -ISAFUNC(uasm_il_bne)(u32 **p, struct uasm_reloc **r, unsigned int reg1, - unsigned int reg2, int lid) +void uasm_il_bne(u32 **p, struct uasm_reloc **r, unsigned int reg1, + unsigned int reg2, int lid) { uasm_r_mips_pc16(r, *p, lid); - ISAFUNC(uasm_i_bne)(p, reg1, reg2, 0); + uasm_i_bne(p, reg1, reg2, 0); } -UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bne)); +UASM_EXPORT_SYMBOL(uasm_il_bne); -void __uasminit -ISAFUNC(uasm_il_bnez)(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid) +void uasm_il_bnez(u32 **p, struct uasm_reloc **r, unsigned int reg, + int lid) { uasm_r_mips_pc16(r, *p, lid); - ISAFUNC(uasm_i_bnez)(p, reg, 0); + uasm_i_bnez(p, reg, 0); } -UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bnez)); +UASM_EXPORT_SYMBOL(uasm_il_bnez); -void __uasminit -ISAFUNC(uasm_il_bgezl)(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid) +void uasm_il_bgezl(u32 **p, struct uasm_reloc **r, unsigned int reg, + int lid) { uasm_r_mips_pc16(r, *p, lid); - ISAFUNC(uasm_i_bgezl)(p, reg, 0); + uasm_i_bgezl(p, reg, 0); } -UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bgezl)); +UASM_EXPORT_SYMBOL(uasm_il_bgezl); -void __uasminit -ISAFUNC(uasm_il_bgez)(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid) +void uasm_il_bgez(u32 **p, struct uasm_reloc **r, unsigned int reg, + int lid) { uasm_r_mips_pc16(r, *p, lid); - ISAFUNC(uasm_i_bgez)(p, reg, 0); + uasm_i_bgez(p, reg, 0); } -UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bgez)); +UASM_EXPORT_SYMBOL(uasm_il_bgez); -void __uasminit -ISAFUNC(uasm_il_bbit0)(u32 **p, struct uasm_reloc **r, unsigned int reg, - unsigned int bit, int lid) +void uasm_il_bbit0(u32 **p, struct uasm_reloc **r, unsigned int reg, + unsigned int bit, int lid) { uasm_r_mips_pc16(r, *p, lid); - ISAFUNC(uasm_i_bbit0)(p, reg, bit, 0); + uasm_i_bbit0(p, reg, bit, 0); } -UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bbit0)); +UASM_EXPORT_SYMBOL(uasm_il_bbit0); -void __uasminit -ISAFUNC(uasm_il_bbit1)(u32 **p, struct uasm_reloc **r, unsigned int reg, - unsigned int bit, int lid) +void uasm_il_bbit1(u32 **p, struct uasm_reloc **r, unsigned int reg, + unsigned int bit, int lid) { uasm_r_mips_pc16(r, *p, lid); - ISAFUNC(uasm_i_bbit1)(p, reg, bit, 0); + uasm_i_bbit1(p, reg, bit, 0); } -UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bbit1)); +UASM_EXPORT_SYMBOL(uasm_il_bbit1); |
