From 28853ac8fe5221de74a14f1182d7b2b383dfd85c Mon Sep 17 00:00:00 2001 From: Paulius Zaleckas Date: Wed, 25 Mar 2009 13:10:01 +0200 Subject: ARM: Add support for FA526 v2 Adds support for Faraday FA526 core. This core is used at least by: Cortina Systems Gemini and Centroid family Cavium Networks ECONA family Grain Media GM8120 Pixelplus ImageARM Prolific PL-1029 Faraday IP evaluation boards v2: - move TLB_BTB to separate patch - update copyrights Signed-off-by: Paulius Zaleckas --- arch/arm/Makefile | 1 + arch/arm/boot/compressed/head.S | 26 ++++ arch/arm/include/asm/cacheflush.h | 8 ++ arch/arm/include/asm/page.h | 8 ++ arch/arm/include/asm/proc-fns.h | 8 ++ arch/arm/include/asm/system.h | 6 + arch/arm/include/asm/tlbflush.h | 19 +++ arch/arm/mm/Kconfig | 35 +++++- arch/arm/mm/Makefile | 4 + arch/arm/mm/cache-fa.S | 220 +++++++++++++++++++++++++++++++++ arch/arm/mm/copypage-fa.c | 86 +++++++++++++ arch/arm/mm/proc-fa526.S | 248 ++++++++++++++++++++++++++++++++++++++ arch/arm/mm/tlb-fa.S | 75 ++++++++++++ 13 files changed, 742 insertions(+), 2 deletions(-) create mode 100644 arch/arm/mm/cache-fa.S create mode 100644 arch/arm/mm/copypage-fa.c create mode 100644 arch/arm/mm/proc-fa526.S create mode 100644 arch/arm/mm/tlb-fa.S diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 24e0f0187697..d29f9260fb1c 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -72,6 +72,7 @@ tune-$(CONFIG_CPU_ARM920T) :=-mtune=arm9tdmi tune-$(CONFIG_CPU_ARM922T) :=-mtune=arm9tdmi tune-$(CONFIG_CPU_ARM925T) :=-mtune=arm9tdmi tune-$(CONFIG_CPU_ARM926T) :=-mtune=arm9tdmi +tune-$(CONFIG_CPU_FA526) :=-mtune=arm9tdmi tune-$(CONFIG_CPU_SA110) :=-mtune=strongarm110 tune-$(CONFIG_CPU_SA1100) :=-mtune=strongarm1100 tune-$(CONFIG_CPU_XSCALE) :=$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 77d614232d81..def02483286a 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -459,6 +459,20 @@ __armv7_mmu_cache_on: mcr p15, 0, r0, c7, c5, 4 @ ISB mov pc, r12 +__fa526_cache_on: + mov r12, lr + bl __setup_mmu + mov r0, #0 + mcr p15, 0, r0, c7, c7, 0 @ Invalidate whole cache + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mcr p15, 0, r0, c8, c7, 0 @ flush UTLB + mrc p15, 0, r0, c1, c0, 0 @ read control reg + orr r0, r0, #0x1000 @ I-cache enable + bl __common_mmu_cache_on + mov r0, #0 + mcr p15, 0, r0, c8, c7, 0 @ flush UTLB + mov pc, r12 + __arm6_mmu_cache_on: mov r12, lr bl __setup_mmu @@ -636,6 +650,12 @@ proc_types: b __armv4_mmu_cache_off b __armv5tej_mmu_cache_flush + .word 0x66015261 @ FA526 + .word 0xff01fff1 + b __fa526_cache_on + b __armv4_mmu_cache_off + b __fa526_cache_flush + @ These match on the architecture ID .word 0x00020000 @ ARMv4T @@ -775,6 +795,12 @@ __armv4_mpu_cache_flush: mcr p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr +__fa526_cache_flush: + mov r1, #0 + mcr p15, 0, r1, c7, c14, 0 @ clean and invalidate D cache + mcr p15, 0, r1, c7, c5, 0 @ flush I cache + mcr p15, 0, r1, c7, c10, 4 @ drain WB + mov pc, lr __armv6_mmu_cache_flush: mov r1, #0 diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 6cbd8fdc9f1f..a6b8b90ed57f 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -46,6 +46,14 @@ # define MULTI_CACHE 1 #endif +#if defined(CONFIG_CPU_FA526) +# ifdef _CACHE +# define MULTI_CACHE 1 +# else +# define _CACHE fa +# endif +#endif + #if defined(CONFIG_CPU_ARM926T) # ifdef _CACHE # define MULTI_CACHE 1 diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index f341c9dbd662..e6eb8a67b807 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -76,6 +76,14 @@ # endif #endif +#ifdef CONFIG_CPU_COPY_FA +# ifdef _USER +# define MULTI_USER 1 +# else +# define _USER fa +# endif +#endif + #ifdef CONFIG_CPU_SA1100 # ifdef _USER # define MULTI_USER 1 diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index db80203b68e0..00949281d3ee 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -89,6 +89,14 @@ # define CPU_NAME cpu_arm922 # endif # endif +# ifdef CONFIG_CPU_FA526 +# ifdef CPU_NAME +# undef MULTI_CPU +# define MULTI_CPU +# else +# define CPU_NAME cpu_fa526 +# endif +# endif # ifdef CONFIG_CPU_ARM925T # ifdef CPU_NAME # undef MULTI_CPU diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index 811be55f338e..d6a4dad99c9b 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -125,6 +125,12 @@ extern unsigned int user_debug; : : "r" (0) : "memory") #define dmb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \ : : "r" (0) : "memory") +#elif defined(CONFIG_CPU_FA526) +#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \ + : : "r" (0) : "memory") +#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ + : : "r" (0) : "memory") +#define dmb() __asm__ __volatile__ ("" : : : "memory") #else #define isb() __asm__ __volatile__ ("" : : : "memory") #define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index ffedd2494eab..a62218013c78 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -54,6 +54,7 @@ * v4wb - ARMv4 with write buffer without I TLB flush entry instruction * v4wbi - ARMv4 with write buffer with I TLB flush entry instruction * fr - Feroceon (v4wbi with non-outer-cacheable page table walks) + * fa - Faraday (v4 with write buffer with UTLB and branch target buffer (BTB)) * v6wbi - ARMv6 with write buffer with I TLB flush entry instruction * v7wbi - identical to v6wbi */ @@ -90,6 +91,22 @@ # define v4_always_flags (-1UL) #endif +#define fa_tlb_flags (TLB_WB | TLB_BTB | TLB_DCLEAN | \ + TLB_V4_U_FULL | TLB_V4_U_PAGE) + +#ifdef CONFIG_CPU_TLB_FA +# define fa_possible_flags fa_tlb_flags +# define fa_always_flags fa_tlb_flags +# ifdef _TLB +# define MULTI_TLB 1 +# else +# define _TLB fa +# endif +#else +# define fa_possible_flags 0 +# define fa_always_flags (-1UL) +#endif + #define v4wbi_tlb_flags (TLB_WB | TLB_DCLEAN | \ TLB_V4_I_FULL | TLB_V4_D_FULL | \ TLB_V4_I_PAGE | TLB_V4_D_PAGE) @@ -268,6 +285,7 @@ extern struct cpu_tlb_fns cpu_tlb; v4wbi_possible_flags | \ fr_possible_flags | \ v4wb_possible_flags | \ + fa_possible_flags | \ v6wbi_possible_flags | \ v7wbi_possible_flags) @@ -276,6 +294,7 @@ extern struct cpu_tlb_fns cpu_tlb; v4wbi_always_flags & \ fr_always_flags & \ v4wb_always_flags & \ + fa_always_flags & \ v6wbi_always_flags & \ v7wbi_always_flags) diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index d490f3773c01..bc3331863d9d 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -186,6 +186,24 @@ config CPU_ARM926T Say Y if you want support for the ARM926T processor. Otherwise, say N. +# FA526 +config CPU_FA526 + bool + select CPU_32v4 + select CPU_ABRT_EV4 + select CPU_PABRT_NOIFAR + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_CACHE_FA + select CPU_COPY_FA if MMU + select CPU_TLB_FA if MMU + help + The FA526 is a version of the ARMv4 compatible processor with + Branch Target Buffer, Unified TLB and cache line size 16. + + Say Y if you want support for the FA526 processor. + Otherwise, say N. + # ARM940T config CPU_ARM940T bool "Support ARM940T processor" if ARCH_INTEGRATOR @@ -484,6 +502,9 @@ config CPU_CACHE_VIVT config CPU_CACHE_VIPT bool +config CPU_CACHE_FA + bool + if MMU # The copy-page model config CPU_COPY_V3 @@ -498,6 +519,9 @@ config CPU_COPY_V4WB config CPU_COPY_FEROCEON bool +config CPU_COPY_FA + bool + config CPU_COPY_V6 bool @@ -528,6 +552,13 @@ config CPU_TLB_FEROCEON help Feroceon TLB (v4wbi with non-outer-cachable page table walks). +config CPU_TLB_FA + bool + help + Faraday ARM FA526 architecture, unified TLB with writeback cache + and invalidate instruction cache entry. Branch target buffer is + also supported. + config CPU_TLB_V6 bool @@ -638,7 +669,7 @@ config CPU_DCACHE_SIZE config CPU_DCACHE_WRITETHROUGH bool "Force write through D-cache" - depends on (CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020) && !CPU_DCACHE_DISABLE + depends on (CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_FA526) && !CPU_DCACHE_DISABLE default y if CPU_ARM925T help Say Y here to use the data cache in writethrough mode. Unless you @@ -653,7 +684,7 @@ config CPU_CACHE_ROUND_ROBIN config CPU_BPREDICT_DISABLE bool "Disable branch prediction" - depends on CPU_ARM1020 || CPU_V6 || CPU_XSC3 || CPU_V7 + depends on CPU_ARM1020 || CPU_V6 || CPU_XSC3 || CPU_V7 || CPU_FA526 help Say Y here to disable branch prediction. If unsure, say N. diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 480f78a3611a..40f941c2245c 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -32,6 +32,7 @@ obj-$(CONFIG_CPU_CACHE_V4WT) += cache-v4wt.o obj-$(CONFIG_CPU_CACHE_V4WB) += cache-v4wb.o obj-$(CONFIG_CPU_CACHE_V6) += cache-v6.o obj-$(CONFIG_CPU_CACHE_V7) += cache-v7.o +obj-$(CONFIG_CPU_CACHE_FA) += cache-fa.o obj-$(CONFIG_CPU_COPY_V3) += copypage-v3.o obj-$(CONFIG_CPU_COPY_V4WT) += copypage-v4wt.o @@ -41,6 +42,7 @@ obj-$(CONFIG_CPU_COPY_V6) += copypage-v6.o context.o obj-$(CONFIG_CPU_SA1100) += copypage-v4mc.o obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o obj-$(CONFIG_CPU_XSC3) += copypage-xsc3.o +obj-$(CONFIG_CPU_COPY_FA) += copypage-fa.o obj-$(CONFIG_CPU_TLB_V3) += tlb-v3.o obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o @@ -49,6 +51,7 @@ obj-$(CONFIG_CPU_TLB_V4WBI) += tlb-v4wbi.o obj-$(CONFIG_CPU_TLB_FEROCEON) += tlb-v4wbi.o # reuse v4wbi TLB functions obj-$(CONFIG_CPU_TLB_V6) += tlb-v6.o obj-$(CONFIG_CPU_TLB_V7) += tlb-v7.o +obj-$(CONFIG_CPU_TLB_FA) += tlb-fa.o obj-$(CONFIG_CPU_ARM610) += proc-arm6_7.o obj-$(CONFIG_CPU_ARM710) += proc-arm6_7.o @@ -62,6 +65,7 @@ obj-$(CONFIG_CPU_ARM925T) += proc-arm925.o obj-$(CONFIG_CPU_ARM926T) += proc-arm926.o obj-$(CONFIG_CPU_ARM940T) += proc-arm940.o obj-$(CONFIG_CPU_ARM946E) += proc-arm946.o +obj-$(CONFIG_CPU_FA526) += proc-fa526.o obj-$(CONFIG_CPU_ARM1020) += proc-arm1020.o obj-$(CONFIG_CPU_ARM1020E) += proc-arm1020e.o obj-$(CONFIG_CPU_ARM1022) += proc-arm1022.o diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S new file mode 100644 index 000000000000..b63a8f7b95cf --- /dev/null +++ b/arch/arm/mm/cache-fa.S @@ -0,0 +1,220 @@ +/* + * linux/arch/arm/mm/cache-fa.S + * + * Copyright (C) 2005 Faraday Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas + * + * Based on cache-v4wb.S: + * Copyright (C) 1997-2002 Russell king + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Processors: FA520 FA526 FA626 + */ +#include +#include +#include +#include + +#include "proc-macros.S" + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 16 + +/* + * The total size of the data cache. + */ +#ifdef CONFIG_ARCH_GEMINI +#define CACHE_DSIZE 8192 +#else +#define CACHE_DSIZE 16384 +#endif + +/* FIXME: put optimal value here. Current one is just estimation */ +#define CACHE_DLIMIT (CACHE_DSIZE * 2) + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular address + * space. + */ +ENTRY(fa_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(fa_flush_kern_cache_all) + mov ip, #0 + mov r2, #VM_EXEC +__flush_whole_cache: + mcr p15, 0, ip, c7, c14, 0 @ clean/invalidate D cache + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB + mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer + mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (inclusive, page aligned) + * - end - end address (exclusive, page aligned) + * - flags - vma_area_struct flags describing address space + */ +ENTRY(fa_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT @ total size >= limit? + bhs __flush_whole_cache @ flush whole D cache + +1: tst r2, #VM_EXEC + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I line + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB + mcrne p15, 0, ip, c7, c10, 4 @ data write barrier + mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(fa_coherent_kern_range) + /* fall through */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(fa_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mcr p15, 0, r0, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * flush_kern_dcache_page(kaddr) + * + * Ensure that the data held in the page kaddr is written back + * to the page in question. + * + * - kaddr - kernel address (guaranteed to be page aligned) + */ +ENTRY(fa_flush_kern_dcache_page) + add r1, r0, #PAGE_SZ +1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(fa_dma_inv_range) + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry + tst r1, #CACHE_DLINESIZE - 1 + bic r1, r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean (write back) the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(fa_dma_clean_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * dma_flush_range(start,end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +ENTRY(fa_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + + __INITDATA + + .type fa_cache_fns, #object +ENTRY(fa_cache_fns) + .long fa_flush_kern_cache_all + .long fa_flush_user_cache_all + .long fa_flush_user_cache_range + .long fa_coherent_kern_range + .long fa_coherent_user_range + .long fa_flush_kern_dcache_page + .long fa_dma_inv_range + .long fa_dma_clean_range + .long fa_dma_flush_range + .size fa_cache_fns, . - fa_cache_fns diff --git a/arch/arm/mm/copypage-fa.c b/arch/arm/mm/copypage-fa.c new file mode 100644 index 000000000000..b2a6008b0111 --- /dev/null +++ b/arch/arm/mm/copypage-fa.c @@ -0,0 +1,86 @@ +/* + * linux/arch/arm/lib/copypage-fa.S + * + * Copyright (C) 2005 Faraday Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas + * + * Based on copypage-v4wb.S: + * Copyright (C) 1995-1999 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + +/* + * Faraday optimised copy_user_page + */ +static void __naked +fa_copy_user_page(void *kto, const void *kfrom) +{ + asm("\ + stmfd sp!, {r4, lr} @ 2\n\ + mov r2, %0 @ 1\n\ +1: ldmia r1!, {r3, r4, ip, lr} @ 4\n\ + stmia r0, {r3, r4, ip, lr} @ 4\n\ + mcr p15, 0, r0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add r0, r0, #16 @ 1\n\ + ldmia r1!, {r3, r4, ip, lr} @ 4\n\ + stmia r0, {r3, r4, ip, lr} @ 4\n\ + mcr p15, 0, r0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add r0, r0, #16 @ 1\n\ + subs r2, r2, #1 @ 1\n\ + bne 1b @ 1\n\ + mcr p15, 0, r2, c7, c10, 4 @ 1 drain WB\n\ + ldmfd sp!, {r4, pc} @ 3" + : + : "I" (PAGE_SIZE / 32)); +} + +void fa_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr) +{ + void *kto, *kfrom; + + kto = kmap_atomic(to, KM_USER0); + kfrom = kmap_atomic(from, KM_USER1); + fa_copy_user_page(kto, kfrom); + kunmap_atomic(kfrom, KM_USER1); + kunmap_atomic(kto, KM_USER0); +} + +/* + * Faraday optimised clear_user_page + * + * Same story as above. + */ +void fa_clear_user_highpage(struct page *page, unsigned long vaddr) +{ + void *ptr, *kaddr = kmap_atomic(page, KM_USER0); + asm volatile("\ + mov r1, %2 @ 1\n\ + mov r2, #0 @ 1\n\ + mov r3, #0 @ 1\n\ + mov ip, #0 @ 1\n\ + mov lr, #0 @ 1\n\ +1: stmia %0, {r2, r3, ip, lr} @ 4\n\ + mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add %0, %0, #16 @ 1\n\ + stmia %0, {r2, r3, ip, lr} @ 4\n\ + mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add %0, %0, #16 @ 1\n\ + subs r1, r1, #1 @ 1\n\ + bne 1b @ 1\n\ + mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 32) + : "r1", "r2", "r3", "ip", "lr"); + kunmap_atomic(kaddr, KM_USER0); +} + +struct cpu_user_fns fa_user_fns __initdata = { + .cpu_clear_user_highpage = fa_clear_user_highpage, + .cpu_copy_user_highpage = fa_copy_user_highpage, +}; diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S new file mode 100644 index 000000000000..08b8a955d5d7 --- /dev/null +++ b/arch/arm/mm/proc-fa526.S @@ -0,0 +1,248 @@ +/* + * linux/arch/arm/mm/proc-fa526.S: MMU functions for FA526 + * + * Written by : Luke Lee + * Copyright (C) 2005 Faraday Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * + * These are the low level assembler for performing cache and TLB + * functions on the fa526. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "proc-macros.S" + +#define CACHE_DLINESIZE 16 + + .text +/* + * cpu_fa526_proc_init() + */ +ENTRY(cpu_fa526_proc_init) + mov pc, lr + +/* + * cpu_fa526_proc_fin() + */ +ENTRY(cpu_fa526_proc_fin) + stmfd sp!, {lr} + mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE + msr cpsr_c, ip + bl fa_flush_kern_cache_all + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + nop + nop + ldmfd sp!, {pc} + +/* + * cpu_fa526_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 4 +ENTRY(cpu_fa526_reset) +/* TODO: Use CP8 if possible... */ + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + bic ip, ip, #0x0800 @ BTB off + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + nop + nop + mov pc, r0 + +/* + * cpu_fa526_do_idle() + */ + .align 4 +ENTRY(cpu_fa526_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mov pc, lr + + +ENTRY(cpu_fa526_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_fa526_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 4 +ENTRY(cpu_fa526_switch_mm) +#ifdef CONFIG_MMU + mov ip, #0 +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache +#else + mcr p15, 0, ip, c7, c14, 0 @ clean and invalidate whole D cache +#endif + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c5, 6 @ invalidate BTB since mm changed + mcr p15, 0, ip, c7, c10, 4 @ data write barrier + mcr p15, 0, ip, c7, c5, 4 @ prefetch flush + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate UTLB +#endif + mov pc, lr + +/* + * cpu_fa526_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 4 +ENTRY(cpu_fa526_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif + mov pc, lr + + __INIT + + .type __fa526_setup, #function +__fa526_setup: + /* On return of this routine, r0 must carry correct flags for CFG register */ + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + mcr p15, 0, r0, c7, c5, 5 @ invalidate IScratchpad RAM + + mov r0, #1 + mcr p15, 0, r0, c1, c1, 0 @ turn-on ECR + + mov r0, #0 + mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB All + mcr p15, 0, r0, c7, c10, 4 @ data write barrier + mcr p15, 0, r0, c7, c5, 4 @ prefetch flush + + mov r0, #0x1f @ Domains 0, 1 = manager, 2 = client + mcr p15, 0, r0, c3, c0 @ load domain access register + + mrc p15, 0, r0, c1, c0 @ get control register v4 + ldr r5, fa526_cr1_clear + bic r0, r0, r5 + ldr r5, fa526_cr1_set + orr r0, r0, r5 + mov pc, lr + .size __fa526_setup, . - __fa526_setup + + /* + * .RVI ZFRS BLDP WCAM + * ..11 1001 .111 1101 + * + */ + .type fa526_cr1_clear, #object + .type fa526_cr1_set, #object +fa526_cr1_clear: + .word 0x3f3f +fa526_cr1_set: + .word 0x397D + + __INITDATA + +/* + * Purpose : Function pointers used to access above functions - all calls + * come through these + */ + .type fa526_processor_functions, #object +fa526_processor_functions: + .word v4_early_abort + .word pabort_noifar + .word cpu_fa526_proc_init + .word cpu_fa526_proc_fin + .word cpu_fa526_reset + .word cpu_fa526_do_idle + .word cpu_fa526_dcache_clean_area + .word cpu_fa526_switch_mm + .word cpu_fa526_set_pte_ext + .size fa526_processor_functions, . - fa526_processor_functions + + .section ".rodata" + + .type cpu_arch_name, #object +cpu_arch_name: + .asciz "armv4" + .size cpu_arch_name, . - cpu_arch_name + + .type cpu_elf_name, #object +cpu_elf_name: + .asciz "v4" + .size cpu_elf_name, . - cpu_elf_name + + .type cpu_fa526_name, #object +cpu_fa526_name: + .asciz "FA526" + .size cpu_fa526_name, . - cpu_fa526_name + + .align + + .section ".proc.info.init", #alloc, #execinstr + + .type __fa526_proc_info,#object +__fa526_proc_info: + .long 0x66015261 + .long 0xff01fff1 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __fa526_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF + .long cpu_fa526_name + .long fa526_processor_functions + .long fa_tlb_fns + .long fa_user_fns + .long fa_cache_fns + .size __fa526_proc_info, . - __fa526_proc_info diff --git a/arch/arm/mm/tlb-fa.S b/arch/arm/mm/tlb-fa.S new file mode 100644 index 000000000000..9694f1f6f485 --- /dev/null +++ b/arch/arm/mm/tlb-fa.S @@ -0,0 +1,75 @@ +/* + * linux/arch/arm/mm/tlb-fa.S + * + * Copyright (C) 2005 Faraday Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas + * + * Based on tlb-v4wbi.S: + * Copyright (C) 1997-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ARM architecture version 4, Faraday variation. + * This assume an unified TLBs, with a write buffer, and branch target buffer (BTB) + * + * Processors: FA520 FA526 FA626 + */ +#include +#include +#include +#include +#include "proc-macros.S" + + +/* + * flush_user_tlb_range(start, end, mm) + * + * Invalidate a range of TLB entries in the specified address space. + * + * - start - range start address + * - end - range end address + * - mm - mm_struct describing address space + */ + .align 4 +ENTRY(fa_flush_user_tlb_range) + vma_vm_mm ip, r2 + act_mm r3 @ get current->active_mm + eors r3, ip, r3 @ == mm ? + movne pc, lr @ no, we dont do anything + mov r3, #0 + mcr p15, 0, r3, c7, c10, 4 @ drain WB + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: mcr p15, 0, r0, c8, c7, 1 @ invalidate UTLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mcr p15, 0, r3, c7, c5, 6 @ invalidate BTB + mcr p15, 0, r3, c7, c10, 4 @ data write barrier + mov pc, lr + + +ENTRY(fa_flush_kern_tlb_range) + mov r3, #0 + mcr p15, 0, r3, c7, c10, 4 @ drain WB + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: mcr p15, 0, r0, c8, c7, 1 @ invalidate UTLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mcr p15, 0, r3, c7, c5, 6 @ invalidate BTB + mcr p15, 0, r3, c7, c10, 4 @ data write barrier + mcr p15, 0, r3, c7, c5, 4 @ prefetch flush + mov pc, lr + + __INITDATA + + .type fa_tlb_fns, #object +ENTRY(fa_tlb_fns) + .long fa_flush_user_tlb_range + .long fa_flush_kern_tlb_range + .long fa_tlb_flags + .size fa_tlb_fns, . - fa_tlb_fns -- cgit