diff options
Diffstat (limited to 'arch')
36 files changed, 1507 insertions, 358 deletions
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index 8d22015fde3e..1e95920b0737 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -12,6 +12,7 @@ config OPENRISC select HAVE_MEMBLOCK select GPIOLIB select HAVE_ARCH_TRACEHOOK + select SPARSE_IRQ select GENERIC_IRQ_CHIP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW diff --git a/arch/openrisc/TODO.openrisc b/arch/openrisc/TODO.openrisc index 0eb04c8240f9..c43d4e1d14eb 100644 --- a/arch/openrisc/TODO.openrisc +++ b/arch/openrisc/TODO.openrisc @@ -10,4 +10,3 @@ that are due for investigation shortly, i.e. our TODO list: or1k and this change is slowly trickling through the stack. For the time being, or32 is equivalent to or1k. --- Implement optimized version of memcpy and memset diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index ef8d1ccc3e45..fb241757f7f0 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -1,7 +1,6 @@ header-y += ucontext.h -generic-y += atomic.h generic-y += auxvec.h generic-y += barrier.h generic-y += bitsperlong.h @@ -10,8 +9,6 @@ generic-y += bugs.h generic-y += cacheflush.h generic-y += checksum.h generic-y += clkdev.h -generic-y += cmpxchg-local.h -generic-y += cmpxchg.h generic-y += current.h generic-y += device.h generic-y += div64.h @@ -22,12 +19,12 @@ generic-y += exec.h generic-y += fb.h generic-y += fcntl.h generic-y += ftrace.h -generic-y += futex.h generic-y += hardirq.h generic-y += hw_irq.h generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h +generic-y += irq.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += kdebug.h diff --git a/arch/openrisc/include/asm/atomic.h b/arch/openrisc/include/asm/atomic.h new file mode 100644 index 000000000000..146e1660f00e --- /dev/null +++ b/arch/openrisc/include/asm/atomic.h @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi> + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#ifndef __ASM_OPENRISC_ATOMIC_H +#define __ASM_OPENRISC_ATOMIC_H + +#include <linux/types.h> + +/* Atomically perform op with v->counter and i */ +#define ATOMIC_OP(op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + int tmp; \ + \ + __asm__ __volatile__( \ + "1: l.lwa %0,0(%1) \n" \ + " l." #op " %0,%0,%2 \n" \ + " l.swa 0(%1),%0 \n" \ + " l.bnf 1b \n" \ + " l.nop \n" \ + : "=&r"(tmp) \ + : "r"(&v->counter), "r"(i) \ + : "cc", "memory"); \ +} + +/* Atomically perform op with v->counter and i, return the result */ +#define ATOMIC_OP_RETURN(op) \ +static inline int atomic_##op##_return(int i, atomic_t *v) \ +{ \ + int tmp; \ + \ + __asm__ __volatile__( \ + "1: l.lwa %0,0(%1) \n" \ + " l." #op " %0,%0,%2 \n" \ + " l.swa 0(%1),%0 \n" \ + " l.bnf 1b \n" \ + " l.nop \n" \ + : "=&r"(tmp) \ + : "r"(&v->counter), "r"(i) \ + : "cc", "memory"); \ + \ + return tmp; \ +} + +/* Atomically perform op with v->counter and i, return orig v->counter */ +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int tmp, old; \ + \ + __asm__ __volatile__( \ + "1: l.lwa %0,0(%2) \n" \ + " l." #op " %1,%0,%3 \n" \ + " l.swa 0(%2),%1 \n" \ + " l.bnf 1b \n" \ + " l.nop \n" \ + : "=&r"(old), "=&r"(tmp) \ + : "r"(&v->counter), "r"(i) \ + : "cc", "memory"); \ + \ + return old; \ +} + +ATOMIC_OP_RETURN(add) +ATOMIC_OP_RETURN(sub) + +ATOMIC_FETCH_OP(add) +ATOMIC_FETCH_OP(sub) +ATOMIC_FETCH_OP(and) +ATOMIC_FETCH_OP(or) +ATOMIC_FETCH_OP(xor) + +ATOMIC_OP(and) +ATOMIC_OP(or) +ATOMIC_OP(xor) + +#undef ATOMIC_FETCH_OP +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP + +#define atomic_add_return atomic_add_return +#define atomic_sub_return atomic_sub_return +#define atomic_fetch_add atomic_fetch_add +#define atomic_fetch_sub atomic_fetch_sub +#define atomic_fetch_and atomic_fetch_and +#define atomic_fetch_or atomic_fetch_or +#define atomic_fetch_xor atomic_fetch_xor +#define atomic_and atomic_and +#define atomic_or atomic_or +#define atomic_xor atomic_xor + +/* + * Atomically add a to v->counter as long as v is not already u. + * Returns the original value at v->counter. + * + * This is often used through atomic_inc_not_zero() + */ +static inline int __atomic_add_unless(atomic_t *v, int a, int u) +{ + int old, tmp; + + __asm__ __volatile__( + "1: l.lwa %0, 0(%2) \n" + " l.sfeq %0, %4 \n" + " l.bf 2f \n" + " l.add %1, %0, %3 \n" + " l.swa 0(%2), %1 \n" + " l.bnf 1b \n" + " l.nop \n" + "2: \n" + : "=&r"(old), "=&r" (tmp) + : "r"(&v->counter), "r"(a), "r"(u) + : "cc", "memory"); + + return old; +} +#define __atomic_add_unless __atomic_add_unless + +#include <asm-generic/atomic.h> + +#endif /* __ASM_OPENRISC_ATOMIC_H */ diff --git a/arch/openrisc/include/asm/bitops.h b/arch/openrisc/include/asm/bitops.h index 3003cdad561b..689f56819d53 100644 --- a/arch/openrisc/include/asm/bitops.h +++ b/arch/openrisc/include/asm/bitops.h @@ -45,7 +45,7 @@ #include <asm-generic/bitops/hweight.h> #include <asm-generic/bitops/lock.h> -#include <asm-generic/bitops/atomic.h> +#include <asm/bitops/atomic.h> #include <asm-generic/bitops/non-atomic.h> #include <asm-generic/bitops/le.h> #include <asm-generic/bitops/ext2-atomic.h> diff --git a/arch/openrisc/include/asm/bitops/atomic.h b/arch/openrisc/include/asm/bitops/atomic.h new file mode 100644 index 000000000000..35fb85f61b4a --- /dev/null +++ b/arch/openrisc/include/asm/bitops/atomic.h @@ -0,0 +1,123 @@ +/* + * Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi> + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#ifndef __ASM_OPENRISC_BITOPS_ATOMIC_H +#define __ASM_OPENRISC_BITOPS_ATOMIC_H + +static inline void set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + unsigned long tmp; + + __asm__ __volatile__( + "1: l.lwa %0,0(%1) \n" + " l.or %0,%0,%2 \n" + " l.swa 0(%1),%0 \n" + " l.bnf 1b \n" + " l.nop \n" + : "=&r"(tmp) + : "r"(p), "r"(mask) + : "cc", "memory"); +} + +static inline void clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + unsigned long tmp; + + __asm__ __volatile__( + "1: l.lwa %0,0(%1) \n" + " l.and %0,%0,%2 \n" + " l.swa 0(%1),%0 \n" + " l.bnf 1b \n" + " l.nop \n" + : "=&r"(tmp) + : "r"(p), "r"(~mask) + : "cc", "memory"); +} + +static inline void change_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + unsigned long tmp; + + __asm__ __volatile__( + "1: l.lwa %0,0(%1) \n" + " l.xor %0,%0,%2 \n" + " l.swa 0(%1),%0 \n" + " l.bnf 1b \n" + " l.nop \n" + : "=&r"(tmp) + : "r"(p), "r"(mask) + : "cc", "memory"); +} + +static inline int test_and_set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + unsigned long old; + unsigned long tmp; + + __asm__ __volatile__( + "1: l.lwa %0,0(%2) \n" + " l.or %1,%0,%3 \n" + " l.swa 0(%2),%1 \n" + " l.bnf 1b \n" + " l.nop \n" + : "=&r"(old), "=&r"(tmp) + : "r"(p), "r"(mask) + : "cc", "memory"); + + return (old & mask) != 0; +} + +static inline int test_and_clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + unsigned long old; + unsigned long tmp; + + __asm__ __volatile__( + "1: l.lwa %0,0(%2) \n" + " l.and %1,%0,%3 \n" + " l.swa 0(%2),%1 \n" + " l.bnf 1b \n" + " l.nop \n" + : "=&r"(old), "=&r"(tmp) + : "r"(p), "r"(~mask) + : "cc", "memory"); + + return (old & mask) != 0; +} + +static inline int test_and_change_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + unsigned long old; + unsigned long tmp; + + __asm__ __volatile__( + "1: l.lwa %0,0(%2) \n" + " l.xor %1,%0,%3 \n" + " l.swa 0(%2),%1 \n" + " l.bnf 1b \n" + " l.nop \n" + : "=&r"(old), "=&r"(tmp) + : "r"(p), "r"(mask) + : "cc", "memory"); + + return (old & mask) != 0; +} + +#endif /* __ASM_OPENRISC_BITOPS_ATOMIC_H */ diff --git a/arch/openrisc/include/asm/cmpxchg.h b/arch/openrisc/include/asm/cmpxchg.h new file mode 100644 index 000000000000..5fcb9ac72693 --- /dev/null +++ b/arch/openrisc/include/asm/cmpxchg.h @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi> + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#ifndef __ASM_OPENRISC_CMPXCHG_H +#define __ASM_OPENRISC_CMPXCHG_H + +#include <linux/types.h> + +/* + * This function doesn't exist, so you'll get a linker error + * if something tries to do an invalid cmpxchg(). + */ +extern void __cmpxchg_called_with_bad_pointer(void); + +#define __HAVE_ARCH_CMPXCHG 1 + +static inline unsigned long +__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) +{ + if (size != 4) { + __cmpxchg_called_with_bad_pointer(); + return old; + } + + __asm__ __volatile__( + "1: l.lwa %0, 0(%1) \n" + " l.sfeq %0, %2 \n" + " l.bnf 2f \n" + " l.nop \n" + " l.swa 0(%1), %3 \n" + " l.bnf 1b \n" + " l.nop \n" + "2: \n" + : "=&r"(old) + : "r"(ptr), "r"(old), "r"(new) + : "cc", "memory"); + + return old; +} + +#define cmpxchg(ptr, o, n) \ + ({ \ + (__typeof__(*(ptr))) __cmpxchg((ptr), \ + (unsigned long)(o), \ + (unsigned long)(n), \ + sizeof(*(ptr))); \ + }) + +/* + * This function doesn't exist, so you'll get a linker error if + * something tries to do an invalidly-sized xchg(). + */ +extern void __xchg_called_with_bad_pointer(void); + +static inline unsigned long __xchg(unsigned long val, volatile void *ptr, + int size) +{ + if (size != 4) { + __xchg_called_with_bad_pointer(); + return val; + } + + __asm__ __volatile__( + "1: l.lwa %0, 0(%1) \n" + " l.swa 0(%1), %2 \n" + " l.bnf 1b \n" + " l.nop \n" + : "=&r"(val) + : "r"(ptr), "r"(val) + : "cc", "memory"); + + return val; +} + +#define xchg(ptr, with) \ + ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), sizeof(*(ptr)))) + +#endif /* __ASM_OPENRISC_CMPXCHG_H */ diff --git a/arch/openrisc/include/asm/cpuinfo.h b/arch/openrisc/include/asm/cpuinfo.h index 917318b6a970..ec10679d6429 100644 --- a/arch/openrisc/include/asm/cpuinfo.h +++ b/arch/openrisc/include/asm/cpuinfo.h @@ -24,9 +24,11 @@ struct cpuinfo { u32 icache_size; u32 icache_block_size; + u32 icache_ways; u32 dcache_size; u32 dcache_block_size; + u32 dcache_ways; }; extern struct cpuinfo cpuinfo; diff --git a/arch/openrisc/include/asm/futex.h b/arch/openrisc/include/asm/futex.h new file mode 100644 index 000000000000..778087341977 --- /dev/null +++ b/arch/openrisc/include/asm/futex.h @@ -0,0 +1,135 @@ +#ifndef __ASM_OPENRISC_FUTEX_H +#define __ASM_OPENRISC_FUTEX_H + +#ifdef __KERNEL__ + +#include <linux/futex.h> +#include <linux/uaccess.h> +#include <asm/errno.h> + +#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ +({ \ + __asm__ __volatile__ ( \ + "1: l.lwa %0, %2 \n" \ + insn "\n" \ + "2: l.swa %2, %1 \n" \ + " l.bnf 1b \n" \ + " l.ori %1, r0, 0 \n" \ + "3: \n" \ + ".section .fixup,\"ax\" \n" \ + "4: l.j 3b \n" \ + " l.addi %1, r0, %3 \n" \ + ".previous \n" \ + ".section __ex_table,\"a\" \n" \ + ".word 1b,4b,2b,4b \n" \ + ".previous \n" \ + : "=&r" (oldval), "=&r" (ret), "+m" (*uaddr) \ + : "i" (-EFAULT), "r" (oparg) \ + : "cc", "memory" \ + ); \ +}) + +static inline int +futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret; + + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) + return -EFAULT; + + pagefault_disable(); + + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op("l.or %1,%4,%4", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op("l.add %1,%0,%4", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op("l.or %1,%0,%4", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op("l.and %1,%0,%4", ret, oldval, uaddr, ~oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op("l.xor %1,%0,%4", ret, oldval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } + + pagefault_enable(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: + ret = (oldval == cmparg); + break; + case FUTEX_OP_CMP_NE: + ret = (oldval != cmparg); + break; + case FUTEX_OP_CMP_LT: + ret = (oldval < cmparg); + break; + case FUTEX_OP_CMP_GE: + ret = (oldval >= cmparg); + break; + case FUTEX_OP_CMP_LE: + ret = (oldval <= cmparg); + break; + case FUTEX_OP_CMP_GT: + ret = (oldval > cmparg); + break; + default: + ret = -ENOSYS; + } + } + return ret; +} + +static inline int +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + int ret = 0; + u32 prev; + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) + return -EFAULT; + + __asm__ __volatile__ ( \ + "1: l.lwa %1, %2 \n" \ + " l.sfeq %1, %3 \n" \ + " l.bnf 3f \n" \ + " l.nop \n" \ + "2: l.swa %2, %4 \n" \ + " l.bnf 1b \n" \ + " l.nop \n" \ + "3: \n" \ + ".section .fixup,\"ax\" \n" \ + "4: l.j 3b \n" \ + " l.addi %0, r0, %5 \n" \ + ".previous \n" \ + ".section __ex_table,\"a\" \n" \ + ".word 1b,4b,2b,4b \n" \ + ".previous \n" \ + : "+r" (ret), "=&r" (prev), "+m" (*uaddr) \ + : "r" (oldval), "r" (newval), "i" (-EFAULT) \ + : "cc", "memory" \ + ); + + *uval = prev; + return ret; +} + +#endif /* __KERNEL__ */ + +#endif /* __ASM_OPENRISC_FUTEX_H */ diff --git a/arch/openrisc/include/asm/spr_defs.h b/arch/openrisc/include/asm/spr_defs.h index 5dbc668865c4..367dac70326a 100644 --- a/arch/openrisc/include/asm/spr_defs.h +++ b/arch/openrisc/include/asm/spr_defs.h @@ -152,8 +152,8 @@ #define SPR_UPR_MP 0x00000020 /* MAC present */ #define SPR_UPR_DUP 0x00000040 /* Debug unit present */ #define SPR_UPR_PCUP 0x00000080 /* Performance counters unit present */ -#define SPR_UPR_PMP 0x00000100 /* Power management present */ -#define SPR_UPR_PICP 0x00000200 /* PIC present */ +#define SPR_UPR_PICP 0x00000100 /* PIC present */ +#define SPR_UPR_PMP 0x00000200 /* Power management present */ #define SPR_UPR_TTP 0x00000400 /* Tick timer present */ #define SPR_UPR_RES 0x00fe0000 /* Reserved */ #define SPR_UPR_CUP 0xff000000 /* Context units present */ diff --git a/arch/openrisc/include/asm/string.h b/arch/openrisc/include/asm/string.h new file mode 100644 index 000000000000..64939ccd7531 --- /dev/null +++ b/arch/openrisc/include/asm/string.h @@ -0,0 +1,10 @@ +#ifndef __ASM_OPENRISC_STRING_H +#define __ASM_OPENRISC_STRING_H + +#define __HAVE_ARCH_MEMSET +extern void *memset(void *s, int c, __kernel_size_t n); + +#define __HAVE_ARCH_MEMCPY +extern void *memcpy(void *dest, __const void *src, __kernel_size_t n); + +#endif /* __ASM_OPENRISC_STRING_H */ diff --git a/arch/openrisc/kernel/.gitignore b/arch/openrisc/kernel/.gitignore new file mode 100644 index 000000000000..c5f676c3c224 --- /dev/null +++ b/arch/openrisc/kernel/.gitignore @@ -0,0 +1 @@ +vmlinux.lds diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S index aac0bde3330c..bc6500860f4d 100644 --- a/arch/openrisc/kernel/entry.S +++ b/arch/openrisc/kernel/entry.S @@ -173,6 +173,11 @@ handler: ;\ l.j _ret_from_exception ;\ l.nop +/* clobbers 'reg' */ +#define CLEAR_LWA_FLAG(reg) \ + l.movhi reg,hi(lwa_flag) ;\ + l.ori reg,reg,lo(lwa_flag) ;\ + l.sw 0(reg),r0 /* * NOTE: one should never assume that SPR_EPC, SPR_ESR, SPR_EEAR * contain the same values as when exception we're handling @@ -193,6 +198,7 @@ EXCEPTION_ENTRY(_tng_kernel_start) /* ---[ 0x200: BUS exception ]------------------------------------------- */ EXCEPTION_ENTRY(_bus_fault_handler) + CLEAR_LWA_FLAG(r3) /* r4: EA of fault (set by EXCEPTION_HANDLE) */ l.jal do_bus_fault l.addi r3,r1,0 /* pt_regs */ @@ -202,11 +208,13 @@ EXCEPTION_ENTRY(_bus_fault_handler) /* ---[ 0x300: Data Page Fault exception ]------------------------------- */ EXCEPTION_ENTRY(_dtlb_miss_page_fault_handler) + CLEAR_LWA_FLAG(r3) l.and r5,r5,r0 l.j 1f l.nop EXCEPTION_ENTRY(_data_page_fault_handler) + CLEAR_LWA_FLAG(r3) /* set up parameters for do_page_fault */ l.ori r5,r0,0x300 // exception vector 1: @@ -220,7 +228,7 @@ EXCEPTION_ENTRY(_data_page_fault_handler) * DTLB miss handler in the CONFIG_GUARD_PROTECTED_CORE part */ #ifdef CONFIG_OPENRISC_NO_SPR_SR_DSX - l.lwz r6,PT_PC(r3) // address of an offending insn + l.lwz r6,PT_PC(r3) // address of an offending insn l.lwz r6,0(r6) // instruction that caused pf l.srli r6,r6,26 // check opcode for jump insn @@ -236,57 +244,57 @@ EXCEPTION_ENTRY(_data_page_fault_handler) l.bf 8f l.sfeqi r6,0x12 // l.jalr l.bf 8f - - l.nop + l.nop l.j 9f - l.nop -8: + l.nop - l.lwz r6,PT_PC(r3) // address of an offending insn +8: // offending insn is in delay slot + l.lwz r6,PT_PC(r3) // address of an offending insn l.addi r6,r6,4 l.lwz r6,0(r6) // instruction that caused pf l.srli r6,r6,26 // get opcode -9: +9: // offending instruction opcode loaded in r6 #else - l.mfspr r6,r0,SPR_SR // SR -// l.lwz r6,PT_SR(r3) // ESR - l.andi r6,r6,SPR_SR_DSX // check for delay slot exception - l.sfeqi r6,0x1 // exception happened in delay slot - l.bnf 7f - l.lwz r6,PT_PC(r3) // address of an offending insn + l.lwz r6,PT_SR(r3) // SR + l.andi r6,r6,SPR_SR_DSX // check for delay slot exception + l.sfne r6,r0 // exception happened in delay slot + l.bnf 7f + l.lwz r6,PT_PC(r3) // address of an offending insn - l.addi r6,r6,4 // offending insn is in delay slot + l.addi r6,r6,4 // offending insn is in delay slot 7: l.lwz r6,0(r6) // instruction that caused pf l.srli r6,r6,26 // check opcode for write access #endif - l.sfgeui r6,0x33 // check opcode for write access + l.sfgeui r6,0x33 // check opcode for write access l.bnf 1f l.sfleui r6,0x37 l.bnf 1f l.ori r6,r0,0x1 // write access l.j 2f - l.nop + l.nop 1: l.ori r6,r0,0x0 // !write access 2: /* call fault.c handler in or32/mm/fault.c */ l.jal do_page_fault - l.nop + l.nop l.j _ret_from_exception - l.nop + l.nop /* ---[ 0x400: Insn Page Fault exception ]------------------------------- */ EXCEPTION_ENTRY(_itlb_miss_page_fault_handler) + CLEAR_LWA_FLAG(r3) l.and r5,r5,r0 l.j 1f l.nop EXCEPTION_ENTRY(_insn_page_fault_handler) + CLEAR_LWA_FLAG(r3) /* set up parameters for do_page_fault */ l.ori r5,r0,0x400 // exception vector 1: @@ -296,14 +304,15 @@ EXCEPTION_ENTRY(_insn_page_fault_handler) /* call fault.c handler in or32/mm/fault.c */ l.jal do_page_fault - l.nop + l.nop l.j _ret_from_exception - l.nop + l.nop /* ---[ 0x500: Timer exception ]----------------------------------------- */ EXCEPTION_ENTRY(_timer_handler) + CLEAR_LWA_FLAG(r3) l.jal timer_interrupt l.addi r3,r1,0 /* pt_regs */ @@ -313,6 +322,7 @@ EXCEPTION_ENTRY(_timer_handler) /* ---[ 0x600: Aligment exception ]-------------------------------------- */ EXCEPTION_ENTRY(_alignment_handler) + CLEAR_LWA_FLAG(r3) /* r4: EA of fault (set by EXCEPTION_HANDLE) */ l.jal do_unaligned_access l.addi r3,r1,0 /* pt_regs */ @@ -509,6 +519,7 @@ EXCEPTION_ENTRY(_external_irq_handler) // l.sw PT_SR(r1),r4 1: #endif + CLEAR_LWA_FLAG(r3) l.addi r3,r1,0 l.movhi r8,hi(do_IRQ) l.ori r8,r8,lo(do_IRQ) @@ -556,8 +567,12 @@ ENTRY(_sys_call_handler) * they should be clobbered, otherwise */ l.sw PT_GPR3(r1),r3 - /* r4 already saved */ - /* r4 holds the EEAR address of the fault, load the original r4 */ + /* + * r4 already saved + * r4 holds the EEAR address of the fault, use it as screatch reg and + * then load the original r4 + */ + CLEAR_LWA_FLAG(r4) l.lwz r4,PT_GPR4(r1) l.sw PT_GPR5(r1),r5 l.sw PT_GPR6(r1),r6 @@ -776,6 +791,7 @@ UNHANDLED_EXCEPTION(_vector_0xd00,0xd00) /* ---[ 0xe00: Trap exception ]------------------------------------------ */ EXCEPTION_ENTRY(_trap_handler) + CLEAR_LWA_FLAG(r3) /* r4: EA of fault (set by EXCEPTION_HANDLE) */ l.jal do_trap l.addi r3,r1,0 /* pt_regs */ diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S index f14793306b03..d01b82eace3e 100644 --- a/arch/openrisc/kernel/head.S +++ b/arch/openrisc/kernel/head.S @@ -24,6 +24,7 @@ #include <asm/page.h> #include <asm/mmu.h> #include <asm/pgtable.h> +#include <asm/thread_info.h> #include <asm/cache.h> #include <asm/spr_defs.h> #include <asm/asm-offsets.h> @@ -34,7 +35,7 @@ l.add rd,rd,rs #define CLEAR_GPR(gpr) \ - l.or gpr,r0,r0 + l.movhi gpr,0x0 #define LOAD_SYMBOL_2_GPR(gpr,symbol) \ l.movhi gpr,hi(symbol) ;\ @@ -442,6 +443,9 @@ _dispatch_do_ipage_fault: __HEAD .global _start _start: + /* Init r0 to zero as per spec */ + CLEAR_GPR(r0) + /* save kernel parameters */ l.or r25,r0,r3 /* pointer to fdt */ @@ -486,7 +490,8 @@ _start: /* * set up initial ksp and current */ - LOAD_SYMBOL_2_GPR(r1,init_thread_union+0x2000) // setup kernel stack + /* setup kernel stack */ + LOAD_SYMBOL_2_GPR(r1,init_thread_union + THREAD_SIZE) LOAD_SYMBOL_2_GPR(r10,init_thread_union) // setup current tophys (r31,r10) l.sw TI_KSP(r31), r1 @@ -520,22 +525,8 @@ enable_dc: l.nop flush_tlb: - /* - * I N V A L I D A T E T L B e n t r i e s - */ - LOAD_SYMBOL_2_GPR(r5,SPR_DTLBMR_BASE(0)) - LOAD_SYMBOL_2_GPR(r6,SPR_ITLBMR_BASE(0)) - l.addi r7,r0,128 /* Maximum number of sets */ -1: - l.mtspr r5,r0,0x0 - l.mtspr r6,r0,0x0 - - l.addi r5,r5,1 - l.addi r6,r6,1 - l.sfeq r7,r0 - l.bnf 1b - l.addi r7,r7,-1 - + l.jal _flush_tlb + l.nop /* The MMU needs to be enabled before or32_early_setup is called */ @@ -627,6 +618,26 @@ jump_start_kernel: l.jr r30 l.nop +_flush_tlb: + /* + * I N V A L I D A T E T L B e n t r i e s + */ + LOAD_SYMBOL_2_GPR(r5,SPR_DTLBMR_BASE(0)) + LOAD_SYMBOL_2_GPR(r6,SPR_ITLBMR_BASE(0)) + l.addi r7,r0,128 /* Maximum number of sets */ +1: + l.mtspr r5,r0,0x0 + l.mtspr r6,r0,0x0 + + l.addi r5,r5,1 + l.addi r6,r6,1 + l.sfeq r7,r0 + l.bnf 1b + l.addi r7,r7,-1 + + l.jr r9 + l.nop + /* ========================================[ cache ]=== */ /* aligment here so we don't change memory offsets with @@ -971,8 +982,6 @@ ENTRY(dtlb_miss_handler) EXCEPTION_STORE_GPR2 EXCEPTION_STORE_GPR3 EXCEPTION_STORE_GPR4 - EXCEPTION_STORE_GPR5 - EXCEPTION_STORE_GPR6 /* * get EA of the miss */ @@ -980,91 +989,70 @@ ENTRY(dtlb_miss_handler) /* * pmd = (pmd_t *)(current_pgd + pgd_index(daddr)); */ - GET_CURRENT_PGD(r3,r5) // r3 is current_pgd, r5 is temp + GET_CURRENT_PGD(r3,r4) // r3 is current_pgd, r4 is temp l.srli r4,r2,0x18 // >> PAGE_SHIFT + (PAGE_SHIFT - 2) l.slli r4,r4,0x2 // to get address << 2 - l.add r5,r4,r3 // r4 is pgd_index(daddr) + l.add r3,r4,r3 // r4 is pgd_index(daddr) /* * if (pmd_none(*pmd)) * goto pmd_none: */ - tophys (r4,r5) + tophys (r4,r3) l.lwz r3,0x0(r4) // get *pmd value l.sfne r3,r0 l.bnf d_pmd_none - l.andi r3,r3,~PAGE_MASK //0x1fff // ~PAGE_MASK - /* - * if (pmd_bad(*pmd)) - * pmd_clear(pmd) - * goto pmd_bad: - */ -// l.sfeq r3,r0 // check *pmd value -// l.bf d_pmd_good - l.addi r3,r0,0xffffe000 // PAGE_MASK -// l.j d_pmd_bad -// l.sw 0x0(r4),r0 // clear pmd + l.addi r3,r0,0xffffe000 // PAGE_MASK + d_pmd_good: /* * pte = *pte_offset(pmd, daddr); */ l.lwz r4,0x0(r4) // get **pmd value l.and r4,r4,r3 // & PAGE_MASK - l.srli r5,r2,0xd // >> PAGE_SHIFT, r2 == EEAR - l.andi r3,r5,0x7ff // (1UL << PAGE_SHIFT - 2) - 1 + l.srli r2,r2,0xd // >> PAGE_SHIFT, r2 == EEAR + l.andi r3,r2,0x7ff // (1UL << PAGE_SHIFT - 2) - 1 l.slli r3,r3,0x2 // to get address << 2 l.add r3,r3,r4 - l.lwz r2,0x0(r3) // this is pte at last + l.lwz r3,0x0(r3) // this is pte at last /* * if (!pte_present(pte)) */ - l.andi r4,r2,0x1 + l.andi r4,r3,0x1 l.sfne r4,r0 // is pte present l.bnf d_pte_not_present - l.addi r3,r0,0xffffe3fa // PAGE_MASK | DTLB_UP_CONVERT_MASK + l.addi r4,r0,0xffffe3fa // PAGE_MASK | DTLB_UP_CONVERT_MASK /* * fill DTLB TR register */ - l.and r4,r2,r3 // apply the mask + l.and r4,r3,r4 // apply the mask // Determine number of DMMU sets - l.mfspr r6, r0, SPR_DMMUCFGR - l.andi r6, r6, SPR_DMMUCFGR_NTS - l.srli r6, r6, SPR_DMMUCFGR_NTS_OFF + l.mfspr r2, r0, SPR_DMMUCFGR + l.andi r2, r2, SPR_DMMUCFGR_NTS + l.srli r2, r2, SPR_DMMUCFGR_NTS_OFF l.ori r3, r0, 0x1 - l.sll r3, r3, r6 // r3 = number DMMU sets DMMUCFGR - l.addi r6, r3, -1 // r6 = nsets mask - l.and r5, r5, r6 // calc offset: & (NUM_TLB_ENTRIES-1) + l.sll r3, r3, r2 // r3 = number DMMU sets DMMUCFGR + l.addi r2, r3, -1 // r2 = nsets mask + l.mfspr r3, r0, SPR_EEAR_BASE + l.srli r3, r3, 0xd // >> PAGE_SHIFT + l.and r2, r3, r2 // calc offset: & (NUM_TLB_ENTRIES-1) //NUM_TLB_ENTRIES - l.mtspr r5,r4,SPR_DTLBTR_BASE(0) + l.mtspr r2,r4,SPR_DTLBTR_BASE(0) /* * fill DTLB MR register */ - l.mfspr r2,r0,SPR_EEAR_BASE - l.addi r3,r0,0xffffe000 // PAGE_MASK - l.and r4,r2,r3 // apply PAGE_MASK to EA (__PHX__ do we really need this?) - l.ori r4,r4,0x1 // set hardware valid bit: DTBL_MR entry - l.mtspr r5,r4,SPR_DTLBMR_BASE(0) + l.slli r3, r3, 0xd /* << PAGE_SHIFT => EA & PAGE_MASK */ + l.ori r4,r3,0x1 // set hardware valid bit: DTBL_MR entry + l.mtspr r2,r4,SPR_DTLBMR_BASE(0) EXCEPTION_LOAD_GPR2 EXCEPTION_LOAD_GPR3 EXCEPTION_LOAD_GPR4 - EXCEPTION_LOAD_GPR5 - EXCEPTION_LOAD_GPR6 - l.rfe -d_pmd_bad: - l.nop 1 - EXCEPTION_LOAD_GPR2 - EXCEPTION_LOAD_GPR3 - EXCEPTION_LOAD_GPR4 - EXCEPTION_LOAD_GPR5 - EXCEPTION_LOAD_GPR6 l.rfe d_pmd_none: d_pte_not_present: EXCEPTION_LOAD_GPR2 EXCEPTION_LOAD_GPR3 EXCEPTION_LOAD_GPR4 - EXCEPTION_LOAD_GPR5 - EXCEPTION_LOAD_GPR6 EXCEPTION_HANDLE(_dtlb_miss_page_fault_handler) /* ==============================================[ ITLB miss handler ]=== */ @@ -1072,8 +1060,6 @@ ENTRY(itlb_miss_handler) EXCEPTION_STORE_GPR2 EXCEPTION_STORE_GPR3 EXCEPTION_STORE_GPR4 - EXCEPTION_STORE_GPR5 - EXCEPTION_STORE_GPR6 /* * get EA of the miss */ @@ -1083,30 +1069,19 @@ ENTRY(itlb_miss_handler) * pmd = (pmd_t *)(current_pgd + pgd_index(daddr)); * */ - GET_CURRENT_PGD(r3,r5) // r3 is current_pgd, r5 is temp + GET_CURRENT_PGD(r3,r4) // r3 is current_pgd, r5 is temp l.srli r4,r2,0x18 // >> PAGE_SHIFT + (PAGE_SHIFT - 2) l.slli r4,r4,0x2 // to get address << 2 - l.add r5,r4,r3 // r4 is pgd_index(daddr) + l.add r3,r4,r3 // r4 is pgd_index(daddr) /* * if (pmd_none(*pmd)) * goto pmd_none: */ - tophys (r4,r5) + tophys (r4,r3) l.lwz r3,0x0(r4) // get *pmd value l.sfne r3,r0 l.bnf i_pmd_none - l.andi r3,r3,0x1fff // ~PAGE_MASK - /* - * if (pmd_bad(*pmd)) - * pmd_clear(pmd) - * goto pmd_bad: - */ - -// l.sfeq r3,r0 // check *pmd value -// l.bf i_pmd_good - l.addi r3,r0,0xffffe000 // PAGE_MASK -// l.j i_pmd_bad -// l.sw 0x0(r4),r0 // clear pmd + l.addi r3,r0,0xffffe000 // PAGE_MASK i_pmd_good: /* @@ -1115,35 +1090,36 @@ i_pmd_good: */ l.lwz r4,0x0(r4) // get **pmd value l.and r4,r4,r3 // & PAGE_MASK - l.srli r5,r2,0xd // >> PAGE_SHIFT, r2 == EEAR - l.andi r3,r5,0x7ff // (1UL << PAGE_SHIFT - 2) - 1 + l.srli r2,r2,0xd // >> PAGE_SHIFT, r2 == EEAR + l.andi r3,r2,0x7ff // (1UL << PAGE_SHIFT - 2) - 1 l.slli r3,r3,0x2 // to get address << 2 l.add r3,r3,r4 - l.lwz r2,0x0(r3) // this is pte at last + l.lwz r3,0x0(r3) // this is pte at last /* * if (!pte_present(pte)) * */ - l.andi r4,r2,0x1 + l.andi r4,r3,0x1 l.sfne r4,r0 // is pte present l.bnf i_pte_not_present - l.addi r3,r0,0xffffe03a // PAGE_MASK | ITLB_UP_CONVERT_MASK + l.addi r4,r0,0xffffe03a // PAGE_MASK | ITLB_UP_CONVERT_MASK /* * fill ITLB TR register */ - l.and r4,r2,r3 // apply the mask - l.andi r3,r2,0x7c0 // _PAGE_EXEC | _PAGE_SRE | _PAGE_SWE | _PAGE_URE | _PAGE_UWE -// l.andi r3,r2,0x400 // _PAGE_EXEC + l.and r4,r3,r4 // apply the mask + l.andi r3,r3,0x7c0 // _PAGE_EXEC | _PAGE_SRE | _PAGE_SWE | _PAGE_URE | _PAGE_UWE l.sfeq r3,r0 l.bf itlb_tr_fill //_workaround // Determine number of IMMU sets - l.mfspr r6, r0, SPR_IMMUCFGR - l.andi r6, r6, SPR_IMMUCFGR_NTS - l.srli r6, r6, SPR_IMMUCFGR_NTS_OFF + l.mfspr r2, r0, SPR_IMMUCFGR + l.andi r2, r2, SPR_IMMUCFGR_NTS + l.srli r2, r2, SPR_IMMUCFGR_NTS_OFF l.ori r3, r0, 0x1 - l.sll r3, r3, r6 // r3 = number IMMU sets IMMUCFGR - l.addi r6, r3, -1 // r6 = nsets mask - l.and r5, r5, r6 // calc offset: & (NUM_TLB_ENTRIES-1) + l.sll r3, r3, r2 // r3 = number IMMU sets IMMUCFGR + l.addi r2, r3, -1 // r2 = nsets mask + l.mfspr r3, r0, SPR_EEAR_BASE + l.srli r3, r3, 0xd // >> PAGE_SHIFT + l.and r2, r3, r2 // calc offset: & (NUM_TLB_ENTRIES-1) /* * __PHX__ :: fixme @@ -1155,38 +1131,24 @@ i_pmd_good: itlb_tr_fill_workaround: l.ori r4,r4,0xc0 // | (SPR_ITLBTR_UXE | ITLBTR_SXE) itlb_tr_fill: - l.mtspr r5,r4,SPR_ITLBTR_BASE(0) + l.mtspr r2,r4,SPR_ITLBTR_BASE(0) /* * fill DTLB MR register */ - l.mfspr r2,r0,SPR_EEAR_BASE - l.addi r3,r0,0xffffe000 // PAGE_MASK - l.and r4,r2,r3 // apply PAGE_MASK to EA (__PHX__ do we really need this?) - l.ori r4,r4,0x1 // set hardware valid bit: DTBL_MR entry - l.mtspr r5,r4,SPR_ITLBMR_BASE(0) + l.slli r3, r3, 0xd /* << PAGE_SHIFT => EA & PAGE_MASK */ + l.ori r4,r3,0x1 // set hardware valid bit: ITBL_MR entry + l.mtspr r2,r4,SPR_ITLBMR_BASE(0) EXCEPTION_LOAD_GPR2 EXCEPTION_LOAD_GPR3 EXCEPTION_LOAD_GPR4 - EXCEPTION_LOAD_GPR5 - EXCEPTION_LOAD_GPR6 l.rfe -i_pmd_bad: - l.nop 1 - EXCEPTION_LOAD_GPR2 - EXCEPTION_LOAD_GPR3 - EXCEPTION_LOAD_GPR4 - EXCEPTION_LOAD_GPR5 - EXCEPTION_LOAD_GPR6 - l.rfe i_pmd_none: i_pte_not_present: EXCEPTION_LOAD_GPR2 EXCEPTION_LOAD_GPR3 EXCEPTION_LOAD_GPR4 - EXCEPTION_LOAD_GPR5 - EXCEPTION_LOAD_GPR6 EXCEPTION_HANDLE(_itlb_miss_page_fault_handler) /* ==============================================[ boot tlb handlers ]=== */ @@ -1571,12 +1533,7 @@ ENTRY(_early_uart_init) l.jr r9 l.nop -_string_copying_linux: - .string "\n\n\n\n\n\rCopying Linux... \0" - -_string_ok_booting: - .string "Ok, booting the kernel.\n\r\0" - + .section .rodata _string_unhandled_exception: .string "\n\rRunarunaround: Unhandled exception 0x\0" @@ -1586,11 +1543,6 @@ _string_epc_prefix: _string_nl: .string "\n\r\0" - .global _string_esr_irq_bug -_string_esr_irq_bug: - .string "\n\rESR external interrupt bug, for details look into entry.S\n\r\0" - - /* ========================================[ page aligned structures ]=== */ diff --git a/arch/openrisc/kernel/or32_ksyms.c b/arch/openrisc/kernel/or32_ksyms.c index 86e31cf1de1d..5c4695d13542 100644 --- a/arch/openrisc/kernel/or32_ksyms.c +++ b/arch/openrisc/kernel/or32_ksyms.c @@ -44,3 +44,4 @@ DECLARE_EXPORT(__ashldi3); DECLARE_EXPORT(__lshrdi3); EXPORT_SYMBOL(__copy_tofrom_user); +EXPORT_SYMBOL(memset); diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index d7990df9025a..6e9d1cb519f2 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -75,6 +75,17 @@ void machine_power_off(void) __asm__("l.nop 1"); } +/* + * Send the doze signal to the cpu if available. + * Make sure, that all interrupts are enabled + */ +void arch_cpu_idle(void) +{ + local_irq_enable(); + if (mfspr(SPR_UPR) & SPR_UPR_PMP) + mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME); +} + void (*pm_power_off) (void) = machine_power_off; /* @@ -226,6 +237,7 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t * fpu) extern struct thread_info *_switch(struct thread_info *old_ti, struct thread_info *new_ti); +extern int lwa_flag; struct task_struct *__switch_to(struct task_struct *old, struct task_struct *new) @@ -243,6 +255,8 @@ struct task_struct *__switch_to(struct task_struct *old, new_ti = new->stack; old_ti = old->stack; + lwa_flag = 0; + current_thread_info_set[smp_processor_id()] = new_ti; last = (_switch(old_ti, new_ti))->task; diff --git a/arch/openrisc/kernel/ptrace.c b/arch/openrisc/kernel/ptrace.c index 4f59fa4e34e5..228288887d74 100644 --- a/arch/openrisc/kernel/ptrace.c +++ b/arch/openrisc/kernel/ptrace.c @@ -16,7 +16,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <stddef.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/string.h> diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index cb797a3beb47..dbf5ee95a0d5 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -117,13 +117,15 @@ static void print_cpuinfo(void) if (upr & SPR_UPR_DCP) printk(KERN_INFO "-- dcache: %4d bytes total, %2d bytes/line, %d way(s)\n", - cpuinfo.dcache_size, cpuinfo.dcache_block_size, 1); + cpuinfo.dcache_size, cpuinfo.dcache_block_size, + cpuinfo.dcache_ways); else printk(KERN_INFO "-- dcache disabled\n"); if (upr & SPR_UPR_ICP) printk(KERN_INFO "-- icache: %4d bytes total, %2d bytes/line, %d way(s)\n", - cpuinfo.icache_size, cpuinfo.icache_block_size, 1); + cpuinfo.icache_size, cpuinfo.icache_block_size, + cpuinfo.icache_ways); else printk(KERN_INFO "-- icache disabled\n"); @@ -155,25 +157,25 @@ void __init setup_cpuinfo(void) { struct device_node *cpu; unsigned long iccfgr, dccfgr; - unsigned long cache_set_size, cache_ways; + unsigned long cache_set_size; cpu = of_find_compatible_node(NULL, NULL, "opencores,or1200-rtlsvn481"); if (!cpu) panic("No compatible CPU found in device tree...\n"); iccfgr = mfspr(SPR_ICCFGR); - cache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW); + cpuinfo.icache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW); cache_set_size = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3); cpuinfo.icache_block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7); cpuinfo.icache_size = - cache_set_size * cache_ways * cpuinfo.icache_block_size; + cache_set_size * cpuinfo.icache_ways * cpuinfo.icache_block_size; dccfgr = mfspr(SPR_DCCFGR); - cache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW); + cpuinfo.dcache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW); cache_set_size = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3); cpuinfo.dcache_block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7); cpuinfo.dcache_size = - cache_set_size * cache_ways * cpuinfo.dcache_block_size; + cache_set_size * cpuinfo.dcache_ways * cpuinfo.dcache_block_size; if (of_property_read_u32(cpu, "clock-frequency", &cpuinfo.clock_frequency)) { @@ -308,30 +310,33 @@ static int show_cpuinfo(struct seq_file *m, void *v) revision = vr & SPR_VR_REV; seq_printf(m, - "cpu\t\t: OpenRISC-%x\n" - "revision\t: %d\n" - "frequency\t: %ld\n" - "dcache size\t: %d bytes\n" - "dcache block size\t: %d bytes\n" - "icache size\t: %d bytes\n" - "icache block size\t: %d bytes\n" - "immu\t\t: %d entries, %lu ways\n" - "dmmu\t\t: %d entries, %lu ways\n" - "bogomips\t: %lu.%02lu\n", - version, - revision, - loops_per_jiffy * HZ, - cpuinfo.dcache_size, - cpuinfo.dcache_block_size, - cpuinfo.icache_size, - cpuinfo.icache_block_size, - 1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2), - 1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW), - 1 << ((mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTS) >> 2), - 1 + (mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTW), - (loops_per_jiffy * HZ) / 500000, - ((loops_per_jiffy * HZ) / 5000) % 100); - + "cpu\t\t: OpenRISC-%x\n" + "revision\t: %d\n" + "frequency\t: %ld\n" + "dcache size\t: %d bytes\n" + "dcache block size\t: %d bytes\n" + "dcache ways\t: %d\n" + "icache size\t: %d bytes\n" + "icache block size\t: %d bytes\n" + "icache ways\t: %d\n" + "immu\t\t: %d entries, %lu ways\n" + "dmmu\t\t: %d entries, %lu ways\n" + "bogomips\t: %lu.%02lu\n", + version, + revision, + loops_per_jiffy * HZ, + cpuinfo.dcache_size, + cpuinfo.dcache_block_size, + cpuinfo.dcache_ways, + cpuinfo.icache_size, + cpuinfo.icache_block_size, + cpuinfo.icache_ways, + 1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2), + 1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW), + 1 << ((mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTS) >> 2), + 1 + (mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTW), + (loops_per_jiffy * HZ) / 500000, + ((loops_per_jiffy * HZ) / 5000) % 100); return 0; } diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index d29c41bfbffa..7e81ad258bca 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -40,6 +40,8 @@ extern char _etext, _stext; int kstack_depth_to_print = 0x180; +int lwa_flag; +unsigned long __user *lwa_addr; static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) { @@ -334,10 +336,191 @@ asmlinkage void do_bus_fault(struct pt_regs *regs, unsigned long address) } } +static inline int in_delay_slot(struct pt_regs *regs) +{ +#ifdef CONFIG_OPENRISC_NO_SPR_SR_DSX + /* No delay slot flag, do the old way */ + unsigned int op, insn; + + insn = *((unsigned int *)regs->pc); + op = insn >> 26; + switch (op) { + case 0x00: /* l.j */ + case 0x01: /* l.jal */ + case 0x03: /* l.bnf */ + case 0x04: /* l.bf */ + case 0x11: /* l.jr */ + case 0x12: /* l.jalr */ + return 1; + default: + return 0; + } +#else + return regs->sr & SPR_SR_DSX; +#endif +} + +static inline void adjust_pc(struct pt_regs *regs, unsigned long address) +{ + int displacement; + unsigned int rb, op, jmp; + + if (unlikely(in_delay_slot(regs))) { + /* In delay slot, instruction at pc is a branch, simulate it */ + jmp = *((unsigned int *)regs->pc); + + displacement = sign_extend32(((jmp) & 0x3ffffff) << 2, 27); + rb = (jmp & 0x0000ffff) >> 11; + op = jmp >> 26; + + switch (op) { + case 0x00: /* l.j */ + regs->pc += displacement; + return; + case 0x01: /* l.jal */ + regs->pc += displacement; + regs->gpr[9] = regs->pc + 8; + return; + case 0x03: /* l.bnf */ + if (regs->sr & SPR_SR_F) + regs->pc += 8; + else + regs->pc += displacement; + return; + case 0x04: /* l.bf */ + if (regs->sr & SPR_SR_F) + regs->pc += displacement; + else + regs->pc += 8; + return; + case 0x11: /* l.jr */ + regs->pc = regs->gpr[rb]; + return; + case 0x12: /* l.jalr */ + regs->pc = regs->gpr[rb]; + regs->gpr[9] = regs->pc + 8; + return; + default: + break; + } + } else { + regs->pc += 4; + } +} + +static inline void simulate_lwa(struct pt_regs *regs, unsigned long address, + unsigned int insn) +{ + unsigned int ra, rd; + unsigned long value; + unsigned long orig_pc; + long imm; + + const struct exception_table_entry *entry; + + orig_pc = regs->pc; + adjust_pc(regs, address); + + ra = (insn >> 16) & 0x1f; + rd = (insn >> 21) & 0x1f; + imm = (short)insn; + lwa_addr = (unsigned long __user *)(regs->gpr[ra] + imm); + + if ((unsigned long)lwa_addr & 0x3) { + do_unaligned_access(regs, address); + return; + } + + if (get_user(value, lwa_addr)) { + if (user_mode(regs)) { + force_sig(SIGSEGV, current); + return; + } + + if ((entry = search_exception_tables(orig_pc))) { + regs->pc = entry->fixup; + return; + } + + /* kernel access in kernel space, load it directly */ + value = *((unsigned long *)lwa_addr); + } + + lwa_flag = 1; + regs->gpr[rd] = value; +} + +static inline void simulate_swa(struct pt_regs *regs, unsigned long address, + unsigned int insn) +{ + unsigned long __user *vaddr; + unsigned long orig_pc; + unsigned int ra, rb; + long imm; + + const struct exception_table_entry *entry; + + orig_pc = regs->pc; + adjust_pc(regs, address); + + ra = (insn >> 16) & 0x1f; + rb = (insn >> 11) & 0x1f; + imm = (short)(((insn & 0x2200000) >> 10) | (insn & 0x7ff)); + vaddr = (unsigned long __user *)(regs->gpr[ra] + imm); + + if (!lwa_flag || vaddr != lwa_addr) { + regs->sr &= ~SPR_SR_F; + return; + } + + if ((unsigned long)vaddr & 0x3) { + do_unaligned_access(regs, address); + return; + } + + if (put_user(regs->gpr[rb], vaddr)) { + if (user_mode(regs)) { + force_sig(SIGSEGV, current); + return; + } + + if ((entry = search_exception_tables(orig_pc))) { + regs->pc = entry->fixup; + return; + } + + /* kernel access in kernel space, store it directly */ + *((unsigned long *)vaddr) = regs->gpr[rb]; + } + + lwa_flag = 0; + regs->sr |= SPR_SR_F; +} + +#define INSN_LWA 0x1b +#define INSN_SWA 0x33 + asmlinkage void do_illegal_instruction(struct pt_regs *regs, unsigned long address) { siginfo_t info; + unsigned int op; + unsigned int insn = *((unsigned int *)address); + + op = insn >> 26; + + switch (op) { + case INSN_LWA: + simulate_lwa(regs, address, insn); + return; + + case INSN_SWA: + simulate_swa(regs, address, insn); + return; + + default: + break; + } if (user_mode(regs)) { /* Send a SIGILL */ diff --git a/arch/openrisc/lib/Makefile b/arch/openrisc/lib/Makefile index 966f65dbc6f0..17d9d37f32d2 100644 --- a/arch/openrisc/lib/Makefile +++ b/arch/openrisc/lib/Makefile @@ -2,4 +2,4 @@ # Makefile for or32 specific library files.. # -obj-y = string.o delay.o +obj-y := delay.o string.o memset.o memcpy.o diff --git a/arch/openrisc/lib/memcpy.c b/arch/openrisc/lib/memcpy.c new file mode 100644 index 000000000000..669887a60e27 --- /dev/null +++ b/arch/openrisc/lib/memcpy.c @@ -0,0 +1,124 @@ +/* + * arch/openrisc/lib/memcpy.c + * + * Optimized memory copy routines for openrisc. These are mostly copied + * from ohter sources but slightly entended based on ideas discuassed in + * #openrisc. + * + * The word unroll implementation is an extension to the arm byte + * unrolled implementation, but using word copies (if things are + * properly aligned) + * + * The great arm loop unroll algorithm can be found at: + * arch/arm/boot/compressed/string.c + */ + +#include <linux/export.h> + +#include <linux/string.h> + +#ifdef CONFIG_OR1K_1200 +/* + * Do memcpy with word copies and loop unrolling. This gives the + * best performance on the OR1200 and MOR1KX archirectures + */ +void *memcpy(void *dest, __const void *src, __kernel_size_t n) +{ + int i = 0; + unsigned char *d, *s; + uint32_t *dest_w = (uint32_t *)dest, *src_w = (uint32_t *)src; + + /* If both source and dest are word aligned copy words */ + if (!((unsigned int)dest_w & 3) && !((unsigned int)src_w & 3)) { + /* Copy 32 bytes per loop */ + for (i = n >> 5; i > 0; i--) { + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + } + + if (n & 1 << 4) { + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + } + + if (n & 1 << 3) { + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + } + + if (n & 1 << 2) + *dest_w++ = *src_w++; + + d = (unsigned char *)dest_w; + s = (unsigned char *)src_w; + + } else { + d = (unsigned char *)dest_w; + s = (unsigned char *)src_w; + + for (i = n >> 3; i > 0; i--) { + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + } + + if (n & 1 << 2) { + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + } + } + + if (n & 1 << 1) { + *d++ = *s++; + *d++ = *s++; + } + + if (n & 1) + *d++ = *s++; + + return dest; +} +#else +/* + * Use word copies but no loop unrolling as we cannot assume there + * will be benefits on the archirecture + */ +void *memcpy(void *dest, __const void *src, __kernel_size_t n) +{ + unsigned char *d = (unsigned char *)dest, *s = (unsigned char *)src; + uint32_t *dest_w = (uint32_t *)dest, *src_w = (uint32_t *)src; + + /* If both source and dest are word aligned copy words */ + if (!((unsigned int)dest_w & 3) && !((unsigned int)src_w & 3)) { + for (; n >= 4; n -= 4) + *dest_w++ = *src_w++; + } + + d = (unsigned char *)dest_w; + s = (unsigned char *)src_w; + + /* For remaining or if not aligned, copy bytes */ + for (; n >= 1; n -= 1) + *d++ = *s++; + + return dest; + +} +#endif + +EXPORT_SYMBOL(memcpy); diff --git a/arch/openrisc/lib/memset.S b/arch/openrisc/lib/memset.S new file mode 100644 index 000000000000..92cc2eac26fa --- /dev/null +++ b/arch/openrisc/lib/memset.S @@ -0,0 +1,98 @@ +/* + * OpenRISC memset.S + * + * Hand-optimized assembler version of memset for OpenRISC. + * Algorithm inspired by several other arch-specific memset routines + * in the kernel tree + * + * Copyright (C) 2015 Olof Kindgren <olof.kindgren@gmail.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + .global memset + .type memset, @function +memset: + /* arguments: + * r3 = *s + * r4 = c + * r5 = n + * r13, r15, r17, r19 used as temp regs + */ + + /* Exit if n == 0 */ + l.sfeqi r5, 0 + l.bf 4f + + /* Truncate c to char */ + l.andi r13, r4, 0xff + + /* Skip word extension if c is 0 */ + l.sfeqi r13, 0 + l.bf 1f + /* Check for at least two whole words (8 bytes) */ + l.sfleui r5, 7 + + /* Extend char c to 32-bit word cccc in r13 */ + l.slli r15, r13, 16 // r13 = 000c, r15 = 0c00 + l.or r13, r13, r15 // r13 = 0c0c, r15 = 0c00 + l.slli r15, r13, 8 // r13 = 0c0c, r15 = c0c0 + l.or r13, r13, r15 // r13 = cccc, r15 = c0c0 + +1: l.addi r19, r3, 0 // Set r19 = src + /* Jump to byte copy loop if less than two words */ + l.bf 3f + l.or r17, r5, r0 // Set r17 = n + + /* Mask out two LSBs to check alignment */ + l.andi r15, r3, 0x3 + + /* lsb == 00, jump to word copy loop */ + l.sfeqi r15, 0 + l.bf 2f + l.addi r19, r3, 0 // Set r19 = src + + /* lsb == 01,10 or 11 */ + l.sb 0(r3), r13 // *src = c + l.addi r17, r17, -1 // Decrease n + + l.sfeqi r15, 3 + l.bf 2f + l.addi r19, r3, 1 // src += 1 + + /* lsb == 01 or 10 */ + l.sb 1(r3), r13 // *(src+1) = c + l.addi r17, r17, -1 // Decrease n + + l.sfeqi r15, 2 + l.bf 2f + l.addi r19, r3, 2 // src += 2 + + /* lsb == 01 */ + l.sb 2(r3), r13 // *(src+2) = c + l.addi r17, r17, -1 // Decrease n + l.addi r19, r3, 3 // src += 3 + + /* Word copy loop */ +2: l.sw 0(r19), r13 // *src = cccc + l.addi r17, r17, -4 // Decrease n + l.sfgeui r17, 4 + l.bf 2b + l.addi r19, r19, 4 // Increase src + + /* When n > 0, copy the remaining bytes, otherwise jump to exit */ + l.sfeqi r17, 0 + l.bf 4f + + /* Byte copy loop */ +3: l.addi r17, r17, -1 // Decrease n + l.sb 0(r19), r13 // *src = cccc + l.sfnei r17, 0 + l.bf 3b + l.addi r19, r19, 1 // Increase src + +4: l.jr r9 + l.ori r11, r3, 0 diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c index 8705a46218f9..2175e4bfd9fc 100644 --- a/arch/openrisc/mm/ioremap.c +++ b/arch/openrisc/mm/ioremap.c @@ -80,6 +80,7 @@ __ioremap(phys_addr_t addr, unsigned long size, pgprot_t prot) return (void __iomem *)(offset + (char *)v); } +EXPORT_SYMBOL(__ioremap); void iounmap(void *addr) { @@ -106,6 +107,7 @@ void iounmap(void *addr) return vfree((void *)(PAGE_MASK & (unsigned long)addr)); } +EXPORT_SYMBOL(iounmap); /** * OK, this one's a bit tricky... ioremap can get called before memory is diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index c1263fc390db..f294dd42fc7d 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -17,7 +17,8 @@ #define HPAGE_SHIFT 23 #define REAL_HPAGE_SHIFT 22 - +#define HPAGE_256MB_SHIFT 28 +#define HPAGE_64K_SHIFT 16 #define REAL_HPAGE_SIZE (_AC(1,UL) << REAL_HPAGE_SHIFT) #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) @@ -26,6 +27,7 @@ #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #define REAL_HPAGE_PER_HPAGE (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT)) +#define HUGE_MAX_HSTATE 3 #endif #ifndef __ASSEMBLY__ diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 314b66851348..7932a4a37817 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -375,7 +375,10 @@ static inline pgprot_t pgprot_noncached(pgprot_t prot) #define pgprot_noncached pgprot_noncached #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) -static inline unsigned long __pte_huge_mask(void) +extern pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, + struct page *page, int writable); +#define arch_make_huge_pte arch_make_huge_pte +static inline unsigned long __pte_default_huge_mask(void) { unsigned long mask; @@ -395,12 +398,14 @@ static inline unsigned long __pte_huge_mask(void) static inline pte_t pte_mkhuge(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_PMD_HUGE | __pte_huge_mask()); + return __pte(pte_val(pte) | __pte_default_huge_mask()); } -static inline bool is_hugetlb_pte(pte_t pte) +static inline bool is_default_hugetlb_pte(pte_t pte) { - return !!(pte_val(pte) & __pte_huge_mask()); + unsigned long mask = __pte_default_huge_mask(); + + return (pte_val(pte) & mask) == mask; } static inline bool is_hugetlb_pmd(pmd_t pmd) @@ -875,10 +880,12 @@ static inline unsigned long pud_pfn(pud_t pud) /* Actual page table PTE updates. */ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, - pte_t *ptep, pte_t orig, int fullmm); + pte_t *ptep, pte_t orig, int fullmm, + unsigned int hugepage_shift); static void maybe_tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, - pte_t *ptep, pte_t orig, int fullmm) + pte_t *ptep, pte_t orig, int fullmm, + unsigned int hugepage_shift) { /* It is more efficient to let flush_tlb_kernel_range() * handle init_mm tlb flushes. @@ -887,7 +894,7 @@ static void maybe_tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, * and SUN4V pte layout, so this inline test is fine. */ if (likely(mm != &init_mm) && pte_accessible(mm, orig)) - tlb_batch_add(mm, vaddr, ptep, orig, fullmm); + tlb_batch_add(mm, vaddr, ptep, orig, fullmm, hugepage_shift); } #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR @@ -906,7 +913,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t orig = *ptep; *ptep = pte; - maybe_tlb_batch_add(mm, addr, ptep, orig, fullmm); + maybe_tlb_batch_add(mm, addr, ptep, orig, fullmm, PAGE_SHIFT); } #define set_pte_at(mm,addr,ptep,pte) \ diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h index 29d64b1758ed..478bf6bb4598 100644 --- a/arch/sparc/include/asm/setup.h +++ b/arch/sparc/include/asm/setup.h @@ -59,8 +59,11 @@ extern atomic_t dcpage_flushes; extern atomic_t dcpage_flushes_xcall; extern int sysctl_tsb_ratio; -#endif +#ifdef CONFIG_SERIAL_SUNHV +void sunhv_migrate_hvcons_irq(int cpu); +#endif +#endif void sun_do_break(void); extern int stop_a_enabled; extern int scons_pwroff; diff --git a/arch/sparc/include/asm/tlbflush_64.h b/arch/sparc/include/asm/tlbflush_64.h index a8e192e90700..54be88a6774c 100644 --- a/arch/sparc/include/asm/tlbflush_64.h +++ b/arch/sparc/include/asm/tlbflush_64.h @@ -8,7 +8,7 @@ #define TLB_BATCH_NR 192 struct tlb_batch { - bool huge; + unsigned int hugepage_shift; struct mm_struct *mm; unsigned long tlb_nr; unsigned long active; @@ -17,7 +17,8 @@ struct tlb_batch { void flush_tsb_kernel_range(unsigned long start, unsigned long end); void flush_tsb_user(struct tlb_batch *tb); -void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, bool huge); +void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, + unsigned int hugepage_shift); /* TLB flush operations. */ diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index 225543000122..ad5293f89680 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -4,7 +4,6 @@ #ifdef CONFIG_NUMA #include <asm/mmzone.h> -#include <asm/cpudata.h> static inline int cpu_to_node(int cpu) { @@ -42,6 +41,9 @@ int __node_distance(int, int); #endif /* !(CONFIG_NUMA) */ #ifdef CONFIG_SMP + +#include <asm/cpudata.h> + #define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id) #define topology_core_id(cpu) (cpu_data(cpu).core_id) #define topology_core_cpumask(cpu) (&cpu_core_sib_map[cpu]) diff --git a/arch/sparc/include/asm/uprobes.h b/arch/sparc/include/asm/uprobes.h index f87aae5a908e..36196c17aff8 100644 --- a/arch/sparc/include/asm/uprobes.h +++ b/arch/sparc/include/asm/uprobes.h @@ -42,8 +42,8 @@ struct arch_uprobe { }; struct arch_uprobe_task { - u32 saved_tpc; - u32 saved_tnpc; + u64 saved_tpc; + u64 saved_tnpc; }; struct task_struct; diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 0ce347f8e4cc..90a02cb64e20 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1443,6 +1443,7 @@ void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs) static void stop_this_cpu(void *dummy) { + set_cpu_online(smp_processor_id(), false); prom_stopself(); } @@ -1451,9 +1452,15 @@ void smp_send_stop(void) int cpu; if (tlb_type == hypervisor) { + int this_cpu = smp_processor_id(); +#ifdef CONFIG_SERIAL_SUNHV + sunhv_migrate_hvcons_irq(this_cpu); +#endif for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) + if (cpu == this_cpu) continue; + + set_cpu_online(cpu, false); #ifdef CONFIG_SUN_LDOMS if (ldom_domaining_enabled) { unsigned long hv_err; diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S index d568c8207af7..10689cfd0ad4 100644 --- a/arch/sparc/kernel/tsb.S +++ b/arch/sparc/kernel/tsb.S @@ -117,26 +117,11 @@ tsb_miss_page_table_walk_sun4v_fastpath: /* Valid PTE is now in %g5. */ #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) -661: sethi %uhi(_PAGE_SZALL_4U), %g7 + sethi %uhi(_PAGE_PMD_HUGE), %g7 sllx %g7, 32, %g7 - .section .sun4v_2insn_patch, "ax" - .word 661b - mov _PAGE_SZALL_4V, %g7 - nop - .previous - - and %g5, %g7, %g2 - -661: sethi %uhi(_PAGE_SZHUGE_4U), %g7 - sllx %g7, 32, %g7 - .section .sun4v_2insn_patch, "ax" - .word 661b - mov _PAGE_SZHUGE_4V, %g7 - nop - .previous - cmp %g2, %g7 - bne,pt %xcc, 60f + andcc %g5, %g7, %g0 + be,pt %xcc, 60f nop /* It is a huge page, use huge page TSB entry address we diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index 988acc8b1b80..e98a3f2e8f0f 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -28,6 +28,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp, unsigned long pgoff, unsigned long flags) { + struct hstate *h = hstate_file(filp); unsigned long task_size = TASK_SIZE; struct vm_unmapped_area_info info; @@ -38,7 +39,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp, info.length = len; info.low_limit = TASK_UNMAPPED_BASE; info.high_limit = min(task_size, VA_EXCLUDE_START); - info.align_mask = PAGE_MASK & ~HPAGE_MASK; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); info.align_offset = 0; addr = vm_unmapped_area(&info); @@ -58,6 +59,7 @@ hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, const unsigned long pgoff, const unsigned long flags) { + struct hstate *h = hstate_file(filp); struct mm_struct *mm = current->mm; unsigned long addr = addr0; struct vm_unmapped_area_info info; @@ -69,7 +71,7 @@ hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, info.length = len; info.low_limit = PAGE_SIZE; info.high_limit = mm->mmap_base; - info.align_mask = PAGE_MASK & ~HPAGE_MASK; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); info.align_offset = 0; addr = vm_unmapped_area(&info); @@ -94,6 +96,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { + struct hstate *h = hstate_file(file); struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long task_size = TASK_SIZE; @@ -101,7 +104,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, if (test_thread_flag(TIF_32BIT)) task_size = STACK_TOP32; - if (len & ~HPAGE_MASK) + if (len & ~huge_page_mask(h)) return -EINVAL; if (len > task_size) return -ENOMEM; @@ -113,7 +116,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, } if (addr) { - addr = ALIGN(addr, HPAGE_SIZE); + addr = ALIGN(addr, huge_page_size(h)); vma = find_vma(mm, addr); if (task_size - len >= addr && (!vma || addr + len <= vma->vm_start)) @@ -127,17 +130,141 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, pgoff, flags); } +static pte_t sun4u_hugepage_shift_to_tte(pte_t entry, unsigned int shift) +{ + return entry; +} + +static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift) +{ + unsigned long hugepage_size = _PAGE_SZ4MB_4V; + + pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V; + + switch (shift) { + case HPAGE_256MB_SHIFT: + hugepage_size = _PAGE_SZ256MB_4V; + pte_val(entry) |= _PAGE_PMD_HUGE; + break; + case HPAGE_SHIFT: + pte_val(entry) |= _PAGE_PMD_HUGE; + break; + case HPAGE_64K_SHIFT: + hugepage_size = _PAGE_SZ64K_4V; + break; + default: + WARN_ONCE(1, "unsupported hugepage shift=%u\n", shift); + } + + pte_val(entry) = pte_val(entry) | hugepage_size; + return entry; +} + +static pte_t hugepage_shift_to_tte(pte_t entry, unsigned int shift) +{ + if (tlb_type == hypervisor) + return sun4v_hugepage_shift_to_tte(entry, shift); + else + return sun4u_hugepage_shift_to_tte(entry, shift); +} + +pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, + struct page *page, int writeable) +{ + unsigned int shift = huge_page_shift(hstate_vma(vma)); + + return hugepage_shift_to_tte(entry, shift); +} + +static unsigned int sun4v_huge_tte_to_shift(pte_t entry) +{ + unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4V; + unsigned int shift; + + switch (tte_szbits) { + case _PAGE_SZ256MB_4V: + shift = HPAGE_256MB_SHIFT; + break; + case _PAGE_SZ4MB_4V: + shift = REAL_HPAGE_SHIFT; + break; + case _PAGE_SZ64K_4V: + shift = HPAGE_64K_SHIFT; + break; + default: + shift = PAGE_SHIFT; + break; + } + return shift; +} + +static unsigned int sun4u_huge_tte_to_shift(pte_t entry) +{ + unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4U; + unsigned int shift; + + switch (tte_szbits) { + case _PAGE_SZ256MB_4U: + shift = HPAGE_256MB_SHIFT; + break; + case _PAGE_SZ4MB_4U: + shift = REAL_HPAGE_SHIFT; + break; + case _PAGE_SZ64K_4U: + shift = HPAGE_64K_SHIFT; + break; + default: + shift = PAGE_SHIFT; + break; + } + return shift; +} + +static unsigned int huge_tte_to_shift(pte_t entry) +{ + unsigned long shift; + + if (tlb_type == hypervisor) + shift = sun4v_huge_tte_to_shift(entry); + else + shift = sun4u_huge_tte_to_shift(entry); + + if (shift == PAGE_SHIFT) + WARN_ONCE(1, "tto_to_shift: invalid hugepage tte=0x%lx\n", + pte_val(entry)); + + return shift; +} + +static unsigned long huge_tte_to_size(pte_t pte) +{ + unsigned long size = 1UL << huge_tte_to_shift(pte); + + if (size == REAL_HPAGE_SIZE) + size = HPAGE_SIZE; + return size; +} + pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pgd; pud_t *pud; + pmd_t *pmd; pte_t *pte = NULL; pgd = pgd_offset(mm, addr); pud = pud_alloc(mm, pgd, addr); - if (pud) - pte = (pte_t *)pmd_alloc(mm, pud, addr); + if (pud) { + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return NULL; + + if (sz == PMD_SHIFT) + pte = (pte_t *)pmd; + else + pte = pte_alloc_map(mm, pmd, addr); + } return pte; } @@ -146,49 +273,83 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; pud_t *pud; + pmd_t *pmd; pte_t *pte = NULL; pgd = pgd_offset(mm, addr); if (!pgd_none(*pgd)) { pud = pud_offset(pgd, addr); - if (!pud_none(*pud)) - pte = (pte_t *)pmd_offset(pud, addr); + if (!pud_none(*pud)) { + pmd = pmd_offset(pud, addr); + if (!pmd_none(*pmd)) { + if (is_hugetlb_pmd(*pmd)) + pte = (pte_t *)pmd; + else + pte = pte_offset_map(pmd, addr); + } + } } + return pte; } void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { + unsigned int i, nptes, orig_shift, shift; + unsigned long size; pte_t orig; + size = huge_tte_to_size(entry); + shift = size >= HPAGE_SIZE ? PMD_SHIFT : PAGE_SHIFT; + nptes = size >> shift; + if (!pte_present(*ptep) && pte_present(entry)) - mm->context.hugetlb_pte_count++; + mm->context.hugetlb_pte_count += nptes; - addr &= HPAGE_MASK; + addr &= ~(size - 1); orig = *ptep; - *ptep = entry; + orig_shift = pte_none(orig) ? PAGE_SHIFT : huge_tte_to_shift(orig); + + for (i = 0; i < nptes; i++) + ptep[i] = __pte(pte_val(entry) + (i << shift)); - /* Issue TLB flush at REAL_HPAGE_SIZE boundaries */ - maybe_tlb_batch_add(mm, addr, ptep, orig, 0); - maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0); + maybe_tlb_batch_add(mm, addr, ptep, orig, 0, orig_shift); + /* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */ + if (size == HPAGE_SIZE) + maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0, + orig_shift); } pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { + unsigned int i, nptes, hugepage_shift; + unsigned long size; pte_t entry; entry = *ptep; + size = huge_tte_to_size(entry); + if (size >= HPAGE_SIZE) + nptes = size >> PMD_SHIFT; + else + nptes = size >> PAGE_SHIFT; + + hugepage_shift = pte_none(entry) ? PAGE_SHIFT : + huge_tte_to_shift(entry); + if (pte_present(entry)) - mm->context.hugetlb_pte_count--; + mm->context.hugetlb_pte_count -= nptes; - addr &= HPAGE_MASK; - *ptep = __pte(0UL); + addr &= ~(size - 1); + for (i = 0; i < nptes; i++) + ptep[i] = __pte(0UL); - /* Issue TLB flush at REAL_HPAGE_SIZE boundaries */ - maybe_tlb_batch_add(mm, addr, ptep, entry, 0); - maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0); + maybe_tlb_batch_add(mm, addr, ptep, entry, 0, hugepage_shift); + /* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */ + if (size == HPAGE_SIZE) + maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0, + hugepage_shift); return entry; } diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 5d2f91511c60..ccd455328989 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -324,6 +324,50 @@ static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_inde tsb_insert(tsb, tag, tte); } +#ifdef CONFIG_HUGETLB_PAGE +static int __init setup_hugepagesz(char *string) +{ + unsigned long long hugepage_size; + unsigned int hugepage_shift; + unsigned short hv_pgsz_idx; + unsigned int hv_pgsz_mask; + int rc = 0; + + hugepage_size = memparse(string, &string); + hugepage_shift = ilog2(hugepage_size); + + switch (hugepage_shift) { + case HPAGE_256MB_SHIFT: + hv_pgsz_mask = HV_PGSZ_MASK_256MB; + hv_pgsz_idx = HV_PGSZ_IDX_256MB; + break; + case HPAGE_SHIFT: + hv_pgsz_mask = HV_PGSZ_MASK_4MB; + hv_pgsz_idx = HV_PGSZ_IDX_4MB; + break; + case HPAGE_64K_SHIFT: + hv_pgsz_mask = HV_PGSZ_MASK_64K; + hv_pgsz_idx = HV_PGSZ_IDX_64K; + break; + default: + hv_pgsz_mask = 0; + } + + if ((hv_pgsz_mask & cpu_pgsz_mask) == 0U) { + pr_warn("hugepagesz=%llu not supported by MMU.\n", + hugepage_size); + goto out; + } + + hugetlb_add_hstate(hugepage_shift - PAGE_SHIFT); + rc = 1; + +out: + return rc; +} +__setup("hugepagesz=", setup_hugepagesz); +#endif /* CONFIG_HUGETLB_PAGE */ + void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { struct mm_struct *mm; @@ -347,7 +391,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t * #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) if ((mm->context.hugetlb_pte_count || mm->context.thp_pte_count) && - is_hugetlb_pte(pte)) { + is_hugetlb_pmd(__pmd(pte_val(pte)))) { /* We are fabricating 8MB pages using 4MB real hw pages. */ pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT)); __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT, @@ -785,13 +829,23 @@ static void __init find_ramdisk(unsigned long phys_base) struct node_mem_mask { unsigned long mask; - unsigned long val; + unsigned long match; }; static struct node_mem_mask node_masks[MAX_NUMNODES]; static int num_node_masks; #ifdef CONFIG_NEED_MULTIPLE_NODES +struct mdesc_mlgroup { + u64 node; + u64 latency; + u64 match; + u64 mask; +}; + +static struct mdesc_mlgroup *mlgroups; +static int num_mlgroups; + int numa_cpu_lookup_table[NR_CPUS]; cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES]; @@ -802,78 +856,129 @@ struct mdesc_mblock { }; static struct mdesc_mblock *mblocks; static int num_mblocks; -static int find_numa_node_for_addr(unsigned long pa, - struct node_mem_mask *pnode_mask); -static unsigned long __init ra_to_pa(unsigned long addr) +static struct mdesc_mblock * __init addr_to_mblock(unsigned long addr) { + struct mdesc_mblock *m = NULL; int i; for (i = 0; i < num_mblocks; i++) { - struct mdesc_mblock *m = &mblocks[i]; + m = &mblocks[i]; if (addr >= m->base && addr < (m->base + m->size)) { - addr += m->offset; break; } } - return addr; + + return m; } -static int __init find_node(unsigned long addr) +static u64 __init memblock_nid_range_sun4u(u64 start, u64 end, int *nid) { - static bool search_mdesc = true; - static struct node_mem_mask last_mem_mask = { ~0UL, ~0UL }; - static int last_index; - int i; + int prev_nid, new_nid; - addr = ra_to_pa(addr); - for (i = 0; i < num_node_masks; i++) { - struct node_mem_mask *p = &node_masks[i]; + prev_nid = -1; + for ( ; start < end; start += PAGE_SIZE) { + for (new_nid = 0; new_nid < num_node_masks; new_nid++) { + struct node_mem_mask *p = &node_masks[new_nid]; - if ((addr & p->mask) == p->val) - return i; - } - /* The following condition has been observed on LDOM guests because - * node_masks only contains the best latency mask and value. - * LDOM guest's mdesc can contain a single latency group to - * cover multiple address range. Print warning message only if the - * address cannot be found in node_masks nor mdesc. - */ - if ((search_mdesc) && - ((addr & last_mem_mask.mask) != last_mem_mask.val)) { - /* find the available node in the mdesc */ - last_index = find_numa_node_for_addr(addr, &last_mem_mask); - numadbg("find_node: latency group for address 0x%lx is %d\n", - addr, last_index); - if ((last_index < 0) || (last_index >= num_node_masks)) { - /* WARN_ONCE() and use default group 0 */ - WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node rule. Some physical memory will be owned by node 0."); - search_mdesc = false; - last_index = 0; + if ((start & p->mask) == p->match) { + if (prev_nid == -1) + prev_nid = new_nid; + break; + } } + + if (new_nid == num_node_masks) { + prev_nid = 0; + WARN_ONCE(1, "addr[%Lx] doesn't match a NUMA node rule. Some memory will be owned by node 0.", + start); + break; + } + + if (prev_nid != new_nid) + break; } + *nid = prev_nid; - return last_index; + return start > end ? end : start; } static u64 __init memblock_nid_range(u64 start, u64 end, int *nid) { - *nid = find_node(start); - start += PAGE_SIZE; - while (start < end) { - int n = find_node(start); + u64 ret_end, pa_start, m_mask, m_match, m_end; + struct mdesc_mblock *mblock; + int _nid, i; + + if (tlb_type != hypervisor) + return memblock_nid_range_sun4u(start, end, nid); + + mblock = addr_to_mblock(start); + if (!mblock) { + WARN_ONCE(1, "memblock_nid_range: Can't find mblock addr[%Lx]", + start); + + _nid = 0; + ret_end = end; + goto done; + } + + pa_start = start + mblock->offset; + m_match = 0; + m_mask = 0; + + for (_nid = 0; _nid < num_node_masks; _nid++) { + struct node_mem_mask *const m = &node_masks[_nid]; - if (n != *nid) + if ((pa_start & m->mask) == m->match) { + m_match = m->match; + m_mask = m->mask; break; - start += PAGE_SIZE; + } } - if (start > end) - start = end; + if (num_node_masks == _nid) { + /* We could not find NUMA group, so default to 0, but lets + * search for latency group, so we could calculate the correct + * end address that we return + */ + _nid = 0; + + for (i = 0; i < num_mlgroups; i++) { + struct mdesc_mlgroup *const m = &mlgroups[i]; + + if ((pa_start & m->mask) == m->match) { + m_match = m->match; + m_mask = m->mask; + break; + } + } + + if (i == num_mlgroups) { + WARN_ONCE(1, "memblock_nid_range: Can't find latency group addr[%Lx]", + start); + + ret_end = end; + goto done; + } + } - return start; + /* + * Each latency group has match and mask, and each memory block has an + * offset. An address belongs to a latency group if its address matches + * the following formula: ((addr + offset) & mask) == match + * It is, however, slow to check every single page if it matches a + * particular latency group. As optimization we calculate end value by + * using bit arithmetics. + */ + m_end = m_match + (1ul << __ffs(m_mask)) - mblock->offset; + m_end += pa_start & ~((1ul << fls64(m_mask)) - 1); + ret_end = m_end > end ? end : m_end; + +done: + *nid = _nid; + return ret_end; } #endif @@ -914,7 +1019,8 @@ static void init_node_masks_nonnuma(void) numadbg("Initializing tables for non-numa.\n"); - node_masks[0].mask = node_masks[0].val = 0; + node_masks[0].mask = 0; + node_masks[0].match = 0; num_node_masks = 1; #ifdef CONFIG_NEED_MULTIPLE_NODES @@ -932,15 +1038,6 @@ EXPORT_SYMBOL(numa_cpu_lookup_table); EXPORT_SYMBOL(numa_cpumask_lookup_table); EXPORT_SYMBOL(node_data); -struct mdesc_mlgroup { - u64 node; - u64 latency; - u64 match; - u64 mask; -}; -static struct mdesc_mlgroup *mlgroups; -static int num_mlgroups; - static int scan_pio_for_cfg_handle(struct mdesc_handle *md, u64 pio, u32 cfg_handle) { @@ -1029,6 +1126,10 @@ int of_node_to_nid(struct device_node *dp) static void __init add_node_ranges(void) { struct memblock_region *reg; + unsigned long prev_max; + +memblock_resized: + prev_max = memblock.memory.max; for_each_memblock(memory, reg) { unsigned long size = reg->size; @@ -1048,6 +1149,8 @@ static void __init add_node_ranges(void) memblock_set_node(start, this_end - start, &memblock.memory, nid); + if (memblock.memory.max != prev_max) + goto memblock_resized; start = this_end; } } @@ -1182,41 +1285,6 @@ int __node_distance(int from, int to) return numa_latency[from][to]; } -static int find_numa_node_for_addr(unsigned long pa, - struct node_mem_mask *pnode_mask) -{ - struct mdesc_handle *md = mdesc_grab(); - u64 node, arc; - int i = 0; - - node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups"); - if (node == MDESC_NODE_NULL) - goto out; - - mdesc_for_each_node_by_name(md, node, "group") { - mdesc_for_each_arc(arc, md, node, MDESC_ARC_TYPE_FWD) { - u64 target = mdesc_arc_target(md, arc); - struct mdesc_mlgroup *m = find_mlgroup(target); - - if (!m) - continue; - if ((pa & m->mask) == m->match) { - if (pnode_mask) { - pnode_mask->mask = m->mask; - pnode_mask->val = m->match; - } - mdesc_release(md); - return i; - } - } - i++; - } - -out: - mdesc_release(md); - return -1; -} - static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) { int i; @@ -1224,7 +1292,7 @@ static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) for (i = 0; i < MAX_NUMNODES; i++) { struct node_mem_mask *n = &node_masks[i]; - if ((grp->mask == n->mask) && (grp->match == n->val)) + if ((grp->mask == n->mask) && (grp->match == n->match)) break; } return i; @@ -1279,10 +1347,10 @@ static int __init numa_attach_mlgroup(struct mdesc_handle *md, u64 grp, n = &node_masks[num_node_masks++]; n->mask = candidate->mask; - n->val = candidate->match; + n->match = candidate->match; - numadbg("NUMA NODE[%d]: mask[%lx] val[%lx] (latency[%llx])\n", - index, n->mask, n->val, candidate->latency); + numadbg("NUMA NODE[%d]: mask[%lx] match[%lx] (latency[%llx])\n", + index, n->mask, n->match, candidate->latency); return 0; } @@ -1379,7 +1447,7 @@ static int __init numa_parse_jbus(void) numa_cpu_lookup_table[cpu] = index; cpumask_copy(&numa_cpumask_lookup_table[index], cpumask_of(cpu)); node_masks[index].mask = ~((1UL << 36UL) - 1UL); - node_masks[index].val = cpu << 36UL; + node_masks[index].match = cpu << 36UL; index++; } diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index c7f2a5295b3a..def82f6d626f 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -1444,7 +1444,7 @@ static void poke_viking(void) srmmu_set_mmureg(mreg); } -static struct sparc32_cachetlb_ops viking_ops = { +static struct sparc32_cachetlb_ops viking_ops __ro_after_init = { .cache_all = viking_flush_cache_all, .cache_mm = viking_flush_cache_mm, .cache_page = viking_flush_cache_page, @@ -1475,7 +1475,7 @@ static struct sparc32_cachetlb_ops viking_ops = { * flushes going at once will require SMP locking anyways so there's * no real value in trying any harder than this. */ -static struct sparc32_cachetlb_ops viking_sun4d_smp_ops = { +static struct sparc32_cachetlb_ops viking_sun4d_smp_ops __ro_after_init = { .cache_all = viking_flush_cache_all, .cache_mm = viking_flush_cache_mm, .cache_page = viking_flush_cache_page, @@ -1759,7 +1759,7 @@ static void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr) local_ops->sig_insns(mm, insn_addr); } -static struct sparc32_cachetlb_ops smp_cachetlb_ops = { +static struct sparc32_cachetlb_ops smp_cachetlb_ops __ro_after_init = { .cache_all = smp_flush_cache_all, .cache_mm = smp_flush_cache_mm, .cache_page = smp_flush_cache_page, diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index c56a195c9071..afda3bbf7854 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -67,7 +67,7 @@ void arch_leave_lazy_mmu_mode(void) } static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr, - bool exec, bool huge) + bool exec, unsigned int hugepage_shift) { struct tlb_batch *tb = &get_cpu_var(tlb_batch); unsigned long nr; @@ -84,19 +84,19 @@ static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr, } if (!tb->active) { - flush_tsb_user_page(mm, vaddr, huge); + flush_tsb_user_page(mm, vaddr, hugepage_shift); global_flush_tlb_page(mm, vaddr); goto out; } if (nr == 0) { tb->mm = mm; - tb->huge = huge; + tb->hugepage_shift = hugepage_shift; } - if (tb->huge != huge) { + if (tb->hugepage_shift != hugepage_shift) { flush_tlb_pending(); - tb->huge = huge; + tb->hugepage_shift = hugepage_shift; nr = 0; } @@ -110,10 +110,9 @@ out: } void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, - pte_t *ptep, pte_t orig, int fullmm) + pte_t *ptep, pte_t orig, int fullmm, + unsigned int hugepage_shift) { - bool huge = is_hugetlb_pte(orig); - if (tlb_type != hypervisor && pte_dirty(orig)) { unsigned long paddr, pfn = pte_pfn(orig); @@ -139,7 +138,7 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, no_cache_flush: if (!fullmm) - tlb_batch_add_one(mm, vaddr, pte_exec(orig), huge); + tlb_batch_add_one(mm, vaddr, pte_exec(orig), hugepage_shift); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index e20fbbafb0b0..23479c3d39f0 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -86,6 +86,33 @@ static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift, __flush_tsb_one_entry(tsb, tb->vaddrs[i], hash_shift, nentries); } +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) +static void __flush_huge_tsb_one_entry(unsigned long tsb, unsigned long v, + unsigned long hash_shift, + unsigned long nentries, + unsigned int hugepage_shift) +{ + unsigned int hpage_entries; + unsigned int i; + + hpage_entries = 1 << (hugepage_shift - hash_shift); + for (i = 0; i < hpage_entries; i++) + __flush_tsb_one_entry(tsb, v + (i << hash_shift), hash_shift, + nentries); +} + +static void __flush_huge_tsb_one(struct tlb_batch *tb, unsigned long hash_shift, + unsigned long tsb, unsigned long nentries, + unsigned int hugepage_shift) +{ + unsigned long i; + + for (i = 0; i < tb->tlb_nr; i++) + __flush_huge_tsb_one_entry(tsb, tb->vaddrs[i], hash_shift, + nentries, hugepage_shift); +} +#endif + void flush_tsb_user(struct tlb_batch *tb) { struct mm_struct *mm = tb->mm; @@ -93,45 +120,61 @@ void flush_tsb_user(struct tlb_batch *tb) spin_lock_irqsave(&mm->context.lock, flags); - if (!tb->huge) { + if (tb->hugepage_shift < HPAGE_SHIFT) { base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb; nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries; if (tlb_type == cheetah_plus || tlb_type == hypervisor) base = __pa(base); - __flush_tsb_one(tb, PAGE_SHIFT, base, nentries); + if (tb->hugepage_shift == PAGE_SHIFT) + __flush_tsb_one(tb, PAGE_SHIFT, base, nentries); +#if defined(CONFIG_HUGETLB_PAGE) + else + __flush_huge_tsb_one(tb, PAGE_SHIFT, base, nentries, + tb->hugepage_shift); +#endif } #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - if (tb->huge && mm->context.tsb_block[MM_TSB_HUGE].tsb) { + else if (mm->context.tsb_block[MM_TSB_HUGE].tsb) { base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb; nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries; if (tlb_type == cheetah_plus || tlb_type == hypervisor) base = __pa(base); - __flush_tsb_one(tb, REAL_HPAGE_SHIFT, base, nentries); + __flush_huge_tsb_one(tb, REAL_HPAGE_SHIFT, base, nentries, + tb->hugepage_shift); } #endif spin_unlock_irqrestore(&mm->context.lock, flags); } -void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, bool huge) +void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, + unsigned int hugepage_shift) { unsigned long nentries, base, flags; spin_lock_irqsave(&mm->context.lock, flags); - if (!huge) { + if (hugepage_shift < HPAGE_SHIFT) { base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb; nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries; if (tlb_type == cheetah_plus || tlb_type == hypervisor) base = __pa(base); - __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries); + if (hugepage_shift == PAGE_SHIFT) + __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, + nentries); +#if defined(CONFIG_HUGETLB_PAGE) + else + __flush_huge_tsb_one_entry(base, vaddr, PAGE_SHIFT, + nentries, hugepage_shift); +#endif } #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - if (huge && mm->context.tsb_block[MM_TSB_HUGE].tsb) { + else if (mm->context.tsb_block[MM_TSB_HUGE].tsb) { base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb; nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries; if (tlb_type == cheetah_plus || tlb_type == hypervisor) base = __pa(base); - __flush_tsb_one_entry(base, vaddr, REAL_HPAGE_SHIFT, nentries); + __flush_huge_tsb_one_entry(base, vaddr, REAL_HPAGE_SHIFT, + nentries, hugepage_shift); } #endif spin_unlock_irqrestore(&mm->context.lock, flags); |