summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/openrisc/Kconfig1
-rw-r--r--arch/openrisc/TODO.openrisc1
-rw-r--r--arch/openrisc/include/asm/Kbuild5
-rw-r--r--arch/openrisc/include/asm/atomic.h126
-rw-r--r--arch/openrisc/include/asm/bitops.h2
-rw-r--r--arch/openrisc/include/asm/bitops/atomic.h123
-rw-r--r--arch/openrisc/include/asm/cmpxchg.h83
-rw-r--r--arch/openrisc/include/asm/cpuinfo.h2
-rw-r--r--arch/openrisc/include/asm/futex.h135
-rw-r--r--arch/openrisc/include/asm/spr_defs.h4
-rw-r--r--arch/openrisc/include/asm/string.h10
-rw-r--r--arch/openrisc/kernel/.gitignore1
-rw-r--r--arch/openrisc/kernel/entry.S60
-rw-r--r--arch/openrisc/kernel/head.S200
-rw-r--r--arch/openrisc/kernel/or32_ksyms.c1
-rw-r--r--arch/openrisc/kernel/process.c14
-rw-r--r--arch/openrisc/kernel/ptrace.c1
-rw-r--r--arch/openrisc/kernel/setup.c67
-rw-r--r--arch/openrisc/kernel/traps.c183
-rw-r--r--arch/openrisc/lib/Makefile2
-rw-r--r--arch/openrisc/lib/memcpy.c124
-rw-r--r--arch/openrisc/lib/memset.S98
-rw-r--r--arch/openrisc/mm/ioremap.c2
-rw-r--r--arch/sparc/include/asm/page_64.h4
-rw-r--r--arch/sparc/include/asm/pgtable_64.h23
-rw-r--r--arch/sparc/include/asm/setup.h5
-rw-r--r--arch/sparc/include/asm/tlbflush_64.h5
-rw-r--r--arch/sparc/include/asm/topology_64.h4
-rw-r--r--arch/sparc/include/asm/uprobes.h4
-rw-r--r--arch/sparc/kernel/smp_64.c9
-rw-r--r--arch/sparc/kernel/tsb.S21
-rw-r--r--arch/sparc/mm/hugetlbpage.c201
-rw-r--r--arch/sparc/mm/init_64.c260
-rw-r--r--arch/sparc/mm/srmmu.c6
-rw-r--r--arch/sparc/mm/tlb.c17
-rw-r--r--arch/sparc/mm/tsb.c61
36 files changed, 1507 insertions, 358 deletions
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index 8d22015fde3e..1e95920b0737 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -12,6 +12,7 @@ config OPENRISC
select HAVE_MEMBLOCK
select GPIOLIB
select HAVE_ARCH_TRACEHOOK
+ select SPARSE_IRQ
select GENERIC_IRQ_CHIP
select GENERIC_IRQ_PROBE
select GENERIC_IRQ_SHOW
diff --git a/arch/openrisc/TODO.openrisc b/arch/openrisc/TODO.openrisc
index 0eb04c8240f9..c43d4e1d14eb 100644
--- a/arch/openrisc/TODO.openrisc
+++ b/arch/openrisc/TODO.openrisc
@@ -10,4 +10,3 @@ that are due for investigation shortly, i.e. our TODO list:
or1k and this change is slowly trickling through the stack. For the time
being, or32 is equivalent to or1k.
--- Implement optimized version of memcpy and memset
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index ef8d1ccc3e45..fb241757f7f0 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -1,7 +1,6 @@
header-y += ucontext.h
-generic-y += atomic.h
generic-y += auxvec.h
generic-y += barrier.h
generic-y += bitsperlong.h
@@ -10,8 +9,6 @@ generic-y += bugs.h
generic-y += cacheflush.h
generic-y += checksum.h
generic-y += clkdev.h
-generic-y += cmpxchg-local.h
-generic-y += cmpxchg.h
generic-y += current.h
generic-y += device.h
generic-y += div64.h
@@ -22,12 +19,12 @@ generic-y += exec.h
generic-y += fb.h
generic-y += fcntl.h
generic-y += ftrace.h
-generic-y += futex.h
generic-y += hardirq.h
generic-y += hw_irq.h
generic-y += ioctl.h
generic-y += ioctls.h
generic-y += ipcbuf.h
+generic-y += irq.h
generic-y += irq_regs.h
generic-y += irq_work.h
generic-y += kdebug.h
diff --git a/arch/openrisc/include/asm/atomic.h b/arch/openrisc/include/asm/atomic.h
new file mode 100644
index 000000000000..146e1660f00e
--- /dev/null
+++ b/arch/openrisc/include/asm/atomic.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef __ASM_OPENRISC_ATOMIC_H
+#define __ASM_OPENRISC_ATOMIC_H
+
+#include <linux/types.h>
+
+/* Atomically perform op with v->counter and i */
+#define ATOMIC_OP(op) \
+static inline void atomic_##op(int i, atomic_t *v) \
+{ \
+ int tmp; \
+ \
+ __asm__ __volatile__( \
+ "1: l.lwa %0,0(%1) \n" \
+ " l." #op " %0,%0,%2 \n" \
+ " l.swa 0(%1),%0 \n" \
+ " l.bnf 1b \n" \
+ " l.nop \n" \
+ : "=&r"(tmp) \
+ : "r"(&v->counter), "r"(i) \
+ : "cc", "memory"); \
+}
+
+/* Atomically perform op with v->counter and i, return the result */
+#define ATOMIC_OP_RETURN(op) \
+static inline int atomic_##op##_return(int i, atomic_t *v) \
+{ \
+ int tmp; \
+ \
+ __asm__ __volatile__( \
+ "1: l.lwa %0,0(%1) \n" \
+ " l." #op " %0,%0,%2 \n" \
+ " l.swa 0(%1),%0 \n" \
+ " l.bnf 1b \n" \
+ " l.nop \n" \
+ : "=&r"(tmp) \
+ : "r"(&v->counter), "r"(i) \
+ : "cc", "memory"); \
+ \
+ return tmp; \
+}
+
+/* Atomically perform op with v->counter and i, return orig v->counter */
+#define ATOMIC_FETCH_OP(op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ int tmp, old; \
+ \
+ __asm__ __volatile__( \
+ "1: l.lwa %0,0(%2) \n" \
+ " l." #op " %1,%0,%3 \n" \
+ " l.swa 0(%2),%1 \n" \
+ " l.bnf 1b \n" \
+ " l.nop \n" \
+ : "=&r"(old), "=&r"(tmp) \
+ : "r"(&v->counter), "r"(i) \
+ : "cc", "memory"); \
+ \
+ return old; \
+}
+
+ATOMIC_OP_RETURN(add)
+ATOMIC_OP_RETURN(sub)
+
+ATOMIC_FETCH_OP(add)
+ATOMIC_FETCH_OP(sub)
+ATOMIC_FETCH_OP(and)
+ATOMIC_FETCH_OP(or)
+ATOMIC_FETCH_OP(xor)
+
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
+
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_OP_RETURN
+#undef ATOMIC_OP
+
+#define atomic_add_return atomic_add_return
+#define atomic_sub_return atomic_sub_return
+#define atomic_fetch_add atomic_fetch_add
+#define atomic_fetch_sub atomic_fetch_sub
+#define atomic_fetch_and atomic_fetch_and
+#define atomic_fetch_or atomic_fetch_or
+#define atomic_fetch_xor atomic_fetch_xor
+#define atomic_and atomic_and
+#define atomic_or atomic_or
+#define atomic_xor atomic_xor
+
+/*
+ * Atomically add a to v->counter as long as v is not already u.
+ * Returns the original value at v->counter.
+ *
+ * This is often used through atomic_inc_not_zero()
+ */
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+ int old, tmp;
+
+ __asm__ __volatile__(
+ "1: l.lwa %0, 0(%2) \n"
+ " l.sfeq %0, %4 \n"
+ " l.bf 2f \n"
+ " l.add %1, %0, %3 \n"
+ " l.swa 0(%2), %1 \n"
+ " l.bnf 1b \n"
+ " l.nop \n"
+ "2: \n"
+ : "=&r"(old), "=&r" (tmp)
+ : "r"(&v->counter), "r"(a), "r"(u)
+ : "cc", "memory");
+
+ return old;
+}
+#define __atomic_add_unless __atomic_add_unless
+
+#include <asm-generic/atomic.h>
+
+#endif /* __ASM_OPENRISC_ATOMIC_H */
diff --git a/arch/openrisc/include/asm/bitops.h b/arch/openrisc/include/asm/bitops.h
index 3003cdad561b..689f56819d53 100644
--- a/arch/openrisc/include/asm/bitops.h
+++ b/arch/openrisc/include/asm/bitops.h
@@ -45,7 +45,7 @@
#include <asm-generic/bitops/hweight.h>
#include <asm-generic/bitops/lock.h>
-#include <asm-generic/bitops/atomic.h>
+#include <asm/bitops/atomic.h>
#include <asm-generic/bitops/non-atomic.h>
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic.h>
diff --git a/arch/openrisc/include/asm/bitops/atomic.h b/arch/openrisc/include/asm/bitops/atomic.h
new file mode 100644
index 000000000000..35fb85f61b4a
--- /dev/null
+++ b/arch/openrisc/include/asm/bitops/atomic.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef __ASM_OPENRISC_BITOPS_ATOMIC_H
+#define __ASM_OPENRISC_BITOPS_ATOMIC_H
+
+static inline void set_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+ unsigned long tmp;
+
+ __asm__ __volatile__(
+ "1: l.lwa %0,0(%1) \n"
+ " l.or %0,%0,%2 \n"
+ " l.swa 0(%1),%0 \n"
+ " l.bnf 1b \n"
+ " l.nop \n"
+ : "=&r"(tmp)
+ : "r"(p), "r"(mask)
+ : "cc", "memory");
+}
+
+static inline void clear_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+ unsigned long tmp;
+
+ __asm__ __volatile__(
+ "1: l.lwa %0,0(%1) \n"
+ " l.and %0,%0,%2 \n"
+ " l.swa 0(%1),%0 \n"
+ " l.bnf 1b \n"
+ " l.nop \n"
+ : "=&r"(tmp)
+ : "r"(p), "r"(~mask)
+ : "cc", "memory");
+}
+
+static inline void change_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+ unsigned long tmp;
+
+ __asm__ __volatile__(
+ "1: l.lwa %0,0(%1) \n"
+ " l.xor %0,%0,%2 \n"
+ " l.swa 0(%1),%0 \n"
+ " l.bnf 1b \n"
+ " l.nop \n"
+ : "=&r"(tmp)
+ : "r"(p), "r"(mask)
+ : "cc", "memory");
+}
+
+static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+ unsigned long old;
+ unsigned long tmp;
+
+ __asm__ __volatile__(
+ "1: l.lwa %0,0(%2) \n"
+ " l.or %1,%0,%3 \n"
+ " l.swa 0(%2),%1 \n"
+ " l.bnf 1b \n"
+ " l.nop \n"
+ : "=&r"(old), "=&r"(tmp)
+ : "r"(p), "r"(mask)
+ : "cc", "memory");
+
+ return (old & mask) != 0;
+}
+
+static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+ unsigned long old;
+ unsigned long tmp;
+
+ __asm__ __volatile__(
+ "1: l.lwa %0,0(%2) \n"
+ " l.and %1,%0,%3 \n"
+ " l.swa 0(%2),%1 \n"
+ " l.bnf 1b \n"
+ " l.nop \n"
+ : "=&r"(old), "=&r"(tmp)
+ : "r"(p), "r"(~mask)
+ : "cc", "memory");
+
+ return (old & mask) != 0;
+}
+
+static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+ unsigned long old;
+ unsigned long tmp;
+
+ __asm__ __volatile__(
+ "1: l.lwa %0,0(%2) \n"
+ " l.xor %1,%0,%3 \n"
+ " l.swa 0(%2),%1 \n"
+ " l.bnf 1b \n"
+ " l.nop \n"
+ : "=&r"(old), "=&r"(tmp)
+ : "r"(p), "r"(mask)
+ : "cc", "memory");
+
+ return (old & mask) != 0;
+}
+
+#endif /* __ASM_OPENRISC_BITOPS_ATOMIC_H */
diff --git a/arch/openrisc/include/asm/cmpxchg.h b/arch/openrisc/include/asm/cmpxchg.h
new file mode 100644
index 000000000000..5fcb9ac72693
--- /dev/null
+++ b/arch/openrisc/include/asm/cmpxchg.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef __ASM_OPENRISC_CMPXCHG_H
+#define __ASM_OPENRISC_CMPXCHG_H
+
+#include <linux/types.h>
+
+/*
+ * This function doesn't exist, so you'll get a linker error
+ * if something tries to do an invalid cmpxchg().
+ */
+extern void __cmpxchg_called_with_bad_pointer(void);
+
+#define __HAVE_ARCH_CMPXCHG 1
+
+static inline unsigned long
+__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
+{
+ if (size != 4) {
+ __cmpxchg_called_with_bad_pointer();
+ return old;
+ }
+
+ __asm__ __volatile__(
+ "1: l.lwa %0, 0(%1) \n"
+ " l.sfeq %0, %2 \n"
+ " l.bnf 2f \n"
+ " l.nop \n"
+ " l.swa 0(%1), %3 \n"
+ " l.bnf 1b \n"
+ " l.nop \n"
+ "2: \n"
+ : "=&r"(old)
+ : "r"(ptr), "r"(old), "r"(new)
+ : "cc", "memory");
+
+ return old;
+}
+
+#define cmpxchg(ptr, o, n) \
+ ({ \
+ (__typeof__(*(ptr))) __cmpxchg((ptr), \
+ (unsigned long)(o), \
+ (unsigned long)(n), \
+ sizeof(*(ptr))); \
+ })
+
+/*
+ * This function doesn't exist, so you'll get a linker error if
+ * something tries to do an invalidly-sized xchg().
+ */
+extern void __xchg_called_with_bad_pointer(void);
+
+static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
+ int size)
+{
+ if (size != 4) {
+ __xchg_called_with_bad_pointer();
+ return val;
+ }
+
+ __asm__ __volatile__(
+ "1: l.lwa %0, 0(%1) \n"
+ " l.swa 0(%1), %2 \n"
+ " l.bnf 1b \n"
+ " l.nop \n"
+ : "=&r"(val)
+ : "r"(ptr), "r"(val)
+ : "cc", "memory");
+
+ return val;
+}
+
+#define xchg(ptr, with) \
+ ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), sizeof(*(ptr))))
+
+#endif /* __ASM_OPENRISC_CMPXCHG_H */
diff --git a/arch/openrisc/include/asm/cpuinfo.h b/arch/openrisc/include/asm/cpuinfo.h
index 917318b6a970..ec10679d6429 100644
--- a/arch/openrisc/include/asm/cpuinfo.h
+++ b/arch/openrisc/include/asm/cpuinfo.h
@@ -24,9 +24,11 @@ struct cpuinfo {
u32 icache_size;
u32 icache_block_size;
+ u32 icache_ways;
u32 dcache_size;
u32 dcache_block_size;
+ u32 dcache_ways;
};
extern struct cpuinfo cpuinfo;
diff --git a/arch/openrisc/include/asm/futex.h b/arch/openrisc/include/asm/futex.h
new file mode 100644
index 000000000000..778087341977
--- /dev/null
+++ b/arch/openrisc/include/asm/futex.h
@@ -0,0 +1,135 @@
+#ifndef __ASM_OPENRISC_FUTEX_H
+#define __ASM_OPENRISC_FUTEX_H
+
+#ifdef __KERNEL__
+
+#include <linux/futex.h>
+#include <linux/uaccess.h>
+#include <asm/errno.h>
+
+#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
+({ \
+ __asm__ __volatile__ ( \
+ "1: l.lwa %0, %2 \n" \
+ insn "\n" \
+ "2: l.swa %2, %1 \n" \
+ " l.bnf 1b \n" \
+ " l.ori %1, r0, 0 \n" \
+ "3: \n" \
+ ".section .fixup,\"ax\" \n" \
+ "4: l.j 3b \n" \
+ " l.addi %1, r0, %3 \n" \
+ ".previous \n" \
+ ".section __ex_table,\"a\" \n" \
+ ".word 1b,4b,2b,4b \n" \
+ ".previous \n" \
+ : "=&r" (oldval), "=&r" (ret), "+m" (*uaddr) \
+ : "i" (-EFAULT), "r" (oparg) \
+ : "cc", "memory" \
+ ); \
+})
+
+static inline int
+futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+{
+ int op = (encoded_op >> 28) & 7;
+ int cmp = (encoded_op >> 24) & 15;
+ int oparg = (encoded_op << 8) >> 20;
+ int cmparg = (encoded_op << 20) >> 20;
+ int oldval = 0, ret;
+
+ if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
+ oparg = 1 << oparg;
+
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+ return -EFAULT;
+
+ pagefault_disable();
+
+ switch (op) {
+ case FUTEX_OP_SET:
+ __futex_atomic_op("l.or %1,%4,%4", ret, oldval, uaddr, oparg);
+ break;
+ case FUTEX_OP_ADD:
+ __futex_atomic_op("l.add %1,%0,%4", ret, oldval, uaddr, oparg);
+ break;
+ case FUTEX_OP_OR:
+ __futex_atomic_op("l.or %1,%0,%4", ret, oldval, uaddr, oparg);
+ break;
+ case FUTEX_OP_ANDN:
+ __futex_atomic_op("l.and %1,%0,%4", ret, oldval, uaddr, ~oparg);
+ break;
+ case FUTEX_OP_XOR:
+ __futex_atomic_op("l.xor %1,%0,%4", ret, oldval, uaddr, oparg);
+ break;
+ default:
+ ret = -ENOSYS;
+ }
+
+ pagefault_enable();
+
+ if (!ret) {
+ switch (cmp) {
+ case FUTEX_OP_CMP_EQ:
+ ret = (oldval == cmparg);
+ break;
+ case FUTEX_OP_CMP_NE:
+ ret = (oldval != cmparg);
+ break;
+ case FUTEX_OP_CMP_LT:
+ ret = (oldval < cmparg);
+ break;
+ case FUTEX_OP_CMP_GE:
+ ret = (oldval >= cmparg);
+ break;
+ case FUTEX_OP_CMP_LE:
+ ret = (oldval <= cmparg);
+ break;
+ case FUTEX_OP_CMP_GT:
+ ret = (oldval > cmparg);
+ break;
+ default:
+ ret = -ENOSYS;
+ }
+ }
+ return ret;
+}
+
+static inline int
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
+{
+ int ret = 0;
+ u32 prev;
+
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+ return -EFAULT;
+
+ __asm__ __volatile__ ( \
+ "1: l.lwa %1, %2 \n" \
+ " l.sfeq %1, %3 \n" \
+ " l.bnf 3f \n" \
+ " l.nop \n" \
+ "2: l.swa %2, %4 \n" \
+ " l.bnf 1b \n" \
+ " l.nop \n" \
+ "3: \n" \
+ ".section .fixup,\"ax\" \n" \
+ "4: l.j 3b \n" \
+ " l.addi %0, r0, %5 \n" \
+ ".previous \n" \
+ ".section __ex_table,\"a\" \n" \
+ ".word 1b,4b,2b,4b \n" \
+ ".previous \n" \
+ : "+r" (ret), "=&r" (prev), "+m" (*uaddr) \
+ : "r" (oldval), "r" (newval), "i" (-EFAULT) \
+ : "cc", "memory" \
+ );
+
+ *uval = prev;
+ return ret;
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASM_OPENRISC_FUTEX_H */
diff --git a/arch/openrisc/include/asm/spr_defs.h b/arch/openrisc/include/asm/spr_defs.h
index 5dbc668865c4..367dac70326a 100644
--- a/arch/openrisc/include/asm/spr_defs.h
+++ b/arch/openrisc/include/asm/spr_defs.h
@@ -152,8 +152,8 @@
#define SPR_UPR_MP 0x00000020 /* MAC present */
#define SPR_UPR_DUP 0x00000040 /* Debug unit present */
#define SPR_UPR_PCUP 0x00000080 /* Performance counters unit present */
-#define SPR_UPR_PMP 0x00000100 /* Power management present */
-#define SPR_UPR_PICP 0x00000200 /* PIC present */
+#define SPR_UPR_PICP 0x00000100 /* PIC present */
+#define SPR_UPR_PMP 0x00000200 /* Power management present */
#define SPR_UPR_TTP 0x00000400 /* Tick timer present */
#define SPR_UPR_RES 0x00fe0000 /* Reserved */
#define SPR_UPR_CUP 0xff000000 /* Context units present */
diff --git a/arch/openrisc/include/asm/string.h b/arch/openrisc/include/asm/string.h
new file mode 100644
index 000000000000..64939ccd7531
--- /dev/null
+++ b/arch/openrisc/include/asm/string.h
@@ -0,0 +1,10 @@
+#ifndef __ASM_OPENRISC_STRING_H
+#define __ASM_OPENRISC_STRING_H
+
+#define __HAVE_ARCH_MEMSET
+extern void *memset(void *s, int c, __kernel_size_t n);
+
+#define __HAVE_ARCH_MEMCPY
+extern void *memcpy(void *dest, __const void *src, __kernel_size_t n);
+
+#endif /* __ASM_OPENRISC_STRING_H */
diff --git a/arch/openrisc/kernel/.gitignore b/arch/openrisc/kernel/.gitignore
new file mode 100644
index 000000000000..c5f676c3c224
--- /dev/null
+++ b/arch/openrisc/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S
index aac0bde3330c..bc6500860f4d 100644
--- a/arch/openrisc/kernel/entry.S
+++ b/arch/openrisc/kernel/entry.S
@@ -173,6 +173,11 @@ handler: ;\
l.j _ret_from_exception ;\
l.nop
+/* clobbers 'reg' */
+#define CLEAR_LWA_FLAG(reg) \
+ l.movhi reg,hi(lwa_flag) ;\
+ l.ori reg,reg,lo(lwa_flag) ;\
+ l.sw 0(reg),r0
/*
* NOTE: one should never assume that SPR_EPC, SPR_ESR, SPR_EEAR
* contain the same values as when exception we're handling
@@ -193,6 +198,7 @@ EXCEPTION_ENTRY(_tng_kernel_start)
/* ---[ 0x200: BUS exception ]------------------------------------------- */
EXCEPTION_ENTRY(_bus_fault_handler)
+ CLEAR_LWA_FLAG(r3)
/* r4: EA of fault (set by EXCEPTION_HANDLE) */
l.jal do_bus_fault
l.addi r3,r1,0 /* pt_regs */
@@ -202,11 +208,13 @@ EXCEPTION_ENTRY(_bus_fault_handler)
/* ---[ 0x300: Data Page Fault exception ]------------------------------- */
EXCEPTION_ENTRY(_dtlb_miss_page_fault_handler)
+ CLEAR_LWA_FLAG(r3)
l.and r5,r5,r0
l.j 1f
l.nop
EXCEPTION_ENTRY(_data_page_fault_handler)
+ CLEAR_LWA_FLAG(r3)
/* set up parameters for do_page_fault */
l.ori r5,r0,0x300 // exception vector
1:
@@ -220,7 +228,7 @@ EXCEPTION_ENTRY(_data_page_fault_handler)
* DTLB miss handler in the CONFIG_GUARD_PROTECTED_CORE part
*/
#ifdef CONFIG_OPENRISC_NO_SPR_SR_DSX
- l.lwz r6,PT_PC(r3) // address of an offending insn
+ l.lwz r6,PT_PC(r3) // address of an offending insn
l.lwz r6,0(r6) // instruction that caused pf
l.srli r6,r6,26 // check opcode for jump insn
@@ -236,57 +244,57 @@ EXCEPTION_ENTRY(_data_page_fault_handler)
l.bf 8f
l.sfeqi r6,0x12 // l.jalr
l.bf 8f
-
- l.nop
+ l.nop
l.j 9f
- l.nop
-8:
+ l.nop
- l.lwz r6,PT_PC(r3) // address of an offending insn
+8: // offending insn is in delay slot
+ l.lwz r6,PT_PC(r3) // address of an offending insn
l.addi r6,r6,4
l.lwz r6,0(r6) // instruction that caused pf
l.srli r6,r6,26 // get opcode
-9:
+9: // offending instruction opcode loaded in r6
#else
- l.mfspr r6,r0,SPR_SR // SR
-// l.lwz r6,PT_SR(r3) // ESR
- l.andi r6,r6,SPR_SR_DSX // check for delay slot exception
- l.sfeqi r6,0x1 // exception happened in delay slot
- l.bnf 7f
- l.lwz r6,PT_PC(r3) // address of an offending insn
+ l.lwz r6,PT_SR(r3) // SR
+ l.andi r6,r6,SPR_SR_DSX // check for delay slot exception
+ l.sfne r6,r0 // exception happened in delay slot
+ l.bnf 7f
+ l.lwz r6,PT_PC(r3) // address of an offending insn
- l.addi r6,r6,4 // offending insn is in delay slot
+ l.addi r6,r6,4 // offending insn is in delay slot
7:
l.lwz r6,0(r6) // instruction that caused pf
l.srli r6,r6,26 // check opcode for write access
#endif
- l.sfgeui r6,0x33 // check opcode for write access
+ l.sfgeui r6,0x33 // check opcode for write access
l.bnf 1f
l.sfleui r6,0x37
l.bnf 1f
l.ori r6,r0,0x1 // write access
l.j 2f
- l.nop
+ l.nop
1: l.ori r6,r0,0x0 // !write access
2:
/* call fault.c handler in or32/mm/fault.c */
l.jal do_page_fault
- l.nop
+ l.nop
l.j _ret_from_exception
- l.nop
+ l.nop
/* ---[ 0x400: Insn Page Fault exception ]------------------------------- */
EXCEPTION_ENTRY(_itlb_miss_page_fault_handler)
+ CLEAR_LWA_FLAG(r3)
l.and r5,r5,r0
l.j 1f
l.nop
EXCEPTION_ENTRY(_insn_page_fault_handler)
+ CLEAR_LWA_FLAG(r3)
/* set up parameters for do_page_fault */
l.ori r5,r0,0x400 // exception vector
1:
@@ -296,14 +304,15 @@ EXCEPTION_ENTRY(_insn_page_fault_handler)
/* call fault.c handler in or32/mm/fault.c */
l.jal do_page_fault
- l.nop
+ l.nop
l.j _ret_from_exception
- l.nop
+ l.nop
/* ---[ 0x500: Timer exception ]----------------------------------------- */
EXCEPTION_ENTRY(_timer_handler)
+ CLEAR_LWA_FLAG(r3)
l.jal timer_interrupt
l.addi r3,r1,0 /* pt_regs */
@@ -313,6 +322,7 @@ EXCEPTION_ENTRY(_timer_handler)
/* ---[ 0x600: Aligment exception ]-------------------------------------- */
EXCEPTION_ENTRY(_alignment_handler)
+ CLEAR_LWA_FLAG(r3)
/* r4: EA of fault (set by EXCEPTION_HANDLE) */
l.jal do_unaligned_access
l.addi r3,r1,0 /* pt_regs */
@@ -509,6 +519,7 @@ EXCEPTION_ENTRY(_external_irq_handler)
// l.sw PT_SR(r1),r4
1:
#endif
+ CLEAR_LWA_FLAG(r3)
l.addi r3,r1,0
l.movhi r8,hi(do_IRQ)
l.ori r8,r8,lo(do_IRQ)
@@ -556,8 +567,12 @@ ENTRY(_sys_call_handler)
* they should be clobbered, otherwise
*/
l.sw PT_GPR3(r1),r3
- /* r4 already saved */
- /* r4 holds the EEAR address of the fault, load the original r4 */
+ /*
+ * r4 already saved
+ * r4 holds the EEAR address of the fault, use it as screatch reg and
+ * then load the original r4
+ */
+ CLEAR_LWA_FLAG(r4)
l.lwz r4,PT_GPR4(r1)
l.sw PT_GPR5(r1),r5
l.sw PT_GPR6(r1),r6
@@ -776,6 +791,7 @@ UNHANDLED_EXCEPTION(_vector_0xd00,0xd00)
/* ---[ 0xe00: Trap exception ]------------------------------------------ */
EXCEPTION_ENTRY(_trap_handler)
+ CLEAR_LWA_FLAG(r3)
/* r4: EA of fault (set by EXCEPTION_HANDLE) */
l.jal do_trap
l.addi r3,r1,0 /* pt_regs */
diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S
index f14793306b03..d01b82eace3e 100644
--- a/arch/openrisc/kernel/head.S
+++ b/arch/openrisc/kernel/head.S
@@ -24,6 +24,7 @@
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/pgtable.h>
+#include <asm/thread_info.h>
#include <asm/cache.h>
#include <asm/spr_defs.h>
#include <asm/asm-offsets.h>
@@ -34,7 +35,7 @@
l.add rd,rd,rs
#define CLEAR_GPR(gpr) \
- l.or gpr,r0,r0
+ l.movhi gpr,0x0
#define LOAD_SYMBOL_2_GPR(gpr,symbol) \
l.movhi gpr,hi(symbol) ;\
@@ -442,6 +443,9 @@ _dispatch_do_ipage_fault:
__HEAD
.global _start
_start:
+ /* Init r0 to zero as per spec */
+ CLEAR_GPR(r0)
+
/* save kernel parameters */
l.or r25,r0,r3 /* pointer to fdt */
@@ -486,7 +490,8 @@ _start:
/*
* set up initial ksp and current
*/
- LOAD_SYMBOL_2_GPR(r1,init_thread_union+0x2000) // setup kernel stack
+ /* setup kernel stack */
+ LOAD_SYMBOL_2_GPR(r1,init_thread_union + THREAD_SIZE)
LOAD_SYMBOL_2_GPR(r10,init_thread_union) // setup current
tophys (r31,r10)
l.sw TI_KSP(r31), r1
@@ -520,22 +525,8 @@ enable_dc:
l.nop
flush_tlb:
- /*
- * I N V A L I D A T E T L B e n t r i e s
- */
- LOAD_SYMBOL_2_GPR(r5,SPR_DTLBMR_BASE(0))
- LOAD_SYMBOL_2_GPR(r6,SPR_ITLBMR_BASE(0))
- l.addi r7,r0,128 /* Maximum number of sets */
-1:
- l.mtspr r5,r0,0x0
- l.mtspr r6,r0,0x0
-
- l.addi r5,r5,1
- l.addi r6,r6,1
- l.sfeq r7,r0
- l.bnf 1b
- l.addi r7,r7,-1
-
+ l.jal _flush_tlb
+ l.nop
/* The MMU needs to be enabled before or32_early_setup is called */
@@ -627,6 +618,26 @@ jump_start_kernel:
l.jr r30
l.nop
+_flush_tlb:
+ /*
+ * I N V A L I D A T E T L B e n t r i e s
+ */
+ LOAD_SYMBOL_2_GPR(r5,SPR_DTLBMR_BASE(0))
+ LOAD_SYMBOL_2_GPR(r6,SPR_ITLBMR_BASE(0))
+ l.addi r7,r0,128 /* Maximum number of sets */
+1:
+ l.mtspr r5,r0,0x0
+ l.mtspr r6,r0,0x0
+
+ l.addi r5,r5,1
+ l.addi r6,r6,1
+ l.sfeq r7,r0
+ l.bnf 1b
+ l.addi r7,r7,-1
+
+ l.jr r9
+ l.nop
+
/* ========================================[ cache ]=== */
/* aligment here so we don't change memory offsets with
@@ -971,8 +982,6 @@ ENTRY(dtlb_miss_handler)
EXCEPTION_STORE_GPR2
EXCEPTION_STORE_GPR3
EXCEPTION_STORE_GPR4
- EXCEPTION_STORE_GPR5
- EXCEPTION_STORE_GPR6
/*
* get EA of the miss
*/
@@ -980,91 +989,70 @@ ENTRY(dtlb_miss_handler)
/*
* pmd = (pmd_t *)(current_pgd + pgd_index(daddr));
*/
- GET_CURRENT_PGD(r3,r5) // r3 is current_pgd, r5 is temp
+ GET_CURRENT_PGD(r3,r4) // r3 is current_pgd, r4 is temp
l.srli r4,r2,0x18 // >> PAGE_SHIFT + (PAGE_SHIFT - 2)
l.slli r4,r4,0x2 // to get address << 2
- l.add r5,r4,r3 // r4 is pgd_index(daddr)
+ l.add r3,r4,r3 // r4 is pgd_index(daddr)
/*
* if (pmd_none(*pmd))
* goto pmd_none:
*/
- tophys (r4,r5)
+ tophys (r4,r3)
l.lwz r3,0x0(r4) // get *pmd value
l.sfne r3,r0
l.bnf d_pmd_none
- l.andi r3,r3,~PAGE_MASK //0x1fff // ~PAGE_MASK
- /*
- * if (pmd_bad(*pmd))
- * pmd_clear(pmd)
- * goto pmd_bad:
- */
-// l.sfeq r3,r0 // check *pmd value
-// l.bf d_pmd_good
- l.addi r3,r0,0xffffe000 // PAGE_MASK
-// l.j d_pmd_bad
-// l.sw 0x0(r4),r0 // clear pmd
+ l.addi r3,r0,0xffffe000 // PAGE_MASK
+
d_pmd_good:
/*
* pte = *pte_offset(pmd, daddr);
*/
l.lwz r4,0x0(r4) // get **pmd value
l.and r4,r4,r3 // & PAGE_MASK
- l.srli r5,r2,0xd // >> PAGE_SHIFT, r2 == EEAR
- l.andi r3,r5,0x7ff // (1UL << PAGE_SHIFT - 2) - 1
+ l.srli r2,r2,0xd // >> PAGE_SHIFT, r2 == EEAR
+ l.andi r3,r2,0x7ff // (1UL << PAGE_SHIFT - 2) - 1
l.slli r3,r3,0x2 // to get address << 2
l.add r3,r3,r4
- l.lwz r2,0x0(r3) // this is pte at last
+ l.lwz r3,0x0(r3) // this is pte at last
/*
* if (!pte_present(pte))
*/
- l.andi r4,r2,0x1
+ l.andi r4,r3,0x1
l.sfne r4,r0 // is pte present
l.bnf d_pte_not_present
- l.addi r3,r0,0xffffe3fa // PAGE_MASK | DTLB_UP_CONVERT_MASK
+ l.addi r4,r0,0xffffe3fa // PAGE_MASK | DTLB_UP_CONVERT_MASK
/*
* fill DTLB TR register
*/
- l.and r4,r2,r3 // apply the mask
+ l.and r4,r3,r4 // apply the mask
// Determine number of DMMU sets
- l.mfspr r6, r0, SPR_DMMUCFGR
- l.andi r6, r6, SPR_DMMUCFGR_NTS
- l.srli r6, r6, SPR_DMMUCFGR_NTS_OFF
+ l.mfspr r2, r0, SPR_DMMUCFGR
+ l.andi r2, r2, SPR_DMMUCFGR_NTS
+ l.srli r2, r2, SPR_DMMUCFGR_NTS_OFF
l.ori r3, r0, 0x1
- l.sll r3, r3, r6 // r3 = number DMMU sets DMMUCFGR
- l.addi r6, r3, -1 // r6 = nsets mask
- l.and r5, r5, r6 // calc offset: & (NUM_TLB_ENTRIES-1)
+ l.sll r3, r3, r2 // r3 = number DMMU sets DMMUCFGR
+ l.addi r2, r3, -1 // r2 = nsets mask
+ l.mfspr r3, r0, SPR_EEAR_BASE
+ l.srli r3, r3, 0xd // >> PAGE_SHIFT
+ l.and r2, r3, r2 // calc offset: & (NUM_TLB_ENTRIES-1)
//NUM_TLB_ENTRIES
- l.mtspr r5,r4,SPR_DTLBTR_BASE(0)
+ l.mtspr r2,r4,SPR_DTLBTR_BASE(0)
/*
* fill DTLB MR register
*/
- l.mfspr r2,r0,SPR_EEAR_BASE
- l.addi r3,r0,0xffffe000 // PAGE_MASK
- l.and r4,r2,r3 // apply PAGE_MASK to EA (__PHX__ do we really need this?)
- l.ori r4,r4,0x1 // set hardware valid bit: DTBL_MR entry
- l.mtspr r5,r4,SPR_DTLBMR_BASE(0)
+ l.slli r3, r3, 0xd /* << PAGE_SHIFT => EA & PAGE_MASK */
+ l.ori r4,r3,0x1 // set hardware valid bit: DTBL_MR entry
+ l.mtspr r2,r4,SPR_DTLBMR_BASE(0)
EXCEPTION_LOAD_GPR2
EXCEPTION_LOAD_GPR3
EXCEPTION_LOAD_GPR4
- EXCEPTION_LOAD_GPR5
- EXCEPTION_LOAD_GPR6
- l.rfe
-d_pmd_bad:
- l.nop 1
- EXCEPTION_LOAD_GPR2
- EXCEPTION_LOAD_GPR3
- EXCEPTION_LOAD_GPR4
- EXCEPTION_LOAD_GPR5
- EXCEPTION_LOAD_GPR6
l.rfe
d_pmd_none:
d_pte_not_present:
EXCEPTION_LOAD_GPR2
EXCEPTION_LOAD_GPR3
EXCEPTION_LOAD_GPR4
- EXCEPTION_LOAD_GPR5
- EXCEPTION_LOAD_GPR6
EXCEPTION_HANDLE(_dtlb_miss_page_fault_handler)
/* ==============================================[ ITLB miss handler ]=== */
@@ -1072,8 +1060,6 @@ ENTRY(itlb_miss_handler)
EXCEPTION_STORE_GPR2
EXCEPTION_STORE_GPR3
EXCEPTION_STORE_GPR4
- EXCEPTION_STORE_GPR5
- EXCEPTION_STORE_GPR6
/*
* get EA of the miss
*/
@@ -1083,30 +1069,19 @@ ENTRY(itlb_miss_handler)
* pmd = (pmd_t *)(current_pgd + pgd_index(daddr));
*
*/
- GET_CURRENT_PGD(r3,r5) // r3 is current_pgd, r5 is temp
+ GET_CURRENT_PGD(r3,r4) // r3 is current_pgd, r5 is temp
l.srli r4,r2,0x18 // >> PAGE_SHIFT + (PAGE_SHIFT - 2)
l.slli r4,r4,0x2 // to get address << 2
- l.add r5,r4,r3 // r4 is pgd_index(daddr)
+ l.add r3,r4,r3 // r4 is pgd_index(daddr)
/*
* if (pmd_none(*pmd))
* goto pmd_none:
*/
- tophys (r4,r5)
+ tophys (r4,r3)
l.lwz r3,0x0(r4) // get *pmd value
l.sfne r3,r0
l.bnf i_pmd_none
- l.andi r3,r3,0x1fff // ~PAGE_MASK
- /*
- * if (pmd_bad(*pmd))
- * pmd_clear(pmd)
- * goto pmd_bad:
- */
-
-// l.sfeq r3,r0 // check *pmd value
-// l.bf i_pmd_good
- l.addi r3,r0,0xffffe000 // PAGE_MASK
-// l.j i_pmd_bad
-// l.sw 0x0(r4),r0 // clear pmd
+ l.addi r3,r0,0xffffe000 // PAGE_MASK
i_pmd_good:
/*
@@ -1115,35 +1090,36 @@ i_pmd_good:
*/
l.lwz r4,0x0(r4) // get **pmd value
l.and r4,r4,r3 // & PAGE_MASK
- l.srli r5,r2,0xd // >> PAGE_SHIFT, r2 == EEAR
- l.andi r3,r5,0x7ff // (1UL << PAGE_SHIFT - 2) - 1
+ l.srli r2,r2,0xd // >> PAGE_SHIFT, r2 == EEAR
+ l.andi r3,r2,0x7ff // (1UL << PAGE_SHIFT - 2) - 1
l.slli r3,r3,0x2 // to get address << 2
l.add r3,r3,r4
- l.lwz r2,0x0(r3) // this is pte at last
+ l.lwz r3,0x0(r3) // this is pte at last
/*
* if (!pte_present(pte))
*
*/
- l.andi r4,r2,0x1
+ l.andi r4,r3,0x1
l.sfne r4,r0 // is pte present
l.bnf i_pte_not_present
- l.addi r3,r0,0xffffe03a // PAGE_MASK | ITLB_UP_CONVERT_MASK
+ l.addi r4,r0,0xffffe03a // PAGE_MASK | ITLB_UP_CONVERT_MASK
/*
* fill ITLB TR register
*/
- l.and r4,r2,r3 // apply the mask
- l.andi r3,r2,0x7c0 // _PAGE_EXEC | _PAGE_SRE | _PAGE_SWE | _PAGE_URE | _PAGE_UWE
-// l.andi r3,r2,0x400 // _PAGE_EXEC
+ l.and r4,r3,r4 // apply the mask
+ l.andi r3,r3,0x7c0 // _PAGE_EXEC | _PAGE_SRE | _PAGE_SWE | _PAGE_URE | _PAGE_UWE
l.sfeq r3,r0
l.bf itlb_tr_fill //_workaround
// Determine number of IMMU sets
- l.mfspr r6, r0, SPR_IMMUCFGR
- l.andi r6, r6, SPR_IMMUCFGR_NTS
- l.srli r6, r6, SPR_IMMUCFGR_NTS_OFF
+ l.mfspr r2, r0, SPR_IMMUCFGR
+ l.andi r2, r2, SPR_IMMUCFGR_NTS
+ l.srli r2, r2, SPR_IMMUCFGR_NTS_OFF
l.ori r3, r0, 0x1
- l.sll r3, r3, r6 // r3 = number IMMU sets IMMUCFGR
- l.addi r6, r3, -1 // r6 = nsets mask
- l.and r5, r5, r6 // calc offset: & (NUM_TLB_ENTRIES-1)
+ l.sll r3, r3, r2 // r3 = number IMMU sets IMMUCFGR
+ l.addi r2, r3, -1 // r2 = nsets mask
+ l.mfspr r3, r0, SPR_EEAR_BASE
+ l.srli r3, r3, 0xd // >> PAGE_SHIFT
+ l.and r2, r3, r2 // calc offset: & (NUM_TLB_ENTRIES-1)
/*
* __PHX__ :: fixme
@@ -1155,38 +1131,24 @@ i_pmd_good:
itlb_tr_fill_workaround:
l.ori r4,r4,0xc0 // | (SPR_ITLBTR_UXE | ITLBTR_SXE)
itlb_tr_fill:
- l.mtspr r5,r4,SPR_ITLBTR_BASE(0)
+ l.mtspr r2,r4,SPR_ITLBTR_BASE(0)
/*
* fill DTLB MR register
*/
- l.mfspr r2,r0,SPR_EEAR_BASE
- l.addi r3,r0,0xffffe000 // PAGE_MASK
- l.and r4,r2,r3 // apply PAGE_MASK to EA (__PHX__ do we really need this?)
- l.ori r4,r4,0x1 // set hardware valid bit: DTBL_MR entry
- l.mtspr r5,r4,SPR_ITLBMR_BASE(0)
+ l.slli r3, r3, 0xd /* << PAGE_SHIFT => EA & PAGE_MASK */
+ l.ori r4,r3,0x1 // set hardware valid bit: ITBL_MR entry
+ l.mtspr r2,r4,SPR_ITLBMR_BASE(0)
EXCEPTION_LOAD_GPR2
EXCEPTION_LOAD_GPR3
EXCEPTION_LOAD_GPR4
- EXCEPTION_LOAD_GPR5
- EXCEPTION_LOAD_GPR6
l.rfe
-i_pmd_bad:
- l.nop 1
- EXCEPTION_LOAD_GPR2
- EXCEPTION_LOAD_GPR3
- EXCEPTION_LOAD_GPR4
- EXCEPTION_LOAD_GPR5
- EXCEPTION_LOAD_GPR6
- l.rfe
i_pmd_none:
i_pte_not_present:
EXCEPTION_LOAD_GPR2
EXCEPTION_LOAD_GPR3
EXCEPTION_LOAD_GPR4
- EXCEPTION_LOAD_GPR5
- EXCEPTION_LOAD_GPR6
EXCEPTION_HANDLE(_itlb_miss_page_fault_handler)
/* ==============================================[ boot tlb handlers ]=== */
@@ -1571,12 +1533,7 @@ ENTRY(_early_uart_init)
l.jr r9
l.nop
-_string_copying_linux:
- .string "\n\n\n\n\n\rCopying Linux... \0"
-
-_string_ok_booting:
- .string "Ok, booting the kernel.\n\r\0"
-
+ .section .rodata
_string_unhandled_exception:
.string "\n\rRunarunaround: Unhandled exception 0x\0"
@@ -1586,11 +1543,6 @@ _string_epc_prefix:
_string_nl:
.string "\n\r\0"
- .global _string_esr_irq_bug
-_string_esr_irq_bug:
- .string "\n\rESR external interrupt bug, for details look into entry.S\n\r\0"
-
-
/* ========================================[ page aligned structures ]=== */
diff --git a/arch/openrisc/kernel/or32_ksyms.c b/arch/openrisc/kernel/or32_ksyms.c
index 86e31cf1de1d..5c4695d13542 100644
--- a/arch/openrisc/kernel/or32_ksyms.c
+++ b/arch/openrisc/kernel/or32_ksyms.c
@@ -44,3 +44,4 @@ DECLARE_EXPORT(__ashldi3);
DECLARE_EXPORT(__lshrdi3);
EXPORT_SYMBOL(__copy_tofrom_user);
+EXPORT_SYMBOL(memset);
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index d7990df9025a..6e9d1cb519f2 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -75,6 +75,17 @@ void machine_power_off(void)
__asm__("l.nop 1");
}
+/*
+ * Send the doze signal to the cpu if available.
+ * Make sure, that all interrupts are enabled
+ */
+void arch_cpu_idle(void)
+{
+ local_irq_enable();
+ if (mfspr(SPR_UPR) & SPR_UPR_PMP)
+ mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME);
+}
+
void (*pm_power_off) (void) = machine_power_off;
/*
@@ -226,6 +237,7 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t * fpu)
extern struct thread_info *_switch(struct thread_info *old_ti,
struct thread_info *new_ti);
+extern int lwa_flag;
struct task_struct *__switch_to(struct task_struct *old,
struct task_struct *new)
@@ -243,6 +255,8 @@ struct task_struct *__switch_to(struct task_struct *old,
new_ti = new->stack;
old_ti = old->stack;
+ lwa_flag = 0;
+
current_thread_info_set[smp_processor_id()] = new_ti;
last = (_switch(old_ti, new_ti))->task;
diff --git a/arch/openrisc/kernel/ptrace.c b/arch/openrisc/kernel/ptrace.c
index 4f59fa4e34e5..228288887d74 100644
--- a/arch/openrisc/kernel/ptrace.c
+++ b/arch/openrisc/kernel/ptrace.c
@@ -16,7 +16,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <stddef.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c
index cb797a3beb47..dbf5ee95a0d5 100644
--- a/arch/openrisc/kernel/setup.c
+++ b/arch/openrisc/kernel/setup.c
@@ -117,13 +117,15 @@ static void print_cpuinfo(void)
if (upr & SPR_UPR_DCP)
printk(KERN_INFO
"-- dcache: %4d bytes total, %2d bytes/line, %d way(s)\n",
- cpuinfo.dcache_size, cpuinfo.dcache_block_size, 1);
+ cpuinfo.dcache_size, cpuinfo.dcache_block_size,
+ cpuinfo.dcache_ways);
else
printk(KERN_INFO "-- dcache disabled\n");
if (upr & SPR_UPR_ICP)
printk(KERN_INFO
"-- icache: %4d bytes total, %2d bytes/line, %d way(s)\n",
- cpuinfo.icache_size, cpuinfo.icache_block_size, 1);
+ cpuinfo.icache_size, cpuinfo.icache_block_size,
+ cpuinfo.icache_ways);
else
printk(KERN_INFO "-- icache disabled\n");
@@ -155,25 +157,25 @@ void __init setup_cpuinfo(void)
{
struct device_node *cpu;
unsigned long iccfgr, dccfgr;
- unsigned long cache_set_size, cache_ways;
+ unsigned long cache_set_size;
cpu = of_find_compatible_node(NULL, NULL, "opencores,or1200-rtlsvn481");
if (!cpu)
panic("No compatible CPU found in device tree...\n");
iccfgr = mfspr(SPR_ICCFGR);
- cache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW);
+ cpuinfo.icache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW);
cache_set_size = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3);
cpuinfo.icache_block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7);
cpuinfo.icache_size =
- cache_set_size * cache_ways * cpuinfo.icache_block_size;
+ cache_set_size * cpuinfo.icache_ways * cpuinfo.icache_block_size;
dccfgr = mfspr(SPR_DCCFGR);
- cache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW);
+ cpuinfo.dcache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW);
cache_set_size = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3);
cpuinfo.dcache_block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7);
cpuinfo.dcache_size =
- cache_set_size * cache_ways * cpuinfo.dcache_block_size;
+ cache_set_size * cpuinfo.dcache_ways * cpuinfo.dcache_block_size;
if (of_property_read_u32(cpu, "clock-frequency",
&cpuinfo.clock_frequency)) {
@@ -308,30 +310,33 @@ static int show_cpuinfo(struct seq_file *m, void *v)
revision = vr & SPR_VR_REV;
seq_printf(m,
- "cpu\t\t: OpenRISC-%x\n"
- "revision\t: %d\n"
- "frequency\t: %ld\n"
- "dcache size\t: %d bytes\n"
- "dcache block size\t: %d bytes\n"
- "icache size\t: %d bytes\n"
- "icache block size\t: %d bytes\n"
- "immu\t\t: %d entries, %lu ways\n"
- "dmmu\t\t: %d entries, %lu ways\n"
- "bogomips\t: %lu.%02lu\n",
- version,
- revision,
- loops_per_jiffy * HZ,
- cpuinfo.dcache_size,
- cpuinfo.dcache_block_size,
- cpuinfo.icache_size,
- cpuinfo.icache_block_size,
- 1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2),
- 1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW),
- 1 << ((mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTS) >> 2),
- 1 + (mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTW),
- (loops_per_jiffy * HZ) / 500000,
- ((loops_per_jiffy * HZ) / 5000) % 100);
-
+ "cpu\t\t: OpenRISC-%x\n"
+ "revision\t: %d\n"
+ "frequency\t: %ld\n"
+ "dcache size\t: %d bytes\n"
+ "dcache block size\t: %d bytes\n"
+ "dcache ways\t: %d\n"
+ "icache size\t: %d bytes\n"
+ "icache block size\t: %d bytes\n"
+ "icache ways\t: %d\n"
+ "immu\t\t: %d entries, %lu ways\n"
+ "dmmu\t\t: %d entries, %lu ways\n"
+ "bogomips\t: %lu.%02lu\n",
+ version,
+ revision,
+ loops_per_jiffy * HZ,
+ cpuinfo.dcache_size,
+ cpuinfo.dcache_block_size,
+ cpuinfo.dcache_ways,
+ cpuinfo.icache_size,
+ cpuinfo.icache_block_size,
+ cpuinfo.icache_ways,
+ 1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2),
+ 1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW),
+ 1 << ((mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTS) >> 2),
+ 1 + (mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTW),
+ (loops_per_jiffy * HZ) / 500000,
+ ((loops_per_jiffy * HZ) / 5000) % 100);
return 0;
}
diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c
index d29c41bfbffa..7e81ad258bca 100644
--- a/arch/openrisc/kernel/traps.c
+++ b/arch/openrisc/kernel/traps.c
@@ -40,6 +40,8 @@
extern char _etext, _stext;
int kstack_depth_to_print = 0x180;
+int lwa_flag;
+unsigned long __user *lwa_addr;
static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
{
@@ -334,10 +336,191 @@ asmlinkage void do_bus_fault(struct pt_regs *regs, unsigned long address)
}
}
+static inline int in_delay_slot(struct pt_regs *regs)
+{
+#ifdef CONFIG_OPENRISC_NO_SPR_SR_DSX
+ /* No delay slot flag, do the old way */
+ unsigned int op, insn;
+
+ insn = *((unsigned int *)regs->pc);
+ op = insn >> 26;
+ switch (op) {
+ case 0x00: /* l.j */
+ case 0x01: /* l.jal */
+ case 0x03: /* l.bnf */
+ case 0x04: /* l.bf */
+ case 0x11: /* l.jr */
+ case 0x12: /* l.jalr */
+ return 1;
+ default:
+ return 0;
+ }
+#else
+ return regs->sr & SPR_SR_DSX;
+#endif
+}
+
+static inline void adjust_pc(struct pt_regs *regs, unsigned long address)
+{
+ int displacement;
+ unsigned int rb, op, jmp;
+
+ if (unlikely(in_delay_slot(regs))) {
+ /* In delay slot, instruction at pc is a branch, simulate it */
+ jmp = *((unsigned int *)regs->pc);
+
+ displacement = sign_extend32(((jmp) & 0x3ffffff) << 2, 27);
+ rb = (jmp & 0x0000ffff) >> 11;
+ op = jmp >> 26;
+
+ switch (op) {
+ case 0x00: /* l.j */
+ regs->pc += displacement;
+ return;
+ case 0x01: /* l.jal */
+ regs->pc += displacement;
+ regs->gpr[9] = regs->pc + 8;
+ return;
+ case 0x03: /* l.bnf */
+ if (regs->sr & SPR_SR_F)
+ regs->pc += 8;
+ else
+ regs->pc += displacement;
+ return;
+ case 0x04: /* l.bf */
+ if (regs->sr & SPR_SR_F)
+ regs->pc += displacement;
+ else
+ regs->pc += 8;
+ return;
+ case 0x11: /* l.jr */
+ regs->pc = regs->gpr[rb];
+ return;
+ case 0x12: /* l.jalr */
+ regs->pc = regs->gpr[rb];
+ regs->gpr[9] = regs->pc + 8;
+ return;
+ default:
+ break;
+ }
+ } else {
+ regs->pc += 4;
+ }
+}
+
+static inline void simulate_lwa(struct pt_regs *regs, unsigned long address,
+ unsigned int insn)
+{
+ unsigned int ra, rd;
+ unsigned long value;
+ unsigned long orig_pc;
+ long imm;
+
+ const struct exception_table_entry *entry;
+
+ orig_pc = regs->pc;
+ adjust_pc(regs, address);
+
+ ra = (insn >> 16) & 0x1f;
+ rd = (insn >> 21) & 0x1f;
+ imm = (short)insn;
+ lwa_addr = (unsigned long __user *)(regs->gpr[ra] + imm);
+
+ if ((unsigned long)lwa_addr & 0x3) {
+ do_unaligned_access(regs, address);
+ return;
+ }
+
+ if (get_user(value, lwa_addr)) {
+ if (user_mode(regs)) {
+ force_sig(SIGSEGV, current);
+ return;
+ }
+
+ if ((entry = search_exception_tables(orig_pc))) {
+ regs->pc = entry->fixup;
+ return;
+ }
+
+ /* kernel access in kernel space, load it directly */
+ value = *((unsigned long *)lwa_addr);
+ }
+
+ lwa_flag = 1;
+ regs->gpr[rd] = value;
+}
+
+static inline void simulate_swa(struct pt_regs *regs, unsigned long address,
+ unsigned int insn)
+{
+ unsigned long __user *vaddr;
+ unsigned long orig_pc;
+ unsigned int ra, rb;
+ long imm;
+
+ const struct exception_table_entry *entry;
+
+ orig_pc = regs->pc;
+ adjust_pc(regs, address);
+
+ ra = (insn >> 16) & 0x1f;
+ rb = (insn >> 11) & 0x1f;
+ imm = (short)(((insn & 0x2200000) >> 10) | (insn & 0x7ff));
+ vaddr = (unsigned long __user *)(regs->gpr[ra] + imm);
+
+ if (!lwa_flag || vaddr != lwa_addr) {
+ regs->sr &= ~SPR_SR_F;
+ return;
+ }
+
+ if ((unsigned long)vaddr & 0x3) {
+ do_unaligned_access(regs, address);
+ return;
+ }
+
+ if (put_user(regs->gpr[rb], vaddr)) {
+ if (user_mode(regs)) {
+ force_sig(SIGSEGV, current);
+ return;
+ }
+
+ if ((entry = search_exception_tables(orig_pc))) {
+ regs->pc = entry->fixup;
+ return;
+ }
+
+ /* kernel access in kernel space, store it directly */
+ *((unsigned long *)vaddr) = regs->gpr[rb];
+ }
+
+ lwa_flag = 0;
+ regs->sr |= SPR_SR_F;
+}
+
+#define INSN_LWA 0x1b
+#define INSN_SWA 0x33
+
asmlinkage void do_illegal_instruction(struct pt_regs *regs,
unsigned long address)
{
siginfo_t info;
+ unsigned int op;
+ unsigned int insn = *((unsigned int *)address);
+
+ op = insn >> 26;
+
+ switch (op) {
+ case INSN_LWA:
+ simulate_lwa(regs, address, insn);
+ return;
+
+ case INSN_SWA:
+ simulate_swa(regs, address, insn);
+ return;
+
+ default:
+ break;
+ }
if (user_mode(regs)) {
/* Send a SIGILL */
diff --git a/arch/openrisc/lib/Makefile b/arch/openrisc/lib/Makefile
index 966f65dbc6f0..17d9d37f32d2 100644
--- a/arch/openrisc/lib/Makefile
+++ b/arch/openrisc/lib/Makefile
@@ -2,4 +2,4 @@
# Makefile for or32 specific library files..
#
-obj-y = string.o delay.o
+obj-y := delay.o string.o memset.o memcpy.o
diff --git a/arch/openrisc/lib/memcpy.c b/arch/openrisc/lib/memcpy.c
new file mode 100644
index 000000000000..669887a60e27
--- /dev/null
+++ b/arch/openrisc/lib/memcpy.c
@@ -0,0 +1,124 @@
+/*
+ * arch/openrisc/lib/memcpy.c
+ *
+ * Optimized memory copy routines for openrisc. These are mostly copied
+ * from ohter sources but slightly entended based on ideas discuassed in
+ * #openrisc.
+ *
+ * The word unroll implementation is an extension to the arm byte
+ * unrolled implementation, but using word copies (if things are
+ * properly aligned)
+ *
+ * The great arm loop unroll algorithm can be found at:
+ * arch/arm/boot/compressed/string.c
+ */
+
+#include <linux/export.h>
+
+#include <linux/string.h>
+
+#ifdef CONFIG_OR1K_1200
+/*
+ * Do memcpy with word copies and loop unrolling. This gives the
+ * best performance on the OR1200 and MOR1KX archirectures
+ */
+void *memcpy(void *dest, __const void *src, __kernel_size_t n)
+{
+ int i = 0;
+ unsigned char *d, *s;
+ uint32_t *dest_w = (uint32_t *)dest, *src_w = (uint32_t *)src;
+
+ /* If both source and dest are word aligned copy words */
+ if (!((unsigned int)dest_w & 3) && !((unsigned int)src_w & 3)) {
+ /* Copy 32 bytes per loop */
+ for (i = n >> 5; i > 0; i--) {
+ *dest_w++ = *src_w++;
+ *dest_w++ = *src_w++;
+ *dest_w++ = *src_w++;
+ *dest_w++ = *src_w++;
+ *dest_w++ = *src_w++;
+ *dest_w++ = *src_w++;
+ *dest_w++ = *src_w++;
+ *dest_w++ = *src_w++;
+ }
+
+ if (n & 1 << 4) {
+ *dest_w++ = *src_w++;
+ *dest_w++ = *src_w++;
+ *dest_w++ = *src_w++;
+ *dest_w++ = *src_w++;
+ }
+
+ if (n & 1 << 3) {
+ *dest_w++ = *src_w++;
+ *dest_w++ = *src_w++;
+ }
+
+ if (n & 1 << 2)
+ *dest_w++ = *src_w++;
+
+ d = (unsigned char *)dest_w;
+ s = (unsigned char *)src_w;
+
+ } else {
+ d = (unsigned char *)dest_w;
+ s = (unsigned char *)src_w;
+
+ for (i = n >> 3; i > 0; i--) {
+ *d++ = *s++;
+ *d++ = *s++;
+ *d++ = *s++;
+ *d++ = *s++;
+ *d++ = *s++;
+ *d++ = *s++;
+ *d++ = *s++;
+ *d++ = *s++;
+ }
+
+ if (n & 1 << 2) {
+ *d++ = *s++;
+ *d++ = *s++;
+ *d++ = *s++;
+ *d++ = *s++;
+ }
+ }
+
+ if (n & 1 << 1) {
+ *d++ = *s++;
+ *d++ = *s++;
+ }
+
+ if (n & 1)
+ *d++ = *s++;
+
+ return dest;
+}
+#else
+/*
+ * Use word copies but no loop unrolling as we cannot assume there
+ * will be benefits on the archirecture
+ */
+void *memcpy(void *dest, __const void *src, __kernel_size_t n)
+{
+ unsigned char *d = (unsigned char *)dest, *s = (unsigned char *)src;
+ uint32_t *dest_w = (uint32_t *)dest, *src_w = (uint32_t *)src;
+
+ /* If both source and dest are word aligned copy words */
+ if (!((unsigned int)dest_w & 3) && !((unsigned int)src_w & 3)) {
+ for (; n >= 4; n -= 4)
+ *dest_w++ = *src_w++;
+ }
+
+ d = (unsigned char *)dest_w;
+ s = (unsigned char *)src_w;
+
+ /* For remaining or if not aligned, copy bytes */
+ for (; n >= 1; n -= 1)
+ *d++ = *s++;
+
+ return dest;
+
+}
+#endif
+
+EXPORT_SYMBOL(memcpy);
diff --git a/arch/openrisc/lib/memset.S b/arch/openrisc/lib/memset.S
new file mode 100644
index 000000000000..92cc2eac26fa
--- /dev/null
+++ b/arch/openrisc/lib/memset.S
@@ -0,0 +1,98 @@
+/*
+ * OpenRISC memset.S
+ *
+ * Hand-optimized assembler version of memset for OpenRISC.
+ * Algorithm inspired by several other arch-specific memset routines
+ * in the kernel tree
+ *
+ * Copyright (C) 2015 Olof Kindgren <olof.kindgren@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+ .global memset
+ .type memset, @function
+memset:
+ /* arguments:
+ * r3 = *s
+ * r4 = c
+ * r5 = n
+ * r13, r15, r17, r19 used as temp regs
+ */
+
+ /* Exit if n == 0 */
+ l.sfeqi r5, 0
+ l.bf 4f
+
+ /* Truncate c to char */
+ l.andi r13, r4, 0xff
+
+ /* Skip word extension if c is 0 */
+ l.sfeqi r13, 0
+ l.bf 1f
+ /* Check for at least two whole words (8 bytes) */
+ l.sfleui r5, 7
+
+ /* Extend char c to 32-bit word cccc in r13 */
+ l.slli r15, r13, 16 // r13 = 000c, r15 = 0c00
+ l.or r13, r13, r15 // r13 = 0c0c, r15 = 0c00
+ l.slli r15, r13, 8 // r13 = 0c0c, r15 = c0c0
+ l.or r13, r13, r15 // r13 = cccc, r15 = c0c0
+
+1: l.addi r19, r3, 0 // Set r19 = src
+ /* Jump to byte copy loop if less than two words */
+ l.bf 3f
+ l.or r17, r5, r0 // Set r17 = n
+
+ /* Mask out two LSBs to check alignment */
+ l.andi r15, r3, 0x3
+
+ /* lsb == 00, jump to word copy loop */
+ l.sfeqi r15, 0
+ l.bf 2f
+ l.addi r19, r3, 0 // Set r19 = src
+
+ /* lsb == 01,10 or 11 */
+ l.sb 0(r3), r13 // *src = c
+ l.addi r17, r17, -1 // Decrease n
+
+ l.sfeqi r15, 3
+ l.bf 2f
+ l.addi r19, r3, 1 // src += 1
+
+ /* lsb == 01 or 10 */
+ l.sb 1(r3), r13 // *(src+1) = c
+ l.addi r17, r17, -1 // Decrease n
+
+ l.sfeqi r15, 2
+ l.bf 2f
+ l.addi r19, r3, 2 // src += 2
+
+ /* lsb == 01 */
+ l.sb 2(r3), r13 // *(src+2) = c
+ l.addi r17, r17, -1 // Decrease n
+ l.addi r19, r3, 3 // src += 3
+
+ /* Word copy loop */
+2: l.sw 0(r19), r13 // *src = cccc
+ l.addi r17, r17, -4 // Decrease n
+ l.sfgeui r17, 4
+ l.bf 2b
+ l.addi r19, r19, 4 // Increase src
+
+ /* When n > 0, copy the remaining bytes, otherwise jump to exit */
+ l.sfeqi r17, 0
+ l.bf 4f
+
+ /* Byte copy loop */
+3: l.addi r17, r17, -1 // Decrease n
+ l.sb 0(r19), r13 // *src = cccc
+ l.sfnei r17, 0
+ l.bf 3b
+ l.addi r19, r19, 1 // Increase src
+
+4: l.jr r9
+ l.ori r11, r3, 0
diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c
index 8705a46218f9..2175e4bfd9fc 100644
--- a/arch/openrisc/mm/ioremap.c
+++ b/arch/openrisc/mm/ioremap.c
@@ -80,6 +80,7 @@ __ioremap(phys_addr_t addr, unsigned long size, pgprot_t prot)
return (void __iomem *)(offset + (char *)v);
}
+EXPORT_SYMBOL(__ioremap);
void iounmap(void *addr)
{
@@ -106,6 +107,7 @@ void iounmap(void *addr)
return vfree((void *)(PAGE_MASK & (unsigned long)addr));
}
+EXPORT_SYMBOL(iounmap);
/**
* OK, this one's a bit tricky... ioremap can get called before memory is
diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
index c1263fc390db..f294dd42fc7d 100644
--- a/arch/sparc/include/asm/page_64.h
+++ b/arch/sparc/include/asm/page_64.h
@@ -17,7 +17,8 @@
#define HPAGE_SHIFT 23
#define REAL_HPAGE_SHIFT 22
-
+#define HPAGE_256MB_SHIFT 28
+#define HPAGE_64K_SHIFT 16
#define REAL_HPAGE_SIZE (_AC(1,UL) << REAL_HPAGE_SHIFT)
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
@@ -26,6 +27,7 @@
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#define REAL_HPAGE_PER_HPAGE (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))
+#define HUGE_MAX_HSTATE 3
#endif
#ifndef __ASSEMBLY__
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 314b66851348..7932a4a37817 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -375,7 +375,10 @@ static inline pgprot_t pgprot_noncached(pgprot_t prot)
#define pgprot_noncached pgprot_noncached
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-static inline unsigned long __pte_huge_mask(void)
+extern pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
+ struct page *page, int writable);
+#define arch_make_huge_pte arch_make_huge_pte
+static inline unsigned long __pte_default_huge_mask(void)
{
unsigned long mask;
@@ -395,12 +398,14 @@ static inline unsigned long __pte_huge_mask(void)
static inline pte_t pte_mkhuge(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_PMD_HUGE | __pte_huge_mask());
+ return __pte(pte_val(pte) | __pte_default_huge_mask());
}
-static inline bool is_hugetlb_pte(pte_t pte)
+static inline bool is_default_hugetlb_pte(pte_t pte)
{
- return !!(pte_val(pte) & __pte_huge_mask());
+ unsigned long mask = __pte_default_huge_mask();
+
+ return (pte_val(pte) & mask) == mask;
}
static inline bool is_hugetlb_pmd(pmd_t pmd)
@@ -875,10 +880,12 @@ static inline unsigned long pud_pfn(pud_t pud)
/* Actual page table PTE updates. */
void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
- pte_t *ptep, pte_t orig, int fullmm);
+ pte_t *ptep, pte_t orig, int fullmm,
+ unsigned int hugepage_shift);
static void maybe_tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
- pte_t *ptep, pte_t orig, int fullmm)
+ pte_t *ptep, pte_t orig, int fullmm,
+ unsigned int hugepage_shift)
{
/* It is more efficient to let flush_tlb_kernel_range()
* handle init_mm tlb flushes.
@@ -887,7 +894,7 @@ static void maybe_tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
* and SUN4V pte layout, so this inline test is fine.
*/
if (likely(mm != &init_mm) && pte_accessible(mm, orig))
- tlb_batch_add(mm, vaddr, ptep, orig, fullmm);
+ tlb_batch_add(mm, vaddr, ptep, orig, fullmm, hugepage_shift);
}
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
@@ -906,7 +913,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t orig = *ptep;
*ptep = pte;
- maybe_tlb_batch_add(mm, addr, ptep, orig, fullmm);
+ maybe_tlb_batch_add(mm, addr, ptep, orig, fullmm, PAGE_SHIFT);
}
#define set_pte_at(mm,addr,ptep,pte) \
diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h
index 29d64b1758ed..478bf6bb4598 100644
--- a/arch/sparc/include/asm/setup.h
+++ b/arch/sparc/include/asm/setup.h
@@ -59,8 +59,11 @@ extern atomic_t dcpage_flushes;
extern atomic_t dcpage_flushes_xcall;
extern int sysctl_tsb_ratio;
-#endif
+#ifdef CONFIG_SERIAL_SUNHV
+void sunhv_migrate_hvcons_irq(int cpu);
+#endif
+#endif
void sun_do_break(void);
extern int stop_a_enabled;
extern int scons_pwroff;
diff --git a/arch/sparc/include/asm/tlbflush_64.h b/arch/sparc/include/asm/tlbflush_64.h
index a8e192e90700..54be88a6774c 100644
--- a/arch/sparc/include/asm/tlbflush_64.h
+++ b/arch/sparc/include/asm/tlbflush_64.h
@@ -8,7 +8,7 @@
#define TLB_BATCH_NR 192
struct tlb_batch {
- bool huge;
+ unsigned int hugepage_shift;
struct mm_struct *mm;
unsigned long tlb_nr;
unsigned long active;
@@ -17,7 +17,8 @@ struct tlb_batch {
void flush_tsb_kernel_range(unsigned long start, unsigned long end);
void flush_tsb_user(struct tlb_batch *tb);
-void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, bool huge);
+void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr,
+ unsigned int hugepage_shift);
/* TLB flush operations. */
diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h
index 225543000122..ad5293f89680 100644
--- a/arch/sparc/include/asm/topology_64.h
+++ b/arch/sparc/include/asm/topology_64.h
@@ -4,7 +4,6 @@
#ifdef CONFIG_NUMA
#include <asm/mmzone.h>
-#include <asm/cpudata.h>
static inline int cpu_to_node(int cpu)
{
@@ -42,6 +41,9 @@ int __node_distance(int, int);
#endif /* !(CONFIG_NUMA) */
#ifdef CONFIG_SMP
+
+#include <asm/cpudata.h>
+
#define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id)
#define topology_core_id(cpu) (cpu_data(cpu).core_id)
#define topology_core_cpumask(cpu) (&cpu_core_sib_map[cpu])
diff --git a/arch/sparc/include/asm/uprobes.h b/arch/sparc/include/asm/uprobes.h
index f87aae5a908e..36196c17aff8 100644
--- a/arch/sparc/include/asm/uprobes.h
+++ b/arch/sparc/include/asm/uprobes.h
@@ -42,8 +42,8 @@ struct arch_uprobe {
};
struct arch_uprobe_task {
- u32 saved_tpc;
- u32 saved_tnpc;
+ u64 saved_tpc;
+ u64 saved_tnpc;
};
struct task_struct;
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 0ce347f8e4cc..90a02cb64e20 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1443,6 +1443,7 @@ void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
static void stop_this_cpu(void *dummy)
{
+ set_cpu_online(smp_processor_id(), false);
prom_stopself();
}
@@ -1451,9 +1452,15 @@ void smp_send_stop(void)
int cpu;
if (tlb_type == hypervisor) {
+ int this_cpu = smp_processor_id();
+#ifdef CONFIG_SERIAL_SUNHV
+ sunhv_migrate_hvcons_irq(this_cpu);
+#endif
for_each_online_cpu(cpu) {
- if (cpu == smp_processor_id())
+ if (cpu == this_cpu)
continue;
+
+ set_cpu_online(cpu, false);
#ifdef CONFIG_SUN_LDOMS
if (ldom_domaining_enabled) {
unsigned long hv_err;
diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S
index d568c8207af7..10689cfd0ad4 100644
--- a/arch/sparc/kernel/tsb.S
+++ b/arch/sparc/kernel/tsb.S
@@ -117,26 +117,11 @@ tsb_miss_page_table_walk_sun4v_fastpath:
/* Valid PTE is now in %g5. */
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-661: sethi %uhi(_PAGE_SZALL_4U), %g7
+ sethi %uhi(_PAGE_PMD_HUGE), %g7
sllx %g7, 32, %g7
- .section .sun4v_2insn_patch, "ax"
- .word 661b
- mov _PAGE_SZALL_4V, %g7
- nop
- .previous
-
- and %g5, %g7, %g2
-
-661: sethi %uhi(_PAGE_SZHUGE_4U), %g7
- sllx %g7, 32, %g7
- .section .sun4v_2insn_patch, "ax"
- .word 661b
- mov _PAGE_SZHUGE_4V, %g7
- nop
- .previous
- cmp %g2, %g7
- bne,pt %xcc, 60f
+ andcc %g5, %g7, %g0
+ be,pt %xcc, 60f
nop
/* It is a huge page, use huge page TSB entry address we
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 988acc8b1b80..e98a3f2e8f0f 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -28,6 +28,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
unsigned long pgoff,
unsigned long flags)
{
+ struct hstate *h = hstate_file(filp);
unsigned long task_size = TASK_SIZE;
struct vm_unmapped_area_info info;
@@ -38,7 +39,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
info.length = len;
info.low_limit = TASK_UNMAPPED_BASE;
info.high_limit = min(task_size, VA_EXCLUDE_START);
- info.align_mask = PAGE_MASK & ~HPAGE_MASK;
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
addr = vm_unmapped_area(&info);
@@ -58,6 +59,7 @@ hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
const unsigned long pgoff,
const unsigned long flags)
{
+ struct hstate *h = hstate_file(filp);
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
struct vm_unmapped_area_info info;
@@ -69,7 +71,7 @@ hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
info.length = len;
info.low_limit = PAGE_SIZE;
info.high_limit = mm->mmap_base;
- info.align_mask = PAGE_MASK & ~HPAGE_MASK;
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
addr = vm_unmapped_area(&info);
@@ -94,6 +96,7 @@ unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
{
+ struct hstate *h = hstate_file(file);
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long task_size = TASK_SIZE;
@@ -101,7 +104,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (test_thread_flag(TIF_32BIT))
task_size = STACK_TOP32;
- if (len & ~HPAGE_MASK)
+ if (len & ~huge_page_mask(h))
return -EINVAL;
if (len > task_size)
return -ENOMEM;
@@ -113,7 +116,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
}
if (addr) {
- addr = ALIGN(addr, HPAGE_SIZE);
+ addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
if (task_size - len >= addr &&
(!vma || addr + len <= vma->vm_start))
@@ -127,17 +130,141 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
pgoff, flags);
}
+static pte_t sun4u_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
+{
+ return entry;
+}
+
+static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
+{
+ unsigned long hugepage_size = _PAGE_SZ4MB_4V;
+
+ pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V;
+
+ switch (shift) {
+ case HPAGE_256MB_SHIFT:
+ hugepage_size = _PAGE_SZ256MB_4V;
+ pte_val(entry) |= _PAGE_PMD_HUGE;
+ break;
+ case HPAGE_SHIFT:
+ pte_val(entry) |= _PAGE_PMD_HUGE;
+ break;
+ case HPAGE_64K_SHIFT:
+ hugepage_size = _PAGE_SZ64K_4V;
+ break;
+ default:
+ WARN_ONCE(1, "unsupported hugepage shift=%u\n", shift);
+ }
+
+ pte_val(entry) = pte_val(entry) | hugepage_size;
+ return entry;
+}
+
+static pte_t hugepage_shift_to_tte(pte_t entry, unsigned int shift)
+{
+ if (tlb_type == hypervisor)
+ return sun4v_hugepage_shift_to_tte(entry, shift);
+ else
+ return sun4u_hugepage_shift_to_tte(entry, shift);
+}
+
+pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
+ struct page *page, int writeable)
+{
+ unsigned int shift = huge_page_shift(hstate_vma(vma));
+
+ return hugepage_shift_to_tte(entry, shift);
+}
+
+static unsigned int sun4v_huge_tte_to_shift(pte_t entry)
+{
+ unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4V;
+ unsigned int shift;
+
+ switch (tte_szbits) {
+ case _PAGE_SZ256MB_4V:
+ shift = HPAGE_256MB_SHIFT;
+ break;
+ case _PAGE_SZ4MB_4V:
+ shift = REAL_HPAGE_SHIFT;
+ break;
+ case _PAGE_SZ64K_4V:
+ shift = HPAGE_64K_SHIFT;
+ break;
+ default:
+ shift = PAGE_SHIFT;
+ break;
+ }
+ return shift;
+}
+
+static unsigned int sun4u_huge_tte_to_shift(pte_t entry)
+{
+ unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4U;
+ unsigned int shift;
+
+ switch (tte_szbits) {
+ case _PAGE_SZ256MB_4U:
+ shift = HPAGE_256MB_SHIFT;
+ break;
+ case _PAGE_SZ4MB_4U:
+ shift = REAL_HPAGE_SHIFT;
+ break;
+ case _PAGE_SZ64K_4U:
+ shift = HPAGE_64K_SHIFT;
+ break;
+ default:
+ shift = PAGE_SHIFT;
+ break;
+ }
+ return shift;
+}
+
+static unsigned int huge_tte_to_shift(pte_t entry)
+{
+ unsigned long shift;
+
+ if (tlb_type == hypervisor)
+ shift = sun4v_huge_tte_to_shift(entry);
+ else
+ shift = sun4u_huge_tte_to_shift(entry);
+
+ if (shift == PAGE_SHIFT)
+ WARN_ONCE(1, "tto_to_shift: invalid hugepage tte=0x%lx\n",
+ pte_val(entry));
+
+ return shift;
+}
+
+static unsigned long huge_tte_to_size(pte_t pte)
+{
+ unsigned long size = 1UL << huge_tte_to_shift(pte);
+
+ if (size == REAL_HPAGE_SIZE)
+ size = HPAGE_SIZE;
+ return size;
+}
+
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
pud_t *pud;
+ pmd_t *pmd;
pte_t *pte = NULL;
pgd = pgd_offset(mm, addr);
pud = pud_alloc(mm, pgd, addr);
- if (pud)
- pte = (pte_t *)pmd_alloc(mm, pud, addr);
+ if (pud) {
+ pmd = pmd_alloc(mm, pud, addr);
+ if (!pmd)
+ return NULL;
+
+ if (sz == PMD_SHIFT)
+ pte = (pte_t *)pmd;
+ else
+ pte = pte_alloc_map(mm, pmd, addr);
+ }
return pte;
}
@@ -146,49 +273,83 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
pud_t *pud;
+ pmd_t *pmd;
pte_t *pte = NULL;
pgd = pgd_offset(mm, addr);
if (!pgd_none(*pgd)) {
pud = pud_offset(pgd, addr);
- if (!pud_none(*pud))
- pte = (pte_t *)pmd_offset(pud, addr);
+ if (!pud_none(*pud)) {
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_none(*pmd)) {
+ if (is_hugetlb_pmd(*pmd))
+ pte = (pte_t *)pmd;
+ else
+ pte = pte_offset_map(pmd, addr);
+ }
+ }
}
+
return pte;
}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t entry)
{
+ unsigned int i, nptes, orig_shift, shift;
+ unsigned long size;
pte_t orig;
+ size = huge_tte_to_size(entry);
+ shift = size >= HPAGE_SIZE ? PMD_SHIFT : PAGE_SHIFT;
+ nptes = size >> shift;
+
if (!pte_present(*ptep) && pte_present(entry))
- mm->context.hugetlb_pte_count++;
+ mm->context.hugetlb_pte_count += nptes;
- addr &= HPAGE_MASK;
+ addr &= ~(size - 1);
orig = *ptep;
- *ptep = entry;
+ orig_shift = pte_none(orig) ? PAGE_SHIFT : huge_tte_to_shift(orig);
+
+ for (i = 0; i < nptes; i++)
+ ptep[i] = __pte(pte_val(entry) + (i << shift));
- /* Issue TLB flush at REAL_HPAGE_SIZE boundaries */
- maybe_tlb_batch_add(mm, addr, ptep, orig, 0);
- maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0);
+ maybe_tlb_batch_add(mm, addr, ptep, orig, 0, orig_shift);
+ /* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
+ if (size == HPAGE_SIZE)
+ maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0,
+ orig_shift);
}
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
+ unsigned int i, nptes, hugepage_shift;
+ unsigned long size;
pte_t entry;
entry = *ptep;
+ size = huge_tte_to_size(entry);
+ if (size >= HPAGE_SIZE)
+ nptes = size >> PMD_SHIFT;
+ else
+ nptes = size >> PAGE_SHIFT;
+
+ hugepage_shift = pte_none(entry) ? PAGE_SHIFT :
+ huge_tte_to_shift(entry);
+
if (pte_present(entry))
- mm->context.hugetlb_pte_count--;
+ mm->context.hugetlb_pte_count -= nptes;
- addr &= HPAGE_MASK;
- *ptep = __pte(0UL);
+ addr &= ~(size - 1);
+ for (i = 0; i < nptes; i++)
+ ptep[i] = __pte(0UL);
- /* Issue TLB flush at REAL_HPAGE_SIZE boundaries */
- maybe_tlb_batch_add(mm, addr, ptep, entry, 0);
- maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0);
+ maybe_tlb_batch_add(mm, addr, ptep, entry, 0, hugepage_shift);
+ /* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
+ if (size == HPAGE_SIZE)
+ maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0,
+ hugepage_shift);
return entry;
}
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 5d2f91511c60..ccd455328989 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -324,6 +324,50 @@ static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_inde
tsb_insert(tsb, tag, tte);
}
+#ifdef CONFIG_HUGETLB_PAGE
+static int __init setup_hugepagesz(char *string)
+{
+ unsigned long long hugepage_size;
+ unsigned int hugepage_shift;
+ unsigned short hv_pgsz_idx;
+ unsigned int hv_pgsz_mask;
+ int rc = 0;
+
+ hugepage_size = memparse(string, &string);
+ hugepage_shift = ilog2(hugepage_size);
+
+ switch (hugepage_shift) {
+ case HPAGE_256MB_SHIFT:
+ hv_pgsz_mask = HV_PGSZ_MASK_256MB;
+ hv_pgsz_idx = HV_PGSZ_IDX_256MB;
+ break;
+ case HPAGE_SHIFT:
+ hv_pgsz_mask = HV_PGSZ_MASK_4MB;
+ hv_pgsz_idx = HV_PGSZ_IDX_4MB;
+ break;
+ case HPAGE_64K_SHIFT:
+ hv_pgsz_mask = HV_PGSZ_MASK_64K;
+ hv_pgsz_idx = HV_PGSZ_IDX_64K;
+ break;
+ default:
+ hv_pgsz_mask = 0;
+ }
+
+ if ((hv_pgsz_mask & cpu_pgsz_mask) == 0U) {
+ pr_warn("hugepagesz=%llu not supported by MMU.\n",
+ hugepage_size);
+ goto out;
+ }
+
+ hugetlb_add_hstate(hugepage_shift - PAGE_SHIFT);
+ rc = 1;
+
+out:
+ return rc;
+}
+__setup("hugepagesz=", setup_hugepagesz);
+#endif /* CONFIG_HUGETLB_PAGE */
+
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
{
struct mm_struct *mm;
@@ -347,7 +391,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
if ((mm->context.hugetlb_pte_count || mm->context.thp_pte_count) &&
- is_hugetlb_pte(pte)) {
+ is_hugetlb_pmd(__pmd(pte_val(pte)))) {
/* We are fabricating 8MB pages using 4MB real hw pages. */
pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
__update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
@@ -785,13 +829,23 @@ static void __init find_ramdisk(unsigned long phys_base)
struct node_mem_mask {
unsigned long mask;
- unsigned long val;
+ unsigned long match;
};
static struct node_mem_mask node_masks[MAX_NUMNODES];
static int num_node_masks;
#ifdef CONFIG_NEED_MULTIPLE_NODES
+struct mdesc_mlgroup {
+ u64 node;
+ u64 latency;
+ u64 match;
+ u64 mask;
+};
+
+static struct mdesc_mlgroup *mlgroups;
+static int num_mlgroups;
+
int numa_cpu_lookup_table[NR_CPUS];
cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
@@ -802,78 +856,129 @@ struct mdesc_mblock {
};
static struct mdesc_mblock *mblocks;
static int num_mblocks;
-static int find_numa_node_for_addr(unsigned long pa,
- struct node_mem_mask *pnode_mask);
-static unsigned long __init ra_to_pa(unsigned long addr)
+static struct mdesc_mblock * __init addr_to_mblock(unsigned long addr)
{
+ struct mdesc_mblock *m = NULL;
int i;
for (i = 0; i < num_mblocks; i++) {
- struct mdesc_mblock *m = &mblocks[i];
+ m = &mblocks[i];
if (addr >= m->base &&
addr < (m->base + m->size)) {
- addr += m->offset;
break;
}
}
- return addr;
+
+ return m;
}
-static int __init find_node(unsigned long addr)
+static u64 __init memblock_nid_range_sun4u(u64 start, u64 end, int *nid)
{
- static bool search_mdesc = true;
- static struct node_mem_mask last_mem_mask = { ~0UL, ~0UL };
- static int last_index;
- int i;
+ int prev_nid, new_nid;
- addr = ra_to_pa(addr);
- for (i = 0; i < num_node_masks; i++) {
- struct node_mem_mask *p = &node_masks[i];
+ prev_nid = -1;
+ for ( ; start < end; start += PAGE_SIZE) {
+ for (new_nid = 0; new_nid < num_node_masks; new_nid++) {
+ struct node_mem_mask *p = &node_masks[new_nid];
- if ((addr & p->mask) == p->val)
- return i;
- }
- /* The following condition has been observed on LDOM guests because
- * node_masks only contains the best latency mask and value.
- * LDOM guest's mdesc can contain a single latency group to
- * cover multiple address range. Print warning message only if the
- * address cannot be found in node_masks nor mdesc.
- */
- if ((search_mdesc) &&
- ((addr & last_mem_mask.mask) != last_mem_mask.val)) {
- /* find the available node in the mdesc */
- last_index = find_numa_node_for_addr(addr, &last_mem_mask);
- numadbg("find_node: latency group for address 0x%lx is %d\n",
- addr, last_index);
- if ((last_index < 0) || (last_index >= num_node_masks)) {
- /* WARN_ONCE() and use default group 0 */
- WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node rule. Some physical memory will be owned by node 0.");
- search_mdesc = false;
- last_index = 0;
+ if ((start & p->mask) == p->match) {
+ if (prev_nid == -1)
+ prev_nid = new_nid;
+ break;
+ }
}
+
+ if (new_nid == num_node_masks) {
+ prev_nid = 0;
+ WARN_ONCE(1, "addr[%Lx] doesn't match a NUMA node rule. Some memory will be owned by node 0.",
+ start);
+ break;
+ }
+
+ if (prev_nid != new_nid)
+ break;
}
+ *nid = prev_nid;
- return last_index;
+ return start > end ? end : start;
}
static u64 __init memblock_nid_range(u64 start, u64 end, int *nid)
{
- *nid = find_node(start);
- start += PAGE_SIZE;
- while (start < end) {
- int n = find_node(start);
+ u64 ret_end, pa_start, m_mask, m_match, m_end;
+ struct mdesc_mblock *mblock;
+ int _nid, i;
+
+ if (tlb_type != hypervisor)
+ return memblock_nid_range_sun4u(start, end, nid);
+
+ mblock = addr_to_mblock(start);
+ if (!mblock) {
+ WARN_ONCE(1, "memblock_nid_range: Can't find mblock addr[%Lx]",
+ start);
+
+ _nid = 0;
+ ret_end = end;
+ goto done;
+ }
+
+ pa_start = start + mblock->offset;
+ m_match = 0;
+ m_mask = 0;
+
+ for (_nid = 0; _nid < num_node_masks; _nid++) {
+ struct node_mem_mask *const m = &node_masks[_nid];
- if (n != *nid)
+ if ((pa_start & m->mask) == m->match) {
+ m_match = m->match;
+ m_mask = m->mask;
break;
- start += PAGE_SIZE;
+ }
}
- if (start > end)
- start = end;
+ if (num_node_masks == _nid) {
+ /* We could not find NUMA group, so default to 0, but lets
+ * search for latency group, so we could calculate the correct
+ * end address that we return
+ */
+ _nid = 0;
+
+ for (i = 0; i < num_mlgroups; i++) {
+ struct mdesc_mlgroup *const m = &mlgroups[i];
+
+ if ((pa_start & m->mask) == m->match) {
+ m_match = m->match;
+ m_mask = m->mask;
+ break;
+ }
+ }
+
+ if (i == num_mlgroups) {
+ WARN_ONCE(1, "memblock_nid_range: Can't find latency group addr[%Lx]",
+ start);
+
+ ret_end = end;
+ goto done;
+ }
+ }
- return start;
+ /*
+ * Each latency group has match and mask, and each memory block has an
+ * offset. An address belongs to a latency group if its address matches
+ * the following formula: ((addr + offset) & mask) == match
+ * It is, however, slow to check every single page if it matches a
+ * particular latency group. As optimization we calculate end value by
+ * using bit arithmetics.
+ */
+ m_end = m_match + (1ul << __ffs(m_mask)) - mblock->offset;
+ m_end += pa_start & ~((1ul << fls64(m_mask)) - 1);
+ ret_end = m_end > end ? end : m_end;
+
+done:
+ *nid = _nid;
+ return ret_end;
}
#endif
@@ -914,7 +1019,8 @@ static void init_node_masks_nonnuma(void)
numadbg("Initializing tables for non-numa.\n");
- node_masks[0].mask = node_masks[0].val = 0;
+ node_masks[0].mask = 0;
+ node_masks[0].match = 0;
num_node_masks = 1;
#ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -932,15 +1038,6 @@ EXPORT_SYMBOL(numa_cpu_lookup_table);
EXPORT_SYMBOL(numa_cpumask_lookup_table);
EXPORT_SYMBOL(node_data);
-struct mdesc_mlgroup {
- u64 node;
- u64 latency;
- u64 match;
- u64 mask;
-};
-static struct mdesc_mlgroup *mlgroups;
-static int num_mlgroups;
-
static int scan_pio_for_cfg_handle(struct mdesc_handle *md, u64 pio,
u32 cfg_handle)
{
@@ -1029,6 +1126,10 @@ int of_node_to_nid(struct device_node *dp)
static void __init add_node_ranges(void)
{
struct memblock_region *reg;
+ unsigned long prev_max;
+
+memblock_resized:
+ prev_max = memblock.memory.max;
for_each_memblock(memory, reg) {
unsigned long size = reg->size;
@@ -1048,6 +1149,8 @@ static void __init add_node_ranges(void)
memblock_set_node(start, this_end - start,
&memblock.memory, nid);
+ if (memblock.memory.max != prev_max)
+ goto memblock_resized;
start = this_end;
}
}
@@ -1182,41 +1285,6 @@ int __node_distance(int from, int to)
return numa_latency[from][to];
}
-static int find_numa_node_for_addr(unsigned long pa,
- struct node_mem_mask *pnode_mask)
-{
- struct mdesc_handle *md = mdesc_grab();
- u64 node, arc;
- int i = 0;
-
- node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups");
- if (node == MDESC_NODE_NULL)
- goto out;
-
- mdesc_for_each_node_by_name(md, node, "group") {
- mdesc_for_each_arc(arc, md, node, MDESC_ARC_TYPE_FWD) {
- u64 target = mdesc_arc_target(md, arc);
- struct mdesc_mlgroup *m = find_mlgroup(target);
-
- if (!m)
- continue;
- if ((pa & m->mask) == m->match) {
- if (pnode_mask) {
- pnode_mask->mask = m->mask;
- pnode_mask->val = m->match;
- }
- mdesc_release(md);
- return i;
- }
- }
- i++;
- }
-
-out:
- mdesc_release(md);
- return -1;
-}
-
static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
{
int i;
@@ -1224,7 +1292,7 @@ static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
for (i = 0; i < MAX_NUMNODES; i++) {
struct node_mem_mask *n = &node_masks[i];
- if ((grp->mask == n->mask) && (grp->match == n->val))
+ if ((grp->mask == n->mask) && (grp->match == n->match))
break;
}
return i;
@@ -1279,10 +1347,10 @@ static int __init numa_attach_mlgroup(struct mdesc_handle *md, u64 grp,
n = &node_masks[num_node_masks++];
n->mask = candidate->mask;
- n->val = candidate->match;
+ n->match = candidate->match;
- numadbg("NUMA NODE[%d]: mask[%lx] val[%lx] (latency[%llx])\n",
- index, n->mask, n->val, candidate->latency);
+ numadbg("NUMA NODE[%d]: mask[%lx] match[%lx] (latency[%llx])\n",
+ index, n->mask, n->match, candidate->latency);
return 0;
}
@@ -1379,7 +1447,7 @@ static int __init numa_parse_jbus(void)
numa_cpu_lookup_table[cpu] = index;
cpumask_copy(&numa_cpumask_lookup_table[index], cpumask_of(cpu));
node_masks[index].mask = ~((1UL << 36UL) - 1UL);
- node_masks[index].val = cpu << 36UL;
+ node_masks[index].match = cpu << 36UL;
index++;
}
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index c7f2a5295b3a..def82f6d626f 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -1444,7 +1444,7 @@ static void poke_viking(void)
srmmu_set_mmureg(mreg);
}
-static struct sparc32_cachetlb_ops viking_ops = {
+static struct sparc32_cachetlb_ops viking_ops __ro_after_init = {
.cache_all = viking_flush_cache_all,
.cache_mm = viking_flush_cache_mm,
.cache_page = viking_flush_cache_page,
@@ -1475,7 +1475,7 @@ static struct sparc32_cachetlb_ops viking_ops = {
* flushes going at once will require SMP locking anyways so there's
* no real value in trying any harder than this.
*/
-static struct sparc32_cachetlb_ops viking_sun4d_smp_ops = {
+static struct sparc32_cachetlb_ops viking_sun4d_smp_ops __ro_after_init = {
.cache_all = viking_flush_cache_all,
.cache_mm = viking_flush_cache_mm,
.cache_page = viking_flush_cache_page,
@@ -1759,7 +1759,7 @@ static void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
local_ops->sig_insns(mm, insn_addr);
}
-static struct sparc32_cachetlb_ops smp_cachetlb_ops = {
+static struct sparc32_cachetlb_ops smp_cachetlb_ops __ro_after_init = {
.cache_all = smp_flush_cache_all,
.cache_mm = smp_flush_cache_mm,
.cache_page = smp_flush_cache_page,
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index c56a195c9071..afda3bbf7854 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -67,7 +67,7 @@ void arch_leave_lazy_mmu_mode(void)
}
static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr,
- bool exec, bool huge)
+ bool exec, unsigned int hugepage_shift)
{
struct tlb_batch *tb = &get_cpu_var(tlb_batch);
unsigned long nr;
@@ -84,19 +84,19 @@ static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr,
}
if (!tb->active) {
- flush_tsb_user_page(mm, vaddr, huge);
+ flush_tsb_user_page(mm, vaddr, hugepage_shift);
global_flush_tlb_page(mm, vaddr);
goto out;
}
if (nr == 0) {
tb->mm = mm;
- tb->huge = huge;
+ tb->hugepage_shift = hugepage_shift;
}
- if (tb->huge != huge) {
+ if (tb->hugepage_shift != hugepage_shift) {
flush_tlb_pending();
- tb->huge = huge;
+ tb->hugepage_shift = hugepage_shift;
nr = 0;
}
@@ -110,10 +110,9 @@ out:
}
void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
- pte_t *ptep, pte_t orig, int fullmm)
+ pte_t *ptep, pte_t orig, int fullmm,
+ unsigned int hugepage_shift)
{
- bool huge = is_hugetlb_pte(orig);
-
if (tlb_type != hypervisor &&
pte_dirty(orig)) {
unsigned long paddr, pfn = pte_pfn(orig);
@@ -139,7 +138,7 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
no_cache_flush:
if (!fullmm)
- tlb_batch_add_one(mm, vaddr, pte_exec(orig), huge);
+ tlb_batch_add_one(mm, vaddr, pte_exec(orig), hugepage_shift);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
index e20fbbafb0b0..23479c3d39f0 100644
--- a/arch/sparc/mm/tsb.c
+++ b/arch/sparc/mm/tsb.c
@@ -86,6 +86,33 @@ static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift,
__flush_tsb_one_entry(tsb, tb->vaddrs[i], hash_shift, nentries);
}
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+static void __flush_huge_tsb_one_entry(unsigned long tsb, unsigned long v,
+ unsigned long hash_shift,
+ unsigned long nentries,
+ unsigned int hugepage_shift)
+{
+ unsigned int hpage_entries;
+ unsigned int i;
+
+ hpage_entries = 1 << (hugepage_shift - hash_shift);
+ for (i = 0; i < hpage_entries; i++)
+ __flush_tsb_one_entry(tsb, v + (i << hash_shift), hash_shift,
+ nentries);
+}
+
+static void __flush_huge_tsb_one(struct tlb_batch *tb, unsigned long hash_shift,
+ unsigned long tsb, unsigned long nentries,
+ unsigned int hugepage_shift)
+{
+ unsigned long i;
+
+ for (i = 0; i < tb->tlb_nr; i++)
+ __flush_huge_tsb_one_entry(tsb, tb->vaddrs[i], hash_shift,
+ nentries, hugepage_shift);
+}
+#endif
+
void flush_tsb_user(struct tlb_batch *tb)
{
struct mm_struct *mm = tb->mm;
@@ -93,45 +120,61 @@ void flush_tsb_user(struct tlb_batch *tb)
spin_lock_irqsave(&mm->context.lock, flags);
- if (!tb->huge) {
+ if (tb->hugepage_shift < HPAGE_SHIFT) {
base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
if (tlb_type == cheetah_plus || tlb_type == hypervisor)
base = __pa(base);
- __flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
+ if (tb->hugepage_shift == PAGE_SHIFT)
+ __flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
+#if defined(CONFIG_HUGETLB_PAGE)
+ else
+ __flush_huge_tsb_one(tb, PAGE_SHIFT, base, nentries,
+ tb->hugepage_shift);
+#endif
}
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
- if (tb->huge && mm->context.tsb_block[MM_TSB_HUGE].tsb) {
+ else if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
if (tlb_type == cheetah_plus || tlb_type == hypervisor)
base = __pa(base);
- __flush_tsb_one(tb, REAL_HPAGE_SHIFT, base, nentries);
+ __flush_huge_tsb_one(tb, REAL_HPAGE_SHIFT, base, nentries,
+ tb->hugepage_shift);
}
#endif
spin_unlock_irqrestore(&mm->context.lock, flags);
}
-void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, bool huge)
+void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr,
+ unsigned int hugepage_shift)
{
unsigned long nentries, base, flags;
spin_lock_irqsave(&mm->context.lock, flags);
- if (!huge) {
+ if (hugepage_shift < HPAGE_SHIFT) {
base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
if (tlb_type == cheetah_plus || tlb_type == hypervisor)
base = __pa(base);
- __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries);
+ if (hugepage_shift == PAGE_SHIFT)
+ __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT,
+ nentries);
+#if defined(CONFIG_HUGETLB_PAGE)
+ else
+ __flush_huge_tsb_one_entry(base, vaddr, PAGE_SHIFT,
+ nentries, hugepage_shift);
+#endif
}
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
- if (huge && mm->context.tsb_block[MM_TSB_HUGE].tsb) {
+ else if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
if (tlb_type == cheetah_plus || tlb_type == hypervisor)
base = __pa(base);
- __flush_tsb_one_entry(base, vaddr, REAL_HPAGE_SHIFT, nentries);
+ __flush_huge_tsb_one_entry(base, vaddr, REAL_HPAGE_SHIFT,
+ nentries, hugepage_shift);
}
#endif
spin_unlock_irqrestore(&mm->context.lock, flags);