summaryrefslogtreecommitdiff
path: root/arch/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/Kconfig10
-rw-r--r--arch/powerpc/Kconfig.debug32
-rw-r--r--arch/powerpc/Makefile15
-rw-r--r--arch/powerpc/boot/addnote.c6
-rw-r--r--arch/powerpc/boot/dts/fsl/b4qds.dtsi1
-rw-r--r--arch/powerpc/configs/pseries_defconfig1
-rw-r--r--arch/powerpc/configs/skiroot_defconfig2
-rw-r--r--arch/powerpc/crypto/crc-vpmsum_test.c10
-rw-r--r--arch/powerpc/include/asm/book3s/32/kup.h145
-rw-r--r--arch/powerpc/include/asm/book3s/32/mmu-hash.h9
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgalloc.h41
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgtable.h13
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-4k.h23
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-64k.h21
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash.h95
-rw-r--r--arch/powerpc/include/asm/book3s/64/hugetlb.h72
-rw-r--r--arch/powerpc/include/asm/book3s/64/kup-radix.h108
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu-hash.h70
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu.h104
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgalloc.h52
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h12
-rw-r--r--arch/powerpc/include/asm/book3s/64/radix-4k.h9
-rw-r--r--arch/powerpc/include/asm/book3s/64/radix-64k.h8
-rw-r--r--arch/powerpc/include/asm/book3s/64/radix.h40
-rw-r--r--arch/powerpc/include/asm/book3s/64/slice.h13
-rw-r--r--arch/powerpc/include/asm/cpuidle.h19
-rw-r--r--arch/powerpc/include/asm/drmem.h21
-rw-r--r--arch/powerpc/include/asm/exception-64s.h2
-rw-r--r--arch/powerpc/include/asm/fadump.h1
-rw-r--r--arch/powerpc/include/asm/feature-fixups.h3
-rw-r--r--arch/powerpc/include/asm/fixmap.h5
-rw-r--r--arch/powerpc/include/asm/futex.h4
-rw-r--r--arch/powerpc/include/asm/hugetlb.h87
-rw-r--r--arch/powerpc/include/asm/hw_breakpoint.h8
-rw-r--r--arch/powerpc/include/asm/imc-pmu.h39
-rw-r--r--arch/powerpc/include/asm/kasan.h40
-rw-r--r--arch/powerpc/include/asm/kup.h73
-rw-r--r--arch/powerpc/include/asm/mce.h97
-rw-r--r--arch/powerpc/include/asm/mmu.h28
-rw-r--r--arch/powerpc/include/asm/mmu_context.h7
-rw-r--r--arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h44
-rw-r--r--arch/powerpc/include/asm/nohash/32/kup-8xx.h58
-rw-r--r--arch/powerpc/include/asm/nohash/32/mmu-8xx.h102
-rw-r--r--arch/powerpc/include/asm/nohash/32/mmu.h25
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgalloc.h123
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgtable.h13
-rw-r--r--arch/powerpc/include/asm/nohash/32/slice.h2
-rw-r--r--arch/powerpc/include/asm/nohash/64/mmu.h12
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgalloc.h117
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgtable.h12
-rw-r--r--arch/powerpc/include/asm/nohash/64/slice.h12
-rw-r--r--arch/powerpc/include/asm/nohash/hugetlb-book3e.h45
-rw-r--r--arch/powerpc/include/asm/nohash/mmu-book3e.h2
-rw-r--r--arch/powerpc/include/asm/nohash/mmu.h16
-rw-r--r--arch/powerpc/include/asm/nohash/pgalloc.h56
-rw-r--r--arch/powerpc/include/asm/nohash/pte-book3e.h5
-rw-r--r--arch/powerpc/include/asm/opal-api.h18
-rw-r--r--arch/powerpc/include/asm/opal.h9
-rw-r--r--arch/powerpc/include/asm/paca.h40
-rw-r--r--arch/powerpc/include/asm/page.h23
-rw-r--r--arch/powerpc/include/asm/pgalloc.h51
-rw-r--r--arch/powerpc/include/asm/pgtable-be-types.h9
-rw-r--r--arch/powerpc/include/asm/pgtable-types.h9
-rw-r--r--arch/powerpc/include/asm/pgtable.h9
-rw-r--r--arch/powerpc/include/asm/processor.h12
-rw-r--r--arch/powerpc/include/asm/ptrace.h11
-rw-r--r--arch/powerpc/include/asm/reg.h8
-rw-r--r--arch/powerpc/include/asm/reg_booke.h2
-rw-r--r--arch/powerpc/include/asm/slice.h9
-rw-r--r--arch/powerpc/include/asm/sparsemem.h4
-rw-r--r--arch/powerpc/include/asm/string.h32
-rw-r--r--arch/powerpc/include/asm/task_size_64.h2
-rw-r--r--arch/powerpc/include/asm/time.h2
-rw-r--r--arch/powerpc/include/asm/trace.h16
-rw-r--r--arch/powerpc/include/asm/uaccess.h38
-rw-r--r--arch/powerpc/include/asm/xive.h14
-rw-r--r--arch/powerpc/kernel/Makefile14
-rw-r--r--arch/powerpc/kernel/asm-offsets.c25
-rw-r--r--arch/powerpc/kernel/cacheinfo.c13
-rw-r--r--arch/powerpc/kernel/cputable.c13
-rw-r--r--arch/powerpc/kernel/dbell.c3
-rw-r--r--arch/powerpc/kernel/early_32.c36
-rw-r--r--arch/powerpc/kernel/entry_32.S186
-rw-r--r--arch/powerpc/kernel/entry_64.S35
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S26
-rw-r--r--arch/powerpc/kernel/fadump.c1
-rw-r--r--arch/powerpc/kernel/fpu.S1
-rw-r--r--arch/powerpc/kernel/head_32.S258
-rw-r--r--arch/powerpc/kernel/head_32.h203
-rw-r--r--arch/powerpc/kernel/head_40x.S155
-rw-r--r--arch/powerpc/kernel/head_44x.S12
-rw-r--r--arch/powerpc/kernel/head_64.S4
-rw-r--r--arch/powerpc/kernel/head_8xx.S136
-rw-r--r--arch/powerpc/kernel/head_booke.h131
-rw-r--r--arch/powerpc/kernel/head_fsl_booke.S32
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c62
-rw-r--r--arch/powerpc/kernel/idle_book3s.S1060
-rw-r--r--arch/powerpc/kernel/irq.c16
-rw-r--r--arch/powerpc/kernel/mce.c106
-rw-r--r--arch/powerpc/kernel/mce_power.c253
-rw-r--r--arch/powerpc/kernel/paca.c12
-rw-r--r--arch/powerpc/kernel/process.c35
-rw-r--r--arch/powerpc/kernel/prom_init.c248
-rw-r--r--arch/powerpc/kernel/prom_init_check.sh12
-rw-r--r--arch/powerpc/kernel/ptrace.c3
-rw-r--r--arch/powerpc/kernel/security.c8
-rw-r--r--arch/powerpc/kernel/setup-common.c116
-rw-r--r--arch/powerpc/kernel/setup_32.c28
-rw-r--r--arch/powerpc/kernel/setup_64.c10
-rw-r--r--arch/powerpc/kernel/signal_64.c27
-rw-r--r--arch/powerpc/kernel/time.c10
-rw-r--r--arch/powerpc/kernel/traps.c8
-rw-r--r--arch/powerpc/kernel/vdso32/Makefile5
-rw-r--r--arch/powerpc/kernel/vdso64/Makefile5
-rw-r--r--arch/powerpc/kernel/vector.S1
-rw-r--r--arch/powerpc/kernel/watchdog.c81
-rw-r--r--arch/powerpc/kvm/book3s_hv.c3
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_xics.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S141
-rw-r--r--arch/powerpc/lib/Makefile19
-rw-r--r--arch/powerpc/lib/checksum_wrappers.c4
-rw-r--r--arch/powerpc/lib/code-patching.c5
-rw-r--r--arch/powerpc/lib/copy_32.S12
-rw-r--r--arch/powerpc/lib/mem_64.S9
-rw-r--r--arch/powerpc/lib/memcpy_64.S4
-rw-r--r--arch/powerpc/mm/Makefile47
-rw-r--r--arch/powerpc/mm/book3s32/Makefile9
-rw-r--r--arch/powerpc/mm/book3s32/hash_low.S (renamed from arch/powerpc/mm/hash_low_32.S)6
-rw-r--r--arch/powerpc/mm/book3s32/mmu.c (renamed from arch/powerpc/mm/ppc_mmu_32.c)76
-rw-r--r--arch/powerpc/mm/book3s32/mmu_context.c (renamed from arch/powerpc/mm/mmu_context_hash32.c)0
-rw-r--r--arch/powerpc/mm/book3s32/tlb.c (renamed from arch/powerpc/mm/tlb_hash32.c)2
-rw-r--r--arch/powerpc/mm/book3s64/Makefile24
-rw-r--r--arch/powerpc/mm/book3s64/hash_4k.c (renamed from arch/powerpc/mm/hash64_4k.c)2
-rw-r--r--arch/powerpc/mm/book3s64/hash_64k.c (renamed from arch/powerpc/mm/hash64_64k.c)2
-rw-r--r--arch/powerpc/mm/book3s64/hash_hugepage.c (renamed from arch/powerpc/mm/hugepage-hash64.c)2
-rw-r--r--arch/powerpc/mm/book3s64/hash_hugetlbpage.c (renamed from arch/powerpc/mm/hugetlbpage-hash64.c)31
-rw-r--r--arch/powerpc/mm/book3s64/hash_native.c (renamed from arch/powerpc/mm/hash_native_64.c)0
-rw-r--r--arch/powerpc/mm/book3s64/hash_pgtable.c (renamed from arch/powerpc/mm/pgtable-hash64.c)15
-rw-r--r--arch/powerpc/mm/book3s64/hash_tlb.c (renamed from arch/powerpc/mm/tlb_hash64.c)18
-rw-r--r--arch/powerpc/mm/book3s64/hash_utils.c (renamed from arch/powerpc/mm/hash_utils_64.c)145
-rw-r--r--arch/powerpc/mm/book3s64/iommu_api.c (renamed from arch/powerpc/mm/mmu_context_iommu.c)0
-rw-r--r--arch/powerpc/mm/book3s64/mmu_context.c (renamed from arch/powerpc/mm/mmu_context_book3s64.c)29
-rw-r--r--arch/powerpc/mm/book3s64/pgtable.c (renamed from arch/powerpc/mm/pgtable-book3s64.c)2
-rw-r--r--arch/powerpc/mm/book3s64/pkeys.c (renamed from arch/powerpc/mm/pkeys.c)1
-rw-r--r--arch/powerpc/mm/book3s64/radix_hugetlbpage.c (renamed from arch/powerpc/mm/hugetlbpage-radix.c)0
-rw-r--r--arch/powerpc/mm/book3s64/radix_pgtable.c (renamed from arch/powerpc/mm/pgtable-radix.c)117
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c (renamed from arch/powerpc/mm/tlb-radix.c)0
-rw-r--r--arch/powerpc/mm/book3s64/slb.c (renamed from arch/powerpc/mm/slb.c)31
-rw-r--r--arch/powerpc/mm/book3s64/subpage_prot.c (renamed from arch/powerpc/mm/subpage-prot.c)39
-rw-r--r--arch/powerpc/mm/book3s64/vphn.c (renamed from arch/powerpc/mm/vphn.c)6
-rw-r--r--arch/powerpc/mm/book3s64/vphn.h (renamed from arch/powerpc/mm/vphn.h)3
-rw-r--r--arch/powerpc/mm/copro_fault.c18
-rw-r--r--arch/powerpc/mm/dma-noncoherent.c2
-rw-r--r--arch/powerpc/mm/drmem.c6
-rw-r--r--arch/powerpc/mm/fault.c49
-rw-r--r--arch/powerpc/mm/highmem.c14
-rw-r--r--arch/powerpc/mm/hugetlbpage.c242
-rw-r--r--arch/powerpc/mm/init-common.c26
-rw-r--r--arch/powerpc/mm/init_32.c8
-rw-r--r--arch/powerpc/mm/init_64.c2
-rw-r--r--arch/powerpc/mm/kasan/Makefile5
-rw-r--r--arch/powerpc/mm/kasan/kasan_init_32.c183
-rw-r--r--arch/powerpc/mm/mem.c17
-rw-r--r--arch/powerpc/mm/mmu_context.c2
-rw-r--r--arch/powerpc/mm/mmu_decl.h9
-rw-r--r--arch/powerpc/mm/nohash/40x.c (renamed from arch/powerpc/mm/40x_mmu.c)2
-rw-r--r--arch/powerpc/mm/nohash/44x.c (renamed from arch/powerpc/mm/44x_mmu.c)2
-rw-r--r--arch/powerpc/mm/nohash/8xx.c (renamed from arch/powerpc/mm/8xx_mmu.c)26
-rw-r--r--arch/powerpc/mm/nohash/Makefile18
-rw-r--r--arch/powerpc/mm/nohash/book3e_hugetlbpage.c (renamed from arch/powerpc/mm/hugetlbpage-book3e.c)52
-rw-r--r--arch/powerpc/mm/nohash/book3e_pgtable.c (renamed from arch/powerpc/mm/pgtable-book3e.c)9
-rw-r--r--arch/powerpc/mm/nohash/fsl_booke.c (renamed from arch/powerpc/mm/fsl_booke_mmu.c)2
-rw-r--r--arch/powerpc/mm/nohash/mmu_context.c (renamed from arch/powerpc/mm/mmu_context_nohash.c)2
-rw-r--r--arch/powerpc/mm/nohash/tlb.c (renamed from arch/powerpc/mm/tlb_nohash.c)19
-rw-r--r--arch/powerpc/mm/nohash/tlb_low.S (renamed from arch/powerpc/mm/tlb_nohash_low.S)0
-rw-r--r--arch/powerpc/mm/nohash/tlb_low_64e.S (renamed from arch/powerpc/mm/tlb_low_64e.S)31
-rw-r--r--arch/powerpc/mm/numa.c35
-rw-r--r--arch/powerpc/mm/pgtable.c114
-rw-r--r--arch/powerpc/mm/pgtable_32.c47
-rw-r--r--arch/powerpc/mm/pgtable_64.c13
-rw-r--r--arch/powerpc/mm/ptdump/hashpagetable.c2
-rw-r--r--arch/powerpc/mm/ptdump/ptdump.c86
-rw-r--r--arch/powerpc/mm/slice.c109
-rw-r--r--arch/powerpc/perf/Makefile3
-rw-r--r--arch/powerpc/perf/core-book3s.c28
-rw-r--r--arch/powerpc/perf/generic-compat-pmu.c234
-rw-r--r--arch/powerpc/perf/imc-pmu.c347
-rw-r--r--arch/powerpc/perf/internal.h12
-rw-r--r--arch/powerpc/perf/power5+-pmu.c4
-rw-r--r--arch/powerpc/perf/power5-pmu.c4
-rw-r--r--arch/powerpc/perf/power6-pmu.c4
-rw-r--r--arch/powerpc/perf/power7-pmu.c4
-rw-r--r--arch/powerpc/perf/power8-pmu.c3
-rw-r--r--arch/powerpc/perf/power9-events-list.h2
-rw-r--r--arch/powerpc/perf/power9-pmu.c3
-rw-r--r--arch/powerpc/perf/ppc970-pmu.c4
-rw-r--r--arch/powerpc/platforms/83xx/usb.c4
-rw-r--r--arch/powerpc/platforms/8xx/pic.c3
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype45
-rw-r--r--arch/powerpc/platforms/cell/spu_base.c4
-rw-r--r--arch/powerpc/platforms/embedded6xx/holly.c19
-rw-r--r--arch/powerpc/platforms/powermac/Makefile6
-rw-r--r--arch/powerpc/platforms/powernv/idle.c902
-rw-r--r--arch/powerpc/platforms/powernv/opal-call.c6
-rw-r--r--arch/powerpc/platforms/powernv/opal-imc.c5
-rw-r--r--arch/powerpc/platforms/powernv/opal.c23
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c35
-rw-r--r--arch/powerpc/platforms/powernv/pci.h2
-rw-r--r--arch/powerpc/platforms/powernv/setup.c5
-rw-r--r--arch/powerpc/platforms/powernv/subcore.c2
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c17
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c13
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c3
-rw-r--r--arch/powerpc/platforms/pseries/pmem.c3
-rw-r--r--arch/powerpc/platforms/pseries/ras.c135
-rw-r--r--arch/powerpc/purgatory/Makefile3
-rw-r--r--arch/powerpc/sysdev/xive/native.c99
-rw-r--r--arch/powerpc/xmon/Makefile1
-rw-r--r--arch/powerpc/xmon/xmon.c66
219 files changed, 5971 insertions, 3738 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index fa7219ffeadc..d7996cfaceca 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -167,6 +167,7 @@ config PPC
select GENERIC_TIME_VSYSCALL
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_JUMP_LABEL
+ select HAVE_ARCH_KASAN if PPC32
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
@@ -375,7 +376,6 @@ config ZONE_DMA
config PGTABLE_LEVELS
int
default 2 if !PPC64
- default 3 if PPC_64K_PAGES && !PPC_BOOK3S_64
default 4
source "arch/powerpc/sysdev/Kconfig"
@@ -391,7 +391,7 @@ source "kernel/Kconfig.hz"
config HUGETLB_PAGE_SIZE_VARIABLE
bool
- depends on HUGETLB_PAGE
+ depends on HUGETLB_PAGE && PPC_BOOK3S_64
default y
config MATH_EMULATION
@@ -832,9 +832,9 @@ config CMDLINE_BOOL
bool "Default bootloader kernel arguments"
config CMDLINE
- string "Initial kernel command string"
- depends on CMDLINE_BOOL
- default "console=ttyS0,9600 console=tty0 root=/dev/sda2"
+ string "Initial kernel command string" if CMDLINE_BOOL
+ default "console=ttyS0,9600 console=tty0 root=/dev/sda2" if CMDLINE_BOOL
+ default ""
help
On some platforms, there is currently no way for the boot loader to
pass arguments to the kernel. For these platforms, you can supply
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 4e00cb0a5464..c59920920ddc 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -117,6 +117,14 @@ config XMON_DISASSEMBLY
to say Y here, unless you're building for a memory-constrained
system.
+config XMON_DEFAULT_RO_MODE
+ bool "Restrict xmon to read-only operations by default"
+ depends on XMON
+ default y
+ help
+ Operate xmon in read-only mode. The cmdline options 'xmon=rw' and
+ 'xmon=ro' override this default.
+
config DEBUGGER
bool
depends on KGDB || XMON
@@ -361,8 +369,32 @@ config PPC_PTDUMP
If you are unsure, say N.
+config PPC_DEBUG_WX
+ bool "Warn on W+X mappings at boot"
+ depends on PPC_PTDUMP
+ help
+ Generate a warning if any W+X mappings are found at boot.
+
+ This is useful for discovering cases where the kernel is leaving
+ W+X mappings after applying NX, as such mappings are a security risk.
+
+ Note that even if the check fails, your kernel is possibly
+ still fine, as W+X mappings are not a security hole in
+ themselves, what they do is that they make the exploitation
+ of other unfixed kernel bugs easier.
+
+ There is no runtime or memory usage effect of this option
+ once the kernel has booted up - it's a one time check.
+
+ If in doubt, say "Y".
+
config PPC_FAST_ENDIAN_SWITCH
bool "Deprecated fast endian-switch syscall"
depends on DEBUG_KERNEL && PPC_BOOK3S_64
help
If you're unsure what this is, say N.
+
+config KASAN_SHADOW_OFFSET
+ hex
+ depends on KASAN
+ default 0xe0000000
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 7de49889bd5d..258ea6b2f2e7 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -34,11 +34,10 @@ ifdef CONFIG_PPC_BOOK3S_32
KBUILD_CFLAGS += -mcpu=powerpc
endif
-ifeq ($(CROSS_COMPILE),)
-KBUILD_DEFCONFIG := $(shell uname -m)_defconfig
-else
-KBUILD_DEFCONFIG := ppc64_defconfig
-endif
+# If we're on a ppc/ppc64/ppc64le machine use that defconfig, otherwise just use
+# ppc64_defconfig because we have nothing better to go on.
+uname := $(shell uname -m)
+KBUILD_DEFCONFIG := $(if $(filter ppc%,$(uname)),$(uname),ppc64)_defconfig
ifdef CONFIG_PPC64
new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi)
@@ -367,6 +366,10 @@ ppc32_allmodconfig:
$(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/book3s_32.config \
-f $(srctree)/Makefile allmodconfig
+PHONY += ppc_defconfig
+ppc_defconfig:
+ $(call merge_into_defconfig,book3s_32.config,)
+
PHONY += ppc64le_allmodconfig
ppc64le_allmodconfig:
$(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/le.config \
@@ -406,7 +409,9 @@ vdso_install:
ifdef CONFIG_PPC64
$(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@
endif
+ifdef CONFIG_VDSO32
$(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@
+endif
archclean:
$(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/powerpc/boot/addnote.c b/arch/powerpc/boot/addnote.c
index 9d9f6f334d3c..3da3e2b1b51b 100644
--- a/arch/powerpc/boot/addnote.c
+++ b/arch/powerpc/boot/addnote.c
@@ -223,7 +223,11 @@ main(int ac, char **av)
PUT_16(E_PHNUM, np + 2);
/* write back */
- lseek(fd, (long) 0, SEEK_SET);
+ i = lseek(fd, (long) 0, SEEK_SET);
+ if (i < 0) {
+ perror("lseek");
+ exit(1);
+ }
i = write(fd, buf, n);
if (i < 0) {
perror("write");
diff --git a/arch/powerpc/boot/dts/fsl/b4qds.dtsi b/arch/powerpc/boot/dts/fsl/b4qds.dtsi
index 999efd3bc167..05be919f3545 100644
--- a/arch/powerpc/boot/dts/fsl/b4qds.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4qds.dtsi
@@ -40,6 +40,7 @@
interrupt-parent = <&mpic>;
aliases {
+ crypto = &crypto;
phy_sgmii_10 = &phy_sgmii_10;
phy_sgmii_11 = &phy_sgmii_11;
phy_sgmii_1c = &phy_sgmii_1c;
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index ea79c519863d..62e12f61a3b2 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -217,6 +217,7 @@ CONFIG_USB_MON=m
CONFIG_USB_EHCI_HCD=y
# CONFIG_USB_EHCI_HCD_PPC_OF is not set
CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_XHCI_HCD=y
CONFIG_USB_STORAGE=m
CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=m
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
index 1bcd468ab422..a887616e35a2 100644
--- a/arch/powerpc/configs/skiroot_defconfig
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -163,6 +163,8 @@ CONFIG_S2IO=m
CONFIG_MLX4_EN=m
# CONFIG_MLX4_CORE_GEN2 is not set
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
+# CONFIG_MLX5_EN_RXNFC is not set
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_MICROSEMI is not set
CONFIG_MYRI10GE=m
diff --git a/arch/powerpc/crypto/crc-vpmsum_test.c b/arch/powerpc/crypto/crc-vpmsum_test.c
index 0153a9c6f4af..98ea4f4d3dde 100644
--- a/arch/powerpc/crypto/crc-vpmsum_test.c
+++ b/arch/powerpc/crypto/crc-vpmsum_test.c
@@ -78,16 +78,12 @@ static int __init crc_test_init(void)
pr_info("crc-vpmsum_test begins, %lu iterations\n", iterations);
for (i=0; i<iterations; i++) {
- size_t len, offset;
+ size_t offset = prandom_u32_max(16);
+ size_t len = prandom_u32_max(MAX_CRC_LENGTH);
- get_random_bytes(data, MAX_CRC_LENGTH);
- get_random_bytes(&len, sizeof(len));
- get_random_bytes(&offset, sizeof(offset));
-
- len %= MAX_CRC_LENGTH;
- offset &= 15;
if (len <= offset)
continue;
+ prandom_bytes(data, len);
len -= offset;
crypto_shash_update(crct10dif_shash, data+offset, len);
diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
new file mode 100644
index 000000000000..677e9babef80
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_32_KUP_H
+#define _ASM_POWERPC_BOOK3S_32_KUP_H
+
+#include <asm/book3s/32/mmu-hash.h>
+
+#ifdef __ASSEMBLY__
+
+.macro kuep_update_sr gpr1, gpr2 /* NEVER use r0 as gpr2 due to addis */
+101: mtsrin \gpr1, \gpr2
+ addi \gpr1, \gpr1, 0x111 /* next VSID */
+ rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* clear VSID overflow */
+ addis \gpr2, \gpr2, 0x1000 /* address of next segment */
+ bdnz 101b
+ isync
+.endm
+
+.macro kuep_lock gpr1, gpr2
+#ifdef CONFIG_PPC_KUEP
+ li \gpr1, NUM_USER_SEGMENTS
+ li \gpr2, 0
+ mtctr \gpr1
+ mfsrin \gpr1, \gpr2
+ oris \gpr1, \gpr1, SR_NX@h /* set Nx */
+ kuep_update_sr \gpr1, \gpr2
+#endif
+.endm
+
+.macro kuep_unlock gpr1, gpr2
+#ifdef CONFIG_PPC_KUEP
+ li \gpr1, NUM_USER_SEGMENTS
+ li \gpr2, 0
+ mtctr \gpr1
+ mfsrin \gpr1, \gpr2
+ rlwinm \gpr1, \gpr1, 0, ~SR_NX /* Clear Nx */
+ kuep_update_sr \gpr1, \gpr2
+#endif
+.endm
+
+#ifdef CONFIG_PPC_KUAP
+
+.macro kuap_update_sr gpr1, gpr2, gpr3 /* NEVER use r0 as gpr2 due to addis */
+101: mtsrin \gpr1, \gpr2
+ addi \gpr1, \gpr1, 0x111 /* next VSID */
+ rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* clear VSID overflow */
+ addis \gpr2, \gpr2, 0x1000 /* address of next segment */
+ cmplw \gpr2, \gpr3
+ blt- 101b
+ isync
+.endm
+
+.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3
+ lwz \gpr2, KUAP(\thread)
+ rlwinm. \gpr3, \gpr2, 28, 0xf0000000
+ stw \gpr2, STACK_REGS_KUAP(\sp)
+ beq+ 102f
+ li \gpr1, 0
+ stw \gpr1, KUAP(\thread)
+ mfsrin \gpr1, \gpr2
+ oris \gpr1, \gpr1, SR_KS@h /* set Ks */
+ kuap_update_sr \gpr1, \gpr2, \gpr3
+102:
+.endm
+
+.macro kuap_restore sp, current, gpr1, gpr2, gpr3
+ lwz \gpr2, STACK_REGS_KUAP(\sp)
+ rlwinm. \gpr3, \gpr2, 28, 0xf0000000
+ stw \gpr2, THREAD + KUAP(\current)
+ beq+ 102f
+ mfsrin \gpr1, \gpr2
+ rlwinm \gpr1, \gpr1, 0, ~SR_KS /* Clear Ks */
+ kuap_update_sr \gpr1, \gpr2, \gpr3
+102:
+.endm
+
+.macro kuap_check current, gpr
+#ifdef CONFIG_PPC_KUAP_DEBUG
+ lwz \gpr2, KUAP(thread)
+999: twnei \gpr, 0
+ EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
+#endif
+.endm
+
+#endif /* CONFIG_PPC_KUAP */
+
+#else /* !__ASSEMBLY__ */
+
+#ifdef CONFIG_PPC_KUAP
+
+#include <linux/sched.h>
+
+static inline void kuap_update_sr(u32 sr, u32 addr, u32 end)
+{
+ barrier(); /* make sure thread.kuap is updated before playing with SRs */
+ while (addr < end) {
+ mtsrin(sr, addr);
+ sr += 0x111; /* next VSID */
+ sr &= 0xf0ffffff; /* clear VSID overflow */
+ addr += 0x10000000; /* address of next segment */
+ }
+ isync(); /* Context sync required after mtsrin() */
+}
+
+static inline void allow_user_access(void __user *to, const void __user *from, u32 size)
+{
+ u32 addr, end;
+
+ if (__builtin_constant_p(to) && to == NULL)
+ return;
+
+ addr = (__force u32)to;
+
+ if (!addr || addr >= TASK_SIZE || !size)
+ return;
+
+ end = min(addr + size, TASK_SIZE);
+ current->thread.kuap = (addr & 0xf0000000) | ((((end - 1) >> 28) + 1) & 0xf);
+ kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end); /* Clear Ks */
+}
+
+static inline void prevent_user_access(void __user *to, const void __user *from, u32 size)
+{
+ u32 addr = (__force u32)to;
+ u32 end = min(addr + size, TASK_SIZE);
+
+ if (!addr || addr >= TASK_SIZE || !size)
+ return;
+
+ current->thread.kuap = 0;
+ kuap_update_sr(mfsrin(addr) | SR_KS, addr, end); /* set Ks */
+}
+
+static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+{
+ if (!is_write)
+ return false;
+
+ return WARN(!regs->kuap, "Bug: write fault blocked by segment registers !");
+}
+
+#endif /* CONFIG_PPC_KUAP */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_BOOK3S_32_KUP_H */
diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
index 5cb588395fdc..2e277ca0170f 100644
--- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
@@ -10,8 +10,6 @@
* BATs
*/
-#include <asm/page.h>
-
/* Block size masks */
#define BL_128K 0x000
#define BL_256K 0x001
@@ -49,8 +47,6 @@ struct ppc_bat {
u32 batu;
u32 batl;
};
-
-typedef pte_t *pgtable_t;
#endif /* !__ASSEMBLY__ */
/*
@@ -63,6 +59,11 @@ typedef pte_t *pgtable_t;
#define PP_RWRW 2 /* Supervisor read/write, User read/write */
#define PP_RXRX 3 /* Supervisor read, User read */
+/* Values for Segment Registers */
+#define SR_NX 0x10000000 /* No Execute */
+#define SR_KP 0x20000000 /* User key */
+#define SR_KS 0x40000000 /* Supervisor key */
+
#ifndef __ASSEMBLY__
/*
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 3633502e102c..998317702630 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -5,28 +5,6 @@
#include <linux/threads.h>
#include <linux/slab.h>
-/*
- * Functions that deal with pagetables that could be at any level of
- * the table need to be passed an "index_size" so they know how to
- * handle allocation. For PTE pages (which are linked to a struct
- * page for now, and drawn from the main get_free_pages() pool), the
- * allocation size will be (2^index_size * sizeof(pointer)) and
- * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
- *
- * The maximum index size needs to be big enough to allow any
- * pagetable sizes we need, but small enough to fit in the low bits of
- * any page table pointer. In other words all pagetables, even tiny
- * ones, must be aligned to allow at least enough low 0 bits to
- * contain this value. This value is also used as a mask, so it must
- * be one less than a power of two.
- */
-#define MAX_PGTABLE_INDEX_SIZE 0xf
-
-extern void __bad_pte(pmd_t *pmd);
-
-extern struct kmem_cache *pgtable_cache[];
-#define PGT_CACHE(shift) pgtable_cache[shift]
-
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
@@ -59,24 +37,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
*pmdp = __pmd(__pa(pte_page) | _PMD_PRESENT);
}
-#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd))
-
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm);
-void pte_frag_destroy(void *pte_frag);
-pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel);
-void pte_fragment_free(unsigned long *table, int kernel);
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- pte_fragment_free((unsigned long *)pte, 1);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
- pte_fragment_free((unsigned long *)ptepage, 0);
-}
-
static inline void pgtable_free(void *table, unsigned index_size)
{
if (!index_size) {
@@ -87,7 +47,6 @@ static inline void pgtable_free(void *table, unsigned index_size)
}
}
-#define check_pgt_cache() do { } while (0)
#define get_hugepd_cache_index(x) (x)
#ifdef CONFIG_SMP
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index aa8406b8f7ba..838de59f6754 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -134,15 +134,24 @@ static inline bool pte_user(pte_t pte)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
+
+#ifndef __ASSEMBLY__
+
+int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
+
+#endif /* !__ASSEMBLY__ */
+
/*
* This is the bottom of the PKMAP area with HIGHMEM or an arbitrary
* value (for now) on others, from where we can start layout kernel
* virtual space that goes below PKMAP and FIXMAP
*/
+#include <asm/fixmap.h>
+
#ifdef CONFIG_HIGHMEM
#define KVIRT_TOP PKMAP_BASE
#else
-#define KVIRT_TOP (0xfe000000UL) /* for now, could be FIXMAP_BASE ? */
+#define KVIRT_TOP FIXADDR_START
#endif
/*
@@ -373,8 +382,6 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 })
-int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
-
/* Generic accessors to PTE bits */
static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);}
static inline int pte_read(pte_t pte) { return 1; }
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index cf5ba5254299..8fd8599c9395 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -2,10 +2,10 @@
#ifndef _ASM_POWERPC_BOOK3S_64_HASH_4K_H
#define _ASM_POWERPC_BOOK3S_64_HASH_4K_H
-#define H_PTE_INDEX_SIZE 9
-#define H_PMD_INDEX_SIZE 7
-#define H_PUD_INDEX_SIZE 9
-#define H_PGD_INDEX_SIZE 9
+#define H_PTE_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps: 2^9 x 4KB = 2MB
+#define H_PMD_INDEX_SIZE 7 // size: 8B << 7 = 1KB, maps: 2^7 x 2MB = 256MB
+#define H_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps: 2^9 x 256MB = 128GB
+#define H_PGD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps: 2^9 x 128GB = 64TB
/*
* Each context is 512TB. But on 4k we restrict our max TASK size to 64TB
@@ -13,6 +13,21 @@
*/
#define MAX_EA_BITS_PER_CONTEXT 46
+#define REGION_SHIFT (MAX_EA_BITS_PER_CONTEXT - 2)
+
+/*
+ * Our page table limit us to 64TB. Hence for the kernel mapping,
+ * each MAP area is limited to 16 TB.
+ * The four map areas are: linear mapping, vmap, IO and vmemmap
+ */
+#define H_KERN_MAP_SIZE (ASM_CONST(1) << REGION_SHIFT)
+
+/*
+ * Define the address range of the kernel non-linear virtual area
+ * 16TB
+ */
+#define H_KERN_VIRT_START ASM_CONST(0xc000100000000000)
+
#ifndef __ASSEMBLY__
#define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE)
#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << H_PMD_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index f82ee8a3b561..d1d9177d9ebd 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -2,16 +2,29 @@
#ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H
#define _ASM_POWERPC_BOOK3S_64_HASH_64K_H
-#define H_PTE_INDEX_SIZE 8
-#define H_PMD_INDEX_SIZE 10
-#define H_PUD_INDEX_SIZE 10
-#define H_PGD_INDEX_SIZE 8
+#define H_PTE_INDEX_SIZE 8 // size: 8B << 8 = 2KB, maps 2^8 x 64KB = 16MB
+#define H_PMD_INDEX_SIZE 10 // size: 8B << 10 = 8KB, maps 2^10 x 16MB = 16GB
+#define H_PUD_INDEX_SIZE 10 // size: 8B << 10 = 8KB, maps 2^10 x 16GB = 16TB
+#define H_PGD_INDEX_SIZE 8 // size: 8B << 8 = 2KB, maps 2^8 x 16TB = 4PB
+
/*
* Each context is 512TB size. SLB miss for first context/default context
* is handled in the hotpath.
*/
#define MAX_EA_BITS_PER_CONTEXT 49
+#define REGION_SHIFT MAX_EA_BITS_PER_CONTEXT
+
+/*
+ * We use one context for each MAP area.
+ */
+#define H_KERN_MAP_SIZE (1UL << MAX_EA_BITS_PER_CONTEXT)
+
+/*
+ * Define the address range of the kernel non-linear virtual area
+ * 2PB
+ */
+#define H_KERN_VIRT_START ASM_CONST(0xc008000000000000)
/*
* 64k aligned address free up few of the lower bits of RPN for us
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 54b7af6cd27f..1d1183048cfd 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -29,6 +29,10 @@
#define H_PGTABLE_EADDR_SIZE (H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + \
H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
#define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
+/*
+ * Top 2 bits are ignored in page table walk.
+ */
+#define EA_MASK (~(0xcUL << 60))
/*
* We store the slot details in the second half of page table.
@@ -42,59 +46,63 @@
#endif
/*
- * Define the address range of the kernel non-linear virtual area. In contrast
- * to the linear mapping, this is managed using the kernel page tables and then
- * inserted into the hash page table to actually take effect, similarly to user
- * mappings.
+ * +------------------------------+
+ * | |
+ * | |
+ * | |
+ * +------------------------------+ Kernel virtual map end (0xc00e000000000000)
+ * | |
+ * | |
+ * | 512TB/16TB of vmemmap |
+ * | |
+ * | |
+ * +------------------------------+ Kernel vmemmap start
+ * | |
+ * | 512TB/16TB of IO map |
+ * | |
+ * +------------------------------+ Kernel IO map start
+ * | |
+ * | 512TB/16TB of vmap |
+ * | |
+ * +------------------------------+ Kernel virt start (0xc008000000000000)
+ * | |
+ * | |
+ * | |
+ * +------------------------------+ Kernel linear (0xc.....)
*/
-#define H_KERN_VIRT_START ASM_CONST(0xD000000000000000)
-/*
- * Allow virtual mapping of one context size.
- * 512TB for 64K page size
- * 64TB for 4K page size
- */
-#define H_KERN_VIRT_SIZE (1UL << MAX_EA_BITS_PER_CONTEXT)
+#define H_VMALLOC_START H_KERN_VIRT_START
+#define H_VMALLOC_SIZE H_KERN_MAP_SIZE
+#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE)
-/*
- * 8TB IO mapping size
- */
-#define H_KERN_IO_SIZE ASM_CONST(0x80000000000) /* 8T */
+#define H_KERN_IO_START H_VMALLOC_END
+#define H_KERN_IO_SIZE H_KERN_MAP_SIZE
+#define H_KERN_IO_END (H_KERN_IO_START + H_KERN_IO_SIZE)
-/*
- * The vmalloc space starts at the beginning of the kernel non-linear virtual
- * region, and occupies 504T (64K) or 56T (4K)
- */
-#define H_VMALLOC_START H_KERN_VIRT_START
-#define H_VMALLOC_SIZE (H_KERN_VIRT_SIZE - H_KERN_IO_SIZE)
-#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE)
+#define H_VMEMMAP_START H_KERN_IO_END
+#define H_VMEMMAP_SIZE H_KERN_MAP_SIZE
+#define H_VMEMMAP_END (H_VMEMMAP_START + H_VMEMMAP_SIZE)
-#define H_KERN_IO_START H_VMALLOC_END
+#define NON_LINEAR_REGION_ID(ea) ((((unsigned long)ea - H_KERN_VIRT_START) >> REGION_SHIFT) + 2)
/*
* Region IDs
*/
-#define REGION_SHIFT 60UL
-#define REGION_MASK (0xfUL << REGION_SHIFT)
-#define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT)
-
-#define VMALLOC_REGION_ID (REGION_ID(H_VMALLOC_START))
-#define KERNEL_REGION_ID (REGION_ID(PAGE_OFFSET))
-#define VMEMMAP_REGION_ID (0xfUL) /* Server only */
-#define USER_REGION_ID (0UL)
+#define USER_REGION_ID 0
+#define LINEAR_MAP_REGION_ID 1
+#define VMALLOC_REGION_ID NON_LINEAR_REGION_ID(H_VMALLOC_START)
+#define IO_REGION_ID NON_LINEAR_REGION_ID(H_KERN_IO_START)
+#define VMEMMAP_REGION_ID NON_LINEAR_REGION_ID(H_VMEMMAP_START)
/*
* Defines the address of the vmemap area, in its own region on
* hash table CPUs.
*/
-#define H_VMEMMAP_BASE (VMEMMAP_REGION_ID << REGION_SHIFT)
-
#ifdef CONFIG_PPC_MM_SLICES
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
#endif /* CONFIG_PPC_MM_SLICES */
-
/* PTEIDX nibble */
#define _PTEIDX_SECONDARY 0x8
#define _PTEIDX_GROUP_IX 0x7
@@ -103,6 +111,25 @@
#define H_PUD_BAD_BITS (PMD_TABLE_SIZE-1)
#ifndef __ASSEMBLY__
+static inline int get_region_id(unsigned long ea)
+{
+ int region_id;
+ int id = (ea >> 60UL);
+
+ if (id == 0)
+ return USER_REGION_ID;
+
+ if (ea < H_KERN_VIRT_START)
+ return LINEAR_MAP_REGION_ID;
+
+ VM_BUG_ON(id != 0xc);
+ BUILD_BUG_ON(NON_LINEAR_REGION_ID(H_VMALLOC_START) != 2);
+
+ region_id = NON_LINEAR_REGION_ID(ea);
+ VM_BUG_ON(region_id > VMEMMAP_REGION_ID);
+ return region_id;
+}
+
#define hash__pmd_bad(pmd) (pmd_val(pmd) & H_PMD_BAD_BITS)
#define hash__pud_bad(pud) (pud_val(pud) & H_PUD_BAD_BITS)
static inline int hash__pgd_bad(pgd_t pgd)
diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
index ec2a55a553c7..56140d19c85f 100644
--- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -62,4 +62,76 @@ extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t old_pte, pte_t new_pte);
+/*
+ * This should work for other subarchs too. But right now we use the
+ * new format only for 64bit book3s
+ */
+static inline pte_t *hugepd_page(hugepd_t hpd)
+{
+ BUG_ON(!hugepd_ok(hpd));
+ /*
+ * We have only four bits to encode, MMU page size
+ */
+ BUILD_BUG_ON((MMU_PAGE_COUNT - 1) > 0xf);
+ return __va(hpd_val(hpd) & HUGEPD_ADDR_MASK);
+}
+
+static inline unsigned int hugepd_mmu_psize(hugepd_t hpd)
+{
+ return (hpd_val(hpd) & HUGEPD_SHIFT_MASK) >> 2;
+}
+
+static inline unsigned int hugepd_shift(hugepd_t hpd)
+{
+ return mmu_psize_to_shift(hugepd_mmu_psize(hpd));
+}
+static inline void flush_hugetlb_page(struct vm_area_struct *vma,
+ unsigned long vmaddr)
+{
+ if (radix_enabled())
+ return radix__flush_hugetlb_page(vma, vmaddr);
+}
+
+static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
+ unsigned int pdshift)
+{
+ unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd);
+
+ return hugepd_page(hpd) + idx;
+}
+
+static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift)
+{
+ *hpdp = __hugepd(__pa(new) | HUGEPD_VAL_BITS | (shift_to_mmu_psize(pshift) << 2));
+}
+
+void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+
+static inline int check_and_get_huge_psize(int shift)
+{
+ int mmu_psize;
+
+ if (shift > SLICE_HIGH_SHIFT)
+ return -EINVAL;
+
+ mmu_psize = shift_to_mmu_psize(shift);
+
+ /*
+ * We need to make sure that for different page sizes reported by
+ * firmware we only add hugetlb support for page sizes that can be
+ * supported by linux page table layout.
+ * For now we have
+ * Radix: 2M and 1G
+ * Hash: 16M and 16G
+ */
+ if (radix_enabled()) {
+ if (mmu_psize != MMU_PAGE_2M && mmu_psize != MMU_PAGE_1G)
+ return -EINVAL;
+ } else {
+ if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G)
+ return -EINVAL;
+ }
+ return mmu_psize;
+}
+
#endif
diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
new file mode 100644
index 000000000000..f254de956d6a
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H
+#define _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H
+
+#include <linux/const.h>
+
+#define AMR_KUAP_BLOCK_READ UL(0x4000000000000000)
+#define AMR_KUAP_BLOCK_WRITE UL(0x8000000000000000)
+#define AMR_KUAP_BLOCKED (AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE)
+#define AMR_KUAP_SHIFT 62
+
+#ifdef __ASSEMBLY__
+
+.macro kuap_restore_amr gpr
+#ifdef CONFIG_PPC_KUAP
+ BEGIN_MMU_FTR_SECTION_NESTED(67)
+ ld \gpr, STACK_REGS_KUAP(r1)
+ mtspr SPRN_AMR, \gpr
+ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67)
+#endif
+.endm
+
+.macro kuap_check_amr gpr1, gpr2
+#ifdef CONFIG_PPC_KUAP_DEBUG
+ BEGIN_MMU_FTR_SECTION_NESTED(67)
+ mfspr \gpr1, SPRN_AMR
+ li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT)
+ sldi \gpr2, \gpr2, AMR_KUAP_SHIFT
+999: tdne \gpr1, \gpr2
+ EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
+ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67)
+#endif
+.endm
+
+.macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr
+#ifdef CONFIG_PPC_KUAP
+ BEGIN_MMU_FTR_SECTION_NESTED(67)
+ .ifnb \msr_pr_cr
+ bne \msr_pr_cr, 99f
+ .endif
+ mfspr \gpr1, SPRN_AMR
+ std \gpr1, STACK_REGS_KUAP(r1)
+ li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT)
+ sldi \gpr2, \gpr2, AMR_KUAP_SHIFT
+ cmpd \use_cr, \gpr1, \gpr2
+ beq \use_cr, 99f
+ // We don't isync here because we very recently entered via rfid
+ mtspr SPRN_AMR, \gpr2
+ isync
+99:
+ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67)
+#endif
+.endm
+
+#else /* !__ASSEMBLY__ */
+
+#ifdef CONFIG_PPC_KUAP
+
+#include <asm/reg.h>
+
+/*
+ * We support individually allowing read or write, but we don't support nesting
+ * because that would require an expensive read/modify write of the AMR.
+ */
+
+static inline void set_kuap(unsigned long value)
+{
+ if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP))
+ return;
+
+ /*
+ * ISA v3.0B says we need a CSI (Context Synchronising Instruction) both
+ * before and after the move to AMR. See table 6 on page 1134.
+ */
+ isync();
+ mtspr(SPRN_AMR, value);
+ isync();
+}
+
+static inline void allow_user_access(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ // This is written so we can resolve to a single case at build time
+ if (__builtin_constant_p(to) && to == NULL)
+ set_kuap(AMR_KUAP_BLOCK_WRITE);
+ else if (__builtin_constant_p(from) && from == NULL)
+ set_kuap(AMR_KUAP_BLOCK_READ);
+ else
+ set_kuap(0);
+}
+
+static inline void prevent_user_access(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ set_kuap(AMR_KUAP_BLOCKED);
+}
+
+static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+{
+ return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) &&
+ (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
+ "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read");
+}
+#endif /* CONFIG_PPC_KUAP */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index a28a28079edb..1e4705516a54 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -588,7 +588,8 @@ extern void slb_set_size(u16 size);
#endif
#define MAX_VMALLOC_CTX_CNT 1
-#define MAX_MEMMAP_CTX_CNT 1
+#define MAX_IO_CTX_CNT 1
+#define MAX_VMEMMAP_CTX_CNT 1
/*
* 256MB segment
@@ -601,13 +602,10 @@ extern void slb_set_size(u16 size);
* would give a protovsid of 0x1fffffffff. That will result in a VSID 0
* because of the modulo operation in vsid scramble.
*
- * We add one extra context to MIN_USER_CONTEXT so that we can map kernel
- * context easily. The +1 is to map the unused 0xe region mapping.
*/
#define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 2)
#define MIN_USER_CONTEXT (MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \
- MAX_MEMMAP_CTX_CNT + 2)
-
+ MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT)
/*
* For platforms that support on 65bit VA we limit the context bits
*/
@@ -657,8 +655,8 @@ extern void slb_set_size(u16 size);
/* 4 bits per slice and we have one slice per 1TB */
#define SLICE_ARRAY_SIZE (H_PGTABLE_RANGE >> 41)
-#define TASK_SLICE_ARRAY_SZ(x) ((x)->context.slb_addr_limit >> 41)
-
+#define LOW_SLICE_ARRAY_SZ (BITS_PER_LONG / BITS_PER_BYTE)
+#define TASK_SLICE_ARRAY_SZ(x) ((x)->hash_context->slb_addr_limit >> 41)
#ifndef __ASSEMBLY__
#ifdef CONFIG_PPC_SUBPAGE_PROT
@@ -687,12 +685,41 @@ struct subpage_prot_table {
#define SBP_L3_SHIFT (SBP_L2_SHIFT + SBP_L2_BITS)
extern void subpage_prot_free(struct mm_struct *mm);
-extern void subpage_prot_init_new_context(struct mm_struct *mm);
#else
static inline void subpage_prot_free(struct mm_struct *mm) {}
-static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
#endif /* CONFIG_PPC_SUBPAGE_PROT */
+/*
+ * One bit per slice. We have lower slices which cover 256MB segments
+ * upto 4G range. That gets us 16 low slices. For the rest we track slices
+ * in 1TB size.
+ */
+struct slice_mask {
+ u64 low_slices;
+ DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
+};
+
+struct hash_mm_context {
+ u16 user_psize; /* page size index */
+
+ /* SLB page size encodings*/
+ unsigned char low_slices_psize[LOW_SLICE_ARRAY_SZ];
+ unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
+ unsigned long slb_addr_limit;
+#ifdef CONFIG_PPC_64K_PAGES
+ struct slice_mask mask_64k;
+#endif
+ struct slice_mask mask_4k;
+#ifdef CONFIG_HUGETLB_PAGE
+ struct slice_mask mask_16m;
+ struct slice_mask mask_16g;
+#endif
+
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+ struct subpage_prot_table *spt;
+#endif /* CONFIG_PPC_SUBPAGE_PROT */
+};
+
#if 0
/*
* The code below is equivalent to this function for arguments
@@ -747,7 +774,7 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
/*
* Bad address. We return VSID 0 for that
*/
- if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
+ if ((ea & EA_MASK) >= H_PGTABLE_RANGE)
return 0;
if (!mmu_has_feature(MMU_FTR_68_BIT_VA))
@@ -774,28 +801,29 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
* 0x00002 - [ 0xc002000000000000 - 0xc003ffffffffffff]
* 0x00003 - [ 0xc004000000000000 - 0xc005ffffffffffff]
* 0x00004 - [ 0xc006000000000000 - 0xc007ffffffffffff]
-
- * 0x00005 - [ 0xd000000000000000 - 0xd001ffffffffffff ]
- * 0x00006 - Not used - Can map 0xe000000000000000 range.
- * 0x00007 - [ 0xf000000000000000 - 0xf001ffffffffffff ]
*
- * So we can compute the context from the region (top nibble) by
- * subtracting 11, or 0xc - 1.
+ * vmap, IO, vmemap
+ *
+ * 0x00005 - [ 0xc008000000000000 - 0xc009ffffffffffff]
+ * 0x00006 - [ 0xc00a000000000000 - 0xc00bffffffffffff]
+ * 0x00007 - [ 0xc00c000000000000 - 0xc00dffffffffffff]
+ *
*/
static inline unsigned long get_kernel_context(unsigned long ea)
{
- unsigned long region_id = REGION_ID(ea);
+ unsigned long region_id = get_region_id(ea);
unsigned long ctx;
/*
- * For linear mapping we do support multiple context
+ * Depending on Kernel config, kernel region can have one context
+ * or more.
*/
- if (region_id == KERNEL_REGION_ID) {
+ if (region_id == LINEAR_MAP_REGION_ID) {
/*
* We already verified ea to be not beyond the addr limit.
*/
- ctx = 1 + ((ea & ~REGION_MASK) >> MAX_EA_BITS_PER_CONTEXT);
+ ctx = 1 + ((ea & EA_MASK) >> MAX_EA_BITS_PER_CONTEXT);
} else
- ctx = (region_id - 0xc) + MAX_KERNEL_CTX_CNT;
+ ctx = region_id + MAX_KERNEL_CTX_CNT - 1;
return ctx;
}
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 1ceee000c18d..74d24201fc4f 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -25,15 +25,22 @@ struct mmu_psize_def {
};
};
extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+#endif /* __ASSEMBLY__ */
/*
- * For BOOK3s 64 with 4k and 64K linux page size
- * we want to use pointers, because the page table
- * actually store pfn
+ * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS
+ * if we increase SECTIONS_WIDTH we will not store node details in page->flags and
+ * page_to_nid does a page->section->node lookup
+ * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce
+ * memory requirements with large number of sections.
+ * 51 bits is the max physical real address on POWER9
*/
-typedef pte_t *pgtable_t;
-
-#endif /* __ASSEMBLY__ */
+#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \
+ defined(CONFIG_PPC_64K_PAGES)
+#define MAX_PHYSMEM_BITS 51
+#else
+#define MAX_PHYSMEM_BITS 46
+#endif
/* 64-bit classic hash table MMU */
#include <asm/book3s/64/mmu-hash.h>
@@ -89,16 +96,6 @@ struct spinlock;
/* Maximum possible number of NPUs in a system. */
#define NV_MAX_NPUS 8
-/*
- * One bit per slice. We have lower slices which cover 256MB segments
- * upto 4G range. That gets us 16 low slices. For the rest we track slices
- * in 1TB size.
- */
-struct slice_mask {
- u64 low_slices;
- DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
-};
-
typedef struct {
union {
/*
@@ -112,7 +109,6 @@ typedef struct {
mm_context_id_t id;
mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE];
};
- u16 user_psize; /* page size index */
/* Number of bits in the mm_cpumask */
atomic_t active_cpus;
@@ -122,27 +118,9 @@ typedef struct {
/* NPU NMMU context */
struct npu_context *npu_context;
+ struct hash_mm_context *hash_context;
-#ifdef CONFIG_PPC_MM_SLICES
- /* SLB page size encodings*/
- unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
- unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
- unsigned long slb_addr_limit;
-# ifdef CONFIG_PPC_64K_PAGES
- struct slice_mask mask_64k;
-# endif
- struct slice_mask mask_4k;
-# ifdef CONFIG_HUGETLB_PAGE
- struct slice_mask mask_16m;
- struct slice_mask mask_16g;
-# endif
-#else
- u16 sllp; /* SLB page size encoding */
-#endif
unsigned long vdso_base;
-#ifdef CONFIG_PPC_SUBPAGE_PROT
- struct subpage_prot_table spt;
-#endif /* CONFIG_PPC_SUBPAGE_PROT */
/*
* pagetable fragment support
*/
@@ -163,6 +141,60 @@ typedef struct {
#endif
} mm_context_t;
+static inline u16 mm_ctx_user_psize(mm_context_t *ctx)
+{
+ return ctx->hash_context->user_psize;
+}
+
+static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize)
+{
+ ctx->hash_context->user_psize = user_psize;
+}
+
+static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx)
+{
+ return ctx->hash_context->low_slices_psize;
+}
+
+static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx)
+{
+ return ctx->hash_context->high_slices_psize;
+}
+
+static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx)
+{
+ return ctx->hash_context->slb_addr_limit;
+}
+
+static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit)
+{
+ ctx->hash_context->slb_addr_limit = limit;
+}
+
+static inline struct slice_mask *slice_mask_for_size(mm_context_t *ctx, int psize)
+{
+#ifdef CONFIG_PPC_64K_PAGES
+ if (psize == MMU_PAGE_64K)
+ return &ctx->hash_context->mask_64k;
+#endif
+#ifdef CONFIG_HUGETLB_PAGE
+ if (psize == MMU_PAGE_16M)
+ return &ctx->hash_context->mask_16m;
+ if (psize == MMU_PAGE_16G)
+ return &ctx->hash_context->mask_16g;
+#endif
+ BUG_ON(psize != MMU_PAGE_4K);
+
+ return &ctx->hash_context->mask_4k;
+}
+
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx)
+{
+ return ctx->hash_context->spt;
+}
+#endif
+
/*
* The current system page and segment sizes
*/
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 138bc2ecc0c4..d45e4449619f 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -19,29 +19,7 @@ struct vmemmap_backing {
};
extern struct vmemmap_backing *vmemmap_list;
-/*
- * Functions that deal with pagetables that could be at any level of
- * the table need to be passed an "index_size" so they know how to
- * handle allocation. For PTE pages (which are linked to a struct
- * page for now, and drawn from the main get_free_pages() pool), the
- * allocation size will be (2^index_size * sizeof(pointer)) and
- * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
- *
- * The maximum index size needs to be big enough to allow any
- * pagetable sizes we need, but small enough to fit in the low bits of
- * any page table pointer. In other words all pagetables, even tiny
- * ones, must be aligned to allow at least enough low 0 bits to
- * contain this value. This value is also used as a mask, so it must
- * be one less than a power of two.
- */
-#define MAX_PGTABLE_INDEX_SIZE 0xf
-
-extern struct kmem_cache *pgtable_cache[];
-#define PGT_CACHE(shift) pgtable_cache[shift]
-
-extern pte_t *pte_fragment_alloc(struct mm_struct *, int);
extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
-extern void pte_fragment_free(unsigned long *, int);
extern void pmd_fragment_free(unsigned long *);
extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
#ifdef CONFIG_SMP
@@ -81,6 +59,9 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
pgtable_gfp_flags(mm, GFP_KERNEL));
+ if (unlikely(!pgd))
+ return pgd;
+
/*
* Don't scan the PGD for pointers, it contains references to PUDs but
* those references are not full pointers and so can't be recognised by
@@ -185,31 +166,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
*pmd = __pmd(__pgtable_ptr_val(pte_page) | PMD_VAL_BITS);
}
-static inline pgtable_t pmd_pgtable(pmd_t pmd)
-{
- return (pgtable_t)pmd_page_vaddr(pmd);
-}
-
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- return (pte_t *)pte_fragment_alloc(mm, 1);
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
- return (pgtable_t)pte_fragment_alloc(mm, 0);
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- pte_fragment_free((unsigned long *)pte, 1);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
- pte_fragment_free((unsigned long *)ptepage, 0);
-}
-
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
unsigned long address)
{
@@ -221,8 +177,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
pgtable_free_tlb(tlb, table, PTE_INDEX);
}
-#define check_pgt_cache() do { } while (0)
-
extern atomic_long_t direct_pages_count[MMU_PAGE_COUNT];
static inline void update_page_count(int psize, long count)
{
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 581f91be9dd4..7dede2e34b70 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -277,9 +277,11 @@ extern unsigned long __vmalloc_end;
extern unsigned long __kernel_virt_start;
extern unsigned long __kernel_virt_size;
extern unsigned long __kernel_io_start;
+extern unsigned long __kernel_io_end;
#define KERN_VIRT_START __kernel_virt_start
-#define KERN_VIRT_SIZE __kernel_virt_size
#define KERN_IO_START __kernel_io_start
+#define KERN_IO_END __kernel_io_end
+
extern struct page *vmemmap;
extern unsigned long ioremap_bot;
extern unsigned long pci_io_base;
@@ -296,8 +298,7 @@ extern unsigned long pci_io_base;
#include <asm/barrier.h>
/*
- * The second half of the kernel virtual space is used for IO mappings,
- * it's itself carved into the PIO region (ISA and PHB IO space) and
+ * IO space itself carved into the PIO region (ISA and PHB IO space) and
* the ioremap space
*
* ISA_IO_BASE = KERN_IO_START, 64K reserved area
@@ -310,7 +311,7 @@ extern unsigned long pci_io_base;
#define PHB_IO_BASE (ISA_IO_END)
#define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE)
#define IOREMAP_BASE (PHB_IO_END)
-#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE)
+#define IOREMAP_END (KERN_IO_END)
/* Advertise special mapping type for AGP */
#define HAVE_PAGE_AGP
@@ -992,7 +993,8 @@ extern struct page *pgd_page(pgd_t pgd);
(((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr))
#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
-#define pte_unmap(pte) do { } while(0)
+
+static inline void pte_unmap(pte_t *pte) { }
/* to find an entry in a kernel page-table-directory */
/* This now only contains the vmalloc pages */
diff --git a/arch/powerpc/include/asm/book3s/64/radix-4k.h b/arch/powerpc/include/asm/book3s/64/radix-4k.h
index 863c3e8286fb..d5f5ab73dc7f 100644
--- a/arch/powerpc/include/asm/book3s/64/radix-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/radix-4k.h
@@ -5,10 +5,11 @@
/*
* For 4K page size supported index is 13/9/9/9
*/
-#define RADIX_PTE_INDEX_SIZE 9 /* 2MB huge page */
-#define RADIX_PMD_INDEX_SIZE 9 /* 1G huge page */
-#define RADIX_PUD_INDEX_SIZE 9
-#define RADIX_PGD_INDEX_SIZE 13
+#define RADIX_PTE_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 4K = 2MB
+#define RADIX_PMD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 2MB = 1GB
+#define RADIX_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 1GB = 512GB
+#define RADIX_PGD_INDEX_SIZE 13 // size: 8B << 13 = 64KB, maps 2^13 x 512GB = 4PB
+
/*
* One fragment per per page
*/
diff --git a/arch/powerpc/include/asm/book3s/64/radix-64k.h b/arch/powerpc/include/asm/book3s/64/radix-64k.h
index ccb78ca9d0c5..54e33828b0fb 100644
--- a/arch/powerpc/include/asm/book3s/64/radix-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/radix-64k.h
@@ -5,10 +5,10 @@
/*
* For 64K page size supported index is 13/9/9/5
*/
-#define RADIX_PTE_INDEX_SIZE 5 /* 2MB huge page */
-#define RADIX_PMD_INDEX_SIZE 9 /* 1G huge page */
-#define RADIX_PUD_INDEX_SIZE 9
-#define RADIX_PGD_INDEX_SIZE 13
+#define RADIX_PTE_INDEX_SIZE 5 // size: 8B << 5 = 256B, maps 2^5 x 64K = 2MB
+#define RADIX_PMD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 2MB = 1GB
+#define RADIX_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 1GB = 512GB
+#define RADIX_PGD_INDEX_SIZE 13 // size: 8B << 13 = 64KB, maps 2^13 x 512GB = 4PB
/*
* We use a 256 byte PTE page fragment in radix
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 5ab134eeed20..574eca33f893 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -72,19 +72,17 @@
* | |
* | |
* | |
- * +------------------------------+ Kernel IO map end (0xc010000000000000)
+ * +------------------------------+ Kernel vmemmap end (0xc010000000000000)
* | |
+ * | 512TB |
* | |
- * | 1/2 of virtual map |
+ * +------------------------------+ Kernel IO map end/vmemap start
* | |
+ * | 512TB |
* | |
- * +------------------------------+ Kernel IO map start
+ * +------------------------------+ Kernel vmap end/ IO map start
* | |
- * | 1/4 of virtual map |
- * | |
- * +------------------------------+ Kernel vmemap start
- * | |
- * | 1/4 of virtual map |
+ * | 512TB |
* | |
* +------------------------------+ Kernel virt start (0xc008000000000000)
* | |
@@ -93,24 +91,24 @@
* +------------------------------+ Kernel linear (0xc.....)
*/
-#define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000)
-#define RADIX_KERN_VIRT_SIZE ASM_CONST(0x0008000000000000)
-
+#define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000)
/*
- * The vmalloc space starts at the beginning of that region, and
- * occupies a quarter of it on radix config.
- * (we keep a quarter for the virtual memmap)
+ * 49 = MAX_EA_BITS_PER_CONTEXT (hash specific). To make sure we pick
+ * the same value as hash.
*/
+#define RADIX_KERN_MAP_SIZE (1UL << 49)
+
#define RADIX_VMALLOC_START RADIX_KERN_VIRT_START
-#define RADIX_VMALLOC_SIZE (RADIX_KERN_VIRT_SIZE >> 2)
+#define RADIX_VMALLOC_SIZE RADIX_KERN_MAP_SIZE
#define RADIX_VMALLOC_END (RADIX_VMALLOC_START + RADIX_VMALLOC_SIZE)
-/*
- * Defines the address of the vmemap area, in its own region on
- * hash table CPUs.
- */
-#define RADIX_VMEMMAP_BASE (RADIX_VMALLOC_END)
-#define RADIX_KERN_IO_START (RADIX_KERN_VIRT_START + (RADIX_KERN_VIRT_SIZE >> 1))
+#define RADIX_KERN_IO_START RADIX_VMALLOC_END
+#define RADIX_KERN_IO_SIZE RADIX_KERN_MAP_SIZE
+#define RADIX_KERN_IO_END (RADIX_KERN_IO_START + RADIX_KERN_IO_SIZE)
+
+#define RADIX_VMEMMAP_START RADIX_KERN_IO_END
+#define RADIX_VMEMMAP_SIZE RADIX_KERN_MAP_SIZE
+#define RADIX_VMEMMAP_END (RADIX_VMEMMAP_START + RADIX_VMEMMAP_SIZE)
#ifndef __ASSEMBLY__
#define RADIX_PTE_TABLE_SIZE (sizeof(pte_t) << RADIX_PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/book3s/64/slice.h b/arch/powerpc/include/asm/book3s/64/slice.h
index db0dedab65ee..f0d3194ba41b 100644
--- a/arch/powerpc/include/asm/book3s/64/slice.h
+++ b/arch/powerpc/include/asm/book3s/64/slice.h
@@ -2,8 +2,6 @@
#ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H
#define _ASM_POWERPC_BOOK3S_64_SLICE_H
-#ifdef CONFIG_PPC_MM_SLICES
-
#define SLICE_LOW_SHIFT 28
#define SLICE_LOW_TOP (0x100000000ul)
#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
@@ -13,15 +11,6 @@
#define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)
-#else /* CONFIG_PPC_MM_SLICES */
-
-#define get_slice_psize(mm, addr) ((mm)->context.user_psize)
-#define slice_set_user_psize(mm, psize) \
-do { \
- (mm)->context.user_psize = (psize); \
- (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
-} while (0)
-
-#endif /* CONFIG_PPC_MM_SLICES */
+#define SLB_ADDR_LIMIT_DEFAULT DEFAULT_MAP_WINDOW_USER64
#endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
index 43e5f31fe64d..9844b3ded187 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -27,10 +27,11 @@
* the THREAD_WINKLE_BITS are set, which indicate which threads have not
* yet woken from the winkle state.
*/
-#define PNV_CORE_IDLE_LOCK_BIT 0x10000000
+#define NR_PNV_CORE_IDLE_LOCK_BIT 28
+#define PNV_CORE_IDLE_LOCK_BIT (1ULL << NR_PNV_CORE_IDLE_LOCK_BIT)
+#define PNV_CORE_IDLE_WINKLE_COUNT_SHIFT 16
#define PNV_CORE_IDLE_WINKLE_COUNT 0x00010000
-#define PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT 0x00080000
#define PNV_CORE_IDLE_WINKLE_COUNT_BITS 0x000F0000
#define PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT 8
#define PNV_CORE_IDLE_THREAD_WINKLE_BITS 0x0000FF00
@@ -68,16 +69,6 @@
#define ERR_DEEP_STATE_ESL_MISMATCH -2
#ifndef __ASSEMBLY__
-/* Additional SPRs that need to be saved/restored during stop */
-struct stop_sprs {
- u64 pid;
- u64 ldbar;
- u64 fscr;
- u64 hfscr;
- u64 mmcr1;
- u64 mmcr2;
- u64 mmcra;
-};
#define PNV_IDLE_NAME_LEN 16
struct pnv_idle_states_t {
@@ -92,10 +83,6 @@ struct pnv_idle_states_t {
extern struct pnv_idle_states_t *pnv_idle_states;
extern int nr_pnv_idle_states;
-extern u32 pnv_fastsleep_workaround_at_entry[];
-extern u32 pnv_fastsleep_workaround_at_exit[];
-
-extern u64 pnv_first_deep_stop_state;
unsigned long pnv_cpu_offline(unsigned int cpu);
int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags);
diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index 7c1d8e74b25d..7f3279b014db 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -17,6 +17,9 @@ struct drmem_lmb {
u32 drc_index;
u32 aa_index;
u32 flags;
+#ifdef CONFIG_MEMORY_HOTPLUG
+ int nid;
+#endif
};
struct drmem_lmb_info {
@@ -104,4 +107,22 @@ static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
lmb->aa_index = 0xffffffff;
}
+#ifdef CONFIG_MEMORY_HOTPLUG
+static inline void lmb_set_nid(struct drmem_lmb *lmb)
+{
+ lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr);
+}
+static inline void lmb_clear_nid(struct drmem_lmb *lmb)
+{
+ lmb->nid = -1;
+}
+#else
+static inline void lmb_set_nid(struct drmem_lmb *lmb)
+{
+}
+static inline void lmb_clear_nid(struct drmem_lmb *lmb)
+{
+}
+#endif
+
#endif /* _ASM_POWERPC_LMB_H */
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 937bb630093f..bef4e05a6823 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -497,6 +497,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
RESTORE_CTR(r1, area); \
b bad_stack; \
3: EXCEPTION_PROLOG_COMMON_1(); \
+ kuap_save_amr_and_lock r9, r10, cr1, cr0; \
beq 4f; /* if from kernel mode */ \
ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \
SAVE_PPR(area, r9); \
@@ -691,6 +692,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
*/
#define EXCEPTION_COMMON_NORET_STACK(area, trap, label, hdlr, additions) \
EXCEPTION_PROLOG_COMMON_1(); \
+ kuap_save_amr_and_lock r9, r10, cr1; \
EXCEPTION_PROLOG_COMMON_2(area); \
EXCEPTION_PROLOG_COMMON_3(trap); \
/* Volatile regs are potentially clobbered here */ \
diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index 188776befaf9..e2099c0a15c3 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -219,5 +219,6 @@ extern void fadump_cleanup(void);
static inline int is_fadump_active(void) { return 0; }
static inline int should_fadump_crash(void) { return 0; }
static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
+static inline void fadump_cleanup(void) { }
#endif
#endif
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 40a6c9261a6b..f6fc31f8baff 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -100,6 +100,9 @@ label##5: \
#define END_MMU_FTR_SECTION(msk, val) \
END_MMU_FTR_SECTION_NESTED(msk, val, 97)
+#define END_MMU_FTR_SECTION_NESTED_IFSET(msk, label) \
+ END_MMU_FTR_SECTION_NESTED((msk), (msk), label)
+
#define END_MMU_FTR_SECTION_IFSET(msk) END_MMU_FTR_SECTION((msk), (msk))
#define END_MMU_FTR_SECTION_IFCLR(msk) END_MMU_FTR_SECTION((msk), 0)
diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h
index b9fbed84ddca..0cfc365d814b 100644
--- a/arch/powerpc/include/asm/fixmap.h
+++ b/arch/powerpc/include/asm/fixmap.h
@@ -22,7 +22,12 @@
#include <asm/kmap_types.h>
#endif
+#ifdef CONFIG_KASAN
+#include <asm/kasan.h>
+#define FIXADDR_TOP (KASAN_SHADOW_START - PAGE_SIZE)
+#else
#define FIXADDR_TOP ((unsigned long)(-PAGE_SIZE))
+#endif
/*
* Here we define all the compile-time 'special' virtual
diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h
index 88b38b37c21b..3a6aa57b9d90 100644
--- a/arch/powerpc/include/asm/futex.h
+++ b/arch/powerpc/include/asm/futex.h
@@ -35,6 +35,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
{
int oldval = 0, ret;
+ allow_write_to_user(uaddr, sizeof(*uaddr));
pagefault_disable();
switch (op) {
@@ -62,6 +63,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
if (!ret)
*oval = oldval;
+ prevent_write_to_user(uaddr, sizeof(*uaddr));
return ret;
}
@@ -75,6 +77,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;
+ allow_write_to_user(uaddr, sizeof(*uaddr));
__asm__ __volatile__ (
PPC_ATOMIC_ENTRY_BARRIER
"1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\
@@ -95,6 +98,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
: "cc", "memory");
*uval = prev;
+ prevent_write_to_user(uaddr, sizeof(*uaddr));
return ret;
}
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 8d40565ad0c3..20a101046cff 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -6,82 +6,16 @@
#include <asm/page.h>
#ifdef CONFIG_PPC_BOOK3S_64
-
#include <asm/book3s/64/hugetlb.h>
-/*
- * This should work for other subarchs too. But right now we use the
- * new format only for 64bit book3s
- */
-static inline pte_t *hugepd_page(hugepd_t hpd)
-{
- BUG_ON(!hugepd_ok(hpd));
- /*
- * We have only four bits to encode, MMU page size
- */
- BUILD_BUG_ON((MMU_PAGE_COUNT - 1) > 0xf);
- return __va(hpd_val(hpd) & HUGEPD_ADDR_MASK);
-}
-
-static inline unsigned int hugepd_mmu_psize(hugepd_t hpd)
-{
- return (hpd_val(hpd) & HUGEPD_SHIFT_MASK) >> 2;
-}
-
-static inline unsigned int hugepd_shift(hugepd_t hpd)
-{
- return mmu_psize_to_shift(hugepd_mmu_psize(hpd));
-}
-static inline void flush_hugetlb_page(struct vm_area_struct *vma,
- unsigned long vmaddr)
-{
- if (radix_enabled())
- return radix__flush_hugetlb_page(vma, vmaddr);
-}
-
-#else
-
-static inline pte_t *hugepd_page(hugepd_t hpd)
-{
- BUG_ON(!hugepd_ok(hpd));
-#ifdef CONFIG_PPC_8xx
- return (pte_t *)__va(hpd_val(hpd) & ~HUGEPD_SHIFT_MASK);
-#else
- return (pte_t *)((hpd_val(hpd) &
- ~HUGEPD_SHIFT_MASK) | PD_HUGE);
-#endif
-}
-
-static inline unsigned int hugepd_shift(hugepd_t hpd)
-{
-#ifdef CONFIG_PPC_8xx
- return ((hpd_val(hpd) & _PMD_PAGE_MASK) >> 1) + 17;
-#else
- return hpd_val(hpd) & HUGEPD_SHIFT_MASK;
-#endif
-}
-
+#elif defined(CONFIG_PPC_FSL_BOOK3E)
+#include <asm/nohash/hugetlb-book3e.h>
+#elif defined(CONFIG_PPC_8xx)
+#include <asm/nohash/32/hugetlb-8xx.h>
#endif /* CONFIG_PPC_BOOK3S_64 */
+extern bool hugetlb_disabled;
-static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
- unsigned pdshift)
-{
- /*
- * On FSL BookE, we have multiple higher-level table entries that
- * point to the same hugepte. Just use the first one since they're all
- * identical. So for that case, idx=0.
- */
- unsigned long idx = 0;
-
- pte_t *dir = hugepd_page(hpd);
-#ifdef CONFIG_PPC_8xx
- idx = (addr & ((1UL << pdshift) - 1)) >> PAGE_SHIFT;
-#elif !defined(CONFIG_PPC_FSL_BOOK3E)
- idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd);
-#endif
-
- return dir + idx;
-}
+void hugetlbpage_init_default(void);
void flush_dcache_icache_hugepage(struct page *page);
@@ -99,15 +33,6 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
pte_t pte);
-#ifdef CONFIG_PPC_8xx
-static inline void flush_hugetlb_page(struct vm_area_struct *vma,
- unsigned long vmaddr)
-{
- flush_tlb_page(vma, vmaddr);
-}
-#else
-void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
-#endif
#define __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE
void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index ece4dc89c90b..0fe8c1e46bbc 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -90,10 +90,18 @@ static inline void hw_breakpoint_disable(void)
extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
int hw_breakpoint_handler(struct die_args *args);
+extern int set_dawr(struct arch_hw_breakpoint *brk);
+extern bool dawr_force_enable;
+static inline bool dawr_enabled(void)
+{
+ return dawr_force_enable;
+}
+
#else /* CONFIG_HAVE_HW_BREAKPOINT */
static inline void hw_breakpoint_disable(void) { }
static inline void thread_change_pc(struct task_struct *tsk,
struct pt_regs *regs) { }
+static inline bool dawr_enabled(void) { return false; }
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
#endif /* __KERNEL__ */
#endif /* _PPC_BOOK3S_64_HW_BREAKPOINT_H */
diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
index 69f516ecb2fd..7c2ef0e42661 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -33,6 +33,7 @@
*/
#define THREAD_IMC_LDBAR_MASK 0x0003ffffffffe000ULL
#define THREAD_IMC_ENABLE 0x8000000000000000ULL
+#define TRACE_IMC_ENABLE 0x4000000000000000ULL
/*
* For debugfs interface for imc-mode and imc-command
@@ -59,6 +60,34 @@ struct imc_events {
char *scale;
};
+/*
+ * Trace IMC hardware updates a 64bytes record on
+ * Core Performance Monitoring Counter (CPMC)
+ * overflow. Here is the layout for the trace imc record
+ *
+ * DW 0 : Timebase
+ * DW 1 : Program Counter
+ * DW 2 : PIDR information
+ * DW 3 : CPMC1
+ * DW 4 : CPMC2
+ * DW 5 : CPMC3
+ * Dw 6 : CPMC4
+ * DW 7 : Timebase
+ * .....
+ *
+ * The following is the data structure to hold trace imc data.
+ */
+struct trace_imc_data {
+ u64 tb1;
+ u64 ip;
+ u64 val;
+ u64 cpmc1;
+ u64 cpmc2;
+ u64 cpmc3;
+ u64 cpmc4;
+ u64 tb2;
+};
+
/* Event attribute array index */
#define IMC_FORMAT_ATTR 0
#define IMC_EVENT_ATTR 1
@@ -69,6 +98,13 @@ struct imc_events {
#define IMC_EVENT_OFFSET_MASK 0xffffffffULL
/*
+ * Macro to mask bits 0:21 of first double word(which is the timebase) to
+ * compare with 8th double word (timebase) of trace imc record data.
+ */
+#define IMC_TRACE_RECORD_TB1_MASK 0x3ffffffffffULL
+
+
+/*
* Device tree parser code detects IMC pmu support and
* registers new IMC pmus. This structure will hold the
* pmu functions, events, counter memory information
@@ -113,6 +149,7 @@ struct imc_pmu_ref {
enum {
IMC_TYPE_THREAD = 0x1,
+ IMC_TYPE_TRACE = 0x2,
IMC_TYPE_CORE = 0x4,
IMC_TYPE_CHIP = 0x10,
};
@@ -123,6 +160,8 @@ enum {
#define IMC_DOMAIN_NEST 1
#define IMC_DOMAIN_CORE 2
#define IMC_DOMAIN_THREAD 3
+/* For trace-imc the domain is still thread but it operates in trace-mode */
+#define IMC_DOMAIN_TRACE 4
extern int init_imc_pmu(struct device_node *parent,
struct imc_pmu *pmu_ptr, int pmu_id);
diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h
new file mode 100644
index 000000000000..296e51c2f066
--- /dev/null
+++ b/arch/powerpc/include/asm/kasan.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_KASAN_H
+#define __ASM_KASAN_H
+
+#ifdef CONFIG_KASAN
+#define _GLOBAL_KASAN(fn) _GLOBAL(__##fn)
+#define _GLOBAL_TOC_KASAN(fn) _GLOBAL_TOC(__##fn)
+#define EXPORT_SYMBOL_KASAN(fn) EXPORT_SYMBOL(__##fn)
+#else
+#define _GLOBAL_KASAN(fn) _GLOBAL(fn)
+#define _GLOBAL_TOC_KASAN(fn) _GLOBAL_TOC(fn)
+#define EXPORT_SYMBOL_KASAN(fn)
+#endif
+
+#ifndef __ASSEMBLY__
+
+#include <asm/page.h>
+
+#define KASAN_SHADOW_SCALE_SHIFT 3
+
+#define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \
+ (PAGE_OFFSET >> KASAN_SHADOW_SCALE_SHIFT))
+
+#define KASAN_SHADOW_OFFSET ASM_CONST(CONFIG_KASAN_SHADOW_OFFSET)
+
+#define KASAN_SHADOW_END 0UL
+
+#define KASAN_SHADOW_SIZE (KASAN_SHADOW_END - KASAN_SHADOW_START)
+
+#ifdef CONFIG_KASAN
+void kasan_early_init(void);
+void kasan_mmu_init(void);
+void kasan_init(void);
+#else
+static inline void kasan_init(void) { }
+static inline void kasan_mmu_init(void) { }
+#endif
+
+#endif /* __ASSEMBLY */
+#endif
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
new file mode 100644
index 000000000000..5b5e39643a27
--- /dev/null
+++ b/arch/powerpc/include/asm/kup.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_KUP_H_
+#define _ASM_POWERPC_KUP_H_
+
+#ifdef CONFIG_PPC64
+#include <asm/book3s/64/kup-radix.h>
+#endif
+#ifdef CONFIG_PPC_8xx
+#include <asm/nohash/32/kup-8xx.h>
+#endif
+#ifdef CONFIG_PPC_BOOK3S_32
+#include <asm/book3s/32/kup.h>
+#endif
+
+#ifdef __ASSEMBLY__
+#ifndef CONFIG_PPC_KUAP
+.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3
+.endm
+
+.macro kuap_restore sp, current, gpr1, gpr2, gpr3
+.endm
+
+.macro kuap_check current, gpr
+.endm
+
+#endif
+
+#else /* !__ASSEMBLY__ */
+
+#include <asm/pgtable.h>
+
+void setup_kup(void);
+
+#ifdef CONFIG_PPC_KUEP
+void setup_kuep(bool disabled);
+#else
+static inline void setup_kuep(bool disabled) { }
+#endif /* CONFIG_PPC_KUEP */
+
+#ifdef CONFIG_PPC_KUAP
+void setup_kuap(bool disabled);
+#else
+static inline void setup_kuap(bool disabled) { }
+static inline void allow_user_access(void __user *to, const void __user *from,
+ unsigned long size) { }
+static inline void prevent_user_access(void __user *to, const void __user *from,
+ unsigned long size) { }
+static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) { return false; }
+#endif /* CONFIG_PPC_KUAP */
+
+static inline void allow_read_from_user(const void __user *from, unsigned long size)
+{
+ allow_user_access(NULL, from, size);
+}
+
+static inline void allow_write_to_user(void __user *to, unsigned long size)
+{
+ allow_user_access(to, NULL, size);
+}
+
+static inline void prevent_read_from_user(const void __user *from, unsigned long size)
+{
+ prevent_user_access(NULL, from, size);
+}
+
+static inline void prevent_write_to_user(void __user *to, unsigned long size)
+{
+ prevent_user_access(to, NULL, size);
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_KUP_H_ */
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 17996bc9382b..23247a132ce8 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -31,7 +31,7 @@ enum MCE_Version {
enum MCE_Severity {
MCE_SEV_NO_ERROR = 0,
MCE_SEV_WARNING = 1,
- MCE_SEV_ERROR_SYNC = 2,
+ MCE_SEV_SEVERE = 2,
MCE_SEV_FATAL = 3,
};
@@ -56,6 +56,14 @@ enum MCE_ErrorType {
MCE_ERROR_TYPE_LINK = 7,
};
+enum MCE_ErrorClass {
+ MCE_ECLASS_UNKNOWN = 0,
+ MCE_ECLASS_HARDWARE,
+ MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_ECLASS_SOFTWARE,
+ MCE_ECLASS_SOFT_INDETERMINATE,
+};
+
enum MCE_UeErrorType {
MCE_UE_ERROR_INDETERMINATE = 0,
MCE_UE_ERROR_IFETCH = 1,
@@ -110,73 +118,75 @@ enum MCE_LinkErrorType {
};
struct machine_check_event {
- enum MCE_Version version:8; /* 0x00 */
- uint8_t in_use; /* 0x01 */
- enum MCE_Severity severity:8; /* 0x02 */
- enum MCE_Initiator initiator:8; /* 0x03 */
- enum MCE_ErrorType error_type:8; /* 0x04 */
- enum MCE_Disposition disposition:8; /* 0x05 */
- uint8_t reserved_1[2]; /* 0x06 */
- uint64_t gpr3; /* 0x08 */
- uint64_t srr0; /* 0x10 */
- uint64_t srr1; /* 0x18 */
- union { /* 0x20 */
+ enum MCE_Version version:8;
+ u8 in_use;
+ enum MCE_Severity severity:8;
+ enum MCE_Initiator initiator:8;
+ enum MCE_ErrorType error_type:8;
+ enum MCE_ErrorClass error_class:8;
+ enum MCE_Disposition disposition:8;
+ bool sync_error;
+ u16 cpu;
+ u64 gpr3;
+ u64 srr0;
+ u64 srr1;
+ union {
struct {
enum MCE_UeErrorType ue_error_type:8;
- uint8_t effective_address_provided;
- uint8_t physical_address_provided;
- uint8_t reserved_1[5];
- uint64_t effective_address;
- uint64_t physical_address;
- uint8_t reserved_2[8];
+ u8 effective_address_provided;
+ u8 physical_address_provided;
+ u8 reserved_1[5];
+ u64 effective_address;
+ u64 physical_address;
+ u8 reserved_2[8];
} ue_error;
struct {
enum MCE_SlbErrorType slb_error_type:8;
- uint8_t effective_address_provided;
- uint8_t reserved_1[6];
- uint64_t effective_address;
- uint8_t reserved_2[16];
+ u8 effective_address_provided;
+ u8 reserved_1[6];
+ u64 effective_address;
+ u8 reserved_2[16];
} slb_error;
struct {
enum MCE_EratErrorType erat_error_type:8;
- uint8_t effective_address_provided;
- uint8_t reserved_1[6];
- uint64_t effective_address;
- uint8_t reserved_2[16];
+ u8 effective_address_provided;
+ u8 reserved_1[6];
+ u64 effective_address;
+ u8 reserved_2[16];
} erat_error;
struct {
enum MCE_TlbErrorType tlb_error_type:8;
- uint8_t effective_address_provided;
- uint8_t reserved_1[6];
- uint64_t effective_address;
- uint8_t reserved_2[16];
+ u8 effective_address_provided;
+ u8 reserved_1[6];
+ u64 effective_address;
+ u8 reserved_2[16];
} tlb_error;
struct {
enum MCE_UserErrorType user_error_type:8;
- uint8_t effective_address_provided;
- uint8_t reserved_1[6];
- uint64_t effective_address;
- uint8_t reserved_2[16];
+ u8 effective_address_provided;
+ u8 reserved_1[6];
+ u64 effective_address;
+ u8 reserved_2[16];
} user_error;
struct {
enum MCE_RaErrorType ra_error_type:8;
- uint8_t effective_address_provided;
- uint8_t reserved_1[6];
- uint64_t effective_address;
- uint8_t reserved_2[16];
+ u8 effective_address_provided;
+ u8 reserved_1[6];
+ u64 effective_address;
+ u8 reserved_2[16];
} ra_error;
struct {
enum MCE_LinkErrorType link_error_type:8;
- uint8_t effective_address_provided;
- uint8_t reserved_1[6];
- uint64_t effective_address;
- uint8_t reserved_2[16];
+ u8 effective_address_provided;
+ u8 reserved_1[6];
+ u64 effective_address;
+ u8 reserved_2[16];
} link_error;
} u;
};
@@ -194,6 +204,8 @@ struct mce_error_info {
} u;
enum MCE_Severity severity:8;
enum MCE_Initiator initiator:8;
+ enum MCE_ErrorClass error_class:8;
+ bool sync_error;
};
#define MAX_MC_EVT 100
@@ -210,6 +222,7 @@ extern void release_mce_event(void);
extern void machine_check_queue_event(void);
extern void machine_check_print_event_info(struct machine_check_event *evt,
bool user_mode, bool in_guest);
+unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr);
#ifdef CONFIG_PPC_BOOK3S_64
void flush_and_reload_slb(void);
#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 8ddd4a91bdc1..ba94ce8c22d7 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -107,6 +107,11 @@
*/
#define MMU_FTR_1T_SEGMENT ASM_CONST(0x40000000)
+/*
+ * Supports KUAP (key 0 controlling userspace addresses) on radix
+ */
+#define MMU_FTR_RADIX_KUAP ASM_CONST(0x80000000)
+
/* MMU feature bit sets for various CPUs */
#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \
MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2
@@ -124,6 +129,9 @@
#ifndef __ASSEMBLY__
#include <linux/bug.h>
#include <asm/cputable.h>
+#include <asm/page.h>
+
+typedef pte_t *pgtable_t;
#ifdef CONFIG_PPC_FSL_BOOK3E
#include <asm/percpu.h>
@@ -164,7 +172,10 @@ enum {
#endif
#ifdef CONFIG_PPC_RADIX_MMU
MMU_FTR_TYPE_RADIX |
-#endif
+#ifdef CONFIG_PPC_KUAP
+ MMU_FTR_RADIX_KUAP |
+#endif /* CONFIG_PPC_KUAP */
+#endif /* CONFIG_PPC_RADIX_MMU */
0,
};
@@ -341,21 +352,6 @@ static inline bool strict_kernel_rwx_enabled(void)
*/
#define MMU_PAGE_COUNT 16
-/*
- * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS
- * if we increase SECTIONS_WIDTH we will not store node details in page->flags and
- * page_to_nid does a page->section->node lookup
- * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce
- * memory requirements with large number of sections.
- * 51 bits is the max physical real address on POWER9
- */
-#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \
- defined (CONFIG_PPC_64K_PAGES)
-#define MAX_PHYSMEM_BITS 51
-#elif defined(CONFIG_PPC64)
-#define MAX_PHYSMEM_BITS 46
-#endif
-
#ifdef CONFIG_PPC_BOOK3S_64
#include <asm/book3s/64/mmu.h>
#else /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 6ee8195a2ffb..611204e588b9 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -52,6 +52,7 @@ static inline bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
{
return false;
}
+static inline void mm_iommu_init(struct mm_struct *mm) { }
#endif
extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm);
extern void set_context(unsigned long id, pgd_t *pgd);
@@ -228,13 +229,7 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
#endif
}
-#ifdef CONFIG_PPC_BOOK3E_64
-static inline void arch_exit_mmap(struct mm_struct *mm)
-{
-}
-#else
extern void arch_exit_mmap(struct mm_struct *mm);
-#endif
static inline void arch_unmap(struct mm_struct *mm,
struct vm_area_struct *vma,
diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
new file mode 100644
index 000000000000..a46616937d20
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H
+#define _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H
+
+#define PAGE_SHIFT_8M 23
+
+static inline pte_t *hugepd_page(hugepd_t hpd)
+{
+ BUG_ON(!hugepd_ok(hpd));
+
+ return (pte_t *)__va(hpd_val(hpd) & ~HUGEPD_SHIFT_MASK);
+}
+
+static inline unsigned int hugepd_shift(hugepd_t hpd)
+{
+ return ((hpd_val(hpd) & _PMD_PAGE_MASK) >> 1) + 17;
+}
+
+static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
+ unsigned int pdshift)
+{
+ unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> PAGE_SHIFT;
+
+ return hugepd_page(hpd) + idx;
+}
+
+static inline void flush_hugetlb_page(struct vm_area_struct *vma,
+ unsigned long vmaddr)
+{
+ flush_tlb_page(vma, vmaddr);
+}
+
+static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift)
+{
+ *hpdp = __hugepd(__pa(new) | _PMD_USER | _PMD_PRESENT |
+ (pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M : _PMD_PAGE_512K));
+}
+
+static inline int check_and_get_huge_psize(int shift)
+{
+ return shift_to_mmu_psize(shift);
+}
+
+#endif /* _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H */
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
new file mode 100644
index 000000000000..1c3133b5f86a
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_KUP_8XX_H_
+#define _ASM_POWERPC_KUP_8XX_H_
+
+#include <asm/bug.h>
+
+#ifdef CONFIG_PPC_KUAP
+
+#ifdef __ASSEMBLY__
+
+.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3
+ lis \gpr2, MD_APG_KUAP@h /* only APG0 and APG1 are used */
+ mfspr \gpr1, SPRN_MD_AP
+ mtspr SPRN_MD_AP, \gpr2
+ stw \gpr1, STACK_REGS_KUAP(\sp)
+.endm
+
+.macro kuap_restore sp, current, gpr1, gpr2, gpr3
+ lwz \gpr1, STACK_REGS_KUAP(\sp)
+ mtspr SPRN_MD_AP, \gpr1
+.endm
+
+.macro kuap_check current, gpr
+#ifdef CONFIG_PPC_KUAP_DEBUG
+ mfspr \gpr, SPRN_MD_AP
+ rlwinm \gpr, \gpr, 16, 0xffff
+999: twnei \gpr, MD_APG_KUAP@h
+ EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
+#endif
+.endm
+
+#else /* !__ASSEMBLY__ */
+
+#include <asm/reg.h>
+
+static inline void allow_user_access(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ mtspr(SPRN_MD_AP, MD_APG_INIT);
+}
+
+static inline void prevent_user_access(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ mtspr(SPRN_MD_AP, MD_APG_KUAP);
+}
+
+static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+{
+ return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf0000000),
+ "Bug: fault blocked by AP register !");
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* CONFIG_PPC_KUAP */
+
+#endif /* _ASM_POWERPC_KUP_8XX_H_ */
diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
index 0a1a3fc54e54..76af5b0cb16e 100644
--- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
@@ -35,11 +35,18 @@
* Then we use the APG to say whether accesses are according to Page rules or
* "all Supervisor" rules (Access to all)
* Therefore, we define 2 APG groups. lsb is _PMD_USER
- * 0 => No user => 01 (all accesses performed according to page definition)
+ * 0 => Kernel => 01 (all accesses performed according to page definition)
* 1 => User => 00 (all accesses performed as supervisor iaw page definition)
- * We define all 16 groups so that all other bits of APG can take any value
+ * 2-16 => NA => 11 (all accesses performed as user iaw page definition)
+ */
+#define MI_APG_INIT 0x4fffffff
+
+/*
+ * 0 => Kernel => 01 (all accesses performed according to page definition)
+ * 1 => User => 10 (all accesses performed according to swaped page definition)
+ * 2-16 => NA => 11 (all accesses performed as user iaw page definition)
*/
-#define MI_APG_INIT 0x44444444
+#define MI_APG_KUEP 0x6fffffff
/* The effective page number register. When read, contains the information
* about the last instruction TLB miss. When MI_RPN is written, bits in
@@ -108,11 +115,18 @@
* Then we use the APG to say whether accesses are according to Page rules or
* "all Supervisor" rules (Access to all)
* Therefore, we define 2 APG groups. lsb is _PMD_USER
- * 0 => No user => 01 (all accesses performed according to page definition)
+ * 0 => Kernel => 01 (all accesses performed according to page definition)
* 1 => User => 00 (all accesses performed as supervisor iaw page definition)
- * We define all 16 groups so that all other bits of APG can take any value
+ * 2-16 => NA => 11 (all accesses performed as user iaw page definition)
+ */
+#define MD_APG_INIT 0x4fffffff
+
+/*
+ * 0 => No user => 01 (all accesses performed according to page definition)
+ * 1 => User => 10 (all accesses performed according to swaped page definition)
+ * 2-16 => NA => 11 (all accesses performed as user iaw page definition)
*/
-#define MD_APG_INIT 0x44444444
+#define MD_APG_KUAP 0x6fffffff
/* The effective page number register. When read, contains the information
* about the last instruction TLB miss. When MD_RPN is written, bits in
@@ -167,9 +181,26 @@
#ifdef CONFIG_PPC_MM_SLICES
#include <asm/nohash/32/slice.h>
#define SLICE_ARRAY_SIZE (1 << (32 - SLICE_LOW_SHIFT - 1))
+#define LOW_SLICE_ARRAY_SZ SLICE_ARRAY_SIZE
#endif
+#if defined(CONFIG_PPC_4K_PAGES)
+#define mmu_virtual_psize MMU_PAGE_4K
+#elif defined(CONFIG_PPC_16K_PAGES)
+#define mmu_virtual_psize MMU_PAGE_16K
+#define PTE_FRAG_NR 4
+#define PTE_FRAG_SIZE_SHIFT 12
+#define PTE_FRAG_SIZE (1UL << 12)
+#else
+#error "Unsupported PAGE_SIZE"
+#endif
+
+#define mmu_linear_psize MMU_PAGE_8M
+
#ifndef __ASSEMBLY__
+
+#include <linux/mmdebug.h>
+
struct slice_mask {
u64 low_slices;
DECLARE_BITMAP(high_slices, 0);
@@ -185,14 +216,56 @@ typedef struct {
unsigned char high_slices_psize[0];
unsigned long slb_addr_limit;
struct slice_mask mask_base_psize; /* 4k or 16k */
-# ifdef CONFIG_HUGETLB_PAGE
struct slice_mask mask_512k;
struct slice_mask mask_8m;
-# endif
#endif
void *pte_frag;
} mm_context_t;
+#ifdef CONFIG_PPC_MM_SLICES
+static inline u16 mm_ctx_user_psize(mm_context_t *ctx)
+{
+ return ctx->user_psize;
+}
+
+static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize)
+{
+ ctx->user_psize = user_psize;
+}
+
+static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx)
+{
+ return ctx->low_slices_psize;
+}
+
+static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx)
+{
+ return ctx->high_slices_psize;
+}
+
+static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx)
+{
+ return ctx->slb_addr_limit;
+}
+
+static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit)
+{
+ ctx->slb_addr_limit = limit;
+}
+
+static inline struct slice_mask *slice_mask_for_size(mm_context_t *ctx, int psize)
+{
+ if (psize == MMU_PAGE_512K)
+ return &ctx->mask_512k;
+ if (psize == MMU_PAGE_8M)
+ return &ctx->mask_8m;
+
+ BUG_ON(psize != mmu_virtual_psize);
+
+ return &ctx->mask_base_psize;
+}
+#endif /* CONFIG_PPC_MM_SLICE */
+
#define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000)
#define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE))
@@ -242,17 +315,4 @@ extern s32 patch__itlbmiss_perf, patch__dtlbmiss_perf;
#endif /* !__ASSEMBLY__ */
-#if defined(CONFIG_PPC_4K_PAGES)
-#define mmu_virtual_psize MMU_PAGE_4K
-#elif defined(CONFIG_PPC_16K_PAGES)
-#define mmu_virtual_psize MMU_PAGE_16K
-#define PTE_FRAG_NR 4
-#define PTE_FRAG_SIZE_SHIFT 12
-#define PTE_FRAG_SIZE (1UL << 12)
-#else
-#error "Unsupported PAGE_SIZE"
-#endif
-
-#define mmu_linear_psize MMU_PAGE_8M
-
#endif /* _ASM_POWERPC_MMU_8XX_H_ */
diff --git a/arch/powerpc/include/asm/nohash/32/mmu.h b/arch/powerpc/include/asm/nohash/32/mmu.h
deleted file mode 100644
index 7d94a36d57d2..000000000000
--- a/arch/powerpc/include/asm/nohash/32/mmu.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_POWERPC_NOHASH_32_MMU_H_
-#define _ASM_POWERPC_NOHASH_32_MMU_H_
-
-#include <asm/page.h>
-
-#if defined(CONFIG_40x)
-/* 40x-style software loaded TLB */
-#include <asm/nohash/32/mmu-40x.h>
-#elif defined(CONFIG_44x)
-/* 44x-style software loaded TLB */
-#include <asm/nohash/32/mmu-44x.h>
-#elif defined(CONFIG_PPC_BOOK3E_MMU)
-/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */
-#include <asm/nohash/mmu-book3e.h>
-#elif defined (CONFIG_PPC_8xx)
-/* Motorola/Freescale 8xx software loaded TLB */
-#include <asm/nohash/32/mmu-8xx.h>
-#endif
-
-#ifndef __ASSEMBLY__
-typedef pte_t *pgtable_t;
-#endif
-
-#endif /* _ASM_POWERPC_NOHASH_32_MMU_H_ */
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index bd186e85b4f7..11eac371e7e0 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -6,39 +6,6 @@
#include <linux/slab.h>
/*
- * Functions that deal with pagetables that could be at any level of
- * the table need to be passed an "index_size" so they know how to
- * handle allocation. For PTE pages (which are linked to a struct
- * page for now, and drawn from the main get_free_pages() pool), the
- * allocation size will be (2^index_size * sizeof(pointer)) and
- * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
- *
- * The maximum index size needs to be big enough to allow any
- * pagetable sizes we need, but small enough to fit in the low bits of
- * any page table pointer. In other words all pagetables, even tiny
- * ones, must be aligned to allow at least enough low 0 bits to
- * contain this value. This value is also used as a mask, so it must
- * be one less than a power of two.
- */
-#define MAX_PGTABLE_INDEX_SIZE 0xf
-
-extern void __bad_pte(pmd_t *pmd);
-
-extern struct kmem_cache *pgtable_cache[];
-#define PGT_CACHE(shift) pgtable_cache[shift]
-
-static inline pgd_t *pgd_alloc(struct mm_struct *mm)
-{
- return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
- pgtable_gfp_flags(mm, GFP_KERNEL));
-}
-
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
- kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
-}
-
-/*
* We don't have any real pmd's, and this code never triggers because
* the pgd will always be present..
*/
@@ -47,96 +14,22 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
#define __pmd_free_tlb(tlb,x,a) do { } while (0)
/* #define pgd_populate(mm, pmd, pte) BUG() */
-#ifndef CONFIG_BOOKE
-
-static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
- pte_t *pte)
-{
- *pmdp = __pmd(__pa(pte) | _PMD_PRESENT);
-}
-
-static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
- pgtable_t pte_page)
-{
- *pmdp = __pmd(__pa(pte_page) | _PMD_USER | _PMD_PRESENT);
-}
-
-#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd))
-#else
-
static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
pte_t *pte)
{
- *pmdp = __pmd((unsigned long)pte | _PMD_PRESENT);
+ if (IS_ENABLED(CONFIG_BOOKE))
+ *pmdp = __pmd((unsigned long)pte | _PMD_PRESENT);
+ else
+ *pmdp = __pmd(__pa(pte) | _PMD_PRESENT);
}
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
pgtable_t pte_page)
{
- *pmdp = __pmd((unsigned long)pte_page | _PMD_PRESENT);
+ if (IS_ENABLED(CONFIG_BOOKE))
+ *pmdp = __pmd((unsigned long)pte_page | _PMD_PRESENT);
+ else
+ *pmdp = __pmd(__pa(pte_page) | _PMD_USER | _PMD_PRESENT);
}
-#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd))
-#endif
-
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm);
-void pte_frag_destroy(void *pte_frag);
-pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel);
-void pte_fragment_free(unsigned long *table, int kernel);
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- pte_fragment_free((unsigned long *)pte, 1);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
- pte_fragment_free((unsigned long *)ptepage, 0);
-}
-
-static inline void pgtable_free(void *table, unsigned index_size)
-{
- if (!index_size) {
- pte_fragment_free((unsigned long *)table, 0);
- } else {
- BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
- kmem_cache_free(PGT_CACHE(index_size), table);
- }
-}
-
-#define check_pgt_cache() do { } while (0)
-#define get_hugepd_cache_index(x) (x)
-
-#ifdef CONFIG_SMP
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
- void *table, int shift)
-{
- unsigned long pgf = (unsigned long)table;
- BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
- pgf |= shift;
- tlb_remove_table(tlb, (void *)pgf);
-}
-
-static inline void __tlb_remove_table(void *_table)
-{
- void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
- unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
-
- pgtable_free(table, shift);
-}
-#else
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
- void *table, int shift)
-{
- pgtable_free(table, shift);
-}
-#endif
-
-static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
- unsigned long address)
-{
- tlb_flush_pgtable(tlb, address);
- pgtable_free_tlb(tlb, table, 0);
-}
#endif /* _ASM_POWERPC_PGALLOC_32_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index bed433358260..0284f8f5305f 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -64,15 +64,24 @@ extern int icache_44x_need_flush;
#define pgd_ERROR(e) \
pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+#ifndef __ASSEMBLY__
+
+int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
+
+#endif /* !__ASSEMBLY__ */
+
+
/*
* This is the bottom of the PKMAP area with HIGHMEM or an arbitrary
* value (for now) on others, from where we can start layout kernel
* virtual space that goes below PKMAP and FIXMAP
*/
+#include <asm/fixmap.h>
+
#ifdef CONFIG_HIGHMEM
#define KVIRT_TOP PKMAP_BASE
#else
-#define KVIRT_TOP (0xfe000000UL) /* for now, could be FIXMAP_BASE ? */
+#define KVIRT_TOP FIXADDR_START
#endif
/*
@@ -379,8 +388,6 @@ static inline int pte_young(pte_t pte)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 })
-int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
-
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_POWERPC_NOHASH_32_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/nohash/32/slice.h b/arch/powerpc/include/asm/nohash/32/slice.h
index 777d62e40ac0..39eb0154ae2d 100644
--- a/arch/powerpc/include/asm/nohash/32/slice.h
+++ b/arch/powerpc/include/asm/nohash/32/slice.h
@@ -13,6 +13,8 @@
#define SLICE_NUM_HIGH 0ul
#define GET_HIGH_SLICE_INDEX(addr) (addr & 0)
+#define SLB_ADDR_LIMIT_DEFAULT DEFAULT_MAP_WINDOW
+
#endif /* CONFIG_PPC_MM_SLICES */
#endif /* _ASM_POWERPC_NOHASH_32_SLICE_H */
diff --git a/arch/powerpc/include/asm/nohash/64/mmu.h b/arch/powerpc/include/asm/nohash/64/mmu.h
deleted file mode 100644
index e6585480dfc4..000000000000
--- a/arch/powerpc/include/asm/nohash/64/mmu.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_POWERPC_NOHASH_64_MMU_H_
-#define _ASM_POWERPC_NOHASH_64_MMU_H_
-
-/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */
-#include <asm/nohash/mmu-book3e.h>
-
-#ifndef __ASSEMBLY__
-typedef struct page *pgtable_t;
-#endif
-
-#endif /* _ASM_POWERPC_NOHASH_64_MMU_H_ */
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index 66d086f85bd5..62321cd12da9 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -18,37 +18,6 @@ struct vmemmap_backing {
};
extern struct vmemmap_backing *vmemmap_list;
-/*
- * Functions that deal with pagetables that could be at any level of
- * the table need to be passed an "index_size" so they know how to
- * handle allocation. For PTE pages (which are linked to a struct
- * page for now, and drawn from the main get_free_pages() pool), the
- * allocation size will be (2^index_size * sizeof(pointer)) and
- * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
- *
- * The maximum index size needs to be big enough to allow any
- * pagetable sizes we need, but small enough to fit in the low bits of
- * any page table pointer. In other words all pagetables, even tiny
- * ones, must be aligned to allow at least enough low 0 bits to
- * contain this value. This value is also used as a mask, so it must
- * be one less than a power of two.
- */
-#define MAX_PGTABLE_INDEX_SIZE 0xf
-
-extern struct kmem_cache *pgtable_cache[];
-#define PGT_CACHE(shift) pgtable_cache[shift]
-
-static inline pgd_t *pgd_alloc(struct mm_struct *mm)
-{
- return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
- pgtable_gfp_flags(mm, GFP_KERNEL));
-}
-
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
- kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
-}
-
#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, (unsigned long)PUD)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
@@ -76,11 +45,9 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
pgtable_t pte_page)
{
- pmd_set(pmd, (unsigned long)page_address(pte_page));
+ pmd_set(pmd, (unsigned long)pte_page);
}
-#define pmd_pgtable(pmd) pmd_page(pmd)
-
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
@@ -92,91 +59,9 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd);
}
-
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
- struct page *page;
- pte_t *pte;
-
- pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT);
- if (!pte)
- return NULL;
- page = virt_to_page(pte);
- if (!pgtable_page_ctor(page)) {
- __free_page(page);
- return NULL;
- }
- return page;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
- pgtable_page_dtor(ptepage);
- __free_page(ptepage);
-}
-
-static inline void pgtable_free(void *table, int shift)
-{
- if (!shift) {
- pgtable_page_dtor(virt_to_page(table));
- free_page((unsigned long)table);
- } else {
- BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
- kmem_cache_free(PGT_CACHE(shift), table);
- }
-}
-
-#define get_hugepd_cache_index(x) (x)
-#ifdef CONFIG_SMP
-static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
-{
- unsigned long pgf = (unsigned long)table;
-
- BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
- pgf |= shift;
- tlb_remove_table(tlb, (void *)pgf);
-}
-
-static inline void __tlb_remove_table(void *_table)
-{
- void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
- unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
-
- pgtable_free(table, shift);
-}
-
-#else
-static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
-{
- pgtable_free(table, shift);
-}
-#endif
-
-static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
- unsigned long address)
-{
- tlb_flush_pgtable(tlb, address);
- pgtable_free_tlb(tlb, page_address(table), 0);
-}
-
#define __pmd_free_tlb(tlb, pmd, addr) \
pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX)
-#ifndef CONFIG_PPC_64K_PAGES
#define __pud_free_tlb(tlb, pud, addr) \
pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)
-#endif /* CONFIG_PPC_64K_PAGES */
-
-#define check_pgt_cache() do { } while (0)
-
#endif /* _ASM_POWERPC_PGALLOC_64_H */
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index e77ed9761632..b9f66cf15c31 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -10,10 +10,6 @@
#include <asm/barrier.h>
#include <asm/asm-const.h>
-#ifdef CONFIG_PPC_64K_PAGES
-#error "Page size not supported"
-#endif
-
#define FIRST_USER_ADDRESS 0UL
/*
@@ -23,11 +19,7 @@
PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
#define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE)
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define PMD_CACHE_INDEX (PMD_INDEX_SIZE + 1)
-#else
#define PMD_CACHE_INDEX PMD_INDEX_SIZE
-#endif
#define PUD_CACHE_INDEX PUD_INDEX_SIZE
/*
@@ -73,7 +65,6 @@
#define VMALLOC_REGION_ID (REGION_ID(VMALLOC_START))
#define KERNEL_REGION_ID (REGION_ID(PAGE_OFFSET))
-#define VMEMMAP_REGION_ID (0xfUL) /* Server only */
#define USER_REGION_ID (0UL)
/*
@@ -205,7 +196,8 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
(((pte_t *) pmd_page_vaddr(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
-#define pte_unmap(pte) do { } while(0)
+
+static inline void pte_unmap(pte_t *pte) { }
/* to find an entry in a kernel page-table-directory */
/* This now only contains the vmalloc pages */
diff --git a/arch/powerpc/include/asm/nohash/64/slice.h b/arch/powerpc/include/asm/nohash/64/slice.h
deleted file mode 100644
index ad0d6e3cc1c5..000000000000
--- a/arch/powerpc/include/asm/nohash/64/slice.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_POWERPC_NOHASH_64_SLICE_H
-#define _ASM_POWERPC_NOHASH_64_SLICE_H
-
-#ifdef CONFIG_PPC_64K_PAGES
-#define get_slice_psize(mm, addr) MMU_PAGE_64K
-#else /* CONFIG_PPC_64K_PAGES */
-#define get_slice_psize(mm, addr) MMU_PAGE_4K
-#endif /* !CONFIG_PPC_64K_PAGES */
-#define slice_set_user_psize(mm, psize) do { BUG(); } while (0)
-
-#endif /* _ASM_POWERPC_NOHASH_64_SLICE_H */
diff --git a/arch/powerpc/include/asm/nohash/hugetlb-book3e.h b/arch/powerpc/include/asm/nohash/hugetlb-book3e.h
new file mode 100644
index 000000000000..ecd8694cb229
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/hugetlb-book3e.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H
+#define _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H
+
+static inline pte_t *hugepd_page(hugepd_t hpd)
+{
+ if (WARN_ON(!hugepd_ok(hpd)))
+ return NULL;
+
+ return (pte_t *)((hpd_val(hpd) & ~HUGEPD_SHIFT_MASK) | PD_HUGE);
+}
+
+static inline unsigned int hugepd_shift(hugepd_t hpd)
+{
+ return hpd_val(hpd) & HUGEPD_SHIFT_MASK;
+}
+
+static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
+ unsigned int pdshift)
+{
+ /*
+ * On FSL BookE, we have multiple higher-level table entries that
+ * point to the same hugepte. Just use the first one since they're all
+ * identical. So for that case, idx=0.
+ */
+ return hugepd_page(hpd);
+}
+
+void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+
+static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift)
+{
+ /* We use the old format for PPC_FSL_BOOK3E */
+ *hpdp = __hugepd(((unsigned long)new & ~PD_HUGE) | pshift);
+}
+
+static inline int check_and_get_huge_psize(int shift)
+{
+ if (shift & 1) /* Not a power of 4 */
+ return -EINVAL;
+
+ return shift_to_mmu_psize(shift);
+}
+
+#endif /* _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H */
diff --git a/arch/powerpc/include/asm/nohash/mmu-book3e.h b/arch/powerpc/include/asm/nohash/mmu-book3e.h
index e20072972e35..4c9777d256fb 100644
--- a/arch/powerpc/include/asm/nohash/mmu-book3e.h
+++ b/arch/powerpc/include/asm/nohash/mmu-book3e.h
@@ -306,6 +306,8 @@ extern int book3e_htw_mode;
#define mmu_cleanup_all NULL
+#define MAX_PHYSMEM_BITS 44
+
#endif
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/nohash/mmu.h b/arch/powerpc/include/asm/nohash/mmu.h
index a037cb1efb57..edc793e5f08f 100644
--- a/arch/powerpc/include/asm/nohash/mmu.h
+++ b/arch/powerpc/include/asm/nohash/mmu.h
@@ -2,10 +2,18 @@
#ifndef _ASM_POWERPC_NOHASH_MMU_H_
#define _ASM_POWERPC_NOHASH_MMU_H_
-#ifdef CONFIG_PPC64
-#include <asm/nohash/64/mmu.h>
-#else
-#include <asm/nohash/32/mmu.h>
+#if defined(CONFIG_40x)
+/* 40x-style software loaded TLB */
+#include <asm/nohash/32/mmu-40x.h>
+#elif defined(CONFIG_44x)
+/* 44x-style software loaded TLB */
+#include <asm/nohash/32/mmu-44x.h>
+#elif defined(CONFIG_PPC_BOOK3E_MMU)
+/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */
+#include <asm/nohash/mmu-book3e.h>
+#elif defined (CONFIG_PPC_8xx)
+/* Motorola/Freescale 8xx software loaded TLB */
+#include <asm/nohash/32/mmu-8xx.h>
#endif
#endif /* _ASM_POWERPC_NOHASH_MMU_H_ */
diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h
index 0634f2949438..332b13b4ecdb 100644
--- a/arch/powerpc/include/asm/nohash/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/pgalloc.h
@@ -3,6 +3,7 @@
#define _ASM_POWERPC_NOHASH_PGALLOC_H
#include <linux/mm.h>
+#include <linux/slab.h>
extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
#ifdef CONFIG_PPC64
@@ -16,9 +17,64 @@ static inline void tlb_flush_pgtable(struct mmu_gather *tlb,
}
#endif /* !CONFIG_PPC_BOOK3E */
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+ return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+ kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
+}
+
#ifdef CONFIG_PPC64
#include <asm/nohash/64/pgalloc.h>
#else
#include <asm/nohash/32/pgalloc.h>
#endif
+
+static inline void pgtable_free(void *table, int shift)
+{
+ if (!shift) {
+ pte_fragment_free((unsigned long *)table, 0);
+ } else {
+ BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+ kmem_cache_free(PGT_CACHE(shift), table);
+ }
+}
+
+#define get_hugepd_cache_index(x) (x)
+
+#ifdef CONFIG_SMP
+static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+{
+ unsigned long pgf = (unsigned long)table;
+
+ BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+ pgf |= shift;
+ tlb_remove_table(tlb, (void *)pgf);
+}
+
+static inline void __tlb_remove_table(void *_table)
+{
+ void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
+ unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
+
+ pgtable_free(table, shift);
+}
+
+#else
+static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+{
+ pgtable_free(table, shift);
+}
+#endif
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+ unsigned long address)
+{
+ tlb_flush_pgtable(tlb, address);
+ pgtable_free_tlb(tlb, table, 0);
+}
#endif /* _ASM_POWERPC_NOHASH_PGALLOC_H */
diff --git a/arch/powerpc/include/asm/nohash/pte-book3e.h b/arch/powerpc/include/asm/nohash/pte-book3e.h
index dd40d200f274..813918f40765 100644
--- a/arch/powerpc/include/asm/nohash/pte-book3e.h
+++ b/arch/powerpc/include/asm/nohash/pte-book3e.h
@@ -60,13 +60,8 @@
#define _PAGE_SPECIAL _PAGE_SW0
/* Base page size */
-#ifdef CONFIG_PPC_64K_PAGES
-#define _PAGE_PSIZE _PAGE_PSIZE_64K
-#define PTE_RPN_SHIFT (28)
-#else
#define _PAGE_PSIZE _PAGE_PSIZE_4K
#define PTE_RPN_SHIFT (24)
-#endif
#define PTE_WIMGE_SHIFT (19)
#define PTE_BAP_SHIFT (2)
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 870fb7b239ea..e1577cfa7186 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -186,8 +186,8 @@
#define OPAL_XIVE_FREE_IRQ 140
#define OPAL_XIVE_SYNC 141
#define OPAL_XIVE_DUMP 142
-#define OPAL_XIVE_RESERVED3 143
-#define OPAL_XIVE_RESERVED4 144
+#define OPAL_XIVE_GET_QUEUE_STATE 143
+#define OPAL_XIVE_SET_QUEUE_STATE 144
#define OPAL_SIGNAL_SYSTEM_RESET 145
#define OPAL_NPU_INIT_CONTEXT 146
#define OPAL_NPU_DESTROY_CONTEXT 147
@@ -209,8 +209,10 @@
#define OPAL_SENSOR_GROUP_ENABLE 163
#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR 164
#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR 165
+#define OPAL_HANDLE_HMI2 166
#define OPAL_NX_COPROC_INIT 167
-#define OPAL_LAST 167
+#define OPAL_XIVE_GET_VP_STATE 170
+#define OPAL_LAST 170
#define QUIESCE_HOLD 1 /* Spin all calls at entry */
#define QUIESCE_REJECT 2 /* Fail all calls with OPAL_BUSY */
@@ -634,6 +636,15 @@ struct OpalHMIEvent {
} u;
};
+/* OPAL_HANDLE_HMI2 out_flags */
+enum {
+ OPAL_HMI_FLAGS_TB_RESYNC = (1ull << 0), /* Timebase has been resynced */
+ OPAL_HMI_FLAGS_DEC_LOST = (1ull << 1), /* DEC lost, needs to be reprogrammed */
+ OPAL_HMI_FLAGS_HDEC_LOST = (1ull << 2), /* HDEC lost, needs to be reprogrammed */
+ OPAL_HMI_FLAGS_TOD_TB_FAIL = (1ull << 3), /* TOD/TB recovery failed. */
+ OPAL_HMI_FLAGS_NEW_EVENT = (1ull << 63), /* An event has been created */
+};
+
enum {
OPAL_P7IOC_DIAG_TYPE_NONE = 0,
OPAL_P7IOC_DIAG_TYPE_RGC = 1,
@@ -1118,6 +1129,7 @@ enum {
enum {
OPAL_IMC_COUNTERS_NEST = 1,
OPAL_IMC_COUNTERS_CORE = 2,
+ OPAL_IMC_COUNTERS_TRACE = 3,
};
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index a55b01c90bb1..4cc37e708bc7 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -203,6 +203,7 @@ int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer,
int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data);
int64_t opal_sensor_read_u64(u32 sensor_hndl, int token, __be64 *sensor_data);
int64_t opal_handle_hmi(void);
+int64_t opal_handle_hmi2(__be64 *out_flags);
int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end);
int64_t opal_unregister_dump_region(uint32_t id);
int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
@@ -279,6 +280,13 @@ int64_t opal_xive_allocate_irq(uint32_t chip_id);
int64_t opal_xive_free_irq(uint32_t girq);
int64_t opal_xive_sync(uint32_t type, uint32_t id);
int64_t opal_xive_dump(uint32_t type, uint32_t id);
+int64_t opal_xive_get_queue_state(uint64_t vp, uint32_t prio,
+ __be32 *out_qtoggle,
+ __be32 *out_qindex);
+int64_t opal_xive_set_queue_state(uint64_t vp, uint32_t prio,
+ uint32_t qtoggle,
+ uint32_t qindex);
+int64_t opal_xive_get_vp_state(uint64_t vp, __be64 *out_w01);
int64_t opal_pci_set_p2p(uint64_t phb_init, uint64_t phb_target,
uint64_t desc, uint16_t pe_number);
@@ -352,6 +360,7 @@ int opal_power_control_init(void);
extern int opal_machine_check(struct pt_regs *regs);
extern bool opal_mce_check_early_recovery(struct pt_regs *regs);
extern int opal_hmi_exception_early(struct pt_regs *regs);
+extern int opal_hmi_exception_early2(struct pt_regs *regs);
extern int opal_handle_hmi_exception(struct pt_regs *regs);
extern void opal_shutdown(void);
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 134e912d403f..62f27e0aef7c 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -174,7 +174,6 @@ struct paca_struct {
u8 irq_soft_mask; /* mask for irq soft masking */
u8 irq_happened; /* irq happened while soft-disabled */
u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */
- u8 nap_state_lost; /* NV GPR values lost in power7_idle */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
u8 pmcregs_in_use; /* pseries puts this in lppaca */
#endif
@@ -184,23 +183,28 @@ struct paca_struct {
#endif
#ifdef CONFIG_PPC_POWERNV
- /* Per-core mask tracking idle threads and a lock bit-[L][TTTTTTTT] */
- u32 *core_idle_state_ptr;
- u8 thread_idle_state; /* PNV_THREAD_RUNNING/NAP/SLEEP */
- /* Mask to indicate thread id in core */
- u8 thread_mask;
- /* Mask to denote subcore sibling threads */
- u8 subcore_sibling_mask;
- /* Flag to request this thread not to stop */
- atomic_t dont_stop;
- /* The PSSCR value that the kernel requested before going to stop */
- u64 requested_psscr;
-
- /*
- * Save area for additional SPRs that need to be
- * saved/restored during cpuidle stop.
- */
- struct stop_sprs stop_sprs;
+ /* PowerNV idle fields */
+ /* PNV_CORE_IDLE_* bits, all siblings work on thread 0 paca */
+ unsigned long idle_state;
+ union {
+ /* P7/P8 specific fields */
+ struct {
+ /* PNV_THREAD_RUNNING/NAP/SLEEP */
+ u8 thread_idle_state;
+ /* Mask to denote subcore sibling threads */
+ u8 subcore_sibling_mask;
+ };
+
+ /* P9 specific fields */
+ struct {
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /* The PSSCR value that the kernel requested before going to stop */
+ u64 requested_psscr;
+ /* Flag to request this thread not to stop */
+ atomic_t dont_stop;
+#endif
+ };
+ };
#endif
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index ed870468ef6f..dbc8c0679480 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -28,11 +28,15 @@
#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT)
#ifndef __ASSEMBLY__
-#ifdef CONFIG_HUGETLB_PAGE
-extern bool hugetlb_disabled;
-extern unsigned int HPAGE_SHIFT;
-#else
+#ifndef CONFIG_HUGETLB_PAGE
#define HPAGE_SHIFT PAGE_SHIFT
+#elif defined(CONFIG_PPC_BOOK3S_64)
+extern unsigned int hpage_shift;
+#define HPAGE_SHIFT hpage_shift
+#elif defined(CONFIG_PPC_8xx)
+#define HPAGE_SHIFT 19 /* 512k pages */
+#elif defined(CONFIG_PPC_FSL_BOOK3E)
+#define HPAGE_SHIFT 22 /* 4M pages */
#endif
#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
@@ -132,18 +136,7 @@ static inline bool pfn_valid(unsigned long pfn)
#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
-#ifdef CONFIG_PPC_BOOK3S_64
-/*
- * On hash the vmalloc and other regions alias to the kernel region when passed
- * through __pa(), which virt_to_pfn() uses. That means virt_addr_valid() can
- * return true for some vmalloc addresses, which is incorrect. So explicitly
- * check that the address is in the kernel region.
- */
-#define virt_addr_valid(kaddr) (REGION_ID(kaddr) == KERNEL_REGION_ID && \
- pfn_valid(virt_to_pfn(kaddr)))
-#else
#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr))
-#endif
/*
* On Book-E parts we need __va to parse the device tree and we can't
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index e11f03007b57..2b2c60a1a66d 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -20,10 +20,61 @@ static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp)
#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO)
+pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel);
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
+{
+ return (pte_t *)pte_fragment_alloc(mm, 1);
+}
+
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
+{
+ return (pgtable_t)pte_fragment_alloc(mm, 0);
+}
+
+void pte_frag_destroy(void *pte_frag);
+void pte_fragment_free(unsigned long *table, int kernel);
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+ pte_fragment_free((unsigned long *)pte, 1);
+}
+
+static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
+{
+ pte_fragment_free((unsigned long *)ptepage, 0);
+}
+
+/*
+ * Functions that deal with pagetables that could be at any level of
+ * the table need to be passed an "index_size" so they know how to
+ * handle allocation. For PTE pages, the allocation size will be
+ * (2^index_size * sizeof(pointer)) and allocations are drawn from
+ * the kmem_cache in PGT_CACHE(index_size).
+ *
+ * The maximum index size needs to be big enough to allow any
+ * pagetable sizes we need, but small enough to fit in the low bits of
+ * any page table pointer. In other words all pagetables, even tiny
+ * ones, must be aligned to allow at least enough low 0 bits to
+ * contain this value. This value is also used as a mask, so it must
+ * be one less than a power of two.
+ */
+#define MAX_PGTABLE_INDEX_SIZE 0xf
+
+extern struct kmem_cache *pgtable_cache[];
+#define PGT_CACHE(shift) pgtable_cache[shift]
+
+static inline void check_pgt_cache(void) { }
+
#ifdef CONFIG_PPC_BOOK3S
#include <asm/book3s/pgalloc.h>
#else
#include <asm/nohash/pgalloc.h>
#endif
+static inline pgtable_t pmd_pgtable(pmd_t pmd)
+{
+ return (pgtable_t)pmd_page_vaddr(pmd);
+}
+
#endif /* _ASM_POWERPC_PGALLOC_H */
diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h
index a89c67b62680..b169bbf95fcb 100644
--- a/arch/powerpc/include/asm/pgtable-be-types.h
+++ b/arch/powerpc/include/asm/pgtable-be-types.h
@@ -33,11 +33,7 @@ static inline __be64 pmd_raw(pmd_t x)
return x.pmd;
}
-/*
- * 64 bit hash always use 4 level table. Everybody else use 4 level
- * only for 4K page size.
- */
-#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
+/* 64 bit always use 4 level table. */
typedef struct { __be64 pud; } pud_t;
#define __pud(x) ((pud_t) { cpu_to_be64(x) })
#define __pud_raw(x) ((pud_t) { (x) })
@@ -51,7 +47,6 @@ static inline __be64 pud_raw(pud_t x)
return x.pud;
}
-#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
#endif /* CONFIG_PPC64 */
/* PGD level */
@@ -77,7 +72,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
* With hash config 64k pages additionally define a bigger "real PTE" type that
* gathers the "second half" part of the PTE for pseudo 64k pages
*/
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_BOOK3S_64)
+#ifdef CONFIG_PPC_64K_PAGES
typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
#else
typedef struct { pte_t pte; } real_pte_t;
diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h
index 3b0edf041b2e..d11b4c61d686 100644
--- a/arch/powerpc/include/asm/pgtable-types.h
+++ b/arch/powerpc/include/asm/pgtable-types.h
@@ -23,18 +23,13 @@ static inline unsigned long pmd_val(pmd_t x)
return x.pmd;
}
-/*
- * 64 bit hash always use 4 level table. Everybody else use 4 level
- * only for 4K page size.
- */
-#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
+/* 64 bit always use 4 level table. */
typedef struct { unsigned long pud; } pud_t;
#define __pud(x) ((pud_t) { (x) })
static inline unsigned long pud_val(pud_t x)
{
return x.pud;
}
-#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
#endif /* CONFIG_PPC64 */
/* PGD level */
@@ -54,7 +49,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
* With hash config 64k pages additionally define a bigger "real PTE" type that
* gathers the "second half" part of the PTE for pseudo 64k pages
*/
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_BOOK3S_64)
+#ifdef CONFIG_PPC_64K_PAGES
typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
#else
typedef struct { pte_t pte; } real_pte_t;
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 505550fb2935..3f53be60fb01 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -89,9 +89,6 @@ extern void paging_init(void);
*/
extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
-extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
- unsigned long end, int write,
- struct page **pages, int *nr);
#ifndef CONFIG_TRANSPARENT_HUGEPAGE
#define pmd_large(pmd) 0
#endif
@@ -108,6 +105,12 @@ void mark_initmem_nx(void);
static inline void mark_initmem_nx(void) { }
#endif
+#ifdef CONFIG_PPC_DEBUG_WX
+void ptdump_check_wx(void);
+#else
+static inline void ptdump_check_wx(void) { }
+#endif
+
/*
* When used, PTE_FRAG_NR is defined in subarch pgtable.h
* so we are sure it is included when arriving here.
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 3351bcf42f2d..706ac5df546f 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -164,6 +164,9 @@ struct thread_struct {
unsigned long rtas_sp; /* stack pointer for when in RTAS */
#endif
#endif
+#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
+ unsigned long kuap; /* opened segments for user access */
+#endif
/* Debug Registers */
struct debug_reg debug;
struct thread_fp_state fp_state;
@@ -411,14 +414,17 @@ static inline unsigned long get_clean_sp(unsigned long sp, int is_32)
}
#endif
+/* asm stubs */
+extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val);
+extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
+extern unsigned long isa206_idle_insn_mayloss(unsigned long type);
+
extern unsigned long cpuidle_disable;
enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
extern int powersave_nap; /* set if nap mode can be used in idle loop */
-extern unsigned long power7_idle_insn(unsigned long type); /* PNV_THREAD_NAP/etc*/
+
extern void power7_idle_type(unsigned long type);
-extern unsigned long power9_idle_stop(unsigned long psscr_val);
-extern unsigned long power9_offline_stop(unsigned long psscr_val);
extern void power9_idle_type(unsigned long stop_psscr_val,
unsigned long stop_psscr_mask);
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 64271e562fed..6f047730e642 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -52,10 +52,17 @@ struct pt_regs
};
};
+ union {
+ struct {
#ifdef CONFIG_PPC64
- unsigned long ppr;
- unsigned long __pad; /* Maintain 16 byte interrupt stack alignment */
+ unsigned long ppr;
+#endif
+#ifdef CONFIG_PPC_KUAP
+ unsigned long kuap;
#endif
+ };
+ unsigned long __pad[2]; /* Maintain 16 byte interrupt stack alignment */
+ };
};
#endif
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index c5b2aff0ce8e..10caa145f98b 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -168,6 +168,7 @@
#define PSSCR_ESL 0x00200000 /* Enable State Loss */
#define PSSCR_SD 0x00400000 /* Status Disable */
#define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */
+#define PSSCR_PLS_SHIFT 60
#define PSSCR_GUEST_VIS 0xf0000000000003ffUL /* Guest-visible PSSCR fields */
#define PSSCR_FAKE_SUSPEND 0x00000400 /* Fake-suspend bit (P9 DD2.2) */
#define PSSCR_FAKE_SUSPEND_LG 10 /* Fake-suspend bit position */
@@ -758,10 +759,9 @@
#define SRR1_WAKERESET 0x00100000 /* System reset */
#define SRR1_WAKEHDBELL 0x000c0000 /* Hypervisor doorbell on P8 */
#define SRR1_WAKESTATE 0x00030000 /* Powersave exit mask [46:47] */
-#define SRR1_WS_DEEPEST 0x00030000 /* Some resources not maintained,
- * may not be recoverable */
-#define SRR1_WS_DEEPER 0x00020000 /* Some resources not maintained */
-#define SRR1_WS_DEEP 0x00010000 /* All resources maintained */
+#define SRR1_WS_HVLOSS 0x00030000 /* HV resources not maintained */
+#define SRR1_WS_GPRLOSS 0x00020000 /* GPRs not maintained */
+#define SRR1_WS_NOLOSS 0x00010000 /* All resources maintained */
#define SRR1_PROGTM 0x00200000 /* TM Bad Thing */
#define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */
#define SRR1_PROGILL 0x00080000 /* Illegal instruction */
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index eb2a33d5df26..e382bd6ede84 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -41,7 +41,7 @@
#if defined(CONFIG_PPC_BOOK3E_64)
#define MSR_64BIT MSR_CM
-#define MSR_ (MSR_ME | MSR_CE)
+#define MSR_ (MSR_ME | MSR_RI | MSR_CE)
#define MSR_KERNEL (MSR_ | MSR_64BIT)
#define MSR_USER32 (MSR_ | MSR_PR | MSR_EE)
#define MSR_USER64 (MSR_USER32 | MSR_64BIT)
diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h
index 44816cbc4198..c6f466f4c241 100644
--- a/arch/powerpc/include/asm/slice.h
+++ b/arch/powerpc/include/asm/slice.h
@@ -4,9 +4,7 @@
#ifdef CONFIG_PPC_BOOK3S_64
#include <asm/book3s/64/slice.h>
-#elif defined(CONFIG_PPC64)
-#include <asm/nohash/64/slice.h>
-#elif defined(CONFIG_PPC_MMU_NOHASH)
+#elif defined(CONFIG_PPC_MMU_NOHASH_32)
#include <asm/nohash/32/slice.h>
#endif
@@ -38,6 +36,11 @@ void slice_setup_new_exec(void);
static inline void slice_init_new_context_exec(struct mm_struct *mm) {}
+static inline unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
+{
+ return 0;
+}
+
#endif /* CONFIG_PPC_MM_SLICES */
#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h
index 68da49320592..3192d454a733 100644
--- a/arch/powerpc/include/asm/sparsemem.h
+++ b/arch/powerpc/include/asm/sparsemem.h
@@ -17,9 +17,9 @@ extern int create_section_mapping(unsigned long start, unsigned long end, int ni
extern int remove_section_mapping(unsigned long start, unsigned long end);
#ifdef CONFIG_PPC_BOOK3S_64
-extern void resize_hpt_for_hotplug(unsigned long new_mem_size);
+extern int resize_hpt_for_hotplug(unsigned long new_mem_size);
#else
-static inline void resize_hpt_for_hotplug(unsigned long new_mem_size) { }
+static inline int resize_hpt_for_hotplug(unsigned long new_mem_size) { return 0; }
#endif
#ifdef CONFIG_NUMA
diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
index 1647de15a31e..9bf6dffb4090 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -4,14 +4,17 @@
#ifdef __KERNEL__
+#ifndef CONFIG_KASAN
#define __HAVE_ARCH_STRNCPY
#define __HAVE_ARCH_STRNCMP
+#define __HAVE_ARCH_MEMCHR
+#define __HAVE_ARCH_MEMCMP
+#define __HAVE_ARCH_MEMSET16
+#endif
+
#define __HAVE_ARCH_MEMSET
#define __HAVE_ARCH_MEMCPY
#define __HAVE_ARCH_MEMMOVE
-#define __HAVE_ARCH_MEMCMP
-#define __HAVE_ARCH_MEMCHR
-#define __HAVE_ARCH_MEMSET16
#define __HAVE_ARCH_MEMCPY_FLUSHCACHE
extern char * strcpy(char *,const char *);
@@ -27,7 +30,27 @@ extern int memcmp(const void *,const void *,__kernel_size_t);
extern void * memchr(const void *,int,__kernel_size_t);
extern void * memcpy_flushcache(void *,const void *,__kernel_size_t);
+void *__memset(void *s, int c, __kernel_size_t count);
+void *__memcpy(void *to, const void *from, __kernel_size_t n);
+void *__memmove(void *to, const void *from, __kernel_size_t n);
+
+#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+/*
+ * For files that are not instrumented (e.g. mm/slub.c) we
+ * should use not instrumented version of mem* functions.
+ */
+#define memcpy(dst, src, len) __memcpy(dst, src, len)
+#define memmove(dst, src, len) __memmove(dst, src, len)
+#define memset(s, c, n) __memset(s, c, n)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
+#endif
+
#ifdef CONFIG_PPC64
+#ifndef CONFIG_KASAN
#define __HAVE_ARCH_MEMSET32
#define __HAVE_ARCH_MEMSET64
@@ -49,8 +72,11 @@ static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
{
return __memset64(p, v, n * 8);
}
+#endif
#else
+#ifndef CONFIG_KASAN
#define __HAVE_ARCH_STRLEN
+#endif
extern void *memset16(uint16_t *, uint16_t, __kernel_size_t);
#endif
diff --git a/arch/powerpc/include/asm/task_size_64.h b/arch/powerpc/include/asm/task_size_64.h
index eab4779f6b84..c993482237ed 100644
--- a/arch/powerpc/include/asm/task_size_64.h
+++ b/arch/powerpc/include/asm/task_size_64.h
@@ -20,7 +20,7 @@
/*
* For now 512TB is only supported with book3s and 64K linux page size.
*/
-#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_64K_PAGES)
+#ifdef CONFIG_PPC_64K_PAGES
/*
* Max value currently used:
*/
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 54bf7e68a7e1..57e968413d1e 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -36,6 +36,8 @@ extern unsigned long ppc_proc_freq;
extern unsigned long ppc_tb_freq;
#define DEFAULT_TB_FREQ 125000000UL
+extern bool tb_invalid;
+
struct div_result {
u64 result_high;
u64 result_low;
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
index 58ef8c43a89d..08cd60cd70b7 100644
--- a/arch/powerpc/include/asm/trace.h
+++ b/arch/powerpc/include/asm/trace.h
@@ -54,6 +54,22 @@ DEFINE_EVENT(ppc64_interrupt_class, timer_interrupt_exit,
TP_ARGS(regs)
);
+#ifdef CONFIG_PPC_DOORBELL
+DEFINE_EVENT(ppc64_interrupt_class, doorbell_entry,
+
+ TP_PROTO(struct pt_regs *regs),
+
+ TP_ARGS(regs)
+);
+
+DEFINE_EVENT(ppc64_interrupt_class, doorbell_exit,
+
+ TP_PROTO(struct pt_regs *regs),
+
+ TP_ARGS(regs)
+);
+#endif
+
#ifdef CONFIG_PPC_PSERIES
extern int hcall_tracepoint_regfunc(void);
extern void hcall_tracepoint_unregfunc(void);
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 4d6d905e9138..76f34346b642 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -6,6 +6,7 @@
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/extable.h>
+#include <asm/kup.h>
/*
* The fs value determines whether argument validity checking should be
@@ -140,6 +141,7 @@ extern long __put_user_bad(void);
#define __put_user_size(x, ptr, size, retval) \
do { \
retval = 0; \
+ allow_write_to_user(ptr, size); \
switch (size) { \
case 1: __put_user_asm(x, ptr, retval, "stb"); break; \
case 2: __put_user_asm(x, ptr, retval, "sth"); break; \
@@ -147,6 +149,7 @@ do { \
case 8: __put_user_asm2(x, ptr, retval); break; \
default: __put_user_bad(); \
} \
+ prevent_write_to_user(ptr, size); \
} while (0)
#define __put_user_nocheck(x, ptr, size) \
@@ -239,6 +242,7 @@ do { \
__chk_user_ptr(ptr); \
if (size > sizeof(x)) \
(x) = __get_user_bad(); \
+ allow_read_from_user(ptr, size); \
switch (size) { \
case 1: __get_user_asm(x, ptr, retval, "lbz"); break; \
case 2: __get_user_asm(x, ptr, retval, "lhz"); break; \
@@ -246,6 +250,7 @@ do { \
case 8: __get_user_asm2(x, ptr, retval); break; \
default: (x) = __get_user_bad(); \
} \
+ prevent_read_from_user(ptr, size); \
} while (0)
/*
@@ -305,15 +310,21 @@ extern unsigned long __copy_tofrom_user(void __user *to,
static inline unsigned long
raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
{
- return __copy_tofrom_user(to, from, n);
+ unsigned long ret;
+
+ allow_user_access(to, from, n);
+ ret = __copy_tofrom_user(to, from, n);
+ prevent_user_access(to, from, n);
+ return ret;
}
#endif /* __powerpc64__ */
static inline unsigned long raw_copy_from_user(void *to,
const void __user *from, unsigned long n)
{
+ unsigned long ret;
if (__builtin_constant_p(n) && (n <= 8)) {
- unsigned long ret = 1;
+ ret = 1;
switch (n) {
case 1:
@@ -338,14 +349,18 @@ static inline unsigned long raw_copy_from_user(void *to,
}
barrier_nospec();
- return __copy_tofrom_user((__force void __user *)to, from, n);
+ allow_read_from_user(from, n);
+ ret = __copy_tofrom_user((__force void __user *)to, from, n);
+ prevent_read_from_user(from, n);
+ return ret;
}
static inline unsigned long raw_copy_to_user(void __user *to,
const void *from, unsigned long n)
{
+ unsigned long ret;
if (__builtin_constant_p(n) && (n <= 8)) {
- unsigned long ret = 1;
+ ret = 1;
switch (n) {
case 1:
@@ -365,17 +380,24 @@ static inline unsigned long raw_copy_to_user(void __user *to,
return 0;
}
- return __copy_tofrom_user(to, (__force const void __user *)from, n);
+ allow_write_to_user(to, n);
+ ret = __copy_tofrom_user(to, (__force const void __user *)from, n);
+ prevent_write_to_user(to, n);
+ return ret;
}
extern unsigned long __clear_user(void __user *addr, unsigned long size);
static inline unsigned long clear_user(void __user *addr, unsigned long size)
{
+ unsigned long ret = size;
might_fault();
- if (likely(access_ok(addr, size)))
- return __clear_user(addr, size);
- return size;
+ if (likely(access_ok(addr, size))) {
+ allow_write_to_user(addr, size);
+ ret = __clear_user(addr, size);
+ prevent_write_to_user(addr, size);
+ }
+ return ret;
}
extern long strncpy_from_user(char *dst, const char __user *src, long count);
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 3c704f5dd3ae..b579a943407b 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -109,12 +109,26 @@ extern int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
extern void xive_native_sync_source(u32 hw_irq);
+extern void xive_native_sync_queue(u32 hw_irq);
extern bool is_xive_irq(struct irq_chip *chip);
extern int xive_native_enable_vp(u32 vp_id, bool single_escalation);
extern int xive_native_disable_vp(u32 vp_id);
extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
extern bool xive_native_has_single_escalation(void);
+extern int xive_native_get_queue_info(u32 vp_id, uint32_t prio,
+ u64 *out_qpage,
+ u64 *out_qsize,
+ u64 *out_qeoi_page,
+ u32 *out_escalate_irq,
+ u64 *out_qflags);
+
+extern int xive_native_get_queue_state(u32 vp_id, uint32_t prio, u32 *qtoggle,
+ u32 *qindex);
+extern int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle,
+ u32 qindex);
+extern int xive_native_get_vp_state(u32 vp_id, u64 *out_state);
+
#else
static inline bool xive_enabled(void) { return false; }
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index cddadccf551d..0ea6c4aa3a20 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -31,6 +31,18 @@ CFLAGS_REMOVE_btext.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_prom.o = $(CC_FLAGS_FTRACE)
endif
+KASAN_SANITIZE_early_32.o := n
+KASAN_SANITIZE_cputable.o := n
+KASAN_SANITIZE_prom_init.o := n
+KASAN_SANITIZE_btext.o := n
+
+ifdef CONFIG_KASAN
+CFLAGS_early_32.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING
+endif
+
obj-y := cputable.o ptrace.o syscalls.o \
irq.o align.o signal_32.o pmc.o vdso.o \
process.o systbl.o idle.o \
@@ -93,7 +105,7 @@ extra-y += vmlinux.lds
obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o
-obj-$(CONFIG_PPC32) += entry_32.o setup_32.o
+obj-$(CONFIG_PPC32) += entry_32.o setup_32.o early_32.o
obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_BOOTX_TEXT) += btext.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 86a61e5f8285..8e02444e9d3d 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -147,6 +147,9 @@ int main(void)
#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
OFFSET(THREAD_KVM_VCPU, thread_struct, kvm_vcpu);
#endif
+#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
+ OFFSET(KUAP, thread_struct, kuap);
+#endif
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
OFFSET(PACATMSCRATCH, paca_struct, tm_scratch);
@@ -268,7 +271,6 @@ int main(void)
OFFSET(ACCOUNT_USER_TIME, paca_struct, accounting.utime);
OFFSET(ACCOUNT_SYSTEM_TIME, paca_struct, accounting.stime);
OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save);
- OFFSET(PACA_NAPSTATELOST, paca_struct, nap_state_lost);
OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso);
#else /* CONFIG_PPC64 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
@@ -332,6 +334,10 @@ int main(void)
STACK_PT_REGS_OFFSET(_PPR, ppr);
#endif /* CONFIG_PPC64 */
+#ifdef CONFIG_PPC_KUAP
+ STACK_PT_REGS_OFFSET(STACK_REGS_KUAP, kuap);
+#endif
+
#if defined(CONFIG_PPC32)
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE);
@@ -766,23 +772,6 @@ int main(void)
OFFSET(VCPU_TIMING_LAST_ENTER_TBL, kvm_vcpu, arch.timing_last_enter.tv32.tbl);
#endif
-#ifdef CONFIG_PPC_POWERNV
- OFFSET(PACA_CORE_IDLE_STATE_PTR, paca_struct, core_idle_state_ptr);
- OFFSET(PACA_THREAD_IDLE_STATE, paca_struct, thread_idle_state);
- OFFSET(PACA_THREAD_MASK, paca_struct, thread_mask);
- OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask);
- OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr);
- OFFSET(PACA_DONT_STOP, paca_struct, dont_stop);
-#define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f)
- STOP_SPR(STOP_PID, pid);
- STOP_SPR(STOP_LDBAR, ldbar);
- STOP_SPR(STOP_FSCR, fscr);
- STOP_SPR(STOP_HFSCR, hfscr);
- STOP_SPR(STOP_MMCR1, mmcr1);
- STOP_SPR(STOP_MMCR2, mmcr2);
- STOP_SPR(STOP_MMCRA, mmcra);
-#endif
-
DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
DEFINE(PPC_DBELL_MSGTYPE, PPC_DBELL_MSGTYPE);
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index 53102764fd2f..f2ed3ef4b129 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -759,23 +759,22 @@ static void cacheinfo_create_index_dir(struct cache *cache, int index,
index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL);
if (!index_dir)
- goto err;
+ return;
index_dir->cache = cache;
rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type,
cache_dir->kobj, "index%d", index);
- if (rc)
- goto err;
+ if (rc) {
+ kobject_put(&index_dir->kobj);
+ kfree(index_dir);
+ return;
+ }
index_dir->next = cache_dir->index;
cache_dir->index = index_dir;
cacheinfo_create_index_opt_attrs(index_dir);
-
- return;
-err:
- kfree(index_dir);
}
static void cacheinfo_sysfs_populate(unsigned int cpu_id,
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 1eab54bc6ee9..cd12f362b61f 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -2147,7 +2147,11 @@ void __init set_cur_cpu_spec(struct cpu_spec *s)
struct cpu_spec *t = &the_cpu_spec;
t = PTRRELOC(t);
- *t = *s;
+ /*
+ * use memcpy() instead of *t = *s so that GCC replaces it
+ * by __memcpy() when KASAN is active
+ */
+ memcpy(t, s, sizeof(*t));
*PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
}
@@ -2161,8 +2165,11 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset,
t = PTRRELOC(t);
old = *t;
- /* Copy everything, then do fixups */
- *t = *s;
+ /*
+ * Copy everything, then do fixups. Use memcpy() instead of *t = *s
+ * so that GCC replaces it by __memcpy() when KASAN is active
+ */
+ memcpy(t, s, sizeof(*t));
/*
* If we are overriding a previous value derived from the real
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index b6fe883b1016..5ec3b3835925 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -18,6 +18,7 @@
#include <asm/dbell.h>
#include <asm/irq_regs.h>
#include <asm/kvm_ppc.h>
+#include <asm/trace.h>
#ifdef CONFIG_SMP
@@ -81,6 +82,7 @@ void doorbell_exception(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs);
irq_enter();
+ trace_doorbell_entry(regs);
ppc_msgsync();
@@ -91,6 +93,7 @@ void doorbell_exception(struct pt_regs *regs)
smp_ipi_demux_relaxed(); /* already performed the barrier */
+ trace_doorbell_exit(regs);
irq_exit();
set_irq_regs(old_regs);
}
diff --git a/arch/powerpc/kernel/early_32.c b/arch/powerpc/kernel/early_32.c
new file mode 100644
index 000000000000..3482118ffe76
--- /dev/null
+++ b/arch/powerpc/kernel/early_32.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Early init before relocation
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <asm/setup.h>
+#include <asm/sections.h>
+#include <asm/asm-prototypes.h>
+
+/*
+ * We're called here very early in the boot.
+ *
+ * Note that the kernel may be running at an address which is different
+ * from the address that it was linked at, so we must use RELOC/PTRRELOC
+ * to access static data (including strings). -- paulus
+ */
+notrace unsigned long __init early_init(unsigned long dt_ptr)
+{
+ unsigned long offset = reloc_offset();
+
+ /* First zero the BSS */
+ memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start);
+
+ /*
+ * Identify the CPU type and fix up code sections
+ * that depend on which cpu we have.
+ */
+ identify_cpu(offset, mfspr(SPRN_PVR));
+
+ apply_feature_fixups();
+
+ return KERNELBASE + offset;
+}
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index b61cfd29c76f..c18f3490a77e 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -36,15 +36,10 @@
#include <asm/asm-405.h>
#include <asm/feature-fixups.h>
#include <asm/barrier.h>
+#include <asm/kup.h>
+#include <asm/bug.h>
-/*
- * MSR_KERNEL is > 0x10000 on 4xx/Book-E since it include MSR_CE.
- */
-#if MSR_KERNEL >= 0x10000
-#define LOAD_MSR_KERNEL(r, x) lis r,(x)@h; ori r,r,(x)@l
-#else
-#define LOAD_MSR_KERNEL(r, x) li r,(x)
-#endif
+#include "head_32.h"
/*
* Align to 4k in order to ensure that all functions modyfing srr0/srr1
@@ -150,8 +145,8 @@ transfer_to_handler:
stw r12,_CTR(r11)
stw r2,_XER(r11)
mfspr r12,SPRN_SPRG_THREAD
- addi r2,r12,-THREAD
beq 2f /* if from user, fix up THREAD.regs */
+ addi r2, r12, -THREAD
addi r11,r1,STACK_FRAME_OVERHEAD
stw r11,PT_REGS(r12)
#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
@@ -161,6 +156,9 @@ transfer_to_handler:
andis. r12,r12,DBCR0_IDM@h
#endif
ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
+#ifdef CONFIG_PPC_BOOK3S_32
+ kuep_lock r11, r12
+#endif
#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
beq+ 3f
/* From user and task is ptraced - load up global dbcr0 */
@@ -186,6 +184,8 @@ transfer_to_handler:
2: /* if from kernel, check interrupted DOZE/NAP mode and
* check for stack overflow
*/
+ kuap_save_and_lock r11, r12, r9, r2, r0
+ addi r2, r12, -THREAD
lwz r9,KSP_LIMIT(r12)
cmplw r1,r9 /* if r1 <= ksp_limit */
ble- stack_ovf /* then the kernel stack overflowed */
@@ -207,26 +207,43 @@ transfer_to_handler_cont:
mtspr SPRN_NRI, r0
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
+ /*
+ * When tracing IRQ state (lockdep) we enable the MMU before we call
+ * the IRQ tracing functions as they might access vmalloc space or
+ * perform IOs for console output.
+ *
+ * To speed up the syscall path where interrupts stay on, let's check
+ * first if we are changing the MSR value at all.
+ */
+ tophys(r12, r1)
+ lwz r12,_MSR(r12)
+ andi. r12,r12,MSR_EE
+ bne 1f
+
+ /* MSR isn't changing, just transition directly */
+#endif
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r10
+ mtlr r9
+ SYNC
+ RFI /* jump to handler, enable MMU */
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+1: /* MSR is changing, re-enable MMU so we can notify lockdep. We need to
+ * keep interrupts disabled at this point otherwise we might risk
+ * taking an interrupt before we tell lockdep they are enabled.
+ */
lis r12,reenable_mmu@h
ori r12,r12,reenable_mmu@l
+ LOAD_MSR_KERNEL(r0, MSR_KERNEL)
mtspr SPRN_SRR0,r12
- mtspr SPRN_SRR1,r10
+ mtspr SPRN_SRR1,r0
SYNC
RFI
-reenable_mmu: /* re-enable mmu so we can */
- mfmsr r10
- lwz r12,_MSR(r1)
- xor r10,r10,r12
- andi. r10,r10,MSR_EE /* Did EE change? */
- beq 1f
+reenable_mmu:
/*
- * The trace_hardirqs_off will use CALLER_ADDR0 and CALLER_ADDR1.
- * If from user mode there is only one stack frame on the stack, and
- * accessing CALLER_ADDR1 will cause oops. So we need create a dummy
- * stack frame to make trace_hardirqs_off happy.
- *
- * This is handy because we also need to save a bunch of GPRs,
+ * We save a bunch of GPRs,
* r3 can be different from GPR3(r1) at this point, r9 and r11
* contains the old MSR and handler address respectively,
* r4 & r5 can contain page fault arguments that need to be passed
@@ -234,14 +251,19 @@ reenable_mmu: /* re-enable mmu so we can */
* they aren't useful past this point (aren't syscall arguments),
* the rest is restored from the exception frame.
*/
+
stwu r1,-32(r1)
stw r9,8(r1)
stw r11,12(r1)
stw r3,16(r1)
stw r4,20(r1)
stw r5,24(r1)
- bl trace_hardirqs_off
- lwz r5,24(r1)
+
+ /* If we are disabling interrupts (normal case), simply log it with
+ * lockdep
+ */
+1: bl trace_hardirqs_off
+2: lwz r5,24(r1)
lwz r4,20(r1)
lwz r3,16(r1)
lwz r11,12(r1)
@@ -251,15 +273,9 @@ reenable_mmu: /* re-enable mmu so we can */
lwz r6,GPR6(r1)
lwz r7,GPR7(r1)
lwz r8,GPR8(r1)
-1: mtctr r11
+ mtctr r11
mtlr r9
bctr /* jump to handler */
-#else /* CONFIG_TRACE_IRQFLAGS */
- mtspr SPRN_SRR0,r11
- mtspr SPRN_SRR1,r10
- mtlr r9
- SYNC
- RFI /* jump to handler, enable MMU */
#endif /* CONFIG_TRACE_IRQFLAGS */
#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
@@ -272,6 +288,7 @@ reenable_mmu: /* re-enable mmu so we can */
lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */
rlwinm r9,r9,0,~MSR_EE
lwz r12,_LINK(r11) /* and return to address in LR */
+ kuap_restore r11, r2, r3, r4, r5
b fast_exception_return
#endif
@@ -301,6 +318,33 @@ stack_ovf:
SYNC
RFI
+#ifdef CONFIG_TRACE_IRQFLAGS
+trace_syscall_entry_irq_off:
+ /*
+ * Syscall shouldn't happen while interrupts are disabled,
+ * so let's do a warning here.
+ */
+0: trap
+ EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
+ bl trace_hardirqs_on
+
+ /* Now enable for real */
+ LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
+ mtmsr r10
+
+ REST_GPR(0, r1)
+ REST_4GPRS(3, r1)
+ REST_2GPRS(7, r1)
+ b DoSyscall
+#endif /* CONFIG_TRACE_IRQFLAGS */
+
+ .globl transfer_to_syscall
+transfer_to_syscall:
+#ifdef CONFIG_TRACE_IRQFLAGS
+ andi. r12,r9,MSR_EE
+ beq- trace_syscall_entry_irq_off
+#endif /* CONFIG_TRACE_IRQFLAGS */
+
/*
* Handle a system call.
*/
@@ -312,33 +356,14 @@ _GLOBAL(DoSyscall)
stw r3,ORIG_GPR3(r1)
li r12,0
stw r12,RESULT(r1)
- lwz r11,_CCR(r1) /* Clear SO bit in CR */
- rlwinm r11,r11,0,4,2
- stw r11,_CCR(r1)
#ifdef CONFIG_TRACE_IRQFLAGS
- /* Return from syscalls can (and generally will) hard enable
- * interrupts. You aren't supposed to call a syscall with
- * interrupts disabled in the first place. However, to ensure
- * that we get it right vs. lockdep if it happens, we force
- * that hard enable here with appropriate tracing if we see
- * that we have been called with interrupts off
- */
+ /* Make sure interrupts are enabled */
mfmsr r11
andi. r12,r11,MSR_EE
- bne+ 1f
- /* We came in with interrupts disabled, we enable them now */
- bl trace_hardirqs_on
- mfmsr r11
- lwz r0,GPR0(r1)
- lwz r3,GPR3(r1)
- lwz r4,GPR4(r1)
- ori r11,r11,MSR_EE
- lwz r5,GPR5(r1)
- lwz r6,GPR6(r1)
- lwz r7,GPR7(r1)
- lwz r8,GPR8(r1)
- mtmsr r11
-1:
+ /* We came in with interrupts disabled, we WARN and mark them enabled
+ * for lockdep now */
+0: tweqi r12, 0
+ EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
#endif /* CONFIG_TRACE_IRQFLAGS */
lwz r11,TI_FLAGS(r2)
andi. r11,r11,_TIF_SYSCALL_DOTRACE
@@ -392,8 +417,7 @@ syscall_exit_cont:
lwz r8,_MSR(r1)
#ifdef CONFIG_TRACE_IRQFLAGS
/* If we are going to return from the syscall with interrupts
- * off, we trace that here. It shouldn't happen though but we
- * want to catch the bugger if it does right ?
+ * off, we trace that here. It shouldn't normally happen.
*/
andi. r10,r8,MSR_EE
bne+ 1f
@@ -422,12 +446,11 @@ BEGIN_FTR_SECTION
lwarx r7,0,r1
END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
stwcx. r0,0,r1 /* to clear the reservation */
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
- andi. r4,r8,MSR_PR
- beq 3f
ACCOUNT_CPU_USER_EXIT(r2, r5, r7)
-3:
+#ifdef CONFIG_PPC_BOOK3S_32
+ kuep_unlock r5, r7
#endif
+ kuap_check r2, r4
lwz r4,_LINK(r1)
lwz r5,_CCR(r1)
mtlr r4
@@ -678,6 +701,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE)
stw r10,_CCR(r1)
stw r1,KSP(r3) /* Set old stack pointer */
+ kuap_check r2, r4
#ifdef CONFIG_SMP
/* We need a sync somewhere here to make sure that if the
* previous task gets rescheduled on another CPU, it sees all
@@ -820,6 +844,9 @@ restore_user:
bnel- load_dbcr0
#endif
ACCOUNT_CPU_USER_EXIT(r2, r10, r11)
+#ifdef CONFIG_PPC_BOOK3S_32
+ kuep_unlock r10, r11
+#endif
b restore
@@ -866,12 +893,12 @@ resume_kernel:
/* check current_thread_info->preempt_count */
lwz r0,TI_PREEMPT(r2)
cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
- bne restore
+ bne restore_kuap
andi. r8,r8,_TIF_NEED_RESCHED
- beq+ restore
+ beq+ restore_kuap
lwz r3,_MSR(r1)
andi. r0,r3,MSR_EE /* interrupts off? */
- beq restore /* don't schedule if so */
+ beq restore_kuap /* don't schedule if so */
#ifdef CONFIG_TRACE_IRQFLAGS
/* Lockdep thinks irqs are enabled, we need to call
* preempt_schedule_irq with IRQs off, so we inform lockdep
@@ -879,10 +906,7 @@ resume_kernel:
*/
bl trace_hardirqs_off
#endif
-1: bl preempt_schedule_irq
- lwz r3,TI_FLAGS(r2)
- andi. r0,r3,_TIF_NEED_RESCHED
- bne- 1b
+ bl preempt_schedule_irq
#ifdef CONFIG_TRACE_IRQFLAGS
/* And now, to properly rebalance the above, we tell lockdep they
* are being turned back on, which will happen when we return
@@ -890,6 +914,8 @@ resume_kernel:
bl trace_hardirqs_on
#endif
#endif /* CONFIG_PREEMPT */
+restore_kuap:
+ kuap_restore r1, r2, r9, r10, r0
/* interrupts are hard-disabled at this point */
restore:
@@ -913,28 +939,14 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
* off in this assembly code while peeking at TI_FLAGS() and such. However
* we need to inform it if the exception turned interrupts off, and we
* are about to trun them back on.
- *
- * The problem here sadly is that we don't know whether the exceptions was
- * one that turned interrupts off or not. So we always tell lockdep about
- * turning them on here when we go back to wherever we came from with EE
- * on, even if that may meen some redudant calls being tracked. Maybe later
- * we could encode what the exception did somewhere or test the exception
- * type in the pt_regs but that sounds overkill
*/
andi. r10,r9,MSR_EE
beq 1f
- /*
- * Since the ftrace irqsoff latency trace checks CALLER_ADDR1,
- * which is the stack frame here, we need to force a stack frame
- * in case we came from user space.
- */
stwu r1,-32(r1)
mflr r0
stw r0,4(r1)
- stwu r1,-32(r1)
bl trace_hardirqs_on
- lwz r1,0(r1)
- lwz r1,0(r1)
+ addi r1, r1, 32
lwz r9,_MSR(r1)
1:
#endif /* CONFIG_TRACE_IRQFLAGS */
@@ -1197,6 +1209,7 @@ load_dbcr0:
.section .bss
.align 4
+ .global global_dbcr0
global_dbcr0:
.space 8*NR_CPUS
.previous
@@ -1207,9 +1220,10 @@ do_work: /* r10 contains MSR_KERNEL here */
beq do_user_signal
do_resched: /* r10 contains MSR_KERNEL here */
- /* Note: We don't need to inform lockdep that we are enabling
- * interrupts here. As far as it knows, they are already enabled
- */
+#ifdef CONFIG_TRACE_IRQFLAGS
+ bl trace_hardirqs_on
+ mfmsr r10
+#endif
ori r10,r10,MSR_EE
SYNC
MTMSRD(r10) /* hard-enable interrupts */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 15c67d2c0534..d978af78bf2a 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -46,6 +46,7 @@
#include <asm/exception-64e.h>
#endif
#include <asm/feature-fixups.h>
+#include <asm/kup.h>
/*
* System calls.
@@ -120,6 +121,9 @@ END_BTB_FLUSH_SECTION
addi r9,r1,STACK_FRAME_OVERHEAD
ld r11,exception_marker@toc(r2)
std r11,-16(r9) /* "regshere" marker */
+
+ kuap_check_amr r10, r11
+
#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
BEGIN_FW_FTR_SECTION
beq 33f
@@ -275,6 +279,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
andi. r6,r8,MSR_PR
ld r4,_LINK(r1)
+ kuap_check_amr r10, r11
+
#ifdef CONFIG_PPC_BOOK3S
/*
* Clear MSR_RI, MSR_EE is already and remains disabled. We could do
@@ -296,6 +302,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
std r8, PACATMSCRATCH(r13)
#endif
+ /*
+ * We don't need to restore AMR on the way back to userspace for KUAP.
+ * The value of AMR only matters while we're in the kernel.
+ */
ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
ld r2,GPR2(r1)
ld r1,GPR1(r1)
@@ -306,8 +316,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
RFI_TO_USER
b . /* prevent speculative execution */
- /* exit to kernel */
-1: ld r2,GPR2(r1)
+1: /* exit to kernel */
+ kuap_restore_amr r2
+
+ ld r2,GPR2(r1)
ld r1,GPR1(r1)
mtlr r4
mtcr r5
@@ -594,6 +606,8 @@ _GLOBAL(_switch)
std r23,_CCR(r1)
std r1,KSP(r3) /* Set old stack pointer */
+ kuap_check_amr r9, r10
+
FLUSH_COUNT_CACHE
/*
@@ -851,13 +865,7 @@ resume_kernel:
* sure we are soft-disabled first and reconcile irq state.
*/
RECONCILE_IRQ_STATE(r3,r4)
-1: bl preempt_schedule_irq
-
- /* Re-test flags and eventually loop */
- ld r9, PACA_THREAD_INFO(r13)
- ld r4,TI_FLAGS(r9)
- andi. r0,r4,_TIF_NEED_RESCHED
- bne 1b
+ bl preempt_schedule_irq
/*
* arch_local_irq_restore() from preempt_schedule_irq above may
@@ -942,6 +950,8 @@ fast_exception_return:
ld r4,_XER(r1)
mtspr SPRN_XER,r4
+ kuap_check_amr r5, r6
+
REST_8GPRS(5, r1)
andi. r0,r3,MSR_RI
@@ -974,6 +984,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
REST_GPR(13, r1)
+ /*
+ * We don't need to restore AMR on the way back to userspace for KUAP.
+ * The value of AMR only matters while we're in the kernel.
+ */
mtspr SPRN_SRR1,r3
ld r2,_CCR(r1)
@@ -1006,6 +1020,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld r0,GPR0(r1)
ld r2,GPR2(r1)
ld r3,GPR3(r1)
+
+ kuap_restore_amr r4
+
ld r4,GPR4(r1)
ld r1,GPR1(r1)
RFI_TO_KERNEL
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 9481a117e242..6b86055e5251 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -19,6 +19,7 @@
#include <asm/cpuidle.h>
#include <asm/head-64.h>
#include <asm/feature-fixups.h>
+#include <asm/kup.h>
/*
* There are a few constraints to be concerned with.
@@ -120,7 +121,9 @@ EXC_VIRT_NONE(0x4000, 0x100)
mfspr r10,SPRN_SRR1 ; \
rlwinm. r10,r10,47-31,30,31 ; \
beq- 1f ; \
- cmpwi cr3,r10,2 ; \
+ cmpwi cr1,r10,2 ; \
+ mfspr r3,SPRN_SRR1 ; \
+ bltlr cr1 ; /* no state loss, return to idle caller */ \
BRANCH_TO_C000(r10, system_reset_idle_common) ; \
1: \
KVMTEST_PR(n) ; \
@@ -144,8 +147,11 @@ TRAMP_KVM(PACA_EXNMI, 0x100)
#ifdef CONFIG_PPC_P7_NAP
EXC_COMMON_BEGIN(system_reset_idle_common)
- mfspr r12,SPRN_SRR1
- b pnv_powersave_wakeup
+ /*
+ * This must be a direct branch (without linker branch stub) because
+ * we can not use TOC at this point as r2 may not be restored yet.
+ */
+ b idle_return_gpr_loss
#endif
/*
@@ -309,6 +315,7 @@ TRAMP_REAL_BEGIN(machine_check_common_early)
mfspr r11,SPRN_DSISR /* Save DSISR */
std r11,_DSISR(r1)
std r9,_CCR(r1) /* Save CR in stackframe */
+ kuap_save_amr_and_lock r9, r10, cr1
/* Save r9 through r13 from EXMC save area to stack frame. */
EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
mfmsr r11 /* get MSR value */
@@ -427,17 +434,17 @@ EXC_COMMON_BEGIN(machine_check_idle_common)
* Then decrement MCE nesting after finishing with the stack.
*/
ld r3,_MSR(r1)
+ ld r4,_LINK(r1)
lhz r11,PACA_IN_MCE(r13)
subi r11,r11,1
sth r11,PACA_IN_MCE(r13)
- /* Turn off the RI bit because SRR1 is used by idle wakeup code. */
- /* Recoverability could be improved by reducing the use of SRR1. */
- li r11,0
- mtmsrd r11,1
-
- b pnv_powersave_wakeup_mce
+ mtlr r4
+ rlwinm r10,r3,47-31,30,31
+ cmpwi cr1,r10,2
+ bltlr cr1 /* no state loss, return to idle caller */
+ b idle_return_gpr_loss
#endif
/*
* Handle machine check early in real mode. We come here with
@@ -1109,6 +1116,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
mfspr r12,SPRN_HSRR1 /* Save HSRR1 */
EXCEPTION_PROLOG_COMMON_1()
+ /* We don't touch AMR here, we never go to virtual mode */
EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
EXCEPTION_PROLOG_COMMON_3(0xe60)
addi r3,r1,STACK_FRAME_OVERHEAD
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 45a8d0be1c96..25f063f56ec5 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -36,6 +36,7 @@
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/cma.h>
+#include <linux/hugetlb.h>
#include <asm/debugfs.h>
#include <asm/page.h>
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 529dcc21c3f9..cecd57e1d046 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -63,6 +63,7 @@ _GLOBAL(load_fp_state)
REST_32FPVSRS(0, R4, R3)
blr
EXPORT_SYMBOL(load_fp_state)
+_ASM_NOKPROBE_SYMBOL(load_fp_state); /* used by restore_math */
/*
* Store FP state into memory, including FPSCR
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index e25b615e9f9e..755fab9641d6 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -37,6 +37,8 @@
#include <asm/export.h>
#include <asm/feature-fixups.h>
+#include "head_32.h"
+
/* 601 only have IBAT; cr0.eq is set on 601 when using this macro */
#define LOAD_BAT(n, reg, RA, RB) \
/* see the comment for clear_bats() -- Cort */ \
@@ -160,6 +162,10 @@ __after_mmu_off:
bl flush_tlbs
bl initial_bats
+ bl load_segment_registers
+#ifdef CONFIG_KASAN
+ bl early_hash_table
+#endif
#if defined(CONFIG_BOOTX_TEXT)
bl setup_disp_bat
#endif
@@ -205,7 +211,7 @@ __after_mmu_off:
*/
turn_on_mmu:
mfmsr r0
- ori r0,r0,MSR_DR|MSR_IR
+ ori r0,r0,MSR_DR|MSR_IR|MSR_RI
mtspr SPRN_SRR1,r0
lis r0,start_here@h
ori r0,r0,start_here@l
@@ -242,103 +248,6 @@ __secondary_hold_spinloop:
__secondary_hold_acknowledge:
.long -1
-/*
- * Exception entry code. This code runs with address translation
- * turned off, i.e. using physical addresses.
- * We assume sprg3 has the physical address of the current
- * task's thread_struct.
- */
-#define EXCEPTION_PROLOG \
- mtspr SPRN_SPRG_SCRATCH0,r10; \
- mtspr SPRN_SPRG_SCRATCH1,r11; \
- mfcr r10; \
- EXCEPTION_PROLOG_1; \
- EXCEPTION_PROLOG_2
-
-#define EXCEPTION_PROLOG_1 \
- mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \
- andi. r11,r11,MSR_PR; \
- tophys(r11,r1); /* use tophys(r1) if kernel */ \
- beq 1f; \
- mfspr r11,SPRN_SPRG_THREAD; \
- lwz r11,TASK_STACK-THREAD(r11); \
- addi r11,r11,THREAD_SIZE; \
- tophys(r11,r11); \
-1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */
-
-
-#define EXCEPTION_PROLOG_2 \
- stw r10,_CCR(r11); /* save registers */ \
- stw r12,GPR12(r11); \
- stw r9,GPR9(r11); \
- mfspr r10,SPRN_SPRG_SCRATCH0; \
- stw r10,GPR10(r11); \
- mfspr r12,SPRN_SPRG_SCRATCH1; \
- stw r12,GPR11(r11); \
- mflr r10; \
- stw r10,_LINK(r11); \
- mfspr r12,SPRN_SRR0; \
- mfspr r9,SPRN_SRR1; \
- stw r1,GPR1(r11); \
- stw r1,0(r11); \
- tovirt(r1,r11); /* set new kernel sp */ \
- li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \
- MTMSRD(r10); /* (except for mach check in rtas) */ \
- stw r0,GPR0(r11); \
- lis r10,STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */ \
- addi r10,r10,STACK_FRAME_REGS_MARKER@l; \
- stw r10,8(r11); \
- SAVE_4GPRS(3, r11); \
- SAVE_2GPRS(7, r11)
-
-/*
- * Note: code which follows this uses cr0.eq (set if from kernel),
- * r11, r12 (SRR0), and r9 (SRR1).
- *
- * Note2: once we have set r1 we are in a position to take exceptions
- * again, and we could thus set MSR:RI at that point.
- */
-
-/*
- * Exception vectors.
- */
-#define EXCEPTION(n, label, hdlr, xfer) \
- . = n; \
- DO_KVM n; \
-label: \
- EXCEPTION_PROLOG; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- xfer(n, hdlr)
-
-#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \
- li r10,trap; \
- stw r10,_TRAP(r11); \
- li r10,MSR_KERNEL; \
- copyee(r10, r9); \
- bl tfer; \
-i##n: \
- .long hdlr; \
- .long ret
-
-#define COPY_EE(d, s) rlwimi d,s,0,16,16
-#define NOCOPY(d, s)
-
-#define EXC_XFER_STD(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \
- ret_from_except)
-
-#define EXC_XFER_EE(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_EE_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \
- ret_from_except)
-
/* System reset */
/* core99 pmac starts the seconary here by changing the vector, and
putting it back to what it was (unknown_exception) when done. */
@@ -387,7 +296,11 @@ DataAccess:
EXCEPTION_PROLOG
mfspr r10,SPRN_DSISR
stw r10,_DSISR(r11)
+#ifdef CONFIG_PPC_KUAP
+ andis. r0,r10,(DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h
+#else
andis. r0,r10,(DSISR_BAD_FAULT_32S|DSISR_DABRMATCH)@h
+#endif
bne 1f /* if not, try to put a PTE */
mfspr r4,SPRN_DAR /* into the hash table */
rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */
@@ -428,7 +341,7 @@ Alignment:
mfspr r5,SPRN_DSISR
stw r5,_DSISR(r11)
addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE(0x600, alignment_exception)
+ EXC_XFER_STD(0x600, alignment_exception)
/* Program check exception */
EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
@@ -449,24 +362,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE)
bl load_up_fpu /* if from user, just load it up */
b fast_exception_return
1: addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception)
+ EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception)
/* Decrementer */
EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
- EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD)
/* System call */
. = 0xc00
DO_KVM 0xc00
SystemCall:
- EXCEPTION_PROLOG
- EXC_XFER_EE_LITE(0xc00, DoSyscall)
+ SYSCALL_ENTRY 0xc00
/* Single step - not used on 601 */
EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
- EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD)
/*
* The Altivec unavailable trap is at 0x0f20. Foo.
@@ -522,9 +434,9 @@ InstructionTLBMiss:
andc. r1,r1,r0 /* check access & ~permission */
bne- InstructionAddressInvalid /* return if access not permitted */
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
- ori r1, r1, 0xe05 /* clear out reserved bits */
- andc r1, r0, r1 /* PP = user? 2 : 0 */
+ rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */
+ ori r1, r1, 0xe06 /* clear out reserved bits */
+ andc r1, r0, r1 /* PP = user? 1 : 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
@@ -590,11 +502,11 @@ DataLoadTLBMiss:
* we would need to update the pte atomically with lwarx/stwcx.
*/
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */
+ rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */
rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */
ori r1,r1,0xe04 /* clear out reserved bits */
- andc r1,r0,r1 /* PP = user? rw? 2: 3: 0 */
+ andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
@@ -670,9 +582,9 @@ DataStoreTLBMiss:
* we would need to update the pte atomically with lwarx/stwcx.
*/
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
- li r1,0xe05 /* clear out reserved bits & PP lsb */
- andc r1,r0,r1 /* PP = user? 2: 0 */
+ rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */
+ li r1,0xe06 /* clear out reserved bits & PP msb */
+ andc r1,r0,r1 /* PP = user? 1: 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
@@ -698,35 +610,35 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
#define altivec_assist_exception unknown_exception
#endif
- EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_EE)
- EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_EE)
- EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_EE)
+ EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD)
+ EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_STD)
+ EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_STD)
EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD)
- EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_EE)
- EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_STD)
+ EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_STD)
. = 0x3000
@@ -738,7 +650,7 @@ AltiVecUnavailable:
b fast_exception_return
#endif /* CONFIG_ALTIVEC */
1: addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE_LITE(0xf20, altivec_unavailable_exception)
+ EXC_XFER_LITE(0xf20, altivec_unavailable_exception)
PerformanceMonitor:
EXCEPTION_PROLOG
@@ -880,11 +792,24 @@ _ENTRY(__restore_cpu_setup)
blr
#endif /* !defined(CONFIG_PPC_BOOK3S_32) */
-
/*
* Load stuff into the MMU. Intended to be called with
* IR=0 and DR=0.
*/
+#ifdef CONFIG_KASAN
+early_hash_table:
+ sync /* Force all PTE updates to finish */
+ isync
+ tlbia /* Clear all TLB entries */
+ sync /* wait for tlbia/tlbie to finish */
+ TLBSYNC /* ... on all CPUs */
+ /* Load the SDR1 register (hash table base & size) */
+ lis r6, early_hash - PAGE_OFFSET@h
+ ori r6, r6, 3 /* 256kB table */
+ mtspr SPRN_SDR1, r6
+ blr
+#endif
+
load_up_mmu:
sync /* Force all PTE updates to finish */
isync
@@ -896,14 +821,6 @@ load_up_mmu:
tophys(r6,r6)
lwz r6,_SDR1@l(r6)
mtspr SPRN_SDR1,r6
- li r0,16 /* load up segment register values */
- mtctr r0 /* for context 0 */
- lis r3,0x2000 /* Ku = 1, VSID = 0 */
- li r4,0
-3: mtsrin r3,r4
- addi r3,r3,0x111 /* increment VSID */
- addis r4,r4,0x1000 /* address of next segment */
- bdnz 3b
/* Load the BAT registers with the values set up by MMU_init.
MMU_init takes care of whether we're on a 601 or not. */
@@ -925,6 +842,32 @@ BEGIN_MMU_FTR_SECTION
END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
blr
+load_segment_registers:
+ li r0, NUM_USER_SEGMENTS /* load up user segment register values */
+ mtctr r0 /* for context 0 */
+ li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */
+#ifdef CONFIG_PPC_KUEP
+ oris r3, r3, SR_NX@h /* Set Nx */
+#endif
+#ifdef CONFIG_PPC_KUAP
+ oris r3, r3, SR_KS@h /* Set Ks */
+#endif
+ li r4, 0
+3: mtsrin r3, r4
+ addi r3, r3, 0x111 /* increment VSID */
+ addis r4, r4, 0x1000 /* address of next segment */
+ bdnz 3b
+ li r0, 16 - NUM_USER_SEGMENTS /* load up kernel segment registers */
+ mtctr r0 /* for context 0 */
+ rlwinm r3, r3, 0, ~SR_NX /* Nx = 0 */
+ rlwinm r3, r3, 0, ~SR_KS /* Ks = 0 */
+ oris r3, r3, SR_KP@h /* Kp = 1 */
+3: mtsrin r3, r4
+ addi r3, r3, 0x111 /* increment VSID */
+ addis r4, r4, 0x1000 /* address of next segment */
+ bdnz 3b
+ blr
+
/*
* This is where the main kernel code starts.
*/
@@ -950,11 +893,17 @@ start_here:
* Do early platform-specific initialization,
* and set up the MMU.
*/
+#ifdef CONFIG_KASAN
+ bl kasan_early_init
+#endif
li r3,0
mr r4,r31
bl machine_init
bl __save_cpu_setup
bl MMU_init
+BEGIN_MMU_FTR_SECTION
+ bl MMU_init_hw_patch
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
/*
* Go back to running unmapped so we can load up new values
@@ -1006,7 +955,12 @@ _ENTRY(switch_mmu_context)
blt- 4f
mulli r3,r3,897 /* multiply context by skew factor */
rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */
- addis r3,r3,0x6000 /* Set Ks, Ku bits */
+#ifdef CONFIG_PPC_KUEP
+ oris r3, r3, SR_NX@h /* Set Nx */
+#endif
+#ifdef CONFIG_PPC_KUAP
+ oris r3, r3, SR_KS@h /* Set Ks */
+#endif
li r0,NUM_USER_SEGMENTS
mtctr r0
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
new file mode 100644
index 000000000000..4a692553651f
--- /dev/null
+++ b/arch/powerpc/kernel/head_32.h
@@ -0,0 +1,203 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __HEAD_32_H__
+#define __HEAD_32_H__
+
+#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
+
+/*
+ * MSR_KERNEL is > 0x8000 on 4xx/Book-E since it include MSR_CE.
+ */
+.macro __LOAD_MSR_KERNEL r, x
+.if \x >= 0x8000
+ lis \r, (\x)@h
+ ori \r, \r, (\x)@l
+.else
+ li \r, (\x)
+.endif
+.endm
+#define LOAD_MSR_KERNEL(r, x) __LOAD_MSR_KERNEL r, x
+
+/*
+ * Exception entry code. This code runs with address translation
+ * turned off, i.e. using physical addresses.
+ * We assume sprg3 has the physical address of the current
+ * task's thread_struct.
+ */
+
+.macro EXCEPTION_PROLOG
+ mtspr SPRN_SPRG_SCRATCH0,r10
+ mtspr SPRN_SPRG_SCRATCH1,r11
+ mfcr r10
+ EXCEPTION_PROLOG_1
+ EXCEPTION_PROLOG_2
+.endm
+
+.macro EXCEPTION_PROLOG_1
+ mfspr r11,SPRN_SRR1 /* check whether user or kernel */
+ andi. r11,r11,MSR_PR
+ tophys(r11,r1) /* use tophys(r1) if kernel */
+ beq 1f
+ mfspr r11,SPRN_SPRG_THREAD
+ lwz r11,TASK_STACK-THREAD(r11)
+ addi r11,r11,THREAD_SIZE
+ tophys(r11,r11)
+1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */
+.endm
+
+.macro EXCEPTION_PROLOG_2
+ stw r10,_CCR(r11) /* save registers */
+ stw r12,GPR12(r11)
+ stw r9,GPR9(r11)
+ mfspr r10,SPRN_SPRG_SCRATCH0
+ stw r10,GPR10(r11)
+ mfspr r12,SPRN_SPRG_SCRATCH1
+ stw r12,GPR11(r11)
+ mflr r10
+ stw r10,_LINK(r11)
+ mfspr r12,SPRN_SRR0
+ mfspr r9,SPRN_SRR1
+ stw r1,GPR1(r11)
+ stw r1,0(r11)
+ tovirt(r1,r11) /* set new kernel sp */
+#ifdef CONFIG_40x
+ rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
+#else
+ li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR) /* can take exceptions */
+ MTMSRD(r10) /* (except for mach check in rtas) */
+#endif
+ stw r0,GPR0(r11)
+ lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+ addi r10,r10,STACK_FRAME_REGS_MARKER@l
+ stw r10,8(r11)
+ SAVE_4GPRS(3, r11)
+ SAVE_2GPRS(7, r11)
+.endm
+
+.macro SYSCALL_ENTRY trapno
+ mfspr r12,SPRN_SPRG_THREAD
+ mfcr r10
+ lwz r11,TASK_STACK-THREAD(r12)
+ mflr r9
+ addi r11,r11,THREAD_SIZE - INT_FRAME_SIZE
+ rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */
+ tophys(r11,r11)
+ stw r10,_CCR(r11) /* save registers */
+ mfspr r10,SPRN_SRR0
+ stw r9,_LINK(r11)
+ mfspr r9,SPRN_SRR1
+ stw r1,GPR1(r11)
+ stw r1,0(r11)
+ tovirt(r1,r11) /* set new kernel sp */
+ stw r10,_NIP(r11)
+#ifdef CONFIG_40x
+ rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
+#else
+ LOAD_MSR_KERNEL(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */
+ MTMSRD(r10) /* (except for mach check in rtas) */
+#endif
+ lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+ stw r2,GPR2(r11)
+ addi r10,r10,STACK_FRAME_REGS_MARKER@l
+ stw r9,_MSR(r11)
+ li r2, \trapno + 1
+ stw r10,8(r11)
+ stw r2,_TRAP(r11)
+ SAVE_GPR(0, r11)
+ SAVE_4GPRS(3, r11)
+ SAVE_2GPRS(7, r11)
+ addi r11,r1,STACK_FRAME_OVERHEAD
+ addi r2,r12,-THREAD
+ stw r11,PT_REGS(r12)
+#if defined(CONFIG_40x)
+ /* Check to see if the dbcr0 register is set up to debug. Use the
+ internal debug mode bit to do this. */
+ lwz r12,THREAD_DBCR0(r12)
+ andis. r12,r12,DBCR0_IDM@h
+#endif
+ ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
+#if defined(CONFIG_40x)
+ beq+ 3f
+ /* From user and task is ptraced - load up global dbcr0 */
+ li r12,-1 /* clear all pending debug events */
+ mtspr SPRN_DBSR,r12
+ lis r11,global_dbcr0@ha
+ tophys(r11,r11)
+ addi r11,r11,global_dbcr0@l
+ lwz r12,0(r11)
+ mtspr SPRN_DBCR0,r12
+ lwz r12,4(r11)
+ addi r12,r12,-1
+ stw r12,4(r11)
+#endif
+
+3:
+ tovirt(r2, r2) /* set r2 to current */
+ lis r11, transfer_to_syscall@h
+ ori r11, r11, transfer_to_syscall@l
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /*
+ * If MSR is changing we need to keep interrupts disabled at this point
+ * otherwise we might risk taking an interrupt before we tell lockdep
+ * they are enabled.
+ */
+ LOAD_MSR_KERNEL(r10, MSR_KERNEL)
+ rlwimi r10, r9, 0, MSR_EE
+#else
+ LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
+#endif
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
+ mtspr SPRN_NRI, r0
+#endif
+ mtspr SPRN_SRR1,r10
+ mtspr SPRN_SRR0,r11
+ SYNC
+ RFI /* jump to handler, enable MMU */
+.endm
+
+/*
+ * Note: code which follows this uses cr0.eq (set if from kernel),
+ * r11, r12 (SRR0), and r9 (SRR1).
+ *
+ * Note2: once we have set r1 we are in a position to take exceptions
+ * again, and we could thus set MSR:RI at that point.
+ */
+
+/*
+ * Exception vectors.
+ */
+#ifdef CONFIG_PPC_BOOK3S
+#define START_EXCEPTION(n, label) \
+ . = n; \
+ DO_KVM n; \
+label:
+
+#else
+#define START_EXCEPTION(n, label) \
+ . = n; \
+label:
+
+#endif
+
+#define EXCEPTION(n, label, hdlr, xfer) \
+ START_EXCEPTION(n, label) \
+ EXCEPTION_PROLOG; \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ xfer(n, hdlr)
+
+#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \
+ li r10,trap; \
+ stw r10,_TRAP(r11); \
+ LOAD_MSR_KERNEL(r10, msr); \
+ bl tfer; \
+ .long hdlr; \
+ .long ret
+
+#define EXC_XFER_STD(n, hdlr) \
+ EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \
+ ret_from_except_full)
+
+#define EXC_XFER_LITE(n, hdlr) \
+ EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \
+ ret_from_except)
+
+#endif /* __HEAD_32_H__ */
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index a9c934f2319b..cf54b784100d 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -44,6 +44,8 @@
#include <asm/export.h>
#include <asm/asm-405.h>
+#include "head_32.h"
+
/* As with the other PowerPC ports, it is expected that when code
* execution begins here, the following registers contain valid, yet
* optional, information:
@@ -99,46 +101,6 @@ _ENTRY(saved_ksp_limit)
.space 4
/*
- * Exception vector entry code. This code runs with address translation
- * turned off (i.e. using physical addresses). We assume SPRG_THREAD has
- * the physical address of the current task thread_struct.
- * Note that we have to have decremented r1 before we write to any fields
- * of the exception frame, since a critical interrupt could occur at any
- * time, and it will write to the area immediately below the current r1.
- */
-#define NORMAL_EXCEPTION_PROLOG \
- mtspr SPRN_SPRG_SCRATCH0,r10; /* save two registers to work with */\
- mtspr SPRN_SPRG_SCRATCH1,r11; \
- mtspr SPRN_SPRG_SCRATCH2,r1; \
- mfcr r10; /* save CR in r10 for now */\
- mfspr r11,SPRN_SRR1; /* check whether user or kernel */\
- andi. r11,r11,MSR_PR; \
- beq 1f; \
- mfspr r1,SPRN_SPRG_THREAD; /* if from user, start at top of */\
- lwz r1,TASK_STACK-THREAD(r1); /* this thread's kernel stack */\
- addi r1,r1,THREAD_SIZE; \
-1: subi r1,r1,INT_FRAME_SIZE; /* Allocate an exception frame */\
- tophys(r11,r1); \
- stw r10,_CCR(r11); /* save various registers */\
- stw r12,GPR12(r11); \
- stw r9,GPR9(r11); \
- mfspr r10,SPRN_SPRG_SCRATCH0; \
- stw r10,GPR10(r11); \
- mfspr r12,SPRN_SPRG_SCRATCH1; \
- stw r12,GPR11(r11); \
- mflr r10; \
- stw r10,_LINK(r11); \
- mfspr r10,SPRN_SPRG_SCRATCH2; \
- mfspr r12,SPRN_SRR0; \
- stw r10,GPR1(r11); \
- mfspr r9,SPRN_SRR1; \
- stw r10,0(r11); \
- rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
- stw r0,GPR0(r11); \
- SAVE_4GPRS(3, r11); \
- SAVE_2GPRS(7, r11)
-
-/*
* Exception prolog for critical exceptions. This is a little different
* from the normal exception prolog above since a critical exception
* can potentially occur at any point during normal exception processing.
@@ -177,6 +139,9 @@ _ENTRY(saved_ksp_limit)
tovirt(r1,r11); \
rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
stw r0,GPR0(r11); \
+ lis r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */\
+ addi r10, r10, STACK_FRAME_REGS_MARKER@l; \
+ stw r10, 8(r11); \
SAVE_4GPRS(3, r11); \
SAVE_2GPRS(7, r11)
@@ -196,53 +161,12 @@ _ENTRY(saved_ksp_limit)
/*
* Exception vectors.
*/
-#define START_EXCEPTION(n, label) \
- . = n; \
-label:
-
-#define EXCEPTION(n, label, hdlr, xfer) \
- START_EXCEPTION(n, label); \
- NORMAL_EXCEPTION_PROLOG; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- xfer(n, hdlr)
-
#define CRITICAL_EXCEPTION(n, label, hdlr) \
START_EXCEPTION(n, label); \
CRITICAL_EXCEPTION_PROLOG; \
addi r3,r1,STACK_FRAME_OVERHEAD; \
EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- NOCOPY, crit_transfer_to_handler, \
- ret_from_crit_exc)
-
-#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \
- li r10,trap; \
- stw r10,_TRAP(r11); \
- lis r10,msr@h; \
- ori r10,r10,msr@l; \
- copyee(r10, r9); \
- bl tfer; \
- .long hdlr; \
- .long ret
-
-#define COPY_EE(d, s) rlwimi d,s,0,16,16
-#define NOCOPY(d, s)
-
-#define EXC_XFER_STD(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \
- ret_from_except)
-
-#define EXC_XFER_EE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_EE_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \
- ret_from_except)
-
+ crit_transfer_to_handler, ret_from_crit_exc)
/*
* 0x0100 - Critical Interrupt Exception
@@ -393,7 +317,7 @@ label:
* This is caused by a fetch from non-execute or guarded pages.
*/
START_EXCEPTION(0x0400, InstructionAccess)
- NORMAL_EXCEPTION_PROLOG
+ EXCEPTION_PROLOG
mr r4,r12 /* Pass SRR0 as arg2 */
li r5,0 /* Pass zero as arg3 */
EXC_XFER_LITE(0x400, handle_page_fault)
@@ -403,33 +327,32 @@ label:
/* 0x0600 - Alignment Exception */
START_EXCEPTION(0x0600, Alignment)
- NORMAL_EXCEPTION_PROLOG
+ EXCEPTION_PROLOG
mfspr r4,SPRN_DEAR /* Grab the DEAR and save it */
stw r4,_DEAR(r11)
addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE(0x600, alignment_exception)
+ EXC_XFER_STD(0x600, alignment_exception)
/* 0x0700 - Program Exception */
START_EXCEPTION(0x0700, ProgramCheck)
- NORMAL_EXCEPTION_PROLOG
+ EXCEPTION_PROLOG
mfspr r4,SPRN_ESR /* Grab the ESR and save it */
stw r4,_ESR(r11)
addi r3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_STD(0x700, program_check_exception)
- EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_STD)
/* 0x0C00 - System Call Exception */
START_EXCEPTION(0x0C00, SystemCall)
- NORMAL_EXCEPTION_PROLOG
- EXC_XFER_EE_LITE(0xc00, DoSyscall)
+ SYSCALL_ENTRY 0xc00
- EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_STD)
/* 0x1000 - Programmable Interval Timer (PIT) Exception */
. = 0x1000
@@ -646,25 +569,25 @@ label:
mfspr r10, SPRN_SPRG_SCRATCH0
b InstructionAccess
- EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD)
#ifdef CONFIG_IBM405_ERR51
/* 405GP errata 51 */
START_EXCEPTION(0x1700, Trap_17)
b DTLBMiss
#else
- EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD)
#endif
- EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_STD)
/* Check for a single step debug exception while in an exception
* handler before state has been saved. This is to catch the case
@@ -726,11 +649,11 @@ label:
addi r3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_TEMPLATE(DebugException, 0x2002, \
(MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- NOCOPY, crit_transfer_to_handler, ret_from_crit_exc)
+ crit_transfer_to_handler, ret_from_crit_exc)
/* Programmable Interval Timer (PIT) Exception. (from 0x1000) */
Decrementer:
- NORMAL_EXCEPTION_PROLOG
+ EXCEPTION_PROLOG
lis r0,TSR_PIS@h
mtspr SPRN_TSR,r0 /* Clear the PIT exception */
addi r3,r1,STACK_FRAME_OVERHEAD
@@ -738,9 +661,9 @@ Decrementer:
/* Fixed Interval Timer (FIT) Exception. (from 0x1010) */
FITException:
- NORMAL_EXCEPTION_PROLOG
+ EXCEPTION_PROLOG
addi r3,r1,STACK_FRAME_OVERHEAD;
- EXC_XFER_EE(0x1010, unknown_exception)
+ EXC_XFER_STD(0x1010, unknown_exception)
/* Watchdog Timer (WDT) Exception. (from 0x1020) */
WDTException:
@@ -748,15 +671,14 @@ WDTException:
addi r3,r1,STACK_FRAME_OVERHEAD;
EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2,
(MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)),
- NOCOPY, crit_transfer_to_handler,
- ret_from_crit_exc)
+ crit_transfer_to_handler, ret_from_crit_exc)
/*
* The other Data TLB exceptions bail out to this point
* if they can't resolve the lightweight TLB fault.
*/
DataAccess:
- NORMAL_EXCEPTION_PROLOG
+ EXCEPTION_PROLOG
mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
stw r5,_ESR(r11)
mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
@@ -848,6 +770,9 @@ start_here:
/*
* Decide what sort of machine this is and initialize the MMU.
*/
+#ifdef CONFIG_KASAN
+ bl kasan_early_init
+#endif
li r3,0
mr r4,r31
bl machine_init
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 37117ab11584..f15fba58c744 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -203,6 +203,9 @@ _ENTRY(_start);
/*
* Decide what sort of machine this is and initialize the MMU.
*/
+#ifdef CONFIG_KASAN
+ bl kasan_early_init
+#endif
li r3,0
mr r4,r31
bl machine_init
@@ -278,16 +281,15 @@ interrupt_base:
FP_UNAVAILABLE_EXCEPTION
#else
EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \
- FloatingPointUnavailable, unknown_exception, EXC_XFER_EE)
+ FloatingPointUnavailable, unknown_exception, EXC_XFER_STD)
#endif
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
- NORMAL_EXCEPTION_PROLOG(BOOKE_INTERRUPT_SYSCALL)
- EXC_XFER_EE_LITE(0x0c00, DoSyscall)
+ SYSCALL_ENTRY 0xc00 BOOKE_INTERRUPT_SYSCALL
/* Auxiliary Processor Unavailable Interrupt */
EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \
- AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE)
+ AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_STD)
/* Decrementer Interrupt */
DECREMENTER_EXCEPTION
@@ -295,7 +297,7 @@ interrupt_base:
/* Fixed Internal Timer Interrupt */
/* TODO: Add FIT support */
EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \
- unknown_exception, EXC_XFER_EE)
+ unknown_exception, EXC_XFER_STD)
/* Watchdog Timer Interrupt */
/* TODO: Add watchdog support */
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 3fad8d499767..5321a11c2835 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -968,7 +968,9 @@ start_here_multiplatform:
/* Restore parameters passed from prom_init/kexec */
mr r3,r31
- bl early_setup /* also sets r13 and SPRG_PACA */
+ LOAD_REG_ADDR(r12, DOTSYM(early_setup))
+ mtctr r12
+ bctrl /* also sets r13 and SPRG_PACA */
LOAD_REG_ADDR(r3, start_here_common)
ld r4,PACAKMSR(r13)
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 03c73b4c6435..885be7f3d29a 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -33,6 +33,8 @@
#include <asm/export.h>
#include <asm/code-patching-asm.h>
+#include "head_32.h"
+
#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
/* By simply checking Address >= 0x80000000, we know if its a kernel address */
#define SIMPLE_KERNEL_ADDRESS 1
@@ -123,102 +125,6 @@ instruction_counter:
.space 4
#endif
-/*
- * Exception entry code. This code runs with address translation
- * turned off, i.e. using physical addresses.
- * We assume sprg3 has the physical address of the current
- * task's thread_struct.
- */
-#define EXCEPTION_PROLOG \
- mtspr SPRN_SPRG_SCRATCH0, r10; \
- mtspr SPRN_SPRG_SCRATCH1, r11; \
- mfcr r10; \
- EXCEPTION_PROLOG_1; \
- EXCEPTION_PROLOG_2
-
-#define EXCEPTION_PROLOG_1 \
- mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \
- andi. r11,r11,MSR_PR; \
- tophys(r11,r1); /* use tophys(r1) if kernel */ \
- beq 1f; \
- mfspr r11,SPRN_SPRG_THREAD; \
- lwz r11,TASK_STACK-THREAD(r11); \
- addi r11,r11,THREAD_SIZE; \
- tophys(r11,r11); \
-1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */
-
-
-#define EXCEPTION_PROLOG_2 \
- stw r10,_CCR(r11); /* save registers */ \
- stw r12,GPR12(r11); \
- stw r9,GPR9(r11); \
- mfspr r10,SPRN_SPRG_SCRATCH0; \
- stw r10,GPR10(r11); \
- mfspr r12,SPRN_SPRG_SCRATCH1; \
- stw r12,GPR11(r11); \
- mflr r10; \
- stw r10,_LINK(r11); \
- mfspr r12,SPRN_SRR0; \
- mfspr r9,SPRN_SRR1; \
- stw r1,GPR1(r11); \
- stw r1,0(r11); \
- tovirt(r1,r11); /* set new kernel sp */ \
- li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \
- mtmsr r10; \
- stw r0,GPR0(r11); \
- lis r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */ \
- addi r10, r10, STACK_FRAME_REGS_MARKER@l; \
- stw r10, 8(r11); \
- SAVE_4GPRS(3, r11); \
- SAVE_2GPRS(7, r11)
-
-/*
- * Note: code which follows this uses cr0.eq (set if from kernel),
- * r11, r12 (SRR0), and r9 (SRR1).
- *
- * Note2: once we have set r1 we are in a position to take exceptions
- * again, and we could thus set MSR:RI at that point.
- */
-
-/*
- * Exception vectors.
- */
-#define EXCEPTION(n, label, hdlr, xfer) \
- . = n; \
-label: \
- EXCEPTION_PROLOG; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- xfer(n, hdlr)
-
-#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \
- li r10,trap; \
- stw r10,_TRAP(r11); \
- li r10,MSR_KERNEL; \
- copyee(r10, r9); \
- bl tfer; \
-i##n: \
- .long hdlr; \
- .long ret
-
-#define COPY_EE(d, s) rlwimi d,s,0,16,16
-#define NOCOPY(d, s)
-
-#define EXC_XFER_STD(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \
- ret_from_except)
-
-#define EXC_XFER_EE(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_EE_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \
- ret_from_except)
-
/* System reset */
EXCEPTION(0x100, Reset, system_reset_exception, EXC_XFER_STD)
@@ -261,7 +167,7 @@ Alignment:
mfspr r5,SPRN_DSISR
stw r5,_DSISR(r11)
addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE(0x600, alignment_exception)
+ EXC_XFER_STD(0x600, alignment_exception)
/* Program check exception */
EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
@@ -273,19 +179,18 @@ Alignment:
/* Decrementer */
EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
- EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD)
/* System call */
. = 0xc00
SystemCall:
- EXCEPTION_PROLOG
- EXC_XFER_EE_LITE(0xc00, DoSyscall)
+ SYSCALL_ENTRY 0xc00
/* Single step - not used on 601 */
EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
- EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_STD)
/* On the MPC8xx, this is a software emulation interrupt. It occurs
* for all unimplemented and illegal instructions.
@@ -615,13 +520,13 @@ DARFixed:/* Return from dcbx instruction bug workaround */
/* 0x300 is DataAccess exception, needed by bad_page_fault() */
EXC_XFER_LITE(0x300, handle_page_fault)
- EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD)
/* On the MPC8xx, these next four traps are used for development
* support of breakpoints and such. Someday I will get around to
@@ -643,7 +548,7 @@ DataBreakpoint:
mfspr r4,SPRN_BAR
stw r4,_DAR(r11)
mfspr r5,SPRN_DSISR
- EXC_XFER_EE(0x1c00, do_break)
+ EXC_XFER_STD(0x1c00, do_break)
11:
mtcr r10
mfspr r10, SPRN_SPRG_SCRATCH0
@@ -663,10 +568,10 @@ InstructionBreakpoint:
mfspr r10, SPRN_SPRG_SCRATCH0
rfi
#else
- EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD)
#endif
- EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD)
. = 0x2000
@@ -853,6 +758,9 @@ start_here:
/*
* Decide what sort of machine this is and initialize the MMU.
*/
+#ifdef CONFIG_KASAN
+ bl kasan_early_init
+#endif
li r3,0
mr r4,r31
bl machine_init
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 1b22a8dea399..bfeb469e8106 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -6,6 +6,8 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_booke_hv_asm.h>
+#ifdef __ASSEMBLY__
+
/*
* Macros used for common Book-e exception handling
*/
@@ -81,6 +83,101 @@ END_BTB_FLUSH_SECTION
SAVE_4GPRS(3, r11); \
SAVE_2GPRS(7, r11)
+.macro SYSCALL_ENTRY trapno intno
+ mfspr r10, SPRN_SPRG_THREAD
+#ifdef CONFIG_KVM_BOOKE_HV
+BEGIN_FTR_SECTION
+ mtspr SPRN_SPRG_WSCRATCH0, r10
+ stw r11, THREAD_NORMSAVE(0)(r10)
+ stw r13, THREAD_NORMSAVE(2)(r10)
+ mfcr r13 /* save CR in r13 for now */
+ mfspr r11, SPRN_SRR1
+ mtocrf 0x80, r11 /* check MSR[GS] without clobbering reg */
+ bf 3, 1975f
+ b kvmppc_handler_BOOKE_INTERRUPT_\intno\()_SPRN_SRR1
+1975:
+ mr r12, r13
+ lwz r13, THREAD_NORMSAVE(2)(r10)
+FTR_SECTION_ELSE
+#endif
+ mfcr r12
+#ifdef CONFIG_KVM_BOOKE_HV
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
+#endif
+ BOOKE_CLEAR_BTB(r11)
+ lwz r11, TASK_STACK - THREAD(r10)
+ rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */
+ ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE)
+ stw r12, _CCR(r11) /* save various registers */
+ mflr r12
+ stw r12,_LINK(r11)
+ mfspr r12,SPRN_SRR0
+ stw r1, GPR1(r11)
+ mfspr r9,SPRN_SRR1
+ stw r1, 0(r11)
+ mr r1, r11
+ stw r12,_NIP(r11)
+ rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
+ lis r12, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+ stw r2,GPR2(r11)
+ addi r12, r12, STACK_FRAME_REGS_MARKER@l
+ stw r9,_MSR(r11)
+ li r2, \trapno + 1
+ stw r12, 8(r11)
+ stw r2,_TRAP(r11)
+ SAVE_GPR(0, r11)
+ SAVE_4GPRS(3, r11)
+ SAVE_2GPRS(7, r11)
+
+ addi r11,r1,STACK_FRAME_OVERHEAD
+ addi r2,r10,-THREAD
+ stw r11,PT_REGS(r10)
+ /* Check to see if the dbcr0 register is set up to debug. Use the
+ internal debug mode bit to do this. */
+ lwz r12,THREAD_DBCR0(r10)
+ andis. r12,r12,DBCR0_IDM@h
+ ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
+ beq+ 3f
+ /* From user and task is ptraced - load up global dbcr0 */
+ li r12,-1 /* clear all pending debug events */
+ mtspr SPRN_DBSR,r12
+ lis r11,global_dbcr0@ha
+ tophys(r11,r11)
+ addi r11,r11,global_dbcr0@l
+#ifdef CONFIG_SMP
+ lwz r9,TASK_CPU(r2)
+ slwi r9,r9,3
+ add r11,r11,r9
+#endif
+ lwz r12,0(r11)
+ mtspr SPRN_DBCR0,r12
+ lwz r12,4(r11)
+ addi r12,r12,-1
+ stw r12,4(r11)
+
+3:
+ tovirt(r2, r2) /* set r2 to current */
+ lis r11, transfer_to_syscall@h
+ ori r11, r11, transfer_to_syscall@l
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /*
+ * If MSR is changing we need to keep interrupts disabled at this point
+ * otherwise we might risk taking an interrupt before we tell lockdep
+ * they are enabled.
+ */
+ lis r10, MSR_KERNEL@h
+ ori r10, r10, MSR_KERNEL@l
+ rlwimi r10, r9, 0, MSR_EE
+#else
+ lis r10, (MSR_KERNEL | MSR_EE)@h
+ ori r10, r10, (MSR_KERNEL | MSR_EE)@l
+#endif
+ mtspr SPRN_SRR1,r10
+ mtspr SPRN_SRR0,r11
+ SYNC
+ RFI /* jump to handler, enable MMU */
+.endm
+
/* To handle the additional exception priority levels on 40x and Book-E
* processors we allocate a stack per additional priority level.
*
@@ -217,8 +314,7 @@ label:
CRITICAL_EXCEPTION_PROLOG(intno); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- NOCOPY, crit_transfer_to_handler, \
- ret_from_crit_exc)
+ crit_transfer_to_handler, ret_from_crit_exc)
#define MCHECK_EXCEPTION(n, label, hdlr) \
START_EXCEPTION(label); \
@@ -227,36 +323,23 @@ label:
stw r5,_ESR(r11); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
EXC_XFER_TEMPLATE(hdlr, n+4, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- NOCOPY, mcheck_transfer_to_handler, \
- ret_from_mcheck_exc)
+ mcheck_transfer_to_handler, ret_from_mcheck_exc)
-#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \
+#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \
li r10,trap; \
stw r10,_TRAP(r11); \
lis r10,msr@h; \
ori r10,r10,msr@l; \
- copyee(r10, r9); \
bl tfer; \
.long hdlr; \
.long ret
-#define COPY_EE(d, s) rlwimi d,s,0,16,16
-#define NOCOPY(d, s)
-
#define EXC_XFER_STD(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \
+ EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \
ret_from_except_full)
#define EXC_XFER_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \
- ret_from_except)
-
-#define EXC_XFER_EE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_EE_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \
+ EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \
ret_from_except)
/* Check for a single step debug exception while in an exception
@@ -323,7 +406,7 @@ label:
/* continue normal handling for a debug exception... */ \
2: mfspr r4,SPRN_DBSR; \
addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, debug_transfer_to_handler, ret_from_debug_exc)
+ EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), debug_transfer_to_handler, ret_from_debug_exc)
#define DEBUG_CRIT_EXCEPTION \
START_EXCEPTION(DebugCrit); \
@@ -376,7 +459,7 @@ label:
/* continue normal handling for a critical exception... */ \
2: mfspr r4,SPRN_DBSR; \
addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, crit_transfer_to_handler, ret_from_crit_exc)
+ EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), crit_transfer_to_handler, ret_from_crit_exc)
#define DATA_STORAGE_EXCEPTION \
START_EXCEPTION(DataStorage) \
@@ -401,7 +484,7 @@ label:
mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \
stw r4,_DEAR(r11); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_EE(0x0600, alignment_exception)
+ EXC_XFER_STD(0x0600, alignment_exception)
#define PROGRAM_EXCEPTION \
START_EXCEPTION(Program) \
@@ -426,9 +509,9 @@ label:
bl load_up_fpu; /* if from user, just load it up */ \
b fast_exception_return; \
1: addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception)
+ EXC_XFER_STD(0x800, kernel_fp_unavailable_exception)
-#ifndef __ASSEMBLY__
+#else /* __ASSEMBLY__ */
struct exception_regs {
unsigned long mas0;
unsigned long mas1;
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 32332e24e421..6621f230cc37 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -268,6 +268,9 @@ set_ivor:
/*
* Decide what sort of machine this is and initialize the MMU.
*/
+#ifdef CONFIG_KASAN
+ bl kasan_early_init
+#endif
mr r3,r30
mr r4,r31
bl machine_init
@@ -380,7 +383,7 @@ interrupt_base:
EXC_XFER_LITE(0x0300, handle_page_fault)
1:
addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE_LITE(0x0300, CacheLockingException)
+ EXC_XFER_LITE(0x0300, CacheLockingException)
/* Instruction Storage Interrupt */
INSTRUCTION_STORAGE_EXCEPTION
@@ -401,21 +404,20 @@ interrupt_base:
#ifdef CONFIG_E200
/* E200 treats 'normal' floating point instructions as FP Unavail exception */
EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
- program_check_exception, EXC_XFER_EE)
+ program_check_exception, EXC_XFER_STD)
#else
EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
- unknown_exception, EXC_XFER_EE)
+ unknown_exception, EXC_XFER_STD)
#endif
#endif
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
- NORMAL_EXCEPTION_PROLOG(SYSCALL)
- EXC_XFER_EE_LITE(0x0c00, DoSyscall)
+ SYSCALL_ENTRY 0xc00 SYSCALL
/* Auxiliary Processor Unavailable Interrupt */
EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \
- unknown_exception, EXC_XFER_EE)
+ unknown_exception, EXC_XFER_STD)
/* Decrementer Interrupt */
DECREMENTER_EXCEPTION
@@ -423,7 +425,7 @@ interrupt_base:
/* Fixed Internal Timer Interrupt */
/* TODO: Add FIT support */
EXCEPTION(0x3100, FIT, FixedIntervalTimer, \
- unknown_exception, EXC_XFER_EE)
+ unknown_exception, EXC_XFER_STD)
/* Watchdog Timer Interrupt */
#ifdef CONFIG_BOOKE_WDT
@@ -633,25 +635,25 @@ END_BTB_FLUSH_SECTION
bl load_up_spe
b fast_exception_return
1: addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE_LITE(0x2010, KernelSPE)
+ EXC_XFER_LITE(0x2010, KernelSPE)
#elif defined(CONFIG_SPE_POSSIBLE)
EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \
- unknown_exception, EXC_XFER_EE)
+ unknown_exception, EXC_XFER_STD)
#endif /* CONFIG_SPE_POSSIBLE */
/* SPE Floating Point Data */
#ifdef CONFIG_SPE
EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData,
- SPEFloatingPointException, EXC_XFER_EE)
+ SPEFloatingPointException, EXC_XFER_STD)
/* SPE Floating Point Round */
EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
- SPEFloatingPointRoundException, EXC_XFER_EE)
+ SPEFloatingPointRoundException, EXC_XFER_STD)
#elif defined(CONFIG_SPE_POSSIBLE)
EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData,
- unknown_exception, EXC_XFER_EE)
+ unknown_exception, EXC_XFER_STD)
EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
- unknown_exception, EXC_XFER_EE)
+ unknown_exception, EXC_XFER_STD)
#endif /* CONFIG_SPE_POSSIBLE */
@@ -674,10 +676,10 @@ END_BTB_FLUSH_SECTION
unknown_exception)
/* Hypercall */
- EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_STD)
/* Embedded Hypervisor Privilege */
- EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_STD)
interrupt_end:
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index fec8a6773119..da307dd93ee3 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -29,11 +29,15 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>
+#include <linux/debugfs.h>
+#include <linux/init.h>
#include <asm/hw_breakpoint.h>
#include <asm/processor.h>
#include <asm/sstep.h>
#include <asm/debug.h>
+#include <asm/debugfs.h>
+#include <asm/hvcall.h>
#include <linux/uaccess.h>
/*
@@ -174,7 +178,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
if (!ppc_breakpoint_available())
return -ENODEV;
length_max = 8; /* DABR */
- if (cpu_has_feature(CPU_FTR_DAWR)) {
+ if (dawr_enabled()) {
length_max = 512 ; /* 64 doublewords */
/* DAWR region can't cross 512 boundary */
if ((attr->bp_addr >> 9) !=
@@ -376,3 +380,59 @@ void hw_breakpoint_pmu_read(struct perf_event *bp)
{
/* TODO */
}
+
+bool dawr_force_enable;
+EXPORT_SYMBOL_GPL(dawr_force_enable);
+
+static ssize_t dawr_write_file_bool(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct arch_hw_breakpoint null_brk = {0, 0, 0};
+ size_t rc;
+
+ /* Send error to user if they hypervisor won't allow us to write DAWR */
+ if ((!dawr_force_enable) &&
+ (firmware_has_feature(FW_FEATURE_LPAR)) &&
+ (set_dawr(&null_brk) != H_SUCCESS))
+ return -1;
+
+ rc = debugfs_write_file_bool(file, user_buf, count, ppos);
+ if (rc)
+ return rc;
+
+ /* If we are clearing, make sure all CPUs have the DAWR cleared */
+ if (!dawr_force_enable)
+ smp_call_function((smp_call_func_t)set_dawr, &null_brk, 0);
+
+ return rc;
+}
+
+static const struct file_operations dawr_enable_fops = {
+ .read = debugfs_read_file_bool,
+ .write = dawr_write_file_bool,
+ .open = simple_open,
+ .llseek = default_llseek,
+};
+
+static int __init dawr_force_setup(void)
+{
+ dawr_force_enable = false;
+
+ if (cpu_has_feature(CPU_FTR_DAWR)) {
+ /* Don't setup sysfs file for user control on P8 */
+ dawr_force_enable = true;
+ return 0;
+ }
+
+ if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) {
+ /* Turn DAWR off by default, but allow admin to turn it on */
+ dawr_force_enable = false;
+ debugfs_create_file_unsafe("dawr_enable_dangerous", 0600,
+ powerpc_debugfs_root,
+ &dawr_force_enable,
+ &dawr_enable_fops);
+ }
+ return 0;
+}
+arch_initcall(dawr_force_setup);
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 7f5ac2e8581b..2dfbd5d5b932 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -1,956 +1,188 @@
/*
- * This file contains idle entry/exit functions for POWER7,
- * POWER8 and POWER9 CPUs.
+ * Copyright 2018, IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
+ *
+ * This file contains general idle entry/exit functions to save
+ * and restore stack and NVGPRs which allows C code to call idle
+ * states that lose GPRs, and it will return transparently with
+ * SRR1 wakeup reason return value.
+ *
+ * The platform / CPU caller must ensure SPRs and any other non-GPR
+ * state is saved and restored correctly, handle KVM, interrupts, etc.
*/
-#include <linux/threads.h>
-#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/cputable.h>
-#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ppc-opcode.h>
-#include <asm/hw_irq.h>
-#include <asm/kvm_book3s_asm.h>
-#include <asm/opal.h>
#include <asm/cpuidle.h>
-#include <asm/exception-64s.h>
-#include <asm/book3s/64/mmu-hash.h>
-#include <asm/mmu.h>
-#include <asm/asm-compat.h>
-#include <asm/feature-fixups.h>
-
-#undef DEBUG
-
-/*
- * Use unused space in the interrupt stack to save and restore
- * registers for winkle support.
- */
-#define _MMCR0 GPR0
-#define _SDR1 GPR3
-#define _PTCR GPR3
-#define _RPR GPR4
-#define _SPURR GPR5
-#define _PURR GPR6
-#define _TSCR GPR7
-#define _DSCR GPR8
-#define _AMOR GPR9
-#define _WORT GPR10
-#define _WORC GPR11
-#define _LPCR GPR12
-
-#define PSSCR_EC_ESL_MASK_SHIFTED (PSSCR_EC | PSSCR_ESL) >> 16
- .text
-
-/*
- * Used by threads before entering deep idle states. Saves SPRs
- * in interrupt stack frame
- */
-save_sprs_to_stack:
- /*
- * Note all register i.e per-core, per-subcore or per-thread is saved
- * here since any thread in the core might wake up first
- */
-BEGIN_FTR_SECTION
- /*
- * Note - SDR1 is dropped in Power ISA v3. Hence not restoring
- * SDR1 here
- */
- mfspr r3,SPRN_PTCR
- std r3,_PTCR(r1)
- mfspr r3,SPRN_LPCR
- std r3,_LPCR(r1)
-FTR_SECTION_ELSE
- mfspr r3,SPRN_SDR1
- std r3,_SDR1(r1)
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
- mfspr r3,SPRN_RPR
- std r3,_RPR(r1)
- mfspr r3,SPRN_SPURR
- std r3,_SPURR(r1)
- mfspr r3,SPRN_PURR
- std r3,_PURR(r1)
- mfspr r3,SPRN_TSCR
- std r3,_TSCR(r1)
- mfspr r3,SPRN_DSCR
- std r3,_DSCR(r1)
- mfspr r3,SPRN_AMOR
- std r3,_AMOR(r1)
- mfspr r3,SPRN_WORT
- std r3,_WORT(r1)
- mfspr r3,SPRN_WORC
- std r3,_WORC(r1)
/*
- * On POWER9, there are idle states such as stop4, invoked via cpuidle,
- * that lose hypervisor resources. In such cases, we need to save
- * additional SPRs before entering those idle states so that they can
- * be restored to their older values on wakeup from the idle state.
+ * Desired PSSCR in r3
*
- * On POWER8, the only such deep idle state is winkle which is used
- * only in the context of CPU-Hotplug, where these additional SPRs are
- * reinitiazed to a sane value. Hence there is no need to save/restore
- * these SPRs.
+ * No state will be lost regardless of wakeup mechanism (interrupt or NIA).
+ *
+ * An EC=0 type wakeup will return with a value of 0. SRESET wakeup (which can
+ * happen with xscom SRESET and possibly MCE) may clobber volatiles except LR,
+ * and must blr, to return to caller with r3 set according to caller's expected
+ * return code (for Book3S/64 that is SRR1).
*/
-BEGIN_FTR_SECTION
- blr
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
-
-power9_save_additional_sprs:
- mfspr r3, SPRN_PID
- mfspr r4, SPRN_LDBAR
- std r3, STOP_PID(r13)
- std r4, STOP_LDBAR(r13)
-
- mfspr r3, SPRN_FSCR
- mfspr r4, SPRN_HFSCR
- std r3, STOP_FSCR(r13)
- std r4, STOP_HFSCR(r13)
-
- mfspr r3, SPRN_MMCRA
- mfspr r4, SPRN_MMCR0
- std r3, STOP_MMCRA(r13)
- std r4, _MMCR0(r1)
-
- mfspr r3, SPRN_MMCR1
- mfspr r4, SPRN_MMCR2
- std r3, STOP_MMCR1(r13)
- std r4, STOP_MMCR2(r13)
- blr
-
-power9_restore_additional_sprs:
- ld r3,_LPCR(r1)
- ld r4, STOP_PID(r13)
- mtspr SPRN_LPCR,r3
- mtspr SPRN_PID, r4
-
- ld r3, STOP_LDBAR(r13)
- ld r4, STOP_FSCR(r13)
- mtspr SPRN_LDBAR, r3
- mtspr SPRN_FSCR, r4
-
- ld r3, STOP_HFSCR(r13)
- ld r4, STOP_MMCRA(r13)
- mtspr SPRN_HFSCR, r3
- mtspr SPRN_MMCRA, r4
-
- ld r3, _MMCR0(r1)
- ld r4, STOP_MMCR1(r13)
- mtspr SPRN_MMCR0, r3
- mtspr SPRN_MMCR1, r4
-
- ld r3, STOP_MMCR2(r13)
- ld r4, PACA_SPRG_VDSO(r13)
- mtspr SPRN_MMCR2, r3
- mtspr SPRN_SPRG3, r4
+_GLOBAL(isa300_idle_stop_noloss)
+ mtspr SPRN_PSSCR,r3
+ PPC_STOP
+ li r3,0
blr
/*
- * Used by threads when the lock bit of core_idle_state is set.
- * Threads will spin in HMT_LOW until the lock bit is cleared.
- * r14 - pointer to core_idle_state
- * r15 - used to load contents of core_idle_state
- * r9 - used as a temporary variable
+ * Desired PSSCR in r3
+ *
+ * GPRs may be lost, so they are saved here. Wakeup is by interrupt only.
+ * The SRESET wakeup returns to this function's caller by calling
+ * idle_return_gpr_loss with r3 set to desired return value.
+ *
+ * A wakeup without GPR loss may alteratively be handled as in
+ * isa300_idle_stop_noloss and blr directly, as an optimisation.
+ *
+ * The caller is responsible for saving/restoring SPRs, MSR, timebase,
+ * etc.
*/
-
-core_idle_lock_held:
- HMT_LOW
-3: lwz r15,0(r14)
- andis. r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
- bne 3b
- HMT_MEDIUM
- lwarx r15,0,r14
- andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
- bne- core_idle_lock_held
- blr
+_GLOBAL(isa300_idle_stop_mayloss)
+ mtspr SPRN_PSSCR,r3
+ std r1,PACAR1(r13)
+ mflr r4
+ mfcr r5
+ /* use stack red zone rather than a new frame for saving regs */
+ std r2,-8*0(r1)
+ std r14,-8*1(r1)
+ std r15,-8*2(r1)
+ std r16,-8*3(r1)
+ std r17,-8*4(r1)
+ std r18,-8*5(r1)
+ std r19,-8*6(r1)
+ std r20,-8*7(r1)
+ std r21,-8*8(r1)
+ std r22,-8*9(r1)
+ std r23,-8*10(r1)
+ std r24,-8*11(r1)
+ std r25,-8*12(r1)
+ std r26,-8*13(r1)
+ std r27,-8*14(r1)
+ std r28,-8*15(r1)
+ std r29,-8*16(r1)
+ std r30,-8*17(r1)
+ std r31,-8*18(r1)
+ std r4,-8*19(r1)
+ std r5,-8*20(r1)
+ /* 168 bytes */
+ PPC_STOP
+ b . /* catch bugs */
/*
- * Pass requested state in r3:
- * r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8
- * - Requested PSSCR value in POWER9
+ * Desired return value in r3
+ *
+ * The idle wakeup SRESET interrupt can call this after calling
+ * to return to the idle sleep function caller with r3 as the return code.
*
- * Address of idle handler to branch to in realmode in r4
+ * This must not be used if idle was entered via a _noloss function (use
+ * a simple blr instead).
*/
-pnv_powersave_common:
- /* Use r3 to pass state nap/sleep/winkle */
- /* NAP is a state loss, we create a regs frame on the
- * stack, fill it up with the state we care about and
- * stick a pointer to it in PACAR1. We really only
- * need to save PC, some CR bits and the NV GPRs,
- * but for now an interrupt frame will do.
- */
- mtctr r4
-
- mflr r0
- std r0,16(r1)
- stdu r1,-INT_FRAME_SIZE(r1)
- std r0,_LINK(r1)
- std r0,_NIP(r1)
-
- /* We haven't lost state ... yet */
- li r0,0
- stb r0,PACA_NAPSTATELOST(r13)
-
- /* Continue saving state */
- SAVE_GPR(2, r1)
- SAVE_NVGPRS(r1)
- mfcr r5
- std r5,_CCR(r1)
- std r1,PACAR1(r13)
-
-BEGIN_FTR_SECTION
- /*
- * POWER9 does not require real mode to stop, and presently does not
- * set hwthread_state for KVM (threads don't share MMU context), so
- * we can remain in virtual mode for this.
- */
- bctr
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
- /*
- * POWER8
- * Go to real mode to do the nap, as required by the architecture.
- * Also, we need to be in real mode before setting hwthread_state,
- * because as soon as we do that, another thread can switch
- * the MMU context to the guest.
- */
- LOAD_REG_IMMEDIATE(r7, MSR_IDLE)
- mtmsrd r7,0
- bctr
+_GLOBAL(idle_return_gpr_loss)
+ ld r1,PACAR1(r13)
+ ld r4,-8*19(r1)
+ ld r5,-8*20(r1)
+ mtlr r4
+ mtcr r5
+ /*
+ * KVM nap requires r2 to be saved, rather than just restoring it
+ * from PACATOC. This could be avoided for that less common case
+ * if KVM saved its r2.
+ */
+ ld r2,-8*0(r1)
+ ld r14,-8*1(r1)
+ ld r15,-8*2(r1)
+ ld r16,-8*3(r1)
+ ld r17,-8*4(r1)
+ ld r18,-8*5(r1)
+ ld r19,-8*6(r1)
+ ld r20,-8*7(r1)
+ ld r21,-8*8(r1)
+ ld r22,-8*9(r1)
+ ld r23,-8*10(r1)
+ ld r24,-8*11(r1)
+ ld r25,-8*12(r1)
+ ld r26,-8*13(r1)
+ ld r27,-8*14(r1)
+ ld r28,-8*15(r1)
+ ld r29,-8*16(r1)
+ ld r30,-8*17(r1)
+ ld r31,-8*18(r1)
+ blr
/*
* This is the sequence required to execute idle instructions, as
* specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0.
+ *
+ * The 0(r1) slot is used to save r2 in isa206, so use that here.
*/
#define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \
/* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
- std r0,0(r1); \
+ std r2,0(r1); \
ptesync; \
- ld r0,0(r1); \
-236: cmpd cr0,r0,r0; \
+ ld r2,0(r1); \
+236: cmpd cr0,r2,r2; \
bne 236b; \
- IDLE_INST;
-
-
- .globl pnv_enter_arch207_idle_mode
-pnv_enter_arch207_idle_mode:
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- /* Tell KVM we're entering idle */
- li r4,KVM_HWTHREAD_IN_IDLE
- /******************************************************/
- /* N O T E W E L L ! ! ! N O T E W E L L */
- /* The following store to HSTATE_HWTHREAD_STATE(r13) */
- /* MUST occur in real mode, i.e. with the MMU off, */
- /* and the MMU must stay off until we clear this flag */
- /* and test HSTATE_HWTHREAD_REQ(r13) in */
- /* pnv_powersave_wakeup in this file. */
- /* The reason is that another thread can switch the */
- /* MMU to a guest context whenever this flag is set */
- /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */
- /* that would potentially cause this thread to start */
- /* executing instructions from guest memory in */
- /* hypervisor mode, leading to a host crash or data */
- /* corruption, or worse. */
- /******************************************************/
- stb r4,HSTATE_HWTHREAD_STATE(r13)
-#endif
- stb r3,PACA_THREAD_IDLE_STATE(r13)
- cmpwi cr3,r3,PNV_THREAD_SLEEP
- bge cr3,2f
- IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
- /* No return */
-2:
- /* Sleep or winkle */
- lbz r7,PACA_THREAD_MASK(r13)
- ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
- li r5,0
- beq cr3,3f
- lis r5,PNV_CORE_IDLE_WINKLE_COUNT@h
-3:
-lwarx_loop1:
- lwarx r15,0,r14
-
- andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
- bnel- core_idle_lock_held
-
- add r15,r15,r5 /* Add if winkle */
- andc r15,r15,r7 /* Clear thread bit */
-
- andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS
-
-/*
- * If cr0 = 0, then current thread is the last thread of the core entering
- * sleep. Last thread needs to execute the hardware bug workaround code if
- * required by the platform.
- * Make the workaround call unconditionally here. The below branch call is
- * patched out when the idle states are discovered if the platform does not
- * require it.
- */
-.global pnv_fastsleep_workaround_at_entry
-pnv_fastsleep_workaround_at_entry:
- beq fastsleep_workaround_at_entry
-
- stwcx. r15,0,r14
- bne- lwarx_loop1
- isync
-
-common_enter: /* common code for all the threads entering sleep or winkle */
- bgt cr3,enter_winkle
- IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
-
-fastsleep_workaround_at_entry:
- oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
- stwcx. r15,0,r14
- bne- lwarx_loop1
- isync
-
- /* Fast sleep workaround */
- li r3,1
- li r4,1
- bl opal_config_cpu_idle_state
-
- /* Unlock */
- xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
- lwsync
- stw r15,0(r14)
- b common_enter
-
-enter_winkle:
- bl save_sprs_to_stack
-
- IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
-
-/*
- * r3 - PSSCR value corresponding to the requested stop state.
- */
-power_enter_stop:
-/*
- * Check if we are executing the lite variant with ESL=EC=0
- */
- andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
- clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
- bne .Lhandle_esl_ec_set
- PPC_STOP
- li r3,0 /* Since we didn't lose state, return 0 */
- std r3, PACA_REQ_PSSCR(r13)
-
- /*
- * pnv_wakeup_noloss() expects r12 to contain the SRR1 value so
- * it can determine if the wakeup reason is an HMI in
- * CHECK_HMI_INTERRUPT.
- *
- * However, when we wakeup with ESL=0, SRR1 will not contain the wakeup
- * reason, so there is no point setting r12 to SRR1.
- *
- * Further, we clear r12 here, so that we don't accidentally enter the
- * HMI in pnv_wakeup_noloss() if the value of r12[42:45] == WAKE_HMI.
- */
- li r12, 0
- b pnv_wakeup_noloss
-
-.Lhandle_esl_ec_set:
-BEGIN_FTR_SECTION
- /*
- * POWER9 DD2.0 or earlier can incorrectly set PMAO when waking up after
- * a state-loss idle. Saving and restoring MMCR0 over idle is a
- * workaround.
- */
- mfspr r4,SPRN_MMCR0
- std r4,_MMCR0(r1)
-END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1)
+ IDLE_INST; \
+ b . /* catch bugs */
/*
- * Check if the requested state is a deep idle state.
- */
- LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
- ld r4,ADDROFF(pnv_first_deep_stop_state)(r5)
- cmpd r3,r4
- bge .Lhandle_deep_stop
- PPC_STOP /* Does not return (system reset interrupt) */
-
-.Lhandle_deep_stop:
-/*
- * Entering deep idle state.
- * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to
- * stack and enter stop
- */
- lbz r7,PACA_THREAD_MASK(r13)
- ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
-
-lwarx_loop_stop:
- lwarx r15,0,r14
- andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
- bnel- core_idle_lock_held
- andc r15,r15,r7 /* Clear thread bit */
-
- stwcx. r15,0,r14
- bne- lwarx_loop_stop
- isync
-
- bl save_sprs_to_stack
-
- PPC_STOP /* Does not return (system reset interrupt) */
-
-/*
- * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
- * r3 contains desired idle state (PNV_THREAD_NAP/SLEEP/WINKLE).
- */
-_GLOBAL(power7_idle_insn)
- /* Now check if user or arch enabled NAP mode */
- LOAD_REG_ADDR(r4, pnv_enter_arch207_idle_mode)
- b pnv_powersave_common
-
-#define CHECK_HMI_INTERRUPT \
-BEGIN_FTR_SECTION_NESTED(66); \
- rlwinm r0,r12,45-31,0xf; /* extract wake reason field (P8) */ \
-FTR_SECTION_ELSE_NESTED(66); \
- rlwinm r0,r12,45-31,0xe; /* P7 wake reason field is 3 bits */ \
-ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
- cmpwi r0,0xa; /* Hypervisor maintenance ? */ \
- bne+ 20f; \
- /* Invoke opal call to handle hmi */ \
- ld r2,PACATOC(r13); \
- ld r1,PACAR1(r13); \
- std r3,ORIG_GPR3(r1); /* Save original r3 */ \
- li r3,0; /* NULL argument */ \
- bl hmi_exception_realmode; \
- nop; \
- ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \
-20: nop;
-
-/*
- * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
- * r3 contains desired PSSCR register value.
+ * Desired instruction type in r3
*
- * Offline (CPU unplug) case also must notify KVM that the CPU is
- * idle.
- */
-_GLOBAL(power9_offline_stop)
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- /*
- * Tell KVM we're entering idle.
- * This does not have to be done in real mode because the P9 MMU
- * is independent per-thread. Some steppings share radix/hash mode
- * between threads, but in that case KVM has a barrier sync in real
- * mode before and after switching between radix and hash.
- */
- li r4,KVM_HWTHREAD_IN_IDLE
- stb r4,HSTATE_HWTHREAD_STATE(r13)
-#endif
- /* fall through */
-
-_GLOBAL(power9_idle_stop)
- std r3, PACA_REQ_PSSCR(r13)
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-BEGIN_FTR_SECTION
- sync
- lwz r5, PACA_DONT_STOP(r13)
- cmpwi r5, 0
- bne 1f
-END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
-#endif
- mtspr SPRN_PSSCR,r3
- LOAD_REG_ADDR(r4,power_enter_stop)
- b pnv_powersave_common
- /* No return */
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-1:
- /*
- * We get here when TM / thread reconfiguration bug workaround
- * code wants to get the CPU into SMT4 mode, and therefore
- * we are being asked not to stop.
- */
- li r3, 0
- std r3, PACA_REQ_PSSCR(r13)
- blr /* return 0 for wakeup cause / SRR1 value */
-#endif
-
-/*
- * Called from machine check handler for powersave wakeups.
- * Low level machine check processing has already been done. Now just
- * go through the wake up path to get everything in order.
+ * GPRs may be lost, so they are saved here. Wakeup is by interrupt only.
+ * The SRESET wakeup returns to this function's caller by calling
+ * idle_return_gpr_loss with r3 set to desired return value.
*
- * r3 - The original SRR1 value.
- * Original SRR[01] have been clobbered.
- * MSR_RI is clear.
- */
-.global pnv_powersave_wakeup_mce
-pnv_powersave_wakeup_mce:
- /* Set cr3 for pnv_powersave_wakeup */
- rlwinm r11,r3,47-31,30,31
- cmpwi cr3,r11,2
-
- /*
- * Now put the original SRR1 with SRR1_WAKEMCE_RESVD as the wake
- * reason into r12, which allows reuse of the system reset wakeup
- * code without being mistaken for another type of wakeup.
- */
- oris r12,r3,SRR1_WAKEMCE_RESVD@h
-
- b pnv_powersave_wakeup
-
-/*
- * Called from reset vector for powersave wakeups.
- * cr3 - set to gt if waking up with partial/complete hypervisor state loss
- * r12 - SRR1
- */
-.global pnv_powersave_wakeup
-pnv_powersave_wakeup:
- ld r2, PACATOC(r13)
-
-BEGIN_FTR_SECTION
- bl pnv_restore_hyp_resource_arch300
-FTR_SECTION_ELSE
- bl pnv_restore_hyp_resource_arch207
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
-
- li r0,PNV_THREAD_RUNNING
- stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */
-
- mr r3,r12
-
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- lbz r0,HSTATE_HWTHREAD_STATE(r13)
- cmpwi r0,KVM_HWTHREAD_IN_KERNEL
- beq 0f
- li r0,KVM_HWTHREAD_IN_KERNEL
- stb r0,HSTATE_HWTHREAD_STATE(r13)
- /* Order setting hwthread_state vs. testing hwthread_req */
- sync
-0: lbz r0,HSTATE_HWTHREAD_REQ(r13)
- cmpwi r0,0
- beq 1f
- b kvm_start_guest
-1:
-#endif
-
- /* Return SRR1 from power7_nap() */
- blt cr3,pnv_wakeup_noloss
- b pnv_wakeup_loss
-
-/*
- * Check whether we have woken up with hypervisor state loss.
- * If yes, restore hypervisor state and return back to link.
+ * A wakeup without GPR loss may alteratively be handled as in
+ * isa300_idle_stop_noloss and blr directly, as an optimisation.
*
- * cr3 - set to gt if waking up with partial/complete hypervisor state loss
- */
-pnv_restore_hyp_resource_arch300:
- /*
- * Workaround for POWER9, if we lost resources, the ERAT
- * might have been mixed up and needs flushing. We also need
- * to reload MMCR0 (see comment above). We also need to set
- * then clear bit 60 in MMCRA to ensure the PMU starts running.
- */
- blt cr3,1f
-BEGIN_FTR_SECTION
- PPC_INVALIDATE_ERAT
- ld r1,PACAR1(r13)
- ld r4,_MMCR0(r1)
- mtspr SPRN_MMCR0,r4
-END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1)
- mfspr r4,SPRN_MMCRA
- ori r4,r4,(1 << (63-60))
- mtspr SPRN_MMCRA,r4
- xori r4,r4,(1 << (63-60))
- mtspr SPRN_MMCRA,r4
-1:
- /*
- * POWER ISA 3. Use PSSCR to determine if we
- * are waking up from deep idle state
- */
- LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
- ld r4,ADDROFF(pnv_first_deep_stop_state)(r5)
-
- /*
- * 0-3 bits correspond to Power-Saving Level Status
- * which indicates the idle state we are waking up from
- */
- mfspr r5, SPRN_PSSCR
- rldicl r5,r5,4,60
- li r0, 0 /* clear requested_psscr to say we're awake */
- std r0, PACA_REQ_PSSCR(r13)
- cmpd cr4,r5,r4
- bge cr4,pnv_wakeup_tb_loss /* returns to caller */
-
- blr /* Waking up without hypervisor state loss. */
-
-/* Same calling convention as arch300 */
-pnv_restore_hyp_resource_arch207:
- /*
- * POWER ISA 2.07 or less.
- * Check if we slept with sleep or winkle.
- */
- lbz r4,PACA_THREAD_IDLE_STATE(r13)
- cmpwi cr2,r4,PNV_THREAD_NAP
- bgt cr2,pnv_wakeup_tb_loss /* Either sleep or Winkle */
-
- /*
- * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking
- * up from nap. At this stage CR3 shouldn't contains 'gt' since that
- * indicates we are waking with hypervisor state loss from nap.
- */
- bgt cr3,.
-
- blr /* Waking up without hypervisor state loss */
-
-/*
- * Called if waking up from idle state which can cause either partial or
- * complete hyp state loss.
- * In POWER8, called if waking up from fastsleep or winkle
- * In POWER9, called if waking up from stop state >= pnv_first_deep_stop_state
- *
- * r13 - PACA
- * cr3 - gt if waking up with partial/complete hypervisor state loss
- *
- * If ISA300:
- * cr4 - gt or eq if waking up from complete hypervisor state loss.
+ * The caller is responsible for saving/restoring SPRs, MSR, timebase,
+ * etc.
*
- * If ISA207:
- * r4 - PACA_THREAD_IDLE_STATE
+ * This must be called in real-mode (MSR_IDLE).
*/
-pnv_wakeup_tb_loss:
- ld r1,PACAR1(r13)
- /*
- * Before entering any idle state, the NVGPRs are saved in the stack.
- * If there was a state loss, or PACA_NAPSTATELOST was set, then the
- * NVGPRs are restored. If we are here, it is likely that state is lost,
- * but not guaranteed -- neither ISA207 nor ISA300 tests to reach
- * here are the same as the test to restore NVGPRS:
- * PACA_THREAD_IDLE_STATE test for ISA207, PSSCR test for ISA300,
- * and SRR1 test for restoring NVGPRs.
- *
- * We are about to clobber NVGPRs now, so set NAPSTATELOST to
- * guarantee they will always be restored. This might be tightened
- * with careful reading of specs (particularly for ISA300) but this
- * is already a slow wakeup path and it's simpler to be safe.
- */
- li r0,1
- stb r0,PACA_NAPSTATELOST(r13)
-
- /*
- *
- * Save SRR1 and LR in NVGPRs as they might be clobbered in
- * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required
- * to determine the wakeup reason if we branch to kvm_start_guest. LR
- * is required to return back to reset vector after hypervisor state
- * restore is complete.
- */
- mr r19,r12
- mr r18,r4
- mflr r17
-BEGIN_FTR_SECTION
- CHECK_HMI_INTERRUPT
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
-
- ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
- lbz r7,PACA_THREAD_MASK(r13)
-
- /*
- * Take the core lock to synchronize against other threads.
- *
- * Lock bit is set in one of the 2 cases-
- * a. In the sleep/winkle enter path, the last thread is executing
- * fastsleep workaround code.
- * b. In the wake up path, another thread is executing fastsleep
- * workaround undo code or resyncing timebase or restoring context
- * In either case loop until the lock bit is cleared.
- */
-1:
- lwarx r15,0,r14
- andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
- bnel- core_idle_lock_held
- oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
- stwcx. r15,0,r14
- bne- 1b
- isync
-
- andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS
- cmpwi cr2,r9,0
-
- /*
- * At this stage
- * cr2 - eq if first thread to wakeup in core
- * cr3- gt if waking up with partial/complete hypervisor state loss
- * ISA300:
- * cr4 - gt or eq if waking up from complete hypervisor state loss.
- */
-
-BEGIN_FTR_SECTION
- /*
- * Were we in winkle?
- * If yes, check if all threads were in winkle, decrement our
- * winkle count, set all thread winkle bits if all were in winkle.
- * Check if our thread has a winkle bit set, and set cr4 accordingly
- * (to match ISA300, above). Pseudo-code for core idle state
- * transitions for ISA207 is as follows (everything happens atomically
- * due to store conditional and/or lock bit):
- *
- * nap_idle() { }
- * nap_wake() { }
- *
- * sleep_idle()
- * {
- * core_idle_state &= ~thread_in_core
- * }
- *
- * sleep_wake()
- * {
- * bool first_in_core, first_in_subcore;
- *
- * first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
- * first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
- *
- * core_idle_state |= thread_in_core;
- * }
- *
- * winkle_idle()
- * {
- * core_idle_state &= ~thread_in_core;
- * core_idle_state += 1 << WINKLE_COUNT_SHIFT;
- * }
- *
- * winkle_wake()
- * {
- * bool first_in_core, first_in_subcore, winkle_state_lost;
- *
- * first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
- * first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
- *
- * core_idle_state |= thread_in_core;
- *
- * if ((core_idle_state & WINKLE_MASK) == (8 << WINKLE_COUNT_SIHFT))
- * core_idle_state |= THREAD_WINKLE_BITS;
- * core_idle_state -= 1 << WINKLE_COUNT_SHIFT;
- *
- * winkle_state_lost = core_idle_state &
- * (thread_in_core << WINKLE_THREAD_SHIFT);
- * core_idle_state &= ~(thread_in_core << WINKLE_THREAD_SHIFT);
- * }
- *
- */
- cmpwi r18,PNV_THREAD_WINKLE
+_GLOBAL(isa206_idle_insn_mayloss)
+ std r1,PACAR1(r13)
+ mflr r4
+ mfcr r5
+ /* use stack red zone rather than a new frame for saving regs */
+ std r2,-8*0(r1)
+ std r14,-8*1(r1)
+ std r15,-8*2(r1)
+ std r16,-8*3(r1)
+ std r17,-8*4(r1)
+ std r18,-8*5(r1)
+ std r19,-8*6(r1)
+ std r20,-8*7(r1)
+ std r21,-8*8(r1)
+ std r22,-8*9(r1)
+ std r23,-8*10(r1)
+ std r24,-8*11(r1)
+ std r25,-8*12(r1)
+ std r26,-8*13(r1)
+ std r27,-8*14(r1)
+ std r28,-8*15(r1)
+ std r29,-8*16(r1)
+ std r30,-8*17(r1)
+ std r31,-8*18(r1)
+ std r4,-8*19(r1)
+ std r5,-8*20(r1)
+ cmpwi r3,PNV_THREAD_NAP
+ bne 1f
+ IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
+1: cmpwi r3,PNV_THREAD_SLEEP
bne 2f
- andis. r9,r15,PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT@h
- subis r15,r15,PNV_CORE_IDLE_WINKLE_COUNT@h
- beq 2f
- ori r15,r15,PNV_CORE_IDLE_THREAD_WINKLE_BITS /* all were winkle */
-2:
- /* Shift thread bit to winkle mask, then test if this thread is set,
- * and remove it from the winkle bits */
- slwi r8,r7,8
- and r8,r8,r15
- andc r15,r15,r8
- cmpwi cr4,r8,1 /* cr4 will be gt if our bit is set, lt if not */
-
- lbz r4,PACA_SUBCORE_SIBLING_MASK(r13)
- and r4,r4,r15
- cmpwi r4,0 /* Check if first in subcore */
-
- or r15,r15,r7 /* Set thread bit */
- beq first_thread_in_subcore
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
-
- or r15,r15,r7 /* Set thread bit */
- beq cr2,first_thread_in_core
-
- /* Not first thread in core or subcore to wake up */
- b clear_lock
-
-first_thread_in_subcore:
- /*
- * If waking up from sleep, subcore state is not lost. Hence
- * skip subcore state restore
- */
- blt cr4,subcore_state_restored
-
- /* Restore per-subcore state */
- ld r4,_SDR1(r1)
- mtspr SPRN_SDR1,r4
-
- ld r4,_RPR(r1)
- mtspr SPRN_RPR,r4
- ld r4,_AMOR(r1)
- mtspr SPRN_AMOR,r4
-
-subcore_state_restored:
- /*
- * Check if the thread is also the first thread in the core. If not,
- * skip to clear_lock.
- */
- bne cr2,clear_lock
-
-first_thread_in_core:
-
- /*
- * First thread in the core waking up from any state which can cause
- * partial or complete hypervisor state loss. It needs to
- * call the fastsleep workaround code if the platform requires it.
- * Call it unconditionally here. The below branch instruction will
- * be patched out if the platform does not have fastsleep or does not
- * require the workaround. Patching will be performed during the
- * discovery of idle-states.
- */
-.global pnv_fastsleep_workaround_at_exit
-pnv_fastsleep_workaround_at_exit:
- b fastsleep_workaround_at_exit
-
-timebase_resync:
- /*
- * Use cr3 which indicates that we are waking up with atleast partial
- * hypervisor state loss to determine if TIMEBASE RESYNC is needed.
- */
- ble cr3,.Ltb_resynced
- /* Time base re-sync */
- bl opal_resync_timebase;
- /*
- * If waking up from sleep (POWER8), per core state
- * is not lost, skip to clear_lock.
- */
-.Ltb_resynced:
- blt cr4,clear_lock
-
- /*
- * First thread in the core to wake up and its waking up with
- * complete hypervisor state loss. Restore per core hypervisor
- * state.
- */
-BEGIN_FTR_SECTION
- ld r4,_PTCR(r1)
- mtspr SPRN_PTCR,r4
- ld r4,_RPR(r1)
- mtspr SPRN_RPR,r4
- ld r4,_AMOR(r1)
- mtspr SPRN_AMOR,r4
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-
- ld r4,_TSCR(r1)
- mtspr SPRN_TSCR,r4
- ld r4,_WORC(r1)
- mtspr SPRN_WORC,r4
-
-clear_lock:
- xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
- lwsync
- stw r15,0(r14)
-
-common_exit:
- /*
- * Common to all threads.
- *
- * If waking up from sleep, hypervisor state is not lost. Hence
- * skip hypervisor state restore.
- */
- blt cr4,hypervisor_state_restored
-
- /* Waking up from winkle */
-
-BEGIN_MMU_FTR_SECTION
- b no_segments
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
- /* Restore SLB from PACA */
- ld r8,PACA_SLBSHADOWPTR(r13)
-
- .rept SLB_NUM_BOLTED
- li r3, SLBSHADOW_SAVEAREA
- LDX_BE r5, r8, r3
- addi r3, r3, 8
- LDX_BE r6, r8, r3
- andis. r7,r5,SLB_ESID_V@h
- beq 1f
- slbmte r6,r5
-1: addi r8,r8,16
- .endr
-no_segments:
-
- /* Restore per thread state */
-
- ld r4,_SPURR(r1)
- mtspr SPRN_SPURR,r4
- ld r4,_PURR(r1)
- mtspr SPRN_PURR,r4
- ld r4,_DSCR(r1)
- mtspr SPRN_DSCR,r4
- ld r4,_WORT(r1)
- mtspr SPRN_WORT,r4
-
- /* Call cur_cpu_spec->cpu_restore() */
- LOAD_REG_ADDR(r4, cur_cpu_spec)
- ld r4,0(r4)
- ld r12,CPU_SPEC_RESTORE(r4)
-#ifdef PPC64_ELF_ABI_v1
- ld r12,0(r12)
-#endif
- mtctr r12
- bctrl
-
-/*
- * On POWER9, we can come here on wakeup from a cpuidle stop state.
- * Hence restore the additional SPRs to the saved value.
- *
- * On POWER8, we come here only on winkle. Since winkle is used
- * only in the case of CPU-Hotplug, we don't need to restore
- * the additional SPRs.
- */
-BEGIN_FTR_SECTION
- bl power9_restore_additional_sprs
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-hypervisor_state_restored:
-
- mr r12,r19
- mtlr r17
- blr /* return to pnv_powersave_wakeup */
-
-fastsleep_workaround_at_exit:
- li r3,1
- li r4,0
- bl opal_config_cpu_idle_state
- b timebase_resync
-
-/*
- * R3 here contains the value that will be returned to the caller
- * of power7_nap.
- * R12 contains SRR1 for CHECK_HMI_INTERRUPT.
- */
-.global pnv_wakeup_loss
-pnv_wakeup_loss:
- ld r1,PACAR1(r13)
-BEGIN_FTR_SECTION
- CHECK_HMI_INTERRUPT
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
- REST_NVGPRS(r1)
- REST_GPR(2, r1)
- ld r4,PACAKMSR(r13)
- ld r5,_LINK(r1)
- ld r6,_CCR(r1)
- addi r1,r1,INT_FRAME_SIZE
- mtlr r5
- mtcr r6
- mtmsrd r4
- blr
+ IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
+2: IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
-/*
- * R3 here contains the value that will be returned to the caller
- * of power7_nap.
- * R12 contains SRR1 for CHECK_HMI_INTERRUPT.
- */
-pnv_wakeup_noloss:
- lbz r0,PACA_NAPSTATELOST(r13)
- cmpwi r0,0
- bne pnv_wakeup_loss
- ld r1,PACAR1(r13)
-BEGIN_FTR_SECTION
- CHECK_HMI_INTERRUPT
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
- ld r4,PACAKMSR(r13)
- ld r5,_NIP(r1)
- ld r6,_CCR(r1)
- addi r1,r1,INT_FRAME_SIZE
- mtlr r5
- mtcr r6
- mtmsrd r4
- blr
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 8a936723c791..ada901af4950 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -81,10 +81,7 @@
DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
-int __irq_offset_value;
-
#ifdef CONFIG_PPC32
-EXPORT_SYMBOL(__irq_offset_value);
atomic_t ppc_n_lost_interrupts;
#ifdef CONFIG_TAU_INT
@@ -261,16 +258,9 @@ notrace void arch_local_irq_restore(unsigned long mask)
*/
irq_happened = get_irq_happened();
if (!irq_happened) {
- /*
- * FIXME. Here we'd like to be able to do:
- *
- * #ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
- * WARN_ON(!(mfmsr() & MSR_EE));
- * #endif
- *
- * But currently it hits in a few paths, we should fix those and
- * enable the warning.
- */
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+ WARN_ON(!(mfmsr() & MSR_EE));
+#endif
return;
}
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index b5fec1f9751a..4581377cfc98 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -112,6 +112,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
mce->srr1 = regs->msr;
mce->gpr3 = regs->gpr[3];
mce->in_use = 1;
+ mce->cpu = get_paca()->paca_index;
/* Mark it recovered if we have handled it and MSR(RI=1). */
if (handled && (regs->msr & MSR_RI))
@@ -121,6 +122,8 @@ void save_mce_event(struct pt_regs *regs, long handled,
mce->initiator = mce_err->initiator;
mce->severity = mce_err->severity;
+ mce->sync_error = mce_err->sync_error;
+ mce->error_class = mce_err->error_class;
/*
* Populate the mce error_type and type-specific error_type.
@@ -310,7 +313,11 @@ static void machine_check_process_queued_event(struct irq_work *work)
void machine_check_print_event_info(struct machine_check_event *evt,
bool user_mode, bool in_guest)
{
- const char *level, *sevstr, *subtype;
+ const char *level, *sevstr, *subtype, *err_type;
+ uint64_t ea = 0, pa = 0;
+ int n = 0;
+ char dar_str[50];
+ char pa_str[50];
static const char *mc_ue_types[] = {
"Indeterminate",
"Instruction fetch",
@@ -357,6 +364,13 @@ void machine_check_print_event_info(struct machine_check_event *evt,
"Store (timeout)",
"Page table walk Load/Store (timeout)",
};
+ static const char *mc_error_class[] = {
+ "Unknown",
+ "Hardware error",
+ "Probable Hardware error (some chance of software cause)",
+ "Software error",
+ "Probable Software error (some chance of hardware cause)",
+ };
/* Print things out */
if (evt->version != MCE_V1) {
@@ -371,9 +385,9 @@ void machine_check_print_event_info(struct machine_check_event *evt,
break;
case MCE_SEV_WARNING:
level = KERN_WARNING;
- sevstr = "";
+ sevstr = "Warning";
break;
- case MCE_SEV_ERROR_SYNC:
+ case MCE_SEV_SEVERE:
level = KERN_ERR;
sevstr = "Severe";
break;
@@ -384,101 +398,107 @@ void machine_check_print_event_info(struct machine_check_event *evt,
break;
}
- printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
- evt->disposition == MCE_DISPOSITION_RECOVERED ?
- "Recovered" : "Not recovered");
-
- if (in_guest) {
- printk("%s Guest NIP: %016llx\n", level, evt->srr0);
- } else if (user_mode) {
- printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level,
- evt->srr0, current->pid, current->comm);
- } else {
- printk("%s NIP [%016llx]: %pS\n", level, evt->srr0,
- (void *)evt->srr0);
- }
-
- printk("%s Initiator: %s\n", level,
- evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
switch (evt->error_type) {
case MCE_ERROR_TYPE_UE:
+ err_type = "UE";
subtype = evt->u.ue_error.ue_error_type <
ARRAY_SIZE(mc_ue_types) ?
mc_ue_types[evt->u.ue_error.ue_error_type]
: "Unknown";
- printk("%s Error type: UE [%s]\n", level, subtype);
if (evt->u.ue_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.ue_error.effective_address);
+ ea = evt->u.ue_error.effective_address;
if (evt->u.ue_error.physical_address_provided)
- printk("%s Physical address: %016llx\n",
- level, evt->u.ue_error.physical_address);
+ pa = evt->u.ue_error.physical_address;
break;
case MCE_ERROR_TYPE_SLB:
+ err_type = "SLB";
subtype = evt->u.slb_error.slb_error_type <
ARRAY_SIZE(mc_slb_types) ?
mc_slb_types[evt->u.slb_error.slb_error_type]
: "Unknown";
- printk("%s Error type: SLB [%s]\n", level, subtype);
if (evt->u.slb_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.slb_error.effective_address);
+ ea = evt->u.slb_error.effective_address;
break;
case MCE_ERROR_TYPE_ERAT:
+ err_type = "ERAT";
subtype = evt->u.erat_error.erat_error_type <
ARRAY_SIZE(mc_erat_types) ?
mc_erat_types[evt->u.erat_error.erat_error_type]
: "Unknown";
- printk("%s Error type: ERAT [%s]\n", level, subtype);
if (evt->u.erat_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.erat_error.effective_address);
+ ea = evt->u.erat_error.effective_address;
break;
case MCE_ERROR_TYPE_TLB:
+ err_type = "TLB";
subtype = evt->u.tlb_error.tlb_error_type <
ARRAY_SIZE(mc_tlb_types) ?
mc_tlb_types[evt->u.tlb_error.tlb_error_type]
: "Unknown";
- printk("%s Error type: TLB [%s]\n", level, subtype);
if (evt->u.tlb_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.tlb_error.effective_address);
+ ea = evt->u.tlb_error.effective_address;
break;
case MCE_ERROR_TYPE_USER:
+ err_type = "User";
subtype = evt->u.user_error.user_error_type <
ARRAY_SIZE(mc_user_types) ?
mc_user_types[evt->u.user_error.user_error_type]
: "Unknown";
- printk("%s Error type: User [%s]\n", level, subtype);
if (evt->u.user_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.user_error.effective_address);
+ ea = evt->u.user_error.effective_address;
break;
case MCE_ERROR_TYPE_RA:
+ err_type = "Real address";
subtype = evt->u.ra_error.ra_error_type <
ARRAY_SIZE(mc_ra_types) ?
mc_ra_types[evt->u.ra_error.ra_error_type]
: "Unknown";
- printk("%s Error type: Real address [%s]\n", level, subtype);
if (evt->u.ra_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.ra_error.effective_address);
+ ea = evt->u.ra_error.effective_address;
break;
case MCE_ERROR_TYPE_LINK:
+ err_type = "Link";
subtype = evt->u.link_error.link_error_type <
ARRAY_SIZE(mc_link_types) ?
mc_link_types[evt->u.link_error.link_error_type]
: "Unknown";
- printk("%s Error type: Link [%s]\n", level, subtype);
if (evt->u.link_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.link_error.effective_address);
+ ea = evt->u.link_error.effective_address;
break;
default:
case MCE_ERROR_TYPE_UNKNOWN:
- printk("%s Error type: Unknown\n", level);
+ err_type = "Unknown";
+ subtype = "";
break;
}
+
+ dar_str[0] = pa_str[0] = '\0';
+ if (ea && evt->srr0 != ea) {
+ /* Load/Store address */
+ n = sprintf(dar_str, "DAR: %016llx ", ea);
+ if (pa)
+ sprintf(dar_str + n, "paddr: %016llx ", pa);
+ } else if (pa) {
+ sprintf(pa_str, " paddr: %016llx", pa);
+ }
+
+ printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
+ level, evt->cpu, sevstr, in_guest ? "Guest" : "Host",
+ err_type, subtype, dar_str,
+ evt->disposition == MCE_DISPOSITION_RECOVERED ?
+ "Recovered" : "Not recovered");
+
+ if (in_guest || user_mode) {
+ printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
+ level, evt->cpu, current->pid, current->comm,
+ in_guest ? "Guest " : "", evt->srr0, pa_str);
+ } else {
+ printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
+ level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
+ }
+
+ subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
+ mc_error_class[evt->error_class] : "Unknown";
+ printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 6b800eec31f2..b5e876efe864 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -36,7 +36,7 @@
* Convert an address related to an mm to a PFN. NOTE: we are in real
* mode, we could potentially race with page table updates.
*/
-static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
+unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
{
pte_t *ptep;
unsigned long flags;
@@ -131,213 +131,232 @@ struct mce_ierror_table {
bool nip_valid; /* nip is a valid indicator of faulting address */
unsigned int error_type;
unsigned int error_subtype;
+ unsigned int error_class;
unsigned int initiator;
unsigned int severity;
+ bool sync_error;
};
static const struct mce_ierror_table mce_p7_ierror_table[] = {
{ 0x00000000001c0000, 0x0000000000040000, true,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000001c0000, 0x0000000000080000, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000001c0000, 0x00000000000c0000, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000001c0000, 0x0000000000100000, true,
MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000001c0000, 0x0000000000140000, true,
- MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000001c0000, 0x0000000000180000, true,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000001c0000, 0x00000000001c0000, true,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
-{ 0, 0, 0, 0, 0, 0 } };
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, 0, 0, 0, 0, 0, 0 } };
static const struct mce_ierror_table mce_p8_ierror_table[] = {
{ 0x00000000081c0000, 0x0000000000040000, true,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x0000000000080000, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x00000000000c0000, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000081c0000, 0x0000000000100000, true,
- MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000081c0000, 0x0000000000140000, true,
- MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000081c0000, 0x0000000000180000, true,
MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x00000000001c0000, true,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x0000000008000000, true,
- MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_IFETCH_TIMEOUT, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x0000000008040000, true,
MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
-{ 0, 0, 0, 0, 0, 0 } };
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, 0, 0, 0, 0, 0, 0 } };
static const struct mce_ierror_table mce_p9_ierror_table[] = {
{ 0x00000000081c0000, 0x0000000000040000, true,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x0000000000080000, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x00000000000c0000, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000081c0000, 0x0000000000100000, true,
- MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000081c0000, 0x0000000000140000, true,
- MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000081c0000, 0x0000000000180000, true,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x00000000001c0000, true,
- MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x0000000008000000, true,
- MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_IFETCH_TIMEOUT, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x0000000008040000, true,
MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x00000000080c0000, true,
- MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x0000000008100000, true,
- MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x0000000008140000, false,
- MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE,
- MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */
{ 0x00000000081c0000, 0x0000000008180000, false,
MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_STORE_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */
-{ 0x00000000081c0000, 0x00000000081c0000, true,
+ MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */
+{ 0x00000000081c0000, 0x00000000081c0000, true, MCE_ECLASS_HARDWARE,
MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
-{ 0, 0, 0, 0, 0, 0 } };
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, 0, 0, 0, 0, 0, 0 } };
struct mce_derror_table {
unsigned long dsisr_value;
bool dar_valid; /* dar is a valid indicator of faulting address */
unsigned int error_type;
unsigned int error_subtype;
+ unsigned int error_class;
unsigned int initiator;
unsigned int severity;
+ bool sync_error;
};
static const struct mce_derror_table mce_p7_derror_table[] = {
{ 0x00008000, false,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00004000, true,
MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000800, true,
- MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000400, true,
- MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000080, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000100, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000040, true,
MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
-{ 0, false, 0, 0, 0, 0 } };
+ MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0, false, 0, 0, 0, 0, 0 } };
static const struct mce_derror_table mce_p8_derror_table[] = {
{ 0x00008000, false,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00004000, true,
MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00002000, true,
- MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00001000, true,
MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000800, true,
- MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000400, true,
- MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000200, true,
MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, /* SECONDARY ERAT */
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000080, true,
MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000100, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
-{ 0, false, 0, 0, 0, 0 } };
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, false, 0, 0, 0, 0, 0 } };
static const struct mce_derror_table mce_p9_derror_table[] = {
{ 0x00008000, false,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00004000, true,
MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00002000, true,
- MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00001000, true,
MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000800, true,
- MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000400, true,
- MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000200, false,
- MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000080, true,
MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000100, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000040, true,
- MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000020, false,
MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000010, false,
MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000008, false,
- MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
-{ 0, false, 0, 0, 0, 0 } };
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, false, 0, 0, 0, 0, 0 } };
static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr,
uint64_t *phys_addr)
@@ -404,6 +423,7 @@ static int mce_handle_ierror(struct pt_regs *regs,
/* now fill in mce_error_info */
mce_err->error_type = table[i].error_type;
+ mce_err->error_class = table[i].error_class;
switch (table[i].error_type) {
case MCE_ERROR_TYPE_UE:
mce_err->u.ue_error_type = table[i].error_subtype;
@@ -427,11 +447,12 @@ static int mce_handle_ierror(struct pt_regs *regs,
mce_err->u.link_error_type = table[i].error_subtype;
break;
}
+ mce_err->sync_error = table[i].sync_error;
mce_err->severity = table[i].severity;
mce_err->initiator = table[i].initiator;
if (table[i].nip_valid) {
*addr = regs->nip;
- if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
+ if (mce_err->sync_error &&
table[i].error_type == MCE_ERROR_TYPE_UE) {
unsigned long pfn;
@@ -448,8 +469,10 @@ static int mce_handle_ierror(struct pt_regs *regs,
}
mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
- mce_err->severity = MCE_SEV_ERROR_SYNC;
+ mce_err->error_class = MCE_ECLASS_UNKNOWN;
+ mce_err->severity = MCE_SEV_SEVERE;
mce_err->initiator = MCE_INITIATOR_CPU;
+ mce_err->sync_error = true;
return 0;
}
@@ -496,6 +519,7 @@ static int mce_handle_derror(struct pt_regs *regs,
/* now fill in mce_error_info */
mce_err->error_type = table[i].error_type;
+ mce_err->error_class = table[i].error_class;
switch (table[i].error_type) {
case MCE_ERROR_TYPE_UE:
mce_err->u.ue_error_type = table[i].error_subtype;
@@ -519,11 +543,12 @@ static int mce_handle_derror(struct pt_regs *regs,
mce_err->u.link_error_type = table[i].error_subtype;
break;
}
+ mce_err->sync_error = table[i].sync_error;
mce_err->severity = table[i].severity;
mce_err->initiator = table[i].initiator;
if (table[i].dar_valid)
*addr = regs->dar;
- else if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
+ else if (mce_err->sync_error &&
table[i].error_type == MCE_ERROR_TYPE_UE) {
/*
* We do a maximum of 4 nested MCE calls, see
@@ -539,8 +564,10 @@ static int mce_handle_derror(struct pt_regs *regs,
return handled;
mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
- mce_err->severity = MCE_SEV_ERROR_SYNC;
+ mce_err->error_class = MCE_ECLASS_UNKNOWN;
+ mce_err->severity = MCE_SEV_SEVERE;
mce_err->initiator = MCE_INITIATOR_CPU;
+ mce_err->sync_error = true;
return 0;
}
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index e7382abee868..9cc91d03ab62 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -267,12 +267,12 @@ void copy_mm_to_paca(struct mm_struct *mm)
get_paca()->mm_ctx_id = context->id;
#ifdef CONFIG_PPC_MM_SLICES
- VM_BUG_ON(!mm->context.slb_addr_limit);
- get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit;
- memcpy(&get_paca()->mm_ctx_low_slices_psize,
- &context->low_slices_psize, sizeof(context->low_slices_psize));
- memcpy(&get_paca()->mm_ctx_high_slices_psize,
- &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
+ VM_BUG_ON(!mm_ctx_slb_addr_limit(context));
+ get_paca()->mm_ctx_slb_addr_limit = mm_ctx_slb_addr_limit(context);
+ memcpy(&get_paca()->mm_ctx_low_slices_psize, mm_ctx_low_slices(context),
+ LOW_SLICE_ARRAY_SZ);
+ memcpy(&get_paca()->mm_ctx_high_slices_psize, mm_ctx_high_slices(context),
+ TASK_SLICE_ARRAY_SZ(context));
#else /* CONFIG_PPC_MM_SLICES */
get_paca()->mm_ctx_user_psize = context->user_psize;
get_paca()->mm_ctx_sllp = context->sllp;
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index dd9e0d5386ee..87da40129927 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -67,6 +67,7 @@
#include <asm/cpu_has_feature.h>
#include <asm/asm-prototypes.h>
#include <asm/stacktrace.h>
+#include <asm/hw_breakpoint.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
@@ -133,7 +134,8 @@ static int __init enable_strict_msr_control(char *str)
}
early_param("ppc_strict_facility_enable", enable_strict_msr_control);
-unsigned long msr_check_and_set(unsigned long bits)
+/* notrace because it's called by restore_math */
+unsigned long notrace msr_check_and_set(unsigned long bits)
{
unsigned long oldmsr = mfmsr();
unsigned long newmsr;
@@ -152,7 +154,8 @@ unsigned long msr_check_and_set(unsigned long bits)
}
EXPORT_SYMBOL_GPL(msr_check_and_set);
-void __msr_check_and_clear(unsigned long bits)
+/* notrace because it's called by restore_math */
+void notrace __msr_check_and_clear(unsigned long bits)
{
unsigned long oldmsr = mfmsr();
unsigned long newmsr;
@@ -525,7 +528,17 @@ void giveup_all(struct task_struct *tsk)
}
EXPORT_SYMBOL(giveup_all);
-void restore_math(struct pt_regs *regs)
+/*
+ * The exception exit path calls restore_math() with interrupts hard disabled
+ * but the soft irq state not "reconciled". ftrace code that calls
+ * local_irq_save/restore causes warnings.
+ *
+ * Rather than complicate the exit path, just don't trace restore_math. This
+ * could be done by having ftrace entry code check for this un-reconciled
+ * condition where MSR[EE]=0 and PACA_IRQ_HARD_DIS is not set, and
+ * temporarily fix it up for the duration of the ftrace call.
+ */
+void notrace restore_math(struct pt_regs *regs)
{
unsigned long msr;
@@ -784,7 +797,7 @@ static inline int set_dabr(struct arch_hw_breakpoint *brk)
return __set_dabr(dabr, dabrx);
}
-static inline int set_dawr(struct arch_hw_breakpoint *brk)
+int set_dawr(struct arch_hw_breakpoint *brk)
{
unsigned long dawr, dawrx, mrd;
@@ -816,7 +829,7 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk)
{
memcpy(this_cpu_ptr(&current_brk), brk, sizeof(*brk));
- if (cpu_has_feature(CPU_FTR_DAWR))
+ if (dawr_enabled())
// Power8 or later
set_dawr(brk);
else if (!cpu_has_feature(CPU_FTR_ARCH_207S))
@@ -830,8 +843,8 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk)
/* Check if we have DAWR or DABR hardware */
bool ppc_breakpoint_available(void)
{
- if (cpu_has_feature(CPU_FTR_DAWR))
- return true; /* POWER8 DAWR */
+ if (dawr_enabled())
+ return true; /* POWER8 DAWR or POWER9 forced DAWR */
if (cpu_has_feature(CPU_FTR_ARCH_207S))
return false; /* POWER9 with DAWR disabled */
/* DABR: Everything but POWER8 and POWER9 */
@@ -1151,11 +1164,6 @@ static inline void restore_sprs(struct thread_struct *old_thread,
thread_pkey_regs_restore(new_thread, old_thread);
}
-#ifdef CONFIG_PPC_BOOK3S_64
-#define CP_SIZE 128
-static const u8 dummy_copy_buffer[CP_SIZE] __attribute__((aligned(CP_SIZE)));
-#endif
-
struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *new)
{
@@ -1729,7 +1737,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */
#ifdef CONFIG_PPC_BOOK3S_64
- preload_new_slb_context(start, sp);
+ if (!radix_enabled())
+ preload_new_slb_context(start, sp);
#endif
#endif
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index f33ff4163a51..523bb99d7676 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -154,10 +154,8 @@ static struct prom_t __prombss prom;
static unsigned long __prombss prom_entry;
-#define PROM_SCRATCH_SIZE 256
-
static char __prombss of_stdout_device[256];
-static char __prombss prom_scratch[PROM_SCRATCH_SIZE];
+static char __prombss prom_scratch[256];
static unsigned long __prombss dt_header_start;
static unsigned long __prombss dt_struct_start, dt_struct_end;
@@ -224,6 +222,135 @@ static bool __prombss rtas_has_query_cpu_stopped;
#define PHANDLE_VALID(p) ((p) != 0 && (p) != PROM_ERROR)
#define IHANDLE_VALID(i) ((i) != 0 && (i) != PROM_ERROR)
+/* Copied from lib/string.c and lib/kstrtox.c */
+
+static int __init prom_strcmp(const char *cs, const char *ct)
+{
+ unsigned char c1, c2;
+
+ while (1) {
+ c1 = *cs++;
+ c2 = *ct++;
+ if (c1 != c2)
+ return c1 < c2 ? -1 : 1;
+ if (!c1)
+ break;
+ }
+ return 0;
+}
+
+static char __init *prom_strcpy(char *dest, const char *src)
+{
+ char *tmp = dest;
+
+ while ((*dest++ = *src++) != '\0')
+ /* nothing */;
+ return tmp;
+}
+
+static int __init prom_strncmp(const char *cs, const char *ct, size_t count)
+{
+ unsigned char c1, c2;
+
+ while (count) {
+ c1 = *cs++;
+ c2 = *ct++;
+ if (c1 != c2)
+ return c1 < c2 ? -1 : 1;
+ if (!c1)
+ break;
+ count--;
+ }
+ return 0;
+}
+
+static size_t __init prom_strlen(const char *s)
+{
+ const char *sc;
+
+ for (sc = s; *sc != '\0'; ++sc)
+ /* nothing */;
+ return sc - s;
+}
+
+static int __init prom_memcmp(const void *cs, const void *ct, size_t count)
+{
+ const unsigned char *su1, *su2;
+ int res = 0;
+
+ for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--)
+ if ((res = *su1 - *su2) != 0)
+ break;
+ return res;
+}
+
+static char __init *prom_strstr(const char *s1, const char *s2)
+{
+ size_t l1, l2;
+
+ l2 = prom_strlen(s2);
+ if (!l2)
+ return (char *)s1;
+ l1 = prom_strlen(s1);
+ while (l1 >= l2) {
+ l1--;
+ if (!prom_memcmp(s1, s2, l2))
+ return (char *)s1;
+ s1++;
+ }
+ return NULL;
+}
+
+static size_t __init prom_strlcpy(char *dest, const char *src, size_t size)
+{
+ size_t ret = prom_strlen(src);
+
+ if (size) {
+ size_t len = (ret >= size) ? size - 1 : ret;
+ memcpy(dest, src, len);
+ dest[len] = '\0';
+ }
+ return ret;
+}
+
+#ifdef CONFIG_PPC_PSERIES
+static int __init prom_strtobool(const char *s, bool *res)
+{
+ if (!s)
+ return -EINVAL;
+
+ switch (s[0]) {
+ case 'y':
+ case 'Y':
+ case '1':
+ *res = true;
+ return 0;
+ case 'n':
+ case 'N':
+ case '0':
+ *res = false;
+ return 0;
+ case 'o':
+ case 'O':
+ switch (s[1]) {
+ case 'n':
+ case 'N':
+ *res = true;
+ return 0;
+ case 'f':
+ case 'F':
+ *res = false;
+ return 0;
+ default:
+ break;
+ }
+ default:
+ break;
+ }
+
+ return -EINVAL;
+}
+#endif
/* This is the one and *ONLY* place where we actually call open
* firmware.
@@ -555,7 +682,7 @@ static int __init prom_setprop(phandle node, const char *nodename,
add_string(&p, tohex((u32)(unsigned long) value));
add_string(&p, tohex(valuelen));
add_string(&p, tohex(ADDR(pname)));
- add_string(&p, tohex(strlen(pname)));
+ add_string(&p, tohex(prom_strlen(pname)));
add_string(&p, "property");
*p = 0;
return call_prom("interpret", 1, 1, (u32)(unsigned long) cmd);
@@ -631,33 +758,30 @@ static void __init early_cmdline_parse(void)
const char *opt;
char *p;
- int l __maybe_unused = 0;
+ int l = 0;
prom_cmd_line[0] = 0;
p = prom_cmd_line;
if ((long)prom.chosen > 0)
l = prom_getprop(prom.chosen, "bootargs", p, COMMAND_LINE_SIZE-1);
-#ifdef CONFIG_CMDLINE
- if (l <= 0 || p[0] == '\0') /* dbl check */
- strlcpy(prom_cmd_line,
- CONFIG_CMDLINE, sizeof(prom_cmd_line));
-#endif /* CONFIG_CMDLINE */
+ if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && (l <= 0 || p[0] == '\0')) /* dbl check */
+ prom_strlcpy(prom_cmd_line, CONFIG_CMDLINE, sizeof(prom_cmd_line));
prom_printf("command line: %s\n", prom_cmd_line);
#ifdef CONFIG_PPC64
- opt = strstr(prom_cmd_line, "iommu=");
+ opt = prom_strstr(prom_cmd_line, "iommu=");
if (opt) {
prom_printf("iommu opt is: %s\n", opt);
opt += 6;
while (*opt && *opt == ' ')
opt++;
- if (!strncmp(opt, "off", 3))
+ if (!prom_strncmp(opt, "off", 3))
prom_iommu_off = 1;
- else if (!strncmp(opt, "force", 5))
+ else if (!prom_strncmp(opt, "force", 5))
prom_iommu_force_on = 1;
}
#endif
- opt = strstr(prom_cmd_line, "mem=");
+ opt = prom_strstr(prom_cmd_line, "mem=");
if (opt) {
opt += 4;
prom_memory_limit = prom_memparse(opt, (const char **)&opt);
@@ -669,13 +793,13 @@ static void __init early_cmdline_parse(void)
#ifdef CONFIG_PPC_PSERIES
prom_radix_disable = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
- opt = strstr(prom_cmd_line, "disable_radix");
+ opt = prom_strstr(prom_cmd_line, "disable_radix");
if (opt) {
opt += 13;
if (*opt && *opt == '=') {
bool val;
- if (kstrtobool(++opt, &val))
+ if (prom_strtobool(++opt, &val))
prom_radix_disable = false;
else
prom_radix_disable = val;
@@ -1028,7 +1152,7 @@ static int __init prom_count_smt_threads(void)
type[0] = 0;
prom_getprop(node, "device_type", type, sizeof(type));
- if (strcmp(type, "cpu"))
+ if (prom_strcmp(type, "cpu"))
continue;
/*
* There is an entry for each smt thread, each entry being
@@ -1138,8 +1262,14 @@ static void __init prom_check_platform_support(void)
int prop_len = prom_getproplen(prom.chosen,
"ibm,arch-vec-5-platform-support");
- /* First copy the architecture vec template */
- ibm_architecture_vec = ibm_architecture_vec_template;
+ /*
+ * First copy the architecture vec template
+ *
+ * use memcpy() instead of *vec = *vec_template so that GCC replaces it
+ * by __memcpy() when KASAN is active
+ */
+ memcpy(&ibm_architecture_vec, &ibm_architecture_vec_template,
+ sizeof(ibm_architecture_vec));
if (prop_len > 1) {
int i;
@@ -1475,7 +1605,7 @@ static void __init prom_init_mem(void)
*/
prom_getprop(node, "name", type, sizeof(type));
}
- if (strcmp(type, "memory"))
+ if (prom_strcmp(type, "memory"))
continue;
plen = prom_getprop(node, "reg", regbuf, sizeof(regbuf));
@@ -1487,8 +1617,8 @@ static void __init prom_init_mem(void)
endp = p + (plen / sizeof(cell_t));
#ifdef DEBUG_PROM
- memset(path, 0, PROM_SCRATCH_SIZE);
- call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1);
+ memset(path, 0, sizeof(prom_scratch));
+ call_prom("package-to-path", 3, 1, node, path, sizeof(prom_scratch) - 1);
prom_debug(" node %s :\n", path);
#endif /* DEBUG_PROM */
@@ -1756,19 +1886,19 @@ static void __init prom_initialize_tce_table(void)
prom_getprop(node, "device_type", type, sizeof(type));
prom_getprop(node, "model", model, sizeof(model));
- if ((type[0] == 0) || (strstr(type, "pci") == NULL))
+ if ((type[0] == 0) || (prom_strstr(type, "pci") == NULL))
continue;
/* Keep the old logic intact to avoid regression. */
if (compatible[0] != 0) {
- if ((strstr(compatible, "python") == NULL) &&
- (strstr(compatible, "Speedwagon") == NULL) &&
- (strstr(compatible, "Winnipeg") == NULL))
+ if ((prom_strstr(compatible, "python") == NULL) &&
+ (prom_strstr(compatible, "Speedwagon") == NULL) &&
+ (prom_strstr(compatible, "Winnipeg") == NULL))
continue;
} else if (model[0] != 0) {
- if ((strstr(model, "ython") == NULL) &&
- (strstr(model, "peedwagon") == NULL) &&
- (strstr(model, "innipeg") == NULL))
+ if ((prom_strstr(model, "ython") == NULL) &&
+ (prom_strstr(model, "peedwagon") == NULL) &&
+ (prom_strstr(model, "innipeg") == NULL))
continue;
}
@@ -1796,10 +1926,10 @@ static void __init prom_initialize_tce_table(void)
local_alloc_bottom = base;
/* It seems OF doesn't null-terminate the path :-( */
- memset(path, 0, PROM_SCRATCH_SIZE);
+ memset(path, 0, sizeof(prom_scratch));
/* Call OF to setup the TCE hardware */
if (call_prom("package-to-path", 3, 1, node,
- path, PROM_SCRATCH_SIZE-1) == PROM_ERROR) {
+ path, sizeof(prom_scratch) - 1) == PROM_ERROR) {
prom_printf("package-to-path failed\n");
}
@@ -1917,12 +2047,12 @@ static void __init prom_hold_cpus(void)
type[0] = 0;
prom_getprop(node, "device_type", type, sizeof(type));
- if (strcmp(type, "cpu") != 0)
+ if (prom_strcmp(type, "cpu") != 0)
continue;
/* Skip non-configured cpus. */
if (prom_getprop(node, "status", type, sizeof(type)) > 0)
- if (strcmp(type, "okay") != 0)
+ if (prom_strcmp(type, "okay") != 0)
continue;
reg = cpu_to_be32(-1); /* make sparse happy */
@@ -1998,9 +2128,9 @@ static void __init prom_find_mmu(void)
return;
version[sizeof(version) - 1] = 0;
/* XXX might need to add other versions here */
- if (strcmp(version, "Open Firmware, 1.0.5") == 0)
+ if (prom_strcmp(version, "Open Firmware, 1.0.5") == 0)
of_workarounds = OF_WA_CLAIM;
- else if (strncmp(version, "FirmWorks,3.", 12) == 0) {
+ else if (prom_strncmp(version, "FirmWorks,3.", 12) == 0) {
of_workarounds = OF_WA_CLAIM | OF_WA_LONGTRAIL;
call_prom("interpret", 1, 1, "dev /memory 0 to allow-reclaim");
} else
@@ -2033,7 +2163,7 @@ static void __init prom_init_stdout(void)
call_prom("instance-to-path", 3, 1, prom.stdout, path, 255);
prom_printf("OF stdout device is: %s\n", of_stdout_device);
prom_setprop(prom.chosen, "/chosen", "linux,stdout-path",
- path, strlen(path) + 1);
+ path, prom_strlen(path) + 1);
/* instance-to-package fails on PA-Semi */
stdout_node = call_prom("instance-to-package", 1, 1, prom.stdout);
@@ -2043,7 +2173,7 @@ static void __init prom_init_stdout(void)
/* If it's a display, note it */
memset(type, 0, sizeof(type));
prom_getprop(stdout_node, "device_type", type, sizeof(type));
- if (strcmp(type, "display") == 0)
+ if (prom_strcmp(type, "display") == 0)
prom_setprop(stdout_node, path, "linux,boot-display", NULL, 0);
}
}
@@ -2064,19 +2194,19 @@ static int __init prom_find_machine_type(void)
compat[len] = 0;
while (i < len) {
char *p = &compat[i];
- int sl = strlen(p);
+ int sl = prom_strlen(p);
if (sl == 0)
break;
- if (strstr(p, "Power Macintosh") ||
- strstr(p, "MacRISC"))
+ if (prom_strstr(p, "Power Macintosh") ||
+ prom_strstr(p, "MacRISC"))
return PLATFORM_POWERMAC;
#ifdef CONFIG_PPC64
/* We must make sure we don't detect the IBM Cell
* blades as pSeries due to some firmware issues,
* so we do it here.
*/
- if (strstr(p, "IBM,CBEA") ||
- strstr(p, "IBM,CPBW-1.0"))
+ if (prom_strstr(p, "IBM,CBEA") ||
+ prom_strstr(p, "IBM,CPBW-1.0"))
return PLATFORM_GENERIC;
#endif /* CONFIG_PPC64 */
i += sl + 1;
@@ -2093,7 +2223,7 @@ static int __init prom_find_machine_type(void)
compat, sizeof(compat)-1);
if (len <= 0)
return PLATFORM_GENERIC;
- if (strcmp(compat, "chrp"))
+ if (prom_strcmp(compat, "chrp"))
return PLATFORM_GENERIC;
/* Default to pSeries. We need to know if we are running LPAR */
@@ -2155,19 +2285,19 @@ static void __init prom_check_displays(void)
for (node = 0; prom_next_node(&node); ) {
memset(type, 0, sizeof(type));
prom_getprop(node, "device_type", type, sizeof(type));
- if (strcmp(type, "display") != 0)
+ if (prom_strcmp(type, "display") != 0)
continue;
/* It seems OF doesn't null-terminate the path :-( */
path = prom_scratch;
- memset(path, 0, PROM_SCRATCH_SIZE);
+ memset(path, 0, sizeof(prom_scratch));
/*
* leave some room at the end of the path for appending extra
* arguments
*/
if (call_prom("package-to-path", 3, 1, node, path,
- PROM_SCRATCH_SIZE-10) == PROM_ERROR)
+ sizeof(prom_scratch) - 10) == PROM_ERROR)
continue;
prom_printf("found display : %s, opening... ", path);
@@ -2259,9 +2389,9 @@ static unsigned long __init dt_find_string(char *str)
s = os = (char *)dt_string_start;
s += 4;
while (s < (char *)dt_string_end) {
- if (strcmp(s, str) == 0)
+ if (prom_strcmp(s, str) == 0)
return s - os;
- s += strlen(s) + 1;
+ s += prom_strlen(s) + 1;
}
return 0;
}
@@ -2294,7 +2424,7 @@ static void __init scan_dt_build_strings(phandle node,
}
/* skip "name" */
- if (strcmp(namep, "name") == 0) {
+ if (prom_strcmp(namep, "name") == 0) {
*mem_start = (unsigned long)namep;
prev_name = "name";
continue;
@@ -2306,7 +2436,7 @@ static void __init scan_dt_build_strings(phandle node,
namep = sstart + soff;
} else {
/* Trim off some if we can */
- *mem_start = (unsigned long)namep + strlen(namep) + 1;
+ *mem_start = (unsigned long)namep + prom_strlen(namep) + 1;
dt_string_end = *mem_start;
}
prev_name = namep;
@@ -2363,8 +2493,8 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
/* get it again for debugging */
path = prom_scratch;
- memset(path, 0, PROM_SCRATCH_SIZE);
- call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1);
+ memset(path, 0, sizeof(prom_scratch));
+ call_prom("package-to-path", 3, 1, node, path, sizeof(prom_scratch) - 1);
/* get and store all properties */
prev_name = "";
@@ -2375,7 +2505,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
break;
/* skip "name" */
- if (strcmp(pname, "name") == 0) {
+ if (prom_strcmp(pname, "name") == 0) {
prev_name = "name";
continue;
}
@@ -2406,7 +2536,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
call_prom("getprop", 4, 1, node, pname, valp, l);
*mem_start = _ALIGN(*mem_start, 4);
- if (!strcmp(pname, "phandle"))
+ if (!prom_strcmp(pname, "phandle"))
has_phandle = 1;
}
@@ -2476,8 +2606,8 @@ static void __init flatten_device_tree(void)
/* Add "phandle" in there, we'll need it */
namep = make_room(&mem_start, &mem_end, 16, 1);
- strcpy(namep, "phandle");
- mem_start = (unsigned long)namep + strlen(namep) + 1;
+ prom_strcpy(namep, "phandle");
+ mem_start = (unsigned long)namep + prom_strlen(namep) + 1;
/* Build string array */
prom_printf("Building dt strings...\n");
@@ -2799,7 +2929,7 @@ static void __init fixup_device_tree_efika(void)
rv = prom_getprop(node, "model", prop, sizeof(prop));
if (rv == PROM_ERROR)
return;
- if (strcmp(prop, "EFIKA5K2"))
+ if (prom_strcmp(prop, "EFIKA5K2"))
return;
prom_printf("Applying EFIKA device tree fixups\n");
@@ -2807,13 +2937,13 @@ static void __init fixup_device_tree_efika(void)
/* Claiming to be 'chrp' is death */
node = call_prom("finddevice", 1, 1, ADDR("/"));
rv = prom_getprop(node, "device_type", prop, sizeof(prop));
- if (rv != PROM_ERROR && (strcmp(prop, "chrp") == 0))
+ if (rv != PROM_ERROR && (prom_strcmp(prop, "chrp") == 0))
prom_setprop(node, "/", "device_type", "efika", sizeof("efika"));
/* CODEGEN,description is exposed in /proc/cpuinfo so
fix that too */
rv = prom_getprop(node, "CODEGEN,description", prop, sizeof(prop));
- if (rv != PROM_ERROR && (strstr(prop, "CHRP")))
+ if (rv != PROM_ERROR && (prom_strstr(prop, "CHRP")))
prom_setprop(node, "/", "CODEGEN,description",
"Efika 5200B PowerPC System",
sizeof("Efika 5200B PowerPC System"));
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index 667df97d2595..4cac45cb5de5 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -16,10 +16,18 @@
# If you really need to reference something from prom_init.o add
# it to the list below:
+grep "^CONFIG_KASAN=y$" .config >/dev/null
+if [ $? -eq 0 ]
+then
+ MEM_FUNCS="__memcpy __memset"
+else
+ MEM_FUNCS="memcpy memset"
+fi
+
WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush
-_end enter_prom memcpy memset reloc_offset __secondary_hold
+_end enter_prom $MEM_FUNCS reloc_offset __secondary_hold
__secondary_hold_acknowledge __secondary_hold_spinloop __start
-strcmp strcpy strlcpy strlen strncmp strstr kstrtobool logo_linux_clut224
+logo_linux_clut224
reloc_got2 kernstart_addr memstart_addr linux_banner _stext
__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC."
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index d9ac7d94656e..684b0b315c32 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -43,6 +43,7 @@
#include <asm/tm.h>
#include <asm/asm-prototypes.h>
#include <asm/debug.h>
+#include <asm/hw_breakpoint.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@@ -3088,7 +3089,7 @@ long arch_ptrace(struct task_struct *child, long request,
dbginfo.sizeof_condition = 0;
#ifdef CONFIG_HAVE_HW_BREAKPOINT
dbginfo.features = PPC_DEBUG_FEATURE_DATA_BP_RANGE;
- if (cpu_has_feature(CPU_FTR_DAWR))
+ if (dawr_enabled())
dbginfo.features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR;
#else
dbginfo.features = 0;
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 70568ccbd9fd..e1c9cf079503 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -104,6 +104,14 @@ static __init int barrier_nospec_debugfs_init(void)
return 0;
}
device_initcall(barrier_nospec_debugfs_init);
+
+static __init int security_feature_debugfs_init(void)
+{
+ debugfs_create_x64("security_features", 0400, powerpc_debugfs_root,
+ (u64 *)&powerpc_security_features);
+ return 0;
+}
+device_initcall(security_feature_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
#ifdef CONFIG_PPC_FSL_BOOK3E
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 2e5dfb6e0823..aad9f5df6ab6 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -67,6 +67,7 @@
#include <asm/livepatch.h>
#include <asm/mmu_context.h>
#include <asm/cpu_has_feature.h>
+#include <asm/kasan.h>
#include "setup.h"
@@ -133,13 +134,11 @@ int crashing_cpu = -1;
/* also used by kexec */
void machine_shutdown(void)
{
-#ifdef CONFIG_FA_DUMP
/*
* if fadump is active, cleanup the fadump registration before we
* shutdown.
*/
fadump_cleanup();
-#endif
if (ppc_md.machine_shutdown)
ppc_md.machine_shutdown();
@@ -200,14 +199,15 @@ static void show_cpuinfo_summary(struct seq_file *m)
{
struct device_node *root;
const char *model = NULL;
-#if defined(CONFIG_SMP) && defined(CONFIG_PPC32)
unsigned long bogosum = 0;
int i;
- for_each_online_cpu(i)
- bogosum += loops_per_jiffy;
- seq_printf(m, "total bogomips\t: %lu.%02lu\n",
- bogosum/(500000/HZ), bogosum/(5000/HZ) % 100);
-#endif /* CONFIG_SMP && CONFIG_PPC32 */
+
+ if (IS_ENABLED(CONFIG_SMP) && IS_ENABLED(CONFIG_PPC32)) {
+ for_each_online_cpu(i)
+ bogosum += loops_per_jiffy;
+ seq_printf(m, "total bogomips\t: %lu.%02lu\n",
+ bogosum / (500000 / HZ), bogosum / (5000 / HZ) % 100);
+ }
seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq);
if (ppc_md.name)
seq_printf(m, "platform\t: %s\n", ppc_md.name);
@@ -221,11 +221,10 @@ static void show_cpuinfo_summary(struct seq_file *m)
if (ppc_md.show_cpuinfo != NULL)
ppc_md.show_cpuinfo(m);
-#ifdef CONFIG_PPC32
/* Display the amount of memory */
- seq_printf(m, "Memory\t\t: %d MB\n",
- (unsigned int)(total_memory / (1024 * 1024)));
-#endif
+ if (IS_ENABLED(CONFIG_PPC32))
+ seq_printf(m, "Memory\t\t: %d MB\n",
+ (unsigned int)(total_memory / (1024 * 1024)));
}
static int show_cpuinfo(struct seq_file *m, void *v)
@@ -252,26 +251,24 @@ static int show_cpuinfo(struct seq_file *m, void *v)
else
seq_printf(m, "unknown (%08x)", pvr);
-#ifdef CONFIG_ALTIVEC
if (cpu_has_feature(CPU_FTR_ALTIVEC))
seq_printf(m, ", altivec supported");
-#endif /* CONFIG_ALTIVEC */
seq_printf(m, "\n");
#ifdef CONFIG_TAU
- if (cur_cpu_spec->cpu_features & CPU_FTR_TAU) {
-#ifdef CONFIG_TAU_AVERAGE
- /* more straightforward, but potentially misleading */
- seq_printf(m, "temperature \t: %u C (uncalibrated)\n",
- cpu_temp(cpu_id));
-#else
- /* show the actual temp sensor range */
- u32 temp;
- temp = cpu_temp_both(cpu_id);
- seq_printf(m, "temperature \t: %u-%u C (uncalibrated)\n",
- temp & 0xff, temp >> 16);
-#endif
+ if (cpu_has_feature(CPU_FTR_TAU)) {
+ if (IS_ENABLED(CONFIG_TAU_AVERAGE)) {
+ /* more straightforward, but potentially misleading */
+ seq_printf(m, "temperature \t: %u C (uncalibrated)\n",
+ cpu_temp(cpu_id));
+ } else {
+ /* show the actual temp sensor range */
+ u32 temp;
+ temp = cpu_temp_both(cpu_id);
+ seq_printf(m, "temperature \t: %u-%u C (uncalibrated)\n",
+ temp & 0xff, temp >> 16);
+ }
}
#endif /* CONFIG_TAU */
@@ -335,11 +332,10 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "revision\t: %hd.%hd (pvr %04x %04x)\n",
maj, min, PVR_VER(pvr), PVR_REV(pvr));
-#ifdef CONFIG_PPC32
- seq_printf(m, "bogomips\t: %lu.%02lu\n",
- loops_per_jiffy / (500000/HZ),
- (loops_per_jiffy / (5000/HZ)) % 100);
-#endif
+ if (IS_ENABLED(CONFIG_PPC32))
+ seq_printf(m, "bogomips\t: %lu.%02lu\n", loops_per_jiffy / (500000 / HZ),
+ (loops_per_jiffy / (5000 / HZ)) % 100);
+
seq_printf(m, "\n");
/* If this is the last cpu, print the summary */
@@ -401,8 +397,8 @@ void __init check_for_initrd(void)
#ifdef CONFIG_SMP
-int threads_per_core, threads_per_subcore, threads_shift;
-cpumask_t threads_core_mask;
+int threads_per_core, threads_per_subcore, threads_shift __read_mostly;
+cpumask_t threads_core_mask __read_mostly;
EXPORT_SYMBOL_GPL(threads_per_core);
EXPORT_SYMBOL_GPL(threads_per_subcore);
EXPORT_SYMBOL_GPL(threads_shift);
@@ -740,23 +736,19 @@ void __init setup_panic(void)
* BUG() in that case.
*/
-#ifdef CONFIG_NOT_COHERENT_CACHE
-#define KERNEL_COHERENCY 0
-#else
-#define KERNEL_COHERENCY 1
-#endif
+#define KERNEL_COHERENCY (!IS_ENABLED(CONFIG_NOT_COHERENT_CACHE))
static int __init check_cache_coherency(void)
{
struct device_node *np;
const void *prop;
- int devtree_coherency;
+ bool devtree_coherency;
np = of_find_node_by_path("/");
prop = of_get_property(np, "coherency-off", NULL);
of_node_put(np);
- devtree_coherency = prop ? 0 : 1;
+ devtree_coherency = prop ? false : true;
if (devtree_coherency != KERNEL_COHERENCY) {
printk(KERN_ERR
@@ -799,12 +791,6 @@ void arch_setup_pdev_archdata(struct platform_device *pdev)
static __init void print_system_info(void)
{
pr_info("-----------------------------------------------------\n");
-#ifdef CONFIG_PPC_BOOK3S_64
- pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size);
-#endif
-#ifdef CONFIG_PPC_BOOK3S_32
- pr_info("Hash_size = 0x%lx\n", Hash_size);
-#endif
pr_info("phys_mem_size = 0x%llx\n",
(unsigned long long)memblock_phys_mem_size());
@@ -826,18 +812,7 @@ static __init void print_system_info(void)
pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
#endif
-#ifdef CONFIG_PPC_BOOK3S_64
- if (htab_address)
- pr_info("htab_address = 0x%p\n", htab_address);
- if (htab_hash_mask)
- pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask);
-#endif
-#ifdef CONFIG_PPC_BOOK3S_32
- if (Hash)
- pr_info("Hash = 0x%p\n", Hash);
- if (Hash_mask)
- pr_info("Hash_mask = 0x%lx\n", Hash_mask);
-#endif
+ print_system_hash_info();
if (PHYSICAL_START > 0)
pr_info("physical_start = 0x%llx\n",
@@ -868,6 +843,8 @@ static void smp_setup_pacas(void)
*/
void __init setup_arch(char **cmdline_p)
{
+ kasan_init();
+
*cmdline_p = boot_command_line;
/* Set a half-reasonable default so udelay does something sensible */
@@ -947,20 +924,7 @@ void __init setup_arch(char **cmdline_p)
init_mm.end_data = (unsigned long) _edata;
init_mm.brk = klimit;
-#ifdef CONFIG_PPC_MM_SLICES
-#ifdef CONFIG_PPC64
- if (!radix_enabled())
- init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
-#elif defined(CONFIG_PPC_8xx)
- init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW;
-#else
-#error "context.addr_limit not initialized."
-#endif
-#endif
-
-#ifdef CONFIG_SPAPR_TCE_IOMMU
mm_iommu_init(&init_mm);
-#endif
irqstack_early_init();
exc_lvl_early_init();
emergency_stack_init();
@@ -969,9 +933,9 @@ void __init setup_arch(char **cmdline_p)
early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
-#ifdef CONFIG_DUMMY_CONSOLE
- conswitchp = &dummy_con;
-#endif
+ if (IS_ENABLED(CONFIG_DUMMY_CONSOLE))
+ conswitchp = &dummy_con;
+
if (ppc_md.setup_arch)
ppc_md.setup_arch();
@@ -983,10 +947,8 @@ void __init setup_arch(char **cmdline_p)
/* Initialize the MMU context management stuff. */
mmu_context_init();
-#ifdef CONFIG_PPC64
/* Interrupt code needs to be 64K-aligned. */
- if ((unsigned long)_stext & 0xffff)
+ if (IS_ENABLED(CONFIG_PPC64) && (unsigned long)_stext & 0xffff)
panic("Kernelbase not 64K-aligned (0x%lx)!\n",
(unsigned long)_stext);
-#endif
}
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 4a65e08a6042..3fb9f64f88fd 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -64,34 +64,6 @@ EXPORT_SYMBOL(DMA_MODE_READ);
EXPORT_SYMBOL(DMA_MODE_WRITE);
/*
- * We're called here very early in the boot.
- *
- * Note that the kernel may be running at an address which is different
- * from the address that it was linked at, so we must use RELOC/PTRRELOC
- * to access static data (including strings). -- paulus
- */
-notrace unsigned long __init early_init(unsigned long dt_ptr)
-{
- unsigned long offset = reloc_offset();
-
- /* First zero the BSS -- use memset_io, some platforms don't have
- * caches on yet */
- memset_io((void __iomem *)PTRRELOC(&__bss_start), 0,
- __bss_stop - __bss_start);
-
- /*
- * Identify the CPU type and fix up code sections
- * that depend on which cpu we have.
- */
- identify_cpu(offset, mfspr(SPRN_PVR));
-
- apply_feature_fixups();
-
- return KERNELBASE + offset;
-}
-
-
-/*
* This is run before start_kernel(), the kernel has been relocated
* and we are running with enough of the MMU enabled to have our
* proper kernel virtual addresses
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 4f49e1a3594c..a400854a5036 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -68,6 +68,7 @@
#include <asm/cputhreads.h>
#include <asm/hw_irq.h>
#include <asm/feature-fixups.h>
+#include <asm/kup.h>
#include "setup.h"
@@ -331,6 +332,12 @@ void __init early_setup(unsigned long dt_ptr)
*/
configure_exceptions();
+ /*
+ * Configure Kernel Userspace Protection. This needs to happen before
+ * feature fixups for platforms that implement this using features.
+ */
+ setup_kup();
+
/* Apply all the dynamic patching */
apply_feature_fixups();
setup_feature_keys();
@@ -383,6 +390,9 @@ void early_setup_secondary(void)
/* Initialize the hash table or TLB handling */
early_init_mmu_secondary();
+ /* Perform any KUP setup that is per-cpu */
+ setup_kup();
+
/*
* At this point, we can let interrupts switch to virtual mode
* (the MMU has been setup), so adjust the MSR in the PACA to
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 6794466f6420..06c299ef6132 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -565,7 +565,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
preempt_disable();
/* pull in MSR TS bits from user context */
- regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK);
+ regs->msr |= msr & MSR_TS_MASK;
/*
* Ensure that TM is enabled in regs->msr before we leave the signal
@@ -745,6 +745,31 @@ SYSCALL_DEFINE0(rt_sigreturn)
if (MSR_TM_SUSPENDED(mfmsr()))
tm_reclaim_current(0);
+ /*
+ * Disable MSR[TS] bit also, so, if there is an exception in the
+ * code below (as a page fault in copy_ckvsx_to_user()), it does
+ * not recheckpoint this task if there was a context switch inside
+ * the exception.
+ *
+ * A major page fault can indirectly call schedule(). A reschedule
+ * process in the middle of an exception can have a side effect
+ * (Changing the CPU MSR[TS] state), since schedule() is called
+ * with the CPU MSR[TS] disable and returns with MSR[TS]=Suspended
+ * (switch_to() calls tm_recheckpoint() for the 'new' process). In
+ * this case, the process continues to be the same in the CPU, but
+ * the CPU state just changed.
+ *
+ * This can cause a TM Bad Thing, since the MSR in the stack will
+ * have the MSR[TS]=0, and this is what will be used to RFID.
+ *
+ * Clearing MSR[TS] state here will avoid a recheckpoint if there
+ * is any process reschedule in kernel space. The MSR[TS] state
+ * does not need to be saved also, since it will be replaced with
+ * the MSR[TS] that came from user context later, at
+ * restore_tm_sigcontexts.
+ */
+ regs->msr &= ~MSR_TS_MASK;
+
if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
goto badframe;
if (MSR_TM_ACTIVE(msr)) {
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index bc0503ef9c9c..325d60633dfa 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -43,7 +43,6 @@
#include <linux/timex.h>
#include <linux/kernel_stat.h>
#include <linux/time.h>
-#include <linux/clockchips.h>
#include <linux/init.h>
#include <linux/profile.h>
#include <linux/cpu.h>
@@ -151,6 +150,8 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq);
unsigned long ppc_tb_freq;
EXPORT_SYMBOL_GPL(ppc_tb_freq);
+bool tb_invalid;
+
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
* Factor for converting from cputime_t (timebase ticks) to
@@ -460,6 +461,13 @@ void __delay(unsigned long loops)
diff += 1000000000;
spin_cpu_relax();
} while (diff < loops);
+ } else if (tb_invalid) {
+ /*
+ * TB is in error state and isn't ticking anymore.
+ * HMI handler was unable to recover from TB error.
+ * Return immediately, so that kernel won't get stuck here.
+ */
+ spin_cpu_relax();
} else {
start = get_tbl();
while (get_tbl() - start < loops)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 1fd45a8650e1..665f294725cb 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -2088,6 +2088,10 @@ void SPEFloatingPointException(struct pt_regs *regs)
int code = FPE_FLTUNK;
int err;
+ /* We restore the interrupt state now */
+ if (!arch_irq_disabled_regs(regs))
+ local_irq_enable();
+
flush_spe_to_thread(current);
spefscr = current->thread.spefscr;
@@ -2133,6 +2137,10 @@ void SPEFloatingPointRoundException(struct pt_regs *regs)
extern int speround_handler(struct pt_regs *regs);
int err;
+ /* We restore the interrupt state now */
+ if (!arch_irq_disabled_regs(regs))
+ local_irq_enable();
+
preempt_disable();
if (regs->msr & MSR_SPE)
giveup_spe(current);
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index ce199f6e4256..06f54d947057 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -26,9 +26,8 @@ GCOV_PROFILE := n
KCOV_INSTRUMENT := n
UBSAN_SANITIZE := n
-ccflags-y := -shared -fno-common -fno-builtin
-ccflags-y += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
- $(call cc-ldoption, -Wl$(comma)--hash-style=both)
+ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
+ -Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both
asflags-y := -D__VDSO32__ -s
obj-y += vdso32_wrapper.o
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index 28e7d112aa2f..32ebb3522ea1 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -12,9 +12,8 @@ GCOV_PROFILE := n
KCOV_INSTRUMENT := n
UBSAN_SANITIZE := n
-ccflags-y := -shared -fno-common -fno-builtin
-ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
- $(call cc-ldoption, -Wl$(comma)--hash-style=both)
+ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
+ -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both
asflags-y := -D__VDSO64__ -s
obj-y += vdso64_wrapper.o
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 21165da0052d..8eb867dbad5f 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -21,6 +21,7 @@ _GLOBAL(load_vr_state)
REST_32VRS(0,r4,r3)
blr
EXPORT_SYMBOL(load_vr_state)
+_ASM_NOKPROBE_SYMBOL(load_vr_state); /* used by restore_math */
/*
* Store VMX state into memory, including VSCR.
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 3c6ab22a0c4e..af3c15a1d41e 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -77,7 +77,7 @@ static u64 wd_smp_panic_timeout_tb __read_mostly; /* panic other CPUs */
static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeat */
-static DEFINE_PER_CPU(struct timer_list, wd_timer);
+static DEFINE_PER_CPU(struct hrtimer, wd_hrtimer);
static DEFINE_PER_CPU(u64, wd_timer_tb);
/* SMP checker bits */
@@ -293,21 +293,21 @@ out:
nmi_exit();
}
-static void wd_timer_reset(unsigned int cpu, struct timer_list *t)
-{
- t->expires = jiffies + msecs_to_jiffies(wd_timer_period_ms);
- if (wd_timer_period_ms > 1000)
- t->expires = __round_jiffies_up(t->expires, cpu);
- add_timer_on(t, cpu);
-}
-
-static void wd_timer_fn(struct timer_list *t)
+static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
int cpu = smp_processor_id();
+ if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
+ return HRTIMER_NORESTART;
+
+ if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
+ return HRTIMER_NORESTART;
+
watchdog_timer_interrupt(cpu);
- wd_timer_reset(cpu, t);
+ hrtimer_forward_now(hrtimer, ms_to_ktime(wd_timer_period_ms));
+
+ return HRTIMER_RESTART;
}
void arch_touch_nmi_watchdog(void)
@@ -323,37 +323,22 @@ void arch_touch_nmi_watchdog(void)
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);
-static void start_watchdog_timer_on(unsigned int cpu)
-{
- struct timer_list *t = per_cpu_ptr(&wd_timer, cpu);
-
- per_cpu(wd_timer_tb, cpu) = get_tb();
-
- timer_setup(t, wd_timer_fn, TIMER_PINNED);
- wd_timer_reset(cpu, t);
-}
-
-static void stop_watchdog_timer_on(unsigned int cpu)
-{
- struct timer_list *t = per_cpu_ptr(&wd_timer, cpu);
-
- del_timer_sync(t);
-}
-
-static int start_wd_on_cpu(unsigned int cpu)
+static void start_watchdog(void *arg)
{
+ struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer);
+ int cpu = smp_processor_id();
unsigned long flags;
if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
WARN_ON(1);
- return 0;
+ return;
}
if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
- return 0;
+ return;
if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
- return 0;
+ return;
wd_smp_lock(&flags);
cpumask_set_cpu(cpu, &wd_cpus_enabled);
@@ -363,27 +348,40 @@ static int start_wd_on_cpu(unsigned int cpu)
}
wd_smp_unlock(&flags);
- start_watchdog_timer_on(cpu);
+ *this_cpu_ptr(&wd_timer_tb) = get_tb();
- return 0;
+ hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer->function = watchdog_timer_fn;
+ hrtimer_start(hrtimer, ms_to_ktime(wd_timer_period_ms),
+ HRTIMER_MODE_REL_PINNED);
}
-static int stop_wd_on_cpu(unsigned int cpu)
+static int start_watchdog_on_cpu(unsigned int cpu)
{
+ return smp_call_function_single(cpu, start_watchdog, NULL, true);
+}
+
+static void stop_watchdog(void *arg)
+{
+ struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer);
+ int cpu = smp_processor_id();
unsigned long flags;
if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
- return 0; /* Can happen in CPU unplug case */
+ return; /* Can happen in CPU unplug case */
- stop_watchdog_timer_on(cpu);
+ hrtimer_cancel(hrtimer);
wd_smp_lock(&flags);
cpumask_clear_cpu(cpu, &wd_cpus_enabled);
wd_smp_unlock(&flags);
wd_smp_clear_cpu_pending(cpu, get_tb());
+}
- return 0;
+static int stop_watchdog_on_cpu(unsigned int cpu)
+{
+ return smp_call_function_single(cpu, stop_watchdog, NULL, true);
}
static void watchdog_calc_timeouts(void)
@@ -402,7 +400,7 @@ void watchdog_nmi_stop(void)
int cpu;
for_each_cpu(cpu, &wd_cpus_enabled)
- stop_wd_on_cpu(cpu);
+ stop_watchdog_on_cpu(cpu);
}
void watchdog_nmi_start(void)
@@ -411,7 +409,7 @@ void watchdog_nmi_start(void)
watchdog_calc_timeouts();
for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
- start_wd_on_cpu(cpu);
+ start_watchdog_on_cpu(cpu);
}
/*
@@ -423,7 +421,8 @@ int __init watchdog_nmi_probe(void)
err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
"powerpc/watchdog:online",
- start_wd_on_cpu, stop_wd_on_cpu);
+ start_watchdog_on_cpu,
+ stop_watchdog_on_cpu);
if (err < 0) {
pr_warn("could not be initialized");
return err;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index b2b29d4f9842..7bdcd4d7a9f0 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -74,6 +74,7 @@
#include <asm/opal.h>
#include <asm/xics.h>
#include <asm/xive.h>
+#include <asm/hw_breakpoint.h>
#include "book3s.h"
@@ -3374,7 +3375,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
mtspr(SPRN_PURR, vcpu->arch.purr);
mtspr(SPRN_SPURR, vcpu->arch.spurr);
- if (cpu_has_feature(CPU_FTR_DAWR)) {
+ if (dawr_enabled()) {
mtspr(SPRN_DAWR, vcpu->arch.dawr);
mtspr(SPRN_DAWRX, vcpu->arch.dawrx);
}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 3b9662a4207e..085509148d95 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -822,7 +822,7 @@ static inline void this_cpu_inc_rm(unsigned int __percpu *addr)
raddr = per_cpu_ptr(addr, cpu);
l = (unsigned long)raddr;
- if (REGION_ID(l) == VMALLOC_REGION_ID) {
+ if (get_region_id(l) == VMALLOC_REGION_ID) {
l = vmalloc_to_phys(raddr);
raddr = (unsigned int *)l;
}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 3a5e719ef032..dd014308f065 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -35,6 +35,7 @@
#include <asm/thread_info.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
+#include <asm/cpuidle.h>
/* Sign-extend HDEC if not on POWER9 */
#define EXTEND_HDEC(reg) \
@@ -45,6 +46,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
/* Values in HSTATE_NAPPING(r13) */
#define NAPPING_CEDE 1
#define NAPPING_NOVCPU 2
+#define NAPPING_UNSPLIT 3
/* Stack frame offsets for kvmppc_hv_entry */
#define SFS 208
@@ -290,17 +292,19 @@ kvm_novcpu_exit:
b kvmhv_switch_to_host
/*
- * We come in here when wakened from nap mode.
- * Relocation is off and most register values are lost.
- * r13 points to the PACA.
+ * We come in here when wakened from Linux offline idle code.
+ * Relocation is off
* r3 contains the SRR1 wakeup value, SRR1 is trashed.
*/
- .globl kvm_start_guest
-kvm_start_guest:
- /* Set runlatch bit the minute you wake up from nap */
- mfspr r0, SPRN_CTRLF
- ori r0, r0, 1
- mtspr SPRN_CTRLT, r0
+_GLOBAL(idle_kvm_start_guest)
+ ld r4,PACAEMERGSP(r13)
+ mfcr r5
+ mflr r0
+ std r1,0(r4)
+ std r5,8(r4)
+ std r0,16(r4)
+ subi r1,r4,STACK_FRAME_OVERHEAD
+ SAVE_NVGPRS(r1)
/*
* Could avoid this and pass it through in r3. For now,
@@ -308,27 +312,23 @@ kvm_start_guest:
*/
mtspr SPRN_SRR1,r3
- ld r2,PACATOC(r13)
-
li r0,0
stb r0,PACA_FTRACE_ENABLED(r13)
li r0,KVM_HWTHREAD_IN_KVM
stb r0,HSTATE_HWTHREAD_STATE(r13)
- /* NV GPR values from power7_idle() will no longer be valid */
- li r0,1
- stb r0,PACA_NAPSTATELOST(r13)
-
- /* were we napping due to cede? */
+ /* kvm cede / napping does not come through here */
lbz r0,HSTATE_NAPPING(r13)
- cmpwi r0,NAPPING_CEDE
- beq kvm_end_cede
- cmpwi r0,NAPPING_NOVCPU
- beq kvm_novcpu_wakeup
+ twnei r0,0
- ld r1,PACAEMERGSP(r13)
- subi r1,r1,STACK_FRAME_OVERHEAD
+ b 1f
+
+kvm_unsplit_wakeup:
+ li r0, 0
+ stb r0, HSTATE_NAPPING(r13)
+
+1:
/*
* We weren't napping due to cede, so this must be a secondary
@@ -437,19 +437,25 @@ kvm_no_guest:
lbz r3, HSTATE_HWTHREAD_REQ(r13)
cmpwi r3, 0
bne 54f
-/*
- * We jump to pnv_wakeup_loss, which will return to the caller
- * of power7_nap in the powernv cpu offline loop. The value we
- * put in r3 becomes the return value for power7_nap. pnv_wakeup_loss
- * requires SRR1 in r12.
- */
+
+ /*
+ * Jump to idle_return_gpr_loss, which returns to the
+ * idle_kvm_start_guest caller.
+ */
li r3, LPCR_PECE0
mfspr r4, SPRN_LPCR
rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
mtspr SPRN_LPCR, r4
- li r3, 0
- mfspr r12,SPRN_SRR1
- b pnv_wakeup_loss
+ /* set up r3 for return */
+ mfspr r3,SPRN_SRR1
+ REST_NVGPRS(r1)
+ addi r1, r1, STACK_FRAME_OVERHEAD
+ ld r0, 16(r1)
+ ld r5, 8(r1)
+ ld r1, 0(r1)
+ mtlr r0
+ mtcr r5
+ blr
53: HMT_LOW
ld r5, HSTATE_KVM_VCORE(r13)
@@ -534,6 +540,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
lbz r0, KVM_SPLIT_DO_NAP(r3)
cmpwi r0, 0
beq 57f
+ li r3, NAPPING_UNSPLIT
+ stb r3, HSTATE_NAPPING(r13)
li r3, (LPCR_PECEDH | LPCR_PECE0) >> 4
mfspr r5, SPRN_LPCR
rlwimi r5, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
@@ -822,18 +830,21 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtspr SPRN_IAMR, r5
mtspr SPRN_PSPB, r6
mtspr SPRN_FSCR, r7
- ld r5, VCPU_DAWR(r4)
- ld r6, VCPU_DAWRX(r4)
- ld r7, VCPU_CIABR(r4)
- ld r8, VCPU_TAR(r4)
/*
* Handle broken DAWR case by not writing it. This means we
* can still store the DAWR register for migration.
*/
-BEGIN_FTR_SECTION
+ LOAD_REG_ADDR(r5, dawr_force_enable)
+ lbz r5, 0(r5)
+ cmpdi r5, 0
+ beq 1f
+ ld r5, VCPU_DAWR(r4)
+ ld r6, VCPU_DAWRX(r4)
mtspr SPRN_DAWR, r5
mtspr SPRN_DAWRX, r6
-END_FTR_SECTION_IFSET(CPU_FTR_DAWR)
+1:
+ ld r7, VCPU_CIABR(r4)
+ ld r8, VCPU_TAR(r4)
mtspr SPRN_CIABR, r7
mtspr SPRN_TAR, r8
ld r5, VCPU_IC(r4)
@@ -2513,11 +2524,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
blr
2:
-BEGIN_FTR_SECTION
- /* POWER9 with disabled DAWR */
+ LOAD_REG_ADDR(r11, dawr_force_enable)
+ lbz r11, 0(r11)
+ cmpdi r11, 0
li r3, H_HARDWARE
- blr
-END_FTR_SECTION_IFCLR(CPU_FTR_DAWR)
+ beqlr
/* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW
rlwimi r5, r4, 2, DAWRX_WT
@@ -2654,6 +2665,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
lis r3, LPCR_PECEDP@h /* Do wake on privileged doorbell */
+ /* Go back to host stack */
+ ld r1, HSTATE_HOST_R1(r13)
+
/*
* Take a nap until a decrementer or external or doobell interrupt
* occurs, with PECE1 and PECE0 set in LPCR.
@@ -2682,26 +2696,42 @@ BEGIN_FTR_SECTION
* requested level = 0 (just stop dispatching)
*/
lis r3, (PSSCR_EC | PSSCR_ESL)@h
- mtspr SPRN_PSSCR, r3
/* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */
li r4, LPCR_PECE_HVEE@higher
sldi r4, r4, 32
or r5, r5, r4
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+FTR_SECTION_ELSE
+ li r3, PNV_THREAD_NAP
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
mtspr SPRN_LPCR,r5
isync
- li r0, 0
- std r0, HSTATE_SCRATCH0(r13)
- ptesync
- ld r0, HSTATE_SCRATCH0(r13)
-1: cmpd r0, r0
- bne 1b
+
BEGIN_FTR_SECTION
- nap
+ bl isa300_idle_stop_mayloss
FTR_SECTION_ELSE
- PPC_STOP
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
- b .
+ bl isa206_idle_insn_mayloss
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
+
+ mfspr r0, SPRN_CTRLF
+ ori r0, r0, 1
+ mtspr SPRN_CTRLT, r0
+
+ mtspr SPRN_SRR1, r3
+
+ li r0, 0
+ stb r0, PACA_FTRACE_ENABLED(r13)
+
+ li r0, KVM_HWTHREAD_IN_KVM
+ stb r0, HSTATE_HWTHREAD_STATE(r13)
+
+ lbz r0, HSTATE_NAPPING(r13)
+ cmpwi r0, NAPPING_CEDE
+ beq kvm_end_cede
+ cmpwi r0, NAPPING_NOVCPU
+ beq kvm_novcpu_wakeup
+ cmpwi r0, NAPPING_UNSPLIT
+ beq kvm_unsplit_wakeup
+ twi 31,0,0 /* Nap state must not be zero */
33: mr r4, r3
li r3, 0
@@ -2709,12 +2739,11 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
b 34f
kvm_end_cede:
+ /* Woken by external or decrementer interrupt */
+
/* get vcpu pointer */
ld r4, HSTATE_KVM_VCPU(r13)
- /* Woken by external or decrementer interrupt */
- ld r1, HSTATE_HOST_R1(r13)
-
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
addi r3, r4, VCPU_TB_RMINTR
bl kvmhv_accumulate_time
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 79396e184bca..c55f9c27bf79 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -8,9 +8,22 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE)
-obj-y += string.o alloc.o code-patching.o feature-fixups.o
+KASAN_SANITIZE_code-patching.o := n
+KASAN_SANITIZE_feature-fixups.o := n
-obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o strlen_32.o
+ifdef CONFIG_KASAN
+CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING
+endif
+
+obj-y += alloc.o code-patching.o feature-fixups.o
+
+ifndef CONFIG_KASAN
+obj-y += string.o memcmp_$(BITS).o
+obj-$(CONFIG_PPC32) += strlen_32.o
+endif
+
+obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
@@ -34,7 +47,7 @@ obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o \
test_emulate_step_exec_instr.o
obj-y += checksum_$(BITS).o checksum_wrappers.o \
- string_$(BITS).o memcmp_$(BITS).o
+ string_$(BITS).o
obj-y += sstep.o ldstfp.o quad.o
obj64-y += quad.o
diff --git a/arch/powerpc/lib/checksum_wrappers.c b/arch/powerpc/lib/checksum_wrappers.c
index 890d4ddd91d6..bb9307ce2440 100644
--- a/arch/powerpc/lib/checksum_wrappers.c
+++ b/arch/powerpc/lib/checksum_wrappers.c
@@ -29,6 +29,7 @@ __wsum csum_and_copy_from_user(const void __user *src, void *dst,
unsigned int csum;
might_sleep();
+ allow_read_from_user(src, len);
*err_ptr = 0;
@@ -60,6 +61,7 @@ __wsum csum_and_copy_from_user(const void __user *src, void *dst,
}
out:
+ prevent_read_from_user(src, len);
return (__force __wsum)csum;
}
EXPORT_SYMBOL(csum_and_copy_from_user);
@@ -70,6 +72,7 @@ __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len,
unsigned int csum;
might_sleep();
+ allow_write_to_user(dst, len);
*err_ptr = 0;
@@ -97,6 +100,7 @@ __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len,
}
out:
+ prevent_write_to_user(dst, len);
return (__force __wsum)csum;
}
EXPORT_SYMBOL(csum_and_copy_to_user);
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 506413a2c25e..90c9d4a1e36f 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -15,7 +15,6 @@
#include <linux/cpuhotplug.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
-#include <linux/kprobes.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -26,9 +25,9 @@
static int __patch_instruction(unsigned int *exec_addr, unsigned int instr,
unsigned int *patch_addr)
{
- int err;
+ int err = 0;
- __put_user_size(instr, patch_addr, 4, err);
+ __put_user_asm(instr, patch_addr, err, "stw");
if (err)
return err;
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index ba66846fe973..d5642481fb98 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -14,6 +14,7 @@
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/code-patching-asm.h>
+#include <asm/kasan.h>
#define COPY_16_BYTES \
lwz r7,4(r4); \
@@ -68,6 +69,7 @@ CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)
+#ifndef CONFIG_KASAN
_GLOBAL(memset16)
rlwinm. r0 ,r5, 31, 1, 31
addi r6, r3, -4
@@ -81,6 +83,7 @@ _GLOBAL(memset16)
sth r4, 4(r6)
blr
EXPORT_SYMBOL(memset16)
+#endif
/*
* Use dcbz on the complete cache lines in the destination
@@ -91,7 +94,7 @@ EXPORT_SYMBOL(memset16)
* We therefore skip the optimised bloc that uses dcbz. This jump is
* replaced by a nop once cache is active. This is done in machine_init()
*/
-_GLOBAL(memset)
+_GLOBAL_KASAN(memset)
cmplwi 0,r5,4
blt 7f
@@ -151,6 +154,7 @@ _GLOBAL(memset)
bdnz 9b
blr
EXPORT_SYMBOL(memset)
+EXPORT_SYMBOL_KASAN(memset)
/*
* This version uses dcbz on the complete cache lines in the
@@ -163,12 +167,12 @@ EXPORT_SYMBOL(memset)
* We therefore jump to generic_memcpy which doesn't use dcbz. This jump is
* replaced by a nop once cache is active. This is done in machine_init()
*/
-_GLOBAL(memmove)
+_GLOBAL_KASAN(memmove)
cmplw 0,r3,r4
bgt backwards_memcpy
/* fall through */
-_GLOBAL(memcpy)
+_GLOBAL_KASAN(memcpy)
1: b generic_memcpy
patch_site 1b, patch__memcpy_nocache
@@ -244,6 +248,8 @@ _GLOBAL(memcpy)
65: blr
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(memmove)
+EXPORT_SYMBOL_KASAN(memcpy)
+EXPORT_SYMBOL_KASAN(memmove)
generic_memcpy:
srwi. r7,r5,3
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
index 3c3be02f33b7..7f6bd031c306 100644
--- a/arch/powerpc/lib/mem_64.S
+++ b/arch/powerpc/lib/mem_64.S
@@ -12,7 +12,9 @@
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>
+#include <asm/kasan.h>
+#ifndef CONFIG_KASAN
_GLOBAL(__memset16)
rlwimi r4,r4,16,0,15
/* fall through */
@@ -29,8 +31,9 @@ _GLOBAL(__memset64)
EXPORT_SYMBOL(__memset16)
EXPORT_SYMBOL(__memset32)
EXPORT_SYMBOL(__memset64)
+#endif
-_GLOBAL(memset)
+_GLOBAL_KASAN(memset)
neg r0,r3
rlwimi r4,r4,8,16,23
andi. r0,r0,7 /* # bytes to be 8-byte aligned */
@@ -96,8 +99,9 @@ _GLOBAL(memset)
stb r4,0(r6)
blr
EXPORT_SYMBOL(memset)
+EXPORT_SYMBOL_KASAN(memset)
-_GLOBAL_TOC(memmove)
+_GLOBAL_TOC_KASAN(memmove)
cmplw 0,r3,r4
bgt backwards_memcpy
b memcpy
@@ -139,3 +143,4 @@ _GLOBAL(backwards_memcpy)
mtctr r7
b 1b
EXPORT_SYMBOL(memmove)
+EXPORT_SYMBOL_KASAN(memmove)
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index 273ea67e60a1..25c3772c1dfb 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -11,6 +11,7 @@
#include <asm/export.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
+#include <asm/kasan.h>
#ifndef SELFTEST_CASE
/* For big-endian, 0 == most CPUs, 1 == POWER6, 2 == Cell */
@@ -18,7 +19,7 @@
#endif
.align 7
-_GLOBAL_TOC(memcpy)
+_GLOBAL_TOC_KASAN(memcpy)
BEGIN_FTR_SECTION
#ifdef __LITTLE_ENDIAN__
cmpdi cr7,r5,0
@@ -230,3 +231,4 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
blr
#endif
EXPORT_SYMBOL(memcpy)
+EXPORT_SYMBOL_KASAN(memcpy)
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 3c1bd9fa23cd..0f499db315d6 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -5,53 +5,18 @@
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
-CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE)
-
obj-y := fault.o mem.o pgtable.o mmap.o \
init_$(BITS).o pgtable_$(BITS).o \
+ pgtable-frag.o \
init-common.o mmu_context.o drmem.o
-obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
- tlb_nohash_low.o
-obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o
-hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o
-obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o
-obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb.o \
- $(hash64-y) mmu_context_book3s64.o \
- pgtable-book3s64.o pgtable-frag.o
-obj-$(CONFIG_PPC32) += pgtable-frag.o
-obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o
-obj-$(CONFIG_PPC_BOOK3S_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
-obj-$(CONFIG_PPC_BOOK3S) += tlb_hash$(BITS).o
-ifdef CONFIG_PPC_BOOK3S_64
-obj-$(CONFIG_PPC_4K_PAGES) += hash64_4k.o
-obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o
-endif
-obj-$(CONFIG_40x) += 40x_mmu.o
-obj-$(CONFIG_44x) += 44x_mmu.o
-obj-$(CONFIG_PPC_8xx) += 8xx_mmu.o
-obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke_mmu.o
+obj-$(CONFIG_PPC_MMU_NOHASH) += nohash/
+obj-$(CONFIG_PPC_BOOK3S_32) += book3s32/
+obj-$(CONFIG_PPC_BOOK3S_64) += book3s64/
obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
-obj-$(CONFIG_PPC_SPLPAR) += vphn.o
obj-$(CONFIG_PPC_MM_SLICES) += slice.o
-obj-y += hugetlbpage.o
-ifdef CONFIG_HUGETLB_PAGE
-obj-$(CONFIG_PPC_BOOK3S_64) += hugetlbpage-hash64.o
-obj-$(CONFIG_PPC_RADIX_MMU) += hugetlbpage-radix.o
-obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o
-endif
-obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hugepage-hash64.o
-obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
obj-$(CONFIG_HIGHMEM) += highmem.o
obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o
-obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o
obj-$(CONFIG_PPC_PTDUMP) += ptdump/
-obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o
-
-# Disable kcov instrumentation on sensitive code
-# This is necessary for booting with kcov enabled on book3e machines
-KCOV_INSTRUMENT_tlb_nohash.o := n
-KCOV_INSTRUMENT_fsl_booke_mmu.o := n
-
-# Instrumenting the SLB fault path can lead to duplicate SLB entries
-KCOV_INSTRUMENT_slb.o := n
+obj-$(CONFIG_KASAN) += kasan/
diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile
new file mode 100644
index 000000000000..1732eaa740a9
--- /dev/null
+++ b/arch/powerpc/mm/book3s32/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+
+KASAN_SANITIZE_mmu.o := n
+
+ifdef CONFIG_KASAN
+CFLAGS_mmu.o += -DDISABLE_BRANCH_PROFILING
+endif
+
+obj-y += mmu.o hash_low.o mmu_context.o tlb.o
diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/book3s32/hash_low.S
index a6c491f18a04..e27792d0b744 100644
--- a/arch/powerpc/mm/hash_low_32.S
+++ b/arch/powerpc/mm/book3s32/hash_low.S
@@ -309,13 +309,13 @@ Hash_msk = (((1 << Hash_bits) - 1) * 64)
_GLOBAL(create_hpte)
/* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */
- rlwinm r8,r5,32-10,31,31 /* _PAGE_RW -> PP lsb */
- rlwinm r0,r5,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
+ rlwinm r8,r5,32-9,30,30 /* _PAGE_RW -> PP msb */
+ rlwinm r0,r5,32-6,30,30 /* _PAGE_DIRTY -> PP msb */
and r8,r8,r0 /* writable if _RW & _DIRTY */
rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */
rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */
ori r8,r8,0xe04 /* clear out reserved bits */
- andc r8,r5,r8 /* PP = user? (rw&dirty? 2: 3): 0 */
+ andc r8,r5,r8 /* PP = user? (rw&dirty? 1: 3): 0 */
BEGIN_FTR_SECTION
rlwinm r8,r8,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/book3s32/mmu.c
index 5d9c3ff728c9..fc073cb2c517 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -34,11 +34,12 @@
#include <asm/code-patching.h>
#include <asm/sections.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
-struct hash_pte *Hash, *Hash_end;
-unsigned long Hash_size, Hash_mask;
+struct hash_pte *Hash;
+static unsigned long Hash_size, Hash_mask;
unsigned long _SDR1;
+static unsigned int hash_mb, hash_mb2;
struct ppc_bat BATS[8][2]; /* 8 pairs of IBAT, DBAT */
@@ -318,7 +319,6 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
*/
void __init MMU_init_hw(void)
{
- unsigned int hmask, mb, mb2;
unsigned int n_hpteg, lg_n_hpteg;
if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
@@ -355,26 +355,34 @@ void __init MMU_init_hw(void)
__func__, Hash_size, Hash_size);
_SDR1 = __pa(Hash) | SDR1_LOW_BITS;
- Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size);
+ pr_info("Total memory = %lldMB; using %ldkB for hash table\n",
+ (unsigned long long)(total_memory >> 20), Hash_size >> 10);
- printk("Total memory = %lldMB; using %ldkB for hash table (at %p)\n",
- (unsigned long long)(total_memory >> 20), Hash_size >> 10, Hash);
+ Hash_mask = n_hpteg - 1;
+ hash_mb2 = hash_mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg;
+ if (lg_n_hpteg > 16)
+ hash_mb2 = 16 - LG_HPTEG_SIZE;
+}
+
+void __init MMU_init_hw_patch(void)
+{
+ unsigned int hmask = Hash_mask >> (16 - LG_HPTEG_SIZE);
+
+ if (ppc_md.progress)
+ ppc_md.progress("hash:patch", 0x345);
+ if (ppc_md.progress)
+ ppc_md.progress("hash:done", 0x205);
+
+ /* WARNING: Make sure nothing can trigger a KASAN check past this point */
/*
* Patch up the instructions in hashtable.S:create_hpte
*/
- if ( ppc_md.progress ) ppc_md.progress("hash:patch", 0x345);
- Hash_mask = n_hpteg - 1;
- hmask = Hash_mask >> (16 - LG_HPTEG_SIZE);
- mb2 = mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg;
- if (lg_n_hpteg > 16)
- mb2 = 16 - LG_HPTEG_SIZE;
-
modify_instruction_site(&patch__hash_page_A0, 0xffff,
((unsigned int)Hash - PAGE_OFFSET) >> 16);
- modify_instruction_site(&patch__hash_page_A1, 0x7c0, mb << 6);
- modify_instruction_site(&patch__hash_page_A2, 0x7c0, mb2 << 6);
+ modify_instruction_site(&patch__hash_page_A1, 0x7c0, hash_mb << 6);
+ modify_instruction_site(&patch__hash_page_A2, 0x7c0, hash_mb2 << 6);
modify_instruction_site(&patch__hash_page_B, 0xffff, hmask);
modify_instruction_site(&patch__hash_page_C, 0xffff, hmask);
@@ -383,11 +391,9 @@ void __init MMU_init_hw(void)
*/
modify_instruction_site(&patch__flush_hash_A0, 0xffff,
((unsigned int)Hash - PAGE_OFFSET) >> 16);
- modify_instruction_site(&patch__flush_hash_A1, 0x7c0, mb << 6);
- modify_instruction_site(&patch__flush_hash_A2, 0x7c0, mb2 << 6);
+ modify_instruction_site(&patch__flush_hash_A1, 0x7c0, hash_mb << 6);
+ modify_instruction_site(&patch__flush_hash_A2, 0x7c0, hash_mb2 << 6);
modify_instruction_site(&patch__flush_hash_B, 0xffff, hmask);
-
- if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205);
}
void setup_initial_memory_limit(phys_addr_t first_memblock_base,
@@ -404,3 +410,33 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
else /* Anything else has 256M mapped */
memblock_set_current_limit(min_t(u64, first_memblock_size, 0x10000000));
}
+
+void __init print_system_hash_info(void)
+{
+ pr_info("Hash_size = 0x%lx\n", Hash_size);
+ if (Hash_mask)
+ pr_info("Hash_mask = 0x%lx\n", Hash_mask);
+}
+
+#ifdef CONFIG_PPC_KUEP
+void __init setup_kuep(bool disabled)
+{
+ pr_info("Activating Kernel Userspace Execution Prevention\n");
+
+ if (cpu_has_feature(CPU_FTR_601))
+ pr_warn("KUEP is not working on powerpc 601 (No NX bit in Seg Regs)\n");
+
+ if (disabled)
+ pr_warn("KUEP cannot be disabled yet on 6xx when compiled in\n");
+}
+#endif
+
+#ifdef CONFIG_PPC_KUAP
+void __init setup_kuap(bool disabled)
+{
+ pr_info("Activating Kernel Userspace Access Protection\n");
+
+ if (disabled)
+ pr_warn("KUAP cannot be disabled yet on 6xx when compiled in\n");
+}
+#endif
diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/book3s32/mmu_context.c
index 921c1e33e941..921c1e33e941 100644
--- a/arch/powerpc/mm/mmu_context_hash32.c
+++ b/arch/powerpc/mm/book3s32/mmu_context.c
diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/book3s32/tlb.c
index cf8472cf3d59..8d56f0417f87 100644
--- a/arch/powerpc/mm/tlb_hash32.c
+++ b/arch/powerpc/mm/book3s32/tlb.c
@@ -32,7 +32,7 @@
#include <asm/tlbflush.h>
#include <asm/tlb.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
/*
* Called when unmapping pages to flush entries from the TLB/hash table.
diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile
new file mode 100644
index 000000000000..974b4fc19f4f
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/Makefile
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0
+
+ccflags-y := $(NO_MINIMAL_TOC)
+
+CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE)
+
+obj-y += hash_pgtable.o hash_utils.o slb.o \
+ mmu_context.o pgtable.o hash_tlb.o
+obj-$(CONFIG_PPC_NATIVE) += hash_native.o
+obj-$(CONFIG_PPC_RADIX_MMU) += radix_pgtable.o radix_tlb.o
+obj-$(CONFIG_PPC_4K_PAGES) += hash_4k.o
+obj-$(CONFIG_PPC_64K_PAGES) += hash_64k.o
+obj-$(CONFIG_PPC_SPLPAR) += vphn.o
+obj-$(CONFIG_HUGETLB_PAGE) += hash_hugetlbpage.o
+ifdef CONFIG_HUGETLB_PAGE
+obj-$(CONFIG_PPC_RADIX_MMU) += radix_hugetlbpage.o
+endif
+obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hash_hugepage.o
+obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage_prot.o
+obj-$(CONFIG_SPAPR_TCE_IOMMU) += iommu_api.o
+obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o
+
+# Instrumenting the SLB fault path can lead to duplicate SLB entries
+KCOV_INSTRUMENT_slb.o := n
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/book3s64/hash_4k.c
index 6fa6765a10eb..22e787123cdf 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/book3s64/hash_4k.c
@@ -1,6 +1,6 @@
/*
* Copyright IBM Corporation, 2015
- * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2 of the GNU Lesser General Public License
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/book3s64/hash_64k.c
index 3afa253d7f52..7084ce2951e6 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/book3s64/hash_64k.c
@@ -1,6 +1,6 @@
/*
* Copyright IBM Corporation, 2015
- * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2 of the GNU Lesser General Public License
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/book3s64/hash_hugepage.c
index dfbc3b32f09b..440823797de7 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/book3s64/hash_hugepage.c
@@ -1,6 +1,6 @@
/*
* Copyright IBM Corporation, 2013
- * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2.1 of the GNU Lesser General Public License
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c
index b0d9209d9a86..eefa89c6117b 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c
@@ -15,6 +15,9 @@
#include <asm/cacheflush.h>
#include <asm/machdep.h>
+unsigned int hpage_shift;
+EXPORT_SYMBOL(hpage_shift);
+
extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
unsigned long pa, unsigned long rlags,
unsigned long vflags, int psize, int ssize);
@@ -34,7 +37,8 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
/* Search the Linux page table for a match with va */
vpn = hpt_vpn(ea, vsid, ssize);
- /* At this point, we have a pte (old_pte) which can be used to build
+ /*
+ * At this point, we have a pte (old_pte) which can be used to build
* or update an HPTE. There are 2 cases:
*
* 1. There is a valid (present) pte with no associated HPTE (this is
@@ -55,8 +59,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
if (unlikely(!check_pte_access(access, old_pte)))
return 1;
- /* Try to lock the PTE, add ACCESSED and DIRTY if it was
- * a write access */
+ /*
+ * Try to lock the PTE, add ACCESSED and DIRTY if it was
+ * a write access
+ */
new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED;
if (access & _PAGE_WRITE)
new_pte |= _PAGE_DIRTY;
@@ -74,8 +80,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
rpte = __real_pte(__pte(old_pte), ptep, offset);
if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
- /* No CPU has hugepages but lacks no execute, so we
- * don't need to worry about that case */
+ /*
+ * No CPU has hugepages but lacks no execute, so we
+ * don't need to worry about that case
+ */
rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
/* Check if pte already has an hpte (case 2) */
@@ -145,3 +153,16 @@ void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr
old_pte, pte);
set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
}
+
+void hugetlbpage_init_default(void)
+{
+ /* Set default large page size. Currently, we pick 16M or 1M
+ * depending on what is available
+ */
+ if (mmu_psize_defs[MMU_PAGE_16M].shift)
+ hpage_shift = mmu_psize_defs[MMU_PAGE_16M].shift;
+ else if (mmu_psize_defs[MMU_PAGE_1M].shift)
+ hpage_shift = mmu_psize_defs[MMU_PAGE_1M].shift;
+ else if (mmu_psize_defs[MMU_PAGE_2M].shift)
+ hpage_shift = mmu_psize_defs[MMU_PAGE_2M].shift;
+}
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/book3s64/hash_native.c
index aaa28fd918fe..aaa28fd918fe 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/book3s64/hash_pgtable.c
index c08d49046a96..1fd025dba4a3 100644
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
@@ -19,7 +19,7 @@
#include <asm/mmu.h>
#include <asm/tlb.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
#define CREATE_TRACE_POINTS
#include <trace/events/thp.h>
@@ -112,9 +112,16 @@ int __meminit hash__vmemmap_create_mapping(unsigned long start,
unsigned long page_size,
unsigned long phys)
{
- int rc = htab_bolt_mapping(start, start + page_size, phys,
- pgprot_val(PAGE_KERNEL),
- mmu_vmemmap_psize, mmu_kernel_ssize);
+ int rc;
+
+ if ((start + page_size) >= H_VMEMMAP_END) {
+ pr_warn("Outside the supported range\n");
+ return -1;
+ }
+
+ rc = htab_bolt_mapping(start, start + page_size, phys,
+ pgprot_val(PAGE_KERNEL),
+ mmu_vmemmap_psize, mmu_kernel_ssize);
if (rc < 0) {
int rc2 = htab_remove_mapping(start, start + page_size,
mmu_vmemmap_psize,
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/book3s64/hash_tlb.c
index 87d71dd25441..d4f0101447b1 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/book3s64/hash_tlb.c
@@ -55,7 +55,8 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
i = batch->index;
- /* Get page size (maybe move back to caller).
+ /*
+ * Get page size (maybe move back to caller).
*
* NOTE: when using special 64K mappings in 4K environment like
* for SPEs, we obtain the page size from the slice, which thus
@@ -77,10 +78,12 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
#endif
} else {
psize = pte_pagesize_index(mm, addr, pte);
- /* Mask the address for the standard page size. If we
+ /*
+ * Mask the address for the standard page size. If we
* have a 64k page kernel, but the hardware does not
* support 64k pages, this might be different from the
- * hardware page size encoded in the slice table. */
+ * hardware page size encoded in the slice table.
+ */
addr &= PAGE_MASK;
offset = PTRS_PER_PTE;
}
@@ -161,7 +164,8 @@ void hash__tlb_flush(struct mmu_gather *tlb)
{
struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch);
- /* If there's a TLB batch pending, then we must flush it because the
+ /*
+ * If there's a TLB batch pending, then we must flush it because the
* pages are going to be freed and we really don't want to have a CPU
* access a freed page because it has a stale TLB
*/
@@ -201,7 +205,8 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
BUG_ON(!mm->pgd);
- /* Note: Normally, we should only ever use a batch within a
+ /*
+ * Note: Normally, we should only ever use a batch within a
* PTE locked section. This violates the rule, but will work
* since we don't actually modify the PTEs, we just flush the
* hash while leaving the PTEs intact (including their reference
@@ -238,7 +243,8 @@ void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
unsigned long flags;
addr = _ALIGN_DOWN(addr, PMD_SIZE);
- /* Note: Normally, we should only ever use a batch within a
+ /*
+ * Note: Normally, we should only ever use a batch within a
* PTE locked section. This violates the rule, but will work
* since we don't actually modify the PTEs, we just flush the
* hash while leaving the PTEs intact (including their reference
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 0a4f939a8161..919a861a8ec0 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -37,6 +37,7 @@
#include <linux/context_tracking.h>
#include <linux/libfdt.h>
#include <linux/pkeys.h>
+#include <linux/hugetlb.h>
#include <asm/debugfs.h>
#include <asm/processor.h>
@@ -65,6 +66,8 @@
#include <asm/pte-walk.h>
#include <asm/asm-prototypes.h>
+#include <mm/mmu_decl.h>
+
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
#else
@@ -128,7 +131,8 @@ static DEFINE_SPINLOCK(linear_map_hash_lock);
struct mmu_hash_ops mmu_hash_ops;
EXPORT_SYMBOL(mmu_hash_ops);
-/* There are definitions of page sizes arrays to be used when none
+/*
+ * These are definitions of page sizes arrays to be used when none
* is provided by the firmware.
*/
@@ -145,7 +149,8 @@ static struct mmu_psize_def mmu_psize_defaults[] = {
},
};
-/* POWER4, GPUL, POWER5
+/*
+ * POWER4, GPUL, POWER5
*
* Support for 16Mb large pages
*/
@@ -479,7 +484,8 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
}
#ifdef CONFIG_HUGETLB_PAGE
-/* Scan for 16G memory blocks that have been set aside for huge pages
+/*
+ * Scan for 16G memory blocks that have been set aside for huge pages
* and reserve those blocks for 16G huge pages.
*/
static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
@@ -496,8 +502,10 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
if (type == NULL || strcmp(type, "memory") != 0)
return 0;
- /* This property is the log base 2 of the number of virtual pages that
- * will represent this memory block. */
+ /*
+ * This property is the log base 2 of the number of virtual pages that
+ * will represent this memory block.
+ */
page_count_prop = of_get_flat_dt_prop(node, "ibm,expected#pages", NULL);
if (page_count_prop == NULL)
return 0;
@@ -673,7 +681,8 @@ static void __init htab_init_page_sizes(void)
#endif /* CONFIG_PPC_64K_PAGES */
#ifdef CONFIG_SPARSEMEM_VMEMMAP
- /* We try to use 16M pages for vmemmap if that is supported
+ /*
+ * We try to use 16M pages for vmemmap if that is supported
* and we have at least 1G of RAM at boot
*/
if (mmu_psize_defs[MMU_PAGE_16M].shift &&
@@ -742,7 +751,8 @@ unsigned htab_shift_for_mem_size(unsigned long mem_size)
static unsigned long __init htab_get_table_size(void)
{
- /* If hash size isn't already provided by the platform, we try to
+ /*
+ * If hash size isn't already provided by the platform, we try to
* retrieve it from the device-tree. If it's not there neither, we
* calculate it now based on the total RAM size
*/
@@ -755,12 +765,12 @@ static unsigned long __init htab_get_table_size(void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
-void resize_hpt_for_hotplug(unsigned long new_mem_size)
+int resize_hpt_for_hotplug(unsigned long new_mem_size)
{
unsigned target_hpt_shift;
if (!mmu_hash_ops.resize_hpt)
- return;
+ return 0;
target_hpt_shift = htab_shift_for_mem_size(new_mem_size);
@@ -772,23 +782,25 @@ void resize_hpt_for_hotplug(unsigned long new_mem_size)
* reduce unless the target shift is at least 2 below the
* current shift
*/
- if ((target_hpt_shift > ppc64_pft_size)
- || (target_hpt_shift < (ppc64_pft_size - 1))) {
- int rc;
-
- rc = mmu_hash_ops.resize_hpt(target_hpt_shift);
- if (rc && (rc != -ENODEV))
- printk(KERN_WARNING
- "Unable to resize hash page table to target order %d: %d\n",
- target_hpt_shift, rc);
- }
+ if (target_hpt_shift > ppc64_pft_size ||
+ target_hpt_shift < ppc64_pft_size - 1)
+ return mmu_hash_ops.resize_hpt(target_hpt_shift);
+
+ return 0;
}
int hash__create_section_mapping(unsigned long start, unsigned long end, int nid)
{
- int rc = htab_bolt_mapping(start, end, __pa(start),
- pgprot_val(PAGE_KERNEL), mmu_linear_psize,
- mmu_kernel_ssize);
+ int rc;
+
+ if (end >= H_VMALLOC_START) {
+ pr_warn("Outside the supported range\n");
+ return -1;
+ }
+
+ rc = htab_bolt_mapping(start, end, __pa(start),
+ pgprot_val(PAGE_KERNEL), mmu_linear_psize,
+ mmu_kernel_ssize);
if (rc < 0) {
int rc2 = htab_remove_mapping(start, end, mmu_linear_psize,
@@ -929,6 +941,11 @@ static void __init htab_initialize(void)
DBG("creating mapping for region: %lx..%lx (prot: %lx)\n",
base, size, prot);
+ if ((base + size) >= H_VMALLOC_START) {
+ pr_warn("Outside the supported range\n");
+ continue;
+ }
+
BUG_ON(htab_bolt_mapping(base, base + size, __pa(base),
prot, mmu_linear_psize, mmu_kernel_ssize));
}
@@ -968,6 +985,7 @@ void __init hash__early_init_devtree(void)
htab_scan_page_sizes();
}
+struct hash_mm_context init_hash_mm_context;
void __init hash__early_init_mmu(void)
{
#ifndef CONFIG_PPC_64K_PAGES
@@ -1013,11 +1031,11 @@ void __init hash__early_init_mmu(void)
__pgd_val_bits = HASH_PGD_VAL_BITS;
__kernel_virt_start = H_KERN_VIRT_START;
- __kernel_virt_size = H_KERN_VIRT_SIZE;
__vmalloc_start = H_VMALLOC_START;
__vmalloc_end = H_VMALLOC_END;
__kernel_io_start = H_KERN_IO_START;
- vmemmap = (struct page *)H_VMEMMAP_BASE;
+ __kernel_io_end = H_KERN_IO_END;
+ vmemmap = (struct page *)H_VMEMMAP_START;
ioremap_bot = IOREMAP_BASE;
#ifdef CONFIG_PCI
@@ -1035,12 +1053,16 @@ void __init hash__early_init_mmu(void)
if (!mmu_hash_ops.hpte_insert)
panic("hash__early_init_mmu: No MMU hash ops defined!\n");
- /* Initialize the MMU Hash table and create the linear mapping
+ /*
+ * Initialize the MMU Hash table and create the linear mapping
* of memory. Has to be done before SLB initialization as this is
* currently where the page size encoding is obtained.
*/
htab_initialize();
+ init_mm.context.hash_context = &init_hash_mm_context;
+ mm_ctx_set_slb_addr_limit(&init_mm.context, SLB_ADDR_LIMIT_DEFAULT);
+
pr_info("Initializing hash mmu with SLB\n");
/* Initialize SLB management */
slb_initialize();
@@ -1147,10 +1169,13 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
*/
static int subpage_protection(struct mm_struct *mm, unsigned long ea)
{
- struct subpage_prot_table *spt = &mm->context.spt;
+ struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
u32 spp = 0;
u32 **sbpm, *sbpp;
+ if (!spt)
+ return 0;
+
if (ea >= spt->maxaddr)
return 0;
if (ea < 0x100000000UL) {
@@ -1214,7 +1239,8 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
}
}
-/* Result code is:
+/*
+ * Result code is:
* 0 - handled
* 1 - normal page fault
* -1 - critical hash insertion error
@@ -1238,7 +1264,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
trace_hash_fault(ea, access, trap);
/* Get region & vsid */
- switch (REGION_ID(ea)) {
+ switch (get_region_id(ea)) {
case USER_REGION_ID:
user_region = 1;
if (! mm) {
@@ -1252,15 +1278,19 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
break;
case VMALLOC_REGION_ID:
vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
- if (ea < VMALLOC_END)
- psize = mmu_vmalloc_psize;
- else
- psize = mmu_io_psize;
+ psize = mmu_vmalloc_psize;
+ ssize = mmu_kernel_ssize;
+ break;
+
+ case IO_REGION_ID:
+ vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
+ psize = mmu_io_psize;
ssize = mmu_kernel_ssize;
break;
default:
- /* Not a valid range
- * Send the problem up to do_page_fault
+ /*
+ * Not a valid range
+ * Send the problem up to do_page_fault()
*/
rc = 1;
goto bail;
@@ -1285,7 +1315,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
flags |= HPTE_LOCAL_UPDATE;
#ifndef CONFIG_PPC_64K_PAGES
- /* If we use 4K pages and our psize is not 4K, then we might
+ /*
+ * If we use 4K pages and our psize is not 4K, then we might
* be hitting a special driver mapping, and need to align the
* address before we fetch the PTE.
*
@@ -1307,7 +1338,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
/* Add _PAGE_PRESENT to the required access perm */
access |= _PAGE_PRESENT;
- /* Pre-check access permissions (will be re-checked atomically
+ /*
+ * Pre-check access permissions (will be re-checked atomically
* in __hash_page_XX but this pre-check is a fast path
*/
if (!check_pte_access(access, pte_val(*ptep))) {
@@ -1354,7 +1386,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
psize = MMU_PAGE_4K;
}
- /* If this PTE is non-cacheable and we have restrictions on
+ /*
+ * If this PTE is non-cacheable and we have restrictions on
* using non cacheable large pages, then we switch to 4k
*/
if (mmu_ci_restrictions && psize == MMU_PAGE_64K && pte_ci(*ptep)) {
@@ -1395,7 +1428,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
flags, ssize, spp);
}
- /* Dump some info in case of hash insertion failure, they should
+ /*
+ * Dump some info in case of hash insertion failure, they should
* never happen so it is really useful to know if/when they do
*/
if (rc == -1)
@@ -1421,7 +1455,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
unsigned long flags = 0;
struct mm_struct *mm = current->mm;
- if (REGION_ID(ea) == VMALLOC_REGION_ID)
+ if ((get_region_id(ea) == VMALLOC_REGION_ID) ||
+ (get_region_id(ea) == IO_REGION_ID))
mm = &init_mm;
if (dsisr & DSISR_NOHPTE)
@@ -1437,8 +1472,9 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
unsigned long access = _PAGE_PRESENT | _PAGE_READ;
unsigned long flags = 0;
struct mm_struct *mm = current->mm;
+ unsigned int region_id = get_region_id(ea);
- if (REGION_ID(ea) == VMALLOC_REGION_ID)
+ if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID))
mm = &init_mm;
if (dsisr & DSISR_NOHPTE)
@@ -1455,7 +1491,7 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
* 2) user space access kernel space.
*/
access |= _PAGE_PRIVILEGED;
- if ((msr & MSR_PR) || (REGION_ID(ea) == USER_REGION_ID))
+ if ((msr & MSR_PR) || (region_id == USER_REGION_ID))
access &= ~_PAGE_PRIVILEGED;
if (trap == 0x400)
@@ -1470,7 +1506,7 @@ static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
int psize = get_slice_psize(mm, ea);
/* We only prefault standard pages for now */
- if (unlikely(psize != mm->context.user_psize))
+ if (unlikely(psize != mm_ctx_user_psize(&mm->context)))
return false;
/*
@@ -1499,7 +1535,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
int rc, ssize, update_flags = 0;
unsigned long access = _PAGE_PRESENT | _PAGE_READ | (is_exec ? _PAGE_EXEC : 0);
- BUG_ON(REGION_ID(ea) != USER_REGION_ID);
+ BUG_ON(get_region_id(ea) != USER_REGION_ID);
if (!should_hash_preload(mm, ea))
return;
@@ -1549,7 +1585,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
/* Hash it in */
#ifdef CONFIG_PPC_64K_PAGES
- if (mm->context.user_psize == MMU_PAGE_64K)
+ if (mm_ctx_user_psize(&mm->context) == MMU_PAGE_64K)
rc = __hash_page_64K(ea, access, vsid, ptep, trap,
update_flags, ssize);
else
@@ -1562,8 +1598,8 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
*/
if (rc == -1)
hash_failure_debug(ea, access, vsid, trap, ssize,
- mm->context.user_psize,
- mm->context.user_psize,
+ mm_ctx_user_psize(&mm->context),
+ mm_ctx_user_psize(&mm->context),
pte_val(*ptep));
out_exit:
local_irq_restore(flags);
@@ -1634,7 +1670,8 @@ unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift,
return gslot;
}
-/* WARNING: This is called from hash_low_64.S, if you change this prototype,
+/*
+ * WARNING: This is called from hash_low_64.S, if you change this prototype,
* do not forget to update the assembly call site !
*/
void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
@@ -1855,7 +1892,8 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
phys_addr_t first_memblock_size)
{
- /* We don't currently support the first MEMBLOCK not mapping 0
+ /*
+ * We don't currently support the first MEMBLOCK not mapping 0
* physical on those processors
*/
BUG_ON(first_memblock_base != 0);
@@ -1909,3 +1947,14 @@ static int __init hash64_debugfs(void)
}
machine_device_initcall(pseries, hash64_debugfs);
#endif /* CONFIG_DEBUG_FS */
+
+void __init print_system_hash_info(void)
+{
+ pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size);
+
+ if (htab_hash_mask)
+ pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask);
+ pr_info("kernel vmalloc start = 0x%lx\n", KERN_VIRT_START);
+ pr_info("kernel IO start = 0x%lx\n", KERN_IO_START);
+ pr_info("kernel vmemmap start = 0x%lx\n", (unsigned long)vmemmap);
+}
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/book3s64/iommu_api.c
index 8330f135294f..8330f135294f 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/book3s64/iommu_api.c
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/book3s64/mmu_context.c
index f720c5cc0b5e..cb2b08635508 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/book3s64/mmu_context.c
@@ -63,6 +63,13 @@ static int hash__init_new_context(struct mm_struct *mm)
if (index < 0)
return index;
+ mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context),
+ GFP_KERNEL);
+ if (!mm->context.hash_context) {
+ ida_free(&mmu_context_ida, index);
+ return -ENOMEM;
+ }
+
/*
* The old code would re-promote on fork, we don't do that when using
* slices as it could cause problem promoting slices that have been
@@ -77,10 +84,26 @@ static int hash__init_new_context(struct mm_struct *mm)
* We should not be calling init_new_context() on init_mm. Hence a
* check against 0 is OK.
*/
- if (mm->context.id == 0)
+ if (mm->context.id == 0) {
+ memset(mm->context.hash_context, 0, sizeof(struct hash_mm_context));
slice_init_new_context_exec(mm);
+ } else {
+ /* This is fork. Copy hash_context details from current->mm */
+ memcpy(mm->context.hash_context, current->mm->context.hash_context, sizeof(struct hash_mm_context));
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+ /* inherit subpage prot detalis if we have one. */
+ if (current->mm->context.hash_context->spt) {
+ mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table),
+ GFP_KERNEL);
+ if (!mm->context.hash_context->spt) {
+ ida_free(&mmu_context_ida, index);
+ kfree(mm->context.hash_context);
+ return -ENOMEM;
+ }
+ }
+#endif
- subpage_prot_init_new_context(mm);
+ }
pkey_mm_init(mm);
return index;
@@ -118,6 +141,7 @@ static int radix__init_new_context(struct mm_struct *mm)
asm volatile("ptesync;isync" : : : "memory");
mm->context.npu_context = NULL;
+ mm->context.hash_context = NULL;
return index;
}
@@ -162,6 +186,7 @@ static void destroy_contexts(mm_context_t *ctx)
if (context_id)
ida_free(&mmu_context_ida, context_id);
}
+ kfree(ctx->hash_context);
}
static void pmd_frag_destroy(void *pmd_frag)
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/book3s64/pgtable.c
index a4341aba0af4..16bda049187a 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -17,7 +17,7 @@
#include <asm/trace.h>
#include <asm/powernv.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
#include <trace/events/thp.h>
unsigned long __pmd_frag_nr;
diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index 587807763737..ae7fca40e5b3 100644
--- a/arch/powerpc/mm/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -7,6 +7,7 @@
#include <asm/mman.h>
#include <asm/mmu_context.h>
+#include <asm/mmu.h>
#include <asm/setup.h>
#include <linux/pkeys.h>
#include <linux/of_device.h>
diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
index cab06331c0c0..cab06331c0c0 100644
--- a/arch/powerpc/mm/hugetlbpage-radix.c
+++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 154472a28c77..c9bcf428dd2b 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -29,6 +29,7 @@
#include <asm/powernv.h>
#include <asm/sections.h>
#include <asm/trace.h>
+#include <asm/uaccess.h>
#include <trace/events/thp.h>
@@ -135,6 +136,10 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa,
*/
BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);
+#ifdef CONFIG_PPC_64K_PAGES
+ BUILD_BUG_ON(RADIX_KERN_MAP_SIZE != (1UL << MAX_EA_BITS_PER_CONTEXT));
+#endif
+
if (unlikely(!slab_is_available()))
return early_map_kernel_page(ea, pa, flags, map_page_size,
nid, region_start, region_end);
@@ -334,6 +339,12 @@ void __init radix_init_pgtable(void)
* page tables will be allocated within the range. No
* need or a node (which we don't have yet).
*/
+
+ if ((reg->base + reg->size) >= RADIX_VMALLOC_START) {
+ pr_warn("Outside the supported range\n");
+ continue;
+ }
+
WARN_ON(create_physical_mapping(reg->base,
reg->base + reg->size,
-1));
@@ -531,8 +542,15 @@ static void radix_init_amor(void)
mtspr(SPRN_AMOR, (3ul << 62));
}
-static void radix_init_iamr(void)
+#ifdef CONFIG_PPC_KUEP
+void setup_kuep(bool disabled)
{
+ if (disabled || !early_radix_enabled())
+ return;
+
+ if (smp_processor_id() == boot_cpuid)
+ pr_info("Activating Kernel Userspace Execution Prevention\n");
+
/*
* Radix always uses key0 of the IAMR to determine if an access is
* allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction
@@ -540,6 +558,25 @@ static void radix_init_iamr(void)
*/
mtspr(SPRN_IAMR, (1ul << 62));
}
+#endif
+
+#ifdef CONFIG_PPC_KUAP
+void setup_kuap(bool disabled)
+{
+ if (disabled || !early_radix_enabled())
+ return;
+
+ if (smp_processor_id() == boot_cpuid) {
+ pr_info("Activating Kernel Userspace Access Prevention\n");
+ cur_cpu_spec->mmu_features |= MMU_FTR_RADIX_KUAP;
+ }
+
+ /* Make sure userspace can't change the AMR */
+ mtspr(SPRN_UAMOR, 0);
+ mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
+ isync();
+}
+#endif
void __init radix__early_init_mmu(void)
{
@@ -574,11 +611,11 @@ void __init radix__early_init_mmu(void)
__pgd_val_bits = RADIX_PGD_VAL_BITS;
__kernel_virt_start = RADIX_KERN_VIRT_START;
- __kernel_virt_size = RADIX_KERN_VIRT_SIZE;
__vmalloc_start = RADIX_VMALLOC_START;
__vmalloc_end = RADIX_VMALLOC_END;
__kernel_io_start = RADIX_KERN_IO_START;
- vmemmap = (struct page *)RADIX_VMEMMAP_BASE;
+ __kernel_io_end = RADIX_KERN_IO_END;
+ vmemmap = (struct page *)RADIX_VMEMMAP_START;
ioremap_bot = IOREMAP_BASE;
#ifdef CONFIG_PCI
@@ -601,7 +638,6 @@ void __init radix__early_init_mmu(void)
memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
- radix_init_iamr();
radix_init_pgtable();
/* Switch to the guard PID before turning on MMU */
radix__switch_mmu_context(NULL, &init_mm);
@@ -623,7 +659,6 @@ void radix__early_init_mmu_secondary(void)
__pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
radix_init_amor();
}
- radix_init_iamr();
radix__switch_mmu_context(NULL, &init_mm);
if (cpu_has_feature(CPU_FTR_HVMODE))
@@ -646,7 +681,8 @@ void radix__mmu_cleanup_all(void)
void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
phys_addr_t first_memblock_size)
{
- /* We don't currently support the first MEMBLOCK not mapping 0
+ /*
+ * We don't currently support the first MEMBLOCK not mapping 0
* physical on those processors
*/
BUG_ON(first_memblock_base != 0);
@@ -866,6 +902,11 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end)
int __meminit radix__create_section_mapping(unsigned long start, unsigned long end, int nid)
{
+ if (end >= RADIX_VMALLOC_START) {
+ pr_warn("Outside the supported range\n");
+ return -1;
+ }
+
return create_physical_mapping(start, end, nid);
}
@@ -893,6 +934,11 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start,
int nid = early_pfn_to_nid(phys >> PAGE_SHIFT);
int ret;
+ if ((start + page_size) >= RADIX_VMEMMAP_END) {
+ pr_warn("Outside the supported range\n");
+ return -1;
+ }
+
ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid);
BUG_ON(ret);
@@ -958,45 +1004,44 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addre
void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
pgtable_t pgtable)
{
- struct list_head *lh = (struct list_head *) pgtable;
+ struct list_head *lh = (struct list_head *) pgtable;
- assert_spin_locked(pmd_lockptr(mm, pmdp));
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
- /* FIFO */
- if (!pmd_huge_pte(mm, pmdp))
- INIT_LIST_HEAD(lh);
- else
- list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
- pmd_huge_pte(mm, pmdp) = pgtable;
+ /* FIFO */
+ if (!pmd_huge_pte(mm, pmdp))
+ INIT_LIST_HEAD(lh);
+ else
+ list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
+ pmd_huge_pte(mm, pmdp) = pgtable;
}
pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
- pte_t *ptep;
- pgtable_t pgtable;
- struct list_head *lh;
-
- assert_spin_locked(pmd_lockptr(mm, pmdp));
-
- /* FIFO */
- pgtable = pmd_huge_pte(mm, pmdp);
- lh = (struct list_head *) pgtable;
- if (list_empty(lh))
- pmd_huge_pte(mm, pmdp) = NULL;
- else {
- pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
- list_del(lh);
- }
- ptep = (pte_t *) pgtable;
- *ptep = __pte(0);
- ptep++;
- *ptep = __pte(0);
- return pgtable;
-}
+ pte_t *ptep;
+ pgtable_t pgtable;
+ struct list_head *lh;
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+ /* FIFO */
+ pgtable = pmd_huge_pte(mm, pmdp);
+ lh = (struct list_head *) pgtable;
+ if (list_empty(lh))
+ pmd_huge_pte(mm, pmdp) = NULL;
+ else {
+ pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
+ list_del(lh);
+ }
+ ptep = (pte_t *) pgtable;
+ *ptep = __pte(0);
+ ptep++;
+ *ptep = __pte(0);
+ return pgtable;
+}
pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pmd_t *pmdp)
+ unsigned long addr, pmd_t *pmdp)
{
pmd_t old_pmd;
unsigned long old;
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 6a23b9ebd2a1..6a23b9ebd2a1 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/book3s64/slb.c
index 5986df48359b..c22742218bd3 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -554,7 +554,8 @@ void slb_initialize(void)
asm volatile("isync; slbia; isync":::"memory");
create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, LINEAR_INDEX);
- /* For the boot cpu, we're running on the stack in init_thread_union,
+ /*
+ * For the boot cpu, we're running on the stack in init_thread_union,
* which is in the first segment of the linear mapping, and also
* get_paca()->kstack hasn't been initialized yet.
* For secondary cpus, we need to bolt the kernel stack entry now.
@@ -691,10 +692,10 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id)
unsigned long flags;
int ssize;
- if (id == KERNEL_REGION_ID) {
+ if (id == LINEAR_MAP_REGION_ID) {
/* We only support upto MAX_PHYSMEM_BITS */
- if ((ea & ~REGION_MASK) > (1UL << MAX_PHYSMEM_BITS))
+ if ((ea & EA_MASK) > (1UL << MAX_PHYSMEM_BITS))
return -EFAULT;
flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;
@@ -702,20 +703,25 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id)
#ifdef CONFIG_SPARSEMEM_VMEMMAP
} else if (id == VMEMMAP_REGION_ID) {
- if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT))
+ if (ea >= H_VMEMMAP_END)
return -EFAULT;
flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp;
#endif
} else if (id == VMALLOC_REGION_ID) {
- if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT))
+ if (ea >= H_VMALLOC_END)
return -EFAULT;
- if (ea < H_VMALLOC_END)
- flags = local_paca->vmalloc_sllp;
- else
- flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;
+ flags = local_paca->vmalloc_sllp;
+
+ } else if (id == IO_REGION_ID) {
+
+ if (ea >= H_KERN_IO_END)
+ return -EFAULT;
+
+ flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;
+
} else {
return -EFAULT;
}
@@ -725,6 +731,7 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id)
ssize = MMU_SEGSIZE_256M;
context = get_kernel_context(ea);
+
return slb_insert_entry(ea, context, flags, ssize, true);
}
@@ -739,7 +746,7 @@ static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
* consider this as bad access if we take a SLB miss
* on an address above addr limit.
*/
- if (ea >= mm->context.slb_addr_limit)
+ if (ea >= mm_ctx_slb_addr_limit(&mm->context))
return -EFAULT;
context = get_user_context(&mm->context, ea);
@@ -761,7 +768,7 @@ static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
long do_slb_fault(struct pt_regs *regs, unsigned long ea)
{
- unsigned long id = REGION_ID(ea);
+ unsigned long id = get_region_id(ea);
/* IRQs are not reconciled here, so can't check irqs_disabled */
VM_WARN_ON(mfmsr() & MSR_EE);
@@ -784,7 +791,7 @@ long do_slb_fault(struct pt_regs *regs, unsigned long ea)
* first class kernel code. But for performance it's probably nicer
* if they go via fast_exception_return too.
*/
- if (id >= KERNEL_REGION_ID) {
+ if (id >= LINEAR_MAP_REGION_ID) {
long err;
#ifdef CONFIG_DEBUG_VM
/* Catch recursive kernel SLB faults. */
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c
index 5e4178790dee..473dd430e306 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/book3s64/subpage_prot.c
@@ -25,10 +25,13 @@
*/
void subpage_prot_free(struct mm_struct *mm)
{
- struct subpage_prot_table *spt = &mm->context.spt;
+ struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
unsigned long i, j, addr;
u32 **p;
+ if (!spt)
+ return;
+
for (i = 0; i < 4; ++i) {
if (spt->low_prot[i]) {
free_page((unsigned long)spt->low_prot[i]);
@@ -48,13 +51,7 @@ void subpage_prot_free(struct mm_struct *mm)
free_page((unsigned long)p);
}
spt->maxaddr = 0;
-}
-
-void subpage_prot_init_new_context(struct mm_struct *mm)
-{
- struct subpage_prot_table *spt = &mm->context.spt;
-
- memset(spt, 0, sizeof(*spt));
+ kfree(spt);
}
static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
@@ -93,13 +90,18 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
static void subpage_prot_clear(unsigned long addr, unsigned long len)
{
struct mm_struct *mm = current->mm;
- struct subpage_prot_table *spt = &mm->context.spt;
+ struct subpage_prot_table *spt;
u32 **spm, *spp;
unsigned long i;
size_t nw;
unsigned long next, limit;
down_write(&mm->mmap_sem);
+
+ spt = mm_ctx_subpage_prot(&mm->context);
+ if (!spt)
+ goto err_out;
+
limit = addr + len;
if (limit > spt->maxaddr)
limit = spt->maxaddr;
@@ -127,6 +129,8 @@ static void subpage_prot_clear(unsigned long addr, unsigned long len)
/* now flush any existing HPTEs for the range */
hpte_flush_range(mm, addr, nw);
}
+
+err_out:
up_write(&mm->mmap_sem);
}
@@ -189,7 +193,7 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
unsigned long, len, u32 __user *, map)
{
struct mm_struct *mm = current->mm;
- struct subpage_prot_table *spt = &mm->context.spt;
+ struct subpage_prot_table *spt;
u32 **spm, *spp;
unsigned long i;
size_t nw;
@@ -218,6 +222,21 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
return -EFAULT;
down_write(&mm->mmap_sem);
+
+ spt = mm_ctx_subpage_prot(&mm->context);
+ if (!spt) {
+ /*
+ * Allocate subpage prot table if not already done.
+ * Do this with mmap_sem held
+ */
+ spt = kzalloc(sizeof(struct subpage_prot_table), GFP_KERNEL);
+ if (!spt) {
+ err = -ENOMEM;
+ goto out;
+ }
+ mm->context.hash_context->spt = spt;
+ }
+
subpage_mark_vma_nohuge(mm, addr, len);
for (limit = addr + len; addr < limit; addr = next) {
next = pmd_addr_end(addr, limit);
diff --git a/arch/powerpc/mm/vphn.c b/arch/powerpc/mm/book3s64/vphn.c
index f83044faac23..0ee7734afb50 100644
--- a/arch/powerpc/mm/vphn.c
+++ b/arch/powerpc/mm/book3s64/vphn.c
@@ -42,7 +42,8 @@ int vphn_unpack_associativity(const long *packed, __be32 *unpacked)
u16 new = be16_to_cpup(field++);
if (is_32bit) {
- /* Let's concatenate the 16 bits of this field to the
+ /*
+ * Let's concatenate the 16 bits of this field to the
* 15 lower bits of the previous field
*/
unpacked[++nr_assoc_doms] =
@@ -56,7 +57,8 @@ int vphn_unpack_associativity(const long *packed, __be32 *unpacked)
unpacked[++nr_assoc_doms] =
cpu_to_be32(new & VPHN_FIELD_MASK);
} else {
- /* Data is in the lower 15 bits of this field
+ /*
+ * Data is in the lower 15 bits of this field
* concatenated with the next 16 bit field
*/
last = new;
diff --git a/arch/powerpc/mm/vphn.h b/arch/powerpc/mm/book3s64/vphn.h
index f9ffdb3942fc..f0b93c2dd578 100644
--- a/arch/powerpc/mm/vphn.h
+++ b/arch/powerpc/mm/book3s64/vphn.h
@@ -2,8 +2,7 @@
#ifndef _ARCH_POWERPC_MM_VPHN_H_
#define _ARCH_POWERPC_MM_VPHN_H_
-/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers.
- */
+/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. */
#define VPHN_REGISTER_COUNT 6
/*
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index c8da352e8686..f137286740cb 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -105,7 +105,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
u64 vsid, vsidkey;
int psize, ssize;
- switch (REGION_ID(ea)) {
+ switch (get_region_id(ea)) {
case USER_REGION_ID:
pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea);
if (mm == NULL)
@@ -117,16 +117,20 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
break;
case VMALLOC_REGION_ID:
pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea);
- if (ea < VMALLOC_END)
- psize = mmu_vmalloc_psize;
- else
- psize = mmu_io_psize;
+ psize = mmu_vmalloc_psize;
ssize = mmu_kernel_ssize;
vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
vsidkey = SLB_VSID_KERNEL;
break;
- case KERNEL_REGION_ID:
- pr_devel("%s: 0x%llx -- KERNEL_REGION_ID\n", __func__, ea);
+ case IO_REGION_ID:
+ pr_devel("%s: 0x%llx -- IO_REGION_ID\n", __func__, ea);
+ psize = mmu_io_psize;
+ ssize = mmu_kernel_ssize;
+ vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
+ vsidkey = SLB_VSID_KERNEL;
+ break;
+ case LINEAR_MAP_REGION_ID:
+ pr_devel("%s: 0x%llx -- LINEAR_MAP_REGION_ID\n", __func__, ea);
psize = mmu_linear_psize;
ssize = mmu_kernel_ssize;
vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index b5d2658c26af..2f6154b76328 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -36,7 +36,7 @@
#include <asm/tlbflush.h>
#include <asm/dma.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
/*
* This address range defaults to a value that is safe for all
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 3f1803672c9b..641891df2046 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -366,8 +366,10 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop)
if (!drmem_info->lmbs)
return;
- for_each_drmem_lmb(lmb)
+ for_each_drmem_lmb(lmb) {
read_drconf_v1_cell(lmb, &prop);
+ lmb_set_nid(lmb);
+ }
}
static void __init init_drmem_v2_lmbs(const __be32 *prop)
@@ -412,6 +414,8 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop)
lmb->aa_index = dr_cell.aa_index;
lmb->flags = dr_cell.flags;
+
+ lmb_set_nid(lmb);
}
}
}
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 887f11bcf330..b5d3578d9f65 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -44,6 +44,7 @@
#include <asm/mmu_context.h>
#include <asm/siginfo.h>
#include <asm/debug.h>
+#include <asm/kup.h>
static inline bool notify_page_fault(struct pt_regs *regs)
{
@@ -223,19 +224,46 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr,
}
/* Is this a bad kernel fault ? */
-static bool bad_kernel_fault(bool is_exec, unsigned long error_code,
- unsigned long address)
+static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address, bool is_write)
{
+ int is_exec = TRAP(regs) == 0x400;
+
/* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */
if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT |
DSISR_PROTFAULT))) {
- printk_ratelimited(KERN_CRIT "kernel tried to execute"
- " exec-protected page (%lx) -"
- "exploit attempt? (uid: %d)\n",
- address, from_kuid(&init_user_ns,
- current_uid()));
+ pr_crit_ratelimited("kernel tried to execute %s page (%lx) - exploit attempt? (uid: %d)\n",
+ address >= TASK_SIZE ? "exec-protected" : "user",
+ address,
+ from_kuid(&init_user_ns, current_uid()));
+
+ // Kernel exec fault is always bad
+ return true;
}
- return is_exec || (address >= TASK_SIZE);
+
+ if (!is_exec && address < TASK_SIZE && (error_code & DSISR_PROTFAULT) &&
+ !search_exception_tables(regs->nip)) {
+ pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: %d)\n",
+ address,
+ from_kuid(&init_user_ns, current_uid()));
+ }
+
+ // Kernel fault on kernel address is bad
+ if (address >= TASK_SIZE)
+ return true;
+
+ // Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad
+ if (!search_exception_tables(regs->nip))
+ return true;
+
+ // Read/write fault in a valid region (the exception table search passed
+ // above), but blocked by KUAP is bad, it can never succeed.
+ if (bad_kuap_fault(regs, is_write))
+ return true;
+
+ // What's left? Kernel fault on user in well defined regions (extable
+ // matched), and allowed by KUAP in the faulting context.
+ return false;
}
static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
@@ -455,9 +483,10 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
/*
* The kernel should never take an execute fault nor should it
- * take a page fault to a kernel address.
+ * take a page fault to a kernel address or a page fault to a user
+ * address outside of dedicated places
*/
- if (unlikely(!is_user && bad_kernel_fault(is_exec, error_code, address)))
+ if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write)))
return SIGSEGV;
/*
diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c
index 82a0e37557a5..320c1672b2ae 100644
--- a/arch/powerpc/mm/highmem.c
+++ b/arch/powerpc/mm/highmem.c
@@ -43,9 +43,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
type = kmap_atomic_idx_push();
idx = type + KM_TYPE_NR*smp_processor_id();
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-#ifdef CONFIG_DEBUG_HIGHMEM
- BUG_ON(!pte_none(*(kmap_pte-idx)));
-#endif
+ WARN_ON(IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !pte_none(*(kmap_pte - idx)));
__set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot), 1);
local_flush_tlb_page(NULL, vaddr);
@@ -56,7 +54,6 @@ EXPORT_SYMBOL(kmap_atomic_prot);
void __kunmap_atomic(void *kvaddr)
{
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
- int type __maybe_unused;
if (vaddr < __fix_to_virt(FIX_KMAP_END)) {
pagefault_enable();
@@ -64,14 +61,12 @@ void __kunmap_atomic(void *kvaddr)
return;
}
- type = kmap_atomic_idx();
-
-#ifdef CONFIG_DEBUG_HIGHMEM
- {
+ if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM)) {
+ int type = kmap_atomic_idx();
unsigned int idx;
idx = type + KM_TYPE_NR * smp_processor_id();
- BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+ WARN_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
/*
* force other mappings to Oops if they'll try to access
@@ -80,7 +75,6 @@ void __kunmap_atomic(void *kvaddr)
pte_clear(&init_mm, vaddr, kmap_pte-idx);
local_flush_tlb_page(NULL, vaddr);
}
-#endif
kmap_atomic_idx_pop();
pagefault_enable();
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 9e732bb2c84a..c5c9ff2d7afc 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -26,20 +26,8 @@
#include <asm/hugetlb.h>
#include <asm/pte-walk.h>
-
-#ifdef CONFIG_HUGETLB_PAGE
-
-#define PAGE_SHIFT_64K 16
-#define PAGE_SHIFT_512K 19
-#define PAGE_SHIFT_8M 23
-#define PAGE_SHIFT_16M 24
-#define PAGE_SHIFT_16G 34
-
bool hugetlb_disabled = false;
-unsigned int HPAGE_SHIFT;
-EXPORT_SYMBOL(HPAGE_SHIFT);
-
#define hugepd_none(hpd) (hpd_val(hpd) == 0)
#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_t)) - __builtin_ffs(sizeof(void *)))
@@ -98,19 +86,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
for (i = 0; i < num_hugepd; i++, hpdp++) {
if (unlikely(!hugepd_none(*hpdp)))
break;
- else {
-#ifdef CONFIG_PPC_BOOK3S_64
- *hpdp = __hugepd(__pa(new) | HUGEPD_VAL_BITS |
- (shift_to_mmu_psize(pshift) << 2));
-#elif defined(CONFIG_PPC_8xx)
- *hpdp = __hugepd(__pa(new) | _PMD_USER |
- (pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M :
- _PMD_PAGE_512K) | _PMD_PRESENT);
-#else
- /* We use the old format for PPC_FSL_BOOK3E */
- *hpdp = __hugepd(((unsigned long)new & ~PD_HUGE) | pshift);
-#endif
- }
+ hugepd_populate(hpdp, new, pshift);
}
/* If we bailed from the for loop early, an error occurred, clean up */
if (i < num_hugepd) {
@@ -250,7 +226,7 @@ int __init alloc_bootmem_huge_page(struct hstate *h)
return __alloc_bootmem_huge_page(h);
}
-#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
+#ifndef CONFIG_PPC_BOOK3S_64
#define HUGEPD_FREELIST_SIZE \
((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))
@@ -542,23 +518,6 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
return (__boundary - 1 < end - 1) ? __boundary : end;
}
-int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned pdshift,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- pte_t *ptep;
- unsigned long sz = 1UL << hugepd_shift(hugepd);
- unsigned long next;
-
- ptep = hugepte_offset(hugepd, addr, pdshift);
- do {
- next = hugepte_addr_end(addr, end, sz);
- if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
- return 0;
- } while (ptep++, addr = next, addr != end);
-
- return 1;
-}
-
#ifdef CONFIG_PPC_MM_SLICES
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff,
@@ -578,24 +537,15 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{
-#ifdef CONFIG_PPC_MM_SLICES
/* With radix we don't use slice, so derive it from vma*/
- if (!radix_enabled()) {
+ if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled()) {
unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
return 1UL << mmu_psize_to_shift(psize);
}
-#endif
return vma_kernel_pagesize(vma);
}
-static inline bool is_power_of_4(unsigned long x)
-{
- if (is_power_of_2(x))
- return (__ilog2(x) % 2) ? false : true;
- return false;
-}
-
static int __init add_huge_page_size(unsigned long long size)
{
int shift = __ffs(size);
@@ -603,37 +553,13 @@ static int __init add_huge_page_size(unsigned long long size)
/* Check that it is a page size supported by the hardware and
* that it fits within pagetable and slice limits. */
- if (size <= PAGE_SIZE)
- return -EINVAL;
-#if defined(CONFIG_PPC_FSL_BOOK3E)
- if (!is_power_of_4(size))
+ if (size <= PAGE_SIZE || !is_power_of_2(size))
return -EINVAL;
-#elif !defined(CONFIG_PPC_8xx)
- if (!is_power_of_2(size) || (shift > SLICE_HIGH_SHIFT))
- return -EINVAL;
-#endif
- if ((mmu_psize = shift_to_mmu_psize(shift)) < 0)
+ mmu_psize = check_and_get_huge_psize(size);
+ if (mmu_psize < 0)
return -EINVAL;
-#ifdef CONFIG_PPC_BOOK3S_64
- /*
- * We need to make sure that for different page sizes reported by
- * firmware we only add hugetlb support for page sizes that can be
- * supported by linux page table layout.
- * For now we have
- * Radix: 2M and 1G
- * Hash: 16M and 16G
- */
- if (radix_enabled()) {
- if (mmu_psize != MMU_PAGE_2M && mmu_psize != MMU_PAGE_1G)
- return -EINVAL;
- } else {
- if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G)
- return -EINVAL;
- }
-#endif
-
BUG_ON(mmu_psize_defs[mmu_psize].shift != shift);
/* Return if huge page size has already been setup */
@@ -669,10 +595,10 @@ static int __init hugetlbpage_init(void)
return 0;
}
-#if !defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_PPC_8xx)
- if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE))
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled() &&
+ !mmu_has_feature(MMU_FTR_16M_PAGE))
return -ENODEV;
-#endif
+
for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
unsigned shift;
unsigned pdshift;
@@ -710,29 +636,13 @@ static int __init hugetlbpage_init(void)
pgtable_cache_add(PTE_INDEX_SIZE);
else if (pdshift > shift)
pgtable_cache_add(pdshift - shift);
-#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
- else
+ else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) || IS_ENABLED(CONFIG_PPC_8xx))
pgtable_cache_add(PTE_T_ORDER);
-#endif
}
-#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
- /* Default hpage size = 4M on FSL_BOOK3E and 512k on 8xx */
- if (mmu_psize_defs[MMU_PAGE_4M].shift)
- HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
- else if (mmu_psize_defs[MMU_PAGE_512K].shift)
- HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_512K].shift;
-#else
- /* Set default large page size. Currently, we pick 16M or 1M
- * depending on what is available
- */
- if (mmu_psize_defs[MMU_PAGE_16M].shift)
- HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift;
- else if (mmu_psize_defs[MMU_PAGE_1M].shift)
- HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;
- else if (mmu_psize_defs[MMU_PAGE_2M].shift)
- HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_2M].shift;
-#endif
+ if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE))
+ hugetlbpage_init_default();
+
return 0;
}
@@ -756,113 +666,8 @@ void flush_dcache_icache_hugepage(struct page *page)
}
}
-#endif /* CONFIG_HUGETLB_PAGE */
-
-/*
- * We have 4 cases for pgds and pmds:
- * (1) invalid (all zeroes)
- * (2) pointer to next table, as normal; bottom 6 bits == 0
- * (3) leaf pte for huge page _PAGE_PTE set
- * (4) hugepd pointer, _PAGE_PTE = 0 and bits [2..6] indicate size of table
- *
- * So long as we atomically load page table pointers we are safe against teardown,
- * we can follow the address down to the the page and take a ref on it.
- * This function need to be called with interrupts disabled. We use this variant
- * when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED
- */
-pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
- bool *is_thp, unsigned *hpage_shift)
-{
- pgd_t pgd, *pgdp;
- pud_t pud, *pudp;
- pmd_t pmd, *pmdp;
- pte_t *ret_pte;
- hugepd_t *hpdp = NULL;
- unsigned pdshift = PGDIR_SHIFT;
-
- if (hpage_shift)
- *hpage_shift = 0;
-
- if (is_thp)
- *is_thp = false;
-
- pgdp = pgdir + pgd_index(ea);
- pgd = READ_ONCE(*pgdp);
- /*
- * Always operate on the local stack value. This make sure the
- * value don't get updated by a parallel THP split/collapse,
- * page fault or a page unmap. The return pte_t * is still not
- * stable. So should be checked there for above conditions.
- */
- if (pgd_none(pgd))
- return NULL;
- else if (pgd_huge(pgd)) {
- ret_pte = (pte_t *) pgdp;
- goto out;
- } else if (is_hugepd(__hugepd(pgd_val(pgd))))
- hpdp = (hugepd_t *)&pgd;
- else {
- /*
- * Even if we end up with an unmap, the pgtable will not
- * be freed, because we do an rcu free and here we are
- * irq disabled
- */
- pdshift = PUD_SHIFT;
- pudp = pud_offset(&pgd, ea);
- pud = READ_ONCE(*pudp);
-
- if (pud_none(pud))
- return NULL;
- else if (pud_huge(pud)) {
- ret_pte = (pte_t *) pudp;
- goto out;
- } else if (is_hugepd(__hugepd(pud_val(pud))))
- hpdp = (hugepd_t *)&pud;
- else {
- pdshift = PMD_SHIFT;
- pmdp = pmd_offset(&pud, ea);
- pmd = READ_ONCE(*pmdp);
- /*
- * A hugepage collapse is captured by pmd_none, because
- * it mark the pmd none and do a hpte invalidate.
- */
- if (pmd_none(pmd))
- return NULL;
-
- if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) {
- if (is_thp)
- *is_thp = true;
- ret_pte = (pte_t *) pmdp;
- goto out;
- }
- /*
- * pmd_large check below will handle the swap pmd pte
- * we need to do both the check because they are config
- * dependent.
- */
- if (pmd_huge(pmd) || pmd_large(pmd)) {
- ret_pte = (pte_t *) pmdp;
- goto out;
- } else if (is_hugepd(__hugepd(pmd_val(pmd))))
- hpdp = (hugepd_t *)&pmd;
- else
- return pte_offset_kernel(&pmd, ea);
- }
- }
- if (!hpdp)
- return NULL;
-
- ret_pte = hugepte_offset(*hpdp, ea, pdshift);
- pdshift = hugepd_shift(*hpdp);
-out:
- if (hpage_shift)
- *hpage_shift = pdshift;
- return ret_pte;
-}
-EXPORT_SYMBOL_GPL(__find_linux_pte);
-
-int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
+static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
{
unsigned long pte_end;
struct page *head, *page;
@@ -908,3 +713,20 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
return 1;
}
+
+int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned int pdshift,
+ unsigned long end, int write, struct page **pages, int *nr)
+{
+ pte_t *ptep;
+ unsigned long sz = 1UL << hugepd_shift(hugepd);
+ unsigned long next;
+
+ ptep = hugepte_offset(hugepd, addr, pdshift);
+ do {
+ next = hugepte_addr_end(addr, end, sz);
+ if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
+ return 0;
+ } while (ptep++, addr = next, addr != end);
+
+ return 1;
+}
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index 1e6910eb70ed..3bcae9e5e954 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -24,6 +24,32 @@
#include <linux/string.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
+#include <asm/kup.h>
+
+static bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP);
+static bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP);
+
+static int __init parse_nosmep(char *p)
+{
+ disable_kuep = true;
+ pr_warn("Disabling Kernel Userspace Execution Prevention\n");
+ return 0;
+}
+early_param("nosmep", parse_nosmep);
+
+static int __init parse_nosmap(char *p)
+{
+ disable_kuap = true;
+ pr_warn("Disabling Kernel Userspace Access Protection\n");
+ return 0;
+}
+early_param("nosmap", parse_nosmap);
+
+void __ref setup_kup(void)
+{
+ setup_kuep(disable_kuep);
+ setup_kuap(disable_kuap);
+}
#define CTOR(shift) static void ctor_##shift(void *addr) \
{ \
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 41a3513cadc9..c3121b6c8cbd 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -45,8 +45,10 @@
#include <asm/tlb.h>
#include <asm/sections.h>
#include <asm/hugetlb.h>
+#include <asm/kup.h>
+#include <asm/kasan.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
#if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL)
/* The amount of lowmem must be within 0xF0000000 - KERNELBASE. */
@@ -178,6 +180,10 @@ void __init MMU_init(void)
btext_unmap();
#endif
+ kasan_mmu_init();
+
+ setup_kup();
+
/* Shortly after that, the entire linear mapping will be available */
memblock_set_current_limit(lowmem_end_addr);
}
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index a4c155af1597..45b02fa11cd8 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -66,7 +66,7 @@
#include <asm/iommu.h>
#include <asm/vdso.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
phys_addr_t memstart_addr = ~0;
EXPORT_SYMBOL_GPL(memstart_addr);
diff --git a/arch/powerpc/mm/kasan/Makefile b/arch/powerpc/mm/kasan/Makefile
new file mode 100644
index 000000000000..6577897673dd
--- /dev/null
+++ b/arch/powerpc/mm/kasan/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+KASAN_SANITIZE := n
+
+obj-$(CONFIG_PPC32) += kasan_init_32.o
diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
new file mode 100644
index 000000000000..0d62be3cba47
--- /dev/null
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define DISABLE_BRANCH_PROFILING
+
+#include <linux/kasan.h>
+#include <linux/printk.h>
+#include <linux/memblock.h>
+#include <linux/sched/task.h>
+#include <linux/vmalloc.h>
+#include <asm/pgalloc.h>
+#include <asm/code-patching.h>
+#include <mm/mmu_decl.h>
+
+static void kasan_populate_pte(pte_t *ptep, pgprot_t prot)
+{
+ unsigned long va = (unsigned long)kasan_early_shadow_page;
+ phys_addr_t pa = __pa(kasan_early_shadow_page);
+ int i;
+
+ for (i = 0; i < PTRS_PER_PTE; i++, ptep++)
+ __set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 0);
+}
+
+static int kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end)
+{
+ pmd_t *pmd;
+ unsigned long k_cur, k_next;
+
+ pmd = pmd_offset(pud_offset(pgd_offset_k(k_start), k_start), k_start);
+
+ for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++) {
+ pte_t *new;
+
+ k_next = pgd_addr_end(k_cur, k_end);
+ if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte)
+ continue;
+
+ new = pte_alloc_one_kernel(&init_mm);
+
+ if (!new)
+ return -ENOMEM;
+ if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ kasan_populate_pte(new, PAGE_READONLY);
+ else
+ kasan_populate_pte(new, PAGE_KERNEL_RO);
+ pmd_populate_kernel(&init_mm, pmd, new);
+ }
+ return 0;
+}
+
+static void __ref *kasan_get_one_page(void)
+{
+ if (slab_is_available())
+ return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+
+ return memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+}
+
+static int __ref kasan_init_region(void *start, size_t size)
+{
+ unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start);
+ unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size);
+ unsigned long k_cur;
+ int ret;
+ void *block = NULL;
+
+ ret = kasan_init_shadow_page_tables(k_start, k_end);
+ if (ret)
+ return ret;
+
+ if (!slab_is_available())
+ block = memblock_alloc(k_end - k_start, PAGE_SIZE);
+
+ for (k_cur = k_start; k_cur < k_end; k_cur += PAGE_SIZE) {
+ pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur);
+ void *va = block ? block + k_cur - k_start : kasan_get_one_page();
+ pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
+
+ if (!va)
+ return -ENOMEM;
+
+ __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0);
+ }
+ flush_tlb_kernel_range(k_start, k_end);
+ return 0;
+}
+
+static void __init kasan_remap_early_shadow_ro(void)
+{
+ if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ kasan_populate_pte(kasan_early_shadow_pte, PAGE_READONLY);
+ else
+ kasan_populate_pte(kasan_early_shadow_pte, PAGE_KERNEL_RO);
+
+ flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END);
+}
+
+void __init kasan_mmu_init(void)
+{
+ int ret;
+ struct memblock_region *reg;
+
+ if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
+ ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+ if (ret)
+ panic("kasan: kasan_init_shadow_page_tables() failed");
+ }
+
+ for_each_memblock(memory, reg) {
+ phys_addr_t base = reg->base;
+ phys_addr_t top = min(base + reg->size, total_lowmem);
+
+ if (base >= top)
+ continue;
+
+ ret = kasan_init_region(__va(base), top - base);
+ if (ret)
+ panic("kasan: kasan_init_region() failed");
+ }
+}
+
+void __init kasan_init(void)
+{
+ kasan_remap_early_shadow_ro();
+
+ clear_page(kasan_early_shadow_page);
+
+ /* At this point kasan is fully initialized. Enable error messages */
+ init_task.kasan_depth = 0;
+ pr_info("KASAN init done\n");
+}
+
+#ifdef CONFIG_MODULES
+void *module_alloc(unsigned long size)
+{
+ void *base = vmalloc_exec(size);
+
+ if (!base)
+ return NULL;
+
+ if (!kasan_init_region(base, size))
+ return base;
+
+ vfree(base);
+
+ return NULL;
+}
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_32
+u8 __initdata early_hash[256 << 10] __aligned(256 << 10) = {0};
+
+static void __init kasan_early_hash_table(void)
+{
+ modify_instruction_site(&patch__hash_page_A0, 0xffff, __pa(early_hash) >> 16);
+ modify_instruction_site(&patch__flush_hash_A0, 0xffff, __pa(early_hash) >> 16);
+
+ Hash = (struct hash_pte *)early_hash;
+}
+#else
+static void __init kasan_early_hash_table(void) {}
+#endif
+
+void __init kasan_early_init(void)
+{
+ unsigned long addr = KASAN_SHADOW_START;
+ unsigned long end = KASAN_SHADOW_END;
+ unsigned long next;
+ pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(addr), addr), addr);
+
+ BUILD_BUG_ON(KASAN_SHADOW_START & ~PGDIR_MASK);
+
+ kasan_populate_pte(kasan_early_shadow_pte, PAGE_KERNEL);
+
+ do {
+ next = pgd_addr_end(addr, end);
+ pmd_populate_kernel(&init_mm, pmd, kasan_early_shadow_pte);
+ } while (pmd++, addr = next, addr != end);
+
+ if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ kasan_early_hash_table();
+}
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index f6787f90e158..cd525d709072 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -54,7 +54,7 @@
#include <asm/swiotlb.h>
#include <asm/rtas.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
#ifndef CPU_FTR_COHERENT_ICACHE
#define CPU_FTR_COHERENT_ICACHE 0 /* XXX for now */
@@ -109,8 +109,8 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
return -ENODEV;
}
-int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
- bool want_memblock)
+int __ref arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+ bool want_memblock)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -131,8 +131,8 @@ int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-int __meminit arch_remove_memory(int nid, u64 start, u64 size,
- struct vmem_altmap *altmap)
+int __ref arch_remove_memory(int nid, u64 start, u64 size,
+ struct vmem_altmap *altmap)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -161,7 +161,8 @@ int __meminit arch_remove_memory(int nid, u64 start, u64 size,
*/
vm_unmap_aliases();
- resize_hpt_for_hotplug(memblock_phys_mem_size());
+ if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC)
+ pr_warn("Hash collision while resizing HPT\n");
return ret;
}
@@ -309,6 +310,10 @@ void __init mem_init(void)
mem_init_print_info(NULL);
#ifdef CONFIG_PPC32
pr_info("Kernel virtual memory layout:\n");
+#ifdef CONFIG_KASAN
+ pr_info(" * 0x%08lx..0x%08lx : kasan shadow mem\n",
+ KASAN_SHADOW_START, KASAN_SHADOW_END);
+#endif
pr_info(" * 0x%08lx..0x%08lx : fixmap\n", FIXADDR_START, FIXADDR_TOP);
#ifdef CONFIG_HIGHMEM
pr_info(" * 0x%08lx..0x%08lx : highmem PTEs\n",
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index bb52320b7369..6b049d82b98a 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -98,7 +98,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
switch_mmu_context(prev, next, tsk);
}
-#ifdef CONFIG_PPC32
+#ifndef CONFIG_PPC_BOOK3S_64
void arch_exit_mmap(struct mm_struct *mm)
{
void *frag = pte_frag_get(&mm->context);
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 74ff61dabcb1..7bac0aa2026a 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -83,6 +83,8 @@ static inline void _tlbivax_bcast(unsigned long address, unsigned int pid,
}
#endif
+static inline void print_system_hash_info(void) {}
+
#else /* CONFIG_PPC_MMU_NOHASH */
extern void hash_preload(struct mm_struct *mm, unsigned long ea,
@@ -92,6 +94,8 @@ extern void hash_preload(struct mm_struct *mm, unsigned long ea,
extern void _tlbie(unsigned long address);
extern void _tlbia(void);
+void print_system_hash_info(void);
+
#endif /* CONFIG_PPC_MMU_NOHASH */
#ifdef CONFIG_PPC32
@@ -104,8 +108,8 @@ extern int __map_without_bats;
extern unsigned int rtas_data, rtas_size;
struct hash_pte;
-extern struct hash_pte *Hash, *Hash_end;
-extern unsigned long Hash_size, Hash_mask;
+extern struct hash_pte *Hash;
+extern u8 early_hash[];
#endif /* CONFIG_PPC32 */
@@ -130,6 +134,7 @@ extern void wii_memory_fixups(void);
*/
#ifdef CONFIG_PPC32
extern void MMU_init_hw(void);
+void MMU_init_hw_patch(void);
unsigned long mmu_mapin_ram(unsigned long base, unsigned long top);
#endif
diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/nohash/40x.c
index b9cf6f8764b0..460459b6f53e 100644
--- a/arch/powerpc/mm/40x_mmu.c
+++ b/arch/powerpc/mm/nohash/40x.c
@@ -49,7 +49,7 @@
#include <asm/machdep.h>
#include <asm/setup.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
extern int __map_without_ltlbs;
/*
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/nohash/44x.c
index aad127acdbaa..c07983ebc02e 100644
--- a/arch/powerpc/mm/44x_mmu.c
+++ b/arch/powerpc/mm/nohash/44x.c
@@ -31,7 +31,7 @@
#include <asm/cacheflush.h>
#include <asm/code-patching.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
/* Used by the 44x TLB replacement exception handler.
* Just needed it declared someplace.
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/nohash/8xx.c
index fe1f6443d57f..70d55b615b62 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -17,7 +17,7 @@
#include <asm/fixmap.h>
#include <asm/code-patching.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
#define IMMR_SIZE (FIX_IMMR_SIZE << PAGE_SHIFT)
@@ -213,3 +213,27 @@ void flush_instruction_cache(void)
mtspr(SPRN_IC_CST, IDC_INVALL);
isync();
}
+
+#ifdef CONFIG_PPC_KUEP
+void __init setup_kuep(bool disabled)
+{
+ if (disabled)
+ return;
+
+ pr_info("Activating Kernel Userspace Execution Prevention\n");
+
+ mtspr(SPRN_MI_AP, MI_APG_KUEP);
+}
+#endif
+
+#ifdef CONFIG_PPC_KUAP
+void __init setup_kuap(bool disabled)
+{
+ pr_info("Activating Kernel Userspace Access Protection\n");
+
+ if (disabled)
+ pr_warn("KUAP cannot be disabled yet on 8xx when compiled in\n");
+
+ mtspr(SPRN_MD_AP, MD_APG_KUAP);
+}
+#endif
diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile
new file mode 100644
index 000000000000..33b6f6f29d3f
--- /dev/null
+++ b/arch/powerpc/mm/nohash/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+
+ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
+
+obj-y += mmu_context.o tlb.o tlb_low.o
+obj-$(CONFIG_PPC_BOOK3E_64) += tlb_low_64e.o book3e_pgtable.o
+obj-$(CONFIG_40x) += 40x.o
+obj-$(CONFIG_44x) += 44x.o
+obj-$(CONFIG_PPC_8xx) += 8xx.o
+obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke.o
+ifdef CONFIG_HUGETLB_PAGE
+obj-$(CONFIG_PPC_FSL_BOOK3E) += book3e_hugetlbpage.o
+endif
+
+# Disable kcov instrumentation on sensitive code
+# This is necessary for booting with kcov enabled on book3e machines
+KCOV_INSTRUMENT_tlb.o := n
+KCOV_INSTRUMENT_fsl_booke.o := n
diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c
index f84ec46cdb26..61915f4d3c7f 100644
--- a/arch/powerpc/mm/hugetlbpage-book3e.c
+++ b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c
@@ -11,8 +11,9 @@
#include <asm/mmu.h>
-#ifdef CONFIG_PPC_FSL_BOOK3E
#ifdef CONFIG_PPC64
+#include <asm/paca.h>
+
static inline int tlb1_next(void)
{
struct paca_struct *paca = get_paca();
@@ -29,33 +30,6 @@ static inline int tlb1_next(void)
tcd->esel_next = next;
return this;
}
-#else
-static inline int tlb1_next(void)
-{
- int index, ncams;
-
- ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
-
- index = this_cpu_read(next_tlbcam_idx);
-
- /* Just round-robin the entries and wrap when we hit the end */
- if (unlikely(index == ncams - 1))
- __this_cpu_write(next_tlbcam_idx, tlbcam_index);
- else
- __this_cpu_inc(next_tlbcam_idx);
-
- return index;
-}
-#endif /* !PPC64 */
-#endif /* FSL */
-
-static inline int mmu_get_tsize(int psize)
-{
- return mmu_psize_defs[psize].enc;
-}
-
-#if defined(CONFIG_PPC_FSL_BOOK3E) && defined(CONFIG_PPC64)
-#include <asm/paca.h>
static inline void book3e_tlb_lock(void)
{
@@ -98,6 +72,23 @@ static inline void book3e_tlb_unlock(void)
paca->tcd_ptr->lock = 0;
}
#else
+static inline int tlb1_next(void)
+{
+ int index, ncams;
+
+ ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
+
+ index = this_cpu_read(next_tlbcam_idx);
+
+ /* Just round-robin the entries and wrap when we hit the end */
+ if (unlikely(index == ncams - 1))
+ __this_cpu_write(next_tlbcam_idx, tlbcam_index);
+ else
+ __this_cpu_inc(next_tlbcam_idx);
+
+ return index;
+}
+
static inline void book3e_tlb_lock(void)
{
}
@@ -139,10 +130,7 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
unsigned long psize, tsize, shift;
unsigned long flags;
struct mm_struct *mm;
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
int index;
-#endif
if (unlikely(is_kernel_addr(ea)))
return;
@@ -166,11 +154,9 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
return;
}
-#ifdef CONFIG_PPC_FSL_BOOK3E
/* We have to use the CAM(TLB1) on FSL parts for hugepages */
index = tlb1_next();
mtspr(SPRN_MAS0, MAS0_ESEL(index) | MAS0_TLBSEL(1));
-#endif
mas1 = MAS1_VALID | MAS1_TID(mm->context.id) | MAS1_TSIZE(tsize);
mas2 = ea & ~((1UL << shift) - 1);
diff --git a/arch/powerpc/mm/pgtable-book3e.c b/arch/powerpc/mm/nohash/book3e_pgtable.c
index 1032ef7aaf62..75e9e2c35fe2 100644
--- a/arch/powerpc/mm/pgtable-book3e.c
+++ b/arch/powerpc/mm/nohash/book3e_pgtable.c
@@ -15,7 +15,7 @@
#include <asm/tlb.h>
#include <asm/dma.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
@@ -55,7 +55,7 @@ void vmemmap_remove_mapping(unsigned long start,
#endif
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
-static __ref void *early_alloc_pgtable(unsigned long size)
+static void __init *early_alloc_pgtable(unsigned long size)
{
void *ptr;
@@ -74,7 +74,7 @@ static __ref void *early_alloc_pgtable(unsigned long size)
* map_kernel_page adds an entry to the ioremap page table
* and adds an entry to the HPT, possibly bolting it
*/
-int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
+int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
{
pgd_t *pgdp;
pud_t *pudp;
@@ -98,20 +98,17 @@ int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
#ifndef __PAGETABLE_PUD_FOLDED
if (pgd_none(*pgdp)) {
pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
- BUG_ON(pudp == NULL);
pgd_populate(&init_mm, pgdp, pudp);
}
#endif /* !__PAGETABLE_PUD_FOLDED */
pudp = pud_offset(pgdp, ea);
if (pud_none(*pudp)) {
pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
- BUG_ON(pmdp == NULL);
pud_populate(&init_mm, pudp, pmdp);
}
pmdp = pmd_offset(pudp, ea);
if (!pmd_present(*pmdp)) {
ptep = early_alloc_pgtable(PAGE_SIZE);
- BUG_ON(ptep == NULL);
pmd_populate_kernel(&init_mm, pmdp, ptep);
}
ptep = pte_offset_kernel(pmdp, ea);
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/nohash/fsl_booke.c
index 210cbc1faf63..71a1a36751dd 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/nohash/fsl_booke.c
@@ -54,7 +54,7 @@
#include <asm/setup.h>
#include <asm/paca.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
unsigned int tlbcam_index;
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/nohash/mmu_context.c
index 1945c5f19f5e..ae4505d5b4b8 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/nohash/mmu_context.c
@@ -52,7 +52,7 @@
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
/*
* The MPC8xx has only 16 contexts. We rotate through them on each task switch.
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/nohash/tlb.c
index ac23dc1c6535..24f88efb05bf 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -46,7 +46,7 @@
#include <asm/hugetlb.h>
#include <asm/paca.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
/*
* This struct lists the sw-supported page sizes. The hardawre MMU may support
@@ -433,11 +433,7 @@ void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
unsigned long rid = (address & rmask) | 0x1000000000000000ul;
unsigned long vpte = address & ~rmask;
-#ifdef CONFIG_PPC_64K_PAGES
- vpte = (vpte >> (PAGE_SHIFT - 4)) & ~0xfffful;
-#else
vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful;
-#endif
vpte |= rid;
__flush_tlb_page(tlb->mm, vpte, tsize, 0);
}
@@ -625,21 +621,12 @@ static void early_init_this_mmu(void)
case PPC_HTW_IBM:
mas4 |= MAS4_INDD;
-#ifdef CONFIG_PPC_64K_PAGES
- mas4 |= BOOK3E_PAGESZ_256M << MAS4_TSIZED_SHIFT;
- mmu_pte_psize = MMU_PAGE_256M;
-#else
mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT;
mmu_pte_psize = MMU_PAGE_1M;
-#endif
break;
case PPC_HTW_NONE:
-#ifdef CONFIG_PPC_64K_PAGES
- mas4 |= BOOK3E_PAGESZ_64K << MAS4_TSIZED_SHIFT;
-#else
mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT;
-#endif
mmu_pte_psize = mmu_virtual_psize;
break;
}
@@ -800,5 +787,9 @@ void __init early_init_mmu(void)
#ifdef CONFIG_PPC_47x
early_init_mmu_47x();
#endif
+
+#ifdef CONFIG_PPC_MM_SLICES
+ mm_ctx_set_slb_addr_limit(&init_mm.context, SLB_ADDR_LIMIT_DEFAULT);
+#endif
}
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/nohash/tlb_low.S
index e066a658acac..e066a658acac 100644
--- a/arch/powerpc/mm/tlb_nohash_low.S
+++ b/arch/powerpc/mm/nohash/tlb_low.S
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S
index 9ed90064f542..58959ce15415 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/nohash/tlb_low_64e.S
@@ -24,11 +24,7 @@
#include <asm/kvm_booke_hv_asm.h>
#include <asm/feature-fixups.h>
-#ifdef CONFIG_PPC_64K_PAGES
-#define VPTE_PMD_SHIFT (PTE_INDEX_SIZE+1)
-#else
#define VPTE_PMD_SHIFT (PTE_INDEX_SIZE)
-#endif
#define VPTE_PUD_SHIFT (VPTE_PMD_SHIFT + PMD_INDEX_SIZE)
#define VPTE_PGD_SHIFT (VPTE_PUD_SHIFT + PUD_INDEX_SIZE)
#define VPTE_INDEX_SIZE (VPTE_PGD_SHIFT + PGD_INDEX_SIZE)
@@ -167,13 +163,11 @@ MMU_FTR_SECTION_ELSE
ldx r14,r14,r15 /* grab pgd entry */
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
-#ifndef CONFIG_PPC_64K_PAGES
rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
clrrdi r15,r15,3
cmpdi cr0,r14,0
bge tlb_miss_fault_bolted /* Bad pgd entry or hugepage; bail */
ldx r14,r14,r15 /* grab pud entry */
-#endif /* CONFIG_PPC_64K_PAGES */
rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
clrrdi r15,r15,3
@@ -682,18 +676,7 @@ normal_tlb_miss:
* order to handle the weird page table format used by linux
*/
ori r10,r15,0x1
-#ifdef CONFIG_PPC_64K_PAGES
- /* For the top bits, 16 bytes per PTE */
- rldicl r14,r16,64-(PAGE_SHIFT-4),PAGE_SHIFT-4+4
- /* Now create the bottom bits as 0 in position 0x8000 and
- * the rest calculated for 8 bytes per PTE
- */
- rldicl r15,r16,64-(PAGE_SHIFT-3),64-15
- /* Insert the bottom bits in */
- rlwimi r14,r15,0,16,31
-#else
rldicl r14,r16,64-(PAGE_SHIFT-3),PAGE_SHIFT-3+4
-#endif
sldi r15,r10,60
clrrdi r14,r14,3
or r10,r15,r14
@@ -732,11 +715,7 @@ finish_normal_tlb_miss:
/* Check page size, if not standard, update MAS1 */
rldicl r11,r14,64-8,64-8
-#ifdef CONFIG_PPC_64K_PAGES
- cmpldi cr0,r11,BOOK3E_PAGESZ_64K
-#else
cmpldi cr0,r11,BOOK3E_PAGESZ_4K
-#endif
beq- 1f
mfspr r11,SPRN_MAS1
rlwimi r11,r14,31,21,24
@@ -857,14 +836,12 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
cmpdi cr0,r15,0
bge virt_page_table_tlb_miss_fault
-#ifndef CONFIG_PPC_64K_PAGES
/* Get to PUD entry */
rldicl r11,r16,64-VPTE_PUD_SHIFT,64-PUD_INDEX_SIZE-3
clrrdi r10,r11,3
ldx r15,r10,r15
cmpdi cr0,r15,0
bge virt_page_table_tlb_miss_fault
-#endif /* CONFIG_PPC_64K_PAGES */
/* Get to PMD entry */
rldicl r11,r16,64-VPTE_PMD_SHIFT,64-PMD_INDEX_SIZE-3
@@ -1106,14 +1083,12 @@ htw_tlb_miss:
cmpdi cr0,r15,0
bge htw_tlb_miss_fault
-#ifndef CONFIG_PPC_64K_PAGES
/* Get to PUD entry */
rldicl r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3
clrrdi r10,r11,3
ldx r15,r10,r15
cmpdi cr0,r15,0
bge htw_tlb_miss_fault
-#endif /* CONFIG_PPC_64K_PAGES */
/* Get to PMD entry */
rldicl r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3
@@ -1132,9 +1107,7 @@ htw_tlb_miss:
* 4K page we need to extract a bit from the virtual address and
* insert it into the "PA52" bit of the RPN.
*/
-#ifndef CONFIG_PPC_64K_PAGES
rlwimi r15,r16,32-9,20,20
-#endif
/* Now we build the MAS:
*
* MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG
@@ -1144,11 +1117,7 @@ htw_tlb_miss:
* MAS 2 : Use defaults
* MAS 3+7 : Needs to be done
*/
-#ifdef CONFIG_PPC_64K_PAGES
- ori r10,r15,(BOOK3E_PAGESZ_64K << MAS3_SPSIZE_SHIFT)
-#else
ori r10,r15,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT)
-#endif
BEGIN_MMU_FTR_SECTION
srdi r16,r10,32
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index f976676004ad..57e64273cb33 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -32,7 +32,6 @@
#include <asm/sparsemem.h>
#include <asm/prom.h>
#include <asm/smp.h>
-#include <asm/cputhreads.h>
#include <asm/topology.h>
#include <asm/firmware.h>
#include <asm/paca.h>
@@ -908,16 +907,22 @@ static int __init early_numa(char *p)
}
early_param("numa", early_numa);
-static bool topology_updates_enabled = true;
+/*
+ * The platform can inform us through one of several mechanisms
+ * (post-migration device tree updates, PRRN or VPHN) that the NUMA
+ * assignment of a resource has changed. This controls whether we act
+ * on that. Disabled by default.
+ */
+static bool topology_updates_enabled;
static int __init early_topology_updates(char *p)
{
if (!p)
return 0;
- if (!strcmp(p, "off")) {
- pr_info("Disabling topology updates\n");
- topology_updates_enabled = false;
+ if (!strcmp(p, "on")) {
+ pr_warn("Caution: enabling topology updates\n");
+ topology_updates_enabled = true;
}
return 0;
@@ -1063,7 +1068,7 @@ u64 memory_hotplug_max(void)
/* Virtual Processor Home Node (VPHN) support */
#ifdef CONFIG_PPC_SPLPAR
-#include "vphn.h"
+#include "book3s64/vphn.h"
struct topology_update_data {
struct topology_update_data *next;
@@ -1498,6 +1503,9 @@ int start_topology_update(void)
{
int rc = 0;
+ if (!topology_updates_enabled)
+ return 0;
+
if (firmware_has_feature(FW_FEATURE_PRRN)) {
if (!prrn_enabled) {
prrn_enabled = 1;
@@ -1531,6 +1539,9 @@ int stop_topology_update(void)
{
int rc = 0;
+ if (!topology_updates_enabled)
+ return 0;
+
if (prrn_enabled) {
prrn_enabled = 0;
#ifdef CONFIG_SMP
@@ -1588,11 +1599,13 @@ static ssize_t topology_write(struct file *file, const char __user *buf,
kbuf[read_len] = '\0';
- if (!strncmp(kbuf, "on", 2))
+ if (!strncmp(kbuf, "on", 2)) {
+ topology_updates_enabled = true;
start_topology_update();
- else if (!strncmp(kbuf, "off", 3))
+ } else if (!strncmp(kbuf, "off", 3)) {
stop_topology_update();
- else
+ topology_updates_enabled = false;
+ } else
return -EINVAL;
return count;
@@ -1607,9 +1620,7 @@ static const struct file_operations topology_ops = {
static int topology_update_init(void)
{
- /* Do not poll for changes if disabled at boot */
- if (topology_updates_enabled)
- start_topology_update();
+ start_topology_update();
if (vphn_enabled)
topology_schedule_update();
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index d3d61d29b4f1..db4a6253df92 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -30,6 +30,7 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
+#include <asm/hugetlb.h>
static inline int is_exec_fault(void)
{
@@ -299,3 +300,116 @@ unsigned long vmalloc_to_phys(void *va)
return __pa(pfn_to_kaddr(pfn)) + offset_in_page(va);
}
EXPORT_SYMBOL_GPL(vmalloc_to_phys);
+
+/*
+ * We have 4 cases for pgds and pmds:
+ * (1) invalid (all zeroes)
+ * (2) pointer to next table, as normal; bottom 6 bits == 0
+ * (3) leaf pte for huge page _PAGE_PTE set
+ * (4) hugepd pointer, _PAGE_PTE = 0 and bits [2..6] indicate size of table
+ *
+ * So long as we atomically load page table pointers we are safe against teardown,
+ * we can follow the address down to the the page and take a ref on it.
+ * This function need to be called with interrupts disabled. We use this variant
+ * when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED
+ */
+pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
+ bool *is_thp, unsigned *hpage_shift)
+{
+ pgd_t pgd, *pgdp;
+ pud_t pud, *pudp;
+ pmd_t pmd, *pmdp;
+ pte_t *ret_pte;
+ hugepd_t *hpdp = NULL;
+ unsigned pdshift = PGDIR_SHIFT;
+
+ if (hpage_shift)
+ *hpage_shift = 0;
+
+ if (is_thp)
+ *is_thp = false;
+
+ pgdp = pgdir + pgd_index(ea);
+ pgd = READ_ONCE(*pgdp);
+ /*
+ * Always operate on the local stack value. This make sure the
+ * value don't get updated by a parallel THP split/collapse,
+ * page fault or a page unmap. The return pte_t * is still not
+ * stable. So should be checked there for above conditions.
+ */
+ if (pgd_none(pgd))
+ return NULL;
+
+ if (pgd_huge(pgd)) {
+ ret_pte = (pte_t *)pgdp;
+ goto out;
+ }
+ if (is_hugepd(__hugepd(pgd_val(pgd)))) {
+ hpdp = (hugepd_t *)&pgd;
+ goto out_huge;
+ }
+
+ /*
+ * Even if we end up with an unmap, the pgtable will not
+ * be freed, because we do an rcu free and here we are
+ * irq disabled
+ */
+ pdshift = PUD_SHIFT;
+ pudp = pud_offset(&pgd, ea);
+ pud = READ_ONCE(*pudp);
+
+ if (pud_none(pud))
+ return NULL;
+
+ if (pud_huge(pud)) {
+ ret_pte = (pte_t *)pudp;
+ goto out;
+ }
+ if (is_hugepd(__hugepd(pud_val(pud)))) {
+ hpdp = (hugepd_t *)&pud;
+ goto out_huge;
+ }
+ pdshift = PMD_SHIFT;
+ pmdp = pmd_offset(&pud, ea);
+ pmd = READ_ONCE(*pmdp);
+ /*
+ * A hugepage collapse is captured by pmd_none, because
+ * it mark the pmd none and do a hpte invalidate.
+ */
+ if (pmd_none(pmd))
+ return NULL;
+
+ if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) {
+ if (is_thp)
+ *is_thp = true;
+ ret_pte = (pte_t *)pmdp;
+ goto out;
+ }
+ /*
+ * pmd_large check below will handle the swap pmd pte
+ * we need to do both the check because they are config
+ * dependent.
+ */
+ if (pmd_huge(pmd) || pmd_large(pmd)) {
+ ret_pte = (pte_t *)pmdp;
+ goto out;
+ }
+ if (is_hugepd(__hugepd(pmd_val(pmd)))) {
+ hpdp = (hugepd_t *)&pmd;
+ goto out_huge;
+ }
+
+ return pte_offset_kernel(&pmd, ea);
+
+out_huge:
+ if (!hpdp)
+ return NULL;
+
+ ret_pte = hugepte_offset(*hpdp, ea, pdshift);
+ pdshift = hugepd_shift(*hpdp);
+out:
+ if (hpage_shift)
+ *hpage_shift = pdshift;
+ return ret_pte;
+}
+EXPORT_SYMBOL_GPL(__find_linux_pte);
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 6e56a6240bfa..16ada373b32b 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -36,26 +36,13 @@
#include <asm/setup.h>
#include <asm/sections.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
unsigned long ioremap_bot;
EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */
extern char etext[], _stext[], _sinittext[], _einittext[];
-__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- if (!slab_is_available())
- return memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE);
-
- return (pte_t *)pte_fragment_alloc(mm, 1);
-}
-
-pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
- return (pgtable_t)pte_fragment_alloc(mm, 0);
-}
-
void __iomem *
ioremap(phys_addr_t addr, unsigned long size)
{
@@ -205,7 +192,29 @@ void iounmap(volatile void __iomem *addr)
}
EXPORT_SYMBOL(iounmap);
-int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot)
+static void __init *early_alloc_pgtable(unsigned long size)
+{
+ void *ptr = memblock_alloc(size, size);
+
+ if (!ptr)
+ panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+ __func__, size, size);
+
+ return ptr;
+}
+
+static pte_t __init *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va)
+{
+ if (pmd_none(*pmdp)) {
+ pte_t *ptep = early_alloc_pgtable(PTE_FRAG_SIZE);
+
+ pmd_populate_kernel(&init_mm, pmdp, ptep);
+ }
+ return pte_offset_kernel(pmdp, va);
+}
+
+
+int __ref map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot)
{
pmd_t *pd;
pte_t *pg;
@@ -214,7 +223,10 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot)
/* Use upper 10 bits of VA to index the first level map */
pd = pmd_offset(pud_offset(pgd_offset_k(va), va), va);
/* Use middle 10 bits of VA to index the second-level map */
- pg = pte_alloc_kernel(pd, va);
+ if (likely(slab_is_available()))
+ pg = pte_alloc_kernel(pd, va);
+ else
+ pg = early_pte_alloc_kernel(pd, va);
if (pg != 0) {
err = 0;
/* The PTE should never be already set nor present in the
@@ -384,6 +396,9 @@ void mark_rodata_ro(void)
PFN_DOWN((unsigned long)__start_rodata);
change_page_attr(page, numpages, PAGE_KERNEL_RO);
+
+ // mark_initmem_nx() should have already run by now
+ ptdump_check_wx();
}
#endif
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index fb1375c07e8c..d2d976ff8a0e 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -52,7 +52,7 @@
#include <asm/firmware.h>
#include <asm/dma.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
#ifdef CONFIG_PPC_BOOK3S_64
@@ -90,14 +90,13 @@ unsigned long __pgd_val_bits;
EXPORT_SYMBOL(__pgd_val_bits);
unsigned long __kernel_virt_start;
EXPORT_SYMBOL(__kernel_virt_start);
-unsigned long __kernel_virt_size;
-EXPORT_SYMBOL(__kernel_virt_size);
unsigned long __vmalloc_start;
EXPORT_SYMBOL(__vmalloc_start);
unsigned long __vmalloc_end;
EXPORT_SYMBOL(__vmalloc_end);
unsigned long __kernel_io_start;
EXPORT_SYMBOL(__kernel_io_start);
+unsigned long __kernel_io_end;
struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);
unsigned long __pte_frag_nr;
@@ -121,6 +120,11 @@ void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_
if (pgprot_val(prot) & H_PAGE_4K_PFN)
return NULL;
+ if ((ea + size) >= (void *)IOREMAP_END) {
+ pr_warn("Outside the supported range\n");
+ return NULL;
+ }
+
WARN_ON(pa & ~PAGE_MASK);
WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
WARN_ON(size & ~PAGE_MASK);
@@ -328,6 +332,9 @@ void mark_rodata_ro(void)
radix__mark_rodata_ro();
else
hash__mark_rodata_ro();
+
+ // mark_initmem_nx() should have already run by now
+ ptdump_check_wx();
}
void mark_initmem_nx(void)
diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c
index b430e4e08af6..b9bda0105841 100644
--- a/arch/powerpc/mm/ptdump/hashpagetable.c
+++ b/arch/powerpc/mm/ptdump/hashpagetable.c
@@ -500,7 +500,7 @@ static void populate_markers(void)
address_markers[7].start_address = IOREMAP_BASE;
address_markers[8].start_address = IOREMAP_END;
#ifdef CONFIG_PPC_BOOK3S_64
- address_markers[9].start_address = H_VMEMMAP_BASE;
+ address_markers[9].start_address = H_VMEMMAP_START;
#else
address_markers[9].start_address = VMEMMAP_BASE;
#endif
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 37138428ab55..646876d9da64 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -31,7 +31,7 @@
#include "ptdump.h"
#ifdef CONFIG_PPC32
-#define KERN_VIRT_START 0
+#define KERN_VIRT_START PAGE_OFFSET
#endif
/*
@@ -68,6 +68,8 @@ struct pg_state {
unsigned long last_pa;
unsigned int level;
u64 current_flags;
+ bool check_wx;
+ unsigned long wx_pages;
};
struct addr_marker {
@@ -101,9 +103,25 @@ static struct addr_marker address_markers[] = {
{ 0, "Fixmap start" },
{ 0, "Fixmap end" },
#endif
+#ifdef CONFIG_KASAN
+ { 0, "kasan shadow mem start" },
+ { 0, "kasan shadow mem end" },
+#endif
{ -1, NULL },
};
+#define pt_dump_seq_printf(m, fmt, args...) \
+({ \
+ if (m) \
+ seq_printf(m, fmt, ##args); \
+})
+
+#define pt_dump_seq_putc(m, c) \
+({ \
+ if (m) \
+ seq_putc(m, c); \
+})
+
static void dump_flag_info(struct pg_state *st, const struct flag_info
*flag, u64 pte, int num)
{
@@ -121,19 +139,19 @@ static void dump_flag_info(struct pg_state *st, const struct flag_info
val = pte & flag->val;
if (flag->shift)
val = val >> flag->shift;
- seq_printf(st->seq, " %s:%llx", flag->set, val);
+ pt_dump_seq_printf(st->seq, " %s:%llx", flag->set, val);
} else {
if ((pte & flag->mask) == flag->val)
s = flag->set;
else
s = flag->clear;
if (s)
- seq_printf(st->seq, " %s", s);
+ pt_dump_seq_printf(st->seq, " %s", s);
}
st->current_flags &= ~flag->mask;
}
if (st->current_flags != 0)
- seq_printf(st->seq, " unknown flags:%llx", st->current_flags);
+ pt_dump_seq_printf(st->seq, " unknown flags:%llx", st->current_flags);
}
static void dump_addr(struct pg_state *st, unsigned long addr)
@@ -148,12 +166,12 @@ static void dump_addr(struct pg_state *st, unsigned long addr)
#define REG "0x%08lx"
#endif
- seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1);
+ pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1);
if (st->start_pa == st->last_pa && st->start_address + PAGE_SIZE != addr) {
- seq_printf(st->seq, "[" REG "]", st->start_pa);
+ pt_dump_seq_printf(st->seq, "[" REG "]", st->start_pa);
delta = PAGE_SIZE >> 10;
} else {
- seq_printf(st->seq, " " REG " ", st->start_pa);
+ pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa);
delta = (addr - st->start_address) >> 10;
}
/* Work out what appropriate unit to use */
@@ -161,10 +179,24 @@ static void dump_addr(struct pg_state *st, unsigned long addr)
delta >>= 10;
unit++;
}
- seq_printf(st->seq, "%9lu%c", delta, *unit);
+ pt_dump_seq_printf(st->seq, "%9lu%c", delta, *unit);
}
+static void note_prot_wx(struct pg_state *st, unsigned long addr)
+{
+ if (!st->check_wx)
+ return;
+
+ if (!((st->current_flags & pgprot_val(PAGE_KERNEL_X)) == pgprot_val(PAGE_KERNEL_X)))
+ return;
+
+ WARN_ONCE(1, "powerpc/mm: Found insecure W+X mapping at address %p/%pS\n",
+ (void *)st->start_address, (void *)st->start_address);
+
+ st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
+}
+
static void note_page(struct pg_state *st, unsigned long addr,
unsigned int level, u64 val)
{
@@ -178,7 +210,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
st->start_address = addr;
st->start_pa = pa;
st->last_pa = pa;
- seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
/*
* Dump the section of virtual memory when:
* - the PTE flags from one entry to the next differs.
@@ -194,6 +226,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
/* Check the PTE flags */
if (st->current_flags) {
+ note_prot_wx(st, addr);
dump_addr(st, addr);
/* Dump all the flags */
@@ -202,7 +235,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
st->current_flags,
pg_level[st->level].num);
- seq_putc(st->seq, '\n');
+ pt_dump_seq_putc(st->seq, '\n');
}
/*
@@ -211,7 +244,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
*/
while (addr >= st->marker[1].start_address) {
st->marker++;
- seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
}
st->start_address = addr;
st->start_pa = pa;
@@ -303,8 +336,9 @@ static void populate_markers(void)
address_markers[i++].start_address = PHB_IO_END;
address_markers[i++].start_address = IOREMAP_BASE;
address_markers[i++].start_address = IOREMAP_END;
+ /* What is the ifdef about? */
#ifdef CONFIG_PPC_BOOK3S_64
- address_markers[i++].start_address = H_VMEMMAP_BASE;
+ address_markers[i++].start_address = H_VMEMMAP_START;
#else
address_markers[i++].start_address = VMEMMAP_BASE;
#endif
@@ -322,6 +356,10 @@ static void populate_markers(void)
#endif
address_markers[i++].start_address = FIXADDR_START;
address_markers[i++].start_address = FIXADDR_TOP;
+#ifdef CONFIG_KASAN
+ address_markers[i++].start_address = KASAN_SHADOW_START;
+ address_markers[i++].start_address = KASAN_SHADOW_END;
+#endif
#endif /* CONFIG_PPC64 */
}
@@ -366,6 +404,30 @@ static void build_pgtable_complete_mask(void)
pg_level[i].mask |= pg_level[i].flag[j].mask;
}
+#ifdef CONFIG_PPC_DEBUG_WX
+void ptdump_check_wx(void)
+{
+ struct pg_state st = {
+ .seq = NULL,
+ .marker = address_markers,
+ .check_wx = true,
+ };
+
+ if (radix_enabled())
+ st.start_address = PAGE_OFFSET;
+ else
+ st.start_address = KERN_VIRT_START;
+
+ walk_pagetables(&st);
+
+ if (st.wx_pages)
+ pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n",
+ st.wx_pages);
+ else
+ pr_info("Checked W+X mappings: passed, no W+X pages found\n");
+}
+#endif
+
static int ptdump_init(void)
{
struct dentry *debugfs_file;
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index aec91dbcdc0b..97fbf7b54422 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -101,7 +101,7 @@ static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
{
struct vm_area_struct *vma;
- if ((mm->context.slb_addr_limit - len) < addr)
+ if ((mm_ctx_slb_addr_limit(&mm->context) - len) < addr)
return 0;
vma = find_vma(mm, addr);
return (!vma || (addr + len) <= vm_start_gap(vma));
@@ -118,13 +118,11 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
unsigned long start = slice << SLICE_HIGH_SHIFT;
unsigned long end = start + (1ul << SLICE_HIGH_SHIFT);
-#ifdef CONFIG_PPC64
/* Hack, so that each addresses is controlled by exactly one
* of the high or low area bitmaps, the first high area starts
* at 4GB, not 0 */
if (start == 0)
- start = SLICE_LOW_TOP;
-#endif
+ start = (unsigned long)SLICE_LOW_TOP;
return !slice_area_is_free(mm, start, end - start);
}
@@ -150,40 +148,6 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
__set_bit(i, ret->high_slices);
}
-#ifdef CONFIG_PPC_BOOK3S_64
-static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize)
-{
-#ifdef CONFIG_PPC_64K_PAGES
- if (psize == MMU_PAGE_64K)
- return &mm->context.mask_64k;
-#endif
- if (psize == MMU_PAGE_4K)
- return &mm->context.mask_4k;
-#ifdef CONFIG_HUGETLB_PAGE
- if (psize == MMU_PAGE_16M)
- return &mm->context.mask_16m;
- if (psize == MMU_PAGE_16G)
- return &mm->context.mask_16g;
-#endif
- BUG();
-}
-#elif defined(CONFIG_PPC_8xx)
-static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize)
-{
- if (psize == mmu_virtual_psize)
- return &mm->context.mask_base_psize;
-#ifdef CONFIG_HUGETLB_PAGE
- if (psize == MMU_PAGE_512K)
- return &mm->context.mask_512k;
- if (psize == MMU_PAGE_8M)
- return &mm->context.mask_8m;
-#endif
- BUG();
-}
-#else
-#error "Must define the slice masks for page sizes supported by the platform"
-#endif
-
static bool slice_check_range_fits(struct mm_struct *mm,
const struct slice_mask *available,
unsigned long start, unsigned long len)
@@ -246,14 +210,14 @@ static void slice_convert(struct mm_struct *mm,
slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
slice_print_mask(" mask", mask);
- psize_mask = slice_mask_for_size(mm, psize);
+ psize_mask = slice_mask_for_size(&mm->context, psize);
/* We need to use a spinlock here to protect against
* concurrent 64k -> 4k demotion ...
*/
spin_lock_irqsave(&slice_convert_lock, flags);
- lpsizes = mm->context.low_slices_psize;
+ lpsizes = mm_ctx_low_slices(&mm->context);
for (i = 0; i < SLICE_NUM_LOW; i++) {
if (!(mask->low_slices & (1u << i)))
continue;
@@ -263,7 +227,7 @@ static void slice_convert(struct mm_struct *mm,
/* Update the slice_mask */
old_psize = (lpsizes[index] >> (mask_index * 4)) & 0xf;
- old_mask = slice_mask_for_size(mm, old_psize);
+ old_mask = slice_mask_for_size(&mm->context, old_psize);
old_mask->low_slices &= ~(1u << i);
psize_mask->low_slices |= 1u << i;
@@ -272,8 +236,8 @@ static void slice_convert(struct mm_struct *mm,
(((unsigned long)psize) << (mask_index * 4));
}
- hpsizes = mm->context.high_slices_psize;
- for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
+ hpsizes = mm_ctx_high_slices(&mm->context);
+ for (i = 0; i < GET_HIGH_SLICE_INDEX(mm_ctx_slb_addr_limit(&mm->context)); i++) {
if (!test_bit(i, mask->high_slices))
continue;
@@ -282,7 +246,7 @@ static void slice_convert(struct mm_struct *mm,
/* Update the slice_mask */
old_psize = (hpsizes[index] >> (mask_index * 4)) & 0xf;
- old_mask = slice_mask_for_size(mm, old_psize);
+ old_mask = slice_mask_for_size(&mm->context, old_psize);
__clear_bit(i, old_mask->high_slices);
__set_bit(i, psize_mask->high_slices);
@@ -292,8 +256,8 @@ static void slice_convert(struct mm_struct *mm,
}
slice_dbg(" lsps=%lx, hsps=%lx\n",
- (unsigned long)mm->context.low_slices_psize,
- (unsigned long)mm->context.high_slices_psize);
+ (unsigned long)mm_ctx_low_slices(&mm->context),
+ (unsigned long)mm_ctx_high_slices(&mm->context));
spin_unlock_irqrestore(&slice_convert_lock, flags);
@@ -393,7 +357,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
* DEFAULT_MAP_WINDOW we should apply this.
*/
if (high_limit > DEFAULT_MAP_WINDOW)
- addr += mm->context.slb_addr_limit - DEFAULT_MAP_WINDOW;
+ addr += mm_ctx_slb_addr_limit(&mm->context) - DEFAULT_MAP_WINDOW;
while (addr > min_addr) {
info.high_limit = addr;
@@ -505,20 +469,20 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
return -ENOMEM;
}
- if (high_limit > mm->context.slb_addr_limit) {
+ if (high_limit > mm_ctx_slb_addr_limit(&mm->context)) {
/*
* Increasing the slb_addr_limit does not require
* slice mask cache to be recalculated because it should
* be already initialised beyond the old address limit.
*/
- mm->context.slb_addr_limit = high_limit;
+ mm_ctx_set_slb_addr_limit(&mm->context, high_limit);
on_each_cpu(slice_flush_segments, mm, 1);
}
/* Sanity checks */
BUG_ON(mm->task_size == 0);
- BUG_ON(mm->context.slb_addr_limit == 0);
+ BUG_ON(mm_ctx_slb_addr_limit(&mm->context) == 0);
VM_BUG_ON(radix_enabled());
slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
@@ -538,7 +502,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
/* First make up a "good" mask of slices that have the right size
* already
*/
- maskp = slice_mask_for_size(mm, psize);
+ maskp = slice_mask_for_size(&mm->context, psize);
/*
* Here "good" means slices that are already the right page size,
@@ -565,7 +529,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
* a pointer to good mask for the next code to use.
*/
if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) {
- compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K);
+ compat_maskp = slice_mask_for_size(&mm->context, MMU_PAGE_4K);
if (fixed)
slice_or_mask(&good_mask, maskp, compat_maskp);
else
@@ -642,14 +606,13 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
newaddr = slice_find_area(mm, len, &potential_mask,
psize, topdown, high_limit);
-#ifdef CONFIG_PPC_64K_PAGES
- if (newaddr == -ENOMEM && psize == MMU_PAGE_64K) {
+ if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && newaddr == -ENOMEM &&
+ psize == MMU_PAGE_64K) {
/* retry the search with 4k-page slices included */
slice_or_mask(&potential_mask, &potential_mask, compat_maskp);
newaddr = slice_find_area(mm, len, &potential_mask,
psize, topdown, high_limit);
}
-#endif
if (newaddr == -ENOMEM)
return -ENOMEM;
@@ -696,7 +659,7 @@ unsigned long arch_get_unmapped_area(struct file *filp,
unsigned long flags)
{
return slice_get_unmapped_area(addr, len, flags,
- current->mm->context.user_psize, 0);
+ mm_ctx_user_psize(&current->mm->context), 0);
}
unsigned long arch_get_unmapped_area_topdown(struct file *filp,
@@ -706,7 +669,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
const unsigned long flags)
{
return slice_get_unmapped_area(addr0, len, flags,
- current->mm->context.user_psize, 1);
+ mm_ctx_user_psize(&current->mm->context), 1);
}
unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
@@ -717,10 +680,10 @@ unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
VM_BUG_ON(radix_enabled());
if (slice_addr_is_low(addr)) {
- psizes = mm->context.low_slices_psize;
+ psizes = mm_ctx_low_slices(&mm->context);
index = GET_LOW_SLICE_INDEX(addr);
} else {
- psizes = mm->context.high_slices_psize;
+ psizes = mm_ctx_high_slices(&mm->context);
index = GET_HIGH_SLICE_INDEX(addr);
}
mask_index = index & 0x1;
@@ -741,27 +704,22 @@ void slice_init_new_context_exec(struct mm_struct *mm)
* case of fork it is just inherited from the mm being
* duplicated.
*/
-#ifdef CONFIG_PPC64
- mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
-#else
- mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
-#endif
-
- mm->context.user_psize = psize;
+ mm_ctx_set_slb_addr_limit(&mm->context, SLB_ADDR_LIMIT_DEFAULT);
+ mm_ctx_set_user_psize(&mm->context, psize);
/*
* Set all slice psizes to the default.
*/
- lpsizes = mm->context.low_slices_psize;
+ lpsizes = mm_ctx_low_slices(&mm->context);
memset(lpsizes, (psize << 4) | psize, SLICE_NUM_LOW >> 1);
- hpsizes = mm->context.high_slices_psize;
+ hpsizes = mm_ctx_high_slices(&mm->context);
memset(hpsizes, (psize << 4) | psize, SLICE_NUM_HIGH >> 1);
/*
* Slice mask cache starts zeroed, fill the default size cache.
*/
- mask = slice_mask_for_size(mm, psize);
+ mask = slice_mask_for_size(&mm->context, psize);
mask->low_slices = ~0UL;
if (SLICE_NUM_HIGH)
bitmap_fill(mask->high_slices, SLICE_NUM_HIGH);
@@ -777,7 +735,7 @@ void slice_setup_new_exec(void)
if (!is_32bit_task())
return;
- mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
+ mm_ctx_set_slb_addr_limit(&mm->context, DEFAULT_MAP_WINDOW);
}
#endif
@@ -816,22 +774,21 @@ int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
unsigned long len)
{
const struct slice_mask *maskp;
- unsigned int psize = mm->context.user_psize;
+ unsigned int psize = mm_ctx_user_psize(&mm->context);
VM_BUG_ON(radix_enabled());
- maskp = slice_mask_for_size(mm, psize);
-#ifdef CONFIG_PPC_64K_PAGES
+ maskp = slice_mask_for_size(&mm->context, psize);
+
/* We need to account for 4k slices too */
- if (psize == MMU_PAGE_64K) {
+ if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) {
const struct slice_mask *compat_maskp;
struct slice_mask available;
- compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K);
+ compat_maskp = slice_mask_for_size(&mm->context, MMU_PAGE_4K);
slice_or_mask(&available, maskp, compat_maskp);
return !slice_check_range_fits(mm, &available, addr, len);
}
-#endif
return !slice_check_range_fits(mm, maskp, addr, len);
}
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index ab26df5bacb9..c155dcbb8691 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -5,7 +5,8 @@ obj-$(CONFIG_PERF_EVENTS) += callchain.o perf_regs.o
obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o
obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \
power5+-pmu.o power6-pmu.o power7-pmu.o \
- isa207-common.o power8-pmu.o power9-pmu.o
+ isa207-common.o power8-pmu.o power9-pmu.o \
+ generic-compat-pmu.o
obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o
obj-$(CONFIG_PPC_POWERNV) += imc-pmu.o
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index b0723002a396..a66fb9c01c9e 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -22,6 +22,10 @@
#include <asm/ptrace.h>
#include <asm/code-patching.h>
+#ifdef CONFIG_PPC64
+#include "internal.h"
+#endif
+
#define BHRB_MAX_ENTRIES 32
#define BHRB_TARGET 0x0000000000000002
#define BHRB_PREDICTION 0x0000000000000001
@@ -2294,3 +2298,27 @@ int register_power_pmu(struct power_pmu *pmu)
power_pmu_prepare_cpu, NULL);
return 0;
}
+
+#ifdef CONFIG_PPC64
+static int __init init_ppc64_pmu(void)
+{
+ /* run through all the pmu drivers one at a time */
+ if (!init_power5_pmu())
+ return 0;
+ else if (!init_power5p_pmu())
+ return 0;
+ else if (!init_power6_pmu())
+ return 0;
+ else if (!init_power7_pmu())
+ return 0;
+ else if (!init_power8_pmu())
+ return 0;
+ else if (!init_power9_pmu())
+ return 0;
+ else if (!init_ppc970_pmu())
+ return 0;
+ else
+ return init_generic_compat_pmu();
+}
+early_initcall(init_ppc64_pmu);
+#endif
diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c
new file mode 100644
index 000000000000..5e5a54d5588e
--- /dev/null
+++ b/arch/powerpc/perf/generic-compat-pmu.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright 2019 Madhavan Srinivasan, IBM Corporation.
+
+#define pr_fmt(fmt) "generic-compat-pmu: " fmt
+
+#include "isa207-common.h"
+
+/*
+ * Raw event encoding:
+ *
+ * 60 56 52 48 44 40 36 32
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ *
+ * 28 24 20 16 12 8 4 0
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ * [ pmc ] [unit ] [ ] m [ pmcxsel ]
+ * | |
+ * | *- mark
+ * |
+ * |
+ * *- combine
+ *
+ * Below uses IBM bit numbering.
+ *
+ * MMCR1[x:y] = unit (PMCxUNIT)
+ * MMCR1[24] = pmc1combine[0]
+ * MMCR1[25] = pmc1combine[1]
+ * MMCR1[26] = pmc2combine[0]
+ * MMCR1[27] = pmc2combine[1]
+ * MMCR1[28] = pmc3combine[0]
+ * MMCR1[29] = pmc3combine[1]
+ * MMCR1[30] = pmc4combine[0]
+ * MMCR1[31] = pmc4combine[1]
+ *
+ */
+
+/*
+ * Some power9 event codes.
+ */
+#define EVENT(_name, _code) _name = _code,
+
+enum {
+EVENT(PM_CYC, 0x0001e)
+EVENT(PM_INST_CMPL, 0x00002)
+};
+
+#undef EVENT
+
+GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC);
+GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL);
+
+static struct attribute *generic_compat_events_attr[] = {
+ GENERIC_EVENT_PTR(PM_CYC),
+ GENERIC_EVENT_PTR(PM_INST_CMPL),
+ NULL
+};
+
+static struct attribute_group generic_compat_pmu_events_group = {
+ .name = "events",
+ .attrs = generic_compat_events_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-19");
+PMU_FORMAT_ATTR(pmcxsel, "config:0-7");
+PMU_FORMAT_ATTR(mark, "config:8");
+PMU_FORMAT_ATTR(combine, "config:10-11");
+PMU_FORMAT_ATTR(unit, "config:12-15");
+PMU_FORMAT_ATTR(pmc, "config:16-19");
+
+static struct attribute *generic_compat_pmu_format_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_pmcxsel.attr,
+ &format_attr_mark.attr,
+ &format_attr_combine.attr,
+ &format_attr_unit.attr,
+ &format_attr_pmc.attr,
+ NULL,
+};
+
+static struct attribute_group generic_compat_pmu_format_group = {
+ .name = "format",
+ .attrs = generic_compat_pmu_format_attr,
+};
+
+static const struct attribute_group *generic_compat_pmu_attr_groups[] = {
+ &generic_compat_pmu_format_group,
+ &generic_compat_pmu_events_group,
+ NULL,
+};
+
+static int compat_generic_events[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC,
+ [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL,
+};
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(L1I) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(LL) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(NODE) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
+#undef C
+
+static struct power_pmu generic_compat_pmu = {
+ .name = "GENERIC_COMPAT",
+ .n_counter = MAX_PMU_COUNTERS,
+ .add_fields = ISA207_ADD_FIELDS,
+ .test_adder = ISA207_TEST_ADDER,
+ .compute_mmcr = isa207_compute_mmcr,
+ .get_constraint = isa207_get_constraint,
+ .disable_pmc = isa207_disable_pmc,
+ .flags = PPMU_HAS_SIER | PPMU_ARCH_207S,
+ .n_generic = ARRAY_SIZE(compat_generic_events),
+ .generic_events = compat_generic_events,
+ .cache_events = &generic_compat_cache_events,
+ .attr_groups = generic_compat_pmu_attr_groups,
+};
+
+int init_generic_compat_pmu(void)
+{
+ int rc = 0;
+
+ rc = register_power_pmu(&generic_compat_pmu);
+ if (rc)
+ return rc;
+
+ /* Tell userspace that EBB is supported */
+ cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;
+
+ return 0;
+}
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index b1c37cc3fa98..31fa753e2eb2 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -43,12 +43,17 @@ static DEFINE_PER_CPU(u64 *, thread_imc_mem);
static struct imc_pmu *thread_imc_pmu;
static int thread_imc_mem_size;
+/* Trace IMC data structures */
+static DEFINE_PER_CPU(u64 *, trace_imc_mem);
+static struct imc_pmu_ref *trace_imc_refc;
+static int trace_imc_mem_size;
+
static struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
{
return container_of(event->pmu, struct imc_pmu, pmu);
}
-PMU_FORMAT_ATTR(event, "config:0-40");
+PMU_FORMAT_ATTR(event, "config:0-61");
PMU_FORMAT_ATTR(offset, "config:0-31");
PMU_FORMAT_ATTR(rvalue, "config:32");
PMU_FORMAT_ATTR(mode, "config:33-40");
@@ -65,6 +70,25 @@ static struct attribute_group imc_format_group = {
.attrs = imc_format_attrs,
};
+/* Format attribute for imc trace-mode */
+PMU_FORMAT_ATTR(cpmc_reserved, "config:0-19");
+PMU_FORMAT_ATTR(cpmc_event, "config:20-27");
+PMU_FORMAT_ATTR(cpmc_samplesel, "config:28-29");
+PMU_FORMAT_ATTR(cpmc_load, "config:30-61");
+static struct attribute *trace_imc_format_attrs[] = {
+ &format_attr_event.attr,
+ &format_attr_cpmc_reserved.attr,
+ &format_attr_cpmc_event.attr,
+ &format_attr_cpmc_samplesel.attr,
+ &format_attr_cpmc_load.attr,
+ NULL,
+};
+
+static struct attribute_group trace_imc_format_group = {
+.name = "format",
+.attrs = trace_imc_format_attrs,
+};
+
/* Get the cpumask printed to a buffer "buf" */
static ssize_t imc_pmu_cpumask_get_attr(struct device *dev,
struct device_attribute *attr,
@@ -487,6 +511,11 @@ static int nest_imc_event_init(struct perf_event *event)
* Get the base memory addresss for this cpu.
*/
chip_id = cpu_to_chip_id(event->cpu);
+
+ /* Return, if chip_id is not valid */
+ if (chip_id < 0)
+ return -ENODEV;
+
pcni = pmu->mem_info;
do {
if (pcni->id == chip_id) {
@@ -494,7 +523,7 @@ static int nest_imc_event_init(struct perf_event *event)
break;
}
pcni++;
- } while (pcni);
+ } while (pcni->vbase != 0);
if (!flag)
return -ENODEV;
@@ -788,8 +817,11 @@ static int core_imc_event_init(struct perf_event *event)
}
/*
- * Allocates a page of memory for each of the online cpus, and write the
- * physical base address of that page to the LDBAR for that cpu.
+ * Allocates a page of memory for each of the online cpus, and load
+ * LDBAR with 0.
+ * The physical base address of the page allocated for a cpu will be
+ * written to the LDBAR for that cpu, when the thread-imc event
+ * is added.
*
* LDBAR Register Layout:
*
@@ -807,7 +839,7 @@ static int core_imc_event_init(struct perf_event *event)
*/
static int thread_imc_mem_alloc(int cpu_id, int size)
{
- u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, cpu_id);
+ u64 *local_mem = per_cpu(thread_imc_mem, cpu_id);
int nid = cpu_to_node(cpu_id);
if (!local_mem) {
@@ -824,9 +856,7 @@ static int thread_imc_mem_alloc(int cpu_id, int size)
per_cpu(thread_imc_mem, cpu_id) = local_mem;
}
- ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE;
-
- mtspr(SPRN_LDBAR, ldbar_value);
+ mtspr(SPRN_LDBAR, 0);
return 0;
}
@@ -858,6 +888,9 @@ static int thread_imc_event_init(struct perf_event *event)
if (event->attr.type != event->pmu->type)
return -ENOENT;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
/* Sampling not supported */
if (event->hw.sample_period)
return -EINVAL;
@@ -977,6 +1010,7 @@ static int thread_imc_event_add(struct perf_event *event, int flags)
{
int core_id;
struct imc_pmu_ref *ref;
+ u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, smp_processor_id());
if (flags & PERF_EF_START)
imc_event_start(event, flags);
@@ -985,6 +1019,9 @@ static int thread_imc_event_add(struct perf_event *event, int flags)
return -EINVAL;
core_id = smp_processor_id() / threads_per_core;
+ ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE;
+ mtspr(SPRN_LDBAR, ldbar_value);
+
/*
* imc pmus are enabled only when it is used.
* See if this is triggered for the first time.
@@ -1016,11 +1053,7 @@ static void thread_imc_event_del(struct perf_event *event, int flags)
int core_id;
struct imc_pmu_ref *ref;
- /*
- * Take a snapshot and calculate the delta and update
- * the event counter values.
- */
- imc_event_update(event);
+ mtspr(SPRN_LDBAR, 0);
core_id = smp_processor_id() / threads_per_core;
ref = &core_imc_refc[core_id];
@@ -1039,6 +1072,240 @@ static void thread_imc_event_del(struct perf_event *event, int flags)
ref->refc = 0;
}
mutex_unlock(&ref->lock);
+ /*
+ * Take a snapshot and calculate the delta and update
+ * the event counter values.
+ */
+ imc_event_update(event);
+}
+
+/*
+ * Allocate a page of memory for each cpu, and load LDBAR with 0.
+ */
+static int trace_imc_mem_alloc(int cpu_id, int size)
+{
+ u64 *local_mem = per_cpu(trace_imc_mem, cpu_id);
+ int phys_id = cpu_to_node(cpu_id), rc = 0;
+ int core_id = (cpu_id / threads_per_core);
+
+ if (!local_mem) {
+ local_mem = page_address(alloc_pages_node(phys_id,
+ GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
+ __GFP_NOWARN, get_order(size)));
+ if (!local_mem)
+ return -ENOMEM;
+ per_cpu(trace_imc_mem, cpu_id) = local_mem;
+
+ /* Initialise the counters for trace mode */
+ rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_TRACE, __pa((void *)local_mem),
+ get_hard_smp_processor_id(cpu_id));
+ if (rc) {
+ pr_info("IMC:opal init failed for trace imc\n");
+ return rc;
+ }
+ }
+
+ /* Init the mutex, if not already */
+ trace_imc_refc[core_id].id = core_id;
+ mutex_init(&trace_imc_refc[core_id].lock);
+
+ mtspr(SPRN_LDBAR, 0);
+ return 0;
+}
+
+static int ppc_trace_imc_cpu_online(unsigned int cpu)
+{
+ return trace_imc_mem_alloc(cpu, trace_imc_mem_size);
+}
+
+static int ppc_trace_imc_cpu_offline(unsigned int cpu)
+{
+ mtspr(SPRN_LDBAR, 0);
+ return 0;
+}
+
+static int trace_imc_cpu_init(void)
+{
+ return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE,
+ "perf/powerpc/imc_trace:online",
+ ppc_trace_imc_cpu_online,
+ ppc_trace_imc_cpu_offline);
+}
+
+static u64 get_trace_imc_event_base_addr(void)
+{
+ return (u64)per_cpu(trace_imc_mem, smp_processor_id());
+}
+
+/*
+ * Function to parse trace-imc data obtained
+ * and to prepare the perf sample.
+ */
+static int trace_imc_prepare_sample(struct trace_imc_data *mem,
+ struct perf_sample_data *data,
+ u64 *prev_tb,
+ struct perf_event_header *header,
+ struct perf_event *event)
+{
+ /* Sanity checks for a valid record */
+ if (be64_to_cpu(READ_ONCE(mem->tb1)) > *prev_tb)
+ *prev_tb = be64_to_cpu(READ_ONCE(mem->tb1));
+ else
+ return -EINVAL;
+
+ if ((be64_to_cpu(READ_ONCE(mem->tb1)) & IMC_TRACE_RECORD_TB1_MASK) !=
+ be64_to_cpu(READ_ONCE(mem->tb2)))
+ return -EINVAL;
+
+ /* Prepare perf sample */
+ data->ip = be64_to_cpu(READ_ONCE(mem->ip));
+ data->period = event->hw.last_period;
+
+ header->type = PERF_RECORD_SAMPLE;
+ header->size = sizeof(*header) + event->header_size;
+ header->misc = 0;
+
+ if (is_kernel_addr(data->ip))
+ header->misc |= PERF_RECORD_MISC_KERNEL;
+ else
+ header->misc |= PERF_RECORD_MISC_USER;
+
+ perf_event_header__init_id(header, data, event);
+
+ return 0;
+}
+
+static void dump_trace_imc_data(struct perf_event *event)
+{
+ struct trace_imc_data *mem;
+ int i, ret;
+ u64 prev_tb = 0;
+
+ mem = (struct trace_imc_data *)get_trace_imc_event_base_addr();
+ for (i = 0; i < (trace_imc_mem_size / sizeof(struct trace_imc_data));
+ i++, mem++) {
+ struct perf_sample_data data;
+ struct perf_event_header header;
+
+ ret = trace_imc_prepare_sample(mem, &data, &prev_tb, &header, event);
+ if (ret) /* Exit, if not a valid record */
+ break;
+ else {
+ /* If this is a valid record, create the sample */
+ struct perf_output_handle handle;
+
+ if (perf_output_begin(&handle, event, header.size))
+ return;
+
+ perf_output_sample(&handle, &header, &data, event);
+ perf_output_end(&handle);
+ }
+ }
+}
+
+static int trace_imc_event_add(struct perf_event *event, int flags)
+{
+ int core_id = smp_processor_id() / threads_per_core;
+ struct imc_pmu_ref *ref = NULL;
+ u64 local_mem, ldbar_value;
+
+ /* Set trace-imc bit in ldbar and load ldbar with per-thread memory address */
+ local_mem = get_trace_imc_event_base_addr();
+ ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | TRACE_IMC_ENABLE;
+
+ if (core_imc_refc)
+ ref = &core_imc_refc[core_id];
+ if (!ref) {
+ /* If core-imc is not enabled, use trace-imc reference count */
+ if (trace_imc_refc)
+ ref = &trace_imc_refc[core_id];
+ if (!ref)
+ return -EINVAL;
+ }
+ mtspr(SPRN_LDBAR, ldbar_value);
+ mutex_lock(&ref->lock);
+ if (ref->refc == 0) {
+ if (opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE,
+ get_hard_smp_processor_id(smp_processor_id()))) {
+ mutex_unlock(&ref->lock);
+ pr_err("trace-imc: Unable to start the counters for core %d\n", core_id);
+ mtspr(SPRN_LDBAR, 0);
+ return -EINVAL;
+ }
+ }
+ ++ref->refc;
+ mutex_unlock(&ref->lock);
+
+ return 0;
+}
+
+static void trace_imc_event_read(struct perf_event *event)
+{
+ return;
+}
+
+static void trace_imc_event_stop(struct perf_event *event, int flags)
+{
+ u64 local_mem = get_trace_imc_event_base_addr();
+ dump_trace_imc_data(event);
+ memset((void *)local_mem, 0, sizeof(u64));
+}
+
+static void trace_imc_event_start(struct perf_event *event, int flags)
+{
+ return;
+}
+
+static void trace_imc_event_del(struct perf_event *event, int flags)
+{
+ int core_id = smp_processor_id() / threads_per_core;
+ struct imc_pmu_ref *ref = NULL;
+
+ if (core_imc_refc)
+ ref = &core_imc_refc[core_id];
+ if (!ref) {
+ /* If core-imc is not enabled, use trace-imc reference count */
+ if (trace_imc_refc)
+ ref = &trace_imc_refc[core_id];
+ if (!ref)
+ return;
+ }
+ mtspr(SPRN_LDBAR, 0);
+ mutex_lock(&ref->lock);
+ ref->refc--;
+ if (ref->refc == 0) {
+ if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_TRACE,
+ get_hard_smp_processor_id(smp_processor_id()))) {
+ mutex_unlock(&ref->lock);
+ pr_err("trace-imc: Unable to stop the counters for core %d\n", core_id);
+ return;
+ }
+ } else if (ref->refc < 0) {
+ ref->refc = 0;
+ }
+ mutex_unlock(&ref->lock);
+ trace_imc_event_stop(event, flags);
+}
+
+static int trace_imc_event_init(struct perf_event *event)
+{
+ struct task_struct *target;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
+ /* Return if this is a couting event */
+ if (event->attr.sample_period == 0)
+ return -ENOENT;
+
+ event->hw.idx = -1;
+ target = event->hw.target;
+
+ event->pmu->task_ctx_nr = perf_hw_context;
+ return 0;
}
/* update_pmu_ops : Populate the appropriate operations for "pmu" */
@@ -1071,6 +1338,14 @@ static int update_pmu_ops(struct imc_pmu *pmu)
pmu->pmu.cancel_txn = thread_imc_pmu_cancel_txn;
pmu->pmu.commit_txn = thread_imc_pmu_commit_txn;
break;
+ case IMC_DOMAIN_TRACE:
+ pmu->pmu.event_init = trace_imc_event_init;
+ pmu->pmu.add = trace_imc_event_add;
+ pmu->pmu.del = trace_imc_event_del;
+ pmu->pmu.start = trace_imc_event_start;
+ pmu->pmu.stop = trace_imc_event_stop;
+ pmu->pmu.read = trace_imc_event_read;
+ pmu->attr_groups[IMC_FORMAT_ATTR] = &trace_imc_format_group;
default:
break;
}
@@ -1163,6 +1438,18 @@ static void cleanup_all_thread_imc_memory(void)
}
}
+static void cleanup_all_trace_imc_memory(void)
+{
+ int i, order = get_order(trace_imc_mem_size);
+
+ for_each_online_cpu(i) {
+ if (per_cpu(trace_imc_mem, i))
+ free_pages((u64)per_cpu(trace_imc_mem, i), order);
+
+ }
+ kfree(trace_imc_refc);
+}
+
/* Function to free the attr_groups which are dynamically allocated */
static void imc_common_mem_free(struct imc_pmu *pmu_ptr)
{
@@ -1204,6 +1491,11 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE);
cleanup_all_thread_imc_memory();
}
+
+ if (pmu_ptr->domain == IMC_DOMAIN_TRACE) {
+ cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE);
+ cleanup_all_trace_imc_memory();
+ }
}
/*
@@ -1286,6 +1578,27 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
thread_imc_pmu = pmu_ptr;
break;
+ case IMC_DOMAIN_TRACE:
+ /* Update the pmu name */
+ pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
+ if (!pmu_ptr->pmu.name)
+ return -ENOMEM;
+
+ nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
+ trace_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
+ GFP_KERNEL);
+ if (!trace_imc_refc)
+ return -ENOMEM;
+
+ trace_imc_mem_size = pmu_ptr->counter_mem_size;
+ for_each_online_cpu(cpu) {
+ res = trace_imc_mem_alloc(cpu, trace_imc_mem_size);
+ if (res) {
+ cleanup_all_trace_imc_memory();
+ goto err;
+ }
+ }
+ break;
default:
return -EINVAL;
}
@@ -1359,6 +1672,14 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
}
break;
+ case IMC_DOMAIN_TRACE:
+ ret = trace_imc_cpu_init();
+ if (ret) {
+ cleanup_all_trace_imc_memory();
+ goto err_free_mem;
+ }
+
+ break;
default:
return -EINVAL; /* Unknown domain */
}
diff --git a/arch/powerpc/perf/internal.h b/arch/powerpc/perf/internal.h
new file mode 100644
index 000000000000..f755c64da137
--- /dev/null
+++ b/arch/powerpc/perf/internal.h
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright 2019 Madhavan Srinivasan, IBM Corporation.
+
+extern int init_ppc970_pmu(void);
+extern int init_power5_pmu(void);
+extern int init_power5p_pmu(void);
+extern int init_power6_pmu(void);
+extern int init_power7_pmu(void);
+extern int init_power8_pmu(void);
+extern int init_power9_pmu(void);
+extern int init_generic_compat_pmu(void);
diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c
index 0526dac66007..9aa803504cb2 100644
--- a/arch/powerpc/perf/power5+-pmu.c
+++ b/arch/powerpc/perf/power5+-pmu.c
@@ -677,7 +677,7 @@ static struct power_pmu power5p_pmu = {
.cache_events = &power5p_cache_events,
};
-static int __init init_power5p_pmu(void)
+int init_power5p_pmu(void)
{
if (!cur_cpu_spec->oprofile_cpu_type ||
(strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+")
@@ -686,5 +686,3 @@ static int __init init_power5p_pmu(void)
return register_power_pmu(&power5p_pmu);
}
-
-early_initcall(init_power5p_pmu);
diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c
index 4dc99f9f7962..30cb13d081a9 100644
--- a/arch/powerpc/perf/power5-pmu.c
+++ b/arch/powerpc/perf/power5-pmu.c
@@ -618,7 +618,7 @@ static struct power_pmu power5_pmu = {
.flags = PPMU_HAS_SSLOT,
};
-static int __init init_power5_pmu(void)
+int init_power5_pmu(void)
{
if (!cur_cpu_spec->oprofile_cpu_type ||
strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5"))
@@ -626,5 +626,3 @@ static int __init init_power5_pmu(void)
return register_power_pmu(&power5_pmu);
}
-
-early_initcall(init_power5_pmu);
diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c
index 9c9d646b68a1..80ec48632cfe 100644
--- a/arch/powerpc/perf/power6-pmu.c
+++ b/arch/powerpc/perf/power6-pmu.c
@@ -540,7 +540,7 @@ static struct power_pmu power6_pmu = {
.cache_events = &power6_cache_events,
};
-static int __init init_power6_pmu(void)
+int init_power6_pmu(void)
{
if (!cur_cpu_spec->oprofile_cpu_type ||
strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6"))
@@ -548,5 +548,3 @@ static int __init init_power6_pmu(void)
return register_power_pmu(&power6_pmu);
}
-
-early_initcall(init_power6_pmu);
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index 6dbae9884ec4..bb6efd5d2530 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -445,7 +445,7 @@ static struct power_pmu power7_pmu = {
.cache_events = &power7_cache_events,
};
-static int __init init_power7_pmu(void)
+int init_power7_pmu(void)
{
if (!cur_cpu_spec->oprofile_cpu_type ||
strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7"))
@@ -456,5 +456,3 @@ static int __init init_power7_pmu(void)
return register_power_pmu(&power7_pmu);
}
-
-early_initcall(init_power7_pmu);
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index d12a2db26353..bcc3409a06de 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -379,7 +379,7 @@ static struct power_pmu power8_pmu = {
.bhrb_nr = 32,
};
-static int __init init_power8_pmu(void)
+int init_power8_pmu(void)
{
int rc;
@@ -399,4 +399,3 @@ static int __init init_power8_pmu(void)
return 0;
}
-early_initcall(init_power8_pmu);
diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h
index 063c9d9f2516..6b1dc9a83ede 100644
--- a/arch/powerpc/perf/power9-events-list.h
+++ b/arch/powerpc/perf/power9-events-list.h
@@ -63,8 +63,6 @@ EVENT(PM_RUN_CYC_ALT, 0x200f4)
/* Instruction Dispatched */
EVENT(PM_INST_DISP, 0x200f2)
EVENT(PM_INST_DISP_ALT, 0x300f2)
-/* Alternate Branch event code */
-EVENT(PM_BR_CMPL_ALT, 0x10012)
/* Branch event that are not strongly biased */
EVENT(PM_BR_2PATH, 0x20036)
/* ALternate branch event that are not strongly biased */
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index 030544e35959..3a31ac6f4805 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -437,7 +437,7 @@ static struct power_pmu power9_pmu = {
.bhrb_nr = 32,
};
-static int __init init_power9_pmu(void)
+int init_power9_pmu(void)
{
int rc = 0;
unsigned int pvr = mfspr(SPRN_PVR);
@@ -467,4 +467,3 @@ static int __init init_power9_pmu(void)
return 0;
}
-early_initcall(init_power9_pmu);
diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c
index 8b6a8a36fa38..1d3370914022 100644
--- a/arch/powerpc/perf/ppc970-pmu.c
+++ b/arch/powerpc/perf/ppc970-pmu.c
@@ -490,7 +490,7 @@ static struct power_pmu ppc970_pmu = {
.flags = PPMU_NO_SIPR | PPMU_NO_CONT_SAMPLING,
};
-static int __init init_ppc970_pmu(void)
+int init_ppc970_pmu(void)
{
if (!cur_cpu_spec->oprofile_cpu_type ||
(strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970")
@@ -499,5 +499,3 @@ static int __init init_ppc970_pmu(void)
return register_power_pmu(&ppc970_pmu);
}
-
-early_initcall(init_ppc970_pmu);
diff --git a/arch/powerpc/platforms/83xx/usb.c b/arch/powerpc/platforms/83xx/usb.c
index 5c31d8292d3b..e7c2c3fb011a 100644
--- a/arch/powerpc/platforms/83xx/usb.c
+++ b/arch/powerpc/platforms/83xx/usb.c
@@ -221,8 +221,10 @@ int mpc837x_usb_cfg(void)
int ret = 0;
np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
- if (!np || !of_device_is_available(np))
+ if (!np || !of_device_is_available(np)) {
+ of_node_put(np);
return -ENODEV;
+ }
prop = of_get_property(np, "phy_type", NULL);
if (!prop || (strcmp(prop, "ulpi") && strcmp(prop, "serial"))) {
diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c
index 8d5a25d43ef3..e9617d35fd1f 100644
--- a/arch/powerpc/platforms/8xx/pic.c
+++ b/arch/powerpc/platforms/8xx/pic.c
@@ -153,10 +153,9 @@ int mpc8xx_pic_init(void)
if (mpc8xx_pic_host == NULL) {
printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n");
ret = -ENOMEM;
- goto out;
}
- return 0;
+ ret = 0;
out:
of_node_put(np);
return ret;
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 50cd09b4e05d..d0e172d47574 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -25,6 +25,8 @@ config PPC_BOOK3S_32
bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx"
select PPC_FPU
select PPC_HAVE_PMU_SUPPORT
+ select PPC_HAVE_KUEP
+ select PPC_HAVE_KUAP
config PPC_85xx
bool "Freescale 85xx"
@@ -34,6 +36,9 @@ config PPC_8xx
bool "Freescale 8xx"
select FSL_SOC
select SYS_SUPPORTS_HUGETLBFS
+ select PPC_HAVE_KUEP
+ select PPC_HAVE_KUAP
+ select PPC_MM_SLICES if HUGETLB_PAGE
config 40x
bool "AMCC 40x"
@@ -75,6 +80,7 @@ config PPC_BOOK3S_64
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
select ARCH_SUPPORTS_NUMA_BALANCING
select IRQ_WORK
+ select PPC_MM_SLICES
config PPC_BOOK3E_64
bool "Embedded processors"
@@ -326,6 +332,8 @@ config PPC_RADIX_MMU
bool "Radix MMU Support"
depends on PPC_BOOK3S_64 && HUGETLB_PAGE
select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
+ select PPC_HAVE_KUEP
+ select PPC_HAVE_KUAP
default y
help
Enable support for the Power ISA 3.0 Radix style MMU. Currently this
@@ -345,6 +353,37 @@ config PPC_RADIX_MMU_DEFAULT
If you're unsure, say Y.
+config PPC_HAVE_KUEP
+ bool
+
+config PPC_KUEP
+ bool "Kernel Userspace Execution Prevention"
+ depends on PPC_HAVE_KUEP
+ default y
+ help
+ Enable support for Kernel Userspace Execution Prevention (KUEP)
+
+ If you're unsure, say Y.
+
+config PPC_HAVE_KUAP
+ bool
+
+config PPC_KUAP
+ bool "Kernel Userspace Access Protection"
+ depends on PPC_HAVE_KUAP
+ default y
+ help
+ Enable support for Kernel Userspace Access Protection (KUAP)
+
+ If you're unsure, say Y.
+
+config PPC_KUAP_DEBUG
+ bool "Extra debugging for Kernel Userspace Access Protection"
+ depends on PPC_HAVE_KUAP && (PPC_RADIX_MMU || PPC_32)
+ help
+ Add extra debugging for Kernel Userspace Access Protection (KUAP)
+ If you're unsure, say N.
+
config ARCH_ENABLE_HUGEPAGE_MIGRATION
def_bool y
depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION
@@ -354,14 +393,16 @@ config PPC_MMU_NOHASH
def_bool y
depends on !PPC_BOOK3S
+config PPC_MMU_NOHASH_32
+ def_bool y
+ depends on PPC_MMU_NOHASH && PPC32
+
config PPC_BOOK3E_MMU
def_bool y
depends on FSL_BOOKE || PPC_BOOK3E
config PPC_MM_SLICES
bool
- default y if PPC_BOOK3S_64
- default y if PPC_8xx && HUGETLB_PAGE
config PPC_HAVE_PMU_SUPPORT
bool
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index 7f12c7b78c0f..6646f152d57b 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -194,7 +194,7 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr)
* faults need to be deferred to process context.
*/
if ((dsisr & MFC_DSISR_PTE_NOT_FOUND) &&
- (REGION_ID(ea) != USER_REGION_ID)) {
+ (get_region_id(ea) != USER_REGION_ID)) {
spin_unlock(&spu->register_lock);
ret = hash_page(ea,
@@ -224,7 +224,7 @@ static void __spu_kernel_slb(void *addr, struct copro_slb *slb)
unsigned long ea = (unsigned long)addr;
u64 llp;
- if (REGION_ID(ea) == KERNEL_REGION_ID)
+ if (get_region_id(ea) == LINEAR_MAP_REGION_ID)
llp = mmu_psize_defs[mmu_linear_psize].sllp;
else
llp = mmu_psize_defs[mmu_virtual_psize].sllp;
diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c
index 0409714e8070..829bf3697dc9 100644
--- a/arch/powerpc/platforms/embedded6xx/holly.c
+++ b/arch/powerpc/platforms/embedded6xx/holly.c
@@ -44,7 +44,8 @@
#define HOLLY_PCI_CFG_PHYS 0x7c000000
-int holly_exclude_device(struct pci_controller *hose, u_char bus, u_char devfn)
+static int holly_exclude_device(struct pci_controller *hose, u_char bus,
+ u_char devfn)
{
if (bus == 0 && PCI_SLOT(devfn) == 0)
return PCIBIOS_DEVICE_NOT_FOUND;
@@ -187,13 +188,13 @@ static void __init holly_init_IRQ(void)
tsi108_write_reg(TSI108_MPIC_OFFSET + 0x30c, 0);
}
-void holly_show_cpuinfo(struct seq_file *m)
+static void holly_show_cpuinfo(struct seq_file *m)
{
seq_printf(m, "vendor\t\t: IBM\n");
seq_printf(m, "machine\t\t: PPC750 GX/CL\n");
}
-void __noreturn holly_restart(char *cmd)
+static void __noreturn holly_restart(char *cmd)
{
__be32 __iomem *ocn_bar1 = NULL;
unsigned long bar;
@@ -233,18 +234,6 @@ void __noreturn holly_restart(char *cmd)
for (;;) ;
}
-void holly_power_off(void)
-{
- local_irq_disable();
- /* No way to shut power off with software */
- for (;;) ;
-}
-
-void holly_halt(void)
-{
- holly_power_off();
-}
-
/*
* Called very early, device-tree isn't unflattened
*/
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile
index 20ebf35d7913..f4247ade71ca 100644
--- a/arch/powerpc/platforms/powermac/Makefile
+++ b/arch/powerpc/platforms/powermac/Makefile
@@ -2,6 +2,12 @@
CFLAGS_bootx_init.o += -fPIC
CFLAGS_bootx_init.o += $(call cc-option, -fno-stack-protector)
+KASAN_SANITIZE_bootx_init.o := n
+
+ifdef CONFIG_KASAN
+CFLAGS_bootx_init.o += -DDISABLE_BRANCH_PROFILING
+endif
+
ifdef CONFIG_FUNCTION_TRACER
# Do not trace early boot code
CFLAGS_REMOVE_bootx_init.o = $(CC_FLAGS_FTRACE)
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index e52f9b06dd9c..c9133f7908ca 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -16,6 +16,7 @@
#include <linux/device.h>
#include <linux/cpu.h>
+#include <asm/asm-prototypes.h>
#include <asm/firmware.h>
#include <asm/machdep.h>
#include <asm/opal.h>
@@ -48,10 +49,10 @@ static u64 pnv_default_stop_mask;
static bool default_stop_found;
/*
- * First deep stop state. Used to figure out when to save/restore
- * hypervisor context.
+ * First stop state levels when SPR and TB loss can occur.
*/
-u64 pnv_first_deep_stop_state = MAX_STOP_STATE;
+static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
+static u64 pnv_first_spr_loss_level = MAX_STOP_STATE + 1;
/*
* psscr value and mask of the deepest stop idle state.
@@ -62,6 +63,8 @@ static u64 pnv_deepest_stop_psscr_mask;
static u64 pnv_deepest_stop_flag;
static bool deepest_stop_found;
+static unsigned long power7_offline_type;
+
static int pnv_save_sprs_for_deep_states(void)
{
int cpu;
@@ -72,12 +75,12 @@ static int pnv_save_sprs_for_deep_states(void)
* all cpus at boot. Get these reg values of current cpu and use the
* same across all cpus.
*/
- uint64_t lpcr_val = mfspr(SPRN_LPCR);
- uint64_t hid0_val = mfspr(SPRN_HID0);
- uint64_t hid1_val = mfspr(SPRN_HID1);
- uint64_t hid4_val = mfspr(SPRN_HID4);
- uint64_t hid5_val = mfspr(SPRN_HID5);
- uint64_t hmeer_val = mfspr(SPRN_HMEER);
+ uint64_t lpcr_val = mfspr(SPRN_LPCR);
+ uint64_t hid0_val = mfspr(SPRN_HID0);
+ uint64_t hid1_val = mfspr(SPRN_HID1);
+ uint64_t hid4_val = mfspr(SPRN_HID4);
+ uint64_t hid5_val = mfspr(SPRN_HID5);
+ uint64_t hmeer_val = mfspr(SPRN_HMEER);
uint64_t msr_val = MSR_IDLE;
uint64_t psscr_val = pnv_deepest_stop_psscr_val;
@@ -137,89 +140,6 @@ static int pnv_save_sprs_for_deep_states(void)
return 0;
}
-static void pnv_alloc_idle_core_states(void)
-{
- int i, j;
- int nr_cores = cpu_nr_cores();
- u32 *core_idle_state;
-
- /*
- * core_idle_state - The lower 8 bits track the idle state of
- * each thread of the core.
- *
- * The most significant bit is the lock bit.
- *
- * Initially all the bits corresponding to threads_per_core
- * are set. They are cleared when the thread enters deep idle
- * state like sleep and winkle/stop.
- *
- * Initially the lock bit is cleared. The lock bit has 2
- * purposes:
- * a. While the first thread in the core waking up from
- * idle is restoring core state, it prevents other
- * threads in the core from switching to process
- * context.
- * b. While the last thread in the core is saving the
- * core state, it prevents a different thread from
- * waking up.
- */
- for (i = 0; i < nr_cores; i++) {
- int first_cpu = i * threads_per_core;
- int node = cpu_to_node(first_cpu);
- size_t paca_ptr_array_size;
-
- core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
- *core_idle_state = (1 << threads_per_core) - 1;
- paca_ptr_array_size = (threads_per_core *
- sizeof(struct paca_struct *));
-
- for (j = 0; j < threads_per_core; j++) {
- int cpu = first_cpu + j;
-
- paca_ptrs[cpu]->core_idle_state_ptr = core_idle_state;
- paca_ptrs[cpu]->thread_idle_state = PNV_THREAD_RUNNING;
- paca_ptrs[cpu]->thread_mask = 1 << j;
- }
- }
-
- update_subcore_sibling_mask();
-
- if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
- int rc = pnv_save_sprs_for_deep_states();
-
- if (likely(!rc))
- return;
-
- /*
- * The stop-api is unable to restore hypervisor
- * resources on wakeup from platform idle states which
- * lose full context. So disable such states.
- */
- supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
- pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
- pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
-
- if (cpu_has_feature(CPU_FTR_ARCH_300) &&
- (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
- /*
- * Use the default stop state for CPU-Hotplug
- * if available.
- */
- if (default_stop_found) {
- pnv_deepest_stop_psscr_val =
- pnv_default_stop_val;
- pnv_deepest_stop_psscr_mask =
- pnv_default_stop_mask;
- pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
- pnv_deepest_stop_psscr_val);
- } else { /* Fallback to snooze loop for CPU-Hotplug */
- deepest_stop_found = false;
- pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
- }
- }
- }
-}
-
u32 pnv_get_supported_cpuidle_states(void)
{
return supported_cpuidle_states;
@@ -238,6 +158,9 @@ static void pnv_fastsleep_workaround_apply(void *info)
*err = 1;
}
+static bool power7_fastsleep_workaround_entry = true;
+static bool power7_fastsleep_workaround_exit = true;
+
/*
* Used to store fastsleep workaround state
* 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
@@ -269,21 +192,15 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
* fastsleep_workaround_applyonce = 1 implies
* fastsleep workaround needs to be left in 'applied' state on all
* the cores. Do this by-
- * 1. Patching out the call to 'undo' workaround in fastsleep exit path
- * 2. Sending ipi to all the cores which have at least one online thread
- * 3. Patching out the call to 'apply' workaround in fastsleep entry
- * path
+ * 1. Disable the 'undo' workaround in fastsleep exit path
+ * 2. Sendi IPIs to all the cores which have at least one online thread
+ * 3. Disable the 'apply' workaround in fastsleep entry path
+ *
* There is no need to send ipi to cores which have all threads
* offlined, as last thread of the core entering fastsleep or deeper
* state would have applied workaround.
*/
- err = patch_instruction(
- (unsigned int *)pnv_fastsleep_workaround_at_exit,
- PPC_INST_NOP);
- if (err) {
- pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_exit");
- goto fail;
- }
+ power7_fastsleep_workaround_exit = false;
get_online_cpus();
primary_thread_mask = cpu_online_cores_map();
@@ -296,13 +213,7 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
goto fail;
}
- err = patch_instruction(
- (unsigned int *)pnv_fastsleep_workaround_at_entry,
- PPC_INST_NOP);
- if (err) {
- pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_entry");
- goto fail;
- }
+ power7_fastsleep_workaround_entry = false;
fastsleep_workaround_applyonce = 1;
@@ -315,27 +226,346 @@ static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
show_fastsleep_workaround_applyonce,
store_fastsleep_workaround_applyonce);
-static unsigned long __power7_idle_type(unsigned long type)
+static inline void atomic_start_thread_idle(void)
{
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ int thread_nr = cpu_thread_in_core(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+
+ clear_bit(thread_nr, state);
+}
+
+static inline void atomic_stop_thread_idle(void)
+{
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ int thread_nr = cpu_thread_in_core(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+
+ set_bit(thread_nr, state);
+}
+
+static inline void atomic_lock_thread_idle(void)
+{
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+
+ while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, state)))
+ barrier();
+}
+
+static inline void atomic_unlock_and_stop_thread_idle(void)
+{
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ unsigned long thread = 1UL << cpu_thread_in_core(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+ u64 s = READ_ONCE(*state);
+ u64 new, tmp;
+
+ BUG_ON(!(s & PNV_CORE_IDLE_LOCK_BIT));
+ BUG_ON(s & thread);
+
+again:
+ new = (s | thread) & ~PNV_CORE_IDLE_LOCK_BIT;
+ tmp = cmpxchg(state, s, new);
+ if (unlikely(tmp != s)) {
+ s = tmp;
+ goto again;
+ }
+}
+
+static inline void atomic_unlock_thread_idle(void)
+{
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+
+ BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, state));
+ clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state);
+}
+
+/* P7 and P8 */
+struct p7_sprs {
+ /* per core */
+ u64 tscr;
+ u64 worc;
+
+ /* per subcore */
+ u64 sdr1;
+ u64 rpr;
+
+ /* per thread */
+ u64 lpcr;
+ u64 hfscr;
+ u64 fscr;
+ u64 purr;
+ u64 spurr;
+ u64 dscr;
+ u64 wort;
+
+ /* per thread SPRs that get lost in shallow states */
+ u64 amr;
+ u64 iamr;
+ u64 amor;
+ u64 uamor;
+};
+
+static unsigned long power7_idle_insn(unsigned long type)
+{
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+ unsigned long thread = 1UL << cpu_thread_in_core(cpu);
+ unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
unsigned long srr1;
+ bool full_winkle;
+ struct p7_sprs sprs = {}; /* avoid false use-uninitialised */
+ bool sprs_saved = false;
+ int rc;
- if (!prep_irq_for_idle_irqsoff())
- return 0;
+ if (unlikely(type != PNV_THREAD_NAP)) {
+ atomic_lock_thread_idle();
+
+ BUG_ON(!(*state & thread));
+ *state &= ~thread;
+
+ if (power7_fastsleep_workaround_entry) {
+ if ((*state & core_thread_mask) == 0) {
+ rc = opal_config_cpu_idle_state(
+ OPAL_CONFIG_IDLE_FASTSLEEP,
+ OPAL_CONFIG_IDLE_APPLY);
+ BUG_ON(rc);
+ }
+ }
+
+ if (type == PNV_THREAD_WINKLE) {
+ sprs.tscr = mfspr(SPRN_TSCR);
+ sprs.worc = mfspr(SPRN_WORC);
+
+ sprs.sdr1 = mfspr(SPRN_SDR1);
+ sprs.rpr = mfspr(SPRN_RPR);
+
+ sprs.lpcr = mfspr(SPRN_LPCR);
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ sprs.hfscr = mfspr(SPRN_HFSCR);
+ sprs.fscr = mfspr(SPRN_FSCR);
+ }
+ sprs.purr = mfspr(SPRN_PURR);
+ sprs.spurr = mfspr(SPRN_SPURR);
+ sprs.dscr = mfspr(SPRN_DSCR);
+ sprs.wort = mfspr(SPRN_WORT);
+
+ sprs_saved = true;
+
+ /*
+ * Increment winkle counter and set all winkle bits if
+ * all threads are winkling. This allows wakeup side to
+ * distinguish between fast sleep and winkle state
+ * loss. Fast sleep still has to resync the timebase so
+ * this may not be a really big win.
+ */
+ *state += 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
+ if ((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS)
+ >> PNV_CORE_IDLE_WINKLE_COUNT_SHIFT
+ == threads_per_core)
+ *state |= PNV_CORE_IDLE_THREAD_WINKLE_BITS;
+ WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
+ }
+
+ atomic_unlock_thread_idle();
+ }
+
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ sprs.amr = mfspr(SPRN_AMR);
+ sprs.iamr = mfspr(SPRN_IAMR);
+ sprs.amor = mfspr(SPRN_AMOR);
+ sprs.uamor = mfspr(SPRN_UAMOR);
+ }
+
+ local_paca->thread_idle_state = type;
+ srr1 = isa206_idle_insn_mayloss(type); /* go idle */
+ local_paca->thread_idle_state = PNV_THREAD_RUNNING;
+
+ WARN_ON_ONCE(!srr1);
+ WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
+
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
+ /*
+ * We don't need an isync after the mtsprs here because
+ * the upcoming mtmsrd is execution synchronizing.
+ */
+ mtspr(SPRN_AMR, sprs.amr);
+ mtspr(SPRN_IAMR, sprs.iamr);
+ mtspr(SPRN_AMOR, sprs.amor);
+ mtspr(SPRN_UAMOR, sprs.uamor);
+ }
+ }
+
+ if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
+ hmi_exception_realmode(NULL);
+
+ if (likely((srr1 & SRR1_WAKESTATE) != SRR1_WS_HVLOSS)) {
+ if (unlikely(type != PNV_THREAD_NAP)) {
+ atomic_lock_thread_idle();
+ if (type == PNV_THREAD_WINKLE) {
+ WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
+ *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
+ *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
+ }
+ atomic_unlock_and_stop_thread_idle();
+ }
+ return srr1;
+ }
+
+ /* HV state loss */
+ BUG_ON(type == PNV_THREAD_NAP);
+
+ atomic_lock_thread_idle();
+
+ full_winkle = false;
+ if (type == PNV_THREAD_WINKLE) {
+ WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
+ *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
+ if (*state & (thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT)) {
+ *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
+ full_winkle = true;
+ BUG_ON(!sprs_saved);
+ }
+ }
+
+ WARN_ON(*state & thread);
+
+ if ((*state & core_thread_mask) != 0)
+ goto core_woken;
+
+ /* Per-core SPRs */
+ if (full_winkle) {
+ mtspr(SPRN_TSCR, sprs.tscr);
+ mtspr(SPRN_WORC, sprs.worc);
+ }
+
+ if (power7_fastsleep_workaround_exit) {
+ rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
+ OPAL_CONFIG_IDLE_UNDO);
+ BUG_ON(rc);
+ }
+
+ /* TB */
+ if (opal_resync_timebase() != OPAL_SUCCESS)
+ BUG();
+
+core_woken:
+ if (!full_winkle)
+ goto subcore_woken;
+
+ if ((*state & local_paca->subcore_sibling_mask) != 0)
+ goto subcore_woken;
+
+ /* Per-subcore SPRs */
+ mtspr(SPRN_SDR1, sprs.sdr1);
+ mtspr(SPRN_RPR, sprs.rpr);
+
+subcore_woken:
+ /*
+ * isync after restoring shared SPRs and before unlocking. Unlock
+ * only contains hwsync which does not necessarily do the right
+ * thing for SPRs.
+ */
+ isync();
+ atomic_unlock_and_stop_thread_idle();
+
+ /* Fast sleep does not lose SPRs */
+ if (!full_winkle)
+ return srr1;
+
+ /* Per-thread SPRs */
+ mtspr(SPRN_LPCR, sprs.lpcr);
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ mtspr(SPRN_HFSCR, sprs.hfscr);
+ mtspr(SPRN_FSCR, sprs.fscr);
+ }
+ mtspr(SPRN_PURR, sprs.purr);
+ mtspr(SPRN_SPURR, sprs.spurr);
+ mtspr(SPRN_DSCR, sprs.dscr);
+ mtspr(SPRN_WORT, sprs.wort);
+
+ mtspr(SPRN_SPRG3, local_paca->sprg_vdso);
+
+ /*
+ * The SLB has to be restored here, but it sometimes still
+ * contains entries, so the __ variant must be used to prevent
+ * multi hits.
+ */
+ __slb_restore_bolted_realmode();
+
+ return srr1;
+}
+
+extern unsigned long idle_kvm_start_guest(unsigned long srr1);
+
+#ifdef CONFIG_HOTPLUG_CPU
+static unsigned long power7_offline(void)
+{
+ unsigned long srr1;
+
+ mtmsr(MSR_IDLE);
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /* Tell KVM we're entering idle. */
+ /******************************************************/
+ /* N O T E W E L L ! ! ! N O T E W E L L */
+ /* The following store to HSTATE_HWTHREAD_STATE(r13) */
+ /* MUST occur in real mode, i.e. with the MMU off, */
+ /* and the MMU must stay off until we clear this flag */
+ /* and test HSTATE_HWTHREAD_REQ(r13) in */
+ /* pnv_powersave_wakeup in this file. */
+ /* The reason is that another thread can switch the */
+ /* MMU to a guest context whenever this flag is set */
+ /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */
+ /* that would potentially cause this thread to start */
+ /* executing instructions from guest memory in */
+ /* hypervisor mode, leading to a host crash or data */
+ /* corruption, or worse. */
+ /******************************************************/
+ local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
+#endif
__ppc64_runlatch_off();
- srr1 = power7_idle_insn(type);
+ srr1 = power7_idle_insn(power7_offline_type);
__ppc64_runlatch_on();
- fini_irq_for_idle_irqsoff();
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
+ /* Order setting hwthread_state vs. testing hwthread_req */
+ smp_mb();
+ if (local_paca->kvm_hstate.hwthread_req)
+ srr1 = idle_kvm_start_guest(srr1);
+#endif
+
+ mtmsr(MSR_KERNEL);
return srr1;
}
+#endif
void power7_idle_type(unsigned long type)
{
unsigned long srr1;
- srr1 = __power7_idle_type(type);
+ if (!prep_irq_for_idle_irqsoff())
+ return;
+
+ mtmsr(MSR_IDLE);
+ __ppc64_runlatch_off();
+ srr1 = power7_idle_insn(type);
+ __ppc64_runlatch_on();
+ mtmsr(MSR_KERNEL);
+
+ fini_irq_for_idle_irqsoff();
irq_set_pending_from_srr1(srr1);
}
@@ -347,33 +577,292 @@ void power7_idle(void)
power7_idle_type(PNV_THREAD_NAP);
}
-static unsigned long __power9_idle_type(unsigned long stop_psscr_val,
- unsigned long stop_psscr_mask)
+struct p9_sprs {
+ /* per core */
+ u64 ptcr;
+ u64 rpr;
+ u64 tscr;
+ u64 ldbar;
+
+ /* per thread */
+ u64 lpcr;
+ u64 hfscr;
+ u64 fscr;
+ u64 pid;
+ u64 purr;
+ u64 spurr;
+ u64 dscr;
+ u64 wort;
+
+ u64 mmcra;
+ u32 mmcr0;
+ u32 mmcr1;
+ u64 mmcr2;
+
+ /* per thread SPRs that get lost in shallow states */
+ u64 amr;
+ u64 iamr;
+ u64 amor;
+ u64 uamor;
+};
+
+static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
{
- unsigned long psscr;
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+ unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
unsigned long srr1;
+ unsigned long pls;
+ unsigned long mmcr0 = 0;
+ struct p9_sprs sprs = {}; /* avoid false used-uninitialised */
+ bool sprs_saved = false;
- if (!prep_irq_for_idle_irqsoff())
- return 0;
+ if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
+ /* EC=ESL=0 case */
+
+ BUG_ON(!mmu_on);
+
+ /*
+ * Wake synchronously. SRESET via xscom may still cause
+ * a 0x100 powersave wakeup with SRR1 reason!
+ */
+ srr1 = isa300_idle_stop_noloss(psscr); /* go idle */
+ if (likely(!srr1))
+ return 0;
+
+ /*
+ * Registers not saved, can't recover!
+ * This would be a hardware bug
+ */
+ BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);
+
+ goto out;
+ }
+
+ /* EC=ESL=1 case */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ if (cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG)) {
+ local_paca->requested_psscr = psscr;
+ /* order setting requested_psscr vs testing dont_stop */
+ smp_mb();
+ if (atomic_read(&local_paca->dont_stop)) {
+ local_paca->requested_psscr = 0;
+ return 0;
+ }
+ }
+#endif
+
+ if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
+ /*
+ * POWER9 DD2 can incorrectly set PMAO when waking up
+ * after a state-loss idle. Saving and restoring MMCR0
+ * over idle is a workaround.
+ */
+ mmcr0 = mfspr(SPRN_MMCR0);
+ }
+ if ((psscr & PSSCR_RL_MASK) >= pnv_first_spr_loss_level) {
+ sprs.lpcr = mfspr(SPRN_LPCR);
+ sprs.hfscr = mfspr(SPRN_HFSCR);
+ sprs.fscr = mfspr(SPRN_FSCR);
+ sprs.pid = mfspr(SPRN_PID);
+ sprs.purr = mfspr(SPRN_PURR);
+ sprs.spurr = mfspr(SPRN_SPURR);
+ sprs.dscr = mfspr(SPRN_DSCR);
+ sprs.wort = mfspr(SPRN_WORT);
+
+ sprs.mmcra = mfspr(SPRN_MMCRA);
+ sprs.mmcr0 = mfspr(SPRN_MMCR0);
+ sprs.mmcr1 = mfspr(SPRN_MMCR1);
+ sprs.mmcr2 = mfspr(SPRN_MMCR2);
+
+ sprs.ptcr = mfspr(SPRN_PTCR);
+ sprs.rpr = mfspr(SPRN_RPR);
+ sprs.tscr = mfspr(SPRN_TSCR);
+ sprs.ldbar = mfspr(SPRN_LDBAR);
+
+ sprs_saved = true;
+
+ atomic_start_thread_idle();
+ }
+
+ sprs.amr = mfspr(SPRN_AMR);
+ sprs.iamr = mfspr(SPRN_IAMR);
+ sprs.amor = mfspr(SPRN_AMOR);
+ sprs.uamor = mfspr(SPRN_UAMOR);
+
+ srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ local_paca->requested_psscr = 0;
+#endif
psscr = mfspr(SPRN_PSSCR);
- psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
+ WARN_ON_ONCE(!srr1);
+ WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
+
+ if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
+ unsigned long mmcra;
+
+ /*
+ * We don't need an isync after the mtsprs here because the
+ * upcoming mtmsrd is execution synchronizing.
+ */
+ mtspr(SPRN_AMR, sprs.amr);
+ mtspr(SPRN_IAMR, sprs.iamr);
+ mtspr(SPRN_AMOR, sprs.amor);
+ mtspr(SPRN_UAMOR, sprs.uamor);
+
+ /*
+ * Workaround for POWER9 DD2.0, if we lost resources, the ERAT
+ * might have been corrupted and needs flushing. We also need
+ * to reload MMCR0 (see mmcr0 comment above).
+ */
+ if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
+ asm volatile(PPC_INVALIDATE_ERAT);
+ mtspr(SPRN_MMCR0, mmcr0);
+ }
+
+ /*
+ * DD2.2 and earlier need to set then clear bit 60 in MMCRA
+ * to ensure the PMU starts running.
+ */
+ mmcra = mfspr(SPRN_MMCRA);
+ mmcra |= PPC_BIT(60);
+ mtspr(SPRN_MMCRA, mmcra);
+ mmcra &= ~PPC_BIT(60);
+ mtspr(SPRN_MMCRA, mmcra);
+ }
+
+ if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
+ hmi_exception_realmode(NULL);
+
+ /*
+ * On POWER9, SRR1 bits do not match exactly as expected.
+ * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
+ * just always test PSSCR for SPR/TB state loss.
+ */
+ pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
+ if (likely(pls < pnv_first_spr_loss_level)) {
+ if (sprs_saved)
+ atomic_stop_thread_idle();
+ goto out;
+ }
+
+ /* HV state loss */
+ BUG_ON(!sprs_saved);
+
+ atomic_lock_thread_idle();
+
+ if ((*state & core_thread_mask) != 0)
+ goto core_woken;
+
+ /* Per-core SPRs */
+ mtspr(SPRN_PTCR, sprs.ptcr);
+ mtspr(SPRN_RPR, sprs.rpr);
+ mtspr(SPRN_TSCR, sprs.tscr);
+ mtspr(SPRN_LDBAR, sprs.ldbar);
+
+ if (pls >= pnv_first_tb_loss_level) {
+ /* TB loss */
+ if (opal_resync_timebase() != OPAL_SUCCESS)
+ BUG();
+ }
+
+ /*
+ * isync after restoring shared SPRs and before unlocking. Unlock
+ * only contains hwsync which does not necessarily do the right
+ * thing for SPRs.
+ */
+ isync();
+
+core_woken:
+ atomic_unlock_and_stop_thread_idle();
+
+ /* Per-thread SPRs */
+ mtspr(SPRN_LPCR, sprs.lpcr);
+ mtspr(SPRN_HFSCR, sprs.hfscr);
+ mtspr(SPRN_FSCR, sprs.fscr);
+ mtspr(SPRN_PID, sprs.pid);
+ mtspr(SPRN_PURR, sprs.purr);
+ mtspr(SPRN_SPURR, sprs.spurr);
+ mtspr(SPRN_DSCR, sprs.dscr);
+ mtspr(SPRN_WORT, sprs.wort);
+
+ mtspr(SPRN_MMCRA, sprs.mmcra);
+ mtspr(SPRN_MMCR0, sprs.mmcr0);
+ mtspr(SPRN_MMCR1, sprs.mmcr1);
+ mtspr(SPRN_MMCR2, sprs.mmcr2);
+
+ mtspr(SPRN_SPRG3, local_paca->sprg_vdso);
+
+ if (!radix_enabled())
+ __slb_restore_bolted_realmode();
+
+out:
+ if (mmu_on)
+ mtmsr(MSR_KERNEL);
+
+ return srr1;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static unsigned long power9_offline_stop(unsigned long psscr)
+{
+ unsigned long srr1;
+
+#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
__ppc64_runlatch_off();
- srr1 = power9_idle_stop(psscr);
+ srr1 = power9_idle_stop(psscr, true);
__ppc64_runlatch_on();
+#else
+ /*
+ * Tell KVM we're entering idle.
+ * This does not have to be done in real mode because the P9 MMU
+ * is independent per-thread. Some steppings share radix/hash mode
+ * between threads, but in that case KVM has a barrier sync in real
+ * mode before and after switching between radix and hash.
+ *
+ * kvm_start_guest must still be called in real mode though, hence
+ * the false argument.
+ */
+ local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
- fini_irq_for_idle_irqsoff();
+ __ppc64_runlatch_off();
+ srr1 = power9_idle_stop(psscr, false);
+ __ppc64_runlatch_on();
+
+ local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
+ /* Order setting hwthread_state vs. testing hwthread_req */
+ smp_mb();
+ if (local_paca->kvm_hstate.hwthread_req)
+ srr1 = idle_kvm_start_guest(srr1);
+ mtmsr(MSR_KERNEL);
+#endif
return srr1;
}
+#endif
void power9_idle_type(unsigned long stop_psscr_val,
unsigned long stop_psscr_mask)
{
+ unsigned long psscr;
unsigned long srr1;
- srr1 = __power9_idle_type(stop_psscr_val, stop_psscr_mask);
+ if (!prep_irq_for_idle_irqsoff())
+ return;
+
+ psscr = mfspr(SPRN_PSSCR);
+ psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
+
+ __ppc64_runlatch_off();
+ srr1 = power9_idle_stop(psscr, true);
+ __ppc64_runlatch_on();
+
+ fini_irq_for_idle_irqsoff();
+
irq_set_pending_from_srr1(srr1);
}
@@ -409,7 +898,7 @@ void pnv_power9_force_smt4_catch(void)
atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
}
/* order setting dont_stop vs testing requested_psscr */
- mb();
+ smp_mb();
for (thr = 0; thr < threads_per_core; ++thr) {
if (!paca_ptrs[cpu0+thr]->requested_psscr)
++awake_threads;
@@ -481,7 +970,6 @@ void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
unsigned long pnv_cpu_offline(unsigned int cpu)
{
unsigned long srr1;
- u32 idle_states = pnv_get_supported_cpuidle_states();
__ppc64_runlatch_off();
@@ -492,15 +980,8 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
pnv_deepest_stop_psscr_val;
srr1 = power9_offline_stop(psscr);
-
- } else if ((idle_states & OPAL_PM_WINKLE_ENABLED) &&
- (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
- srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
- } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
- (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
- srr1 = power7_idle_insn(PNV_THREAD_SLEEP);
- } else if (idle_states & OPAL_PM_NAP_ENABLED) {
- srr1 = power7_idle_insn(PNV_THREAD_NAP);
+ } else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) {
+ srr1 = power7_offline();
} else {
/* This is the fallback method. We emulate snooze */
while (!generic_check_cpu_restart(cpu)) {
@@ -596,33 +1077,44 @@ int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
* @dt_idle_states: Number of idle state entries
* Returns 0 on success
*/
-static int __init pnv_power9_idle_init(void)
+static void __init pnv_power9_idle_init(void)
{
u64 max_residency_ns = 0;
int i;
/*
- * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask},
- * and the pnv_default_stop_{val,mask}.
- *
- * pnv_first_deep_stop_state should be set to the first stop
- * level to cause hypervisor state loss.
- *
* pnv_deepest_stop_{val,mask} should be set to values corresponding to
* the deepest stop state.
*
* pnv_default_stop_{val,mask} should be set to values corresponding to
- * the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state.
+ * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state.
*/
- pnv_first_deep_stop_state = MAX_STOP_STATE;
+ pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
+ pnv_first_spr_loss_level = MAX_STOP_STATE + 1;
for (i = 0; i < nr_pnv_idle_states; i++) {
int err;
struct pnv_idle_states_t *state = &pnv_idle_states[i];
u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK;
+ if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
+ (pnv_first_tb_loss_level > psscr_rl))
+ pnv_first_tb_loss_level = psscr_rl;
+
if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
- pnv_first_deep_stop_state > psscr_rl)
- pnv_first_deep_stop_state = psscr_rl;
+ (pnv_first_spr_loss_level > psscr_rl))
+ pnv_first_spr_loss_level = psscr_rl;
+
+ /*
+ * The idle code does not deal with TB loss occurring
+ * in a shallower state than SPR loss, so force it to
+ * behave like SPRs are lost if TB is lost. POWER9 would
+ * never encouter this, but a POWER8 core would if it
+ * implemented the stop instruction. So this is for forward
+ * compatibility.
+ */
+ if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
+ (pnv_first_spr_loss_level > psscr_rl))
+ pnv_first_spr_loss_level = psscr_rl;
err = validate_psscr_val_mask(&state->psscr_val,
&state->psscr_mask,
@@ -647,6 +1139,7 @@ static int __init pnv_power9_idle_init(void)
pnv_default_stop_val = state->psscr_val;
pnv_default_stop_mask = state->psscr_mask;
default_stop_found = true;
+ WARN_ON(state->flags & OPAL_PM_LOSE_FULL_CONTEXT);
}
}
@@ -666,10 +1159,40 @@ static int __init pnv_power9_idle_init(void)
pnv_deepest_stop_psscr_mask);
}
- pr_info("cpuidle-powernv: Requested Level (RL) value of first deep stop = 0x%llx\n",
- pnv_first_deep_stop_state);
+ pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%lld\n",
+ pnv_first_spr_loss_level);
- return 0;
+ pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%lld\n",
+ pnv_first_tb_loss_level);
+}
+
+static void __init pnv_disable_deep_states(void)
+{
+ /*
+ * The stop-api is unable to restore hypervisor
+ * resources on wakeup from platform idle states which
+ * lose full context. So disable such states.
+ */
+ supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
+ pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
+ pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300) &&
+ (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
+ /*
+ * Use the default stop state for CPU-Hotplug
+ * if available.
+ */
+ if (default_stop_found) {
+ pnv_deepest_stop_psscr_val = pnv_default_stop_val;
+ pnv_deepest_stop_psscr_mask = pnv_default_stop_mask;
+ pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
+ pnv_deepest_stop_psscr_val);
+ } else { /* Fallback to snooze loop for CPU-Hotplug */
+ deepest_stop_found = false;
+ pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
+ }
+ }
}
/*
@@ -684,10 +1207,8 @@ static void __init pnv_probe_idle_states(void)
return;
}
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
- if (pnv_power9_idle_init())
- return;
- }
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ pnv_power9_idle_init();
for (i = 0; i < nr_pnv_idle_states; i++)
supported_cpuidle_states |= pnv_idle_states[i].flags;
@@ -807,11 +1328,33 @@ out:
static int __init pnv_init_idle_states(void)
{
+ int cpu;
int rc = 0;
- supported_cpuidle_states = 0;
+
+ /* Set up PACA fields */
+ for_each_present_cpu(cpu) {
+ struct paca_struct *p = paca_ptrs[cpu];
+
+ p->idle_state = 0;
+ if (cpu == cpu_first_thread_sibling(cpu))
+ p->idle_state = (1 << threads_per_core) - 1;
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /* P7/P8 nap */
+ p->thread_idle_state = PNV_THREAD_RUNNING;
+ } else {
+ /* P9 stop */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ p->requested_psscr = 0;
+ atomic_set(&p->dont_stop, 0);
+#endif
+ }
+ }
/* In case we error out nr_pnv_idle_states will be zero */
nr_pnv_idle_states = 0;
+ supported_cpuidle_states = 0;
+
if (cpuidle_disable != IDLE_NO_OVERRIDE)
goto out;
rc = pnv_parse_cpuidle_dt();
@@ -819,27 +1362,40 @@ static int __init pnv_init_idle_states(void)
return rc;
pnv_probe_idle_states();
- if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
- patch_instruction(
- (unsigned int *)pnv_fastsleep_workaround_at_entry,
- PPC_INST_NOP);
- patch_instruction(
- (unsigned int *)pnv_fastsleep_workaround_at_exit,
- PPC_INST_NOP);
- } else {
- /*
- * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
- * workaround is needed to use fastsleep. Provide sysfs
- * control to choose how this workaround has to be applied.
- */
- device_create_file(cpu_subsys.dev_root,
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
+ power7_fastsleep_workaround_entry = false;
+ power7_fastsleep_workaround_exit = false;
+ } else {
+ /*
+ * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
+ * workaround is needed to use fastsleep. Provide sysfs
+ * control to choose how this workaround has to be
+ * applied.
+ */
+ device_create_file(cpu_subsys.dev_root,
&dev_attr_fastsleep_workaround_applyonce);
- }
+ }
+
+ update_subcore_sibling_mask();
+
+ if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) {
+ ppc_md.power_save = power7_idle;
+ power7_offline_type = PNV_THREAD_NAP;
+ }
- pnv_alloc_idle_core_states();
+ if ((supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) &&
+ (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT))
+ power7_offline_type = PNV_THREAD_WINKLE;
+ else if ((supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED) ||
+ (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1))
+ power7_offline_type = PNV_THREAD_SLEEP;
+ }
- if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED)
- ppc_md.power_save = power7_idle;
+ if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
+ if (pnv_save_sprs_for_deep_states())
+ pnv_disable_deep_states();
+ }
out:
return 0;
diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
index daad8c45c8e7..36c8fa3647a2 100644
--- a/arch/powerpc/platforms/powernv/opal-call.c
+++ b/arch/powerpc/platforms/powernv/opal-call.c
@@ -121,6 +121,8 @@ static int64_t opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
#define OPAL_CALL(name, opcode) \
int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3, \
+ int64_t a4, int64_t a5, int64_t a6, int64_t a7); \
+int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3, \
int64_t a4, int64_t a5, int64_t a6, int64_t a7) \
{ \
return opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode); \
@@ -218,6 +220,7 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
OPAL_CALL(opal_get_param, OPAL_GET_PARAM);
OPAL_CALL(opal_set_param, OPAL_SET_PARAM);
OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);
+OPAL_CALL(opal_handle_hmi2, OPAL_HANDLE_HMI2);
OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE);
OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG);
OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION);
@@ -260,6 +263,9 @@ OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO);
OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO);
OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC);
OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP);
+OPAL_CALL(opal_xive_get_queue_state, OPAL_XIVE_GET_QUEUE_STATE);
+OPAL_CALL(opal_xive_set_queue_state, OPAL_XIVE_SET_QUEUE_STATE);
+OPAL_CALL(opal_xive_get_vp_state, OPAL_XIVE_GET_VP_STATE);
OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET);
OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT);
OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT);
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 58a07948c76e..3e497b91d210 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -127,7 +127,7 @@ static int imc_get_mem_addr_nest(struct device_node *node,
nr_chips))
goto error;
- pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(*pmu_ptr->mem_info),
+ pmu_ptr->mem_info = kcalloc(nr_chips + 1, sizeof(*pmu_ptr->mem_info),
GFP_KERNEL);
if (!pmu_ptr->mem_info)
goto error;
@@ -284,6 +284,9 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
case IMC_TYPE_THREAD:
domain = IMC_DOMAIN_THREAD;
break;
+ case IMC_TYPE_TRACE:
+ domain = IMC_DOMAIN_TRACE;
+ break;
default:
pr_warn("IMC Unknown Device type \n");
domain = -1;
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 2b0eca104f86..f2b063b027f0 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -505,7 +505,7 @@ static int opal_recover_mce(struct pt_regs *regs,
recovered = 0;
}
- if (!recovered && evt->severity == MCE_SEV_ERROR_SYNC) {
+ if (!recovered && evt->sync_error) {
/*
* Try to kill processes if we get a synchronous machine check
* (e.g., one caused by execution of this instruction). This
@@ -614,6 +614,27 @@ int opal_hmi_exception_early(struct pt_regs *regs)
return 0;
}
+int opal_hmi_exception_early2(struct pt_regs *regs)
+{
+ s64 rc;
+ __be64 out_flags;
+
+ /*
+ * call opal hmi handler.
+ * Check 64-bit flag mask to find out if an event was generated,
+ * and whether TB is still valid or not etc.
+ */
+ rc = opal_handle_hmi2(&out_flags);
+ if (rc != OPAL_SUCCESS)
+ return 0;
+
+ if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_NEW_EVENT)
+ local_paca->hmi_event_available = 1;
+ if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_TOD_TB_FAIL)
+ tb_invalid = true;
+ return 1;
+}
+
/* HMI exception handler called in virtual mode during check_irq_replay. */
int opal_handle_hmi_exception(struct pt_regs *regs)
{
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 3ead4c237ed0..126602b4e399 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -847,11 +847,11 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN);
if (rc)
- pe_warn(pe, "OPAL error %ld remove self from PELTV\n", rc);
+ pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc);
rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
bcomp, dcomp, fcomp, OPAL_UNMAP_PE);
if (rc)
- pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
+ pe_err(pe, "OPAL error %lld trying to setup PELT table\n", rc);
pe->pbus = NULL;
pe->pdev = NULL;
@@ -1174,11 +1174,12 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
pe->rid = bus->busn_res.start << 8;
if (all)
- pe_info(pe, "Secondary bus %d..%d associated with PE#%x\n",
- bus->busn_res.start, bus->busn_res.end, pe->pe_number);
+ pe_info(pe, "Secondary bus %pad..%pad associated with PE#%x\n",
+ &bus->busn_res.start, &bus->busn_res.end,
+ pe->pe_number);
else
- pe_info(pe, "Secondary bus %d associated with PE#%x\n",
- bus->busn_res.start, pe->pe_number);
+ pe_info(pe, "Secondary bus %pad associated with PE#%x\n",
+ &bus->busn_res.start, pe->pe_number);
if (pnv_ioda_configure_pe(phb, pe)) {
/* XXX What do we do here ? */
@@ -1448,7 +1449,7 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe
tbl = pe->table_group.tables[0];
rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
if (rc)
- pe_warn(pe, "OPAL error %ld release DMA window\n", rc);
+ pe_warn(pe, "OPAL error %lld release DMA window\n", rc);
pnv_pci_ioda2_set_bypass(pe, false);
if (pe->table_group.group) {
@@ -1836,7 +1837,7 @@ static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev,
struct pnv_ioda_pe *pe;
if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
- return -ENODEV;
+ return false;
pe = &phb->ioda.pe_array[pdn->pe_number];
if (pe->tce_bypass_enabled) {
@@ -1859,7 +1860,7 @@ static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev,
/* Configure the bypass mode */
s64 rc = pnv_pci_ioda_dma_64bit_bypass(pe);
if (rc)
- return rc;
+ return false;
/* 4GB offset bypasses 32-bit space */
pdev->dev.archdata.dma_offset = (1ULL << 32);
return true;
@@ -2286,8 +2287,8 @@ found:
__pa(addr) + tce32_segsz * i,
tce32_segsz, IOMMU_PAGE_SIZE_4K);
if (rc) {
- pe_err(pe, " Failed to configure 32-bit TCE table,"
- " err %ld\n", rc);
+ pe_err(pe, " Failed to configure 32-bit TCE table, err %lld\n",
+ rc);
goto fail;
}
}
@@ -2332,9 +2333,9 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
const __u64 win_size = tbl->it_size << tbl->it_page_shift;
- pe_info(pe, "Setting up window#%d %llx..%llx pg=%x\n", num,
- start_addr, start_addr + win_size - 1,
- IOMMU_PAGE_SIZE(tbl));
+ pe_info(pe, "Setting up window#%d %llx..%llx pg=%lx\n",
+ num, start_addr, start_addr + win_size - 1,
+ IOMMU_PAGE_SIZE(tbl));
/*
* Map TCE table through TVT. The TVE index is the PE number
@@ -2348,7 +2349,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
size << 3,
IOMMU_PAGE_SIZE(tbl));
if (rc) {
- pe_err(pe, "Failed to configure TCE table, err %ld\n", rc);
+ pe_err(pe, "Failed to configure TCE table, err %lld\n", rc);
return rc;
}
@@ -3450,7 +3451,7 @@ static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
#ifdef CONFIG_IOMMU_API
rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
if (rc)
- pe_warn(pe, "OPAL error %ld release DMA window\n", rc);
+ pe_warn(pe, "OPAL error %lld release DMA window\n", rc);
#endif
pnv_pci_ioda2_set_bypass(pe, false);
@@ -3484,7 +3485,7 @@ static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe,
phb->ioda.reserved_pe_idx, win, 0, idx);
if (rc != OPAL_SUCCESS)
- pe_warn(pe, "Error %ld unmapping (%d) segment#%d\n",
+ pe_warn(pe, "Error %lld unmapping (%d) segment#%d\n",
rc, win, idx);
map[idx] = IODA_INVALID_PE;
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 8e36da379252..be26ab3d99e0 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -2,6 +2,7 @@
#ifndef __POWERNV_PCI_H
#define __POWERNV_PCI_H
+#include <linux/compiler.h> /* for __printf */
#include <linux/iommu.h>
#include <asm/iommu.h>
#include <asm/msi_bitmap.h>
@@ -204,6 +205,7 @@ extern unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
__u64 window_size, __u32 levels);
extern int pnv_eeh_post_init(void);
+__printf(3, 4)
extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
const char *fmt, ...);
#define pe_err(pe, fmt, ...) \
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 14befee4b3f1..3cf40f689aac 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -401,7 +401,10 @@ static void __init pnv_setup_machdep_opal(void)
/* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */
ppc_md.machine_check_exception = opal_machine_check;
ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
- ppc_md.hmi_exception_early = opal_hmi_exception_early;
+ if (opal_check_token(OPAL_HANDLE_HMI2))
+ ppc_md.hmi_exception_early = opal_hmi_exception_early2;
+ else
+ ppc_md.hmi_exception_early = opal_hmi_exception_early;
ppc_md.handle_hmi_exception = opal_handle_hmi_exception;
}
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index 45563004feda..1d7a9fd30dd1 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -183,7 +183,7 @@ static void unsplit_core(void)
cpu = smp_processor_id();
if (cpu_thread_in_core(cpu) != 0) {
while (mfspr(SPRN_HID0) & mask)
- power7_idle_insn(PNV_THREAD_NAP);
+ power7_idle_type(PNV_THREAD_NAP);
per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT;
return;
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index d291b618a559..47087832f8b2 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -379,7 +379,7 @@ static int dlpar_add_lmb(struct drmem_lmb *);
static int dlpar_remove_lmb(struct drmem_lmb *lmb)
{
unsigned long block_sz;
- int nid, rc;
+ int rc;
if (!lmb_is_removable(lmb))
return -EINVAL;
@@ -389,14 +389,14 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb)
return rc;
block_sz = pseries_memory_block_size();
- nid = memory_add_physaddr_to_nid(lmb->base_addr);
- __remove_memory(nid, lmb->base_addr, block_sz);
+ __remove_memory(lmb->nid, lmb->base_addr, block_sz);
/* Update memory regions for memory remove */
memblock_remove(lmb->base_addr, block_sz);
invalidate_lmb_associativity_index(lmb);
+ lmb_clear_nid(lmb);
lmb->flags &= ~DRCONF_MEM_ASSIGNED;
return 0;
@@ -653,7 +653,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
static int dlpar_add_lmb(struct drmem_lmb *lmb)
{
unsigned long block_sz;
- int nid, rc;
+ int rc;
if (lmb->flags & DRCONF_MEM_ASSIGNED)
return -EINVAL;
@@ -664,13 +664,11 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
return rc;
}
+ lmb_set_nid(lmb);
block_sz = memory_block_size_bytes();
- /* Find the node id for this address */
- nid = memory_add_physaddr_to_nid(lmb->base_addr);
-
/* Add the memory */
- rc = __add_memory(nid, lmb->base_addr, block_sz);
+ rc = __add_memory(lmb->nid, lmb->base_addr, block_sz);
if (rc) {
invalidate_lmb_associativity_index(lmb);
return rc;
@@ -678,8 +676,9 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
rc = dlpar_online_lmb(lmb);
if (rc) {
- __remove_memory(nid, lmb->base_addr, block_sz);
+ __remove_memory(lmb->nid, lmb->base_addr, block_sz);
invalidate_lmb_associativity_index(lmb);
+ lmb_clear_nid(lmb);
} else {
lmb->flags |= DRCONF_MEM_ASSIGNED;
}
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 36eb1ddbac69..03bbb299320e 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -105,7 +105,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
unsigned long attrs)
{
u64 proto_tce;
- __be64 *tcep, *tces;
+ __be64 *tcep;
u64 rpn;
proto_tce = TCE_PCI_READ; // Read allowed
@@ -113,7 +113,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
if (direction != DMA_TO_DEVICE)
proto_tce |= TCE_PCI_WRITE;
- tces = tcep = ((__be64 *)tbl->it_base) + index;
+ tcep = ((__be64 *)tbl->it_base) + index;
while (npages--) {
/* can't move this out since we might cross MEMBLOCK boundary */
@@ -129,9 +129,9 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
{
- __be64 *tcep, *tces;
+ __be64 *tcep;
- tces = tcep = ((__be64 *)tbl->it_base) + index;
+ tcep = ((__be64 *)tbl->it_base) + index;
while (npages--)
*(tcep++) = 0;
@@ -945,7 +945,7 @@ static phys_addr_t ddw_memory_hotplug_max(void)
for_each_node_by_type(memory, "memory") {
unsigned long start, size;
- int ranges, n_mem_addr_cells, n_mem_size_cells, len;
+ int n_mem_addr_cells, n_mem_size_cells, len;
const __be32 *memcell_buf;
memcell_buf = of_get_property(memory, "reg", &len);
@@ -955,9 +955,6 @@ static phys_addr_t ddw_memory_hotplug_max(void)
n_mem_addr_cells = of_n_addr_cells(memory);
n_mem_size_cells = of_n_size_cells(memory);
- /* ranges in cell */
- ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
-
start = of_read_number(memcell_buf, n_mem_addr_cells);
memcell_buf += n_mem_addr_cells;
size = of_read_number(memcell_buf, n_mem_size_cells);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index f2a9f0adc2d3..1034ef1fe2b4 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -901,8 +901,10 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
break;
case H_PARAMETER:
+ pr_warn("Invalid argument from H_RESIZE_HPT_PREPARE\n");
return -EINVAL;
case H_RESOURCE:
+ pr_warn("Operation not permitted from H_RESIZE_HPT_PREPARE\n");
return -EPERM;
default:
pr_warn("Unexpected error %d from H_RESIZE_HPT_PREPARE\n", rc);
@@ -918,7 +920,6 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
if (rc != 0) {
switch (state.commit_rc) {
case H_PTEG_FULL:
- pr_warn("Hash collision while resizing HPT\n");
return -ENOSPC;
default:
diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c
index 27f0a915c8a9..f860a897a9e0 100644
--- a/arch/powerpc/platforms/pseries/pmem.c
+++ b/arch/powerpc/platforms/pseries/pmem.c
@@ -106,7 +106,7 @@ static ssize_t pmem_drc_remove_node(u32 drc_index)
int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
{
- u32 count, drc_index;
+ u32 drc_index;
int rc;
/* slim chance, but we might get a hotplug event while booting */
@@ -123,7 +123,6 @@ int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
return -EINVAL;
}
- count = hp_elog->_drc_u.drc_count;
drc_index = hp_elog->_drc_u.drc_index;
lock_device_hotplug();
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 452dcfd7e5dd..c97d15352f9f 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -539,44 +539,44 @@ static void pseries_print_mce_info(struct pt_regs *regs,
int disposition = rtas_error_disposition(errp);
static const char * const initiators[] = {
- "Unknown",
- "CPU",
- "PCI",
- "ISA",
- "Memory",
- "Power Mgmt",
+ [0] = "Unknown",
+ [1] = "CPU",
+ [2] = "PCI",
+ [3] = "ISA",
+ [4] = "Memory",
+ [5] = "Power Mgmt",
};
static const char * const mc_err_types[] = {
- "UE",
- "SLB",
- "ERAT",
- "Unknown",
- "TLB",
- "D-Cache",
- "Unknown",
- "I-Cache",
+ [0] = "UE",
+ [1] = "SLB",
+ [2] = "ERAT",
+ [3] = "Unknown",
+ [4] = "TLB",
+ [5] = "D-Cache",
+ [6] = "Unknown",
+ [7] = "I-Cache",
};
static const char * const mc_ue_types[] = {
- "Indeterminate",
- "Instruction fetch",
- "Page table walk ifetch",
- "Load/Store",
- "Page table walk Load/Store",
+ [0] = "Indeterminate",
+ [1] = "Instruction fetch",
+ [2] = "Page table walk ifetch",
+ [3] = "Load/Store",
+ [4] = "Page table walk Load/Store",
};
/* SLB sub errors valid values are 0x0, 0x1, 0x2 */
static const char * const mc_slb_types[] = {
- "Parity",
- "Multihit",
- "Indeterminate",
+ [0] = "Parity",
+ [1] = "Multihit",
+ [2] = "Indeterminate",
};
/* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */
static const char * const mc_soft_types[] = {
- "Unknown",
- "Parity",
- "Multihit",
- "Indeterminate",
+ [0] = "Unknown",
+ [1] = "Parity",
+ [2] = "Multihit",
+ [3] = "Indeterminate",
};
if (!rtas_error_extended(errp)) {
@@ -707,6 +707,87 @@ out:
return disposition;
}
+#ifdef CONFIG_MEMORY_FAILURE
+
+static DEFINE_PER_CPU(int, rtas_ue_count);
+static DEFINE_PER_CPU(unsigned long, rtas_ue_paddr[MAX_MC_EVT]);
+
+#define UE_EFFECTIVE_ADDR_PROVIDED 0x40
+#define UE_LOGICAL_ADDR_PROVIDED 0x20
+
+
+static void pseries_hwpoison_work_fn(struct work_struct *work)
+{
+ unsigned long paddr;
+ int index;
+
+ while (__this_cpu_read(rtas_ue_count) > 0) {
+ index = __this_cpu_read(rtas_ue_count) - 1;
+ paddr = __this_cpu_read(rtas_ue_paddr[index]);
+ memory_failure(paddr >> PAGE_SHIFT, 0);
+ __this_cpu_dec(rtas_ue_count);
+ }
+}
+
+static DECLARE_WORK(hwpoison_work, pseries_hwpoison_work_fn);
+
+static void queue_ue_paddr(unsigned long paddr)
+{
+ int index;
+
+ index = __this_cpu_inc_return(rtas_ue_count) - 1;
+ if (index >= MAX_MC_EVT) {
+ __this_cpu_dec(rtas_ue_count);
+ return;
+ }
+ this_cpu_write(rtas_ue_paddr[index], paddr);
+ schedule_work(&hwpoison_work);
+}
+
+static void pseries_do_memory_failure(struct pt_regs *regs,
+ struct pseries_mc_errorlog *mce_log)
+{
+ unsigned long paddr;
+
+ if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
+ paddr = be64_to_cpu(mce_log->logical_address);
+ } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
+ unsigned long pfn;
+
+ pfn = addr_to_pfn(regs,
+ be64_to_cpu(mce_log->effective_address));
+ if (pfn == ULONG_MAX)
+ return;
+ paddr = pfn << PAGE_SHIFT;
+ } else {
+ return;
+ }
+ queue_ue_paddr(paddr);
+}
+
+static void pseries_process_ue(struct pt_regs *regs,
+ struct rtas_error_log *errp)
+{
+ struct pseries_errorlog *pseries_log;
+ struct pseries_mc_errorlog *mce_log;
+
+ if (!rtas_error_extended(errp))
+ return;
+
+ pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
+ if (!pseries_log)
+ return;
+
+ mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
+
+ if (mce_log->error_type == MC_ERROR_TYPE_UE)
+ pseries_do_memory_failure(regs, mce_log);
+}
+#else
+static inline void pseries_process_ue(struct pt_regs *regs,
+ struct rtas_error_log *errp) { }
+#endif /*CONFIG_MEMORY_FAILURE */
+
/*
* Process MCE rtas errlog event.
*/
@@ -765,6 +846,8 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
recovered = 1;
}
+ pseries_process_ue(regs, err);
+
/* Queue irq work to log this rtas event later. */
irq_work_queue(&mce_errlog_process_work);
diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile
index 4314ba5baf43..7c6d8b14f440 100644
--- a/arch/powerpc/purgatory/Makefile
+++ b/arch/powerpc/purgatory/Makefile
@@ -1,4 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
+
+KASAN_SANITIZE := n
+
targets += trampoline.o purgatory.ro kexec-purgatory.c
LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 1ca127d052a6..0c037e933e55 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -437,6 +437,12 @@ void xive_native_sync_source(u32 hw_irq)
}
EXPORT_SYMBOL_GPL(xive_native_sync_source);
+void xive_native_sync_queue(u32 hw_irq)
+{
+ opal_xive_sync(XIVE_SYNC_QUEUE, hw_irq);
+}
+EXPORT_SYMBOL_GPL(xive_native_sync_queue);
+
static const struct xive_ops xive_native_ops = {
.populate_irq_data = xive_native_populate_irq_data,
.configure_irq = xive_native_configure_irq,
@@ -711,3 +717,96 @@ bool xive_native_has_single_escalation(void)
return xive_has_single_esc;
}
EXPORT_SYMBOL_GPL(xive_native_has_single_escalation);
+
+int xive_native_get_queue_info(u32 vp_id, u32 prio,
+ u64 *out_qpage,
+ u64 *out_qsize,
+ u64 *out_qeoi_page,
+ u32 *out_escalate_irq,
+ u64 *out_qflags)
+{
+ __be64 qpage;
+ __be64 qsize;
+ __be64 qeoi_page;
+ __be32 escalate_irq;
+ __be64 qflags;
+ s64 rc;
+
+ rc = opal_xive_get_queue_info(vp_id, prio, &qpage, &qsize,
+ &qeoi_page, &escalate_irq, &qflags);
+ if (rc) {
+ pr_err("OPAL failed to get queue info for VCPU %d/%d : %lld\n",
+ vp_id, prio, rc);
+ return -EIO;
+ }
+
+ if (out_qpage)
+ *out_qpage = be64_to_cpu(qpage);
+ if (out_qsize)
+ *out_qsize = be32_to_cpu(qsize);
+ if (out_qeoi_page)
+ *out_qeoi_page = be64_to_cpu(qeoi_page);
+ if (out_escalate_irq)
+ *out_escalate_irq = be32_to_cpu(escalate_irq);
+ if (out_qflags)
+ *out_qflags = be64_to_cpu(qflags);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_get_queue_info);
+
+int xive_native_get_queue_state(u32 vp_id, u32 prio, u32 *qtoggle, u32 *qindex)
+{
+ __be32 opal_qtoggle;
+ __be32 opal_qindex;
+ s64 rc;
+
+ rc = opal_xive_get_queue_state(vp_id, prio, &opal_qtoggle,
+ &opal_qindex);
+ if (rc) {
+ pr_err("OPAL failed to get queue state for VCPU %d/%d : %lld\n",
+ vp_id, prio, rc);
+ return -EIO;
+ }
+
+ if (qtoggle)
+ *qtoggle = be32_to_cpu(opal_qtoggle);
+ if (qindex)
+ *qindex = be32_to_cpu(opal_qindex);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_get_queue_state);
+
+int xive_native_set_queue_state(u32 vp_id, u32 prio, u32 qtoggle, u32 qindex)
+{
+ s64 rc;
+
+ rc = opal_xive_set_queue_state(vp_id, prio, qtoggle, qindex);
+ if (rc) {
+ pr_err("OPAL failed to set queue state for VCPU %d/%d : %lld\n",
+ vp_id, prio, rc);
+ return -EIO;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_set_queue_state);
+
+int xive_native_get_vp_state(u32 vp_id, u64 *out_state)
+{
+ __be64 state;
+ s64 rc;
+
+ rc = opal_xive_get_vp_state(vp_id, &state);
+ if (rc) {
+ pr_err("OPAL failed to get vp state for VCPU %d : %lld\n",
+ vp_id, rc);
+ return -EIO;
+ }
+
+ if (out_state)
+ *out_state = be64_to_cpu(state);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_get_vp_state);
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index 3050f9323254..f142570ad860 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -7,6 +7,7 @@ subdir-ccflags-y := $(call cc-disable-warning, builtin-requires-header)
GCOV_PROFILE := n
KCOV_INSTRUMENT := n
UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
# Disable ftrace for the entire directory
ORIG_CFLAGS := $(KBUILD_CFLAGS)
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 13c6a47e6150..1b0149b2bb6c 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -80,6 +80,7 @@ static int set_indicator_token = RTAS_UNKNOWN_SERVICE;
#endif
static unsigned long in_xmon __read_mostly = 0;
static int xmon_on = IS_ENABLED(CONFIG_XMON_DEFAULT);
+static bool xmon_is_ro = IS_ENABLED(CONFIG_XMON_DEFAULT_RO_MODE);
static unsigned long adrs;
static int size = 1;
@@ -202,6 +203,8 @@ static void dump_tlb_book3e(void);
#define GETWORD(v) (((v)[0] << 24) + ((v)[1] << 16) + ((v)[2] << 8) + (v)[3])
#endif
+static const char *xmon_ro_msg = "Operation disabled: xmon in read-only mode\n";
+
static char *help_string = "\
Commands:\n\
b show breakpoints\n\
@@ -989,6 +992,10 @@ cmds(struct pt_regs *excp)
memlocate();
break;
case 'z':
+ if (xmon_is_ro) {
+ printf(xmon_ro_msg);
+ break;
+ }
memzcan();
break;
case 'i':
@@ -1042,6 +1049,10 @@ cmds(struct pt_regs *excp)
set_lpp_cmd();
break;
case 'b':
+ if (xmon_is_ro) {
+ printf(xmon_ro_msg);
+ break;
+ }
bpt_cmds();
break;
case 'C':
@@ -1055,6 +1066,10 @@ cmds(struct pt_regs *excp)
bootcmds();
break;
case 'p':
+ if (xmon_is_ro) {
+ printf(xmon_ro_msg);
+ break;
+ }
proccall();
break;
case 'P':
@@ -1777,6 +1792,11 @@ read_spr(int n, unsigned long *vp)
static void
write_spr(int n, unsigned long val)
{
+ if (xmon_is_ro) {
+ printf(xmon_ro_msg);
+ return;
+ }
+
if (setjmp(bus_error_jmp) == 0) {
catch_spr_faults = 1;
sync();
@@ -2016,6 +2036,12 @@ mwrite(unsigned long adrs, void *buf, int size)
char *p, *q;
n = 0;
+
+ if (xmon_is_ro) {
+ printf(xmon_ro_msg);
+ return n;
+ }
+
if (setjmp(bus_error_jmp) == 0) {
catch_memory_errors = 1;
sync();
@@ -2434,7 +2460,6 @@ static void dump_one_paca(int cpu)
DUMP(p, mmiowb_state.mmiowb_pending, "%#-*x");
#endif
DUMP(p, irq_work_pending, "%#-*x");
- DUMP(p, nap_state_lost, "%#-*x");
DUMP(p, sprg_vdso, "%#-*llx");
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -2442,19 +2467,16 @@ static void dump_one_paca(int cpu)
#endif
#ifdef CONFIG_PPC_POWERNV
- DUMP(p, core_idle_state_ptr, "%-*px");
- DUMP(p, thread_idle_state, "%#-*x");
- DUMP(p, thread_mask, "%#-*x");
- DUMP(p, subcore_sibling_mask, "%#-*x");
- DUMP(p, requested_psscr, "%#-*llx");
- DUMP(p, stop_sprs.pid, "%#-*llx");
- DUMP(p, stop_sprs.ldbar, "%#-*llx");
- DUMP(p, stop_sprs.fscr, "%#-*llx");
- DUMP(p, stop_sprs.hfscr, "%#-*llx");
- DUMP(p, stop_sprs.mmcr1, "%#-*llx");
- DUMP(p, stop_sprs.mmcr2, "%#-*llx");
- DUMP(p, stop_sprs.mmcra, "%#-*llx");
- DUMP(p, dont_stop.counter, "%#-*x");
+ DUMP(p, idle_state, "%#-*lx");
+ if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
+ DUMP(p, thread_idle_state, "%#-*x");
+ DUMP(p, subcore_sibling_mask, "%#-*x");
+ } else {
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ DUMP(p, requested_psscr, "%#-*llx");
+ DUMP(p, dont_stop.counter, "%#-*x");
+#endif
+ }
#endif
DUMP(p, accounting.utime, "%#-*lx");
@@ -2887,9 +2909,17 @@ memops(int cmd)
scanhex((void *)&mcount);
switch( cmd ){
case 'm':
+ if (xmon_is_ro) {
+ printf(xmon_ro_msg);
+ break;
+ }
memmove((void *)mdest, (void *)msrc, mcount);
break;
case 's':
+ if (xmon_is_ro) {
+ printf(xmon_ro_msg);
+ break;
+ }
memset((void *)mdest, mval, mcount);
break;
case 'd':
@@ -3799,6 +3829,14 @@ static int __init early_parse_xmon(char *p)
} else if (strncmp(p, "on", 2) == 0) {
xmon_init(1);
xmon_on = 1;
+ } else if (strncmp(p, "rw", 2) == 0) {
+ xmon_init(1);
+ xmon_on = 1;
+ xmon_is_ro = false;
+ } else if (strncmp(p, "ro", 2) == 0) {
+ xmon_init(1);
+ xmon_on = 1;
+ xmon_is_ro = true;
} else if (strncmp(p, "off", 3) == 0)
xmon_on = 0;
else